зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1839422 part 1 - Change RegExpSearcher interface so it can be used for longer strings. r=iain
Change the interface so that, instead of packing the match index and limit into a single int32 value, we now return only the index and store the match limit in a field on the context, from which it can be loaded with a separate intrinsic. This lets us simplify `RegExpReplace` because we no longer need separate functions for short and long strings. Differential Revision: https://phabricator.services.mozilla.com/D181658
This commit is contained in:
Родитель
447af23337
Коммит
f9f4eefdf5
|
@ -277,13 +277,20 @@ bool js::CreateRegExpMatchResult(JSContext* cx, HandleRegExpShared re,
|
|||
return true;
|
||||
}
|
||||
|
||||
static int32_t CreateRegExpSearchResult(const MatchPairs& matches) {
|
||||
/* Fit the start and limit of the match into an int32_t. */
|
||||
uint32_t position = matches[0].start;
|
||||
uint32_t lastIndex = matches[0].limit;
|
||||
MOZ_ASSERT(position < 0x8000);
|
||||
MOZ_ASSERT(lastIndex < 0x8000);
|
||||
return position | (lastIndex << 15);
|
||||
static int32_t CreateRegExpSearchResult(JSContext* cx,
|
||||
const MatchPairs& matches) {
|
||||
MOZ_ASSERT(matches[0].start >= 0);
|
||||
MOZ_ASSERT(matches[0].limit >= 0);
|
||||
|
||||
MOZ_ASSERT(cx->regExpSearcherLastLimit == RegExpSearcherLastLimitSentinel);
|
||||
|
||||
#ifdef DEBUG
|
||||
static_assert(JSString::MAX_LENGTH < RegExpSearcherLastLimitSentinel);
|
||||
MOZ_ASSERT(uint32_t(matches[0].limit) < RegExpSearcherLastLimitSentinel);
|
||||
#endif
|
||||
|
||||
cx->regExpSearcherLastLimit = matches[0].limit;
|
||||
return matches[0].start;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1235,6 +1242,12 @@ static bool RegExpSearcherImpl(JSContext* cx, HandleObject regexp,
|
|||
/* Execute regular expression and gather matches. */
|
||||
VectorMatchPairs matches;
|
||||
|
||||
#ifdef DEBUG
|
||||
// Ensure we assert if RegExpSearcherLastLimit is called when there's no
|
||||
// match.
|
||||
cx->regExpSearcherLastLimit = RegExpSearcherLastLimitSentinel;
|
||||
#endif
|
||||
|
||||
/* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
|
||||
RegExpRunStatus status =
|
||||
ExecuteRegExp(cx, regexp, string, lastIndex, &matches);
|
||||
|
@ -1249,7 +1262,7 @@ static bool RegExpSearcherImpl(JSContext* cx, HandleObject regexp,
|
|||
}
|
||||
|
||||
/* Steps 16-25 */
|
||||
*result = CreateRegExpSearchResult(matches);
|
||||
*result = CreateRegExpSearchResult(cx, matches);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1292,12 +1305,31 @@ bool js::RegExpSearcherRaw(JSContext* cx, HandleObject regexp,
|
|||
// RegExp execution was successful only if the pairs have actually been
|
||||
// filled in. Note that IC code always passes a nullptr maybeMatches.
|
||||
if (maybeMatches && maybeMatches->pairsRaw()[0] > MatchPair::NoMatch) {
|
||||
*result = CreateRegExpSearchResult(*maybeMatches);
|
||||
*result = CreateRegExpSearchResult(cx, *maybeMatches);
|
||||
return true;
|
||||
}
|
||||
return RegExpSearcherImpl(cx, regexp, input, lastIndex, result);
|
||||
}
|
||||
|
||||
bool js::RegExpSearcherLastLimit(JSContext* cx, unsigned argc, Value* vp) {
|
||||
CallArgs args = CallArgsFromVp(argc, vp);
|
||||
MOZ_ASSERT(args.length() == 1);
|
||||
MOZ_ASSERT(args[0].isString());
|
||||
|
||||
// Assert the limit is not the sentinel value and is valid for this string.
|
||||
MOZ_ASSERT(cx->regExpSearcherLastLimit != RegExpSearcherLastLimitSentinel);
|
||||
MOZ_ASSERT(cx->regExpSearcherLastLimit <= args[0].toString()->length());
|
||||
|
||||
args.rval().setInt32(cx->regExpSearcherLastLimit);
|
||||
|
||||
#ifdef DEBUG
|
||||
// Ensure we assert if this function is called again without a new call to
|
||||
// RegExpSearcher.
|
||||
cx->regExpSearcherLastLimit = RegExpSearcherLastLimitSentinel;
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
template <bool CalledFromJit>
|
||||
static bool RegExpBuiltinExecMatchRaw(JSContext* cx,
|
||||
Handle<RegExpObject*> regexp,
|
||||
|
|
|
@ -74,6 +74,9 @@ JSObject* InitRegExpClass(JSContext* cx, HandleObject obj);
|
|||
MatchPairs* maybeMatches,
|
||||
int32_t* result);
|
||||
|
||||
[[nodiscard]] extern bool RegExpSearcherLastLimit(JSContext* cx, unsigned argc,
|
||||
Value* vp);
|
||||
|
||||
[[nodiscard]] extern bool RegExpBuiltinExecMatchFromJit(
|
||||
JSContext* cx, Handle<RegExpObject*> regexp, HandleString input,
|
||||
MatchPairs* maybeMatches, MutableHandleValue output);
|
||||
|
@ -175,6 +178,11 @@ extern const JSFunctionSpec regexp_methods[];
|
|||
[[nodiscard]] extern bool regexp_unicodeSets(JSContext* cx, unsigned argc,
|
||||
JS::Value* vp);
|
||||
|
||||
#ifdef DEBUG
|
||||
// Sentinel value for cx->regExpSearcherLastLimit.
|
||||
constexpr uint32_t RegExpSearcherLastLimitSentinel = UINT32_MAX;
|
||||
#endif
|
||||
|
||||
} /* namespace js */
|
||||
|
||||
#endif /* builtin_RegExp_h */
|
||||
|
|
|
@ -363,10 +363,7 @@ function RegExpReplace(string, replaceValue) {
|
|||
firstDollarIndex
|
||||
);
|
||||
}
|
||||
if (lengthS < 0x7fff) {
|
||||
return RegExpGlobalReplaceShortOpt(rx, S, lengthS, replaceValue, flags);
|
||||
}
|
||||
return RegExpGlobalReplaceOpt(rx, S, lengthS, replaceValue, flags);
|
||||
return RegExpGlobalReplaceOptSimple(rx, S, lengthS, replaceValue, flags);
|
||||
}
|
||||
|
||||
if (functionalReplace) {
|
||||
|
@ -381,10 +378,7 @@ function RegExpReplace(string, replaceValue) {
|
|||
firstDollarIndex
|
||||
);
|
||||
}
|
||||
if (lengthS < 0x7fff) {
|
||||
return RegExpLocalReplaceOptShort(rx, S, lengthS, replaceValue);
|
||||
}
|
||||
return RegExpLocalReplaceOpt(rx, S, lengthS, replaceValue);
|
||||
return RegExpLocalReplaceOptSimple(rx, S, lengthS, replaceValue);
|
||||
}
|
||||
|
||||
// Steps 7-17.
|
||||
|
@ -774,9 +768,8 @@ function RegExpGetFunctionalReplacement(result, S, position, replaceValue) {
|
|||
// Steps 9.b-17.
|
||||
// Optimized path for @@replace with the following conditions:
|
||||
// * global flag is true
|
||||
// * S is a short string (lengthS < 0x7fff)
|
||||
// * replaceValue is a string without "$"
|
||||
function RegExpGlobalReplaceShortOpt(rx, S, lengthS, replaceValue, flags) {
|
||||
function RegExpGlobalReplaceOptSimple(rx, S, lengthS, replaceValue, flags) {
|
||||
// Step 9.a.
|
||||
var fullUnicode = !!(flags & REGEXP_UNICODE_FLAG);
|
||||
|
||||
|
@ -793,15 +786,14 @@ function RegExpGlobalReplaceShortOpt(rx, S, lengthS, replaceValue, flags) {
|
|||
// Step 12.
|
||||
while (true) {
|
||||
// Step 12.a.
|
||||
var result = RegExpSearcher(rx, S, lastIndex);
|
||||
var position = RegExpSearcher(rx, S, lastIndex);
|
||||
|
||||
// Step 12.b.
|
||||
if (result === -1) {
|
||||
if (position === -1) {
|
||||
break;
|
||||
}
|
||||
|
||||
var position = result & 0x7fff;
|
||||
lastIndex = (result >> 15) & 0x7fff;
|
||||
lastIndex = RegExpSearcherLastLimit(S);
|
||||
|
||||
// Step 15.m.ii.
|
||||
accumulatedResult +=
|
||||
|
@ -839,14 +831,6 @@ function RegExpGlobalReplaceShortOpt(rx, S, lengthS, replaceValue, flags) {
|
|||
// Steps 7-17.
|
||||
// Optimized path for @@replace.
|
||||
|
||||
// Conditions:
|
||||
// * global flag is true
|
||||
// * replaceValue is a string without "$"
|
||||
#define FUNC_NAME RegExpGlobalReplaceOpt
|
||||
#include "RegExpGlobalReplaceOpt.h.js"
|
||||
#undef FUNC_NAME
|
||||
/* global RegExpGlobalReplaceOpt */
|
||||
|
||||
// Conditions:
|
||||
// * global flag is true
|
||||
// * replaceValue is a function
|
||||
|
@ -880,21 +864,12 @@ function RegExpGlobalReplaceShortOpt(rx, S, lengthS, replaceValue, flags) {
|
|||
// Conditions:
|
||||
// * global flag is false
|
||||
// * replaceValue is a string without "$"
|
||||
#define FUNC_NAME RegExpLocalReplaceOpt
|
||||
#define FUNC_NAME RegExpLocalReplaceOptSimple
|
||||
#define SIMPLE
|
||||
#include "RegExpLocalReplaceOpt.h.js"
|
||||
#undef SIMPLE
|
||||
#undef FUNC_NAME
|
||||
/* global RegExpLocalReplaceOpt */
|
||||
|
||||
// Conditions:
|
||||
// * global flag is false
|
||||
// * S is a short string (lengthS < 0x7fff)
|
||||
// * replaceValue is a string without "$"
|
||||
#define FUNC_NAME RegExpLocalReplaceOptShort
|
||||
#define SHORT_STRING
|
||||
#include "RegExpLocalReplaceOpt.h.js"
|
||||
#undef SHORT_STRING
|
||||
#undef FUNC_NAME
|
||||
/* global RegExpLocalReplaceOptShort */
|
||||
/* global RegExpLocalReplaceOptSimple */
|
||||
|
||||
// Conditions:
|
||||
// * global flag is false
|
||||
|
@ -965,13 +940,8 @@ function RegExpSearch(string) {
|
|||
}
|
||||
}
|
||||
|
||||
// Step 9.
|
||||
if (result === -1) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Step 10.
|
||||
return result & 0x7fff;
|
||||
// Steps 9-10.
|
||||
return result;
|
||||
}
|
||||
|
||||
return RegExpSearchSlowPath(rx, S, previousLastIndex);
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
* You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
// Function template for the following functions:
|
||||
// * RegExpGlobalReplaceOpt
|
||||
// * RegExpGlobalReplaceOptFunc
|
||||
// * RegExpGlobalReplaceOptSubst
|
||||
// * RegExpGlobalReplaceOptElemBase
|
||||
|
@ -11,12 +10,11 @@
|
|||
// * FUNC_NAME -- function name (required)
|
||||
// e.g.
|
||||
// #define FUNC_NAME RegExpGlobalReplaceOpt
|
||||
// Define the following macro (without value) to switch the code:
|
||||
// Define one of the following macros (without value) to switch the code:
|
||||
// * SUBSTITUTION -- replaceValue is a string with "$"
|
||||
// * FUNCTIONAL -- replaceValue is a function
|
||||
// * ELEMBASE -- replaceValue is a function that returns an element
|
||||
// of an object
|
||||
// * none of above -- replaceValue is a string without "$"
|
||||
|
||||
// ES2023 draft rev 2c78e6f6b5bc6bfbf79dd8a12a9593e5b57afcd2
|
||||
// 22.2.5.11 RegExp.prototype [ @@replace ] ( string, replaceValue )
|
||||
|
@ -126,7 +124,7 @@ function FUNC_NAME(
|
|||
);
|
||||
}
|
||||
#else
|
||||
replacement = replaceValue;
|
||||
#error "Unexpected case"
|
||||
#endif
|
||||
|
||||
// Step 15.m.ii.
|
||||
|
|
|
@ -3,18 +3,17 @@
|
|||
* You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
// Function template for the following functions:
|
||||
// * RegExpLocalReplaceOpt
|
||||
// * RegExpLocalReplaceOptSimple
|
||||
// * RegExpLocalReplaceOptFunc
|
||||
// * RegExpLocalReplaceOptSubst
|
||||
// Define the following macro and include this file to declare function:
|
||||
// * FUNC_NAME -- function name (required)
|
||||
// e.g.
|
||||
// #define FUNC_NAME RegExpLocalReplaceOpt
|
||||
// Define the following macro (without value) to switch the code:
|
||||
// Define one of the following macros (without value) to switch the code:
|
||||
// * SUBSTITUTION -- replaceValue is a string with "$"
|
||||
// * FUNCTIONAL -- replaceValue is a function
|
||||
// * SHORT_STRING -- replaceValue is a string without "$" and lengthS < 0x7fff
|
||||
// * neither of above -- replaceValue is a string without "$"
|
||||
// * SIMPLE -- replaceValue is a string without "$"
|
||||
|
||||
// ES2023 draft rev 2c78e6f6b5bc6bfbf79dd8a12a9593e5b57afcd2
|
||||
// 22.2.5.11 RegExp.prototype [ @@replace ] ( string, replaceValue )
|
||||
|
@ -57,7 +56,7 @@ function FUNC_NAME(
|
|||
lastIndex = 0;
|
||||
}
|
||||
|
||||
#if !defined(SHORT_STRING)
|
||||
#if !defined(SIMPLE)
|
||||
// Step 12.a.
|
||||
var result = RegExpMatcher(rx, S, lastIndex);
|
||||
|
||||
|
@ -73,10 +72,10 @@ function FUNC_NAME(
|
|||
}
|
||||
#else
|
||||
// Step 12.a.
|
||||
var result = RegExpSearcher(rx, S, lastIndex);
|
||||
var position = RegExpSearcher(rx, S, lastIndex);
|
||||
|
||||
// Step 12.b.
|
||||
if (result === -1) {
|
||||
if (position === -1) {
|
||||
// 21.2.5.2.2 RegExpBuiltinExec, steps 12.a.i, 12.c.i.
|
||||
if (globalOrSticky) {
|
||||
rx.lastIndex = 0;
|
||||
|
@ -89,7 +88,7 @@ function FUNC_NAME(
|
|||
|
||||
// Steps 12.c, 13-14.
|
||||
|
||||
#if !defined(SHORT_STRING)
|
||||
#if !defined(SIMPLE)
|
||||
// Steps 15.a-b.
|
||||
assert(result.length >= 1, "RegExpMatcher doesn't return an empty array");
|
||||
|
||||
|
@ -106,13 +105,10 @@ function FUNC_NAME(
|
|||
// To set rx.lastIndex before RegExpGetFunctionalReplacement.
|
||||
var nextSourcePosition = position + matchLength;
|
||||
#else
|
||||
// Steps 15.a-d (skipped).
|
||||
|
||||
// Step 15.e-f.
|
||||
var position = result & 0x7fff;
|
||||
// Steps 15.a-f (skipped).
|
||||
|
||||
// Step 15.m.iii (reordered)
|
||||
var nextSourcePosition = (result >> 15) & 0x7fff;
|
||||
var nextSourcePosition = RegExpSearcherLastLimit(S);
|
||||
#endif
|
||||
|
||||
// 21.2.5.2.2 RegExpBuiltinExec, step 15.
|
||||
|
|
|
@ -2948,6 +2948,14 @@ JitCode* JitRealm::generateRegExpSearcherStub(JSContext* cx) {
|
|||
masm.push(FramePointer);
|
||||
masm.moveStackPtrTo(FramePointer);
|
||||
|
||||
#ifdef DEBUG
|
||||
// Store sentinel value to cx->regExpSearcherLastLimit.
|
||||
// See comment in RegExpSearcherImpl.
|
||||
masm.loadJSContext(temp1);
|
||||
masm.store32(Imm32(RegExpSearcherLastLimitSentinel),
|
||||
Address(temp1, JSContext::offsetOfRegExpSearcherLastLimit()));
|
||||
#endif
|
||||
|
||||
// The InputOutputData is placed above the frame pointer and return address on
|
||||
// the stack.
|
||||
int32_t inputOutputDataStartOffset = 2 * sizeof(void*);
|
||||
|
@ -3000,10 +3008,12 @@ JitCode* JitRealm::generateRegExpSearcherStub(JSContext* cx) {
|
|||
Address matchPairLimit(FramePointer,
|
||||
pairsVectorStartOffset + MatchPair::offsetOfLimit());
|
||||
|
||||
// Store match limit to cx->regExpSearcherLastLimit and return the index.
|
||||
masm.load32(matchPairLimit, result);
|
||||
masm.loadJSContext(input);
|
||||
masm.store32(result,
|
||||
Address(input, JSContext::offsetOfRegExpSearcherLastLimit()));
|
||||
masm.load32(matchPairStart, result);
|
||||
masm.load32(matchPairLimit, input);
|
||||
masm.lshiftPtr(Imm32(15), input);
|
||||
masm.or32(input, result);
|
||||
masm.pop(FramePointer);
|
||||
masm.ret();
|
||||
|
||||
|
|
|
@ -1075,6 +1075,7 @@
|
|||
possibly_calls: true
|
||||
can_recover: true
|
||||
|
||||
# Note: this instruction writes to cx->regExpSearcherLastLimit.
|
||||
- name: RegExpSearcher
|
||||
operands:
|
||||
regexp: Object
|
||||
|
@ -1082,7 +1083,7 @@
|
|||
lastIndex: Int32
|
||||
result_type: Int32
|
||||
possibly_calls: true
|
||||
can_recover: true
|
||||
can_recover: false
|
||||
|
||||
- name: RegExpExecMatch
|
||||
operands:
|
||||
|
|
|
@ -1447,28 +1447,6 @@ bool RRegExpMatcher::recover(JSContext* cx, SnapshotIterator& iter) const {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool MRegExpSearcher::writeRecoverData(CompactBufferWriter& writer) const {
|
||||
MOZ_ASSERT(canRecoverOnBailout());
|
||||
writer.writeUnsigned(uint32_t(RInstruction::Recover_RegExpSearcher));
|
||||
return true;
|
||||
}
|
||||
|
||||
RRegExpSearcher::RRegExpSearcher(CompactBufferReader& reader) {}
|
||||
|
||||
bool RRegExpSearcher::recover(JSContext* cx, SnapshotIterator& iter) const {
|
||||
RootedObject regexp(cx, &iter.read().toObject());
|
||||
RootedString input(cx, iter.read().toString());
|
||||
int32_t lastIndex = iter.read().toInt32();
|
||||
|
||||
int32_t result;
|
||||
if (!RegExpSearcherRaw(cx, regexp, input, lastIndex, nullptr, &result)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
iter.storeInstructionResult(Int32Value(result));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MTypeOf::writeRecoverData(CompactBufferWriter& writer) const {
|
||||
MOZ_ASSERT(canRecoverOnBailout());
|
||||
writer.writeUnsigned(uint32_t(RInstruction::Recover_TypeOf));
|
||||
|
|
|
@ -109,7 +109,6 @@ namespace jit {
|
|||
_(StringSplit) \
|
||||
_(NaNToZero) \
|
||||
_(RegExpMatcher) \
|
||||
_(RegExpSearcher) \
|
||||
_(StringReplace) \
|
||||
_(Substr) \
|
||||
_(TypeOf) \
|
||||
|
@ -685,14 +684,6 @@ class RRegExpMatcher final : public RInstruction {
|
|||
SnapshotIterator& iter) const override;
|
||||
};
|
||||
|
||||
class RRegExpSearcher final : public RInstruction {
|
||||
public:
|
||||
RINSTRUCTION_HEADER_NUM_OP_(RegExpSearcher, 3)
|
||||
|
||||
[[nodiscard]] bool recover(JSContext* cx,
|
||||
SnapshotIterator& iter) const override;
|
||||
};
|
||||
|
||||
class RStringReplace final : public RInstruction {
|
||||
private:
|
||||
bool isFlatReplacement_;
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "jsexn.h"
|
||||
#include "jstypes.h"
|
||||
|
||||
#include "builtin/RegExp.h" // js::RegExpSearcherLastLimitSentinel
|
||||
#include "frontend/FrontendContext.h"
|
||||
#include "gc/GC.h"
|
||||
#include "irregexp/RegExpAPI.h"
|
||||
|
@ -983,6 +984,11 @@ JSContext::JSContext(JSRuntime* runtime, const JS::ContextOptions& options)
|
|||
inUnsafeRegion(this, 0),
|
||||
generationalDisabled(this, 0),
|
||||
compactingDisabledCount(this, 0),
|
||||
#ifdef DEBUG
|
||||
regExpSearcherLastLimit(this, RegExpSearcherLastLimitSentinel),
|
||||
#else
|
||||
regExpSearcherLastLimit(this, 0),
|
||||
#endif
|
||||
frontendCollectionPool_(this),
|
||||
suppressProfilerSampling(false),
|
||||
tempLifoAlloc_(this, (size_t)TEMP_LIFO_ALLOC_PRIMARY_CHUNK_SIZE),
|
||||
|
|
|
@ -496,6 +496,13 @@ struct JS_PUBLIC_API JSContext : public JS::RootingContext,
|
|||
// with AutoDisableCompactingGC which uses this counter.
|
||||
js::ContextData<unsigned> compactingDisabledCount;
|
||||
|
||||
// Match limit result for the most recent call to RegExpSearcher.
|
||||
js::ContextData<uint32_t> regExpSearcherLastLimit;
|
||||
|
||||
static constexpr size_t offsetOfRegExpSearcherLastLimit() {
|
||||
return offsetof(JSContext, regExpSearcherLastLimit);
|
||||
}
|
||||
|
||||
private:
|
||||
// Pools used for recycling name maps and vectors when parsing and
|
||||
// emitting bytecode. Purged on GC when there are no active script
|
||||
|
|
|
@ -2070,6 +2070,7 @@ static const JSFunctionSpec intrinsic_functions[] = {
|
|||
JS_INLINABLE_FN("RegExpPrototypeOptimizable", RegExpPrototypeOptimizable, 1,
|
||||
0, RegExpPrototypeOptimizable),
|
||||
JS_INLINABLE_FN("RegExpSearcher", RegExpSearcher, 3, 0, RegExpSearcher),
|
||||
JS_FN("RegExpSearcherLastLimit", RegExpSearcherLastLimit, 0, 0),
|
||||
JS_INLINABLE_FN("SameValue", js::obj_is, 2, 0, ObjectIs),
|
||||
JS_FN("SharedArrayBufferByteLength",
|
||||
intrinsic_ArrayBufferByteLength<SharedArrayBufferObject>, 1, 0),
|
||||
|
|
|
@ -12,7 +12,8 @@ const path = require("path");
|
|||
const fs = require("fs");
|
||||
|
||||
const selfHostedRegex = /js\/src\/builtin\/.*?\.js$/;
|
||||
const macroRegex = /\s*\#(if|ifdef|else|elif|endif|include|define|undef).*/;
|
||||
const macroRegex =
|
||||
/\s*\#(if|ifdef|else|elif|endif|include|define|undef|error).*/;
|
||||
|
||||
function isSelfHostedFile(filename) {
|
||||
if (path.win32) {
|
||||
|
|
Загрузка…
Ссылка в новой задаче