Bug 1839422 part 1 - Change RegExpSearcher interface so it can be used for longer strings. r=iain

Change the interface so that, instead of packing the match index and limit into
a single int32 value, we now return only the index and store the match limit in
a field on the context, from which it can be loaded with a separate intrinsic.

This lets us simplify `RegExpReplace` because we don't need separate functions
for short and long strings.

Differential Revision: https://phabricator.services.mozilla.com/D181658
This commit is contained in:
Jan de Mooij 2023-07-31 10:29:00 +00:00
Родитель 447af23337
Коммит f9f4eefdf5
13 изменённых файлов: 103 добавлений и 104 удалений

Просмотреть файл

@ -277,13 +277,20 @@ bool js::CreateRegExpMatchResult(JSContext* cx, HandleRegExpShared re,
return true;
}
static int32_t CreateRegExpSearchResult(const MatchPairs& matches) {
/* Fit the start and limit of match into a int32_t. */
uint32_t position = matches[0].start;
uint32_t lastIndex = matches[0].limit;
MOZ_ASSERT(position < 0x8000);
MOZ_ASSERT(lastIndex < 0x8000);
return position | (lastIndex << 15);
// Publishes the result of a successful search: the limit of the first match
// pair is stored in cx->regExpSearcherLastLimit and the pair's start index is
// returned.
static int32_t CreateRegExpSearchResult(JSContext* cx,
                                        const MatchPairs& matches) {
  const auto& firstPair = matches[0];
  MOZ_ASSERT(firstPair.start >= 0);
  MOZ_ASSERT(firstPair.limit >= 0);

  // The field is expected to still hold the debug sentinel at this point,
  // i.e. no limit has been stored since it was last reset.
  MOZ_ASSERT(cx->regExpSearcherLastLimit == RegExpSearcherLastLimitSentinel);

#ifdef DEBUG
  // A real limit must never be confused with the sentinel value.
  static_assert(JSString::MAX_LENGTH < RegExpSearcherLastLimitSentinel);
  MOZ_ASSERT(uint32_t(firstPair.limit) < RegExpSearcherLastLimitSentinel);
#endif

  cx->regExpSearcherLastLimit = firstPair.limit;
  return firstPair.start;
}
/*
@ -1235,6 +1242,12 @@ static bool RegExpSearcherImpl(JSContext* cx, HandleObject regexp,
/* Execute regular expression and gather matches. */
VectorMatchPairs matches;
#ifdef DEBUG
// Ensure we assert if RegExpSearcherLastLimit is called when there's no
// match.
cx->regExpSearcherLastLimit = RegExpSearcherLastLimitSentinel;
#endif
/* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
RegExpRunStatus status =
ExecuteRegExp(cx, regexp, string, lastIndex, &matches);
@ -1249,7 +1262,7 @@ static bool RegExpSearcherImpl(JSContext* cx, HandleObject regexp,
}
/* Steps 16-25 */
*result = CreateRegExpSearchResult(matches);
*result = CreateRegExpSearchResult(cx, matches);
return true;
}
@ -1292,12 +1305,31 @@ bool js::RegExpSearcherRaw(JSContext* cx, HandleObject regexp,
// RegExp execution was successful only if the pairs have actually been
// filled in. Note that IC code always passes a nullptr maybeMatches.
if (maybeMatches && maybeMatches->pairsRaw()[0] > MatchPair::NoMatch) {
*result = CreateRegExpSearchResult(*maybeMatches);
*result = CreateRegExpSearchResult(cx, *maybeMatches);
return true;
}
return RegExpSearcherImpl(cx, regexp, input, lastIndex, result);
}
// Native returning the match limit recorded in cx->regExpSearcherLastLimit.
// Expects exactly one argument: the string that was searched. It is only
// used to sanity-check the stored limit.
bool js::RegExpSearcherLastLimit(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs callArgs = CallArgsFromVp(argc, vp);
  MOZ_ASSERT(callArgs.length() == 1);
  MOZ_ASSERT(callArgs[0].isString());

  const uint32_t limit = cx->regExpSearcherLastLimit;

  // The stored limit must be a real value (not the sentinel) and must lie
  // within the bounds of the given string.
  MOZ_ASSERT(limit != RegExpSearcherLastLimitSentinel);
  MOZ_ASSERT(limit <= callArgs[0].toString()->length());

  callArgs.rval().setInt32(limit);

#ifdef DEBUG
  // Restore the sentinel so that a second call without an intervening
  // RegExpSearcher call trips the assertion above.
  cx->regExpSearcherLastLimit = RegExpSearcherLastLimitSentinel;
#endif

  return true;
}
template <bool CalledFromJit>
static bool RegExpBuiltinExecMatchRaw(JSContext* cx,
Handle<RegExpObject*> regexp,

Просмотреть файл

@ -74,6 +74,9 @@ JSObject* InitRegExpClass(JSContext* cx, HandleObject obj);
MatchPairs* maybeMatches,
int32_t* result);
[[nodiscard]] extern bool RegExpSearcherLastLimit(JSContext* cx, unsigned argc,
Value* vp);
[[nodiscard]] extern bool RegExpBuiltinExecMatchFromJit(
JSContext* cx, Handle<RegExpObject*> regexp, HandleString input,
MatchPairs* maybeMatches, MutableHandleValue output);
@ -175,6 +178,11 @@ extern const JSFunctionSpec regexp_methods[];
[[nodiscard]] extern bool regexp_unicodeSets(JSContext* cx, unsigned argc,
JS::Value* vp);
#ifdef DEBUG
// Sentinel value for cx->regExpSearcherLastLimit.
constexpr uint32_t RegExpSearcherLastLimitSentinel = UINT32_MAX;
#endif
} /* namespace js */
#endif /* builtin_RegExp_h */

Просмотреть файл

@ -363,10 +363,7 @@ function RegExpReplace(string, replaceValue) {
firstDollarIndex
);
}
if (lengthS < 0x7fff) {
return RegExpGlobalReplaceShortOpt(rx, S, lengthS, replaceValue, flags);
}
return RegExpGlobalReplaceOpt(rx, S, lengthS, replaceValue, flags);
return RegExpGlobalReplaceOptSimple(rx, S, lengthS, replaceValue, flags);
}
if (functionalReplace) {
@ -381,10 +378,7 @@ function RegExpReplace(string, replaceValue) {
firstDollarIndex
);
}
if (lengthS < 0x7fff) {
return RegExpLocalReplaceOptShort(rx, S, lengthS, replaceValue);
}
return RegExpLocalReplaceOpt(rx, S, lengthS, replaceValue);
return RegExpLocalReplaceOptSimple(rx, S, lengthS, replaceValue);
}
// Steps 7-17.
@ -774,9 +768,8 @@ function RegExpGetFunctionalReplacement(result, S, position, replaceValue) {
// Steps 9.b-17.
// Optimized path for @@replace with the following conditions:
// * global flag is true
// * S is a short string (lengthS < 0x7fff)
// * replaceValue is a string without "$"
function RegExpGlobalReplaceShortOpt(rx, S, lengthS, replaceValue, flags) {
function RegExpGlobalReplaceOptSimple(rx, S, lengthS, replaceValue, flags) {
// Step 9.a.
var fullUnicode = !!(flags & REGEXP_UNICODE_FLAG);
@ -793,15 +786,14 @@ function RegExpGlobalReplaceShortOpt(rx, S, lengthS, replaceValue, flags) {
// Step 12.
while (true) {
// Step 12.a.
var result = RegExpSearcher(rx, S, lastIndex);
var position = RegExpSearcher(rx, S, lastIndex);
// Step 12.b.
if (result === -1) {
if (position === -1) {
break;
}
var position = result & 0x7fff;
lastIndex = (result >> 15) & 0x7fff;
lastIndex = RegExpSearcherLastLimit(S);
// Step 15.m.ii.
accumulatedResult +=
@ -839,14 +831,6 @@ function RegExpGlobalReplaceShortOpt(rx, S, lengthS, replaceValue, flags) {
// Steps 7-17.
// Optimized path for @@replace.
// Conditions:
// * global flag is true
// * replaceValue is a string without "$"
#define FUNC_NAME RegExpGlobalReplaceOpt
#include "RegExpGlobalReplaceOpt.h.js"
#undef FUNC_NAME
/* global RegExpGlobalReplaceOpt */
// Conditions:
// * global flag is true
// * replaceValue is a function
@ -880,21 +864,12 @@ function RegExpGlobalReplaceShortOpt(rx, S, lengthS, replaceValue, flags) {
// Conditions:
// * global flag is false
// * replaceValue is a string without "$"
#define FUNC_NAME RegExpLocalReplaceOpt
#define FUNC_NAME RegExpLocalReplaceOptSimple
#define SIMPLE
#include "RegExpLocalReplaceOpt.h.js"
#undef SIMPLE
#undef FUNC_NAME
/* global RegExpLocalReplaceOpt */
// Conditions:
// * global flag is false
// * S is a short string (lengthS < 0x7fff)
// * replaceValue is a string without "$"
#define FUNC_NAME RegExpLocalReplaceOptShort
#define SHORT_STRING
#include "RegExpLocalReplaceOpt.h.js"
#undef SHORT_STRING
#undef FUNC_NAME
/* global RegExpLocalReplaceOptShort */
/* global RegExpLocalReplaceOptSimple */
// Conditions:
// * global flag is false
@ -965,13 +940,8 @@ function RegExpSearch(string) {
}
}
// Step 9.
if (result === -1) {
return -1;
}
// Step 10.
return result & 0x7fff;
// Steps 9-10.
return result;
}
return RegExpSearchSlowPath(rx, S, previousLastIndex);

Просмотреть файл

@ -3,7 +3,6 @@
* You can obtain one at http://mozilla.org/MPL/2.0/. */
// Function template for the following functions:
// * RegExpGlobalReplaceOpt
// * RegExpGlobalReplaceOptFunc
// * RegExpGlobalReplaceOptSubst
// * RegExpGlobalReplaceOptElemBase
@ -11,12 +10,11 @@
// * FUNC_NAME -- function name (required)
// e.g.
// #define FUNC_NAME RegExpGlobalReplaceOpt
// Define the following macro (without value) to switch the code:
// Define one of the following macros (without value) to switch the code:
// * SUBSTITUTION -- replaceValue is a string with "$"
// * FUNCTIONAL -- replaceValue is a function
// * ELEMBASE -- replaceValue is a function that returns an element
// of an object
// * none of above -- replaceValue is a string without "$"
// ES2023 draft rev 2c78e6f6b5bc6bfbf79dd8a12a9593e5b57afcd2
// 22.2.5.11 RegExp.prototype [ @@replace ] ( string, replaceValue )
@ -126,7 +124,7 @@ function FUNC_NAME(
);
}
#else
replacement = replaceValue;
#error "Unexpected case"
#endif
// Step 15.m.ii.

Просмотреть файл

@ -3,18 +3,17 @@
* You can obtain one at http://mozilla.org/MPL/2.0/. */
// Function template for the following functions:
// * RegExpLocalReplaceOpt
// * RegExpLocalReplaceOptSimple
// * RegExpLocalReplaceOptFunc
// * RegExpLocalReplaceOptSubst
// Define the following macro and include this file to declare function:
// * FUNC_NAME -- function name (required)
// e.g.
// #define FUNC_NAME RegExpLocalReplaceOpt
// Define the following macro (without value) to switch the code:
// Define one of the following macros (without value) to switch the code:
// * SUBSTITUTION -- replaceValue is a string with "$"
// * FUNCTIONAL -- replaceValue is a function
// * SHORT_STRING -- replaceValue is a string without "$" and lengthS < 0x7fff
// * neither of above -- replaceValue is a string without "$"
// * SIMPLE -- replaceValue is a string without "$"
// ES2023 draft rev 2c78e6f6b5bc6bfbf79dd8a12a9593e5b57afcd2
// 22.2.5.11 RegExp.prototype [ @@replace ] ( string, replaceValue )
@ -57,7 +56,7 @@ function FUNC_NAME(
lastIndex = 0;
}
#if !defined(SHORT_STRING)
#if !defined(SIMPLE)
// Step 12.a.
var result = RegExpMatcher(rx, S, lastIndex);
@ -73,10 +72,10 @@ function FUNC_NAME(
}
#else
// Step 12.a.
var result = RegExpSearcher(rx, S, lastIndex);
var position = RegExpSearcher(rx, S, lastIndex);
// Step 12.b.
if (result === -1) {
if (position === -1) {
// 21.2.5.2.2 RegExpBuiltinExec, steps 12.a.i, 12.c.i.
if (globalOrSticky) {
rx.lastIndex = 0;
@ -89,7 +88,7 @@ function FUNC_NAME(
// Steps 12.c, 13-14.
#if !defined(SHORT_STRING)
#if !defined(SIMPLE)
// Steps 15.a-b.
assert(result.length >= 1, "RegExpMatcher doesn't return an empty array");
@ -106,13 +105,10 @@ function FUNC_NAME(
// To set rx.lastIndex before RegExpGetFunctionalReplacement.
var nextSourcePosition = position + matchLength;
#else
// Steps 15.a-d (skipped).
// Step 15.e-f.
var position = result & 0x7fff;
// Steps 15.a-f (skipped).
// Step 15.m.iii (reordered)
var nextSourcePosition = (result >> 15) & 0x7fff;
var nextSourcePosition = RegExpSearcherLastLimit(S);
#endif
// 21.2.5.2.2 RegExpBuiltinExec, step 15.

Просмотреть файл

@ -2948,6 +2948,14 @@ JitCode* JitRealm::generateRegExpSearcherStub(JSContext* cx) {
masm.push(FramePointer);
masm.moveStackPtrTo(FramePointer);
#ifdef DEBUG
// Store sentinel value to cx->regExpSearcherLastLimit.
// See comment in RegExpSearcherImpl.
masm.loadJSContext(temp1);
masm.store32(Imm32(RegExpSearcherLastLimitSentinel),
Address(temp1, JSContext::offsetOfRegExpSearcherLastLimit()));
#endif
// The InputOutputData is placed above the frame pointer and return address on
// the stack.
int32_t inputOutputDataStartOffset = 2 * sizeof(void*);
@ -3000,10 +3008,12 @@ JitCode* JitRealm::generateRegExpSearcherStub(JSContext* cx) {
Address matchPairLimit(FramePointer,
pairsVectorStartOffset + MatchPair::offsetOfLimit());
// Store match limit to cx->regExpSearcherLastLimit and return the index.
masm.load32(matchPairLimit, result);
masm.loadJSContext(input);
masm.store32(result,
Address(input, JSContext::offsetOfRegExpSearcherLastLimit()));
masm.load32(matchPairStart, result);
masm.load32(matchPairLimit, input);
masm.lshiftPtr(Imm32(15), input);
masm.or32(input, result);
masm.pop(FramePointer);
masm.ret();

Просмотреть файл

@ -1075,6 +1075,7 @@
possibly_calls: true
can_recover: true
# Note: this instruction writes to cx->regExpSearcherLastLimit.
- name: RegExpSearcher
operands:
regexp: Object
@ -1082,7 +1083,7 @@
lastIndex: Int32
result_type: Int32
possibly_calls: true
can_recover: true
can_recover: false
- name: RegExpExecMatch
operands:

Просмотреть файл

@ -1447,28 +1447,6 @@ bool RRegExpMatcher::recover(JSContext* cx, SnapshotIterator& iter) const {
return true;
}
// Serializes this instruction for recovery on bailout. Only the
// Recover_RegExpSearcher tag is written; no further payload follows.
bool MRegExpSearcher::writeRecoverData(CompactBufferWriter& writer) const {
  MOZ_ASSERT(canRecoverOnBailout());

  const uint32_t recoverTag = uint32_t(RInstruction::Recover_RegExpSearcher);
  writer.writeUnsigned(recoverTag);
  return true;
}
// Constructs the recover instruction from a CompactBufferReader. The body is
// empty, so nothing is consumed from `reader`.
RRegExpSearcher::RRegExpSearcher(CompactBufferReader& reader) {}
// Re-executes the search during recovery: reads the three operands from the
// snapshot, runs RegExpSearcherRaw without precomputed match pairs, and stores
// the Int32 result back into the iterator. Returns false if the search fails.
bool RRegExpSearcher::recover(JSContext* cx, SnapshotIterator& iter) const {
  // Operands are read in this order: regexp object, input string, lastIndex.
  RootedObject regexpObj(cx, &iter.read().toObject());
  RootedString inputStr(cx, iter.read().toString());
  int32_t startIndex = iter.read().toInt32();

  int32_t searchResult;
  const bool succeeded = RegExpSearcherRaw(cx, regexpObj, inputStr, startIndex,
                                           nullptr, &searchResult);
  if (!succeeded) {
    return false;
  }

  iter.storeInstructionResult(Int32Value(searchResult));
  return true;
}
bool MTypeOf::writeRecoverData(CompactBufferWriter& writer) const {
MOZ_ASSERT(canRecoverOnBailout());
writer.writeUnsigned(uint32_t(RInstruction::Recover_TypeOf));

Просмотреть файл

@ -109,7 +109,6 @@ namespace jit {
_(StringSplit) \
_(NaNToZero) \
_(RegExpMatcher) \
_(RegExpSearcher) \
_(StringReplace) \
_(Substr) \
_(TypeOf) \
@ -685,14 +684,6 @@ class RRegExpMatcher final : public RInstruction {
SnapshotIterator& iter) const override;
};
// Recover instruction for RegExpSearcher.
class RRegExpSearcher final : public RInstruction {
public:
// NOTE(review): the 3 is presumably the operand count (recover() reads three
// values: regexp, input, lastIndex) -- confirm against the macro definition.
RINSTRUCTION_HEADER_NUM_OP_(RegExpSearcher, 3)
// Recomputes the instruction's result from the snapshot; the return value
// must be checked by the caller.
[[nodiscard]] bool recover(JSContext* cx,
SnapshotIterator& iter) const override;
};
class RStringReplace final : public RInstruction {
private:
bool isFlatReplacement_;

Просмотреть файл

@ -30,6 +30,7 @@
#include "jsexn.h"
#include "jstypes.h"
#include "builtin/RegExp.h" // js::RegExpSearcherLastLimitSentinel
#include "frontend/FrontendContext.h"
#include "gc/GC.h"
#include "irregexp/RegExpAPI.h"
@ -983,6 +984,11 @@ JSContext::JSContext(JSRuntime* runtime, const JS::ContextOptions& options)
inUnsafeRegion(this, 0),
generationalDisabled(this, 0),
compactingDisabledCount(this, 0),
#ifdef DEBUG
regExpSearcherLastLimit(this, RegExpSearcherLastLimitSentinel),
#else
regExpSearcherLastLimit(this, 0),
#endif
frontendCollectionPool_(this),
suppressProfilerSampling(false),
tempLifoAlloc_(this, (size_t)TEMP_LIFO_ALLOC_PRIMARY_CHUNK_SIZE),

Просмотреть файл

@ -496,6 +496,13 @@ struct JS_PUBLIC_API JSContext : public JS::RootingContext,
// with AutoDisableCompactingGC which uses this counter.
js::ContextData<unsigned> compactingDisabledCount;
// Match limit result for the most recent call to RegExpSearcher.
// It is written when a search succeeds and read back through the
// RegExpSearcherLastLimit intrinsic. In DEBUG builds the field is initialized
// to RegExpSearcherLastLimitSentinel and reset to it after each read, so that
// reading a stale value asserts.
js::ContextData<uint32_t> regExpSearcherLastLimit;
// Byte offset of regExpSearcherLastLimit within JSContext, for code that
// addresses the field relative to a JSContext pointer (e.g. JIT stubs).
static constexpr size_t offsetOfRegExpSearcherLastLimit() {
return offsetof(JSContext, regExpSearcherLastLimit);
}
private:
// Pools used for recycling name maps and vectors when parsing and
// emitting bytecode. Purged on GC when there are no active script

Просмотреть файл

@ -2070,6 +2070,7 @@ static const JSFunctionSpec intrinsic_functions[] = {
JS_INLINABLE_FN("RegExpPrototypeOptimizable", RegExpPrototypeOptimizable, 1,
0, RegExpPrototypeOptimizable),
JS_INLINABLE_FN("RegExpSearcher", RegExpSearcher, 3, 0, RegExpSearcher),
JS_FN("RegExpSearcherLastLimit", RegExpSearcherLastLimit, 0, 0),
JS_INLINABLE_FN("SameValue", js::obj_is, 2, 0, ObjectIs),
JS_FN("SharedArrayBufferByteLength",
intrinsic_ArrayBufferByteLength<SharedArrayBufferObject>, 1, 0),

Просмотреть файл

@ -12,7 +12,8 @@ const path = require("path");
const fs = require("fs");
const selfHostedRegex = /js\/src\/builtin\/.*?\.js$/;
const macroRegex = /\s*\#(if|ifdef|else|elif|endif|include|define|undef).*/;
const macroRegex =
/\s*\#(if|ifdef|else|elif|endif|include|define|undef|error).*/;
function isSelfHostedFile(filename) {
if (path.win32) {