Bug 808245, Part 6/6 - Add MatchOnly mode and lazify RegExpStatics. r=dvander

This commit is contained in:
Sean Stangl 2012-12-12 18:11:28 -08:00
Родитель b129f378d1
Коммит 3147f85d0c
9 изменённых файлов: 221 добавлений и 19 удалений

Просмотреть файл

@ -119,14 +119,20 @@ ExecuteRegExpImpl(JSContext *cx, RegExpStatics *res, RegExpShared &re, RegExpObj
size_t *lastIndex, MatchConduit &matches) size_t *lastIndex, MatchConduit &matches)
{ {
RegExpRunStatus status; RegExpRunStatus status;
/* Ahem, not handled in this patch. But it was a pain to rip out. */
JS_ASSERT(!matches.isPair);
/* Vector of MatchPairs provided: execute full regexp. */ /* Switch between MatchOnly and IncludeSubpatterns modes. */
status = re.execute(cx, chars, length, lastIndex, *matches.u.pairs); if (matches.isPair) {
if (status == RegExpRunStatus_Success && res) size_t lastIndex_orig = *lastIndex;
res->updateFromMatchPairs(cx, input, *matches.u.pairs); /* Only one MatchPair slot provided: execute short-circuiting regexp. */
status = re.executeMatchOnly(cx, chars, length, lastIndex, *matches.u.pair);
if (status == RegExpRunStatus_Success && res)
res->updateLazily(cx, input, &regexp, lastIndex_orig);
} else {
/* Vector of MatchPairs provided: execute full regexp. */
status = re.execute(cx, chars, length, lastIndex, *matches.u.pairs);
if (status == RegExpRunStatus_Success && res)
res->updateFromMatchPairs(cx, input, *matches.u.pairs);
}
return status; return status;
} }

Просмотреть файл

@ -312,6 +312,7 @@ DeclMarkerImpl(Object, DebugScopeObject)
DeclMarkerImpl(Object, GlobalObject) DeclMarkerImpl(Object, GlobalObject)
DeclMarkerImpl(Object, JSObject) DeclMarkerImpl(Object, JSObject)
DeclMarkerImpl(Object, JSFunction) DeclMarkerImpl(Object, JSFunction)
DeclMarkerImpl(Object, RegExpObject)
DeclMarkerImpl(Object, ScopeObject) DeclMarkerImpl(Object, ScopeObject)
DeclMarkerImpl(Script, JSScript) DeclMarkerImpl(Script, JSScript)
DeclMarkerImpl(Shape, Shape) DeclMarkerImpl(Shape, Shape)

Просмотреть файл

@ -97,6 +97,7 @@ DeclMarker(Object, DebugScopeObject)
DeclMarker(Object, GlobalObject) DeclMarker(Object, GlobalObject)
DeclMarker(Object, JSObject) DeclMarker(Object, JSObject)
DeclMarker(Object, JSFunction) DeclMarker(Object, JSFunction)
DeclMarker(Object, RegExpObject)
DeclMarker(Object, ScopeObject) DeclMarker(Object, ScopeObject)
DeclMarker(Script, JSScript) DeclMarker(Script, JSScript)
DeclMarker(Shape, Shape) DeclMarker(Shape, Shape)

Просмотреть файл

@ -674,6 +674,9 @@ Shape::Range::AutoRooter::trace(JSTracer *trc)
void void
RegExpStatics::AutoRooter::trace(JSTracer *trc) RegExpStatics::AutoRooter::trace(JSTracer *trc)
{ {
if (statics->regexp)
MarkObjectRoot(trc, reinterpret_cast<JSObject**>(&statics->regexp),
"RegExpStatics::AutoRooter regexp");
if (statics->matchesInput) if (statics->matchesInput)
MarkStringRoot(trc, reinterpret_cast<JSString**>(&statics->matchesInput), MarkStringRoot(trc, reinterpret_cast<JSString**>(&statics->matchesInput),
"RegExpStatics::AutoRooter matchesInput"); "RegExpStatics::AutoRooter matchesInput");

Просмотреть файл

@ -448,10 +448,10 @@ RegExpShared::checkSyntax(JSContext *cx, TokenStream *tokenStream, JSLinearStrin
} }
bool bool
RegExpShared::compile(JSContext *cx) RegExpShared::compile(JSContext *cx, bool matchOnly)
{ {
if (!sticky()) if (!sticky())
return compile(cx, *source); return compile(cx, *source, matchOnly);
/* /*
* The sticky case we implement hackily by prepending a caret onto the front * The sticky case we implement hackily by prepending a caret onto the front
@ -472,11 +472,11 @@ RegExpShared::compile(JSContext *cx)
if (!fakeySource) if (!fakeySource)
return false; return false;
return compile(cx, *fakeySource); return compile(cx, *fakeySource, matchOnly);
} }
bool bool
RegExpShared::compile(JSContext *cx, JSLinearString &pattern) RegExpShared::compile(JSContext *cx, JSLinearString &pattern, bool matchOnly)
{ {
/* Parse the pattern. */ /* Parse the pattern. */
ErrorCode yarrError; ErrorCode yarrError;
@ -494,7 +494,8 @@ RegExpShared::compile(JSContext *cx, JSLinearString &pattern)
return false; return false;
JSGlobalData globalData(execAlloc); JSGlobalData globalData(execAlloc);
YarrJITCompileMode compileMode = JSC::Yarr::IncludeSubpatterns; YarrJITCompileMode compileMode = matchOnly ? JSC::Yarr::MatchOnly
: JSC::Yarr::IncludeSubpatterns;
jitCompile(yarrPattern, JSC::Yarr::Char16, &globalData, codeBlock, compileMode); jitCompile(yarrPattern, JSC::Yarr::Char16, &globalData, codeBlock, compileMode);
@ -520,7 +521,15 @@ RegExpShared::compileIfNecessary(JSContext *cx)
{ {
if (hasCode() || hasBytecode()) if (hasCode() || hasBytecode())
return true; return true;
return compile(cx); return compile(cx, false);
}
/*
 * Ensure this regexp can be executed in match-only mode.
 *
 * Returns true immediately when match-only JIT code or bytecode is already
 * present; otherwise returns the result of compiling with |matchOnly| set.
 */
bool
RegExpShared::compileMatchOnlyIfNecessary(JSContext *cx)
{
/* Existing bytecode is accepted as well, so no recompile is forced then. */
if (hasMatchOnlyCode() || hasBytecode())
return true;
/* Nothing usable yet: compile now, requesting the match-only variant. */
return compile(cx, true);
} }
RegExpRunStatus RegExpRunStatus
@ -571,6 +580,62 @@ RegExpShared::execute(JSContext *cx, StableCharPtr chars, size_t length,
return RegExpRunStatus_Success; return RegExpRunStatus_Success;
} }
/*
 * Run the regular expression in match-only mode: only the bounds of the
 * overall match are reported, not the subpattern captures.
 *
 * |chars|/|length| describe the input and |*lastIndex| is the offset at which
 * matching starts. On a successful match, |match| receives the bounds of the
 * whole match expressed as offsets into the original (undisplaced) input, and
 * |*lastIndex| is set to the end of the match.
 *
 * Returns RegExpRunStatus_Error if compilation or allocation of the temporary
 * match vector fails, RegExpRunStatus_Success_NotFound if there is no match,
 * and RegExpRunStatus_Success otherwise.
 */
RegExpRunStatus
RegExpShared::executeMatchOnly(JSContext *cx, StableCharPtr chars, size_t length,
                               size_t *lastIndex, MatchPair &match)
{
    /* Compile the code at point-of-use. */
    if (!compileMatchOnlyIfNecessary(cx))
        return RegExpRunStatus_Error;

    const size_t origLength = length;
    size_t start = *lastIndex;
    size_t displacement = 0;

    /*
     * For sticky regexps the input is advanced so that matching begins at
     * offset 0 of the shifted buffer; results must be shifted back by
     * |displacement| before being handed to the caller.
     */
    if (sticky()) {
        displacement = start;
        chars += displacement;
        length -= displacement;
        start = 0;
    }

#if ENABLE_YARR_JIT
    if (!codeBlock.isFallBack()) {
        MatchResult result = codeBlock.execute(chars.get(), start, length);
        if (!result)
            return RegExpRunStatus_Success_NotFound;

        match = MatchPair(result.start, result.end);
        match.displace(displacement);
        *lastIndex = match.limit;
        return RegExpRunStatus_Success;
    }
#endif

    /*
     * The JIT could not be used, so fall back to the Yarr interpreter.
     * Unfortunately, the interpreter does not have a MatchOnly mode, so a
     * temporary output vector must be provided.
     */
    JS_ASSERT(hasBytecode());

    ScopedMatchPairs matches(&cx->tempLifoAlloc());
    if (!matches.initArray(pairCount()))
        return RegExpRunStatus_Error;

    unsigned result =
        JSC::Yarr::interpret(bytecode, chars.get(), length, start, matches.rawBuf());

    if (result == JSC::Yarr::offsetNoMatch)
        return RegExpRunStatus_Success_NotFound;

    /*
     * Build the result from the raw (undisplaced) interpreter output and then
     * displace it as a unit, exactly as the JIT path does. Mixing the
     * undisplaced |result| with an already-displaced limit would report a
     * start offset that is too small by |displacement| for sticky regexps.
     */
    match = MatchPair(result, matches[0].limit);
    match.displace(displacement);

    /* Displace the temporary vector too so it can be validated. */
    matches.displace(displacement);
    matches.checkAgainst(origLength);

    *lastIndex = match.limit;
    return RegExpRunStatus_Success;
}
/* RegExpCompartment */ /* RegExpCompartment */
RegExpCompartment::RegExpCompartment(JSRuntime *rt) RegExpCompartment::RegExpCompartment(JSRuntime *rt)

Просмотреть файл

@ -139,10 +139,11 @@ class RegExpShared
uint64_t gcNumberWhenUsed; uint64_t gcNumberWhenUsed;
/* Internal functions. */ /* Internal functions. */
bool compile(JSContext *cx); bool compile(JSContext *cx, bool matchOnly);
bool compile(JSContext *cx, JSLinearString &pattern); bool compile(JSContext *cx, JSLinearString &pattern, bool matchOnly);
bool compileIfNecessary(JSContext *cx); bool compileIfNecessary(JSContext *cx);
bool compileMatchOnlyIfNecessary(JSContext *cx);
public: public:
RegExpShared(JSRuntime *rt, JSAtom *source, RegExpFlag flags); RegExpShared(JSRuntime *rt, JSAtom *source, RegExpFlag flags);
@ -160,6 +161,10 @@ class RegExpShared
RegExpRunStatus execute(JSContext *cx, StableCharPtr chars, size_t length, RegExpRunStatus execute(JSContext *cx, StableCharPtr chars, size_t length,
size_t *lastIndex, MatchPairs &matches); size_t *lastIndex, MatchPairs &matches);
/* Run the regular expression without collecting matches, for test(). */
RegExpRunStatus executeMatchOnly(JSContext *cx, StableCharPtr chars, size_t length,
size_t *lastIndex, MatchPair &match);
/* Accessors */ /* Accessors */
size_t getParenCount() const { JS_ASSERT(isCompiled()); return parenCount; } size_t getParenCount() const { JS_ASSERT(isCompiled()); return parenCount; }
@ -177,11 +182,13 @@ class RegExpShared
#ifdef ENABLE_YARR_JIT #ifdef ENABLE_YARR_JIT
bool hasCode() const { return codeBlock.has16BitCode(); } bool hasCode() const { return codeBlock.has16BitCode(); }
bool hasMatchOnlyCode() const { return codeBlock.has16BitCodeMatchOnly(); }
#else #else
bool hasCode() const { return false; } bool hasCode() const { return false; }
bool hasMatchOnlyCode() const { return false; }
#endif #endif
bool hasBytecode() const { return bytecode != NULL; } bool hasBytecode() const { return bytecode != NULL; }
bool isCompiled() const { return hasBytecode() || hasCode(); } bool isCompiled() const { return hasBytecode() || hasCode() || hasMatchOnlyCode(); }
}; };
/* /*
@ -252,7 +259,6 @@ class RegExpCompartment
bool init(JSContext *cx); bool init(JSContext *cx);
void sweep(JSRuntime *rt); void sweep(JSRuntime *rt);
/* Return a regexp corresponding to the given (source, flags) pair. */
bool get(JSContext *cx, JSAtom *source, RegExpFlag flags, RegExpGuard *g); bool get(JSContext *cx, JSAtom *source, RegExpFlag flags, RegExpGuard *g);
/* Like 'get', but compile 'maybeOpt' (if non-null). */ /* Like 'get', but compile 'maybeOpt' (if non-null). */

Просмотреть файл

@ -29,7 +29,8 @@ SizeOfRegExpStaticsData(const JSObject *obj, JSMallocSizeOfFun mallocSizeOf)
inline inline
RegExpStatics::RegExpStatics() RegExpStatics::RegExpStatics()
: bufferLink(NULL), : pendingLazyEvaluation(false),
bufferLink(NULL),
copied(false) copied(false)
{ {
clear(); clear();
@ -38,6 +39,8 @@ RegExpStatics::RegExpStatics()
inline bool inline bool
RegExpStatics::createDependent(JSContext *cx, size_t start, size_t end, Value *out) RegExpStatics::createDependent(JSContext *cx, size_t start, size_t end, Value *out)
{ {
/* Private function: caller must perform lazy evaluation. */
JS_ASSERT(!pendingLazyEvaluation);
JS_ASSERT(start <= end); JS_ASSERT(start <= end);
JS_ASSERT(end <= matchesInput->length()); JS_ASSERT(end <= matchesInput->length());
@ -59,6 +62,9 @@ RegExpStatics::createPendingInput(JSContext *cx, Value *out)
inline bool inline bool
RegExpStatics::makeMatch(JSContext *cx, size_t checkValidIndex, size_t pairNum, Value *out) RegExpStatics::makeMatch(JSContext *cx, size_t checkValidIndex, size_t pairNum, Value *out)
{ {
/* Private function: caller must perform lazy evaluation. */
JS_ASSERT(!pendingLazyEvaluation);
bool checkWhich = checkValidIndex % 2; bool checkWhich = checkValidIndex % 2;
size_t checkPair = checkValidIndex / 2; size_t checkPair = checkValidIndex / 2;
@ -75,12 +81,17 @@ RegExpStatics::makeMatch(JSContext *cx, size_t checkValidIndex, size_t pairNum,
inline bool inline bool
RegExpStatics::createLastMatch(JSContext *cx, Value *out) RegExpStatics::createLastMatch(JSContext *cx, Value *out)
{ {
if (!executeLazy(cx))
return false;
return makeMatch(cx, 0, 0, out); return makeMatch(cx, 0, 0, out);
} }
inline bool inline bool
RegExpStatics::createLastParen(JSContext *cx, Value *out) RegExpStatics::createLastParen(JSContext *cx, Value *out)
{ {
if (!executeLazy(cx))
return false;
if (matches.empty() || matches.pairCount() == 1) { if (matches.empty() || matches.pairCount() == 1) {
out->setString(cx->runtime->emptyString); out->setString(cx->runtime->emptyString);
return true; return true;
@ -99,6 +110,9 @@ inline bool
RegExpStatics::createParen(JSContext *cx, size_t pairNum, Value *out) RegExpStatics::createParen(JSContext *cx, size_t pairNum, Value *out)
{ {
JS_ASSERT(pairNum >= 1); JS_ASSERT(pairNum >= 1);
if (!executeLazy(cx))
return false;
if (matches.empty() || pairNum >= matches.pairCount()) { if (matches.empty() || pairNum >= matches.pairCount()) {
out->setString(cx->runtime->emptyString); out->setString(cx->runtime->emptyString);
return true; return true;
@ -109,6 +123,9 @@ RegExpStatics::createParen(JSContext *cx, size_t pairNum, Value *out)
inline bool inline bool
RegExpStatics::createLeftContext(JSContext *cx, Value *out) RegExpStatics::createLeftContext(JSContext *cx, Value *out)
{ {
if (!executeLazy(cx))
return false;
if (matches.empty()) { if (matches.empty()) {
out->setString(cx->runtime->emptyString); out->setString(cx->runtime->emptyString);
return true; return true;
@ -123,6 +140,9 @@ RegExpStatics::createLeftContext(JSContext *cx, Value *out)
inline bool inline bool
RegExpStatics::createRightContext(JSContext *cx, Value *out) RegExpStatics::createRightContext(JSContext *cx, Value *out)
{ {
if (!executeLazy(cx))
return false;
if (matches.empty()) { if (matches.empty()) {
out->setString(cx->runtime->emptyString); out->setString(cx->runtime->emptyString);
return true; return true;
@ -137,6 +157,8 @@ RegExpStatics::createRightContext(JSContext *cx, Value *out)
inline void inline void
RegExpStatics::getParen(size_t pairNum, JSSubString *out) const RegExpStatics::getParen(size_t pairNum, JSSubString *out) const
{ {
JS_ASSERT(!pendingLazyEvaluation);
JS_ASSERT(pairNum >= 1 && pairNum < matches.pairCount()); JS_ASSERT(pairNum >= 1 && pairNum < matches.pairCount());
const MatchPair &pair = matches[pairNum]; const MatchPair &pair = matches[pairNum];
if (pair.isUndefined()) { if (pair.isUndefined()) {
@ -150,6 +172,8 @@ RegExpStatics::getParen(size_t pairNum, JSSubString *out) const
inline void inline void
RegExpStatics::getLastMatch(JSSubString *out) const RegExpStatics::getLastMatch(JSSubString *out) const
{ {
JS_ASSERT(!pendingLazyEvaluation);
if (matches.empty()) { if (matches.empty()) {
*out = js_EmptySubString; *out = js_EmptySubString;
return; return;
@ -163,6 +187,8 @@ RegExpStatics::getLastMatch(JSSubString *out) const
inline void inline void
RegExpStatics::getLastParen(JSSubString *out) const RegExpStatics::getLastParen(JSSubString *out) const
{ {
JS_ASSERT(!pendingLazyEvaluation);
/* Note: the first pair is the whole match. */ /* Note: the first pair is the whole match. */
if (matches.empty() || matches.pairCount() == 1) { if (matches.empty() || matches.pairCount() == 1) {
*out = js_EmptySubString; *out = js_EmptySubString;
@ -174,6 +200,8 @@ RegExpStatics::getLastParen(JSSubString *out) const
inline void inline void
RegExpStatics::getLeftContext(JSSubString *out) const RegExpStatics::getLeftContext(JSSubString *out) const
{ {
JS_ASSERT(!pendingLazyEvaluation);
if (matches.empty()) { if (matches.empty()) {
*out = js_EmptySubString; *out = js_EmptySubString;
return; return;
@ -185,6 +213,8 @@ RegExpStatics::getLeftContext(JSSubString *out) const
inline void inline void
RegExpStatics::getRightContext(JSSubString *out) const RegExpStatics::getRightContext(JSSubString *out) const
{ {
JS_ASSERT(!pendingLazyEvaluation);
if (matches.empty()) { if (matches.empty()) {
*out = js_EmptySubString; *out = js_EmptySubString;
return; return;
@ -197,10 +227,19 @@ RegExpStatics::getRightContext(JSSubString *out) const
inline void inline void
RegExpStatics::copyTo(RegExpStatics &dst) RegExpStatics::copyTo(RegExpStatics &dst)
{ {
dst.matches.initArrayFrom(matches); /* Destination buffer has already been reserved by save(). */
if (!pendingLazyEvaluation)
dst.matches.initArrayFrom(matches);
dst.matchesInput = matchesInput; dst.matchesInput = matchesInput;
dst.regexp = regexp;
dst.lastIndex = lastIndex;
dst.pendingInput = pendingInput; dst.pendingInput = pendingInput;
dst.flags = flags; dst.flags = flags;
dst.pendingLazyEvaluation = pendingLazyEvaluation;
JS_ASSERT_IF(pendingLazyEvaluation, regexp);
JS_ASSERT_IF(pendingLazyEvaluation, matchesInput);
} }
inline void inline void
@ -220,12 +259,32 @@ RegExpStatics::restore()
bufferLink = bufferLink->bufferLink; bufferLink = bufferLink->bufferLink;
} }
/*
 * Record what is needed to recompute the match results later instead of
 * storing them now: |input| becomes both the pending input and the matched
 * input, and |regexp| and |lastIndex| are remembered for the replay done by
 * executeLazy(). Sets |pendingLazyEvaluation|, after which |matches| must not
 * be read until the lazy state has been resolved.
 *
 * Both |input| and |regexp| must be non-null.
 */
inline void
RegExpStatics::updateLazily(JSContext *cx, JSLinearString *input,
RegExpObject *regexp, size_t lastIndex)
{
JS_ASSERT(input && regexp);
/* Called before any field is modified, as in updateFromMatchPairs(). */
aboutToWrite();
/* Point both string fields at the same |input|. */
BarrieredSetPair<JSString, JSLinearString>(cx->compartment,
pendingInput, input,
matchesInput, input);
/* Lazy state: the parameters shadow the members, hence |this->|. */
pendingLazyEvaluation = true;
this->regexp = regexp;
this->lastIndex = lastIndex;
}
inline bool inline bool
RegExpStatics::updateFromMatchPairs(JSContext *cx, JSLinearString *input, MatchPairs &newPairs) RegExpStatics::updateFromMatchPairs(JSContext *cx, JSLinearString *input, MatchPairs &newPairs)
{ {
JS_ASSERT(input); JS_ASSERT(input);
aboutToWrite(); aboutToWrite();
/* Unset all lazy state. */
pendingLazyEvaluation = false;
this->regexp = NULL;
this->lastIndex = size_t(-1);
BarrieredSetPair<JSString, JSLinearString>(cx->compartment, BarrieredSetPair<JSString, JSLinearString>(cx->compartment,
pendingInput, input, pendingInput, input,
matchesInput, input); matchesInput, input);
@ -244,6 +303,7 @@ RegExpStatics::clear()
aboutToWrite(); aboutToWrite();
flags = RegExpFlag(0); flags = RegExpFlag(0);
pendingInput = NULL; pendingInput = NULL;
pendingLazyEvaluation = false;
matchesInput = NULL; matchesInput = NULL;
matches.forgetArray(); matches.forgetArray();
} }
@ -302,6 +362,12 @@ inline void
RegExpStatics::checkInvariants() RegExpStatics::checkInvariants()
{ {
#ifdef DEBUG #ifdef DEBUG
if (pendingLazyEvaluation) {
JS_ASSERT(regexp);
JS_ASSERT(pendingInput);
return;
}
if (matches.empty()) { if (matches.empty()) {
JS_ASSERT(!matchesInput); JS_ASSERT(!matchesInput);
return; return;

Просмотреть файл

@ -67,3 +67,37 @@ RegExpStatics::create(JSContext *cx, GlobalObject *parent)
obj->setPrivate(static_cast<void *>(res)); obj->setPrivate(static_cast<void *>(res));
return obj; return obj;
} }
/*
 * Resolve pending lazy state, if any, by re-running the remembered RegExp
 * against |matchesInput| starting at |lastIndex| and storing the full set of
 * match pairs in |matches|.
 *
 * Returns true when there was nothing to resolve or when the replay did not
 * report an error; returns false if the shared regexp could not be obtained
 * or execution returned RegExpRunStatus_Error. On the non-error path the lazy
 * flag is cleared and |regexp| is reset to NULL.
 */
bool
RegExpStatics::executeLazy(JSContext *cx)
{
    if (pendingLazyEvaluation) {
        JS_ASSERT(regexp);
        JS_ASSERT(matchesInput);
        JS_ASSERT(lastIndex != size_t(-1));

        /*
         * It is not necessary to call aboutToWrite(): evaluation of
         * implicit copies is safe.
         */

        size_t inputLength = matchesInput->length();
        StableCharPtr inputChars(matchesInput->chars(), inputLength);

        /* Obtain the shared regexp needed to run the full expression. */
        RegExpGuard guard;
        if (!regexp->getShared(cx, &guard))
            return false;

        /* Full execution: fills |matches| and advances |lastIndex|. */
        RegExpRunStatus runStatus =
            guard->execute(cx, inputChars, inputLength, &this->lastIndex, this->matches);
        if (runStatus == RegExpRunStatus_Error)
            return false;

        /* Unset lazy state and remove rooted values that now have no use. */
        pendingLazyEvaluation = false;
        regexp = NULL;
    }

    return true;
}

Просмотреть файл

@ -24,15 +24,27 @@ class RegExpStatics
VectorMatchPairs matches; VectorMatchPairs matches;
HeapPtr<JSLinearString> matchesInput; HeapPtr<JSLinearString> matchesInput;
/* The previous RegExp object and its starting index, used to resolve lazy state. */
HeapPtr<RegExpObject> regexp;
size_t lastIndex;
/* The latest RegExp input, set before execution. */ /* The latest RegExp input, set before execution. */
HeapPtr<JSString> pendingInput; HeapPtr<JSString> pendingInput;
RegExpFlag flags; RegExpFlag flags;
/*
* If true, |matchesInput|, |regexp|, and |lastIndex| may be used
* to replay the last executed RegExp, and |matches| is invalid.
*/
bool pendingLazyEvaluation;
/* Linkage for preserving RegExpStatics during nested RegExp execution. */ /* Linkage for preserving RegExpStatics during nested RegExp execution. */
RegExpStatics *bufferLink; RegExpStatics *bufferLink;
bool copied; bool copied;
private: private:
bool executeLazy(JSContext *cx);
inline void aboutToWrite(); inline void aboutToWrite();
inline void copyTo(RegExpStatics &dst); inline void copyTo(RegExpStatics &dst);
@ -72,6 +84,8 @@ class RegExpStatics
/* Mutators. */ /* Mutators. */
inline void updateLazily(JSContext *cx, JSLinearString *input,
RegExpObject *regexp, size_t lastIndex);
inline bool updateFromMatchPairs(JSContext *cx, JSLinearString *input, MatchPairs &newPairs); inline bool updateFromMatchPairs(JSContext *cx, JSLinearString *input, MatchPairs &newPairs);
inline void setMultiline(JSContext *cx, bool enabled); inline void setMultiline(JSContext *cx, bool enabled);
@ -85,6 +99,8 @@ class RegExpStatics
public: public:
/* Default match accessor. */ /* Default match accessor. */
const MatchPairs &getMatches() const { const MatchPairs &getMatches() const {
/* Safe: only used by String methods, which do not set lazy mode. */
JS_ASSERT(!pendingLazyEvaluation);
return matches; return matches;
} }
@ -95,11 +111,15 @@ class RegExpStatics
/* Returns whether results for a non-empty match are present. */ /* Returns whether results for a non-empty match are present. */
bool matched() const { bool matched() const {
/* Safe: only used by String methods, which do not set lazy mode. */
JS_ASSERT(!pendingLazyEvaluation);
JS_ASSERT(matches.pairCount() > 0); JS_ASSERT(matches.pairCount() > 0);
return matches[0].limit - matches[0].start > 0; return matches[0].limit - matches[0].start > 0;
} }
void mark(JSTracer *trc) { void mark(JSTracer *trc) {
if (regexp)
gc::MarkObject(trc, &regexp, "res->regexp");
if (pendingInput) if (pendingInput)
MarkString(trc, &pendingInput, "res->pendingInput"); MarkString(trc, &pendingInput, "res->pendingInput");
if (matchesInput) if (matchesInput)