From 5d406f0ff6ff6670c74c567d861e474bfb897fa0 Mon Sep 17 00:00:00 2001 From: Tooru Fujisawa Date: Sun, 20 Sep 2015 00:00:36 +0900 Subject: [PATCH] Bug 773687 - Fix assertion pattern in RegExp with sticky flag. r=till --HG-- extra : rebase_source : 42307a81dcee87426ace32a276f34281b550bc7f --- js/src/irregexp/RegExpEngine.cpp | 4 +- js/src/irregexp/RegExpEngine.h | 2 +- js/src/jsstr.cpp | 4 +- js/src/jsstr.h | 4 + js/src/tests/ecma_6/RegExp/sticky.js | 126 +++++++++++++++++++++++++++ js/src/vm/MatchPairs.h | 1 - js/src/vm/RegExpObject.cpp | 101 +++++++-------------- 7 files changed, 167 insertions(+), 75 deletions(-) create mode 100644 js/src/tests/ecma_6/RegExp/sticky.js diff --git a/js/src/irregexp/RegExpEngine.cpp b/js/src/irregexp/RegExpEngine.cpp index b510530f4a0e..c08ef5020f13 100644 --- a/js/src/irregexp/RegExpEngine.cpp +++ b/js/src/irregexp/RegExpEngine.cpp @@ -1651,7 +1651,7 @@ IsNativeRegExpEnabled(JSContext* cx) RegExpCode irregexp::CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData* data, HandleLinearString sample, bool is_global, bool ignore_case, - bool is_ascii, bool match_only, bool force_bytecode) + bool is_ascii, bool match_only, bool force_bytecode, bool sticky) { if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { JS_ReportError(cx, "regexp too big"); @@ -1677,7 +1677,7 @@ irregexp::CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData* compiler.accept()); RegExpNode* node = captured_body; bool is_end_anchored = data->tree->IsAnchoredAtEnd(); - bool is_start_anchored = data->tree->IsAnchoredAtStart(); + bool is_start_anchored = sticky || data->tree->IsAnchoredAtStart(); int max_length = data->tree->max_match(); if (!is_start_anchored) { // Add a .*? 
at the beginning, outside the body capture, unless diff --git a/js/src/irregexp/RegExpEngine.h b/js/src/irregexp/RegExpEngine.h index 67c3fb827d3f..b687c6c52daa 100644 --- a/js/src/irregexp/RegExpEngine.h +++ b/js/src/irregexp/RegExpEngine.h @@ -88,7 +88,7 @@ struct RegExpCode RegExpCode CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData* data, HandleLinearString sample, bool is_global, bool ignore_case, - bool is_ascii, bool match_only, bool force_bytecode); + bool is_ascii, bool match_only, bool force_bytecode, bool sticky); // Note: this may return RegExpRunStatus_Error if an interrupt was requested // while the code was executing. diff --git a/js/src/jsstr.cpp b/js/src/jsstr.cpp index 6b97938cc482..b108827c314a 100644 --- a/js/src/jsstr.cpp +++ b/js/src/jsstr.cpp @@ -1739,8 +1739,8 @@ js::str_lastIndexOf(JSContext* cx, unsigned argc, Value* vp) return true; } -static bool -HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start) +bool +js::HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start) { MOZ_ASSERT(start + pat->length() <= text->length()); diff --git a/js/src/jsstr.h b/js/src/jsstr.h index 5a05ab07c291..d5c6606bd88b 100644 --- a/js/src/jsstr.h +++ b/js/src/jsstr.h @@ -219,6 +219,10 @@ StringHasPattern(JSLinearString* text, const char16_t* pat, uint32_t patlen); extern int StringFindPattern(JSLinearString* text, JSLinearString* pat, size_t start); +/* Return true if the string contains a pattern at |start|. 
*/ +extern bool +HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start); + template <typename CharT> extern bool HasRegExpMetaChars(const CharT* chars, size_t length); diff --git a/js/src/tests/ecma_6/RegExp/sticky.js b/js/src/tests/ecma_6/RegExp/sticky.js new file mode 100644 index 000000000000..13a6debe9588 --- /dev/null +++ b/js/src/tests/ecma_6/RegExp/sticky.js @@ -0,0 +1,126 @@ +var BUGNUMBER = 773687; +var summary = 'sticky flag should not break assertion behavior.'; + +print(BUGNUMBER + ": " + summary); + +function test(re, text, expectations) { + // Sanity check for test data itself. + assertEq(expectations.length, text.length + 1); + + for (var i = 0; i < expectations.length; i++) { + var result = expectations[i]; + + re.lastIndex = i; + var match = re.exec(text); + if (result === null) { + assertEq(re.lastIndex, 0); + assertEq(match, null); + } else { + assertEq(re.lastIndex, result.lastIndex); + assertEq(match !== null, true); + assertEq(match.length, result.matches.length); + for (var j = 0; j < result.matches.length; j++) + assertEq(match[j], result.matches[j]); + assertEq(match.index, result.index); + } + } +} + +// simple text +test(/bc/y, "abcabd", [ + null, + { lastIndex: 3, matches: ["bc"], index: 1 }, + null, + null, + null, + null, + null, +]); + +// complex pattern +test(/bc|c|d/y, "abcabd", [ + null, + { lastIndex: 3, matches: ["bc"], index: 1 }, + { lastIndex: 3, matches: ["c"], index: 2 }, + null, + null, + { lastIndex: 6, matches: ["d"], index: 5 }, + null, +]); + +test(/.*(bc|c|d)/y, "abcabd", [ + { lastIndex: 6, matches: ["abcabd", "d"], index: 0 }, + { lastIndex: 6, matches: ["bcabd", "d"], index: 1 }, + { lastIndex: 6, matches: ["cabd", "d"], index: 2 }, + { lastIndex: 6, matches: ["abd", "d"], index: 3 }, + { lastIndex: 6, matches: ["bd", "d"], index: 4 }, + { lastIndex: 6, matches: ["d", "d"], index: 5 }, + null, +]); + +test(/.*?(bc|c|d)/y, "abcabd", [ + { lastIndex: 3, matches: ["abc", "bc"], index: 0 }, + { lastIndex: 3, matches: 
["bc", "bc"], index: 1 }, + { lastIndex: 3, matches: ["c", "c"], index: 2 }, + { lastIndex: 6, matches: ["abd", "d"], index: 3 }, + { lastIndex: 6, matches: ["bd", "d"], index: 4 }, + { lastIndex: 6, matches: ["d", "d"], index: 5 }, + null, +]); + +test(/(bc|.*c|d)/y, "abcabd", [ + { lastIndex: 3, matches: ["abc", "abc"], index: 0 }, + { lastIndex: 3, matches: ["bc", "bc"], index: 1 }, + { lastIndex: 3, matches: ["c", "c"], index: 2 }, + null, + null, + { lastIndex: 6, matches: ["d", "d"], index: 5 }, + null, +]); + +// ^ assertions +test(/^/y, "abcabc", [ + { lastIndex: 0, matches: [""], index: 0 }, + null, + null, + null, + null, + null, + null, +]); + +test(/^a/my, "abc\nabc", [ + { lastIndex: 1, matches: ["a"], index: 0 }, + null, + null, + null, + { lastIndex: 5, matches: ["a"], index: 4 }, + null, + null, + null, +]); + +// \b assertions +test(/\b/y, "abc bc", [ + { lastIndex: 0, matches: [""], index: 0 }, + null, + null, + { lastIndex: 3, matches: [""], index: 3 }, + { lastIndex: 4, matches: [""], index: 4 }, + null, + { lastIndex: 6, matches: [""], index: 6 }, +]); + +// \B assertions +test(/\B/y, "abc bc", [ + null, + { lastIndex: 1, matches: [""], index: 1 }, + { lastIndex: 2, matches: [""], index: 2 }, + null, + null, + { lastIndex: 5, matches: [""], index: 5 }, + null, +]); + +if (typeof reportCompare === "function") + reportCompare(true, true); diff --git a/js/src/vm/MatchPairs.h b/js/src/vm/MatchPairs.h index 0dc3f420c87e..14dee5c66443 100644 --- a/js/src/vm/MatchPairs.h +++ b/js/src/vm/MatchPairs.h @@ -79,7 +79,6 @@ class MatchPairs bool initArrayFrom(MatchPairs& copyFrom); void forgetArray() { pairs_ = nullptr; } - void displace(size_t disp); void checkAgainst(size_t inputLength) { #ifdef DEBUG for (size_t i = 0; i < pairCount_; i++) { diff --git a/js/src/vm/RegExpObject.cpp b/js/src/vm/RegExpObject.cpp index 6beecdad4832..7e29b7b66019 100644 --- a/js/src/vm/RegExpObject.cpp +++ b/js/src/vm/RegExpObject.cpp @@ -147,19 +147,6 @@ 
MatchPairs::initArrayFrom(MatchPairs& copyFrom) return true; } -void -MatchPairs::displace(size_t disp) -{ - if (disp == 0) - return; - - for (size_t i = 0; i < pairCount_; i++) { - MOZ_ASSERT(pairs_[i].check()); - pairs_[i].start += (pairs_[i].start < 0) ? 0 : disp; - pairs_[i].limit += (pairs_[i].limit < 0) ? 0 : disp; - } -} - bool ScopedMatchPairs::allocOrExpandArray(size_t pairCount) { @@ -580,32 +567,8 @@ RegExpShared::compile(JSContext* cx, HandleLinearString input, TraceLoggerThread* logger = TraceLoggerForMainThread(cx->runtime()); AutoTraceLog logCompile(logger, TraceLogger_IrregexpCompile); - if (!sticky()) { - RootedAtom pattern(cx, source); - return compile(cx, pattern, input, mode, force); - } - - /* - * The sticky case we implement hackily by prepending a caret onto the front - * and relying on |::execute| to pseudo-slice the string when it sees a sticky regexp. - */ - static const char prefix[] = {'^', '(', '?', ':'}; - static const char postfix[] = {')'}; - - using mozilla::ArrayLength; - StringBuffer sb(cx); - if (!sb.reserve(ArrayLength(prefix) + source->length() + ArrayLength(postfix))) - return false; - sb.infallibleAppend(prefix, ArrayLength(prefix)); - if (!sb.append(source)) - return false; - sb.infallibleAppend(postfix, ArrayLength(postfix)); - - RootedAtom fakeySource(cx, sb.finishAtom()); - if (!fakeySource) - return false; - - return compile(cx, fakeySource, input, mode, force); + RootedAtom pattern(cx, source); + return compile(cx, pattern, input, mode, force); } bool @@ -635,7 +598,8 @@ RegExpShared::compile(JSContext* cx, HandleAtom pattern, HandleLinearString inpu ignoreCase(), input->hasLatin1Chars(), mode == MatchOnly, - force == ForceByteCode); + force == ForceByteCode, + sticky()); if (code.empty()) return false; @@ -681,36 +645,39 @@ RegExpShared::execute(JSContext* cx, HandleLinearString input, size_t start, return RegExpRunStatus_Error; } - /* - * |displacement| emulates sticky mode by matching from this offset - * into the 
char buffer and subtracting the delta off at the end. - */ - size_t charsOffset = 0; size_t length = input->length(); - size_t origLength = length; - size_t displacement = 0; - - if (sticky()) { - displacement = start; - charsOffset += displacement; - length -= displacement; - start = 0; - } // Reset the Irregexp backtrack stack if it grows during execution. irregexp::RegExpStackScope stackScope(cx->runtime()); if (canStringMatch) { MOZ_ASSERT(pairCount() == 1); - int res = StringFindPattern(input, source, start + charsOffset); + size_t sourceLength = source->length(); + if (sticky()) { + // First part checks size_t overflow. + if (sourceLength + start < sourceLength || sourceLength + start > length) + return RegExpRunStatus_Success_NotFound; + if (!HasSubstringAt(input, source, start)) + return RegExpRunStatus_Success_NotFound; + + if (matches) { + (*matches)[0].start = start; + (*matches)[0].limit = start + sourceLength; + + matches->checkAgainst(length); + } + return RegExpRunStatus_Success; + } + + int res = StringFindPattern(input, source, start); if (res == -1) return RegExpRunStatus_Success_NotFound; if (matches) { (*matches)[0].start = res; - (*matches)[0].limit = res + source->length(); + (*matches)[0].limit = res + sourceLength; - matches->checkAgainst(origLength); + matches->checkAgainst(length); } return RegExpRunStatus_Success; } @@ -725,10 +692,10 @@ RegExpShared::execute(JSContext* cx, HandleLinearString input, size_t start, AutoTraceLog logJIT(logger, TraceLogger_IrregexpExecute); AutoCheckCannotGC nogc; if (input->hasLatin1Chars()) { - const Latin1Char* chars = input->latin1Chars(nogc) + charsOffset; + const Latin1Char* chars = input->latin1Chars(nogc); result = irregexp::ExecuteCode(cx, code, chars, start, length, matches); } else { - const char16_t* chars = input->twoByteChars(nogc) + charsOffset; + const char16_t* chars = input->twoByteChars(nogc); result = irregexp::ExecuteCode(cx, code, chars, start, length, matches); } } @@ -749,10 +716,8 @@ 
RegExpShared::execute(JSContext* cx, HandleLinearString input, size_t start, MOZ_ASSERT(result == RegExpRunStatus_Success); - if (matches) { - matches->displace(displacement); - matches->checkAgainst(origLength); - } + if (matches) + matches->checkAgainst(length); return RegExpRunStatus_Success; } while (false); @@ -769,17 +734,15 @@ RegExpShared::execute(JSContext* cx, HandleLinearString input, size_t start, RegExpRunStatus result; if (inputChars.isLatin1()) { - const Latin1Char* chars = inputChars.latin1Range().start().get() + charsOffset; + const Latin1Char* chars = inputChars.latin1Range().start().get(); result = irregexp::InterpretCode(cx, byteCode, chars, start, length, matches); } else { - const char16_t* chars = inputChars.twoByteRange().start().get() + charsOffset; + const char16_t* chars = inputChars.twoByteRange().start().get(); result = irregexp::InterpretCode(cx, byteCode, chars, start, length, matches); } - if (result == RegExpRunStatus_Success && matches) { - matches->displace(displacement); - matches->checkAgainst(origLength); - } + if (result == RegExpRunStatus_Success && matches) + matches->checkAgainst(length); return result; }