зеркало из https://github.com/mozilla/gecko-dev.git
Bug 773687 - Fix assertion pattern in RegExp with sticky flag. r=till
--HG-- extra : rebase_source : 42307a81dcee87426ace32a276f34281b550bc7f
This commit is contained in:
Родитель
3b34b7fda2
Коммит
5d406f0ff6
|
@ -1651,7 +1651,7 @@ IsNativeRegExpEnabled(JSContext* cx)
|
|||
RegExpCode
|
||||
irregexp::CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData* data,
|
||||
HandleLinearString sample, bool is_global, bool ignore_case,
|
||||
bool is_ascii, bool match_only, bool force_bytecode)
|
||||
bool is_ascii, bool match_only, bool force_bytecode, bool sticky)
|
||||
{
|
||||
if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
|
||||
JS_ReportError(cx, "regexp too big");
|
||||
|
@ -1677,7 +1677,7 @@ irregexp::CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData*
|
|||
compiler.accept());
|
||||
RegExpNode* node = captured_body;
|
||||
bool is_end_anchored = data->tree->IsAnchoredAtEnd();
|
||||
bool is_start_anchored = data->tree->IsAnchoredAtStart();
|
||||
bool is_start_anchored = sticky || data->tree->IsAnchoredAtStart();
|
||||
int max_length = data->tree->max_match();
|
||||
if (!is_start_anchored) {
|
||||
// Add a .*? at the beginning, outside the body capture, unless
|
||||
|
|
|
@ -88,7 +88,7 @@ struct RegExpCode
|
|||
RegExpCode
|
||||
CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData* data,
|
||||
HandleLinearString sample, bool is_global, bool ignore_case,
|
||||
bool is_ascii, bool match_only, bool force_bytecode);
|
||||
bool is_ascii, bool match_only, bool force_bytecode, bool sticky);
|
||||
|
||||
// Note: this may return RegExpRunStatus_Error if an interrupt was requested
|
||||
// while the code was executing.
|
||||
|
|
|
@ -1739,8 +1739,8 @@ js::str_lastIndexOf(JSContext* cx, unsigned argc, Value* vp)
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start)
|
||||
bool
|
||||
js::HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start)
|
||||
{
|
||||
MOZ_ASSERT(start + pat->length() <= text->length());
|
||||
|
||||
|
|
|
@ -219,6 +219,10 @@ StringHasPattern(JSLinearString* text, const char16_t* pat, uint32_t patlen);
|
|||
extern int
|
||||
StringFindPattern(JSLinearString* text, JSLinearString* pat, size_t start);
|
||||
|
||||
/*
 * Return true if the string contains a pattern at |start|, i.e. |pat| is
 * compared against |text| at that one offset only.
 *
 * The definition asserts that |start + pat->length() <= text->length()|, so
 * callers are expected to range-check before calling.
 */
|
||||
extern bool
|
||||
HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start);
|
||||
|
||||
template <typename CharT>
|
||||
extern bool
|
||||
HasRegExpMetaChars(const CharT* chars, size_t length);
|
||||
|
|
|
@ -0,0 +1,126 @@
|
|||
// Identification for this regression test: the bug it covers and a one-line
// description, both echoed once before the checks run.
var BUGNUMBER = 773687,
    summary = 'sticky flag should not break assertion behavior.';

print([BUGNUMBER, summary].join(": "));
|
||||
|
||||
/**
 * Run |re| against |text| once for every possible lastIndex value and compare
 * the outcome with |expectations|.
 *
 * expectations[k] describes the expected outcome with re.lastIndex preset to k:
 *   - null: exec() returns null and lastIndex is reset to 0;
 *   - { lastIndex, matches, index }: exec() returns a match whose elements
 *     equal |matches|, whose .index equals |index|, and after which
 *     re.lastIndex equals |lastIndex|.
 *
 * There must be exactly text.length + 1 entries (offsets 0..text.length).
 */
function test(re, text, expectations) {
    // The table itself must cover every offset, including end-of-string.
    assertEq(expectations.length, text.length + 1);

    for (var startAt = 0; startAt < expectations.length; startAt++) {
        var expected = expectations[startAt];

        re.lastIndex = startAt;
        var actual = re.exec(text);

        // Expected failure: lastIndex is reset and no match object is produced.
        if (expected === null) {
            assertEq(re.lastIndex, 0);
            assertEq(actual, null);
            continue;
        }

        // Expected success: check the end position first, then the match
        // contents element by element, then where the match began.
        assertEq(re.lastIndex, expected.lastIndex);
        assertEq(actual !== null, true);
        assertEq(actual.length, expected.matches.length);
        for (var k = 0; k < expected.matches.length; k++) {
            assertEq(actual[k], expected.matches[k]);
        }
        assertEq(actual.index, expected.index);
    }
}
|
||||
|
||||
// simple text
// A plain literal only matches when lastIndex points exactly at it; the
// sticky flag must not scan forward from offset 0 to find the "bc" at 1.
test(/bc/y, "abcabd", [
    /* 0 */ null,
    /* 1 */ { lastIndex: 3, matches: ["bc"], index: 1 },
    /* 2 */ null,
    /* 3 */ null,
    /* 4 */ null,
    /* 5 */ null,
    /* 6 */ null
]);
|
||||
|
||||
// complex pattern
// Every alternative of the disjunction has to be anchored at lastIndex, not
// just the first one.
test(/bc|c|d/y, "abcabd", [
    /* 0 */ null,
    /* 1 */ { lastIndex: 3, matches: ["bc"], index: 1 },
    /* 2 */ { lastIndex: 3, matches: ["c"], index: 2 },
    /* 3 */ null,
    /* 4 */ null,
    /* 5 */ { lastIndex: 6, matches: ["d"], index: 5 },
    /* 6 */ null
]);
|
||||
|
||||
// Greedy ".*" in front of the group: the match starts at lastIndex and runs
// to the final "d" from every offset except end-of-string.
test(/.*(bc|c|d)/y, "abcabd", [
    /* 0 */ { lastIndex: 6, matches: ["abcabd", "d"], index: 0 },
    /* 1 */ { lastIndex: 6, matches: ["bcabd", "d"], index: 1 },
    /* 2 */ { lastIndex: 6, matches: ["cabd", "d"], index: 2 },
    /* 3 */ { lastIndex: 6, matches: ["abd", "d"], index: 3 },
    /* 4 */ { lastIndex: 6, matches: ["bd", "d"], index: 4 },
    /* 5 */ { lastIndex: 6, matches: ["d", "d"], index: 5 },
    /* 6 */ null
]);
|
||||
|
||||
// Lazy ".*?" in front of the group: the match still starts at lastIndex but
// stops at the first position where one of the alternatives fits.
test(/.*?(bc|c|d)/y, "abcabd", [
    /* 0 */ { lastIndex: 3, matches: ["abc", "bc"], index: 0 },
    /* 1 */ { lastIndex: 3, matches: ["bc", "bc"], index: 1 },
    /* 2 */ { lastIndex: 3, matches: ["c", "c"], index: 2 },
    /* 3 */ { lastIndex: 6, matches: ["abd", "d"], index: 3 },
    /* 4 */ { lastIndex: 6, matches: ["bd", "d"], index: 4 },
    /* 5 */ { lastIndex: 6, matches: ["d", "d"], index: 5 },
    /* 6 */ null
]);
|
||||
|
||||
// ".*" inside one alternative only: offsets 3 and 4 have no "c" ahead and do
// not start with "d", so they must fail rather than match further along.
test(/(bc|.*c|d)/y, "abcabd", [
    /* 0 */ { lastIndex: 3, matches: ["abc", "abc"], index: 0 },
    /* 1 */ { lastIndex: 3, matches: ["bc", "bc"], index: 1 },
    /* 2 */ { lastIndex: 3, matches: ["c", "c"], index: 2 },
    /* 3 */ null,
    /* 4 */ null,
    /* 5 */ { lastIndex: 6, matches: ["d", "d"], index: 5 },
    /* 6 */ null
]);
|
||||
|
||||
// ^ assertions
// Without the multiline flag "^" holds only at offset 0 of the whole input;
// a non-zero lastIndex must not be treated as a new start of input.
test(/^/y, "abcabc", [
    /* 0 */ { lastIndex: 0, matches: [""], index: 0 },
    /* 1 */ null,
    /* 2 */ null,
    /* 3 */ null,
    /* 4 */ null,
    /* 5 */ null,
    /* 6 */ null
]);
|
||||
|
||||
// With the multiline flag "^" also holds right after the "\n" at offset 3,
// so starting at offset 4 succeeds as well as starting at offset 0.
test(/^a/my, "abc\nabc", [
    /* 0 */ { lastIndex: 1, matches: ["a"], index: 0 },
    /* 1 */ null,
    /* 2 */ null,
    /* 3 */ null,
    /* 4 */ { lastIndex: 5, matches: ["a"], index: 4 },
    /* 5 */ null,
    /* 6 */ null,
    /* 7 */ null
]);
|
||||
|
||||
// \b assertions
// Word boundaries in "abc bc" sit at offsets 0, 3, 4 and 6. Deciding this
// needs the character before lastIndex, so the input must not be treated as
// if it began at lastIndex.
test(/\b/y, "abc bc", [
    /* 0 */ { lastIndex: 0, matches: [""], index: 0 },
    /* 1 */ null,
    /* 2 */ null,
    /* 3 */ { lastIndex: 3, matches: [""], index: 3 },
    /* 4 */ { lastIndex: 4, matches: [""], index: 4 },
    /* 5 */ null,
    /* 6 */ { lastIndex: 6, matches: [""], index: 6 }
]);
|
||||
|
||||
// \B assertions
// The complement of the \b table for the same text: only offsets 1, 2 and 5
// lie strictly inside a word.
test(/\B/y, "abc bc", [
    /* 0 */ null,
    /* 1 */ { lastIndex: 1, matches: [""], index: 1 },
    /* 2 */ { lastIndex: 2, matches: [""], index: 2 },
    /* 3 */ null,
    /* 4 */ null,
    /* 5 */ { lastIndex: 5, matches: [""], index: 5 },
    /* 6 */ null
]);
|
||||
|
||||
// Report overall success when a reportCompare function is available;
// otherwise reaching this point without a thrown assertion is the result.
if (typeof reportCompare === "function") {
    reportCompare(true, true);
}
|
|
@ -79,7 +79,6 @@ class MatchPairs
|
|||
bool initArrayFrom(MatchPairs& copyFrom);
|
||||
void forgetArray() { pairs_ = nullptr; }
|
||||
|
||||
void displace(size_t disp);
|
||||
void checkAgainst(size_t inputLength) {
|
||||
#ifdef DEBUG
|
||||
for (size_t i = 0; i < pairCount_; i++) {
|
||||
|
|
|
@ -147,19 +147,6 @@ MatchPairs::initArrayFrom(MatchPairs& copyFrom)
|
|||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
MatchPairs::displace(size_t disp)
|
||||
{
|
||||
if (disp == 0)
|
||||
return;
|
||||
|
||||
for (size_t i = 0; i < pairCount_; i++) {
|
||||
MOZ_ASSERT(pairs_[i].check());
|
||||
pairs_[i].start += (pairs_[i].start < 0) ? 0 : disp;
|
||||
pairs_[i].limit += (pairs_[i].limit < 0) ? 0 : disp;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
ScopedMatchPairs::allocOrExpandArray(size_t pairCount)
|
||||
{
|
||||
|
@ -580,32 +567,8 @@ RegExpShared::compile(JSContext* cx, HandleLinearString input,
|
|||
TraceLoggerThread* logger = TraceLoggerForMainThread(cx->runtime());
|
||||
AutoTraceLog logCompile(logger, TraceLogger_IrregexpCompile);
|
||||
|
||||
if (!sticky()) {
|
||||
RootedAtom pattern(cx, source);
|
||||
return compile(cx, pattern, input, mode, force);
|
||||
}
|
||||
|
||||
/*
|
||||
* The sticky case we implement hackily by prepending a caret onto the front
|
||||
* and relying on |::execute| to pseudo-slice the string when it sees a sticky regexp.
|
||||
*/
|
||||
static const char prefix[] = {'^', '(', '?', ':'};
|
||||
static const char postfix[] = {')'};
|
||||
|
||||
using mozilla::ArrayLength;
|
||||
StringBuffer sb(cx);
|
||||
if (!sb.reserve(ArrayLength(prefix) + source->length() + ArrayLength(postfix)))
|
||||
return false;
|
||||
sb.infallibleAppend(prefix, ArrayLength(prefix));
|
||||
if (!sb.append(source))
|
||||
return false;
|
||||
sb.infallibleAppend(postfix, ArrayLength(postfix));
|
||||
|
||||
RootedAtom fakeySource(cx, sb.finishAtom());
|
||||
if (!fakeySource)
|
||||
return false;
|
||||
|
||||
return compile(cx, fakeySource, input, mode, force);
|
||||
RootedAtom pattern(cx, source);
|
||||
return compile(cx, pattern, input, mode, force);
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -635,7 +598,8 @@ RegExpShared::compile(JSContext* cx, HandleAtom pattern, HandleLinearString inpu
|
|||
ignoreCase(),
|
||||
input->hasLatin1Chars(),
|
||||
mode == MatchOnly,
|
||||
force == ForceByteCode);
|
||||
force == ForceByteCode,
|
||||
sticky());
|
||||
if (code.empty())
|
||||
return false;
|
||||
|
||||
|
@ -681,36 +645,39 @@ RegExpShared::execute(JSContext* cx, HandleLinearString input, size_t start,
|
|||
return RegExpRunStatus_Error;
|
||||
}
|
||||
|
||||
/*
|
||||
* |displacement| emulates sticky mode by matching from this offset
|
||||
* into the char buffer and subtracting the delta off at the end.
|
||||
*/
|
||||
size_t charsOffset = 0;
|
||||
size_t length = input->length();
|
||||
size_t origLength = length;
|
||||
size_t displacement = 0;
|
||||
|
||||
if (sticky()) {
|
||||
displacement = start;
|
||||
charsOffset += displacement;
|
||||
length -= displacement;
|
||||
start = 0;
|
||||
}
|
||||
|
||||
// Reset the Irregexp backtrack stack if it grows during execution.
|
||||
irregexp::RegExpStackScope stackScope(cx->runtime());
|
||||
|
||||
if (canStringMatch) {
|
||||
MOZ_ASSERT(pairCount() == 1);
|
||||
int res = StringFindPattern(input, source, start + charsOffset);
|
||||
size_t sourceLength = source->length();
|
||||
if (sticky()) {
|
||||
// First part checks size_t overflow.
|
||||
if (sourceLength + start < sourceLength || sourceLength + start > length)
|
||||
return RegExpRunStatus_Success_NotFound;
|
||||
if (!HasSubstringAt(input, source, start))
|
||||
return RegExpRunStatus_Success_NotFound;
|
||||
|
||||
if (matches) {
|
||||
(*matches)[0].start = start;
|
||||
(*matches)[0].limit = start + sourceLength;
|
||||
|
||||
matches->checkAgainst(length);
|
||||
}
|
||||
return RegExpRunStatus_Success;
|
||||
}
|
||||
|
||||
int res = StringFindPattern(input, source, start);
|
||||
if (res == -1)
|
||||
return RegExpRunStatus_Success_NotFound;
|
||||
|
||||
if (matches) {
|
||||
(*matches)[0].start = res;
|
||||
(*matches)[0].limit = res + source->length();
|
||||
(*matches)[0].limit = res + sourceLength;
|
||||
|
||||
matches->checkAgainst(origLength);
|
||||
matches->checkAgainst(length);
|
||||
}
|
||||
return RegExpRunStatus_Success;
|
||||
}
|
||||
|
@ -725,10 +692,10 @@ RegExpShared::execute(JSContext* cx, HandleLinearString input, size_t start,
|
|||
AutoTraceLog logJIT(logger, TraceLogger_IrregexpExecute);
|
||||
AutoCheckCannotGC nogc;
|
||||
if (input->hasLatin1Chars()) {
|
||||
const Latin1Char* chars = input->latin1Chars(nogc) + charsOffset;
|
||||
const Latin1Char* chars = input->latin1Chars(nogc);
|
||||
result = irregexp::ExecuteCode(cx, code, chars, start, length, matches);
|
||||
} else {
|
||||
const char16_t* chars = input->twoByteChars(nogc) + charsOffset;
|
||||
const char16_t* chars = input->twoByteChars(nogc);
|
||||
result = irregexp::ExecuteCode(cx, code, chars, start, length, matches);
|
||||
}
|
||||
}
|
||||
|
@ -749,10 +716,8 @@ RegExpShared::execute(JSContext* cx, HandleLinearString input, size_t start,
|
|||
|
||||
MOZ_ASSERT(result == RegExpRunStatus_Success);
|
||||
|
||||
if (matches) {
|
||||
matches->displace(displacement);
|
||||
matches->checkAgainst(origLength);
|
||||
}
|
||||
if (matches)
|
||||
matches->checkAgainst(length);
|
||||
return RegExpRunStatus_Success;
|
||||
} while (false);
|
||||
|
||||
|
@ -769,17 +734,15 @@ RegExpShared::execute(JSContext* cx, HandleLinearString input, size_t start,
|
|||
|
||||
RegExpRunStatus result;
|
||||
if (inputChars.isLatin1()) {
|
||||
const Latin1Char* chars = inputChars.latin1Range().start().get() + charsOffset;
|
||||
const Latin1Char* chars = inputChars.latin1Range().start().get();
|
||||
result = irregexp::InterpretCode(cx, byteCode, chars, start, length, matches);
|
||||
} else {
|
||||
const char16_t* chars = inputChars.twoByteRange().start().get() + charsOffset;
|
||||
const char16_t* chars = inputChars.twoByteRange().start().get();
|
||||
result = irregexp::InterpretCode(cx, byteCode, chars, start, length, matches);
|
||||
}
|
||||
|
||||
if (result == RegExpRunStatus_Success && matches) {
|
||||
matches->displace(displacement);
|
||||
matches->checkAgainst(origLength);
|
||||
}
|
||||
if (result == RegExpRunStatus_Success && matches)
|
||||
matches->checkAgainst(length);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче