Bug 773687 - Fix assertion pattern in RegExp with sticky flag. r=till

--HG--
extra : rebase_source : 42307a81dcee87426ace32a276f34281b550bc7f
This commit is contained in:
Tooru Fujisawa 2015-09-20 00:00:36 +09:00
Родитель 3b34b7fda2
Коммит 5d406f0ff6
7 изменённых файлов: 167 добавлений и 75 удалений

Просмотреть файл

@ -1651,7 +1651,7 @@ IsNativeRegExpEnabled(JSContext* cx)
RegExpCode
irregexp::CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData* data,
HandleLinearString sample, bool is_global, bool ignore_case,
bool is_ascii, bool match_only, bool force_bytecode)
bool is_ascii, bool match_only, bool force_bytecode, bool sticky)
{
if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
JS_ReportError(cx, "regexp too big");
@ -1677,7 +1677,7 @@ irregexp::CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData*
compiler.accept());
RegExpNode* node = captured_body;
bool is_end_anchored = data->tree->IsAnchoredAtEnd();
bool is_start_anchored = data->tree->IsAnchoredAtStart();
bool is_start_anchored = sticky || data->tree->IsAnchoredAtStart();
int max_length = data->tree->max_match();
if (!is_start_anchored) {
// Add a .*? at the beginning, outside the body capture, unless

Просмотреть файл

@ -88,7 +88,7 @@ struct RegExpCode
RegExpCode
CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData* data,
HandleLinearString sample, bool is_global, bool ignore_case,
bool is_ascii, bool match_only, bool force_bytecode);
bool is_ascii, bool match_only, bool force_bytecode, bool sticky);
// Note: this may return RegExpRunStatus_Error if an interrupt was requested
// while the code was executing.

Просмотреть файл

@ -1739,8 +1739,8 @@ js::str_lastIndexOf(JSContext* cx, unsigned argc, Value* vp)
return true;
}
static bool
HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start)
bool
js::HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start)
{
MOZ_ASSERT(start + pat->length() <= text->length());

Просмотреть файл

@ -219,6 +219,10 @@ StringHasPattern(JSLinearString* text, const char16_t* pat, uint32_t patlen);
extern int
StringFindPattern(JSLinearString* text, JSLinearString* pat, size_t start);
/*
 * Return true if |pat| occurs in |text| beginning exactly at index |start|.
 * The definition asserts start + pat->length() <= text->length(), so callers
 * are expected to bounds-check before calling.
 */
extern bool
HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start);
template <typename CharT>
extern bool
HasRegExpMetaChars(const CharT* chars, size_t length);

Просмотреть файл

@ -0,0 +1,126 @@
// Test preamble: record the bug number and a one-line summary, then print
// both so the test log identifies which regression test is running.
var BUGNUMBER = 773687;
var summary = 'sticky flag should not break assertion behavior.';
print(BUGNUMBER + ": " + summary);
/**
 * Executes |re| against |text| once per start offset (0 through text.length
 * inclusive) and compares each outcome with the same-indexed entry of
 * |expectations|.
 *
 * @param {RegExp} re            Regular expression under test; its lastIndex
 *                               is overwritten before every exec() call.
 * @param {string} text          Input string handed to exec().
 * @param {Array}  expectations  One entry per start offset. null means "no
 *                               match, and lastIndex is reset to 0";
 *                               otherwise an object with the expected
 *                               |lastIndex| after the call, the expected
 *                               |matches| array and the expected match
 *                               |index|.
 */
function test(re, text, expectations) {
    // The table has to describe every offset, including end-of-string.
    assertEq(expectations.length, text.length + 1);

    for (var offset = 0; offset < expectations.length; offset++) {
        var expected = expectations[offset];

        re.lastIndex = offset;
        var actual = re.exec(text);

        // Failure case: exec() yields null and lastIndex goes back to 0.
        if (expected === null) {
            assertEq(re.lastIndex, 0);
            assertEq(actual, null);
            continue;
        }

        // Success case: check the resulting lastIndex first, then the match.
        assertEq(re.lastIndex, expected.lastIndex);
        assertEq(actual !== null, true);
        assertEq(actual.length, expected.matches.length);

        var group = 0;
        while (group < expected.matches.length) {
            assertEq(actual[group], expected.matches[group]);
            group++;
        }

        assertEq(actual.index, expected.index);
    }
}
// Each table below lists, for every start offset 0..text.length, either
// null (no match expected) or the expected lastIndex / captures / index.

// simple text
// A plain literal only matches when it begins exactly at the start offset.
test(/bc/y, "abcabd", [
null,
{ lastIndex: 3, matches: ["bc"], index: 1 },
null,
null,
null,
null,
null,
]);
// complex pattern
// Alternation: each alternative is still tried only at the start offset.
test(/bc|c|d/y, "abcabd", [
null,
{ lastIndex: 3, matches: ["bc"], index: 1 },
{ lastIndex: 3, matches: ["c"], index: 2 },
null,
null,
{ lastIndex: 6, matches: ["d"], index: 5 },
null,
]);
// Greedy prefix: the match starts at the offset and runs to the last "d".
test(/.*(bc|c|d)/y, "abcabd", [
{ lastIndex: 6, matches: ["abcabd", "d"], index: 0 },
{ lastIndex: 6, matches: ["bcabd", "d"], index: 1 },
{ lastIndex: 6, matches: ["cabd", "d"], index: 2 },
{ lastIndex: 6, matches: ["abd", "d"], index: 3 },
{ lastIndex: 6, matches: ["bd", "d"], index: 4 },
{ lastIndex: 6, matches: ["d", "d"], index: 5 },
null,
]);
// Lazy prefix: the match starts at the offset and stops at the first
// alternative that can be satisfied.
test(/.*?(bc|c|d)/y, "abcabd", [
{ lastIndex: 3, matches: ["abc", "bc"], index: 0 },
{ lastIndex: 3, matches: ["bc", "bc"], index: 1 },
{ lastIndex: 3, matches: ["c", "c"], index: 2 },
{ lastIndex: 6, matches: ["abd", "d"], index: 3 },
{ lastIndex: 6, matches: ["bd", "d"], index: 4 },
{ lastIndex: 6, matches: ["d", "d"], index: 5 },
null,
]);
// Greedy ".*" inside one alternative: offsets 3 and 4 have no "c" ahead and
// no "d" at the offset itself, so nothing matches there.
test(/(bc|.*c|d)/y, "abcabd", [
{ lastIndex: 3, matches: ["abc", "abc"], index: 0 },
{ lastIndex: 3, matches: ["bc", "bc"], index: 1 },
{ lastIndex: 3, matches: ["c", "c"], index: 2 },
null,
null,
{ lastIndex: 6, matches: ["d", "d"], index: 5 },
null,
]);
// ^ assertions
// Without the m flag, ^ is expected to match at offset 0 only.
test(/^/y, "abcabc", [
{ lastIndex: 0, matches: [""], index: 0 },
null,
null,
null,
null,
null,
null,
]);
// With the m flag, ^ is also expected to match right after the "\n"
// (offset 4).
test(/^a/my, "abc\nabc", [
{ lastIndex: 1, matches: ["a"], index: 0 },
null,
null,
null,
{ lastIndex: 5, matches: ["a"], index: 4 },
null,
null,
null,
]);
// \b assertions
// Word boundaries of "abc bc" are at offsets 0, 3, 4 and 6; the character
// before the start offset has to be taken into account.
test(/\b/y, "abc bc", [
{ lastIndex: 0, matches: [""], index: 0 },
null,
null,
{ lastIndex: 3, matches: [""], index: 3 },
{ lastIndex: 4, matches: [""], index: 4 },
null,
{ lastIndex: 6, matches: [""], index: 6 },
]);
// \B assertions
// Exact complement of the \b table above: offsets 1, 2 and 5.
test(/\B/y, "abc bc", [
null,
{ lastIndex: 1, matches: [""], index: 1 },
{ lastIndex: 2, matches: [""], index: 2 },
null,
null,
{ lastIndex: 5, matches: [""], index: 5 },
null,
]);
// Report success through reportCompare when the host environment provides it.
if (typeof reportCompare === "function")
reportCompare(true, true);

Просмотреть файл

@ -79,7 +79,6 @@ class MatchPairs
bool initArrayFrom(MatchPairs& copyFrom);
void forgetArray() { pairs_ = nullptr; }
void displace(size_t disp);
void checkAgainst(size_t inputLength) {
#ifdef DEBUG
for (size_t i = 0; i < pairCount_; i++) {

Просмотреть файл

@ -147,19 +147,6 @@ MatchPairs::initArrayFrom(MatchPairs& copyFrom)
return true;
}
/*
 * Shift every stored match pair forward by |disp| positions. A start or
 * limit that is negative is left unchanged (presumably the marker for a
 * group that did not participate in the match -- confirm against MatchPair).
 */
void
MatchPairs::displace(size_t disp)
{
    // A zero shift leaves every pair as it is.
    if (!disp)
        return;

    size_t idx = 0;
    while (idx < pairCount_) {
        MOZ_ASSERT(pairs_[idx].check());
        if (pairs_[idx].start >= 0)
            pairs_[idx].start += disp;
        if (pairs_[idx].limit >= 0)
            pairs_[idx].limit += disp;
        idx++;
    }
}
bool
ScopedMatchPairs::allocOrExpandArray(size_t pairCount)
{
@ -580,32 +567,8 @@ RegExpShared::compile(JSContext* cx, HandleLinearString input,
TraceLoggerThread* logger = TraceLoggerForMainThread(cx->runtime());
AutoTraceLog logCompile(logger, TraceLogger_IrregexpCompile);
if (!sticky()) {
RootedAtom pattern(cx, source);
return compile(cx, pattern, input, mode, force);
}
/*
* The sticky case we implement hackily by prepending a caret onto the front
* and relying on |::execute| to pseudo-slice the string when it sees a sticky regexp.
*/
static const char prefix[] = {'^', '(', '?', ':'};
static const char postfix[] = {')'};
using mozilla::ArrayLength;
StringBuffer sb(cx);
if (!sb.reserve(ArrayLength(prefix) + source->length() + ArrayLength(postfix)))
return false;
sb.infallibleAppend(prefix, ArrayLength(prefix));
if (!sb.append(source))
return false;
sb.infallibleAppend(postfix, ArrayLength(postfix));
RootedAtom fakeySource(cx, sb.finishAtom());
if (!fakeySource)
return false;
return compile(cx, fakeySource, input, mode, force);
RootedAtom pattern(cx, source);
return compile(cx, pattern, input, mode, force);
}
bool
@ -635,7 +598,8 @@ RegExpShared::compile(JSContext* cx, HandleAtom pattern, HandleLinearString inpu
ignoreCase(),
input->hasLatin1Chars(),
mode == MatchOnly,
force == ForceByteCode);
force == ForceByteCode,
sticky());
if (code.empty())
return false;
@ -681,36 +645,39 @@ RegExpShared::execute(JSContext* cx, HandleLinearString input, size_t start,
return RegExpRunStatus_Error;
}
/*
* |displacement| emulates sticky mode by matching from this offset
* into the char buffer and subtracting the delta off at the end.
*/
size_t charsOffset = 0;
size_t length = input->length();
size_t origLength = length;
size_t displacement = 0;
if (sticky()) {
displacement = start;
charsOffset += displacement;
length -= displacement;
start = 0;
}
// Reset the Irregexp backtrack stack if it grows during execution.
irregexp::RegExpStackScope stackScope(cx->runtime());
if (canStringMatch) {
MOZ_ASSERT(pairCount() == 1);
int res = StringFindPattern(input, source, start + charsOffset);
size_t sourceLength = source->length();
if (sticky()) {
// First part checks size_t overflow.
if (sourceLength + start < sourceLength || sourceLength + start > length)
return RegExpRunStatus_Success_NotFound;
if (!HasSubstringAt(input, source, start))
return RegExpRunStatus_Success_NotFound;
if (matches) {
(*matches)[0].start = start;
(*matches)[0].limit = start + sourceLength;
matches->checkAgainst(length);
}
return RegExpRunStatus_Success;
}
int res = StringFindPattern(input, source, start);
if (res == -1)
return RegExpRunStatus_Success_NotFound;
if (matches) {
(*matches)[0].start = res;
(*matches)[0].limit = res + source->length();
(*matches)[0].limit = res + sourceLength;
matches->checkAgainst(origLength);
matches->checkAgainst(length);
}
return RegExpRunStatus_Success;
}
@ -725,10 +692,10 @@ RegExpShared::execute(JSContext* cx, HandleLinearString input, size_t start,
AutoTraceLog logJIT(logger, TraceLogger_IrregexpExecute);
AutoCheckCannotGC nogc;
if (input->hasLatin1Chars()) {
const Latin1Char* chars = input->latin1Chars(nogc) + charsOffset;
const Latin1Char* chars = input->latin1Chars(nogc);
result = irregexp::ExecuteCode(cx, code, chars, start, length, matches);
} else {
const char16_t* chars = input->twoByteChars(nogc) + charsOffset;
const char16_t* chars = input->twoByteChars(nogc);
result = irregexp::ExecuteCode(cx, code, chars, start, length, matches);
}
}
@ -749,10 +716,8 @@ RegExpShared::execute(JSContext* cx, HandleLinearString input, size_t start,
MOZ_ASSERT(result == RegExpRunStatus_Success);
if (matches) {
matches->displace(displacement);
matches->checkAgainst(origLength);
}
if (matches)
matches->checkAgainst(length);
return RegExpRunStatus_Success;
} while (false);
@ -769,17 +734,15 @@ RegExpShared::execute(JSContext* cx, HandleLinearString input, size_t start,
RegExpRunStatus result;
if (inputChars.isLatin1()) {
const Latin1Char* chars = inputChars.latin1Range().start().get() + charsOffset;
const Latin1Char* chars = inputChars.latin1Range().start().get();
result = irregexp::InterpretCode(cx, byteCode, chars, start, length, matches);
} else {
const char16_t* chars = inputChars.twoByteRange().start().get() + charsOffset;
const char16_t* chars = inputChars.twoByteRange().start().get();
result = irregexp::InterpretCode(cx, byteCode, chars, start, length, matches);
}
if (result == RegExpRunStatus_Success && matches) {
matches->displace(displacement);
matches->checkAgainst(origLength);
}
if (result == RegExpRunStatus_Success && matches)
matches->checkAgainst(length);
return result;
}