diff --git a/js/src/assembler/assembler/MacroAssemblerCodeRef.h b/js/src/assembler/assembler/MacroAssemblerCodeRef.h index 8bcc28b5103a..841fa9647128 100644 --- a/js/src/assembler/assembler/MacroAssemblerCodeRef.h +++ b/js/src/assembler/assembler/MacroAssemblerCodeRef.h @@ -146,7 +146,9 @@ public: ASSERT_VALID_CODE_POINTER(m_value); } - void* executableAddress() const { return m_value; } + void* executableAddress() const { + return m_value; + } #if WTF_CPU_ARM_THUMB2 // To use this pointer as a data address remove the decoration. void* dataLocation() const { ASSERT_VALID_CODE_POINTER(m_value); return reinterpret_cast(m_value) - 1; } diff --git a/js/src/jit-test/tests/basic/bug606882-1.js b/js/src/jit-test/tests/basic/bug606882-1.js new file mode 100644 index 000000000000..3d8bdf4a2ac3 --- /dev/null +++ b/js/src/jit-test/tests/basic/bug606882-1.js @@ -0,0 +1,3 @@ +// don't crash + +"ABC".match("A+(?:X?(?:|(?:))(?:(?:B)?C+w?w?)?)*"); diff --git a/js/src/jit-test/tests/basic/bug606882-2.js b/js/src/jit-test/tests/basic/bug606882-2.js new file mode 100644 index 000000000000..b7542170f556 --- /dev/null +++ b/js/src/jit-test/tests/basic/bug606882-2.js @@ -0,0 +1,16 @@ +// don't crash +var book = 'Ps'; +var pattern = "(?:" ++ "(?:" ++ "(?:" ++ "(?:-|)" ++ "\\s?" ++ ")" ++ "|" ++ ")" ++ " ?" ++ "\\d+" ++ "\\w?" ++ ")*"; +var re = new RegExp(pattern); +'8:5-8'.match(re); diff --git a/js/src/yarr/yarr/RegexCompiler.cpp b/js/src/yarr/yarr/RegexCompiler.cpp index c43ab05189c9..c8fd98765241 100644 --- a/js/src/yarr/yarr/RegexCompiler.cpp +++ b/js/src/yarr/yarr/RegexCompiler.cpp @@ -531,14 +531,17 @@ public: case PatternTerm::TypeParenthesesSubpattern: // Note: for fixed once parentheses we will ensure at least the minimum is available; others are on their own. term.frameLocation = currentCallFrameSize; - if ((term.quantityCount == 1) && !term.parentheses.isCopy) { - if (term.quantityType == QuantifierFixedCount) { - currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition); - currentInputPosition += term.parentheses.disjunction->m_minimumSize; - } else { + if (term.quantityCount == 1 && !term.parentheses.isCopy) { + if (term.quantityType != QuantifierFixedCount) currentCallFrameSize += RegexStackSpaceForBackTrackInfoParenthesesOnce; - currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition); - } + currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition); + // If quantity is fixed, then pre-check its minimum size. + if (term.quantityType == QuantifierFixedCount) + currentInputPosition += term.parentheses.disjunction->m_minimumSize; + term.inputPosition = currentInputPosition; + } else if (term.parentheses.isTerminal) { + currentCallFrameSize += RegexStackSpaceForBackTrackInfoParenthesesTerminal; + currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition); term.inputPosition = currentInputPosition; } else { term.inputPosition = currentInputPosition; @@ -592,6 +595,33 @@ public: setupDisjunctionOffsets(m_pattern.m_body, 0, 0); } + // This optimization identifies sets of parentheses that we will never need to backtrack. + // In these cases we do not need to store state from prior iterations. + // We can presently avoid backtracking for: + // * a set of parens at the end of the regular expression (last term in any of the alternatives of the main body disjunction). + // * where the parens are non-capturing, and quantified unbounded greedy (*). + // * where the parens do not contain any capturing subpatterns. + void checkForTerminalParentheses() + { + // This check is much too crude; should be just checking whether the candidate + // node contains nested capturing subpatterns, not the whole expression! + if (m_pattern.m_numSubpatterns) + return; + + js::Vector& alternatives = m_pattern.m_body->m_alternatives; + for (unsigned i =0; i < alternatives.length(); ++i) { + js::Vector& terms = alternatives[i]->m_terms; + if (terms.length()) { + PatternTerm& term = terms.back(); + if (term.type == PatternTerm::TypeParenthesesSubpattern + && term.quantityType == QuantifierGreedy + && term.quantityCount == UINT_MAX + && !term.capture()) + term.parentheses.isTerminal = true; + } + } + } + private: RegexPattern& m_pattern; PatternAlternative* m_alternative; @@ -624,6 +654,7 @@ int compileRegex(const UString& patternString, RegexPattern& pattern) JS_ASSERT(numSubpatterns == pattern.m_numSubpatterns); } + constructor.checkForTerminalParentheses(); constructor.setupOffsets(); return 0; diff --git a/js/src/yarr/yarr/RegexJIT.cpp b/js/src/yarr/yarr/RegexJIT.cpp index d12350075afe..9b58ee7e7c57 100644 --- a/js/src/yarr/yarr/RegexJIT.cpp +++ b/js/src/yarr/yarr/RegexJIT.cpp @@ -917,12 +917,7 @@ class RegexGenerator : private MacroAssembler { PatternDisjunction* disjunction = term.parentheses.disjunction; ASSERT(term.quantityCount == 1); - if (term.parentheses.isCopy) { - m_shouldFallBack = true; - return; - } - - unsigned preCheckedCount = ((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)) ? disjunction->m_minimumSize : 0; + unsigned preCheckedCount = (term.quantityType == QuantifierFixedCount) ? disjunction->m_minimumSize : 0; unsigned parenthesesFrameLocation = term.frameLocation; unsigned alternativeFrameLocation = parenthesesFrameLocation; @@ -941,12 +936,12 @@ class RegexGenerator : private MacroAssembler { Jump nonGreedySkipParentheses; Label nonGreedyTryParentheses; if (term.quantityType == QuantifierGreedy) - storeToFrame(Imm32(1), parenthesesFrameLocation); + storeToFrame(index, parenthesesFrameLocation); else if (term.quantityType == QuantifierNonGreedy) { - storeToFrame(Imm32(0), parenthesesFrameLocation); + storeToFrame(Imm32(-1), parenthesesFrameLocation); nonGreedySkipParentheses = jump(); nonGreedyTryParentheses = label(); - storeToFrame(Imm32(1), parenthesesFrameLocation); + storeToFrame(index, parenthesesFrameLocation); } // store the match start index @@ -964,29 +959,21 @@ class RegexGenerator : private MacroAssembler { TermGenerationState parenthesesState(disjunction, state.checkedTotal); generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); - // store the match end index - if (term.invertOrCapture) { - int inputOffset = state.inputOffset(); - if (inputOffset) { - move(index, indexTemporary); - add32(Imm32(state.inputOffset()), indexTemporary); - store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); - } else - store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); - } - Jump success = jump(); + Jump success = (term.quantityType == QuantifierFixedCount) ? + jump() : + branch32(NotEqual, index, Address(stackPointerRegister, (parenthesesFrameLocation * sizeof(void*)))); // A failure AFTER the parens jumps here Label backtrackFromAfterParens(this); if (term.quantityType == QuantifierGreedy) { - // If this is zero we have now tested with both with and without the parens. + // If this is -1 we have now tested with both with and without the parens. loadFromFrame(parenthesesFrameLocation, indexTemporary); - state.jumpToBacktrack(branchTest32(Zero, indexTemporary), this); + state.jumpToBacktrack(branch32(Equal, indexTemporary, Imm32(-1)), this); } else if (term.quantityType == QuantifierNonGreedy) { - // If this is zero we have now tested with both with and without the parens. + // If this is -1 we have now tested without the parens, now test with. loadFromFrame(parenthesesFrameLocation, indexTemporary); - branchTest32(Zero, indexTemporary).linkTo(nonGreedyTryParentheses, this); + branch32(Equal, indexTemporary, Imm32(-1)).linkTo(nonGreedyTryParentheses, this); } parenthesesState.plantJumpToBacktrackIfExists(this); @@ -1000,7 +987,7 @@ class RegexGenerator : private MacroAssembler { } if (term.quantityType == QuantifierGreedy) - storeToFrame(Imm32(0), parenthesesFrameLocation); + storeToFrame(Imm32(-1), parenthesesFrameLocation); else state.jumpToBacktrack(jump(), this); @@ -1008,6 +995,17 @@ class RegexGenerator : private MacroAssembler { if (term.quantityType == QuantifierNonGreedy) nonGreedySkipParentheses.link(this); success.link(this); + + // store the match end index + if (term.invertOrCapture) { + int inputOffset = state.inputOffset(); + if (inputOffset) { + move(index, indexTemporary); + add32(Imm32(state.inputOffset()), indexTemporary); + store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); + } else + store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); + } } } @@ -1018,25 +1016,6 @@ class RegexGenerator : private MacroAssembler { ASSERT(parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern); ASSERT(parenthesesTerm.quantityCount != 1); // Handled by generateParenthesesSingle. - // Capturing not yet implemented! - if (parenthesesTerm.invertOrCapture) { - m_shouldFallBack = true; - return; - } - - // Quantification limit not yet implemented! - if (parenthesesTerm.quantityCount != 0xffffffff) { - m_shouldFallBack = true; - return; - } - - // Need to reset nested subpatterns between iterations... - // for the minute this crude check rejects all patterns with any subpatterns! - if (m_pattern.m_numSubpatterns) { - m_shouldFallBack = true; - return; - } - TermGenerationState parenthesesState(disjunction, state.checkedTotal); Label matchAgain(this); @@ -1058,7 +1037,11 @@ class RegexGenerator : private MacroAssembler { generateTerm(parenthesesState); // If we get here, we matched! If the index advanced then try to match more since limit isn't supported yet. - branch32(GreaterThan, index, Address(stackPointerRegister, (parenthesesTerm.frameLocation * sizeof(void*))), matchAgain); + branch32(NotEqual, index, Address(stackPointerRegister, (parenthesesTerm.frameLocation * sizeof(void*))), matchAgain); + + // If we get here we matched, but we matched "" - cannot accept this alternative as is, so either backtrack, + // or fall through to try the next alternative if no backtrack is available. + parenthesesState.plantJumpToBacktrackIfExists(this); parenthesesState.linkAlternativeBacktracks(this); // We get here if the alternative fails to match - fall through to the next iteration, or out of the loop. @@ -1191,17 +1174,12 @@ class RegexGenerator : private MacroAssembler { break; case PatternTerm::TypeParenthesesSubpattern: - if (term.quantityCount == 1) { + if (term.quantityCount == 1 && !term.parentheses.isCopy) generateParenthesesSingle(state); - break; - } else if (state.isLastTerm() && state.isMainDisjunction()) { // Is this is the last term of the main disjunction? - // If this has a greedy quantifier, then it will never need to backtrack! - if (term.quantityType == QuantifierGreedy) { - generateParenthesesGreedyNoBacktrack(state); - break; - } - } - m_shouldFallBack = true; + else if (term.parentheses.isTerminal) + generateParenthesesGreedyNoBacktrack(state); + else + m_shouldFallBack = true; break; case PatternTerm::TypeParentheticalAssertion: diff --git a/js/src/yarr/yarr/RegexJIT.h b/js/src/yarr/yarr/RegexJIT.h index ee379e27bce5..cc3491d06e5c 100644 --- a/js/src/yarr/yarr/RegexJIT.h +++ b/js/src/yarr/yarr/RegexJIT.h @@ -74,7 +74,8 @@ public: int execute(const UChar* input, unsigned start, unsigned length, int* output) { - return JS_EXTENSION((reinterpret_cast(m_ref.m_code.executableAddress()))(input, start, length, output)); + void *code = m_ref.m_code.executableAddress(); + return JS_EXTENSION((reinterpret_cast(code))(input, start, length, output)); } private: diff --git a/js/src/yarr/yarr/RegexPattern.h b/js/src/yarr/yarr/RegexPattern.h index b437ce85ee33..cf31d8ca106e 100644 --- a/js/src/yarr/yarr/RegexPattern.h +++ b/js/src/yarr/yarr/RegexPattern.h @@ -39,6 +39,7 @@ namespace JSC { namespace Yarr { #define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative. #define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1 #define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers. +#define RegexStackSpaceForBackTrackInfoParenthesesTerminal 1 #define RegexStackSpaceForBackTrackInfoParentheses 4 struct PatternDisjunction; @@ -137,6 +138,7 @@ struct PatternTerm { unsigned subpatternId; unsigned lastSubpatternId; bool isCopy; + bool isTerminal; } parentheses; }; QuantifierType quantityType; @@ -168,6 +170,7 @@ struct PatternTerm { parentheses.disjunction = disjunction; parentheses.subpatternId = subpatternId; parentheses.isCopy = false; + parentheses.isTerminal = false; quantityType = QuantifierFixedCount; quantityCount = 1; }