Bug 606882: Import YARR changeset 72781, r=cdleary

This commit is contained in:
David Mandelin 2011-01-18 13:33:43 -08:00
Родитель fe9eb28c86
Коммит 97970924f4
7 изменённых файлов: 98 добавлений и 64 удалений

Просмотреть файл

@ -146,7 +146,9 @@ public:
ASSERT_VALID_CODE_POINTER(m_value);
}
void* executableAddress() const { return m_value; }
void* executableAddress() const {
return m_value;
}
#if WTF_CPU_ARM_THUMB2
// To use this pointer as a data address remove the decoration.
void* dataLocation() const { ASSERT_VALID_CODE_POINTER(m_value); return reinterpret_cast<char*>(m_value) - 1; }

Просмотреть файл

@ -0,0 +1,3 @@
// Regression test: evaluating this match must complete without crashing.
// The pattern is passed as a string and the result of the match is not
// checked; only surviving the call matters.
"ABC".match("A+(?:X?(?:|(?:))(?:(?:B)?C+w?w?)?)*");

Просмотреть файл

@ -0,0 +1,16 @@
// Regression test: compiling and running this pattern must not crash.
// The result of the match is not checked; only surviving the call matters.
var book = 'Ps'; // NOTE(review): not referenced by any of the lines below.
// The pattern is assembled piecewise: nested non-capturing groups, two of
// which contain an empty alternative, all repeated by an outer `*`.
var pattern = "(?:"
+ "(?:"
+ "(?:"
+ "(?:-|)"
+ "\\s?"
+ ")"
+ "|"
+ ")"
+ " ?"
+ "\\d+"
+ "\\w?"
+ ")*";
var re = new RegExp(pattern);
'8:5-8'.match(re);

Просмотреть файл

@ -531,14 +531,17 @@ public:
case PatternTerm::TypeParenthesesSubpattern:
// Note: for fixed once parentheses we will ensure at least the minimum is available; others are on their own.
term.frameLocation = currentCallFrameSize;
if ((term.quantityCount == 1) && !term.parentheses.isCopy) {
if (term.quantityType == QuantifierFixedCount) {
currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
currentInputPosition += term.parentheses.disjunction->m_minimumSize;
} else {
if (term.quantityCount == 1 && !term.parentheses.isCopy) {
if (term.quantityType != QuantifierFixedCount)
currentCallFrameSize += RegexStackSpaceForBackTrackInfoParenthesesOnce;
currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
}
currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
// If quantity is fixed, then pre-check its minimum size.
if (term.quantityType == QuantifierFixedCount)
currentInputPosition += term.parentheses.disjunction->m_minimumSize;
term.inputPosition = currentInputPosition;
} else if (term.parentheses.isTerminal) {
currentCallFrameSize += RegexStackSpaceForBackTrackInfoParenthesesTerminal;
currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
term.inputPosition = currentInputPosition;
} else {
term.inputPosition = currentInputPosition;
@ -592,6 +595,33 @@ public:
setupDisjunctionOffsets(m_pattern.m_body, 0, 0);
}
// This optimization identifies sets of parentheses that we will never need to backtrack.
// In these cases we do not need to store state from prior iterations.
// We can presently avoid backtracking for:
// * a set of parens at the end of the regular expression (last term in any of the alternatives of the main body disjunction).
// * where the parens are non-capturing, and quantified unbounded greedy (*).
// * where the parens do not contain any capturing subpatterns.
void checkForTerminalParentheses()
{
// This check is much too crude; should be just checking whether the candidate
// node contains nested capturing subpatterns, not the whole expression!
if (m_pattern.m_numSubpatterns)
return;
js::Vector<PatternAlternative*, 0, js::SystemAllocPolicy>& alternatives = m_pattern.m_body->m_alternatives;
for (unsigned i =0; i < alternatives.length(); ++i) {
js::Vector<PatternTerm, 0, js::SystemAllocPolicy>& terms = alternatives[i]->m_terms;
if (terms.length()) {
PatternTerm& term = terms.back();
if (term.type == PatternTerm::TypeParenthesesSubpattern
&& term.quantityType == QuantifierGreedy
&& term.quantityCount == UINT_MAX
&& !term.capture())
term.parentheses.isTerminal = true;
}
}
}
private:
RegexPattern& m_pattern;
PatternAlternative* m_alternative;
@ -624,6 +654,7 @@ int compileRegex(const UString& patternString, RegexPattern& pattern)
JS_ASSERT(numSubpatterns == pattern.m_numSubpatterns);
}
constructor.checkForTerminalParentheses();
constructor.setupOffsets();
return 0;

Просмотреть файл

@ -917,12 +917,7 @@ class RegexGenerator : private MacroAssembler {
PatternDisjunction* disjunction = term.parentheses.disjunction;
ASSERT(term.quantityCount == 1);
if (term.parentheses.isCopy) {
m_shouldFallBack = true;
return;
}
unsigned preCheckedCount = ((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)) ? disjunction->m_minimumSize : 0;
unsigned preCheckedCount = (term.quantityType == QuantifierFixedCount) ? disjunction->m_minimumSize : 0;
unsigned parenthesesFrameLocation = term.frameLocation;
unsigned alternativeFrameLocation = parenthesesFrameLocation;
@ -941,12 +936,12 @@ class RegexGenerator : private MacroAssembler {
Jump nonGreedySkipParentheses;
Label nonGreedyTryParentheses;
if (term.quantityType == QuantifierGreedy)
storeToFrame(Imm32(1), parenthesesFrameLocation);
storeToFrame(index, parenthesesFrameLocation);
else if (term.quantityType == QuantifierNonGreedy) {
storeToFrame(Imm32(0), parenthesesFrameLocation);
storeToFrame(Imm32(-1), parenthesesFrameLocation);
nonGreedySkipParentheses = jump();
nonGreedyTryParentheses = label();
storeToFrame(Imm32(1), parenthesesFrameLocation);
storeToFrame(index, parenthesesFrameLocation);
}
// store the match start index
@ -964,29 +959,21 @@ class RegexGenerator : private MacroAssembler {
TermGenerationState parenthesesState(disjunction, state.checkedTotal);
generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
// store the match end index
if (term.invertOrCapture) {
int inputOffset = state.inputOffset();
if (inputOffset) {
move(index, indexTemporary);
add32(Imm32(state.inputOffset()), indexTemporary);
store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
} else
store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
}
Jump success = jump();
Jump success = (term.quantityType == QuantifierFixedCount) ?
jump() :
branch32(NotEqual, index, Address(stackPointerRegister, (parenthesesFrameLocation * sizeof(void*))));
// A failure AFTER the parens jumps here
Label backtrackFromAfterParens(this);
if (term.quantityType == QuantifierGreedy) {
// If this is zero we have now tested with both with and without the parens.
// If this is -1 we have now tested with both with and without the parens.
loadFromFrame(parenthesesFrameLocation, indexTemporary);
state.jumpToBacktrack(branchTest32(Zero, indexTemporary), this);
state.jumpToBacktrack(branch32(Equal, indexTemporary, Imm32(-1)), this);
} else if (term.quantityType == QuantifierNonGreedy) {
// If this is zero we have now tested with both with and without the parens.
// If this is -1 we have now tested without the parens, now test with.
loadFromFrame(parenthesesFrameLocation, indexTemporary);
branchTest32(Zero, indexTemporary).linkTo(nonGreedyTryParentheses, this);
branch32(Equal, indexTemporary, Imm32(-1)).linkTo(nonGreedyTryParentheses, this);
}
parenthesesState.plantJumpToBacktrackIfExists(this);
@ -1000,7 +987,7 @@ class RegexGenerator : private MacroAssembler {
}
if (term.quantityType == QuantifierGreedy)
storeToFrame(Imm32(0), parenthesesFrameLocation);
storeToFrame(Imm32(-1), parenthesesFrameLocation);
else
state.jumpToBacktrack(jump(), this);
@ -1008,6 +995,17 @@ class RegexGenerator : private MacroAssembler {
if (term.quantityType == QuantifierNonGreedy)
nonGreedySkipParentheses.link(this);
success.link(this);
// store the match end index
if (term.invertOrCapture) {
int inputOffset = state.inputOffset();
if (inputOffset) {
move(index, indexTemporary);
add32(Imm32(state.inputOffset()), indexTemporary);
store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
} else
store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
}
}
}
@ -1018,25 +1016,6 @@ class RegexGenerator : private MacroAssembler {
ASSERT(parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern);
ASSERT(parenthesesTerm.quantityCount != 1); // Handled by generateParenthesesSingle.
// Capturing not yet implemented!
if (parenthesesTerm.invertOrCapture) {
m_shouldFallBack = true;
return;
}
// Quantification limit not yet implemented!
if (parenthesesTerm.quantityCount != 0xffffffff) {
m_shouldFallBack = true;
return;
}
// Need to reset nested subpatterns between iterations...
// for the minute this crude check rejects all patterns with any subpatterns!
if (m_pattern.m_numSubpatterns) {
m_shouldFallBack = true;
return;
}
TermGenerationState parenthesesState(disjunction, state.checkedTotal);
Label matchAgain(this);
@ -1058,7 +1037,11 @@ class RegexGenerator : private MacroAssembler {
generateTerm(parenthesesState);
// If we get here, we matched! If the index advanced then try to match more since limit isn't supported yet.
branch32(GreaterThan, index, Address(stackPointerRegister, (parenthesesTerm.frameLocation * sizeof(void*))), matchAgain);
branch32(NotEqual, index, Address(stackPointerRegister, (parenthesesTerm.frameLocation * sizeof(void*))), matchAgain);
// If we get here we matched, but we matched "" - cannot accept this alternative as is, so either backtrack,
// or fall through to try the next alternative if no backtrack is available.
parenthesesState.plantJumpToBacktrackIfExists(this);
parenthesesState.linkAlternativeBacktracks(this);
// We get here if the alternative fails to match - fall through to the next iteration, or out of the loop.
@ -1191,17 +1174,12 @@ class RegexGenerator : private MacroAssembler {
break;
case PatternTerm::TypeParenthesesSubpattern:
if (term.quantityCount == 1) {
if (term.quantityCount == 1 && !term.parentheses.isCopy)
generateParenthesesSingle(state);
break;
} else if (state.isLastTerm() && state.isMainDisjunction()) { // Is this is the last term of the main disjunction?
// If this has a greedy quantifier, then it will never need to backtrack!
if (term.quantityType == QuantifierGreedy) {
generateParenthesesGreedyNoBacktrack(state);
break;
}
}
m_shouldFallBack = true;
else if (term.parentheses.isTerminal)
generateParenthesesGreedyNoBacktrack(state);
else
m_shouldFallBack = true;
break;
case PatternTerm::TypeParentheticalAssertion:

Просмотреть файл

@ -74,7 +74,8 @@ public:
int execute(const UChar* input, unsigned start, unsigned length, int* output)
{
return JS_EXTENSION((reinterpret_cast<RegexJITCode>(m_ref.m_code.executableAddress()))(input, start, length, output));
void *code = m_ref.m_code.executableAddress();
return JS_EXTENSION((reinterpret_cast<RegexJITCode>(code))(input, start, length, output));
}
private:

Просмотреть файл

@ -39,6 +39,7 @@ namespace JSC { namespace Yarr {
#define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative.
#define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1
#define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers.
#define RegexStackSpaceForBackTrackInfoParenthesesTerminal 1
#define RegexStackSpaceForBackTrackInfoParentheses 4
struct PatternDisjunction;
@ -137,6 +138,7 @@ struct PatternTerm {
unsigned subpatternId;
unsigned lastSubpatternId;
bool isCopy;
bool isTerminal;
} parentheses;
};
QuantifierType quantityType;
@ -168,6 +170,7 @@ struct PatternTerm {
parentheses.disjunction = disjunction;
parentheses.subpatternId = subpatternId;
parentheses.isCopy = false;
parentheses.isTerminal = false;
quantityType = QuantifierFixedCount;
quantityCount = 1;
}