Port of performance fixes from Monkey (see bug #85721).

Also includes fixes for:
#91343 (non-Latin-1 characters fail for [\S])
#78156 (Unicode line terminator matching)
#87231 (/(A)?(A.*)/ didn't reset paren state for empty first match)
This commit is contained in:
rogerl%netscape.com 2001-08-27 18:03:19 +00:00
Родитель 42a0388ddb
Коммит 43c0ad4889
1 изменённых файлов: 148 добавлений и 63 удалений

Просмотреть файл

@ -1358,13 +1358,22 @@ public class NativeRegExp extends IdScriptable implements Function {
*/ */
num = grState.state.parenCount; num = grState.state.parenCount;
boolean oldBroke = grState.state.goForBroke;
grState.state.goForBroke = false;
kidMatch = matchRENodes(grState.state, grState.kid, grState.next, index); kidMatch = matchRENodes(grState.state, grState.kid, grState.next, index);
grState.state.complete = -1;
grState.state.goForBroke = oldBroke;
if (kidMatch == -1) { if (kidMatch == -1) {
grState.state.parenCount = num;
if (previousKid != -1)
matchRENodes(grState.state, grState.kid, grState.next, previousKid);
match = matchRENodes(grState.state, grState.next, grState.stop, index); match = matchRENodes(grState.state, grState.next, grState.stop, index);
if (match != -1) { if (match != -1) {
grState.state.parenCount = num; if (grState.stop == null) {
if (previousKid != -1) grState.state.complete = match;
matchRENodes(grState.state, grState.kid, grState.next, previousKid); return index;
}
return index; return index;
} }
else else
@ -1382,8 +1391,14 @@ public class NativeRegExp extends IdScriptable implements Function {
if (grState.maxKid != 0) --grState.kidCount; if (grState.maxKid != 0) --grState.kidCount;
} }
grState.state.parenCount = num; grState.state.parenCount = num;
matchRENodes(grState.state, grState.kid, grState.next, index);
match = matchRENodes(grState.state, grState.next, grState.stop, kidMatch); match = matchRENodes(grState.state, grState.next, grState.stop, kidMatch);
if (match != -1) { if (match != -1) {
if (grState.stop == null) {
grState.state.complete = match;
return kidMatch;
}
matchRENodes(grState.state, grState.kid, grState.next, index); matchRENodes(grState.state, grState.kid, grState.next, index);
return kidMatch; return kidMatch;
} }
@ -1419,13 +1434,27 @@ public class NativeRegExp extends IdScriptable implements Function {
int match; int match;
match = matchRENodes(state, ren.next, null, index); match = matchRENodes(state, ren.next, null, index);
if (match != -1) return index; if (state.goForBroke && (state.complete != -1))
return state.complete;
if (match != -1) {
state.complete = match;
return index;
}
if (match != -1)
return index;
kidMatch = matchRENodes(state, (RENode)ren.kid, ren.next, index); kidMatch = matchRENodes(state, (RENode)ren.kid, ren.next, index);
if (kidMatch == -1) return -1; if (kidMatch == -1) return -1;
if (state.goForBroke && (state.complete != -1))
return state.complete;
if (kidMatch == index) return kidMatch; /* no point pursuing an empty match forever */ if (kidMatch == index) return kidMatch; /* no point pursuing an empty match forever */
return matchNonGreedyKid(state, ren, kidCount, maxKid, kidMatch); return matchNonGreedyKid(state, ren, kidCount, maxKid, kidMatch);
} }
boolean isLineTerminator(char c) {
return TokenStream.isJSLineTerminator(c);
}
int matchRENodes(MatchState state, RENode ren, RENode stop, int index) { int matchRENodes(MatchState state, RENode ren, RENode stop, int index) {
int num; int num;
char[] input = state.input; char[] input = state.input;
@ -1442,6 +1471,8 @@ public class NativeRegExp extends IdScriptable implements Function {
num = state.parenCount; num = state.parenCount;
int kidMatch = matchRENodes(state, (RENode)ren.kid, int kidMatch = matchRENodes(state, (RENode)ren.kid,
stop, index); stop, index);
if (state.goForBroke && (state.complete != -1))
return state.complete;
if (kidMatch != -1) return kidMatch; if (kidMatch != -1) return kidMatch;
for (int i = num; i < state.parenCount; i++) for (int i = num; i < state.parenCount; i++)
state.parens[i] = null; state.parens[i] = null;
@ -1456,10 +1487,10 @@ public class NativeRegExp extends IdScriptable implements Function {
ren.next, index); ren.next, index);
if (kidMatch == -1) if (kidMatch == -1)
return -1; return -1;
else { if (state.goForBroke && (state.complete != -1))
lastKid = index; return state.complete;
index = kidMatch; lastKid = index;
} index = kidMatch;
} }
if (num == ren.max) if (num == ren.max)
// Have matched the exact count required, // Have matched the exact count required,
@ -1468,17 +1499,28 @@ public class NativeRegExp extends IdScriptable implements Function {
if ((ren.flags & RENode.MINIMAL) == 0) { if ((ren.flags & RENode.MINIMAL) == 0) {
int kidMatch = matchGreedyKid(state, ren, stop, num, int kidMatch = matchGreedyKid(state, ren, stop, num,
index, lastKid); index, lastKid);
if (kidMatch == -1) if (kidMatch == -1) {
index = matchRENodes(state, (RENode)ren.kid, if (lastKid != -1) {
ren.next, index); index = matchRENodes(state, (RENode)ren.kid,
else ren.next, lastKid);
if (state.goForBroke && (state.complete != -1))
return state.complete;
}
}
else {
if (state.goForBroke && (state.complete != -1))
return state.complete;
index = kidMatch; index = kidMatch;
}
} }
else { else {
index = matchNonGreedyKid(state, ren, num, index = matchNonGreedyKid(state, ren, num,
ren.max, index); ren.max, index);
if (index == -1)
return -1;
if (state.goForBroke && (state.complete != -1))
return state.complete;
} }
if (index == -1) return -1;
} }
break; break;
case REOP_PLUS: { case REOP_PLUS: {
@ -1489,26 +1531,40 @@ public class NativeRegExp extends IdScriptable implements Function {
if ((ren.flags & RENode.MINIMAL) == 0) { if ((ren.flags & RENode.MINIMAL) == 0) {
kidMatch = matchGreedyKid(state, ren, stop, 1, kidMatch = matchGreedyKid(state, ren, stop, 1,
kidMatch, index); kidMatch, index);
if (kidMatch == -1) if (kidMatch == -1) {
index = matchRENodes(state,(RENode)ren.kid, index = matchRENodes(state,(RENode)ren.kid,
ren.next, index); ren.next, index);
else if (state.goForBroke && (state.complete != -1))
return state.complete;
}
else {
if (state.goForBroke && (state.complete != -1))
return state.complete;
index = kidMatch; index = kidMatch;
}
} }
else else {
index = matchNonGreedyKid(state, ren, 1, 0, kidMatch); index = matchNonGreedyKid(state, ren, 1, 0, kidMatch);
if (state.goForBroke && (state.complete != -1))
return state.complete;
}
if (index == -1) return -1; if (index == -1) return -1;
} }
break; break;
case REOP_STAR: case REOP_STAR:
if ((ren.flags & RENode.MINIMAL) == 0) { if ((ren.flags & RENode.MINIMAL) == 0) {
int kidMatch = matchGreedyKid(state, ren, stop, 0, index, -1); int kidMatch = matchGreedyKid(state, ren, stop, 0, index, -1);
if (kidMatch != -1) if (kidMatch != -1) {
if (state.goForBroke && (state.complete != -1))
return state.complete;
index = kidMatch; index = kidMatch;
}
} }
else { else {
index = matchNonGreedyKid(state, ren, 0, 0, index); index = matchNonGreedyKid(state, ren, 0, 0, index);
if (index == -1) return -1; if (index == -1) return -1;
if (state.goForBroke && (state.complete != -1))
return state.complete;
} }
break; break;
case REOP_OPT: { case REOP_OPT: {
@ -1516,19 +1572,29 @@ public class NativeRegExp extends IdScriptable implements Function {
if (((ren.flags & RENode.MINIMAL) != 0)) { if (((ren.flags & RENode.MINIMAL) != 0)) {
int restMatch = matchRENodes(state, ren.next, int restMatch = matchRENodes(state, ren.next,
stop, index); stop, index);
if (state.goForBroke && (state.complete != -1))
return state.complete;
if (restMatch != -1) return restMatch; if (restMatch != -1) return restMatch;
} }
int kidMatch = matchRENodes(state, (RENode)ren.kid, int kidMatch = matchRENodes(state, (RENode)ren.kid,
ren.next, index); ren.next, index);
if (state.goForBroke && (state.complete != -1))
return state.complete;
if (kidMatch == -1) { if (kidMatch == -1) {
for (int i = saveNum; i < state.parenCount; i++)
state.parens[i] = null;
state.parenCount = saveNum; state.parenCount = saveNum;
break; break;
} }
else { else {
int restMatch = matchRENodes(state, ren.next, int restMatch = matchRENodes(state, ren.next,
stop, kidMatch); stop, kidMatch);
if (state.goForBroke && (state.complete != -1))
return state.complete;
if (restMatch == -1) { if (restMatch == -1) {
// need to undo the result of running the kid // need to undo the result of running the kid
for (int i = saveNum; i < state.parenCount; i++)
state.parens[i] = null;
state.parenCount = saveNum; state.parenCount = saveNum;
break; break;
} }
@ -1566,12 +1632,16 @@ public class NativeRegExp extends IdScriptable implements Function {
case REOP_ASSERT: { case REOP_ASSERT: {
int kidMatch = matchRENodes(state, (RENode)ren.kid, int kidMatch = matchRENodes(state, (RENode)ren.kid,
ren.next, index); ren.next, index);
if (state.goForBroke && (state.complete != -1))
return state.complete;
if (kidMatch == -1) return -1; if (kidMatch == -1) return -1;
break; break;
} }
case REOP_ASSERT_NOT: { case REOP_ASSERT_NOT: {
int kidMatch = matchRENodes(state, (RENode)ren.kid, int kidMatch = matchRENodes(state, (RENode)ren.kid,
ren.next, index); ren.next, index);
if (state.goForBroke && (state.complete != -1))
return state.complete;
if (kidMatch != -1) return -1; if (kidMatch != -1) return -1;
break; break;
} }
@ -1627,7 +1697,7 @@ public class NativeRegExp extends IdScriptable implements Function {
if (index >= input.length) { if (index >= input.length) {
return state.noMoreInput(); return state.noMoreInput();
} }
if (input[index] != '\n') if (!isLineTerminator(input[index]))
index++; index++;
else else
return -1; return -1;
@ -1637,7 +1707,7 @@ public class NativeRegExp extends IdScriptable implements Function {
for (cp2 = index; cp2 < input.length; cp2++) { for (cp2 = index; cp2 < input.length; cp2++) {
int cp3 = matchRENodes(state, ren.next, stop, cp2); int cp3 = matchRENodes(state, ren.next, stop, cp2);
if (cp3 != -1) return cp3; if (cp3 != -1) return cp3;
if (input[cp2] == '\n') if (isLineTerminator(input[cp2]))
return -1; return -1;
} }
return state.noMoreInput(); return state.noMoreInput();
@ -1645,7 +1715,7 @@ public class NativeRegExp extends IdScriptable implements Function {
case REOP_DOTSTAR: { case REOP_DOTSTAR: {
int cp2; int cp2;
for (cp2 = index; cp2 < input.length; cp2++) for (cp2 = index; cp2 < input.length; cp2++)
if (input[cp2] == '\n') if (isLineTerminator(input[cp2]))
break; break;
while (cp2 >= index) { while (cp2 >= index) {
int cp3 = matchRENodes(state, ren.next, stop, cp2); int cp3 = matchRENodes(state, ren.next, stop, cp2);
@ -1690,7 +1760,7 @@ public class NativeRegExp extends IdScriptable implements Function {
RegExpImpl reImpl = getImpl(cx); RegExpImpl reImpl = getImpl(cx);
if ((reImpl.multiline) if ((reImpl.multiline)
|| ((state.flags & MULTILINE) != 0)) || ((state.flags & MULTILINE) != 0))
if (input[index] == '\n') if (isLineTerminator(input[index]))
;// leave index ;// leave index
else else
return -1; return -1;
@ -1708,7 +1778,7 @@ public class NativeRegExp extends IdScriptable implements Function {
if (index >= input.length) { if (index >= input.length) {
return state.noMoreInput(); return state.noMoreInput();
} }
if (input[index - 1] == '\n') { if (isLineTerminator(input[index - 1])) {
break; break;
} }
} }
@ -1838,6 +1908,8 @@ public class NativeRegExp extends IdScriptable implements Function {
state.start = start; state.start = start;
state.skipped = 0; state.skipped = 0;
state.input = charArray; state.input = charArray;
state.complete = -1;
state.goForBroke = true;
state.parenCount = 0; state.parenCount = 0;
state.maybeParens = new SubString[re.parenCount]; state.maybeParens = new SubString[re.parenCount];
@ -2183,7 +2255,7 @@ class RENode {
} }
private void calcBMSize(char[] s, int index, int cp2, boolean fold) { private void calcBMSize(char[] s, int index, int cp2, boolean fold) {
char maxc = 0; int maxc = 0;
while (index < cp2) { while (index < cp2) {
char c = s[index++]; char c = s[index++];
if (c == '\\') { if (c == '\\') {
@ -2200,13 +2272,24 @@ class RENode {
c = (char) x; c = (char) x;
index += 5; index += 5;
} else { } else {
/* /*
* Octal and hex escapes can't be > 255. Skip this * For the not whitespace, not word or not digit cases
* backslash and let the loop pass over the remaining * we widen the range to the complete unicode range.
* escape sequence as if it were text to match.
*/ */
if (maxc < 255) maxc = 255; if ((s[index] == 'S')
continue; || (s[index] == 'W') || (s[index] == 'D')) {
maxc = 65535;
break; /* leave now, it can't get worse */
}
else {
/*
* Octal and hex escapes can't be > 255. Skip this
* backslash and let the loop pass over the remaining
* escape sequence as if it were text to match.
*/
if (maxc < 255) maxc = 255;
continue;
}
} }
} }
if (fold) { if (fold) {
@ -2228,7 +2311,7 @@ class RENode {
/ NativeRegExp.JS_BITS_PER_BYTE); / NativeRegExp.JS_BITS_PER_BYTE);
} }
private void matchBit(char c, int fill) { private void matchBit(int c, int fill) {
int i = (c) >> 3; int i = (c) >> 3;
byte b = (byte) (c & 7); byte b = (byte) (c & 7);
b = (byte) (1 << b); b = (byte) (1 << b);
@ -2238,7 +2321,7 @@ class RENode {
bitmap[i] |= b; bitmap[i] |= b;
} }
private void checkRange(char lastc, int fill) { private void checkRange(int lastc, int fill) {
matchBit(lastc, fill); matchBit(lastc, fill);
matchBit('-', fill); matchBit('-', fill);
} }
@ -2266,11 +2349,11 @@ class RENode {
bitmap[0] = (byte)0xfe; bitmap[0] = (byte)0xfe;
} }
int nchars = bmsize * NativeRegExp.JS_BITS_PER_BYTE; int nchars = bmsize * NativeRegExp.JS_BITS_PER_BYTE;
char lastc = (char)nchars; int lastc = nchars;
boolean inrange = false; boolean inrange = false;
while (index < end) { while (index < end) {
char c = s[index++]; int c = s[index++];
if (c == '\\') { if (c == '\\') {
c = s[index++]; c = s[index++];
switch (c) { switch (c) {
@ -2280,13 +2363,13 @@ class RENode {
case 'r': case 'r':
case 't': case 't':
case 'v': case 'v':
c = NativeRegExp.getEscape(c); c = NativeRegExp.getEscape((char)c);
break; break;
case 'd': case 'd':
if (inrange) if (inrange)
checkRange(lastc, fill); checkRange(lastc, fill);
lastc = (char) nchars; lastc = nchars;
for (c = '0'; c <= '9'; c++) for (c = '0'; c <= '9'; c++)
matchBit(c, fill); matchBit(c, fill);
continue; continue;
@ -2294,7 +2377,7 @@ class RENode {
case 'D': case 'D':
if (inrange) if (inrange)
checkRange(lastc, fill); checkRange(lastc, fill);
lastc = (char) nchars; lastc = nchars;
for (c = 0; c < '0'; c++) for (c = 0; c < '0'; c++)
matchBit(c, fill); matchBit(c, fill);
for (c = '9' + 1; c < nchars; c++) for (c = '9' + 1; c < nchars; c++)
@ -2304,36 +2387,36 @@ class RENode {
case 'w': case 'w':
if (inrange) if (inrange)
checkRange(lastc, fill); checkRange(lastc, fill);
lastc = (char) nchars; lastc = nchars;
for (c = 0; c < nchars; c++) for (c = 0; c < nchars; c++)
if (NativeRegExp.isWord(c)) if (NativeRegExp.isWord((char)c))
matchBit(c, fill); matchBit(c, fill);
continue; continue;
case 'W': case 'W':
if (inrange) if (inrange)
checkRange(lastc, fill); checkRange(lastc, fill);
lastc = (char) nchars; lastc = nchars;
for (c = 0; c < nchars; c++) for (c = 0; c < nchars; c++)
if (!NativeRegExp.isWord(c)) if (!NativeRegExp.isWord((char)c))
matchBit(c, fill); matchBit(c, fill);
continue; continue;
case 's': case 's':
if (inrange) if (inrange)
checkRange(lastc, fill); checkRange(lastc, fill);
lastc = (char) nchars; lastc = nchars;
for (c = 0; c < nchars; c++) for (c = 0; c < nchars; c++)
if (Character.isWhitespace(c)) if (Character.isWhitespace((char)c))
matchBit(c, fill); matchBit(c, fill);
continue; continue;
case 'S': case 'S':
if (inrange) if (inrange)
checkRange(lastc, fill); checkRange(lastc, fill);
lastc = (char) nchars; lastc = nchars;
for (c = 0; c < nchars; c++) for (c = 0; c < nchars; c++)
if (!Character.isWhitespace(c)) if (!Character.isWhitespace((char)c))
matchBit(c, fill); matchBit(c, fill);
continue; continue;
@ -2345,43 +2428,43 @@ class RENode {
case '5': case '5':
case '6': case '6':
case '7': case '7':
n = NativeRegExp.unDigit(c); n = NativeRegExp.unDigit((char)c);
ocp = index - 2; ocp = index - 2;
c = s[index]; c = s[index];
if ('0' <= c && c <= '7') { if ('0' <= c && c <= '7') {
index++; index++;
n = 8 * n + NativeRegExp.unDigit(c); n = 8 * n + NativeRegExp.unDigit((char)c);
c = s[index]; c = s[index];
if ('0' <= c && c <= '7') { if ('0' <= c && c <= '7') {
index++; index++;
i = 8 * n + NativeRegExp.unDigit(c); i = 8 * n + NativeRegExp.unDigit((char)c);
if (i <= 0377) if (i <= 0377)
n = i; n = i;
else else
index--; index--;
} }
} }
c = (char) n; c = n;
break; break;
case 'x': case 'x':
ocp = index; ocp = index;
if (index < s.length && if (index < s.length &&
NativeRegExp.isHex(c = s[index++])) NativeRegExp.isHex((char)(c = s[index++])))
{ {
n = NativeRegExp.unHex(c); n = NativeRegExp.unHex((char)c);
if (index < s.length && if (index < s.length &&
NativeRegExp.isHex(c = s[index++])) NativeRegExp.isHex((char)(c = s[index++])))
{ {
n <<= 4; n <<= 4;
n += NativeRegExp.unHex(c); n += NativeRegExp.unHex((char)c);
} }
} else { } else {
index = ocp; /* \xZZ is xZZ (Perl does \0ZZ!) */ index = ocp; /* \xZZ is xZZ (Perl does \0ZZ!) */
n = 'x'; n = 'x';
} }
c = (char) n; c = n;
break; break;
case 'u': case 'u':
@ -2394,15 +2477,15 @@ class RENode {
NativeRegExp.unHex(s[index+1])) << 4) + NativeRegExp.unHex(s[index+1])) << 4) +
NativeRegExp.unHex(s[index+2])) << 4) + NativeRegExp.unHex(s[index+2])) << 4) +
NativeRegExp.unHex(s[index+3]); NativeRegExp.unHex(s[index+3]);
c = (char) n; c = n;
index += 4; index += 4;
} }
break; break;
case 'c': case 'c':
c = s[index++]; c = s[index++];
c = Character.toUpperCase(c); c = Character.toUpperCase((char)c);
c = (char) (c ^ 64); // JS_TOCTRL c = (c ^ 64); // JS_TOCTRL
break; break;
} }
} }
@ -2410,10 +2493,10 @@ class RENode {
if (inrange) { if (inrange) {
if (lastc > c) { if (lastc > c) {
throw NativeGlobal.constructError( throw NativeGlobal.constructError(
Context.getCurrentContext(), "RangeError", Context.getCurrentContext(), "RangeError",
ScriptRuntime.getMessage( ScriptRuntime.getMessage(
"msg.bad.range", null), "msg.bad.range", null),
state.scope); state.scope);
} }
inrange = false; inrange = false;
} else { } else {
@ -2438,9 +2521,9 @@ class RENode {
* Must do both upper and lower for Turkish dotless i, * Must do both upper and lower for Turkish dotless i,
* Georgian, etc. * Georgian, etc.
*/ */
char foldc = Character.toUpperCase(lastc); int foldc = Character.toUpperCase((char)lastc);
matchBit(foldc, fill); matchBit(foldc, fill);
foldc = Character.toLowerCase(foldc); foldc = Character.toLowerCase((char)foldc);
matchBit(foldc, fill); matchBit(foldc, fill);
} }
} }
@ -2476,6 +2559,8 @@ class MatchState {
SubString[] parens; /* certain paren substring matches */ SubString[] parens; /* certain paren substring matches */
Scriptable scope; Scriptable scope;
char[] input; char[] input;
boolean goForBroke; /* pursue any match to the end of the re */
int complete; /* match achieved by attempted early completion */
public int noMoreInput() { public int noMoreInput() {
inputExhausted = true; inputExhausted = true;