I changed TokenStream to use an internal buffer, in place of LineBuffer#startString/getString, to store the characters currently being read for identifiers, strings and numbers. At the price of one more character copy, this simplifies the code logic, especially the interaction with LineBuffer.

I also fixed http://bugzilla.mozilla.org/show_bug.cgi?id=157509
This commit is contained in:
igor%mir2.org 2002-07-15 18:49:42 +00:00
Родитель abb61cbd8d
Коммит 307d75e312
1 изменённых файлов: 170 добавлений и 185 удалений

Просмотреть файл

@ -751,82 +751,78 @@ public class TokenStream {
// identifier/keyword/instanceof?
// watch out for starting with a <backslash>
boolean identifierStart;
boolean isUnicodeEscapeStart = false;
if (c == '\\') {
c = in.read();
if (c == 'u')
if (c == 'u') {
identifierStart = true;
isUnicodeEscapeStart = true;
else
stringBufferTop = 0;
} else {
identifierStart = false;
c = '\\';
// always unread the 'u' or whatever, we need
// to start the string below at the <backslash>.
in.unread();
in.unread();
}
} else {
identifierStart = Character.isJavaIdentifierStart((char)c);
if (identifierStart) {
stringBufferTop = 0;
addToString(c);
}
}
if (isUnicodeEscapeStart ||
Character.isJavaIdentifierStart((char)c)) {
in.startString();
if (identifierStart) {
boolean containsEscape = isUnicodeEscapeStart;
do {
c = in.read();
if (c == '\\') {
for (;;) {
if (isUnicodeEscapeStart) {
// strictly speaking we should probably push-back
// all the bad characters if the <backslash>uXXXX
// sequence is malformed. But since there isn't a
// correct context(is there?) for a bad Unicode
// escape sequence in an identifier, we can report
// an error here.
int escapeVal = 0;
for (int i = 0; i != 4; ++i) {
c = in.read();
escapeVal = (escapeVal << 4) | xDigitToInt(c);
// Next check takes care about c < 0 and bad escape
if (escapeVal < 0) { break; }
}
if (escapeVal < 0) {
reportSyntaxError("msg.invalid.escape", null);
return ERROR;
}
addToString(escapeVal);
isUnicodeEscapeStart = false;
} else {
c = in.read();
containsEscape = (c == 'u');
}
} while (Character.isJavaIdentifierPart((char)c));
in.unread();
int result;
String str = in.getString();
// OPT we shouldn't have to make a string (object!) to
// check if it's a keyword.
// strictly speaking we should probably push-back
// all the bad characters if the <backslash>uXXXX
// sequence is malformed. But since there isn't a
// correct context(is there?) for a bad Unicode
// escape sequence after an identifier, we can report
// an error here.
if (containsEscape) {
char ca[] = str.toCharArray();
int L = str.length();
int destination = 0;
for (int i = 0; i != L;) {
c = ca[i];
++i;
if (c == '\\' && i != L && ca[i] == 'u') {
boolean goodEscape = false;
if (i + 4 < L) {
int val = xDigitToInt(ca[i + 1]);
if (val >= 0) {
val = (val << 4) | xDigitToInt(ca[i + 2]);
if (val >= 0) {
val = (val << 4) | xDigitToInt(ca[i + 3]);
if (val >= 0) {
val = (val << 4) | xDigitToInt(ca[i + 4]);
if (val >= 0) {
c = (char)val;
i += 5;
goodEscape = true;
}
}
}
}
}
if (!goodEscape) {
reportSyntaxError("msg.invalid.escape", null);
if (c == '\\') {
c = in.read();
if (c == 'u') {
isUnicodeEscapeStart = true;
containsEscape = true;
} else {
reportSyntaxError("msg.illegal.character", null);
return ERROR;
}
} else {
if (!Character.isJavaIdentifierPart((char)c)) {
break;
}
addToString(c);
}
ca[destination] = (char)c;
++destination;
}
str = new String(ca, 0, destination);
}
else {
in.unread();
String str = getStringFromBuffer();
if (!containsEscape) {
// OPT we shouldn't have to make a string (object!) to
// check if it's a keyword.
// Return the corresponding token if it's a keyword
result = stringToKeyword(str);
int result = stringToKeyword(str);
if (result != EOF) {
if (result != RESERVED) {
return result;
@ -842,36 +838,38 @@ public class TokenStream {
// treat it as name but issue warning
Object[] errArgs = { str };
reportSyntaxWarning("msg.reserved.keyword", errArgs);
result = EOF;
}
}
}
this.string = str;
return NAME;
}
// is it a number?
if (isDigit(c) || (c == '.' && isDigit(in.peek()))) {
stringBufferTop = 0;
int base = 10;
in.startString();
if (c == '0') {
c = in.read();
if (c == 'x' || c == 'X') {
c = in.read();
base = 16;
// restart the string, losing leading 0x
in.startString();
c = in.read();
} else if (isDigit(c)) {
base = 8;
} else {
addToString('0');
}
}
while (0 <= xDigitToInt(c)) {
if (base < 16) {
if (isAlpha(c))
break;
if (base == 16) {
while (0 <= xDigitToInt(c)) {
addToString(c);
c = in.read();
}
} else {
while ('0' <= c && c <= '9') {
/*
* We permit 08 and 09 as decimal numbers, which
* makes our behavior a superset of the ECMA
@ -883,8 +881,9 @@ public class TokenStream {
reportSyntaxWarning("msg.bad.octal.literal", errArgs);
base = 10;
}
addToString(c);
c = in.read();
}
c = in.read();
}
boolean isInteger = true;
@ -893,27 +892,29 @@ public class TokenStream {
isInteger = false;
if (c == '.') {
do {
addToString(c);
c = in.read();
} while (isDigit(c));
}
if (c == 'e' || c == 'E') {
addToString(c);
c = in.read();
if (c == '+' || c == '-') {
addToString(c);
c = in.read();
}
if (!isDigit(c)) {
in.getString(); // throw away string in progress
reportSyntaxError("msg.missing.exponent", null);
return ERROR;
}
do {
addToString(c);
c = in.read();
} while (isDigit(c));
}
}
in.unread();
String numString = in.getString();
String numString = getStringFromBuffer();
double dval;
if (base == 10 && !isInteger) {
@ -941,43 +942,81 @@ public class TokenStream {
// are any escaped characters in the string, we revert to
// building it out of a StringBuffer.
StringBuffer stringBuf = null;
int quoteChar = c;
int val = 0;
stringBufferTop = 0;
c = in.read();
in.startString(); // start after the first "
while(c != quoteChar) {
strLoop: while (c != quoteChar) {
if (c == '\n' || c == EOF_CHAR) {
in.unread();
in.getString(); // throw away the string in progress
reportSyntaxError("msg.unterminated.string.lit", null);
return ERROR;
}
if (c == '\\') {
// We've hit an escaped character; revert to the
// slow method of building a string.
if (stringBuf == null) {
// Don't include the backslash
in.unread();
stringBuf = new StringBuffer(in.getString());
in.read();
}
// We've hit an escaped character
switch (c = in.read()) {
case 'b': c = '\b'; break;
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
case 'v': c = '\u000B'; break;
// \v a late addition to the ECMA spec.
// '\v' doesn't seem to be valid Java.
c = in.read();
switch (c) {
case 'b': c = '\b'; break;
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
default:
if (isDigit(c) && c < '8') {
// \v a late addition to the ECMA spec,
// it is not in Java, so use 0xb
case 'v': c = 0xb; break;
case 'u': {
/*
* Get 4 hex digits; if the u escape is not
* followed by 4 hex digits, use 'u' + the literal
* character sequence that follows.
*/
int escapeStart = stringBufferTop;
addToString('u');
int escapeVal = 0;
for (int i = 0; i != 4; ++i) {
c = in.read();
escapeVal = (escapeVal << 4) | xDigitToInt(c);
if (escapeVal < 0) {
continue strLoop;
}
addToString(c);
}
// prepare for replace of stored 'u' sequence
// by escape value
stringBufferTop = escapeStart;
c = escapeVal;
} break;
case 'x': {
/* Get 2 hex digits, defaulting to 'x' + literal
* sequence, as above.
*/
c = in.read();
int escapeVal = xDigitToInt(c);
if (escapeVal < 0) {
addToString('x');
continue strLoop;
} else {
int c1 = c;
c = in.read();
escapeVal = (escapeVal << 4) | xDigitToInt(c);
if (escapeVal < 0) {
addToString('x');
addToString(c1);
continue strLoop;
} else {
// got 2 hex digits
c = escapeVal;
}
}
} break;
default: if (isDigit(c) && c < '8') {
val = c - '0';
c = in.read();
if (isDigit(c) && c < '8') {
@ -990,89 +1029,19 @@ public class TokenStream {
}
in.unread();
if (val > 0377) {
reportSyntaxError("msg.oct.esc.too.large", null);
reportSyntaxError("msg.oct.esc.too.large",
null);
return ERROR;
}
c = val;
} else if (c == 'u') {
/*
* Get 4 hex digits; if the u escape is not
* followed by 4 hex digits, use 'u' + the literal
* character sequence that follows. Do some manual
* match (OK because we're in a string) to avoid
* multi-char match on the underlying stream.
*/
int c1 = in.read();
c = xDigitToInt(c1);
if (c < 0) {
in.unread();
c = 'u';
} else {
int c2 = in.read();
c = (c << 4) | xDigitToInt(c2);
if (c < 0) {
in.unread();
stringBuf.append('u');
c = c1;
} else {
int c3 = in.read();
c = (c << 4) | xDigitToInt(c3);
if (c < 0) {
in.unread();
stringBuf.append('u');
stringBuf.append((char)c1);
c = c2;
} else {
int c4 = in.read();
c = (c << 4) | xDigitToInt(c4);
if (c < 0) {
in.unread();
stringBuf.append('u');
stringBuf.append((char)c1);
stringBuf.append((char)c2);
c = c3;
} else {
// got 4 hex digits! Woo Hoo!
}
}
}
}
} else if (c == 'x') {
/* Get 2 hex digits, defaulting to 'x' + literal
* sequence, as above.
*/
int c1 = in.read();
c = xDigitToInt(c1);
if (c < 0) {
in.unread();
c = 'x';
} else {
int c2 = in.read();
c = (c << 4) | xDigitToInt(c2);
if (c < 0) {
in.unread();
stringBuf.append('x');
c = c1;
} else {
// got 2 hex digits
}
}
}
}
}
if (stringBuf != null)
stringBuf.append((char) c);
addToString(c);
c = in.read();
}
if (stringBuf != null)
this.string = stringBuf.toString();
else {
in.unread(); // miss the trailing "
this.string = in.getString();
in.read();
}
this.string = getStringFromBuffer();
return STRING;
}
@ -1234,10 +1203,7 @@ public class TokenStream {
// is it a regexp?
if ((flags & TSF_REGEXP) != 0) {
// We don't try to use the in.startString/in.getString
// approach, because escaped characters (which break it)
// seem likely to be common.
StringBuffer re = new StringBuffer();
stringBufferTop = 0;
while ((c = in.read()) != '/') {
if (c == '\n' || c == EOF_CHAR) {
in.unread();
@ -1245,21 +1211,21 @@ public class TokenStream {
return ERROR;
}
if (c == '\\') {
re.append((char) c);
addToString(c);
c = in.read();
}
re.append((char) c);
addToString(c);
}
int reEnd = stringBufferTop;
StringBuffer flagsBuf = new StringBuffer();
while (true) {
if (in.match('g'))
flagsBuf.append('g');
addToString('g');
else if (in.match('i'))
flagsBuf.append('i');
addToString('i');
else if (in.match('m'))
flagsBuf.append('m');
addToString('m');
else
break;
}
@ -1269,8 +1235,9 @@ public class TokenStream {
return ERROR;
}
this.string = re.toString();
this.regExpFlags = flagsBuf.toString();
this.string = new String(stringBuffer, 0, reEnd);
this.regExpFlags = new String(stringBuffer, reEnd,
stringBufferTop - reEnd);
return OBJECT;
}
@ -1322,6 +1289,19 @@ public class TokenStream {
}
}
/**
 * Builds a String from the characters accumulated so far in the
 * internal buffer, i.e. the first stringBufferTop entries of
 * stringBuffer. The buffer itself is left untouched.
 */
private String getStringFromBuffer() {
    final int length = stringBufferTop;
    return String.valueOf(stringBuffer, 0, length);
}
/**
 * Appends one character to the internal buffer.
 *
 * @param c the character to store; it is narrowed to char before being
 *          written
 */
private void addToString(int c) {
    final int top = stringBufferTop;
    if (top == stringBuffer.length) {
        // Buffer is full: move the contents into an array twice as large.
        char[] grown = new char[top * 2];
        System.arraycopy(stringBuffer, 0, grown, 0, top);
        stringBuffer = grown;
    }
    stringBufferTop = top + 1;
    stringBuffer[top] = (char)c;
}
public void reportSyntaxError(String messageProperty, Object[] args) {
String message = Context.getMessage(messageProperty, args);
if (scope != null) {
@ -1377,4 +1357,9 @@ public class TokenStream {
// code.
private String string = "";
private double number;
// Scratch buffer holding the characters collected by addToString();
// addToString() replaces it with an array twice as large when it fills up.
private char[] stringBuffer = new char[128];
// Number of valid characters in stringBuffer; the tokenizer sets it to 0
// before it starts collecting a new identifier, number, string or regexp.
private int stringBufferTop;
private static final boolean checkSelf = Context.check && true;
}