gecko-dev/js/js2/java/TokenStream.java

1300 строки
42 KiB
Java

/* -*- Mode: Java; tab-width: 8 -*-
* Copyright © 1997, 1998 Netscape Communications Corporation,
* All Rights Reserved.
*/
import java.io.*;
/**
* This class implements the JavaScript scanner.
*
* It is based on the C source files jsscan.c and jsscan.h
* in the jsref package.
*
* @see com.netscape.javascript.Parser
*
* @author Mike McCabe
* @author Brendan Eich
*/
public class TokenStream {
/*
* JSTokenStream flags, mirroring those in jsscan.h. These are used
* by the parser to change/check the state of the scanner.
*
* Each value is a single-bit mask that is OR-ed into the public
* `flags' field of a TokenStream. Within this class:
* - TSF_ERROR is set by reportError(); once set, getToken() returns
* ERROR on every call.
* - TSF_NEWLINES makes getToken() return a newline as an EOL token
* instead of skipping it as whitespace.
* - TSF_REGEXP makes getToken() scan a '/' (that does not start a
* comment) as a regular expression literal.
* TSF_FUNCTION, TSF_RETURN_EXPR and TSF_RETURN_VOID are not read by
* the scanner code in this class.
* The commented-out entries keep the bit positions of flags that
* are not defined here (presumably jsscan.h flags that were not
* ported - confirm against jsscan.h).
*/
public final static int
TSF_ERROR = 0x0001, // fatal error while scanning
// TSF_EOF = 0x0002, // hit end of file
TSF_NEWLINES = 0x0004, // tokenize newlines
TSF_FUNCTION = 0x0008, // scanning inside function body
TSF_RETURN_EXPR = 0x0010, // function has 'return expr;'
TSF_RETURN_VOID = 0x0020, // function has 'return;'
// TSF_INTERACTIVE = 0x0040, // interactive parsing mode
// TSF_COMMAND = 0x0080, // command parsing mode
// TSF_LOOKAHEAD = 0x0100, // looking ahead for a token
TSF_REGEXP = 0x0200; // looking for a regular expression
/*
* Sentinel compared against the int returned by in.read() to detect
* end of input. For chars - because we need something out-of-range
* to check. (And checking EOF by exception is annoying.)
* Note distinction from the EOF token type (which is 0): EOF_CHAR is
* a character-level value, EOF is what getToken() returns.
*/
private final static int
EOF_CHAR = -1;
/**
* Token types. These values correspond to JSTokenType values in
* jsscan.c.
*
* Constraints visible in this class that must be kept when editing
* the list:
* - The printable-name table built by checkNames() lists one name
* per value in exactly this order and is indexed by (value + 1),
* so the two lists have to be changed together.
* - The keyword table stores entries as token | (op << 8) and
* stringToKeyword() recovers the token with (x & 0xff), so any
* token type returned for a keyword must fit in 8 bits.
*/
public final static int
// start enum
ERROR = -1, // well-known as the only code < EOF
EOF = 0, // end of file token - (not EOF_CHAR)
EOL = 1, // end of line
// Beginning here are interpreter bytecodes. Their values
// must not exceed 127.
POPV = 2,
ENTERWITH = 3,
LEAVEWITH = 4,
RETURN = 5,
GOTO = 6,
IFEQ = 7,
IFNE = 8,
DUP = 9,
SETNAME = 10,
BITOR = 11,
BITXOR = 12,
BITAND = 13,
EQ = 14,
NE = 15,
LT = 16,
LE = 17,
GT = 18,
GE = 19,
LSH = 20,
RSH = 21,
URSH = 22,
ADD = 23,
SUB = 24,
MUL = 25,
DIV = 26,
MOD = 27,
BITNOT = 28,
NEG = 29,
NEW = 30,
DELPROP = 31,
TYPEOF = 32,
NAMEINC = 33,
PROPINC = 34,
ELEMINC = 35,
NAMEDEC = 36,
PROPDEC = 37,
ELEMDEC = 38,
GETPROP = 39,
SETPROP = 40,
GETELEM = 41,
SETELEM = 42,
CALL = 43,
NAME = 44,
NUMBER = 45,
STRING = 46,
ZERO = 47,
ONE = 48,
NULL = 49,
THIS = 50,
FALSE = 51,
TRUE = 52,
SHEQ = 53, // shallow equality (===)
SHNE = 54, // shallow inequality (!==)
CLOSURE = 55,
OBJECT = 56,
POP = 57,
POS = 58,
VARINC = 59,
VARDEC = 60,
BINDNAME = 61,
THROW = 62,
IN = 63,
INSTANCEOF = 64,
GOSUB = 65,
RETSUB = 66,
CALLSPECIAL = 67,
GETTHIS = 68,
NEWTEMP = 69,
USETEMP = 70,
GETBASE = 71,
GETVAR = 72,
SETVAR = 73,
UNDEFINED = 74,
TRY = 75,
ENDTRY = 76,
NEWSCOPE = 77,
TYPEOFNAME = 78,
ENUMINIT = 79,
ENUMNEXT = 80,
GETPROTO = 81,
GETPARENT = 82,
SETPROTO = 83,
SETPARENT = 84,
SCOPE = 85,
GETSCOPEPARENT = 86,
JTHROW = 87,
// End of interpreter bytecodes
SEMI = 88, // semicolon
LB = 89, // left and right brackets
RB = 90,
LC = 91, // left and right curlies (braces)
RC = 92,
LP = 93, // left and right parentheses
RP = 94,
COMMA = 95, // comma operator
ASSIGN = 96, // assignment ops (= += -= etc.)
HOOK = 97, // conditional (?:)
COLON = 98,
OR = 99, // logical or (||)
AND = 100, // logical and (&&)
EQOP = 101, // equality ops (== !=)
RELOP = 102, // relational ops (< <= > >=)
SHOP = 103, // shift ops (<< >> >>>)
UNARYOP = 104, // unary prefix operator
INC = 105, // increment/decrement (++ --)
DEC = 106,
DOT = 107, // member operator (.)
PRIMARY = 108, // true, false, null, this, super
FUNCTION = 109, // function keyword
EXPORT = 110, // export keyword
IMPORT = 111, // import keyword
IF = 112, // if keyword
ELSE = 113, // else keyword
SWITCH = 114, // switch keyword
CASE = 115, // case keyword
DEFAULT = 116, // default keyword
WHILE = 117, // while keyword
DO = 118, // do keyword
FOR = 119, // for keyword
BREAK = 120, // break keyword
CONTINUE = 121, // continue keyword
VAR = 122, // var keyword
WITH = 123, // with keyword
CATCH = 124, // catch keyword
FINALLY = 125, // finally keyword
RESERVED = 126, // reserved keywords
/** Added by Mike - these are JSOPs in the jsref, but I
* don't have them yet in the java implementation...
* so they go here. Also whatever I needed.
* Most of these go in the 'op' field when returning
* more general token types, eg. 'DIV' as the op of 'ASSIGN'.
*/
NOP = 127, // NOP
NOT = 128, // etc.
PRE = 129, // for INC, DEC nodes.
POST = 130,
/**
* For JSOPs associated with keywords...
* eg. op = THIS; token = PRIMARY
*/
VOID = 131,
/* types used for the parse tree - these never get returned
* by the scanner.
*/
BLOCK = 132, // statement block
ARRAYLIT = 133, // array literal
OBJLIT = 134, // object literal
LABEL = 135, // label
TARGET = 136,
LOOP = 137,
ENUMDONE = 138,
EXPRSTMT = 139,
PARENT = 140,
CONVERT = 141,
JSR = 142,
NEWLOCAL = 143,
USELOCAL = 144,
SCRIPT = 145; // top-level node for entire script
// end enum
/*
 * Printable names for the int token types. Entry i holds the name of
 * token type (i - 1), because ERROR is -1, so always add 1 to a token
 * type before indexing. The table stays null until checkNames() has
 * built it.
 */
private static String names[];

/*
 * Builds the names table the first time it is needed. Nothing is built
 * unless Context.printTrees is set, so names may still be null after
 * this call.
 */
private static void checkNames() {
    if (!Context.printTrees || names != null)
        return;
    names = new String[] {
        "error",
        "eof",
        "eol",
        "popv",
        "enterwith",
        "leavewith",
        "return",
        "goto",
        "ifeq",
        "ifne",
        "dup",
        "setname",
        "bitor",
        "bitxor",
        "bitand",
        "eq",
        "ne",
        "lt",
        "le",
        "gt",
        "ge",
        "lsh",
        "rsh",
        "ursh",
        "add",
        "sub",
        "mul",
        "div",
        "mod",
        "bitnot",
        "neg",
        "new",
        "delprop",
        "typeof",
        "nameinc",
        "propinc",
        "eleminc",
        "namedec",
        "propdec",
        "elemdec",
        "getprop",
        "setprop",
        "getelem",
        "setelem",
        "call",
        "name",
        "number",
        "string",
        "zero",
        "one",
        "null",
        "this",
        "false",
        "true",
        "sheq",
        "shne",
        "closure",
        "object",
        "pop",
        "pos",
        "varinc",
        "vardec",
        "bindname",
        "throw",
        "in",
        "instanceof",
        "gosub",
        "retsub",
        "callspecial",
        "getthis",
        "newtemp",
        "usetemp",
        "getbase",
        "getvar",
        "setvar",
        "undefined",
        "try",
        "endtry",
        "newscope",
        "typeofname",
        "enuminit",
        "enumnext",
        "getproto",
        "getparent",
        "setproto",
        "setparent",
        "scope",
        "getscopeparent",
        "jthrow",
        "semi",
        "lb",
        "rb",
        "lc",
        "rc",
        "lp",
        "rp",
        "comma",
        "assign",
        "hook",
        "colon",
        "or",
        "and",
        "eqop",
        "relop",
        "shop",
        "unaryop",
        "inc",
        "dec",
        "dot",
        "primary",
        "function",
        "export",
        "import",
        "if",
        "else",
        "switch",
        "case",
        "default",
        "while",
        "do",
        "for",
        "break",
        "continue",
        "var",
        "with",
        "catch",
        "finally",
        "reserved",
        "nop",
        "not",
        "pre",
        "post",
        "void",
        "block",
        "arraylit",
        "objlit",
        "label",
        "target",
        "loop",
        "enumdone",
        "exprstmt",
        "parent",
        "convert",
        "jsr",
        "newlocal",
        "uselocal",
        "script"
    };
}
/**
 * Returns a printable description of a token type, for tree dumps.
 *
 * This function uses the cached op, string and number fields in
 * TokenStream; if getToken has been called since the passed token
 * was scanned, the op or string printed may be incorrect.
 *
 * @param token a token type constant
 * @return the empty string when Context.printTrees is off; null when
 *         token has no entry in the names table (below ERROR or past
 *         the last name); otherwise the token's name, followed by the
 *         cached op name, string or number for the token types that
 *         carry one
 */
public String tokenToString(int token) {
    if (!Context.printTrees)
        return "";

    checkNames();
    // names is indexed by token + 1, so ERROR (-1) is the lowest valid
    // token. Reject both ends of the range instead of letting a
    // negative index throw ArrayIndexOutOfBoundsException.
    if (token < ERROR || token + 1 >= names.length)
        return null;

    // Token types whose specific operator lives in the op field.
    if (token == UNARYOP ||
        token == ASSIGN ||
        token == PRIMARY ||
        token == EQOP ||
        token == SHOP ||
        token == RELOP) {
        return names[token + 1] + " " + names[this.op + 1];
    }

    // Token types whose text lives in the string field.
    if (token == STRING || token == OBJECT || token == NAME)
        return names[token + 1] + " `" + this.string + "'";

    if (token == NUMBER)
        return "NUMBER " + this.number;

    return names[token + 1];
}
/**
 * Returns the bare printable name of a token type.
 *
 * @param type a token type constant; it is used as an index (plus one)
 *             into the names table without a range check
 * @return the name, or the empty string when the names table has not
 *         been built (checkNames() only builds it when
 *         Context.printTrees is set)
 */
public static String tokenToName(int type) {
    checkNames();
    if (names == null)
        return "";
    return names[type + 1];
}
/*
 * Keyword lookup table: maps a keyword's spelling to an Integer that
 * packs the token type in the low 8 bits and, for keywords that are
 * returned as a general token type plus an op, the op in the bits
 * above. stringToKeyword() does the unpacking.
 */
private static java.util.Hashtable keywords;

static {
    // words[k] is scanned as codes[k]; the two arrays are parallel.
    String[] words = {
        "break",
        "case",
        "continue",
        "default",
        "delete",
        "do",
        "else",
        "export",
        "false",
        "for",
        "function",
        "if",
        "in",
        "new",
        "null",
        "return",
        "switch",
        "this",
        "true",
        "typeof",
        "var",
        "void",
        "while",
        "with",
        // the following are #ifdef RESERVE_JAVA_KEYWORDS in jsscan.c
        "abstract",
        "boolean",
        "byte",
        "catch",
        "char",
        "class",
        "const",
        "debugger",
        "double",
        "enum",
        "extends",
        "final",
        "finally",
        "float",
        "goto",
        "implements",
        "import",
        "instanceof",
        "int",
        "interface",
        "long",
        "native",
        "package",
        "private",
        "protected",
        "public",
        "short",
        "static",
        "super",
        "synchronized",
        "throw",
        "throws",
        "transient",
        "try",
        "volatile"
    };
    int[] codes = {
        BREAK,                      // break
        CASE,                       // case
        CONTINUE,                   // continue
        DEFAULT,                    // default
        DELPROP,                    // delete
        DO,                         // do
        ELSE,                       // else
        EXPORT,                     // export
        PRIMARY | (FALSE << 8),     // false
        FOR,                        // for
        FUNCTION,                   // function
        IF,                         // if
        RELOP | (IN << 8),          // in
        NEW,                        // new
        PRIMARY | (NULL << 8),      // null
        RETURN,                     // return
        SWITCH,                     // switch
        PRIMARY | (THIS << 8),      // this
        PRIMARY | (TRUE << 8),      // true
        UNARYOP | (TYPEOF << 8),    // typeof
        VAR,                        // var
        UNARYOP | (VOID << 8),      // void
        WHILE,                      // while
        WITH,                       // with
        RESERVED,                   // abstract
        RESERVED,                   // boolean
        RESERVED,                   // byte
        CATCH,                      // catch
        RESERVED,                   // char
        RESERVED,                   // class
        RESERVED,                   // const
        RESERVED,                   // debugger
        RESERVED,                   // double
        RESERVED,                   // enum
        RESERVED,                   // extends
        RESERVED,                   // final
        FINALLY,                    // finally
        RESERVED,                   // float
        RESERVED,                   // goto
        RESERVED,                   // implements
        IMPORT,                     // import
        RELOP | (INSTANCEOF << 8),  // instanceof
        RESERVED,                   // int
        RESERVED,                   // interface
        RESERVED,                   // long
        RESERVED,                   // native
        RESERVED,                   // package
        RESERVED,                   // private
        RESERVED,                   // protected
        RESERVED,                   // public
        RESERVED,                   // short
        RESERVED,                   // static
        PRIMARY | (NOP << 8),       // super
        RESERVED,                   // synchronized
        THROW,                      // throw
        RESERVED,                   // throws
        RESERVED,                   // transient
        TRY,                        // try
        RESERVED                    // volatile
    };

    java.util.Hashtable table = new java.util.Hashtable(words.length);
    // All RESERVED entries share one Integer instead of allocating one each.
    Integer reserved = new Integer(RESERVED);
    for (int k = 0; k < words.length; k++) {
        int code = codes[k];
        Integer boxed;
        if (code == RESERVED)
            boxed = reserved;
        else
            boxed = new Integer(code);
        table.put(words[k], boxed);
    }
    keywords = table;
}
/**
 * Looks an identifier up in the keyword table.
 *
 * @param name the scanned identifier text
 * @return EOF when name is not a keyword; otherwise the keyword's token
 *         type (the low 8 bits of the table entry). On a hit the op
 *         field is overwritten with the remaining high bits, which is 0
 *         for keywords that have no associated op.
 */
private int stringToKeyword(String name) {
    Object entry = keywords.get(name);
    if (entry != null) {
        int packed = ((Integer) entry).intValue();
        this.op = packed >> 8;
        return packed & 0xff;
    }
    return EOF;
}
/**
 * Creates a scanner over the given character source.
 *
 * @param in         reader supplying the script text; it is wrapped in
 *                   a LineBuffer
 * @param sourceName name reported by getSourceName() and passed along
 *                   with error reports
 * @param lineno     line number handed to the LineBuffer (presumably
 *                   the number of the first line - confirm against
 *                   LineBuffer)
 */
public TokenStream(Reader in, String sourceName, int lineno) {
    this.sourceName = sourceName;
    this.flags = 0;
    this.pushbackToken = EOF;   // EOF doubles as "nothing pushed back"
    this.in = new LineBuffer(in, lineno);
}
/**
 * Consumes the next token only if it has the expected type.
 *
 * @param toMatch the token type to look for
 * @return true if the next token was toMatch (it is consumed); false
 *         otherwise, in which case the token is pushed back and will be
 *         returned by the next getToken()
 * @throws IOException if getToken() does
 */
public boolean matchToken(int toMatch) throws IOException {
    int scanned = getToken();
    if (scanned != toMatch) {
        // No match: keep the token for the next getToken() and undo the
        // token count that getToken() just added.
        this.pushbackToken = scanned;
        tokenno--;
        return false;
    }
    return true;
}
/**
 * Discards any pushed-back token. EOF in the pushback slot means
 * "empty", so the next getToken() will scan fresh input.
 */
public void clearPushback() {
    pushbackToken = EOF;
}
/**
 * Pushes a token back so that the next getToken() returns it again.
 * Only one token can be held.
 *
 * @param tt the token type to push back
 * @throws RuntimeException if a token is already pushed back and tt is
 *         not ERROR (ERROR is allowed to overwrite a pending token)
 */
public void ungetToken(int tt) {
    boolean slotTaken = this.pushbackToken != EOF;
    if (slotTaken && tt != ERROR) {
        String replacing = tokenToString(tt);
        String replaced = tokenToString(this.pushbackToken);
        Object[] errArgs = { replacing, replaced };
        throw new RuntimeException(
            Context.getMessage("msg.token.replaces.pushback", errArgs));
    }
    // Undo the count added by the getToken() call that produced tt.
    tokenno--;
    this.pushbackToken = tt;
}
/**
 * Returns the next token without consuming it: the token is scanned,
 * stored in the pushback slot and the token count is restored.
 *
 * @return the type of the upcoming token
 * @throws IOException if getToken() does
 */
public int peekToken() throws IOException {
    int next = getToken();
    tokenno--;
    this.pushbackToken = next;
    return next;
}
/**
 * Peeks at the next token with newlines made visible, so the caller can
 * tell whether the current line ends before the next real token.
 *
 * TSF_NEWLINES is switched on for the duration of the peek and always
 * switched off afterwards, even if the peek throws; otherwise a failed
 * read would leave the scanner returning EOL tokens to later callers.
 * Note that the flag is cleared unconditionally, whatever its value was
 * on entry.
 *
 * @return the peeked token type; EOL if a newline comes first. An EOL
 *         is not left in the pushback slot, so it will not be returned
 *         again by getToken().
 * @throws IOException if peekToken() does
 */
public int peekTokenSameLine() throws IOException {
    int result;

    flags |= TSF_NEWLINES;          // SCAN_NEWLINES from jsscan.h
    try {
        result = peekToken();
    } finally {
        flags &= ~TSF_NEWLINES;     // HIDE_NEWLINES from jsscan.h
    }
    if (this.pushbackToken == EOL)
        this.pushbackToken = EOF;
    return result;
}
/* helper functions... these might be better inlined.
* These are needed because the java.lang.Character.isWhatever
* functions accept unicode, and we want to limit
* identifiers to ASCII.
*/
/**
 * Tests whether a whole string is an identifier by this scanner's
 * ASCII-only rules: a non-empty string whose first character passes
 * isJSIdentifierStart and whose remaining characters all pass
 * isJSIdentifierPart. Keywords are not excluded.
 *
 * @param s the string to test; must not be null
 * @return true if every character is acceptable in its position
 */
protected static boolean isJSIdentifier(String s) {
    int length = s.length();
    if (length == 0)
        return false;
    if (!isJSIdentifierStart(s.charAt(0)))
        return false;

    int i = 1;
    while (i < length && isJSIdentifierPart(s.charAt(i)))
        i++;
    // The scan reaches the end only if no character was rejected.
    return i == length;
}
/**
 * Tests whether c may begin an identifier: an ASCII letter, '_' or '$'.
 *
 * @param c a character code (or EOF_CHAR, which is rejected)
 */
protected static boolean isJSIdentifierStart(int c) {
    if (c == '_' || c == '$')
        return true;
    if ('a' <= c && c <= 'z')
        return true;
    return 'A' <= c && c <= 'Z';
}
/**
 * Tests whether c may appear after the first character of an
 * identifier: an ASCII letter, an ASCII digit, '_' or '$'.
 *
 * @param c a character code (or EOF_CHAR, which is rejected)
 */
protected static boolean isJSIdentifierPart(int c) {
    if (c == '_' || c == '$')
        return true;
    if ('0' <= c && c <= '9')
        return true;
    if ('a' <= c && c <= 'z')
        return true;
    return 'A' <= c && c <= 'Z';
}
/** Tests whether c is an ASCII letter, a-z or A-Z. */
private static boolean isAlpha(int c) {
    if ('a' <= c && c <= 'z')
        return true;
    return 'A' <= c && c <= 'Z';
}
/** Tests whether c is an ASCII decimal digit, 0-9. */
static boolean isDigit(int c) {
    return !(c < '0' || c > '9');
}
/** Tests whether c is an ASCII hexadecimal digit: 0-9, a-f or A-F. */
static boolean isXDigit(int c) {
    if ('0' <= c && c <= '9')
        return true;
    if ('a' <= c && c <= 'f')
        return true;
    return 'A' <= c && c <= 'F';
}
/**
 * Tests whether c is whitespace as defined in ECMA: horizontal tab
 * (0x09), vertical tab (0x0B), form feed (0x0C) or space (0x20).
 * jsscan.c uses C isspace() instead (which allows \v, I think).
 * Line terminators are not included; note that code in in.read()
 * implicitly accepts '\r' as well.
 */
static boolean isJSSpace(int c) {
    switch (c) {
    case 0x09:
    case 0x0B:
    case 0x0C:
    case 0x20:
        return true;
    default:
        return false;
    }
}
/**
 * Scans and returns the next token.
 *
 * In order: the token counter is advanced; ERROR is returned if
 * TSF_ERROR is already set; a pushed-back token, if any, is returned;
 * otherwise whitespace is skipped (newlines too, unless TSF_NEWLINES is
 * set, in which case a newline is returned as EOL) and an
 * identifier/keyword, numeric literal, string literal or
 * operator/punctuation token is scanned. Comments (line comments, block
 * comments and the HTML begin-comment marker) are skipped and scanning
 * starts over.
 *
 * Besides the returned token type, results are left in fields:
 * string (NAME, STRING, and the regular expression source for OBJECT),
 * regExpFlags (OBJECT), number (NUMBER), and op (the specific operator
 * for ASSIGN, EQOP, RELOP, SHOP, UNARYOP, and whatever the keyword
 * table supplies for keywords).
 *
 * Skipping a comment restarts the scan in a loop rather than by calling
 * getToken() recursively, so the stack does not grow with the number of
 * consecutive comments. tokenno is still advanced once for every
 * skipped comment, which keeps getTokenno() values the same as with the
 * recursive form.
 *
 * @return the token type; EOF at end of input; ERROR after a scanning
 *         error has been reported
 * @throws IOException if reading from the underlying input fails
 */
public int getToken() throws IOException {
    int c;
    tokenno++;

    // If there was a fatal error, keep returning TOK_ERROR.
    if ((flags & TSF_ERROR) != 0)
        return ERROR;

    // Check for pushed-back token
    if (this.pushbackToken != EOF) {
        int result = this.pushbackToken;
        this.pushbackToken = EOF;
        return result;
    }

    retry:
    for (;;) {
        // Eat whitespace, possibly sensitive to newlines.
        do {
            c = in.read();
            if (c == '\n')
                if ((flags & TSF_NEWLINES) != 0)
                    break;
        } while (isJSSpace(c) || c == '\n');

        if (c == EOF_CHAR)
            return EOF;

        // identifier/keyword/instanceof?
        if (isJSIdentifierStart(c)) {
            in.startString();
            do {
                c = in.read();
            } while (isJSIdentifierPart(c));
            in.unread();

            int result;
            String str = in.getString();
            // OPT we shouldn't have to make a string (object!) to
            // check if it's a keyword.

            // Return the corresponding token if it's a keyword
            if ((result = stringToKeyword(str)) != EOF) {
                return result;
            }

            this.string = str;
            return NAME;
        }

        // is it a number?
        if (isDigit(c) || (c == '.' && isDigit(in.peek()))) {
            int base = 10;
            in.startString();

            double dval = ScriptRuntime.NaN;
            long longval = 0;
            boolean isInteger = true;

            if (c == '0') {
                c = in.read();
                if (c == 'x' || c == 'X') {
                    c = in.read();
                    base = 16;
                    // restart the string, losing leading 0x
                    in.startString();
                } else if (isDigit(c)) {
                    if (c < '8') {
                        base = 8;
                        // Restart the string, losing the leading 0
                        in.startString();
                    } else {
                        /* Checking against c < '8' is non-ECMA, but
                         * is required to support legacy code; we've
                         * supported it in the past, and it's likely
                         * that "08" and "09" are in use in code
                         * having to do with dates. So we need to
                         * support it, which makes our behavior a
                         * superset of ECMA in this area. We raise a
                         * warning if a non-octal digit is
                         * encountered, then proceed as if it were
                         * decimal.
                         */
                        Object[] errArgs = { String.valueOf((char)c) };
                        Context.reportWarning
                            (Context.getMessage("msg.bad.octal.literal",
                                                errArgs),
                             getSourceName(), in.getLineno(),
                             getLine(), getOffset());
                        // implicitly retain the leading 0
                    }
                } else {
                    // implicitly retain the leading 0
                }
            }

            // Consume the digits valid for the base; a letter ends a
            // decimal or octal literal, and 8 or 9 ends an octal one.
            while (isXDigit(c)) {
                if (base < 16 && (isAlpha(c)
                                  || (base == 8 && c >= '8'))) {
                    break;
                }
                c = in.read();
            }

            if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
                isInteger = false;
                if (c == '.') {
                    do {
                        c = in.read();
                    } while (isDigit(c));
                }

                if (c == 'e' || c == 'E') {
                    c = in.read();
                    if (c == '+' || c == '-') {
                        c = in.read();
                    }
                    if (!isDigit(c)) {
                        in.getString(); // throw away string in progress
                        reportError("msg.missing.exponent", null);
                        return ERROR;
                    }
                    do {
                        c = in.read();
                    } while (isDigit(c));
                }
            }
            in.unread();
            String numString = in.getString();

            if (base == 10 && !isInteger) {
                try {
                    // Use Java conversion to number from string...
                    dval = (Double.valueOf(numString)).doubleValue();
                }
                catch (NumberFormatException ex) {
                    Object[] errArgs = { ex.getMessage() };
                    reportError("msg.caught.nfe", errArgs);
                    return ERROR;
                }
            } else {
                dval = ScriptRuntime.stringToNumber(numString, 0, base);
                longval = (long) dval;

                // is it an integral fits-in-a-long value?
                if (longval != dval)
                    isInteger = false;
            }

            if (!isInteger) {
                /* Can't handle floats right now, because postfix INC/DEC
                   generate Doubles, but I would generate a Float through this
                   path, and it causes a stack mismatch. FIXME (MS)
                   if (Float.MIN_VALUE <= dval && dval <= Float.MAX_VALUE)
                   this.number = new Xloat((float) dval);
                   else
                */
                this.number = new Double(dval);
            } else {
                // We generate the smallest possible type here
                if (Byte.MIN_VALUE <= longval && longval <= Byte.MAX_VALUE)
                    this.number = new Byte((byte)longval);
                else if (Short.MIN_VALUE <= longval &&
                         longval <= Short.MAX_VALUE)
                    this.number = new Short((short)longval);
                else if (Integer.MIN_VALUE <= longval &&
                         longval <= Integer.MAX_VALUE)
                    this.number = new Integer((int)longval);
                else {
                    // May lose some precision here, but that's the
                    // appropriate semantics.
                    this.number = new Double(longval);
                }
            }
            return NUMBER;
        }

        // is it a string?
        if (c == '"' || c == '\'') {
            // We attempt to accumulate a string the fast way, by
            // building it directly out of the reader. But if there
            // are any escaped characters in the string, we revert to
            // building it out of a StringBuffer.

            StringBuffer stringBuf = null;

            int quoteChar = c;
            int val = 0;

            c = in.read();
            in.startString(); // start after the first "
            while(c != quoteChar) {
                if (c == '\n' || c == EOF_CHAR) {
                    in.unread();
                    in.getString(); // throw away the string in progress
                    reportError("msg.unterminated.string.lit", null);
                    return ERROR;
                }

                if (c == '\\') {
                    // We've hit an escaped character; revert to the
                    // slow method of building a string.
                    if (stringBuf == null) {
                        // Don't include the backslash
                        in.unread();
                        stringBuf = new StringBuffer(in.getString());
                        in.read();
                    }

                    switch (c = in.read()) {
                    case 'b': c = '\b'; break;
                    case 'f': c = '\f'; break;
                    case 'n': c = '\n'; break;
                    case 'r': c = '\r'; break;
                    case 't': c = '\t'; break;
                    case 'v': c = '\u000B'; break;
                        // \v a late addition to the ECMA spec.
                        // '\v' doesn't seem to be valid Java.

                    default:
                        if (isDigit(c) && c < '8') {
                            // Octal escape of up to three digits.
                            val = c - '0';
                            c = in.read();
                            if (isDigit(c) && c < '8') {
                                val = 8 * val + c - '0';
                                c = in.read();
                                if (isDigit(c) && c < '8') {
                                    val = 8 * val + c - '0';
                                    c = in.read();
                                }
                            }
                            in.unread();
                            if (val > 0377) {
                                reportError("msg.oct.esc.too.large", null);
                                return ERROR;
                            }
                            c = val;
                        } else if (c == 'u') {
                            /*
                             * Get 4 hex digits; if the u escape is not
                             * followed by 4 hex digits, use 'u' + the literal
                             * character sequence that follows. Do some manual
                             * match (OK because we're in a string) to avoid
                             * multi-char match on the underlying stream.
                             */
                            int c1, c2, c3, c4;

                            c1 = in.read();
                            if (!isXDigit(c1)) {
                                in.unread();
                                c = 'u';
                            } else {
                                val = Character.digit((char) c1, 16);
                                c2 = in.read();
                                if (!isXDigit(c2)) {
                                    in.unread();
                                    stringBuf.append('u');
                                    c = c1;
                                } else {
                                    val = 16 * val
                                        + Character.digit((char) c2, 16);
                                    c3 = in.read();
                                    if (!isXDigit(c3)) {
                                        in.unread();
                                        stringBuf.append('u');
                                        stringBuf.append((char)c1);
                                        c = c2;
                                    } else {
                                        val = 16 * val
                                            + Character.digit((char) c3, 16);
                                        c4 = in.read();
                                        if (!isXDigit(c4)) {
                                            in.unread();
                                            stringBuf.append('u');
                                            stringBuf.append((char)c1);
                                            stringBuf.append((char)c2);
                                            c = c3;
                                        } else {
                                            // got 4 hex digits! Woo Hoo!
                                            val = 16 * val
                                                + Character.digit((char) c4, 16);
                                            c = val;
                                        }
                                    }
                                }
                            }
                        } else if (c == 'x') {
                            /* Get 2 hex digits, defaulting to 'x' + literal
                             * sequence, as above.
                             */
                            int c1, c2;

                            c1 = in.read();
                            if (!isXDigit(c1)) {
                                in.unread();
                                c = 'x';
                            } else {
                                val = Character.digit((char) c1, 16);
                                c2 = in.read();
                                if (!isXDigit(c2)) {
                                    in.unread();
                                    stringBuf.append('x');
                                    c = c1;
                                } else {
                                    // got 2 hex digits
                                    val = 16 * val
                                        + Character.digit((char) c2, 16);
                                    c = val;
                                }
                            }
                        }
                    }
                }

                if (stringBuf != null)
                    stringBuf.append((char) c);
                c = in.read();
            }

            if (stringBuf != null)
                this.string = stringBuf.toString();
            else {
                in.unread(); // miss the trailing "
                this.string = in.getString();
                in.read();
            }
            return STRING;
        }

        switch (c)
        {
        case '\n': return EOL;
        case ';': return SEMI;
        case '[': return LB;
        case ']': return RB;
        case '{': return LC;
        case '}': return RC;
        case '(': return LP;
        case ')': return RP;
        case ',': return COMMA;
        case '?': return HOOK;
        case ':': return COLON;
        case '.': return DOT;

        case '|':
            if (in.match('|')) {
                return OR;
            } else if (in.match('=')) {
                this.op = BITOR;
                return ASSIGN;
            } else {
                return BITOR;
            }

        case '^':
            if (in.match('=')) {
                this.op = BITXOR;
                return ASSIGN;
            } else {
                return BITXOR;
            }

        case '&':
            if (in.match('&')) {
                return AND;
            } else if (in.match('=')) {
                this.op = BITAND;
                return ASSIGN;
            } else {
                return BITAND;
            }

        case '=':
            if (in.match('=')) {
                if (in.match('='))
                    this.op = SHEQ;
                else
                    this.op = EQ;
                return EQOP;
            } else {
                this.op = NOP;
                return ASSIGN;
            }

        case '!':
            if (in.match('=')) {
                if (in.match('='))
                    this.op = SHNE;
                else
                    this.op = NE;
                return EQOP;
            } else {
                this.op = NOT;
                return UNARYOP;
            }

        case '<':
            /* NB:treat HTML begin-comment as comment-till-eol */
            if (in.match('!')) {
                if (in.match('-')) {
                    if (in.match('-')) {
                        skipLine();
                        tokenno++;      // a skipped comment is counted
                        continue retry;
                    }
                    in.unread();
                }
                in.unread();
            }
            if (in.match('<')) {
                if (in.match('=')) {
                    this.op = LSH;
                    return ASSIGN;
                } else {
                    this.op = LSH;
                    return SHOP;
                }
            } else {
                if (in.match('=')) {
                    this.op = LE;
                    return RELOP;
                } else {
                    this.op = LT;
                    return RELOP;
                }
            }

        case '>':
            if (in.match('>')) {
                if (in.match('>')) {
                    if (in.match('=')) {
                        this.op = URSH;
                        return ASSIGN;
                    } else {
                        this.op = URSH;
                        return SHOP;
                    }
                } else {
                    if (in.match('=')) {
                        this.op = RSH;
                        return ASSIGN;
                    } else {
                        this.op = RSH;
                        return SHOP;
                    }
                }
            } else {
                if (in.match('=')) {
                    this.op = GE;
                    return RELOP;
                } else {
                    this.op = GT;
                    return RELOP;
                }
            }

        case '*':
            if (in.match('=')) {
                this.op = MUL;
                return ASSIGN;
            } else {
                return MUL;
            }

        case '/':
            // is it a // comment?
            if (in.match('/')) {
                skipLine();
                tokenno++;              // a skipped comment is counted
                continue retry;
            }
            if (in.match('*')) {
                while ((c = in.read()) != EOF_CHAR
                       && !(c == '*' && in.match('/'))) {
                    if (c == '/' && in.match('*')) {
                        // "/*" inside a comment is an error, except
                        // that "/*/" is taken as the end of the comment.
                        if (in.match('/')) {
                            tokenno++;  // a skipped comment is counted
                            continue retry;
                        }
                        reportError("msg.nested.comment", null);
                        return ERROR;
                    }
                }
                if (c == EOF_CHAR) {
                    reportError("msg.unterminated.comment", null);
                    return ERROR;
                }
                tokenno++;              // a skipped comment is counted
                continue retry;
            }

            // is it a regexp?
            if ((flags & TSF_REGEXP) != 0) {
                // We don't try to use the in.startString/in.getString
                // approach, because escaped characters (which break it)
                // seem likely to be common.
                StringBuffer re = new StringBuffer();
                while ((c = in.read()) != '/') {
                    if (c == '\n' || c == EOF_CHAR) {
                        in.unread();
                        reportError("msg.unterminated.re.lit", null);
                        return ERROR;
                    }
                    if (c == '\\') {
                        // Keep the backslash and take the next
                        // character as is, so an escaped '/' does not
                        // end the literal.
                        re.append((char) c);
                        c = in.read();
                    }
                    re.append((char) c);
                }

                StringBuffer flagsBuf = new StringBuffer();
                while (true) {
                    if (in.match('g'))
                        flagsBuf.append('g');
                    else if (in.match('i'))
                        flagsBuf.append('i');
                    else if (in.match('m'))
                        flagsBuf.append('m');
                    else
                        break;
                }

                if (isAlpha(in.peek())) {
                    reportError("msg.invalid.re.flag", null);
                    return ERROR;
                }

                this.string = re.toString();
                this.regExpFlags = flagsBuf.toString();
                return OBJECT;
            }

            if (in.match('=')) {
                this.op = DIV;
                return ASSIGN;
            } else {
                return DIV;
            }

        case '%':
            this.op = MOD;
            if (in.match('=')) {
                return ASSIGN;
            } else {
                return MOD;
            }

        case '~':
            this.op = BITNOT;
            return UNARYOP;

        case '+':
        case '-':
            if (in.match('=')) {
                if (c == '+') {
                    this.op = ADD;
                    return ASSIGN;
                } else {
                    this.op = SUB;
                    return ASSIGN;
                }
            } else if (in.match((char) c)) {
                if (c == '+') {
                    return INC;
                } else {
                    return DEC;
                }
            } else if (c == '-') {
                return SUB;
            } else {
                return ADD;
            }

        default:
            reportError("msg.illegal.character", null);
            return ERROR;
        }
    }
}

/*
 * Reads up to the next newline or end of input and then un-reads that
 * terminating character, so the caller's next read sees it.
 */
private void skipLine() throws IOException {
    int c;
    do {
        c = in.read();
    } while (c != EOF_CHAR && c != '\n');
    in.unread();
}
/**
 * Reports a scanning error and puts the stream into the error state.
 * TSF_ERROR is set first, so every later getToken() call returns ERROR;
 * the message is then looked up with Context.getMessage and passed to
 * Context.reportError together with the current source name, line
 * number, line text and offset.
 *
 * @param messageProperty key of the message to look up
 * @param args            arguments for the message, or null
 */
private void reportError(String messageProperty, Object[] args) {
    this.flags = this.flags | TSF_ERROR;
    Context.reportError(Context.getMessage(messageProperty, args),
                        getSourceName(),
                        getLineno(), getLine(), getOffset());
}
/** Returns the source name that was passed to the constructor. */
public String getSourceName() {
    return this.sourceName;
}

/** Returns the line number reported by the underlying line buffer. */
public int getLineno() {
    return this.in.getLineno();
}

/** Returns the op cached by the most recent scan that set one. */
public int getOp() {
    return this.op;
}

/** Returns the string cached by the most recent scan that set one. */
public String getString() {
    return this.string;
}

/** Returns the value cached by the most recent NUMBER scan. */
public Number getNumber() {
    return this.number;
}

/** Returns the line text reported by the underlying line buffer. */
public String getLine() {
    return this.in.getLine();
}

/** Returns the offset reported by the underlying line buffer. */
public int getOffset() {
    return this.in.getOffset();
}

/** Returns the running token count maintained by getToken and the pushback methods. */
public int getTokenno() {
    return this.tokenno;
}

/** Returns whatever the underlying line buffer reports for end of input. */
public boolean eof() {
    return this.in.eof();
}
// instance variables
// Character source; all reading, pushback and substring capture in
// getToken() goes through it.
private LineBuffer in;
/* Bit set of TSF_* values (TSF_ERROR, TSF_NEWLINES, TSF_REGEXP, etc.).
* Public so that the parser can change/check the scanner state.
* should this be manipulated by getter/setter functions?
* should it be passed to getToken();
*/
public int flags;
// Flags of the most recently scanned regular expression literal
// (some sequence of 'g', 'i' and 'm'); set by getToken() when it
// returns OBJECT.
public String regExpFlags;
// Name reported with errors and warnings; set by the constructor.
private String sourceName;
// NOTE(review): not read or written anywhere in this class (getLine()
// delegates to the line buffer) - looks unused; confirm before removing.
private String line;
// Single-token pushback slot; EOF means the slot is empty.
private int pushbackToken;
// Incremented by getToken() and decremented whenever a token is pushed
// back (matchToken, ungetToken, peekToken).
private int tokenno;
// Specific operator that goes with a general token type such as ASSIGN
// or RELOP; also overwritten by every keyword lookup that succeeds.
private int op;
// Set this to an initial non-null value so that the Parser has
// something to retrieve even if an error has occurred and no
// string is found. Fosters one class of error, but saves lots of
// code.
private String string = "";
// Value of the most recently scanned NUMBER token; null until a number
// has been scanned.
private Number number;
}