зеркало из https://github.com/mozilla/gecko-dev.git
1300 строки
42 KiB
Java
1300 строки
42 KiB
Java
/* -*- Mode: Java; tab-width: 8 -*-
|
|
* Copyright © 1997, 1998 Netscape Communications Corporation,
|
|
* All Rights Reserved.
|
|
*/
|
|
|
|
import java.io.*;
|
|
|
|
/**
|
|
* This class implements the JavaScript scanner.
|
|
*
|
|
* It is based on the C source files jsscan.c and jsscan.h
|
|
* in the jsref package.
|
|
*
|
|
* @see com.netscape.javascript.Parser
|
|
*
|
|
* @author Mike McCabe
|
|
* @author Brendan Eich
|
|
*/
|
|
|
|
public class TokenStream {
|
|
/*
|
|
* JSTokenStream flags, mirroring those in jsscan.h. These are used
|
|
* by the parser to change/check the state of the scanner.
|
|
*/
|
|
|
|
public final static int
|
|
TSF_ERROR = 0x0001, // fatal error while scanning
|
|
// TSF_EOF = 0x0002, // hit end of file
|
|
TSF_NEWLINES = 0x0004, // tokenize newlines
|
|
TSF_FUNCTION = 0x0008, // scanning inside function body
|
|
TSF_RETURN_EXPR = 0x0010, // function has 'return expr;'
|
|
TSF_RETURN_VOID = 0x0020, // function has 'return;'
|
|
// TSF_INTERACTIVE = 0x0040, // interactive parsing mode
|
|
// TSF_COMMAND = 0x0080, // command parsing mode
|
|
// TSF_LOOKAHEAD = 0x0100, // looking ahead for a token
|
|
TSF_REGEXP = 0x0200; // looking for a regular expression
|
|
|
|
/*
|
|
* For chars - because we need something out-of-range
|
|
* to check. (And checking EOF by exception is annoying.)
|
|
* Note distinction from EOF token type!
|
|
*/
|
|
private final static int
|
|
EOF_CHAR = -1;
|
|
|
|
/**
|
|
* Token types. These values correspond to JSTokenType values in
|
|
* jsscan.c.
|
|
*/
|
|
|
|
public final static int
|
|
// start enum
|
|
ERROR = -1, // well-known as the only code < EOF
|
|
EOF = 0, // end of file token - (not EOF_CHAR)
|
|
EOL = 1, // end of line
|
|
// Beginning here are interpreter bytecodes. Their values
|
|
// must not exceed 127.
|
|
POPV = 2,
|
|
ENTERWITH = 3,
|
|
LEAVEWITH = 4,
|
|
RETURN = 5,
|
|
GOTO = 6,
|
|
IFEQ = 7,
|
|
IFNE = 8,
|
|
DUP = 9,
|
|
SETNAME = 10,
|
|
BITOR = 11,
|
|
BITXOR = 12,
|
|
BITAND = 13,
|
|
EQ = 14,
|
|
NE = 15,
|
|
LT = 16,
|
|
LE = 17,
|
|
GT = 18,
|
|
GE = 19,
|
|
LSH = 20,
|
|
RSH = 21,
|
|
URSH = 22,
|
|
ADD = 23,
|
|
SUB = 24,
|
|
MUL = 25,
|
|
DIV = 26,
|
|
MOD = 27,
|
|
BITNOT = 28,
|
|
NEG = 29,
|
|
NEW = 30,
|
|
DELPROP = 31,
|
|
TYPEOF = 32,
|
|
NAMEINC = 33,
|
|
PROPINC = 34,
|
|
ELEMINC = 35,
|
|
NAMEDEC = 36,
|
|
PROPDEC = 37,
|
|
ELEMDEC = 38,
|
|
GETPROP = 39,
|
|
SETPROP = 40,
|
|
GETELEM = 41,
|
|
SETELEM = 42,
|
|
CALL = 43,
|
|
NAME = 44,
|
|
NUMBER = 45,
|
|
STRING = 46,
|
|
ZERO = 47,
|
|
ONE = 48,
|
|
NULL = 49,
|
|
THIS = 50,
|
|
FALSE = 51,
|
|
TRUE = 52,
|
|
SHEQ = 53, // shallow equality (===)
|
|
SHNE = 54, // shallow inequality (!==)
|
|
CLOSURE = 55,
|
|
OBJECT = 56,
|
|
POP = 57,
|
|
POS = 58,
|
|
VARINC = 59,
|
|
VARDEC = 60,
|
|
BINDNAME = 61,
|
|
THROW = 62,
|
|
IN = 63,
|
|
INSTANCEOF = 64,
|
|
GOSUB = 65,
|
|
RETSUB = 66,
|
|
CALLSPECIAL = 67,
|
|
GETTHIS = 68,
|
|
NEWTEMP = 69,
|
|
USETEMP = 70,
|
|
GETBASE = 71,
|
|
GETVAR = 72,
|
|
SETVAR = 73,
|
|
UNDEFINED = 74,
|
|
TRY = 75,
|
|
ENDTRY = 76,
|
|
NEWSCOPE = 77,
|
|
TYPEOFNAME = 78,
|
|
ENUMINIT = 79,
|
|
ENUMNEXT = 80,
|
|
GETPROTO = 81,
|
|
GETPARENT = 82,
|
|
SETPROTO = 83,
|
|
SETPARENT = 84,
|
|
SCOPE = 85,
|
|
GETSCOPEPARENT = 86,
|
|
JTHROW = 87,
|
|
// End of interpreter bytecodes
|
|
SEMI = 88, // semicolon
|
|
LB = 89, // left and right brackets
|
|
RB = 90,
|
|
LC = 91, // left and right curlies (braces)
|
|
RC = 92,
|
|
LP = 93, // left and right parentheses
|
|
RP = 94,
|
|
COMMA = 95, // comma operator
|
|
ASSIGN = 96, // assignment ops (= += -= etc.)
|
|
HOOK = 97, // conditional (?:)
|
|
COLON = 98,
|
|
OR = 99, // logical or (||)
|
|
AND = 100, // logical and (&&)
|
|
EQOP = 101, // equality ops (== !=)
|
|
RELOP = 102, // relational ops (< <= > >=)
|
|
SHOP = 103, // shift ops (<< >> >>>)
|
|
UNARYOP = 104, // unary prefix operator
|
|
INC = 105, // increment/decrement (++ --)
|
|
DEC = 106,
|
|
DOT = 107, // member operator (.)
|
|
PRIMARY = 108, // true, false, null, this, super
|
|
FUNCTION = 109, // function keyword
|
|
EXPORT = 110, // export keyword
|
|
IMPORT = 111, // import keyword
|
|
IF = 112, // if keyword
|
|
ELSE = 113, // else keyword
|
|
SWITCH = 114, // switch keyword
|
|
CASE = 115, // case keyword
|
|
DEFAULT = 116, // default keyword
|
|
WHILE = 117, // while keyword
|
|
DO = 118, // do keyword
|
|
FOR = 119, // for keyword
|
|
BREAK = 120, // break keyword
|
|
CONTINUE = 121, // continue keyword
|
|
VAR = 122, // var keyword
|
|
WITH = 123, // with keyword
|
|
CATCH = 124, // catch keyword
|
|
FINALLY = 125, // finally keyword
|
|
RESERVED = 126, // reserved keywords
|
|
|
|
/** Added by Mike - these are JSOPs in the jsref, but I
|
|
* don't have them yet in the java implementation...
|
|
* so they go here. Also whatever I needed.
|
|
|
|
* Most of these go in the 'op' field when returning
|
|
* more general token types, eg. 'DIV' as the op of 'ASSIGN'.
|
|
*/
|
|
NOP = 127, // NOP
|
|
NOT = 128, // etc.
|
|
PRE = 129, // for INC, DEC nodes.
|
|
POST = 130,
|
|
|
|
/**
|
|
* For JSOPs associated with keywords...
|
|
* eg. op = THIS; token = PRIMARY
|
|
*/
|
|
|
|
VOID = 131,
|
|
|
|
/* types used for the parse tree - these never get returned
|
|
* by the scanner.
|
|
*/
|
|
BLOCK = 132, // statement block
|
|
ARRAYLIT = 133, // array literal
|
|
OBJLIT = 134, // object literal
|
|
LABEL = 135, // label
|
|
TARGET = 136,
|
|
LOOP = 137,
|
|
ENUMDONE = 138,
|
|
EXPRSTMT = 139,
|
|
PARENT = 140,
|
|
CONVERT = 141,
|
|
JSR = 142,
|
|
NEWLOCAL = 143,
|
|
USELOCAL = 144,
|
|
SCRIPT = 145; // top-level node for entire script
|
|
// end enum
|
|
|
|
|
|
/* for mapping int token types to printable strings.
|
|
* make sure to add 1 to index before using these!
|
|
*/
|
|
private static String names[];
|
|
private static void checkNames() {
|
|
if (Context.printTrees && names == null) {
|
|
String[] a = {
|
|
"error",
|
|
"eof",
|
|
"eol",
|
|
"popv",
|
|
"enterwith",
|
|
"leavewith",
|
|
"return",
|
|
"goto",
|
|
"ifeq",
|
|
"ifne",
|
|
"dup",
|
|
"setname",
|
|
"bitor",
|
|
"bitxor",
|
|
"bitand",
|
|
"eq",
|
|
"ne",
|
|
"lt",
|
|
"le",
|
|
"gt",
|
|
"ge",
|
|
"lsh",
|
|
"rsh",
|
|
"ursh",
|
|
"add",
|
|
"sub",
|
|
"mul",
|
|
"div",
|
|
"mod",
|
|
"bitnot",
|
|
"neg",
|
|
"new",
|
|
"delprop",
|
|
"typeof",
|
|
"nameinc",
|
|
"propinc",
|
|
"eleminc",
|
|
"namedec",
|
|
"propdec",
|
|
"elemdec",
|
|
"getprop",
|
|
"setprop",
|
|
"getelem",
|
|
"setelem",
|
|
"call",
|
|
"name",
|
|
"number",
|
|
"string",
|
|
"zero",
|
|
"one",
|
|
"null",
|
|
"this",
|
|
"false",
|
|
"true",
|
|
"sheq",
|
|
"shne",
|
|
"closure",
|
|
"object",
|
|
"pop",
|
|
"pos",
|
|
"varinc",
|
|
"vardec",
|
|
"bindname",
|
|
"throw",
|
|
"in",
|
|
"instanceof",
|
|
"gosub",
|
|
"retsub",
|
|
"callspecial",
|
|
"getthis",
|
|
"newtemp",
|
|
"usetemp",
|
|
"getbase",
|
|
"getvar",
|
|
"setvar",
|
|
"undefined",
|
|
"try",
|
|
"endtry",
|
|
"newscope",
|
|
"typeofname",
|
|
"enuminit",
|
|
"enumnext",
|
|
"getproto",
|
|
"getparent",
|
|
"setproto",
|
|
"setparent",
|
|
"scope",
|
|
"getscopeparent",
|
|
"jthrow",
|
|
"semi",
|
|
"lb",
|
|
"rb",
|
|
"lc",
|
|
"rc",
|
|
"lp",
|
|
"rp",
|
|
"comma",
|
|
"assign",
|
|
"hook",
|
|
"colon",
|
|
"or",
|
|
"and",
|
|
"eqop",
|
|
"relop",
|
|
"shop",
|
|
"unaryop",
|
|
"inc",
|
|
"dec",
|
|
"dot",
|
|
"primary",
|
|
"function",
|
|
"export",
|
|
"import",
|
|
"if",
|
|
"else",
|
|
"switch",
|
|
"case",
|
|
"default",
|
|
"while",
|
|
"do",
|
|
"for",
|
|
"break",
|
|
"continue",
|
|
"var",
|
|
"with",
|
|
"catch",
|
|
"finally",
|
|
"reserved",
|
|
"nop",
|
|
"not",
|
|
"pre",
|
|
"post",
|
|
"void",
|
|
"block",
|
|
"arraylit",
|
|
"objlit",
|
|
"label",
|
|
"target",
|
|
"loop",
|
|
"enumdone",
|
|
"exprstmt",
|
|
"parent",
|
|
"convert",
|
|
"jsr",
|
|
"newlocal",
|
|
"uselocal",
|
|
"script"
|
|
};
|
|
names = a;
|
|
}
|
|
}
|
|
|
|
/* This function uses the cached op, string and number fields in
|
|
* TokenStream; if getToken has been called since the passed token
|
|
* was scanned, the op or string printed may be incorrect.
|
|
*/
|
|
public String tokenToString(int token) {
|
|
if (Context.printTrees) {
|
|
checkNames();
|
|
if (token + 1 >= names.length)
|
|
return null;
|
|
|
|
if (token == UNARYOP ||
|
|
token == ASSIGN ||
|
|
token == PRIMARY ||
|
|
token == EQOP ||
|
|
token == SHOP ||
|
|
token == RELOP) {
|
|
return names[token + 1] + " " + names[this.op + 1];
|
|
}
|
|
|
|
if (token == STRING || token == OBJECT || token == NAME)
|
|
return names[token + 1] + " `" + this.string + "'";
|
|
|
|
if (token == NUMBER)
|
|
return "NUMBER " + this.number;
|
|
|
|
return names[token + 1];
|
|
}
|
|
return "";
|
|
}
|
|
|
|
public static String tokenToName(int type) {
|
|
checkNames();
|
|
return names == null ? "" : names[type + 1];
|
|
}
|
|
|
|
/*
 * Keyword lookup table.  Each word maps to an Integer holding the token
 * type in the low 8 bits and, for words that carry one, the op in the
 * bits above (see stringToKeyword, which unpacks it).
 */
private static java.util.Hashtable keywords;

static {
    // words[i] is described by codes[i]; the two tables are laid out
    // four entries per row so that rows can be compared by eye.
    String[] words = {
        "break", "case", "continue", "default",
        "delete", "do", "else", "export",
        "false", "for", "function", "if",
        "in", "new", "null", "return",
        "switch", "this", "true", "typeof",
        "var", "void", "while", "with",

        // the following are #ifdef RESERVE_JAVA_KEYWORDS in jsscan.c
        "abstract", "boolean", "byte", "catch",
        "char", "class", "const", "debugger",
        "double", "enum", "extends", "final",
        "finally", "float", "goto", "implements",
        "import", "instanceof", "int", "interface",
        "long", "native", "package", "private",
        "protected", "public", "short", "static",
        "super", "synchronized", "throw", "throws",
        "transient", "try", "volatile"
    };

    int[] codes = {
        BREAK, CASE, CONTINUE, DEFAULT,
        DELPROP, DO, ELSE, EXPORT,
        PRIMARY | (FALSE << 8), FOR, FUNCTION, IF,
        RELOP | (IN << 8), NEW, PRIMARY | (NULL << 8), RETURN,
        SWITCH, PRIMARY | (THIS << 8), PRIMARY | (TRUE << 8),
            UNARYOP | (TYPEOF << 8),
        VAR, UNARYOP | (VOID << 8), WHILE, WITH,

        RESERVED, RESERVED, RESERVED, CATCH,     // abstract .. catch
        RESERVED, RESERVED, RESERVED, RESERVED,  // char .. debugger
        RESERVED, RESERVED, RESERVED, RESERVED,  // double .. final
        FINALLY, RESERVED, RESERVED, RESERVED,   // finally .. implements
        IMPORT, RELOP | (INSTANCEOF << 8), RESERVED, RESERVED,
                                                 // import .. interface
        RESERVED, RESERVED, RESERVED, RESERVED,  // long .. private
        RESERVED, RESERVED, RESERVED, RESERVED,  // protected .. static
        PRIMARY | (NOP << 8), RESERVED, THROW, RESERVED,
                                                 // super .. throws
        RESERVED, TRY, RESERVED                  // transient .. volatile
    };

    keywords = new java.util.Hashtable(words.length);

    // All plain reserved words share a single Integer instance.
    Integer reserved = new Integer(RESERVED);
    int i = 0;
    while (i < words.length) {
        Integer code;
        if (codes[i] == RESERVED)
            code = reserved;
        else
            code = new Integer(codes[i]);
        keywords.put(words[i], code);
        i++;
    }
}
|
|
|
|
private int stringToKeyword(String name) {
|
|
Integer result = (Integer) keywords.get(name);
|
|
if (result == null)
|
|
return EOF;
|
|
int x = result.intValue();
|
|
this.op = x >> 8;
|
|
return x & 0xff;
|
|
}
|
|
|
|
/**
 * Creates a scanner reading from the given source.
 *
 * @param in         reader supplying the script text; it is wrapped in
 *                   a LineBuffer together with lineno
 * @param sourceName name reported by getSourceName()
 * @param lineno     line number handed to the LineBuffer
 */
public TokenStream(Reader in, String sourceName, int lineno) {
    this.sourceName = sourceName;
    this.flags = 0;

    // EOF doubles as the "no token pushed back" marker.
    this.pushbackToken = EOF;

    this.in = new LineBuffer(in, lineno);
}
|
|
|
|
/* return and pop the token from the stream if it matches...
|
|
* otherwise return null
|
|
*/
|
|
public boolean matchToken(int toMatch) throws IOException {
|
|
int token = getToken();
|
|
if (token == toMatch)
|
|
return true;
|
|
|
|
// didn't match, push back token
|
|
tokenno--;
|
|
this.pushbackToken = token;
|
|
return false;
|
|
}
|
|
|
|
public void clearPushback() {
|
|
this.pushbackToken = EOF;
|
|
}
|
|
|
|
public void ungetToken(int tt) {
|
|
if (this.pushbackToken != EOF && tt != ERROR) {
|
|
Object[] errArgs = { tokenToString(tt),
|
|
tokenToString(this.pushbackToken) };
|
|
String message = Context.getMessage("msg.token.replaces.pushback",
|
|
errArgs);
|
|
throw new RuntimeException(message);
|
|
}
|
|
this.pushbackToken = tt;
|
|
tokenno--;
|
|
}
|
|
|
|
public int peekToken() throws IOException {
|
|
int result = getToken();
|
|
|
|
this.pushbackToken = result;
|
|
tokenno--;
|
|
return result;
|
|
}
|
|
|
|
public int peekTokenSameLine() throws IOException {
|
|
int result;
|
|
|
|
flags |= TSF_NEWLINES; // SCAN_NEWLINES from jsscan.h
|
|
result = peekToken();
|
|
flags &= ~TSF_NEWLINES; // HIDE_NEWLINES from jsscan.h
|
|
if (this.pushbackToken == EOL)
|
|
this.pushbackToken = EOF;
|
|
return result;
|
|
}
|
|
|
|
/* helper functions... these might be better inlined.
|
|
* These are needed because the java.lang.Character.isWhatever
|
|
* functions accept unicode, and we want to limit
|
|
* identifiers to ASCII.
|
|
*/
|
|
protected static boolean isJSIdentifier(String s) {
|
|
int length = s.length();
|
|
|
|
if (length == 0 || !isJSIdentifierStart(s.charAt(0)))
|
|
return false;
|
|
|
|
for (int i=1; i<length; i++) {
|
|
if (!isJSIdentifierPart(s.charAt(i)))
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
protected static boolean isJSIdentifierStart(int c) {
|
|
return ((c >= 'a' && c <= 'z')
|
|
|| (c >= 'A' && c <= 'Z')
|
|
|| c == '_'
|
|
|| c == '$');
|
|
}
|
|
|
|
protected static boolean isJSIdentifierPart(int c) {
|
|
return ((c >= 'a' && c <= 'z')
|
|
|| (c >= 'A' && c <= 'Z')
|
|
|| (c >= '0' && c <= '9')
|
|
|| c == '_'
|
|
|| c == '$');
|
|
}
|
|
|
|
private static boolean isAlpha(int c) {
|
|
return ((c >= 'a' && c <= 'z')
|
|
|| (c >= 'A' && c <= 'Z'));
|
|
}
|
|
|
|
static boolean isDigit(int c) {
|
|
return (c >= '0' && c <= '9');
|
|
}
|
|
|
|
static boolean isXDigit(int c) {
|
|
return ((c >= '0' && c <= '9')
|
|
|| (c >= 'a' && c <= 'f')
|
|
|| (c >= 'A' && c <= 'F'));
|
|
}
|
|
|
|
/* As defined in ECMA. jsscan.c uses C isspace() (which allows
|
|
* \v, I think.) note that code in in.read() implicitly accepts
|
|
* '\r' == \u000D as well.
|
|
*/
|
|
static boolean isJSSpace(int c) {
|
|
return (c == '\u0009' || c == '\u000B'
|
|
|| c == '\u000C' || c == '\u0020');
|
|
}
|
|
|
|
public int getToken() throws IOException {
|
|
int c;
|
|
tokenno++;
|
|
// If there was a fatal error, keep returning TOK_ERROR.
|
|
if ((flags & TSF_ERROR) != 0)
|
|
return ERROR;
|
|
|
|
// Check for pushed-back token
|
|
if (this.pushbackToken != EOF) {
|
|
int result = this.pushbackToken;
|
|
this.pushbackToken = EOF;
|
|
return result;
|
|
}
|
|
|
|
// Eat whitespace, possibly sensitive to newlines.
|
|
do {
|
|
c = in.read();
|
|
if (c == '\n')
|
|
if ((flags & TSF_NEWLINES) != 0)
|
|
break;
|
|
} while (isJSSpace(c) || c == '\n');
|
|
|
|
if (c == EOF_CHAR)
|
|
return EOF;
|
|
|
|
// identifier/keyword/instanceof?
|
|
if (isJSIdentifierStart(c)) {
|
|
in.startString();
|
|
do {
|
|
c = in.read();
|
|
} while (isJSIdentifierPart(c));
|
|
in.unread();
|
|
|
|
int result;
|
|
|
|
String str = in.getString();
|
|
// OPT we shouldn't have to make a string (object!) to
|
|
// check if it's a keyword.
|
|
|
|
// Return the corresponding token if it's a keyword
|
|
if ((result = stringToKeyword(str)) != EOF) {
|
|
return result;
|
|
}
|
|
|
|
this.string = str;
|
|
return NAME;
|
|
}
|
|
|
|
// is it a number?
|
|
if (isDigit(c) || (c == '.' && isDigit(in.peek()))) {
|
|
int base = 10;
|
|
in.startString();
|
|
|
|
double dval = ScriptRuntime.NaN;
|
|
long longval = 0;
|
|
boolean isInteger = true;
|
|
|
|
if (c == '0') {
|
|
c = in.read();
|
|
if (c == 'x' || c == 'X') {
|
|
c = in.read();
|
|
base = 16;
|
|
// restart the string, losing leading 0x
|
|
in.startString();
|
|
} else if (isDigit(c)) {
|
|
if (c < '8') {
|
|
base = 8;
|
|
// Restart the string, losing the leading 0
|
|
in.startString();
|
|
} else {
|
|
/* Checking against c < '8' is non-ECMA, but
|
|
* is required to support legacy code; we've
|
|
* supported it in the past, and it's likely
|
|
* that "08" and "09" are in use in code
|
|
* having to do with dates. So we need to
|
|
* support it, which makes our behavior a
|
|
* superset of ECMA in this area. We raise a
|
|
* warning if a non-octal digit is
|
|
* encountered, then proceed as if it were
|
|
* decimal.
|
|
*/
|
|
Object[] errArgs = { String.valueOf((char)c) };
|
|
Context.reportWarning
|
|
(Context.getMessage("msg.bad.octal.literal",
|
|
errArgs),
|
|
getSourceName(), in.getLineno(),
|
|
getLine(), getOffset());
|
|
// implicitly retain the leading 0
|
|
}
|
|
} else {
|
|
// implicitly retain the leading 0
|
|
}
|
|
}
|
|
|
|
while (isXDigit(c)) {
|
|
if (base < 16 && (isAlpha(c)
|
|
|| (base == 8 && c >= '8'))) {
|
|
break;
|
|
}
|
|
c = in.read();
|
|
}
|
|
|
|
if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
|
|
isInteger = false;
|
|
if (c == '.') {
|
|
do {
|
|
c = in.read();
|
|
} while (isDigit(c));
|
|
}
|
|
|
|
if (c == 'e' || c == 'E') {
|
|
c = in.read();
|
|
if (c == '+' || c == '-') {
|
|
c = in.read();
|
|
}
|
|
if (!isDigit(c)) {
|
|
in.getString(); // throw away string in progress
|
|
reportError("msg.missing.exponent", null);
|
|
return ERROR;
|
|
}
|
|
do {
|
|
c = in.read();
|
|
} while (isDigit(c));
|
|
}
|
|
}
|
|
in.unread();
|
|
String numString = in.getString();
|
|
|
|
if (base == 10 && !isInteger) {
|
|
try {
|
|
// Use Java conversion to number from string...
|
|
dval = (Double.valueOf(numString)).doubleValue();
|
|
}
|
|
catch (NumberFormatException ex) {
|
|
Object[] errArgs = { ex.getMessage() };
|
|
reportError("msg.caught.nfe", errArgs);
|
|
return ERROR;
|
|
}
|
|
} else {
|
|
dval = ScriptRuntime.stringToNumber(numString, 0, base);
|
|
longval = (long) dval;
|
|
|
|
// is it an integral fits-in-a-long value?
|
|
if (longval != dval)
|
|
isInteger = false;
|
|
}
|
|
|
|
if (!isInteger) {
|
|
/* Can't handle floats right now, because postfix INC/DEC
|
|
generate Doubles, but I would generate a Float through this
|
|
path, and it causes a stack mismatch. FIXME (MS)
|
|
if (Float.MIN_VALUE <= dval && dval <= Float.MAX_VALUE)
|
|
this.number = new Xloat((float) dval);
|
|
else
|
|
*/
|
|
this.number = new Double(dval);
|
|
} else {
|
|
// We generate the smallest possible type here
|
|
if (Byte.MIN_VALUE <= longval && longval <= Byte.MAX_VALUE)
|
|
this.number = new Byte((byte)longval);
|
|
else if (Short.MIN_VALUE <= longval &&
|
|
longval <= Short.MAX_VALUE)
|
|
this.number = new Short((short)longval);
|
|
else if (Integer.MIN_VALUE <= longval &&
|
|
longval <= Integer.MAX_VALUE)
|
|
this.number = new Integer((int)longval);
|
|
else {
|
|
// May lose some precision here, but that's the
|
|
// appropriate semantics.
|
|
this.number = new Double(longval);
|
|
}
|
|
}
|
|
return NUMBER;
|
|
}
|
|
|
|
// is it a string?
|
|
if (c == '"' || c == '\'') {
|
|
// We attempt to accumulate a string the fast way, by
|
|
// building it directly out of the reader. But if there
|
|
// are any escaped characters in the string, we revert to
|
|
// building it out of a StringBuffer.
|
|
|
|
StringBuffer stringBuf = null;
|
|
|
|
int quoteChar = c;
|
|
int val = 0;
|
|
|
|
c = in.read();
|
|
in.startString(); // start after the first "
|
|
while(c != quoteChar) {
|
|
if (c == '\n' || c == EOF_CHAR) {
|
|
in.unread();
|
|
in.getString(); // throw away the string in progress
|
|
reportError("msg.unterminated.string.lit", null);
|
|
return ERROR;
|
|
}
|
|
|
|
if (c == '\\') {
|
|
// We've hit an escaped character; revert to the
|
|
// slow method of building a string.
|
|
if (stringBuf == null) {
|
|
// Don't include the backslash
|
|
in.unread();
|
|
stringBuf = new StringBuffer(in.getString());
|
|
in.read();
|
|
}
|
|
|
|
switch (c = in.read()) {
|
|
case 'b': c = '\b'; break;
|
|
case 'f': c = '\f'; break;
|
|
case 'n': c = '\n'; break;
|
|
case 'r': c = '\r'; break;
|
|
case 't': c = '\t'; break;
|
|
case 'v': c = '\u000B'; break;
|
|
// \v a late addition to the ECMA spec.
|
|
// '\v' doesn't seem to be valid Java.
|
|
|
|
default:
|
|
if (isDigit(c) && c < '8') {
|
|
val = c - '0';
|
|
c = in.read();
|
|
if (isDigit(c) && c < '8') {
|
|
val = 8 * val + c - '0';
|
|
c = in.read();
|
|
if (isDigit(c) && c < '8') {
|
|
val = 8 * val + c - '0';
|
|
c = in.read();
|
|
}
|
|
}
|
|
in.unread();
|
|
if (val > 0377) {
|
|
reportError("msg.oct.esc.too.large", null);
|
|
return ERROR;
|
|
}
|
|
c = val;
|
|
} else if (c == 'u') {
|
|
/*
|
|
* Get 4 hex digits; if the u escape is not
|
|
* followed by 4 hex digits, use 'u' + the literal
|
|
* character sequence that follows. Do some manual
|
|
* match (OK because we're in a string) to avoid
|
|
* multi-char match on the underlying stream.
|
|
*/
|
|
int c1, c2, c3, c4;
|
|
|
|
c1 = in.read();
|
|
if (!isXDigit(c1)) {
|
|
in.unread();
|
|
c = 'u';
|
|
} else {
|
|
val = Character.digit((char) c1, 16);
|
|
c2 = in.read();
|
|
if (!isXDigit(c2)) {
|
|
in.unread();
|
|
stringBuf.append('u');
|
|
c = c1;
|
|
} else {
|
|
val = 16 * val
|
|
+ Character.digit((char) c2, 16);
|
|
c3 = in.read();
|
|
if (!isXDigit(c3)) {
|
|
in.unread();
|
|
stringBuf.append('u');
|
|
stringBuf.append((char)c1);
|
|
c = c2;
|
|
} else {
|
|
val = 16 * val
|
|
+ Character.digit((char) c3, 16);
|
|
c4 = in.read();
|
|
if (!isXDigit(c4)) {
|
|
in.unread();
|
|
stringBuf.append('u');
|
|
stringBuf.append((char)c1);
|
|
stringBuf.append((char)c2);
|
|
c = c3;
|
|
} else {
|
|
// got 4 hex digits! Woo Hoo!
|
|
val = 16 * val
|
|
+ Character.digit((char) c4, 16);
|
|
c = val;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} else if (c == 'x') {
|
|
/* Get 2 hex digits, defaulting to 'x' + literal
|
|
* sequence, as above.
|
|
*/
|
|
int c1, c2;
|
|
|
|
c1 = in.read();
|
|
if (!isXDigit(c1)) {
|
|
in.unread();
|
|
c = 'x';
|
|
} else {
|
|
val = Character.digit((char) c1, 16);
|
|
c2 = in.read();
|
|
if (!isXDigit(c2)) {
|
|
in.unread();
|
|
stringBuf.append('x');
|
|
c = c1;
|
|
} else {
|
|
// got 2 hex digits
|
|
val = 16 * val
|
|
+ Character.digit((char) c2, 16);
|
|
c = val;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (stringBuf != null)
|
|
stringBuf.append((char) c);
|
|
c = in.read();
|
|
}
|
|
|
|
if (stringBuf != null)
|
|
this.string = stringBuf.toString();
|
|
else {
|
|
in.unread(); // miss the trailing "
|
|
this.string = in.getString();
|
|
in.read();
|
|
}
|
|
return STRING;
|
|
}
|
|
|
|
switch (c)
|
|
{
|
|
case '\n': return EOL;
|
|
case ';': return SEMI;
|
|
case '[': return LB;
|
|
case ']': return RB;
|
|
case '{': return LC;
|
|
case '}': return RC;
|
|
case '(': return LP;
|
|
case ')': return RP;
|
|
case ',': return COMMA;
|
|
case '?': return HOOK;
|
|
case ':': return COLON;
|
|
case '.': return DOT;
|
|
|
|
case '|':
|
|
if (in.match('|')) {
|
|
return OR;
|
|
} else if (in.match('=')) {
|
|
this.op = BITOR;
|
|
return ASSIGN;
|
|
} else {
|
|
return BITOR;
|
|
}
|
|
|
|
case '^':
|
|
if (in.match('=')) {
|
|
this.op = BITXOR;
|
|
return ASSIGN;
|
|
} else {
|
|
return BITXOR;
|
|
}
|
|
|
|
case '&':
|
|
if (in.match('&')) {
|
|
return AND;
|
|
} else if (in.match('=')) {
|
|
this.op = BITAND;
|
|
return ASSIGN;
|
|
} else {
|
|
return BITAND;
|
|
}
|
|
|
|
case '=':
|
|
if (in.match('=')) {
|
|
if (in.match('='))
|
|
this.op = SHEQ;
|
|
else
|
|
this.op = EQ;
|
|
return EQOP;
|
|
} else {
|
|
this.op = NOP;
|
|
return ASSIGN;
|
|
}
|
|
|
|
case '!':
|
|
if (in.match('=')) {
|
|
if (in.match('='))
|
|
this.op = SHNE;
|
|
else
|
|
this.op = NE;
|
|
return EQOP;
|
|
} else {
|
|
this.op = NOT;
|
|
return UNARYOP;
|
|
}
|
|
|
|
case '<':
|
|
/* NB:treat HTML begin-comment as comment-till-eol */
|
|
if (in.match('!')) {
|
|
if (in.match('-')) {
|
|
if (in.match('-')) {
|
|
while ((c = in.read()) != EOF_CHAR && c != '\n')
|
|
/* skip to end of line */;
|
|
in.unread();
|
|
return getToken(); // in place of 'goto retry'
|
|
}
|
|
in.unread();
|
|
}
|
|
in.unread();
|
|
}
|
|
if (in.match('<')) {
|
|
if (in.match('=')) {
|
|
this.op = LSH;
|
|
return ASSIGN;
|
|
} else {
|
|
this.op = LSH;
|
|
return SHOP;
|
|
}
|
|
} else {
|
|
if (in.match('=')) {
|
|
this.op = LE;
|
|
return RELOP;
|
|
} else {
|
|
this.op = LT;
|
|
return RELOP;
|
|
}
|
|
}
|
|
|
|
case '>':
|
|
if (in.match('>')) {
|
|
if (in.match('>')) {
|
|
if (in.match('=')) {
|
|
this.op = URSH;
|
|
return ASSIGN;
|
|
} else {
|
|
this.op = URSH;
|
|
return SHOP;
|
|
}
|
|
} else {
|
|
if (in.match('=')) {
|
|
this.op = RSH;
|
|
return ASSIGN;
|
|
} else {
|
|
this.op = RSH;
|
|
return SHOP;
|
|
}
|
|
}
|
|
} else {
|
|
if (in.match('=')) {
|
|
this.op = GE;
|
|
return RELOP;
|
|
} else {
|
|
this.op = GT;
|
|
return RELOP;
|
|
}
|
|
}
|
|
|
|
case '*':
|
|
if (in.match('=')) {
|
|
this.op = MUL;
|
|
return ASSIGN;
|
|
} else {
|
|
return MUL;
|
|
}
|
|
|
|
case '/':
|
|
// is it a // comment?
|
|
if (in.match('/')) {
|
|
while ((c = in.read()) != EOF_CHAR && c != '\n')
|
|
/* skip to end of line */;
|
|
in.unread();
|
|
return getToken();
|
|
}
|
|
if (in.match('*')) {
|
|
while ((c = in.read()) != -1
|
|
&& !(c == '*' && in.match('/'))) {
|
|
if (c == '\n') {
|
|
} else if (c == '/' && in.match('*')) {
|
|
if (in.match('/'))
|
|
return getToken();
|
|
reportError("msg.nested.comment", null);
|
|
return ERROR;
|
|
}
|
|
}
|
|
if (c == EOF_CHAR) {
|
|
reportError("msg.unterminated.comment", null);
|
|
return ERROR;
|
|
}
|
|
return getToken(); // `goto retry'
|
|
}
|
|
|
|
// is it a regexp?
|
|
if ((flags & TSF_REGEXP) != 0) {
|
|
// We don't try to use the in.startString/in.getString
|
|
// approach, because escaped characters (which break it)
|
|
// seem likely to be common.
|
|
StringBuffer re = new StringBuffer();
|
|
while ((c = in.read()) != '/') {
|
|
if (c == '\n' || c == EOF_CHAR) {
|
|
in.unread();
|
|
reportError("msg.unterminated.re.lit", null);
|
|
return ERROR;
|
|
}
|
|
if (c == '\\') {
|
|
re.append((char) c);
|
|
c = in.read();
|
|
}
|
|
|
|
re.append((char) c);
|
|
}
|
|
|
|
StringBuffer flagsBuf = new StringBuffer();
|
|
while (true) {
|
|
if (in.match('g'))
|
|
flagsBuf.append('g');
|
|
else if (in.match('i'))
|
|
flagsBuf.append('i');
|
|
else if (in.match('m'))
|
|
flagsBuf.append('m');
|
|
else
|
|
break;
|
|
}
|
|
|
|
if (isAlpha(in.peek())) {
|
|
reportError("msg.invalid.re.flag", null);
|
|
return ERROR;
|
|
}
|
|
|
|
this.string = re.toString();
|
|
this.regExpFlags = flagsBuf.toString();
|
|
return OBJECT;
|
|
}
|
|
|
|
|
|
if (in.match('=')) {
|
|
this.op = DIV;
|
|
return ASSIGN;
|
|
} else {
|
|
return DIV;
|
|
}
|
|
|
|
case '%':
|
|
this.op = MOD;
|
|
if (in.match('=')) {
|
|
return ASSIGN;
|
|
} else {
|
|
return MOD;
|
|
}
|
|
|
|
case '~':
|
|
this.op = BITNOT;
|
|
return UNARYOP;
|
|
|
|
case '+':
|
|
case '-':
|
|
if (in.match('=')) {
|
|
if (c == '+') {
|
|
this.op = ADD;
|
|
return ASSIGN;
|
|
} else {
|
|
this.op = SUB;
|
|
return ASSIGN;
|
|
}
|
|
} else if (in.match((char) c)) {
|
|
if (c == '+') {
|
|
return INC;
|
|
} else {
|
|
return DEC;
|
|
}
|
|
} else if (c == '-') {
|
|
return SUB;
|
|
} else {
|
|
return ADD;
|
|
}
|
|
|
|
default:
|
|
reportError("msg.illegal.character", null);
|
|
return ERROR;
|
|
}
|
|
}
|
|
|
|
private void reportError(String messageProperty, Object[] args) {
|
|
flags |= TSF_ERROR;
|
|
String message = Context.getMessage(messageProperty, args);
|
|
Context.reportError(message, getSourceName(),
|
|
getLineno(), getLine(), getOffset());
|
|
}
|
|
|
|
public String getSourceName() { return sourceName; }
|
|
public int getLineno() { return in.getLineno(); }
|
|
public int getOp() { return op; }
|
|
public String getString() { return string; }
|
|
public Number getNumber() { return number; }
|
|
public String getLine() { return in.getLine(); }
|
|
public int getOffset() { return in.getOffset(); }
|
|
public int getTokenno() { return tokenno; }
|
|
public boolean eof() { return in.eof(); }
|
|
|
|
// instance variables
|
|
private LineBuffer in;
|
|
|
|
|
|
/* for TSF_REGEXP, etc.
|
|
* should this be manipulated by gettor/settor functions?
|
|
* should it be passed to getToken();
|
|
*/
|
|
public int flags;
|
|
public String regExpFlags;
|
|
|
|
private String sourceName;
|
|
private String line;
|
|
private int pushbackToken;
|
|
private int tokenno;
|
|
|
|
private int op;
|
|
|
|
// Set this to an inital non-null value so that the Parser has
|
|
// something to retrieve even if an error has occured and no
|
|
// string is found. Fosters one class of error, but saves lots of
|
|
// code.
|
|
private String string = "";
|
|
private Number number;
|
|
}
|