diff --git a/js/rhino/src/org/mozilla/javascript/Context.java b/js/rhino/src/org/mozilla/javascript/Context.java index b57e3e277aa2..112bf19250f8 100644 --- a/js/rhino/src/org/mozilla/javascript/Context.java +++ b/js/rhino/src/org/mozilla/javascript/Context.java @@ -779,14 +779,12 @@ public class Context { Object securityDomain) throws JavaScriptException { - try { - Reader in = new StringReader(source); - return evaluateReader(scope, in, sourceName, lineno, - securityDomain); - } - catch (IOException ioe) { - // Should never occur because we just made the reader from a String - throw new RuntimeException(); + Script script = compileString(scope, source, sourceName, lineno, + securityDomain); + if (script != null) { + return script.exec(this, scope); + } else { + return null; } } @@ -816,10 +814,11 @@ public class Context { { Script script = compileReader(scope, in, sourceName, lineno, securityDomain); - if (script != null) + if (script != null) { return script.exec(this, scope); - else + } else { return null; + } } /** @@ -840,10 +839,9 @@ public class Context { */ synchronized public boolean stringIsCompilableUnit(String source) { - Reader in = new StringReader(source); // no source name or source text manager, because we're just // going to throw away the result. - TokenStream ts = new TokenStream(in, null, null, 1); + TokenStream ts = new TokenStream(null, source, null, null, 1); // Temporarily set error reporter to always be the exception-throwing // DefaultErrorReporter. (This is why the method is synchronized...) @@ -896,10 +894,41 @@ public class Context { int lineno, Object securityDomain) throws IOException { - return (Script) compile(scope, in, sourceName, lineno, securityDomain, - false); + return (Script) compile(scope, in, null, sourceName, lineno, + securityDomain, false); } + /** + * Compiles the source in the given string. + *

+ * Returns a script that may later be executed. + * + * @param scope if nonnull, will be the scope in which the script object + * is created. The script object will be a valid JavaScript object + * as if it were created using the JavaScript1.3 Script constructor + * @param source the source string + * @param sourceName a string describing the source, such as a filename + * @param lineno the starting line number for reporting errors + * @param securityDomain an arbitrary object that specifies security + * information about the origin or owner of the script. For + * implementations that don't care about security, this value + * may be null. + * @return a script that may later be executed + * @see org.mozilla.javascript.Script#exec + * @exception RuntimeException on an unexpected internal IOException + */ + public Script compileString(Scriptable scope, String source, + String sourceName, int lineno, + Object securityDomain) + { + try { + return (Script) compile(scope, null, source, sourceName, lineno, + securityDomain, false); + } catch (IOException ex) { + // Should not happen when dealing with source as string + throw new RuntimeException(); + } + } /** * Compile a JavaScript function. 
@@ -922,9 +951,8 @@ public class Context { String sourceName, int lineno, Object securityDomain) { - Reader in = new StringReader(source); try { - return (Function) compile(scope, in, sourceName, lineno, + return (Function) compile(scope, null, source, sourceName, lineno, securityDomain, true); } catch (IOException ioe) { @@ -1904,6 +1932,24 @@ public class Context { return formatter.format(arguments); } + private static String readReader(Reader r) + throws IOException + { + char[] buffer = new char[512]; + int cursor = 0; + for (;;) { + int n = r.read(buffer, cursor, buffer.length - cursor); + if (n < 0) { break; } + cursor += n; + if (cursor == buffer.length) { // full: grow before next read + char[] tmp = new char[buffer.length * 2]; + System.arraycopy(buffer, 0, tmp, 0, cursor); + buffer = tmp; + } + } + return new String(buffer, 0, cursor); + } + // debug flags static final boolean printTrees = false; static final boolean printICode = false; @@ -1930,22 +1976,30 @@ public class Context { * @return a class for the script or function * @see org.mozilla.javascript.Context#compileReader */ - private Object compile(Scriptable scope, Reader in, String sourceName, - int lineno, Object securityDomain, - boolean returnFunction) + private Object compile(Scriptable scope, + Reader sourceReader, String sourceString, + String sourceName, int lineno, + Object securityDomain, boolean returnFunction) throws IOException { + // One of sourceReader or sourceString has to be null + if (!(sourceReader == null ^ sourceString == null)) Context.codeBug(); + Object dynamicDoamin = null; if (securityController != null) { dynamicDoamin = securityController. 
getDynamicSecurityDomain(securityDomain); } - if (debugger != null && in != null) { - in = new DebugReader(in); + if (debugger != null) { + if (sourceReader != null) { + sourceString = readReader(sourceReader); + sourceReader = null; + } } - TokenStream ts = new TokenStream(in, scope, sourceName, lineno); - return compile(scope, ts, dynamicDoamin, in, returnFunction); + TokenStream ts = new TokenStream(sourceReader, sourceString, + scope, sourceName, lineno); + return compile(scope, ts, dynamicDoamin, sourceString, returnFunction); } private static Class codegenClass; @@ -1978,7 +2032,7 @@ public class Context { } private Object compile(Scriptable scope, TokenStream ts, - Object dynamicSecurityDomain, Reader in, + Object dynamicSecurityDomain, String sourceString, boolean returnFunction) throws IOException { @@ -2010,9 +2064,9 @@ public class Context { return null; } - if (in instanceof DebugReader) { - DebugReader dr = (DebugReader) in; - tree.putProp(Node.DEBUGSOURCE_PROP, dr.getSaved()); + if (debugger != null) { + if (sourceString == null) Context.codeBug(); + tree.putProp(Node.DEBUGSOURCE_PROP, sourceString); } Object result = compiler.compile(this, scope, tree, diff --git a/js/rhino/src/org/mozilla/javascript/LineBuffer.java b/js/rhino/src/org/mozilla/javascript/LineBuffer.java deleted file mode 100644 index e545758cf3f2..000000000000 --- a/js/rhino/src/org/mozilla/javascript/LineBuffer.java +++ /dev/null @@ -1,343 +0,0 @@ -/* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- - * - * The contents of this file are subject to the Netscape Public - * License Version 1.1 (the "License"); you may not use this file - * except in compliance with the License. You may obtain a copy of - * the License at http://www.mozilla.org/NPL/ - * - * Software distributed under the License is distributed on an "AS - * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or - * implied. 
See the License for the specific language governing - * rights and limitations under the License. - * - * The Original Code is Rhino code, released - * May 6, 1999. - * - * The Initial Developer of the Original Code is Netscape - * Communications Corporation. Portions created by Netscape are - * Copyright (C) 1997-1999 Netscape Communications Corporation. All - * Rights Reserved. - * - * Contributor(s): - * Mike McCabe - * - * Alternatively, the contents of this file may be used under the - * terms of the GNU Public License (the "GPL"), in which case the - * provisions of the GPL are applicable instead of those above. - * If you wish to allow use of your version of this file only - * under the terms of the GPL and not to allow others to use your - * version of this file under the NPL, indicate your decision by - * deleting the provisions above and replace them with the notice - * and other provisions required by the GPL. If you do not delete - * the provisions above, a recipient may use your version of this - * file under either the NPL or the GPL. - */ - -package org.mozilla.javascript; - -import java.io.Reader; -import java.io.IOException; - -/** - * An input buffer that combines fast character-based access with - * (slower) support for retrieving the text of the current line. It - * also supports building strings directly out of the internal buffer - * to support fast scanning with minimal object creation. - * - * Note that it is customized in several ways to support the - * TokenStream class, and should not be considered general. - * - * Credits to Kipp Hickman and John Bandhauer. - * - * @author Mike McCabe - */ -final class LineBuffer { - /* - * for smooth operation of getLine(), this should be greater than - * the length of any expected line. Currently, 256 is 3% slower - * than 4096 for large compiles, but seems safer given evaluateString. 
- * Strings for the scanner are are built with StringBuffers - * instead of directly out of the buffer whenever a string crosses - * a buffer boundary, so small buffer sizes will mean that more - * objects are created. - */ - static final int BUFLEN = 256; - - LineBuffer(Reader in, int lineno) { - this.in = in; - this.lineno = lineno; - } - - int read() throws IOException { - for(;;) { - if (end == offset && !fill()) - return -1; - - int c = buffer[offset]; - ++offset; - - if ((c & EOL_HINT_MASK) == 0) { - switch (c) { - case '\r': - // if the next character is a newline, skip past it. - if (offset != end) { - if (buffer[offset] == '\n') - ++offset; - } else { - // set a flag for fill(), in case the first char - // of the next fill is a newline. - lastWasCR = true; - } - // NO break here! - case '\n': case '\u2028': case '\u2029': - prevStart = lineStart; - lineStart = offset; - lineno++; - return '\n'; - } - } - - if (c < 128 || !formatChar(c)) { - return c; - } - } - } - - void unread() { - // offset can only be 0 when we're asked to unread() an implicit - // EOF_CHAR. - - // This would be wrong behavior in the general case, - // because a peek() could map a buffer.length offset to 0 - // in the process of a fill(), and leave it there. But - // the scanner never calls peek() or a failed match() - // followed by unread()... this would violate 1-character - // lookahead. - if (Context.check && offset == 0 && !hitEOF) Context.codeBug(); - - if (offset == 0) // Same as if (hitEOF) - return; - offset--; - int c = buffer[offset]; - if ((c & EOL_HINT_MASK) == 0 && eolChar(c)) { - lineStart = prevStart; - lineno--; - } - } - - private void skipFormatChar() { - if (checkSelf && !formatChar(buffer[offset])) Context.codeBug(); - - // swap prev character with format one so possible call to - // startString can assume that previous non-format char is at - // offset - 1. 
Note it causes getLine to return not exactly the - // source LineBuffer read, but it is used only in error reporting - // and should not be a problem. - if (offset != 0) { - char tmp = buffer[offset]; - buffer[offset] = buffer[offset - 1]; - buffer[offset - 1] = tmp; - } - else if (otherEnd != 0) { - char tmp = buffer[offset]; - buffer[offset] = otherBuffer[otherEnd - 1]; - otherBuffer[otherEnd - 1] = tmp; - } - - ++offset; - } - - int peek() throws IOException { - for (;;) { - if (end == offset && !fill()) { - return -1; - } - - int c = buffer[offset]; - if ((c & EOL_HINT_MASK) == 0 && eolChar(c)) { - return '\n'; - } - if (c < 128 || !formatChar(c)) { - return c; - } - - skipFormatChar(); - } - } - - boolean match(int test) throws IOException { - if (Context.check) { - // TokenStream never looks ahead for '\n', which allows simple code - if ((test & EOL_HINT_MASK) == 0 && eolChar(test)) - Context.codeBug(); - // Format chars are not allowed either - if (test >= 128 && formatChar(test)) - Context.codeBug(); - } - - for (;;) { - if (end == offset && !fill()) - return false; - - int c = buffer[offset]; - if (test == c) { - ++offset; - return true; - } - if (c < 128 || !formatChar(c)) { - return false; - } - skipFormatChar(); - } - } - - // Reconstruct a source line from the buffers. This can be slow... - String getLine() { - // Look for line end in the unprocessed buffer - int i = offset; - while(true) { - if (i == end) { - // if we're out of buffer, let's just expand it. We do - // this instead of reading into a StringBuffer to - // preserve the stream for later reads. - if (end == buffer.length) { - char[] tmp = new char[buffer.length * 2]; - System.arraycopy(buffer, 0, tmp, 0, end); - buffer = tmp; - } - int charsRead = 0; - try { - charsRead = in.read(buffer, end, buffer.length - end); - } catch (IOException ioe) { - // ignore it, we're already displaying an error... 
- break; - } - if (charsRead < 0) - break; - end += charsRead; - } - int c = buffer[i]; - if ((c & EOL_HINT_MASK) == 0 && eolChar(c)) - break; - i++; - } - - int start = lineStart; - if (lineStart < 0) { - // the line begins somewhere in the other buffer; get that first. - StringBuffer sb = new StringBuffer(otherEnd - otherStart + i); - sb.append(otherBuffer, otherStart, otherEnd - otherStart); - sb.append(buffer, 0, i); - return sb.toString(); - } else { - return new String(buffer, lineStart, i - lineStart); - } - } - - // Get the offset of the current character, relative to - // the line that getLine() returns. - int getOffset() { - if (lineStart < 0) - // The line begins somewhere in the other buffer. - return offset + (otherEnd - otherStart); - else - return offset - lineStart; - } - - private boolean fill() throws IOException { - // fill should be caled only for emty buffer - if (checkSelf && !(end == offset)) Context.codeBug(); - - // swap buffers - char[] tempBuffer = buffer; - buffer = otherBuffer; - otherBuffer = tempBuffer; - - // allocate the buffers lazily, in case we're handed a short string. - if (buffer == null) { - buffer = new char[BUFLEN]; - } - - // buffers have switched, so move the newline marker. - if (lineStart >= 0) { - otherStart = lineStart; - } else { - // discard beging of the old line - otherStart = 0; - } - - otherEnd = end; - - // set lineStart to a sentinel value, unless this is the first - // time around. - prevStart = lineStart = (otherBuffer == null) ? 0 : -1; - - offset = 0; - end = in.read(buffer, 0, buffer.length); - if (end < 0) { - end = 0; - - // can't null buffers here, because a string might be retrieved - // out of the other buffer, and a 0-length string might be - // retrieved out of this one. - - hitEOF = true; - return false; - } - - // If the last character of the previous fill was a carriage return, - // then ignore a newline. - - // There's another bizzare special case here. 
If lastWasCR is - // true, and we see a newline, and the buffer length is - // 1... then we probably just read the last character of the - // file, and returning after advancing offset is not the right - // thing to do. Instead, we try to ignore the newline (and - // likely get to EOF for real) by doing yet another fill(). - if (lastWasCR) { - if (buffer[0] == '\n') { - offset++; - if (end == 1) - return fill(); - } - lineStart = offset; - lastWasCR = false; - } - return true; - } - - int getLineno() { return lineno; } - boolean eof() { return hitEOF; } - - private static boolean formatChar(int c) { - return Character.getType((char)c) == Character.FORMAT; - } - - private static boolean eolChar(int c) { - return c == '\r' || c == '\n' || c == '\u2028' || c == '\u2029'; - } - - // Optimization for faster check for eol character: eolChar(c) returns - // true only when (c & EOL_HINT_MASK) == 0 - private static final int EOL_HINT_MASK = 0xdfd0; - - private Reader in; - private char[] otherBuffer = null; - private char[] buffer = null; - - // Yes, there are too too many of these. 
- private int offset = 0; - private int end = 0; - private int otherEnd; - private int lineno; - - private int lineStart = 0; - private int otherStart = 0; - private int prevStart = 0; - - private boolean lastWasCR = false; - private boolean hitEOF = false; - -// Rudimentary support for Design-by-Contract - private static final boolean checkSelf = Context.check && true; -} diff --git a/js/rhino/src/org/mozilla/javascript/TokenStream.java b/js/rhino/src/org/mozilla/javascript/TokenStream.java index 2037769e8164..9365e8cc0202 100644 --- a/js/rhino/src/org/mozilla/javascript/TokenStream.java +++ b/js/rhino/src/org/mozilla/javascript/TokenStream.java @@ -21,6 +21,7 @@ * Contributor(s): * Roger Lawrence * Mike McCabe + * Igor Bukanov * * Alternatively, the contents of this file may be used under the * terms of the GNU Public License (the "GPL"), in which case the @@ -613,20 +614,45 @@ public class TokenStream { return id & 0xff; } - public TokenStream(Reader in, Scriptable scope, - String sourceName, int lineno) + public TokenStream(Reader sourceReader, String sourceString, + Scriptable scope, String sourceName, int lineno) { - this.in = new LineBuffer(in, lineno); this.scope = scope; this.pushbackToken = EOF; this.sourceName = sourceName; - flags = 0; + this.lineno = lineno; + this.flags = 0; + if (sourceReader != null) { + if (sourceString != null) Context.codeBug(); + this.sourceReader = sourceReader; + this.sourceBuffer = new char[512]; + this.sourceEnd = 0; + } else { + if (sourceString == null) Context.codeBug(); + this.sourceString = sourceString; + this.sourceEnd = sourceString.length(); + } + this.sourceCursor = 0; } public Scriptable getScope() { return scope; } + public String getSourceName() { return sourceName; } + + public int getLineno() { return lineno; } + + public int getOp() { return op; } + + public String getString() { return string; } + + public double getNumber() { return number; } + + public int getTokenno() { return tokenno; } + + public boolean 
eof() { return hitEOF; } + /* return and pop the token from the stream if it matches... * otherwise return null */ @@ -646,11 +672,8 @@ public class TokenStream { } public void ungetToken(int tt) { - if (this.pushbackToken != EOF && tt != ERROR) { - String message = Context.getMessage2("msg.token.replaces.pushback", - tokenToString(tt), tokenToString(this.pushbackToken)); - throw new RuntimeException(message); - } + // Can not unread more then one token + if (this.pushbackToken != EOF && tt != ERROR) Context.codeBug(); this.pushbackToken = tt; tokenno--; } @@ -674,68 +697,6 @@ public class TokenStream { return result; } - protected static boolean isJSIdentifier(String s) { - int length = s.length(); - - if (length == 0 || !Character.isJavaIdentifierStart(s.charAt(0))) - return false; - - for (int i=1; i= 'a' && c <= 'z') - || (c >= 'A' && c <= 'Z')); - } - - static boolean isDigit(int c) { - return (c >= '0' && c <= '9'); - } - - static int xDigitToInt(int c) { - if ('0' <= c && c <= '9') { return c - '0'; } - if ('a' <= c && c <= 'f') { return c - ('a' - 10); } - if ('A' <= c && c <= 'F') { return c - ('A' - 10); } - return -1; - } - - /* As defined in ECMA. jsscan.c uses C isspace() (which allows - * \v, I think.) note that code in in.read() implicitly accepts - * '\r' == \u000D as well. - */ - public static boolean isJSSpace(int c) { - return (c == '\u0020' || c == '\u0009' - || c == '\u000C' || c == '\u000B' - || c == '\u00A0' - || Character.getType((char)c) == Character.SPACE_SEPARATOR); - } - - public static boolean isJSLineTerminator(int c) { - return (c == '\n' || c == '\r' - || c == 0x2028 || c == 0x2029); - } - - private void skipLine() throws IOException { - // skip to end of line - int c; - while ((c = in.read()) != EOF_CHAR && c != '\n') { } - in.unread(); - } - public int getToken() throws IOException { int c; tokenno++; @@ -748,34 +709,37 @@ public class TokenStream { } // Eat whitespace, possibly sensitive to newlines. 
- do { - c = in.read(); - if (c == '\n') { + for (;;) { + c = getChar(); + if (c == EOF_CHAR) { + return EOF; + } else if (c == '\n') { flags &= ~TSF_DIRTYLINE; - if ((flags & TSF_NEWLINES) != 0) + if ((flags & TSF_NEWLINES) != 0) { break; + } + } else if (!isJSSpace(c)) { + if (c != '-') { + flags |= TSF_DIRTYLINE; + } + break; } - } while (isJSSpace(c) || c == '\n'); - - if (c == EOF_CHAR) - return EOF; - if (c != '-' && c != '\n') - flags |= TSF_DIRTYLINE; + } // identifier/keyword/instanceof? // watch out for starting with a boolean identifierStart; boolean isUnicodeEscapeStart = false; if (c == '\\') { - c = in.read(); + c = getChar(); if (c == 'u') { identifierStart = true; isUnicodeEscapeStart = true; stringBufferTop = 0; } else { identifierStart = false; + ungetChar(c); c = '\\'; - in.unread(); } } else { identifierStart = Character.isJavaIdentifierStart((char)c); @@ -797,7 +761,7 @@ public class TokenStream { // an error here. int escapeVal = 0; for (int i = 0; i != 4; ++i) { - c = in.read(); + c = getChar(); escapeVal = (escapeVal << 4) | xDigitToInt(c); // Next check takes care about c < 0 and bad escape if (escapeVal < 0) { break; } @@ -809,9 +773,9 @@ public class TokenStream { addToString(escapeVal); isUnicodeEscapeStart = false; } else { - c = in.read(); + c = getChar(); if (c == '\\') { - c = in.read(); + c = getChar(); if (c == 'u') { isUnicodeEscapeStart = true; containsEscape = true; @@ -820,16 +784,18 @@ public class TokenStream { return ERROR; } } else { - if (!Character.isJavaIdentifierPart((char)c)) { + if (c == EOF_CHAR + || !Character.isJavaIdentifierPart((char)c)) + { break; } addToString(c); } } } - in.unread(); + ungetChar(c); - String str = getStringFromBuffer(); + String str = getStringFromBuffer(); if (!containsEscape) { // OPT we shouldn't have to make a string (object!) to // check if it's a keyword. 
@@ -854,21 +820,21 @@ public class TokenStream { } } } - this.string = str; + this.string = (String)allStrings.intern(str); return NAME; } // is it a number? - if (isDigit(c) || (c == '.' && isDigit(in.peek()))) { + if (isDigit(c) || (c == '.' && isDigit(peekChar()))) { stringBufferTop = 0; int base = 10; if (c == '0') { - c = in.read(); + c = getChar(); if (c == 'x' || c == 'X') { base = 16; - c = in.read(); + c = getChar(); } else if (isDigit(c)) { base = 8; } else { @@ -879,7 +845,7 @@ public class TokenStream { if (base == 16) { while (0 <= xDigitToInt(c)) { addToString(c); - c = in.read(); + c = getChar(); } } else { while ('0' <= c && c <= '9') { @@ -895,7 +861,7 @@ public class TokenStream { base = 10; } addToString(c); - c = in.read(); + c = getChar(); } } @@ -906,15 +872,15 @@ public class TokenStream { if (c == '.') { do { addToString(c); - c = in.read(); + c = getChar(); } while (isDigit(c)); } if (c == 'e' || c == 'E') { addToString(c); - c = in.read(); + c = getChar(); if (c == '+' || c == '-') { addToString(c); - c = in.read(); + c = getChar(); } if (!isDigit(c)) { reportSyntaxError("msg.missing.exponent", null); @@ -922,11 +888,11 @@ public class TokenStream { } do { addToString(c); - c = in.read(); + c = getChar(); } while (isDigit(c)); } } - in.unread(); + ungetChar(c); String numString = getStringFromBuffer(); double dval; @@ -959,10 +925,10 @@ public class TokenStream { int val = 0; stringBufferTop = 0; - c = in.read(); + c = getChar(); strLoop: while (c != quoteChar) { if (c == '\n' || c == EOF_CHAR) { - in.unread(); + ungetChar(c); reportSyntaxError("msg.unterminated.string.lit", null); return ERROR; } @@ -970,7 +936,7 @@ public class TokenStream { if (c == '\\') { // We've hit an escaped character - c = in.read(); + c = getChar(); switch (c) { case 'b': c = '\b'; break; case 'f': c = '\f'; break; @@ -992,7 +958,7 @@ public class TokenStream { addToString('u'); int escapeVal = 0; for (int i = 0; i != 4; ++i) { - c = in.read(); + c = getChar(); 
escapeVal = (escapeVal << 4) | xDigitToInt(c); if (escapeVal < 0) { continue strLoop; @@ -1009,14 +975,14 @@ public class TokenStream { /* Get 2 hex digits, defaulting to 'x' + literal * sequence, as above. */ - c = in.read(); + c = getChar(); int escapeVal = xDigitToInt(c); if (escapeVal < 0) { addToString('x'); continue strLoop; } else { int c1 = c; - c = in.read(); + c = getChar(); escapeVal = (escapeVal << 4) | xDigitToInt(c); if (escapeVal < 0) { addToString('x'); @@ -1031,27 +997,28 @@ public class TokenStream { default: if ('0' <= c && c < '8') { val = c - '0'; - c = in.read(); + c = getChar(); if ('0' <= c && c < '8') { val = 8 * val + c - '0'; - c = in.read(); + c = getChar(); if ('0' <= c && c < '8' && val <= 037) { // c is 3rd char of octal sequence only if // the resulting val <= 0377 val = 8 * val + c - '0'; - c = in.read(); + c = getChar(); } } - in.unread(); + ungetChar(c); c = val; } } } addToString(c); - c = in.read(); + c = getChar(); } - this.string = getStringFromBuffer(); + String str = getStringFromBuffer(); + this.string = (String)allStrings.intern(str); return STRING; } @@ -1071,9 +1038,9 @@ public class TokenStream { case '.': return DOT; case '|': - if (in.match('|')) { + if (matchChar('|')) { return OR; - } else if (in.match('=')) { + } else if (matchChar('=')) { this.op = BITOR; return ASSIGN; } else { @@ -1081,7 +1048,7 @@ public class TokenStream { } case '^': - if (in.match('=')) { + if (matchChar('=')) { this.op = BITXOR; return ASSIGN; } else { @@ -1089,9 +1056,9 @@ public class TokenStream { } case '&': - if (in.match('&')) { + if (matchChar('&')) { return AND; - } else if (in.match('=')) { + } else if (matchChar('=')) { this.op = BITAND; return ASSIGN; } else { @@ -1099,8 +1066,8 @@ public class TokenStream { } case '=': - if (in.match('=')) { - if (in.match('=')) + if (matchChar('=')) { + if (matchChar('=')) this.op = SHEQ; else this.op = EQ; @@ -1111,8 +1078,8 @@ public class TokenStream { } case '!': - if (in.match('=')) { - if 
(in.match('=')) + if (matchChar('=')) { + if (matchChar('=')) this.op = SHNE; else this.op = NE; @@ -1124,18 +1091,18 @@ public class TokenStream { case '<': /* NB:treat HTML begin-comment as comment-till-eol */ - if (in.match('!')) { - if (in.match('-')) { - if (in.match('-')) { + if (matchChar('!')) { + if (matchChar('-')) { + if (matchChar('-')) { skipLine(); return getToken(); // in place of 'goto retry' } - in.unread(); + ungetChar('-'); } - in.unread(); + ungetChar('!'); } - if (in.match('<')) { - if (in.match('=')) { + if (matchChar('<')) { + if (matchChar('=')) { this.op = LSH; return ASSIGN; } else { @@ -1143,7 +1110,7 @@ public class TokenStream { return SHOP; } } else { - if (in.match('=')) { + if (matchChar('=')) { this.op = LE; return RELOP; } else { @@ -1153,9 +1120,9 @@ public class TokenStream { } case '>': - if (in.match('>')) { - if (in.match('>')) { - if (in.match('=')) { + if (matchChar('>')) { + if (matchChar('>')) { + if (matchChar('=')) { this.op = URSH; return ASSIGN; } else { @@ -1163,7 +1130,7 @@ public class TokenStream { return SHOP; } } else { - if (in.match('=')) { + if (matchChar('=')) { this.op = RSH; return ASSIGN; } else { @@ -1172,7 +1139,7 @@ public class TokenStream { } } } else { - if (in.match('=')) { + if (matchChar('=')) { this.op = GE; return RELOP; } else { @@ -1182,7 +1149,7 @@ public class TokenStream { } case '*': - if (in.match('=')) { + if (matchChar('=')) { this.op = MUL; return ASSIGN; } else { @@ -1191,13 +1158,13 @@ public class TokenStream { case '/': // is it a // comment? - if (in.match('/')) { + if (matchChar('/')) { skipLine(); return getToken(); } - if (in.match('*')) { - while ((c = in.read()) != -1 && - !(c == '*' && in.match('/'))) { + if (matchChar('*')) { + while ((c = getChar()) != -1 && + !(c == '*' && matchChar('/'))) { ; // empty loop body } if (c == EOF_CHAR) { @@ -1210,15 +1177,15 @@ public class TokenStream { // is it a regexp? 
if ((flags & TSF_REGEXP) != 0) { stringBufferTop = 0; - while ((c = in.read()) != '/') { + while ((c = getChar()) != '/') { if (c == '\n' || c == EOF_CHAR) { - in.unread(); + ungetChar(c); reportSyntaxError("msg.unterminated.re.lit", null); return ERROR; } if (c == '\\') { addToString(c); - c = in.read(); + c = getChar(); } addToString(c); @@ -1226,17 +1193,17 @@ public class TokenStream { int reEnd = stringBufferTop; while (true) { - if (in.match('g')) + if (matchChar('g')) addToString('g'); - else if (in.match('i')) + else if (matchChar('i')) addToString('i'); - else if (in.match('m')) + else if (matchChar('m')) addToString('m'); else break; } - if (isAlpha(in.peek())) { + if (isAlpha(peekChar())) { reportSyntaxError("msg.invalid.re.flag", null); return ERROR; } @@ -1248,7 +1215,7 @@ public class TokenStream { } - if (in.match('=')) { + if (matchChar('=')) { this.op = DIV; return ASSIGN; } else { @@ -1257,7 +1224,7 @@ public class TokenStream { case '%': this.op = MOD; - if (in.match('=')) { + if (matchChar('=')) { return ASSIGN; } else { return MOD; @@ -1268,24 +1235,24 @@ public class TokenStream { return UNARYOP; case '+': - if (in.match('=')) { + if (matchChar('=')) { this.op = ADD; return ASSIGN; - } else if (in.match('+')) { + } else if (matchChar('+')) { return INC; } else { return ADD; } case '-': - if (in.match('=')) { + if (matchChar('=')) { this.op = SUB; c = ASSIGN; - } else if (in.match('-')) { + } else if (matchChar('-')) { if (0 == (flags & TSF_DIRTYLINE)) { // treat HTML end-comment after possible whitespace // after line start as comment-utill-eol - if (in.match('>')) { + if (matchChar('>')) { skipLine(); return getToken(); } @@ -1303,6 +1270,53 @@ public class TokenStream { } } + private static boolean isAlpha(int c) { + // Use 'Z' < 'a' + if (c <= 'Z') { + return 'A' <= c; + } else { + return 'a' <= c && c <= 'z'; + } + } + + static boolean isDigit(int c) { + return '0' <= c && c <= '9'; + } + + static int xDigitToInt(int c) { + // Use 0..9 < 
A..Z < a..z + if (c <= '9') { + c -= '0'; + if (0 <= c) { return c; } + } else if (c <= 'F') { + if ('A' <= c) { return c - ('A' - 10); } + } else if (c <= 'f') { + if ('a' <= c) { return c - ('a' - 10); } + } + return -1; + } + + /* As defined in ECMA. jsscan.c uses C isspace() (which allows + * \v, I think.) note that code in getChar() implicitly accepts + * '\r' == \u000D as well. + */ + public static boolean isJSSpace(int c) { + if (c <= 127) { + return c == 0x20 || c == 0x9 || c == 0xC || c == 0xB; + } else { + return c == 0xA0 + || Character.getType((char)c) == Character.SPACE_SEPARATOR; + } + } + + public static boolean isJSLineTerminator(int c) { + return c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029; + } + + private static boolean isJSFormatChar(int c) { + return c > 127 && Character.getType((char)c) == Character.FORMAT; + } + private String getStringFromBuffer() { return new String(stringBuffer, 0, stringBufferTop); } @@ -1336,19 +1350,161 @@ public class TokenStream { getLineno(), getLine(), getOffset()); } - public String getSourceName() { return sourceName; } - public int getLineno() { return in.getLineno(); } - public int getOp() { return op; } - public String getString() { return string; } - public double getNumber() { return number; } - public String getLine() { return in.getLine(); } - public int getOffset() { return in.getOffset(); } - public int getTokenno() { return tokenno; } - public boolean eof() { return in.eof(); } + private void ungetChar(int c) { + // can not unread past across line boundary + if (ungetCursor != 0 && ungetBuffer[ungetCursor - 1] == '\n') + Context.codeBug(); + ungetBuffer[ungetCursor++] = c; + } - // instance variables - private LineBuffer in; + private boolean matchChar(int test) throws IOException { + int c = getChar(); + if (c == test) { + return true; + } else { + ungetChar(c); + return false; + } + } + private int peekChar() throws IOException { + int c = getChar(); + ungetChar(c); + return c; + } + + private 
int getChar() throws IOException { + if (ungetCursor != 0) { + return ungetBuffer[--ungetCursor]; + } + + for(;;) { + int c; + if (sourceString != null) { + if (sourceCursor == sourceEnd) { + hitEOF = true; + return EOF_CHAR; + } + c = sourceString.charAt(sourceCursor++); + } else { + if (sourceCursor == sourceEnd) { + if (!fillSourceBuffer()) { + hitEOF = true; + return EOF_CHAR; + } + } + c = sourceBuffer[sourceCursor++]; + } + + if (lineEndChar >= 0) { + if (lineEndChar == '\r' && c == '\n') { + lineEndChar = '\n'; + continue; + } + lineEndChar = -1; + lineStart = sourceCursor - 1; + lineno++; + } + + if (c <= 127) { + if (c == '\n' || c == '\r') { + lineEndChar = c; + c = '\n'; + } + } else { + if (isJSFormatChar(c)) { + continue; + } + if ((c & EOL_HINT_MASK) == 0 && isJSLineTerminator(c)) { + lineEndChar = c; + c = '\n'; + } + } + return c; + } + } + + private void skipLine() throws IOException { + // skip to end of line + int c; + while ((c = getChar()) != EOF_CHAR && c != '\n') { } + ungetChar(c); + } + + public int getOffset() { + int n = sourceCursor - lineStart; + if (lineEndChar >= 0) { --n; } + return n; + } + + public String getLine() { + if (sourceString != null) { + // String case + int lineEnd = sourceCursor; + if (lineEndChar >= 0) { + --lineEnd; + } else { + for (; lineEnd != sourceEnd; ++lineEnd) { + int c = sourceString.charAt(lineEnd); + if ((c & EOL_HINT_MASK) == 0 && isJSLineTerminator(c)) { + break; + } + } + } + return sourceString.substring(lineStart, lineEnd); + } else { + // Reader case + int lineLength = sourceCursor - lineStart; + if (lineEndChar >= 0) { + --lineLength; + } else { + // Read until the end of line + for (;; ++lineLength) { + int i = lineStart + lineLength; + if (i == sourceEnd) { + try { + if (!fillSourceBuffer()) { break; } + } catch (IOException ioe) { + // ignore it, we're already displaying an error... 
+ break; + } + // i recalculuation as fillSourceBuffer can move saved + // line buffer and change lineStart + i = lineStart + lineLength; + } + int c = sourceBuffer[i]; + if ((c & EOL_HINT_MASK) == 0 && isJSLineTerminator(c)) { + break; + } + } + } + return new String(sourceBuffer, lineStart, lineLength); + } + } + + private boolean fillSourceBuffer() throws IOException { + if (sourceString != null) Context.codeBug(); + if (sourceEnd == sourceBuffer.length) { + if (lineStart != 0) { + System.arraycopy(sourceBuffer, lineStart, sourceBuffer, 0, + sourceEnd - lineStart); + sourceEnd -= lineStart; + sourceCursor -= lineStart; + lineStart = 0; + } else { + char[] tmp = new char[sourceBuffer.length * 2]; + System.arraycopy(sourceBuffer, 0, tmp, 0, sourceEnd); + sourceBuffer = tmp; + } + } + int n = sourceReader.read(sourceBuffer, sourceEnd, + sourceBuffer.length - sourceEnd); + if (n < 0) { + return false; + } + sourceEnd += n; + return true; + } /* for TSF_REGEXP, etc. * should this be manipulated by gettor/settor functions? @@ -1374,4 +1530,25 @@ public class TokenStream { private char[] stringBuffer = new char[128]; private int stringBufferTop; + private ObjToIntMap allStrings = new ObjToIntMap(50); + + // Room to backtrace from to < on failed match of the last - in