зеркало из https://github.com/mozilla/gecko-dev.git
Continuing to implement
This commit is contained in:
Родитель
b79a5dc2ab
Коммит
0f9dbf7003
|
@ -18,6 +18,8 @@
|
|||
// Rights Reserved.
|
||||
|
||||
#include "parser.h"
|
||||
#include "world.h"
|
||||
|
||||
namespace JS = JavaScript;
|
||||
|
||||
|
||||
|
@ -28,75 +30,226 @@ namespace JS = JavaScript;
|
|||
|
||||
// Create a Reader reading characters from begin up to but not including end.
|
||||
JS::Reader::Reader(const char16 *begin, const char16 *end):
|
||||
begin(begin), p(begin), end(end), nGetsPastEnd(0)
|
||||
begin(begin), p(begin), end(end), lineStart(begin), nGetsPastEnd(0)
|
||||
{
|
||||
ASSERT(begin <= end);
|
||||
#ifdef DEBUG
|
||||
recordString = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// Unread the last character.
|
||||
void JS::Reader::unget()
|
||||
// Unread the last n characters. unget cannot be called to back up past the position
|
||||
// of the last call to beginLine().
|
||||
void JS::Reader::unget(uint32 n)
|
||||
{
|
||||
if (nGetsPastEnd)
|
||||
--nGetsPastEnd;
|
||||
else {
|
||||
ASSERT(p != begin);
|
||||
--p;
|
||||
if (nGetsPastEnd) {
|
||||
if (nGetsPastEnd >= n) {
|
||||
nGetsPastEnd -= n;
|
||||
return;
|
||||
}
|
||||
n -= nGetsPastEnd;
|
||||
nGetsPastEnd = 0;
|
||||
}
|
||||
ASSERT(p >= begin + n);
|
||||
p -= n;
|
||||
}
|
||||
|
||||
|
||||
// Set s to the characters read in after the mark but before the current position
|
||||
// and then delete the Reader mark.
|
||||
void JS::Reader::unmark(String &s)
|
||||
// Return the characters read in from position begin inclusive to position end
|
||||
// exclusive relative to the current line. begin <= end <= charPos() is required.
|
||||
JS::String JS::Reader::extract(uint32 begin, uint32 end) const
|
||||
{
|
||||
ASSERT(markPos);
|
||||
s.assign(markPos, p);
|
||||
markPos = 0;
|
||||
ASSERT(begin <= end && end + nGetsPastEnd <= charPos());
|
||||
return String(lineStart + begin, lineStart + end);
|
||||
}
|
||||
|
||||
|
||||
// Begin accumulating characters into the recordString. Each character passed
|
||||
// to recordChar() is added to the end of the recordString. Recording ends when
|
||||
// endRecord() or beginLine() is called.
|
||||
// Recording is significantly optimized when the characters passed to readChar()
|
||||
// are the same characters as read by get(). In this case the record String does
|
||||
// not get allocated until endRecord() is called or a discrepancy appears between
|
||||
// get() and recordChar().
|
||||
void JS::Reader::beginRecording(String &recordString)
|
||||
{
|
||||
Reader::recordString = &recordString;
|
||||
recordBase = p;
|
||||
recordPos = p;
|
||||
}
|
||||
|
||||
|
||||
// Append ch to the recordString.
|
||||
void JS::Reader::recordChar(char16 ch)
|
||||
{
|
||||
ASSERT(recordString);
|
||||
if (recordPos) {
|
||||
if (recordPos != end && *recordPos == ch) {
|
||||
recordPos++;
|
||||
return;
|
||||
} else {
|
||||
recordString->assign(recordBase, recordPos);
|
||||
recordPos = 0;
|
||||
}
|
||||
}
|
||||
*recordString += ch;
|
||||
}
|
||||
|
||||
|
||||
// Finish recording characters into the recordString that was last passed to beginRecording().
|
||||
// Return that recordString.
|
||||
JS::String &JS::Reader::endRecording()
|
||||
{
|
||||
String *rs = recordString;
|
||||
ASSERT(rs);
|
||||
if (recordPos)
|
||||
rs->assign(recordBase, recordPos);
|
||||
recordString = 0;
|
||||
return *rs;
|
||||
}
|
||||
|
||||
|
||||
// Refill the source buffer after running off the end. Get and return
|
||||
// the next character.
|
||||
// The default implementation just returns ueof.
|
||||
JS::wint_t JS::Reader::underflow()
|
||||
// The default implementation just returns char16eof.
|
||||
JS::char16orEOF JS::Reader::underflow()
|
||||
{
|
||||
++nGetsPastEnd;
|
||||
return ueof;
|
||||
return char16eof;
|
||||
}
|
||||
|
||||
|
||||
// Perform a peek when p == end.
|
||||
JS::wint_t JS::Reader::peekUnderflow()
|
||||
JS::char16orEOF JS::Reader::peekUnderflow()
|
||||
{
|
||||
wint_t ch = underflow();
|
||||
char16orEOF ch = underflow();
|
||||
unget();
|
||||
return ch;
|
||||
}
|
||||
|
||||
|
||||
// Create a StringReader reading characters from a copy of the given String.
|
||||
JS::StringReader::StringReader(const String &s):
|
||||
str(s)
|
||||
// Create a StringReader reading characters from s.
|
||||
// source describes the origin of string s and may be used for error messages.
|
||||
JS::StringReader::StringReader(const String &s, const String &source):
|
||||
str(s), source(source)
|
||||
{
|
||||
const char16 *begin = str.data();
|
||||
setBuffer(begin, begin, begin + str.size());
|
||||
}
|
||||
|
||||
|
||||
JS::String JS::StringReader::sourceFile() const
|
||||
{
|
||||
return source;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Lexer
|
||||
//
|
||||
|
||||
|
||||
// Create a new Lexer using the provided Reader.
|
||||
JS::Lexer::Lexer(Reader &reader): reader(reader)
|
||||
void JS::Token::setChars(const String &s)
|
||||
{
|
||||
chars = static_cast<auto_ptr<String> >(new String(s));
|
||||
}
|
||||
|
||||
|
||||
struct KeywordInit {
|
||||
const char *name; // Null-terminated ASCII name of keyword
|
||||
JS::Token::Kind tokenKind; // Keyword's number
|
||||
};
|
||||
|
||||
static KeywordInit keywordInits[] = {
|
||||
// Reserved words
|
||||
{"abstract", JS::Token::Abstract},
|
||||
{"abstract", JS::Token::Abstract},
|
||||
{"break", JS::Token::Break},
|
||||
{"case", JS::Token::Case},
|
||||
{"catch", JS::Token::Catch},
|
||||
{"class", JS::Token::Class},
|
||||
{"const", JS::Token::Const},
|
||||
{"continue", JS::Token::Continue},
|
||||
{"debugger", JS::Token::Debugger},
|
||||
{"default", JS::Token::Default},
|
||||
{"delete", JS::Token::Delete},
|
||||
{"do", JS::Token::Do},
|
||||
{"else", JS::Token::Else},
|
||||
{"enum", JS::Token::Enum},
|
||||
{"eval", JS::Token::Eval},
|
||||
{"export", JS::Token::Export},
|
||||
{"extends", JS::Token::Extends},
|
||||
{"false", JS::Token::False},
|
||||
{"final", JS::Token::Final},
|
||||
{"finally", JS::Token::Finally},
|
||||
{"for", JS::Token::For},
|
||||
{"function", JS::Token::Function},
|
||||
{"goto", JS::Token::Goto},
|
||||
{"if", JS::Token::If},
|
||||
{"implements", JS::Token::Implements},
|
||||
{"import", JS::Token::Import},
|
||||
{"in", JS::Token::In},
|
||||
{"instanceof", JS::Token::Instanceof},
|
||||
{"native", JS::Token::Native},
|
||||
{"new", JS::Token::New},
|
||||
{"null", JS::Token::Null},
|
||||
{"package", JS::Token::Package},
|
||||
{"private", JS::Token::Private},
|
||||
{"protected", JS::Token::Protected},
|
||||
{"public", JS::Token::Public},
|
||||
{"return", JS::Token::Return},
|
||||
{"static", JS::Token::Static},
|
||||
{"super", JS::Token::Super},
|
||||
{"switch", JS::Token::Switch},
|
||||
{"synchronized", JS::Token::Synchronized},
|
||||
{"this", JS::Token::This},
|
||||
{"throw", JS::Token::Throw},
|
||||
{"throws", JS::Token::Throws},
|
||||
{"transient", JS::Token::Transient},
|
||||
{"true", JS::Token::True},
|
||||
{"try", JS::Token::Try},
|
||||
{"typeof", JS::Token::Typeof},
|
||||
{"var", JS::Token::Var},
|
||||
{"volatile", JS::Token::Volatile},
|
||||
{"while", JS::Token::While},
|
||||
{"with", JS::Token::With},
|
||||
// Non-reserved words
|
||||
{"box", JS::Token::Box},
|
||||
{"constructor", JS::Token::Constructor},
|
||||
{"field", JS::Token::Field},
|
||||
{"get", JS::Token::Get},
|
||||
{"language", JS::Token::Language},
|
||||
{"local", JS::Token::Local},
|
||||
{"method", JS::Token::Method},
|
||||
{"override", JS::Token::Override},
|
||||
{"set", JS::Token::Set},
|
||||
{"version", JS::Token::Version}
|
||||
};
|
||||
|
||||
|
||||
// Initialize the keywords in the given world.
|
||||
void JS::initKeywords(World &world)
|
||||
{
|
||||
KeywordInit *ki = keywordInits;
|
||||
KeywordInit *kiEnd = keywordInits + sizeof(keywordInits)/sizeof(KeywordInit);
|
||||
for (; ki != kiEnd; ++ki)
|
||||
world.identifiers[widenCString(ki->name)].tokenKind = ki->tokenKind;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Create a new Lexer using the provided Reader and interning identifiers, keywords, and regular
|
||||
// expressions in the designated world.
|
||||
JS::Lexer::Lexer(Reader &reader, World &world): reader(reader), world(world)
|
||||
{
|
||||
nextToken = tokens;
|
||||
nTokensFwd = 0;
|
||||
#ifdef DEBUG
|
||||
nTokensBack = 0;
|
||||
#endif
|
||||
lineNum = 1;
|
||||
lexingUnit = false;
|
||||
}
|
||||
|
||||
|
||||
|
@ -156,9 +309,536 @@ void JS::Lexer::unget()
|
|||
}
|
||||
|
||||
|
||||
// Report a syntax error at the backUp-th last character read by the Reader.
|
||||
// In other words, if backUp is 0, the error is at the next character to be read by the Reader;
|
||||
// if backUp is 1, the error is at the last character read by the Reader, and so forth.
|
||||
void JS::Lexer::syntaxError(const char *message, uint backUp)
|
||||
{
|
||||
reader.unget(backUp);
|
||||
uint32 charPos = reader.charPos();
|
||||
char16orEOF ch;
|
||||
do {
|
||||
ch = reader.get();
|
||||
} while (ch != char16eof && !isLineBreak(char16orEOFToChar16(ch)));
|
||||
reader.unget();
|
||||
Exception e(Exception::SyntaxError, widenCString(message), reader.sourceFile(), lineNum, charPos,
|
||||
reader.extract(0, reader.charPos()));
|
||||
throw e;
|
||||
}
|
||||
|
||||
|
||||
// Get the next character from the reader, skipping any Unicode format-control (Cf) characters.
|
||||
inline JS::char16orEOF JS::Lexer::getChar()
|
||||
{
|
||||
char16orEOF ch = reader.get();
|
||||
if (static_cast<uint32>(ch) >= firstFormatChar)
|
||||
ch = internalGetChar(ch);
|
||||
return ch;
|
||||
}
|
||||
|
||||
// Helper for getChar()
|
||||
JS::char16orEOF JS::Lexer::internalGetChar(char16orEOF ch)
|
||||
{
|
||||
while (isFormat(char16orEOFToChar16(ch)))
|
||||
ch = reader.get();
|
||||
return ch;
|
||||
}
|
||||
|
||||
|
||||
// Peek the next character from the reader, skipping any Unicode format-control (Cf) characters,
|
||||
// which are read and discarded.
|
||||
inline JS::char16orEOF JS::Lexer::peekChar()
|
||||
{
|
||||
char16orEOF ch = reader.peek();
|
||||
if (static_cast<uint32>(ch) >= firstFormatChar)
|
||||
ch = internalPeekChar(ch);
|
||||
return ch;
|
||||
}
|
||||
|
||||
// Helper for peekChar()
|
||||
JS::char16orEOF JS::Lexer::internalPeekChar(char16orEOF ch)
|
||||
{
|
||||
while (isFormat(char16orEOFToChar16(ch))) {
|
||||
reader.get();
|
||||
ch = reader.peek();
|
||||
}
|
||||
return ch;
|
||||
}
|
||||
|
||||
|
||||
// Peek the next character from the reader, skipping any Unicode format-control (Cf) characters,
|
||||
// which are read and discarded. If the peeked character matches ch, read that character and return true;
|
||||
// otherwise return false.
|
||||
bool JS::Lexer::testChar(char16 ch)
|
||||
{
|
||||
char16orEOF ch2 = peekChar();
|
||||
if (ch == ch2) {
|
||||
reader.get();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// A backslash has been read. Read the rest of the escape code.
|
||||
// Return the interpreted escaped character. Throw an exception if the escape is not valid.
|
||||
// If unicodeOnly is true, allow only \uxxxx escapes.
|
||||
char16 JS::Lexer::lexEscape(bool unicodeOnly)
|
||||
{
|
||||
char16orEOF ch = getChar();
|
||||
int nDigits;
|
||||
|
||||
if (!unicodeOnly || ch == 'u')
|
||||
switch (ch) {
|
||||
case '0':
|
||||
// Make sure that the next character isn't a digit.
|
||||
ch = peekChar();
|
||||
if (!isASCIIDecimalDigit(char16orEOFToChar16(ch)))
|
||||
return 0x00;
|
||||
getChar(); // Point to the next character in the error message
|
||||
case 'b':
|
||||
return 0x08;
|
||||
case 'f':
|
||||
return 0x0C;
|
||||
case 'n':
|
||||
return 0x0A;
|
||||
case 'r':
|
||||
return 0x0D;
|
||||
case 't':
|
||||
return 0x09;
|
||||
case 'v':
|
||||
return 0x0B;
|
||||
case 'x':
|
||||
nDigits = 2;
|
||||
goto lexHex;
|
||||
case 'u':
|
||||
nDigits = 4;
|
||||
lexHex:
|
||||
{
|
||||
uint32 n = 0;
|
||||
while (nDigits--) {
|
||||
ch = getChar();
|
||||
uint digit;
|
||||
if (!isASCIIHexDigit(char16orEOFToChar16(ch), digit))
|
||||
goto error;
|
||||
n = (n << 4) | digit;
|
||||
}
|
||||
return char16(n);
|
||||
}
|
||||
default:
|
||||
if (ch != char16eof) {
|
||||
CharInfo chi(char16orEOFToChar16(ch));
|
||||
if (!isAlphanumeric(chi) && !isLineBreak(chi))
|
||||
return char16orEOFToChar16(ch);
|
||||
}
|
||||
}
|
||||
error:
|
||||
syntaxError("Bad escape code");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// Read an identifier into s. Return true if an escape code has been encountered.
|
||||
// If allowLeadingDigit is true, allow the first character of s to be a digit, just like any
|
||||
// continuing identifier character.
|
||||
bool JS::Lexer::lexIdentifier(String &s, bool allowLeadingDigit)
|
||||
{
|
||||
reader.beginRecording(s);
|
||||
bool hasEscape = false;
|
||||
|
||||
while (true) {
|
||||
char16orEOF ch = getChar();
|
||||
char16orEOF ch2 = ch;
|
||||
if (ch == '\\') {
|
||||
ch2 = lexEscape(true);
|
||||
hasEscape = true;
|
||||
}
|
||||
CharInfo chi2(char16orEOFToChar16(ch2));
|
||||
|
||||
if (!(allowLeadingDigit ? isIdContinuing(chi2) : isIdLeading(chi2))) {
|
||||
if (ch == '\\')
|
||||
syntaxError("Identifier escape expands into non-identifier character");
|
||||
else
|
||||
reader.unget();
|
||||
break;
|
||||
}
|
||||
reader.recordChar(char16orEOFToChar16(ch2));
|
||||
allowLeadingDigit = true;
|
||||
}
|
||||
reader.endRecording();
|
||||
return hasEscape;
|
||||
}
|
||||
|
||||
|
||||
// Read a numeric literal into nextToken->chars and nextToken->value.
|
||||
// Return true if the numeric literal is followed by a unit, but don't read the unit yet.
|
||||
bool JS::Lexer::lexNumeral()
|
||||
{
|
||||
int radix = 10;
|
||||
int hasDecimalPoint = 0;
|
||||
String s;
|
||||
uint digit;
|
||||
|
||||
reader.beginRecording(s);
|
||||
char16orEOF ch = getChar();
|
||||
if (ch == '0') {
|
||||
reader.recordChar('0');
|
||||
ch = getChar();
|
||||
if (ch&~0x20 == 'X') {
|
||||
uint32 pos = reader.charPos();
|
||||
char16orEOF ch2 = getChar();
|
||||
if (isASCIIHexDigit(char16orEOFToChar16(ch2), digit)) {
|
||||
reader.recordChar(char16orEOFToChar16(ch));
|
||||
do {
|
||||
reader.recordChar(char16orEOFToChar16(ch2));
|
||||
ch2 = getChar();
|
||||
} while (isASCIIHexDigit(char16orEOFToChar16(ch2), digit));
|
||||
ch = ch2;
|
||||
} else
|
||||
reader.backUpTo(pos);
|
||||
goto done;
|
||||
} else if (isASCIIDecimalDigit(char16orEOFToChar16(ch))) {
|
||||
syntaxError("Numeric constant syntax error");
|
||||
}
|
||||
}
|
||||
while (isASCIIDecimalDigit(char16orEOFToChar16(ch)) || ch == '.' && !hasDecimalPoint++) {
|
||||
reader.recordChar(char16orEOFToChar16(ch));
|
||||
ch = getChar();
|
||||
}
|
||||
if (ch&~0x20 == 'E') {
|
||||
uint32 pos = reader.charPos();
|
||||
char16orEOF ch2 = getChar();
|
||||
char16 sign = 0;
|
||||
if (ch2 == '+' || ch2 == '-') {
|
||||
sign = char16orEOFToChar16(ch2);
|
||||
ch2 = getChar();
|
||||
}
|
||||
if (isASCIIDecimalDigit(char16orEOFToChar16(ch2))) {
|
||||
reader.recordChar(char16orEOFToChar16(ch));
|
||||
if (sign)
|
||||
reader.recordChar(sign);
|
||||
do {
|
||||
reader.recordChar(char16orEOFToChar16(ch2));
|
||||
ch2 = getChar();
|
||||
} while (isASCIIDecimalDigit(char16orEOFToChar16(ch2)));
|
||||
ch = ch2;
|
||||
} else
|
||||
reader.backUpTo(pos);
|
||||
}
|
||||
|
||||
done:
|
||||
// At this point the reader is just past the character ch, which is the first non-formatting character
|
||||
// that is not part of the number.
|
||||
reader.endRecording();
|
||||
nextToken->setChars(s);
|
||||
reader.unget();
|
||||
ASSERT(ch == reader.peek());
|
||||
return isIdContinuing(char16orEOFToChar16(ch)) || ch == '\\';
|
||||
}
|
||||
|
||||
|
||||
// Read a string literal into a String and return that String.
|
||||
// The opening quote has already been read into separator.
|
||||
JS::String JS::Lexer::lexString(char16 separator)
|
||||
{
|
||||
String s;
|
||||
char16orEOF ch;
|
||||
|
||||
reader.beginRecording(s);
|
||||
while ((ch = reader.get()) != separator) {
|
||||
CharInfo chi(char16orEOFToChar16(ch));
|
||||
if (!isFormat(chi)) {
|
||||
if (ch == '\\')
|
||||
ch = lexEscape(false);
|
||||
else if (ch == char16eof || isLineBreak(chi))
|
||||
syntaxError("Unterminated string literal");
|
||||
reader.recordChar(char16orEOFToChar16(ch));
|
||||
}
|
||||
}
|
||||
reader.endRecording();
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
// Read a regular expression literal. Store the regular expression in nextToken->identifier
|
||||
// and the flags in nextToken->flags.
|
||||
// The opening slash has already been read.
|
||||
void JS::Lexer::lexRegExp()
|
||||
{
|
||||
String s;
|
||||
char16orEOF prevCh = 0;
|
||||
|
||||
reader.beginRecording(s);
|
||||
while (true) {
|
||||
char16orEOF ch = getChar();
|
||||
CharInfo chi(char16orEOFToChar16(ch));
|
||||
if (ch == char16eof || isLineBreak(chi))
|
||||
syntaxError("Unterminated regular expression literal");
|
||||
if (prevCh == '\\') {
|
||||
reader.recordChar(char16orEOFToChar16(ch));
|
||||
prevCh = 0; // Ignore slashes and backslashes immediately after a \
|
||||
} else if (ch != '/') {
|
||||
reader.recordChar(char16orEOFToChar16(ch));
|
||||
prevCh = ch;
|
||||
} else
|
||||
break;
|
||||
}
|
||||
reader.endRecording();
|
||||
nextToken->identifier = &world.identifiers[s];
|
||||
|
||||
String flags;
|
||||
lexIdentifier(flags, true);
|
||||
nextToken->setChars(flags);
|
||||
}
|
||||
|
||||
|
||||
// Read a token from the Reader and store it at *nextToken.
|
||||
// If the Reader reached the end of file, store a Token whose Kind is End.
|
||||
void JS::Lexer::lexToken(bool preferRegExp)
|
||||
{
|
||||
}
|
||||
Token &t = *nextToken;
|
||||
t.lineBreak = false;
|
||||
t.identifier = 0;
|
||||
t.chars.reset();
|
||||
t.value = 0;
|
||||
Token::Kind kind;
|
||||
|
||||
next:
|
||||
char16orEOF ch = reader.get();
|
||||
char16orEOF ch2;
|
||||
CharInfo chi(char16orEOFToChar16(ch));
|
||||
|
||||
switch (cGroup(chi)) {
|
||||
case CharInfo::FormatGroup:
|
||||
case CharInfo::WhiteGroup:
|
||||
goto next;
|
||||
|
||||
case CharInfo::IdGroup:
|
||||
t.charPos = reader.charPos() - 1;
|
||||
readIdentifier:
|
||||
{
|
||||
reader.unget();
|
||||
String s;
|
||||
bool hasEscape = lexIdentifier(s, false);
|
||||
t.identifier = &world.identifiers[s];
|
||||
kind = hasEscape ? Token::Id : t.identifier->tokenKind;
|
||||
}
|
||||
break;
|
||||
|
||||
case CharInfo::NonIdGroup:
|
||||
case CharInfo::IdContinueGroup:
|
||||
t.charPos = reader.charPos() - 1;
|
||||
switch (ch) {
|
||||
case '(':
|
||||
kind = Token::OpenParenthesis; // (
|
||||
break;
|
||||
case ')':
|
||||
kind = Token::CloseParenthesis; // )
|
||||
break;
|
||||
case '[':
|
||||
kind = Token::OpenBracket; // [
|
||||
break;
|
||||
case ']':
|
||||
kind = Token::CloseBracket; // ]
|
||||
break;
|
||||
case '{':
|
||||
kind = Token::OpenBrace; // {
|
||||
break;
|
||||
case '}':
|
||||
kind = Token::CloseBrace; // }
|
||||
break;
|
||||
case ',':
|
||||
kind = Token::Comma; // ,
|
||||
break;
|
||||
case ';':
|
||||
kind = Token::Semicolon; // ;
|
||||
break;
|
||||
case '.':
|
||||
kind = Token::Dot; // .
|
||||
ch2 = getChar();
|
||||
if (isASCIIDecimalDigit(char16orEOFToChar16(ch2))) {
|
||||
reader.backUpTo(t.charPos);
|
||||
goto number; // decimal point
|
||||
} else if (ch2 == '.') {
|
||||
kind = Token::DoubleDot; // ..
|
||||
if (testChar('.'))
|
||||
kind = Token::TripleDot; // ...
|
||||
} else
|
||||
reader.unget();
|
||||
break;
|
||||
case ':':
|
||||
kind = Token::Colon; // :
|
||||
if (testChar(':'))
|
||||
kind = Token::DoubleColon; // ::
|
||||
break;
|
||||
case '#':
|
||||
kind = Token::Pound; // #
|
||||
break;
|
||||
case '@':
|
||||
kind = Token::At; // @
|
||||
break;
|
||||
case '?':
|
||||
kind = Token::Question; // ?
|
||||
break;
|
||||
|
||||
case '~':
|
||||
kind = Token::Complement; // ~
|
||||
break;
|
||||
case '!':
|
||||
kind = Token::Not; // !
|
||||
if (testChar('=')) {
|
||||
kind = Token::NotEqual; // !=
|
||||
if (testChar('='))
|
||||
kind = Token::NotIdentical; // !==
|
||||
}
|
||||
break;
|
||||
|
||||
case '*':
|
||||
kind = Token::Times; // * *=
|
||||
tryAssignment:
|
||||
if (testChar('='))
|
||||
kind = Token::Kind(kind + Token::TimesEquals - Token::Times);
|
||||
break;
|
||||
|
||||
case '/':
|
||||
kind = Token::Divide; // /
|
||||
ch = getChar();
|
||||
if (ch == '/') { // // comment
|
||||
do {
|
||||
ch = reader.get();
|
||||
if (ch == char16eof)
|
||||
goto endOfInput;
|
||||
} while (!isLineBreak(char16orEOFToChar16(ch)));
|
||||
goto endOfLine;
|
||||
} else if (ch == '*') { // /* comment */
|
||||
ch = 0;
|
||||
do {
|
||||
ch2 = ch;
|
||||
ch = getChar();
|
||||
if (isLineBreak(char16orEOFToChar16(ch))) {
|
||||
reader.beginLine();
|
||||
++lineNum;
|
||||
t.lineBreak = true;
|
||||
}
|
||||
if (ch == char16eof)
|
||||
syntaxError("Unterminated /* comment");
|
||||
} while (ch != '/' || ch2 != '*');
|
||||
goto next;
|
||||
} else {
|
||||
reader.unget();
|
||||
if (preferRegExp) { // Regular expression
|
||||
kind = Token::RegExp;
|
||||
lexRegExp();
|
||||
} else
|
||||
goto tryAssignment; // /=
|
||||
}
|
||||
break;
|
||||
|
||||
case '%':
|
||||
kind = Token::Modulo; // %
|
||||
goto tryAssignment; // %=
|
||||
|
||||
case '+':
|
||||
kind = Token::Plus; // +
|
||||
if (testChar('+'))
|
||||
kind = Token::Increment; // ++
|
||||
else
|
||||
goto tryAssignment; // +=
|
||||
break;
|
||||
|
||||
case '-':
|
||||
kind = Token::Minus; // -
|
||||
ch = getChar();
|
||||
if (ch == '-')
|
||||
kind = Token::Decrement; // --
|
||||
else if (ch == '>')
|
||||
kind = Token::Arrow; // ->
|
||||
else {
|
||||
reader.unget();
|
||||
goto tryAssignment; // -=
|
||||
}
|
||||
break;
|
||||
|
||||
case '&':
|
||||
kind = Token::And; // & && &= &&=
|
||||
logical:
|
||||
if (testChar(char16orEOFToChar16(ch)))
|
||||
kind = Token::Kind(kind - Token::And + Token::LogicalAnd);
|
||||
goto tryAssignment;
|
||||
case '^':
|
||||
kind = Token::Xor; // ^ ^^ ^= ^^=
|
||||
goto logical;
|
||||
case '|':
|
||||
kind = Token::Or; // | || |= ||=
|
||||
goto logical;
|
||||
|
||||
case '=':
|
||||
kind = Token::Assignment; // =
|
||||
if (testChar('=')) {
|
||||
kind = Token::Equal; // ==
|
||||
if (testChar('='))
|
||||
kind = Token::Identical; // ===
|
||||
}
|
||||
break;
|
||||
|
||||
case '<':
|
||||
kind = Token::LessThan; // <
|
||||
if (testChar('<')) {
|
||||
kind = Token::LeftShift; // <<
|
||||
goto tryAssignment; // <<=
|
||||
}
|
||||
comparison:
|
||||
if (testChar('=')) // <= >=
|
||||
kind = Token::Kind(kind + Token::LessThanOrEqual - Token::LessThan);
|
||||
break;
|
||||
case '>':
|
||||
kind = Token::GreaterThan; // >
|
||||
if (testChar('>')) {
|
||||
kind = Token::RightShift; // >>
|
||||
if (testChar('>'))
|
||||
kind = Token::LogicalRightShift; // >>>
|
||||
goto tryAssignment; // >>= >>>=
|
||||
}
|
||||
goto comparison;
|
||||
|
||||
case '\\':
|
||||
goto readIdentifier; // An identifier that starts with an escape
|
||||
|
||||
case '\'':
|
||||
case '"':
|
||||
kind = Token::Str; // 'string' "string"
|
||||
t.setChars(lexString(char16orEOFToChar16(ch)));
|
||||
break;
|
||||
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9':
|
||||
reader.unget(); // Number
|
||||
number:
|
||||
kind = Token::Num;
|
||||
lexNumeral();
|
||||
break;
|
||||
|
||||
case char16eof:
|
||||
endOfInput:
|
||||
kind = Token::End;
|
||||
}
|
||||
break;
|
||||
|
||||
case CharInfo::LineBreakGroup:
|
||||
endOfLine:
|
||||
reader.beginLine();
|
||||
++lineNum;
|
||||
t.lineBreak = true;
|
||||
goto next;
|
||||
}
|
||||
t.kind = kind;
|
||||
t.lineNum = lineNum;
|
||||
}
|
||||
|
|
117
js/js2/parser.h
117
js/js2/parser.h
|
@ -21,22 +21,28 @@
|
|||
#define parser_h
|
||||
|
||||
#include "utilities.h"
|
||||
#include "world.h"
|
||||
|
||||
namespace JavaScript {
|
||||
|
||||
class StringAtom;
|
||||
class World;
|
||||
|
||||
//
|
||||
// Reader
|
||||
//
|
||||
|
||||
// A Reader reads Unicode characters from some source -- either a file or a string.
|
||||
// get() returns all of the characters followed by a ueof.
|
||||
// get() returns all of the characters followed by a char16eof.
|
||||
class Reader {
|
||||
const char16 *begin; // Beginning of current buffer
|
||||
const char16 *p; // Position in current buffer
|
||||
const char16 *end; // End of current buffer
|
||||
const char16 *markPos; // Pointer to mark in current buffer or null if no mark
|
||||
uint32 nGetsPastEnd; // Number of times ueof has been returned
|
||||
const char16 *lineStart; // Pointer to start of current line
|
||||
uint32 nGetsPastEnd; // Number of times char16eof has been returned
|
||||
|
||||
String *recordString; // String, if any, into which recordChar() records characters
|
||||
const char16 *recordBase; // Position of last beginRecording() call
|
||||
const char16 *recordPos; // Position of last recordChar() call; nil if a discrepancy occurred
|
||||
|
||||
protected:
|
||||
Reader(): nGetsPastEnd(0) {}
|
||||
|
@ -46,54 +52,70 @@ namespace JavaScript {
|
|||
Reader(const Reader&); // No copy constructor
|
||||
void operator=(const Reader&); // No assignment operator
|
||||
public:
|
||||
#ifdef DEBUG
|
||||
~Reader() {ASSERT(!markPos);}
|
||||
#endif
|
||||
|
||||
wint_t get();
|
||||
wint_t peek();
|
||||
void unget();
|
||||
char16orEOF get();
|
||||
char16orEOF peek();
|
||||
void unget(uint32 n = 1);
|
||||
|
||||
void mark();
|
||||
void unmark();
|
||||
void unmark(String &s);
|
||||
bool marked() const {return markPos;}
|
||||
void beginLine();
|
||||
uint32 charPos() const;
|
||||
void backUpTo(uint32 pos);
|
||||
|
||||
String extract(uint32 begin, uint32 end) const;
|
||||
void beginRecording(String &recordString);
|
||||
void recordChar(char16 ch);
|
||||
String &endRecording();
|
||||
|
||||
virtual String sourceFile() const = 0; // A description of the source code that caused the error
|
||||
|
||||
protected:
|
||||
void setBuffer(const char16 *begin, const char16 *p, const char16 *end);
|
||||
virtual wint_t underflow();
|
||||
wint_t peekUnderflow();
|
||||
virtual char16orEOF underflow();
|
||||
char16orEOF peekUnderflow();
|
||||
};
|
||||
|
||||
|
||||
// Get and return the next character or ueof if at end of input.
|
||||
inline wint_t Reader::get()
|
||||
// Get and return the next character or char16eof if at end of input.
|
||||
inline char16orEOF Reader::get()
|
||||
{
|
||||
if (p != end)
|
||||
return *p++;
|
||||
return underflow();
|
||||
}
|
||||
|
||||
// Return the next character without consuming it. Return ueof if at end of input.
|
||||
inline wint_t Reader::peek()
|
||||
// Return the next character without consuming it. Return char16eof if at end of input.
|
||||
inline char16orEOF Reader::peek()
|
||||
{
|
||||
if (p != end)
|
||||
return *p;
|
||||
return peekUnderflow();
|
||||
}
|
||||
|
||||
// Mark the current position in the Reader.
|
||||
inline void Reader::mark()
|
||||
|
||||
// Set the beginning of the current line. unget cannot be subsequently called past this point.
|
||||
inline void Reader::beginLine()
|
||||
{
|
||||
ASSERT(!markPos);
|
||||
markPos = p;
|
||||
lineStart = p;
|
||||
#ifdef DEBUG
|
||||
recordString = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Delete the Reader mark.
|
||||
inline void Reader::unmark()
|
||||
// Return the character offset relative to the current line. This cannot be called
|
||||
// if the current position is past the end of the input.
|
||||
inline uint32 Reader::charPos() const
|
||||
{
|
||||
ASSERT(markPos);
|
||||
markPos = 0;
|
||||
ASSERT(!nGetsPastEnd);
|
||||
return static_cast<uint32>(p - lineStart);
|
||||
}
|
||||
|
||||
|
||||
// Back up to the given character offset relative to the current line.
|
||||
inline void Reader::backUpTo(uint32 pos)
|
||||
{
|
||||
ASSERT(pos <= charPos());
|
||||
p = lineStart + pos;
|
||||
nGetsPastEnd = 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -103,14 +125,21 @@ namespace JavaScript {
|
|||
Reader::begin = begin;
|
||||
Reader::p = p;
|
||||
Reader::end = end;
|
||||
lineStart = begin;
|
||||
#ifdef DEBUG
|
||||
recordString = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// A Reader that reads from a String.
|
||||
class StringReader: public Reader {
|
||||
const String str;
|
||||
const String source;
|
||||
|
||||
public:
|
||||
StringReader(const String &s);
|
||||
StringReader(const String &s, const String &source);
|
||||
String sourceFile() const;
|
||||
};
|
||||
|
||||
|
||||
|
@ -122,7 +151,6 @@ namespace JavaScript {
|
|||
public:
|
||||
enum Kind {
|
||||
End, // End of token stream
|
||||
Error, // Lexer error
|
||||
|
||||
Id, // Non-keyword identifier (may be same as a keyword if it contains an escape code)
|
||||
Num, // Numeral
|
||||
|
@ -165,12 +193,12 @@ namespace JavaScript {
|
|||
LogicalAnd, // &&
|
||||
LogicalXor, // ^^
|
||||
LogicalOr, // ||
|
||||
And, // &
|
||||
And, // & // These must be at constant offsets from LogicalAnd ... LogicalOr
|
||||
Xor, // ^
|
||||
Or, // |
|
||||
|
||||
Assignment, // =
|
||||
TimesEquals, // *=
|
||||
TimesEquals, // *= // These must be at constant offsets from Times ... Or
|
||||
DivideEquals, // /=
|
||||
ModuloEquals, // %=
|
||||
PlusEquals, // +=
|
||||
|
@ -189,7 +217,7 @@ namespace JavaScript {
|
|||
NotEqual, // !=
|
||||
LessThan, // <
|
||||
LessThanOrEqual, // <=
|
||||
GreaterThan, // >
|
||||
GreaterThan, // > // >, >= must be at constant offsets from <, <=
|
||||
GreaterThanOrEqual, // >=
|
||||
Identical, // ===
|
||||
NotIdentical, // !==
|
||||
|
@ -268,13 +296,19 @@ namespace JavaScript {
|
|||
StringAtom *identifier; // The token's characters (identifiers, keywords, and regular expressions only)
|
||||
auto_ptr<String> chars; // The token's characters (strings, numbers, and regular expression flags only)
|
||||
float64 value; // The token's value (numbers only)
|
||||
|
||||
void setChars(const String &s);
|
||||
};
|
||||
|
||||
|
||||
void initKeywords(World &world);
|
||||
|
||||
|
||||
class Lexer {
|
||||
static const int tokenBufferSize = 3; // Token lookahead buffer size
|
||||
public:
|
||||
Reader &reader;
|
||||
World &world;
|
||||
private:
|
||||
Token tokens[tokenBufferSize]; // Circular buffer of recently read or lookahead tokens
|
||||
Token *nextToken; // Address of next Token in the circular buffer to be returned by get()
|
||||
|
@ -283,16 +317,31 @@ namespace JavaScript {
|
|||
int nTokensBack; // Number of Tokens on which unget() can be called; these Tokens are behind nextToken
|
||||
bool savedPreferRegExp[tokenBufferSize]; // Circular buffer of saved values of preferRegExp to get() calls
|
||||
#endif
|
||||
uint32 lineNum; // Current line number
|
||||
bool lexingUnit; // True if lexing a unit identifier immediately following a number
|
||||
|
||||
public:
|
||||
Lexer(Reader &reader);
|
||||
Lexer(Reader &reader, World &world);
|
||||
|
||||
Token &get(bool preferRegExp);
|
||||
const Token &peek(bool preferRegExp);
|
||||
void unget();
|
||||
|
||||
private:
|
||||
void syntaxError(const char *message, uint backUp = 1);
|
||||
char16orEOF getChar();
|
||||
char16orEOF internalGetChar(char16orEOF ch);
|
||||
char16orEOF peekChar();
|
||||
char16orEOF internalPeekChar(char16orEOF ch);
|
||||
bool testChar(char16 ch);
|
||||
|
||||
char16 lexEscape(bool unicodeOnly);
|
||||
bool lexIdentifier(String &s, bool allowLeadingDigit);
|
||||
bool lexNumeral();
|
||||
String lexString(char16 separator);
|
||||
void lexRegExp();
|
||||
void lexToken(bool preferRegExp);
|
||||
public:
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
// Rights Reserved.
|
||||
|
||||
#include "parser.h"
|
||||
#include "world.h"
|
||||
|
||||
namespace JS = JavaScript;
|
||||
|
||||
|
||||
|
@ -28,75 +30,226 @@ namespace JS = JavaScript;
|
|||
|
||||
// Create a Reader reading characters from begin up to but not including end.
|
||||
JS::Reader::Reader(const char16 *begin, const char16 *end):
|
||||
begin(begin), p(begin), end(end), nGetsPastEnd(0)
|
||||
begin(begin), p(begin), end(end), lineStart(begin), nGetsPastEnd(0)
|
||||
{
|
||||
ASSERT(begin <= end);
|
||||
#ifdef DEBUG
|
||||
recordString = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// Unread the last character.
|
||||
void JS::Reader::unget()
|
||||
// Unread the last n characters. unget cannot be called to back up past the position
|
||||
// of the last call to beginLine().
|
||||
void JS::Reader::unget(uint32 n)
|
||||
{
|
||||
if (nGetsPastEnd)
|
||||
--nGetsPastEnd;
|
||||
else {
|
||||
ASSERT(p != begin);
|
||||
--p;
|
||||
if (nGetsPastEnd) {
|
||||
if (nGetsPastEnd >= n) {
|
||||
nGetsPastEnd -= n;
|
||||
return;
|
||||
}
|
||||
n -= nGetsPastEnd;
|
||||
nGetsPastEnd = 0;
|
||||
}
|
||||
ASSERT(p >= begin + n);
|
||||
p -= n;
|
||||
}
|
||||
|
||||
|
||||
// Set s to the characters read in after the mark but before the current position
|
||||
// and then delete the Reader mark.
|
||||
void JS::Reader::unmark(String &s)
|
||||
// Return the characters read in from position begin inclusive to position end
|
||||
// exclusive relative to the current line. begin <= end <= charPos() is required.
|
||||
JS::String JS::Reader::extract(uint32 begin, uint32 end) const
|
||||
{
|
||||
ASSERT(markPos);
|
||||
s.assign(markPos, p);
|
||||
markPos = 0;
|
||||
ASSERT(begin <= end && end + nGetsPastEnd <= charPos());
|
||||
return String(lineStart + begin, lineStart + end);
|
||||
}
|
||||
|
||||
|
||||
// Begin accumulating characters into the recordString. Each character passed
|
||||
// to recordChar() is added to the end of the recordString. Recording ends when
|
||||
// endRecord() or beginLine() is called.
|
||||
// Recording is significantly optimized when the characters passed to readChar()
|
||||
// are the same characters as read by get(). In this case the record String does
|
||||
// not get allocated until endRecord() is called or a discrepancy appears between
|
||||
// get() and recordChar().
|
||||
void JS::Reader::beginRecording(String &recordString)
|
||||
{
|
||||
Reader::recordString = &recordString;
|
||||
recordBase = p;
|
||||
recordPos = p;
|
||||
}
|
||||
|
||||
|
||||
// Append ch to the recordString.
|
||||
void JS::Reader::recordChar(char16 ch)
|
||||
{
|
||||
ASSERT(recordString);
|
||||
if (recordPos) {
|
||||
if (recordPos != end && *recordPos == ch) {
|
||||
recordPos++;
|
||||
return;
|
||||
} else {
|
||||
recordString->assign(recordBase, recordPos);
|
||||
recordPos = 0;
|
||||
}
|
||||
}
|
||||
*recordString += ch;
|
||||
}
|
||||
|
||||
|
||||
// Finish recording characters into the recordString that was last passed to beginRecording().
|
||||
// Return that recordString.
|
||||
JS::String &JS::Reader::endRecording()
|
||||
{
|
||||
String *rs = recordString;
|
||||
ASSERT(rs);
|
||||
if (recordPos)
|
||||
rs->assign(recordBase, recordPos);
|
||||
recordString = 0;
|
||||
return *rs;
|
||||
}
|
||||
|
||||
|
||||
// Refill the source buffer after running off the end. Get and return
|
||||
// the next character.
|
||||
// The default implementation just returns ueof.
|
||||
JS::wint_t JS::Reader::underflow()
|
||||
// The default implementation just returns char16eof.
|
||||
JS::char16orEOF JS::Reader::underflow()
|
||||
{
|
||||
++nGetsPastEnd;
|
||||
return ueof;
|
||||
return char16eof;
|
||||
}
|
||||
|
||||
|
||||
// Perform a peek when begin == end.
|
||||
JS::wint_t JS::Reader::peekUnderflow()
|
||||
JS::char16orEOF JS::Reader::peekUnderflow()
|
||||
{
|
||||
wint_t ch = underflow();
|
||||
char16orEOF ch = underflow();
|
||||
unget();
|
||||
return ch;
|
||||
}
|
||||
|
||||
|
||||
// Create a StringReader reading characters from a copy of the given String.
|
||||
JS::StringReader::StringReader(const String &s):
|
||||
str(s)
|
||||
// Create a StringReader reading characters from s.
|
||||
// source describes the origin of string s and may be used for error messages.
|
||||
JS::StringReader::StringReader(const String &s, const String &source):
|
||||
str(s), source(source)
|
||||
{
|
||||
const char16 *begin = str.data();
|
||||
setBuffer(begin, begin, begin + str.size());
|
||||
}
|
||||
|
||||
|
||||
// Return the description of the string's origin that was passed to the constructor.
JS::String JS::StringReader::sourceFile() const
|
||||
{
|
||||
return source;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Lexer
|
||||
//
|
||||
|
||||
|
||||
// Create a new Lexer using the provided Reader.
|
||||
JS::Lexer::Lexer(Reader &reader): reader(reader)
|
||||
void JS::Token::setChars(const String &s)
|
||||
{
|
||||
chars = static_cast<auto_ptr<String> >(new String(s));
|
||||
}
|
||||
|
||||
|
||||
// One row of the keyword table: a keyword's spelling paired with the token kind
// that initKeywords() registers for it.
struct KeywordInit {
|
||||
const char *name; // Null-terminated ASCII name of keyword
|
||||
JS::Token::Kind tokenKind; // Token kind assigned to the keyword
|
||||
};
|
||||
|
||||
// Table of every keyword together with its token kind; consumed by initKeywords().
// Each keyword appears exactly once.
static KeywordInit keywordInits[] = {
    // Reserved words
    {"abstract",     JS::Token::Abstract},
    {"break",        JS::Token::Break},
    {"case",         JS::Token::Case},
    {"catch",        JS::Token::Catch},
    {"class",        JS::Token::Class},
    {"const",        JS::Token::Const},
    {"continue",     JS::Token::Continue},
    {"debugger",     JS::Token::Debugger},
    {"default",      JS::Token::Default},
    {"delete",       JS::Token::Delete},
    {"do",           JS::Token::Do},
    {"else",         JS::Token::Else},
    {"enum",         JS::Token::Enum},
    {"eval",         JS::Token::Eval},
    {"export",       JS::Token::Export},
    {"extends",      JS::Token::Extends},
    {"false",        JS::Token::False},
    {"final",        JS::Token::Final},
    {"finally",      JS::Token::Finally},
    {"for",          JS::Token::For},
    {"function",     JS::Token::Function},
    {"goto",         JS::Token::Goto},
    {"if",           JS::Token::If},
    {"implements",   JS::Token::Implements},
    {"import",       JS::Token::Import},
    {"in",           JS::Token::In},
    {"instanceof",   JS::Token::Instanceof},
    {"native",       JS::Token::Native},
    {"new",          JS::Token::New},
    {"null",         JS::Token::Null},
    {"package",      JS::Token::Package},
    {"private",      JS::Token::Private},
    {"protected",    JS::Token::Protected},
    {"public",       JS::Token::Public},
    {"return",       JS::Token::Return},
    {"static",       JS::Token::Static},
    {"super",        JS::Token::Super},
    {"switch",       JS::Token::Switch},
    {"synchronized", JS::Token::Synchronized},
    {"this",         JS::Token::This},
    {"throw",        JS::Token::Throw},
    {"throws",       JS::Token::Throws},
    {"transient",    JS::Token::Transient},
    {"true",         JS::Token::True},
    {"try",          JS::Token::Try},
    {"typeof",       JS::Token::Typeof},
    {"var",          JS::Token::Var},
    {"volatile",     JS::Token::Volatile},
    {"while",        JS::Token::While},
    {"with",         JS::Token::With},
    // Non-reserved words
    {"box",          JS::Token::Box},
    {"constructor",  JS::Token::Constructor},
    {"field",        JS::Token::Field},
    {"get",          JS::Token::Get},
    {"language",     JS::Token::Language},
    {"local",        JS::Token::Local},
    {"method",       JS::Token::Method},
    {"override",     JS::Token::Override},
    {"set",          JS::Token::Set},
    {"version",      JS::Token::Version}
};
|
||||
|
||||
|
||||
// Initialize the keywords in the given world.
|
||||
void JS::initKeywords(World &world)
|
||||
{
|
||||
KeywordInit *ki = keywordInits;
|
||||
KeywordInit *kiEnd = keywordInits + sizeof(keywordInits)/sizeof(KeywordInit);
|
||||
for (; ki != kiEnd; ++ki)
|
||||
world.identifiers[widenCString(ki->name)].tokenKind = ki->tokenKind;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Create a new Lexer using the provided Reader and interning identifiers, keywords, and regular
|
||||
// expressions in the designated world.
|
||||
JS::Lexer::Lexer(Reader &reader, World &world): reader(reader), world(world)
|
||||
{
|
||||
nextToken = tokens;
|
||||
nTokensFwd = 0;
|
||||
#ifdef DEBUG
|
||||
nTokensBack = 0;
|
||||
#endif
|
||||
lineNum = 1;
|
||||
lexingUnit = false;
|
||||
}
|
||||
|
||||
|
||||
|
@ -156,9 +309,536 @@ void JS::Lexer::unget()
|
|||
}
|
||||
|
||||
|
||||
// Report a syntax error at the backUp-th last character read by the Reader.
|
||||
// In other words, if backUp is 0, the error is at the next character to be read by the Reader;
|
||||
// if backUp is 1, the error is at the last character read by the Reader, and so forth.
|
||||
void JS::Lexer::syntaxError(const char *message, uint backUp)
|
||||
{
|
||||
reader.unget(backUp);
|
||||
uint32 charPos = reader.charPos();
|
||||
char16orEOF ch;
|
||||
do {
|
||||
ch = reader.get();
|
||||
} while (ch != char16eof && !isLineBreak(char16orEOFToChar16(ch)));
|
||||
reader.unget();
|
||||
Exception e(Exception::SyntaxError, widenCString(message), reader.sourceFile(), lineNum, charPos,
|
||||
reader.extract(0, reader.charPos()));
|
||||
throw e;
|
||||
}
|
||||
|
||||
|
||||
// Get the next character from the reader, skipping any Unicode format-control (Cf) characters.
|
||||
inline JS::char16orEOF JS::Lexer::getChar()
|
||||
{
|
||||
char16orEOF ch = reader.get();
|
||||
if (static_cast<uint32>(ch) >= firstFormatChar)
|
||||
ch = internalGetChar(ch);
|
||||
return ch;
|
||||
}
|
||||
|
||||
// Helper for getChar()
|
||||
JS::char16orEOF JS::Lexer::internalGetChar(char16orEOF ch)
|
||||
{
|
||||
while (isFormat(char16orEOFToChar16(ch)))
|
||||
ch = reader.get();
|
||||
return ch;
|
||||
}
|
||||
|
||||
|
||||
// Peek the next character from the reader, skipping any Unicode format-control (Cf) characters,
|
||||
// which are read and discarded.
|
||||
inline JS::char16orEOF JS::Lexer::peekChar()
|
||||
{
|
||||
char16orEOF ch = reader.peek();
|
||||
if (static_cast<uint32>(ch) >= firstFormatChar)
|
||||
ch = internalPeekChar(ch);
|
||||
return ch;
|
||||
}
|
||||
|
||||
// Helper for peekChar()
|
||||
JS::char16orEOF JS::Lexer::internalPeekChar(char16orEOF ch)
|
||||
{
|
||||
while (isFormat(char16orEOFToChar16(ch))) {
|
||||
reader.get();
|
||||
ch = reader.peek();
|
||||
}
|
||||
return ch;
|
||||
}
|
||||
|
||||
|
||||
// Peek the next character from the reader, skipping any Unicode format-control (Cf) characters,
|
||||
// which are read and discarded. If the peeked character matches ch, read that character and return true;
|
||||
// otherwise return false.
|
||||
bool JS::Lexer::testChar(char16 ch)
|
||||
{
|
||||
char16orEOF ch2 = peekChar();
|
||||
if (ch == ch2) {
|
||||
reader.get();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// A backslash has been read. Read the rest of the escape code.
|
||||
// Return the interpreted escaped character. Throw an exception if the escape is not valid.
|
||||
// If unicodeOnly is true, allow only \uxxxx escapes.
|
||||
char16 JS::Lexer::lexEscape(bool unicodeOnly)
|
||||
{
|
||||
char16orEOF ch = getChar();
|
||||
int nDigits;
|
||||
|
||||
if (!unicodeOnly || ch == 'u')
|
||||
switch (ch) {
|
||||
case '0':
|
||||
// Make sure that the next character isn't a digit.
|
||||
ch = peekChar();
|
||||
if (!isASCIIDecimalDigit(char16orEOFToChar16(ch)))
|
||||
return 0x00;
|
||||
getChar(); // Point to the next character in the error message
|
||||
case 'b':
|
||||
return 0x08;
|
||||
case 'f':
|
||||
return 0x0C;
|
||||
case 'n':
|
||||
return 0x0A;
|
||||
case 'r':
|
||||
return 0x0D;
|
||||
case 't':
|
||||
return 0x09;
|
||||
case 'v':
|
||||
return 0x0B;
|
||||
case 'x':
|
||||
nDigits = 2;
|
||||
goto lexHex;
|
||||
case 'u':
|
||||
nDigits = 4;
|
||||
lexHex:
|
||||
{
|
||||
uint32 n = 0;
|
||||
while (nDigits--) {
|
||||
ch = getChar();
|
||||
uint digit;
|
||||
if (!isASCIIHexDigit(char16orEOFToChar16(ch), digit))
|
||||
goto error;
|
||||
n = (n << 4) | digit;
|
||||
}
|
||||
return char16(n);
|
||||
}
|
||||
default:
|
||||
if (ch != char16eof) {
|
||||
CharInfo chi(char16orEOFToChar16(ch));
|
||||
if (!isAlphanumeric(chi) && !isLineBreak(chi))
|
||||
return char16orEOFToChar16(ch);
|
||||
}
|
||||
}
|
||||
error:
|
||||
syntaxError("Bad escape code");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// Read an identifier into s. Return true if an escape code has been encountered.
|
||||
// If allowLeadingDigit is true, allow the first character of s to be a digit, just like any
|
||||
// continuing identifier character.
|
||||
bool JS::Lexer::lexIdentifier(String &s, bool allowLeadingDigit)
|
||||
{
|
||||
reader.beginRecording(s);
|
||||
bool hasEscape = false;
|
||||
|
||||
while (true) {
|
||||
char16orEOF ch = getChar();
|
||||
char16orEOF ch2 = ch;
|
||||
if (ch == '\\') {
|
||||
ch2 = lexEscape(true);
|
||||
hasEscape = true;
|
||||
}
|
||||
CharInfo chi2(char16orEOFToChar16(ch2));
|
||||
|
||||
if (!(allowLeadingDigit ? isIdContinuing(chi2) : isIdLeading(chi2))) {
|
||||
if (ch == '\\')
|
||||
syntaxError("Identifier escape expands into non-identifier character");
|
||||
else
|
||||
reader.unget();
|
||||
break;
|
||||
}
|
||||
reader.recordChar(char16orEOFToChar16(ch2));
|
||||
allowLeadingDigit = true;
|
||||
}
|
||||
reader.endRecording();
|
||||
return hasEscape;
|
||||
}
|
||||
|
||||
|
||||
// Read a numeric literal into nextToken->chars and nextToken->value.
|
||||
// Return true if the numeric literal is followed by a unit, but don't read the unit yet.
|
||||
bool JS::Lexer::lexNumeral()
|
||||
{
|
||||
int radix = 10;
|
||||
int hasDecimalPoint = 0;
|
||||
String s;
|
||||
uint digit;
|
||||
|
||||
reader.beginRecording(s);
|
||||
char16orEOF ch = getChar();
|
||||
if (ch == '0') {
|
||||
reader.recordChar('0');
|
||||
ch = getChar();
|
||||
if (ch&~0x20 == 'X') {
|
||||
uint32 pos = reader.charPos();
|
||||
char16orEOF ch2 = getChar();
|
||||
if (isASCIIHexDigit(char16orEOFToChar16(ch2), digit)) {
|
||||
reader.recordChar(char16orEOFToChar16(ch));
|
||||
do {
|
||||
reader.recordChar(char16orEOFToChar16(ch2));
|
||||
ch2 = getChar();
|
||||
} while (isASCIIHexDigit(char16orEOFToChar16(ch2), digit));
|
||||
ch = ch2;
|
||||
} else
|
||||
reader.backUpTo(pos);
|
||||
goto done;
|
||||
} else if (isASCIIDecimalDigit(char16orEOFToChar16(ch))) {
|
||||
syntaxError("Numeric constant syntax error");
|
||||
}
|
||||
}
|
||||
while (isASCIIDecimalDigit(char16orEOFToChar16(ch)) || ch == '.' && !hasDecimalPoint++) {
|
||||
reader.recordChar(char16orEOFToChar16(ch));
|
||||
ch = getChar();
|
||||
}
|
||||
if (ch&~0x20 == 'E') {
|
||||
uint32 pos = reader.charPos();
|
||||
char16orEOF ch2 = getChar();
|
||||
char16 sign = 0;
|
||||
if (ch2 == '+' || ch2 == '-') {
|
||||
sign = char16orEOFToChar16(ch2);
|
||||
ch2 = getChar();
|
||||
}
|
||||
if (isASCIIDecimalDigit(char16orEOFToChar16(ch2))) {
|
||||
reader.recordChar(char16orEOFToChar16(ch));
|
||||
if (sign)
|
||||
reader.recordChar(sign);
|
||||
do {
|
||||
reader.recordChar(char16orEOFToChar16(ch2));
|
||||
ch2 = getChar();
|
||||
} while (isASCIIDecimalDigit(char16orEOFToChar16(ch2)));
|
||||
ch = ch2;
|
||||
} else
|
||||
reader.backUpTo(pos);
|
||||
}
|
||||
|
||||
done:
|
||||
// At this point the reader is just past the character ch, which is the first non-formatting character
|
||||
// that is not part of the number.
|
||||
reader.endRecording();
|
||||
nextToken->setChars(s);
|
||||
reader.unget();
|
||||
ASSERT(ch == reader.peek());
|
||||
return isIdContinuing(char16orEOFToChar16(ch)) || ch == '\\';
|
||||
}
|
||||
|
||||
|
||||
// Read a string literal into a String and return that String.
|
||||
// The opening quote has already been read into separator.
|
||||
JS::String JS::Lexer::lexString(char16 separator)
|
||||
{
|
||||
String s;
|
||||
char16orEOF ch;
|
||||
|
||||
reader.beginRecording(s);
|
||||
while ((ch = reader.get()) != separator) {
|
||||
CharInfo chi(char16orEOFToChar16(ch));
|
||||
if (!isFormat(chi)) {
|
||||
if (ch == '\\')
|
||||
ch = lexEscape(false);
|
||||
else if (ch == char16eof || isLineBreak(chi))
|
||||
syntaxError("Unterminated string literal");
|
||||
reader.recordChar(char16orEOFToChar16(ch));
|
||||
}
|
||||
}
|
||||
reader.endRecording();
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
// Read a regular expression literal. Store the regular expression in nextToken->identifier
|
||||
// and the flags in nextToken->flags.
|
||||
// The opening slash has already been read.
|
||||
void JS::Lexer::lexRegExp()
|
||||
{
|
||||
String s;
|
||||
char16orEOF prevCh = 0;
|
||||
|
||||
reader.beginRecording(s);
|
||||
while (true) {
|
||||
char16orEOF ch = getChar();
|
||||
CharInfo chi(char16orEOFToChar16(ch));
|
||||
if (ch == char16eof || isLineBreak(chi))
|
||||
syntaxError("Unterminated regular expression literal");
|
||||
if (prevCh == '\\') {
|
||||
reader.recordChar(char16orEOFToChar16(ch));
|
||||
prevCh = 0; // Ignore slashes and backslashes immediately after a \
|
||||
} else if (ch != '/') {
|
||||
reader.recordChar(char16orEOFToChar16(ch));
|
||||
prevCh = ch;
|
||||
} else
|
||||
break;
|
||||
}
|
||||
reader.endRecording();
|
||||
nextToken->identifier = &world.identifiers[s];
|
||||
|
||||
String flags;
|
||||
lexIdentifier(flags, true);
|
||||
nextToken->setChars(flags);
|
||||
}
|
||||
|
||||
|
||||
// Read a token from the Reader and store it at *nextToken.
|
||||
// If the Reader reached the end of file, store a Token whose Kind is End.
|
||||
void JS::Lexer::lexToken(bool preferRegExp)
|
||||
{
|
||||
}
|
||||
Token &t = *nextToken;
|
||||
t.lineBreak = false;
|
||||
t.identifier = 0;
|
||||
t.chars.reset();
|
||||
t.value = 0;
|
||||
Token::Kind kind;
|
||||
|
||||
next:
|
||||
char16orEOF ch = reader.get();
|
||||
char16orEOF ch2;
|
||||
CharInfo chi(char16orEOFToChar16(ch));
|
||||
|
||||
switch (cGroup(chi)) {
|
||||
case CharInfo::FormatGroup:
|
||||
case CharInfo::WhiteGroup:
|
||||
goto next;
|
||||
|
||||
case CharInfo::IdGroup:
|
||||
t.charPos = reader.charPos() - 1;
|
||||
readIdentifier:
|
||||
{
|
||||
reader.unget();
|
||||
String s;
|
||||
bool hasEscape = lexIdentifier(s, false);
|
||||
t.identifier = &world.identifiers[s];
|
||||
kind = hasEscape ? Token::Id : t.identifier->tokenKind;
|
||||
}
|
||||
break;
|
||||
|
||||
case CharInfo::NonIdGroup:
|
||||
case CharInfo::IdContinueGroup:
|
||||
t.charPos = reader.charPos() - 1;
|
||||
switch (ch) {
|
||||
case '(':
|
||||
kind = Token::OpenParenthesis; // (
|
||||
break;
|
||||
case ')':
|
||||
kind = Token::CloseParenthesis; // )
|
||||
break;
|
||||
case '[':
|
||||
kind = Token::OpenBracket; // [
|
||||
break;
|
||||
case ']':
|
||||
kind = Token::CloseBracket; // ]
|
||||
break;
|
||||
case '{':
|
||||
kind = Token::OpenBrace; // {
|
||||
break;
|
||||
case '}':
|
||||
kind = Token::CloseBrace; // }
|
||||
break;
|
||||
case ',':
|
||||
kind = Token::Comma; // ,
|
||||
break;
|
||||
case ';':
|
||||
kind = Token::Semicolon; // ;
|
||||
break;
|
||||
case '.':
|
||||
kind = Token::Dot; // .
|
||||
ch2 = getChar();
|
||||
if (isASCIIDecimalDigit(char16orEOFToChar16(ch2))) {
|
||||
reader.backUpTo(t.charPos);
|
||||
goto number; // decimal point
|
||||
} else if (ch2 == '.') {
|
||||
kind = Token::DoubleDot; // ..
|
||||
if (testChar('.'))
|
||||
kind = Token::TripleDot; // ...
|
||||
} else
|
||||
reader.unget();
|
||||
break;
|
||||
case ':':
|
||||
kind = Token::Colon; // :
|
||||
if (testChar(':'))
|
||||
kind = Token::DoubleColon; // ::
|
||||
break;
|
||||
case '#':
|
||||
kind = Token::Pound; // #
|
||||
break;
|
||||
case '@':
|
||||
kind = Token::At; // @
|
||||
break;
|
||||
case '?':
|
||||
kind = Token::Question; // ?
|
||||
break;
|
||||
|
||||
case '~':
|
||||
kind = Token::Complement; // ~
|
||||
break;
|
||||
case '!':
|
||||
kind = Token::Not; // !
|
||||
if (testChar('=')) {
|
||||
kind = Token::NotEqual; // !=
|
||||
if (testChar('='))
|
||||
kind = Token::NotIdentical; // !==
|
||||
}
|
||||
break;
|
||||
|
||||
case '*':
|
||||
kind = Token::Times; // * *=
|
||||
tryAssignment:
|
||||
if (testChar('='))
|
||||
kind = Token::Kind(kind + Token::TimesEquals - Token::Times);
|
||||
break;
|
||||
|
||||
case '/':
|
||||
kind = Token::Divide; // /
|
||||
ch = getChar();
|
||||
if (ch == '/') { // // comment
|
||||
do {
|
||||
ch = reader.get();
|
||||
if (ch == char16eof)
|
||||
goto endOfInput;
|
||||
} while (!isLineBreak(char16orEOFToChar16(ch)));
|
||||
goto endOfLine;
|
||||
} else if (ch == '*') { // /* comment */
|
||||
ch = 0;
|
||||
do {
|
||||
ch2 = ch;
|
||||
ch = getChar();
|
||||
if (isLineBreak(char16orEOFToChar16(ch))) {
|
||||
reader.beginLine();
|
||||
++lineNum;
|
||||
t.lineBreak = true;
|
||||
}
|
||||
if (ch == char16eof)
|
||||
syntaxError("Unterminated /* comment");
|
||||
} while (ch != '/' || ch2 != '*');
|
||||
goto next;
|
||||
} else {
|
||||
reader.unget();
|
||||
if (preferRegExp) { // Regular expression
|
||||
kind = Token::RegExp;
|
||||
lexRegExp();
|
||||
} else
|
||||
goto tryAssignment; // /=
|
||||
}
|
||||
break;
|
||||
|
||||
case '%':
|
||||
kind = Token::Modulo; // %
|
||||
goto tryAssignment; // %=
|
||||
|
||||
case '+':
|
||||
kind = Token::Plus; // +
|
||||
if (testChar('+'))
|
||||
kind = Token::Increment; // ++
|
||||
else
|
||||
goto tryAssignment; // +=
|
||||
break;
|
||||
|
||||
case '-':
|
||||
kind = Token::Minus; // -
|
||||
ch = getChar();
|
||||
if (ch == '-')
|
||||
kind = Token::Decrement; // --
|
||||
else if (ch == '>')
|
||||
kind = Token::Arrow; // ->
|
||||
else {
|
||||
reader.unget();
|
||||
goto tryAssignment; // -=
|
||||
}
|
||||
break;
|
||||
|
||||
case '&':
|
||||
kind = Token::And; // & && &= &&=
|
||||
logical:
|
||||
if (testChar(char16orEOFToChar16(ch)))
|
||||
kind = Token::Kind(kind - Token::And + Token::LogicalAnd);
|
||||
goto tryAssignment;
|
||||
case '^':
|
||||
kind = Token::Xor; // ^ ^^ ^= ^^=
|
||||
goto logical;
|
||||
case '|':
|
||||
kind = Token::Or; // | || |= ||=
|
||||
goto logical;
|
||||
|
||||
case '=':
|
||||
kind = Token::Assignment; // =
|
||||
if (testChar('=')) {
|
||||
kind = Token::Equal; // ==
|
||||
if (testChar('='))
|
||||
kind = Token::Identical; // ===
|
||||
}
|
||||
break;
|
||||
|
||||
case '<':
|
||||
kind = Token::LessThan; // <
|
||||
if (testChar('<')) {
|
||||
kind = Token::LeftShift; // <<
|
||||
goto tryAssignment; // <<=
|
||||
}
|
||||
comparison:
|
||||
if (testChar('=')) // <= >=
|
||||
kind = Token::Kind(kind + Token::LessThanOrEqual - Token::LessThan);
|
||||
break;
|
||||
case '>':
|
||||
kind = Token::GreaterThan; // >
|
||||
if (testChar('>')) {
|
||||
kind = Token::RightShift; // >>
|
||||
if (testChar('>'))
|
||||
kind = Token::LogicalRightShift; // >>>
|
||||
goto tryAssignment; // >>= >>>=
|
||||
}
|
||||
goto comparison;
|
||||
|
||||
case '\\':
|
||||
goto readIdentifier; // An identifier that starts with an escape
|
||||
|
||||
case '\'':
|
||||
case '"':
|
||||
kind = Token::Str; // 'string' "string"
|
||||
t.setChars(lexString(char16orEOFToChar16(ch)));
|
||||
break;
|
||||
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9':
|
||||
reader.unget(); // Number
|
||||
number:
|
||||
kind = Token::Num;
|
||||
lexNumeral();
|
||||
break;
|
||||
|
||||
case char16eof:
|
||||
endOfInput:
|
||||
kind = Token::End;
|
||||
}
|
||||
break;
|
||||
|
||||
case CharInfo::LineBreakGroup:
|
||||
endOfLine:
|
||||
reader.beginLine();
|
||||
++lineNum;
|
||||
t.lineBreak = true;
|
||||
goto next;
|
||||
}
|
||||
t.kind = kind;
|
||||
t.lineNum = lineNum;
|
||||
}
|
||||
|
|
117
js2/src/parser.h
117
js2/src/parser.h
|
@ -21,22 +21,28 @@
|
|||
#define parser_h
|
||||
|
||||
#include "utilities.h"
|
||||
#include "world.h"
|
||||
|
||||
namespace JavaScript {
|
||||
|
||||
class StringAtom;
|
||||
class World;
|
||||
|
||||
//
|
||||
// Reader
|
||||
//
|
||||
|
||||
// A Reader reads Unicode characters from some source -- either a file or a string.
|
||||
// get() returns all of the characters followed by a ueof.
|
||||
// get() returns all of the characters followed by a char16eof.
|
||||
class Reader {
|
||||
const char16 *begin; // Beginning of current buffer
|
||||
const char16 *p; // Position in current buffer
|
||||
const char16 *end; // End of current buffer
|
||||
const char16 *markPos; // Pointer to mark in current buffer or null if no mark
|
||||
uint32 nGetsPastEnd; // Number of times ueof has been returned
|
||||
const char16 *lineStart; // Pointer to start of current line
|
||||
uint32 nGetsPastEnd; // Number of times char16eof has been returned
|
||||
|
||||
String *recordString; // String, if any, into which recordChar() records characters
|
||||
const char16 *recordBase; // Position of last beginRecording() call
|
||||
const char16 *recordPos; // Position of last recordChar() call; nil if a discrepancy occurred
|
||||
|
||||
protected:
|
||||
Reader(): nGetsPastEnd(0) {}
|
||||
|
@ -46,54 +52,70 @@ namespace JavaScript {
|
|||
Reader(const Reader&); // No copy constructor
|
||||
void operator=(const Reader&); // No assignment operator
|
||||
public:
|
||||
#ifdef DEBUG
|
||||
~Reader() {ASSERT(!markPos);}
|
||||
#endif
|
||||
|
||||
wint_t get();
|
||||
wint_t peek();
|
||||
void unget();
|
||||
char16orEOF get();
|
||||
char16orEOF peek();
|
||||
void unget(uint32 n = 1);
|
||||
|
||||
void mark();
|
||||
void unmark();
|
||||
void unmark(String &s);
|
||||
bool marked() const {return markPos;}
|
||||
void beginLine();
|
||||
uint32 charPos() const;
|
||||
void backUpTo(uint32 pos);
|
||||
|
||||
String extract(uint32 begin, uint32 end) const;
|
||||
void beginRecording(String &recordString);
|
||||
void recordChar(char16 ch);
|
||||
String &endRecording();
|
||||
|
||||
virtual String sourceFile() const = 0; // A description of the source code that caused the error
|
||||
|
||||
protected:
|
||||
void setBuffer(const char16 *begin, const char16 *p, const char16 *end);
|
||||
virtual wint_t underflow();
|
||||
wint_t peekUnderflow();
|
||||
virtual char16orEOF underflow();
|
||||
char16orEOF peekUnderflow();
|
||||
};
|
||||
|
||||
|
||||
// Get and return the next character or ueof if at end of input.
|
||||
inline wint_t Reader::get()
|
||||
// Get and return the next character or char16eof if at end of input.
|
||||
inline char16orEOF Reader::get()
|
||||
{
|
||||
if (p != end)
|
||||
return *p++;
|
||||
return underflow();
|
||||
}
|
||||
|
||||
// Return the next character without consuming it. Return ueof if at end of input.
|
||||
inline wint_t Reader::peek()
|
||||
// Return the next character without consuming it. Return char16eof if at end of input.
|
||||
inline char16orEOF Reader::peek()
|
||||
{
|
||||
if (p != end)
|
||||
return *p;
|
||||
return peekUnderflow();
|
||||
}
|
||||
|
||||
// Mark the current position in the Reader.
|
||||
inline void Reader::mark()
|
||||
|
||||
// Set the beginning of the current line. unget cannot be subsequently called past this point.
|
||||
inline void Reader::beginLine()
|
||||
{
|
||||
ASSERT(!markPos);
|
||||
markPos = p;
|
||||
lineStart = p;
|
||||
#ifdef DEBUG
|
||||
recordString = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Delete the Reader mark.
|
||||
inline void Reader::unmark()
|
||||
// Return the character offset relative to the current line. This cannot be called
|
||||
// if the current position is past the end of the input.
|
||||
inline uint32 Reader::charPos() const
|
||||
{
|
||||
ASSERT(markPos);
|
||||
markPos = 0;
|
||||
ASSERT(!nGetsPastEnd);
|
||||
return static_cast<uint32>(p - lineStart);
|
||||
}
|
||||
|
||||
|
||||
// Back up to the given character offset relative to the current line.
|
||||
inline void Reader::backUpTo(uint32 pos)
|
||||
{
|
||||
ASSERT(pos <= charPos());
|
||||
p = lineStart + pos;
|
||||
nGetsPastEnd = 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -103,14 +125,21 @@ namespace JavaScript {
|
|||
Reader::begin = begin;
|
||||
Reader::p = p;
|
||||
Reader::end = end;
|
||||
lineStart = begin;
|
||||
#ifdef DEBUG
|
||||
recordString = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// A Reader that reads from a String.
|
||||
class StringReader: public Reader {
|
||||
const String str;
|
||||
const String source;
|
||||
|
||||
public:
|
||||
StringReader(const String &s);
|
||||
StringReader(const String &s, const String &source);
|
||||
String sourceFile() const;
|
||||
};
|
||||
|
||||
|
||||
|
@ -122,7 +151,6 @@ namespace JavaScript {
|
|||
public:
|
||||
enum Kind {
|
||||
End, // End of token stream
|
||||
Error, // Lexer error
|
||||
|
||||
Id, // Non-keyword identifier (may be same as a keyword if it contains an escape code)
|
||||
Num, // Numeral
|
||||
|
@ -165,12 +193,12 @@ namespace JavaScript {
|
|||
LogicalAnd, // &&
|
||||
LogicalXor, // ^^
|
||||
LogicalOr, // ||
|
||||
And, // &
|
||||
And, // & // These must be at constant offsets from LogicalAnd ... LogicalOr
|
||||
Xor, // ^
|
||||
Or, // |
|
||||
|
||||
Assignment, // =
|
||||
TimesEquals, // *=
|
||||
TimesEquals, // *= // These must be at constant offsets from Times ... Or
|
||||
DivideEquals, // /=
|
||||
ModuloEquals, // %=
|
||||
PlusEquals, // +=
|
||||
|
@ -189,7 +217,7 @@ namespace JavaScript {
|
|||
NotEqual, // !=
|
||||
LessThan, // <
|
||||
LessThanOrEqual, // <=
|
||||
GreaterThan, // >
|
||||
GreaterThan, // > // >, >= must be at constant offsets from <, <=
|
||||
GreaterThanOrEqual, // >=
|
||||
Identical, // ===
|
||||
NotIdentical, // !==
|
||||
|
@ -268,13 +296,19 @@ namespace JavaScript {
|
|||
StringAtom *identifier; // The token's characters (identifiers, keywords, and regular expressions only)
|
||||
auto_ptr<String> chars; // The token's characters (strings, numbers, and regular expression flags only)
|
||||
float64 value; // The token's value (numbers only)
|
||||
|
||||
void setChars(const String &s);
|
||||
};
|
||||
|
||||
|
||||
void initKeywords(World &world);
|
||||
|
||||
|
||||
class Lexer {
|
||||
static const int tokenBufferSize = 3; // Token lookahead buffer size
|
||||
public:
|
||||
Reader &reader;
|
||||
World &world;
|
||||
private:
|
||||
Token tokens[tokenBufferSize]; // Circular buffer of recently read or lookahead tokens
|
||||
Token *nextToken; // Address of next Token in the circular buffer to be returned by get()
|
||||
|
@ -283,16 +317,31 @@ namespace JavaScript {
|
|||
int nTokensBack; // Number of Tokens on which unget() can be called; these Tokens are behind nextToken
|
||||
bool savedPreferRegExp[tokenBufferSize]; // Circular buffer of saved values of preferRegExp to get() calls
|
||||
#endif
|
||||
uint32 lineNum; // Current line number
|
||||
bool lexingUnit; // True if lexing a unit identifier immediately following a number
|
||||
|
||||
public:
|
||||
Lexer(Reader &reader);
|
||||
Lexer(Reader &reader, World &world);
|
||||
|
||||
Token &get(bool preferRegExp);
|
||||
const Token &peek(bool preferRegExp);
|
||||
void unget();
|
||||
|
||||
private:
|
||||
void syntaxError(const char *message, uint backUp = 1);
|
||||
char16orEOF getChar();
|
||||
char16orEOF internalGetChar(char16orEOF ch);
|
||||
char16orEOF peekChar();
|
||||
char16orEOF internalPeekChar(char16orEOF ch);
|
||||
bool testChar(char16 ch);
|
||||
|
||||
char16 lexEscape(bool unicodeOnly);
|
||||
bool lexIdentifier(String &s, bool allowLeadingDigit);
|
||||
bool lexNumeral();
|
||||
String lexString(char16 separator);
|
||||
void lexRegExp();
|
||||
void lexToken(bool preferRegExp);
|
||||
public:
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
|
Загрузка…
Ссылка в новой задаче