This commit is contained in:
waldemar%netscape.com 2000-01-25 22:57:32 +00:00
Родитель b79a5dc2ab
Коммит 0f9dbf7003
4 изменённых файлов: 1578 добавлений и 120 удалений

Просмотреть файл

@ -18,6 +18,8 @@
// Rights Reserved.
#include "parser.h"
#include "world.h"
namespace JS = JavaScript;
@ -28,75 +30,226 @@ namespace JS = JavaScript;
// Create a Reader reading characters from begin up to but not including end.
JS::Reader::Reader(const char16 *begin, const char16 *end):
begin(begin), p(begin), end(end), nGetsPastEnd(0)
begin(begin), p(begin), end(end), lineStart(begin), nGetsPastEnd(0)
{
ASSERT(begin <= end);
#ifdef DEBUG
recordString = 0;
#endif
}
// Unread the last character.
void JS::Reader::unget()
// Unread the last n characters. unget cannot be called to back up past the position
// of the last call to beginLine().
void JS::Reader::unget(uint32 n)
{
if (nGetsPastEnd)
--nGetsPastEnd;
else {
ASSERT(p != begin);
--p;
if (nGetsPastEnd) {
if (nGetsPastEnd >= n) {
nGetsPastEnd -= n;
return;
}
n -= nGetsPastEnd;
nGetsPastEnd = 0;
}
ASSERT(p >= begin + n);
p -= n;
}
// Set s to the characters read in after the mark but before the current position
// and then delete the Reader mark.
void JS::Reader::unmark(String &s)
// Return the characters read in from position begin inclusive to position end
// exclusive relative to the current line. begin <= end <= charPos() is required.
JS::String JS::Reader::extract(uint32 begin, uint32 end) const
{
ASSERT(markPos);
s.assign(markPos, p);
markPos = 0;
ASSERT(begin <= end && end + nGetsPastEnd <= charPos());
return String(lineStart + begin, lineStart + end);
}
// Start recording characters into recordString. Every character later handed to
// recordChar() is appended to it; recording ends when endRecording() or beginLine()
// is called.
// While the characters given to recordChar() are exactly the ones sitting in the
// buffer from the current position onwards, nothing is copied: the Reader only tracks
// the window [recordBase, recordPos). The String is filled in when endRecording() is
// called or when recordChar() first sees a character that differs from the buffer.
void JS::Reader::beginRecording(String &recordString)
{
    // The parameter shadows the member of the same name, hence the explicit this->.
    const char16 *const here = p;
    this->recordString = &recordString;
    recordBase = here;
    recordPos = here;
}
// Add ch to the end of the string being recorded. beginRecording() must have been
// called first.
void JS::Reader::recordChar(char16 ch)
{
    ASSERT(recordString);
    if (recordPos) {
        // Still in the copy-free mode: if ch is simply the next buffer character,
        // widening the tracked window is all that is needed.
        if (recordPos != end && *recordPos == ch) {
            ++recordPos;
            return;
        }
        // First mismatch: materialize what has been tracked so far and switch to
        // appending directly (recordPos == 0 marks that mode).
        recordString->assign(recordBase, recordPos);
        recordPos = 0;
    }
    *recordString += ch;
}
// Stop recording and return the String that was passed to the matching
// beginRecording() call, now holding every recorded character.
JS::String &JS::Reader::endRecording()
{
    ASSERT(recordString);
    String &recorded = *recordString;
    // A non-null recordPos means no copy has been made yet; do it now.
    if (recordPos)
        recorded.assign(recordBase, recordPos);
    recordString = 0;
    return recorded;
}
// Refill the source buffer after running off the end. Get and return
// the next character.
// The default implementation just returns ueof.
JS::wint_t JS::Reader::underflow()
// The default implementation just returns char16eof.
JS::char16orEOF JS::Reader::underflow()
{
++nGetsPastEnd;
return ueof;
return char16eof;
}
// Perform a peek when begin == end.
JS::wint_t JS::Reader::peekUnderflow()
JS::char16orEOF JS::Reader::peekUnderflow()
{
wint_t ch = underflow();
char16orEOF ch = underflow();
unget();
return ch;
}
// Create a StringReader reading characters from a copy of the given String.
JS::StringReader::StringReader(const String &s):
str(s)
// Create a StringReader reading characters from s.
// source describes the origin of string s and may be used for error messages.
JS::StringReader::StringReader(const String &s, const String &source):
str(s), source(source)
{
const char16 *begin = str.data();
setBuffer(begin, begin, begin + str.size());
}
// Return a copy of the source description given to the constructor.
JS::String JS::StringReader::sourceFile() const
{
    const String &description = source;
    return description;
}
//
// Lexer
//
// Create a new Lexer using the provided Reader.
JS::Lexer::Lexer(Reader &reader): reader(reader)
void JS::Token::setChars(const String &s)
{
chars = static_cast<auto_ptr<String> >(new String(s));
}
// One row of the keyword table: a keyword's spelling paired with its token kind.
struct KeywordInit {
    // Null-terminated ASCII name of keyword
    const char *name;
    // Keyword's number
    JS::Token::Kind tokenKind;
};
// Table of every keyword spelling and the Token::Kind it maps to.
// initKeywords() walks this table to tag the matching identifiers in a World.
// Each keyword appears exactly once.
static KeywordInit keywordInits[] = {
    // Reserved words
    {"abstract", JS::Token::Abstract},
    {"break", JS::Token::Break},
    {"case", JS::Token::Case},
    {"catch", JS::Token::Catch},
    {"class", JS::Token::Class},
    {"const", JS::Token::Const},
    {"continue", JS::Token::Continue},
    {"debugger", JS::Token::Debugger},
    {"default", JS::Token::Default},
    {"delete", JS::Token::Delete},
    {"do", JS::Token::Do},
    {"else", JS::Token::Else},
    {"enum", JS::Token::Enum},
    {"eval", JS::Token::Eval},
    {"export", JS::Token::Export},
    {"extends", JS::Token::Extends},
    {"false", JS::Token::False},
    {"final", JS::Token::Final},
    {"finally", JS::Token::Finally},
    {"for", JS::Token::For},
    {"function", JS::Token::Function},
    {"goto", JS::Token::Goto},
    {"if", JS::Token::If},
    {"implements", JS::Token::Implements},
    {"import", JS::Token::Import},
    {"in", JS::Token::In},
    {"instanceof", JS::Token::Instanceof},
    {"native", JS::Token::Native},
    {"new", JS::Token::New},
    {"null", JS::Token::Null},
    {"package", JS::Token::Package},
    {"private", JS::Token::Private},
    {"protected", JS::Token::Protected},
    {"public", JS::Token::Public},
    {"return", JS::Token::Return},
    {"static", JS::Token::Static},
    {"super", JS::Token::Super},
    {"switch", JS::Token::Switch},
    {"synchronized", JS::Token::Synchronized},
    {"this", JS::Token::This},
    {"throw", JS::Token::Throw},
    {"throws", JS::Token::Throws},
    {"transient", JS::Token::Transient},
    {"true", JS::Token::True},
    {"try", JS::Token::Try},
    {"typeof", JS::Token::Typeof},
    {"var", JS::Token::Var},
    {"volatile", JS::Token::Volatile},
    {"while", JS::Token::While},
    {"with", JS::Token::With},

    // Non-reserved words
    {"box", JS::Token::Box},
    {"constructor", JS::Token::Constructor},
    {"field", JS::Token::Field},
    {"get", JS::Token::Get},
    {"language", JS::Token::Language},
    {"local", JS::Token::Local},
    {"method", JS::Token::Method},
    {"override", JS::Token::Override},
    {"set", JS::Token::Set},
    {"version", JS::Token::Version}
};
// Tag every keyword's identifier entry in the given world with its token kind.
// Looking a name up in world.identifiers is done with operator[], exactly once per
// row of keywordInits.
void JS::initKeywords(World &world)
{
    const KeywordInit *row = keywordInits;
    const KeywordInit *const stop = keywordInits + sizeof(keywordInits)/sizeof(KeywordInit);
    while (row != stop) {
        world.identifiers[widenCString(row->name)].tokenKind = row->tokenKind;
        ++row;
    }
}
// Construct a Lexer that pulls characters from reader and interns identifiers,
// keywords, and regular expressions in world. Both are held by reference.
JS::Lexer::Lexer(Reader &reader, World &world): reader(reader), world(world)
{
    // Lexing starts on line 1, outside of any unit.
    lineNum = 1;
    lexingUnit = false;

    // The token buffer starts out empty, with the cursor at its first slot.
    nTokensFwd = 0;
#ifdef DEBUG
    nTokensBack = 0;
#endif
    nextToken = tokens;
}
@ -156,9 +309,536 @@ void JS::Lexer::unget()
}
// Report a syntax error at the backUp-th last character read by the Reader.
// In other words, if backUp is 0, the error is at the next character to be read by the Reader;
// if backUp is 1, the error is at the last character read by the Reader, and so forth.
void JS::Lexer::syntaxError(const char *message, uint backUp)
{
reader.unget(backUp);
uint32 charPos = reader.charPos();
char16orEOF ch;
do {
ch = reader.get();
} while (ch != char16eof && !isLineBreak(char16orEOFToChar16(ch)));
reader.unget();
Exception e(Exception::SyntaxError, widenCString(message), reader.sourceFile(), lineNum, charPos,
reader.extract(0, reader.charPos()));
throw e;
}
// Read and return the next character from the reader, passing over any Unicode
// format-control (Cf) characters.
// Values below firstFormatChar are returned immediately; anything at or above it
// (which includes the end-of-input value after the cast) is handed to internalGetChar().
inline JS::char16orEOF JS::Lexer::getChar()
{
    const char16orEOF ch = reader.get();
    return static_cast<uint32>(ch) >= firstFormatChar ? internalGetChar(ch) : ch;
}
// Out-of-line part of getChar(): starting from the already-read character ch, keep
// reading while the character is a format-control character and return the first
// one that is not.
JS::char16orEOF JS::Lexer::internalGetChar(char16orEOF ch)
{
    for (; isFormat(char16orEOFToChar16(ch)); ch = reader.get()) {
    }
    return ch;
}
// Return the next character without consuming it. Unicode format-control (Cf)
// characters in front of it are consumed and thrown away.
inline JS::char16orEOF JS::Lexer::peekChar()
{
    const char16orEOF ch = reader.peek();
    return static_cast<uint32>(ch) >= firstFormatChar ? internalPeekChar(ch) : ch;
}
// Out-of-line part of peekChar(): ch is the character just peeked. As long as the
// peeked character is a format-control character, consume it and peek again; return
// the first peeked character that is not one (it stays unread).
JS::char16orEOF JS::Lexer::internalPeekChar(char16orEOF ch)
{
    for (;;) {
        if (!isFormat(char16orEOFToChar16(ch)))
            return ch;
        reader.get();
        ch = reader.peek();
    }
}
// Consume the next character if, and only if, it equals ch; report whether it did.
// Unicode format-control (Cf) characters preceding it are read and discarded in
// either case.
bool JS::Lexer::testChar(char16 ch)
{
    if (peekChar() != ch)
        return false;
    reader.get();
    return true;
}
// A backslash has been read. Read the rest of the escape code and return the
// character it denotes.
// If unicodeOnly is true, only \uxxxx escapes are accepted.
// Any invalid escape is reported through syntaxError("Bad escape code"), which throws:
//   - \0 followed by a decimal digit,
//   - \x or \u not followed by 2 resp. 4 hexadecimal digits,
//   - a backslash followed by end of input, a line break, or an alphanumeric
//     character that is not one of the escapes listed below,
//   - anything other than \u when unicodeOnly is set.
// A backslash followed by any other character yields that character itself.
char16 JS::Lexer::lexEscape(bool unicodeOnly)
{
    char16orEOF ch = getChar();
    int nDigits;

    if (!unicodeOnly || ch == 'u')
        switch (ch) {
          case '0':
            // Make sure that the next character isn't a digit.
            ch = peekChar();
            if (!isASCIIDecimalDigit(char16orEOFToChar16(ch)))
                return 0x00;
            // Consume the digit so that the error (reported at the last character
            // read) points at it, then report the error. Without this jump control
            // would fall into case 'b' and silently return a backspace.
            getChar();
            goto error;
          case 'b':
            return 0x08;
          case 'f':
            return 0x0C;
          case 'n':
            return 0x0A;
          case 'r':
            return 0x0D;
          case 't':
            return 0x09;
          case 'v':
            return 0x0B;
          case 'x':
            nDigits = 2;
            goto lexHex;
          case 'u':
            nDigits = 4;
          lexHex:
            {
                // Accumulate exactly nDigits hexadecimal digits, most significant first.
                uint32 n = 0;
                while (nDigits--) {
                    ch = getChar();
                    uint digit;
                    if (!isASCIIHexDigit(char16orEOFToChar16(ch), digit))
                        goto error;
                    n = (n << 4) | digit;
                }
                return char16(n);
            }
          default:
            // Any other character stands for itself unless it is alphanumeric,
            // a line break, or the end of input.
            if (ch != char16eof) {
                CharInfo chi(char16orEOFToChar16(ch));
                if (!isAlphanumeric(chi) && !isLineBreak(chi))
                    return char16orEOFToChar16(ch);
            }
        }
  error:
    syntaxError("Bad escape code");
    return 0;
}
// Read an identifier and record its characters (with escapes already decoded) into s.
// Return true if at least one escape code was encountered.
// Normally the first character must satisfy isIdLeading() and the following ones
// isIdContinuing(); if allowLeadingDigit is true the first character is held to the
// isIdContinuing() rule as well, so it may be a digit.
// Reading stops at the first character that does not fit; that character is put back,
// unless it came from an escape, in which case a syntax error is thrown.
bool JS::Lexer::lexIdentifier(String &s, bool allowLeadingDigit)
{
    bool sawEscape = false;
    reader.beginRecording(s);

    for (;;) {
        const char16orEOF raw = getChar();
        const bool escaped = raw == '\\';
        char16orEOF decoded = raw;
        if (escaped) {
            decoded = lexEscape(true);
            sawEscape = true;
        }

        CharInfo info(char16orEOFToChar16(decoded));
        const bool fits = allowLeadingDigit ? isIdContinuing(info) : isIdLeading(info);
        if (!fits) {
            if (escaped)
                syntaxError("Identifier escape expands into non-identifier character");
            else
                reader.unget();
            break;
        }

        reader.recordChar(char16orEOFToChar16(decoded));
        // Only the very first character is subject to the stricter leading rule.
        allowLeadingDigit = true;
    }

    reader.endRecording();
    return sawEscape;
}
// Read a numeric literal and store its text in nextToken->chars.
// (This function does not compute nextToken->value.)
// Accepted forms: 0x/0X followed by hexadecimal digits, or decimal digits with at most
// one decimal point and an optional e/E exponent with optional sign. A leading 0
// directly followed by another decimal digit is a syntax error.
// If "0x" is not followed by a hex digit, or "e" is not followed by an (optionally
// signed) digit, the reader is rewound so the numeral ends before the x resp. e.
// Return true if the numeral is immediately followed by an identifier-continuing
// character or a backslash, i.e. by a unit; the unit itself is not read.
bool JS::Lexer::lexNumeral()
{
    int hasDecimalPoint = 0;
    String s;
    uint digit;

    reader.beginRecording(s);
    char16orEOF ch = getChar();
    if (ch == '0') {
        reader.recordChar('0');
        ch = getChar();
        // Clearing bit 0x20 folds 'x' onto 'X'. The parentheses are essential:
        // == binds tighter than &, so without them the test is always false.
        if ((ch & ~0x20) == 'X') {
            uint32 pos = reader.charPos();
            char16orEOF ch2 = getChar();
            if (isASCIIHexDigit(char16orEOFToChar16(ch2), digit)) {
                reader.recordChar(char16orEOFToChar16(ch));
                do {
                    reader.recordChar(char16orEOFToChar16(ch2));
                    ch2 = getChar();
                } while (isASCIIHexDigit(char16orEOFToChar16(ch2), digit));
                ch = ch2;
            } else
                // Not a hex literal after all: go back to just past the x.
                reader.backUpTo(pos);
            goto done;
        } else if (isASCIIDecimalDigit(char16orEOFToChar16(ch))) {
            syntaxError("Numeric constant syntax error");
        }
    }
    // Integer and fraction digits; the post-increment lets only the first '.' through.
    while (isASCIIDecimalDigit(char16orEOFToChar16(ch)) || (ch == '.' && !hasDecimalPoint++)) {
        reader.recordChar(char16orEOFToChar16(ch));
        ch = getChar();
    }
    // Same case-folding trick (and same need for parentheses) as for 'X' above.
    if ((ch & ~0x20) == 'E') {
        uint32 pos = reader.charPos();
        char16orEOF ch2 = getChar();
        char16 sign = 0;
        if (ch2 == '+' || ch2 == '-') {
            sign = char16orEOFToChar16(ch2);
            ch2 = getChar();
        }
        if (isASCIIDecimalDigit(char16orEOFToChar16(ch2))) {
            reader.recordChar(char16orEOFToChar16(ch));
            if (sign)
                reader.recordChar(sign);
            do {
                reader.recordChar(char16orEOFToChar16(ch2));
                ch2 = getChar();
            } while (isASCIIDecimalDigit(char16orEOFToChar16(ch2)));
            ch = ch2;
        } else
            // No exponent digits: go back to just past the e; it is not part of the number.
            reader.backUpTo(pos);
    }

  done:
    // At this point the reader is just past the character ch, which is the first non-formatting character
    // that is not part of the number.
    reader.endRecording();
    nextToken->setChars(s);
    reader.unget();
    ASSERT(ch == reader.peek());
    return isIdContinuing(char16orEOFToChar16(ch)) || ch == '\\';
}
// Read the body of a string literal and return it with escapes decoded.
// The opening quote has already been consumed; separator is that quote character and
// the literal runs up to the next unescaped occurrence of it, which is consumed too.
// Format-control characters inside the literal are dropped. Reaching a line break or
// the end of input first is reported as a syntax error.
JS::String JS::Lexer::lexString(char16 separator)
{
    String text;
    reader.beginRecording(text);

    for (char16orEOF ch = reader.get(); ch != separator; ch = reader.get()) {
        CharInfo chi(char16orEOFToChar16(ch));
        if (isFormat(chi))
            continue;
        if (ch == '\\')
            ch = lexEscape(false);
        else if (ch == char16eof || isLineBreak(chi))
            syntaxError("Unterminated string literal");
        reader.recordChar(char16orEOFToChar16(ch));
    }

    reader.endRecording();
    return text;
}
// Read a regular expression literal. The opening slash has already been read.
// The body (everything up to the closing unescaped slash, backslashes included) is
// interned in world.identifiers and stored in nextToken->identifier; the flags that
// follow the closing slash are read like an identifier that may start with a digit
// and stored in nextToken->chars.
// A line break or the end of input inside the body is a syntax error.
void JS::Lexer::lexRegExp()
{
    String s;
    char16orEOF prevCh = 0;

    reader.beginRecording(s);
    while (true) {
        char16orEOF ch = getChar();
        CharInfo chi(char16orEOFToChar16(ch));
        if (ch == char16eof || isLineBreak(chi))
            syntaxError("Unterminated regular expression literal");
        if (prevCh == '\\') {
            // The character right after a backslash is taken literally: a slash here
            // does not end the literal and a second backslash does not start an escape.
            // (Keep this comment from ending in a backslash character; that would
            // splice the following line into the comment.)
            reader.recordChar(char16orEOFToChar16(ch));
            prevCh = 0;
        } else if (ch != '/') {
            reader.recordChar(char16orEOFToChar16(ch));
            prevCh = ch;
        } else
            break;
    }
    reader.endRecording();
    nextToken->identifier = &world.identifiers[s];

    String flags;
    lexIdentifier(flags, true);
    nextToken->setChars(flags);
}
// Read a token from the Reader and store it at *nextToken.
// If the Reader reached the end of file, store a Token whose Kind is End.
void JS::Lexer::lexToken(bool preferRegExp)
{
}
Token &t = *nextToken;
t.lineBreak = false;
t.identifier = 0;
t.chars.reset();
t.value = 0;
Token::Kind kind;
next:
char16orEOF ch = reader.get();
char16orEOF ch2;
CharInfo chi(char16orEOFToChar16(ch));
switch (cGroup(chi)) {
case CharInfo::FormatGroup:
case CharInfo::WhiteGroup:
goto next;
case CharInfo::IdGroup:
t.charPos = reader.charPos() - 1;
readIdentifier:
{
reader.unget();
String s;
bool hasEscape = lexIdentifier(s, false);
t.identifier = &world.identifiers[s];
kind = hasEscape ? Token::Id : t.identifier->tokenKind;
}
break;
case CharInfo::NonIdGroup:
case CharInfo::IdContinueGroup:
t.charPos = reader.charPos() - 1;
switch (ch) {
case '(':
kind = Token::OpenParenthesis; // (
break;
case ')':
kind = Token::CloseParenthesis; // )
break;
case '[':
kind = Token::OpenBracket; // [
break;
case ']':
kind = Token::CloseBracket; // ]
break;
case '{':
kind = Token::OpenBrace; // {
break;
case '}':
kind = Token::CloseBrace; // }
break;
case ',':
kind = Token::Comma; // ,
break;
case ';':
kind = Token::Semicolon; // ;
break;
case '.':
kind = Token::Dot; // .
ch2 = getChar();
if (isASCIIDecimalDigit(char16orEOFToChar16(ch2))) {
reader.backUpTo(t.charPos);
goto number; // decimal point
} else if (ch2 == '.') {
kind = Token::DoubleDot; // ..
if (testChar('.'))
kind = Token::TripleDot; // ...
} else
reader.unget();
break;
case ':':
kind = Token::Colon; // :
if (testChar(':'))
kind = Token::DoubleColon; // ::
break;
case '#':
kind = Token::Pound; // #
break;
case '@':
kind = Token::At; // @
break;
case '?':
kind = Token::Question; // ?
break;
case '~':
kind = Token::Complement; // ~
break;
case '!':
kind = Token::Not; // !
if (testChar('=')) {
kind = Token::NotEqual; // !=
if (testChar('='))
kind = Token::NotIdentical; // !==
}
break;
case '*':
kind = Token::Times; // * *=
tryAssignment:
if (testChar('='))
kind = Token::Kind(kind + Token::TimesEquals - Token::Times);
break;
case '/':
kind = Token::Divide; // /
ch = getChar();
if (ch == '/') { // // comment
do {
ch = reader.get();
if (ch == char16eof)
goto endOfInput;
} while (!isLineBreak(char16orEOFToChar16(ch)));
goto endOfLine;
} else if (ch == '*') { // /* comment */
ch = 0;
do {
ch2 = ch;
ch = getChar();
if (isLineBreak(char16orEOFToChar16(ch))) {
reader.beginLine();
++lineNum;
t.lineBreak = true;
}
if (ch == char16eof)
syntaxError("Unterminated /* comment");
} while (ch != '/' || ch2 != '*');
goto next;
} else {
reader.unget();
if (preferRegExp) { // Regular expression
kind = Token::RegExp;
lexRegExp();
} else
goto tryAssignment; // /=
}
break;
case '%':
kind = Token::Modulo; // %
goto tryAssignment; // %=
case '+':
kind = Token::Plus; // +
if (testChar('+'))
kind = Token::Increment; // ++
else
goto tryAssignment; // +=
break;
case '-':
kind = Token::Minus; // -
ch = getChar();
if (ch == '-')
kind = Token::Decrement; // --
else if (ch == '>')
kind = Token::Arrow; // ->
else {
reader.unget();
goto tryAssignment; // -=
}
break;
case '&':
kind = Token::And; // & && &= &&=
logical:
if (testChar(char16orEOFToChar16(ch)))
kind = Token::Kind(kind - Token::And + Token::LogicalAnd);
goto tryAssignment;
case '^':
kind = Token::Xor; // ^ ^^ ^= ^^=
goto logical;
case '|':
kind = Token::Or; // | || |= ||=
goto logical;
case '=':
kind = Token::Assignment; // =
if (testChar('=')) {
kind = Token::Equal; // ==
if (testChar('='))
kind = Token::Identical; // ===
}
break;
case '<':
kind = Token::LessThan; // <
if (testChar('<')) {
kind = Token::LeftShift; // <<
goto tryAssignment; // <<=
}
comparison:
if (testChar('=')) // <= >=
kind = Token::Kind(kind + Token::LessThanOrEqual - Token::LessThan);
break;
case '>':
kind = Token::GreaterThan; // >
if (testChar('>')) {
kind = Token::RightShift; // >>
if (testChar('>'))
kind = Token::LogicalRightShift; // >>>
goto tryAssignment; // >>= >>>=
}
goto comparison;
case '\\':
goto readIdentifier; // An identifier that starts with an escape
case '\'':
case '"':
kind = Token::Str; // 'string' "string"
t.setChars(lexString(char16orEOFToChar16(ch)));
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
reader.unget(); // Number
number:
kind = Token::Num;
lexNumeral();
break;
case char16eof:
endOfInput:
kind = Token::End;
}
break;
case CharInfo::LineBreakGroup:
endOfLine:
reader.beginLine();
++lineNum;
t.lineBreak = true;
goto next;
}
t.kind = kind;
t.lineNum = lineNum;
}

Просмотреть файл

@ -21,23 +21,29 @@
#define parser_h
#include "utilities.h"
#include "world.h"
namespace JavaScript {
class StringAtom;
class World;
//
// Reader
//
// A Reader reads Unicode characters from some source -- either a file or a string.
// get() returns all of the characters followed by a ueof.
// get() returns all of the characters followed by a char16eof.
class Reader {
const char16 *begin; // Beginning of current buffer
const char16 *p; // Position in current buffer
const char16 *end; // End of current buffer
const char16 *markPos; // Pointer to mark in current buffer or null if no mark
uint32 nGetsPastEnd; // Number of times ueof has been returned
const char16 *lineStart; // Pointer to start of current line
uint32 nGetsPastEnd; // Number of times char16eof has been returned
String *recordString; // String, if any, into which recordChar() records characters
const char16 *recordBase; // Position of last beginRecording() call
const char16 *recordPos; // Position of last recordChar() call; nil if a discrepancy occurred
protected:
Reader(): nGetsPastEnd(0) {}
public:
@ -46,54 +52,70 @@ namespace JavaScript {
Reader(const Reader&); // No copy constructor
void operator=(const Reader&); // No assignment operator
public:
#ifdef DEBUG
~Reader() {ASSERT(!markPos);}
#endif
wint_t get();
wint_t peek();
void unget();
char16orEOF get();
char16orEOF peek();
void unget(uint32 n = 1);
void mark();
void unmark();
void unmark(String &s);
bool marked() const {return markPos;}
void beginLine();
uint32 charPos() const;
void backUpTo(uint32 pos);
String extract(uint32 begin, uint32 end) const;
void beginRecording(String &recordString);
void recordChar(char16 ch);
String &endRecording();
virtual String sourceFile() const = 0; // A description of the source code that caused the error
protected:
void setBuffer(const char16 *begin, const char16 *p, const char16 *end);
virtual wint_t underflow();
wint_t peekUnderflow();
virtual char16orEOF underflow();
char16orEOF peekUnderflow();
};
// Get and return the next character or ueof if at end of input.
inline wint_t Reader::get()
// Get and return the next character or char16eof if at end of input.
inline char16orEOF Reader::get()
{
if (p != end)
return *p++;
return underflow();
}
// Return the next character without consuming it. Return ueof if at end of input.
inline wint_t Reader::peek()
// Return the next character without consuming it. Return char16eof if at end of input.
inline char16orEOF Reader::peek()
{
if (p != end)
return *p;
return peekUnderflow();
}
// Mark the current position in the Reader.
inline void Reader::mark()
// Set the beginning of the current line. unget cannot be subsequently called past this point.
inline void Reader::beginLine()
{
ASSERT(!markPos);
markPos = p;
lineStart = p;
#ifdef DEBUG
recordString = 0;
#endif
}
// Delete the Reader mark.
inline void Reader::unmark()
// Return the character offset relative to the current line. This cannot be called
// if the current position is past the end of the input.
inline uint32 Reader::charPos() const
{
ASSERT(markPos);
markPos = 0;
ASSERT(!nGetsPastEnd);
return static_cast<uint32>(p - lineStart);
}
// Back up to the given character offset relative to the current line.
// pos must not lie beyond the current buffer position. Unlike charPos(), this may be
// called after end-of-input has been returned by get(); any such pending reads past
// the end are forgotten.
inline void Reader::backUpTo(uint32 pos)
{
    // Compute the offset directly instead of calling charPos(): charPos() asserts
    // that no read past the end is pending, which is a state this function accepts.
    ASSERT(pos <= static_cast<uint32>(p - lineStart));
    p = lineStart + pos;
    nGetsPastEnd = 0;
}
@ -103,14 +125,21 @@ namespace JavaScript {
Reader::begin = begin;
Reader::p = p;
Reader::end = end;
lineStart = begin;
#ifdef DEBUG
recordString = 0;
#endif
}
// A Reader that reads from a String.
class StringReader: public Reader {
const String str;
const String source;
public:
StringReader(const String &s);
StringReader(const String &s, const String &source);
String sourceFile() const;
};
@ -122,7 +151,6 @@ namespace JavaScript {
public:
enum Kind {
End, // End of token stream
Error, // Lexer error
Id, // Non-keyword identifier (may be same as a keyword if it contains an escape code)
Num, // Numeral
@ -165,12 +193,12 @@ namespace JavaScript {
LogicalAnd, // &&
LogicalXor, // ^^
LogicalOr, // ||
And, // &
And, // & // These must be at constant offsets from LogicalAnd ... LogicalOr
Xor, // ^
Or, // |
Assignment, // =
TimesEquals, // *=
TimesEquals, // *= // These must be at constant offsets from Times ... Or
DivideEquals, // /=
ModuloEquals, // %=
PlusEquals, // +=
@ -189,7 +217,7 @@ namespace JavaScript {
NotEqual, // !=
LessThan, // <
LessThanOrEqual, // <=
GreaterThan, // >
GreaterThan, // > // >, >= must be at constant offsets from <, <=
GreaterThanOrEqual, // >=
Identical, // ===
NotIdentical, // !==
@ -268,13 +296,19 @@ namespace JavaScript {
StringAtom *identifier; // The token's characters (identifiers, keywords, and regular expressions only)
auto_ptr<String> chars; // The token's characters (strings, numbers, and regular expression flags only)
float64 value; // The token's value (numbers only)
void setChars(const String &s);
};
void initKeywords(World &world);
class Lexer {
static const int tokenBufferSize = 3; // Token lookahead buffer size
public:
Reader &reader;
World &world;
private:
Token tokens[tokenBufferSize]; // Circular buffer of recently read or lookahead tokens
Token *nextToken; // Address of next Token in the circular buffer to be returned by get()
@ -283,16 +317,31 @@ namespace JavaScript {
int nTokensBack; // Number of Tokens on which unget() can be called; these Tokens are beind nextToken
bool savedPreferRegExp[tokenBufferSize]; // Circular buffer of saved values of preferRegExp to get() calls
#endif
uint32 lineNum; // Current line number
bool lexingUnit; // True if lexing a unit identifier immediately following a number
public:
Lexer(Reader &reader);
Lexer(Reader &reader, World &world);
Token &get(bool preferRegExp);
const Token &peek(bool preferRegExp);
void unget();
private:
void syntaxError(const char *message, uint backUp = 1);
char16orEOF getChar();
char16orEOF internalGetChar(char16orEOF ch);
char16orEOF peekChar();
char16orEOF internalPeekChar(char16orEOF ch);
bool testChar(char16 ch);
char16 lexEscape(bool unicodeOnly);
bool lexIdentifier(String &s, bool allowLeadingDigit);
bool lexNumeral();
String lexString(char16 separator);
void lexRegExp();
void lexToken(bool preferRegExp);
public:
};
}
#endif

Просмотреть файл

@ -18,6 +18,8 @@
// Rights Reserved.
#include "parser.h"
#include "world.h"
namespace JS = JavaScript;
@ -28,75 +30,226 @@ namespace JS = JavaScript;
// Create a Reader reading characters from begin up to but not including end.
JS::Reader::Reader(const char16 *begin, const char16 *end):
begin(begin), p(begin), end(end), nGetsPastEnd(0)
begin(begin), p(begin), end(end), lineStart(begin), nGetsPastEnd(0)
{
ASSERT(begin <= end);
#ifdef DEBUG
recordString = 0;
#endif
}
// Unread the last character.
void JS::Reader::unget()
// Unread the last n characters. unget cannot be called to back up past the position
// of the last call to beginLine().
void JS::Reader::unget(uint32 n)
{
if (nGetsPastEnd)
--nGetsPastEnd;
else {
ASSERT(p != begin);
--p;
if (nGetsPastEnd) {
if (nGetsPastEnd >= n) {
nGetsPastEnd -= n;
return;
}
n -= nGetsPastEnd;
nGetsPastEnd = 0;
}
ASSERT(p >= begin + n);
p -= n;
}
// Set s to the characters read in after the mark but before the current position
// and then delete the Reader mark.
void JS::Reader::unmark(String &s)
// Return the characters read in from position begin inclusive to position end
// exclusive relative to the current line. begin <= end <= charPos() is required.
JS::String JS::Reader::extract(uint32 begin, uint32 end) const
{
ASSERT(markPos);
s.assign(markPos, p);
markPos = 0;
ASSERT(begin <= end && end + nGetsPastEnd <= charPos());
return String(lineStart + begin, lineStart + end);
}
// Start recording characters into recordString. Every character later handed to
// recordChar() is appended to it; recording ends when endRecording() or beginLine()
// is called.
// While the characters given to recordChar() are exactly the ones sitting in the
// buffer from the current position onwards, nothing is copied: the Reader only tracks
// the window [recordBase, recordPos). The String is filled in when endRecording() is
// called or when recordChar() first sees a character that differs from the buffer.
void JS::Reader::beginRecording(String &recordString)
{
    // The parameter shadows the member of the same name, hence the explicit this->.
    const char16 *const here = p;
    this->recordString = &recordString;
    recordBase = here;
    recordPos = here;
}
// Add ch to the end of the string being recorded. beginRecording() must have been
// called first.
void JS::Reader::recordChar(char16 ch)
{
    ASSERT(recordString);
    if (recordPos) {
        // Still in the copy-free mode: if ch is simply the next buffer character,
        // widening the tracked window is all that is needed.
        if (recordPos != end && *recordPos == ch) {
            ++recordPos;
            return;
        }
        // First mismatch: materialize what has been tracked so far and switch to
        // appending directly (recordPos == 0 marks that mode).
        recordString->assign(recordBase, recordPos);
        recordPos = 0;
    }
    *recordString += ch;
}
// Stop recording and return the String that was passed to the matching
// beginRecording() call, now holding every recorded character.
JS::String &JS::Reader::endRecording()
{
    ASSERT(recordString);
    String &recorded = *recordString;
    // A non-null recordPos means no copy has been made yet; do it now.
    if (recordPos)
        recorded.assign(recordBase, recordPos);
    recordString = 0;
    return recorded;
}
// Refill the source buffer after running off the end. Get and return
// the next character.
// The default implementation just returns ueof.
JS::wint_t JS::Reader::underflow()
// The default implementation just returns char16eof.
JS::char16orEOF JS::Reader::underflow()
{
++nGetsPastEnd;
return ueof;
return char16eof;
}
// Perform a peek when begin == end.
JS::wint_t JS::Reader::peekUnderflow()
JS::char16orEOF JS::Reader::peekUnderflow()
{
wint_t ch = underflow();
char16orEOF ch = underflow();
unget();
return ch;
}
// Create a StringReader reading characters from a copy of the given String.
JS::StringReader::StringReader(const String &s):
str(s)
// Create a StringReader reading characters from s.
// source describes the origin of string s and may be used for error messages.
JS::StringReader::StringReader(const String &s, const String &source):
str(s), source(source)
{
const char16 *begin = str.data();
setBuffer(begin, begin, begin + str.size());
}
// Return a copy of the source description given to the constructor.
JS::String JS::StringReader::sourceFile() const
{
    const String &description = source;
    return description;
}
//
// Lexer
//
// Create a new Lexer using the provided Reader.
JS::Lexer::Lexer(Reader &reader): reader(reader)
void JS::Token::setChars(const String &s)
{
chars = static_cast<auto_ptr<String> >(new String(s));
}
// One row of the keyword table: a keyword's spelling paired with its token kind.
struct KeywordInit {
    // Null-terminated ASCII name of keyword
    const char *name;
    // Keyword's number
    JS::Token::Kind tokenKind;
};
// Table of every keyword spelling and the Token::Kind it maps to.
// initKeywords() walks this table to tag the matching identifiers in a World.
// Each keyword appears exactly once.
static KeywordInit keywordInits[] = {
    // Reserved words
    {"abstract", JS::Token::Abstract},
    {"break", JS::Token::Break},
    {"case", JS::Token::Case},
    {"catch", JS::Token::Catch},
    {"class", JS::Token::Class},
    {"const", JS::Token::Const},
    {"continue", JS::Token::Continue},
    {"debugger", JS::Token::Debugger},
    {"default", JS::Token::Default},
    {"delete", JS::Token::Delete},
    {"do", JS::Token::Do},
    {"else", JS::Token::Else},
    {"enum", JS::Token::Enum},
    {"eval", JS::Token::Eval},
    {"export", JS::Token::Export},
    {"extends", JS::Token::Extends},
    {"false", JS::Token::False},
    {"final", JS::Token::Final},
    {"finally", JS::Token::Finally},
    {"for", JS::Token::For},
    {"function", JS::Token::Function},
    {"goto", JS::Token::Goto},
    {"if", JS::Token::If},
    {"implements", JS::Token::Implements},
    {"import", JS::Token::Import},
    {"in", JS::Token::In},
    {"instanceof", JS::Token::Instanceof},
    {"native", JS::Token::Native},
    {"new", JS::Token::New},
    {"null", JS::Token::Null},
    {"package", JS::Token::Package},
    {"private", JS::Token::Private},
    {"protected", JS::Token::Protected},
    {"public", JS::Token::Public},
    {"return", JS::Token::Return},
    {"static", JS::Token::Static},
    {"super", JS::Token::Super},
    {"switch", JS::Token::Switch},
    {"synchronized", JS::Token::Synchronized},
    {"this", JS::Token::This},
    {"throw", JS::Token::Throw},
    {"throws", JS::Token::Throws},
    {"transient", JS::Token::Transient},
    {"true", JS::Token::True},
    {"try", JS::Token::Try},
    {"typeof", JS::Token::Typeof},
    {"var", JS::Token::Var},
    {"volatile", JS::Token::Volatile},
    {"while", JS::Token::While},
    {"with", JS::Token::With},

    // Non-reserved words
    {"box", JS::Token::Box},
    {"constructor", JS::Token::Constructor},
    {"field", JS::Token::Field},
    {"get", JS::Token::Get},
    {"language", JS::Token::Language},
    {"local", JS::Token::Local},
    {"method", JS::Token::Method},
    {"override", JS::Token::Override},
    {"set", JS::Token::Set},
    {"version", JS::Token::Version}
};
// Tag every keyword's identifier entry in the given world with its token kind.
// Looking a name up in world.identifiers is done with operator[], exactly once per
// row of keywordInits.
void JS::initKeywords(World &world)
{
    const KeywordInit *row = keywordInits;
    const KeywordInit *const stop = keywordInits + sizeof(keywordInits)/sizeof(KeywordInit);
    while (row != stop) {
        world.identifiers[widenCString(row->name)].tokenKind = row->tokenKind;
        ++row;
    }
}
// Construct a Lexer that pulls characters from reader and interns identifiers,
// keywords, and regular expressions in world. Both are held by reference.
JS::Lexer::Lexer(Reader &reader, World &world): reader(reader), world(world)
{
    // Lexing starts on line 1, outside of any unit.
    lineNum = 1;
    lexingUnit = false;

    // The token buffer starts out empty, with the cursor at its first slot.
    nTokensFwd = 0;
#ifdef DEBUG
    nTokensBack = 0;
#endif
    nextToken = tokens;
}
@ -156,9 +309,536 @@ void JS::Lexer::unget()
}
// Report a syntax error at the backUp-th last character read by the Reader.
// In other words, if backUp is 0, the error is at the next character to be read by the Reader;
// if backUp is 1, the error is at the last character read by the Reader, and so forth.
void JS::Lexer::syntaxError(const char *message, uint backUp)
{
reader.unget(backUp);
uint32 charPos = reader.charPos();
char16orEOF ch;
do {
ch = reader.get();
} while (ch != char16eof && !isLineBreak(char16orEOFToChar16(ch)));
reader.unget();
Exception e(Exception::SyntaxError, widenCString(message), reader.sourceFile(), lineNum, charPos,
reader.extract(0, reader.charPos()));
throw e;
}
// Read and return the next character from the reader, passing over any Unicode
// format-control (Cf) characters.
// Values below firstFormatChar are returned immediately; anything at or above it
// (which includes the end-of-input value after the cast) is handed to internalGetChar().
inline JS::char16orEOF JS::Lexer::getChar()
{
    const char16orEOF ch = reader.get();
    return static_cast<uint32>(ch) >= firstFormatChar ? internalGetChar(ch) : ch;
}
// Slow path of getChar(): ch is the character just read. While it is a format-control
// character, replace it with the next character from the reader; return the first
// character that is not a format-control character.
JS::char16orEOF JS::Lexer::internalGetChar(char16orEOF ch)
{
    for (;;) {
        if (!isFormat(char16orEOFToChar16(ch)))
            return ch;
        ch = reader.get();
    }
}
// Return the next character from the reader without consuming it. Any Unicode
// format-control (Cf) characters in front of it are consumed and discarded.
// Values below firstFormatChar are returned directly; anything at or above it (compared
// as an unsigned 32-bit value) is handed to internalPeekChar() for the format-character check.
inline JS::char16orEOF JS::Lexer::peekChar()
{
    char16orEOF c = reader.peek();
    return static_cast<uint32>(c) >= firstFormatChar ? internalPeekChar(c) : c;
}
// Slow path of peekChar(): ch is the character just peeked. While it is a format-control
// character, consume it and peek at the one after it; return the first peeked character
// that is not a format-control character, leaving it unread.
JS::char16orEOF JS::Lexer::internalPeekChar(char16orEOF ch)
{
    for (;;) {
        if (!isFormat(char16orEOFToChar16(ch)))
            return ch;
        reader.get();           // Discard the format-control character that was just peeked
        ch = reader.peek();
    }
}
// Check whether the next character (after discarding any Unicode format-control
// characters) is ch. If it is, consume it and return true; otherwise leave it
// unread and return false.
bool JS::Lexer::testChar(char16 ch)
{
    if (peekChar() != ch)
        return false;
    reader.get();
    return true;
}
// A backslash has been read. Read the rest of the escape code.
// Return the interpreted escaped character. Throw an exception (via syntaxError) if the
// escape is not valid.
// If unicodeOnly is true, allow only \uxxxx escapes.
//
// Recognized escapes:
//   \0                  0x00, but only when not followed by a decimal digit
//   \b \f \n \r \t \v   the usual control characters
//   \xHH  \uHHHH        two or four hexadecimal digits
//   \c                  c itself, for any c that is neither alphanumeric nor a line break
char16 JS::Lexer::lexEscape(bool unicodeOnly)
{
    char16orEOF ch = getChar();
    int nDigits;

    if (!unicodeOnly || ch == 'u')
        switch (ch) {
          case '0':
            // Make sure that the next character isn't a digit.
            ch = peekChar();
            if (!isASCIIDecimalDigit(char16orEOFToChar16(ch)))
                return 0x00;
            getChar();  // Point to the next character in the error message
            // \0 followed by a digit is an error. Leave the switch so that control reaches
            // the error label below instead of falling through into case 'b'.
            break;

          case 'b':
            return 0x08;
          case 'f':
            return 0x0C;
          case 'n':
            return 0x0A;
          case 'r':
            return 0x0D;
          case 't':
            return 0x09;
          case 'v':
            return 0x0B;

          case 'x':
            nDigits = 2;
            goto lexHex;
          case 'u':
            nDigits = 4;
          lexHex:
            {
                // Accumulate exactly nDigits hexadecimal digits, most significant first.
                uint32 n = 0;
                while (nDigits--) {
                    ch = getChar();
                    uint digit;
                    if (!isASCIIHexDigit(char16orEOFToChar16(ch), digit))
                        goto error;
                    n = (n << 4) | digit;
                }
                return char16(n);
            }

          default:
            // Any other character escapes to itself unless it is alphanumeric or a line break.
            if (ch != char16eof) {
                CharInfo chi(char16orEOFToChar16(ch));
                if (!isAlphanumeric(chi) && !isLineBreak(chi))
                    return char16orEOFToChar16(ch);
            }
        }
  error:
    syntaxError("Bad escape code");
    return 0;
}
// Read an identifier from the reader, recording its characters into s. Return true if at
// least one backslash escape was encountered while reading it.
// The first character must be an identifier-leading character unless allowLeadingDigit is
// true, in which case it is held to the same rule as every later character (identifier-
// continuing). Only \uxxxx escapes are accepted; an escape that expands into a character
// not permitted at its position is a syntax error.
// The first unescaped character that does not belong to the identifier is left unread.
bool JS::Lexer::lexIdentifier(String &s, bool allowLeadingDigit)
{
    bool sawEscape = false;

    reader.beginRecording(s);
    for (;;) {
        const char16orEOF raw = getChar();
        const bool escaped = raw == '\\';
        char16orEOF decoded = raw;
        if (escaped) {
            decoded = lexEscape(true);
            sawEscape = true;
        }

        CharInfo info(char16orEOFToChar16(decoded));
        if (allowLeadingDigit ? isIdContinuing(info) : isIdLeading(info)) {
            reader.recordChar(char16orEOFToChar16(decoded));
            // Every character after the first one only needs to be identifier-continuing.
            allowLeadingDigit = true;
            continue;
        }

        // The identifier ends here.
        if (escaped)
            syntaxError("Identifier escape expands into non-identifier character");
        else
            reader.unget();
        break;
    }
    reader.endRecording();
    return sawEscape;
}
// Read a numeric literal and store its characters in nextToken->chars.
// Return true if the numeric literal is followed by a unit (an identifier-continuing character
// or a backslash), but don't read the unit yet.
// This function only records the literal's characters; it does not assign nextToken->value.
//
// Accepted forms:
//   0x / 0X followed by at least one hexadecimal digit
//   decimal digits with at most one decimal point, optionally followed by
//   e / E, an optional sign, and at least one decimal digit
// A leading 0 immediately followed by another decimal digit is a syntax error.
// If 0x is not followed by a hex digit, or e is not followed by a valid exponent, the literal
// ends before the x or e, which is then left for the caller as the start of a unit.
bool JS::Lexer::lexNumeral()
{
    int hasDecimalPoint = 0;
    String s;
    uint digit;

    reader.beginRecording(s);
    char16orEOF ch = getChar();
    if (ch == '0') {
        reader.recordChar('0');
        ch = getChar();
        // Clearing bit 0x20 folds 'x' onto 'X'. The parentheses are required: == binds
        // more tightly than &, so without them the test is always false.
        if ((ch & ~0x20) == 'X') {
            uint32 pos = reader.charPos();
            char16orEOF ch2 = getChar();
            if (isASCIIHexDigit(char16orEOFToChar16(ch2), digit)) {
                reader.recordChar(char16orEOFToChar16(ch));
                do {
                    reader.recordChar(char16orEOFToChar16(ch2));
                    ch2 = getChar();
                } while (isASCIIHexDigit(char16orEOFToChar16(ch2), digit));
                ch = ch2;
            } else
                reader.backUpTo(pos);   // Not a hex literal; go back to just past the x
            goto done;
        } else if (isASCIIDecimalDigit(char16orEOFToChar16(ch))) {
            syntaxError("Numeric constant syntax error");
        }
    }

    // Integer and fraction digits; only the first decimal point is accepted.
    while (isASCIIDecimalDigit(char16orEOFToChar16(ch)) || (ch == '.' && !hasDecimalPoint++)) {
        reader.recordChar(char16orEOFToChar16(ch));
        ch = getChar();
    }

    // Optional exponent. Same case-folding test as for 'X' above.
    if ((ch & ~0x20) == 'E') {
        uint32 pos = reader.charPos();
        char16orEOF ch2 = getChar();
        char16 sign = 0;
        if (ch2 == '+' || ch2 == '-') {
            sign = char16orEOFToChar16(ch2);
            ch2 = getChar();
        }
        if (isASCIIDecimalDigit(char16orEOFToChar16(ch2))) {
            reader.recordChar(char16orEOFToChar16(ch));
            if (sign)
                reader.recordChar(sign);
            do {
                reader.recordChar(char16orEOFToChar16(ch2));
                ch2 = getChar();
            } while (isASCIIDecimalDigit(char16orEOFToChar16(ch2)));
            ch = ch2;
        } else
            reader.backUpTo(pos);       // Not an exponent; go back to just past the e
    }

  done:
    // At this point the reader is just past the character ch, which is the first non-formatting character
    // that is not part of the number.
    reader.endRecording();
    nextToken->setChars(s);
    reader.unget();
    ASSERT(ch == reader.peek());
    return isIdContinuing(char16orEOFToChar16(ch)) || ch == '\\';
}
// Read the body of a string literal and return it as a String.
// The opening quote has already been consumed and is passed in as separator; reading stops
// after the matching closing separator, which is consumed but not included in the result.
// Format-control characters are dropped, backslash escapes are replaced by the characters
// they denote, and reaching a line break or the end of input before the closing separator
// is a syntax error.
JS::String JS::Lexer::lexString(char16 separator)
{
    String contents;

    reader.beginRecording(contents);
    for (;;) {
        char16orEOF c = reader.get();
        if (c == separator)
            break;

        CharInfo info(char16orEOFToChar16(c));
        if (isFormat(info))
            continue;           // Format-control characters are not part of the string

        if (c == '\\')
            c = lexEscape(false);
        else if (c == char16eof || isLineBreak(info))
            syntaxError("Unterminated string literal");
        reader.recordChar(char16orEOFToChar16(c));
    }
    reader.endRecording();
    return contents;
}
// Read a regular expression literal. Store the regular expression body in nextToken->identifier
// and the flags in nextToken->chars.
// The opening slash has already been read.
// The body runs up to the first slash that is not immediately preceded by an unescaped
// backslash; backslashes are recorded in the body unchanged. A line break or the end of
// input inside the body is a syntax error. The flags are whatever identifier-continuing
// characters directly follow the closing slash.
void JS::Lexer::lexRegExp()
{
    String s;
    char16orEOF prevCh = 0;

    reader.beginRecording(s);
    while (true) {
        char16orEOF ch = getChar();
        CharInfo chi(char16orEOFToChar16(ch));
        if (ch == char16eof || isLineBreak(chi))
            syntaxError("Unterminated regular expression literal");
        if (prevCh == '\\') {
            // Ignore slashes and backslashes immediately after a backslash.
            // (This comment must not end in a backslash character: that would splice
            // the next source line into the comment.)
            reader.recordChar(char16orEOFToChar16(ch));
            prevCh = 0;
        } else if (ch != '/') {
            reader.recordChar(char16orEOFToChar16(ch));
            prevCh = ch;
        } else
            break;
    }
    reader.endRecording();
    nextToken->identifier = &world.identifiers[s];

    String flags;
    lexIdentifier(flags, true);
    nextToken->setChars(flags);
}
// Read a token from the Reader and store it at *nextToken.
// If the Reader reached the end of file, store a Token whose Kind is End.
void JS::Lexer::lexToken(bool preferRegExp)
{
}
Token &t = *nextToken;
t.lineBreak = false;
t.identifier = 0;
t.chars.reset();
t.value = 0;
Token::Kind kind;
next:
char16orEOF ch = reader.get();
char16orEOF ch2;
CharInfo chi(char16orEOFToChar16(ch));
switch (cGroup(chi)) {
case CharInfo::FormatGroup:
case CharInfo::WhiteGroup:
goto next;
case CharInfo::IdGroup:
t.charPos = reader.charPos() - 1;
readIdentifier:
{
reader.unget();
String s;
bool hasEscape = lexIdentifier(s, false);
t.identifier = &world.identifiers[s];
kind = hasEscape ? Token::Id : t.identifier->tokenKind;
}
break;
case CharInfo::NonIdGroup:
case CharInfo::IdContinueGroup:
t.charPos = reader.charPos() - 1;
switch (ch) {
case '(':
kind = Token::OpenParenthesis; // (
break;
case ')':
kind = Token::CloseParenthesis; // )
break;
case '[':
kind = Token::OpenBracket; // [
break;
case ']':
kind = Token::CloseBracket; // ]
break;
case '{':
kind = Token::OpenBrace; // {
break;
case '}':
kind = Token::CloseBrace; // }
break;
case ',':
kind = Token::Comma; // ,
break;
case ';':
kind = Token::Semicolon; // ;
break;
case '.':
kind = Token::Dot; // .
ch2 = getChar();
if (isASCIIDecimalDigit(char16orEOFToChar16(ch2))) {
reader.backUpTo(t.charPos);
goto number; // decimal point
} else if (ch2 == '.') {
kind = Token::DoubleDot; // ..
if (testChar('.'))
kind = Token::TripleDot; // ...
} else
reader.unget();
break;
case ':':
kind = Token::Colon; // :
if (testChar(':'))
kind = Token::DoubleColon; // ::
break;
case '#':
kind = Token::Pound; // #
break;
case '@':
kind = Token::At; // @
break;
case '?':
kind = Token::Question; // ?
break;
case '~':
kind = Token::Complement; // ~
break;
case '!':
kind = Token::Not; // !
if (testChar('=')) {
kind = Token::NotEqual; // !=
if (testChar('='))
kind = Token::NotIdentical; // !==
}
break;
case '*':
kind = Token::Times; // * *=
tryAssignment:
if (testChar('='))
kind = Token::Kind(kind + Token::TimesEquals - Token::Times);
break;
case '/':
kind = Token::Divide; // /
ch = getChar();
if (ch == '/') { // // comment
do {
ch = reader.get();
if (ch == char16eof)
goto endOfInput;
} while (!isLineBreak(char16orEOFToChar16(ch)));
goto endOfLine;
} else if (ch == '*') { // /* comment */
ch = 0;
do {
ch2 = ch;
ch = getChar();
if (isLineBreak(char16orEOFToChar16(ch))) {
reader.beginLine();
++lineNum;
t.lineBreak = true;
}
if (ch == char16eof)
syntaxError("Unterminated /* comment");
} while (ch != '/' || ch2 != '*');
goto next;
} else {
reader.unget();
if (preferRegExp) { // Regular expression
kind = Token::RegExp;
lexRegExp();
} else
goto tryAssignment; // /=
}
break;
case '%':
kind = Token::Modulo; // %
goto tryAssignment; // %=
case '+':
kind = Token::Plus; // +
if (testChar('+'))
kind = Token::Increment; // ++
else
goto tryAssignment; // +=
break;
case '-':
kind = Token::Minus; // -
ch = getChar();
if (ch == '-')
kind = Token::Decrement; // --
else if (ch == '>')
kind = Token::Arrow; // ->
else {
reader.unget();
goto tryAssignment; // -=
}
break;
case '&':
kind = Token::And; // & && &= &&=
logical:
if (testChar(char16orEOFToChar16(ch)))
kind = Token::Kind(kind - Token::And + Token::LogicalAnd);
goto tryAssignment;
case '^':
kind = Token::Xor; // ^ ^^ ^= ^^=
goto logical;
case '|':
kind = Token::Or; // | || |= ||=
goto logical;
case '=':
kind = Token::Assignment; // =
if (testChar('=')) {
kind = Token::Equal; // ==
if (testChar('='))
kind = Token::Identical; // ===
}
break;
case '<':
kind = Token::LessThan; // <
if (testChar('<')) {
kind = Token::LeftShift; // <<
goto tryAssignment; // <<=
}
comparison:
if (testChar('=')) // <= >=
kind = Token::Kind(kind + Token::LessThanOrEqual - Token::LessThan);
break;
case '>':
kind = Token::GreaterThan; // >
if (testChar('>')) {
kind = Token::RightShift; // >>
if (testChar('>'))
kind = Token::LogicalRightShift; // >>>
goto tryAssignment; // >>= >>>=
}
goto comparison;
case '\\':
goto readIdentifier; // An identifier that starts with an escape
case '\'':
case '"':
kind = Token::Str; // 'string' "string"
t.setChars(lexString(char16orEOFToChar16(ch)));
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
reader.unget(); // Number
number:
kind = Token::Num;
lexNumeral();
break;
case char16eof:
endOfInput:
kind = Token::End;
}
break;
case CharInfo::LineBreakGroup:
endOfLine:
reader.beginLine();
++lineNum;
t.lineBreak = true;
goto next;
}
t.kind = kind;
t.lineNum = lineNum;
}

Просмотреть файл

@ -21,23 +21,29 @@
#define parser_h
#include "utilities.h"
#include "world.h"
namespace JavaScript {
class StringAtom;
class World;
//
// Reader
//
// A Reader reads Unicode characters from some source -- either a file or a string.
// get() returns all of the characters followed by a ueof.
// get() returns all of the characters followed by a char16eof.
class Reader {
const char16 *begin; // Beginning of current buffer
const char16 *p; // Position in current buffer
const char16 *end; // End of current buffer
const char16 *markPos; // Pointer to mark in current buffer or null if no mark
uint32 nGetsPastEnd; // Number of times ueof has been returned
const char16 *lineStart; // Pointer to start of current line
uint32 nGetsPastEnd; // Number of times char16eof has been returned
String *recordString; // String, if any, into which recordChar() records characters
const char16 *recordBase; // Position of last beginRecording() call
const char16 *recordPos; // Position of last recordChar() call; nil if a discrepancy occurred
protected:
Reader(): nGetsPastEnd(0) {}
public:
@ -46,54 +52,70 @@ namespace JavaScript {
Reader(const Reader&); // No copy constructor
void operator=(const Reader&); // No assignment operator
public:
#ifdef DEBUG
~Reader() {ASSERT(!markPos);}
#endif
wint_t get();
wint_t peek();
void unget();
char16orEOF get();
char16orEOF peek();
void unget(uint32 n = 1);
void mark();
void unmark();
void unmark(String &s);
bool marked() const {return markPos;}
void beginLine();
uint32 charPos() const;
void backUpTo(uint32 pos);
String extract(uint32 begin, uint32 end) const;
void beginRecording(String &recordString);
void recordChar(char16 ch);
String &endRecording();
virtual String sourceFile() const = 0; // A description of the source code that caused the error
protected:
void setBuffer(const char16 *begin, const char16 *p, const char16 *end);
virtual wint_t underflow();
wint_t peekUnderflow();
virtual char16orEOF underflow();
char16orEOF peekUnderflow();
};
// Get and return the next character or ueof if at end of input.
inline wint_t Reader::get()
// Get and return the next character or char16eof if at end of input.
inline char16orEOF Reader::get()
{
if (p != end)
return *p++;
return underflow();
}
// Return the next character without consuming it. Return ueof if at end of input.
inline wint_t Reader::peek()
// Return the next character without consuming it. Return char16eof if at end of input.
inline char16orEOF Reader::peek()
{
if (p != end)
return *p;
return peekUnderflow();
}
// Mark the current position in the Reader.
inline void Reader::mark()
// Set the beginning of the current line. unget cannot be subsequently called past this point.
inline void Reader::beginLine()
{
ASSERT(!markPos);
markPos = p;
lineStart = p;
#ifdef DEBUG
recordString = 0;
#endif
}
// Delete the Reader mark.
inline void Reader::unmark()
// Return the character offset relative to the current line. This cannot be called
// if the current position is past the end of the input.
inline uint32 Reader::charPos() const
{
ASSERT(markPos);
markPos = 0;
ASSERT(!nGetsPastEnd);
return static_cast<uint32>(p - lineStart);
}
// Back up to the given character offset relative to the current line. pos must not lie
// beyond the current position.
// Unlike charPos(), this may be called after reads past the end of the input; those reads
// are discarded (nGetsPastEnd is reset to zero). For that reason the precondition is
// checked against p directly instead of through charPos(), which asserts that no read
// past the end has occurred.
inline void Reader::backUpTo(uint32 pos)
{
    ASSERT(lineStart + pos <= p);
    p = lineStart + pos;
    nGetsPastEnd = 0;
}
@ -103,14 +125,21 @@ namespace JavaScript {
Reader::begin = begin;
Reader::p = p;
Reader::end = end;
lineStart = begin;
#ifdef DEBUG
recordString = 0;
#endif
}
// A Reader that reads from a String.
// NOTE(review): two constructor overloads are declared below; the one-argument form may be
// a leftover from before the `source` description was introduced -- confirm against the
// definitions in the implementation file.
class StringReader: public Reader {
const String str; // presumably the text being read -- confirm against the constructor definition
const String source; // presumably the description returned by sourceFile() -- confirm
public:
StringReader(const String &s);
StringReader(const String &s, const String &source);
String sourceFile() const; // Overrides Reader::sourceFile(): a description of the source code that caused the error
};
@ -122,7 +151,6 @@ namespace JavaScript {
public:
enum Kind {
End, // End of token stream
Error, // Lexer error
Id, // Non-keyword identifier (may be same as a keyword if it contains an escape code)
Num, // Numeral
@ -165,12 +193,12 @@ namespace JavaScript {
LogicalAnd, // &&
LogicalXor, // ^^
LogicalOr, // ||
And, // &
And, // & // These must be at constant offsets from LogicalAnd ... LogicalOr
Xor, // ^
Or, // |
Assignment, // =
TimesEquals, // *=
TimesEquals, // *= // These must be at constant offsets from Times ... Or
DivideEquals, // /=
ModuloEquals, // %=
PlusEquals, // +=
@ -189,7 +217,7 @@ namespace JavaScript {
NotEqual, // !=
LessThan, // <
LessThanOrEqual, // <=
GreaterThan, // >
GreaterThan, // > // >, >= must be at constant offsets from <, <=
GreaterThanOrEqual, // >=
Identical, // ===
NotIdentical, // !==
@ -268,13 +296,19 @@ namespace JavaScript {
StringAtom *identifier; // The token's characters (identifiers, keywords, and regular expressions only)
auto_ptr<String> chars; // The token's characters (strings, numbers, and regular expression flags only)
float64 value; // The token's value (numbers only)
void setChars(const String &s);
};
void initKeywords(World &world);
class Lexer {
static const int tokenBufferSize = 3; // Token lookahead buffer size
public:
Reader &reader;
World &world;
private:
Token tokens[tokenBufferSize]; // Circular buffer of recently read or lookahead tokens
Token *nextToken; // Address of next Token in the circular buffer to be returned by get()
@ -283,16 +317,31 @@ namespace JavaScript {
int nTokensBack; // Number of Tokens on which unget() can be called; these Tokens are beind nextToken
bool savedPreferRegExp[tokenBufferSize]; // Circular buffer of saved values of preferRegExp to get() calls
#endif
uint32 lineNum; // Current line number
bool lexingUnit; // True if lexing a unit identifier immediately following a number
public:
Lexer(Reader &reader);
Lexer(Reader &reader, World &world);
Token &get(bool preferRegExp);
const Token &peek(bool preferRegExp);
void unget();
private:
void syntaxError(const char *message, uint backUp = 1);
char16orEOF getChar();
char16orEOF internalGetChar(char16orEOF ch);
char16orEOF peekChar();
char16orEOF internalPeekChar(char16orEOF ch);
bool testChar(char16 ch);
char16 lexEscape(bool unicodeOnly);
bool lexIdentifier(String &s, bool allowLeadingDigit);
bool lexNumeral();
String lexString(char16 separator);
void lexRegExp();
void lexToken(bool preferRegExp);
public:
};
}
#endif