// -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- // // The contents of this file are subject to the Netscape Public // License Version 1.1 (the "License"); you may not use this file // except in compliance with the License. You may obtain a copy of // the License at http://www.mozilla.org/NPL/ // // Software distributed under the License is distributed on an "AS // IS" basis, WITHOUT WARRANTY OF ANY KIND, either express oqr // implied. See the License for the specific language governing // rights and limitations under the License. // // The Original Code is the JavaScript 2 Prototype. // // The Initial Developer of the Original Code is Netscape // Communications Corporation. Portions created by Netscape are // Copyright (C) 1998 Netscape Communications Corporation. All // Rights Reserved. #ifndef parser_h #define parser_h #include "utilities.h" namespace JavaScript { class StringAtom; class World; // // Reader // // A Reader reads Unicode characters from some source -- either a file or a string. // get() returns all of the characters followed by a char16eof. class Reader { const char16 *begin; // Beginning of current buffer const char16 *p; // Position in current buffer const char16 *end; // End of current buffer const char16 *lineStart; // Pointer to start of current line uint32 nGetsPastEnd; // Number of times char16eof has been returned String *recordString; // String, if any, into which recordChar() records characters const char16 *recordBase; // Position of last beginRecording() call const char16 *recordPos; // Position of last recordChar() call; nil if a discrepancy occurred protected: Reader(): nGetsPastEnd(0) {} public: Reader(const char16 *begin, const char16 *end); private: Reader(const Reader&); // No copy constructor void operator=(const Reader&); // No assignment operator public: char16orEOF get(); char16orEOF peek(); void unget(uint32 n = 1); void beginLine(); uint32 charPos() const; void backUpTo(uint32 pos); String extract(uint32 begin, uint32 end) const; void beginRecording(String &recordString); void recordChar(char16 ch); String &endRecording(); virtual String sourceFile() const = 0; // A description of the source code that caused the error protected: void setBuffer(const char16 *begin, const char16 *p, const char16 *end); virtual char16orEOF underflow(); char16orEOF peekUnderflow(); }; // Get and return the next character or char16eof if at end of input. inline char16orEOF Reader::get() { if (p != end) return *p++; return underflow(); } // Return the next character without consuming it. Return char16eof if at end of input. inline char16orEOF Reader::peek() { if (p != end) return *p; return peekUnderflow(); } // Set the beginning of the current line. unget cannot be subsequently called past this point. inline void Reader::beginLine() { lineStart = p; #ifdef DEBUG recordString = 0; #endif } // Return the character offset relative to the current line. This cannot be called // if the current position is past the end of the input. inline uint32 Reader::charPos() const { ASSERT(!nGetsPastEnd); return static_cast(p - lineStart); } // Back up to the given character offset relative to the current line. inline void Reader::backUpTo(uint32 pos) { ASSERT(pos <= charPos()); p = lineStart + pos; nGetsPastEnd = 0; } inline void Reader::setBuffer(const char16 *begin, const char16 *p, const char16 *end) { ASSERT(begin <= p && p <= end); Reader::begin = begin; Reader::p = p; Reader::end = end; lineStart = begin; #ifdef DEBUG recordString = 0; #endif } // A Reader that reads from a String. class StringReader: public Reader { const String str; const String source; public: StringReader(const String &s, const String &source); String sourceFile() const; }; // // Lexer // class Token { static const char *const kindNames[]; public: enum Kind { // Keep synchronized with kindNames table // Special End, // End of token stream Id, // Non-keyword identifier (may be same as a keyword if it contains an escape code) Num, // Numeral Str, // String Unit, // Unit after numeral RegExp, // Regular expression // Punctuators OpenParenthesis, // ( CloseParenthesis, // ) OpenBracket, // [ CloseBracket, // ] OpenBrace, // { CloseBrace, // } Comma, // , Semicolon, // ; Dot, // . DoubleDot, // .. TripleDot, // ... Arrow, // -> Colon, // : DoubleColon, // :: Pound, // # At, // @ Increment, // ++ Decrement, // -- Complement, // ~ Not, // ! Times, // * Divide, // / Modulo, // % Plus, // + Minus, // - LeftShift, // << RightShift, // >> LogicalRightShift, // >>> LogicalAnd, // && LogicalXor, // ^^ LogicalOr, // || And, // & // These must be at constant offsets from LogicalAnd ... LogicalOr Xor, // ^ Or, // | Assignment, // = TimesEquals, // *= // These must be at constant offsets from Times ... Or DivideEquals, // /= ModuloEquals, // %= PlusEquals, // += MinusEquals, // -= LeftShiftEquals, // <<= RightShiftEquals, // >>= LogicalRightShiftEquals, // >>>= LogicalAndEquals, // &&= LogicalXorEquals, // ^^= LogicalOrEquals, // ||= AndEquals, // &= XorEquals, // ^= OrEquals, // |= Equal, // == NotEqual, // != LessThan, // < LessThanOrEqual, // <= GreaterThan, // > // >, >= must be at constant offsets from <, <= GreaterThanOrEqual, // >= Identical, // === NotIdentical, // !== Question, // ? // Reserved words Abstract, // abstract Break, // break Case, // case Catch, // catch Class, // class Const, // const Continue, // continue Debugger, // debugger Default, // default Delete, // delete Do, // do Else, // else Enum, // enum Eval, // eval Export, // export Extends, // extends False, // false Final, // final Finally, // finally For, // for Function, // function Goto, // goto If, // if Implements, // implements Import, // import In, // in Instanceof, // instanceof Native, // native New, // new Null, // null Package, // package Private, // private Protected, // protected Public, // public Return, // return Static, // static Super, // super Switch, // switch Synchronized, // synchronized This, // this Throw, // throw Throws, // throws Transient, // transient True, // true Try, // try Typeof, // typeof Var, // var Volatile, // volatile While, // while With, // with // Non-reserved words Box, // box Constructor, // constructor Field, // field Get, // get Language, // language Local, // local Method, // method Override, // override Set, // set Version, // version KeywordsEnd, // End of range of special identifier tokens KeywordsBegin = Abstract, // Beginning of range of special identifier tokens KindsEnd = KeywordsEnd // End of token kinds }; Kind kind; // The token's kind bool lineBreak; // True if line break precedes this token uint32 lineNum; // One-based source line number uint32 charPos; // Zero-based character offset of this token in source line StringAtom *identifier; // The token's characters; non-null for identifiers, keywords, and regular expressions only String chars; // The token's characters; valid for strings, units, numbers, and regular expression flags only float64 value; // The token's value (numbers only) static void initKeywords(World &world); friend String &operator+=(String &s, Kind k) {ASSERT(uint(k) < KindsEnd); return s += kindNames[k];} friend String &operator+=(String &s, const Token &t) {t.print(s); return s;} void print(String &dst, bool debug = false) const; }; class Lexer { enum {tokenBufferSize = 3}; // Token lookahead buffer size public: Reader &reader; World &world; private: Token tokens[tokenBufferSize]; // Circular buffer of recently read or lookahead tokens Token *nextToken; // Address of next Token in the circular buffer to be returned by get() int nTokensFwd; // Net number of Tokens on which unget() has been called; these Tokens are ahead of nextToken #ifdef DEBUG int nTokensBack; // Number of Tokens on which unget() can be called; these Tokens are beind nextToken bool savedPreferRegExp[tokenBufferSize]; // Circular buffer of saved values of preferRegExp to get() calls #endif uint32 lineNum; // Current line number bool lexingUnit; // True if lexing a unit identifier immediately following a number public: Lexer(Reader &reader, World &world); Token &get(bool preferRegExp); const Token &peek(bool preferRegExp); void unget(); private: void syntaxError(const char *message, uint backUp = 1); char16orEOF getChar(); char16orEOF internalGetChar(char16orEOF ch); char16orEOF peekChar(); char16orEOF internalPeekChar(char16orEOF ch); bool testChar(char16 ch); char16 lexEscape(bool unicodeOnly); bool lexIdentifier(String &s, bool allowLeadingDigit); bool lexNumeral(); void lexString(String &s, char16 separator); void lexRegExp(); void lexToken(bool preferRegExp); public: }; } #endif