gecko-dev/js/narcissus/jslex.js

/* vim: set sw=4 ts=4 et tw=78: */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is the Narcissus JavaScript engine.
 *
 * The Initial Developer of the Original Code is
 * Brendan Eich <brendan@mozilla.org>.
 * Portions created by the Initial Developer are Copyright (C) 2004
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

/*
 * Narcissus - JS implemented in JS.
 *
 * Lexical scanner.
 */

Narcissus.jslex = (function() {

    // Shared definition tables; this module reads consts, opTypeNames,
    // assignOps, tokenIds and keywords from it.
    var jsdefs = Narcissus.jsdefs;
    
    // Set constants in the local scope. jsdefs.consts is evaluated as source
    // text here; presumably it declares the token-type names used below
    // (END, NEWLINE, NUMBER, ...) -- confirm against jsdefs.
    eval(jsdefs.consts);
    
    // Build up a trie of operator tokens, one character per level. The
    // newline and '.' entries are skipped: the tokenizer handles those
    // characters itself (NEWLINE in get, DOT in lexDot).
    var opTokens = {};
    for (var op in jsdefs.opTypeNames) {
        if (op === '\n' || op === '.')
            continue;
    
        var node = opTokens;
        for (var i = 0; i < op.length; i++) {
            var ch = op[i];
            if (!(ch in node))
                node[ch] = {};
            node = node[ch];
        }
    
        // Label only the node that spells the complete operator. Labelling
        // every node along the path would leave a prefix node (e.g. '>')
        // carrying whichever longer operator (e.g. '>>>') happened to be
        // enumerated last, making the trie depend on property order.
        node.op = op;
    }
    
    /*
     * Tokenizer :: (source text, filename, starting line number) -> Tokenizer
     *
     * The source is coerced to a string. A missing filename becomes "" and a
     * missing (or zero) line number becomes 1. The token buffer starts empty
     * with no lookahead, and newline scanning is off.
     */
    function Tokenizer(s, f, l) {
        var text = String(s);
        var name = f || "";
        var firstLine = l || 1;

        this.cursor = 0;            // index of the next unread character
        this.source = text;
        this.tokens = [];           // token ring buffer (indices masked by 3)
        this.tokenIndex = 0;
        this.lookahead = 0;         // number of tokens that were ungot
        this.scanNewlines = false;
        this.filename = name;
        this.lineno = firstLine;
    }
    
    Tokenizer.prototype = {
        get done() {
            // We need to set scanOperand to true here because the first thing
            // might be a regexp.
            return this.peek(true) == END;
        },
    
        get token() {
            return this.tokens[this.tokenIndex];
        },
    
        match: function (tt, scanOperand) {
            return this.get(scanOperand) == tt || this.unget();
        },
    
        mustMatch: function (tt) {
            if (!this.match(tt))
                throw this.newSyntaxError("Missing " + tokens[tt].toLowerCase());
            return this.token;
        },
    
        peek: function (scanOperand) {
            var tt, next;
            if (this.lookahead) {
                next = this.tokens[(this.tokenIndex + this.lookahead) & 3];
                tt = (this.scanNewlines && next.lineno != this.lineno)
                     ? NEWLINE
                     : next.type;
            } else {
                tt = this.get(scanOperand);
                this.unget();
            }
            return tt;
        },
    
        peekOnSameLine: function (scanOperand) {
            this.scanNewlines = true;
            var tt = this.peek(scanOperand);
            this.scanNewlines = false;
            return tt;
        },
    
        // Eats comments and whitespace.
        skip: function () {
            var input = this.source;
            for (;;) {
                var ch = input[this.cursor++];
                var next = input[this.cursor];
                if (ch === '\n' && !this.scanNewlines) {
                    this.lineno++;
                } else if (ch === '/' && next === '*') {
                    this.cursor++;
                    for (;;) {
                        ch = input[this.cursor++];
                        if (ch === undefined)
                            throw this.newSyntaxError("Unterminated comment");
    
                        if (ch === '*') {
                            next = input[this.cursor];
                            if (next === '/') {
                                this.cursor++;
                                break;
                            }
                        } else if (ch === '\n') {
                            this.lineno++;
                        }
                    }
                } else if (ch === '/' && next === '/') {
                    this.cursor++;
                    for (;;) {
                        ch = input[this.cursor++];
                        if (ch === undefined)
                            return;
    
                        if (ch === '\n') {
                            this.lineno++;
                            break;
                        }
                    }
                } else if (ch !== ' ' && ch !== '\t') {
                    this.cursor--;
                    return;
                }
            }
        },
    
        // Lexes the exponential part of a number, if present. Returns true iff an
        // exponential part was found.
        lexExponent: function() {
            var input = this.source;
            var next = input[this.cursor];
            if (next === 'e' || next === 'E') {
                this.cursor++;
                ch = input[this.cursor++];
                if (ch === '+' || ch === '-')
                    ch = input[this.cursor++];
    
                if (ch < '0' || ch > '9')
                    throw this.newSyntaxError("Missing exponent");
    
                do {
                    ch = input[this.cursor++];
                } while (ch >= '0' && ch <= '9');
                this.cursor--;
    
                return true;
            }
    
            return false;
        },
    
        lexZeroNumber: function (ch) {
            var token = this.token, input = this.source;
            token.type = NUMBER;
    
            ch = input[this.cursor++];
            if (ch === '.') {
                do {
                    ch = input[this.cursor++];
                } while (ch >= '0' && ch <= '9');
                this.cursor--;
    
                this.lexExponent();
                token.value = parseFloat(token.start, this.cursor);
            } else if (ch === 'x' || ch === 'X') {
                do {
                    ch = input[this.cursor++];
                } while ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') ||
                         (ch >= 'A' && ch <= 'F'));
                this.cursor--;
    
                token.value = parseInt(input.substring(token.start, this.cursor));
            } else if (ch >= '0' && ch <= '7') {
                do {
                    ch = input[this.cursor++];
                } while (ch >= '0' && ch <= '7');
                this.cursor--;
    
                token.value = parseInt(input.substring(token.start, this.cursor));
            } else {
                this.cursor--;
                this.lexExponent();     // 0E1, &c.
                token.value = 0;
            }
        },
    
        lexNumber: function (ch) {
            var token = this.token, input = this.source;
            token.type = NUMBER;
    
            var floating = false;
            do {
                ch = input[this.cursor++];
                if (ch === '.' && !floating) {
                    floating = true;
                    ch = input[this.cursor++];
                }
            } while (ch >= '0' && ch <= '9');
    
            this.cursor--;
    
            var exponent = this.lexExponent();
            floating = floating || exponent;
    
            var str = input.substring(token.start, this.cursor);
            token.value = floating ? parseFloat(str) : parseInt(str);
        },
    
        lexDot: function (ch) {
            var token = this.token, input = this.source;
            var next = input[this.cursor];
            if (next >= '0' && next <= '9') {
                do {
                    ch = input[this.cursor++];
                } while (ch >= '0' && ch <= '9');
                this.cursor--;
    
                this.lexExponent();
    
                token.type = NUMBER;
                token.value = parseFloat(token.start, this.cursor);
            } else {
                token.type = DOT;
                token.assignOp = null;
                token.value = '.';
            }
        },
    
        lexString: function (ch) {
            var token = this.token, input = this.source;
            token.type = STRING;
    
            var hasEscapes = false;
            var delim = ch;
            ch = input[this.cursor++];
            while (ch !== delim) {
                if (ch === '\\') {
                    hasEscapes = true;
                    this.cursor++;
                }
                ch = input[this.cursor++];
            }
    
            token.value = (hasEscapes)
                          ? eval(input.substring(token.start, this.cursor))
                          : input.substring(token.start + 1, this.cursor - 1);
        },
    
        lexRegExp: function (ch) {
            var token = this.token, input = this.source;
            token.type = REGEXP;
    
            do {
                ch = input[this.cursor++];
                if (ch === '\\') {
                    this.cursor++;
                } else if (ch === '[') {
                    do {
                        if (ch === undefined)
                            throw this.newSyntaxError("Unterminated character class");
    
                        if (ch === '\\')
                            this.cursor++;
    
                        ch = input[this.cursor++];
                    } while (ch !== ']');
                } else if (ch === undefined) {
                    throw this.newSyntaxError("Unterminated regex");
                }
            } while (ch !== '/');
    
            do {
                ch = input[this.cursor++];
            } while (ch >= 'a' && ch <= 'z');
    
            this.cursor--;
    
            token.value = eval(input.substring(token.start, this.cursor));
        },
    
        lexOp: function (ch) {
            var token = this.token, input = this.source;
    
            // A bit ugly, but it seems wasteful to write a trie lookup routine for
            // only 3 characters...
            var node = opTokens[ch];
            var next = input[this.cursor];
            if (next in node) {
                node = node[next];
                this.cursor++;
                next = input[this.cursor];
                if (next in node) {
                    node = node[next];
                    this.cursor++;
                    next = input[this.cursor];
                }
            }
    
            var op = node.op;
            if (jsdefs.assignOps[op] && input[this.cursor] === '=') {
                this.cursor++;
                token.type = ASSIGN;
                token.assignOp = jsdefs.tokenIds[jsdefs.opTypeNames[op]];
                op += '=';
            } else {
                token.type = jsdefs.tokenIds[jsdefs.opTypeNames[op]];
                token.assignOp = null;
            }
    
            token.value = op;
        },
    
        // FIXME: Unicode escape sequences
        // FIXME: Unicode identifiers
        lexIdent: function (ch) {
            var token = this.token, input = this.source;
    
            do {
                ch = input[this.cursor++];
            } while ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
                     (ch >= '0' && ch <= '9') || ch === '$' || ch === '_');
    
            this.cursor--;  // Put the non-word character back.
    
            var id = input.substring(token.start, this.cursor);
            token.type = jsdefs.keywords[id] || IDENTIFIER;
            token.value = id;
        },
    
        /*
         * Tokenizer.get :: void -> token type
         *
         * Consumes input *only* if there is no lookahead.
         * Dispatch to the appropriate lexing function depending on the input.
         */
        get: function (scanOperand) {
            var token;
            while (this.lookahead) {
                --this.lookahead;
                this.tokenIndex = (this.tokenIndex + 1) & 3;
                token = this.tokens[this.tokenIndex];
                if (token.type != NEWLINE || this.scanNewlines)
                    return token.type;
            }
    
            this.skip();
    
            this.tokenIndex = (this.tokenIndex + 1) & 3;
            token = this.tokens[this.tokenIndex];
            if (!token)
                this.tokens[this.tokenIndex] = token = {};
    
            var input = this.source;
            if (this.cursor === input.length)
                return token.type = END;
    
            token.start = this.cursor;
            token.lineno = this.lineno;
    
            var ch = input[this.cursor++];
            if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
                    ch === '$' || ch === '_') {
                this.lexIdent(ch);
            } else if (scanOperand && ch === '/') {
                this.lexRegExp(ch);
            } else if (ch in opTokens) {
                this.lexOp(ch);
            } else if (ch === '.') {
                this.lexDot(ch);
            } else if (ch >= '1' && ch <= '9') {
                this.lexNumber(ch);
            } else if (ch === '0') {
                this.lexZeroNumber(ch);
            } else if (ch === '"' || ch === "'") {
                this.lexString(ch);
            } else if (this.scanNewlines && ch === '\n') {
                token.type = NEWLINE;
                token.value = '\n';
                this.lineno++;
            } else {
                throw this.newSyntaxError("Illegal token");
            }
    
            token.end = this.cursor;
            return token.type;
        },
    
        /*
         * Tokenizer.unget :: void -> undefined
         *
         * Match depends on unget returning undefined.
         */
        unget: function () {
            if (++this.lookahead == 4) throw "PANIC: too much lookahead!";
            this.tokenIndex = (this.tokenIndex - 1) & 3;
        },
    
        newSyntaxError: function (m) {
            var e = new SyntaxError(m, this.filename, this.lineno);
            e.source = this.source;
            e.cursor = this.cursor;
            return e;
        },
    
        save: function () {
            return {
                cursor: this.cursor,
                tokenIndex: this.tokenIndex,
                tokens: this.tokens.slice(),
                lookahead: this.lookahead,
                scanNewlines: this.scanNewlines,
                lineno: this.lineno
            };
        },
    
        rewind: function(point) {
            this.cursor = point.cursor;
            this.tokenIndex = point.tokenIndex;
            this.tokens = point.tokens.slice();
            this.lookahead = point.lookahead;
            this.scanNewline = point.scanNewline;
            this.lineno = point.lineno;
        }
    };
    
    return { "Tokenizer": Tokenizer };

}());
Bug 579230 - Rewrite narcissus expr parser, refactor into builder pattern. r=gal 2010-08-01 23:20:18 +04:00			`/* vim: set sw=4 ts=4 et tw=78: */`
Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00			`/* *** BEGIN LICENSE BLOCK ***`
			`* Version: MPL 1.1/GPL 2.0/LGPL 2.1`
			`*`
			`* The contents of this file are subject to the Mozilla Public License Version`
			`* 1.1 (the "License"); you may not use this file except in compliance with`
			`* the License. You may obtain a copy of the License at`
			`* http://www.mozilla.org/MPL/`
			`*`
			`* Software distributed under the License is distributed on an "AS IS" basis,`
			`* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License`
			`* for the specific language governing rights and limitations under the`
			`* License.`
			`*`
			`* The Original Code is the Narcissus JavaScript engine.`
			`*`
			`* The Initial Developer of the Original Code is`
			`* Brendan Eich <brendan@mozilla.org>.`
			`* Portions created by the Initial Developer are Copyright (C) 2004`
			`* the Initial Developer. All Rights Reserved.`
			`*`
			`* Contributor(s):`
			`*`
			`* Alternatively, the contents of this file may be used under the terms of`
			`* either the GNU General Public License Version 2 or later (the "GPL"), or`
			`* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),`
			`* in which case the provisions of the GPL or the LGPL are applicable instead`
			`* of those above. If you wish to allow use of your version of this file only`
			`* under the terms of either the GPL or the LGPL, and not to allow others to`
			`* use your version of this file under the terms of the MPL, indicate your`
			`* decision by deleting the provisions above and replace them with the notice`
			`* and other provisions required by the GPL or the LGPL. If you do not delete`
			`* the provisions above, a recipient may use your version of this file under`
			`* the terms of any one of the MPL, the GPL or the LGPL.`
			`*`
			`* *** END LICENSE BLOCK *** */`

			`/*`
			`* Narcissus - JS implemented in JS.`
			`*`
			`* Lexical scanner.`
			`*/`

Refactored Narcissus to use the module pattern (bug 583913, r=pwalton) 2010-08-04 04:15:02 +04:00			`Narcissus.jslex = (function() {`

			`var jsdefs = Narcissus.jsdefs;`

			`// Set constants in the local scope.`
			`eval(jsdefs.consts);`

			`// Build up a trie of operator tokens.`
			`var opTokens = {};`
			`for (var op in jsdefs.opTypeNames) {`
			`if (op === '\n' \|\| op === '.')`
			`continue;`

			`var node = opTokens;`
			`for (var i = 0; i < op.length; i++) {`
			`var ch = op[i];`
			`if (!(ch in node))`
			`node[ch] = {};`
			`node = node[ch];`
			`node.op = op;`
Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00			`}`
Refactored Narcissus to use the module pattern (bug 583913, r=pwalton) 2010-08-04 04:15:02 +04:00			`}`

			`/*`
			`* Tokenizer :: (file ptr, path, line number) -> Tokenizer`
			`*/`
			`function Tokenizer(s, f, l) {`
			`this.cursor = 0;`
			`this.source = String(s);`
			`this.tokens = [];`
			`this.tokenIndex = 0;`
			`this.lookahead = 0;`
Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00			`this.scanNewlines = false;`
Refactored Narcissus to use the module pattern (bug 583913, r=pwalton) 2010-08-04 04:15:02 +04:00			`this.filename = f \|\| "";`
			`this.lineno = l \|\| 1;`
			`}`

			`Tokenizer.prototype = {`
			`get done() {`
			`// We need to set scanOperand to true here because the first thing`
			`// might be a regexp.`
			`return this.peek(true) == END;`
			`},`

			`get token() {`
			`return this.tokens[this.tokenIndex];`
			`},`

			`match: function (tt, scanOperand) {`
			`return this.get(scanOperand) == tt \|\| this.unget();`
			`},`

			`mustMatch: function (tt) {`
			`if (!this.match(tt))`
			`throw this.newSyntaxError("Missing " + tokens[tt].toLowerCase());`
			`return this.token;`
			`},`

			`peek: function (scanOperand) {`
			`var tt, next;`
			`if (this.lookahead) {`
			`next = this.tokens[(this.tokenIndex + this.lookahead) & 3];`
			`tt = (this.scanNewlines && next.lineno != this.lineno)`
			`? NEWLINE`
			`: next.type;`
			`} else {`
			`tt = this.get(scanOperand);`
			`this.unget();`
			`}`
			`return tt;`
			`},`

			`peekOnSameLine: function (scanOperand) {`
			`this.scanNewlines = true;`
			`var tt = this.peek(scanOperand);`
			`this.scanNewlines = false;`
			`return tt;`
			`},`

			`// Eats comments and whitespace.`
			`skip: function () {`
			`var input = this.source;`
			`for (;;) {`
			`var ch = input[this.cursor++];`
			`var next = input[this.cursor];`
			`if (ch === '\n' && !this.scanNewlines) {`
			`this.lineno++;`
			`} else if (ch === '/' && next === '*') {`
			`this.cursor++;`
			`for (;;) {`
			`ch = input[this.cursor++];`
			`if (ch === undefined)`
			`throw this.newSyntaxError("Unterminated comment");`

			`if (ch === '*') {`
			`next = input[this.cursor];`
			`if (next === '/') {`
			`this.cursor++;`
			`break;`
			`}`
			`} else if (ch === '\n') {`
			`this.lineno++;`
			`}`
			`}`
			`} else if (ch === '/' && next === '/') {`
			`this.cursor++;`
			`for (;;) {`
			`ch = input[this.cursor++];`
			`if (ch === undefined)`
			`return;`

			`if (ch === '\n') {`
			`this.lineno++;`
Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00			`break;`
			`}`
			`}`
Refactored Narcissus to use the module pattern (bug 583913, r=pwalton) 2010-08-04 04:15:02 +04:00			`} else if (ch !== ' ' && ch !== '\t') {`
			`this.cursor--;`
			`return;`
Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00			`}`
Refactored Narcissus to use the module pattern (bug 583913, r=pwalton) 2010-08-04 04:15:02 +04:00			`}`
			`},`

			`// Lexes the exponential part of a number, if present. Returns true iff an`
			`// exponential part was found.`
			`lexExponent: function() {`
			`var input = this.source;`
			`var next = input[this.cursor];`
			`if (next === 'e' \|\| next === 'E') {`
Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00			`this.cursor++;`
Refactored Narcissus to use the module pattern (bug 583913, r=pwalton) 2010-08-04 04:15:02 +04:00			`ch = input[this.cursor++];`
			`if (ch === '+' \|\| ch === '-')`
Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00			`ch = input[this.cursor++];`
Refactored Narcissus to use the module pattern (bug 583913, r=pwalton) 2010-08-04 04:15:02 +04:00
			`if (ch < '0' \|\| ch > '9')`
			`throw this.newSyntaxError("Missing exponent");`

			`do {`
			`ch = input[this.cursor++];`
			`} while (ch >= '0' && ch <= '9');`
Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00			`this.cursor--;`
Refactored Narcissus to use the module pattern (bug 583913, r=pwalton) 2010-08-04 04:15:02 +04:00
			`return true;`
Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00			`}`
Refactored Narcissus to use the module pattern (bug 583913, r=pwalton) 2010-08-04 04:15:02 +04:00
			`return false;`
			`},`

			`lexZeroNumber: function (ch) {`
			`var token = this.token, input = this.source;`
			`token.type = NUMBER;`

Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00			`ch = input[this.cursor++];`
Refactored Narcissus to use the module pattern (bug 583913, r=pwalton) 2010-08-04 04:15:02 +04:00			`if (ch === '.') {`
			`do {`
			`ch = input[this.cursor++];`
			`} while (ch >= '0' && ch <= '9');`
			`this.cursor--;`

			`this.lexExponent();`
			`token.value = parseFloat(token.start, this.cursor);`
			`} else if (ch === 'x' \|\| ch === 'X') {`
			`do {`
			`ch = input[this.cursor++];`
			`} while ((ch >= '0' && ch <= '9') \|\| (ch >= 'a' && ch <= 'f') \|\|`
			`(ch >= 'A' && ch <= 'F'));`
			`this.cursor--;`

			`token.value = parseInt(input.substring(token.start, this.cursor));`
			`} else if (ch >= '0' && ch <= '7') {`
			`do {`
			`ch = input[this.cursor++];`
			`} while (ch >= '0' && ch <= '7');`
			`this.cursor--;`

			`token.value = parseInt(input.substring(token.start, this.cursor));`
			`} else {`
			`this.cursor--;`
			`this.lexExponent(); // 0E1, &c.`
			`token.value = 0;`
			`}`
			`},`

			`lexNumber: function (ch) {`
			`var token = this.token, input = this.source;`
			`token.type = NUMBER;`

			`var floating = false;`
Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00			`do {`
			`ch = input[this.cursor++];`
Refactored Narcissus to use the module pattern (bug 583913, r=pwalton) 2010-08-04 04:15:02 +04:00			`if (ch === '.' && !floating) {`
			`floating = true;`
			`ch = input[this.cursor++];`
			`}`
Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00			`} while (ch >= '0' && ch <= '9');`
Refactored Narcissus to use the module pattern (bug 583913, r=pwalton) 2010-08-04 04:15:02 +04:00
Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00			`this.cursor--;`
Refactored Narcissus to use the module pattern (bug 583913, r=pwalton) 2010-08-04 04:15:02 +04:00
			`var exponent = this.lexExponent();`
			`floating = floating \|\| exponent;`

			`var str = input.substring(token.start, this.cursor);`
			`token.value = floating ? parseFloat(str) : parseInt(str);`
			`},`

			`lexDot: function (ch) {`
			`var token = this.token, input = this.source;`
			`var next = input[this.cursor];`
			`if (next >= '0' && next <= '9') {`
			`do {`
			`ch = input[this.cursor++];`
			`} while (ch >= '0' && ch <= '9');`
			`this.cursor--;`

			`this.lexExponent();`

			`token.type = NUMBER;`
			`token.value = parseFloat(token.start, this.cursor);`
			`} else {`
			`token.type = DOT;`
			`token.assignOp = null;`
			`token.value = '.';`
			`}`
			`},`

			`lexString: function (ch) {`
			`var token = this.token, input = this.source;`
			`token.type = STRING;`

			`var hasEscapes = false;`
			`var delim = ch;`
Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00			`ch = input[this.cursor++];`
Refactored Narcissus to use the module pattern (bug 583913, r=pwalton) 2010-08-04 04:15:02 +04:00			`while (ch !== delim) {`
			`if (ch === '\\') {`
			`hasEscapes = true;`
			`this.cursor++;`
			`}`
Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00			`ch = input[this.cursor++];`
			`}`
Refactored Narcissus to use the module pattern (bug 583913, r=pwalton) 2010-08-04 04:15:02 +04:00
			`token.value = (hasEscapes)`
			`? eval(input.substring(token.start, this.cursor))`
			`: input.substring(token.start + 1, this.cursor - 1);`
			`},`

			`lexRegExp: function (ch) {`
			`var token = this.token, input = this.source;`
			`token.type = REGEXP;`

Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00			`do {`
			`ch = input[this.cursor++];`
Refactored Narcissus to use the module pattern (bug 583913, r=pwalton) 2010-08-04 04:15:02 +04:00			`if (ch === '\\') {`
			`this.cursor++;`
			`} else if (ch === '[') {`
			`do {`
			`if (ch === undefined)`
			`throw this.newSyntaxError("Unterminated character class");`

			`if (ch === '\\')`
			`this.cursor++;`

			`ch = input[this.cursor++];`
			`} while (ch !== ']');`
			`} else if (ch === undefined) {`
			`throw this.newSyntaxError("Unterminated regex");`
			`}`
			`} while (ch !== '/');`

			`do {`
			`ch = input[this.cursor++];`
			`} while (ch >= 'a' && ch <= 'z');`

Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00			`this.cursor--;`
Refactored Narcissus to use the module pattern (bug 583913, r=pwalton) 2010-08-04 04:15:02 +04:00
			`token.value = eval(input.substring(token.start, this.cursor));`
			`},`

			`lexOp: function (ch) {`
			`var token = this.token, input = this.source;`

			`// A bit ugly, but it seems wasteful to write a trie lookup routine for`
			`// only 3 characters...`
			`var node = opTokens[ch];`
			`var next = input[this.cursor];`
Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00			`if (next in node) {`
			`node = node[next];`
			`this.cursor++;`
			`next = input[this.cursor];`
Refactored Narcissus to use the module pattern (bug 583913, r=pwalton) 2010-08-04 04:15:02 +04:00			`if (next in node) {`
			`node = node[next];`
			`this.cursor++;`
			`next = input[this.cursor];`
			`}`
Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00			`}`
Refactored Narcissus to use the module pattern (bug 583913, r=pwalton) 2010-08-04 04:15:02 +04:00
			`var op = node.op;`
			`if (jsdefs.assignOps[op] && input[this.cursor] === '=') {`
			`this.cursor++;`
			`token.type = ASSIGN;`
			`token.assignOp = jsdefs.tokenIds[jsdefs.opTypeNames[op]];`
			`op += '=';`
			`} else {`
			`token.type = jsdefs.tokenIds[jsdefs.opTypeNames[op]];`
			`token.assignOp = null;`
			`}`

			`token.value = op;`
			`},`

			`// FIXME: Unicode escape sequences`
			`// FIXME: Unicode identifiers`
			`lexIdent: function (ch) {`
			`var token = this.token, input = this.source;`

			`do {`
			`ch = input[this.cursor++];`
			`} while ((ch >= 'a' && ch <= 'z') \|\| (ch >= 'A' && ch <= 'Z') \|\|`
			`(ch >= '0' && ch <= '9') \|\| ch === '$' \|\| ch === '_');`

			`this.cursor--; // Put the non-word character back.`

			`var id = input.substring(token.start, this.cursor);`
			`token.type = jsdefs.keywords[id] \|\| IDENTIFIER;`
			`token.value = id;`
			`},`

			`/*`
			`* Tokenizer.get :: void -> token type`
			`*`
			`* Consumes input only if there is no lookahead.`
			`* Dispatch to the appropriate lexing function depending on the input.`
			`*/`
			`get: function (scanOperand) {`
			`var token;`
			`while (this.lookahead) {`
			`--this.lookahead;`
			`this.tokenIndex = (this.tokenIndex + 1) & 3;`
			`token = this.tokens[this.tokenIndex];`
			`if (token.type != NEWLINE \|\| this.scanNewlines)`
			`return token.type;`
			`}`

			`this.skip();`

Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00			`this.tokenIndex = (this.tokenIndex + 1) & 3;`
			`token = this.tokens[this.tokenIndex];`
Refactored Narcissus to use the module pattern (bug 583913, r=pwalton) 2010-08-04 04:15:02 +04:00			`if (!token)`
			`this.tokens[this.tokenIndex] = token = {};`

			`var input = this.source;`
			`if (this.cursor === input.length)`
			`return token.type = END;`

			`token.start = this.cursor;`
			`token.lineno = this.lineno;`

			`var ch = input[this.cursor++];`
			`if ((ch >= 'a' && ch <= 'z') \|\| (ch >= 'A' && ch <= 'Z') \|\|`
			`ch === '$' \|\| ch === '_') {`
			`this.lexIdent(ch);`
			`} else if (scanOperand && ch === '/') {`
			`this.lexRegExp(ch);`
			`} else if (ch in opTokens) {`
			`this.lexOp(ch);`
			`} else if (ch === '.') {`
			`this.lexDot(ch);`
			`} else if (ch >= '1' && ch <= '9') {`
			`this.lexNumber(ch);`
			`} else if (ch === '0') {`
			`this.lexZeroNumber(ch);`
			`} else if (ch === '"' \|\| ch === "'") {`
			`this.lexString(ch);`
			`} else if (this.scanNewlines && ch === '\n') {`
			`token.type = NEWLINE;`
			`token.value = '\n';`
			`this.lineno++;`
			`} else {`
			`throw this.newSyntaxError("Illegal token");`
			`}`

			`token.end = this.cursor;`
			`return token.type;`
			`},`

			`/*`
			`* Tokenizer.unget :: void -> undefined`
			`*`
			`* Match depends on unget returning undefined.`
			`*/`
			`unget: function () {`
			`if (++this.lookahead == 4) throw "PANIC: too much lookahead!";`
			`this.tokenIndex = (this.tokenIndex - 1) & 3;`
			`},`

			`newSyntaxError: function (m) {`
			`var e = new SyntaxError(m, this.filename, this.lineno);`
			`e.source = this.source;`
			`e.cursor = this.cursor;`
			`return e;`
			`},`

			`save: function () {`
			`return {`
			`cursor: this.cursor,`
			`tokenIndex: this.tokenIndex,`
			`tokens: this.tokens.slice(),`
			`lookahead: this.lookahead,`
			`scanNewlines: this.scanNewlines,`
			`lineno: this.lineno`
			`};`
			`},`

			`rewind: function(point) {`
			`this.cursor = point.cursor;`
			`this.tokenIndex = point.tokenIndex;`
			`this.tokens = point.tokens.slice();`
			`this.lookahead = point.lookahead;`
			`this.scanNewline = point.scanNewline;`
			`this.lineno = point.lineno;`
Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00			`}`
Refactored Narcissus to use the module pattern (bug 583913, r=pwalton) 2010-08-04 04:15:02 +04:00			`};`

			`return { "Tokenizer": Tokenizer };`
Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00
Refactored Narcissus to use the module pattern (bug 583913, r=pwalton) 2010-08-04 04:15:02 +04:00			`}());`
Bug 546097 - Narcissus: replace the regex-based lexer with a handwritten one; remove GLOBAL. r=brendan --HG-- extra : rebase_source : a0cfac8bbbd86302530b3cbe9e82b161a5db2ac4 2010-05-25 00:43:21 +04:00