From 41be40172b601d0d8ba32f99520705781fb025d6 Mon Sep 17 00:00:00 2001
From: Dimitris Vardoulakis <dvardoulakis@mozilla.com>
Date: Fri, 2 Jul 2010 10:29:24 -0700
Subject: [PATCH] Extension to the narcissus parser to handle Mozilla-specific
 extensions to JS (572014, r=pwalton).

---
 js/narcissus/jsdefs.js  |   4 +-
 js/narcissus/jslex.js   |   6 +
 js/narcissus/jsparse.js | 259 ++++++++++++++++++++++++++++++++--------
 3 files changed, 220 insertions(+), 49 deletions(-)

diff --git a/js/narcissus/jsdefs.js b/js/narcissus/jsdefs.js
index a1f8c927443..eeb7a3ccdf2 100644
--- a/js/narcissus/jsdefs.js
+++ b/js/narcissus/jsdefs.js
@@ -72,7 +72,7 @@ var tokens = [
     // Nonterminal tree node type codes.
     "SCRIPT", "BLOCK", "LABEL", "FOR_IN", "CALL", "NEW_WITH_ARGS", "INDEX",
     "ARRAY_INIT", "OBJECT_INIT", "PROPERTY_INIT", "GETTER", "SETTER",
-    "GROUP", "LIST",
+    "GROUP", "LIST", "LET_STM", "LET_EXP", "LET_DEF",
 
     // Terminals.
     "IDENTIFIER", "NUMBER", "STRING", "REGEXP",
@@ -84,11 +84,13 @@ var tokens = [
     "else", "enum",
     "false", "finally", "for", "function",
     "if", "in", "instanceof",
+    "let",
     "new", "null",
     "return",
     "switch",
     "this", "throw", "true", "try", "typeof",
     "var", "void",
+    "yield",
     "while", "with",
 ];
 
diff --git a/js/narcissus/jslex.js b/js/narcissus/jslex.js
index 6e848691c75..89944ba8789 100644
--- a/js/narcissus/jslex.js
+++ b/js/narcissus/jslex.js
@@ -57,6 +57,7 @@ for (var op in opTypeNames) {
     }
 }
 
+// source text, path to file, line number -> Tokenizer
 function Tokenizer(s, f, l) {
     this.cursor = 0;
     this.source = String(s);
@@ -363,6 +364,9 @@ Tokenizer.prototype = {
         token.value = id;
     },
 
+    // void -> token type
+    // It consumes input *only* if there is no lookahead.
+    // Dispatch to the appropriate lexing function depending on the input.
     get: function () {
         var token;
         while (this.lookahead) {
@@ -415,6 +419,8 @@ Tokenizer.prototype = {
         return token.type;
     },
 
+    // void -> undefined
+    // match depends on unget returning undefined.
     unget: function () {
         if (++this.lookahead == 4) throw "PANIC: too much lookahead!";
         this.tokenIndex = (this.tokenIndex - 1) & 3;
diff --git a/js/narcissus/jsparse.js b/js/narcissus/jsparse.js
index 4d9ad84d3ca..78445e27b29 100644
--- a/js/narcissus/jsparse.js
+++ b/js/narcissus/jsparse.js
@@ -43,10 +43,15 @@
  * Parser.
  */
 
+// boolean -> undefined
+// inFunction is used to check if a return stm appears in a valid context.
 function CompilerContext(inFunction) {
     this.inFunction = inFunction;
+    // The elements of stmtStack are used to find the target label of CONTINUEs
+    // and BREAKs. Its length is used in function definitions.
     this.stmtStack = [];
     this.funDecls = [];
+    //varDecls accumulate when we process decls w/ the var keyword.
     this.varDecls = [];
 }
 
@@ -59,11 +64,15 @@ CompilerContext.prototype = {
     inForLoopInit: false,
 };
 
+// tokenizer, compiler context -> node
+// parses the toplevel and function bodies
 function Script(t, x) {
     var n = Statements(t, x);
     n.type = SCRIPT;
     n.funDecls = x.funDecls;
-    n.varDecls = x.varDecls;
+    // LETs may add varDecls to blocks.
+    n.varDecls = n.varDecls || [];
+    Array.prototype.push.apply(n.varDecls, x.varDecls);
     return n;
 }
 
@@ -73,18 +82,21 @@ defineProperty(Array.prototype, "top",
                    return this.length && this[this.length-1];
                }, false, false, true);
 
+// tokenizer, optional type -> node
 function Node(t, type) {
     var token = t.token;
     if (token) {
         this.type = type || token.type;
         this.value = token.value;
         this.lineno = token.lineno;
+        // start & end are file positions for error handling
         this.start = token.start;
         this.end = token.end;
     } else {
         this.type = type;
         this.lineno = t.lineno;
     }
+    // nodes use a tokenizer for debugging (getSource, filename getter)
     this.tokenizer = t;
 
     for (var i = 2; i < arguments.length; i++)
@@ -97,10 +109,12 @@ Np.toSource = Object.prototype.toSource;
 
 // Always use push to add operands to an expression, to update start and end.
 Np.push = function (kid) {
-    if (kid.start < this.start)
-        this.start = kid.start;
-    if (this.end < kid.end)
-        this.end = kid.end;
+    if (kid !== null) { // kids can be null e.g. [1, , 2]
+        if (kid.start < this.start)
+            this.start = kid.start;
+        if (this.end < kid.end)
+            this.end = kid.end;
+    }
     return Array.prototype.push.call(this, kid);
 }
 
@@ -154,6 +168,8 @@ function nest(t, x, node, func, end) {
     return n;
 }
 
+// tokenizer, compiler context -> node
+// parses a list of Statements
 function Statements(t, x) {
     var n = new Node(t, BLOCK);
     x.stmtStack.push(n);
@@ -172,13 +188,28 @@ function Block(t, x) {
 
 const DECLARED_FORM = 0, EXPRESSED_FORM = 1, STATEMENT_FORM = 2;
 
+// tokenizer, compiler context -> node
+// parses a Statement
 function Statement(t, x) {
     var i, label, n, n2, ss, tt = t.get();
 
     // Cases for statements ending in a right curly return early, avoiding the
     // common semicolon insertion magic after this switch.
     switch (tt) {
+      case LET:
+        n = LetForm(t, x, STATEMENT_FORM);
+        if (n.type === LET_STM)
+            return n;
+        if (n.type === LET_EXP) {// exps in stm context are semi nodes
+            n2 = new Node(t, SEMICOLON);
+            n2.expression = n;
+            n = n2;
+            n.end = n.expression.end;
+        }
+        break;
+
       case FUNCTION:
+        // DECLARED_FORM extends the funDecls of x, STATEMENT_FORM doesn't.
         return FunctionDefinition(t, x, true,
                                   (x.stmtStack.length > 1)
                                   ? STATEMENT_FORM
@@ -199,10 +230,10 @@ function Statement(t, x) {
         return n;
 
       case SWITCH:
+        // This allows CASEs after a DEFAULT, which is in the standard.
         n = new Node(t);
-        t.mustMatch(LEFT_PAREN);
-        n.discriminant = Expression(t, x);
-        t.mustMatch(RIGHT_PAREN);
+
+        n.discriminant = ParenExpression(t, x);
         n.cases = [];
         n.defaultIndex = -1;
         x.stmtStack.push(n);
@@ -235,34 +266,51 @@ function Statement(t, x) {
       case FOR:
         n = new Node(t);
         n.isLoop = true;
+        if (t.match(IDENTIFIER)) {
+            if (t.token.value !== "each")
+                throw t.newSyntaxError("Illegal identifier after for");
+            else
+                n.foreach = true;
+        }
         t.mustMatch(LEFT_PAREN);
         if ((tt = t.peek()) != SEMICOLON) {
             x.inForLoopInit = true;
-            if (tt == VAR || tt == CONST) {
+            switch (tt) {
+              case VAR: case CONST:
                 t.get();
                 n2 = Variables(t, x);
-            } else {
+                break;
+              case LET:
+                t.get();
+                n2 = Variables(t, x, "local decls");
+                // don't confuse w/ n.varDecl used by for/in.
+                n.varDecls = [];
+                for (var i = 0, len = n2.length, vdecls = n.varDecls; i < len; i++)
+                    vdecls.push(n2[i]);
+                break;
+              default:
                 n2 = Expression(t, x);
+                break;
             }
             x.inForLoopInit = false;
         }
-        if (n2 && t.match(IN)) {
+        if (n2 && t.match(IN)) { // for...in
+            var n2t = n2.type,
+                se = t.newSyntaxError("Invalid for..in left-hand side");
             n.type = FOR_IN;
-            if (n2.type == VAR) {
-                if (n2.length != 1) {
-                    throw new SyntaxError("Invalid for..in left-hand side",
-                                          t.filename, n2.lineno);
-                }
-
-                // NB: n2[0].type == IDENTIFIER and n2[0].value == n2[0].name.
+            if (n2t === VAR || n2t === LET) {
+                if (n2.length != 1) throw se;
                 n.iterator = n2[0];
                 n.varDecl = n2;
+            } else if (n2t !== IDENTIFIER) {
+                throw se;
             } else {
                 n.iterator = n2;
                 n.varDecl = null;
             }
             n.object = Expression(t, x);
-        } else {
+        } else { // classic for
+            if (n.foreach) throw t.newSyntaxError("Illegal for-each syntax");
             n.setup = n2 || null;
             t.mustMatch(SEMICOLON);
             n.condition = (t.peek() == SEMICOLON) ? null : Expression(t, x);
@@ -310,8 +358,7 @@ function Statement(t, x) {
                     throw t.newSyntaxError("Label not found");
             } while (ss[i].label != label);
 
-            /*
-             * Both break and continue to label need to be handled specially
+            /* Both break and continue to label need to be handled specially
              * within a labeled loop, so that they target that loop. If not in
              * a loop, then break targets its labeled statement. Labels can be
              * nested so we skip all labels immediately enclosing the nearest
@@ -332,7 +379,7 @@ function Statement(t, x) {
                 }
             } while (!ss[i].isLoop && !(tt == BREAK && ss[i].type == SWITCH));
         }
-        n.target = ss[i];
+        n.target = ss[i]; // cycle in the AST
         break;
 
       case TRY:
@@ -386,7 +433,8 @@ function Statement(t, x) {
         n.body = nest(t, x, n, Statement);
         return n;
 
-      case VAR:
+
+      case VAR: // for variable declarations using the VAR and CONST keywords.
       case CONST:
         n = Variables(t, x);
         break;
@@ -406,6 +454,7 @@ function Statement(t, x) {
             t.scanOperand = false;
             tt = t.peek();
             t.scanOperand = true;
+            // labeled statement
             if (tt == COLON) {
                 label = t.token.value;
                 ss = x.stmtStack;
@@ -420,7 +469,8 @@ function Statement(t, x) {
                 return n;
             }
         }
-
+        // expression statement.
+        // We unget the current token to parse the expr as a whole.
         n = new Node(t, SEMICOLON);
         t.unget();
         n.expression = Expression(t, x);
@@ -428,6 +478,7 @@ function Statement(t, x) {
         break;
     }
 
+    // semicolon-insertion magic
     if (t.lineno == t.token.lineno) {
         tt = t.peekOnSameLine();
         if (tt != END && tt != NEWLINE && tt != SEMICOLON && tt != RIGHT_CURLY)
@@ -437,6 +488,8 @@ function Statement(t, x) {
     return n;
 }
 
+// tokenizer, compiler context, boolean,
+// DECLARED_FORM or EXPRESSED_FORM or STATEMENT_FORM -> node
 function FunctionDefinition(t, x, requireName, functionForm) {
     var f = new Node(t);
     if (f.type != FUNCTION)
@@ -457,10 +510,13 @@ function FunctionDefinition(t, x, requireName, functionForm) {
             t.mustMatch(COMMA);
     }
 
-    t.mustMatch(LEFT_CURLY);
-    var x2 = new CompilerContext(true);
-    f.body = Script(t, x2);
-    t.mustMatch(RIGHT_CURLY);
+    if (t.match(LEFT_CURLY)) {
+        var x2 = new CompilerContext(true);
+        f.body = Script(t, x2);
+        t.mustMatch(RIGHT_CURLY);
+    } else { /* Expression closures (1.8) */
+        f.body = Expression(t, x, COMMA);
+    }
     f.end = t.token.end;
 
     f.functionForm = functionForm;
@@ -469,12 +525,19 @@ function FunctionDefinition(t, x, requireName, functionForm) {
     return f;
 }
 
+// tokenizer, compiler context -> node
+// parses a comma-separated list of var decls (and maybe initializations)
 function Variables(t, x) {
-    var n = new Node(t);
+    var n = new Node(t), tt, n2;
     do {
-        t.mustMatch(IDENTIFIER);
-        var n2 = new Node(t);
-        n2.name = n2.value;
+        tt = t.peek();
+        if (tt === LEFT_CURLY || tt === LEFT_BRACKET) {
+            n2 = Expression(t, x); // for destructuring
+        } else {
+            t.mustMatch(IDENTIFIER);
+            n2 = new Node(t);
+            n2.name = n2.value;
+        }
         if (t.match(ASSIGN)) {
             if (t.token.assignOp)
                 throw t.newSyntaxError("Invalid variable initialization");
@@ -482,11 +545,52 @@ function Variables(t, x) {
         }
         n2.readOnly = (n.type == CONST);
         n.push(n2);
-        x.varDecls.push(n2);
+        // LETs use "local decls"
+        if (arguments[2] !== "local decls") x.varDecls.push(n2);
     } while (t.match(COMMA));
     return n;
 }
 
+// tokenizer, compiler context, EXPRESSED_FORM or STATEMENT_FORM -> node
+// Parses let statements, expressions and definitions; not for-loop-head lets.
+function LetForm(t, x, form) {
+    var i, n, n2, s, ss, hasLeftParen;
+    // A parenthesized head means let stm/exp; a bare head is a let definition.
+    n = new Node(t);
+    hasLeftParen = t.match(LEFT_PAREN);
+    n2 = Variables(t, x, "local decls"); // keeps the decls out of x.varDecls
+    if (hasLeftParen) {//let statement and let expression
+        t.mustMatch(RIGHT_PAREN);
+        n.varDecls = [];
+        for (i = 0; i < n2.length; i++)
+            n.varDecls.push(n2[i]);
+        if (form === STATEMENT_FORM && t.peek() === LEFT_CURLY) { // block body
+            n.type = LET_STM;
+            n.body = nest(t, x, n, Block);
+        } else {
+            n.type = LET_EXP;
+            n.body = Expression(t, x, COMMA);
+        }
+    } else if (form === EXPRESSED_FORM) {
+        throw t.newSyntaxError("Let-definition used as expression.");
+    } else {//let definition
+        n.type = LET_DEF;
+        //search context to find enclosing BLOCK
+        ss = x.stmtStack;
+        i = ss.length;
+        while (ss[--i].type !== BLOCK) ; // a BLOCK *must* be found.
+        s = ss[i];
+        s.varDecls = s.varDecls || [];
+        n.varDecls = [];
+        for (i = 0; i < n2.length; i++) {
+            s.varDecls.push(n2[i]); // the vars must go in the correct scope
+            n.varDecls.push(n2[i]); // but the assignments must stay here
+        }
+    }
+    return n;
+}
+
+// tokenizer, compiler context -> node
 function ParenExpression(t, x) {
     t.mustMatch(LEFT_PAREN);
     var n = Expression(t, x);
@@ -545,11 +649,18 @@ var opArity = {
 for (i in opArity)
     opArity[tokenIds[i]] = opArity[i];
 
+// tokenizer, compiler context, optional COMMA or COLON -> node
+// When scanOperand is true the parser wants an operand (the "default" mode).
+// When it's false, the parser is expecting an operator.
 function Expression(t, x, stop) {
     var n, id, tt, operators = [], operands = [];
     var bl = x.bracketLevel, cl = x.curlyLevel, pl = x.parenLevel,
         hl = x.hookLevel;
 
+    // void -> node
+    // Uses an operator and its operands to construct a whole expression.
+    // The result of reduce isn't used by its callers. It's left on the operands
+    // stack and it's retrieved from there.
     function reduce() {
         var n = operators.pop();
         var op = n.type;
@@ -578,7 +689,12 @@ function Expression(t, x, stop) {
         return n;
     }
 
-loop:
+    // If we are expecting an operator and find something else, it may not be
+    // an error, because of semicolon insertion. So Expression doesn't throw
+    // for this. If it turns out to be an error, it is detected by other parts
+    // of the code and the message may be obscure.
+
+    loop: // tt stands for token type
     while ((tt = t.get()) != END) {
         if (tt == stop &&
             x.bracketLevel == bl && x.curlyLevel == cl && x.parenLevel == pl &&
@@ -592,12 +708,24 @@ loop:
             // NB: cannot be empty, Statement handled that.
             break loop;
 
+          case LET: //parse let expressions
+            //LET is not an operator, no need to assign precedence to it.
+            if (!t.scanOperand) break loop;
+            operands.push(LetForm(t, x, EXPRESSED_FORM));
+            t.scanOperand = false;
+            break;
+
           case ASSIGN:
+          //the parser doesn't check that the lhs of an assignment is legal,
+          //so it unintentionally allows destructuring here.
+          //FIXME: report illegal left-hand sides in assignments.
           case HOOK:
           case COLON:
             if (t.scanOperand)
                 break loop;
+
             // Use >, not >=, for right-associative ASSIGN and HOOK/COLON.
+            // if operators is empty, operators.top().type is undefined.
             while (opPrecedence[operators.top().type] > opPrecedence[tt] ||
                    (tt == COLON && operators.top().type == ASSIGN)) {
                 reduce();
@@ -655,11 +783,14 @@ loop:
             }
             break;
 
+          case YIELD:
+            if (!x.inFunction) throw t.newSyntaxError("yield not in function");
+            // fall thru
+
           case DELETE: case VOID: case TYPEOF:
           case NOT: case BITWISE_NOT: case UNARY_PLUS: case UNARY_MINUS:
           case NEW:
-            if (!t.scanOperand)
-                break loop;
+            if (!t.scanOperand) break loop;
             operators.push(new Node(t));
             break;
 
@@ -683,16 +814,20 @@ loop:
             break;
 
           case FUNCTION:
-            if (!t.scanOperand)
-                break loop;
+            if (!t.scanOperand) break loop;
             operands.push(FunctionDefinition(t, x, false, EXPRESSED_FORM));
             t.scanOperand = false;
             break;
 
-          case NULL: case THIS: case TRUE: case FALSE:
-          case IDENTIFIER: case NUMBER: case STRING: case REGEXP:
-            if (!t.scanOperand)
-                break loop;
+          case NULL:
+          case THIS:
+          case TRUE:
+          case FALSE:
+          case IDENTIFIER:
+          case NUMBER:
+          case STRING:
+          case REGEXP:
+            if (!t.scanOperand) break loop;
             operands.push(new Node(t));
             t.scanOperand = false;
             break;
@@ -701,16 +836,43 @@ loop:
             if (t.scanOperand) {
                 // Array initialiser.  Parse using recursive descent, as the
                 // sub-grammar here is not an operator grammar.
+                var fi, iter, elms, x2;
                 n = new Node(t, ARRAY_INIT);
+                elms = 0
                 while ((tt = t.peek()) != RIGHT_BRACKET) {
+                    elms++;
                     if (tt == COMMA) {
                         t.get();
                         n.push(null);
                         continue;
                     }
                     n.push(Expression(t, x, COMMA));
-                    if (!t.match(COMMA))
+                    if (t.match(FOR)) { // array comprehensions
+                        if (elms !== 1)
+                            throw t.newSyntaxError("Invalid comprehension");
+                        fi = new Node(t, FOR_IN);
+                        if (t.match(IDENTIFIER)) {
+                            if (t.token.value !== "each")
+                                throw t.newSyntaxError("Invalid comprehension");
+                            else
+                                n.foreach = true;
+                        }
+                        t.mustMatch(LEFT_PAREN);
+                        // x.inForLoopInit = true;  won't work because this FOR
+                        // may be inside another expression => parenLevel !== 0
+                        x2 = new CompilerContext(x.inFunction);
+                        x2.inForLoopInit = true;
+                        iter = Expression(t, x2);
+                        if (iter.type !== IDENTIFIER)
+                            throw t.newSyntaxError("Invalid comprehension");
+                        fi.iterator = iter;
+                        t.mustMatch(IN);
+                        fi.object = Expression(t, x);
+                        t.mustMatch(RIGHT_PAREN);
+                        if (t.match(IF)) fi.condition = Expression(t, x);
                         break;
+                    }
+                    if (!t.match(COMMA)) break;
                 }
                 t.mustMatch(RIGHT_BRACKET);
                 operands.push(n);
@@ -732,8 +894,7 @@ loop:
             break;
 
           case LEFT_CURLY:
-            if (!t.scanOperand)
-                break loop;
+            if (!t.scanOperand) break loop;
             // Object initialiser.  As for array initialisers (see above),
             // parse using recursive descent.
             ++x.curlyLevel;
@@ -827,9 +988,9 @@ loop:
             --x.parenLevel;
             break;
 
-          // Automatic semicolon insertion means we may scan across a newline
-          // and into the beginning of another statement.  If so, break out of
-          // the while loop and let the t.scanOperand logic handle errors.
+            // Automatic semicolon insertion means we may scan across a newline
+            // and into the beginning of another statement.  If so, break out of
+            // the while loop and let the t.scanOperand logic handle errors.
           default:
             break loop;
         }
@@ -852,6 +1013,7 @@ loop:
     return operands.pop();
 }
 
+// source text, path to file, line number -> node
 function parse(s, f, l) {
     var t = new Tokenizer(s, f, l);
     var x = new CompilerContext(false);
@@ -860,3 +1022,4 @@ function parse(s, f, l) {
         throw t.newSyntaxError("Syntax error");
     return n;
 }
+