Bug 1204027 - Forbid escapes within keywords when parsing/tokenizing. r=arai

--HG-- extra : rebase_source : 3def5ff0e7e6a2e2e6490c8d9b700dc7fa98a630
2015-09-14 14:11:26 -07:00 · 2015-09-14 14:11:26 -07:00 · 4ef4061b96
--- a/js/src/frontend/Parser.cpp
+++ b/js/src/frontend/Parser.cpp
@ -2943,6 +2943,17 @@ IsEscapeFreeStringLiteral(const TokenPos& pos, JSAtom* str)
    return pos.begin + str->length() + 2 == pos.end;
 }

+template <typename ParseHandler>
+bool
+Parser<ParseHandler>::checkUnescapedName(const Token& token)
+{
+    // Report JSMSG_ESCAPED_KEYWORD at the token's start if its name has an escape.
+    const bool escapeFree = !token.nameContainsEscape();
+    if (!escapeFree)
+        reportWithOffset(ParseError, false, token.pos.begin, JSMSG_ESCAPED_KEYWORD);
+    return escapeFree;
+}
+
 template <>
 bool
 Parser<SyntaxParseHandler>::asmJS(Node list)
@ -4528,10 +4539,11 @@ Parser<FullParseHandler>::namedImportsOrNamespaceImport(TokenKind tt, Node impor
            if (!importName)
                return false;

-            if (!tokenStream.getToken(&tt))
+            bool foundAs;
+            if (!tokenStream.matchContextualKeyword(&foundAs, context->names().as))
                return false;

-            if (tt == TOK_NAME && tokenStream.currentName() == context->names().as) {
+            if (foundAs) {
                MUST_MATCH_TOKEN(TOK_NAME, JSMSG_NO_BINDING_NAME);
            } else {
                // Keywords cannot be bound to themselves, so an import name
@ -4545,7 +4557,6 @@ Parser<FullParseHandler>::namedImportsOrNamespaceImport(TokenKind tt, Node impor
                    report(ParseError, false, null(), JSMSG_AS_AFTER_RESERVED_WORD, bytes.ptr());
                    return false;
                }
-                tokenStream.ungetToken();
            }

            Node bindingName = newBoundImportForCurrentName();
@ -4579,6 +4590,9 @@ Parser<FullParseHandler>::namedImportsOrNamespaceImport(TokenKind tt, Node impor
            return false;
        }

+        if (!checkUnescapedName(tokenStream.currentToken()))
+            return false;
+
        MUST_MATCH_TOKEN(TOK_NAME, JSMSG_NO_BINDING_NAME);

        Node importName = newName(context->names().star);
@ -4675,6 +4689,9 @@ Parser<ParseHandler>::importDeclaration()
            return null();
        }

+        if (!checkUnescapedName(tokenStream.currentToken()))
+            return null();
+
        MUST_MATCH_TOKEN(TOK_STRING, JSMSG_MODULE_SPEC_AFTER_FROM);
    } else if (tt == TOK_STRING) {
        // Handle the form |import 'a'| by leaving the list empty. This is
@ -4772,18 +4789,18 @@ Parser<FullParseHandler>::exportDeclaration()
            if (!bindingName)
                return null();

-            if (!tokenStream.getToken(&tt))
+            bool foundAs;
+            if (!tokenStream.matchContextualKeyword(&foundAs, context->names().as))
                return null();
-            if (tt == TOK_NAME && tokenStream.currentName() == context->names().as) {
+            if (foundAs) {
                if (!tokenStream.getToken(&tt, TokenStream::KeywordIsName))
                    return null();
                if (tt != TOK_NAME) {
                    report(ParseError, false, null(), JSMSG_NO_EXPORT_NAME);
                    return null();
                }
-            } else {
-                tokenStream.ungetToken();
            }
+
            Node exportName = newName(tokenStream.currentName());
            if (!exportName)
                return null();
@ -4806,9 +4823,28 @@ Parser<FullParseHandler>::exportDeclaration()

        MUST_MATCH_TOKEN(TOK_RC, JSMSG_RC_AFTER_EXPORT_SPEC_LIST);

-        if (!tokenStream.getToken(&tt))
+        // Careful!  If |from| follows, even on a new line, it must start a
+        // FromClause:
+        //
+        //   export { x }
+        //   from "foo"; // a single ExportDeclaration
+        //
+        // But if it doesn't, we might have an ASI opportunity in Operand
+        // context, so simply matching a contextual keyword won't work:
+        //
+        //   export { x }   // ExportDeclaration, terminated by ASI
+        //   fro\u006D      // ExpressionStatement, the name "from"
+        //
+        // In that case let MatchOrInsertSemicolon sort out ASI or any
+        // necessary error.
+        TokenKind tt;
+        if (!tokenStream.getToken(&tt, TokenStream::Operand))
            return null();
-        if (tt == TOK_NAME && tokenStream.currentName() == context->names().from) {
+
+        if (tt == TOK_NAME &&
+            tokenStream.currentToken().name() == context->names().from &&
+            !tokenStream.currentToken().nameContainsEscape())
+        {
            MUST_MATCH_TOKEN(TOK_STRING, JSMSG_MODULE_SPEC_AFTER_FROM);

            Node moduleSpec = stringLiteral();
@ -4823,7 +4859,7 @@ Parser<FullParseHandler>::exportDeclaration()
            tokenStream.ungetToken();
        }

-        if (!MatchOrInsertSemicolon(tokenStream))
+        if (!MatchOrInsertSemicolon(tokenStream, TokenStream::Operand))
            return null();
        break;
      }
@ -4844,6 +4880,9 @@ Parser<FullParseHandler>::exportDeclaration()
        if (!tokenStream.getToken(&tt))
            return null();
        if (tt == TOK_NAME && tokenStream.currentName() == context->names().from) {
+            if (!checkUnescapedName(tokenStream.currentToken()))
+                return null();
+
            MUST_MATCH_TOKEN(TOK_STRING, JSMSG_MODULE_SPEC_AFTER_FROM);

            Node moduleSpec = stringLiteral();
@ -5085,8 +5124,12 @@ Parser<ParseHandler>::matchInOrOf(bool* isForInp, bool* isForOfp)
        return false;
    *isForInp = tt == TOK_IN;
    *isForOfp = tt == TOK_NAME && tokenStream.currentToken().name() == context->names().of;
-    if (!*isForInp && !*isForOfp)
+    if (!*isForInp && !*isForOfp) {
        tokenStream.ungetToken();
+    } else {
+        if (tt == TOK_NAME && !checkUnescapedName(tokenStream.currentToken()))
+            return false;
+    }
    return true;
 }

@ -6485,6 +6528,9 @@ Parser<FullParseHandler>::classDefinition(YieldHandling yieldHandling,
            if (!tokenStream.peekToken(&tt, TokenStream::KeywordIsName))
                return null();
            if (tt != TOK_LP) {
+                if (!checkUnescapedName(tokenStream.currentToken()))
+                    return null();
+
                isStatic = true;
            } else {
                tokenStream.addModifierException(TokenStream::NoneIsKeywordIsName);
@ -8879,16 +8925,24 @@ Parser<ParseHandler>::propertyName(YieldHandling yieldHandling, Node propList,
        *propType = propAtom.get() == context->names().get ? PropertyType::Getter
                                                           : PropertyType::Setter;

+        Token getSetToken = tokenStream.currentToken();
+
        // We have parsed |get| or |set|. Look for an accessor property
        // name next.
        TokenKind tt;
        if (!tokenStream.getToken(&tt, TokenStream::KeywordIsName))
            return null();
        if (tt == TOK_NAME) {
+            if (!checkUnescapedName(getSetToken))
+                return null();
+
            propAtom.set(tokenStream.currentName());
            return handler.newObjectLiteralPropertyName(propAtom, pos());
        }
        if (tt == TOK_STRING) {
+            if (!checkUnescapedName(getSetToken))
+                return null();
+
            propAtom.set(tokenStream.currentToken().atom());

            uint32_t index;
@ -8901,13 +8955,20 @@ Parser<ParseHandler>::propertyName(YieldHandling yieldHandling, Node propList,
            return stringLiteral();
        }
        if (tt == TOK_NUMBER) {
+            if (!checkUnescapedName(getSetToken))
+                return null();
+
            propAtom.set(DoubleToAtom(context, tokenStream.currentToken().number()));
            if (!propAtom.get())
                return null();
            return newNumber(tokenStream.currentToken());
        }
-        if (tt == TOK_LB)
+        if (tt == TOK_LB) {
+            if (!checkUnescapedName(getSetToken))
+                return null();
+
            return computedPropertyName(yieldHandling, propList);
+        }

        // Not an accessor property after all.
        tokenStream.ungetToken();
@ -9145,6 +9206,9 @@ Parser<ParseHandler>::tryNewTarget(Node &newTarget)
        return false;
    }

+    if (!checkUnescapedName(tokenStream.currentToken()))
+        return false;
+
    if (!pc->sc->allowNewTarget()) {
        reportWithOffset(ParseError, false, begin, JSMSG_BAD_NEWTARGET);
        return false;
--- a/js/src/frontend/Parser.h
+++ b/js/src/frontend/Parser.h
@ -552,6 +552,8 @@ class Parser : private JS::AutoGCRooter, public StrictModeGetter

    bool isUnexpectedEOF() const { return isUnexpectedEOF_; }

+    bool checkUnescapedName(const Token& token);
+
  private:
    Parser* thisForCtor() { return this; }

--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@ -1015,16 +1015,6 @@ TokenStream::checkForKeyword(const KeywordInfo* kw, TokenKind* ttp)
    return reportStrictModeError(JSMSG_RESERVED_ID, kw->chars);
 }

-bool
-TokenStream::checkForKeyword(const char16_t* s, size_t length, TokenKind* ttp)
-{
-    const KeywordInfo* kw = FindKeyword(s, length);
-    if (!kw)
-        return true;
-
-    return checkForKeyword(kw, ttp);
-}
-
 bool
 TokenStream::checkForKeyword(JSAtom* atom, TokenKind* ttp)
 {
@ -1235,13 +1225,23 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
            length = userbuf.addressOfNextRawChar() - identStart;
        }

-        // Check for keywords unless the parser told us not to.
+        // Represent keywords as keyword tokens unless told otherwise.
        if (modifier != KeywordIsName) {
-            tp->type = TOK_NAME;
-            if (!checkForKeyword(chars, length, &tp->type))
-                goto error;
-            if (tp->type != TOK_NAME)
-                goto out;
+            if (const KeywordInfo* kw = FindKeyword(chars, length)) {
+                // That said, keywords can't contain escapes.  (Contexts where
+                // keywords are treated as names, that also sometimes treat
+                // keywords as keywords, must manually check this requirement.)
+                if (hadUnicodeEscape) {
+                    reportError(JSMSG_ESCAPED_KEYWORD);
+                    goto error;
+                }
+
+                tp->type = TOK_NAME;
+                if (!checkForKeyword(kw, &tp->type))
+                    goto error;
+                if (tp->type != TOK_NAME)
+                    goto out;
+            }
        }

        JSAtom* atom = AtomizeChars(cx, chars, length);
--- a/js/src/frontend/TokenStream.h
+++ b/js/src/frontend/TokenStream.h
@ -226,6 +226,11 @@ struct Token
        return u.name->asPropertyName(); // poor-man's type verification
    }

+    bool nameContainsEscape() const {
+        // An escape-free name spans exactly length() positions of source text.
+        return pos.end != pos.begin + name()->length();
+    }
+
    JSAtom* atom() const {
        MOZ_ASSERT(type == TOK_STRING ||
                   type == TOK_TEMPLATE_HEAD ||
@ -642,11 +647,28 @@ class MOZ_STACK_CLASS TokenStream
        MOZ_ALWAYS_TRUE(matched);
    }

-    bool matchContextualKeyword(bool* matchedp, Handle<PropertyName*> keyword) {
+    // Like matchToken(..., TOK_NAME) but further matching the name token only
+    // if it has the given characters, without containing escape sequences.
+    // If the name token has the given characters yet *does* contain an escape,
+    // a syntax error will be reported.
+    //
+    // This latter behavior makes this method unsuitable for use in any context
+    // where ASI might occur.  In such places, an escaped "contextual keyword"
+    // on a new line is the start of an ExpressionStatement, not a continuation
+    // of a StatementListItem (or ImportDeclaration or ExportDeclaration, in
+    // modules).
+    bool matchContextualKeyword(bool* matchedp, Handle<PropertyName*> keyword,
+                                Modifier modifier = None)
+    {
        TokenKind token;
-        if (!getToken(&token))
+        if (!getToken(&token, modifier))
            return false;
        if (token == TOK_NAME && currentToken().name() == keyword) {
+            if (currentToken().nameContainsEscape()) {
+                reportError(JSMSG_ESCAPED_KEYWORD);
+                return false;
+            }
+
            *matchedp = true;
        } else {
            *matchedp = false;
@ -720,8 +742,8 @@ class MOZ_STACK_CLASS TokenStream
        return sourceMapURL_.get();
    }

-    // If the name at s[0:length] is not a keyword in this version, return
-    // true with *ttp unchanged.
+    // If |atom| is not a keyword in this version, return true with *ttp
+    // unchanged.
    //
    // If it is a reserved word in this version and strictness mode, and thus
    // can't be present in correct code, report a SyntaxError and return false.
@ -730,10 +752,11 @@ class MOZ_STACK_CLASS TokenStream
    // null, report a SyntaxError ("if is a reserved identifier") and return
    // false. If ttp is non-null, return true with the keyword's TokenKind in
    // *ttp.
-    bool checkForKeyword(const KeywordInfo* kw, TokenKind* ttp);
-    bool checkForKeyword(const char16_t* s, size_t length, TokenKind* ttp);
    bool checkForKeyword(JSAtom* atom, TokenKind* ttp);

+    // Same semantics as above, but for the provided keyword.
+    bool checkForKeyword(const KeywordInfo* kw, TokenKind* ttp);
+
    // This class maps a userbuf offset (which is 0-indexed) to a line number
    // (which is 1-indexed) and a column index (which is 0-indexed).
    class SourceCoords
--- a/js/src/js.msg
+++ b/js/src/js.msg
@ -342,6 +342,7 @@ MSG_DEF(JSMSG_BAD_COLUMN_NUMBER,       0, JSEXN_RANGEERR, "column number out of
 MSG_DEF(JSMSG_COMPUTED_NAME_IN_PATTERN,0, JSEXN_SYNTAXERR, "computed property names aren't supported in this destructuring declaration")
 MSG_DEF(JSMSG_DEFAULT_IN_PATTERN,      0, JSEXN_SYNTAXERR, "destructuring defaults aren't supported in this destructuring declaration")
 MSG_DEF(JSMSG_BAD_NEWTARGET,           0, JSEXN_SYNTAXERR, "new.target only allowed in non-exotic functions")
+MSG_DEF(JSMSG_ESCAPED_KEYWORD,         0, JSEXN_SYNTAXERR, "keywords must be written literally, without embedded escapes")

 // asm.js
 MSG_DEF(JSMSG_USE_ASM_TYPE_FAIL,       1, JSEXN_TYPEERR, "asm.js type error: {0}")
--- a/js/src/tests/ecma_6/Syntax/browser.js
+++ b/js/src/tests/ecma_6/Syntax/browser.js
--- a/js/src/tests/ecma_6/Syntax/keyword-unescaped-requirement.js
+++ b/js/src/tests/ecma_6/Syntax/keyword-unescaped-requirement.js
@ -0,0 +1,73 @@
+/*
+ * Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/licenses/publicdomain/
+ */
+
+//-----------------------------------------------------------------------------
+var BUGNUMBER = 1204027;
+var summary =
+  "Escape sequences aren't allowed in bolded grammar tokens (that is, in " +
+  "keywords, possibly contextual keywords)";
+
+print(BUGNUMBER + ": " + summary);
+
+/**************
+ * BEGIN TEST *
+ **************/
+
+// Source for a class expression with |code| as a member ("@" if no classes).
+function classSyntax(code)
+{
+  return classesEnabled() ? "(class { constructor() {} " + code + " });" : "@";
+}
+
+function memberVariants(code)
+{
+  return [classSyntax(code), "({ " + code + " })"];
+}
+
+var badScripts =
+  [
+   classSyntax("st\\u0061tic m() { return 0; }"),
+   classSyntax("st\\u0061tic get foo() { return 0; }"),
+   classSyntax("st\\u0061tic set foo(v) {}"),
+   classSyntax("st\\u0061tic get ['hi']() { return 0; }"),
+   classSyntax("st\\u0061tic set ['hi'](v) {}"),
+   classSyntax("st\\u0061tic get 'hi'() { return 0; }"),
+   classSyntax("st\\u0061tic set 'hi'(v) {}"),
+   classSyntax("st\\u0061tic get 42() { return 0; }"),
+   classSyntax("st\\u0061tic set 42(v) {}"),
+   ...memberVariants("\\u0067et foo() { return 0; }"),
+   ...memberVariants("\\u0073et foo(v) {}"), // setters take one parameter, so only the escape is wrong
+   ...memberVariants("g\\u0065t foo() { return 0; }"),
+   ...memberVariants("s\\u0065t foo(v) {}"),
+   ...memberVariants("g\\u0065t ['hi']() { return 0; }"),
+   ...memberVariants("s\\u0065t ['hi'](v) {}"),
+   ...memberVariants("g\\u0065t 'hi'() { return 0; }"),
+   ...memberVariants("s\\u0065t 'hi'(v) {}"),
+   ...memberVariants("g\\u0065t 42() { return 0; }"),
+   ...memberVariants("s\\u0065t 42(v) {}"),
+   "for (var foo o\\u0066 [1]) ;",
+   "for (var foo \\u006ff [1]) ;",
+   "for (var foo i\\u006e [1]) ;",
+   "for (var foo \\u0069n [1]) ;",
+   "function f() { return n\\u0065w.target }",
+   "function f() { return \\u006eew.target }",
+   "function f() { return new.t\\u0061rget }",
+   "function f() { return new.\\u0074arget }",
+   "function f() { return n\\u0065w Array }",
+   "function f() { return \\u006eew Array }",
+   "\\u0064o {  } while (0)",
+   "[for (x \\u006ff [1]) x]",
+   "[for (x o\\u0066 [1]) x]",
+  ];
+
+for (var script of badScripts)
+  assertThrowsInstanceOf(() => Function(script), SyntaxError, "bad behavior for: " + script);
+
+/******************************************************************************/
+
+if (typeof reportCompare === "function")
+  reportCompare(true, true);
+
+print("Tests complete");
--- a/js/src/tests/ecma_6/Syntax/shell.js
+++ b/js/src/tests/ecma_6/Syntax/shell.js
--- a/js/src/tests/ecma_6/extensions/keyword-unescaped-requirement-modules.js
+++ b/js/src/tests/ecma_6/extensions/keyword-unescaped-requirement-modules.js
@ -0,0 +1,71 @@
+/*
+ * Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/licenses/publicdomain/
+ */
+
+//-----------------------------------------------------------------------------
+var BUGNUMBER = 1204027;
+var summary =
+  "Escape sequences aren't allowed in bolded grammar tokens (that is, in " +
+  "keywords, possibly contextual keywords)";
+
+print(BUGNUMBER + ": " + summary);
+
+/**************
+ * BEGIN TEST *
+ **************/
+
+var badModules =
+  [
+   "\\u0069mport f from 'g'",
+   "i\\u006dport g from 'h'",
+   "import * \\u0061s foo from 'bar'", // FromClause present, so only the escape is wrong
+   "import {} fro\\u006d 'bar'",
+   "import { x \\u0061s y } from 'baz'",
+
+   "\\u0065xport function f() {}",
+   "e\\u0078port function g() {}",
+   "export * fro\\u006d 'fnord'",
+   "export d\\u0065fault 3;", // |var| can't follow |export default|; use an expression
+   "export { q } fro\\u006d 'qSupplier';",
+
+  ];
+
+if (typeof parseModule === "function")
+{
+  for (var module of badModules)
+  {
+    assertThrowsInstanceOf(() => parseModule(module), SyntaxError,
+                           "bad behavior for: " + module);
+  }
+}
+
+if (typeof Reflect.parse === "function")
+{
+  var twoStatementAST =
+    Reflect.parse(`export { x } /* ASI should trigger here */
+                  fro\\u006D`,
+                  { target: "module" });
+
+  var statements = twoStatementAST.body;
+  assertEq(statements.length, 2,
+           "should have two items in the module, not one ExportDeclaration");
+  assertEq(statements[0].type, "ExportDeclaration");
+  assertEq(statements[1].type, "ExpressionStatement");
+  assertEq(statements[1].expression.name, "from");
+
+  var oneStatementAST =
+    Reflect.parse(`export { x } /* no ASI here */
+                  from 'foo'`,
+                  { target: "module" });
+
+  assertEq(oneStatementAST.body.length, 1);
+  assertEq(oneStatementAST.body[0].type, "ExportDeclaration");
+}
+
+/******************************************************************************/
+
+if (typeof reportCompare === "function")
+  reportCompare(true, true);
+
+print("Tests complete");
--- a/js/src/tests/ecma_6/extensions/keyword-unescaped-requirement.js
+++ b/js/src/tests/ecma_6/extensions/keyword-unescaped-requirement.js
@ -0,0 +1,43 @@
+/*
+ * Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/licenses/publicdomain/
+ */
+
+//-----------------------------------------------------------------------------
+var BUGNUMBER = 1204027;
+var summary =
+  "Escape sequences aren't allowed in bolded grammar tokens (that is, in " +
+  "keywords, possibly contextual keywords)";
+
+print(BUGNUMBER + ": " + summary);
+
+/**************
+ * BEGIN TEST *
+ **************/
+
+var randomExtensions =
+  [
+   "for \\u0065ach (var x in []);",
+   "for e\\u0061ch (var x in []);",
+   "[0 for \\u0065ach (var x in [])]",
+   "[0 for e\\u0061ch (var x in [])]",
+   "(0 for \\u0065ach (var x in []))",
+   "(0 for e\\u0061ch (var x in []))",
+
+   // Soon to be not an extension, maybe...
+   "(for (x \\u006ff [1]) x)",
+   "(for (x o\\u0066 [1]) x)",
+  ];
+
+for (var extension of randomExtensions)
+{
+  assertThrowsInstanceOf(() => Function(extension), SyntaxError,
+                         "bad behavior for: " + extension);
+}
+
+/******************************************************************************/
+
+if (typeof reportCompare === "function")
+  reportCompare(true, true);
+
+print("Tests complete");
--- a/js/src/vm/Xdr.h
+++ b/js/src/vm/Xdr.h
@ -29,11 +29,11 @@ namespace js {
 *
 *  https://developer.mozilla.org/en-US/docs/SpiderMonkey/Internals/Bytecode
 */
-static const uint32_t XDR_BYTECODE_VERSION_SUBTRAHEND = 306;
+static const uint32_t XDR_BYTECODE_VERSION_SUBTRAHEND = 307;
 static const uint32_t XDR_BYTECODE_VERSION =
    uint32_t(0xb973c0de - XDR_BYTECODE_VERSION_SUBTRAHEND);

-static_assert(JSErr_Limit == 407,
+static_assert(JSErr_Limit == 408,
              "GREETINGS, POTENTIAL SUBTRAHEND INCREMENTER! If you added or "
              "removed MSG_DEFs from js.msg, you should increment "
              "XDR_BYTECODE_VERSION_SUBTRAHEND and update this assertion's "