Bug 1456296 - Move IdentifierName parsing into a separate function from TSS::getTokenInternal to simplify some control flow. r=arai

* * * [mq]: more --HG-- extra : rebase_source : 1ba08d6801d5dff326921cf9b88655ae5b151b91
2018-04-23 13:37:39 -07:00 · 2018-04-23 13:37:39 -07:00 · e2c8f8433f
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@ -1332,6 +1332,67 @@ TokenStreamSpecific<CharT, AnyCharsAccess>::putIdentInTokenbuf(const CharT* iden
    return true;
 }

+template<typename CharT, class AnyCharsAccess>
+MOZ_MUST_USE bool
+TokenStreamSpecific<CharT, AnyCharsAccess>::identifierName(Token* token, const CharT* identStart,
+                                                           IdentifierEscapes escaping)
+{
+    int c;
+    while (true) {
+        c = getCharIgnoreEOL();
+        if (c == EOF)
+            break;
+
+        uint32_t codePoint;
+        if (!matchMultiUnitCodePoint(c, &codePoint))
+            return false;
+        if (codePoint) {
+            if (!unicode::IsIdentifierPart(codePoint))
+                break;
+
+            continue;
+        }
+
+        if (!unicode::IsIdentifierPart(char16_t(c))) {
+            uint32_t qc;
+            if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
+                break;
+            escaping = IdentifierEscapes::SawUnicodeEscape;
+        }
+    }
+    ungetCharIgnoreEOL(c);
+
+    const CharT* chars;
+    size_t length;
+    if (escaping == IdentifierEscapes::SawUnicodeEscape) {
+        // Identifiers containing Unicode escapes have to be converted into
+        // tokenbuf before atomizing.
+        if (!putIdentInTokenbuf(identStart))
+            return false;
+
+        chars = tokenbuf.begin();
+        length = tokenbuf.length();
+    } else {
+        // Escape-free identifiers can be created directly from userbuf.
+        chars = identStart;
+        length = userbuf.addressOfNextRawChar() - identStart;
+
+        // Represent reserved words lacking escapes as reserved word tokens.
+        if (const ReservedWordInfo* rw = FindReservedWord(chars, length)) {
+            token->type = rw->tokentype;
+            return true;
+        }
+    }
+
+    JSAtom* atom = atomizeChars(anyCharsAccess().cx, chars, length);
+    if (!atom)
+        return false;
+
+    token->type = TokenKind::Name;
+    token->setName(atom->asPropertyName());
+    return true;
+}
+
 enum FirstCharKind {
    // A char16_t has the 'OneChar' kind if it, by itself, constitutes a valid
    // token that cannot also be a prefix of a longer token.  E.g. ';' has the
@ -1420,14 +1481,11 @@ MOZ_MUST_USE bool
 TokenStreamSpecific<CharT, AnyCharsAccess>::getTokenInternal(TokenKind* ttp, Modifier modifier)
 {
    int c;
-    uint32_t qc;
    Token* tp;
    FirstCharKind c1kind;
    const CharT* numStart;
    bool hasExp;
    DecimalPoint decimalPoint;
-    const CharT* identStart;
-    bool hadUnicodeEscape;

    // Check if in the middle of a template string. Have to get this out of
    // the way first.
@ -1467,7 +1525,7 @@ TokenStreamSpecific<CharT, AnyCharsAccess>::getTokenInternal(TokenKind* ttp, Mod
        // If the first codepoint is really the start of an identifier, the
        // identifier starts at the previous raw char.  If it isn't, it's a bad
        // char and this assignment won't be examined anyway.
-        identStart = userbuf.addressOfNextRawChar() - 1;
+        const CharT* identStart = userbuf.addressOfNextRawChar() - 1;

        static_assert('$' < 128,
                      "IdentifierStart contains '$', but as !IsUnicodeIDStart('$'), "
@ -1476,16 +1534,20 @@ TokenStreamSpecific<CharT, AnyCharsAccess>::getTokenInternal(TokenKind* ttp, Mod
                      "IdentifierStart contains '_', but as !IsUnicodeIDStart('_'), "
                      "ensure that '_' is never handled here");
        if (unicode::IsUnicodeIDStart(char16_t(c))) {
-            hadUnicodeEscape = false;
-            goto identifier;
+            if (!identifierName(tp, identStart, IdentifierEscapes::None))
+                goto error;
+
+            goto out;
        }

        uint32_t codePoint = c;
        if (!matchMultiUnitCodePoint(c, &codePoint))
            goto error;
        if (codePoint && unicode::IsUnicodeIDStart(codePoint)) {
-            hadUnicodeEscape = false;
-            goto identifier;
+            if (!identifierName(tp, identStart, IdentifierEscapes::None))
+                goto error;
+
+            goto out;
        }

        ungetCodePointIgnoreEOL(codePoint);
@ -1532,62 +1594,10 @@ TokenStreamSpecific<CharT, AnyCharsAccess>::getTokenInternal(TokenKind* ttp, Mod
    //
    if (c1kind == Ident) {
        tp = newToken(-1);
-        identStart = userbuf.addressOfNextRawChar() - 1;
-        hadUnicodeEscape = false;

-      identifier:
-        for (;;) {
-            c = getCharIgnoreEOL();
-            if (c == EOF)
-                break;
-
-            uint32_t codePoint;
-            if (!matchMultiUnitCodePoint(c, &codePoint))
-                goto error;
-            if (codePoint) {
-                if (!unicode::IsIdentifierPart(codePoint))
-                    break;
-
-                continue;
-            }
-
-            if (!unicode::IsIdentifierPart(char16_t(c))) {
-                if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
-                    break;
-                hadUnicodeEscape = true;
-            }
-        }
-        ungetCharIgnoreEOL(c);
-
-        // Identifiers containing no Unicode escapes can be processed directly
-        // from userbuf.  The rest must use the escapes converted via tokenbuf
-        // before atomizing.
-        const CharT* chars;
-        size_t length;
-        if (hadUnicodeEscape) {
-            if (!putIdentInTokenbuf(identStart))
-                goto error;
-
-            chars = tokenbuf.begin();
-            length = tokenbuf.length();
-        } else {
-            chars = identStart;
-            length = userbuf.addressOfNextRawChar() - identStart;
-        }
-
-        // Represent reserved words as reserved word tokens.
-        if (!hadUnicodeEscape) {
-            if (const ReservedWordInfo* rw = FindReservedWord(chars, length)) {
-                tp->type = rw->tokentype;
-                goto out;
-            }
-        }
-
-        JSAtom* atom = atomizeChars(anyCharsAccess().cx, chars, length);
-        if (!atom)
+        if (!identifierName(tp, userbuf.addressOfNextRawChar() - 1, IdentifierEscapes::None))
            goto error;
-        tp->type = TokenKind::Name;
-        tp->setName(atom->asPropertyName());
+
        goto out;
    }

@ -1819,11 +1829,15 @@ TokenStreamSpecific<CharT, AnyCharsAccess>::getTokenInternal(TokenKind* ttp, Mod
        goto out;

      case '\\': {
-        uint32_t escapeLength = matchUnicodeEscapeIdStart(&qc);
-        if (escapeLength > 0) {
-            identStart = userbuf.addressOfNextRawChar() - escapeLength - 1;
-            hadUnicodeEscape = true;
-            goto identifier;
+        uint32_t qc;
+        if (uint32_t escapeLength = matchUnicodeEscapeIdStart(&qc)) {
+            if (!identifierName(tp, userbuf.addressOfNextRawChar() - escapeLength - 1,
+                                IdentifierEscapes::SawUnicodeEscape))
+            {
+                goto error;
+            }
+
+            goto out;
        }

        // We could point "into" a mistyped escape, e.g. for "\u{41H}" we could
--- a/js/src/frontend/TokenStream.h
+++ b/js/src/frontend/TokenStream.h
@ -249,6 +249,9 @@ enum class InvalidEscapeType {
    Octal
 };

+// The only escapes found in IdentifierName are of the Unicode flavor.
+enum class IdentifierEscapes { None, SawUnicodeEscape };
+
 class TokenStreamShared;

 struct Token
@ -1445,6 +1448,9 @@ class MOZ_STACK_CLASS TokenStreamSpecific
        return userbuf.limit();
    }

+    MOZ_MUST_USE bool identifierName(Token* token, const CharT* identStart,
+                                     IdentifierEscapes escaping);
+
    MOZ_MUST_USE bool getTokenInternal(TokenKind* ttp, Modifier modifier);

    MOZ_MUST_USE bool getStringOrTemplateToken(char untilChar, Token** tp);