Bug 1434429 - Implement a TokenStreamChars::ungetCodePointIgnoreEOL and use it to report errors at precise locations, rather than blindly at the beginning of the token (which happens to be the same thing, just not nearly as clear about it). r=arai

--HG-- extra : rebase_source : 2402cd48df9d1a554756b1242a706d103f519901
2018-01-18 11:34:27 -08:00 · 2018-01-18 11:34:27 -08:00 · fb577bbcc0
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@ -601,6 +601,22 @@ TokenStreamCharsBase<CharT>::ungetCharIgnoreEOL(int32_t c)
    userbuf.ungetRawChar();
 }

+template<class AnyCharsAccess>
+void
+TokenStreamChars<char16_t, AnyCharsAccess>::ungetCodePointIgnoreEOL(uint32_t codePoint)
+{
+    // Un-read |codePoint|: encode it as one or two UTF-16 code units, then
+    // unget those units last-to-first.
+    MOZ_ASSERT(!userbuf.atStart());
+
+    char16_t encoded[2];
+    unsigned count = 0;
+    unicode::UTF16Encode(codePoint, encoded, &count);
+    MOZ_ASSERT(count == 1 || count == 2);
+    for (unsigned i = count; i > 0; i--)
+        ungetCharIgnoreEOL(encoded[i - 1]);
+}
+
 // Return true iff |n| raw characters can be read from this without reading past
 // EOF or a newline, and copy those characters into |cp| if so.  The characters
 // are not consumed: use skipChars(n) to do so after checking that the consumed
@ -1499,13 +1515,15 @@ TokenStreamSpecific<CharT, AnyCharsAccess>::getTokenInternal(TokenKind* ttp, Mod
            goto identifier;
        }

-        uint32_t codePoint;
+        uint32_t codePoint = c;
        if (isMultiUnitCodepoint(c, &codePoint) && unicode::IsUnicodeIDStart(codePoint)) {
            hadUnicodeEscape = false;
            goto identifier;
        }

-        goto badchar;
+        ungetCodePointIgnoreEOL(codePoint);
+        error(JSMSG_ILLEGAL_CHARACTER);
+        goto error;
    }

    // Get the token kind, based on the first char.  The ordering of c1kind
@ -1838,7 +1856,14 @@ TokenStreamSpecific<CharT, AnyCharsAccess>::getTokenInternal(TokenKind* ttp, Mod
            hadUnicodeEscape = true;
            goto identifier;
        }
-        goto badchar;
+
+        // We could point "into" a mistyped escape, e.g. for "\u{41H}" we could
+        // point at the 'H'.  But we don't do that now, and the character after
+        // the '\' isn't necessarily the bad one, so just point at the start of
+        // the actually-invalid escape.
+        ungetCharIgnoreEOL('\\');
+        error(JSMSG_BAD_ESCAPE);
+        goto error;
      }

      case '|':
@ -2055,9 +2080,11 @@ TokenStreamSpecific<CharT, AnyCharsAccess>::getTokenInternal(TokenKind* ttp, Mod
        }
        goto out;

-      badchar:
      default:
-        reportError(JSMSG_ILLEGAL_CHARACTER);
+        // We consumed a bad character/code point.  Put it back so the error
+        // location is the bad character.
+        ungetCodePointIgnoreEOL(c);
+        error(JSMSG_ILLEGAL_CHARACTER);
        goto error;
    }

--- a/js/src/frontend/TokenStream.h
+++ b/js/src/frontend/TokenStream.h
@ -1042,6 +1042,7 @@ class TokenStreamChars<char16_t, AnyCharsAccess>

    using GeneralCharsBase::getCharIgnoreEOL;
    using CharsSharedBase::ungetCharIgnoreEOL;
+    using CharsSharedBase::userbuf;

    bool matchTrailForLeadSurrogate(char16_t lead, uint32_t* codePoint);

@ -1060,6 +1061,8 @@ class TokenStreamChars<char16_t, AnyCharsAccess>

        return matchTrailForLeadSurrogate(c, codepoint);
    }
+
+    void ungetCodePointIgnoreEOL(uint32_t codePoint);
 };

 // TokenStream is the lexical scanner for JavaScript source text.
@ -1144,6 +1147,7 @@ class MOZ_STACK_CLASS TokenStreamSpecific
    using CharsSharedBase::tokenbuf;
    using GeneralCharsBase::ungetChar;
    using CharsSharedBase::ungetCharIgnoreEOL;
+    using CharsBase::ungetCodePointIgnoreEOL;
    using CharsSharedBase::userbuf;

  public:
--- a/js/src/js.msg
+++ b/js/src/js.msg
@ -261,6 +261,7 @@ MSG_DEF(JSMSG_FROM_AFTER_IMPORT_CLAUSE, 0, JSEXN_SYNTAXERR, "missing keyword 'fr
 MSG_DEF(JSMSG_FROM_AFTER_EXPORT_STAR,  0, JSEXN_SYNTAXERR, "missing keyword 'from' after export *")
 MSG_DEF(JSMSG_GARBAGE_AFTER_INPUT,     2, JSEXN_SYNTAXERR, "unexpected garbage after {0}, starting with {1}")
 MSG_DEF(JSMSG_IDSTART_AFTER_NUMBER,    0, JSEXN_SYNTAXERR, "identifier starts immediately after numeric literal")
+MSG_DEF(JSMSG_BAD_ESCAPE,              0, JSEXN_SYNTAXERR, "invalid escape sequence")
 MSG_DEF(JSMSG_ILLEGAL_CHARACTER,       0, JSEXN_SYNTAXERR, "illegal character")
 MSG_DEF(JSMSG_IMPORT_DECL_AT_TOP_LEVEL, 0, JSEXN_SYNTAXERR, "import declarations may only appear at top level of a module")
 MSG_DEF(JSMSG_OF_AFTER_FOR_LOOP_DECL,  0, JSEXN_SYNTAXERR, "a declaration in the head of a for-of loop can't have an initializer")