Bug 1456296 - Move IdentifierName parsing into a separate function from TSS::getTokenInternal to simplify some control flow. r=arai

* * *
[mq]: more

--HG--
extra : rebase_source : 1ba08d6801d5dff326921cf9b88655ae5b151b91
This commit is contained in:
Jeff Walden 2018-04-23 13:37:39 -07:00
Родитель fc9af7ef8b
Коммит e2c8f8433f
2 изменённых файлов: 87 добавлений и 67 удалений

Просмотреть файл

@ -1332,6 +1332,67 @@ TokenStreamSpecific<CharT, AnyCharsAccess>::putIdentInTokenbuf(const CharT* iden
return true;
}
// Tokenize the rest of an IdentifierName and store the result in |*token|.
//
// |identStart| points at the first character of the identifier in userbuf; the
// identifier's length is measured from there to userbuf's next raw char once
// scanning stops. |escaping| says whether the caller already saw a Unicode
// escape in the identifier; it is switched to SawUnicodeEscape here if one is
// matched while scanning.
//
// Returns false if matching a multi-unit code point, copying the identifier
// into tokenbuf, or atomizing the characters fails. Otherwise returns true
// with |*token| set to either a reserved word token (only possible when the
// identifier contains no escapes) or a TokenKind::Name token carrying the
// atomized name.
template<typename CharT, class AnyCharsAccess>
MOZ_MUST_USE bool
TokenStreamSpecific<CharT, AnyCharsAccess>::identifierName(Token* token, const CharT* identStart,
                                                           IdentifierEscapes escaping)
{
    // Keep reading for as long as what we read continues the identifier.
    int unit;
    for (;;) {
        unit = getCharIgnoreEOL();
        if (unit == EOF)
            break;

        uint32_t multiUnitCodePoint;
        if (!matchMultiUnitCodePoint(unit, &multiUnitCodePoint))
            return false;

        if (multiUnitCodePoint) {
            // A non-zero result is tested as a full code point.
            if (unicode::IsIdentifierPart(multiUnitCodePoint))
                continue;
            break;
        }

        if (unicode::IsIdentifierPart(char16_t(unit)))
            continue;

        // The unit isn't an identifier part by itself, so the identifier only
        // continues if the unit begins a Unicode escape accepted by
        // matchUnicodeEscapeIdent.
        uint32_t escapedCodePoint;
        if (unit == '\\' && matchUnicodeEscapeIdent(&escapedCodePoint)) {
            escaping = IdentifierEscapes::SawUnicodeEscape;
            continue;
        }

        break;
    }

    // The last value read (possibly EOF) ended the scan; hand it back.
    ungetCharIgnoreEOL(unit);

    const CharT* chars;
    size_t length;
    if (escaping != IdentifierEscapes::SawUnicodeEscape) {
        // With no escapes present, the identifier's characters can be used
        // straight out of userbuf.
        chars = identStart;
        length = userbuf.addressOfNextRawChar() - identStart;

        // Only escape-free reserved words become reserved word tokens.
        const ReservedWordInfo* reserved = FindReservedWord(chars, length);
        if (reserved) {
            token->type = reserved->tokentype;
            return true;
        }
    } else {
        // An identifier containing Unicode escapes must first be converted
        // into tokenbuf; the atom is then made from tokenbuf's contents.
        if (!putIdentInTokenbuf(identStart))
            return false;

        chars = tokenbuf.begin();
        length = tokenbuf.length();
    }

    JSAtom* atom = atomizeChars(anyCharsAccess().cx, chars, length);
    if (!atom)
        return false;

    token->type = TokenKind::Name;
    token->setName(atom->asPropertyName());
    return true;
}
enum FirstCharKind {
// A char16_t has the 'OneChar' kind if it, by itself, constitutes a valid
// token that cannot also be a prefix of a longer token. E.g. ';' has the
@ -1420,14 +1481,11 @@ MOZ_MUST_USE bool
TokenStreamSpecific<CharT, AnyCharsAccess>::getTokenInternal(TokenKind* ttp, Modifier modifier)
{
int c;
uint32_t qc;
Token* tp;
FirstCharKind c1kind;
const CharT* numStart;
bool hasExp;
DecimalPoint decimalPoint;
const CharT* identStart;
bool hadUnicodeEscape;
// Check if in the middle of a template string. Have to get this out of
// the way first.
@ -1467,7 +1525,7 @@ TokenStreamSpecific<CharT, AnyCharsAccess>::getTokenInternal(TokenKind* ttp, Mod
// If the first codepoint is really the start of an identifier, the
// identifier starts at the previous raw char. If it isn't, it's a bad
// char and this assignment won't be examined anyway.
identStart = userbuf.addressOfNextRawChar() - 1;
const CharT* identStart = userbuf.addressOfNextRawChar() - 1;
static_assert('$' < 128,
"IdentifierStart contains '$', but as !IsUnicodeIDStart('$'), "
@ -1476,16 +1534,20 @@ TokenStreamSpecific<CharT, AnyCharsAccess>::getTokenInternal(TokenKind* ttp, Mod
"IdentifierStart contains '_', but as !IsUnicodeIDStart('_'), "
"ensure that '_' is never handled here");
if (unicode::IsUnicodeIDStart(char16_t(c))) {
hadUnicodeEscape = false;
goto identifier;
if (!identifierName(tp, identStart, IdentifierEscapes::None))
goto error;
goto out;
}
uint32_t codePoint = c;
if (!matchMultiUnitCodePoint(c, &codePoint))
goto error;
if (codePoint && unicode::IsUnicodeIDStart(codePoint)) {
hadUnicodeEscape = false;
goto identifier;
if (!identifierName(tp, identStart, IdentifierEscapes::None))
goto error;
goto out;
}
ungetCodePointIgnoreEOL(codePoint);
@ -1532,62 +1594,10 @@ TokenStreamSpecific<CharT, AnyCharsAccess>::getTokenInternal(TokenKind* ttp, Mod
//
if (c1kind == Ident) {
tp = newToken(-1);
identStart = userbuf.addressOfNextRawChar() - 1;
hadUnicodeEscape = false;
identifier:
for (;;) {
c = getCharIgnoreEOL();
if (c == EOF)
break;
uint32_t codePoint;
if (!matchMultiUnitCodePoint(c, &codePoint))
goto error;
if (codePoint) {
if (!unicode::IsIdentifierPart(codePoint))
break;
continue;
}
if (!unicode::IsIdentifierPart(char16_t(c))) {
if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
break;
hadUnicodeEscape = true;
}
}
ungetCharIgnoreEOL(c);
// Identifiers containing no Unicode escapes can be processed directly
// from userbuf. The rest must use the escapes converted via tokenbuf
// before atomizing.
const CharT* chars;
size_t length;
if (hadUnicodeEscape) {
if (!putIdentInTokenbuf(identStart))
if (!identifierName(tp, userbuf.addressOfNextRawChar() - 1, IdentifierEscapes::None))
goto error;
chars = tokenbuf.begin();
length = tokenbuf.length();
} else {
chars = identStart;
length = userbuf.addressOfNextRawChar() - identStart;
}
// Represent reserved words as reserved word tokens.
if (!hadUnicodeEscape) {
if (const ReservedWordInfo* rw = FindReservedWord(chars, length)) {
tp->type = rw->tokentype;
goto out;
}
}
JSAtom* atom = atomizeChars(anyCharsAccess().cx, chars, length);
if (!atom)
goto error;
tp->type = TokenKind::Name;
tp->setName(atom->asPropertyName());
goto out;
}
@ -1819,11 +1829,15 @@ TokenStreamSpecific<CharT, AnyCharsAccess>::getTokenInternal(TokenKind* ttp, Mod
goto out;
case '\\': {
uint32_t escapeLength = matchUnicodeEscapeIdStart(&qc);
if (escapeLength > 0) {
identStart = userbuf.addressOfNextRawChar() - escapeLength - 1;
hadUnicodeEscape = true;
goto identifier;
uint32_t qc;
if (uint32_t escapeLength = matchUnicodeEscapeIdStart(&qc)) {
if (!identifierName(tp, userbuf.addressOfNextRawChar() - escapeLength - 1,
IdentifierEscapes::SawUnicodeEscape))
{
goto error;
}
goto out;
}
// We could point "into" a mistyped escape, e.g. for "\u{41H}" we could

Просмотреть файл

@ -249,6 +249,9 @@ enum class InvalidEscapeType {
Octal
};
// Whether an IdentifierName being tokenized contains an escape. Unicode
// escapes are the only flavor of escape found in IdentifierName, so a single
// "saw one" state suffices.
enum class IdentifierEscapes
{
    // No escape has been seen in the identifier.
    None,

    // At least one Unicode escape has been seen in the identifier.
    SawUnicodeEscape
};
class TokenStreamShared;
struct Token
@ -1445,6 +1448,9 @@ class MOZ_STACK_CLASS TokenStreamSpecific
return userbuf.limit();
}
// Tokenize the remainder of an IdentifierName whose first character is at
// |identStart|, filling in |*token| with either a reserved word token or a
// Name token. |escaping| indicates whether a Unicode escape has already been
// seen in the identifier. Returns false on failure.
MOZ_MUST_USE bool identifierName(Token* token, const CharT* identStart,
IdentifierEscapes escaping);
MOZ_MUST_USE bool getTokenInternal(TokenKind* ttp, Modifier modifier);
MOZ_MUST_USE bool getStringOrTemplateToken(char untilChar, Token** tp);