зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1456296 - Move IdentifierName parsing into a separate function from TSS::getTokenInternal to simplify some control flow. r=arai
* * * [mq]: more --HG-- extra : rebase_source : 1ba08d6801d5dff326921cf9b88655ae5b151b91
This commit is contained in:
Родитель
fc9af7ef8b
Коммит
e2c8f8433f
|
@ -1332,6 +1332,67 @@ TokenStreamSpecific<CharT, AnyCharsAccess>::putIdentInTokenbuf(const CharT* iden
|
|||
return true;
|
||||
}
|
||||
|
||||
template<typename CharT, class AnyCharsAccess>
|
||||
MOZ_MUST_USE bool
|
||||
TokenStreamSpecific<CharT, AnyCharsAccess>::identifierName(Token* token, const CharT* identStart,
|
||||
IdentifierEscapes escaping)
|
||||
{
|
||||
int c;
|
||||
while (true) {
|
||||
c = getCharIgnoreEOL();
|
||||
if (c == EOF)
|
||||
break;
|
||||
|
||||
uint32_t codePoint;
|
||||
if (!matchMultiUnitCodePoint(c, &codePoint))
|
||||
return false;
|
||||
if (codePoint) {
|
||||
if (!unicode::IsIdentifierPart(codePoint))
|
||||
break;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!unicode::IsIdentifierPart(char16_t(c))) {
|
||||
uint32_t qc;
|
||||
if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
|
||||
break;
|
||||
escaping = IdentifierEscapes::SawUnicodeEscape;
|
||||
}
|
||||
}
|
||||
ungetCharIgnoreEOL(c);
|
||||
|
||||
const CharT* chars;
|
||||
size_t length;
|
||||
if (escaping == IdentifierEscapes::SawUnicodeEscape) {
|
||||
// Identifiers containing Unicode escapes have to be converted into
|
||||
// tokenbuf before atomizing.
|
||||
if (!putIdentInTokenbuf(identStart))
|
||||
return false;
|
||||
|
||||
chars = tokenbuf.begin();
|
||||
length = tokenbuf.length();
|
||||
} else {
|
||||
// Escape-free identifiers can be created directly from userbuf.
|
||||
chars = identStart;
|
||||
length = userbuf.addressOfNextRawChar() - identStart;
|
||||
|
||||
// Represent reserved words lacking escapes as reserved word tokens.
|
||||
if (const ReservedWordInfo* rw = FindReservedWord(chars, length)) {
|
||||
token->type = rw->tokentype;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
JSAtom* atom = atomizeChars(anyCharsAccess().cx, chars, length);
|
||||
if (!atom)
|
||||
return false;
|
||||
|
||||
token->type = TokenKind::Name;
|
||||
token->setName(atom->asPropertyName());
|
||||
return true;
|
||||
}
|
||||
|
||||
enum FirstCharKind {
|
||||
// A char16_t has the 'OneChar' kind if it, by itself, constitutes a valid
|
||||
// token that cannot also be a prefix of a longer token. E.g. ';' has the
|
||||
|
@ -1420,14 +1481,11 @@ MOZ_MUST_USE bool
|
|||
TokenStreamSpecific<CharT, AnyCharsAccess>::getTokenInternal(TokenKind* ttp, Modifier modifier)
|
||||
{
|
||||
int c;
|
||||
uint32_t qc;
|
||||
Token* tp;
|
||||
FirstCharKind c1kind;
|
||||
const CharT* numStart;
|
||||
bool hasExp;
|
||||
DecimalPoint decimalPoint;
|
||||
const CharT* identStart;
|
||||
bool hadUnicodeEscape;
|
||||
|
||||
// Check if in the middle of a template string. Have to get this out of
|
||||
// the way first.
|
||||
|
@ -1467,7 +1525,7 @@ TokenStreamSpecific<CharT, AnyCharsAccess>::getTokenInternal(TokenKind* ttp, Mod
|
|||
// If the first codepoint is really the start of an identifier, the
|
||||
// identifier starts at the previous raw char. If it isn't, it's a bad
|
||||
// char and this assignment won't be examined anyway.
|
||||
identStart = userbuf.addressOfNextRawChar() - 1;
|
||||
const CharT* identStart = userbuf.addressOfNextRawChar() - 1;
|
||||
|
||||
static_assert('$' < 128,
|
||||
"IdentifierStart contains '$', but as !IsUnicodeIDStart('$'), "
|
||||
|
@ -1476,16 +1534,20 @@ TokenStreamSpecific<CharT, AnyCharsAccess>::getTokenInternal(TokenKind* ttp, Mod
|
|||
"IdentifierStart contains '_', but as !IsUnicodeIDStart('_'), "
|
||||
"ensure that '_' is never handled here");
|
||||
if (unicode::IsUnicodeIDStart(char16_t(c))) {
|
||||
hadUnicodeEscape = false;
|
||||
goto identifier;
|
||||
if (!identifierName(tp, identStart, IdentifierEscapes::None))
|
||||
goto error;
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
uint32_t codePoint = c;
|
||||
if (!matchMultiUnitCodePoint(c, &codePoint))
|
||||
goto error;
|
||||
if (codePoint && unicode::IsUnicodeIDStart(codePoint)) {
|
||||
hadUnicodeEscape = false;
|
||||
goto identifier;
|
||||
if (!identifierName(tp, identStart, IdentifierEscapes::None))
|
||||
goto error;
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
ungetCodePointIgnoreEOL(codePoint);
|
||||
|
@ -1532,62 +1594,10 @@ TokenStreamSpecific<CharT, AnyCharsAccess>::getTokenInternal(TokenKind* ttp, Mod
|
|||
//
|
||||
if (c1kind == Ident) {
|
||||
tp = newToken(-1);
|
||||
identStart = userbuf.addressOfNextRawChar() - 1;
|
||||
hadUnicodeEscape = false;
|
||||
|
||||
identifier:
|
||||
for (;;) {
|
||||
c = getCharIgnoreEOL();
|
||||
if (c == EOF)
|
||||
break;
|
||||
|
||||
uint32_t codePoint;
|
||||
if (!matchMultiUnitCodePoint(c, &codePoint))
|
||||
goto error;
|
||||
if (codePoint) {
|
||||
if (!unicode::IsIdentifierPart(codePoint))
|
||||
break;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!unicode::IsIdentifierPart(char16_t(c))) {
|
||||
if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
|
||||
break;
|
||||
hadUnicodeEscape = true;
|
||||
}
|
||||
}
|
||||
ungetCharIgnoreEOL(c);
|
||||
|
||||
// Identifiers containing no Unicode escapes can be processed directly
|
||||
// from userbuf. The rest must use the escapes converted via tokenbuf
|
||||
// before atomizing.
|
||||
const CharT* chars;
|
||||
size_t length;
|
||||
if (hadUnicodeEscape) {
|
||||
if (!putIdentInTokenbuf(identStart))
|
||||
if (!identifierName(tp, userbuf.addressOfNextRawChar() - 1, IdentifierEscapes::None))
|
||||
goto error;
|
||||
|
||||
chars = tokenbuf.begin();
|
||||
length = tokenbuf.length();
|
||||
} else {
|
||||
chars = identStart;
|
||||
length = userbuf.addressOfNextRawChar() - identStart;
|
||||
}
|
||||
|
||||
// Represent reserved words as reserved word tokens.
|
||||
if (!hadUnicodeEscape) {
|
||||
if (const ReservedWordInfo* rw = FindReservedWord(chars, length)) {
|
||||
tp->type = rw->tokentype;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
JSAtom* atom = atomizeChars(anyCharsAccess().cx, chars, length);
|
||||
if (!atom)
|
||||
goto error;
|
||||
tp->type = TokenKind::Name;
|
||||
tp->setName(atom->asPropertyName());
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
@ -1819,11 +1829,15 @@ TokenStreamSpecific<CharT, AnyCharsAccess>::getTokenInternal(TokenKind* ttp, Mod
|
|||
goto out;
|
||||
|
||||
case '\\': {
|
||||
uint32_t escapeLength = matchUnicodeEscapeIdStart(&qc);
|
||||
if (escapeLength > 0) {
|
||||
identStart = userbuf.addressOfNextRawChar() - escapeLength - 1;
|
||||
hadUnicodeEscape = true;
|
||||
goto identifier;
|
||||
uint32_t qc;
|
||||
if (uint32_t escapeLength = matchUnicodeEscapeIdStart(&qc)) {
|
||||
if (!identifierName(tp, userbuf.addressOfNextRawChar() - escapeLength - 1,
|
||||
IdentifierEscapes::SawUnicodeEscape))
|
||||
{
|
||||
goto error;
|
||||
}
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
// We could point "into" a mistyped escape, e.g. for "\u{41H}" we could
|
||||
|
|
|
@ -249,6 +249,9 @@ enum class InvalidEscapeType {
|
|||
Octal
|
||||
};
|
||||
|
||||
// The only escapes found in IdentifierName are of the Unicode flavor.
|
||||
enum class IdentifierEscapes { None, SawUnicodeEscape };
|
||||
|
||||
class TokenStreamShared;
|
||||
|
||||
struct Token
|
||||
|
@ -1445,6 +1448,9 @@ class MOZ_STACK_CLASS TokenStreamSpecific
|
|||
return userbuf.limit();
|
||||
}
|
||||
|
||||
MOZ_MUST_USE bool identifierName(Token* token, const CharT* identStart,
|
||||
IdentifierEscapes escaping);
|
||||
|
||||
MOZ_MUST_USE bool getTokenInternal(TokenKind* ttp, Modifier modifier);
|
||||
|
||||
MOZ_MUST_USE bool getStringOrTemplateToken(char untilChar, Token** tp);
|
||||
|
|
Загрузка…
Ссылка в новой задаче