From 63a91f32ace89529954a21cfd41364f8f30ea326 Mon Sep 17 00:00:00 2001 From: Jeff Walden Date: Tue, 29 Nov 2016 13:35:46 -0800 Subject: [PATCH] Bug 1326454 - Rename TokenStream::getBracedUnicode to TokenStream::matchBracedUnicode and make its signature fallible. r=arai --HG-- extra : rebase_source : 18eedfc991915a241132960cf42efece21330a7f --- js/src/frontend/TokenStream.cpp | 61 +++++++++++++++++++++++--------- js/src/frontend/TokenStream.h | 5 ++- js/src/irregexp/RegExpParser.cpp | 6 ++-- js/src/irregexp/RegExpParser.h | 2 +- js/src/js.msg | 2 +- 5 files changed, 53 insertions(+), 23 deletions(-) diff --git a/js/src/frontend/TokenStream.cpp b/js/src/frontend/TokenStream.cpp index 829cad72c525..5c2c89cf5cf1 100644 --- a/js/src/frontend/TokenStream.cpp +++ b/js/src/frontend/TokenStream.cpp @@ -815,6 +815,19 @@ TokenStream::error(unsigned errorNumber, ...) va_end(args); } +void +TokenStream::errorAt(uint32_t offset, unsigned errorNumber, ...) +{ + va_list args; + va_start(args, errorNumber); +#ifdef DEBUG + bool result = +#endif + reportCompileErrorNumberVA(offset, JSREPORT_ERROR, errorNumber, args); + MOZ_ASSERT(!result, "reporting an error returned true?"); + va_end(args); +} + // We have encountered a '\': check for a Unicode escape sequence after it. // Return the length of the escape sequence and the character code point (by // value) if we found a Unicode escape sequence. Otherwise, return 0. 
In both @@ -1863,32 +1876,48 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier) } bool -TokenStream::getBracedUnicode(uint32_t* cp) +TokenStream::matchBracedUnicode(bool* matched, uint32_t* cp) { + if (peekChar() != '{') { + *matched = false; + return true; + } + consumeKnownChar('{'); + uint32_t start = userbuf.offset(); + bool first = true; - int32_t c; uint32_t code = 0; - while (true) { - c = getCharIgnoreEOL(); - if (c == EOF) + do { + int32_t c = getCharIgnoreEOL(); + if (c == EOF) { + error(JSMSG_MALFORMED_ESCAPE, "Unicode"); return false; + } if (c == '}') { - if (first) + if (first) { + error(JSMSG_MALFORMED_ESCAPE, "Unicode"); return false; + } break; } - if (!JS7_ISHEX(c)) + if (!JS7_ISHEX(c)) { + error(JSMSG_MALFORMED_ESCAPE, "Unicode"); return false; + } code = (code << 4) | JS7_UNHEX(c); - if (code > unicode::NonBMPMax) + if (code > unicode::NonBMPMax) { + errorAt(start, JSMSG_UNICODE_OVERFLOW, "escape sequence"); return false; - first = false; - } + } + first = false; + } while (true); + + *matched = true; *cp = code; return true; } @@ -1930,13 +1959,11 @@ TokenStream::getStringOrTemplateToken(int untilChar, Token** tp) // Unicode character specification. case 'u': { - if (peekChar() == '{') { - uint32_t code; - if (!getBracedUnicode(&code)) { - error(JSMSG_MALFORMED_ESCAPE, "Unicode"); - return false; - } - + bool matched; + uint32_t code; + if (!matchBracedUnicode(&matched, &code)) + return false; + if (matched) { MOZ_ASSERT(code <= unicode::NonBMPMax); if (code < unicode::NonBMPMin) { c = code; diff --git a/js/src/frontend/TokenStream.h b/js/src/frontend/TokenStream.h index 18e9cb3ca45a..19385b499e9f 100644 --- a/js/src/frontend/TokenStream.h +++ b/js/src/frontend/TokenStream.h @@ -368,6 +368,9 @@ class MOZ_STACK_CLASS TokenStream // Report the given error at the current offset. void error(unsigned errorNumber, ...); + // Report the given error at the given offset. 
+ void errorAt(uint32_t offset, unsigned errorNumber, ...); + // Warn at the current offset. MOZ_MUST_USE bool warning(unsigned errorNumber, ...); @@ -952,7 +955,7 @@ class MOZ_STACK_CLASS TokenStream MOZ_MUST_USE bool getTokenInternal(TokenKind* ttp, Modifier modifier); - MOZ_MUST_USE bool getBracedUnicode(uint32_t* code); + MOZ_MUST_USE bool matchBracedUnicode(bool* matched, uint32_t* code); MOZ_MUST_USE bool getStringOrTemplateToken(int untilChar, Token** tp); int32_t getChar(); diff --git a/js/src/irregexp/RegExpParser.cpp b/js/src/irregexp/RegExpParser.cpp index fe2e61603f6b..62aaf5c710bb 100644 --- a/js/src/irregexp/RegExpParser.cpp +++ b/js/src/irregexp/RegExpParser.cpp @@ -243,10 +243,10 @@ RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc, template <typename CharT> RegExpTree* -RegExpParser<CharT>::ReportError(unsigned errorNumber) +RegExpParser<CharT>::ReportError(unsigned errorNumber, const char* param /* = nullptr */) { gc::AutoSuppressGC suppressGC(ts.context()); - ts.reportError(errorNumber); + ts.reportError(errorNumber, param); return nullptr; } @@ -350,7 +350,7 @@ RegExpParser<CharT>::ParseBracedHexEscape(widechar* value) } code = (code << 4) | d; if (code > unicode::NonBMPMax) { - ReportError(JSMSG_UNICODE_OVERFLOW); + ReportError(JSMSG_UNICODE_OVERFLOW, "regular expression"); return false; } Advance(); diff --git a/js/src/irregexp/RegExpParser.h b/js/src/irregexp/RegExpParser.h index b5228a86f99a..0a7e61858976 100644 --- a/js/src/irregexp/RegExpParser.h +++ b/js/src/irregexp/RegExpParser.h @@ -211,7 +211,7 @@ class RegExpParser bool ParseBackReferenceIndex(int* index_out); bool ParseClassAtom(char16_t* char_class, widechar *value); - RegExpTree* ReportError(unsigned errorNumber); + RegExpTree* ReportError(unsigned errorNumber, const char* param = nullptr); void Advance(); void Advance(int dist) { next_pos_ += dist - 1; diff --git a/js/src/js.msg b/js/src/js.msg index 0be7ddbf3f7c..aacc40e5282d 100644 --- a/js/src/js.msg +++ b/js/src/js.msg @@ -503,7 +503,7 @@ 
MSG_DEF(JSMSG_RANGE_WITH_CLASS_ESCAPE, 0, JSEXN_SYNTAXERR, "character class esca MSG_DEF(JSMSG_RAW_BRACE_IN_REGEP, 0, JSEXN_SYNTAXERR, "raw brace is not allowed in regular expression with unicode flag") MSG_DEF(JSMSG_RAW_BRACKET_IN_REGEP, 0, JSEXN_SYNTAXERR, "raw bracket is not allowed in regular expression with unicode flag") MSG_DEF(JSMSG_TOO_MANY_PARENS, 0, JSEXN_INTERNALERR, "too many parentheses in regular expression") -MSG_DEF(JSMSG_UNICODE_OVERFLOW, 0, JSEXN_SYNTAXERR, "unicode codepoint should not be greater than 0x10FFFF in regular expression") +MSG_DEF(JSMSG_UNICODE_OVERFLOW, 1, JSEXN_SYNTAXERR, "Unicode codepoint must not be greater than 0x10FFFF in {0}") MSG_DEF(JSMSG_UNMATCHED_RIGHT_PAREN, 0, JSEXN_SYNTAXERR, "unmatched ) in regular expression") MSG_DEF(JSMSG_UNTERM_CLASS, 0, JSEXN_SYNTAXERR, "unterminated character class")