Backed out 2 changesets (bug 1314037) for win64 wpt(1) failures a=backout

Backed out changeset 0223902c2353 (bug 1314037)
Backed out changeset c9b3a1252363 (bug 1314037)
This commit is contained in:
Wes Kocher 2016-11-02 12:12:02 -07:00
Родитель 5f755e9bc4
Коммит 7bcb4fd669
7 изменённых файлов: 138 добавлений и 444 удалений

Просмотреть файл

@ -28,10 +28,10 @@
#include "vm/HelperThreads.h"
#include "vm/Keywords.h"
#include "vm/StringBuffer.h"
#include "vm/Unicode.h"
using namespace js;
using namespace js::frontend;
using namespace js::unicode;
using mozilla::Maybe;
using mozilla::PodAssign;
@ -106,12 +106,12 @@ IsIdentifier(const CharT* chars, size_t length)
if (length == 0)
return false;
if (!unicode::IsIdentifierStart(char16_t(*chars)))
if (!IsIdentifierStart(*chars))
return false;
const CharT* end = chars + length;
while (++chars != end) {
if (!unicode::IsIdentifierPart(char16_t(*chars)))
if (!IsIdentifierPart(*chars))
return false;
}
@ -757,96 +757,42 @@ TokenStream::reportAsmJSError(uint32_t offset, unsigned errorNumber, ...)
}
// We have encountered a '\': check for a Unicode escape sequence after it.
// Return the length of the escape sequence and the character code point (by
// value) if we found a Unicode escape sequence. Otherwise, return 0. In both
// cases, do not advance along the buffer.
uint32_t
TokenStream::peekUnicodeEscape(uint32_t* codePoint)
// Return 'true' and the character code value (by value) if we found a
// Unicode escape sequence. Otherwise, return 'false'. In both cases, do not
// advance along the buffer.
bool
TokenStream::peekUnicodeEscape(int* result)
{
int32_t c = getCharIgnoreEOL();
if (c != 'u') {
ungetCharIgnoreEOL(c);
return 0;
}
char16_t cp[5];
char16_t cp[3];
uint32_t length;
c = getCharIgnoreEOL();
if (JS7_ISHEX(c) && peekChars(3, cp) &&
JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]))
if (peekChars(5, cp) && cp[0] == 'u' &&
JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
{
*codePoint = (JS7_UNHEX(c) << 12) |
(JS7_UNHEX(cp[0]) << 8) |
(JS7_UNHEX(cp[1]) << 4) |
JS7_UNHEX(cp[2]);
length = 5;
} else if (c == '{') {
length = peekExtendedUnicodeEscape(codePoint);
} else {
length = 0;
*result = (((((JS7_UNHEX(cp[1]) << 4)
+ JS7_UNHEX(cp[2])) << 4)
+ JS7_UNHEX(cp[3])) << 4)
+ JS7_UNHEX(cp[4]);
return true;
}
ungetCharIgnoreEOL(c);
ungetCharIgnoreEOL('u');
return length;
}
// Peek at the body of an extended Unicode escape after its opening '{' has
// already been consumed by the caller.
//
// On success, stores the decoded value in |*codePoint| and returns
// leadingZeros + significant hex digits + 3 (presumably the 3 accounts for
// the 'u', '{' and '}' characters -- confirm against the caller). On failure,
// returns 0 and does not write to |*codePoint|.
//
// In both cases every character read here is pushed back before returning,
// so the buffer position is unchanged on exit.
uint32_t
TokenStream::peekExtendedUnicodeEscape(uint32_t* codePoint)
{
// The opening brace character was already read.
int32_t c = getCharIgnoreEOL();
// Skip leading zeros.
uint32_t leadingZeros = 0;
while (c == '0') {
leadingZeros++;
c = getCharIgnoreEOL();
}
// Collect at most six further hex digits, remembering each one in |cp| so
// it can be pushed back afterwards, and accumulating the value in |code|.
char16_t cp[6];
size_t i = 0;
uint32_t code = 0;
while (JS7_ISHEX(c) && i < 6) {
cp[i++] = c;
code = code << 4 | JS7_UNHEX(c);
c = getCharIgnoreEOL();
}
// The escape is accepted only if the next character is the closing brace,
// at least one digit (a zero or otherwise) was seen, and the value does not
// exceed unicode::NonBMPMax.
uint32_t length;
if (c == '}' && (leadingZeros > 0 || i > 0) && code <= unicode::NonBMPMax) {
*codePoint = code;
length = leadingZeros + i + 3;
} else {
length = 0;
}
// Push everything back in reverse order of reading: the terminating
// character first, then the stored digits last-to-first, then the zeros.
ungetCharIgnoreEOL(c);
while (i--)
ungetCharIgnoreEOL(cp[i]);
while (leadingZeros--)
ungetCharIgnoreEOL('0');
return length;
}
uint32_t
TokenStream::matchUnicodeEscapeIdStart(uint32_t* codePoint)
{
uint32_t length = peekUnicodeEscape(codePoint);
if (length > 0 && unicode::IsIdentifierStart(*codePoint)) {
skipChars(length);
return length;
}
return 0;
return false;
}
bool
TokenStream::matchUnicodeEscapeIdent(uint32_t* codePoint)
TokenStream::matchUnicodeEscapeIdStart(int32_t* cp)
{
uint32_t length = peekUnicodeEscape(codePoint);
if (length > 0 && unicode::IsIdentifierPart(*codePoint)) {
skipChars(length);
if (peekUnicodeEscape(cp) && IsIdentifierStart(*cp)) {
skipChars(5);
return true;
}
return false;
}
bool
TokenStream::matchUnicodeEscapeIdent(int32_t* cp)
{
if (peekUnicodeEscape(cp) && IsIdentifierPart(*cp)) {
skipChars(5);
return true;
}
return false;
@ -900,7 +846,7 @@ TokenStream::getDirective(bool isMultiline, bool shouldWarnDeprecated,
skipChars(directiveLength);
tokenbuf.clear();
while ((c = peekChar()) && c != EOF && !unicode::IsSpaceOrBOM2(c)) {
while ((c = peekChar()) && c != EOF && !IsSpaceOrBOM2(c)) {
getChar();
// Debugging directives can occur in both single- and multi-line
// comments. If we're currently inside a multi-line comment, we also
@ -995,15 +941,14 @@ IsTokenSane(Token* tp)
bool
TokenStream::putIdentInTokenbuf(const char16_t* identStart)
{
int32_t c;
uint32_t qc;
int32_t c, qc;
const char16_t* tmp = userbuf.addressOfNextRawChar();
userbuf.setAddressOfNextRawChar(identStart);
tokenbuf.clear();
for (;;) {
c = getCharIgnoreEOL();
if (!unicode::IsIdentifierPart(char16_t(c))) {
if (!IsIdentifierPart(c)) {
if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
break;
c = qc;
@ -1119,8 +1064,7 @@ static_assert(LastCharKind < (1 << (sizeof(firstCharKinds[0]) * 8)),
bool
TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
{
int c;
uint32_t qc;
int c, qc;
Token* tp;
FirstCharKind c1kind;
const char16_t* numStart;
@ -1151,7 +1095,7 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
// Chars not in the range 0..127 are rare. Getting them out of the way
// early allows subsequent checking to be faster.
if (MOZ_UNLIKELY(c >= 128)) {
if (unicode::IsSpaceOrBOM2(c)) {
if (IsSpaceOrBOM2(c)) {
if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {
updateLineInfoForEOL();
updateFlagsForEOL();
@ -1168,7 +1112,7 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
static_assert('_' < 128,
"IdentifierStart contains '_', but as !IsLetter('_'), "
"ensure that '_' is never handled here");
if (unicode::IsLetter(c)) {
if (IsLetter(c)) {
identStart = userbuf.addressOfNextRawChar() - 1;
hadUnicodeEscape = false;
goto identifier;
@ -1224,7 +1168,7 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
c = getCharIgnoreEOL();
if (c == EOF)
break;
if (!unicode::IsIdentifierPart(char16_t(c))) {
if (!IsIdentifierPart(c)) {
if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
break;
hadUnicodeEscape = true;
@ -1318,7 +1262,7 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
}
ungetCharIgnoreEOL(c);
if (c != EOF && unicode::IsIdentifierStart(char16_t(c))) {
if (c != EOF && IsIdentifierStart(c)) {
reportError(JSMSG_IDSTART_AFTER_NUMBER);
goto error;
}
@ -1425,7 +1369,7 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
}
ungetCharIgnoreEOL(c);
if (c != EOF && unicode::IsIdentifierStart(char16_t(c))) {
if (c != EOF && IsIdentifierStart(c)) {
reportError(JSMSG_IDSTART_AFTER_NUMBER);
goto error;
}
@ -1478,15 +1422,13 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
tp->type = matchChar('=') ? TOK_ADDASSIGN : TOK_ADD;
goto out;
case '\\': {
uint32_t escapeLength = matchUnicodeEscapeIdStart(&qc);
if (escapeLength > 0) {
identStart = userbuf.addressOfNextRawChar() - escapeLength - 1;
hadUnicodeEscape = true;
case '\\':
hadUnicodeEscape = matchUnicodeEscapeIdStart(&qc);
if (hadUnicodeEscape) {
identStart = userbuf.addressOfNextRawChar() - 6;
goto identifier;
}
goto badchar;
}
case '|':
if (matchChar('|'))
@ -1732,7 +1674,7 @@ TokenStream::getBracedUnicode(uint32_t* cp)
return false;
code = (code << 4) | JS7_UNHEX(c);
if (code > unicode::NonBMPMax)
if (code > 0x10FFFF)
return false;
first = false;
}
@ -1785,13 +1727,13 @@ TokenStream::getStringOrTemplateToken(int untilChar, Token** tp)
return false;
}
MOZ_ASSERT(code <= unicode::NonBMPMax);
if (code < unicode::NonBMPMin) {
MOZ_ASSERT(code <= 0x10FFFF);
if (code < 0x10000) {
c = code;
} else {
if (!tokenbuf.append(unicode::LeadSurrogate(code)))
if (!tokenbuf.append((code - 0x10000) / 1024 + 0xD800))
return false;
c = unicode::TrailSurrogate(code);
c = ((code - 0x10000) % 1024) + 0xDC00;
}
break;
}

Просмотреть файл

@ -948,10 +948,9 @@ class MOZ_STACK_CLASS TokenStream
void ungetChar(int32_t c);
void ungetCharIgnoreEOL(int32_t c);
Token* newToken(ptrdiff_t adjust);
uint32_t peekUnicodeEscape(uint32_t* codePoint);
uint32_t peekExtendedUnicodeEscape(uint32_t* codePoint);
uint32_t matchUnicodeEscapeIdStart(uint32_t* codePoint);
bool matchUnicodeEscapeIdent(uint32_t* codePoint);
bool peekUnicodeEscape(int32_t* c);
bool matchUnicodeEscapeIdStart(int32_t* c);
bool matchUnicodeEscapeIdent(int32_t* c);
bool peekChars(int n, char16_t* cp);
MOZ_MUST_USE bool getDirectives(bool isMultiline, bool shouldWarnDeprecated);

Просмотреть файл

@ -276,7 +276,7 @@ HexValue(uint32_t c)
}
template <typename CharT>
widechar
size_t
RegExpParser<CharT>::ParseOctalLiteral()
{
MOZ_ASSERT('0' <= current() && current() <= '7');
@ -297,7 +297,7 @@ RegExpParser<CharT>::ParseOctalLiteral()
template <typename CharT>
bool
RegExpParser<CharT>::ParseHexEscape(int length, widechar* value)
RegExpParser<CharT>::ParseHexEscape(int length, size_t* value)
{
const CharT* start = position();
uint32_t val = 0;
@ -321,7 +321,7 @@ RegExpParser<CharT>::ParseHexEscape(int length, widechar* value)
template <typename CharT>
bool
RegExpParser<CharT>::ParseBracedHexEscape(widechar* value)
RegExpParser<CharT>::ParseBracedHexEscape(size_t* value)
{
MOZ_ASSERT(current() == '{');
Advance();
@ -363,7 +363,7 @@ RegExpParser<CharT>::ParseBracedHexEscape(widechar* value)
template <typename CharT>
bool
RegExpParser<CharT>::ParseTrailSurrogate(widechar* value)
RegExpParser<CharT>::ParseTrailSurrogate(size_t* value)
{
if (current() != '\\')
return false;
@ -541,7 +541,7 @@ RegExpParser<CharT>::ParseClassCharacterEscape(widechar* code)
return true;
case 'x': {
Advance();
widechar value;
size_t value;
if (ParseHexEscape(2, &value)) {
*code = value;
return true;
@ -557,7 +557,7 @@ RegExpParser<CharT>::ParseClassCharacterEscape(widechar* code)
}
case 'u': {
Advance();
widechar value;
size_t value;
if (unicode_) {
if (current() == '{') {
if (!ParseBracedHexEscape(&value))
@ -567,7 +567,7 @@ RegExpParser<CharT>::ParseClassCharacterEscape(widechar* code)
}
if (ParseHexEscape(4, &value)) {
if (unicode::IsLeadSurrogate(value)) {
widechar trail;
size_t trail;
if (ParseTrailSurrogate(&trail)) {
*code = unicode::UTF16Decode(value, trail);
return true;
@ -782,10 +782,10 @@ NegateUnicodeRanges(LifoAlloc* alloc, InfallibleVector<RangeType, 1>** ranges,
const RangeType& range = (**ranges)[i];
for (size_t j = 0; j < tmp_ranges->length(); j++) {
const RangeType& tmpRange = (*tmp_ranges)[j];
auto from1 = tmpRange.from();
auto to1 = tmpRange.to();
auto from2 = range.from();
auto to2 = range.to();
size_t from1 = tmpRange.from();
size_t to1 = tmpRange.to();
size_t from2 = range.from();
size_t to2 = range.to();
if (from1 < from2) {
if (to1 < from2) {
@ -926,8 +926,8 @@ UnicodeRangesAtom(LifoAlloc* alloc,
const WideCharRange& range = (*wide_ranges)[i];
widechar from = range.from();
widechar to = range.to();
char16_t from_lead, from_trail;
char16_t to_lead, to_trail;
size_t from_lead, from_trail;
size_t to_lead, to_trail;
unicode::UTF16Encode(from, &from_lead, &from_trail);
if (from == to) {
@ -1636,7 +1636,7 @@ RegExpParser<CharT>::ParseDisjunction()
}
Advance();
widechar octal = ParseOctalLiteral();
size_t octal = ParseOctalLiteral();
builder->AddCharacter(octal);
break;
}
@ -1684,7 +1684,7 @@ RegExpParser<CharT>::ParseDisjunction()
}
case 'x': {
Advance(2);
widechar value;
size_t value;
if (ParseHexEscape(2, &value)) {
builder->AddCharacter(value);
} else {
@ -1696,7 +1696,7 @@ RegExpParser<CharT>::ParseDisjunction()
}
case 'u': {
Advance(2);
widechar value;
size_t value;
if (unicode_) {
if (current() == '{') {
if (!ParseBracedHexEscape(&value))
@ -1706,7 +1706,7 @@ RegExpParser<CharT>::ParseDisjunction()
} else if (unicode::IsTrailSurrogate(value)) {
builder->AddAtom(TrailSurrogateAtom(alloc, value));
} else if (value >= unicode::NonBMPMin) {
char16_t lead, trail;
size_t lead, trail;
unicode::UTF16Encode(value, &lead, &trail);
builder->AddAtom(SurrogatePairAtom(alloc, lead, trail,
ignore_case_));
@ -1715,7 +1715,7 @@ RegExpParser<CharT>::ParseDisjunction()
}
} else if (ParseHexEscape(4, &value)) {
if (unicode::IsLeadSurrogate(value)) {
widechar trail;
size_t trail;
if (ParseTrailSurrogate(&trail)) {
builder->AddAtom(SurrogatePairAtom(alloc, value, trail,
ignore_case_));

Просмотреть файл

@ -196,13 +196,13 @@ class RegExpParser
// Checks whether the following is a length-digit hexadecimal number,
// and sets the value if it is.
bool ParseHexEscape(int length, widechar* value);
bool ParseHexEscape(int length, size_t* value);
bool ParseBracedHexEscape(widechar* value);
bool ParseTrailSurrogate(widechar* value);
bool ParseBracedHexEscape(size_t* value);
bool ParseTrailSurrogate(size_t* value);
bool ParseRawSurrogatePair(char16_t* lead, char16_t* trail);
widechar ParseOctalLiteral();
size_t ParseOctalLiteral();
// Tries to parse the input as a back reference. If successful it
// stores the result in the output parameter and returns true. If

Просмотреть файл

@ -46,7 +46,6 @@
#include "vm/RegExpObject.h"
#include "vm/RegExpStatics.h"
#include "vm/StringBuffer.h"
#include "vm/Unicode.h"
#include "vm/Interpreter-inl.h"
#include "vm/String-inl.h"
@ -55,6 +54,7 @@
using namespace js;
using namespace js::gc;
using namespace js::unicode;
using JS::Symbol;
using JS::SymbolCode;
@ -2762,6 +2762,35 @@ js::str_fromCharCode_one_arg(JSContext* cx, HandleValue code, MutableHandleValue
return CodeUnitToString(cx, ucode, rval);
}
// Returns true when |codePoint| does not fit in a single 16-bit code unit,
// i.e. when it is 0x10000 or greater.
static inline bool
IsSupplementary(uint32_t codePoint)
{
    const uint32_t firstSupplementary = 0x10000;
    return codePoint >= firstSupplementary;
}
// Computes the lead (high) surrogate code unit for |codePoint|.
// The bits above the low ten are shifted down and biased by 0xD7C0, which
// equals 0xD800 - (0x10000 >> 10); the sum is truncated to 16 bits.
static inline char16_t
LeadSurrogate(uint32_t codePoint)
{
    const uint32_t leadBias = 0xD800 - (0x10000 >> 10);
    const uint32_t upperBits = codePoint >> 10;
    return static_cast<char16_t>(upperBits + leadBias);
}
// Computes the trail (low) surrogate code unit for |codePoint|: the low ten
// bits of the code point placed on top of the 0xDC00 base.
static inline char16_t
TrailSurrogate(uint32_t codePoint)
{
    const uint32_t trailBase = 0xDC00;
    const uint32_t lowTenBits = codePoint % 1024;
    // trailBase has its low ten bits clear, so adding equals OR-ing here.
    return static_cast<char16_t>(trailBase + lowTenBits);
}
// Writes the UTF-16 form of |codePoint| into |elements| beginning at
// |*index| and advances |*index| past what was written: two code units
// (lead then trail surrogate) when IsSupplementary(codePoint) holds, one
// code unit otherwise. The caller must guarantee enough room in |elements|.
static inline void
UTF16Encode(uint32_t codePoint, char16_t* elements, unsigned* index)
{
    unsigned pos = *index;
    if (IsSupplementary(codePoint)) {
        elements[pos++] = LeadSurrogate(codePoint);
        elements[pos++] = TrailSurrogate(codePoint);
    } else {
        elements[pos++] = char16_t(codePoint);
    }
    *index = pos;
}
static MOZ_ALWAYS_INLINE bool
ToCodePoint(JSContext* cx, HandleValue code, uint32_t* codePoint)
{
@ -2771,7 +2800,7 @@ ToCodePoint(JSContext* cx, HandleValue code, uint32_t* codePoint)
return false;
// String.fromCodePoint, Steps 5.c-d.
if (JS::ToInteger(nextCP) != nextCP || nextCP < 0 || nextCP > unicode::NonBMPMax) {
if (JS::ToInteger(nextCP) != nextCP || nextCP < 0 || nextCP > 0x10FFFF) {
ToCStringBuf cbuf;
if (char* numStr = NumberToCString(cx, &cbuf, nextCP))
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_NOT_A_CODEPOINT, numStr);
@ -2793,10 +2822,10 @@ js::str_fromCodePoint_one_arg(JSContext* cx, HandleValue code, MutableHandleValu
return false;
// Steps 5.e, 6.
if (!unicode::IsSupplementary(codePoint))
if (!IsSupplementary(codePoint))
return CodeUnitToString(cx, uint16_t(codePoint), rval);
char16_t chars[] = { unicode::LeadSurrogate(codePoint), unicode::TrailSurrogate(codePoint) };
char16_t chars[] = { LeadSurrogate(codePoint), TrailSurrogate(codePoint) };
JSString* str = NewStringCopyNDontDeflate<CanGC>(cx, chars, 2);
if (!str)
return false;
@ -2824,7 +2853,7 @@ str_fromCodePoint_few_args(JSContext* cx, const CallArgs& args)
return false;
// Step 5.e.
unicode::UTF16Encode(codePoint, elements, &length);
UTF16Encode(codePoint, elements, &length);
}
// Step 6.
@ -2875,7 +2904,7 @@ js::str_fromCodePoint(JSContext* cx, unsigned argc, Value* vp)
}
// Step 5.e.
unicode::UTF16Encode(codePoint, elements, &length);
UTF16Encode(codePoint, elements, &length);
}
elements[length] = 0;
@ -3584,11 +3613,11 @@ Encode(StringBuffer& sb, const CharT* chars, size_t length,
if (!sb.append(c))
return Encode_Failure;
} else {
if (unicode::IsTrailSurrogate(c))
if (c >= 0xDC00 && c <= 0xDFFF)
return Encode_BadUri;
uint32_t v;
if (!unicode::IsLeadSurrogate(c)) {
if (c < 0xD800 || c > 0xDBFF) {
v = c;
} else {
k++;
@ -3596,10 +3625,10 @@ Encode(StringBuffer& sb, const CharT* chars, size_t length,
return Encode_BadUri;
char16_t c2 = chars[k];
if (!unicode::IsTrailSurrogate(c2))
if (c2 < 0xDC00 || c2 > 0xDFFF)
return Encode_BadUri;
v = unicode::UTF16Decode(c, c2);
v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
}
uint8_t utf8buf[4];
size_t L = OneUcs4ToUtf8Char(utf8buf, v);
@ -3699,14 +3728,15 @@ Decode(StringBuffer& sb, const CharT* chars, size_t length, const bool* reserved
octets[j] = char(B);
}
uint32_t v = JS::Utf8ToOneUcs4Char(octets, n);
if (v >= unicode::NonBMPMin) {
if (v > unicode::NonBMPMax)
if (v >= 0x10000) {
v -= 0x10000;
if (v > 0xFFFFF)
return Decode_BadUri;
char16_t H = unicode::LeadSurrogate(v);
c = char16_t((v & 0x3FF) + 0xDC00);
char16_t H = char16_t((v >> 10) + 0xD800);
if (!sb.append(H))
return Decode_Failure;
c = unicode::TrailSurrogate(v);
} else {
c = char16_t(v);
}
@ -3810,7 +3840,7 @@ str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp)
uint32_t
js::OneUcs4ToUtf8Char(uint8_t* utf8Buffer, uint32_t ucs4Char)
{
MOZ_ASSERT(ucs4Char <= unicode::NonBMPMax);
MOZ_ASSERT(ucs4Char <= 0x10FFFF);
if (ucs4Char < 0x80) {
utf8Buffer[0] = uint8_t(ucs4Char);

Просмотреть файл

@ -1,229 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// Simple cases, not using eval.
// Each declaration spells (part of) its identifier with a \u{...} escape and
// the following assertEq reads the binding back through the plain spelling.
{
// Escape as the whole identifier.
let \u{61} = 123;
assertEq(a, 123);
// Same, with an upper-case hex digit in the escape.
let \u{6A} = 123;
assertEq(j, 123);
// Escape in a non-initial position.
let a\u{62} = 456;
assertEq(ab, 456);
// Two consecutive escapes.
let \u{63}\u{6b} = 789;
assertEq(ck, 789);
}
// Padding variants to put in front of the hex digits of an escape. Note that
// the elements are *strings* consisting of 0, 1, 2, 3, 4 and 100 "0"
// characters respectively, not the counts themselves.
const leadingZeros = [0, 1, 2, 3, 4, 100].map(c => "0".repeat(c));
// From DerivedCoreProperties.txt (Unicode 9):
// Derived Property: ID_Start
// Characters that can start an identifier.
// Generated from:
// Lu + Ll + Lt + Lm + Lo + Nl
// + Other_ID_Start
// - Pattern_Syntax
// - Pattern_White_Space
const idStart = [
0x0041, // LATIN CAPITAL LETTER A, Gc=Lu
0x006A, // LATIN SMALL LETTER J, Gc=Ll
0x00C9, // LATIN CAPITAL LETTER E WITH ACUTE, Gc=Lu
0x00FF, // LATIN SMALL LETTER Y WITH DIAERESIS, Gc=Ll
0x01C5, // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON, Gc=Lt
0x0294, // LATIN LETTER GLOTTAL STOP, Gc=Lo
0x037A, // GREEK YPOGEGRAMMENI, Gc=Lm
0x16EE, // RUNIC ARLAUG SYMBOL, Gc=Nl
0xFF70, // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK, Gc=Lm
];
const idStartSupplemental = [
0x10140, // GREEK ACROPHONIC ATTIC ONE QUARTER, Gc=Nl
0x10300, // OLD ITALIC LETTER A, Gc=Lo
0x10400, // DESERET CAPITAL LETTER LONG I, Gc=Lu
0x10430, // DESERET SMALL LETTER SHORT A, Gc=Ll
0x16B40, // PAHAWH HMONG SIGN VOS SEEV, Gc=Lm
];
// From PropList.txt (Unicode 9):
const otherIdStart = [
// Enable the following lines when Bug 1282724 is fixed.
// 0x1885, // MONGOLIAN LETTER ALI GALI BALUDA, Gc=Mn
// 0x1886, // MONGOLIAN LETTER ALI GALI THREE BALUDA, Gc=Mn
0x2118, // SCRIPT CAPITAL P, Gc=Sm
0x212E, // ESTIMATED SYMBOL, Gc=So
0x309B, // KATAKANA-HIRAGANA VOICED SOUND MARK, Gc=Sk
0x309C, // KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK, Gc=Sk
];
// Remove this list when we support Unicode 9 (Bug 1282724).
const otherIdStart_Unicode9 = [
0x1885, // MONGOLIAN LETTER ALI GALI BALUDA, Gc=Mn
0x1886, // MONGOLIAN LETTER ALI GALI THREE BALUDA, Gc=Mn
];
// From DerivedCoreProperties.txt (Unicode 9):
// Derived Property: ID_Continue
// Characters that can continue an identifier.
// Generated from:
// ID_Start
// + Mn + Mc + Nd + Pc
// + Other_ID_Continue
// - Pattern_Syntax
// - Pattern_White_Space
const idContinue = [
0x0030, // DIGIT ZERO, Gc=Nd
0x0300, // COMBINING GRAVE ACCENT, Gc=Mn
0x0660, // ARABIC-INDIC DIGIT ZERO, Gc=Nd
0x0903, // DEVANAGARI SIGN VISARGA, Gc=Mc
0xFF10, // FULLWIDTH DIGIT ZERO, Gc=Nd
0xFF3F, // FULLWIDTH LOW LINE, Gc=Pc
];
const idContinueSupplemental = [
0x101FD, // PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE, Gc=Mn
0x104A0, // OSMANYA DIGIT ZERO, Gc=Nd
0x11000, // BRAHMI SIGN CANDRABINDU, Gc=Mc
];
// From PropList.txt (Unicode 9):
const otherIdContinue = [
0x00B7, // MIDDLE DOT, Gc=Po
0x0387, // GREEK ANO TELEIA, Gc=Po
0x1369, // ETHIOPIC DIGIT ONE, Gc=No
0x136A, // ETHIOPIC DIGIT TWO, Gc=No
0x136B, // ETHIOPIC DIGIT THREE, Gc=No
0x136C, // ETHIOPIC DIGIT FOUR, Gc=No
0x136D, // ETHIOPIC DIGIT FIVE, Gc=No
0x136E, // ETHIOPIC DIGIT SIX, Gc=No
0x136F, // ETHIOPIC DIGIT SEVEN, Gc=No
0x1370, // ETHIOPIC DIGIT EIGHT, Gc=No
0x1371, // ETHIOPIC DIGIT NINE, Gc=No
0x19DA, // NEW TAI LUE THAM DIGIT ONE, Gc=No
];
// Every sample in idStart and otherIdStart_Unicode9 must be usable as a
// complete identifier when written as a \u{...} escape, regardless of how
// many leading zeros pad the hex digits.
for (let ident of [...idStart, ...otherIdStart_Unicode9]) {
    // leadingZeros already contains strings of "0" characters, so each entry
    // is interpolated directly. Passing such a string to "0".repeat() would
    // convert it to the number 0 and always produce "", which silently
    // skipped every padded variant.
    for (let zeros of leadingZeros) {
        eval(`
let \\u{${zeros}${ident.toString(16)}} = 123;
assertEq(${String.fromCodePoint(ident)}, 123);
`);
    }
}
// Move this to the loop above when Bug 917436 is fixed.
// For now the test expects a lone \u{...} escape for each sample in
// idStartSupplemental and otherIdStart to throw a SyntaxError, for every
// leading-zero padding.
for (let ident of [...idStartSupplemental, ...otherIdStart]) {
for (let zeros of leadingZeros) {
assertThrowsInstanceOf(() => eval(`\\u{${zeros}${ident.toString(16)}}`), SyntaxError);
}
}
// The ID_Continue samples (BMP, supplementary and Other_ID_Continue) are
// placed at the very start of the evaluated source; the test expects a
// SyntaxError for each of them, for every leading-zero padding.
for (let ident of [...idContinue, ...idContinueSupplemental, ...otherIdContinue]) {
for (let zeros of leadingZeros) {
assertThrowsInstanceOf(() => eval(`\\u{${zeros}${ident.toString(16)}}`), SyntaxError);
}
}
// The same samples used in a non-initial position: the identifier is "A"
// followed by the escape, and the binding is read back via the literal
// two-code-point spelling built with String.fromCodePoint(0x41, ident).
for (let ident of [...idStart, ...otherIdStart_Unicode9, ...idContinue]) {
for (let zeros of leadingZeros) {
eval(`
let A\\u{${zeros}${ident.toString(16)}} = 123;
assertEq(${String.fromCodePoint(0x41, ident)}, 123);
`);
}
}
// Move this to the loop above when Bug 917436 is fixed.
// NOTE(review): unlike the loop above, the evaluated source here has no "A"
// prefix, so the escape is exercised at the start of the source rather than
// in a non-initial position -- confirm whether that is intended.
for (let ident of [...idStartSupplemental, ...otherIdStart, ...idContinueSupplemental, ...otherIdContinue]) {
for (let zeros of leadingZeros) {
assertThrowsInstanceOf(() => eval(`\\u{${zeros}${ident.toString(16)}}`), SyntaxError);
}
}
// Sample code points the test expects to be rejected when written as a lone
// \u{...} escape; the general category of each is noted per entry.
const notIdentifiers = [
0x0000, // NULL, Gc=Cc
0x000A, // LINE FEED (LF), Gc=Cc
0x005E, // CIRCUMFLEX ACCENT, Gc=Sk
0x00B1, // PLUS-MINUS SIGN, Gc=Sm
0xFF61, // HALFWIDTH IDEOGRAPHIC FULL STOP, Gc=Po
0x10061, // Not assigned.
0x10100, // AEGEAN WORD SEPARATOR LINE, Gc=Po
0x100061, // <Plane 16 Private Use>, Gc=Co
];
// Each of them must produce a SyntaxError for every leading-zero padding.
for (let ident of notIdentifiers) {
for (let zeros of leadingZeros) {
assertThrowsInstanceOf(() => eval(`\\u{${zeros}${ident.toString(16)}}`), SyntaxError);
}
}
// Escapes that are cut off before the closing brace (with a growing number
// of digits), plus one that has a closing brace but no opening brace.
// After string-literal processing each entry begins with a single backslash.
const incompleteEscapes = [
"\\u{",
"\\u{6",
"\\u{61",
"\\u{061",
"\\u{0061",
"\\u{00061",
"\\u{000061",
"\\u{0000061",
"\\u}",
];
// Each fragment must be a SyntaxError whether it is followed by the end of
// input, a line terminator, or a space.
for (let invalid of incompleteEscapes) {
// Ends with EOF.
assertThrowsInstanceOf(() => eval(invalid), SyntaxError);
// Ends with EOL.
assertThrowsInstanceOf(() => eval(invalid + "\n"), SyntaxError);
// Ends with space.
assertThrowsInstanceOf(() => eval(invalid + " "), SyntaxError);
}
// Contents to place between the braces of a \u{...} escape that the test
// expects to be rejected. Entries such as "\0", "\uFFFF" and "\n" are
// ordinary string escapes here, so the actual character is what ends up
// inside the braces.
const invalidEscapes = [
// Empty escape.
"",
// Not hexadecimal characters.
"\0",
"G",
"Z",
"\uFFFF",
"\uDBFF\uDFFF",
// Has space characters.
" 61",
"61 ",
// Has newline characters.
"\n61",
"61\n",
// Exceeds 0x10FFFF, six characters.
"110000",
"110001",
"fffffe",
"ffffff",
// Exceeds 0x10FFFF, more than six characters.
"10ffff0",
"10ffffabcdef",
];
// Every entry is tried with each leading-zero padding, both as a lone escape
// and as the name in a `var` declaration; both forms must be a SyntaxError.
for (let invalid of invalidEscapes) {
for (let zeros of leadingZeros) {
assertThrowsInstanceOf(() => eval(`\\u{${zeros}${invalid}}`), SyntaxError);
assertThrowsInstanceOf(() => eval(`var \\u{${zeros}${invalid}}`), SyntaxError);
}
}
// Report success only when a reportCompare function is defined in the
// environment running this file; the typeof guard avoids a ReferenceError
// when it is not.
if (typeof reportCompare === "function")
reportCompare(0, 0, "ok");

Просмотреть файл

@ -68,15 +68,6 @@ struct CharFlag {
const char16_t BYTE_ORDER_MARK2 = 0xFFFE;
const char16_t NO_BREAK_SPACE = 0x00A0;
const char16_t LeadSurrogateMin = 0xD800;
const char16_t LeadSurrogateMax = 0xDBFF;
const char16_t TrailSurrogateMin = 0xDC00;
const char16_t TrailSurrogateMax = 0xDFFF;
const uint32_t UTF16Max = 0xFFFF;
const uint32_t NonBMPMin = 0x10000;
const uint32_t NonBMPMax = 0x10FFFF;
class CharacterInfo {
/*
* upperCase and lowerCase normally store the delta between two
@ -144,13 +135,6 @@ IsIdentifierStart(char16_t ch)
return CharInfo(ch).isLetter();
}
inline bool
IsIdentifierStart(uint32_t codePoint)
{
// TODO: Supplemental code points not yet supported (bug 1197230).
return codePoint <= UTF16Max && IsIdentifierStart(char16_t(codePoint));
}
inline bool
IsIdentifierPart(char16_t ch)
{
@ -162,13 +146,6 @@ IsIdentifierPart(char16_t ch)
return CharInfo(ch).isIdentifierPart();
}
inline bool
IsIdentifierPart(uint32_t codePoint)
{
// TODO: Supplemental code points not yet supported (bug 1197230).
return codePoint <= UTF16Max && IsIdentifierPart(char16_t(codePoint));
}
inline bool
IsLetter(char16_t ch)
{
@ -421,67 +398,42 @@ ReverseFoldCase3(char16_t ch)
return uint16_t(ch) + info.reverse3;
}
const size_t LeadSurrogateMin = 0xD800;
const size_t LeadSurrogateMax = 0xDBFF;
const size_t TrailSurrogateMin = 0xDC00;
const size_t TrailSurrogateMax = 0xDFFF;
const size_t UTF16Max = 0xFFFF;
const size_t NonBMPMin = 0x10000;
const size_t NonBMPMax = 0x10FFFF;
inline bool
IsSupplementary(uint32_t codePoint)
IsLeadSurrogate(size_t value)
{
return codePoint >= NonBMPMin && codePoint <= NonBMPMax;
return value >= LeadSurrogateMin && value <= LeadSurrogateMax;
}
inline bool
IsLeadSurrogate(uint32_t codePoint)
IsTrailSurrogate(size_t value)
{
return codePoint >= LeadSurrogateMin && codePoint <= LeadSurrogateMax;
}
inline bool
IsTrailSurrogate(uint32_t codePoint)
{
return codePoint >= TrailSurrogateMin && codePoint <= TrailSurrogateMax;
}
inline char16_t
LeadSurrogate(uint32_t codePoint)
{
MOZ_ASSERT(IsSupplementary(codePoint));
return char16_t((codePoint >> 10) + (LeadSurrogateMin - (NonBMPMin >> 10)));
}
inline char16_t
TrailSurrogate(uint32_t codePoint)
{
MOZ_ASSERT(IsSupplementary(codePoint));
return char16_t((codePoint & 0x3FF) | TrailSurrogateMin);
return value >= TrailSurrogateMin && value <= TrailSurrogateMax;
}
inline void
UTF16Encode(uint32_t codePoint, char16_t* lead, char16_t* trail)
UTF16Encode(size_t cp, size_t* lead, size_t* trail)
{
MOZ_ASSERT(IsSupplementary(codePoint));
MOZ_ASSERT(cp >= NonBMPMin && cp <= NonBMPMax);
*lead = LeadSurrogate(codePoint);
*trail = TrailSurrogate(codePoint);
*lead = (cp - NonBMPMin) / 1024 + LeadSurrogateMin;
*trail = ((cp - NonBMPMin) % 1024) + TrailSurrogateMin;
}
static inline void
UTF16Encode(uint32_t codePoint, char16_t* elements, unsigned* index)
{
if (!IsSupplementary(codePoint)) {
elements[(*index)++] = char16_t(codePoint);
} else {
elements[(*index)++] = LeadSurrogate(codePoint);
elements[(*index)++] = TrailSurrogate(codePoint);
}
}
inline uint32_t
UTF16Decode(char16_t lead, char16_t trail)
inline size_t
UTF16Decode(size_t lead, size_t trail)
{
MOZ_ASSERT(IsLeadSurrogate(lead));
MOZ_ASSERT(IsTrailSurrogate(trail));
return (lead << 10) + trail + (NonBMPMin - (LeadSurrogateMin << 10) - TrailSurrogateMin);
return (lead - LeadSurrogateMin) * 1024 + (trail - TrailSurrogateMin) + NonBMPMin;
}
} /* namespace unicode */