From 312ab71575692f6527fe903bf82f7c669cf9ae68 Mon Sep 17 00:00:00 2001 From: Sebastian Hengst Date: Thu, 6 Apr 2017 15:23:13 +0200 Subject: [PATCH] Backed out changeset c34970cf36e9 (bug 1318403) --- config/check_spidermonkey_style.py | 1 - js/src/jsstr.cpp | 557 +++++------------------- js/src/jsstr.h | 6 - js/src/tests/ecma/String/15.5.4.12-5.js | 3 - js/src/vm/Unicode.h | 57 +-- 5 files changed, 111 insertions(+), 513 deletions(-) diff --git a/config/check_spidermonkey_style.py b/config/check_spidermonkey_style.py index 66a28ee59653..416cfb84c270 100644 --- a/config/check_spidermonkey_style.py +++ b/config/check_spidermonkey_style.py @@ -83,7 +83,6 @@ included_inclnames_to_ignore = set([ 'unicode/timezone.h', # ICU 'unicode/plurrule.h', # ICU 'unicode/ucal.h', # ICU - 'unicode/uchar.h', # ICU 'unicode/uclean.h', # ICU 'unicode/ucol.h', # ICU 'unicode/udat.h', # ICU diff --git a/js/src/jsstr.cpp b/js/src/jsstr.cpp index 8d462b5e03b8..faffef29385d 100644 --- a/js/src/jsstr.cpp +++ b/js/src/jsstr.cpp @@ -37,7 +37,6 @@ #include "js/Conversions.h" #include "js/UniquePtr.h" #if ENABLE_INTL_API -#include "unicode/uchar.h" #include "unicode/unorm2.h" #endif #include "vm/GlobalObject.h" @@ -603,190 +602,19 @@ js::SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt, int32_t l return NewDependentString(cx, str, begin, len); } -/** - * U+03A3 GREEK CAPITAL LETTER SIGMA has two different lower case mappings - * depending on its context: - * When it's preceded by a cased character and not followed by another cased - * character, its lower case form is U+03C2 GREEK SMALL LETTER FINAL SIGMA. - * Otherwise its lower case mapping is U+03C3 GREEK SMALL LETTER SIGMA. 
- * - * Unicode 9.0, §3.13 Default Case Algorithms - */ -static char16_t -Final_Sigma(const char16_t* chars, size_t length, size_t index) -{ - MOZ_ASSERT(index < length); - MOZ_ASSERT(chars[index] == unicode::GREEK_CAPITAL_LETTER_SIGMA); - MOZ_ASSERT(unicode::ToLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA) == - unicode::GREEK_SMALL_LETTER_SIGMA); - -#if ENABLE_INTL_API - bool precededByCased = false; - for (size_t i = index; i > 0; ) { - char16_t c = chars[--i]; - uint32_t codePoint = c; - if (unicode::IsTrailSurrogate(c) && i > 0) { - char16_t lead = chars[i - 1]; - if (unicode::IsLeadSurrogate(lead)) { - codePoint = unicode::UTF16Decode(lead, c); - i--; - } - } - - // Ignore any characters with the property Case_Ignorable. - // NB: We need to skip over all Case_Ignorable characters, even when - // they also have the Cased binary property. - if (u_hasBinaryProperty(codePoint, UCHAR_CASE_IGNORABLE)) - continue; - - precededByCased = u_hasBinaryProperty(codePoint, UCHAR_CASED); - break; - } - if (!precededByCased) - return unicode::GREEK_SMALL_LETTER_SIGMA; - - bool followedByCased = false; - for (size_t i = index + 1; i < length; ) { - char16_t c = chars[i++]; - uint32_t codePoint = c; - if (unicode::IsLeadSurrogate(c) && i < length) { - char16_t trail = chars[i]; - if (unicode::IsTrailSurrogate(trail)) { - codePoint = unicode::UTF16Decode(c, trail); - i++; - } - } - - // Ignore any characters with the property Case_Ignorable. - // NB: We need to skip over all Case_Ignorable characters, even when - // they also have the Cased binary property. 
- if (u_hasBinaryProperty(codePoint, UCHAR_CASE_IGNORABLE)) - continue; - - followedByCased = u_hasBinaryProperty(codePoint, UCHAR_CASED); - break; - } - if (!followedByCased) - return unicode::GREEK_SMALL_LETTER_FINAL_SIGMA; -#endif - - return unicode::GREEK_SMALL_LETTER_SIGMA; -} - -static Latin1Char -Final_Sigma(const Latin1Char* chars, size_t length, size_t index) -{ - MOZ_ASSERT_UNREACHABLE("U+03A3 is not a Latin-1 character"); - return 0; -} - -// If |srcLength == destLength| is true, the destination buffer was allocated -// with the same size as the source buffer. When we append characters which -// have special casing mappings, we test |srcLength == destLength| to decide -// if we need to back out and reallocate a sufficiently large destination -// buffer. Otherwise the destination buffer was allocated with the correct -// size to hold all lower case mapped characters, i.e. -// |destLength == ToLowerCaseLength(srcChars, 0, srcLength)| is true. -template -static size_t -ToLowerCaseImpl(CharT* destChars, const CharT* srcChars, size_t startIndex, size_t srcLength, - size_t destLength) -{ - MOZ_ASSERT(startIndex < srcLength); - MOZ_ASSERT(srcLength <= destLength); - MOZ_ASSERT_IF((IsSame::value), srcLength == destLength); - - size_t j = startIndex; - for (size_t i = startIndex; i < srcLength; i++) { - char16_t c = srcChars[i]; - if (!IsSame::value) { - if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) { - char16_t trail = srcChars[i + 1]; - if (unicode::IsTrailSurrogate(trail)) { - trail = unicode::ToLowerCaseNonBMPTrail(c, trail); - destChars[j++] = c; - destChars[j++] = trail; - i++; - continue; - } - } - - // Special case: U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE - // lowercases to . - if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) { - // Return if the output buffer is too small. 
- if (srcLength == destLength) - return i; - - destChars[j++] = CharT('i'); - destChars[j++] = CharT(unicode::COMBINING_DOT_ABOVE); - continue; - } - - // Special case: U+03A3 GREEK CAPITAL LETTER SIGMA lowercases to - // one of two codepoints depending on context. - if (c == unicode::GREEK_CAPITAL_LETTER_SIGMA) { - destChars[j++] = Final_Sigma(srcChars, srcLength, i); - continue; - } - } - - c = unicode::ToLowerCase(c); - MOZ_ASSERT_IF((IsSame::value), c <= JSString::MAX_LATIN1_CHAR); - destChars[j++] = c; - } - - MOZ_ASSERT(j == destLength); - destChars[destLength] = '\0'; - - return srcLength; -} - -static size_t -ToLowerCaseLength(const char16_t* chars, size_t startIndex, size_t length) -{ - size_t lowerLength = length; - for (size_t i = startIndex; i < length; i++) { - char16_t c = chars[i]; - - // U+0130 is lowercased to the two-element sequence . - if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) - lowerLength += 1; - } - return lowerLength; -} - -static size_t -ToLowerCaseLength(const Latin1Char* chars, size_t startIndex, size_t length) -{ - MOZ_ASSERT_UNREACHABLE("never called for Latin-1 strings"); - return 0; -} - template static JSString* ToLowerCase(JSContext* cx, JSLinearString* str) { - // Unlike toUpperCase, toLowerCase has the nice invariant that if the - // input is a Latin-1 string, the output is also a Latin-1 string. - using AnyCharPtr = UniquePtr; - - AnyCharPtr newChars; - const size_t length = str->length(); - size_t resultLength; + // Unlike toUpperCase, toLowerCase has the nice invariant that if the input + // is a Latin1 string, the output is also a Latin1 string. + UniquePtr newChars; + size_t length = str->length(); { AutoCheckCannotGC nogc; const CharT* chars = str->chars(nogc); - // We don't need extra special casing checks in the loop below, - // because U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+03A3 - // GREEK CAPITAL LETTER SIGMA already have simple lower case mappings. 
- MOZ_ASSERT(unicode::CanLowerCase(unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE), - "U+0130 has a simple lower case mapping"); - MOZ_ASSERT(unicode::CanLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA), - "U+03A3 has a simple lower case mapping"); - - // Look for the first character that changes when lowercased. + // Look for the first upper case character. size_t i = 0; for (; i < length; i++) { char16_t c = chars[i]; @@ -806,36 +634,40 @@ ToLowerCase(JSContext* cx, JSLinearString* str) break; } - // If no character needs to change, return the input string. + // If all characters are lower case, return the input string. if (i == length) return str; - resultLength = length; - newChars = cx->make_pod_array(resultLength + 1); + newChars = cx->make_pod_array(length + 1); if (!newChars) return nullptr; PodCopy(newChars.get(), chars, i); - size_t readChars = ToLowerCaseImpl(newChars.get(), chars, i, length, resultLength); - if (readChars < length) { - MOZ_ASSERT((!IsSame::value), - "Latin-1 strings don't have special lower case mappings"); - resultLength = ToLowerCaseLength(chars, readChars, length); + for (; i < length; i++) { + char16_t c = chars[i]; + if (!IsSame::value) { + if (unicode::IsLeadSurrogate(c) && i + 1 < length) { + char16_t trail = chars[i + 1]; + if (unicode::IsTrailSurrogate(trail)) { + trail = unicode::ToLowerCaseNonBMPTrail(c, trail); + newChars[i] = c; + newChars[i + 1] = trail; + i++; + continue; + } + } + } - AnyCharPtr buf = cx->make_pod_array(resultLength + 1); - if (!buf) - return nullptr; - - PodCopy(buf.get(), newChars.get(), readChars); - newChars = Move(buf); - - MOZ_ALWAYS_TRUE(length == - ToLowerCaseImpl(newChars.get(), chars, readChars, length, resultLength)); + c = unicode::ToLowerCase(c); + MOZ_ASSERT_IF((IsSame::value), c <= JSString::MAX_LATIN1_CHAR); + newChars[i] = c; } + + newChars[length] = 0; } - JSString* res = NewStringDontDeflate(cx, newChars.get(), resultLength); + JSString* res = NewStringDontDeflate(cx, newChars.get(), 
length); if (!res) return nullptr; @@ -843,33 +675,32 @@ ToLowerCase(JSContext* cx, JSLinearString* str) return res; } -JSString* -js::StringToLowerCase(JSContext* cx, HandleLinearString string) +static inline bool +ToLowerCaseHelper(JSContext* cx, const CallArgs& args) { - if (string->hasLatin1Chars()) - return ToLowerCase(cx, string); - return ToLowerCase(cx, string); + RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); + if (!str) + return false; + + JSLinearString* linear = str->ensureLinear(cx); + if (!linear) + return false; + + if (linear->hasLatin1Chars()) + str = ToLowerCase(cx, linear); + else + str = ToLowerCase(cx, linear); + if (!str) + return false; + + args.rval().setString(str); + return true; } bool js::str_toLowerCase(JSContext* cx, unsigned argc, Value* vp) { - CallArgs args = CallArgsFromVp(argc, vp); - - RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); - if (!str) - return false; - - RootedLinearString linear(cx, str->ensureLinear(cx)); - if (!linear) - return false; - - JSString* result = StringToLowerCase(cx, linear); - if (!result) - return false; - - args.rval().setString(result); - return true; + return ToLowerCaseHelper(cx, CallArgsFromVp(argc, vp)); } bool @@ -877,15 +708,15 @@ js::str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); - RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); - if (!str) - return false; - /* * Forcefully ignore the first (or any) argument and return toLowerCase(), * ECMA has reserved that argument, presumably for defining the locale. 
*/ if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToLowerCase) { + RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); + if (!str) + return false; + RootedValue result(cx); if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result)) return false; @@ -894,170 +725,54 @@ js::str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) return true; } - RootedLinearString linear(cx, str->ensureLinear(cx)); - if (!linear) - return false; - - JSString* result = StringToLowerCase(cx, linear); - if (!result) - return false; - - args.rval().setString(result); - return true; + return ToLowerCaseHelper(cx, args); } -static inline bool -CanUpperCaseSpecialCasing(Latin1Char charCode) -{ - // Handle U+00DF LATIN SMALL LETTER SHARP S inline, all other Latin-1 - // characters don't have special casing rules. - MOZ_ASSERT_IF(charCode != unicode::LATIN_SMALL_LETTER_SHARP_S, - !unicode::CanUpperCaseSpecialCasing(charCode)); - - return charCode == unicode::LATIN_SMALL_LETTER_SHARP_S; -} - -static inline bool -CanUpperCaseSpecialCasing(char16_t charCode) -{ - return unicode::CanUpperCaseSpecialCasing(charCode); -} - -static inline size_t -LengthUpperCaseSpecialCasing(Latin1Char charCode) -{ - // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'. - MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S); - - return 2; -} - -static inline size_t -LengthUpperCaseSpecialCasing(char16_t charCode) -{ - MOZ_ASSERT(CanUpperCaseSpecialCasing(charCode)); - - return unicode::LengthUpperCaseSpecialCasing(charCode); -} - -static inline void -AppendUpperCaseSpecialCasing(char16_t charCode, Latin1Char* elements, size_t* index) -{ - // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'. 
- MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S); - static_assert('S' <= JSString::MAX_LATIN1_CHAR, "'S' is a Latin-1 character"); - - elements[(*index)++] = 'S'; - elements[(*index)++] = 'S'; -} - -static inline void -AppendUpperCaseSpecialCasing(char16_t charCode, char16_t* elements, size_t* index) -{ - unicode::AppendUpperCaseSpecialCasing(charCode, elements, index); -} - -// See ToLowerCaseImpl for an explanation of the parameters. template -static size_t -ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t startIndex, size_t srcLength, - size_t destLength) +static void +ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t firstLowerCase, size_t length) { - static_assert(IsSame::value || !IsSame::value, - "cannot write non-Latin-1 characters into Latin-1 string"); - MOZ_ASSERT(startIndex < srcLength); - MOZ_ASSERT(srcLength <= destLength); + MOZ_ASSERT(firstLowerCase < length); - size_t j = startIndex; - for (size_t i = startIndex; i < srcLength; i++) { + for (size_t i = 0; i < firstLowerCase; i++) + destChars[i] = srcChars[i]; + + for (size_t i = firstLowerCase; i < length; i++) { char16_t c = srcChars[i]; if (!IsSame::value) { - if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) { + if (unicode::IsLeadSurrogate(c) && i + 1 < length) { char16_t trail = srcChars[i + 1]; if (unicode::IsTrailSurrogate(trail)) { trail = unicode::ToUpperCaseNonBMPTrail(c, trail); - destChars[j++] = c; - destChars[j++] = trail; + destChars[i] = c; + destChars[i + 1] = trail; i++; continue; } } } - - if (MOZ_UNLIKELY(c > 0x7f && CanUpperCaseSpecialCasing(static_cast(c)))) { - // Return if the output buffer is too small. 
- if (srcLength == destLength) - return i; - - AppendUpperCaseSpecialCasing(c, destChars, &j); - continue; - } - c = unicode::ToUpperCase(c); MOZ_ASSERT_IF((IsSame::value), c <= JSString::MAX_LATIN1_CHAR); - destChars[j++] = c; + destChars[i] = c; } - MOZ_ASSERT(j == destLength); - destChars[destLength] = '\0'; - - return srcLength; -} - -// Explicit instantiation so we don't hit the static_assert from above. -static bool -ToUpperCaseImpl(Latin1Char* destChars, const char16_t* srcChars, size_t startIndex, - size_t srcLength, size_t destLength) -{ - MOZ_ASSERT_UNREACHABLE("cannot write non-Latin-1 characters into Latin-1 string"); - return false; -} - -template -static size_t -ToUpperCaseLength(const CharT* chars, size_t startIndex, size_t length) -{ - size_t upperLength = length; - for (size_t i = startIndex; i < length; i++) { - char16_t c = chars[i]; - - if (c > 0x7f && CanUpperCaseSpecialCasing(static_cast(c))) - upperLength += LengthUpperCaseSpecialCasing(static_cast(c)) - 1; - } - return upperLength; -} - -template -static inline void -CopyChars(DestChar* destChars, const SrcChar* srcChars, size_t length) -{ - static_assert(!IsSame::value, "PodCopy is used for the same type case"); - for (size_t i = 0; i < length; i++) - destChars[i] = srcChars[i]; -} - -template -static inline void -CopyChars(CharT* destChars, const CharT* srcChars, size_t length) -{ - PodCopy(destChars, srcChars, length); + destChars[length] = '\0'; } template static JSString* ToUpperCase(JSContext* cx, JSLinearString* str) { - using Latin1CharPtr = UniquePtr; - using TwoByteCharPtr = UniquePtr; + typedef UniquePtr Latin1CharPtr; + typedef UniquePtr TwoByteCharPtr; mozilla::MaybeOneOf newChars; - const size_t length = str->length(); - size_t resultLength; + size_t length = str->length(); { AutoCheckCannotGC nogc; const CharT* chars = str->chars(nogc); - // Look for the first character that changes when uppercased. + // Look for the first lower case character. 
size_t i = 0; for (; i < length; i++) { char16_t c = chars[i]; @@ -1075,33 +790,21 @@ ToUpperCase(JSContext* cx, JSLinearString* str) } if (unicode::CanUpperCase(c)) break; - if (MOZ_UNLIKELY(c > 0x7f && CanUpperCaseSpecialCasing(static_cast(c)))) - break; } - // If no character needs to change, return the input string. + // If all characters are upper case, return the input string. if (i == length) return str; - // The string changes when uppercased, so we must create a new string. - // Can it be Latin-1? - // - // If the original string is Latin-1, it can -- unless the string - // contains U+00B5 MICRO SIGN or U+00FF SMALL LETTER Y WITH DIAERESIS, - // the only Latin-1 codepoints that don't uppercase within Latin-1. - // Search for those codepoints to decide whether the new string can be - // Latin-1. - // If the original string is a two-byte string, its uppercase form is - // so rarely Latin-1 that we don't even consider creating a new - // Latin-1 string. + // If the string is Latin1, check if it contains the MICRO SIGN (0xb5) + // or SMALL LETTER Y WITH DIAERESIS (0xff) character. The corresponding + // upper case characters are not in the Latin1 range. 
bool resultIsLatin1; if (IsSame::value) { resultIsLatin1 = true; for (size_t j = i; j < length; j++) { Latin1Char c = chars[j]; - if (c == unicode::MICRO_SIGN || - c == unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) - { + if (c == 0xb5 || c == 0xff) { MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR); resultIsLatin1 = false; break; @@ -1114,63 +817,31 @@ ToUpperCase(JSContext* cx, JSLinearString* str) } if (resultIsLatin1) { - resultLength = length; - Latin1CharPtr buf = cx->make_pod_array(resultLength + 1); + Latin1CharPtr buf = cx->make_pod_array(length + 1); if (!buf) return nullptr; - CopyChars(buf.get(), chars, i); - - size_t readChars = ToUpperCaseImpl(buf.get(), chars, i, length, resultLength); - if (readChars < length) { - resultLength = ToUpperCaseLength(chars, readChars, length); - - Latin1CharPtr buf2 = cx->make_pod_array(resultLength + 1); - if (!buf2) - return nullptr; - - CopyChars(buf2.get(), buf.get(), readChars); - buf = Move(buf2); - - MOZ_ALWAYS_TRUE(length == - ToUpperCaseImpl(buf.get(), chars, readChars, length, resultLength)); - } + ToUpperCaseImpl(buf.get(), chars, i, length); newChars.construct(Move(buf)); } else { - resultLength = length; - TwoByteCharPtr buf = cx->make_pod_array(resultLength + 1); + TwoByteCharPtr buf = cx->make_pod_array(length + 1); if (!buf) return nullptr; - CopyChars(buf.get(), chars, i); - - size_t readChars = ToUpperCaseImpl(buf.get(), chars, i, length, resultLength); - if (readChars < length) { - resultLength = ToUpperCaseLength(chars, readChars, length); - - TwoByteCharPtr buf2 = cx->make_pod_array(resultLength + 1); - if (!buf2) - return nullptr; - - CopyChars(buf2.get(), buf.get(), readChars); - buf = Move(buf2); - - MOZ_ALWAYS_TRUE(length == - ToUpperCaseImpl(buf.get(), chars, readChars, length, resultLength)); - } + ToUpperCaseImpl(buf.get(), chars, i, length); newChars.construct(Move(buf)); } } JSString* res; if (newChars.constructed()) { - res = NewStringDontDeflate(cx, newChars.ref().get(), 
resultLength); + res = NewStringDontDeflate(cx, newChars.ref().get(), length); if (!res) return nullptr; mozilla::Unused << newChars.ref().release(); } else { - res = NewStringDontDeflate(cx, newChars.ref().get(), resultLength); + res = NewStringDontDeflate(cx, newChars.ref().get(), length); if (!res) return nullptr; @@ -1180,33 +851,32 @@ ToUpperCase(JSContext* cx, JSLinearString* str) return res; } -JSString* -js::StringToUpperCase(JSContext* cx, HandleLinearString string) +static bool +ToUpperCaseHelper(JSContext* cx, const CallArgs& args) { - if (string->hasLatin1Chars()) - return ToUpperCase(cx, string); - return ToUpperCase(cx, string); + RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); + if (!str) + return false; + + JSLinearString* linear = str->ensureLinear(cx); + if (!linear) + return false; + + if (linear->hasLatin1Chars()) + str = ToUpperCase(cx, linear); + else + str = ToUpperCase(cx, linear); + if (!str) + return false; + + args.rval().setString(str); + return true; } bool js::str_toUpperCase(JSContext* cx, unsigned argc, Value* vp) { - CallArgs args = CallArgsFromVp(argc, vp); - - RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); - if (!str) - return false; - - RootedLinearString linear(cx, str->ensureLinear(cx)); - if (!linear) - return false; - - JSString* result = StringToUpperCase(cx, linear); - if (!result) - return false; - - args.rval().setString(result); - return true; + return ToUpperCaseHelper(cx, CallArgsFromVp(argc, vp)); } bool @@ -1214,15 +884,15 @@ js::str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); - RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); - if (!str) - return false; - /* * Forcefully ignore the first (or any) argument and return toUpperCase(), * ECMA has reserved that argument, presumably for defining the locale. 
*/ if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToUpperCase) { + RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); + if (!str) + return false; + RootedValue result(cx); if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result)) return false; @@ -1231,16 +901,7 @@ js::str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) return true; } - RootedLinearString linear(cx, str->ensureLinear(cx)); - if (!linear) - return false; - - JSString* result = StringToUpperCase(cx, linear); - if (!result) - return false; - - args.rval().setString(result); - return true; + return ToUpperCaseHelper(cx, args); } #if !EXPOSE_INTL_API @@ -1320,7 +981,7 @@ js::str_normalize(JSContext* cx, unsigned argc, Value* vp) if (!linear) return false; - // Latin-1 strings are already in Normalization Form C. + // Latin1 strings are already in Normalization Form C. if (form == NFC && linear->hasLatin1Chars()) { // Step 7. args.rval().setString(str); @@ -1736,7 +1397,7 @@ StringMatch(const TextChar* text, uint32_t textLen, const PatChar* pat, uint32_t /* * For big patterns with large potential overlap we want the SIMD-optimized * speed of memcmp. For small patterns, a simple loop is faster. We also can't - * use memcmp if one of the strings is TwoByte and the other is Latin-1. + * use memcmp if one of the strings is TwoByte and the other is Latin1. * * FIXME: Linux memcmp performance is sad and the manual loop is faster. */ @@ -1933,7 +1594,7 @@ RopeMatch(JSContext* cx, JSRope* text, JSLinearString* pat, int* match) * need to build the list of leaf nodes. Do both here: iterate over the * nodes so long as there are not too many. * - * We also don't use rope matching if the rope contains both Latin-1 and + * We also don't use rope matching if the rope contains both Latin1 and * TwoByte nodes, to simplify the match algorithm. 
*/ { @@ -3115,7 +2776,7 @@ js::str_fromCharCode(JSContext* cx, unsigned argc, Value* vp) // string (thin or fat) and so we don't need to malloc the chars. (We could // cover some cases where args.length() goes up to // JSFatInlineString::MAX_LENGTH_LATIN1 if we also checked if the chars are - // all Latin-1, but it doesn't seem worth the effort.) + // all Latin1, but it doesn't seem worth the effort.) if (args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE) return str_fromCharCode_few_args(cx, args); @@ -3258,7 +2919,7 @@ js::str_fromCodePoint(JSContext* cx, unsigned argc, Value* vp) // string (thin or fat) and so we don't need to malloc the chars. (We could // cover some cases where |args.length()| goes up to // JSFatInlineString::MAX_LENGTH_LATIN1 / 2 if we also checked if the chars - // are all Latin-1, but it doesn't seem worth the effort.) + // are all Latin1, but it doesn't seem worth the effort.) if (args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2) return str_fromCodePoint_few_args(cx, args); diff --git a/js/src/jsstr.h b/js/src/jsstr.h index edf818a17317..25c61bec1b6b 100644 --- a/js/src/jsstr.h +++ b/js/src/jsstr.h @@ -481,12 +481,6 @@ JSString* str_replace_string_raw(JSContext* cx, HandleString string, HandleString pattern, HandleString replacement); -extern JSString* -StringToLowerCase(JSContext* cx, HandleLinearString string); - -extern JSString* -StringToUpperCase(JSContext* cx, HandleLinearString string); - extern bool StringConstructor(JSContext* cx, unsigned argc, Value* vp); diff --git a/js/src/tests/ecma/String/15.5.4.12-5.js b/js/src/tests/ecma/String/15.5.4.12-5.js index 6e4ab906ab61..505fadd0dc91 100644 --- a/js/src/tests/ecma/String/15.5.4.12-5.js +++ b/js/src/tests/ecma/String/15.5.4.12-5.js @@ -35,9 +35,6 @@ writeHeaderToLog( SECTION + " "+ TITLE); // Armenian // Range: U+0530 to U+058F for ( var i = 0x0530; i <= 0x058F; i++ ) { - // U+0587 (ARMENIAN SMALL LIGATURE ECH YIWN) has special upper casing. 
- if (i == 0x0587) continue; - var U = new Unicode( i ); /* new TestCase( SECTION, diff --git a/js/src/vm/Unicode.h b/js/src/vm/Unicode.h index 2980cb185885..a234557a37aa 100644 --- a/js/src/vm/Unicode.h +++ b/js/src/vm/Unicode.h @@ -63,16 +63,8 @@ namespace CharFlag { const uint8_t UNICODE_ID_CONTINUE = UNICODE_ID_START + UNICODE_ID_CONTINUE_ONLY; } -const char16_t NO_BREAK_SPACE = 0x00A0; -const char16_t MICRO_SIGN = 0x00B5; -const char16_t LATIN_SMALL_LETTER_SHARP_S = 0x00DF; -const char16_t LATIN_SMALL_LETTER_Y_WITH_DIAERESIS = 0x00FF; -const char16_t LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE = 0x0130; -const char16_t COMBINING_DOT_ABOVE = 0x0307; -const char16_t GREEK_CAPITAL_LETTER_SIGMA = 0x03A3; -const char16_t GREEK_SMALL_LETTER_FINAL_SIGMA = 0x03C2; -const char16_t GREEK_SMALL_LETTER_SIGMA = 0x03C3; const char16_t BYTE_ORDER_MARK2 = 0xFFFE; +const char16_t NO_BREAK_SPACE = 0x00A0; const char16_t LeadSurrogateMin = 0xD800; const char16_t LeadSurrogateMax = 0xDBFF; @@ -247,10 +239,6 @@ IsSpaceOrBOM2(char16_t ch) return CharInfo(ch).isSpace(); } -/* - * Returns the simple upper case mapping (see CanUpperCaseSpecialCasing for - * details) of the given UTF-16 code unit. - */ inline char16_t ToUpperCase(char16_t ch) { @@ -265,10 +253,6 @@ ToUpperCase(char16_t ch) return uint16_t(ch) + info.upperCase; } -/* - * Returns the simple lower case mapping (see CanUpperCaseSpecialCasing for - * details) of the given UTF-16 code unit. - */ inline char16_t ToLowerCase(char16_t ch) { @@ -345,43 +329,6 @@ ToLowerCaseNonBMPTrail(char16_t lead, char16_t trail) return trail; } -/* - * Returns true if the given UTF-16 code unit has a language-independent, - * unconditional or conditional special upper case mapping. - * - * Unicode defines two case mapping modes: - * 1. "simple case mappings" for one-to-one mappings which are independent of - * context and language (defined in UnicodeData.txt). - * 2. 
"special case mappings" for mappings which can increase or decrease the - * string length; or are dependent on context or locale (defined in - * SpecialCasing.txt). - * - * The CanUpperCase() method defined above only supports simple case mappings. - * In order to support the full case mappings of all Unicode characters, - * callers need to check this method in addition to CanUpperCase(). - * - * NOTE: All special upper case mappings are unconditional in Unicode 9. - */ -bool -CanUpperCaseSpecialCasing(char16_t ch); - -/* - * Returns the length of the upper case mapping of |ch|. - * - * This function asserts if |ch| doesn't have a special upper case mapping. - */ -size_t -LengthUpperCaseSpecialCasing(char16_t ch); - -/* - * Appends the upper case mapping of |ch| to the given output buffer, - * starting at the provided index. - * - * This function asserts if |ch| doesn't have a special upper case mapping. - */ -void -AppendUpperCaseSpecialCasing(char16_t ch, char16_t* elements, size_t* index); - /* * For a codepoint C, CodepointsWithSameUpperCaseInfo stores three offsets * from C to up to three codepoints with same uppercase (no codepoint in @@ -544,7 +491,7 @@ UTF16Encode(uint32_t codePoint, char16_t* lead, char16_t* trail) *trail = TrailSurrogate(codePoint); } -inline void +static inline void UTF16Encode(uint32_t codePoint, char16_t* elements, unsigned* index) { if (!IsSupplementary(codePoint)) {