Backed out changeset c34970cf36e9 (bug 1318403)

This commit is contained in:
Sebastian Hengst 2017-04-06 15:23:13 +02:00
Родитель 7efc9665ca
Коммит 312ab71575
5 изменённых файлов: 111 добавлений и 513 удалений

Просмотреть файл

@ -83,7 +83,6 @@ included_inclnames_to_ignore = set([
'unicode/timezone.h', # ICU
'unicode/plurrule.h', # ICU
'unicode/ucal.h', # ICU
'unicode/uchar.h', # ICU
'unicode/uclean.h', # ICU
'unicode/ucol.h', # ICU
'unicode/udat.h', # ICU

Просмотреть файл

@ -37,7 +37,6 @@
#include "js/Conversions.h"
#include "js/UniquePtr.h"
#if ENABLE_INTL_API
#include "unicode/uchar.h"
#include "unicode/unorm2.h"
#endif
#include "vm/GlobalObject.h"
@ -603,190 +602,19 @@ js::SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt, int32_t l
return NewDependentString(cx, str, begin, len);
}
/**
* U+03A3 GREEK CAPITAL LETTER SIGMA has two different lower case mappings
* depending on its context:
* When it's preceded by a cased character and not followed by another cased
* character, its lower case form is U+03C2 GREEK SMALL LETTER FINAL SIGMA.
* Otherwise its lower case mapping is U+03C3 GREEK SMALL LETTER SIGMA.
*
* Unicode 9.0, §3.13 Default Case Algorithms
*
* |chars| and |length| describe the complete string being lower-cased and
* |index| is the position of the U+03A3 code unit within it. The return value
* is the single lower case code unit to emit for that position.
*
* When ENABLE_INTL_API is not set, the context is not inspected at all and
* U+03C3 is returned unconditionally.
*/
static char16_t
Final_Sigma(const char16_t* chars, size_t length, size_t index)
{
MOZ_ASSERT(index < length);
MOZ_ASSERT(chars[index] == unicode::GREEK_CAPITAL_LETTER_SIGMA);
MOZ_ASSERT(unicode::ToLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA) ==
unicode::GREEK_SMALL_LETTER_SIGMA);
#if ENABLE_INTL_API
// Walk backwards from |index| to the nearest code point that is not
// Case_Ignorable and record whether it is Cased.
bool precededByCased = false;
for (size_t i = index; i > 0; ) {
char16_t c = chars[--i];
uint32_t codePoint = c;
// A trail surrogate directly preceded by a lead surrogate forms one
// supplementary code point; an unpaired surrogate is tested as-is.
if (unicode::IsTrailSurrogate(c) && i > 0) {
char16_t lead = chars[i - 1];
if (unicode::IsLeadSurrogate(lead)) {
codePoint = unicode::UTF16Decode(lead, c);
i--;
}
}
// Ignore any characters with the property Case_Ignorable.
// NB: We need to skip over all Case_Ignorable characters, even when
// they also have the Cased binary property.
if (u_hasBinaryProperty(codePoint, UCHAR_CASE_IGNORABLE))
continue;
precededByCased = u_hasBinaryProperty(codePoint, UCHAR_CASED);
break;
}
// Without a preceding cased character the final form never applies, so the
// forward scan can be skipped.
if (!precededByCased)
return unicode::GREEK_SMALL_LETTER_SIGMA;
// Walk forwards from |index + 1| to the nearest code point that is not
// Case_Ignorable and record whether it is Cased.
bool followedByCased = false;
for (size_t i = index + 1; i < length; ) {
char16_t c = chars[i++];
uint32_t codePoint = c;
// A lead surrogate directly followed by a trail surrogate forms one
// supplementary code point; an unpaired surrogate is tested as-is.
if (unicode::IsLeadSurrogate(c) && i < length) {
char16_t trail = chars[i];
if (unicode::IsTrailSurrogate(trail)) {
codePoint = unicode::UTF16Decode(c, trail);
i++;
}
}
// Ignore any characters with the property Case_Ignorable.
// NB: We need to skip over all Case_Ignorable characters, even when
// they also have the Cased binary property.
if (u_hasBinaryProperty(codePoint, UCHAR_CASE_IGNORABLE))
continue;
followedByCased = u_hasBinaryProperty(codePoint, UCHAR_CASED);
break;
}
// Preceded by a cased character and not followed by one: final sigma.
if (!followedByCased)
return unicode::GREEK_SMALL_LETTER_FINAL_SIGMA;
#endif
// Either a cased character follows, or the context check is compiled out.
return unicode::GREEK_SMALL_LETTER_SIGMA;
}
// Latin-1 overload of Final_Sigma. U+03A3 cannot occur in a Latin-1 string, so
// this overload must never be reached: it asserts and returns 0.
static Latin1Char
Final_Sigma(const Latin1Char* chars, size_t length, size_t index)
{
MOZ_ASSERT_UNREACHABLE("U+03A3 is not a Latin-1 character");
return 0;
}
// Lower-cases |srcChars[startIndex..srcLength)| into |destChars|, writing the
// output from index |startIndex| onwards and finishing with a terminating
// '\0' at |destChars[destLength]| (so |destChars| must have room for
// |destLength + 1| elements).
//
// If |srcLength == destLength| is true, the destination buffer was allocated
// with the same size as the source buffer. When we append characters which
// have special casing mappings, we test |srcLength == destLength| to decide
// if we need to back out and reallocate a sufficiently large destination
// buffer. Otherwise the destination buffer was allocated with the correct
// size to hold all lower case mapped characters, i.e.
// |destLength == ToLowerCaseLength(srcChars, 0, srcLength)| is true.
//
// Returns |srcLength| when the whole input was processed. Returns the index
// of the first unprocessed source code unit (a U+0130) when the output would
// not fit into a same-sized buffer; in that case no terminator is written.
template <typename CharT>
static size_t
ToLowerCaseImpl(CharT* destChars, const CharT* srcChars, size_t startIndex, size_t srcLength,
size_t destLength)
{
MOZ_ASSERT(startIndex < srcLength);
MOZ_ASSERT(srcLength <= destLength);
MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), srcLength == destLength);
// |i| indexes the source, |j| the destination. They only diverge after a
// length-changing mapping has been written.
size_t j = startIndex;
for (size_t i = startIndex; i < srcLength; i++) {
char16_t c = srcChars[i];
// Surrogate pairs and the two special-cased code points below cannot
// occur in Latin-1 input.
if (!IsSame<CharT, Latin1Char>::value) {
// For a valid surrogate pair the lead is copied unchanged and only the
// trail is mapped.
if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
char16_t trail = srcChars[i + 1];
if (unicode::IsTrailSurrogate(trail)) {
trail = unicode::ToLowerCaseNonBMPTrail(c, trail);
destChars[j++] = c;
destChars[j++] = trail;
i++;
continue;
}
}
// Special case: U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
// lowercases to <U+0069 U+0307>.
if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
// Return if the output buffer is too small.
if (srcLength == destLength)
return i;
destChars[j++] = CharT('i');
destChars[j++] = CharT(unicode::COMBINING_DOT_ABOVE);
continue;
}
// Special case: U+03A3 GREEK CAPITAL LETTER SIGMA lowercases to
// one of two codepoints depending on context.
if (c == unicode::GREEK_CAPITAL_LETTER_SIGMA) {
destChars[j++] = Final_Sigma(srcChars, srcLength, i);
continue;
}
}
// Everything else uses the simple one-to-one mapping.
c = unicode::ToLowerCase(c);
MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
destChars[j++] = c;
}
// The whole input was consumed, so the output must fill the buffer exactly.
MOZ_ASSERT(j == destLength);
destChars[destLength] = '\0';
return srcLength;
}
// Returns the number of code units needed for the lower case form of
// |chars[0..length)|. Only code units at or after |startIndex| are inspected
// for length-changing mappings.
static size_t
ToLowerCaseLength(const char16_t* chars, size_t startIndex, size_t length)
{
    size_t extraUnits = 0;

    size_t pos = startIndex;
    while (pos < length) {
        // U+0130 lower-cases to the pair <U+0069 U+0307>, i.e. it needs one
        // additional code unit in the output.
        if (chars[pos++] == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE)
            ++extraUnits;
    }

    return length + extraUnits;
}
// Latin-1 overload of ToLowerCaseLength. It must never be called: it asserts
// and returns 0.
static size_t
ToLowerCaseLength(const Latin1Char* chars, size_t startIndex, size_t length)
{
MOZ_ASSERT_UNREACHABLE("never called for Latin-1 strings");
return 0;
}
template <typename CharT>
static JSString*
ToLowerCase(JSContext* cx, JSLinearString* str)
{
// Unlike toUpperCase, toLowerCase has the nice invariant that if the
// input is a Latin-1 string, the output is also a Latin-1 string.
using AnyCharPtr = UniquePtr<CharT[], JS::FreePolicy>;
AnyCharPtr newChars;
const size_t length = str->length();
size_t resultLength;
// Unlike toUpperCase, toLowerCase has the nice invariant that if the input
// is a Latin1 string, the output is also a Latin1 string.
UniquePtr<CharT[], JS::FreePolicy> newChars;
size_t length = str->length();
{
AutoCheckCannotGC nogc;
const CharT* chars = str->chars<CharT>(nogc);
// We don't need extra special casing checks in the loop below,
// because U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+03A3
// GREEK CAPITAL LETTER SIGMA already have simple lower case mappings.
MOZ_ASSERT(unicode::CanLowerCase(unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE),
"U+0130 has a simple lower case mapping");
MOZ_ASSERT(unicode::CanLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA),
"U+03A3 has a simple lower case mapping");
// Look for the first character that changes when lowercased.
// Look for the first upper case character.
size_t i = 0;
for (; i < length; i++) {
char16_t c = chars[i];
@ -806,36 +634,40 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
break;
}
// If no character needs to change, return the input string.
// If all characters are lower case, return the input string.
if (i == length)
return str;
resultLength = length;
newChars = cx->make_pod_array<CharT>(resultLength + 1);
newChars = cx->make_pod_array<CharT>(length + 1);
if (!newChars)
return nullptr;
PodCopy(newChars.get(), chars, i);
size_t readChars = ToLowerCaseImpl(newChars.get(), chars, i, length, resultLength);
if (readChars < length) {
MOZ_ASSERT((!IsSame<CharT, Latin1Char>::value),
"Latin-1 strings don't have special lower case mappings");
resultLength = ToLowerCaseLength(chars, readChars, length);
for (; i < length; i++) {
char16_t c = chars[i];
if (!IsSame<CharT, Latin1Char>::value) {
if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
char16_t trail = chars[i + 1];
if (unicode::IsTrailSurrogate(trail)) {
trail = unicode::ToLowerCaseNonBMPTrail(c, trail);
newChars[i] = c;
newChars[i + 1] = trail;
i++;
continue;
}
}
}
AnyCharPtr buf = cx->make_pod_array<CharT>(resultLength + 1);
if (!buf)
return nullptr;
PodCopy(buf.get(), newChars.get(), readChars);
newChars = Move(buf);
MOZ_ALWAYS_TRUE(length ==
ToLowerCaseImpl(newChars.get(), chars, readChars, length, resultLength));
c = unicode::ToLowerCase(c);
MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
newChars[i] = c;
}
newChars[length] = 0;
}
JSString* res = NewStringDontDeflate<CanGC>(cx, newChars.get(), resultLength);
JSString* res = NewStringDontDeflate<CanGC>(cx, newChars.get(), length);
if (!res)
return nullptr;
@ -843,33 +675,32 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
return res;
}
JSString*
js::StringToLowerCase(JSContext* cx, HandleLinearString string)
static inline bool
ToLowerCaseHelper(JSContext* cx, const CallArgs& args)
{
if (string->hasLatin1Chars())
return ToLowerCase<Latin1Char>(cx, string);
return ToLowerCase<char16_t>(cx, string);
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
if (!str)
return false;
JSLinearString* linear = str->ensureLinear(cx);
if (!linear)
return false;
if (linear->hasLatin1Chars())
str = ToLowerCase<Latin1Char>(cx, linear);
else
str = ToLowerCase<char16_t>(cx, linear);
if (!str)
return false;
args.rval().setString(str);
return true;
}
bool
js::str_toLowerCase(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
if (!str)
return false;
RootedLinearString linear(cx, str->ensureLinear(cx));
if (!linear)
return false;
JSString* result = StringToLowerCase(cx, linear);
if (!result)
return false;
args.rval().setString(result);
return true;
return ToLowerCaseHelper(cx, CallArgsFromVp(argc, vp));
}
bool
@ -877,15 +708,15 @@ js::str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
if (!str)
return false;
/*
* Forcefully ignore the first (or any) argument and return toLowerCase(),
* ECMA has reserved that argument, presumably for defining the locale.
*/
if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToLowerCase) {
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
if (!str)
return false;
RootedValue result(cx);
if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result))
return false;
@ -894,170 +725,54 @@ js::str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp)
return true;
}
RootedLinearString linear(cx, str->ensureLinear(cx));
if (!linear)
return false;
JSString* result = StringToLowerCase(cx, linear);
if (!result)
return false;
args.rval().setString(result);
return true;
return ToLowerCaseHelper(cx, args);
}
// Latin-1 overload: reports whether |charCode| has a special upper case
// mapping. U+00DF LATIN SMALL LETTER SHARP S is tested inline; debug builds
// assert that the unicode:: query agrees that no other Latin-1 character
// qualifies.
static inline bool
CanUpperCaseSpecialCasing(Latin1Char charCode)
{
    const bool isSharpS = (charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
    MOZ_ASSERT_IF(!isSharpS, !unicode::CanUpperCaseSpecialCasing(charCode));
    return isSharpS;
}
// Two-byte overload: forwards to unicode::CanUpperCaseSpecialCasing.
static inline bool
CanUpperCaseSpecialCasing(char16_t charCode)
{
return unicode::CanUpperCaseSpecialCasing(charCode);
}
// Latin-1 overload: returns the length of the special upper case mapping of
// |charCode|, which is asserted to be U+00DF.
static inline size_t
LengthUpperCaseSpecialCasing(Latin1Char charCode)
{
// U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
return 2;
}
// Two-byte overload: forwards to unicode::LengthUpperCaseSpecialCasing after
// asserting that |charCode| actually has a special upper case mapping.
static inline size_t
LengthUpperCaseSpecialCasing(char16_t charCode)
{
MOZ_ASSERT(CanUpperCaseSpecialCasing(charCode));
return unicode::LengthUpperCaseSpecialCasing(charCode);
}
// Latin-1 destination overload: writes the special upper case mapping of
// |charCode| (asserted to be U+00DF) into |elements| starting at |*index| and
// advances |*index| past the written code units.
static inline void
AppendUpperCaseSpecialCasing(char16_t charCode, Latin1Char* elements, size_t* index)
{
    static_assert('S' <= JSString::MAX_LATIN1_CHAR, "'S' is a Latin-1 character");
    MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);

    // U+00DF LATIN SMALL LETTER SHARP S upper-cases to "SS".
    const size_t first = *index;
    elements[first] = 'S';
    elements[first + 1] = 'S';
    *index = first + 2;
}
// Two-byte destination overload: forwards to
// unicode::AppendUpperCaseSpecialCasing with the same arguments.
static inline void
AppendUpperCaseSpecialCasing(char16_t charCode, char16_t* elements, size_t* index)
{
unicode::AppendUpperCaseSpecialCasing(charCode, elements, index);
}
// See ToLowerCaseImpl for an explanation of the parameters.
template <typename DestChar, typename SrcChar>
static size_t
ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t startIndex, size_t srcLength,
size_t destLength)
static void
ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t firstLowerCase, size_t length)
{
static_assert(IsSame<SrcChar, Latin1Char>::value || !IsSame<DestChar, Latin1Char>::value,
"cannot write non-Latin-1 characters into Latin-1 string");
MOZ_ASSERT(startIndex < srcLength);
MOZ_ASSERT(srcLength <= destLength);
MOZ_ASSERT(firstLowerCase < length);
size_t j = startIndex;
for (size_t i = startIndex; i < srcLength; i++) {
for (size_t i = 0; i < firstLowerCase; i++)
destChars[i] = srcChars[i];
for (size_t i = firstLowerCase; i < length; i++) {
char16_t c = srcChars[i];
if (!IsSame<DestChar, Latin1Char>::value) {
if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
char16_t trail = srcChars[i + 1];
if (unicode::IsTrailSurrogate(trail)) {
trail = unicode::ToUpperCaseNonBMPTrail(c, trail);
destChars[j++] = c;
destChars[j++] = trail;
destChars[i] = c;
destChars[i + 1] = trail;
i++;
continue;
}
}
}
if (MOZ_UNLIKELY(c > 0x7f && CanUpperCaseSpecialCasing(static_cast<SrcChar>(c)))) {
// Return if the output buffer is too small.
if (srcLength == destLength)
return i;
AppendUpperCaseSpecialCasing(c, destChars, &j);
continue;
}
c = unicode::ToUpperCase(c);
MOZ_ASSERT_IF((IsSame<DestChar, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
destChars[j++] = c;
destChars[i] = c;
}
MOZ_ASSERT(j == destLength);
destChars[destLength] = '\0';
return srcLength;
}
// Non-template overload (not an explicit instantiation) for the Latin-1
// destination / two-byte source combination, so we don't hit the
// static_assert from above. It must never be called: it asserts and returns
// false.
// NOTE(review): this returns bool whereas the template version returns
// size_t; confirm whether the return type should be size_t for consistency.
static bool
ToUpperCaseImpl(Latin1Char* destChars, const char16_t* srcChars, size_t startIndex,
size_t srcLength, size_t destLength)
{
MOZ_ASSERT_UNREACHABLE("cannot write non-Latin-1 characters into Latin-1 string");
return false;
}
// Returns the number of code units needed for the upper case form of
// |chars[0..length)|. Only code units at or after |startIndex| are inspected
// for special casing mappings.
template <typename CharT>
static size_t
ToUpperCaseLength(const CharT* chars, size_t startIndex, size_t length)
{
    size_t growth = 0;

    for (size_t pos = startIndex; pos < length; ++pos) {
        const CharT unit = chars[pos];

        // Code units up to 0x7f are never passed to the special casing query.
        if (unit <= 0x7f)
            continue;
        if (!CanUpperCaseSpecialCasing(unit))
            continue;

        // A mapping of N code units replaces one source code unit.
        growth += LengthUpperCaseSpecialCasing(unit) - 1;
    }

    return length + growth;
}
// Copies |length| characters from |srcChars| to |destChars|, converting each
// one by assignment. This overload is only for differing character types.
template <typename DestChar, typename SrcChar>
static inline void
CopyChars(DestChar* destChars, const SrcChar* srcChars, size_t length)
{
    static_assert(!IsSame<DestChar, SrcChar>::value, "PodCopy is used for the same type case");

    const SrcChar* const srcEnd = srcChars + length;
    while (srcChars != srcEnd)
        *destChars++ = *srcChars++;
}
template <typename CharT>
static inline void
CopyChars(CharT* destChars, const CharT* srcChars, size_t length)
{
PodCopy(destChars, srcChars, length);
destChars[length] = '\0';
}
template <typename CharT>
static JSString*
ToUpperCase(JSContext* cx, JSLinearString* str)
{
using Latin1CharPtr = UniquePtr<Latin1Char[], JS::FreePolicy>;
using TwoByteCharPtr = UniquePtr<char16_t[], JS::FreePolicy>;
typedef UniquePtr<Latin1Char[], JS::FreePolicy> Latin1CharPtr;
typedef UniquePtr<char16_t[], JS::FreePolicy> TwoByteCharPtr;
mozilla::MaybeOneOf<Latin1CharPtr, TwoByteCharPtr> newChars;
const size_t length = str->length();
size_t resultLength;
size_t length = str->length();
{
AutoCheckCannotGC nogc;
const CharT* chars = str->chars<CharT>(nogc);
// Look for the first character that changes when uppercased.
// Look for the first lower case character.
size_t i = 0;
for (; i < length; i++) {
char16_t c = chars[i];
@ -1075,33 +790,21 @@ ToUpperCase(JSContext* cx, JSLinearString* str)
}
if (unicode::CanUpperCase(c))
break;
if (MOZ_UNLIKELY(c > 0x7f && CanUpperCaseSpecialCasing(static_cast<CharT>(c))))
break;
}
// If no character needs to change, return the input string.
// If all characters are upper case, return the input string.
if (i == length)
return str;
// The string changes when uppercased, so we must create a new string.
// Can it be Latin-1?
//
// If the original string is Latin-1, it can -- unless the string
// contains U+00B5 MICRO SIGN or U+00FF SMALL LETTER Y WITH DIAERESIS,
// the only Latin-1 codepoints that don't uppercase within Latin-1.
// Search for those codepoints to decide whether the new string can be
// Latin-1.
// If the original string is a two-byte string, its uppercase form is
// so rarely Latin-1 that we don't even consider creating a new
// Latin-1 string.
// If the string is Latin1, check if it contains the MICRO SIGN (0xb5)
// or SMALL LETTER Y WITH DIAERESIS (0xff) character. The corresponding
// upper case characters are not in the Latin1 range.
bool resultIsLatin1;
if (IsSame<CharT, Latin1Char>::value) {
resultIsLatin1 = true;
for (size_t j = i; j < length; j++) {
Latin1Char c = chars[j];
if (c == unicode::MICRO_SIGN ||
c == unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)
{
if (c == 0xb5 || c == 0xff) {
MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR);
resultIsLatin1 = false;
break;
@ -1114,63 +817,31 @@ ToUpperCase(JSContext* cx, JSLinearString* str)
}
if (resultIsLatin1) {
resultLength = length;
Latin1CharPtr buf = cx->make_pod_array<Latin1Char>(resultLength + 1);
Latin1CharPtr buf = cx->make_pod_array<Latin1Char>(length + 1);
if (!buf)
return nullptr;
CopyChars(buf.get(), chars, i);
size_t readChars = ToUpperCaseImpl(buf.get(), chars, i, length, resultLength);
if (readChars < length) {
resultLength = ToUpperCaseLength(chars, readChars, length);
Latin1CharPtr buf2 = cx->make_pod_array<Latin1Char>(resultLength + 1);
if (!buf2)
return nullptr;
CopyChars(buf2.get(), buf.get(), readChars);
buf = Move(buf2);
MOZ_ALWAYS_TRUE(length ==
ToUpperCaseImpl(buf.get(), chars, readChars, length, resultLength));
}
ToUpperCaseImpl(buf.get(), chars, i, length);
newChars.construct<Latin1CharPtr>(Move(buf));
} else {
resultLength = length;
TwoByteCharPtr buf = cx->make_pod_array<char16_t>(resultLength + 1);
TwoByteCharPtr buf = cx->make_pod_array<char16_t>(length + 1);
if (!buf)
return nullptr;
CopyChars(buf.get(), chars, i);
size_t readChars = ToUpperCaseImpl(buf.get(), chars, i, length, resultLength);
if (readChars < length) {
resultLength = ToUpperCaseLength(chars, readChars, length);
TwoByteCharPtr buf2 = cx->make_pod_array<char16_t>(resultLength + 1);
if (!buf2)
return nullptr;
CopyChars(buf2.get(), buf.get(), readChars);
buf = Move(buf2);
MOZ_ALWAYS_TRUE(length ==
ToUpperCaseImpl(buf.get(), chars, readChars, length, resultLength));
}
ToUpperCaseImpl(buf.get(), chars, i, length);
newChars.construct<TwoByteCharPtr>(Move(buf));
}
}
JSString* res;
if (newChars.constructed<Latin1CharPtr>()) {
res = NewStringDontDeflate<CanGC>(cx, newChars.ref<Latin1CharPtr>().get(), resultLength);
res = NewStringDontDeflate<CanGC>(cx, newChars.ref<Latin1CharPtr>().get(), length);
if (!res)
return nullptr;
mozilla::Unused << newChars.ref<Latin1CharPtr>().release();
} else {
res = NewStringDontDeflate<CanGC>(cx, newChars.ref<TwoByteCharPtr>().get(), resultLength);
res = NewStringDontDeflate<CanGC>(cx, newChars.ref<TwoByteCharPtr>().get(), length);
if (!res)
return nullptr;
@ -1180,33 +851,32 @@ ToUpperCase(JSContext* cx, JSLinearString* str)
return res;
}
JSString*
js::StringToUpperCase(JSContext* cx, HandleLinearString string)
static bool
ToUpperCaseHelper(JSContext* cx, const CallArgs& args)
{
if (string->hasLatin1Chars())
return ToUpperCase<Latin1Char>(cx, string);
return ToUpperCase<char16_t>(cx, string);
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
if (!str)
return false;
JSLinearString* linear = str->ensureLinear(cx);
if (!linear)
return false;
if (linear->hasLatin1Chars())
str = ToUpperCase<Latin1Char>(cx, linear);
else
str = ToUpperCase<char16_t>(cx, linear);
if (!str)
return false;
args.rval().setString(str);
return true;
}
bool
js::str_toUpperCase(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
if (!str)
return false;
RootedLinearString linear(cx, str->ensureLinear(cx));
if (!linear)
return false;
JSString* result = StringToUpperCase(cx, linear);
if (!result)
return false;
args.rval().setString(result);
return true;
return ToUpperCaseHelper(cx, CallArgsFromVp(argc, vp));
}
bool
@ -1214,15 +884,15 @@ js::str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
if (!str)
return false;
/*
* Forcefully ignore the first (or any) argument and return toUpperCase(),
* ECMA has reserved that argument, presumably for defining the locale.
*/
if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToUpperCase) {
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
if (!str)
return false;
RootedValue result(cx);
if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result))
return false;
@ -1231,16 +901,7 @@ js::str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp)
return true;
}
RootedLinearString linear(cx, str->ensureLinear(cx));
if (!linear)
return false;
JSString* result = StringToUpperCase(cx, linear);
if (!result)
return false;
args.rval().setString(result);
return true;
return ToUpperCaseHelper(cx, args);
}
#if !EXPOSE_INTL_API
@ -1320,7 +981,7 @@ js::str_normalize(JSContext* cx, unsigned argc, Value* vp)
if (!linear)
return false;
// Latin-1 strings are already in Normalization Form C.
// Latin1 strings are already in Normalization Form C.
if (form == NFC && linear->hasLatin1Chars()) {
// Step 7.
args.rval().setString(str);
@ -1736,7 +1397,7 @@ StringMatch(const TextChar* text, uint32_t textLen, const PatChar* pat, uint32_t
/*
* For big patterns with large potential overlap we want the SIMD-optimized
* speed of memcmp. For small patterns, a simple loop is faster. We also can't
* use memcmp if one of the strings is TwoByte and the other is Latin-1.
* use memcmp if one of the strings is TwoByte and the other is Latin1.
*
* FIXME: Linux memcmp performance is sad and the manual loop is faster.
*/
@ -1933,7 +1594,7 @@ RopeMatch(JSContext* cx, JSRope* text, JSLinearString* pat, int* match)
* need to build the list of leaf nodes. Do both here: iterate over the
* nodes so long as there are not too many.
*
* We also don't use rope matching if the rope contains both Latin-1 and
* We also don't use rope matching if the rope contains both Latin1 and
* TwoByte nodes, to simplify the match algorithm.
*/
{
@ -3115,7 +2776,7 @@ js::str_fromCharCode(JSContext* cx, unsigned argc, Value* vp)
// string (thin or fat) and so we don't need to malloc the chars. (We could
// cover some cases where args.length() goes up to
// JSFatInlineString::MAX_LENGTH_LATIN1 if we also checked if the chars are
// all Latin-1, but it doesn't seem worth the effort.)
// all Latin1, but it doesn't seem worth the effort.)
if (args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE)
return str_fromCharCode_few_args(cx, args);
@ -3258,7 +2919,7 @@ js::str_fromCodePoint(JSContext* cx, unsigned argc, Value* vp)
// string (thin or fat) and so we don't need to malloc the chars. (We could
// cover some cases where |args.length()| goes up to
// JSFatInlineString::MAX_LENGTH_LATIN1 / 2 if we also checked if the chars
// are all Latin-1, but it doesn't seem worth the effort.)
// are all Latin1, but it doesn't seem worth the effort.)
if (args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2)
return str_fromCodePoint_few_args(cx, args);

Просмотреть файл

@ -481,12 +481,6 @@ JSString*
str_replace_string_raw(JSContext* cx, HandleString string, HandleString pattern,
HandleString replacement);
extern JSString*
StringToLowerCase(JSContext* cx, HandleLinearString string);
extern JSString*
StringToUpperCase(JSContext* cx, HandleLinearString string);
extern bool
StringConstructor(JSContext* cx, unsigned argc, Value* vp);

Просмотреть файл

@ -35,9 +35,6 @@ writeHeaderToLog( SECTION + " "+ TITLE);
// Armenian
// Range: U+0530 to U+058F
for ( var i = 0x0530; i <= 0x058F; i++ ) {
// U+0587 (ARMENIAN SMALL LIGATURE ECH YIWN) has special upper casing.
if (i == 0x0587) continue;
var U = new Unicode( i );
/*
new TestCase( SECTION,

Просмотреть файл

@ -63,16 +63,8 @@ namespace CharFlag {
const uint8_t UNICODE_ID_CONTINUE = UNICODE_ID_START + UNICODE_ID_CONTINUE_ONLY;
}
const char16_t NO_BREAK_SPACE = 0x00A0;
const char16_t MICRO_SIGN = 0x00B5;
const char16_t LATIN_SMALL_LETTER_SHARP_S = 0x00DF;
const char16_t LATIN_SMALL_LETTER_Y_WITH_DIAERESIS = 0x00FF;
const char16_t LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE = 0x0130;
const char16_t COMBINING_DOT_ABOVE = 0x0307;
const char16_t GREEK_CAPITAL_LETTER_SIGMA = 0x03A3;
const char16_t GREEK_SMALL_LETTER_FINAL_SIGMA = 0x03C2;
const char16_t GREEK_SMALL_LETTER_SIGMA = 0x03C3;
const char16_t BYTE_ORDER_MARK2 = 0xFFFE;
const char16_t NO_BREAK_SPACE = 0x00A0;
const char16_t LeadSurrogateMin = 0xD800;
const char16_t LeadSurrogateMax = 0xDBFF;
@ -247,10 +239,6 @@ IsSpaceOrBOM2(char16_t ch)
return CharInfo(ch).isSpace();
}
/*
* Returns the simple upper case mapping (see CanUpperCaseSpecialCasing for
* details) of the given UTF-16 code unit.
*/
inline char16_t
ToUpperCase(char16_t ch)
{
@ -265,10 +253,6 @@ ToUpperCase(char16_t ch)
return uint16_t(ch) + info.upperCase;
}
/*
* Returns the simple lower case mapping (see CanUpperCaseSpecialCasing for
* details) of the given UTF-16 code unit.
*/
inline char16_t
ToLowerCase(char16_t ch)
{
@ -345,43 +329,6 @@ ToLowerCaseNonBMPTrail(char16_t lead, char16_t trail)
return trail;
}
/*
* Returns true if the given UTF-16 code unit has a language-independent,
* unconditional or conditional special upper case mapping.
*
* Unicode defines two case mapping modes:
* 1. "simple case mappings" for one-to-one mappings which are independent of
* context and language (defined in UnicodeData.txt).
* 2. "special case mappings" for mappings which can increase or decrease the
* string length; or are dependent on context or locale (defined in
* SpecialCasing.txt).
*
* The CanUpperCase() method defined above only supports simple case mappings.
* In order to support the full case mappings of all Unicode characters,
* callers need to check this method in addition to CanUpperCase().
*
* NOTE: All special upper case mappings are unconditional in Unicode 9.
*/
bool
CanUpperCaseSpecialCasing(char16_t ch);
/*
* Returns the length of the upper case mapping of |ch|.
*
* This function asserts if |ch| doesn't have a special upper case mapping.
*/
size_t
LengthUpperCaseSpecialCasing(char16_t ch);
/*
* Appends the upper case mapping of |ch| to the given output buffer,
* starting at the provided index.
*
* This function asserts if |ch| doesn't have a special upper case mapping.
*/
void
AppendUpperCaseSpecialCasing(char16_t ch, char16_t* elements, size_t* index);
/*
* For a codepoint C, CodepointsWithSameUpperCaseInfo stores three offsets
* from C to up to three codepoints with same uppercase (no codepoint in
@ -544,7 +491,7 @@ UTF16Encode(uint32_t codePoint, char16_t* lead, char16_t* trail)
*trail = TrailSurrogate(codePoint);
}
inline void
static inline void
UTF16Encode(uint32_t codePoint, char16_t* elements, unsigned* index)
{
if (!IsSupplementary(codePoint)) {