зеркало из https://github.com/mozilla/gecko-dev.git
Backed out changeset c34970cf36e9 (bug 1318403)
This commit is contained in:
Родитель
7efc9665ca
Коммит
312ab71575
|
@ -83,7 +83,6 @@ included_inclnames_to_ignore = set([
|
|||
'unicode/timezone.h', # ICU
|
||||
'unicode/plurrule.h', # ICU
|
||||
'unicode/ucal.h', # ICU
|
||||
'unicode/uchar.h', # ICU
|
||||
'unicode/uclean.h', # ICU
|
||||
'unicode/ucol.h', # ICU
|
||||
'unicode/udat.h', # ICU
|
||||
|
|
557
js/src/jsstr.cpp
557
js/src/jsstr.cpp
|
@ -37,7 +37,6 @@
|
|||
#include "js/Conversions.h"
|
||||
#include "js/UniquePtr.h"
|
||||
#if ENABLE_INTL_API
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/unorm2.h"
|
||||
#endif
|
||||
#include "vm/GlobalObject.h"
|
||||
|
@ -603,190 +602,19 @@ js::SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt, int32_t l
|
|||
return NewDependentString(cx, str, begin, len);
|
||||
}
|
||||
|
||||
/**
|
||||
* U+03A3 GREEK CAPITAL LETTER SIGMA has two different lower case mappings
|
||||
* depending on its context:
|
||||
* When it's preceded by a cased character and not followed by another cased
|
||||
* character, its lower case form is U+03C2 GREEK SMALL LETTER FINAL SIGMA.
|
||||
* Otherwise its lower case mapping is U+03C3 GREEK SMALL LETTER SIGMA.
|
||||
*
|
||||
* Unicode 9.0, §3.13 Default Case Algorithms
|
||||
*/
|
||||
static char16_t
|
||||
Final_Sigma(const char16_t* chars, size_t length, size_t index)
|
||||
{
|
||||
MOZ_ASSERT(index < length);
|
||||
MOZ_ASSERT(chars[index] == unicode::GREEK_CAPITAL_LETTER_SIGMA);
|
||||
MOZ_ASSERT(unicode::ToLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA) ==
|
||||
unicode::GREEK_SMALL_LETTER_SIGMA);
|
||||
|
||||
#if ENABLE_INTL_API
|
||||
bool precededByCased = false;
|
||||
for (size_t i = index; i > 0; ) {
|
||||
char16_t c = chars[--i];
|
||||
uint32_t codePoint = c;
|
||||
if (unicode::IsTrailSurrogate(c) && i > 0) {
|
||||
char16_t lead = chars[i - 1];
|
||||
if (unicode::IsLeadSurrogate(lead)) {
|
||||
codePoint = unicode::UTF16Decode(lead, c);
|
||||
i--;
|
||||
}
|
||||
}
|
||||
|
||||
// Ignore any characters with the property Case_Ignorable.
|
||||
// NB: We need to skip over all Case_Ignorable characters, even when
|
||||
// they also have the Cased binary property.
|
||||
if (u_hasBinaryProperty(codePoint, UCHAR_CASE_IGNORABLE))
|
||||
continue;
|
||||
|
||||
precededByCased = u_hasBinaryProperty(codePoint, UCHAR_CASED);
|
||||
break;
|
||||
}
|
||||
if (!precededByCased)
|
||||
return unicode::GREEK_SMALL_LETTER_SIGMA;
|
||||
|
||||
bool followedByCased = false;
|
||||
for (size_t i = index + 1; i < length; ) {
|
||||
char16_t c = chars[i++];
|
||||
uint32_t codePoint = c;
|
||||
if (unicode::IsLeadSurrogate(c) && i < length) {
|
||||
char16_t trail = chars[i];
|
||||
if (unicode::IsTrailSurrogate(trail)) {
|
||||
codePoint = unicode::UTF16Decode(c, trail);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
// Ignore any characters with the property Case_Ignorable.
|
||||
// NB: We need to skip over all Case_Ignorable characters, even when
|
||||
// they also have the Cased binary property.
|
||||
if (u_hasBinaryProperty(codePoint, UCHAR_CASE_IGNORABLE))
|
||||
continue;
|
||||
|
||||
followedByCased = u_hasBinaryProperty(codePoint, UCHAR_CASED);
|
||||
break;
|
||||
}
|
||||
if (!followedByCased)
|
||||
return unicode::GREEK_SMALL_LETTER_FINAL_SIGMA;
|
||||
#endif
|
||||
|
||||
return unicode::GREEK_SMALL_LETTER_SIGMA;
|
||||
}
|
||||
|
||||
static Latin1Char
|
||||
Final_Sigma(const Latin1Char* chars, size_t length, size_t index)
|
||||
{
|
||||
MOZ_ASSERT_UNREACHABLE("U+03A3 is not a Latin-1 character");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// If |srcLength == destLength| is true, the destination buffer was allocated
|
||||
// with the same size as the source buffer. When we append characters which
|
||||
// have special casing mappings, we test |srcLength == destLength| to decide
|
||||
// if we need to back out and reallocate a sufficiently large destination
|
||||
// buffer. Otherwise the destination buffer was allocated with the correct
|
||||
// size to hold all lower case mapped characters, i.e.
|
||||
// |destLength == ToLowerCaseLength(srcChars, 0, srcLength)| is true.
|
||||
template <typename CharT>
|
||||
static size_t
|
||||
ToLowerCaseImpl(CharT* destChars, const CharT* srcChars, size_t startIndex, size_t srcLength,
|
||||
size_t destLength)
|
||||
{
|
||||
MOZ_ASSERT(startIndex < srcLength);
|
||||
MOZ_ASSERT(srcLength <= destLength);
|
||||
MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), srcLength == destLength);
|
||||
|
||||
size_t j = startIndex;
|
||||
for (size_t i = startIndex; i < srcLength; i++) {
|
||||
char16_t c = srcChars[i];
|
||||
if (!IsSame<CharT, Latin1Char>::value) {
|
||||
if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
|
||||
char16_t trail = srcChars[i + 1];
|
||||
if (unicode::IsTrailSurrogate(trail)) {
|
||||
trail = unicode::ToLowerCaseNonBMPTrail(c, trail);
|
||||
destChars[j++] = c;
|
||||
destChars[j++] = trail;
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Special case: U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
// lowercases to <U+0069 U+0307>.
|
||||
if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
|
||||
// Return if the output buffer is too small.
|
||||
if (srcLength == destLength)
|
||||
return i;
|
||||
|
||||
destChars[j++] = CharT('i');
|
||||
destChars[j++] = CharT(unicode::COMBINING_DOT_ABOVE);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Special case: U+03A3 GREEK CAPITAL LETTER SIGMA lowercases to
|
||||
// one of two codepoints depending on context.
|
||||
if (c == unicode::GREEK_CAPITAL_LETTER_SIGMA) {
|
||||
destChars[j++] = Final_Sigma(srcChars, srcLength, i);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
c = unicode::ToLowerCase(c);
|
||||
MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
|
||||
destChars[j++] = c;
|
||||
}
|
||||
|
||||
MOZ_ASSERT(j == destLength);
|
||||
destChars[destLength] = '\0';
|
||||
|
||||
return srcLength;
|
||||
}
|
||||
|
||||
static size_t
|
||||
ToLowerCaseLength(const char16_t* chars, size_t startIndex, size_t length)
|
||||
{
|
||||
size_t lowerLength = length;
|
||||
for (size_t i = startIndex; i < length; i++) {
|
||||
char16_t c = chars[i];
|
||||
|
||||
// U+0130 is lowercased to the two-element sequence <U+0069 U+0307>.
|
||||
if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE)
|
||||
lowerLength += 1;
|
||||
}
|
||||
return lowerLength;
|
||||
}
|
||||
|
||||
static size_t
|
||||
ToLowerCaseLength(const Latin1Char* chars, size_t startIndex, size_t length)
|
||||
{
|
||||
MOZ_ASSERT_UNREACHABLE("never called for Latin-1 strings");
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
static JSString*
|
||||
ToLowerCase(JSContext* cx, JSLinearString* str)
|
||||
{
|
||||
// Unlike toUpperCase, toLowerCase has the nice invariant that if the
|
||||
// input is a Latin-1 string, the output is also a Latin-1 string.
|
||||
using AnyCharPtr = UniquePtr<CharT[], JS::FreePolicy>;
|
||||
|
||||
AnyCharPtr newChars;
|
||||
const size_t length = str->length();
|
||||
size_t resultLength;
|
||||
// Unlike toUpperCase, toLowerCase has the nice invariant that if the input
|
||||
// is a Latin1 string, the output is also a Latin1 string.
|
||||
UniquePtr<CharT[], JS::FreePolicy> newChars;
|
||||
size_t length = str->length();
|
||||
{
|
||||
AutoCheckCannotGC nogc;
|
||||
const CharT* chars = str->chars<CharT>(nogc);
|
||||
|
||||
// We don't need extra special casing checks in the loop below,
|
||||
// because U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+03A3
|
||||
// GREEK CAPITAL LETTER SIGMA already have simple lower case mappings.
|
||||
MOZ_ASSERT(unicode::CanLowerCase(unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE),
|
||||
"U+0130 has a simple lower case mapping");
|
||||
MOZ_ASSERT(unicode::CanLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA),
|
||||
"U+03A3 has a simple lower case mapping");
|
||||
|
||||
// Look for the first character that changes when lowercased.
|
||||
// Look for the first upper case character.
|
||||
size_t i = 0;
|
||||
for (; i < length; i++) {
|
||||
char16_t c = chars[i];
|
||||
|
@ -806,36 +634,40 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
|
|||
break;
|
||||
}
|
||||
|
||||
// If no character needs to change, return the input string.
|
||||
// If all characters are lower case, return the input string.
|
||||
if (i == length)
|
||||
return str;
|
||||
|
||||
resultLength = length;
|
||||
newChars = cx->make_pod_array<CharT>(resultLength + 1);
|
||||
newChars = cx->make_pod_array<CharT>(length + 1);
|
||||
if (!newChars)
|
||||
return nullptr;
|
||||
|
||||
PodCopy(newChars.get(), chars, i);
|
||||
|
||||
size_t readChars = ToLowerCaseImpl(newChars.get(), chars, i, length, resultLength);
|
||||
if (readChars < length) {
|
||||
MOZ_ASSERT((!IsSame<CharT, Latin1Char>::value),
|
||||
"Latin-1 strings don't have special lower case mappings");
|
||||
resultLength = ToLowerCaseLength(chars, readChars, length);
|
||||
for (; i < length; i++) {
|
||||
char16_t c = chars[i];
|
||||
if (!IsSame<CharT, Latin1Char>::value) {
|
||||
if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
|
||||
char16_t trail = chars[i + 1];
|
||||
if (unicode::IsTrailSurrogate(trail)) {
|
||||
trail = unicode::ToLowerCaseNonBMPTrail(c, trail);
|
||||
newChars[i] = c;
|
||||
newChars[i + 1] = trail;
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
AnyCharPtr buf = cx->make_pod_array<CharT>(resultLength + 1);
|
||||
if (!buf)
|
||||
return nullptr;
|
||||
|
||||
PodCopy(buf.get(), newChars.get(), readChars);
|
||||
newChars = Move(buf);
|
||||
|
||||
MOZ_ALWAYS_TRUE(length ==
|
||||
ToLowerCaseImpl(newChars.get(), chars, readChars, length, resultLength));
|
||||
c = unicode::ToLowerCase(c);
|
||||
MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
|
||||
newChars[i] = c;
|
||||
}
|
||||
|
||||
newChars[length] = 0;
|
||||
}
|
||||
|
||||
JSString* res = NewStringDontDeflate<CanGC>(cx, newChars.get(), resultLength);
|
||||
JSString* res = NewStringDontDeflate<CanGC>(cx, newChars.get(), length);
|
||||
if (!res)
|
||||
return nullptr;
|
||||
|
||||
|
@ -843,33 +675,32 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
|
|||
return res;
|
||||
}
|
||||
|
||||
JSString*
|
||||
js::StringToLowerCase(JSContext* cx, HandleLinearString string)
|
||||
static inline bool
|
||||
ToLowerCaseHelper(JSContext* cx, const CallArgs& args)
|
||||
{
|
||||
if (string->hasLatin1Chars())
|
||||
return ToLowerCase<Latin1Char>(cx, string);
|
||||
return ToLowerCase<char16_t>(cx, string);
|
||||
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
|
||||
if (!str)
|
||||
return false;
|
||||
|
||||
JSLinearString* linear = str->ensureLinear(cx);
|
||||
if (!linear)
|
||||
return false;
|
||||
|
||||
if (linear->hasLatin1Chars())
|
||||
str = ToLowerCase<Latin1Char>(cx, linear);
|
||||
else
|
||||
str = ToLowerCase<char16_t>(cx, linear);
|
||||
if (!str)
|
||||
return false;
|
||||
|
||||
args.rval().setString(str);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
js::str_toLowerCase(JSContext* cx, unsigned argc, Value* vp)
|
||||
{
|
||||
CallArgs args = CallArgsFromVp(argc, vp);
|
||||
|
||||
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
|
||||
if (!str)
|
||||
return false;
|
||||
|
||||
RootedLinearString linear(cx, str->ensureLinear(cx));
|
||||
if (!linear)
|
||||
return false;
|
||||
|
||||
JSString* result = StringToLowerCase(cx, linear);
|
||||
if (!result)
|
||||
return false;
|
||||
|
||||
args.rval().setString(result);
|
||||
return true;
|
||||
return ToLowerCaseHelper(cx, CallArgsFromVp(argc, vp));
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -877,15 +708,15 @@ js::str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp)
|
|||
{
|
||||
CallArgs args = CallArgsFromVp(argc, vp);
|
||||
|
||||
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
|
||||
if (!str)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Forcefully ignore the first (or any) argument and return toLowerCase(),
|
||||
* ECMA has reserved that argument, presumably for defining the locale.
|
||||
*/
|
||||
if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToLowerCase) {
|
||||
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
|
||||
if (!str)
|
||||
return false;
|
||||
|
||||
RootedValue result(cx);
|
||||
if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result))
|
||||
return false;
|
||||
|
@ -894,170 +725,54 @@ js::str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp)
|
|||
return true;
|
||||
}
|
||||
|
||||
RootedLinearString linear(cx, str->ensureLinear(cx));
|
||||
if (!linear)
|
||||
return false;
|
||||
|
||||
JSString* result = StringToLowerCase(cx, linear);
|
||||
if (!result)
|
||||
return false;
|
||||
|
||||
args.rval().setString(result);
|
||||
return true;
|
||||
return ToLowerCaseHelper(cx, args);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
CanUpperCaseSpecialCasing(Latin1Char charCode)
|
||||
{
|
||||
// Handle U+00DF LATIN SMALL LETTER SHARP S inline, all other Latin-1
|
||||
// characters don't have special casing rules.
|
||||
MOZ_ASSERT_IF(charCode != unicode::LATIN_SMALL_LETTER_SHARP_S,
|
||||
!unicode::CanUpperCaseSpecialCasing(charCode));
|
||||
|
||||
return charCode == unicode::LATIN_SMALL_LETTER_SHARP_S;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
CanUpperCaseSpecialCasing(char16_t charCode)
|
||||
{
|
||||
return unicode::CanUpperCaseSpecialCasing(charCode);
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
LengthUpperCaseSpecialCasing(Latin1Char charCode)
|
||||
{
|
||||
// U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
|
||||
MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
|
||||
|
||||
return 2;
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
LengthUpperCaseSpecialCasing(char16_t charCode)
|
||||
{
|
||||
MOZ_ASSERT(CanUpperCaseSpecialCasing(charCode));
|
||||
|
||||
return unicode::LengthUpperCaseSpecialCasing(charCode);
|
||||
}
|
||||
|
||||
static inline void
|
||||
AppendUpperCaseSpecialCasing(char16_t charCode, Latin1Char* elements, size_t* index)
|
||||
{
|
||||
// U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
|
||||
MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
|
||||
static_assert('S' <= JSString::MAX_LATIN1_CHAR, "'S' is a Latin-1 character");
|
||||
|
||||
elements[(*index)++] = 'S';
|
||||
elements[(*index)++] = 'S';
|
||||
}
|
||||
|
||||
static inline void
|
||||
AppendUpperCaseSpecialCasing(char16_t charCode, char16_t* elements, size_t* index)
|
||||
{
|
||||
unicode::AppendUpperCaseSpecialCasing(charCode, elements, index);
|
||||
}
|
||||
|
||||
// See ToLowerCaseImpl for an explanation of the parameters.
|
||||
template <typename DestChar, typename SrcChar>
|
||||
static size_t
|
||||
ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t startIndex, size_t srcLength,
|
||||
size_t destLength)
|
||||
static void
|
||||
ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t firstLowerCase, size_t length)
|
||||
{
|
||||
static_assert(IsSame<SrcChar, Latin1Char>::value || !IsSame<DestChar, Latin1Char>::value,
|
||||
"cannot write non-Latin-1 characters into Latin-1 string");
|
||||
MOZ_ASSERT(startIndex < srcLength);
|
||||
MOZ_ASSERT(srcLength <= destLength);
|
||||
MOZ_ASSERT(firstLowerCase < length);
|
||||
|
||||
size_t j = startIndex;
|
||||
for (size_t i = startIndex; i < srcLength; i++) {
|
||||
for (size_t i = 0; i < firstLowerCase; i++)
|
||||
destChars[i] = srcChars[i];
|
||||
|
||||
for (size_t i = firstLowerCase; i < length; i++) {
|
||||
char16_t c = srcChars[i];
|
||||
if (!IsSame<DestChar, Latin1Char>::value) {
|
||||
if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
|
||||
if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
|
||||
char16_t trail = srcChars[i + 1];
|
||||
if (unicode::IsTrailSurrogate(trail)) {
|
||||
trail = unicode::ToUpperCaseNonBMPTrail(c, trail);
|
||||
destChars[j++] = c;
|
||||
destChars[j++] = trail;
|
||||
destChars[i] = c;
|
||||
destChars[i + 1] = trail;
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (MOZ_UNLIKELY(c > 0x7f && CanUpperCaseSpecialCasing(static_cast<SrcChar>(c)))) {
|
||||
// Return if the output buffer is too small.
|
||||
if (srcLength == destLength)
|
||||
return i;
|
||||
|
||||
AppendUpperCaseSpecialCasing(c, destChars, &j);
|
||||
continue;
|
||||
}
|
||||
|
||||
c = unicode::ToUpperCase(c);
|
||||
MOZ_ASSERT_IF((IsSame<DestChar, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
|
||||
destChars[j++] = c;
|
||||
destChars[i] = c;
|
||||
}
|
||||
|
||||
MOZ_ASSERT(j == destLength);
|
||||
destChars[destLength] = '\0';
|
||||
|
||||
return srcLength;
|
||||
}
|
||||
|
||||
// Explicit instantiation so we don't hit the static_assert from above.
|
||||
static bool
|
||||
ToUpperCaseImpl(Latin1Char* destChars, const char16_t* srcChars, size_t startIndex,
|
||||
size_t srcLength, size_t destLength)
|
||||
{
|
||||
MOZ_ASSERT_UNREACHABLE("cannot write non-Latin-1 characters into Latin-1 string");
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
static size_t
|
||||
ToUpperCaseLength(const CharT* chars, size_t startIndex, size_t length)
|
||||
{
|
||||
size_t upperLength = length;
|
||||
for (size_t i = startIndex; i < length; i++) {
|
||||
char16_t c = chars[i];
|
||||
|
||||
if (c > 0x7f && CanUpperCaseSpecialCasing(static_cast<CharT>(c)))
|
||||
upperLength += LengthUpperCaseSpecialCasing(static_cast<CharT>(c)) - 1;
|
||||
}
|
||||
return upperLength;
|
||||
}
|
||||
|
||||
template <typename DestChar, typename SrcChar>
|
||||
static inline void
|
||||
CopyChars(DestChar* destChars, const SrcChar* srcChars, size_t length)
|
||||
{
|
||||
static_assert(!IsSame<DestChar, SrcChar>::value, "PodCopy is used for the same type case");
|
||||
for (size_t i = 0; i < length; i++)
|
||||
destChars[i] = srcChars[i];
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
static inline void
|
||||
CopyChars(CharT* destChars, const CharT* srcChars, size_t length)
|
||||
{
|
||||
PodCopy(destChars, srcChars, length);
|
||||
destChars[length] = '\0';
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
static JSString*
|
||||
ToUpperCase(JSContext* cx, JSLinearString* str)
|
||||
{
|
||||
using Latin1CharPtr = UniquePtr<Latin1Char[], JS::FreePolicy>;
|
||||
using TwoByteCharPtr = UniquePtr<char16_t[], JS::FreePolicy>;
|
||||
typedef UniquePtr<Latin1Char[], JS::FreePolicy> Latin1CharPtr;
|
||||
typedef UniquePtr<char16_t[], JS::FreePolicy> TwoByteCharPtr;
|
||||
|
||||
mozilla::MaybeOneOf<Latin1CharPtr, TwoByteCharPtr> newChars;
|
||||
const size_t length = str->length();
|
||||
size_t resultLength;
|
||||
size_t length = str->length();
|
||||
{
|
||||
AutoCheckCannotGC nogc;
|
||||
const CharT* chars = str->chars<CharT>(nogc);
|
||||
|
||||
// Look for the first character that changes when uppercased.
|
||||
// Look for the first lower case character.
|
||||
size_t i = 0;
|
||||
for (; i < length; i++) {
|
||||
char16_t c = chars[i];
|
||||
|
@ -1075,33 +790,21 @@ ToUpperCase(JSContext* cx, JSLinearString* str)
|
|||
}
|
||||
if (unicode::CanUpperCase(c))
|
||||
break;
|
||||
if (MOZ_UNLIKELY(c > 0x7f && CanUpperCaseSpecialCasing(static_cast<CharT>(c))))
|
||||
break;
|
||||
}
|
||||
|
||||
// If no character needs to change, return the input string.
|
||||
// If all characters are upper case, return the input string.
|
||||
if (i == length)
|
||||
return str;
|
||||
|
||||
// The string changes when uppercased, so we must create a new string.
|
||||
// Can it be Latin-1?
|
||||
//
|
||||
// If the original string is Latin-1, it can -- unless the string
|
||||
// contains U+00B5 MICRO SIGN or U+00FF SMALL LETTER Y WITH DIAERESIS,
|
||||
// the only Latin-1 codepoints that don't uppercase within Latin-1.
|
||||
// Search for those codepoints to decide whether the new string can be
|
||||
// Latin-1.
|
||||
// If the original string is a two-byte string, its uppercase form is
|
||||
// so rarely Latin-1 that we don't even consider creating a new
|
||||
// Latin-1 string.
|
||||
// If the string is Latin1, check if it contains the MICRO SIGN (0xb5)
|
||||
// or SMALL LETTER Y WITH DIAERESIS (0xff) character. The corresponding
|
||||
// upper case characters are not in the Latin1 range.
|
||||
bool resultIsLatin1;
|
||||
if (IsSame<CharT, Latin1Char>::value) {
|
||||
resultIsLatin1 = true;
|
||||
for (size_t j = i; j < length; j++) {
|
||||
Latin1Char c = chars[j];
|
||||
if (c == unicode::MICRO_SIGN ||
|
||||
c == unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)
|
||||
{
|
||||
if (c == 0xb5 || c == 0xff) {
|
||||
MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR);
|
||||
resultIsLatin1 = false;
|
||||
break;
|
||||
|
@ -1114,63 +817,31 @@ ToUpperCase(JSContext* cx, JSLinearString* str)
|
|||
}
|
||||
|
||||
if (resultIsLatin1) {
|
||||
resultLength = length;
|
||||
Latin1CharPtr buf = cx->make_pod_array<Latin1Char>(resultLength + 1);
|
||||
Latin1CharPtr buf = cx->make_pod_array<Latin1Char>(length + 1);
|
||||
if (!buf)
|
||||
return nullptr;
|
||||
|
||||
CopyChars(buf.get(), chars, i);
|
||||
|
||||
size_t readChars = ToUpperCaseImpl(buf.get(), chars, i, length, resultLength);
|
||||
if (readChars < length) {
|
||||
resultLength = ToUpperCaseLength(chars, readChars, length);
|
||||
|
||||
Latin1CharPtr buf2 = cx->make_pod_array<Latin1Char>(resultLength + 1);
|
||||
if (!buf2)
|
||||
return nullptr;
|
||||
|
||||
CopyChars(buf2.get(), buf.get(), readChars);
|
||||
buf = Move(buf2);
|
||||
|
||||
MOZ_ALWAYS_TRUE(length ==
|
||||
ToUpperCaseImpl(buf.get(), chars, readChars, length, resultLength));
|
||||
}
|
||||
ToUpperCaseImpl(buf.get(), chars, i, length);
|
||||
newChars.construct<Latin1CharPtr>(Move(buf));
|
||||
} else {
|
||||
resultLength = length;
|
||||
TwoByteCharPtr buf = cx->make_pod_array<char16_t>(resultLength + 1);
|
||||
TwoByteCharPtr buf = cx->make_pod_array<char16_t>(length + 1);
|
||||
if (!buf)
|
||||
return nullptr;
|
||||
|
||||
CopyChars(buf.get(), chars, i);
|
||||
|
||||
size_t readChars = ToUpperCaseImpl(buf.get(), chars, i, length, resultLength);
|
||||
if (readChars < length) {
|
||||
resultLength = ToUpperCaseLength(chars, readChars, length);
|
||||
|
||||
TwoByteCharPtr buf2 = cx->make_pod_array<char16_t>(resultLength + 1);
|
||||
if (!buf2)
|
||||
return nullptr;
|
||||
|
||||
CopyChars(buf2.get(), buf.get(), readChars);
|
||||
buf = Move(buf2);
|
||||
|
||||
MOZ_ALWAYS_TRUE(length ==
|
||||
ToUpperCaseImpl(buf.get(), chars, readChars, length, resultLength));
|
||||
}
|
||||
ToUpperCaseImpl(buf.get(), chars, i, length);
|
||||
newChars.construct<TwoByteCharPtr>(Move(buf));
|
||||
}
|
||||
}
|
||||
|
||||
JSString* res;
|
||||
if (newChars.constructed<Latin1CharPtr>()) {
|
||||
res = NewStringDontDeflate<CanGC>(cx, newChars.ref<Latin1CharPtr>().get(), resultLength);
|
||||
res = NewStringDontDeflate<CanGC>(cx, newChars.ref<Latin1CharPtr>().get(), length);
|
||||
if (!res)
|
||||
return nullptr;
|
||||
|
||||
mozilla::Unused << newChars.ref<Latin1CharPtr>().release();
|
||||
} else {
|
||||
res = NewStringDontDeflate<CanGC>(cx, newChars.ref<TwoByteCharPtr>().get(), resultLength);
|
||||
res = NewStringDontDeflate<CanGC>(cx, newChars.ref<TwoByteCharPtr>().get(), length);
|
||||
if (!res)
|
||||
return nullptr;
|
||||
|
||||
|
@ -1180,33 +851,32 @@ ToUpperCase(JSContext* cx, JSLinearString* str)
|
|||
return res;
|
||||
}
|
||||
|
||||
JSString*
|
||||
js::StringToUpperCase(JSContext* cx, HandleLinearString string)
|
||||
static bool
|
||||
ToUpperCaseHelper(JSContext* cx, const CallArgs& args)
|
||||
{
|
||||
if (string->hasLatin1Chars())
|
||||
return ToUpperCase<Latin1Char>(cx, string);
|
||||
return ToUpperCase<char16_t>(cx, string);
|
||||
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
|
||||
if (!str)
|
||||
return false;
|
||||
|
||||
JSLinearString* linear = str->ensureLinear(cx);
|
||||
if (!linear)
|
||||
return false;
|
||||
|
||||
if (linear->hasLatin1Chars())
|
||||
str = ToUpperCase<Latin1Char>(cx, linear);
|
||||
else
|
||||
str = ToUpperCase<char16_t>(cx, linear);
|
||||
if (!str)
|
||||
return false;
|
||||
|
||||
args.rval().setString(str);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
js::str_toUpperCase(JSContext* cx, unsigned argc, Value* vp)
|
||||
{
|
||||
CallArgs args = CallArgsFromVp(argc, vp);
|
||||
|
||||
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
|
||||
if (!str)
|
||||
return false;
|
||||
|
||||
RootedLinearString linear(cx, str->ensureLinear(cx));
|
||||
if (!linear)
|
||||
return false;
|
||||
|
||||
JSString* result = StringToUpperCase(cx, linear);
|
||||
if (!result)
|
||||
return false;
|
||||
|
||||
args.rval().setString(result);
|
||||
return true;
|
||||
return ToUpperCaseHelper(cx, CallArgsFromVp(argc, vp));
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -1214,15 +884,15 @@ js::str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp)
|
|||
{
|
||||
CallArgs args = CallArgsFromVp(argc, vp);
|
||||
|
||||
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
|
||||
if (!str)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Forcefully ignore the first (or any) argument and return toUpperCase(),
|
||||
* ECMA has reserved that argument, presumably for defining the locale.
|
||||
*/
|
||||
if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToUpperCase) {
|
||||
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
|
||||
if (!str)
|
||||
return false;
|
||||
|
||||
RootedValue result(cx);
|
||||
if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result))
|
||||
return false;
|
||||
|
@ -1231,16 +901,7 @@ js::str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp)
|
|||
return true;
|
||||
}
|
||||
|
||||
RootedLinearString linear(cx, str->ensureLinear(cx));
|
||||
if (!linear)
|
||||
return false;
|
||||
|
||||
JSString* result = StringToUpperCase(cx, linear);
|
||||
if (!result)
|
||||
return false;
|
||||
|
||||
args.rval().setString(result);
|
||||
return true;
|
||||
return ToUpperCaseHelper(cx, args);
|
||||
}
|
||||
|
||||
#if !EXPOSE_INTL_API
|
||||
|
@ -1320,7 +981,7 @@ js::str_normalize(JSContext* cx, unsigned argc, Value* vp)
|
|||
if (!linear)
|
||||
return false;
|
||||
|
||||
// Latin-1 strings are already in Normalization Form C.
|
||||
// Latin1 strings are already in Normalization Form C.
|
||||
if (form == NFC && linear->hasLatin1Chars()) {
|
||||
// Step 7.
|
||||
args.rval().setString(str);
|
||||
|
@ -1736,7 +1397,7 @@ StringMatch(const TextChar* text, uint32_t textLen, const PatChar* pat, uint32_t
|
|||
/*
|
||||
* For big patterns with large potential overlap we want the SIMD-optimized
|
||||
* speed of memcmp. For small patterns, a simple loop is faster. We also can't
|
||||
* use memcmp if one of the strings is TwoByte and the other is Latin-1.
|
||||
* use memcmp if one of the strings is TwoByte and the other is Latin1.
|
||||
*
|
||||
* FIXME: Linux memcmp performance is sad and the manual loop is faster.
|
||||
*/
|
||||
|
@ -1933,7 +1594,7 @@ RopeMatch(JSContext* cx, JSRope* text, JSLinearString* pat, int* match)
|
|||
* need to build the list of leaf nodes. Do both here: iterate over the
|
||||
* nodes so long as there are not too many.
|
||||
*
|
||||
* We also don't use rope matching if the rope contains both Latin-1 and
|
||||
* We also don't use rope matching if the rope contains both Latin1 and
|
||||
* TwoByte nodes, to simplify the match algorithm.
|
||||
*/
|
||||
{
|
||||
|
@ -3115,7 +2776,7 @@ js::str_fromCharCode(JSContext* cx, unsigned argc, Value* vp)
|
|||
// string (thin or fat) and so we don't need to malloc the chars. (We could
|
||||
// cover some cases where args.length() goes up to
|
||||
// JSFatInlineString::MAX_LENGTH_LATIN1 if we also checked if the chars are
|
||||
// all Latin-1, but it doesn't seem worth the effort.)
|
||||
// all Latin1, but it doesn't seem worth the effort.)
|
||||
if (args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE)
|
||||
return str_fromCharCode_few_args(cx, args);
|
||||
|
||||
|
@ -3258,7 +2919,7 @@ js::str_fromCodePoint(JSContext* cx, unsigned argc, Value* vp)
|
|||
// string (thin or fat) and so we don't need to malloc the chars. (We could
|
||||
// cover some cases where |args.length()| goes up to
|
||||
// JSFatInlineString::MAX_LENGTH_LATIN1 / 2 if we also checked if the chars
|
||||
// are all Latin-1, but it doesn't seem worth the effort.)
|
||||
// are all Latin1, but it doesn't seem worth the effort.)
|
||||
if (args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2)
|
||||
return str_fromCodePoint_few_args(cx, args);
|
||||
|
||||
|
|
|
@ -481,12 +481,6 @@ JSString*
|
|||
str_replace_string_raw(JSContext* cx, HandleString string, HandleString pattern,
|
||||
HandleString replacement);
|
||||
|
||||
extern JSString*
|
||||
StringToLowerCase(JSContext* cx, HandleLinearString string);
|
||||
|
||||
extern JSString*
|
||||
StringToUpperCase(JSContext* cx, HandleLinearString string);
|
||||
|
||||
extern bool
|
||||
StringConstructor(JSContext* cx, unsigned argc, Value* vp);
|
||||
|
||||
|
|
|
@ -35,9 +35,6 @@ writeHeaderToLog( SECTION + " "+ TITLE);
|
|||
// Armenian
|
||||
// Range: U+0530 to U+058F
|
||||
for ( var i = 0x0530; i <= 0x058F; i++ ) {
|
||||
// U+0587 (ARMENIAN SMALL LIGATURE ECH YIWN) has special upper casing.
|
||||
if (i == 0x0587) continue;
|
||||
|
||||
var U = new Unicode( i );
|
||||
/*
|
||||
new TestCase( SECTION,
|
||||
|
|
|
@ -63,16 +63,8 @@ namespace CharFlag {
|
|||
const uint8_t UNICODE_ID_CONTINUE = UNICODE_ID_START + UNICODE_ID_CONTINUE_ONLY;
|
||||
}
|
||||
|
||||
const char16_t NO_BREAK_SPACE = 0x00A0;
|
||||
const char16_t MICRO_SIGN = 0x00B5;
|
||||
const char16_t LATIN_SMALL_LETTER_SHARP_S = 0x00DF;
|
||||
const char16_t LATIN_SMALL_LETTER_Y_WITH_DIAERESIS = 0x00FF;
|
||||
const char16_t LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE = 0x0130;
|
||||
const char16_t COMBINING_DOT_ABOVE = 0x0307;
|
||||
const char16_t GREEK_CAPITAL_LETTER_SIGMA = 0x03A3;
|
||||
const char16_t GREEK_SMALL_LETTER_FINAL_SIGMA = 0x03C2;
|
||||
const char16_t GREEK_SMALL_LETTER_SIGMA = 0x03C3;
|
||||
const char16_t BYTE_ORDER_MARK2 = 0xFFFE;
|
||||
const char16_t NO_BREAK_SPACE = 0x00A0;
|
||||
|
||||
const char16_t LeadSurrogateMin = 0xD800;
|
||||
const char16_t LeadSurrogateMax = 0xDBFF;
|
||||
|
@ -247,10 +239,6 @@ IsSpaceOrBOM2(char16_t ch)
|
|||
return CharInfo(ch).isSpace();
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the simple upper case mapping (see CanUpperCaseSpecialCasing for
|
||||
* details) of the given UTF-16 code unit.
|
||||
*/
|
||||
inline char16_t
|
||||
ToUpperCase(char16_t ch)
|
||||
{
|
||||
|
@ -265,10 +253,6 @@ ToUpperCase(char16_t ch)
|
|||
return uint16_t(ch) + info.upperCase;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the simple lower case mapping (see CanUpperCaseSpecialCasing for
|
||||
* details) of the given UTF-16 code unit.
|
||||
*/
|
||||
inline char16_t
|
||||
ToLowerCase(char16_t ch)
|
||||
{
|
||||
|
@ -345,43 +329,6 @@ ToLowerCaseNonBMPTrail(char16_t lead, char16_t trail)
|
|||
return trail;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if the given UTF-16 code unit has a language-independent,
|
||||
* unconditional or conditional special upper case mapping.
|
||||
*
|
||||
* Unicode defines two case mapping modes:
|
||||
* 1. "simple case mappings" for one-to-one mappings which are independent of
|
||||
* context and language (defined in UnicodeData.txt).
|
||||
* 2. "special case mappings" for mappings which can increase or decrease the
|
||||
* string length; or are dependent on context or locale (defined in
|
||||
* SpecialCasing.txt).
|
||||
*
|
||||
* The CanUpperCase() method defined above only supports simple case mappings.
|
||||
* In order to support the full case mappings of all Unicode characters,
|
||||
* callers need to check this method in addition to CanUpperCase().
|
||||
*
|
||||
* NOTE: All special upper case mappings are unconditional in Unicode 9.
|
||||
*/
|
||||
bool
|
||||
CanUpperCaseSpecialCasing(char16_t ch);
|
||||
|
||||
/*
|
||||
* Returns the length of the upper case mapping of |ch|.
|
||||
*
|
||||
* This function asserts if |ch| doesn't have a special upper case mapping.
|
||||
*/
|
||||
size_t
|
||||
LengthUpperCaseSpecialCasing(char16_t ch);
|
||||
|
||||
/*
|
||||
* Appends the upper case mapping of |ch| to the given output buffer,
|
||||
* starting at the provided index.
|
||||
*
|
||||
* This function asserts if |ch| doesn't have a special upper case mapping.
|
||||
*/
|
||||
void
|
||||
AppendUpperCaseSpecialCasing(char16_t ch, char16_t* elements, size_t* index);
|
||||
|
||||
/*
|
||||
* For a codepoint C, CodepointsWithSameUpperCaseInfo stores three offsets
|
||||
* from C to up to three codepoints with same uppercase (no codepoint in
|
||||
|
@ -544,7 +491,7 @@ UTF16Encode(uint32_t codePoint, char16_t* lead, char16_t* trail)
|
|||
*trail = TrailSurrogate(codePoint);
|
||||
}
|
||||
|
||||
inline void
|
||||
static inline void
|
||||
UTF16Encode(uint32_t codePoint, char16_t* elements, unsigned* index)
|
||||
{
|
||||
if (!IsSupplementary(codePoint)) {
|
||||
|
|
Загрузка…
Ссылка в новой задаче