зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1446237 - Rename some String.cpp and Unicode.h functions to have better, and different, names -- for clarity particularly in scumbag unified builds with scumbag global |using namespace|. r=anba
--HG-- extra : rebase_source : b962da57aaf5bef373c8cac376d4d8791b1b7c3b
This commit is contained in:
Родитель
cce3d887e8
Коммит
b9b5100ca5
|
@ -905,9 +905,9 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
|
|||
// We don't need extra special casing checks in the loop below,
|
||||
// because U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+03A3
|
||||
// GREEK CAPITAL LETTER SIGMA already have simple lower case mappings.
|
||||
MOZ_ASSERT(unicode::CanLowerCase(unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE),
|
||||
MOZ_ASSERT(unicode::ChangesWhenLowerCased(unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE),
|
||||
"U+0130 has a simple lower case mapping");
|
||||
MOZ_ASSERT(unicode::CanLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA),
|
||||
MOZ_ASSERT(unicode::ChangesWhenLowerCased(unicode::GREEK_CAPITAL_LETTER_SIGMA),
|
||||
"U+03A3 has a simple lower case mapping");
|
||||
|
||||
// One element Latin-1 strings can be directly retrieved from the
|
||||
|
@ -930,7 +930,7 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
|
|||
if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
|
||||
CharT trail = chars[i + 1];
|
||||
if (unicode::IsTrailSurrogate(trail)) {
|
||||
if (unicode::CanLowerCaseNonBMP(c, trail))
|
||||
if (unicode::ChangesWhenLowerCasedNonBMP(c, trail))
|
||||
break;
|
||||
|
||||
i++;
|
||||
|
@ -938,7 +938,7 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
|
|||
}
|
||||
}
|
||||
}
|
||||
if (unicode::CanLowerCase(c))
|
||||
if (unicode::ChangesWhenLowerCased(c))
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1114,24 +1114,24 @@ js::str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp)
|
|||
#endif // EXPOSE_INTL_API
|
||||
|
||||
static inline bool
|
||||
CanUpperCaseSpecialCasing(Latin1Char charCode)
|
||||
ToUpperCaseHasSpecialCasing(Latin1Char charCode)
|
||||
{
|
||||
// Handle U+00DF LATIN SMALL LETTER SHARP S inline, all other Latin-1
|
||||
// characters don't have special casing rules.
|
||||
MOZ_ASSERT_IF(charCode != unicode::LATIN_SMALL_LETTER_SHARP_S,
|
||||
!unicode::CanUpperCaseSpecialCasing(charCode));
|
||||
// U+00DF LATIN SMALL LETTER SHARP S is the only Latin-1 code point with
|
||||
// special casing rules, so detect it inline.
|
||||
bool hasUpperCaseSpecialCasing = charCode == unicode::LATIN_SMALL_LETTER_SHARP_S;
|
||||
MOZ_ASSERT(hasUpperCaseSpecialCasing == unicode::ChangesWhenUpperCasedSpecialCasing(charCode));
|
||||
|
||||
return charCode == unicode::LATIN_SMALL_LETTER_SHARP_S;
|
||||
return hasUpperCaseSpecialCasing;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
CanUpperCaseSpecialCasing(char16_t charCode)
|
||||
ToUpperCaseHasSpecialCasing(char16_t charCode)
|
||||
{
|
||||
return unicode::CanUpperCaseSpecialCasing(charCode);
|
||||
return unicode::ChangesWhenUpperCasedSpecialCasing(charCode);
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
LengthUpperCaseSpecialCasing(Latin1Char charCode)
|
||||
ToUpperCaseLengthSpecialCasing(Latin1Char charCode)
|
||||
{
|
||||
// U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
|
||||
MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
|
||||
|
@ -1140,15 +1140,15 @@ LengthUpperCaseSpecialCasing(Latin1Char charCode)
|
|||
}
|
||||
|
||||
static inline size_t
|
||||
LengthUpperCaseSpecialCasing(char16_t charCode)
|
||||
ToUpperCaseLengthSpecialCasing(char16_t charCode)
|
||||
{
|
||||
MOZ_ASSERT(::CanUpperCaseSpecialCasing(charCode));
|
||||
MOZ_ASSERT(ToUpperCaseHasSpecialCasing(charCode));
|
||||
|
||||
return unicode::LengthUpperCaseSpecialCasing(charCode);
|
||||
}
|
||||
|
||||
static inline void
|
||||
AppendUpperCaseSpecialCasing(char16_t charCode, Latin1Char* elements, size_t* index)
|
||||
ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode, Latin1Char* elements, size_t* index)
|
||||
{
|
||||
// U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
|
||||
MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
|
||||
|
@ -1159,7 +1159,7 @@ AppendUpperCaseSpecialCasing(char16_t charCode, Latin1Char* elements, size_t* in
|
|||
}
|
||||
|
||||
static inline void
|
||||
AppendUpperCaseSpecialCasing(char16_t charCode, char16_t* elements, size_t* index)
|
||||
ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode, char16_t* elements, size_t* index)
|
||||
{
|
||||
unicode::AppendUpperCaseSpecialCasing(charCode, elements, index);
|
||||
}
|
||||
|
@ -1191,12 +1191,12 @@ ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t startIndex,
|
|||
}
|
||||
}
|
||||
|
||||
if (MOZ_UNLIKELY(c > 0x7f && ::CanUpperCaseSpecialCasing(static_cast<SrcChar>(c)))) {
|
||||
if (MOZ_UNLIKELY(c > 0x7f && ToUpperCaseHasSpecialCasing(static_cast<SrcChar>(c)))) {
|
||||
// Return if the output buffer is too small.
|
||||
if (srcLength == destLength)
|
||||
return i;
|
||||
|
||||
::AppendUpperCaseSpecialCasing(c, destChars, &j);
|
||||
ToUpperCaseAppendUpperCaseSpecialCasing(c, destChars, &j);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -1226,8 +1226,8 @@ ToUpperCaseLength(const CharT* chars, size_t startIndex, size_t length)
|
|||
for (size_t i = startIndex; i < length; i++) {
|
||||
char16_t c = chars[i];
|
||||
|
||||
if (c > 0x7f && ::CanUpperCaseSpecialCasing(static_cast<CharT>(c)))
|
||||
upperLength += ::LengthUpperCaseSpecialCasing(static_cast<CharT>(c)) - 1;
|
||||
if (c > 0x7f && ToUpperCaseHasSpecialCasing(static_cast<CharT>(c)))
|
||||
upperLength += ToUpperCaseLengthSpecialCasing(static_cast<CharT>(c)) - 1;
|
||||
}
|
||||
return upperLength;
|
||||
}
|
||||
|
@ -1307,7 +1307,7 @@ ToUpperCase(JSContext* cx, JSLinearString* str)
|
|||
}
|
||||
|
||||
MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR ||
|
||||
::CanUpperCaseSpecialCasing(c));
|
||||
ToUpperCaseHasSpecialCasing(c));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1319,7 +1319,7 @@ ToUpperCase(JSContext* cx, JSLinearString* str)
|
|||
if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
|
||||
CharT trail = chars[i + 1];
|
||||
if (unicode::IsTrailSurrogate(trail)) {
|
||||
if (unicode::CanUpperCaseNonBMP(c, trail))
|
||||
if (unicode::ChangesWhenUpperCasedNonBMP(c, trail))
|
||||
break;
|
||||
|
||||
i++;
|
||||
|
@ -1327,9 +1327,9 @@ ToUpperCase(JSContext* cx, JSLinearString* str)
|
|||
}
|
||||
}
|
||||
}
|
||||
if (unicode::CanUpperCase(c))
|
||||
if (unicode::ChangesWhenUpperCased(c))
|
||||
break;
|
||||
if (MOZ_UNLIKELY(c > 0x7f && ::CanUpperCaseSpecialCasing(c)))
|
||||
if (MOZ_UNLIKELY(c > 0x7f && ToUpperCaseHasSpecialCasing(c)))
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -2679,7 +2679,7 @@ js::unicode::IsIdentifierPartNonBMP(uint32_t codePoint)
|
|||
}
|
||||
|
||||
bool
|
||||
js::unicode::CanUpperCaseSpecialCasing(char16_t ch)
|
||||
js::unicode::ChangesWhenUpperCasedSpecialCasing(char16_t ch)
|
||||
{
|
||||
if (ch < 0x00DF || ch > 0xFB17)
|
||||
return false;
|
||||
|
|
|
@ -255,8 +255,9 @@ IsSpaceOrBOM2(char16_t ch)
|
|||
}
|
||||
|
||||
/*
|
||||
* Returns the simple upper case mapping (see CanUpperCaseSpecialCasing for
|
||||
* details) of the given UTF-16 code unit.
|
||||
* Returns the simple upper case mapping (possibly the identity mapping; see
|
||||
* ChangesWhenUpperCasedSpecialCasing for details) of the given UTF-16 code
|
||||
* unit.
|
||||
*/
|
||||
inline char16_t
|
||||
ToUpperCase(char16_t ch)
|
||||
|
@ -273,8 +274,9 @@ ToUpperCase(char16_t ch)
|
|||
}
|
||||
|
||||
/*
|
||||
* Returns the simple lower case mapping (see CanUpperCaseSpecialCasing for
|
||||
* details) of the given UTF-16 code unit.
|
||||
* Returns the simple lower case mapping (possibly the identity mapping; see
|
||||
* ChangesWhenUpperCasedSpecialCasing for details) of the given UTF-16 code
|
||||
* unit.
|
||||
*/
|
||||
inline char16_t
|
||||
ToLowerCase(char16_t ch)
|
||||
|
@ -290,32 +292,46 @@ ToLowerCase(char16_t ch)
|
|||
return uint16_t(ch) + info.lowerCase;
|
||||
}
|
||||
|
||||
// Returns true iff ToUpperCase(ch) != ch.
|
||||
/**
|
||||
* Returns true iff ToUpperCase(ch) != ch.
|
||||
*
|
||||
* This function isn't guaranteed to correctly handle code points for which
|
||||
* |ChangesWhenUpperCasedSpecialCasing| returns true, so it is *not* always the
|
||||
* same as the value of the Changes_When_Uppercased Unicode property value for
|
||||
* the code point.
|
||||
*/
|
||||
inline bool
|
||||
CanUpperCase(char16_t ch)
|
||||
ChangesWhenUpperCased(char16_t ch)
|
||||
{
|
||||
if (ch < 128)
|
||||
return ch >= 'a' && ch <= 'z';
|
||||
return CharInfo(ch).upperCase != 0;
|
||||
}
|
||||
|
||||
// Returns true iff ToUpperCase(ch) != ch.
|
||||
/**
|
||||
* Returns true iff ToUpperCase(ch) != ch.
|
||||
*
|
||||
* This function isn't guaranteed to correctly handle code points for which
|
||||
* |ChangesWhenUpperCasedSpecialCasing| returns true, so it is *not* always the
|
||||
* same as the value of the Changes_When_Uppercased Unicode property value for
|
||||
* the code point.
|
||||
*/
|
||||
inline bool
|
||||
CanUpperCase(JS::Latin1Char ch)
|
||||
ChangesWhenUpperCased(JS::Latin1Char ch)
|
||||
{
|
||||
if (MOZ_LIKELY(ch < 128))
|
||||
return ch >= 'a' && ch <= 'z';
|
||||
|
||||
// U+00B5 and U+00E0 to U+00FF, except U+00F7, have an uppercase form.
|
||||
bool canUpper = ch == MICRO_SIGN ||
|
||||
bool hasUpper = ch == MICRO_SIGN ||
|
||||
(((ch & ~0x1F) == LATIN_SMALL_LETTER_A_WITH_GRAVE) && ch != DIVISION_SIGN);
|
||||
MOZ_ASSERT(canUpper == CanUpperCase(char16_t(ch)));
|
||||
return canUpper;
|
||||
MOZ_ASSERT(hasUpper == ChangesWhenUpperCased(char16_t(ch)));
|
||||
return hasUpper;
|
||||
}
|
||||
|
||||
// Returns true iff ToLowerCase(ch) != ch.
|
||||
inline bool
|
||||
CanLowerCase(char16_t ch)
|
||||
ChangesWhenLowerCased(char16_t ch)
|
||||
{
|
||||
if (ch < 128)
|
||||
return ch >= 'A' && ch <= 'Z';
|
||||
|
@ -324,16 +340,16 @@ CanLowerCase(char16_t ch)
|
|||
|
||||
// Returns true iff ToLowerCase(ch) != ch.
|
||||
inline bool
|
||||
CanLowerCase(JS::Latin1Char ch)
|
||||
ChangesWhenLowerCased(JS::Latin1Char ch)
|
||||
{
|
||||
if (MOZ_LIKELY(ch < 128))
|
||||
return ch >= 'A' && ch <= 'Z';
|
||||
|
||||
// U+00C0 to U+00DE, except U+00D7, have a lowercase form.
|
||||
bool canLower = ((ch & ~0x1F) == LATIN_CAPITAL_LETTER_A_WITH_GRAVE) &&
|
||||
bool hasLower = ((ch & ~0x1F) == LATIN_CAPITAL_LETTER_A_WITH_GRAVE) &&
|
||||
((ch & MULTIPLICATION_SIGN) != MULTIPLICATION_SIGN);
|
||||
MOZ_ASSERT(canLower == CanLowerCase(char16_t(ch)));
|
||||
return canLower;
|
||||
MOZ_ASSERT(hasLower == ChangesWhenLowerCased(char16_t(ch)));
|
||||
return hasLower;
|
||||
}
|
||||
|
||||
#define CHECK_RANGE(FROM, TO, LEAD, TRAIL_FROM, TRAIL_TO, DIFF) \
|
||||
|
@ -341,14 +357,14 @@ CanLowerCase(JS::Latin1Char ch)
|
|||
return true;
|
||||
|
||||
inline bool
|
||||
CanUpperCaseNonBMP(char16_t lead, char16_t trail)
|
||||
ChangesWhenUpperCasedNonBMP(char16_t lead, char16_t trail)
|
||||
{
|
||||
FOR_EACH_NON_BMP_UPPERCASE(CHECK_RANGE)
|
||||
return false;
|
||||
}
|
||||
|
||||
inline bool
|
||||
CanLowerCaseNonBMP(char16_t lead, char16_t trail)
|
||||
ChangesWhenLowerCasedNonBMP(char16_t lead, char16_t trail)
|
||||
{
|
||||
FOR_EACH_NON_BMP_LOWERCASE(CHECK_RANGE)
|
||||
return false;
|
||||
|
@ -381,24 +397,36 @@ ToLowerCaseNonBMPTrail(char16_t lead, char16_t trail)
|
|||
}
|
||||
|
||||
/*
|
||||
* Returns true if the given UTF-16 code unit has a language-independent,
|
||||
* unconditional or conditional special upper case mapping.
|
||||
* Returns true if, independent of language/locale, the given UTF-16 code unit
|
||||
* has a special upper case mapping.
|
||||
*
|
||||
* Unicode defines two case mapping modes:
|
||||
* 1. "simple case mappings" for one-to-one mappings which are independent of
|
||||
* context and language (defined in UnicodeData.txt).
|
||||
* 2. "special case mappings" for mappings which can increase or decrease the
|
||||
* string length; or are dependent on context or locale (defined in
|
||||
* SpecialCasing.txt).
|
||||
*
|
||||
* The CanUpperCase() method defined above only supports simple case mappings.
|
||||
* In order to support the full case mappings of all Unicode characters,
|
||||
* callers need to check this method in addition to CanUpperCase().
|
||||
* 1. "simple case mappings" (defined in UnicodeData.txt) for one-to-one
|
||||
* mappings that are always the same regardless of locale or context
|
||||
* within a string (e.g. "a"→"A").
|
||||
* 2. "special case mappings" (defined in SpecialCasing.txt) for mappings
|
||||
* that alter string length (e.g. uppercasing "ß"→"SS") or where different
|
||||
* mappings occur depending on language/locale (e.g. uppercasing "i"→"I"
|
||||
* usually but "i"→"İ" in Turkish) or context within the string (e.g.
|
||||
* lowercasing "Σ" U+03A3 GREEK CAPITAL LETTER SIGMA to "ς" U+03C2 GREEK
|
||||
* SMALL LETTER FINAL SIGMA when the sigma appears [roughly speaking] at
|
||||
* the end of a word but "σ" U+03C3 GREEK SMALL LETTER SIGMA anywhere
|
||||
* else).
|
||||
*
|
||||
* NOTE: All special upper case mappings are unconditional in Unicode 9.
|
||||
* The ChangesWhenUpperCased*() functions defined above will return true for
|
||||
* code points that have simple case mappings, but they may not return the
|
||||
* right result for code points that have special case mappings. To correctly
|
||||
* support full case mappings for all code points, callers must determine
|
||||
* whether this function returns true or false for the code point, then use
|
||||
* AppendUpperCaseSpecialCasing in the former case and ToUpperCase in the
|
||||
* latter.
|
||||
*
|
||||
* NOTE: All special upper case mappings are unconditional (that is, they don't
|
||||
* depend on language/locale or context within the string) in Unicode 10.
|
||||
*/
|
||||
bool
|
||||
CanUpperCaseSpecialCasing(char16_t ch);
|
||||
ChangesWhenUpperCasedSpecialCasing(char16_t ch);
|
||||
|
||||
/*
|
||||
* Returns the length of the upper case mapping of |ch|.
|
||||
|
|
|
@ -723,10 +723,10 @@ def write_special_casing_methods(unconditional_toupper, codepoint_table, println
|
|||
println(indent, ' return {};'.format(range_test_expr))
|
||||
println(indent, '}')
|
||||
|
||||
def write_CanUpperCaseSpecialCasing():
|
||||
def write_ChangesWhenUpperCasedSpecialCasing():
|
||||
""" Checks if the input has a special upper case mapping. """
|
||||
println('bool')
|
||||
println('js::unicode::CanUpperCaseSpecialCasing(char16_t ch)')
|
||||
println('js::unicode::ChangesWhenUpperCasedSpecialCasing(char16_t ch)')
|
||||
println('{')
|
||||
|
||||
assert unconditional_toupper, "|unconditional_toupper| is not empty"
|
||||
|
@ -816,7 +816,7 @@ def write_special_casing_methods(unconditional_toupper, codepoint_table, println
|
|||
|
||||
println('}')
|
||||
|
||||
write_CanUpperCaseSpecialCasing()
|
||||
write_ChangesWhenUpperCasedSpecialCasing()
|
||||
println('')
|
||||
write_LengthUpperCaseSpecialCasing()
|
||||
println('')
|
||||
|
|
Загрузка…
Ссылка в новой задаче