Bug 1446237 - Rename some String.cpp and Unicode.h functions to have better, and different, names -- for clarity particularly in scumbag unified builds with scumbag global |using namespace|. r=anba

--HG--
extra : rebase_source : b962da57aaf5bef373c8cac376d4d8791b1b7c3b
This commit is contained in:
Jeff Walden 2018-03-15 18:39:05 -07:00
Родитель cce3d887e8
Коммит b9b5100ca5
4 изменённых файлов: 87 добавлений и 59 удалений

Просмотреть файл

@ -905,9 +905,9 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
// We don't need extra special casing checks in the loop below,
// because U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+03A3
// GREEK CAPITAL LETTER SIGMA already have simple lower case mappings.
MOZ_ASSERT(unicode::CanLowerCase(unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE),
MOZ_ASSERT(unicode::ChangesWhenLowerCased(unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE),
"U+0130 has a simple lower case mapping");
MOZ_ASSERT(unicode::CanLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA),
MOZ_ASSERT(unicode::ChangesWhenLowerCased(unicode::GREEK_CAPITAL_LETTER_SIGMA),
"U+03A3 has a simple lower case mapping");
// One element Latin-1 strings can be directly retrieved from the
@ -930,7 +930,7 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
CharT trail = chars[i + 1];
if (unicode::IsTrailSurrogate(trail)) {
if (unicode::CanLowerCaseNonBMP(c, trail))
if (unicode::ChangesWhenLowerCasedNonBMP(c, trail))
break;
i++;
@ -938,7 +938,7 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
}
}
}
if (unicode::CanLowerCase(c))
if (unicode::ChangesWhenLowerCased(c))
break;
}
@ -1114,24 +1114,24 @@ js::str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp)
#endif // EXPOSE_INTL_API
static inline bool
CanUpperCaseSpecialCasing(Latin1Char charCode)
ToUpperCaseHasSpecialCasing(Latin1Char charCode)
{
// Handle U+00DF LATIN SMALL LETTER SHARP S inline, all other Latin-1
// characters don't have special casing rules.
MOZ_ASSERT_IF(charCode != unicode::LATIN_SMALL_LETTER_SHARP_S,
!unicode::CanUpperCaseSpecialCasing(charCode));
// U+00DF LATIN SMALL LETTER SHARP S is the only Latin-1 code point with
// special casing rules, so detect it inline.
bool hasUpperCaseSpecialCasing = charCode == unicode::LATIN_SMALL_LETTER_SHARP_S;
MOZ_ASSERT(hasUpperCaseSpecialCasing == unicode::ChangesWhenUpperCasedSpecialCasing(charCode));
return charCode == unicode::LATIN_SMALL_LETTER_SHARP_S;
return hasUpperCaseSpecialCasing;
}
static inline bool
CanUpperCaseSpecialCasing(char16_t charCode)
ToUpperCaseHasSpecialCasing(char16_t charCode)
{
return unicode::CanUpperCaseSpecialCasing(charCode);
return unicode::ChangesWhenUpperCasedSpecialCasing(charCode);
}
static inline size_t
LengthUpperCaseSpecialCasing(Latin1Char charCode)
ToUpperCaseLengthSpecialCasing(Latin1Char charCode)
{
// U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
@ -1140,15 +1140,15 @@ LengthUpperCaseSpecialCasing(Latin1Char charCode)
}
static inline size_t
LengthUpperCaseSpecialCasing(char16_t charCode)
ToUpperCaseLengthSpecialCasing(char16_t charCode)
{
MOZ_ASSERT(::CanUpperCaseSpecialCasing(charCode));
MOZ_ASSERT(ToUpperCaseHasSpecialCasing(charCode));
return unicode::LengthUpperCaseSpecialCasing(charCode);
}
static inline void
AppendUpperCaseSpecialCasing(char16_t charCode, Latin1Char* elements, size_t* index)
ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode, Latin1Char* elements, size_t* index)
{
// U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
@ -1159,7 +1159,7 @@ AppendUpperCaseSpecialCasing(char16_t charCode, Latin1Char* elements, size_t* in
}
static inline void
AppendUpperCaseSpecialCasing(char16_t charCode, char16_t* elements, size_t* index)
ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode, char16_t* elements, size_t* index)
{
unicode::AppendUpperCaseSpecialCasing(charCode, elements, index);
}
@ -1191,12 +1191,12 @@ ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t startIndex,
}
}
if (MOZ_UNLIKELY(c > 0x7f && ::CanUpperCaseSpecialCasing(static_cast<SrcChar>(c)))) {
if (MOZ_UNLIKELY(c > 0x7f && ToUpperCaseHasSpecialCasing(static_cast<SrcChar>(c)))) {
// Return if the output buffer is too small.
if (srcLength == destLength)
return i;
::AppendUpperCaseSpecialCasing(c, destChars, &j);
ToUpperCaseAppendUpperCaseSpecialCasing(c, destChars, &j);
continue;
}
@ -1226,8 +1226,8 @@ ToUpperCaseLength(const CharT* chars, size_t startIndex, size_t length)
for (size_t i = startIndex; i < length; i++) {
char16_t c = chars[i];
if (c > 0x7f && ::CanUpperCaseSpecialCasing(static_cast<CharT>(c)))
upperLength += ::LengthUpperCaseSpecialCasing(static_cast<CharT>(c)) - 1;
if (c > 0x7f && ToUpperCaseHasSpecialCasing(static_cast<CharT>(c)))
upperLength += ToUpperCaseLengthSpecialCasing(static_cast<CharT>(c)) - 1;
}
return upperLength;
}
@ -1307,7 +1307,7 @@ ToUpperCase(JSContext* cx, JSLinearString* str)
}
MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR ||
::CanUpperCaseSpecialCasing(c));
ToUpperCaseHasSpecialCasing(c));
}
}
@ -1319,7 +1319,7 @@ ToUpperCase(JSContext* cx, JSLinearString* str)
if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
CharT trail = chars[i + 1];
if (unicode::IsTrailSurrogate(trail)) {
if (unicode::CanUpperCaseNonBMP(c, trail))
if (unicode::ChangesWhenUpperCasedNonBMP(c, trail))
break;
i++;
@ -1327,9 +1327,9 @@ ToUpperCase(JSContext* cx, JSLinearString* str)
}
}
}
if (unicode::CanUpperCase(c))
if (unicode::ChangesWhenUpperCased(c))
break;
if (MOZ_UNLIKELY(c > 0x7f && ::CanUpperCaseSpecialCasing(c)))
if (MOZ_UNLIKELY(c > 0x7f && ToUpperCaseHasSpecialCasing(c)))
break;
}

Просмотреть файл

@ -2679,7 +2679,7 @@ js::unicode::IsIdentifierPartNonBMP(uint32_t codePoint)
}
bool
js::unicode::CanUpperCaseSpecialCasing(char16_t ch)
js::unicode::ChangesWhenUpperCasedSpecialCasing(char16_t ch)
{
if (ch < 0x00DF || ch > 0xFB17)
return false;

Просмотреть файл

@ -255,8 +255,9 @@ IsSpaceOrBOM2(char16_t ch)
}
/*
* Returns the simple upper case mapping (see CanUpperCaseSpecialCasing for
* details) of the given UTF-16 code unit.
* Returns the simple upper case mapping (possibly the identity mapping; see
* ChangesWhenUpperCasedSpecialCasing for details) of the given UTF-16 code
* unit.
*/
inline char16_t
ToUpperCase(char16_t ch)
@ -273,8 +274,9 @@ ToUpperCase(char16_t ch)
}
/*
* Returns the simple lower case mapping (see CanUpperCaseSpecialCasing for
* details) of the given UTF-16 code unit.
* Returns the simple lower case mapping (possibly the identity mapping; see
* ChangesWhenUpperCasedSpecialCasing for details) of the given UTF-16 code
* unit.
*/
inline char16_t
ToLowerCase(char16_t ch)
@ -290,32 +292,46 @@ ToLowerCase(char16_t ch)
return uint16_t(ch) + info.lowerCase;
}
// Returns true iff ToUpperCase(ch) != ch.
/**
* Returns true iff ToUpperCase(ch) != ch.
*
* This function isn't guaranteed to correctly handle code points for which
* |ChangesWhenUpperCasedSpecialCasing| returns true, so it is *not* always the
* same as the value of the Changes_When_Uppercased Unicode property value for
* the code point.
*/
inline bool
CanUpperCase(char16_t ch)
ChangesWhenUpperCased(char16_t ch)
{
if (ch < 128)
return ch >= 'a' && ch <= 'z';
return CharInfo(ch).upperCase != 0;
}
// Returns true iff ToUpperCase(ch) != ch.
/**
* Returns true iff ToUpperCase(ch) != ch.
*
* This function isn't guaranteed to correctly handle code points for which
* |ChangesWhenUpperCasedSpecialCasing| returns true, so it is *not* always the
* same as the value of the Changes_When_Uppercased Unicode property value for
* the code point.
*/
inline bool
CanUpperCase(JS::Latin1Char ch)
ChangesWhenUpperCased(JS::Latin1Char ch)
{
if (MOZ_LIKELY(ch < 128))
return ch >= 'a' && ch <= 'z';
// U+00B5 and U+00E0 to U+00FF, except U+00F7, have an uppercase form.
bool canUpper = ch == MICRO_SIGN ||
bool hasUpper = ch == MICRO_SIGN ||
(((ch & ~0x1F) == LATIN_SMALL_LETTER_A_WITH_GRAVE) && ch != DIVISION_SIGN);
MOZ_ASSERT(canUpper == CanUpperCase(char16_t(ch)));
return canUpper;
MOZ_ASSERT(hasUpper == ChangesWhenUpperCased(char16_t(ch)));
return hasUpper;
}
// Returns true iff ToLowerCase(ch) != ch.
inline bool
CanLowerCase(char16_t ch)
ChangesWhenLowerCased(char16_t ch)
{
if (ch < 128)
return ch >= 'A' && ch <= 'Z';
@ -324,16 +340,16 @@ CanLowerCase(char16_t ch)
// Returns true iff ToLowerCase(ch) != ch.
inline bool
CanLowerCase(JS::Latin1Char ch)
ChangesWhenLowerCased(JS::Latin1Char ch)
{
if (MOZ_LIKELY(ch < 128))
return ch >= 'A' && ch <= 'Z';
// U+00C0 to U+00DE, except U+00D7, have a lowercase form.
bool canLower = ((ch & ~0x1F) == LATIN_CAPITAL_LETTER_A_WITH_GRAVE) &&
bool hasLower = ((ch & ~0x1F) == LATIN_CAPITAL_LETTER_A_WITH_GRAVE) &&
((ch & MULTIPLICATION_SIGN) != MULTIPLICATION_SIGN);
MOZ_ASSERT(canLower == CanLowerCase(char16_t(ch)));
return canLower;
MOZ_ASSERT(hasLower == ChangesWhenLowerCased(char16_t(ch)));
return hasLower;
}
#define CHECK_RANGE(FROM, TO, LEAD, TRAIL_FROM, TRAIL_TO, DIFF) \
@ -341,14 +357,14 @@ CanLowerCase(JS::Latin1Char ch)
return true;
inline bool
CanUpperCaseNonBMP(char16_t lead, char16_t trail)
ChangesWhenUpperCasedNonBMP(char16_t lead, char16_t trail)
{
FOR_EACH_NON_BMP_UPPERCASE(CHECK_RANGE)
return false;
}
inline bool
CanLowerCaseNonBMP(char16_t lead, char16_t trail)
ChangesWhenLowerCasedNonBMP(char16_t lead, char16_t trail)
{
FOR_EACH_NON_BMP_LOWERCASE(CHECK_RANGE)
return false;
@ -381,24 +397,36 @@ ToLowerCaseNonBMPTrail(char16_t lead, char16_t trail)
}
/*
* Returns true if the given UTF-16 code unit has a language-independent,
* unconditional or conditional special upper case mapping.
* Returns true if, independent of language/locale, the given UTF-16 code unit
* has a special upper case mapping.
*
* Unicode defines two case mapping modes:
* 1. "simple case mappings" for one-to-one mappings which are independent of
* context and language (defined in UnicodeData.txt).
* 2. "special case mappings" for mappings which can increase or decrease the
* string length; or are dependent on context or locale (defined in
* SpecialCasing.txt).
*
* The CanUpperCase() method defined above only supports simple case mappings.
* In order to support the full case mappings of all Unicode characters,
* callers need to check this method in addition to CanUpperCase().
* 1. "simple case mappings" (defined in UnicodeData.txt) for one-to-one
* mappings that are always the same regardless of locale or context
* within a string (e.g. "a"→"A").
* 2. "special case mappings" (defined in SpecialCasing.txt) for mappings
* that alter string length (e.g. uppercasing "ß"→"SS") or where different
* mappings occur depending on language/locale (e.g. uppercasing "i"→"I"
* usually but "i"→"İ" in Turkish) or context within the string (e.g.
* lowercasing "Σ" U+03A3 GREEK CAPITAL LETTER SIGMA to "ς" U+03C2 GREEK
* SMALL LETTER FINAL SIGMA when the sigma appears [roughly speaking] at
* the end of a word but "σ" U+03C3 GREEK SMALL LETTER SIGMA anywhere
* else).
*
* NOTE: All special upper case mappings are unconditional in Unicode 9.
* The ChangesWhenUpperCased*() functions defined above will return true for
* code points that have simple case mappings, but they may not return the
* right result for code points that have special case mappings. To correctly
* support full case mappings for all code points, callers must determine
* whether this function returns true or false for the code point, then use
* AppendUpperCaseSpecialCasing in the former case and ToUpperCase in the
* latter.
*
* NOTE: All special upper case mappings are unconditional (that is, they don't
* depend on language/locale or context within the string) in Unicode 10.
*/
bool
CanUpperCaseSpecialCasing(char16_t ch);
ChangesWhenUpperCasedSpecialCasing(char16_t ch);
/*
* Returns the length of the upper case mapping of |ch|.

Просмотреть файл

@ -723,10 +723,10 @@ def write_special_casing_methods(unconditional_toupper, codepoint_table, println
println(indent, ' return {};'.format(range_test_expr))
println(indent, '}')
def write_CanUpperCaseSpecialCasing():
def write_ChangesWhenUpperCasedSpecialCasing():
""" Checks if the input has a special upper case mapping. """
println('bool')
println('js::unicode::CanUpperCaseSpecialCasing(char16_t ch)')
println('js::unicode::ChangesWhenUpperCasedSpecialCasing(char16_t ch)')
println('{')
assert unconditional_toupper, "|unconditional_toupper| is not empty"
@ -816,7 +816,7 @@ def write_special_casing_methods(unconditional_toupper, codepoint_table, println
println('}')
write_CanUpperCaseSpecialCasing()
write_ChangesWhenUpperCasedSpecialCasing()
println('')
write_LengthUpperCaseSpecialCasing()
println('')