Bug 1446237 - Rename some String.cpp and Unicode.h functions to have better, and different, names -- for clarity particularly in scumbag unified builds with scumbag global |using namespace|. r=anba

--HG-- extra : rebase_source : b962da57aaf5bef373c8cac376d4d8791b1b7c3b
2018-03-15 18:39:05 -07:00 · 2018-03-15 18:39:05 -07:00 · b9b5100ca5
--- a/js/src/builtin/String.cpp
+++ b/js/src/builtin/String.cpp
@ -905,9 +905,9 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
        // We don't need extra special casing checks in the loop below,
        // because U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+03A3
        // GREEK CAPITAL LETTER SIGMA already have simple lower case mappings.
-        MOZ_ASSERT(unicode::CanLowerCase(unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE),
+        MOZ_ASSERT(unicode::ChangesWhenLowerCased(unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE),
                   "U+0130 has a simple lower case mapping");
-        MOZ_ASSERT(unicode::CanLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA),
+        MOZ_ASSERT(unicode::ChangesWhenLowerCased(unicode::GREEK_CAPITAL_LETTER_SIGMA),
                   "U+03A3 has a simple lower case mapping");

        // One element Latin-1 strings can be directly retrieved from the
@ -930,7 +930,7 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
                if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
                    CharT trail = chars[i + 1];
                    if (unicode::IsTrailSurrogate(trail)) {
-                        if (unicode::CanLowerCaseNonBMP(c, trail))
+                        if (unicode::ChangesWhenLowerCasedNonBMP(c, trail))
                            break;

                        i++;
@ -938,7 +938,7 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
                    }
                }
            }
-            if (unicode::CanLowerCase(c))
+            if (unicode::ChangesWhenLowerCased(c))
                break;
        }

@ -1114,24 +1114,24 @@ js::str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp)
 #endif // EXPOSE_INTL_API

 static inline bool
-CanUpperCaseSpecialCasing(Latin1Char charCode)
+ToUpperCaseHasSpecialCasing(Latin1Char charCode)
 {
-    // Handle U+00DF LATIN SMALL LETTER SHARP S inline, all other Latin-1
-    // characters don't have special casing rules.
-    MOZ_ASSERT_IF(charCode != unicode::LATIN_SMALL_LETTER_SHARP_S,
-                  !unicode::CanUpperCaseSpecialCasing(charCode));
+    // U+00DF LATIN SMALL LETTER SHARP S is the only Latin-1 code point with
+    // special casing rules, so detect it inline.
+    bool hasUpperCaseSpecialCasing = charCode == unicode::LATIN_SMALL_LETTER_SHARP_S;
+    MOZ_ASSERT(hasUpperCaseSpecialCasing == unicode::ChangesWhenUpperCasedSpecialCasing(charCode));

-    return charCode == unicode::LATIN_SMALL_LETTER_SHARP_S;
+    return hasUpperCaseSpecialCasing;
 }

 static inline bool
-CanUpperCaseSpecialCasing(char16_t charCode)
+ToUpperCaseHasSpecialCasing(char16_t charCode)
 {
-    return unicode::CanUpperCaseSpecialCasing(charCode);
+    return unicode::ChangesWhenUpperCasedSpecialCasing(charCode);
 }

 static inline size_t
-LengthUpperCaseSpecialCasing(Latin1Char charCode)
+ToUpperCaseLengthSpecialCasing(Latin1Char charCode)
 {
    // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
    MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
@ -1140,15 +1140,15 @@ LengthUpperCaseSpecialCasing(Latin1Char charCode)
 }

 static inline size_t
-LengthUpperCaseSpecialCasing(char16_t charCode)
+ToUpperCaseLengthSpecialCasing(char16_t charCode)
 {
-    MOZ_ASSERT(::CanUpperCaseSpecialCasing(charCode));
+    MOZ_ASSERT(ToUpperCaseHasSpecialCasing(charCode));

    return unicode::LengthUpperCaseSpecialCasing(charCode);
 }

 static inline void
-AppendUpperCaseSpecialCasing(char16_t charCode, Latin1Char* elements, size_t* index)
+ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode, Latin1Char* elements, size_t* index)
 {
    // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
    MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
@ -1159,7 +1159,7 @@ AppendUpperCaseSpecialCasing(char16_t charCode, Latin1Char* elements, size_t* in
 }

 static inline void
-AppendUpperCaseSpecialCasing(char16_t charCode, char16_t* elements, size_t* index)
+ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode, char16_t* elements, size_t* index)
 {
    unicode::AppendUpperCaseSpecialCasing(charCode, elements, index);
 }
@ -1191,12 +1191,12 @@ ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t startIndex,
            }
        }

-        if (MOZ_UNLIKELY(c > 0x7f && ::CanUpperCaseSpecialCasing(static_cast<SrcChar>(c)))) {
+        if (MOZ_UNLIKELY(c > 0x7f && ToUpperCaseHasSpecialCasing(static_cast<SrcChar>(c)))) {
            // Return if the output buffer is too small.
            if (srcLength == destLength)
                return i;

-            ::AppendUpperCaseSpecialCasing(c, destChars, &j);
+            ToUpperCaseAppendUpperCaseSpecialCasing(c, destChars, &j);
            continue;
        }

@ -1226,8 +1226,8 @@ ToUpperCaseLength(const CharT* chars, size_t startIndex, size_t length)
    for (size_t i = startIndex; i < length; i++) {
        char16_t c = chars[i];

-        if (c > 0x7f && ::CanUpperCaseSpecialCasing(static_cast<CharT>(c)))
-            upperLength += ::LengthUpperCaseSpecialCasing(static_cast<CharT>(c)) - 1;
+        if (c > 0x7f && ToUpperCaseHasSpecialCasing(static_cast<CharT>(c)))
+            upperLength += ToUpperCaseLengthSpecialCasing(static_cast<CharT>(c)) - 1;
    }
    return upperLength;
 }
@ -1307,7 +1307,7 @@ ToUpperCase(JSContext* cx, JSLinearString* str)
                }

                MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR ||
-                           ::CanUpperCaseSpecialCasing(c));
+                           ToUpperCaseHasSpecialCasing(c));
            }
        }

@ -1319,7 +1319,7 @@ ToUpperCase(JSContext* cx, JSLinearString* str)
                if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
                    CharT trail = chars[i + 1];
                    if (unicode::IsTrailSurrogate(trail)) {
-                        if (unicode::CanUpperCaseNonBMP(c, trail))
+                        if (unicode::ChangesWhenUpperCasedNonBMP(c, trail))
                            break;

                        i++;
@ -1327,9 +1327,9 @@ ToUpperCase(JSContext* cx, JSLinearString* str)
                    }
                }
            }
-            if (unicode::CanUpperCase(c))
+            if (unicode::ChangesWhenUpperCased(c))
                break;
-            if (MOZ_UNLIKELY(c > 0x7f && ::CanUpperCaseSpecialCasing(c)))
+            if (MOZ_UNLIKELY(c > 0x7f && ToUpperCaseHasSpecialCasing(c)))
                break;
        }

--- a/js/src/util/Unicode.cpp
+++ b/js/src/util/Unicode.cpp
@ -2679,7 +2679,7 @@ js::unicode::IsIdentifierPartNonBMP(uint32_t codePoint)
 }

 bool
-js::unicode::CanUpperCaseSpecialCasing(char16_t ch)
+js::unicode::ChangesWhenUpperCasedSpecialCasing(char16_t ch)
 {
    if (ch < 0x00DF || ch > 0xFB17)
        return false;
--- a/js/src/util/Unicode.h
+++ b/js/src/util/Unicode.h
@ -255,8 +255,9 @@ IsSpaceOrBOM2(char16_t ch)
 }

 /*
- * Returns the simple upper case mapping (see CanUpperCaseSpecialCasing for
- * details) of the given UTF-16 code unit.
+ * Returns the simple upper case mapping (possibly the identity mapping; see
+ * ChangesWhenUpperCasedSpecialCasing for details) of the given UTF-16 code
+ * unit.
 */
 inline char16_t
 ToUpperCase(char16_t ch)
@ -273,8 +274,9 @@ ToUpperCase(char16_t ch)
 }

 /*
- * Returns the simple lower case mapping (see CanUpperCaseSpecialCasing for
- * details) of the given UTF-16 code unit.
+ * Returns the simple lower case mapping (possibly the identity mapping; see
+ * ChangesWhenUpperCasedSpecialCasing for details) of the given UTF-16 code
+ * unit.
 */
 inline char16_t
 ToLowerCase(char16_t ch)
@ -290,32 +292,46 @@ ToLowerCase(char16_t ch)
    return uint16_t(ch) + info.lowerCase;
 }

-// Returns true iff ToUpperCase(ch) != ch.
+/**
+ * Returns true iff ToUpperCase(ch) != ch.
+ *
+ * This function isn't guaranteed to correctly handle code points for which
+ * |ChangesWhenUpperCasedSpecialCasing| returns true, so it is *not* always the
+ * same as the value of the Changes_When_Uppercased Unicode property for
+ * the code point.
+ */
 inline bool
-CanUpperCase(char16_t ch)
+ChangesWhenUpperCased(char16_t ch)
 {
    if (ch < 128)
        return ch >= 'a' && ch <= 'z';
    return CharInfo(ch).upperCase != 0;
 }

-// Returns true iff ToUpperCase(ch) != ch.
+/**
+ * Returns true iff ToUpperCase(ch) != ch.
+ *
+ * This function isn't guaranteed to correctly handle code points for which
+ * |ChangesWhenUpperCasedSpecialCasing| returns true, so it is *not* always the
+ * same as the value of the Changes_When_Uppercased Unicode property for
+ * the code point.
+ */
 inline bool
-CanUpperCase(JS::Latin1Char ch)
+ChangesWhenUpperCased(JS::Latin1Char ch)
 {
    if (MOZ_LIKELY(ch < 128))
        return ch >= 'a' && ch <= 'z';

    // U+00B5 and U+00E0 to U+00FF, except U+00F7, have an uppercase form.
-    bool canUpper = ch == MICRO_SIGN ||
+    bool hasUpper = ch == MICRO_SIGN ||
                    (((ch & ~0x1F) == LATIN_SMALL_LETTER_A_WITH_GRAVE) && ch != DIVISION_SIGN);
-    MOZ_ASSERT(canUpper == CanUpperCase(char16_t(ch)));
-    return canUpper;
+    MOZ_ASSERT(hasUpper == ChangesWhenUpperCased(char16_t(ch)));
+    return hasUpper;
 }

 // Returns true iff ToLowerCase(ch) != ch.
 inline bool
-CanLowerCase(char16_t ch)
+ChangesWhenLowerCased(char16_t ch)
 {
    if (ch < 128)
        return ch >= 'A' && ch <= 'Z';
@ -324,16 +340,16 @@ CanLowerCase(char16_t ch)

 // Returns true iff ToLowerCase(ch) != ch.
 inline bool
-CanLowerCase(JS::Latin1Char ch)
+ChangesWhenLowerCased(JS::Latin1Char ch)
 {
    if (MOZ_LIKELY(ch < 128))
        return ch >= 'A' && ch <= 'Z';

    // U+00C0 to U+00DE, except U+00D7, have a lowercase form.
-    bool canLower = ((ch & ~0x1F) == LATIN_CAPITAL_LETTER_A_WITH_GRAVE) &&
+    bool hasLower = ((ch & ~0x1F) == LATIN_CAPITAL_LETTER_A_WITH_GRAVE) &&
                    ((ch & MULTIPLICATION_SIGN) != MULTIPLICATION_SIGN);
-    MOZ_ASSERT(canLower == CanLowerCase(char16_t(ch)));
-    return canLower;
+    MOZ_ASSERT(hasLower == ChangesWhenLowerCased(char16_t(ch)));
+    return hasLower;
 }

 #define CHECK_RANGE(FROM, TO, LEAD, TRAIL_FROM, TRAIL_TO, DIFF) \
@ -341,14 +357,14 @@ CanLowerCase(JS::Latin1Char ch)
        return true;

 inline bool
-CanUpperCaseNonBMP(char16_t lead, char16_t trail)
+ChangesWhenUpperCasedNonBMP(char16_t lead, char16_t trail)
 {
    FOR_EACH_NON_BMP_UPPERCASE(CHECK_RANGE)
    return false;
 }

 inline bool
-CanLowerCaseNonBMP(char16_t lead, char16_t trail)
+ChangesWhenLowerCasedNonBMP(char16_t lead, char16_t trail)
 {
    FOR_EACH_NON_BMP_LOWERCASE(CHECK_RANGE)
    return false;
@ -381,24 +397,36 @@ ToLowerCaseNonBMPTrail(char16_t lead, char16_t trail)
 }

 /*
- * Returns true if the given UTF-16 code unit has a language-independent,
- * unconditional or conditional special upper case mapping.
+ * Returns true if, independent of language/locale, the given UTF-16 code unit
+ * has a special upper case mapping.
 *
 * Unicode defines two case mapping modes:
- * 1. "simple case mappings" for one-to-one mappings which are independent of
- *    context and language (defined in UnicodeData.txt).
- * 2. "special case mappings" for mappings which can increase or decrease the
- *    string length; or are dependent on context or locale (defined in
- *    SpecialCasing.txt).
 *
- * The CanUpperCase() method defined above only supports simple case mappings.
- * In order to support the full case mappings of all Unicode characters,
- * callers need to check this method in addition to CanUpperCase().
+ *   1. "simple case mappings" (defined in UnicodeData.txt) for one-to-one
+ *      mappings that are always the same regardless of locale or context
+ *      within a string (e.g. "a"→"A").
+ *   2. "special case mappings" (defined in SpecialCasing.txt) for mappings
+ *      that alter string length (e.g. uppercasing "ß"→"SS") or where different
+ *      mappings occur depending on language/locale (e.g. uppercasing "i"→"I"
+ *      usually but "i"→"İ" in Turkish) or context within the string (e.g.
+ *      lowercasing "Σ" U+03A3 GREEK CAPITAL LETTER SIGMA to "ς" U+03C2 GREEK
+ *      SMALL LETTER FINAL SIGMA when the sigma appears [roughly speaking] at
+ *      the end of a word but "σ" U+03C3 GREEK SMALL LETTER SIGMA anywhere
+ *      else).
 *
- * NOTE: All special upper case mappings are unconditional in Unicode 9.
+ * The ChangesWhenUpperCased*() functions defined above will return true for
+ * code points that have simple case mappings, but they may not return the
+ * right result for code points that have special case mappings.  To correctly
+ * support full case mappings for all code points, callers must determine
+ * whether this function returns true or false for the code point, then use
+ * AppendUpperCaseSpecialCasing in the former case and ToUpperCase in the
+ * latter.
+ *
+ * NOTE: All special upper case mappings are unconditional (that is, they don't
+ *       depend on language/locale or context within the string) in Unicode 10.
 */
 bool
-CanUpperCaseSpecialCasing(char16_t ch);
+ChangesWhenUpperCasedSpecialCasing(char16_t ch);

 /*
 * Returns the length of the upper case mapping of |ch|.
--- a/js/src/util/make_unicode.py
+++ b/js/src/util/make_unicode.py
@ -723,10 +723,10 @@ def write_special_casing_methods(unconditional_toupper, codepoint_table, println
            println(indent, '    return {};'.format(range_test_expr))
            println(indent, '}')

-    def write_CanUpperCaseSpecialCasing():
+    def write_ChangesWhenUpperCasedSpecialCasing():
        """ Checks if the input has a special upper case mapping. """
        println('bool')
-        println('js::unicode::CanUpperCaseSpecialCasing(char16_t ch)')
+        println('js::unicode::ChangesWhenUpperCasedSpecialCasing(char16_t ch)')
        println('{')

        assert unconditional_toupper, "|unconditional_toupper| is not empty"
@ -816,7 +816,7 @@ def write_special_casing_methods(unconditional_toupper, codepoint_table, println

        println('}')

-    write_CanUpperCaseSpecialCasing()
+    write_ChangesWhenUpperCasedSpecialCasing()
    println('')
    write_LengthUpperCaseSpecialCasing()
    println('')