Bug 1431957 - Move SharedIntlData into its own builtin/intl/SharedIntlData.* files so the world doesn't have to import all shared Intl functionality. r=anba

--HG-- extra : rebase_source : 61e7615c8bb172aa8fe4fe8a4d3e62458410b0c8
2018-01-19 17:27:14 -08:00 · 2018-01-19 17:27:14 -08:00 · 77349d1f5b
--- a/js/src/builtin/Intl.cpp
+++ b/js/src/builtin/Intl.cpp
@ -33,7 +33,7 @@
 #include "builtin/intl/ICUStubs.h"
 #include "builtin/intl/NumberFormat.h"
 #include "builtin/intl/ScopedICUObject.h"
-#include "builtin/IntlTimeZoneData.h"
+#include "builtin/intl/SharedIntlData.h"
 #include "ds/Sort.h"
 #include "gc/FreeOp.h"
 #include "js/Date.h"
@ -66,6 +66,7 @@ using js::intl::DateTimeFormatOptions;
 using js::intl::GetAvailableLocales;
 using js::intl::IcuLocale;
 using js::intl::INITIAL_CHAR_BUFFER_SIZE;
 using js::intl::SharedIntlData;
 using js::intl::StringsAreEqual;
 /******************** DateTimeFormat ********************/
@ -385,280 +386,6 @@ js::intl_defaultCalendar(JSContext* cx, unsigned argc, Value* vp)
    return DefaultCalendar(cx, locale, args.rval());
 }
 // Map an ASCII lowercase letter ('a'..'z') to its uppercase counterpart.
 // Any other code unit, ASCII or not, is returned unchanged.
 template<typename Char>
 static constexpr Char
 ToUpperASCII(Char c)
 {
    // ASCII upper- and lowercase letters differ only in bit 0x20.
    return (c < 'a' || 'z' < c)
           ? c
           : (c & ~0x20);
 }
 // Compile-time spot checks for both narrow and UTF-16 code units.
 static_assert(ToUpperASCII('a') == 'A', "verifying 'a' uppercases correctly");
 static_assert(ToUpperASCII('m') == 'M', "verifying 'm' uppercases correctly");
 static_assert(ToUpperASCII('z') == 'Z', "verifying 'z' uppercases correctly");
 static_assert(ToUpperASCII(u'a') == u'A', "verifying u'a' uppercases correctly");
 static_assert(ToUpperASCII(u'k') == u'K', "verifying u'k' uppercases correctly");
 static_assert(ToUpperASCII(u'z') == u'Z', "verifying u'z' uppercases correctly");
 // Compare the first |len| code units of |s1| and |s2|, treating ASCII
 // letters case-insensitively. The two ranges may use different character
 // types.
 template<typename Char1, typename Char2>
 static bool
 EqualCharsIgnoreCaseASCII(const Char1* s1, const Char2* s2, size_t len)
 {
    for (size_t i = 0; i < len; i++) {
        if (ToUpperASCII(s1[i]) != ToUpperASCII(s2[i]))
            return false;
    }
    return true;
 }
 // Hash |length| code units starting at |s|, folding ASCII lowercase letters
 // to uppercase first so that strings differing only in ASCII case hash
 // identically.
 template<typename Char>
 static js::HashNumber
 HashStringIgnoreCaseASCII(const Char* s, size_t length)
 {
    uint32_t accumulated = 0;
    for (const Char* cur = s, *end = s + length; cur != end; ++cur)
        accumulated = mozilla::AddToHash(accumulated, ToUpperASCII(*cur));
    return accumulated;
 }
 // Build a lookup key for |timeZone|: the base class captures the character
 // pointer and length, and the hash is computed ignoring ASCII case.
 js::SharedIntlData::TimeZoneHasher::Lookup::Lookup(JSLinearString* timeZone)
  : js::SharedIntlData::LinearStringLookup(timeZone)
 {
    hash = isLatin1
           ? HashStringIgnoreCaseASCII(latin1Chars, length)
           : HashStringIgnoreCaseASCII(twoByteChars, length);
 }
 // Return whether the stored atom |key| names the same time zone as |lookup|,
 // ignoring ASCII case differences. Handles every Latin-1/two-byte pairing.
 bool
 js::SharedIntlData::TimeZoneHasher::match(TimeZoneName key, const Lookup& lookup)
 {
    const size_t length = lookup.length;
    if (key->length() != length)
        return false;
    if (!key->hasLatin1Chars()) {
        const char16_t* wideKey = key->twoByteChars(lookup.nogc);
        return lookup.isLatin1
               ? EqualCharsIgnoreCaseASCII(lookup.latin1Chars, wideKey, length)
               : EqualCharsIgnoreCaseASCII(wideKey, lookup.twoByteChars, length);
    }
    const Latin1Char* narrowKey = key->latin1Chars(lookup.nogc);
    return lookup.isLatin1
           ? EqualCharsIgnoreCaseASCII(narrowKey, lookup.latin1Chars, length)
           : EqualCharsIgnoreCaseASCII(narrowKey, lookup.twoByteChars, length);
 }
 // Return whether |timeZone| appears in the js::timezone::legacyICUTimeZones
 // table.
 static bool
 IsLegacyICUTimeZone(const char* timeZone)
 {
    for (const auto& legacyName : js::timezone::legacyICUTimeZones) {
        if (!StringsAreEqual(timeZone, legacyName))
            continue;
        return true;
    }
    return false;
 }
 // Builds the three time zone tables (availableTimeZones,
 // ianaZonesTreatedAsLinksByICU, ianaLinksCanonicalizedDifferentlyByICU) the
 // first time it is called; later calls return true immediately once
 // |timeZoneDataInitialized| is set.
 //
 // Returns false on failure. OOM while growing a table and ICU errors are
 // reported here; a failed Atomize() call just propagates false (presumably
 // Atomize has already reported the error -- confirm against its contract).
 bool
 js::SharedIntlData::ensureTimeZones(JSContext* cx)
 {
    if (timeZoneDataInitialized)
        return true;
    // If ensureTimeZones() was called previously, but didn't complete due to
    // OOM, clear all sets/maps and start from scratch.
    if (availableTimeZones.initialized())
        availableTimeZones.finish();
    if (!availableTimeZones.init()) {
        ReportOutOfMemory(cx);
        return false;
    }
    // Step 1: collect every time zone name ICU enumerates, minus the legacy
    // ICU-only names.
    UErrorCode status = U_ZERO_ERROR;
    UEnumeration* values = ucal_openTimeZones(&status);
    if (U_FAILURE(status)) {
        intl::ReportInternalError(cx);
        return false;
    }
    // Ties uenum_close to this scope (judging by the ScopedICUObject name and
    // template arguments).
    ScopedICUObject<UEnumeration, uenum_close> toClose(values);
    RootedAtom timeZone(cx);
    while (true) {
        int32_t size;
        const char* rawTimeZone = uenum_next(values, &size, &status);
        if (U_FAILURE(status)) {
            intl::ReportInternalError(cx);
            return false;
        }
        // A null result without an error status ends the enumeration.
        if (rawTimeZone == nullptr)
            break;
        // Skip legacy ICU time zone names.
        if (IsLegacyICUTimeZone(rawTimeZone))
            continue;
        MOZ_ASSERT(size >= 0);
        timeZone = Atomize(cx, rawTimeZone, size_t(size));
        if (!timeZone)
            return false;
        TimeZoneHasher::Lookup lookup(timeZone);
        TimeZoneSet::AddPtr p = availableTimeZones.lookupForAdd(lookup);
        // ICU shouldn't report any duplicate time zone names, but if it does,
        // just ignore the duplicate name.
        if (!p && !availableTimeZones.add(p, timeZone)) {
            ReportOutOfMemory(cx);
            return false;
        }
    }
    // Step 2: load the static list of IANA Zones that ICU treats as Links.
    if (ianaZonesTreatedAsLinksByICU.initialized())
        ianaZonesTreatedAsLinksByICU.finish();
    if (!ianaZonesTreatedAsLinksByICU.init()) {
        ReportOutOfMemory(cx);
        return false;
    }
    for (const char* rawTimeZone : timezone::ianaZonesTreatedAsLinksByICU) {
        MOZ_ASSERT(rawTimeZone != nullptr);
        timeZone = Atomize(cx, rawTimeZone, strlen(rawTimeZone));
        if (!timeZone)
            return false;
        TimeZoneHasher::Lookup lookup(timeZone);
        TimeZoneSet::AddPtr p = ianaZonesTreatedAsLinksByICU.lookupForAdd(lookup);
        MOZ_ASSERT(!p, "Duplicate entry in timezone::ianaZonesTreatedAsLinksByICU");
        if (!ianaZonesTreatedAsLinksByICU.add(p, timeZone)) {
            ReportOutOfMemory(cx);
            return false;
        }
    }
    // Step 3: load the static link -> target pairs for IANA Links that ICU
    // canonicalizes differently.
    if (ianaLinksCanonicalizedDifferentlyByICU.initialized())
        ianaLinksCanonicalizedDifferentlyByICU.finish();
    if (!ianaLinksCanonicalizedDifferentlyByICU.init()) {
        ReportOutOfMemory(cx);
        return false;
    }
    RootedAtom linkName(cx);
    // |target| is just another name for the existing |timeZone| root, which is
    // no longer needed for its earlier purpose.
    RootedAtom& target = timeZone;
    for (const auto& linkAndTarget : timezone::ianaLinksCanonicalizedDifferentlyByICU) {
        const char* rawLinkName = linkAndTarget.link;
        const char* rawTarget = linkAndTarget.target;
        MOZ_ASSERT(rawLinkName != nullptr);
        linkName = Atomize(cx, rawLinkName, strlen(rawLinkName));
        if (!linkName)
            return false;
        MOZ_ASSERT(rawTarget != nullptr);
        target = Atomize(cx, rawTarget, strlen(rawTarget));
        if (!target)
            return false;
        TimeZoneHasher::Lookup lookup(linkName);
        TimeZoneMap::AddPtr p = ianaLinksCanonicalizedDifferentlyByICU.lookupForAdd(lookup);
        MOZ_ASSERT(!p, "Duplicate entry in timezone::ianaLinksCanonicalizedDifferentlyByICU");
        if (!ianaLinksCanonicalizedDifferentlyByICU.add(p, linkName, target)) {
            ReportOutOfMemory(cx);
            return false;
        }
    }
    // The flag is set only after all three tables were filled, so any earlier
    // failure leaves it false and the next call rebuilds everything.
    MOZ_ASSERT(!timeZoneDataInitialized, "ensureTimeZones is neither reentrant nor thread-safe");
    timeZoneDataInitialized = true;
    return true;
 }
 // Look |timeZone| up (ignoring ASCII case) in the set of available time
 // zones. On a hit, |result| receives the stored atom; on a miss, |result| is
 // left untouched. Returns false only if the tables couldn't be built or
 // |timeZone| couldn't be linearized.
 bool
 js::SharedIntlData::validateTimeZoneName(JSContext* cx, HandleString timeZone,
                                          MutableHandleAtom result)
 {
    if (!ensureTimeZones(cx))
        return false;
    RootedLinearString linear(cx, timeZone->ensureLinear(cx));
    if (!linear)
        return false;
    TimeZoneHasher::Lookup key(linear);
    TimeZoneSet::Ptr entry = availableTimeZones.lookup(key);
    if (entry)
        result.set(*entry);
    return true;
 }
 // Apply the IANA-consistent canonicalization for |timeZone| in the cases
 // where ICU canonicalizes differently. |result| is written only when such a
 // case applies; otherwise it is left untouched. Returns false only if the
 // tables couldn't be built or |timeZone| couldn't be linearized.
 bool
 js::SharedIntlData::tryCanonicalizeTimeZoneConsistentWithIANA(JSContext* cx, HandleString timeZone,
                                                               MutableHandleAtom result)
 {
    if (!ensureTimeZones(cx))
        return false;
    RootedLinearString linear(cx, timeZone->ensureLinear(cx));
    if (!linear)
        return false;
    TimeZoneHasher::Lookup key(linear);
    MOZ_ASSERT(availableTimeZones.has(key), "Invalid time zone name");
    if (TimeZoneMap::Ptr link = ianaLinksCanonicalizedDifferentlyByICU.lookup(key)) {
        // The set of time zones ICU actually supports isn't known at compile
        // time when either
        // 1. SpiderMonkey was compiled with "--with-system-icu", or
        // 2. ICU's dynamic time zone data loading feature was used (ICU can
        //    load time zone files at runtime through the
        //    ICU_TIMEZONE_FILES_DIR environment variable).
        // So only switch to the IANA target if ICU supports that zone.
        TimeZoneName ianaTarget = link->value();
        TimeZoneHasher::Lookup targetKey(ianaTarget);
        if (availableTimeZones.has(targetKey))
            result.set(ianaTarget);
        return true;
    }
    if (TimeZoneSet::Ptr zone = ianaZonesTreatedAsLinksByICU.lookup(key))
        result.set(*zone);
    return true;
 }
 // Calls finish() on every table owned by this object: the three time zone
 // tables and the upper-case-first locale set.
 void
 js::SharedIntlData::destroyInstance()
 {
    availableTimeZones.finish();
    ianaZonesTreatedAsLinksByICU.finish();
    ianaLinksCanonicalizedDifferentlyByICU.finish();
    upperCaseFirstLocales.finish();
 }
 // Trace all four tables, except while the current thread is running a minor
 // collection.
 void
 js::SharedIntlData::trace(JSTracer* trc)
 {
    // Atoms are always tenured, so a minor collection can skip these tables.
    if (JS::CurrentThreadIsHeapMinorCollecting())
        return;
    availableTimeZones.trace(trc);
    ianaZonesTreatedAsLinksByICU.trace(trc);
    ianaLinksCanonicalizedDifferentlyByICU.trace(trc);
    upperCaseFirstLocales.trace(trc);
 }
 // Sum of sizeOfExcludingThis() over the four tables, measured with
 // |mallocSizeOf|.
 size_t
 js::SharedIntlData::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const
 {
    size_t total = availableTimeZones.sizeOfExcludingThis(mallocSizeOf);
    total += ianaZonesTreatedAsLinksByICU.sizeOfExcludingThis(mallocSizeOf);
    total += ianaLinksCanonicalizedDifferentlyByICU.sizeOfExcludingThis(mallocSizeOf);
    total += upperCaseFirstLocales.sizeOfExcludingThis(mallocSizeOf);
    return total;
 }
 bool
 js::intl_IsValidTimeZoneName(JSContext* cx, unsigned argc, Value* vp)
 {
--- a/js/src/builtin/Intl.h
+++ b/js/src/builtin/Intl.h
@ -36,204 +36,6 @@ class FreeOp;
 extern JSObject*
 InitIntlClass(JSContext* cx, HandleObject obj);
 /**
 * Stores Intl data which can be shared across compartments (but not contexts).
 *
 * Used for data which is expensive when computed repeatedly or is not
 * available through ICU.
 */
 class SharedIntlData
 {
    /**
     * Common base for the hash-table lookup keys below: a view of a linear
     * string's characters together with its length and a hash slot.
     *
     * Exactly one member of the union is set, selected by |isLatin1|. |nogc|
     * is the token handed to latin1Chars()/twoByteChars() when the character
     * pointer is obtained. |hash| starts out as 0; the derived Lookup
     * constructors are expected to fill it in.
     */
    struct LinearStringLookup
    {
        union {
            const JS::Latin1Char* latin1Chars;
            const char16_t* twoByteChars;
        };
        bool isLatin1;
        size_t length;
        JS::AutoCheckCannotGC nogc;
        HashNumber hash = 0;
        explicit LinearStringLookup(JSLinearString* string)
          : isLatin1(string->hasLatin1Chars()), length(string->length())
        {
            if (isLatin1)
                latin1Chars = string->latin1Chars(nogc);
            else
                twoByteChars = string->twoByteChars(nogc);
        }
    };
  private:
    /**
     * Information tracking the set of the supported time zone names, derived
     * from the IANA time zone database <https://www.iana.org/time-zones>.
     *
     * There are two kinds of IANA time zone names: Zone and Link (denoted as
     * such in database source files). Zone names are the canonical, preferred
     * name for a time zone, e.g. Asia/Kolkata. Link names simply refer to
     * target Zone names for their meaning, e.g. Asia/Calcutta targets
     * Asia/Kolkata. That a name is a Link doesn't *necessarily* reflect a
     * sense of deprecation: some Link names also exist partly for convenience,
     * e.g. UTC and GMT as Link names targeting the Zone name Etc/UTC.
     *
     * Two data sources determine the time zone names we support: those ICU
     * supports and IANA's zone information.
     *
     * Unfortunately the names ICU and IANA support, and their Link
     * relationships from name to target, aren't identical, so we can't simply
     * implicitly trust ICU's name handling. We must perform various
     * preprocessing of user-provided zone names and post-processing of
     * ICU-provided zone names to implement ECMA-402's IANA-consistent behavior.
     *
     * Also see <https://ssl.icu-project.org/trac/ticket/12044> and
     * <http://unicode.org/cldr/trac/ticket/9892>.
     */
    using TimeZoneName = JSAtom*;
    /**
     * Hash policy for the time zone tables. The hash is precomputed by the
     * Lookup constructor and simply returned by hash(); match() is defined
     * out of line.
     */
    struct TimeZoneHasher
    {
        struct Lookup : LinearStringLookup
        {
            explicit Lookup(JSLinearString* timeZone);
        };
        static js::HashNumber hash(const Lookup& lookup) { return lookup.hash; }
        static bool match(TimeZoneName key, const Lookup& lookup);
    };
    using TimeZoneSet = js::GCHashSet<TimeZoneName,
                                      TimeZoneHasher,
                                      js::SystemAllocPolicy>;
    using TimeZoneMap = js::GCHashMap<TimeZoneName,
                                      TimeZoneName,
                                      TimeZoneHasher,
                                      js::SystemAllocPolicy>;
    /**
     * As a threshold matter, available time zones are those time zones ICU
     * supports, via ucal_openTimeZones. But ICU supports additional non-IANA
     * time zones described in intl/icu/source/tools/tzcode/icuzones (listed in
     * IntlTimeZoneData.cpp's |legacyICUTimeZones|) for its own backwards
     * compatibility purposes. This set consists of ICU's supported time zones,
     * minus all backwards-compatibility time zones.
     */
    TimeZoneSet availableTimeZones;
    /**
     * IANA treats some time zone names as Zones, that ICU instead treats as
     * Links. For example, IANA considers "America/Indiana/Indianapolis" to be
     * a Zone and "America/Fort_Wayne" a Link that targets it, but ICU
     * considers the former a Link that targets "America/Indianapolis" (which
     * IANA treats as a Link).
     *
     * ECMA-402 requires that we respect IANA data, so if we're asked to
     * canonicalize a time zone name in this set, we must *not* return ICU's
     * canonicalization.
     */
    TimeZoneSet ianaZonesTreatedAsLinksByICU;
    /**
     * IANA treats some time zone names as Links to one target, that ICU
     * instead treats as either Zones, or Links to different targets. An
     * example of the former is "Asia/Calcutta", which IANA assigns the target
     * "Asia/Kolkata" but ICU considers its own Zone. An example of the latter
     * is "America/Virgin", which IANA assigns the target
     * "America/Port_of_Spain" but ICU assigns the target "America/St_Thomas".
     *
     * ECMA-402 requires that we respect IANA data, so if we're asked to
     * canonicalize a time zone name that's a key in this map, we *must* return
     * the corresponding value and *must not* return ICU's canonicalization.
     */
    TimeZoneMap ianaLinksCanonicalizedDifferentlyByICU;
    /** Whether the three time zone tables above have been populated. */
    bool timeZoneDataInitialized = false;
    /**
     * Precomputes the available time zone names, because it's too expensive to
     * call ucal_openTimeZones() repeatedly.
     */
    bool ensureTimeZones(JSContext* cx);
  public:
    /**
     * Returns the validated time zone name in |result|. If the input time zone
     * isn't a valid IANA time zone name, |result| remains unchanged.
     */
    bool validateTimeZoneName(JSContext* cx, JS::HandleString timeZone,
                              MutableHandleAtom result);
    /**
     * Returns the canonical time zone name in |result|. If no canonical name
     * was found, |result| remains unchanged.
     *
     * This method only handles time zones which are canonicalized differently
     * by ICU when compared to IANA.
     */
    bool tryCanonicalizeTimeZoneConsistentWithIANA(JSContext* cx, JS::HandleString timeZone,
                                                   MutableHandleAtom result);
  private:
    /**
     * The case first parameter (BCP47 key "kf") allows switching the order of
     * upper- and lower-case characters. ICU doesn't directly provide an API
     * to query the default case first value of a given locale, but instead
     * requires instantiating a collator object and then querying the case
     * first attribute (UCOL_CASE_FIRST).
     * To avoid instantiating an additional collator object whenever we need
     * to retrieve the default case first value of a specific locale, we
     * compute the default case first value for every supported locale only
     * once and then keep a list of all locales which don't use the default
     * case first setting.
     * There is almost no difference between lower-case first and when case
     * first is disabled (UCOL_LOWER_FIRST resp. UCOL_OFF), so we only need to
     * track locales which use upper-case first as their default setting.
     */
    using Locale = JSAtom*;
    /**
     * Hash policy for the locale set. As with TimeZoneHasher, the hash is
     * precomputed by the Lookup constructor; match() is defined out of line.
     */
    struct LocaleHasher
    {
        struct Lookup : LinearStringLookup
        {
            explicit Lookup(JSLinearString* locale);
        };
        static js::HashNumber hash(const Lookup& lookup) { return lookup.hash; }
        static bool match(Locale key, const Lookup& lookup);
    };
    using LocaleSet = js::GCHashSet<Locale,
                                    LocaleHasher,
                                    js::SystemAllocPolicy>;
    LocaleSet upperCaseFirstLocales;
    /** Whether |upperCaseFirstLocales| has been populated. */
    bool upperCaseFirstInitialized = false;
    /**
     * Precomputes the available locales which use upper-case first sorting.
     */
    bool ensureUpperCaseFirstLocales(JSContext* cx);
  public:
    /**
     * Sets |isUpperFirst| to true if |locale| sorts upper-case characters
     * before lower-case characters.
     */
    bool isUpperCaseFirst(JSContext* cx, JS::HandleString locale, bool* isUpperFirst);
  public:
    /** Tear-down, GC tracing and memory reporting for all tables above. */
    void destroyInstance();
    void trace(JSTracer* trc);
    size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const;
 };
 /*
 * The following functions are for use by self-hosted code.
 */
--- a/js/src/builtin/intl/Collator.cpp
+++ b/js/src/builtin/intl/Collator.cpp
@ -16,6 +16,7 @@
 #include "builtin/intl/CommonFunctions.h"
 #include "builtin/intl/ICUStubs.h"
 #include "builtin/intl/ScopedICUObject.h"
 #include "builtin/intl/SharedIntlData.h"
 #include "gc/FreeOp.h"
 #include "js/TypeDecls.h"
 #include "vm/GlobalObject.h"
@ -29,6 +30,7 @@ using namespace js;
 using js::intl::GetAvailableLocales;
 using js::intl::IcuLocale;
 using js::intl::ReportInternalError;
 using js::intl::SharedIntlData;
 using js::intl::StringsAreEqual;
 const ClassOps CollatorObject::classOps_ = {
@ -464,124 +466,6 @@ js::intl_CompareStrings(JSContext* cx, unsigned argc, Value* vp)
    return intl_CompareStrings(cx, coll, str1, str2, args.rval());
 }
 // Build a lookup key for |locale|: the base class captures the character
 // pointer and length, and the hash is mozilla::HashString over those
 // characters (so, unlike time zones, the hash is case-sensitive).
 js::SharedIntlData::LocaleHasher::Lookup::Lookup(JSLinearString* locale)
  : js::SharedIntlData::LinearStringLookup(locale)
 {
    hash = isLatin1
           ? mozilla::HashString(latin1Chars, length)
           : mozilla::HashString(twoByteChars, length);
 }
 // Return whether the stored atom |key| has the same length and characters as
 // |lookup|. Handles every Latin-1/two-byte pairing.
 bool
 js::SharedIntlData::LocaleHasher::match(Locale key, const Lookup& lookup)
 {
    const size_t length = lookup.length;
    if (key->length() != length)
        return false;
    if (!key->hasLatin1Chars()) {
        const char16_t* wideKey = key->twoByteChars(lookup.nogc);
        return lookup.isLatin1
               ? EqualChars(lookup.latin1Chars, wideKey, length)
               : EqualChars(wideKey, lookup.twoByteChars, length);
    }
    const Latin1Char* narrowKey = key->latin1Chars(lookup.nogc);
    return lookup.isLatin1
           ? EqualChars(narrowKey, lookup.latin1Chars, length)
           : EqualChars(narrowKey, lookup.twoByteChars, length);
 }
 // Builds |upperCaseFirstLocales| the first time it is called; later calls
 // return true immediately once |upperCaseFirstInitialized| is set.
 //
 // Every locale ICU enumerates via ucol_openAvailableLocales() gets a
 // collator opened for it, and the locale is recorded only if that collator's
 // UCOL_CASE_FIRST attribute is UCOL_UPPER_FIRST.
 //
 // Returns false on failure. OOM while growing the set and ICU errors are
 // reported here; a failed Atomize() call just propagates false (presumably
 // Atomize has already reported the error -- confirm against its contract).
 bool
 js::SharedIntlData::ensureUpperCaseFirstLocales(JSContext* cx)
 {
    if (upperCaseFirstInitialized)
        return true;
    // If ensureUpperCaseFirstLocales() was called previously, but didn't
    // complete due to OOM, clear all data and start from scratch.
    if (upperCaseFirstLocales.initialized())
        upperCaseFirstLocales.finish();
    if (!upperCaseFirstLocales.init()) {
        ReportOutOfMemory(cx);
        return false;
    }
    UErrorCode status = U_ZERO_ERROR;
    UEnumeration* available = ucol_openAvailableLocales(&status);
    if (U_FAILURE(status)) {
        ReportInternalError(cx);
        return false;
    }
    // Ties uenum_close to this scope (judging by the ScopedICUObject name and
    // template arguments).
    ScopedICUObject<UEnumeration, uenum_close> toClose(available);
    RootedAtom locale(cx);
    while (true) {
        int32_t size;
        const char* rawLocale = uenum_next(available, &size, &status);
        if (U_FAILURE(status)) {
            ReportInternalError(cx);
            return false;
        }
        // A null result without an error status ends the enumeration.
        if (rawLocale == nullptr)
            break;
        UCollator* collator = ucol_open(rawLocale, &status);
        if (U_FAILURE(status)) {
            ReportInternalError(cx);
            return false;
        }
        // Ties ucol_close to this loop iteration, same pattern as |toClose|.
        ScopedICUObject<UCollator, ucol_close> toCloseCollator(collator);
        UColAttributeValue caseFirst = ucol_getAttribute(collator, UCOL_CASE_FIRST, &status);
        if (U_FAILURE(status)) {
            ReportInternalError(cx);
            return false;
        }
        // Only upper-case-first locales are tracked.
        if (caseFirst != UCOL_UPPER_FIRST)
            continue;
        MOZ_ASSERT(size >= 0);
        locale = Atomize(cx, rawLocale, size_t(size));
        if (!locale)
            return false;
        LocaleHasher::Lookup lookup(locale);
        LocaleSet::AddPtr p = upperCaseFirstLocales.lookupForAdd(lookup);
        // ICU shouldn't report any duplicate locales, but if it does, just
        // ignore the duplicated locale.
        if (!p && !upperCaseFirstLocales.add(p, locale)) {
            ReportOutOfMemory(cx);
            return false;
        }
    }
    // The flag is set only after the whole enumeration succeeded, so any
    // earlier failure leaves it false and the next call rebuilds the set.
    MOZ_ASSERT(!upperCaseFirstInitialized,
               "ensureUpperCaseFirstLocales is neither reentrant nor thread-safe");
    upperCaseFirstInitialized = true;
    return true;
 }
 // Store in |*isUpperFirst| whether |locale| is in the precomputed set of
 // upper-case-first locales. Returns false (leaving |*isUpperFirst| unset) if
 // the set couldn't be built or |locale| couldn't be linearized.
 bool
 js::SharedIntlData::isUpperCaseFirst(JSContext* cx, HandleString locale, bool* isUpperFirst)
 {
    if (!ensureUpperCaseFirstLocales(cx))
        return false;
    RootedLinearString linear(cx, locale->ensureLinear(cx));
    if (!linear)
        return false;
    LocaleHasher::Lookup key(linear);
    const bool found = upperCaseFirstLocales.has(key);
    *isUpperFirst = found;
    return true;
 }
 bool
 js::intl_isUpperCaseFirst(JSContext* cx, unsigned argc, Value* vp)
 {
--- a/js/src/builtin/intl/SharedIntlData.cpp
+++ b/js/src/builtin/intl/SharedIntlData.cpp
@ -0,0 +1,419 @@
 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 /* Runtime-wide Intl data shared across compartments. */
 #include "builtin/intl/SharedIntlData.h"
 #include "mozilla/Assertions.h"
 #include "mozilla/HashFunctions.h"
 #include <stdint.h>
 #include "jsatom.h"
 #include "jsstr.h"
 #include "builtin/intl/CommonFunctions.h"
 #include "builtin/intl/ICUStubs.h"
 #include "builtin/intl/ScopedICUObject.h"
 #include "builtin/IntlTimeZoneData.h"
 #include "js/Utility.h"
 using js::HashNumber;
 using js::intl::StringsAreEqual;
 // Map an ASCII lowercase letter ('a'..'z') to its uppercase counterpart.
 // Any other code unit, ASCII or not, is returned unchanged.
 template<typename Char>
 static constexpr Char
 ToUpperASCII(Char c)
 {
    // ASCII upper- and lowercase letters differ only in bit 0x20.
    return (c < 'a' || 'z' < c)
           ? c
           : (c & ~0x20);
 }
 // Compile-time spot checks for both narrow and UTF-16 code units.
 static_assert(ToUpperASCII('a') == 'A', "verifying 'a' uppercases correctly");
 static_assert(ToUpperASCII('m') == 'M', "verifying 'm' uppercases correctly");
 static_assert(ToUpperASCII('z') == 'Z', "verifying 'z' uppercases correctly");
 static_assert(ToUpperASCII(u'a') == u'A', "verifying u'a' uppercases correctly");
 static_assert(ToUpperASCII(u'k') == u'K', "verifying u'k' uppercases correctly");
 static_assert(ToUpperASCII(u'z') == u'Z', "verifying u'z' uppercases correctly");
 // Hash |length| code units starting at |s|, folding ASCII lowercase letters
 // to uppercase first so that strings differing only in ASCII case hash
 // identically.
 template<typename Char>
 static HashNumber
 HashStringIgnoreCaseASCII(const Char* s, size_t length)
 {
    uint32_t accumulated = 0;
    for (const Char* cur = s, *end = s + length; cur != end; ++cur)
        accumulated = mozilla::AddToHash(accumulated, ToUpperASCII(*cur));
    return accumulated;
 }
 // Build a lookup key for |timeZone|: the base class captures the character
 // pointer and length, and the hash is computed ignoring ASCII case.
 js::intl::SharedIntlData::TimeZoneHasher::Lookup::Lookup(JSLinearString* timeZone)
  : js::intl::SharedIntlData::LinearStringLookup(timeZone)
 {
    hash = isLatin1
           ? HashStringIgnoreCaseASCII(latin1Chars, length)
           : HashStringIgnoreCaseASCII(twoByteChars, length);
 }
 // Compare the first |len| code units of |s1| and |s2|, treating ASCII
 // letters case-insensitively. The two ranges may use different character
 // types.
 template<typename Char1, typename Char2>
 static bool
 EqualCharsIgnoreCaseASCII(const Char1* s1, const Char2* s2, size_t len)
 {
    for (size_t i = 0; i < len; i++) {
        if (ToUpperASCII(s1[i]) != ToUpperASCII(s2[i]))
            return false;
    }
    return true;
 }
 // Return whether the stored atom |key| names the same time zone as |lookup|,
 // ignoring ASCII case differences. Handles every Latin-1/two-byte pairing.
 bool
 js::intl::SharedIntlData::TimeZoneHasher::match(TimeZoneName key, const Lookup& lookup)
 {
    const size_t length = lookup.length;
    if (key->length() != length)
        return false;
    if (!key->hasLatin1Chars()) {
        const char16_t* wideKey = key->twoByteChars(lookup.nogc);
        return lookup.isLatin1
               ? EqualCharsIgnoreCaseASCII(lookup.latin1Chars, wideKey, length)
               : EqualCharsIgnoreCaseASCII(wideKey, lookup.twoByteChars, length);
    }
    const Latin1Char* narrowKey = key->latin1Chars(lookup.nogc);
    return lookup.isLatin1
           ? EqualCharsIgnoreCaseASCII(narrowKey, lookup.latin1Chars, length)
           : EqualCharsIgnoreCaseASCII(narrowKey, lookup.twoByteChars, length);
 }
 // Return whether |timeZone| appears in the js::timezone::legacyICUTimeZones
 // table.
 static bool
 IsLegacyICUTimeZone(const char* timeZone)
 {
    for (const auto& legacyName : js::timezone::legacyICUTimeZones) {
        if (!StringsAreEqual(timeZone, legacyName))
            continue;
        return true;
    }
    return false;
 }
 // Builds the three time zone tables (availableTimeZones,
 // ianaZonesTreatedAsLinksByICU, ianaLinksCanonicalizedDifferentlyByICU) the
 // first time it is called; later calls return true immediately once
 // |timeZoneDataInitialized| is set.
 //
 // Returns false on failure. OOM while growing a table and ICU errors are
 // reported here; a failed Atomize() call just propagates false (presumably
 // Atomize has already reported the error -- confirm against its contract).
 bool
 js::intl::SharedIntlData::ensureTimeZones(JSContext* cx)
 {
    if (timeZoneDataInitialized)
        return true;
    // If ensureTimeZones() was called previously, but didn't complete due to
    // OOM, clear all sets/maps and start from scratch.
    if (availableTimeZones.initialized())
        availableTimeZones.finish();
    if (!availableTimeZones.init()) {
        ReportOutOfMemory(cx);
        return false;
    }
    // Step 1: collect every time zone name ICU enumerates, minus the legacy
    // ICU-only names.
    UErrorCode status = U_ZERO_ERROR;
    UEnumeration* values = ucal_openTimeZones(&status);
    if (U_FAILURE(status)) {
        ReportInternalError(cx);
        return false;
    }
    // Ties uenum_close to this scope (judging by the ScopedICUObject name and
    // template arguments).
    ScopedICUObject<UEnumeration, uenum_close> toClose(values);
    RootedAtom timeZone(cx);
    while (true) {
        int32_t size;
        const char* rawTimeZone = uenum_next(values, &size, &status);
        if (U_FAILURE(status)) {
            ReportInternalError(cx);
            return false;
        }
        // A null result without an error status ends the enumeration.
        if (rawTimeZone == nullptr)
            break;
        // Skip legacy ICU time zone names.
        if (IsLegacyICUTimeZone(rawTimeZone))
            continue;
        MOZ_ASSERT(size >= 0);
        timeZone = Atomize(cx, rawTimeZone, size_t(size));
        if (!timeZone)
            return false;
        TimeZoneHasher::Lookup lookup(timeZone);
        TimeZoneSet::AddPtr p = availableTimeZones.lookupForAdd(lookup);
        // ICU shouldn't report any duplicate time zone names, but if it does,
        // just ignore the duplicate name.
        if (!p && !availableTimeZones.add(p, timeZone)) {
            ReportOutOfMemory(cx);
            return false;
        }
    }
    // Step 2: load the static list of IANA Zones that ICU treats as Links.
    if (ianaZonesTreatedAsLinksByICU.initialized())
        ianaZonesTreatedAsLinksByICU.finish();
    if (!ianaZonesTreatedAsLinksByICU.init()) {
        ReportOutOfMemory(cx);
        return false;
    }
    for (const char* rawTimeZone : timezone::ianaZonesTreatedAsLinksByICU) {
        MOZ_ASSERT(rawTimeZone != nullptr);
        timeZone = Atomize(cx, rawTimeZone, strlen(rawTimeZone));
        if (!timeZone)
            return false;
        TimeZoneHasher::Lookup lookup(timeZone);
        TimeZoneSet::AddPtr p = ianaZonesTreatedAsLinksByICU.lookupForAdd(lookup);
        MOZ_ASSERT(!p, "Duplicate entry in timezone::ianaZonesTreatedAsLinksByICU");
        if (!ianaZonesTreatedAsLinksByICU.add(p, timeZone)) {
            ReportOutOfMemory(cx);
            return false;
        }
    }
    // Step 3: load the static link -> target pairs for IANA Links that ICU
    // canonicalizes differently.
    if (ianaLinksCanonicalizedDifferentlyByICU.initialized())
        ianaLinksCanonicalizedDifferentlyByICU.finish();
    if (!ianaLinksCanonicalizedDifferentlyByICU.init()) {
        ReportOutOfMemory(cx);
        return false;
    }
    RootedAtom linkName(cx);
    // |target| is just another name for the existing |timeZone| root, which is
    // no longer needed for its earlier purpose.
    RootedAtom& target = timeZone;
    for (const auto& linkAndTarget : timezone::ianaLinksCanonicalizedDifferentlyByICU) {
        const char* rawLinkName = linkAndTarget.link;
        const char* rawTarget = linkAndTarget.target;
        MOZ_ASSERT(rawLinkName != nullptr);
        linkName = Atomize(cx, rawLinkName, strlen(rawLinkName));
        if (!linkName)
            return false;
        MOZ_ASSERT(rawTarget != nullptr);
        target = Atomize(cx, rawTarget, strlen(rawTarget));
        if (!target)
            return false;
        TimeZoneHasher::Lookup lookup(linkName);
        TimeZoneMap::AddPtr p = ianaLinksCanonicalizedDifferentlyByICU.lookupForAdd(lookup);
        MOZ_ASSERT(!p, "Duplicate entry in timezone::ianaLinksCanonicalizedDifferentlyByICU");
        if (!ianaLinksCanonicalizedDifferentlyByICU.add(p, linkName, target)) {
            ReportOutOfMemory(cx);
            return false;
        }
    }
    // The flag is set only after all three tables were filled, so any earlier
    // failure leaves it false and the next call rebuilds everything.
    MOZ_ASSERT(!timeZoneDataInitialized, "ensureTimeZones is neither reentrant nor thread-safe");
    timeZoneDataInitialized = true;
    return true;
 }
 // Look |timeZone| up (ignoring ASCII case) in the set of available time
 // zones. On a hit, |result| receives the stored atom; on a miss, |result| is
 // left untouched. Returns false only if the tables couldn't be built or
 // |timeZone| couldn't be linearized.
 bool
 js::intl::SharedIntlData::validateTimeZoneName(JSContext* cx, HandleString timeZone,
                                                MutableHandleAtom result)
 {
    if (!ensureTimeZones(cx))
        return false;
    RootedLinearString linear(cx, timeZone->ensureLinear(cx));
    if (!linear)
        return false;
    TimeZoneHasher::Lookup key(linear);
    TimeZoneSet::Ptr entry = availableTimeZones.lookup(key);
    if (entry)
        result.set(*entry);
    return true;
 }
 // Apply the IANA-consistent canonicalization for |timeZone| in the cases
 // where ICU canonicalizes differently. |result| is written only when such a
 // case applies; otherwise it is left untouched. Returns false only if the
 // tables couldn't be built or |timeZone| couldn't be linearized.
 bool
 js::intl::SharedIntlData::tryCanonicalizeTimeZoneConsistentWithIANA(JSContext* cx,
                                                                     HandleString timeZone,
                                                                     MutableHandleAtom result)
 {
    if (!ensureTimeZones(cx))
        return false;
    RootedLinearString linear(cx, timeZone->ensureLinear(cx));
    if (!linear)
        return false;
    TimeZoneHasher::Lookup key(linear);
    MOZ_ASSERT(availableTimeZones.has(key), "Invalid time zone name");
    if (TimeZoneMap::Ptr link = ianaLinksCanonicalizedDifferentlyByICU.lookup(key)) {
        // The set of time zones ICU actually supports isn't known at compile
        // time when either
        // 1. SpiderMonkey was compiled with "--with-system-icu", or
        // 2. ICU's dynamic time zone data loading feature was used (ICU can
        //    load time zone files at runtime through the
        //    ICU_TIMEZONE_FILES_DIR environment variable).
        // So only switch to the IANA target if ICU supports that zone.
        TimeZoneName ianaTarget = link->value();
        TimeZoneHasher::Lookup targetKey(ianaTarget);
        if (availableTimeZones.has(targetKey))
            result.set(ianaTarget);
        return true;
    }
    if (TimeZoneSet::Ptr zone = ianaZonesTreatedAsLinksByICU.lookup(key))
        result.set(*zone);
    return true;
 }
 // Build a lookup key for |locale|: the base class captures the character
 // pointer and length, and the hash is mozilla::HashString over those
 // characters (so, unlike time zones, the hash is case-sensitive).
 js::intl::SharedIntlData::LocaleHasher::Lookup::Lookup(JSLinearString* locale)
  : js::intl::SharedIntlData::LinearStringLookup(locale)
 {
    hash = isLatin1
           ? mozilla::HashString(latin1Chars, length)
           : mozilla::HashString(twoByteChars, length);
 }
 // Return whether the stored atom |key| has the same length and characters as
 // |lookup|. Handles every Latin-1/two-byte pairing.
 bool
 js::intl::SharedIntlData::LocaleHasher::match(Locale key, const Lookup& lookup)
 {
    const size_t length = lookup.length;
    if (key->length() != length)
        return false;
    if (!key->hasLatin1Chars()) {
        const char16_t* wideKey = key->twoByteChars(lookup.nogc);
        return lookup.isLatin1
               ? EqualChars(lookup.latin1Chars, wideKey, length)
               : EqualChars(wideKey, lookup.twoByteChars, length);
    }
    const Latin1Char* narrowKey = key->latin1Chars(lookup.nogc);
    return lookup.isLatin1
           ? EqualChars(narrowKey, lookup.latin1Chars, length)
           : EqualChars(narrowKey, lookup.twoByteChars, length);
 }
 /**
  * Populates |upperCaseFirstLocales| on first use: every locale reported by
  * ucol_openAvailableLocales() whose collator has UCOL_CASE_FIRST set to
  * UCOL_UPPER_FIRST is atomized and inserted into the set.
  *
  * Returns true once the set is fully built (or was already built). Returns
  * false after reporting an error if the set can't be allocated, an ICU call
  * fails, or atomizing a locale name fails.
  */
 bool
 js::intl::SharedIntlData::ensureUpperCaseFirstLocales(JSContext* cx)
 {
    // Nothing to do when an earlier call already finished successfully.
    if (upperCaseFirstInitialized)
        return true;
    // An earlier call may have been cut short by OOM after the set was
    // created; throw away any partial contents and rebuild from scratch.
    if (upperCaseFirstLocales.initialized())
        upperCaseFirstLocales.finish();
    if (!upperCaseFirstLocales.init()) {
        ReportOutOfMemory(cx);
        return false;
    }
    UErrorCode status = U_ZERO_ERROR;
    UEnumeration* localeEnum = ucol_openAvailableLocales(&status);
    if (U_FAILURE(status)) {
        ReportInternalError(cx);
        return false;
    }
    ScopedICUObject<UEnumeration, uenum_close> closeLocaleEnum(localeEnum);
    RootedAtom localeAtom(cx);
    for (;;) {
        int32_t rawLength;
        const char* rawLocale = uenum_next(localeEnum, &rawLength, &status);
        if (U_FAILURE(status)) {
            ReportInternalError(cx);
            return false;
        }
        // A null name ends the enumeration.
        if (!rawLocale)
            break;
        // The default case-first setting can only be read off a collator.
        UCollator* coll = ucol_open(rawLocale, &status);
        if (U_FAILURE(status)) {
            ReportInternalError(cx);
            return false;
        }
        ScopedICUObject<UCollator, ucol_close> closeColl(coll);
        UColAttributeValue caseFirst = ucol_getAttribute(coll, UCOL_CASE_FIRST, &status);
        if (U_FAILURE(status)) {
            ReportInternalError(cx);
            return false;
        }
        // Only upper-case-first locales are recorded.
        if (caseFirst != UCOL_UPPER_FIRST)
            continue;
        MOZ_ASSERT(rawLength >= 0);
        localeAtom = Atomize(cx, rawLocale, size_t(rawLength));
        if (!localeAtom)
            return false;
        LocaleHasher::Lookup lookup(localeAtom);
        LocaleSet::AddPtr slot = upperCaseFirstLocales.lookupForAdd(lookup);
        // ICU isn't expected to list a locale twice; should that happen
        // anyway, the repeated entry is simply skipped.
        if (slot)
            continue;
        if (!upperCaseFirstLocales.add(slot, localeAtom)) {
            ReportOutOfMemory(cx);
            return false;
        }
    }
    MOZ_ASSERT(!upperCaseFirstInitialized,
               "ensureUpperCaseFirstLocales is neither reentrant nor thread-safe");
    upperCaseFirstInitialized = true;
    return true;
 }
 /**
  * Stores in |*isUpperFirst| whether |locale| is a member of
  * |upperCaseFirstLocales|, building that set first if necessary.
  *
  * Returns false (leaving |*isUpperFirst| unwritten) if the set can't be built
  * or |locale| can't be linearized.
  */
 bool
 js::intl::SharedIntlData::isUpperCaseFirst(JSContext* cx, HandleString locale, bool* isUpperFirst)
 {
    if (!ensureUpperCaseFirstLocales(cx))
        return false;
    RootedLinearString linearLocale(cx, locale->ensureLinear(cx));
    if (!linearLocale)
        return false;
    LocaleHasher::Lookup localeLookup(linearLocale);
    const bool found = upperCaseFirstLocales.has(localeLookup);
    *isUpperFirst = found;
    return true;
 }
 /**
  * Calls finish() on every hash table owned by this instance. The
  * |timeZoneDataInitialized| and |upperCaseFirstInitialized| flags are left
  * as they are.
  */
 void
 js::intl::SharedIntlData::destroyInstance()
 {
    // Locale data.
    upperCaseFirstLocales.finish();
    // Time zone data.
    ianaLinksCanonicalizedDifferentlyByICU.finish();
    ianaZonesTreatedAsLinksByICU.finish();
    availableTimeZones.finish();
 }
 /**
  * Traces the atoms held by all four hash tables, except while the current
  * thread is performing a minor collection.
  */
 void
 js::intl::SharedIntlData::trace(JSTracer* trc)
 {
    // The tables hold only atoms, and atoms are always tenured, so tracing is
    // skipped during a minor collection.
    if (JS::CurrentThreadIsHeapMinorCollecting())
        return;
    availableTimeZones.trace(trc);
    ianaZonesTreatedAsLinksByICU.trace(trc);
    ianaLinksCanonicalizedDifferentlyByICU.trace(trc);
    upperCaseFirstLocales.trace(trc);
 }
 /**
  * Returns the sum of sizeOfExcludingThis(mallocSizeOf) over the four hash
  * tables owned by this instance.
  */
 size_t
 js::intl::SharedIntlData::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const
 {
    size_t total = availableTimeZones.sizeOfExcludingThis(mallocSizeOf);
    total += ianaZonesTreatedAsLinksByICU.sizeOfExcludingThis(mallocSizeOf);
    total += ianaLinksCanonicalizedDifferentlyByICU.sizeOfExcludingThis(mallocSizeOf);
    total += upperCaseFirstLocales.sizeOfExcludingThis(mallocSizeOf);
    return total;
 }
--- a/js/src/builtin/intl/SharedIntlData.h
+++ b/js/src/builtin/intl/SharedIntlData.h
@ -0,0 +1,221 @@
 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 #ifndef builtin_intl_SharedIntlData_h
 #define builtin_intl_SharedIntlData_h
 #include "mozilla/MemoryReporting.h"
 #include <stddef.h>
 #include "jsalloc.h"
 #include "js/CharacterEncoding.h"
 #include "js/GCAPI.h"
 #include "js/GCHashTable.h"
 #include "js/RootingAPI.h"
 #include "js/Utility.h"
 #include "vm/String.h"
 namespace js {
 namespace intl {
 /**
 * Stores Intl data which can be shared across compartments (but not contexts).
 *
 * Used for data which is expensive when computed repeatedly or is not
 * available through ICU.
 */
 class SharedIntlData
 {
    /**
     * Common base for the hash-table lookup keys below. It captures a linear
     * string's character pointer (Latin-1 or two-byte), its length, and a
     * hash value that derived lookups fill in.
     */
    struct LinearStringLookup
    {
        // Pointer to the string's characters; |isLatin1| tells which member
        // of the union is the active one.
        union {
            const JS::Latin1Char* latin1Chars;
            const char16_t* twoByteChars;
        };
        bool isLatin1;
        size_t length;
        // Token handed to latin1Chars()/twoByteChars() when the character
        // pointer above is obtained.
        JS::AutoCheckCannotGC nogc;
        // Starts out as zero; a derived Lookup constructor (for example
        // LocaleHasher::Lookup's) computes the real value.
        HashNumber hash = 0;
        explicit LinearStringLookup(JSLinearString* string)
          : isLatin1(string->hasLatin1Chars()), length(string->length())
        {
            if (isLatin1)
                latin1Chars = string->latin1Chars(nogc);
            else
                twoByteChars = string->twoByteChars(nogc);
        }
    };
  private:
    /**
     * Information tracking the set of the supported time zone names, derived
     * from the IANA time zone database <https://www.iana.org/time-zones>.
     *
     * There are two kinds of IANA time zone names: Zone and Link (denoted as
     * such in database source files). Zone names are the canonical, preferred
     * name for a time zone, e.g. Asia/Kolkata. Link names simply refer to
     * target Zone names for their meaning, e.g. Asia/Calcutta targets
     * Asia/Kolkata. That a name is a Link doesn't *necessarily* reflect a
     * sense of deprecation: some Link names also exist partly for convenience,
     * e.g. UTC and GMT as Link names targeting the Zone name Etc/UTC.
     *
     * Two data sources determine the time zone names we support: those ICU
     * supports and IANA's zone information.
     *
     * Unfortunately the names ICU and IANA support, and their Link
     * relationships from name to target, aren't identical, so we can't simply
     * implicitly trust ICU's name handling. We must perform various
     * preprocessing of user-provided zone names and post-processing of
     * ICU-provided zone names to implement ECMA-402's IANA-consistent behavior.
     *
     * Also see <https://ssl.icu-project.org/trac/ticket/12044> and
     * <http://unicode.org/cldr/trac/ticket/9892>.
     */
    using TimeZoneName = JSAtom*;
    /**
     * Hash policy for the time zone tables. |hash| simply returns the value
     * stored in the lookup; |match| is defined out of line.
     */
    struct TimeZoneHasher
    {
        struct Lookup : LinearStringLookup
        {
            explicit Lookup(JSLinearString* timeZone);
        };
        static js::HashNumber hash(const Lookup& lookup) { return lookup.hash; }
        static bool match(TimeZoneName key, const Lookup& lookup);
    };
    using TimeZoneSet = GCHashSet<TimeZoneName, TimeZoneHasher, SystemAllocPolicy>;
    using TimeZoneMap = GCHashMap<TimeZoneName, TimeZoneName, TimeZoneHasher, SystemAllocPolicy>;
    /**
     * As a threshold matter, available time zones are those time zones ICU
     * supports, via ucal_openTimeZones. But ICU supports additional non-IANA
     * time zones described in intl/icu/source/tools/tzcode/icuzones (listed in
     * IntlTimeZoneData.cpp's |legacyICUTimeZones|) for its own backwards
     * compatibility purposes. This set consists of ICU's supported time zones,
     * minus all backwards-compatibility time zones.
     */
    TimeZoneSet availableTimeZones;
    /**
     * IANA treats some time zone names as Zones, that ICU instead treats as
     * Links. For example, IANA considers "America/Indiana/Indianapolis" to be
     * a Zone and "America/Fort_Wayne" a Link that targets it, but ICU
     * considers the former a Link that targets "America/Indianapolis" (which
     * IANA treats as a Link).
     *
     * ECMA-402 requires that we respect IANA data, so if we're asked to
     * canonicalize a time zone name in this set, we must *not* return ICU's
     * canonicalization.
     */
    TimeZoneSet ianaZonesTreatedAsLinksByICU;
    /**
     * IANA treats some time zone names as Links to one target, that ICU
     * instead treats as either Zones, or Links to different targets. An
     * example of the former is "Asia/Calcutta", which IANA assigns the target
     * "Asia/Kolkata" but ICU considers its own Zone. An example of the latter
     * is "America/Virgin", which IANA assigns the target
     * "America/Port_of_Spain" but ICU assigns the target "America/St_Thomas".
     *
     * ECMA-402 requires that we respect IANA data, so if we're asked to
     * canonicalize a time zone name that's a key in this map, we *must* return
     * the corresponding value and *must not* return ICU's canonicalization.
     */
    TimeZoneMap ianaLinksCanonicalizedDifferentlyByICU;
    bool timeZoneDataInitialized = false;
    /**
     * Precomputes the available time zone names, because it's too expensive to
     * call ucal_openTimeZones() repeatedly.
     */
    bool ensureTimeZones(JSContext* cx);
  public:
    /**
     * Returns the validated time zone name in |result|. If the input time zone
     * isn't a valid IANA time zone name, |result| remains unchanged.
     */
    bool validateTimeZoneName(JSContext* cx, JS::Handle<JSString*> timeZone,
                              JS::MutableHandle<JSAtom*> result);
    /**
     * Returns the canonical time zone name in |result|. If no canonical name
     * was found, |result| remains unchanged.
     *
     * This method only handles time zones which are canonicalized differently
     * by ICU when compared to IANA.
     */
    bool tryCanonicalizeTimeZoneConsistentWithIANA(JSContext* cx, JS::Handle<JSString*> timeZone,
                                                   JS::MutableHandle<JSAtom*> result);
  private:
    /**
     * The case first parameter (BCP47 key "kf") allows switching the order of
     * upper- and lower-case characters. ICU doesn't directly provide an API
     * to query the default case first value of a given locale, but instead
     * requires instantiating a collator object and then querying the case
     * first attribute (UCOL_CASE_FIRST).
     * To avoid instantiating an additional collator object whenever we need
     * to retrieve the default case first value of a specific locale, we
     * compute the default case first value for every supported locale only
     * once and then keep a list of all locales which don't use the default
     * case first setting.
     * There is almost no difference between lower-case first and when case
     * first is disabled (UCOL_LOWER_FIRST resp. UCOL_OFF), so we only need to
     * track locales which use upper-case first as their default setting.
     */
    using Locale = JSAtom*;
    /**
     * Hash policy for |LocaleSet|. |hash| returns the value precomputed by the
     * Lookup constructor; |match| is defined out of line.
     */
    struct LocaleHasher
    {
        struct Lookup : LinearStringLookup
        {
            explicit Lookup(JSLinearString* locale);
        };
        static js::HashNumber hash(const Lookup& lookup) { return lookup.hash; }
        static bool match(Locale key, const Lookup& lookup);
    };
    using LocaleSet = GCHashSet<Locale, LocaleHasher, SystemAllocPolicy>;
    /** The locales whose default case first setting is upper-case first. */
    LocaleSet upperCaseFirstLocales;
    /**
     * Set to true once ensureUpperCaseFirstLocales() has completely populated
     * |upperCaseFirstLocales|.
     */
    bool upperCaseFirstInitialized = false;
    /**
     * Precomputes the available locales which use upper-case first sorting.
     */
    bool ensureUpperCaseFirstLocales(JSContext* cx);
  public:
    /**
     * Sets |isUpperFirst| to true if |locale| sorts upper-case characters
     * before lower-case characters.
     */
    bool isUpperCaseFirst(JSContext* cx, JS::Handle<JSString*> locale, bool* isUpperFirst);
  public:
    /** Calls finish() on all hash tables owned by this instance. */
    void destroyInstance();
    /** Traces all hash tables, except during a minor collection. */
    void trace(JSTracer* trc);
    /** Returns the sum of sizeOfExcludingThis() over all hash tables. */
    size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const;
 };
 } // namespace intl
 } // namespace js
 #endif /* builtin_intl_SharedIntlData_h */
--- a/js/src/moz.build
+++ b/js/src/moz.build
@ -163,6 +163,7 @@ UNIFIED_SOURCES += [
    'builtin/intl/Collator.cpp',
    'builtin/intl/CommonFunctions.cpp',
    'builtin/intl/NumberFormat.cpp',
    'builtin/intl/SharedIntlData.cpp',
    'builtin/MapObject.cpp',
    'builtin/ModuleObject.cpp',
    'builtin/Object.cpp',
--- a/js/src/vm/Runtime.h
+++ b/js/src/vm/Runtime.h
@ -24,7 +24,7 @@
 #include "jsscript.h"
 #include "builtin/AtomicsObject.h"
-#include "builtin/Intl.h"
+#include "builtin/intl/SharedIntlData.h"
 #include "builtin/Promise.h"
 #include "frontend/NameCollections.h"
 #include "gc/GCRuntime.h"
@ -860,7 +860,7 @@ struct JSRuntime : public js::MallocProvider<JSRuntime>
    js::WriteOnceData<js::WellKnownSymbols*> wellKnownSymbols;
    /* Shared Intl data for this runtime. */
-    js::ActiveThreadData<js::SharedIntlData> sharedIntlData;
+    js::ActiveThreadData<js::intl::SharedIntlData> sharedIntlData;
    void traceSharedIntlData(JSTracer* trc);