зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1431957 - Move SharedIntlData into its own builtin/intl/SharedIntlData.* files so the world doesn't have to import all shared Intl functionality. r=anba
--HG-- extra : rebase_source : 61e7615c8bb172aa8fe4fe8a4d3e62458410b0c8
This commit is contained in:
Родитель
b73e7420db
Коммит
77349d1f5b
|
@ -33,7 +33,7 @@
|
||||||
#include "builtin/intl/ICUStubs.h"
|
#include "builtin/intl/ICUStubs.h"
|
||||||
#include "builtin/intl/NumberFormat.h"
|
#include "builtin/intl/NumberFormat.h"
|
||||||
#include "builtin/intl/ScopedICUObject.h"
|
#include "builtin/intl/ScopedICUObject.h"
|
||||||
#include "builtin/IntlTimeZoneData.h"
|
#include "builtin/intl/SharedIntlData.h"
|
||||||
#include "ds/Sort.h"
|
#include "ds/Sort.h"
|
||||||
#include "gc/FreeOp.h"
|
#include "gc/FreeOp.h"
|
||||||
#include "js/Date.h"
|
#include "js/Date.h"
|
||||||
|
@ -66,6 +66,7 @@ using js::intl::DateTimeFormatOptions;
|
||||||
using js::intl::GetAvailableLocales;
|
using js::intl::GetAvailableLocales;
|
||||||
using js::intl::IcuLocale;
|
using js::intl::IcuLocale;
|
||||||
using js::intl::INITIAL_CHAR_BUFFER_SIZE;
|
using js::intl::INITIAL_CHAR_BUFFER_SIZE;
|
||||||
|
using js::intl::SharedIntlData;
|
||||||
using js::intl::StringsAreEqual;
|
using js::intl::StringsAreEqual;
|
||||||
|
|
||||||
/******************** DateTimeFormat ********************/
|
/******************** DateTimeFormat ********************/
|
||||||
|
@ -385,280 +386,6 @@ js::intl_defaultCalendar(JSContext* cx, unsigned argc, Value* vp)
|
||||||
return DefaultCalendar(cx, locale, args.rval());
|
return DefaultCalendar(cx, locale, args.rval());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Converts an ASCII lower-case letter to the matching upper-case letter.
 * Any other code unit is handed back untouched.
 */
template<typename Char>
static constexpr Char
ToUpperASCII(Char c)
{
    // ASCII lower- and upper-case letters are exactly 'a' - 'A' (0x20) apart.
    return (c < 'a' || c > 'z')
           ? c
           : Char(c - ('a' - 'A'));
}

static_assert(ToUpperASCII('a') == 'A', "verifying 'a' uppercases correctly");
static_assert(ToUpperASCII('m') == 'M', "verifying 'm' uppercases correctly");
static_assert(ToUpperASCII('z') == 'Z', "verifying 'z' uppercases correctly");
static_assert(ToUpperASCII(u'a') == u'A', "verifying u'a' uppercases correctly");
static_assert(ToUpperASCII(u'k') == u'K', "verifying u'k' uppercases correctly");
static_assert(ToUpperASCII(u'z') == u'Z', "verifying u'z' uppercases correctly");
|
|
||||||
|
|
||||||
/**
 * Compares the first |len| code units of |s1| and |s2|, treating ASCII
 * letters that differ only in case as equal.  The two character types may
 * differ.
 */
template<typename Char1, typename Char2>
static bool
EqualCharsIgnoreCaseASCII(const Char1* s1, const Char2* s2, size_t len)
{
    for (size_t i = 0; i < len; i++) {
        const bool sameUnit = ToUpperASCII(s1[i]) == ToUpperASCII(s2[i]);
        if (!sameUnit)
            return false;
    }
    return true;
}
|
|
||||||
|
|
||||||
template<typename Char>
|
|
||||||
static js::HashNumber
|
|
||||||
HashStringIgnoreCaseASCII(const Char* s, size_t length)
|
|
||||||
{
|
|
||||||
uint32_t hash = 0;
|
|
||||||
for (size_t i = 0; i < length; i++)
|
|
||||||
hash = mozilla::AddToHash(hash, ToUpperASCII(s[i]));
|
|
||||||
return hash;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Builds a lookup key for |timeZone| and precomputes its hash with the ASCII
 * case-insensitive hash function, picking the overload that matches the
 * string's character encoding.
 */
js::SharedIntlData::TimeZoneHasher::Lookup::Lookup(JSLinearString* timeZone)
  : js::SharedIntlData::LinearStringLookup(timeZone)
{
    hash = isLatin1
           ? HashStringIgnoreCaseASCII(latin1Chars, length)
           : HashStringIgnoreCaseASCII(twoByteChars, length);
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
js::SharedIntlData::TimeZoneHasher::match(TimeZoneName key, const Lookup& lookup)
|
|
||||||
{
|
|
||||||
if (key->length() != lookup.length)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// Compare time zone names ignoring ASCII case differences.
|
|
||||||
if (key->hasLatin1Chars()) {
|
|
||||||
const Latin1Char* keyChars = key->latin1Chars(lookup.nogc);
|
|
||||||
if (lookup.isLatin1)
|
|
||||||
return EqualCharsIgnoreCaseASCII(keyChars, lookup.latin1Chars, lookup.length);
|
|
||||||
return EqualCharsIgnoreCaseASCII(keyChars, lookup.twoByteChars, lookup.length);
|
|
||||||
}
|
|
||||||
|
|
||||||
const char16_t* keyChars = key->twoByteChars(lookup.nogc);
|
|
||||||
if (lookup.isLatin1)
|
|
||||||
return EqualCharsIgnoreCaseASCII(lookup.latin1Chars, keyChars, lookup.length);
|
|
||||||
return EqualCharsIgnoreCaseASCII(keyChars, lookup.twoByteChars, lookup.length);
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool
|
|
||||||
IsLegacyICUTimeZone(const char* timeZone)
|
|
||||||
{
|
|
||||||
for (const auto& legacyTimeZone : js::timezone::legacyICUTimeZones) {
|
|
||||||
if (StringsAreEqual(timeZone, legacyTimeZone))
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
js::SharedIntlData::ensureTimeZones(JSContext* cx)
|
|
||||||
{
|
|
||||||
if (timeZoneDataInitialized)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
// If ensureTimeZones() was called previously, but didn't complete due to
|
|
||||||
// OOM, clear all sets/maps and start from scratch.
|
|
||||||
if (availableTimeZones.initialized())
|
|
||||||
availableTimeZones.finish();
|
|
||||||
if (!availableTimeZones.init()) {
|
|
||||||
ReportOutOfMemory(cx);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
|
||||||
UEnumeration* values = ucal_openTimeZones(&status);
|
|
||||||
if (U_FAILURE(status)) {
|
|
||||||
intl::ReportInternalError(cx);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
ScopedICUObject<UEnumeration, uenum_close> toClose(values);
|
|
||||||
|
|
||||||
RootedAtom timeZone(cx);
|
|
||||||
while (true) {
|
|
||||||
int32_t size;
|
|
||||||
const char* rawTimeZone = uenum_next(values, &size, &status);
|
|
||||||
if (U_FAILURE(status)) {
|
|
||||||
intl::ReportInternalError(cx);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rawTimeZone == nullptr)
|
|
||||||
break;
|
|
||||||
|
|
||||||
// Skip legacy ICU time zone names.
|
|
||||||
if (IsLegacyICUTimeZone(rawTimeZone))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
MOZ_ASSERT(size >= 0);
|
|
||||||
timeZone = Atomize(cx, rawTimeZone, size_t(size));
|
|
||||||
if (!timeZone)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
TimeZoneHasher::Lookup lookup(timeZone);
|
|
||||||
TimeZoneSet::AddPtr p = availableTimeZones.lookupForAdd(lookup);
|
|
||||||
|
|
||||||
// ICU shouldn't report any duplicate time zone names, but if it does,
|
|
||||||
// just ignore the duplicate name.
|
|
||||||
if (!p && !availableTimeZones.add(p, timeZone)) {
|
|
||||||
ReportOutOfMemory(cx);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ianaZonesTreatedAsLinksByICU.initialized())
|
|
||||||
ianaZonesTreatedAsLinksByICU.finish();
|
|
||||||
if (!ianaZonesTreatedAsLinksByICU.init()) {
|
|
||||||
ReportOutOfMemory(cx);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const char* rawTimeZone : timezone::ianaZonesTreatedAsLinksByICU) {
|
|
||||||
MOZ_ASSERT(rawTimeZone != nullptr);
|
|
||||||
timeZone = Atomize(cx, rawTimeZone, strlen(rawTimeZone));
|
|
||||||
if (!timeZone)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
TimeZoneHasher::Lookup lookup(timeZone);
|
|
||||||
TimeZoneSet::AddPtr p = ianaZonesTreatedAsLinksByICU.lookupForAdd(lookup);
|
|
||||||
MOZ_ASSERT(!p, "Duplicate entry in timezone::ianaZonesTreatedAsLinksByICU");
|
|
||||||
|
|
||||||
if (!ianaZonesTreatedAsLinksByICU.add(p, timeZone)) {
|
|
||||||
ReportOutOfMemory(cx);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ianaLinksCanonicalizedDifferentlyByICU.initialized())
|
|
||||||
ianaLinksCanonicalizedDifferentlyByICU.finish();
|
|
||||||
if (!ianaLinksCanonicalizedDifferentlyByICU.init()) {
|
|
||||||
ReportOutOfMemory(cx);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
RootedAtom linkName(cx);
|
|
||||||
RootedAtom& target = timeZone;
|
|
||||||
for (const auto& linkAndTarget : timezone::ianaLinksCanonicalizedDifferentlyByICU) {
|
|
||||||
const char* rawLinkName = linkAndTarget.link;
|
|
||||||
const char* rawTarget = linkAndTarget.target;
|
|
||||||
|
|
||||||
MOZ_ASSERT(rawLinkName != nullptr);
|
|
||||||
linkName = Atomize(cx, rawLinkName, strlen(rawLinkName));
|
|
||||||
if (!linkName)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
MOZ_ASSERT(rawTarget != nullptr);
|
|
||||||
target = Atomize(cx, rawTarget, strlen(rawTarget));
|
|
||||||
if (!target)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
TimeZoneHasher::Lookup lookup(linkName);
|
|
||||||
TimeZoneMap::AddPtr p = ianaLinksCanonicalizedDifferentlyByICU.lookupForAdd(lookup);
|
|
||||||
MOZ_ASSERT(!p, "Duplicate entry in timezone::ianaLinksCanonicalizedDifferentlyByICU");
|
|
||||||
|
|
||||||
if (!ianaLinksCanonicalizedDifferentlyByICU.add(p, linkName, target)) {
|
|
||||||
ReportOutOfMemory(cx);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
MOZ_ASSERT(!timeZoneDataInitialized, "ensureTimeZones is neither reentrant nor thread-safe");
|
|
||||||
timeZoneDataInitialized = true;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
js::SharedIntlData::validateTimeZoneName(JSContext* cx, HandleString timeZone,
|
|
||||||
MutableHandleAtom result)
|
|
||||||
{
|
|
||||||
if (!ensureTimeZones(cx))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
RootedLinearString timeZoneLinear(cx, timeZone->ensureLinear(cx));
|
|
||||||
if (!timeZoneLinear)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
TimeZoneHasher::Lookup lookup(timeZoneLinear);
|
|
||||||
if (TimeZoneSet::Ptr p = availableTimeZones.lookup(lookup))
|
|
||||||
result.set(*p);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
js::SharedIntlData::tryCanonicalizeTimeZoneConsistentWithIANA(JSContext* cx, HandleString timeZone,
|
|
||||||
MutableHandleAtom result)
|
|
||||||
{
|
|
||||||
if (!ensureTimeZones(cx))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
RootedLinearString timeZoneLinear(cx, timeZone->ensureLinear(cx));
|
|
||||||
if (!timeZoneLinear)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
TimeZoneHasher::Lookup lookup(timeZoneLinear);
|
|
||||||
MOZ_ASSERT(availableTimeZones.has(lookup), "Invalid time zone name");
|
|
||||||
|
|
||||||
if (TimeZoneMap::Ptr p = ianaLinksCanonicalizedDifferentlyByICU.lookup(lookup)) {
|
|
||||||
// The effectively supported time zones aren't known at compile time,
|
|
||||||
// when
|
|
||||||
// 1. SpiderMonkey was compiled with "--with-system-icu".
|
|
||||||
// 2. ICU's dynamic time zone data loading feature was used.
|
|
||||||
// (ICU supports loading time zone files at runtime through the
|
|
||||||
// ICU_TIMEZONE_FILES_DIR environment variable.)
|
|
||||||
// Ensure ICU supports the new target zone before applying the update.
|
|
||||||
TimeZoneName targetTimeZone = p->value();
|
|
||||||
TimeZoneHasher::Lookup targetLookup(targetTimeZone);
|
|
||||||
if (availableTimeZones.has(targetLookup))
|
|
||||||
result.set(targetTimeZone);
|
|
||||||
} else if (TimeZoneSet::Ptr p = ianaZonesTreatedAsLinksByICU.lookup(lookup)) {
|
|
||||||
result.set(*p);
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
js::SharedIntlData::destroyInstance()
|
|
||||||
{
|
|
||||||
availableTimeZones.finish();
|
|
||||||
ianaZonesTreatedAsLinksByICU.finish();
|
|
||||||
ianaLinksCanonicalizedDifferentlyByICU.finish();
|
|
||||||
upperCaseFirstLocales.finish();
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
js::SharedIntlData::trace(JSTracer* trc)
|
|
||||||
{
|
|
||||||
// Atoms are always tenured.
|
|
||||||
if (!JS::CurrentThreadIsHeapMinorCollecting()) {
|
|
||||||
availableTimeZones.trace(trc);
|
|
||||||
ianaZonesTreatedAsLinksByICU.trace(trc);
|
|
||||||
ianaLinksCanonicalizedDifferentlyByICU.trace(trc);
|
|
||||||
upperCaseFirstLocales.trace(trc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t
|
|
||||||
js::SharedIntlData::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const
|
|
||||||
{
|
|
||||||
return availableTimeZones.sizeOfExcludingThis(mallocSizeOf) +
|
|
||||||
ianaZonesTreatedAsLinksByICU.sizeOfExcludingThis(mallocSizeOf) +
|
|
||||||
ianaLinksCanonicalizedDifferentlyByICU.sizeOfExcludingThis(mallocSizeOf) +
|
|
||||||
upperCaseFirstLocales.sizeOfExcludingThis(mallocSizeOf);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
js::intl_IsValidTimeZoneName(JSContext* cx, unsigned argc, Value* vp)
|
js::intl_IsValidTimeZoneName(JSContext* cx, unsigned argc, Value* vp)
|
||||||
{
|
{
|
||||||
|
|
|
@ -36,204 +36,6 @@ class FreeOp;
|
||||||
extern JSObject*
|
extern JSObject*
|
||||||
InitIntlClass(JSContext* cx, HandleObject obj);
|
InitIntlClass(JSContext* cx, HandleObject obj);
|
||||||
|
|
||||||
/**
|
|
||||||
* Stores Intl data which can be shared across compartments (but not contexts).
|
|
||||||
*
|
|
||||||
* Used for data which is expensive when computed repeatedly or is not
|
|
||||||
* available through ICU.
|
|
||||||
*/
|
|
||||||
class SharedIntlData
|
|
||||||
{
|
|
||||||
struct LinearStringLookup
|
|
||||||
{
|
|
||||||
union {
|
|
||||||
const JS::Latin1Char* latin1Chars;
|
|
||||||
const char16_t* twoByteChars;
|
|
||||||
};
|
|
||||||
bool isLatin1;
|
|
||||||
size_t length;
|
|
||||||
JS::AutoCheckCannotGC nogc;
|
|
||||||
HashNumber hash = 0;
|
|
||||||
|
|
||||||
explicit LinearStringLookup(JSLinearString* string)
|
|
||||||
: isLatin1(string->hasLatin1Chars()), length(string->length())
|
|
||||||
{
|
|
||||||
if (isLatin1)
|
|
||||||
latin1Chars = string->latin1Chars(nogc);
|
|
||||||
else
|
|
||||||
twoByteChars = string->twoByteChars(nogc);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
private:
|
|
||||||
/**
|
|
||||||
* Information tracking the set of the supported time zone names, derived
|
|
||||||
* from the IANA time zone database <https://www.iana.org/time-zones>.
|
|
||||||
*
|
|
||||||
* There are two kinds of IANA time zone names: Zone and Link (denoted as
|
|
||||||
* such in database source files). Zone names are the canonical, preferred
|
|
||||||
* name for a time zone, e.g. Asia/Kolkata. Link names simply refer to
|
|
||||||
* target Zone names for their meaning, e.g. Asia/Calcutta targets
|
|
||||||
* Asia/Kolkata. That a name is a Link doesn't *necessarily* reflect a
|
|
||||||
* sense of deprecation: some Link names also exist partly for convenience,
|
|
||||||
* e.g. UTC and GMT as Link names targeting the Zone name Etc/UTC.
|
|
||||||
*
|
|
||||||
* Two data sources determine the time zone names we support: those ICU
|
|
||||||
* supports and IANA's zone information.
|
|
||||||
*
|
|
||||||
* Unfortunately the names ICU and IANA support, and their Link
|
|
||||||
* relationships from name to target, aren't identical, so we can't simply
|
|
||||||
* implicitly trust ICU's name handling. We must perform various
|
|
||||||
* preprocessing of user-provided zone names and post-processing of
|
|
||||||
* ICU-provided zone names to implement ECMA-402's IANA-consistent behavior.
|
|
||||||
*
|
|
||||||
* Also see <https://ssl.icu-project.org/trac/ticket/12044> and
|
|
||||||
* <http://unicode.org/cldr/trac/ticket/9892>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
using TimeZoneName = JSAtom*;
|
|
||||||
|
|
||||||
struct TimeZoneHasher
|
|
||||||
{
|
|
||||||
struct Lookup : LinearStringLookup
|
|
||||||
{
|
|
||||||
explicit Lookup(JSLinearString* timeZone);
|
|
||||||
};
|
|
||||||
|
|
||||||
static js::HashNumber hash(const Lookup& lookup) { return lookup.hash; }
|
|
||||||
static bool match(TimeZoneName key, const Lookup& lookup);
|
|
||||||
};
|
|
||||||
|
|
||||||
using TimeZoneSet = js::GCHashSet<TimeZoneName,
|
|
||||||
TimeZoneHasher,
|
|
||||||
js::SystemAllocPolicy>;
|
|
||||||
|
|
||||||
using TimeZoneMap = js::GCHashMap<TimeZoneName,
|
|
||||||
TimeZoneName,
|
|
||||||
TimeZoneHasher,
|
|
||||||
js::SystemAllocPolicy>;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* As a threshold matter, available time zones are those time zones ICU
|
|
||||||
* supports, via ucal_openTimeZones. But ICU supports additional non-IANA
|
|
||||||
* time zones described in intl/icu/source/tools/tzcode/icuzones (listed in
|
|
||||||
* IntlTimeZoneData.cpp's |legacyICUTimeZones|) for its own backwards
|
|
||||||
* compatibility purposes. This set consists of ICU's supported time zones,
|
|
||||||
* minus all backwards-compatibility time zones.
|
|
||||||
*/
|
|
||||||
TimeZoneSet availableTimeZones;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* IANA treats some time zone names as Zones, that ICU instead treats as
|
|
||||||
* Links. For example, IANA considers "America/Indiana/Indianapolis" to be
|
|
||||||
* a Zone and "America/Fort_Wayne" a Link that targets it, but ICU
|
|
||||||
* considers the former a Link that targets "America/Indianapolis" (which
|
|
||||||
* IANA treats as a Link).
|
|
||||||
*
|
|
||||||
* ECMA-402 requires that we respect IANA data, so if we're asked to
|
|
||||||
* canonicalize a time zone name in this set, we must *not* return ICU's
|
|
||||||
* canonicalization.
|
|
||||||
*/
|
|
||||||
TimeZoneSet ianaZonesTreatedAsLinksByICU;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* IANA treats some time zone names as Links to one target, that ICU
|
|
||||||
* instead treats as either Zones, or Links to different targets. An
|
|
||||||
* example of the former is "Asia/Calcutta, which IANA assigns the target
|
|
||||||
* "Asia/Kolkata" but ICU considers its own Zone. An example of the latter
|
|
||||||
* is "America/Virgin", which IANA assigns the target
|
|
||||||
* "America/Port_of_Spain" but ICU assigns the target "America/St_Thomas".
|
|
||||||
*
|
|
||||||
* ECMA-402 requires that we respect IANA data, so if we're asked to
|
|
||||||
* canonicalize a time zone name that's a key in this map, we *must* return
|
|
||||||
* the corresponding value and *must not* return ICU's canonicalization.
|
|
||||||
*/
|
|
||||||
TimeZoneMap ianaLinksCanonicalizedDifferentlyByICU;
|
|
||||||
|
|
||||||
bool timeZoneDataInitialized = false;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Precomputes the available time zone names, because it's too expensive to
|
|
||||||
* call ucal_openTimeZones() repeatedly.
|
|
||||||
*/
|
|
||||||
bool ensureTimeZones(JSContext* cx);
|
|
||||||
|
|
||||||
public:
|
|
||||||
/**
|
|
||||||
* Returns the validated time zone name in |result|. If the input time zone
|
|
||||||
* isn't a valid IANA time zone name, |result| remains unchanged.
|
|
||||||
*/
|
|
||||||
bool validateTimeZoneName(JSContext* cx, JS::HandleString timeZone,
|
|
||||||
MutableHandleAtom result);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the canonical time zone name in |result|. If no canonical name
|
|
||||||
* was found, |result| remains unchanged.
|
|
||||||
*
|
|
||||||
* This method only handles time zones which are canonicalized differently
|
|
||||||
* by ICU when compared to IANA.
|
|
||||||
*/
|
|
||||||
bool tryCanonicalizeTimeZoneConsistentWithIANA(JSContext* cx, JS::HandleString timeZone,
|
|
||||||
MutableHandleAtom result);
|
|
||||||
|
|
||||||
private:
|
|
||||||
/**
|
|
||||||
* The case first parameter (BCP47 key "kf") allows to switch the order of
|
|
||||||
* upper- and lower-case characters. ICU doesn't directly provide an API
|
|
||||||
* to query the default case first value of a given locale, but instead
|
|
||||||
* requires to instantiate a collator object and then query the case first
|
|
||||||
* attribute (UCOL_CASE_FIRST).
|
|
||||||
* To avoid instantiating an additional collator object whenever we need
|
|
||||||
* to retrieve the default case first value of a specific locale, we
|
|
||||||
* compute the default case first value for every supported locale only
|
|
||||||
* once and then keep a list of all locales which don't use the default
|
|
||||||
* case first setting.
|
|
||||||
* There is almost no difference between lower-case first and when case
|
|
||||||
* first is disabled (UCOL_LOWER_FIRST resp. UCOL_OFF), so we only need to
|
|
||||||
* track locales which use upper-case first as their default setting.
|
|
||||||
*/
|
|
||||||
|
|
||||||
using Locale = JSAtom*;
|
|
||||||
|
|
||||||
struct LocaleHasher
|
|
||||||
{
|
|
||||||
struct Lookup : LinearStringLookup
|
|
||||||
{
|
|
||||||
explicit Lookup(JSLinearString* locale);
|
|
||||||
};
|
|
||||||
|
|
||||||
static js::HashNumber hash(const Lookup& lookup) { return lookup.hash; }
|
|
||||||
static bool match(Locale key, const Lookup& lookup);
|
|
||||||
};
|
|
||||||
|
|
||||||
using LocaleSet = js::GCHashSet<Locale,
|
|
||||||
LocaleHasher,
|
|
||||||
js::SystemAllocPolicy>;
|
|
||||||
|
|
||||||
LocaleSet upperCaseFirstLocales;
|
|
||||||
|
|
||||||
bool upperCaseFirstInitialized = false;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Precomputes the available locales which use upper-case first sorting.
|
|
||||||
*/
|
|
||||||
bool ensureUpperCaseFirstLocales(JSContext* cx);
|
|
||||||
|
|
||||||
public:
|
|
||||||
/**
|
|
||||||
* Sets |isUpperFirst| to true if |locale| sorts upper-case characters
|
|
||||||
* before lower-case characters.
|
|
||||||
*/
|
|
||||||
bool isUpperCaseFirst(JSContext* cx, JS::HandleString locale, bool* isUpperFirst);
|
|
||||||
|
|
||||||
public:
|
|
||||||
void destroyInstance();
|
|
||||||
|
|
||||||
void trace(JSTracer* trc);
|
|
||||||
|
|
||||||
size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The following functions are for use by self-hosted code.
|
* The following functions are for use by self-hosted code.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
#include "builtin/intl/CommonFunctions.h"
|
#include "builtin/intl/CommonFunctions.h"
|
||||||
#include "builtin/intl/ICUStubs.h"
|
#include "builtin/intl/ICUStubs.h"
|
||||||
#include "builtin/intl/ScopedICUObject.h"
|
#include "builtin/intl/ScopedICUObject.h"
|
||||||
|
#include "builtin/intl/SharedIntlData.h"
|
||||||
#include "gc/FreeOp.h"
|
#include "gc/FreeOp.h"
|
||||||
#include "js/TypeDecls.h"
|
#include "js/TypeDecls.h"
|
||||||
#include "vm/GlobalObject.h"
|
#include "vm/GlobalObject.h"
|
||||||
|
@ -29,6 +30,7 @@ using namespace js;
|
||||||
using js::intl::GetAvailableLocales;
|
using js::intl::GetAvailableLocales;
|
||||||
using js::intl::IcuLocale;
|
using js::intl::IcuLocale;
|
||||||
using js::intl::ReportInternalError;
|
using js::intl::ReportInternalError;
|
||||||
|
using js::intl::SharedIntlData;
|
||||||
using js::intl::StringsAreEqual;
|
using js::intl::StringsAreEqual;
|
||||||
|
|
||||||
const ClassOps CollatorObject::classOps_ = {
|
const ClassOps CollatorObject::classOps_ = {
|
||||||
|
@ -464,124 +466,6 @@ js::intl_CompareStrings(JSContext* cx, unsigned argc, Value* vp)
|
||||||
return intl_CompareStrings(cx, coll, str1, str2, args.rval());
|
return intl_CompareStrings(cx, coll, str1, str2, args.rval());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Builds a lookup key for |locale| and precomputes its hash with
 * mozilla::HashString, picking the overload that matches the string's
 * character encoding.
 */
js::SharedIntlData::LocaleHasher::Lookup::Lookup(JSLinearString* locale)
  : js::SharedIntlData::LinearStringLookup(locale)
{
    hash = isLatin1
           ? mozilla::HashString(latin1Chars, length)
           : mozilla::HashString(twoByteChars, length);
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
js::SharedIntlData::LocaleHasher::match(Locale key, const Lookup& lookup)
|
|
||||||
{
|
|
||||||
if (key->length() != lookup.length)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (key->hasLatin1Chars()) {
|
|
||||||
const Latin1Char* keyChars = key->latin1Chars(lookup.nogc);
|
|
||||||
if (lookup.isLatin1)
|
|
||||||
return EqualChars(keyChars, lookup.latin1Chars, lookup.length);
|
|
||||||
return EqualChars(keyChars, lookup.twoByteChars, lookup.length);
|
|
||||||
}
|
|
||||||
|
|
||||||
const char16_t* keyChars = key->twoByteChars(lookup.nogc);
|
|
||||||
if (lookup.isLatin1)
|
|
||||||
return EqualChars(lookup.latin1Chars, keyChars, lookup.length);
|
|
||||||
return EqualChars(keyChars, lookup.twoByteChars, lookup.length);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
js::SharedIntlData::ensureUpperCaseFirstLocales(JSContext* cx)
|
|
||||||
{
|
|
||||||
if (upperCaseFirstInitialized)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
// If ensureUpperCaseFirstLocales() was called previously, but didn't
|
|
||||||
// complete due to OOM, clear all data and start from scratch.
|
|
||||||
if (upperCaseFirstLocales.initialized())
|
|
||||||
upperCaseFirstLocales.finish();
|
|
||||||
if (!upperCaseFirstLocales.init()) {
|
|
||||||
ReportOutOfMemory(cx);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
|
||||||
UEnumeration* available = ucol_openAvailableLocales(&status);
|
|
||||||
if (U_FAILURE(status)) {
|
|
||||||
ReportInternalError(cx);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
ScopedICUObject<UEnumeration, uenum_close> toClose(available);
|
|
||||||
|
|
||||||
RootedAtom locale(cx);
|
|
||||||
while (true) {
|
|
||||||
int32_t size;
|
|
||||||
const char* rawLocale = uenum_next(available, &size, &status);
|
|
||||||
if (U_FAILURE(status)) {
|
|
||||||
ReportInternalError(cx);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rawLocale == nullptr)
|
|
||||||
break;
|
|
||||||
|
|
||||||
UCollator* collator = ucol_open(rawLocale, &status);
|
|
||||||
if (U_FAILURE(status)) {
|
|
||||||
ReportInternalError(cx);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
ScopedICUObject<UCollator, ucol_close> toCloseCollator(collator);
|
|
||||||
|
|
||||||
UColAttributeValue caseFirst = ucol_getAttribute(collator, UCOL_CASE_FIRST, &status);
|
|
||||||
if (U_FAILURE(status)) {
|
|
||||||
ReportInternalError(cx);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (caseFirst != UCOL_UPPER_FIRST)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
MOZ_ASSERT(size >= 0);
|
|
||||||
locale = Atomize(cx, rawLocale, size_t(size));
|
|
||||||
if (!locale)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
LocaleHasher::Lookup lookup(locale);
|
|
||||||
LocaleSet::AddPtr p = upperCaseFirstLocales.lookupForAdd(lookup);
|
|
||||||
|
|
||||||
// ICU shouldn't report any duplicate locales, but if it does, just
|
|
||||||
// ignore the duplicated locale.
|
|
||||||
if (!p && !upperCaseFirstLocales.add(p, locale)) {
|
|
||||||
ReportOutOfMemory(cx);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
MOZ_ASSERT(!upperCaseFirstInitialized,
|
|
||||||
"ensureUpperCaseFirstLocales is neither reentrant nor thread-safe");
|
|
||||||
upperCaseFirstInitialized = true;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
js::SharedIntlData::isUpperCaseFirst(JSContext* cx, HandleString locale, bool* isUpperFirst)
|
|
||||||
{
|
|
||||||
if (!ensureUpperCaseFirstLocales(cx))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
RootedLinearString localeLinear(cx, locale->ensureLinear(cx));
|
|
||||||
if (!localeLinear)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
LocaleHasher::Lookup lookup(localeLinear);
|
|
||||||
*isUpperFirst = upperCaseFirstLocales.has(lookup);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
js::intl_isUpperCaseFirst(JSContext* cx, unsigned argc, Value* vp)
|
js::intl_isUpperCaseFirst(JSContext* cx, unsigned argc, Value* vp)
|
||||||
{
|
{
|
||||||
|
|
|
@ -0,0 +1,419 @@
|
||||||
|
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||||
|
* vim: set ts=8 sts=4 et sw=4 tw=99:
|
||||||
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||||
|
|
||||||
|
/* Runtime-wide Intl data shared across compartments. */
|
||||||
|
|
||||||
|
#include "builtin/intl/SharedIntlData.h"
|
||||||
|
|
||||||
|
#include "mozilla/Assertions.h"
|
||||||
|
#include "mozilla/HashFunctions.h"
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "jsatom.h"
|
||||||
|
#include "jsstr.h"
|
||||||
|
|
||||||
|
#include "builtin/intl/CommonFunctions.h"
|
||||||
|
#include "builtin/intl/ICUStubs.h"
|
||||||
|
#include "builtin/intl/ScopedICUObject.h"
|
||||||
|
#include "builtin/IntlTimeZoneData.h"
|
||||||
|
#include "js/Utility.h"
|
||||||
|
|
||||||
|
using js::HashNumber;
|
||||||
|
using js::intl::StringsAreEqual;
|
||||||
|
|
||||||
|
/**
 * Converts an ASCII lower-case letter to the matching upper-case letter.
 * Any other code unit is handed back untouched.
 */
template<typename Char>
static constexpr Char
ToUpperASCII(Char c)
{
    // ASCII lower- and upper-case letters are exactly 'a' - 'A' (0x20) apart.
    return (c < 'a' || c > 'z')
           ? c
           : Char(c - ('a' - 'A'));
}

static_assert(ToUpperASCII('a') == 'A', "verifying 'a' uppercases correctly");
static_assert(ToUpperASCII('m') == 'M', "verifying 'm' uppercases correctly");
static_assert(ToUpperASCII('z') == 'Z', "verifying 'z' uppercases correctly");
static_assert(ToUpperASCII(u'a') == u'A', "verifying u'a' uppercases correctly");
static_assert(ToUpperASCII(u'k') == u'K', "verifying u'k' uppercases correctly");
static_assert(ToUpperASCII(u'z') == u'Z', "verifying u'z' uppercases correctly");
|
||||||
|
|
||||||
|
template<typename Char>
|
||||||
|
static HashNumber
|
||||||
|
HashStringIgnoreCaseASCII(const Char* s, size_t length)
|
||||||
|
{
|
||||||
|
uint32_t hash = 0;
|
||||||
|
for (size_t i = 0; i < length; i++)
|
||||||
|
hash = mozilla::AddToHash(hash, ToUpperASCII(s[i]));
|
||||||
|
return hash;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Builds a lookup key for |timeZone| and precomputes its hash with the ASCII
 * case-insensitive hash function, picking the overload that matches the
 * string's character encoding.
 */
js::intl::SharedIntlData::TimeZoneHasher::Lookup::Lookup(JSLinearString* timeZone)
  : js::intl::SharedIntlData::LinearStringLookup(timeZone)
{
    hash = isLatin1
           ? HashStringIgnoreCaseASCII(latin1Chars, length)
           : HashStringIgnoreCaseASCII(twoByteChars, length);
}
|
||||||
|
|
||||||
|
/**
 * Compares the first |len| code units of |s1| and |s2|, treating ASCII
 * letters that differ only in case as equal.  The two character types may
 * differ.
 */
template<typename Char1, typename Char2>
static bool
EqualCharsIgnoreCaseASCII(const Char1* s1, const Char2* s2, size_t len)
{
    for (size_t i = 0; i < len; i++) {
        const bool sameUnit = ToUpperASCII(s1[i]) == ToUpperASCII(s2[i]);
        if (!sameUnit)
            return false;
    }
    return true;
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
js::intl::SharedIntlData::TimeZoneHasher::match(TimeZoneName key, const Lookup& lookup)
|
||||||
|
{
|
||||||
|
if (key->length() != lookup.length)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Compare time zone names ignoring ASCII case differences.
|
||||||
|
if (key->hasLatin1Chars()) {
|
||||||
|
const Latin1Char* keyChars = key->latin1Chars(lookup.nogc);
|
||||||
|
if (lookup.isLatin1)
|
||||||
|
return EqualCharsIgnoreCaseASCII(keyChars, lookup.latin1Chars, lookup.length);
|
||||||
|
return EqualCharsIgnoreCaseASCII(keyChars, lookup.twoByteChars, lookup.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
const char16_t* keyChars = key->twoByteChars(lookup.nogc);
|
||||||
|
if (lookup.isLatin1)
|
||||||
|
return EqualCharsIgnoreCaseASCII(lookup.latin1Chars, keyChars, lookup.length);
|
||||||
|
return EqualCharsIgnoreCaseASCII(keyChars, lookup.twoByteChars, lookup.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
IsLegacyICUTimeZone(const char* timeZone)
|
||||||
|
{
|
||||||
|
for (const auto& legacyTimeZone : js::timezone::legacyICUTimeZones) {
|
||||||
|
if (StringsAreEqual(timeZone, legacyTimeZone))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
js::intl::SharedIntlData::ensureTimeZones(JSContext* cx)
|
||||||
|
{
|
||||||
|
if (timeZoneDataInitialized)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
// If ensureTimeZones() was called previously, but didn't complete due to
|
||||||
|
// OOM, clear all sets/maps and start from scratch.
|
||||||
|
if (availableTimeZones.initialized())
|
||||||
|
availableTimeZones.finish();
|
||||||
|
if (!availableTimeZones.init()) {
|
||||||
|
ReportOutOfMemory(cx);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
|
UEnumeration* values = ucal_openTimeZones(&status);
|
||||||
|
if (U_FAILURE(status)) {
|
||||||
|
ReportInternalError(cx);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
ScopedICUObject<UEnumeration, uenum_close> toClose(values);
|
||||||
|
|
||||||
|
RootedAtom timeZone(cx);
|
||||||
|
while (true) {
|
||||||
|
int32_t size;
|
||||||
|
const char* rawTimeZone = uenum_next(values, &size, &status);
|
||||||
|
if (U_FAILURE(status)) {
|
||||||
|
ReportInternalError(cx);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rawTimeZone == nullptr)
|
||||||
|
break;
|
||||||
|
|
||||||
|
// Skip legacy ICU time zone names.
|
||||||
|
if (IsLegacyICUTimeZone(rawTimeZone))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
MOZ_ASSERT(size >= 0);
|
||||||
|
timeZone = Atomize(cx, rawTimeZone, size_t(size));
|
||||||
|
if (!timeZone)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
TimeZoneHasher::Lookup lookup(timeZone);
|
||||||
|
TimeZoneSet::AddPtr p = availableTimeZones.lookupForAdd(lookup);
|
||||||
|
|
||||||
|
// ICU shouldn't report any duplicate time zone names, but if it does,
|
||||||
|
// just ignore the duplicate name.
|
||||||
|
if (!p && !availableTimeZones.add(p, timeZone)) {
|
||||||
|
ReportOutOfMemory(cx);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ianaZonesTreatedAsLinksByICU.initialized())
|
||||||
|
ianaZonesTreatedAsLinksByICU.finish();
|
||||||
|
if (!ianaZonesTreatedAsLinksByICU.init()) {
|
||||||
|
ReportOutOfMemory(cx);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const char* rawTimeZone : timezone::ianaZonesTreatedAsLinksByICU) {
|
||||||
|
MOZ_ASSERT(rawTimeZone != nullptr);
|
||||||
|
timeZone = Atomize(cx, rawTimeZone, strlen(rawTimeZone));
|
||||||
|
if (!timeZone)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
TimeZoneHasher::Lookup lookup(timeZone);
|
||||||
|
TimeZoneSet::AddPtr p = ianaZonesTreatedAsLinksByICU.lookupForAdd(lookup);
|
||||||
|
MOZ_ASSERT(!p, "Duplicate entry in timezone::ianaZonesTreatedAsLinksByICU");
|
||||||
|
|
||||||
|
if (!ianaZonesTreatedAsLinksByICU.add(p, timeZone)) {
|
||||||
|
ReportOutOfMemory(cx);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ianaLinksCanonicalizedDifferentlyByICU.initialized())
|
||||||
|
ianaLinksCanonicalizedDifferentlyByICU.finish();
|
||||||
|
if (!ianaLinksCanonicalizedDifferentlyByICU.init()) {
|
||||||
|
ReportOutOfMemory(cx);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
RootedAtom linkName(cx);
|
||||||
|
RootedAtom& target = timeZone;
|
||||||
|
for (const auto& linkAndTarget : timezone::ianaLinksCanonicalizedDifferentlyByICU) {
|
||||||
|
const char* rawLinkName = linkAndTarget.link;
|
||||||
|
const char* rawTarget = linkAndTarget.target;
|
||||||
|
|
||||||
|
MOZ_ASSERT(rawLinkName != nullptr);
|
||||||
|
linkName = Atomize(cx, rawLinkName, strlen(rawLinkName));
|
||||||
|
if (!linkName)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
MOZ_ASSERT(rawTarget != nullptr);
|
||||||
|
target = Atomize(cx, rawTarget, strlen(rawTarget));
|
||||||
|
if (!target)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
TimeZoneHasher::Lookup lookup(linkName);
|
||||||
|
TimeZoneMap::AddPtr p = ianaLinksCanonicalizedDifferentlyByICU.lookupForAdd(lookup);
|
||||||
|
MOZ_ASSERT(!p, "Duplicate entry in timezone::ianaLinksCanonicalizedDifferentlyByICU");
|
||||||
|
|
||||||
|
if (!ianaLinksCanonicalizedDifferentlyByICU.add(p, linkName, target)) {
|
||||||
|
ReportOutOfMemory(cx);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MOZ_ASSERT(!timeZoneDataInitialized, "ensureTimeZones is neither reentrant nor thread-safe");
|
||||||
|
timeZoneDataInitialized = true;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
js::intl::SharedIntlData::validateTimeZoneName(JSContext* cx, HandleString timeZone,
|
||||||
|
MutableHandleAtom result)
|
||||||
|
{
|
||||||
|
if (!ensureTimeZones(cx))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
RootedLinearString timeZoneLinear(cx, timeZone->ensureLinear(cx));
|
||||||
|
if (!timeZoneLinear)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
TimeZoneHasher::Lookup lookup(timeZoneLinear);
|
||||||
|
if (TimeZoneSet::Ptr p = availableTimeZones.lookup(lookup))
|
||||||
|
result.set(*p);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
js::intl::SharedIntlData::tryCanonicalizeTimeZoneConsistentWithIANA(JSContext* cx,
|
||||||
|
HandleString timeZone,
|
||||||
|
MutableHandleAtom result)
|
||||||
|
{
|
||||||
|
if (!ensureTimeZones(cx))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
RootedLinearString timeZoneLinear(cx, timeZone->ensureLinear(cx));
|
||||||
|
if (!timeZoneLinear)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
TimeZoneHasher::Lookup lookup(timeZoneLinear);
|
||||||
|
MOZ_ASSERT(availableTimeZones.has(lookup), "Invalid time zone name");
|
||||||
|
|
||||||
|
if (TimeZoneMap::Ptr p = ianaLinksCanonicalizedDifferentlyByICU.lookup(lookup)) {
|
||||||
|
// The effectively supported time zones aren't known at compile time,
|
||||||
|
// when
|
||||||
|
// 1. SpiderMonkey was compiled with "--with-system-icu".
|
||||||
|
// 2. ICU's dynamic time zone data loading feature was used.
|
||||||
|
// (ICU supports loading time zone files at runtime through the
|
||||||
|
// ICU_TIMEZONE_FILES_DIR environment variable.)
|
||||||
|
// Ensure ICU supports the new target zone before applying the update.
|
||||||
|
TimeZoneName targetTimeZone = p->value();
|
||||||
|
TimeZoneHasher::Lookup targetLookup(targetTimeZone);
|
||||||
|
if (availableTimeZones.has(targetLookup))
|
||||||
|
result.set(targetTimeZone);
|
||||||
|
} else if (TimeZoneSet::Ptr p = ianaZonesTreatedAsLinksByICU.lookup(lookup)) {
|
||||||
|
result.set(*p);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Builds a lookup for |locale| and computes its hash over the string's
 * characters, using whichever character encoding the string has.
 */
js::intl::SharedIntlData::LocaleHasher::Lookup::Lookup(JSLinearString* locale)
  : js::intl::SharedIntlData::LinearStringLookup(locale)
{
    hash = isLatin1
           ? mozilla::HashString(latin1Chars, length)
           : mozilla::HashString(twoByteChars, length);
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
js::intl::SharedIntlData::LocaleHasher::match(Locale key, const Lookup& lookup)
|
||||||
|
{
|
||||||
|
if (key->length() != lookup.length)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (key->hasLatin1Chars()) {
|
||||||
|
const Latin1Char* keyChars = key->latin1Chars(lookup.nogc);
|
||||||
|
if (lookup.isLatin1)
|
||||||
|
return EqualChars(keyChars, lookup.latin1Chars, lookup.length);
|
||||||
|
return EqualChars(keyChars, lookup.twoByteChars, lookup.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
const char16_t* keyChars = key->twoByteChars(lookup.nogc);
|
||||||
|
if (lookup.isLatin1)
|
||||||
|
return EqualChars(lookup.latin1Chars, keyChars, lookup.length);
|
||||||
|
return EqualChars(keyChars, lookup.twoByteChars, lookup.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
js::intl::SharedIntlData::ensureUpperCaseFirstLocales(JSContext* cx)
|
||||||
|
{
|
||||||
|
if (upperCaseFirstInitialized)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
// If ensureUpperCaseFirstLocales() was called previously, but didn't
|
||||||
|
// complete due to OOM, clear all data and start from scratch.
|
||||||
|
if (upperCaseFirstLocales.initialized())
|
||||||
|
upperCaseFirstLocales.finish();
|
||||||
|
if (!upperCaseFirstLocales.init()) {
|
||||||
|
ReportOutOfMemory(cx);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
|
UEnumeration* available = ucol_openAvailableLocales(&status);
|
||||||
|
if (U_FAILURE(status)) {
|
||||||
|
ReportInternalError(cx);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
ScopedICUObject<UEnumeration, uenum_close> toClose(available);
|
||||||
|
|
||||||
|
RootedAtom locale(cx);
|
||||||
|
while (true) {
|
||||||
|
int32_t size;
|
||||||
|
const char* rawLocale = uenum_next(available, &size, &status);
|
||||||
|
if (U_FAILURE(status)) {
|
||||||
|
ReportInternalError(cx);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rawLocale == nullptr)
|
||||||
|
break;
|
||||||
|
|
||||||
|
UCollator* collator = ucol_open(rawLocale, &status);
|
||||||
|
if (U_FAILURE(status)) {
|
||||||
|
ReportInternalError(cx);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
ScopedICUObject<UCollator, ucol_close> toCloseCollator(collator);
|
||||||
|
|
||||||
|
UColAttributeValue caseFirst = ucol_getAttribute(collator, UCOL_CASE_FIRST, &status);
|
||||||
|
if (U_FAILURE(status)) {
|
||||||
|
ReportInternalError(cx);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (caseFirst != UCOL_UPPER_FIRST)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
MOZ_ASSERT(size >= 0);
|
||||||
|
locale = Atomize(cx, rawLocale, size_t(size));
|
||||||
|
if (!locale)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
LocaleHasher::Lookup lookup(locale);
|
||||||
|
LocaleSet::AddPtr p = upperCaseFirstLocales.lookupForAdd(lookup);
|
||||||
|
|
||||||
|
// ICU shouldn't report any duplicate locales, but if it does, just
|
||||||
|
// ignore the duplicated locale.
|
||||||
|
if (!p && !upperCaseFirstLocales.add(p, locale)) {
|
||||||
|
ReportOutOfMemory(cx);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MOZ_ASSERT(!upperCaseFirstInitialized,
|
||||||
|
"ensureUpperCaseFirstLocales is neither reentrant nor thread-safe");
|
||||||
|
upperCaseFirstInitialized = true;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
js::intl::SharedIntlData::isUpperCaseFirst(JSContext* cx, HandleString locale, bool* isUpperFirst)
|
||||||
|
{
|
||||||
|
if (!ensureUpperCaseFirstLocales(cx))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
RootedLinearString localeLinear(cx, locale->ensureLinear(cx));
|
||||||
|
if (!localeLinear)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
LocaleHasher::Lookup lookup(localeLinear);
|
||||||
|
*isUpperFirst = upperCaseFirstLocales.has(lookup);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
js::intl::SharedIntlData::destroyInstance()
|
||||||
|
{
|
||||||
|
availableTimeZones.finish();
|
||||||
|
ianaZonesTreatedAsLinksByICU.finish();
|
||||||
|
ianaLinksCanonicalizedDifferentlyByICU.finish();
|
||||||
|
upperCaseFirstLocales.finish();
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
js::intl::SharedIntlData::trace(JSTracer* trc)
|
||||||
|
{
|
||||||
|
// Atoms are always tenured.
|
||||||
|
if (!JS::CurrentThreadIsHeapMinorCollecting()) {
|
||||||
|
availableTimeZones.trace(trc);
|
||||||
|
ianaZonesTreatedAsLinksByICU.trace(trc);
|
||||||
|
ianaLinksCanonicalizedDifferentlyByICU.trace(trc);
|
||||||
|
upperCaseFirstLocales.trace(trc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t
|
||||||
|
js::intl::SharedIntlData::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const
|
||||||
|
{
|
||||||
|
return availableTimeZones.sizeOfExcludingThis(mallocSizeOf) +
|
||||||
|
ianaZonesTreatedAsLinksByICU.sizeOfExcludingThis(mallocSizeOf) +
|
||||||
|
ianaLinksCanonicalizedDifferentlyByICU.sizeOfExcludingThis(mallocSizeOf) +
|
||||||
|
upperCaseFirstLocales.sizeOfExcludingThis(mallocSizeOf);
|
||||||
|
}
|
|
@ -0,0 +1,221 @@
|
||||||
|
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||||
|
* vim: set ts=8 sts=4 et sw=4 tw=99:
|
||||||
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||||
|
|
||||||
|
#ifndef builtin_intl_SharedIntlData_h
|
||||||
|
#define builtin_intl_SharedIntlData_h
|
||||||
|
|
||||||
|
#include "mozilla/MemoryReporting.h"
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
#include "jsalloc.h"
|
||||||
|
|
||||||
|
#include "js/CharacterEncoding.h"
|
||||||
|
#include "js/GCAPI.h"
|
||||||
|
#include "js/GCHashTable.h"
|
||||||
|
#include "js/RootingAPI.h"
|
||||||
|
#include "js/Utility.h"
|
||||||
|
#include "vm/String.h"
|
||||||
|
|
||||||
|
namespace js {
|
||||||
|
|
||||||
|
namespace intl {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stores Intl data which can be shared across compartments (but not contexts).
|
||||||
|
*
|
||||||
|
* Used for data which is expensive when computed repeatedly or is not
|
||||||
|
* available through ICU.
|
||||||
|
*/
|
||||||
|
class SharedIntlData
|
||||||
|
{
|
||||||
|
struct LinearStringLookup
|
||||||
|
{
|
||||||
|
union {
|
||||||
|
const JS::Latin1Char* latin1Chars;
|
||||||
|
const char16_t* twoByteChars;
|
||||||
|
};
|
||||||
|
bool isLatin1;
|
||||||
|
size_t length;
|
||||||
|
JS::AutoCheckCannotGC nogc;
|
||||||
|
HashNumber hash = 0;
|
||||||
|
|
||||||
|
explicit LinearStringLookup(JSLinearString* string)
|
||||||
|
: isLatin1(string->hasLatin1Chars()), length(string->length())
|
||||||
|
{
|
||||||
|
if (isLatin1)
|
||||||
|
latin1Chars = string->latin1Chars(nogc);
|
||||||
|
else
|
||||||
|
twoByteChars = string->twoByteChars(nogc);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
private:
|
||||||
|
/**
|
||||||
|
* Information tracking the set of the supported time zone names, derived
|
||||||
|
* from the IANA time zone database <https://www.iana.org/time-zones>.
|
||||||
|
*
|
||||||
|
* There are two kinds of IANA time zone names: Zone and Link (denoted as
|
||||||
|
* such in database source files). Zone names are the canonical, preferred
|
||||||
|
* name for a time zone, e.g. Asia/Kolkata. Link names simply refer to
|
||||||
|
* target Zone names for their meaning, e.g. Asia/Calcutta targets
|
||||||
|
* Asia/Kolkata. That a name is a Link doesn't *necessarily* reflect a
|
||||||
|
* sense of deprecation: some Link names also exist partly for convenience,
|
||||||
|
* e.g. UTC and GMT as Link names targeting the Zone name Etc/UTC.
|
||||||
|
*
|
||||||
|
* Two data sources determine the time zone names we support: those ICU
|
||||||
|
* supports and IANA's zone information.
|
||||||
|
*
|
||||||
|
* Unfortunately the names ICU and IANA support, and their Link
|
||||||
|
* relationships from name to target, aren't identical, so we can't simply
|
||||||
|
* implicitly trust ICU's name handling. We must perform various
|
||||||
|
* preprocessing of user-provided zone names and post-processing of
|
||||||
|
* ICU-provided zone names to implement ECMA-402's IANA-consistent behavior.
|
||||||
|
*
|
||||||
|
* Also see <https://ssl.icu-project.org/trac/ticket/12044> and
|
||||||
|
* <http://unicode.org/cldr/trac/ticket/9892>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
using TimeZoneName = JSAtom*;
|
||||||
|
|
||||||
|
struct TimeZoneHasher
|
||||||
|
{
|
||||||
|
struct Lookup : LinearStringLookup
|
||||||
|
{
|
||||||
|
explicit Lookup(JSLinearString* timeZone);
|
||||||
|
};
|
||||||
|
|
||||||
|
static js::HashNumber hash(const Lookup& lookup) { return lookup.hash; }
|
||||||
|
static bool match(TimeZoneName key, const Lookup& lookup);
|
||||||
|
};
|
||||||
|
|
||||||
|
using TimeZoneSet = GCHashSet<TimeZoneName, TimeZoneHasher, SystemAllocPolicy>;
|
||||||
|
using TimeZoneMap = GCHashMap<TimeZoneName, TimeZoneName, TimeZoneHasher, SystemAllocPolicy>;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* As a threshold matter, available time zones are those time zones ICU
|
||||||
|
* supports, via ucal_openTimeZones. But ICU supports additional non-IANA
|
||||||
|
* time zones described in intl/icu/source/tools/tzcode/icuzones (listed in
|
||||||
|
* IntlTimeZoneData.cpp's |legacyICUTimeZones|) for its own backwards
|
||||||
|
* compatibility purposes. This set consists of ICU's supported time zones,
|
||||||
|
* minus all backwards-compatibility time zones.
|
||||||
|
*/
|
||||||
|
TimeZoneSet availableTimeZones;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* IANA treats some time zone names as Zones, that ICU instead treats as
|
||||||
|
* Links. For example, IANA considers "America/Indiana/Indianapolis" to be
|
||||||
|
* a Zone and "America/Fort_Wayne" a Link that targets it, but ICU
|
||||||
|
* considers the former a Link that targets "America/Indianapolis" (which
|
||||||
|
* IANA treats as a Link).
|
||||||
|
*
|
||||||
|
* ECMA-402 requires that we respect IANA data, so if we're asked to
|
||||||
|
* canonicalize a time zone name in this set, we must *not* return ICU's
|
||||||
|
* canonicalization.
|
||||||
|
*/
|
||||||
|
TimeZoneSet ianaZonesTreatedAsLinksByICU;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* IANA treats some time zone names as Links to one target, that ICU
|
||||||
|
* instead treats as either Zones, or Links to different targets. An
|
||||||
|
* example of the former is "Asia/Calcutta, which IANA assigns the target
|
||||||
|
* "Asia/Kolkata" but ICU considers its own Zone. An example of the latter
|
||||||
|
* is "America/Virgin", which IANA assigns the target
|
||||||
|
* "America/Port_of_Spain" but ICU assigns the target "America/St_Thomas".
|
||||||
|
*
|
||||||
|
* ECMA-402 requires that we respect IANA data, so if we're asked to
|
||||||
|
* canonicalize a time zone name that's a key in this map, we *must* return
|
||||||
|
* the corresponding value and *must not* return ICU's canonicalization.
|
||||||
|
*/
|
||||||
|
TimeZoneMap ianaLinksCanonicalizedDifferentlyByICU;
|
||||||
|
|
||||||
|
bool timeZoneDataInitialized = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Precomputes the available time zone names, because it's too expensive to
|
||||||
|
* call ucal_openTimeZones() repeatedly.
|
||||||
|
*/
|
||||||
|
bool ensureTimeZones(JSContext* cx);
|
||||||
|
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* Returns the validated time zone name in |result|. If the input time zone
|
||||||
|
* isn't a valid IANA time zone name, |result| remains unchanged.
|
||||||
|
*/
|
||||||
|
bool validateTimeZoneName(JSContext* cx, JS::Handle<JSString*> timeZone,
|
||||||
|
JS::MutableHandle<JSAtom*> result);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the canonical time zone name in |result|. If no canonical name
|
||||||
|
* was found, |result| remains unchanged.
|
||||||
|
*
|
||||||
|
* This method only handles time zones which are canonicalized differently
|
||||||
|
* by ICU when compared to IANA.
|
||||||
|
*/
|
||||||
|
bool tryCanonicalizeTimeZoneConsistentWithIANA(JSContext* cx, JS::Handle<JSString*> timeZone,
|
||||||
|
JS::MutableHandle<JSAtom*> result);
|
||||||
|
|
||||||
|
private:
|
||||||
|
/**
|
||||||
|
* The case first parameter (BCP47 key "kf") allows to switch the order of
|
||||||
|
* upper- and lower-case characters. ICU doesn't directly provide an API
|
||||||
|
* to query the default case first value of a given locale, but instead
|
||||||
|
* requires to instantiate a collator object and then query the case first
|
||||||
|
* attribute (UCOL_CASE_FIRST).
|
||||||
|
* To avoid instantiating an additional collator object whenever we need
|
||||||
|
* to retrieve the default case first value of a specific locale, we
|
||||||
|
* compute the default case first value for every supported locale only
|
||||||
|
* once and then keep a list of all locales which don't use the default
|
||||||
|
* case first setting.
|
||||||
|
* There is almost no difference between lower-case first and when case
|
||||||
|
* first is disabled (UCOL_LOWER_FIRST resp. UCOL_OFF), so we only need to
|
||||||
|
* track locales which use upper-case first as their default setting.
|
||||||
|
*/
|
||||||
|
|
||||||
|
using Locale = JSAtom*;
|
||||||
|
|
||||||
|
struct LocaleHasher
|
||||||
|
{
|
||||||
|
struct Lookup : LinearStringLookup
|
||||||
|
{
|
||||||
|
explicit Lookup(JSLinearString* locale);
|
||||||
|
};
|
||||||
|
|
||||||
|
static js::HashNumber hash(const Lookup& lookup) { return lookup.hash; }
|
||||||
|
static bool match(Locale key, const Lookup& lookup);
|
||||||
|
};
|
||||||
|
|
||||||
|
using LocaleSet = GCHashSet<Locale, LocaleHasher, SystemAllocPolicy>;
|
||||||
|
|
||||||
|
LocaleSet upperCaseFirstLocales;
|
||||||
|
|
||||||
|
bool upperCaseFirstInitialized = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Precomputes the available locales which use upper-case first sorting.
|
||||||
|
*/
|
||||||
|
bool ensureUpperCaseFirstLocales(JSContext* cx);
|
||||||
|
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* Sets |isUpperFirst| to true if |locale| sorts upper-case characters
|
||||||
|
* before lower-case characters.
|
||||||
|
*/
|
||||||
|
bool isUpperCaseFirst(JSContext* cx, JS::Handle<JSString*> locale, bool* isUpperFirst);
|
||||||
|
|
||||||
|
public:
|
||||||
|
void destroyInstance();
|
||||||
|
|
||||||
|
void trace(JSTracer* trc);
|
||||||
|
|
||||||
|
size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace intl
|
||||||
|
|
||||||
|
} // namespace js
|
||||||
|
|
||||||
|
#endif /* builtin_intl_SharedIntlData_h */
|
|
@ -163,6 +163,7 @@ UNIFIED_SOURCES += [
|
||||||
'builtin/intl/Collator.cpp',
|
'builtin/intl/Collator.cpp',
|
||||||
'builtin/intl/CommonFunctions.cpp',
|
'builtin/intl/CommonFunctions.cpp',
|
||||||
'builtin/intl/NumberFormat.cpp',
|
'builtin/intl/NumberFormat.cpp',
|
||||||
|
'builtin/intl/SharedIntlData.cpp',
|
||||||
'builtin/MapObject.cpp',
|
'builtin/MapObject.cpp',
|
||||||
'builtin/ModuleObject.cpp',
|
'builtin/ModuleObject.cpp',
|
||||||
'builtin/Object.cpp',
|
'builtin/Object.cpp',
|
||||||
|
|
|
@ -24,7 +24,7 @@
|
||||||
#include "jsscript.h"
|
#include "jsscript.h"
|
||||||
|
|
||||||
#include "builtin/AtomicsObject.h"
|
#include "builtin/AtomicsObject.h"
|
||||||
#include "builtin/Intl.h"
|
#include "builtin/intl/SharedIntlData.h"
|
||||||
#include "builtin/Promise.h"
|
#include "builtin/Promise.h"
|
||||||
#include "frontend/NameCollections.h"
|
#include "frontend/NameCollections.h"
|
||||||
#include "gc/GCRuntime.h"
|
#include "gc/GCRuntime.h"
|
||||||
|
@ -860,7 +860,7 @@ struct JSRuntime : public js::MallocProvider<JSRuntime>
|
||||||
js::WriteOnceData<js::WellKnownSymbols*> wellKnownSymbols;
|
js::WriteOnceData<js::WellKnownSymbols*> wellKnownSymbols;
|
||||||
|
|
||||||
/* Shared Intl data for this runtime. */
|
/* Shared Intl data for this runtime. */
|
||||||
js::ActiveThreadData<js::SharedIntlData> sharedIntlData;
|
js::ActiveThreadData<js::intl::SharedIntlData> sharedIntlData;
|
||||||
|
|
||||||
void traceSharedIntlData(JSTracer* trc);
|
void traceSharedIntlData(JSTracer* trc);
|
||||||
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче