зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1570370 - Part 9: Move UnicodeExtensionsGenerated.cpp into LanguageTagGenerated.cpp. r=jwalden
Differential Revision: https://phabricator.services.mozilla.com/D40075 --HG-- extra : moz-landing-system : lando
This commit is contained in:
Родитель
b293863ab3
Коммит
c5404a4adc
|
@ -6,7 +6,6 @@ config/msvc-stl-wrapper.template.h
|
|||
# Generated code
|
||||
js/src/builtin/intl/LanguageTagGenerated.cpp
|
||||
js/src/builtin/intl/TimeZoneDataGenerated.h
|
||||
js/src/builtin/intl/UnicodeExtensionsGenerated.cpp
|
||||
|
||||
# Don't want to reformat irregexp. bug 1510128
|
||||
js/src/irregexp/.*
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
// Generated by make_intl_data.py. DO NOT EDIT.
|
||||
// Version: CLDR-35.1
|
||||
// URL: https://unicode.org/Public/cldr/35.1/core.zip
|
||||
|
||||
#include "mozilla/Assertions.h"
|
||||
#include "mozilla/Range.h"
|
||||
|
@ -14,6 +16,8 @@
|
|||
#include "util/Text.h"
|
||||
#include "vm/JSContext.h"
|
||||
|
||||
using namespace js::intl::LanguageTagLimits;
|
||||
|
||||
using ConstCharRange = mozilla::Range<const char>;
|
||||
|
||||
template <size_t Length, size_t TagLength, size_t SubtagLength>
|
||||
|
@ -51,7 +55,7 @@ static inline const char* SearchReplacement(
|
|||
|
||||
// Mappings from language subtags to preferred values.
|
||||
// Derived from CLDR Supplemental Data, version 35.1.
|
||||
// https://github.com/unicode-org/cldr.git
|
||||
// https://unicode.org/Public/cldr/35.1/core.zip
|
||||
bool js::intl::LanguageTag::languageMapping(LanguageSubtag& language) {
|
||||
MOZ_ASSERT(IsStructurallyValidLanguageTag(language.range()));
|
||||
|
||||
|
@ -156,7 +160,7 @@ bool js::intl::LanguageTag::languageMapping(LanguageSubtag& language) {
|
|||
|
||||
// Language subtags with complex mappings.
|
||||
// Derived from CLDR Supplemental Data, version 35.1.
|
||||
// https://github.com/unicode-org/cldr.git
|
||||
// https://unicode.org/Public/cldr/35.1/core.zip
|
||||
bool js::intl::LanguageTag::complexLanguageMapping(const LanguageSubtag& language) {
|
||||
MOZ_ASSERT(IsStructurallyValidLanguageTag(language.range()));
|
||||
|
||||
|
@ -177,7 +181,7 @@ bool js::intl::LanguageTag::complexLanguageMapping(const LanguageSubtag& languag
|
|||
|
||||
// Mappings from region subtags to preferred values.
|
||||
// Derived from CLDR Supplemental Data, version 35.1.
|
||||
// https://github.com/unicode-org/cldr.git
|
||||
// https://unicode.org/Public/cldr/35.1/core.zip
|
||||
bool js::intl::LanguageTag::regionMapping(RegionSubtag& region) {
|
||||
MOZ_ASSERT(IsStructurallyValidRegionTag(region.range()));
|
||||
|
||||
|
@ -276,7 +280,7 @@ bool js::intl::LanguageTag::regionMapping(RegionSubtag& region) {
|
|||
|
||||
// Region subtags with complex mappings.
|
||||
// Derived from CLDR Supplemental Data, version 35.1.
|
||||
// https://github.com/unicode-org/cldr.git
|
||||
// https://unicode.org/Public/cldr/35.1/core.zip
|
||||
bool js::intl::LanguageTag::complexRegionMapping(const RegionSubtag& region) {
|
||||
MOZ_ASSERT(IsStructurallyValidRegionTag(region.range()));
|
||||
|
||||
|
@ -298,7 +302,7 @@ bool js::intl::LanguageTag::complexRegionMapping(const RegionSubtag& region) {
|
|||
|
||||
// Language subtags with complex mappings.
|
||||
// Derived from CLDR Supplemental Data, version 35.1.
|
||||
// https://github.com/unicode-org/cldr.git
|
||||
// https://unicode.org/Public/cldr/35.1/core.zip
|
||||
void js::intl::LanguageTag::performComplexLanguageMappings() {
|
||||
MOZ_ASSERT(IsStructurallyValidLanguageTag(language().range()));
|
||||
|
||||
|
@ -333,7 +337,7 @@ void js::intl::LanguageTag::performComplexLanguageMappings() {
|
|||
|
||||
// Region subtags with complex mappings.
|
||||
// Derived from CLDR Supplemental Data, version 35.1.
|
||||
// https://github.com/unicode-org/cldr.git
|
||||
// https://unicode.org/Public/cldr/35.1/core.zip
|
||||
void js::intl::LanguageTag::performComplexRegionMappings() {
|
||||
MOZ_ASSERT(IsStructurallyValidLanguageTag(language().range()));
|
||||
MOZ_ASSERT(IsStructurallyValidRegionTag(region().range()));
|
||||
|
@ -530,7 +534,7 @@ void js::intl::LanguageTag::performComplexRegionMappings() {
|
|||
|
||||
// Canonicalize grandfathered locale identifiers.
|
||||
// Derived from CLDR Supplemental Data, version 35.1.
|
||||
// https://github.com/unicode-org/cldr.git
|
||||
// https://unicode.org/Public/cldr/35.1/core.zip
|
||||
bool js::intl::LanguageTag::updateGrandfatheredMappings(JSContext* cx) {
|
||||
// We're mapping regular grandfathered tags to non-grandfathered form here.
|
||||
// Other tags remain unchanged.
|
||||
|
@ -613,3 +617,174 @@ bool js::intl::LanguageTag::updateGrandfatheredMappings(JSContext* cx) {
|
|||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <size_t Length>
|
||||
static inline bool IsUnicodeKey(const ConstCharRange& key,
|
||||
const char (&str)[Length]) {
|
||||
static_assert(Length == UnicodeKeyLength + 1,
|
||||
"Unicode extension key is two characters long");
|
||||
return memcmp(key.begin().get(), str, Length - 1) == 0;
|
||||
}
|
||||
|
||||
template <size_t Length>
|
||||
static inline bool IsUnicodeType(const ConstCharRange& type,
|
||||
const char (&str)[Length]) {
|
||||
static_assert(Length > UnicodeKeyLength + 1,
|
||||
"Unicode extension type contains more than two characters");
|
||||
return type.length() == (Length - 1) &&
|
||||
memcmp(type.begin().get(), str, Length - 1) == 0;
|
||||
}
|
||||
|
||||
static int32_t CompareUnicodeType(const char* a, const ConstCharRange& b) {
|
||||
#ifdef DEBUG
|
||||
auto isNull = [](char c) {
|
||||
return c == '\0';
|
||||
};
|
||||
#endif
|
||||
|
||||
MOZ_ASSERT(std::none_of(b.begin().get(), b.end().get(), isNull),
|
||||
"unexpected null-character in string");
|
||||
|
||||
using UnsignedChar = unsigned char;
|
||||
for (size_t i = 0; i < b.length(); i++) {
|
||||
// |a| is zero-terminated and |b| doesn't contain a null-terminator. So if
|
||||
// we've reached the end of |a|, the below if-statement will always be true.
|
||||
// That ensures we don't read past the end of |a|.
|
||||
if (int32_t r = UnsignedChar(a[i]) - UnsignedChar(b[i])) {
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
// Return zero if both strings are equal or a negative number if |b| is a
|
||||
// prefix of |a|.
|
||||
return -int32_t(UnsignedChar(a[b.length()]));
|
||||
};
|
||||
|
||||
template <size_t Length>
|
||||
static inline const char* SearchReplacement(const char* (&types)[Length],
|
||||
const char* (&aliases)[Length],
|
||||
const ConstCharRange& type) {
|
||||
|
||||
auto p = std::lower_bound(std::begin(types), std::end(types), type,
|
||||
[](const auto& a, const auto& b) {
|
||||
return CompareUnicodeType(a, b) < 0;
|
||||
});
|
||||
if (p != std::end(types) && CompareUnicodeType(*p, type) == 0) {
|
||||
return aliases[std::distance(std::begin(types), p)];
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Mapping from deprecated BCP 47 Unicode extension types to their preferred
|
||||
* values.
|
||||
*
|
||||
* Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
|
||||
*/
|
||||
const char* js::intl::LanguageTag::replaceUnicodeExtensionType(
|
||||
const ConstCharRange& key, const ConstCharRange& type) {
|
||||
#ifdef DEBUG
|
||||
static auto isAsciiLowercaseAlphanumeric = [](char c) {
|
||||
return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c);
|
||||
};
|
||||
|
||||
static auto isAsciiLowercaseAlphanumericOrDash = [](char c) {
|
||||
return isAsciiLowercaseAlphanumeric(c) || c == '-';
|
||||
};
|
||||
#endif
|
||||
|
||||
MOZ_ASSERT(key.length() == UnicodeKeyLength);
|
||||
MOZ_ASSERT(std::all_of(key.begin().get(), key.end().get(),
|
||||
isAsciiLowercaseAlphanumeric));
|
||||
|
||||
MOZ_ASSERT(type.length() > UnicodeKeyLength);
|
||||
MOZ_ASSERT(std::all_of(type.begin().get(), type.end().get(),
|
||||
isAsciiLowercaseAlphanumericOrDash));
|
||||
|
||||
if (IsUnicodeKey(key, "ca")) {
|
||||
if (IsUnicodeType(type, "ethiopic-amete-alem")) {
|
||||
return "ethioaa";
|
||||
}
|
||||
if (IsUnicodeType(type, "islamicc")) {
|
||||
return "islamic-civil";
|
||||
}
|
||||
}
|
||||
else if (IsUnicodeKey(key, "kb") ||
|
||||
IsUnicodeKey(key, "kc") ||
|
||||
IsUnicodeKey(key, "kh") ||
|
||||
IsUnicodeKey(key, "kk") ||
|
||||
IsUnicodeKey(key, "kn")) {
|
||||
if (IsUnicodeType(type, "yes")) {
|
||||
return "true";
|
||||
}
|
||||
}
|
||||
else if (IsUnicodeKey(key, "ks")) {
|
||||
if (IsUnicodeType(type, "primary")) {
|
||||
return "level1";
|
||||
}
|
||||
if (IsUnicodeType(type, "tertiary")) {
|
||||
return "level3";
|
||||
}
|
||||
}
|
||||
else if (IsUnicodeKey(key, "ms")) {
|
||||
if (IsUnicodeType(type, "imperial")) {
|
||||
return "uksystem";
|
||||
}
|
||||
}
|
||||
else if (IsUnicodeKey(key, "rg") ||
|
||||
IsUnicodeKey(key, "sd")) {
|
||||
static const char* types[116] = {
|
||||
"cn11", "cn12", "cn13", "cn14", "cn15", "cn21", "cn22", "cn23",
|
||||
"cn31", "cn32", "cn33", "cn34", "cn35", "cn36", "cn37", "cn41",
|
||||
"cn42", "cn43", "cn44", "cn45", "cn46", "cn50", "cn51", "cn52",
|
||||
"cn53", "cn54", "cn61", "cn62", "cn63", "cn64", "cn65", "cz10a",
|
||||
"cz10b", "cz10c", "cz10d", "cz10e", "cz10f", "cz611", "cz612", "cz613",
|
||||
"cz614", "cz615", "cz621", "cz622", "cz623", "cz624", "cz626", "cz627",
|
||||
"czjc", "czjm", "czka", "czkr", "czli", "czmo", "czol", "czpa",
|
||||
"czpl", "czpr", "czst", "czus", "czvy", "czzl", "fra", "frb",
|
||||
"frc", "frd", "fre", "frf", "frg", "frh", "fri", "frj",
|
||||
"frk", "frl", "frm", "frn", "fro", "frp", "frq", "frr",
|
||||
"frs", "frt", "fru", "frv", "laxn", "lud", "lug", "lul",
|
||||
"mrnkc", "nzn", "nzs", "omba", "omsh", "plds", "plkp", "pllb",
|
||||
"plld", "pllu", "plma", "plmz", "plop", "plpd", "plpk", "plpm",
|
||||
"plsk", "plsl", "plwn", "plwp", "plzp", "tteto", "ttrcm", "ttwto",
|
||||
"twkhq", "twtnq", "twtpq", "twtxq",
|
||||
};
|
||||
static const char* aliases[116] = {
|
||||
"cnbj", "cntj", "cnhe", "cnsx", "cnmn", "cnln", "cnjl", "cnhl",
|
||||
"cnsh", "cnjs", "cnzj", "cnah", "cnfj", "cnjx", "cnsd", "cnha",
|
||||
"cnhb", "cnhn", "cngd", "cngx", "cnhi", "cncq", "cnsc", "cngz",
|
||||
"cnyn", "cnxz", "cnsn", "cngs", "cnqh", "cnnx", "cnxj", "cz110",
|
||||
"cz111", "cz112", "cz113", "cz114", "cz115", "cz663", "cz632", "cz633",
|
||||
"cz634", "cz635", "cz641", "cz642", "cz643", "cz644", "cz646", "cz647",
|
||||
"cz31", "cz64", "cz41", "cz52", "cz51", "cz80", "cz71", "cz53",
|
||||
"cz32", "cz10", "cz20", "cz42", "cz63", "cz72", "frges", "frnaq",
|
||||
"frara", "frbfc", "frbre", "frcvl", "frges", "frcor", "frbfc", "fridf",
|
||||
"frocc", "frnaq", "frges", "frocc", "frhdf", "frnor", "frnor", "frpdl",
|
||||
"frhdf", "frnaq", "frpac", "frara", "laxs", "lucl", "luec", "luca",
|
||||
"mr13", "nzauk", "nzcan", "ombj", "omsj", "pl02", "pl04", "pl08",
|
||||
"pl10", "pl06", "pl12", "pl14", "pl16", "pl20", "pl18", "pl22",
|
||||
"pl26", "pl24", "pl28", "pl30", "pl32", "tttob", "ttmrc", "tttob",
|
||||
"twkhh", "twtnn", "twnwt", "twtxg",
|
||||
};
|
||||
return SearchReplacement(types, aliases, type);
|
||||
}
|
||||
else if (IsUnicodeKey(key, "tz")) {
|
||||
static const char* types[28] = {
|
||||
"aqams", "cnckg", "cnhrb", "cnkhg", "cuba", "egypt",
|
||||
"eire", "est", "gmt0", "hongkong", "hst", "iceland",
|
||||
"iran", "israel", "jamaica", "japan", "libya", "mst",
|
||||
"navajo", "poland", "portugal", "prc", "roc", "rok",
|
||||
"turkey", "uct", "usnavajo", "zulu",
|
||||
};
|
||||
static const char* aliases[28] = {
|
||||
"nzakl", "cnsha", "cnsha", "cnurc", "cuhav", "egcai",
|
||||
"iedub", "utcw05", "gmt", "hkhkg", "utcw10", "isrey",
|
||||
"irthr", "jeruslm", "jmkin", "jptyo", "lytip", "utcw07",
|
||||
"usden", "plwaw", "ptlis", "cnsha", "twtpe", "krsel",
|
||||
"trist", "utc", "usden", "utc",
|
||||
};
|
||||
return SearchReplacement(types, aliases, type);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
|
|
@ -1,188 +0,0 @@
|
|||
// Generated by make_intl_data.py. DO NOT EDIT.
|
||||
// Version: CLDR-35.1
|
||||
// URL: https://unicode.org/Public/cldr/35.1/core.zip
|
||||
|
||||
#include "mozilla/Assertions.h"
|
||||
#include "mozilla/Range.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
|
||||
#include "builtin/intl/LanguageTag.h"
|
||||
|
||||
using namespace js::intl::LanguageTagLimits;
|
||||
|
||||
using ConstCharRange = mozilla::Range<const char>;
|
||||
|
||||
template <size_t Length>
|
||||
static inline bool IsUnicodeKey(const ConstCharRange& key,
|
||||
const char (&str)[Length]) {
|
||||
static_assert(Length == UnicodeKeyLength + 1,
|
||||
"Unicode extension key is two characters long");
|
||||
return memcmp(key.begin().get(), str, Length - 1) == 0;
|
||||
}
|
||||
|
||||
template <size_t Length>
|
||||
static inline bool IsUnicodeType(const ConstCharRange& type,
|
||||
const char (&str)[Length]) {
|
||||
static_assert(Length > UnicodeKeyLength + 1,
|
||||
"Unicode extension type contains more than two characters");
|
||||
return type.length() == (Length - 1) &&
|
||||
memcmp(type.begin().get(), str, Length - 1) == 0;
|
||||
}
|
||||
|
||||
static int32_t CompareUnicodeType(const char* a, const ConstCharRange& b) {
|
||||
#ifdef DEBUG
|
||||
auto isNull = [](char c) {
|
||||
return c == '\0';
|
||||
};
|
||||
#endif
|
||||
|
||||
MOZ_ASSERT(std::none_of(b.begin().get(), b.end().get(), isNull),
|
||||
"unexpected null-character in string");
|
||||
|
||||
using UnsignedChar = unsigned char;
|
||||
for (size_t i = 0; i < b.length(); i++) {
|
||||
// |a| is zero-terminated and |b| doesn't contain a null-terminator. So if
|
||||
// we've reached the end of |a|, the below if-statement will always be true.
|
||||
// That ensures we don't read past the end of |a|.
|
||||
if (int32_t r = UnsignedChar(a[i]) - UnsignedChar(b[i])) {
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
// Return zero if both strings are equal or a negative number if |b| is a
|
||||
// prefix of |a|.
|
||||
return -int32_t(UnsignedChar(a[b.length()]));
|
||||
};
|
||||
|
||||
template <size_t Length>
|
||||
static inline const char* SearchReplacement(const char* (&types)[Length],
|
||||
const char* (&aliases)[Length],
|
||||
const ConstCharRange& type) {
|
||||
|
||||
auto p = std::lower_bound(std::begin(types), std::end(types), type,
|
||||
[](const auto& a, const auto& b) {
|
||||
return CompareUnicodeType(a, b) < 0;
|
||||
});
|
||||
if (p != std::end(types) && CompareUnicodeType(*p, type) == 0) {
|
||||
return aliases[std::distance(std::begin(types), p)];
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Mapping from deprecated BCP 47 Unicode extension types to their preferred
|
||||
* values.
|
||||
*
|
||||
* Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
|
||||
*/
|
||||
const char* js::intl::LanguageTag::replaceUnicodeExtensionType(
|
||||
const ConstCharRange& key, const ConstCharRange& type) {
|
||||
#ifdef DEBUG
|
||||
static auto isAsciiLowercaseAlphanumeric = [](char c) {
|
||||
return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c);
|
||||
};
|
||||
|
||||
static auto isAsciiLowercaseAlphanumericOrDash = [](char c) {
|
||||
return isAsciiLowercaseAlphanumeric(c) || c == '-';
|
||||
};
|
||||
#endif
|
||||
|
||||
MOZ_ASSERT(key.length() == UnicodeKeyLength);
|
||||
MOZ_ASSERT(std::all_of(key.begin().get(), key.end().get(),
|
||||
isAsciiLowercaseAlphanumeric));
|
||||
|
||||
MOZ_ASSERT(type.length() > UnicodeKeyLength);
|
||||
MOZ_ASSERT(std::all_of(type.begin().get(), type.end().get(),
|
||||
isAsciiLowercaseAlphanumericOrDash));
|
||||
|
||||
if (IsUnicodeKey(key, "ca")) {
|
||||
if (IsUnicodeType(type, "ethiopic-amete-alem")) {
|
||||
return "ethioaa";
|
||||
}
|
||||
if (IsUnicodeType(type, "islamicc")) {
|
||||
return "islamic-civil";
|
||||
}
|
||||
}
|
||||
else if (IsUnicodeKey(key, "kb") ||
|
||||
IsUnicodeKey(key, "kc") ||
|
||||
IsUnicodeKey(key, "kh") ||
|
||||
IsUnicodeKey(key, "kk") ||
|
||||
IsUnicodeKey(key, "kn")) {
|
||||
if (IsUnicodeType(type, "yes")) {
|
||||
return "true";
|
||||
}
|
||||
}
|
||||
else if (IsUnicodeKey(key, "ks")) {
|
||||
if (IsUnicodeType(type, "primary")) {
|
||||
return "level1";
|
||||
}
|
||||
if (IsUnicodeType(type, "tertiary")) {
|
||||
return "level3";
|
||||
}
|
||||
}
|
||||
else if (IsUnicodeKey(key, "ms")) {
|
||||
if (IsUnicodeType(type, "imperial")) {
|
||||
return "uksystem";
|
||||
}
|
||||
}
|
||||
else if (IsUnicodeKey(key, "rg") ||
|
||||
IsUnicodeKey(key, "sd")) {
|
||||
static const char* types[116] = {
|
||||
"cn11", "cn12", "cn13", "cn14", "cn15", "cn21", "cn22", "cn23",
|
||||
"cn31", "cn32", "cn33", "cn34", "cn35", "cn36", "cn37", "cn41",
|
||||
"cn42", "cn43", "cn44", "cn45", "cn46", "cn50", "cn51", "cn52",
|
||||
"cn53", "cn54", "cn61", "cn62", "cn63", "cn64", "cn65", "cz10a",
|
||||
"cz10b", "cz10c", "cz10d", "cz10e", "cz10f", "cz611", "cz612", "cz613",
|
||||
"cz614", "cz615", "cz621", "cz622", "cz623", "cz624", "cz626", "cz627",
|
||||
"czjc", "czjm", "czka", "czkr", "czli", "czmo", "czol", "czpa",
|
||||
"czpl", "czpr", "czst", "czus", "czvy", "czzl", "fra", "frb",
|
||||
"frc", "frd", "fre", "frf", "frg", "frh", "fri", "frj",
|
||||
"frk", "frl", "frm", "frn", "fro", "frp", "frq", "frr",
|
||||
"frs", "frt", "fru", "frv", "laxn", "lud", "lug", "lul",
|
||||
"mrnkc", "nzn", "nzs", "omba", "omsh", "plds", "plkp", "pllb",
|
||||
"plld", "pllu", "plma", "plmz", "plop", "plpd", "plpk", "plpm",
|
||||
"plsk", "plsl", "plwn", "plwp", "plzp", "tteto", "ttrcm", "ttwto",
|
||||
"twkhq", "twtnq", "twtpq", "twtxq",
|
||||
};
|
||||
static const char* aliases[116] = {
|
||||
"cnbj", "cntj", "cnhe", "cnsx", "cnmn", "cnln", "cnjl", "cnhl",
|
||||
"cnsh", "cnjs", "cnzj", "cnah", "cnfj", "cnjx", "cnsd", "cnha",
|
||||
"cnhb", "cnhn", "cngd", "cngx", "cnhi", "cncq", "cnsc", "cngz",
|
||||
"cnyn", "cnxz", "cnsn", "cngs", "cnqh", "cnnx", "cnxj", "cz110",
|
||||
"cz111", "cz112", "cz113", "cz114", "cz115", "cz663", "cz632", "cz633",
|
||||
"cz634", "cz635", "cz641", "cz642", "cz643", "cz644", "cz646", "cz647",
|
||||
"cz31", "cz64", "cz41", "cz52", "cz51", "cz80", "cz71", "cz53",
|
||||
"cz32", "cz10", "cz20", "cz42", "cz63", "cz72", "frges", "frnaq",
|
||||
"frara", "frbfc", "frbre", "frcvl", "frges", "frcor", "frbfc", "fridf",
|
||||
"frocc", "frnaq", "frges", "frocc", "frhdf", "frnor", "frnor", "frpdl",
|
||||
"frhdf", "frnaq", "frpac", "frara", "laxs", "lucl", "luec", "luca",
|
||||
"mr13", "nzauk", "nzcan", "ombj", "omsj", "pl02", "pl04", "pl08",
|
||||
"pl10", "pl06", "pl12", "pl14", "pl16", "pl20", "pl18", "pl22",
|
||||
"pl26", "pl24", "pl28", "pl30", "pl32", "tttob", "ttmrc", "tttob",
|
||||
"twkhh", "twtnn", "twnwt", "twtxg",
|
||||
};
|
||||
return SearchReplacement(types, aliases, type);
|
||||
}
|
||||
else if (IsUnicodeKey(key, "tz")) {
|
||||
static const char* types[28] = {
|
||||
"aqams", "cnckg", "cnhrb", "cnkhg", "cuba", "egypt",
|
||||
"eire", "est", "gmt0", "hongkong", "hst", "iceland",
|
||||
"iran", "israel", "jamaica", "japan", "libya", "mst",
|
||||
"navajo", "poland", "portugal", "prc", "roc", "rok",
|
||||
"turkey", "uct", "usnavajo", "zulu",
|
||||
};
|
||||
static const char* aliases[28] = {
|
||||
"nzakl", "cnsha", "cnsha", "cnurc", "cuhav", "egcai",
|
||||
"iedub", "utcw05", "gmt", "hkhkg", "utcw10", "isrey",
|
||||
"irthr", "jeruslm", "jmkin", "jptyo", "lytip", "utcw07",
|
||||
"usden", "plwaw", "ptlis", "cnsha", "twtpe", "krsel",
|
||||
"trist", "utc", "usden", "utc",
|
||||
};
|
||||
return SearchReplacement(types, aliases, type);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
|
@ -6,17 +6,16 @@
|
|||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
""" Usage:
|
||||
make_intl_data.py langtags [ldmlSupplemental.dtd supplementalMetadata.xml likelySubtags.xml]
|
||||
make_intl_data.py langtags [cldr_core.zip]
|
||||
make_intl_data.py tzdata
|
||||
make_intl_data.py currency
|
||||
make_intl_data.py unicode-ext
|
||||
|
||||
|
||||
Target "langtags":
|
||||
This script extracts information about mappings between deprecated and
|
||||
current Unicode BCP 47 locale identifiers from CLDR and converts it to C++
|
||||
mapping code in LanguageTagGenerated.cpp. The code is used in
|
||||
LanguageTag.cpp.
|
||||
This script extracts information about 1) mappings between deprecated and
|
||||
current Unicode BCP 47 locale identifiers, and 2) deprecated and current
|
||||
BCP 47 Unicode extension values from CLDR, and converts it to C++ mapping
|
||||
code in LanguageTagGenerated.cpp. The code is used in LanguageTag.cpp.
|
||||
|
||||
|
||||
Target "tzdata":
|
||||
|
@ -27,19 +26,12 @@
|
|||
|
||||
Target "currency":
|
||||
Generates the mapping from currency codes to decimal digits used for them.
|
||||
|
||||
Target "unicode-ext":
|
||||
Generates the mapping from deprecated BCP 47 Unicode extension values to
|
||||
their preferred values.
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
import contextlib
|
||||
import os
|
||||
import re
|
||||
import io
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tarfile
|
||||
import tempfile
|
||||
|
@ -53,11 +45,11 @@ if sys.version_info.major == 2:
|
|||
from itertools import ifilter as filter, ifilterfalse as filterfalse, imap as map,\
|
||||
izip_longest as zip_longest
|
||||
from urllib2 import urlopen, Request as UrlRequest
|
||||
from urlparse import urlsplit, urlunsplit
|
||||
from urlparse import urlsplit
|
||||
else:
|
||||
from itertools import filterfalse, zip_longest
|
||||
from urllib.request import urlopen, Request as UrlRequest
|
||||
from urllib.parse import urlsplit, urlunsplit
|
||||
from urllib.parse import urlsplit
|
||||
|
||||
|
||||
# From https://docs.python.org/3/library/itertools.html
|
||||
|
@ -87,21 +79,8 @@ def writeMappingsVar(println, mapping, name, description, source, url):
|
|||
println(u"")
|
||||
writeMappingHeader(println, description, source, url)
|
||||
println(u"var {0} = {{".format(name))
|
||||
for key in sorted(mapping):
|
||||
if not isinstance(mapping[key], dict):
|
||||
value = mapping[key]
|
||||
if isinstance(value, bool):
|
||||
value = "true" if value else "false"
|
||||
else:
|
||||
value = '"{0}"'.format(value)
|
||||
else:
|
||||
preferred = mapping[key]["preferred"]
|
||||
prefix = mapping[key]["prefix"]
|
||||
if key != preferred:
|
||||
raise Exception(
|
||||
"Expected '{0}' matches preferred locale '{1}'".format(key, preferred))
|
||||
value = '"{0}"'.format(prefix)
|
||||
println(u' "{0}": {1},'.format(key, value))
|
||||
for (key, value) in sorted(mapping.items(), key=itemgetter(0)):
|
||||
println(u' "{0}": "{1}",'.format(key, value))
|
||||
println(u"};")
|
||||
|
||||
|
||||
|
@ -518,16 +497,7 @@ bool js::intl::LanguageTag::updateGrandfatheredMappings(JSContext* cx) {
|
|||
}""")
|
||||
|
||||
|
||||
@contextlib.contextmanager
def TemporaryDirectory():
    """ Context manager which creates a temporary directory, yields its path
        and removes the directory tree again when the block is left, whether
        it is left normally or through an exception.
    """
    scratchPath = tempfile.mkdtemp()
    try:
        yield scratchPath
    finally:
        shutil.rmtree(scratchPath)
|
||||
|
||||
|
||||
def readSupplementalData(supplemental_dtd_file, supplemental_metadata_file, likely_subtags_file):
|
||||
def readSupplementalData(core_file):
|
||||
""" Reads CLDR Supplemental Data and extracts information for Intl.js.
|
||||
|
||||
Information extracted:
|
||||
|
@ -542,15 +512,6 @@ def readSupplementalData(supplemental_dtd_file, supplemental_metadata_file, like
|
|||
"""
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
# <!ATTLIST version cldrVersion CDATA #FIXED "36" >
|
||||
re_cldr_version = re.compile(
|
||||
r"""<!ATTLIST version cldrVersion CDATA #FIXED "(?P<version>[\d|\.]+)" >""")
|
||||
|
||||
with io.open(supplemental_dtd_file, mode="r", encoding="utf-8") as f:
|
||||
version_match = re_cldr_version.search(f.read())
|
||||
assert version_match is not None, "CLDR version string not found"
|
||||
cldr_version = version_match.group("version")
|
||||
|
||||
# From Unicode BCP 47 locale identifier <https://unicode.org/reports/tr35/>.
|
||||
re_unicode_language_id = re.compile(
|
||||
r"""
|
||||
|
@ -656,7 +617,7 @@ def readSupplementalData(supplemental_dtd_file, supplemental_metadata_file, like
|
|||
script.title() if script else None,
|
||||
region.upper() if region else None)
|
||||
|
||||
tree = ET.parse(supplemental_metadata_file)
|
||||
tree = ET.parse(core_file.open("common/supplemental/supplementalMetadata.xml"))
|
||||
|
||||
for language_alias in tree.iterfind(".//languageAlias"):
|
||||
type = bcp47_id(language_alias.get("type"))
|
||||
|
@ -706,7 +667,7 @@ def readSupplementalData(supplemental_dtd_file, supplemental_metadata_file, like
|
|||
), "{} invalid region subtags".format(replacement)
|
||||
complex_region_mappings[type] = replacements
|
||||
|
||||
tree = ET.parse(likely_subtags_file)
|
||||
tree = ET.parse(core_file.open("common/supplemental/likelySubtags.xml"))
|
||||
|
||||
likely_subtags = {}
|
||||
|
||||
|
@ -767,8 +728,7 @@ def readSupplementalData(supplemental_dtd_file, supplemental_metadata_file, like
|
|||
else:
|
||||
region_mappings[deprecated_region] = default
|
||||
|
||||
return {"version": cldr_version,
|
||||
"grandfatheredMappings": grandfathered_mappings,
|
||||
return {"grandfatheredMappings": grandfathered_mappings,
|
||||
"languageMappings": language_mappings,
|
||||
"complexLanguageMappings": complex_language_mappings,
|
||||
"regionMappings": region_mappings,
|
||||
|
@ -777,10 +737,154 @@ def readSupplementalData(supplemental_dtd_file, supplemental_metadata_file, like
|
|||
}
|
||||
|
||||
|
||||
def readUnicodeExtensions(core_file):
    """ Reads the Unicode extension ("u") type replacements from CLDR.

        core_file is an opened zip archive (only its namelist() and open()
        methods are used) which contains the "common/bcp47/*.xml" files and
        "common/supplemental/supplementalMetadata.xml".

        Returns a dict which maps each Unicode extension key to a dict of
        deprecated/alias type name -> preferred type name.
    """
    import xml.etree.ElementTree as ET

    # Match all xml-files in the BCP 47 directory.
    bcpFileRE = re.compile(r"^common/bcp47/.+\.xml$")

    # https://www.unicode.org/reports/tr35/#Unicode_locale_identifier
    #
    # type = alphanum{3,8} (sep alphanum{3,8})* ;
    typeRE = re.compile(r"^[a-z0-9]{3,8}(-[a-z0-9]{3,8})*$")

    # Mapping from Unicode extension types to dict of deprecated to
    # preferred values.
    mapping = {}

    def readBCP47File(xml_file):
        tree = ET.parse(xml_file)
        for keyword in tree.iterfind(".//keyword/key"):
            # Skip over keywords whose extension is not "u".
            if keyword.get("extension", "u") != "u":
                continue

            extension_name = keyword.get("name")

            for type_element in keyword.iterfind("type"):
                # <https://unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files>:
                #
                # The key or type name used by Unicode locale extension with 'u' extension
                # syntax or the 't' extensions syntax. When alias below is absent, this name
                # can be also used with the old style "@key=type" syntax.
                name = type_element.get("name")

                # Ignore the special name:
                # - <https://unicode.org/reports/tr35/#CODEPOINTS>
                # - <https://unicode.org/reports/tr35/#REORDER_CODE>
                # - <https://unicode.org/reports/tr35/#RG_KEY_VALUE>
                # - <https://unicode.org/reports/tr35/#SUBDIVISION_CODE>
                # - <https://unicode.org/reports/tr35/#PRIVATE_USE>
                if name in ("CODEPOINTS", "REORDER_CODE", "RG_KEY_VALUE", "SUBDIVISION_CODE",
                            "PRIVATE_USE"):
                    continue

                # All other names should match the 'type' production.
                assert typeRE.match(name) is not None, (
                       "{} matches the 'type' production".format(name))

                # <https://unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files>:
                #
                # The preferred value of the deprecated key, type or attribute element.
                # When a key, type or attribute element is deprecated, this attribute is
                # used for specifying a new canonical form if available.
                preferred = type_element.get("preferred")

                # <https://unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files>:
                #
                # The BCP 47 form is the canonical form, and recommended. Other aliases are
                # included only for backwards compatibility.
                alias = type_element.get("alias")

                # <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>
                #
                # Use the bcp47 data to replace keys, types, tfields, and tvalues by their
                # canonical forms. See Section 3.6.4 (U Extension Data Files) and Section
                # 3.7.1 (T Extension Data Files). The aliases are in the alias attribute
                # value, while the canonical is in the name attribute value.

                # 'preferred' contains the new preferred name, 'alias' the compatibility
                # name, but then there's this entry where 'preferred' and 'alias' are the
                # same. So which one to choose? Assume 'preferred' is the actual canonical
                # name.
                #
                # <type name="islamicc"
                #       description="Civil (algorithmic) Arabic calendar"
                #       deprecated="true"
                #       preferred="islamic-civil"
                #       alias="islamic-civil"/>

                if preferred is not None:
                    assert typeRE.match(preferred), preferred
                    mapping.setdefault(extension_name, {})[name] = preferred

                if alias is not None:
                    for alias_name in alias.lower().split(" "):
                        # Ignore alias entries which don't match the 'type' production.
                        if typeRE.match(alias_name) is None:
                            continue

                        # See comment above when 'alias' and 'preferred' are both present.
                        if (preferred is not None and
                            name in mapping[extension_name]):
                            continue

                        # Skip over entries where 'name' and 'alias' are equal.
                        #
                        # <type name="pst8pdt"
                        #       description="POSIX style time zone for US Pacific Time"
                        #       alias="PST8PDT"
                        #       since="1.8"/>
                        if name == alias_name:
                            continue

                        mapping.setdefault(extension_name, {})[alias_name] = name

    def readSupplementalMetadata(xml_file):
        # Find subdivision and region replacements.
        #
        # <https://www.unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>
        #
        # Replace aliases in special key values:
        #   - If there is an 'sd' or 'rg' key, replace any subdivision alias
        #     in its value in the same way, using subdivisionAlias data.
        tree = ET.parse(xml_file)
        for alias in tree.iterfind(".//subdivisionAlias"):
            alias_type = alias.get("type")
            assert typeRE.match(alias_type) is not None, (
                   "{} matches the 'type' production".format(alias_type))

            # Take the first replacement when multiple ones are present.
            replacement = alias.get("replacement").split(" ")[0].lower()

            # Skip over invalid replacements.
            #
            # <subdivisionAlias type="fi01" replacement="AX" reason="overlong"/>
            #
            # It's not entirely clear to me if CLDR actually wants to use
            # "axzzzz" as the replacement for this case.
            if typeRE.match(replacement) is None:
                continue

            # 'subdivisionAlias' applies to 'rg' and 'sd' keys.
            mapping.setdefault("rg", {})[alias_type] = replacement
            mapping.setdefault("sd", {})[alias_type] = replacement

    # Close every archive member as soon as it has been parsed instead of
    # leaving the handles open until they are garbage collected.
    for name in core_file.namelist():
        if bcpFileRE.match(name):
            with core_file.open(name) as bcp_file:
                readBCP47File(bcp_file)

    with core_file.open("common/supplemental/supplementalMetadata.xml") as metadata_file:
        readSupplementalMetadata(metadata_file)

    return mapping
|
||||
|
||||
|
||||
def writeCLDRLanguageTagData(println, data, url):
|
||||
""" Writes the language tag data to the Intl data file. """
|
||||
|
||||
println(generatedFileWarning)
|
||||
println(u"// Version: CLDR-{}".format(data["version"]))
|
||||
println(u"// URL: {}".format(url))
|
||||
|
||||
println(u"""
|
||||
#include "mozilla/Assertions.h"
|
||||
|
@ -797,6 +901,8 @@ def writeCLDRLanguageTagData(println, data, url):
|
|||
#include "util/Text.h"
|
||||
#include "vm/JSContext.h"
|
||||
|
||||
using namespace js::intl::LanguageTagLimits;
|
||||
|
||||
using ConstCharRange = mozilla::Range<const char>;
|
||||
|
||||
template <size_t Length, size_t TagLength, size_t SubtagLength>
|
||||
|
@ -839,6 +945,7 @@ static inline const char* SearchReplacement(
|
|||
complex_language_mappings = data["complexLanguageMappings"]
|
||||
region_mappings = data["regionMappings"]
|
||||
complex_region_mappings = data["complexRegionMappings"]
|
||||
unicode_mappings = data["unicodeMappings"]
|
||||
|
||||
# unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
|
||||
language_maxlength = 8
|
||||
|
@ -876,10 +983,14 @@ static inline const char* SearchReplacement(
|
|||
"Canonicalize grandfathered locale identifiers.", source,
|
||||
url)
|
||||
|
||||
writeUnicodeExtensionsMappings(println, unicode_mappings)
|
||||
|
||||
|
||||
def writeCLDRLanguageTagLikelySubtagsTest(println, data, url):
|
||||
""" Writes the likely-subtags test file. """
|
||||
|
||||
println(generatedFileWarning)
|
||||
|
||||
source = u"CLDR Supplemental Data, version {}".format(data["version"])
|
||||
language_mappings = data["languageMappings"]
|
||||
complex_language_mappings = data["complexLanguageMappings"]
|
||||
|
@ -1004,87 +1115,45 @@ if (typeof reportCompare === "function")
|
|||
|
||||
def updateCLDRLangTags(args):
|
||||
""" Update the LanguageTagGenerated.cpp file. """
|
||||
version = args.version
|
||||
url = args.url
|
||||
branch = args.branch
|
||||
revision = args.revision
|
||||
out = args.out
|
||||
files = args.files
|
||||
filename = args.file
|
||||
|
||||
url = url.replace("<VERSION>", version)
|
||||
|
||||
print("Arguments:")
|
||||
print("\tCLDR version: %s" % version)
|
||||
print("\tDownload url: %s" % url)
|
||||
print("\tBranch: %s" % branch)
|
||||
print("\tRevision: %s" % revision)
|
||||
print("\tLocal supplemental data and likely subtags: %s" % files)
|
||||
if filename is not None:
|
||||
print("\tLocal CLDR core.zip file: %s" % filename)
|
||||
print("\tOutput file: %s" % out)
|
||||
print("")
|
||||
|
||||
if files:
|
||||
if len(files) != 3:
|
||||
raise Exception("Expected three files, but got: {}".format(files))
|
||||
data = {
|
||||
"version": version,
|
||||
}
|
||||
|
||||
print(("Always make sure you have the newest ldmlSupplemental.dtd, "
|
||||
"supplementalMetadata.xml, and likelySubtags.xml!"))
|
||||
def readFiles(cldr_file):
|
||||
with ZipFile(cldr_file) as zip_file:
|
||||
data.update(readSupplementalData(zip_file))
|
||||
data["unicodeMappings"] = readUnicodeExtensions(zip_file)
|
||||
|
||||
supplemental_dtd_file = files[0]
|
||||
supplemental_metadata_file = files[1]
|
||||
likely_subtags_file = files[2]
|
||||
print("Processing CLDR data...")
|
||||
if filename is not None:
|
||||
print("Always make sure you have the newest CLDR core.zip!")
|
||||
with open(filename, "rb") as cldr_file:
|
||||
readFiles(cldr_file)
|
||||
else:
|
||||
print("Downloading CLDR supplemental data...")
|
||||
|
||||
supplemental_dtd_filename = "ldmlSupplemental.dtd"
|
||||
supplemental_dtd_path = "common/dtd/{}".format(supplemental_dtd_filename)
|
||||
supplemental_dtd_file = os.path.join(os.getcwd(), supplemental_dtd_filename)
|
||||
|
||||
supplemental_metadata_filename = "supplementalMetadata.xml"
|
||||
supplemental_metadata_path = "common/supplemental/{}".format(
|
||||
supplemental_metadata_filename)
|
||||
supplemental_metadata_file = os.path.join(os.getcwd(), supplemental_metadata_filename)
|
||||
|
||||
likely_subtags_filename = "likelySubtags.xml"
|
||||
likely_subtags_path = "common/supplemental/{}".format(likely_subtags_filename)
|
||||
likely_subtags_file = os.path.join(os.getcwd(), likely_subtags_filename)
|
||||
|
||||
# Try to download the raw file directly from GitHub if possible.
|
||||
split = urlsplit(url)
|
||||
if split.netloc == "github.com" and split.path.endswith(".git") and revision == "HEAD":
|
||||
def download(path, file):
|
||||
urlpath = "{}/raw/{}/{}".format(urlsplit(url).path[:-4], branch, path)
|
||||
raw_url = urlunsplit((split.scheme, split.netloc, urlpath, split.query,
|
||||
split.fragment))
|
||||
|
||||
with closing(urlopen(raw_url)) as reader:
|
||||
text = reader.read().decode("utf-8")
|
||||
with io.open(file, "w", encoding="utf-8") as saved_file:
|
||||
saved_file.write(text)
|
||||
|
||||
download(supplemental_dtd_path, supplemental_dtd_file)
|
||||
download(supplemental_metadata_path, supplemental_metadata_file)
|
||||
download(likely_subtags_path, likely_subtags_file)
|
||||
else:
|
||||
# Download the requested branch in a temporary directory.
|
||||
with TemporaryDirectory() as inDir:
|
||||
if revision == "HEAD":
|
||||
subprocess.check_call(["git", "clone", "--depth=1",
|
||||
"--branch=%s" % branch, url, inDir])
|
||||
else:
|
||||
subprocess.check_call(["git", "clone", "--single-branch",
|
||||
"--branch=%s" % branch, url, inDir])
|
||||
subprocess.check_call(["git", "-C", inDir, "reset", "--hard", revision])
|
||||
|
||||
shutil.copyfile(os.path.join(inDir, supplemental_dtd_path),
|
||||
supplemental_dtd_file)
|
||||
shutil.copyfile(os.path.join(inDir, supplemental_metadata_path),
|
||||
supplemental_metadata_file)
|
||||
shutil.copyfile(os.path.join(inDir, likely_subtags_path), likely_subtags_file)
|
||||
|
||||
print("Processing CLDR supplemental data...")
|
||||
data = readSupplementalData(supplemental_dtd_file,
|
||||
supplemental_metadata_file,
|
||||
likely_subtags_file)
|
||||
print("Downloading CLDR core.zip...")
|
||||
with closing(urlopen(url)) as cldr_file:
|
||||
cldr_data = io.BytesIO(cldr_file.read())
|
||||
readFiles(cldr_data)
|
||||
|
||||
print("Writing Intl data...")
|
||||
with io.open(out, mode="w", encoding="utf-8", newline="") as f:
|
||||
println = partial(print, file=f)
|
||||
|
||||
writeCLDRLanguageTagData(println, data, url)
|
||||
|
||||
print("Writing Intl test data...")
|
||||
|
@ -1095,7 +1164,6 @@ def updateCLDRLangTags(args):
|
|||
|
||||
println(u"// |reftest| skip-if(!this.hasOwnProperty('Intl')||"
|
||||
u"(!this.Intl.Locale&&!this.hasOwnProperty('addIntlExtras')))")
|
||||
println(generatedFileWarning)
|
||||
writeCLDRLanguageTagLikelySubtagsTest(println, data, url)
|
||||
|
||||
|
||||
|
@ -1948,61 +2016,8 @@ def updateCurrency(topsrcdir, args):
|
|||
updateFrom(currencyTmpFile.name)
|
||||
|
||||
|
||||
def writeUnicodeExtensionsFile(version, url, mapping, out):
|
||||
with io.open(out, mode="w", encoding="utf-8", newline="") as f:
|
||||
println = partial(print, file=f)
|
||||
|
||||
println(generatedFileWarning)
|
||||
println(u"// Version: CLDR-{}".format(version))
|
||||
println(u"// URL: {}".format(url))
|
||||
|
||||
println(u"""
|
||||
/**
|
||||
* Mapping from deprecated BCP 47 Unicode extension types to their preferred
|
||||
* values.
|
||||
*
|
||||
* Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
|
||||
*/""")
|
||||
println(u"var deprecatedUnicodeExtensionTypes = {")
|
||||
for ext_name in sorted(mapping):
|
||||
println(u" {}: {{".format(ext_name))
|
||||
is_first = True
|
||||
for type in sorted(mapping[ext_name]):
|
||||
mapped = mapping[ext_name][type]
|
||||
has_description = mapped["description"] is not None
|
||||
|
||||
if not is_first and has_description:
|
||||
println(u"")
|
||||
is_first = False
|
||||
|
||||
if has_description:
|
||||
println(u" // {}".format(mapped["description"]))
|
||||
println(u" \"{}\": \"{}\",".format(type, mapped["preferred"]))
|
||||
println(u" },")
|
||||
println(u"};")
|
||||
|
||||
with io.open(os.path.splitext(out)[0] + ".cpp", mode="w", encoding="utf-8", newline="") as f:
|
||||
println = partial(print, file=f)
|
||||
|
||||
println(generatedFileWarning)
|
||||
println(u"// Version: CLDR-{}".format(version))
|
||||
println(u"// URL: {}".format(url))
|
||||
|
||||
println(u"""
|
||||
#include "mozilla/Assertions.h"
|
||||
#include "mozilla/Range.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
|
||||
#include "builtin/intl/LanguageTag.h"
|
||||
|
||||
using namespace js::intl::LanguageTagLimits;
|
||||
|
||||
using ConstCharRange = mozilla::Range<const char>;
|
||||
|
||||
def writeUnicodeExtensionsMappings(println, mapping):
|
||||
println(u"""
|
||||
template <size_t Length>
|
||||
static inline bool IsUnicodeKey(const ConstCharRange& key,
|
||||
const char (&str)[Length]) {
|
||||
|
@ -2059,9 +2074,7 @@ static inline const char* SearchReplacement(const char* (&types)[Length],
|
|||
}
|
||||
return nullptr;
|
||||
}
|
||||
""".rstrip("\n"))
|
||||
|
||||
println(u"""
|
||||
/**
|
||||
* Mapping from deprecated BCP 47 Unicode extension types to their preferred
|
||||
* values.
|
||||
|
@ -2089,256 +2102,72 @@ const char* js::intl::LanguageTag::replaceUnicodeExtensionType(
|
|||
isAsciiLowercaseAlphanumericOrDash));
|
||||
""")
|
||||
|
||||
def to_hash_key(replacements):
|
||||
return str(sorted([str((k, v["preferred"])) for (k, v) in replacements.items()]))
|
||||
def to_hash_key(replacements):
|
||||
return str(sorted(replacements.items()))
|
||||
|
||||
def write_array(subtags, name, length):
|
||||
max_entries = (80 - len(" ")) // (length + len('"", '))
|
||||
def write_array(subtags, name, length):
|
||||
max_entries = (80 - len(" ")) // (length + len('"", '))
|
||||
|
||||
println(u" static const char* {}[{}] = {{".format(name, len(subtags)))
|
||||
println(u" static const char* {}[{}] = {{".format(name, len(subtags)))
|
||||
|
||||
for entries in grouper(subtags, max_entries):
|
||||
entries = (u"\"{}\"".format(tag).rjust(length + 2)
|
||||
for tag in entries if tag is not None)
|
||||
println(u" {},".format(u", ".join(entries)))
|
||||
for entries in grouper(subtags, max_entries):
|
||||
entries = (u"\"{}\"".format(tag).rjust(length + 2)
|
||||
for tag in entries if tag is not None)
|
||||
println(u" {},".format(u", ".join(entries)))
|
||||
|
||||
println(u" };")
|
||||
println(u" };")
|
||||
|
||||
# Merge duplicate keys.
|
||||
key_aliases = {}
|
||||
for (key, replacements) in sorted(mapping.items(), key=itemgetter(0)):
|
||||
hash_key = to_hash_key(replacements)
|
||||
if hash_key not in key_aliases:
|
||||
key_aliases[hash_key] = []
|
||||
else:
|
||||
key_aliases[hash_key].append(key)
|
||||
# Merge duplicate keys.
|
||||
key_aliases = {}
|
||||
for (key, replacements) in sorted(mapping.items(), key=itemgetter(0)):
|
||||
hash_key = to_hash_key(replacements)
|
||||
if hash_key not in key_aliases:
|
||||
key_aliases[hash_key] = []
|
||||
else:
|
||||
key_aliases[hash_key].append(key)
|
||||
|
||||
first_key = True
|
||||
for (key, replacements) in sorted(mapping.items(), key=itemgetter(0)):
|
||||
hash_key = to_hash_key(replacements)
|
||||
if key in key_aliases[hash_key]:
|
||||
continue
|
||||
first_key = True
|
||||
for (key, replacements) in sorted(mapping.items(), key=itemgetter(0)):
|
||||
hash_key = to_hash_key(replacements)
|
||||
if key in key_aliases[hash_key]:
|
||||
continue
|
||||
|
||||
cond = (u"IsUnicodeKey(key, \"{}\")".format(k) for k in [key] + key_aliases[hash_key])
|
||||
cond = (u"IsUnicodeKey(key, \"{}\")".format(k) for k in [key] + key_aliases[hash_key])
|
||||
|
||||
if_kind = u"if" if first_key else u"else if"
|
||||
cond = (u" ||\n" + u" " * (2 + len(if_kind) + 2)).join(cond)
|
||||
println(u"""
|
||||
if_kind = u"if" if first_key else u"else if"
|
||||
cond = (u" ||\n" + u" " * (2 + len(if_kind) + 2)).join(cond)
|
||||
println(u"""
|
||||
{} ({}) {{""".format(if_kind, cond).strip("\n"))
|
||||
first_key = False
|
||||
first_key = False
|
||||
|
||||
replacements = sorted(replacements.items(), key=itemgetter(0))
|
||||
replacements = sorted(replacements.items(), key=itemgetter(0))
|
||||
|
||||
if len(replacements) > 4:
|
||||
types = [t for (t, _) in replacements]
|
||||
preferred = [r["preferred"] for (_, r) in replacements]
|
||||
max_len = max(len(k) for k in types + preferred)
|
||||
if len(replacements) > 4:
|
||||
types = [t for (t, _) in replacements]
|
||||
preferred = [r for (_, r) in replacements]
|
||||
max_len = max(len(k) for k in types + preferred)
|
||||
|
||||
write_array(types, "types", max_len)
|
||||
write_array(preferred, "aliases", max_len)
|
||||
println(u"""
|
||||
write_array(types, "types", max_len)
|
||||
write_array(preferred, "aliases", max_len)
|
||||
println(u"""
|
||||
return SearchReplacement(types, aliases, type);
|
||||
""".strip("\n"))
|
||||
else:
|
||||
for (type, replacement) in replacements:
|
||||
println(u"""
|
||||
else:
|
||||
for (type, replacement) in replacements:
|
||||
println(u"""
|
||||
if (IsUnicodeType(type, "{}")) {{
|
||||
return "{}";
|
||||
}}""".format(type, replacement["preferred"]).strip("\n"))
|
||||
|
||||
println(u"""
|
||||
}""".lstrip("\n"))
|
||||
}}""".format(type, replacement).strip("\n"))
|
||||
|
||||
println(u"""
|
||||
}""".lstrip("\n"))
|
||||
|
||||
println(u"""
|
||||
return nullptr;
|
||||
}
|
||||
""".strip("\n"))
|
||||
|
||||
|
||||
def updateUnicodeExtensions(args):
|
||||
""" Update the UnicodeExtensionsGenerated.js file. """
|
||||
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
version = args.version
|
||||
url = args.url
|
||||
out = args.out
|
||||
filename = args.file
|
||||
|
||||
url = url.replace("<VERSION>", version)
|
||||
|
||||
print("Arguments:")
|
||||
print("\tCLDR version: %s" % version)
|
||||
print("\tDownload url: %s" % url)
|
||||
if filename is not None:
|
||||
print("\tLocal CLDR core.zip file: %s" % filename)
|
||||
print("\tOutput file: %s" % out)
|
||||
print("")
|
||||
|
||||
def updateFrom(data):
|
||||
# Match all xml-files in the BCP 47 directory.
|
||||
bcpFileRE = re.compile(r"^common/bcp47/.+\.xml$")
|
||||
|
||||
# https://www.unicode.org/reports/tr35/#Unicode_locale_identifier
|
||||
#
|
||||
# type = alphanum{3,8} (sep alphanum{3,8})* ;
|
||||
typeRE = re.compile(r"^[a-z0-9]{3,8}(-[a-z0-9]{3,8})*$")
|
||||
|
||||
# Mapping from Unicode extension types to dict of deprecated to
|
||||
# preferred values.
|
||||
mapping = {}
|
||||
|
||||
with ZipFile(data) as zip_file:
|
||||
for name in zip_file.namelist():
|
||||
if not bcpFileRE.match(name):
|
||||
continue
|
||||
|
||||
tree = ET.parse(zip_file.open(name))
|
||||
for keyword in tree.iterfind(".//keyword/key"):
|
||||
# Skip over keywords whose extension is not "u".
|
||||
if keyword.get("extension", "u") != "u":
|
||||
continue
|
||||
|
||||
extension_name = keyword.get("name")
|
||||
|
||||
for type in keyword.iterfind("type"):
|
||||
# <https://unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files>:
|
||||
#
|
||||
# The key or type name used by Unicode locale extension with 'u' extension
|
||||
# syntax or the 't' extensions syntax. When alias below is absent, this
|
||||
# name can be also used with the old style "@key=type" syntax.
|
||||
name = type.get("name")
|
||||
|
||||
# Ignore the special name:
|
||||
# - <https://unicode.org/reports/tr35/#CODEPOINTS>
|
||||
# - <https://unicode.org/reports/tr35/#REORDER_CODE>
|
||||
# - <https://unicode.org/reports/tr35/#RG_KEY_VALUE>
|
||||
# - <https://unicode.org/reports/tr35/#SUBDIVISION_CODE>
|
||||
# - <https://unicode.org/reports/tr35/#PRIVATE_USE>
|
||||
if name in ("CODEPOINTS", "REORDER_CODE", "RG_KEY_VALUE",
|
||||
"SUBDIVISION_CODE", "PRIVATE_USE"):
|
||||
continue
|
||||
|
||||
# All other names should match the 'type' production.
|
||||
assert typeRE.match(name) is not None, (
|
||||
"{} matches the 'type' production".format(name))
|
||||
|
||||
# <https://unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files>:
|
||||
#
|
||||
# The preferred value of the deprecated key, type or attribute element.
|
||||
# When a key, type or attribute element is deprecated, this attribute is
|
||||
# used for specifying a new canonical form if available.
|
||||
preferred = type.get("preferred")
|
||||
|
||||
# <https://unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files>:
|
||||
#
|
||||
# The BCP 47 form is the canonical form, and recommended. Other aliases are
|
||||
# included only for backwards compatibility.
|
||||
alias = type.get("alias")
|
||||
|
||||
# <https://unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files>:
|
||||
#
|
||||
# The description of the key, type or attribute element.
|
||||
description = type.get("description")
|
||||
|
||||
# <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>
|
||||
#
|
||||
# Use the bcp47 data to replace keys, types, tfields, and tvalues by their
|
||||
# canonical forms. See Section 3.6.4 U Extension Data Files) and Section
|
||||
# 3.7.1 T Extension Data Files. The aliases are in the alias attribute
|
||||
# value, while the canonical is in the name attribute value.
|
||||
|
||||
# 'preferred' contains the new preferred name, 'alias' the compatibility
|
||||
# name, but then there's this entry where 'preferred' and 'alias' are the
|
||||
# same. So which one to choose? Assume 'preferred' is the actual canonical
|
||||
# name.
|
||||
#
|
||||
# <type name="islamicc"
|
||||
# description="Civil (algorithmic) Arabic calendar"
|
||||
# deprecated="true"
|
||||
# preferred="islamic-civil"
|
||||
# alias="islamic-civil"/>
|
||||
|
||||
if preferred is not None:
|
||||
assert typeRE.match(preferred), preferred
|
||||
mapping.setdefault(extension_name, {})[name] = {
|
||||
"preferred": preferred,
|
||||
"description": description,
|
||||
}
|
||||
|
||||
if alias is not None:
|
||||
for alias_name in alias.lower().split(" "):
|
||||
# Ignore alias entries which don't match the 'type' production.
|
||||
if typeRE.match(alias_name) is None:
|
||||
continue
|
||||
|
||||
# See comment above when 'alias' and 'preferred' are both present.
|
||||
if (preferred is not None and
|
||||
name in mapping[extension_name]):
|
||||
continue
|
||||
|
||||
# Skip over entries where 'name' and 'alias' are equal.
|
||||
#
|
||||
# <type name="pst8pdt"
|
||||
# description="POSIX style time zone for US Pacific Time"
|
||||
# alias="PST8PDT"
|
||||
# since="1.8"/>
|
||||
if name == alias_name:
|
||||
continue
|
||||
|
||||
mapping.setdefault(extension_name, {})[alias_name] = {
|
||||
"preferred": name,
|
||||
"description": description,
|
||||
}
|
||||
|
||||
# Find subdivision and region replacements.
|
||||
#
|
||||
# <https://www.unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>
|
||||
#
|
||||
# Replace aliases in special key values:
|
||||
# - If there is an 'sd' or 'rg' key, replace any subdivision alias
|
||||
# in its value in the same way, using subdivisionAlias data.
|
||||
tree = ET.parse(zip_file.open("common/supplemental/supplementalMetadata.xml"))
|
||||
for alias in tree.iterfind(".//subdivisionAlias"):
|
||||
type = alias.get("type")
|
||||
assert typeRE.match(type) is not None, (
|
||||
"{} matches the 'type' production".format(type))
|
||||
|
||||
# Take the first replacement when multiple ones are present.
|
||||
replacement = alias.get("replacement").split(" ")[0].lower()
|
||||
|
||||
# Skip over invalid replacements.
|
||||
#
|
||||
# <subdivisionAlias type="fi01" replacement="AX" reason="overlong"/>
|
||||
#
|
||||
# It's not entirely clear to me if CLDR actually wants to use
|
||||
# "axzzzz" as the replacement for this case.
|
||||
if typeRE.match(replacement) is None:
|
||||
continue
|
||||
|
||||
# 'subdivisionAlias' applies to 'rg' and 'sd' keys.
|
||||
mapping.setdefault("rg", {})[type] = {
|
||||
"preferred": replacement,
|
||||
"description": None,
|
||||
}
|
||||
mapping.setdefault("sd", {})[type] = {
|
||||
"preferred": replacement,
|
||||
"description": None,
|
||||
}
|
||||
|
||||
writeUnicodeExtensionsFile(version, url, mapping, out)
|
||||
|
||||
if filename is not None:
|
||||
print("Always make sure you have the newest CLDR core.zip!")
|
||||
with open(filename, "rb") as cldr_file:
|
||||
updateFrom(cldr_file)
|
||||
else:
|
||||
print("Downloading CLDR core.zip...")
|
||||
with closing(urlopen(url)) as cldr_file:
|
||||
cldr_data = io.BytesIO(cldr_file.read())
|
||||
updateFrom(cldr_data)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
|
||||
|
@ -2359,21 +2188,21 @@ if __name__ == "__main__":
|
|||
|
||||
parser_cldr_tags = subparsers.add_parser("langtags",
|
||||
help="Update CLDR language tags data")
|
||||
parser_cldr_tags.add_argument("--version",
|
||||
metavar="VERSION",
|
||||
required=True,
|
||||
help="CLDR version number")
|
||||
parser_cldr_tags.add_argument("--url",
|
||||
metavar="URL",
|
||||
default="https://github.com/unicode-org/cldr.git",
|
||||
help="URL to git repository (default: %(default)s)")
|
||||
parser_cldr_tags.add_argument("--branch", default="latest",
|
||||
help="Git branch (default: %(default)s)")
|
||||
parser_cldr_tags.add_argument("--revision", default="HEAD",
|
||||
help="Git revision (default: %(default)s)")
|
||||
default="https://unicode.org/Public/cldr/<VERSION>/core.zip",
|
||||
type=EnsureHttps,
|
||||
help="Download url CLDR data (default: %(default)s)")
|
||||
parser_cldr_tags.add_argument("--out",
|
||||
default="LanguageTagGenerated.cpp",
|
||||
help="Output file (default: %(default)s)")
|
||||
parser_cldr_tags.add_argument("files",
|
||||
nargs="*",
|
||||
help="Local ldmlSupplemental.dtd, supplementalMetadata.xml, "
|
||||
"and likelySubtags.xml files, if omitted uses <URL>")
|
||||
parser_cldr_tags.add_argument("file",
|
||||
nargs="?",
|
||||
help="Local cldr-core.zip file, if omitted uses <URL>")
|
||||
parser_cldr_tags.set_defaults(func=updateCLDRLangTags)
|
||||
|
||||
parser_tz = subparsers.add_parser("tzdata", help="Update tzdata")
|
||||
|
@ -2409,23 +2238,5 @@ if __name__ == "__main__":
|
|||
help="Local currency code list file, if omitted uses <URL>")
|
||||
parser_currency.set_defaults(func=partial(updateCurrency, topsrcdir))
|
||||
|
||||
parser_unicode_ext = subparsers.add_parser("unicode-ext", help="Update Unicode extensions")
|
||||
parser_unicode_ext.add_argument("--version",
|
||||
metavar="VERSION",
|
||||
required=True,
|
||||
help="CLDR version number")
|
||||
parser_unicode_ext.add_argument("--url",
|
||||
metavar="URL",
|
||||
default="https://unicode.org/Public/cldr/<VERSION>/core.zip",
|
||||
type=EnsureHttps,
|
||||
help="Download url CLDR data (default: %(default)s)")
|
||||
parser_unicode_ext.add_argument("--out",
|
||||
default="UnicodeExtensionsGenerated.js",
|
||||
help="Output file (default: %(default)s)")
|
||||
parser_unicode_ext.add_argument("file",
|
||||
nargs="?",
|
||||
help="Local cldr-core.zip file, if omitted uses <URL>")
|
||||
parser_unicode_ext.set_defaults(func=updateUnicodeExtensions)
|
||||
|
||||
args = parser.parse_args()
|
||||
args.func(args)
|
||||
|
|
|
@ -387,7 +387,6 @@ if CONFIG['ENABLE_INTL_API']:
|
|||
'builtin/intl/PluralRules.cpp',
|
||||
'builtin/intl/RelativeTimeFormat.cpp',
|
||||
'builtin/intl/SharedIntlData.cpp',
|
||||
'builtin/intl/UnicodeExtensionsGenerated.cpp',
|
||||
]
|
||||
|
||||
if CONFIG['MOZ_INSTRUMENTS']:
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
// Extracted from likelySubtags.xml.
|
||||
// Derived from CLDR Supplemental Data, version 35.1.
|
||||
// https://github.com/unicode-org/cldr.git
|
||||
// https://unicode.org/Public/cldr/35.1/core.zip
|
||||
var maxLikelySubtags = {
|
||||
"aa": "aa-Latn-ET",
|
||||
"aai": "aai-Latn-ZZ",
|
||||
|
@ -1842,7 +1842,7 @@ var maxLikelySubtags = {
|
|||
|
||||
// Extracted from likelySubtags.xml.
|
||||
// Derived from CLDR Supplemental Data, version 35.1.
|
||||
// https://github.com/unicode-org/cldr.git
|
||||
// https://unicode.org/Public/cldr/35.1/core.zip
|
||||
var minLikelySubtags = {
|
||||
"aa-Latn-DJ": "aa-DJ",
|
||||
"aa-Latn-ET": "aa",
|
||||
|
|
Загрузка…
Ссылка в новой задаче