Bug 1570370 - Part 9: Move UnicodeExtensionsGenerated.cpp into LanguageTagGenerated.cpp. r=jwalden

Differential Revision: https://phabricator.services.mozilla.com/D40075

--HG--
extra : moz-landing-system : lando
This commit is contained in:
André Bargull 2019-10-11 19:26:33 +00:00
Родитель b293863ab3
Коммит c5404a4adc
6 изменённых файлов: 427 добавлений и 631 удалений

Просмотреть файл

@ -6,7 +6,6 @@ config/msvc-stl-wrapper.template.h
# Generated code
js/src/builtin/intl/LanguageTagGenerated.cpp
js/src/builtin/intl/TimeZoneDataGenerated.h
js/src/builtin/intl/UnicodeExtensionsGenerated.cpp
# Don't want to reformat irregexp. bug 1510128
js/src/irregexp/.*

Просмотреть файл

@ -1,4 +1,6 @@
// Generated by make_intl_data.py. DO NOT EDIT.
// Version: CLDR-35.1
// URL: https://unicode.org/Public/cldr/35.1/core.zip
#include "mozilla/Assertions.h"
#include "mozilla/Range.h"
@ -14,6 +16,8 @@
#include "util/Text.h"
#include "vm/JSContext.h"
using namespace js::intl::LanguageTagLimits;
using ConstCharRange = mozilla::Range<const char>;
template <size_t Length, size_t TagLength, size_t SubtagLength>
@ -51,7 +55,7 @@ static inline const char* SearchReplacement(
// Mappings from language subtags to preferred values.
// Derived from CLDR Supplemental Data, version 35.1.
// https://github.com/unicode-org/cldr.git
// https://unicode.org/Public/cldr/35.1/core.zip
bool js::intl::LanguageTag::languageMapping(LanguageSubtag& language) {
MOZ_ASSERT(IsStructurallyValidLanguageTag(language.range()));
@ -156,7 +160,7 @@ bool js::intl::LanguageTag::languageMapping(LanguageSubtag& language) {
// Language subtags with complex mappings.
// Derived from CLDR Supplemental Data, version 35.1.
// https://github.com/unicode-org/cldr.git
// https://unicode.org/Public/cldr/35.1/core.zip
bool js::intl::LanguageTag::complexLanguageMapping(const LanguageSubtag& language) {
MOZ_ASSERT(IsStructurallyValidLanguageTag(language.range()));
@ -177,7 +181,7 @@ bool js::intl::LanguageTag::complexLanguageMapping(const LanguageSubtag& languag
// Mappings from region subtags to preferred values.
// Derived from CLDR Supplemental Data, version 35.1.
// https://github.com/unicode-org/cldr.git
// https://unicode.org/Public/cldr/35.1/core.zip
bool js::intl::LanguageTag::regionMapping(RegionSubtag& region) {
MOZ_ASSERT(IsStructurallyValidRegionTag(region.range()));
@ -276,7 +280,7 @@ bool js::intl::LanguageTag::regionMapping(RegionSubtag& region) {
// Region subtags with complex mappings.
// Derived from CLDR Supplemental Data, version 35.1.
// https://github.com/unicode-org/cldr.git
// https://unicode.org/Public/cldr/35.1/core.zip
bool js::intl::LanguageTag::complexRegionMapping(const RegionSubtag& region) {
MOZ_ASSERT(IsStructurallyValidRegionTag(region.range()));
@ -298,7 +302,7 @@ bool js::intl::LanguageTag::complexRegionMapping(const RegionSubtag& region) {
// Language subtags with complex mappings.
// Derived from CLDR Supplemental Data, version 35.1.
// https://github.com/unicode-org/cldr.git
// https://unicode.org/Public/cldr/35.1/core.zip
void js::intl::LanguageTag::performComplexLanguageMappings() {
MOZ_ASSERT(IsStructurallyValidLanguageTag(language().range()));
@ -333,7 +337,7 @@ void js::intl::LanguageTag::performComplexLanguageMappings() {
// Region subtags with complex mappings.
// Derived from CLDR Supplemental Data, version 35.1.
// https://github.com/unicode-org/cldr.git
// https://unicode.org/Public/cldr/35.1/core.zip
void js::intl::LanguageTag::performComplexRegionMappings() {
MOZ_ASSERT(IsStructurallyValidLanguageTag(language().range()));
MOZ_ASSERT(IsStructurallyValidRegionTag(region().range()));
@ -530,7 +534,7 @@ void js::intl::LanguageTag::performComplexRegionMappings() {
// Canonicalize grandfathered locale identifiers.
// Derived from CLDR Supplemental Data, version 35.1.
// https://github.com/unicode-org/cldr.git
// https://unicode.org/Public/cldr/35.1/core.zip
bool js::intl::LanguageTag::updateGrandfatheredMappings(JSContext* cx) {
// We're mapping regular grandfathered tags to non-grandfathered form here.
// Other tags remain unchanged.
@ -613,3 +617,174 @@ bool js::intl::LanguageTag::updateGrandfatheredMappings(JSContext* cx) {
return true;
}
// Compares the two-character Unicode extension |key| against the string
// literal |str|, e.g. IsUnicodeKey(key, "ca"). The caller asserts |key| is
// exactly UnicodeKeyLength (two) characters long, so only the literal's
// length is validated here — at compile time via the template parameter.
template <size_t Length>
static inline bool IsUnicodeKey(const ConstCharRange& key,
                                const char (&str)[Length]) {
  static_assert(Length == UnicodeKeyLength + 1,
                "Unicode extension key is two characters long");
  return memcmp(key.begin().get(), str, Length - 1) == 0;
}
// Compares the Unicode extension |type| against the string literal |str|.
// Unlike IsUnicodeKey, |type| may have any length, so its length is checked
// at runtime before comparing contents.
template <size_t Length>
static inline bool IsUnicodeType(const ConstCharRange& type,
                                 const char (&str)[Length]) {
  static_assert(Length > UnicodeKeyLength + 1,
                "Unicode extension type contains more than two characters");
  return type.length() == (Length - 1) &&
         memcmp(type.begin().get(), str, Length - 1) == 0;
}
// Three-way comparison of the null-terminated string |a| against the
// non-null-terminated character range |b|: returns zero if they're equal,
// otherwise the (unsigned) difference of the first mismatching characters.
//
// NOTE(review): when |b| is a strict prefix of |a| this returns a NEGATIVE
// number (i.e. |a| orders before the longer-is-greater lexicographic
// position). That is harmless for the generated tables used with
// SearchReplacement — no stored type is a prefix of another — but confirm
// before reusing this comparator elsewhere.
static int32_t CompareUnicodeType(const char* a, const ConstCharRange& b) {
#ifdef DEBUG
  // Only needed for the assertion below; MOZ_ASSERT compiles away in
  // non-debug builds, so the lambda would otherwise be unused.
  auto isNull = [](char c) {
    return c == '\0';
  };
#endif

  MOZ_ASSERT(std::none_of(b.begin().get(), b.end().get(), isNull),
             "unexpected null-character in string");

  using UnsignedChar = unsigned char;
  for (size_t i = 0; i < b.length(); i++) {
    // |a| is zero-terminated and |b| doesn't contain a null-terminator. So if
    // we've reached the end of |a|, the below if-statement will always be true.
    // That ensures we don't read past the end of |a|.
    if (int32_t r = UnsignedChar(a[i]) - UnsignedChar(b[i])) {
      return r;
    }
  }

  // Return zero if both strings are equal or a negative number if |b| is a
  // prefix of |a|.
  return -int32_t(UnsignedChar(a[b.length()]));
}  // Fixed: removed stray ';' after the closing brace (empty declaration).
// Binary-searches |type| in the sorted |types| array; on a match returns the
// replacement string at the same index of |aliases|, otherwise nullptr.
// The shared template parameter enforces that both arrays have equal length;
// |types| must be sorted consistently with CompareUnicodeType.
template <size_t Length>
static inline const char* SearchReplacement(const char* (&types)[Length],
                                            const char* (&aliases)[Length],
                                            const ConstCharRange& type) {
  auto p = std::lower_bound(std::begin(types), std::end(types), type,
                            [](const auto& a, const auto& b) {
                              return CompareUnicodeType(a, b) < 0;
                            });
  if (p != std::end(types) && CompareUnicodeType(*p, type) == 0) {
    return aliases[std::distance(std::begin(types), p)];
  }
  return nullptr;
}
/**
 * Mapping from deprecated BCP 47 Unicode extension types to their preferred
 * values.
 *
 * Returns the preferred value for |type| under the extension |key|, or
 * nullptr when |type| has no replacement. Both arguments must already be
 * lowercased (asserted below).
 *
 * Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
 */
const char* js::intl::LanguageTag::replaceUnicodeExtensionType(
    const ConstCharRange& key, const ConstCharRange& type) {
#ifdef DEBUG
  static auto isAsciiLowercaseAlphanumeric = [](char c) {
    return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c);
  };

  static auto isAsciiLowercaseAlphanumericOrDash = [](char c) {
    return isAsciiLowercaseAlphanumeric(c) || c == '-';
  };
#endif

  MOZ_ASSERT(key.length() == UnicodeKeyLength);
  MOZ_ASSERT(std::all_of(key.begin().get(), key.end().get(),
                         isAsciiLowercaseAlphanumeric));

  MOZ_ASSERT(type.length() > UnicodeKeyLength);
  MOZ_ASSERT(std::all_of(type.begin().get(), type.end().get(),
                         isAsciiLowercaseAlphanumericOrDash));

  // "ca" key: two deprecated names with irregular replacements.
  if (IsUnicodeKey(key, "ca")) {
    if (IsUnicodeType(type, "ethiopic-amete-alem")) {
      return "ethioaa";
    }
    if (IsUnicodeType(type, "islamicc")) {
      return "islamic-civil";
    }
  }

  // Keys whose deprecated "yes" value canonicalizes to "true".
  else if (IsUnicodeKey(key, "kb") ||
           IsUnicodeKey(key, "kc") ||
           IsUnicodeKey(key, "kh") ||
           IsUnicodeKey(key, "kk") ||
           IsUnicodeKey(key, "kn")) {
    if (IsUnicodeType(type, "yes")) {
      return "true";
    }
  }

  else if (IsUnicodeKey(key, "ks")) {
    if (IsUnicodeType(type, "primary")) {
      return "level1";
    }
    if (IsUnicodeType(type, "tertiary")) {
      return "level3";
    }
  }

  else if (IsUnicodeKey(key, "ms")) {
    if (IsUnicodeType(type, "imperial")) {
      return "uksystem";
    }
  }

  // "rg" and "sd" share one subdivision-alias table: the generator emits the
  // same replacement for both keys (see readSupplementalMetadata in
  // make_intl_data.py).
  else if (IsUnicodeKey(key, "rg") ||
           IsUnicodeKey(key, "sd")) {
    static const char* types[116] = {
        "cn11",  "cn12",  "cn13",  "cn14",  "cn15",  "cn21",  "cn22",  "cn23",
        "cn31",  "cn32",  "cn33",  "cn34",  "cn35",  "cn36",  "cn37",  "cn41",
        "cn42",  "cn43",  "cn44",  "cn45",  "cn46",  "cn50",  "cn51",  "cn52",
        "cn53",  "cn54",  "cn61",  "cn62",  "cn63",  "cn64",  "cn65",  "cz10a",
        "cz10b", "cz10c", "cz10d", "cz10e", "cz10f", "cz611", "cz612", "cz613",
        "cz614", "cz615", "cz621", "cz622", "cz623", "cz624", "cz626", "cz627",
        "czjc",  "czjm",  "czka",  "czkr",  "czli",  "czmo",  "czol",  "czpa",
        "czpl",  "czpr",  "czst",  "czus",  "czvy",  "czzl",  "fra",   "frb",
        "frc",   "frd",   "fre",   "frf",   "frg",   "frh",   "fri",   "frj",
        "frk",   "frl",   "frm",   "frn",   "fro",   "frp",   "frq",   "frr",
        "frs",   "frt",   "fru",   "frv",   "laxn",  "lud",   "lug",   "lul",
        "mrnkc", "nzn",   "nzs",   "omba",  "omsh",  "plds",  "plkp",  "pllb",
        "plld",  "pllu",  "plma",  "plmz",  "plop",  "plpd",  "plpk",  "plpm",
        "plsk",  "plsl",  "plwn",  "plwp",  "plzp",  "tteto", "ttrcm", "ttwto",
        "twkhq", "twtnq", "twtpq", "twtxq",
    };
    static const char* aliases[116] = {
        "cnbj",  "cntj",  "cnhe",  "cnsx",  "cnmn",  "cnln",  "cnjl",  "cnhl",
        "cnsh",  "cnjs",  "cnzj",  "cnah",  "cnfj",  "cnjx",  "cnsd",  "cnha",
        "cnhb",  "cnhn",  "cngd",  "cngx",  "cnhi",  "cncq",  "cnsc",  "cngz",
        "cnyn",  "cnxz",  "cnsn",  "cngs",  "cnqh",  "cnnx",  "cnxj",  "cz110",
        "cz111", "cz112", "cz113", "cz114", "cz115", "cz663", "cz632", "cz633",
        "cz634", "cz635", "cz641", "cz642", "cz643", "cz644", "cz646", "cz647",
        "cz31",  "cz64",  "cz41",  "cz52",  "cz51",  "cz80",  "cz71",  "cz53",
        "cz32",  "cz10",  "cz20",  "cz42",  "cz63",  "cz72",  "frges", "frnaq",
        "frara", "frbfc", "frbre", "frcvl", "frges", "frcor", "frbfc", "fridf",
        "frocc", "frnaq", "frges", "frocc", "frhdf", "frnor", "frnor", "frpdl",
        "frhdf", "frnaq", "frpac", "frara", "laxs",  "lucl",  "luec",  "luca",
        "mr13",  "nzauk", "nzcan", "ombj",  "omsj",  "pl02",  "pl04",  "pl08",
        "pl10",  "pl06",  "pl12",  "pl14",  "pl16",  "pl20",  "pl18",  "pl22",
        "pl26",  "pl24",  "pl28",  "pl30",  "pl32",  "tttob", "ttmrc", "tttob",
        "twkhh", "twtnn", "twnwt", "twtxg",
    };
    return SearchReplacement(types, aliases, type);
  }

  // "tz" key: legacy time zone type names to their preferred forms.
  else if (IsUnicodeKey(key, "tz")) {
    static const char* types[28] = {
        "aqams",  "cnckg",  "cnhrb",    "cnkhg", "cuba",  "egypt",
        "eire",   "est",    "gmt0",     "hongkong", "hst", "iceland",
        "iran",   "israel", "jamaica",  "japan", "libya", "mst",
        "navajo", "poland", "portugal", "prc",   "roc",   "rok",
        "turkey", "uct",    "usnavajo", "zulu",
    };
    static const char* aliases[28] = {
        "nzakl", "cnsha", "cnsha",   "cnurc",   "cuhav",  "egcai",
        "iedub", "utcw05", "gmt",    "hkhkg",   "utcw10", "isrey",
        "irthr", "jeruslm", "jmkin", "jptyo",   "lytip",  "utcw07",
        "usden", "plwaw", "ptlis",   "cnsha",   "twtpe",  "krsel",
        "trist", "utc",   "usden",   "utc",
    };
    return SearchReplacement(types, aliases, type);
  }
  return nullptr;
}

Просмотреть файл

@ -1,188 +0,0 @@
// NOTE(review): This is the pre-move copy of UnicodeExtensionsGenerated.cpp,
// deleted by this commit; its entire contents (helpers and
// replaceUnicodeExtensionType, including the data tables) now live in
// LanguageTagGenerated.cpp above. Kept byte-identical here since it is the
// removed side of the diff.
// Generated by make_intl_data.py. DO NOT EDIT.
// Version: CLDR-35.1
// URL: https://unicode.org/Public/cldr/35.1/core.zip

#include "mozilla/Assertions.h"
#include "mozilla/Range.h"
#include "mozilla/TextUtils.h"

#include <algorithm>
#include <cstdint>
#include <cstring>

#include "builtin/intl/LanguageTag.h"

using namespace js::intl::LanguageTagLimits;

using ConstCharRange = mozilla::Range<const char>;

// Compile-time length-checked comparison of |key| against a two-character
// string literal.
template <size_t Length>
static inline bool IsUnicodeKey(const ConstCharRange& key,
                                const char (&str)[Length]) {
  static_assert(Length == UnicodeKeyLength + 1,
                "Unicode extension key is two characters long");
  return memcmp(key.begin().get(), str, Length - 1) == 0;
}

// Runtime length check plus contents comparison of |type| against a literal.
template <size_t Length>
static inline bool IsUnicodeType(const ConstCharRange& type,
                                 const char (&str)[Length]) {
  static_assert(Length > UnicodeKeyLength + 1,
                "Unicode extension type contains more than two characters");
  return type.length() == (Length - 1) &&
         memcmp(type.begin().get(), str, Length - 1) == 0;
}

// Three-way comparison of null-terminated |a| against the range |b|.
static int32_t CompareUnicodeType(const char* a, const ConstCharRange& b) {
#ifdef DEBUG
  auto isNull = [](char c) {
    return c == '\0';
  };
#endif

  MOZ_ASSERT(std::none_of(b.begin().get(), b.end().get(), isNull),
             "unexpected null-character in string");

  using UnsignedChar = unsigned char;
  for (size_t i = 0; i < b.length(); i++) {
    // |a| is zero-terminated and |b| doesn't contain a null-terminator. So if
    // we've reached the end of |a|, the below if-statement will always be true.
    // That ensures we don't read past the end of |a|.
    if (int32_t r = UnsignedChar(a[i]) - UnsignedChar(b[i])) {
      return r;
    }
  }

  // Return zero if both strings are equal or a negative number if |b| is a
  // prefix of |a|.
  return -int32_t(UnsignedChar(a[b.length()]));
};

// Binary search |type| in |types|; return the same-index entry of |aliases|.
template <size_t Length>
static inline const char* SearchReplacement(const char* (&types)[Length],
                                            const char* (&aliases)[Length],
                                            const ConstCharRange& type) {
  auto p = std::lower_bound(std::begin(types), std::end(types), type,
                            [](const auto& a, const auto& b) {
                              return CompareUnicodeType(a, b) < 0;
                            });
  if (p != std::end(types) && CompareUnicodeType(*p, type) == 0) {
    return aliases[std::distance(std::begin(types), p)];
  }
  return nullptr;
}

/**
 * Mapping from deprecated BCP 47 Unicode extension types to their preferred
 * values.
 *
 * Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
 */
const char* js::intl::LanguageTag::replaceUnicodeExtensionType(
    const ConstCharRange& key, const ConstCharRange& type) {
#ifdef DEBUG
  static auto isAsciiLowercaseAlphanumeric = [](char c) {
    return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c);
  };

  static auto isAsciiLowercaseAlphanumericOrDash = [](char c) {
    return isAsciiLowercaseAlphanumeric(c) || c == '-';
  };
#endif

  MOZ_ASSERT(key.length() == UnicodeKeyLength);
  MOZ_ASSERT(std::all_of(key.begin().get(), key.end().get(),
                         isAsciiLowercaseAlphanumeric));

  MOZ_ASSERT(type.length() > UnicodeKeyLength);
  MOZ_ASSERT(std::all_of(type.begin().get(), type.end().get(),
                         isAsciiLowercaseAlphanumericOrDash));

  if (IsUnicodeKey(key, "ca")) {
    if (IsUnicodeType(type, "ethiopic-amete-alem")) {
      return "ethioaa";
    }
    if (IsUnicodeType(type, "islamicc")) {
      return "islamic-civil";
    }
  }

  else if (IsUnicodeKey(key, "kb") ||
           IsUnicodeKey(key, "kc") ||
           IsUnicodeKey(key, "kh") ||
           IsUnicodeKey(key, "kk") ||
           IsUnicodeKey(key, "kn")) {
    if (IsUnicodeType(type, "yes")) {
      return "true";
    }
  }

  else if (IsUnicodeKey(key, "ks")) {
    if (IsUnicodeType(type, "primary")) {
      return "level1";
    }
    if (IsUnicodeType(type, "tertiary")) {
      return "level3";
    }
  }

  else if (IsUnicodeKey(key, "ms")) {
    if (IsUnicodeType(type, "imperial")) {
      return "uksystem";
    }
  }

  else if (IsUnicodeKey(key, "rg") ||
           IsUnicodeKey(key, "sd")) {
    static const char* types[116] = {
        "cn11",  "cn12",  "cn13",  "cn14",  "cn15",  "cn21",  "cn22",  "cn23",
        "cn31",  "cn32",  "cn33",  "cn34",  "cn35",  "cn36",  "cn37",  "cn41",
        "cn42",  "cn43",  "cn44",  "cn45",  "cn46",  "cn50",  "cn51",  "cn52",
        "cn53",  "cn54",  "cn61",  "cn62",  "cn63",  "cn64",  "cn65",  "cz10a",
        "cz10b", "cz10c", "cz10d", "cz10e", "cz10f", "cz611", "cz612", "cz613",
        "cz614", "cz615", "cz621", "cz622", "cz623", "cz624", "cz626", "cz627",
        "czjc",  "czjm",  "czka",  "czkr",  "czli",  "czmo",  "czol",  "czpa",
        "czpl",  "czpr",  "czst",  "czus",  "czvy",  "czzl",  "fra",   "frb",
        "frc",   "frd",   "fre",   "frf",   "frg",   "frh",   "fri",   "frj",
        "frk",   "frl",   "frm",   "frn",   "fro",   "frp",   "frq",   "frr",
        "frs",   "frt",   "fru",   "frv",   "laxn",  "lud",   "lug",   "lul",
        "mrnkc", "nzn",   "nzs",   "omba",  "omsh",  "plds",  "plkp",  "pllb",
        "plld",  "pllu",  "plma",  "plmz",  "plop",  "plpd",  "plpk",  "plpm",
        "plsk",  "plsl",  "plwn",  "plwp",  "plzp",  "tteto", "ttrcm", "ttwto",
        "twkhq", "twtnq", "twtpq", "twtxq",
    };
    static const char* aliases[116] = {
        "cnbj",  "cntj",  "cnhe",  "cnsx",  "cnmn",  "cnln",  "cnjl",  "cnhl",
        "cnsh",  "cnjs",  "cnzj",  "cnah",  "cnfj",  "cnjx",  "cnsd",  "cnha",
        "cnhb",  "cnhn",  "cngd",  "cngx",  "cnhi",  "cncq",  "cnsc",  "cngz",
        "cnyn",  "cnxz",  "cnsn",  "cngs",  "cnqh",  "cnnx",  "cnxj",  "cz110",
        "cz111", "cz112", "cz113", "cz114", "cz115", "cz663", "cz632", "cz633",
        "cz634", "cz635", "cz641", "cz642", "cz643", "cz644", "cz646", "cz647",
        "cz31",  "cz64",  "cz41",  "cz52",  "cz51",  "cz80",  "cz71",  "cz53",
        "cz32",  "cz10",  "cz20",  "cz42",  "cz63",  "cz72",  "frges", "frnaq",
        "frara", "frbfc", "frbre", "frcvl", "frges", "frcor", "frbfc", "fridf",
        "frocc", "frnaq", "frges", "frocc", "frhdf", "frnor", "frnor", "frpdl",
        "frhdf", "frnaq", "frpac", "frara", "laxs",  "lucl",  "luec",  "luca",
        "mr13",  "nzauk", "nzcan", "ombj",  "omsj",  "pl02",  "pl04",  "pl08",
        "pl10",  "pl06",  "pl12",  "pl14",  "pl16",  "pl20",  "pl18",  "pl22",
        "pl26",  "pl24",  "pl28",  "pl30",  "pl32",  "tttob", "ttmrc", "tttob",
        "twkhh", "twtnn", "twnwt", "twtxg",
    };
    return SearchReplacement(types, aliases, type);
  }

  else if (IsUnicodeKey(key, "tz")) {
    static const char* types[28] = {
        "aqams",  "cnckg",  "cnhrb",    "cnkhg",    "cuba",   "egypt",
        "eire",   "est",    "gmt0",     "hongkong", "hst",    "iceland",
        "iran",   "israel", "jamaica",  "japan",    "libya",  "mst",
        "navajo", "poland", "portugal", "prc",      "roc",    "rok",
        "turkey", "uct",    "usnavajo", "zulu",
    };
    static const char* aliases[28] = {
        "nzakl", "cnsha",   "cnsha", "cnurc", "cuhav",  "egcai",
        "iedub", "utcw05",  "gmt",   "hkhkg", "utcw10", "isrey",
        "irthr", "jeruslm", "jmkin", "jptyo", "lytip",  "utcw07",
        "usden", "plwaw",   "ptlis", "cnsha", "twtpe",  "krsel",
        "trist", "utc",     "usden", "utc",
    };
    return SearchReplacement(types, aliases, type);
  }
  return nullptr;
}

Просмотреть файл

@ -6,17 +6,16 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
""" Usage:
make_intl_data.py langtags [ldmlSupplemental.dtd supplementalMetadata.xml likelySubtags.xml]
make_intl_data.py langtags [cldr_core.zip]
make_intl_data.py tzdata
make_intl_data.py currency
make_intl_data.py unicode-ext
Target "langtags":
This script extracts information about mappings between deprecated and
current Unicode BCP 47 locale identifiers from CLDR and converts it to C++
mapping code in LanguageTagGenerated.cpp. The code is used in
LanguageTag.cpp.
This script extracts information about 1) mappings between deprecated and
current Unicode BCP 47 locale identifiers, and 2) deprecated and current
BCP 47 Unicode extension values from CLDR, and converts it to C++ mapping
code in LanguageTagGenerated.cpp. The code is used in LanguageTag.cpp.
Target "tzdata":
@ -27,19 +26,12 @@
Target "currency":
Generates the mapping from currency codes to decimal digits used for them.
Target "unicode-ext":
Generates the mapping from deprecated BCP 47 Unicode extension values to
their preferred values.
"""
from __future__ import print_function
import contextlib
import os
import re
import io
import shutil
import subprocess
import sys
import tarfile
import tempfile
@ -53,11 +45,11 @@ if sys.version_info.major == 2:
from itertools import ifilter as filter, ifilterfalse as filterfalse, imap as map,\
izip_longest as zip_longest
from urllib2 import urlopen, Request as UrlRequest
from urlparse import urlsplit, urlunsplit
from urlparse import urlsplit
else:
from itertools import filterfalse, zip_longest
from urllib.request import urlopen, Request as UrlRequest
from urllib.parse import urlsplit, urlunsplit
from urllib.parse import urlsplit
# From https://docs.python.org/3/library/itertools.html
@ -87,21 +79,8 @@ def writeMappingsVar(println, mapping, name, description, source, url):
println(u"")
writeMappingHeader(println, description, source, url)
println(u"var {0} = {{".format(name))
for key in sorted(mapping):
if not isinstance(mapping[key], dict):
value = mapping[key]
if isinstance(value, bool):
value = "true" if value else "false"
else:
value = '"{0}"'.format(value)
else:
preferred = mapping[key]["preferred"]
prefix = mapping[key]["prefix"]
if key != preferred:
raise Exception(
"Expected '{0}' matches preferred locale '{1}'".format(key, preferred))
value = '"{0}"'.format(prefix)
println(u' "{0}": {1},'.format(key, value))
for (key, value) in sorted(mapping.items(), key=itemgetter(0)):
println(u' "{0}": "{1}",'.format(key, value))
println(u"};")
@ -518,16 +497,7 @@ bool js::intl::LanguageTag::updateGrandfatheredMappings(JSContext* cx) {
}""")
@contextlib.contextmanager
def TemporaryDirectory():
tmpDir = tempfile.mkdtemp()
try:
yield tmpDir
finally:
shutil.rmtree(tmpDir)
def readSupplementalData(supplemental_dtd_file, supplemental_metadata_file, likely_subtags_file):
def readSupplementalData(core_file):
""" Reads CLDR Supplemental Data and extracts information for Intl.js.
Information extracted:
@ -542,15 +512,6 @@ def readSupplementalData(supplemental_dtd_file, supplemental_metadata_file, like
"""
import xml.etree.ElementTree as ET
# <!ATTLIST version cldrVersion CDATA #FIXED "36" >
re_cldr_version = re.compile(
r"""<!ATTLIST version cldrVersion CDATA #FIXED "(?P<version>[\d|\.]+)" >""")
with io.open(supplemental_dtd_file, mode="r", encoding="utf-8") as f:
version_match = re_cldr_version.search(f.read())
assert version_match is not None, "CLDR version string not found"
cldr_version = version_match.group("version")
# From Unicode BCP 47 locale identifier <https://unicode.org/reports/tr35/>.
re_unicode_language_id = re.compile(
r"""
@ -656,7 +617,7 @@ def readSupplementalData(supplemental_dtd_file, supplemental_metadata_file, like
script.title() if script else None,
region.upper() if region else None)
tree = ET.parse(supplemental_metadata_file)
tree = ET.parse(core_file.open("common/supplemental/supplementalMetadata.xml"))
for language_alias in tree.iterfind(".//languageAlias"):
type = bcp47_id(language_alias.get("type"))
@ -706,7 +667,7 @@ def readSupplementalData(supplemental_dtd_file, supplemental_metadata_file, like
), "{} invalid region subtags".format(replacement)
complex_region_mappings[type] = replacements
tree = ET.parse(likely_subtags_file)
tree = ET.parse(core_file.open("common/supplemental/likelySubtags.xml"))
likely_subtags = {}
@ -767,8 +728,7 @@ def readSupplementalData(supplemental_dtd_file, supplemental_metadata_file, like
else:
region_mappings[deprecated_region] = default
return {"version": cldr_version,
"grandfatheredMappings": grandfathered_mappings,
return {"grandfatheredMappings": grandfathered_mappings,
"languageMappings": language_mappings,
"complexLanguageMappings": complex_language_mappings,
"regionMappings": region_mappings,
@ -777,10 +737,154 @@ def readSupplementalData(supplemental_dtd_file, supplemental_metadata_file, like
}
def readUnicodeExtensions(core_file):
    """ Reads deprecated BCP 47 Unicode extension data from CLDR.

    |core_file| is an open zipfile.ZipFile for the CLDR "core.zip" archive
    (the callers open it via ZipFile and pass it in).

    Returns a dict mapping each Unicode extension key (e.g. "ca", "tz", "rg")
    to a dict of {deprecated type: preferred replacement type}.
    """
    import xml.etree.ElementTree as ET

    # Match all xml-files in the BCP 47 directory.
    bcpFileRE = re.compile(r"^common/bcp47/.+\.xml$")

    # https://www.unicode.org/reports/tr35/#Unicode_locale_identifier
    #
    # type = alphanum{3,8} (sep alphanum{3,8})* ;
    typeRE = re.compile(r"^[a-z0-9]{3,8}(-[a-z0-9]{3,8})*$")

    # Mapping from Unicode extension types to dict of deprecated to
    # preferred values.
    mapping = {}

    # Collect replacements from one common/bcp47/*.xml file into |mapping|.
    def readBCP47File(file):
        tree = ET.parse(file)
        for keyword in tree.iterfind(".//keyword/key"):
            # Skip over keywords whose extension is not "u".
            if keyword.get("extension", "u") != "u":
                continue
            extension_name = keyword.get("name")

            for type in keyword.iterfind("type"):
                # <https://unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files>:
                #
                # The key or type name used by Unicode locale extension with 'u' extension
                # syntax or the 't' extensions syntax. When alias below is absent, this name
                # can be also used with the old style "@key=type" syntax.
                name = type.get("name")

                # Ignore the special name:
                # - <https://unicode.org/reports/tr35/#CODEPOINTS>
                # - <https://unicode.org/reports/tr35/#REORDER_CODE>
                # - <https://unicode.org/reports/tr35/#RG_KEY_VALUE>
                # - <https://unicode.org/reports/tr35/#SUBDIVISION_CODE>
                # - <https://unicode.org/reports/tr35/#PRIVATE_USE>
                if name in ("CODEPOINTS", "REORDER_CODE", "RG_KEY_VALUE", "SUBDIVISION_CODE",
                            "PRIVATE_USE"):
                    continue

                # All other names should match the 'type' production.
                assert typeRE.match(name) is not None, (
                    "{} matches the 'type' production".format(name))

                # <https://unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files>:
                #
                # The preferred value of the deprecated key, type or attribute element.
                # When a key, type or attribute element is deprecated, this attribute is
                # used for specifying a new canonical form if available.
                preferred = type.get("preferred")

                # <https://unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files>:
                #
                # The BCP 47 form is the canonical form, and recommended. Other aliases are
                # included only for backwards compatibility.
                alias = type.get("alias")

                # <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>
                #
                # Use the bcp47 data to replace keys, types, tfields, and tvalues by their
                # canonical forms. See Section 3.6.4 U Extension Data Files) and Section
                # 3.7.1 T Extension Data Files. The aliases are in the alias attribute
                # value, while the canonical is in the name attribute value.

                # 'preferred' contains the new preferred name, 'alias' the compatibility
                # name, but then there's this entry where 'preferred' and 'alias' are the
                # same. So which one to choose? Assume 'preferred' is the actual canonical
                # name.
                #
                # <type name="islamicc"
                #       description="Civil (algorithmic) Arabic calendar"
                #       deprecated="true"
                #       preferred="islamic-civil"
                #       alias="islamic-civil"/>
                if preferred is not None:
                    assert typeRE.match(preferred), preferred
                    mapping.setdefault(extension_name, {})[name] = preferred

                if alias is not None:
                    # An "alias" attribute may hold several space-separated names.
                    for alias_name in alias.lower().split(" "):
                        # Ignore alias entries which don't match the 'type' production.
                        if typeRE.match(alias_name) is None:
                            continue

                        # See comment above when 'alias' and 'preferred' are both present.
                        if (preferred is not None and
                            name in mapping[extension_name]):
                            continue

                        # Skip over entries where 'name' and 'alias' are equal.
                        #
                        # <type name="pst8pdt"
                        #       description="POSIX style time zone for US Pacific Time"
                        #       alias="PST8PDT"
                        #       since="1.8"/>
                        if name == alias_name:
                            continue
                        mapping.setdefault(extension_name, {})[alias_name] = name

    # Collect subdivision replacements (shared by the "rg" and "sd" keys)
    # from supplementalMetadata.xml into |mapping|.
    def readSupplementalMetadata(file):
        # Find subdivision and region replacements.
        #
        # <https://www.unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>
        #
        # Replace aliases in special key values:
        #   - If there is an 'sd' or 'rg' key, replace any subdivision alias
        #     in its value in the same way, using subdivisionAlias data.
        tree = ET.parse(file)
        for alias in tree.iterfind(".//subdivisionAlias"):
            type = alias.get("type")
            assert typeRE.match(type) is not None, (
                "{} matches the 'type' production".format(type))

            # Take the first replacement when multiple ones are present.
            replacement = alias.get("replacement").split(" ")[0].lower()

            # Skip over invalid replacements.
            #
            # <subdivisionAlias type="fi01" replacement="AX" reason="overlong"/>
            #
            # It's not entirely clear to me if CLDR actually wants to use
            # "axzzzz" as the replacement for this case.
            if typeRE.match(replacement) is None:
                continue

            # 'subdivisionAlias' applies to 'rg' and 'sd' keys.
            mapping.setdefault("rg", {})[type] = replacement
            mapping.setdefault("sd", {})[type] = replacement

    for name in core_file.namelist():
        if bcpFileRE.match(name):
            readBCP47File(core_file.open(name))

    readSupplementalMetadata(core_file.open("common/supplemental/supplementalMetadata.xml"))

    return mapping
def writeCLDRLanguageTagData(println, data, url):
""" Writes the language tag data to the Intl data file. """
println(generatedFileWarning)
println(u"// Version: CLDR-{}".format(data["version"]))
println(u"// URL: {}".format(url))
println(u"""
#include "mozilla/Assertions.h"
@ -797,6 +901,8 @@ def writeCLDRLanguageTagData(println, data, url):
#include "util/Text.h"
#include "vm/JSContext.h"
using namespace js::intl::LanguageTagLimits;
using ConstCharRange = mozilla::Range<const char>;
template <size_t Length, size_t TagLength, size_t SubtagLength>
@ -839,6 +945,7 @@ static inline const char* SearchReplacement(
complex_language_mappings = data["complexLanguageMappings"]
region_mappings = data["regionMappings"]
complex_region_mappings = data["complexRegionMappings"]
unicode_mappings = data["unicodeMappings"]
# unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
language_maxlength = 8
@ -876,10 +983,14 @@ static inline const char* SearchReplacement(
"Canonicalize grandfathered locale identifiers.", source,
url)
writeUnicodeExtensionsMappings(println, unicode_mappings)
def writeCLDRLanguageTagLikelySubtagsTest(println, data, url):
""" Writes the likely-subtags test file. """
println(generatedFileWarning)
source = u"CLDR Supplemental Data, version {}".format(data["version"])
language_mappings = data["languageMappings"]
complex_language_mappings = data["complexLanguageMappings"]
@ -1004,87 +1115,45 @@ if (typeof reportCompare === "function")
def updateCLDRLangTags(args):
""" Update the LanguageTagGenerated.cpp file. """
version = args.version
url = args.url
branch = args.branch
revision = args.revision
out = args.out
files = args.files
filename = args.file
url = url.replace("<VERSION>", version)
print("Arguments:")
print("\tCLDR version: %s" % version)
print("\tDownload url: %s" % url)
print("\tBranch: %s" % branch)
print("\tRevision: %s" % revision)
print("\tLocal supplemental data and likely subtags: %s" % files)
if filename is not None:
print("\tLocal CLDR core.zip file: %s" % filename)
print("\tOutput file: %s" % out)
print("")
if files:
if len(files) != 3:
raise Exception("Expected three files, but got: {}".format(files))
data = {
"version": version,
}
print(("Always make sure you have the newest ldmlSupplemental.dtd, "
"supplementalMetadata.xml, and likelySubtags.xml!"))
def readFiles(cldr_file):
with ZipFile(cldr_file) as zip_file:
data.update(readSupplementalData(zip_file))
data["unicodeMappings"] = readUnicodeExtensions(zip_file)
supplemental_dtd_file = files[0]
supplemental_metadata_file = files[1]
likely_subtags_file = files[2]
print("Processing CLDR data...")
if filename is not None:
print("Always make sure you have the newest CLDR core.zip!")
with open(filename, "rb") as cldr_file:
readFiles(cldr_file)
else:
print("Downloading CLDR supplemental data...")
supplemental_dtd_filename = "ldmlSupplemental.dtd"
supplemental_dtd_path = "common/dtd/{}".format(supplemental_dtd_filename)
supplemental_dtd_file = os.path.join(os.getcwd(), supplemental_dtd_filename)
supplemental_metadata_filename = "supplementalMetadata.xml"
supplemental_metadata_path = "common/supplemental/{}".format(
supplemental_metadata_filename)
supplemental_metadata_file = os.path.join(os.getcwd(), supplemental_metadata_filename)
likely_subtags_filename = "likelySubtags.xml"
likely_subtags_path = "common/supplemental/{}".format(likely_subtags_filename)
likely_subtags_file = os.path.join(os.getcwd(), likely_subtags_filename)
# Try to download the raw file directly from GitHub if possible.
split = urlsplit(url)
if split.netloc == "github.com" and split.path.endswith(".git") and revision == "HEAD":
def download(path, file):
urlpath = "{}/raw/{}/{}".format(urlsplit(url).path[:-4], branch, path)
raw_url = urlunsplit((split.scheme, split.netloc, urlpath, split.query,
split.fragment))
with closing(urlopen(raw_url)) as reader:
text = reader.read().decode("utf-8")
with io.open(file, "w", encoding="utf-8") as saved_file:
saved_file.write(text)
download(supplemental_dtd_path, supplemental_dtd_file)
download(supplemental_metadata_path, supplemental_metadata_file)
download(likely_subtags_path, likely_subtags_file)
else:
# Download the requested branch in a temporary directory.
with TemporaryDirectory() as inDir:
if revision == "HEAD":
subprocess.check_call(["git", "clone", "--depth=1",
"--branch=%s" % branch, url, inDir])
else:
subprocess.check_call(["git", "clone", "--single-branch",
"--branch=%s" % branch, url, inDir])
subprocess.check_call(["git", "-C", inDir, "reset", "--hard", revision])
shutil.copyfile(os.path.join(inDir, supplemental_dtd_path),
supplemental_dtd_file)
shutil.copyfile(os.path.join(inDir, supplemental_metadata_path),
supplemental_metadata_file)
shutil.copyfile(os.path.join(inDir, likely_subtags_path), likely_subtags_file)
print("Processing CLDR supplemental data...")
data = readSupplementalData(supplemental_dtd_file,
supplemental_metadata_file,
likely_subtags_file)
print("Downloading CLDR core.zip...")
with closing(urlopen(url)) as cldr_file:
cldr_data = io.BytesIO(cldr_file.read())
readFiles(cldr_data)
print("Writing Intl data...")
with io.open(out, mode="w", encoding="utf-8", newline="") as f:
println = partial(print, file=f)
writeCLDRLanguageTagData(println, data, url)
print("Writing Intl test data...")
@ -1095,7 +1164,6 @@ def updateCLDRLangTags(args):
println(u"// |reftest| skip-if(!this.hasOwnProperty('Intl')||"
u"(!this.Intl.Locale&&!this.hasOwnProperty('addIntlExtras')))")
println(generatedFileWarning)
writeCLDRLanguageTagLikelySubtagsTest(println, data, url)
@ -1948,61 +2016,8 @@ def updateCurrency(topsrcdir, args):
updateFrom(currencyTmpFile.name)
def writeUnicodeExtensionsFile(version, url, mapping, out):
with io.open(out, mode="w", encoding="utf-8", newline="") as f:
println = partial(print, file=f)
println(generatedFileWarning)
println(u"// Version: CLDR-{}".format(version))
println(u"// URL: {}".format(url))
println(u"""
/**
* Mapping from deprecated BCP 47 Unicode extension types to their preferred
* values.
*
* Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
*/""")
println(u"var deprecatedUnicodeExtensionTypes = {")
for ext_name in sorted(mapping):
println(u" {}: {{".format(ext_name))
is_first = True
for type in sorted(mapping[ext_name]):
mapped = mapping[ext_name][type]
has_description = mapped["description"] is not None
if not is_first and has_description:
println(u"")
is_first = False
if has_description:
println(u" // {}".format(mapped["description"]))
println(u" \"{}\": \"{}\",".format(type, mapped["preferred"]))
println(u" },")
println(u"};")
with io.open(os.path.splitext(out)[0] + ".cpp", mode="w", encoding="utf-8", newline="") as f:
println = partial(print, file=f)
println(generatedFileWarning)
println(u"// Version: CLDR-{}".format(version))
println(u"// URL: {}".format(url))
println(u"""
#include "mozilla/Assertions.h"
#include "mozilla/Range.h"
#include "mozilla/TextUtils.h"
#include <algorithm>
#include <cstdint>
#include <cstring>
#include "builtin/intl/LanguageTag.h"
using namespace js::intl::LanguageTagLimits;
using ConstCharRange = mozilla::Range<const char>;
def writeUnicodeExtensionsMappings(println, mapping):
println(u"""
template <size_t Length>
static inline bool IsUnicodeKey(const ConstCharRange& key,
const char (&str)[Length]) {
@ -2059,9 +2074,7 @@ static inline const char* SearchReplacement(const char* (&types)[Length],
}
return nullptr;
}
""".rstrip("\n"))
println(u"""
/**
* Mapping from deprecated BCP 47 Unicode extension types to their preferred
* values.
@ -2089,256 +2102,72 @@ const char* js::intl::LanguageTag::replaceUnicodeExtensionType(
isAsciiLowercaseAlphanumericOrDash));
""")
def to_hash_key(replacements):
return str(sorted([str((k, v["preferred"])) for (k, v) in replacements.items()]))
def to_hash_key(replacements):
return str(sorted(replacements.items()))
def write_array(subtags, name, length):
max_entries = (80 - len(" ")) // (length + len('"", '))
def write_array(subtags, name, length):
max_entries = (80 - len(" ")) // (length + len('"", '))
println(u" static const char* {}[{}] = {{".format(name, len(subtags)))
println(u" static const char* {}[{}] = {{".format(name, len(subtags)))
for entries in grouper(subtags, max_entries):
entries = (u"\"{}\"".format(tag).rjust(length + 2)
for tag in entries if tag is not None)
println(u" {},".format(u", ".join(entries)))
for entries in grouper(subtags, max_entries):
entries = (u"\"{}\"".format(tag).rjust(length + 2)
for tag in entries if tag is not None)
println(u" {},".format(u", ".join(entries)))
println(u" };")
println(u" };")
# Merge duplicate keys.
key_aliases = {}
for (key, replacements) in sorted(mapping.items(), key=itemgetter(0)):
hash_key = to_hash_key(replacements)
if hash_key not in key_aliases:
key_aliases[hash_key] = []
else:
key_aliases[hash_key].append(key)
# Merge duplicate keys.
key_aliases = {}
for (key, replacements) in sorted(mapping.items(), key=itemgetter(0)):
hash_key = to_hash_key(replacements)
if hash_key not in key_aliases:
key_aliases[hash_key] = []
else:
key_aliases[hash_key].append(key)
first_key = True
for (key, replacements) in sorted(mapping.items(), key=itemgetter(0)):
hash_key = to_hash_key(replacements)
if key in key_aliases[hash_key]:
continue
first_key = True
for (key, replacements) in sorted(mapping.items(), key=itemgetter(0)):
hash_key = to_hash_key(replacements)
if key in key_aliases[hash_key]:
continue
cond = (u"IsUnicodeKey(key, \"{}\")".format(k) for k in [key] + key_aliases[hash_key])
cond = (u"IsUnicodeKey(key, \"{}\")".format(k) for k in [key] + key_aliases[hash_key])
if_kind = u"if" if first_key else u"else if"
cond = (u" ||\n" + u" " * (2 + len(if_kind) + 2)).join(cond)
println(u"""
if_kind = u"if" if first_key else u"else if"
cond = (u" ||\n" + u" " * (2 + len(if_kind) + 2)).join(cond)
println(u"""
{} ({}) {{""".format(if_kind, cond).strip("\n"))
first_key = False
first_key = False
replacements = sorted(replacements.items(), key=itemgetter(0))
replacements = sorted(replacements.items(), key=itemgetter(0))
if len(replacements) > 4:
types = [t for (t, _) in replacements]
preferred = [r["preferred"] for (_, r) in replacements]
max_len = max(len(k) for k in types + preferred)
if len(replacements) > 4:
types = [t for (t, _) in replacements]
preferred = [r for (_, r) in replacements]
max_len = max(len(k) for k in types + preferred)
write_array(types, "types", max_len)
write_array(preferred, "aliases", max_len)
println(u"""
write_array(types, "types", max_len)
write_array(preferred, "aliases", max_len)
println(u"""
return SearchReplacement(types, aliases, type);
""".strip("\n"))
else:
for (type, replacement) in replacements:
println(u"""
else:
for (type, replacement) in replacements:
println(u"""
if (IsUnicodeType(type, "{}")) {{
return "{}";
}}""".format(type, replacement["preferred"]).strip("\n"))
println(u"""
}""".lstrip("\n"))
}}""".format(type, replacement).strip("\n"))
println(u"""
}""".lstrip("\n"))
println(u"""
return nullptr;
}
""".strip("\n"))
def updateUnicodeExtensions(args):
""" Update the UnicodeExtensionsGenerated.js file. """
import xml.etree.ElementTree as ET
version = args.version
url = args.url
out = args.out
filename = args.file
url = url.replace("<VERSION>", version)
print("Arguments:")
print("\tCLDR version: %s" % version)
print("\tDownload url: %s" % url)
if filename is not None:
print("\tLocal CLDR core.zip file: %s" % filename)
print("\tOutput file: %s" % out)
print("")
def updateFrom(data):
# Match all xml-files in the BCP 47 directory.
bcpFileRE = re.compile(r"^common/bcp47/.+\.xml$")
# https://www.unicode.org/reports/tr35/#Unicode_locale_identifier
#
# type = alphanum{3,8} (sep alphanum{3,8})* ;
typeRE = re.compile(r"^[a-z0-9]{3,8}(-[a-z0-9]{3,8})*$")
# Mapping from Unicode extension types to dict of deprecated to
# preferred values.
mapping = {}
with ZipFile(data) as zip_file:
for name in zip_file.namelist():
if not bcpFileRE.match(name):
continue
tree = ET.parse(zip_file.open(name))
for keyword in tree.iterfind(".//keyword/key"):
# Skip over keywords whose extension is not "u".
if keyword.get("extension", "u") != "u":
continue
extension_name = keyword.get("name")
for type in keyword.iterfind("type"):
# <https://unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files>:
#
# The key or type name used by Unicode locale extension with 'u' extension
# syntax or the 't' extensions syntax. When alias below is absent, this
# name can be also used with the old style "@key=type" syntax.
name = type.get("name")
# Ignore the special name:
# - <https://unicode.org/reports/tr35/#CODEPOINTS>
# - <https://unicode.org/reports/tr35/#REORDER_CODE>
# - <https://unicode.org/reports/tr35/#RG_KEY_VALUE>
# - <https://unicode.org/reports/tr35/#SUBDIVISION_CODE>
# - <https://unicode.org/reports/tr35/#PRIVATE_USE>
if name in ("CODEPOINTS", "REORDER_CODE", "RG_KEY_VALUE",
"SUBDIVISION_CODE", "PRIVATE_USE"):
continue
# All other names should match the 'type' production.
assert typeRE.match(name) is not None, (
"{} matches the 'type' production".format(name))
# <https://unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files>:
#
# The preferred value of the deprecated key, type or attribute element.
# When a key, type or attribute element is deprecated, this attribute is
# used for specifying a new canonical form if available.
preferred = type.get("preferred")
# <https://unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files>:
#
# The BCP 47 form is the canonical form, and recommended. Other aliases are
# included only for backwards compatibility.
alias = type.get("alias")
# <https://unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files>:
#
# The description of the key, type or attribute element.
description = type.get("description")
# <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>
#
# Use the bcp47 data to replace keys, types, tfields, and tvalues by their
# canonical forms. See Section 3.6.4 U Extension Data Files) and Section
# 3.7.1 T Extension Data Files. The aliases are in the alias attribute
# value, while the canonical is in the name attribute value.
# 'preferred' contains the new preferred name, 'alias' the compatibility
# name, but then there's this entry where 'preferred' and 'alias' are the
# same. So which one to choose? Assume 'preferred' is the actual canonical
# name.
#
# <type name="islamicc"
# description="Civil (algorithmic) Arabic calendar"
# deprecated="true"
# preferred="islamic-civil"
# alias="islamic-civil"/>
if preferred is not None:
assert typeRE.match(preferred), preferred
mapping.setdefault(extension_name, {})[name] = {
"preferred": preferred,
"description": description,
}
if alias is not None:
for alias_name in alias.lower().split(" "):
# Ignore alias entries which don't match the 'type' production.
if typeRE.match(alias_name) is None:
continue
# See comment above when 'alias' and 'preferred' are both present.
if (preferred is not None and
name in mapping[extension_name]):
continue
# Skip over entries where 'name' and 'alias' are equal.
#
# <type name="pst8pdt"
# description="POSIX style time zone for US Pacific Time"
# alias="PST8PDT"
# since="1.8"/>
if name == alias_name:
continue
mapping.setdefault(extension_name, {})[alias_name] = {
"preferred": name,
"description": description,
}
# Find subdivision and region replacements.
#
# <https://www.unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>
#
# Replace aliases in special key values:
# - If there is an 'sd' or 'rg' key, replace any subdivision alias
# in its value in the same way, using subdivisionAlias data.
tree = ET.parse(zip_file.open("common/supplemental/supplementalMetadata.xml"))
for alias in tree.iterfind(".//subdivisionAlias"):
type = alias.get("type")
assert typeRE.match(type) is not None, (
"{} matches the 'type' production".format(type))
# Take the first replacement when multiple ones are present.
replacement = alias.get("replacement").split(" ")[0].lower()
# Skip over invalid replacements.
#
# <subdivisionAlias type="fi01" replacement="AX" reason="overlong"/>
#
# It's not entirely clear to me if CLDR actually wants to use
# "axzzzz" as the replacement for this case.
if typeRE.match(replacement) is None:
continue
# 'subdivisionAlias' applies to 'rg' and 'sd' keys.
mapping.setdefault("rg", {})[type] = {
"preferred": replacement,
"description": None,
}
mapping.setdefault("sd", {})[type] = {
"preferred": replacement,
"description": None,
}
writeUnicodeExtensionsFile(version, url, mapping, out)
if filename is not None:
print("Always make sure you have the newest CLDR core.zip!")
with open(filename, "rb") as cldr_file:
updateFrom(cldr_file)
else:
print("Downloading CLDR core.zip...")
with closing(urlopen(url)) as cldr_file:
cldr_data = io.BytesIO(cldr_file.read())
updateFrom(cldr_data)
if __name__ == "__main__":
import argparse
@ -2359,21 +2188,21 @@ if __name__ == "__main__":
parser_cldr_tags = subparsers.add_parser("langtags",
help="Update CLDR language tags data")
parser_cldr_tags.add_argument("--version",
metavar="VERSION",
required=True,
help="CLDR version number")
parser_cldr_tags.add_argument("--url",
metavar="URL",
default="https://github.com/unicode-org/cldr.git",
help="URL to git repository (default: %(default)s)")
parser_cldr_tags.add_argument("--branch", default="latest",
help="Git branch (default: %(default)s)")
parser_cldr_tags.add_argument("--revision", default="HEAD",
help="Git revision (default: %(default)s)")
default="https://unicode.org/Public/cldr/<VERSION>/core.zip",
type=EnsureHttps,
help="Download url CLDR data (default: %(default)s)")
parser_cldr_tags.add_argument("--out",
default="LanguageTagGenerated.cpp",
help="Output file (default: %(default)s)")
parser_cldr_tags.add_argument("files",
nargs="*",
help="Local ldmlSupplemental.dtd, supplementalMetadata.xml, "
"and likelySubtags.xml files, if omitted uses <URL>")
parser_cldr_tags.add_argument("file",
nargs="?",
help="Local cldr-core.zip file, if omitted uses <URL>")
parser_cldr_tags.set_defaults(func=updateCLDRLangTags)
parser_tz = subparsers.add_parser("tzdata", help="Update tzdata")
@ -2409,23 +2238,5 @@ if __name__ == "__main__":
help="Local currency code list file, if omitted uses <URL>")
parser_currency.set_defaults(func=partial(updateCurrency, topsrcdir))
parser_unicode_ext = subparsers.add_parser("unicode-ext", help="Update Unicode extensions")
parser_unicode_ext.add_argument("--version",
metavar="VERSION",
required=True,
help="CLDR version number")
parser_unicode_ext.add_argument("--url",
metavar="URL",
default="https://unicode.org/Public/cldr/<VERSION>/core.zip",
type=EnsureHttps,
help="Download url CLDR data (default: %(default)s)")
parser_unicode_ext.add_argument("--out",
default="UnicodeExtensionsGenerated.js",
help="Output file (default: %(default)s)")
parser_unicode_ext.add_argument("file",
nargs="?",
help="Local cldr-core.zip file, if omitted uses <URL>")
parser_unicode_ext.set_defaults(func=updateUnicodeExtensions)
args = parser.parse_args()
args.func(args)

Просмотреть файл

@ -387,7 +387,6 @@ if CONFIG['ENABLE_INTL_API']:
'builtin/intl/PluralRules.cpp',
'builtin/intl/RelativeTimeFormat.cpp',
'builtin/intl/SharedIntlData.cpp',
'builtin/intl/UnicodeExtensionsGenerated.cpp',
]
if CONFIG['MOZ_INSTRUMENTS']:

Просмотреть файл

@ -3,7 +3,7 @@
// Extracted from likelySubtags.xml.
// Derived from CLDR Supplemental Data, version 35.1.
// https://github.com/unicode-org/cldr.git
// https://unicode.org/Public/cldr/35.1/core.zip
var maxLikelySubtags = {
"aa": "aa-Latn-ET",
"aai": "aai-Latn-ZZ",
@ -1842,7 +1842,7 @@ var maxLikelySubtags = {
// Extracted from likelySubtags.xml.
// Derived from CLDR Supplemental Data, version 35.1.
// https://github.com/unicode-org/cldr.git
// https://unicode.org/Public/cldr/35.1/core.zip
var minLikelySubtags = {
"aa-Latn-DJ": "aa-DJ",
"aa-Latn-ET": "aa",