зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1451082 - Update IANA language subtag registry data to version 2018-03-30. r=Waldo
This commit is contained in:
Родитель
268b9d66c5
Коммит
37bc3aaed9
|
@ -125,17 +125,16 @@ function getUnicodeExtensions(locale) {
|
|||
* Parser for BCP 47 language tags.
|
||||
*
|
||||
* Returns null if |locale| can't be parsed as a Language-Tag. If the input is
|
||||
* an irregular grandfathered language tag, the object
|
||||
* a grandfathered language tag, the object
|
||||
*
|
||||
* {
|
||||
* locale: locale.toLowerCase(),
|
||||
* locale: locale (normalized to canonical form),
|
||||
* grandfathered: true,
|
||||
* }
|
||||
*
|
||||
* is returned. Otherwise the returned object has the following structure:
|
||||
*
|
||||
* {
|
||||
* locale: locale.toLowerCase(),
|
||||
* language: language subtag without extlang / undefined,
|
||||
* extlang1: first extlang subtag / undefined,
|
||||
* extlang2: second extlang subtag / undefined,
|
||||
|
@ -147,13 +146,12 @@ function getUnicodeExtensions(locale) {
|
|||
* privateuse: privateuse subtag / undefined,
|
||||
* }
|
||||
*
|
||||
* All language tag subtags are returned in lower-case:
|
||||
* All language tag subtags are returned in their normalized case:
|
||||
*
|
||||
* var langtag = parseLanguageTag("en-Latn-US");
|
||||
* assertEq("en-latn-us", langtag.locale);
|
||||
* var langtag = parseLanguageTag("en-latn-us");
|
||||
* assertEq("en", langtag.language);
|
||||
* assertEq("latn", langtag.script);
|
||||
* assertEq("us", langtag.region);
|
||||
* assertEq("Latn", langtag.script);
|
||||
* assertEq("US", langtag.region);
|
||||
*
|
||||
* Spec: RFC 5646 section 2.1.
|
||||
*/
|
||||
|
@ -307,6 +305,12 @@ function parseLanguageTag(locale) {
|
|||
// script = 4ALPHA ; ISO 15924 code
|
||||
if (tokenLength === 4 && token === ALPHA) {
|
||||
script = tokenStringLower();
|
||||
|
||||
// The first character of a script code needs to be capitalized.
|
||||
// "hans" -> "Hans"
|
||||
script = callFunction(std_String_toUpperCase, script[0]) +
|
||||
Substring(script, 1, script.length - 1);
|
||||
|
||||
if (!nextToken())
|
||||
return null;
|
||||
}
|
||||
|
@ -315,6 +319,10 @@ function parseLanguageTag(locale) {
|
|||
// / 3DIGIT ; UN M.49 code
|
||||
if ((tokenLength === 2 && token === ALPHA) || (tokenLength === 3 && token === DIGIT)) {
|
||||
region = tokenStringLower();
|
||||
|
||||
// Region codes need to be in upper-case. "bu" -> "BU"
|
||||
region = callFunction(std_String_toUpperCase, region);
|
||||
|
||||
if (!nextToken())
|
||||
return null;
|
||||
}
|
||||
|
@ -417,12 +425,11 @@ function parseLanguageTag(locale) {
|
|||
localeLowercase.length - privateuseStart);
|
||||
}
|
||||
|
||||
// Return if the complete input was successfully parsed. That means it is
|
||||
// either a langtag or privateuse-only language tag, or it is a regular
|
||||
// grandfathered language tag.
|
||||
if (token === NONE) {
|
||||
// Return if the complete input was successfully parsed and it is not a
|
||||
// regular grandfathered language tag. That means it is either a langtag
|
||||
// or privateuse-only language tag.
|
||||
if (token === NONE && !hasOwn(localeLowercase, grandfatheredMappings)) {
|
||||
return {
|
||||
locale: localeLowercase,
|
||||
language,
|
||||
extlang1,
|
||||
extlang2,
|
||||
|
@ -443,76 +450,48 @@ function parseLanguageTag(locale) {
|
|||
// For example we need to reject "i-ha\u212A" (U+212A KELVIN SIGN) even
|
||||
// though its lower-case form "i-hak" matches a grandfathered language
|
||||
// tag.
|
||||
do {
|
||||
while (token !== NONE) {
|
||||
if (!nextToken())
|
||||
return null;
|
||||
} while (token !== NONE);
|
||||
}
|
||||
|
||||
// grandfathered = irregular ; non-redundant tags registered
|
||||
// / regular ; during the RFC 3066 era
|
||||
switch (localeLowercase) {
|
||||
#ifdef DEBUG
|
||||
// regular = "art-lojban" ; these tags match the 'langtag'
|
||||
// / "cel-gaulish" ; production, but their subtags
|
||||
// / "no-bok" ; are not extended language
|
||||
// / "no-nyn" ; or variant subtags: their meaning
|
||||
// / "zh-guoyu" ; is defined by their registration
|
||||
// / "zh-hakka" ; and all of these are deprecated
|
||||
// / "zh-min" ; in favor of a more modern
|
||||
// / "zh-min-nan" ; subtag or sequence of subtags
|
||||
// / "zh-xiang"
|
||||
case "art-lojban":
|
||||
case "cel-gaulish":
|
||||
case "no-bok":
|
||||
case "no-nyn":
|
||||
case "zh-guoyu":
|
||||
case "zh-hakka":
|
||||
case "zh-min":
|
||||
case "zh-min-nan":
|
||||
case "zh-xiang":
|
||||
assert(false, "regular grandfathered tags should have been matched above");
|
||||
#endif /* DEBUG */
|
||||
|
||||
// irregular = "en-GB-oed" ; irregular tags do not match
|
||||
// / "i-ami" ; the 'langtag' production and
|
||||
// / "i-bnn" ; would not otherwise be
|
||||
// / "i-default" ; considered 'well-formed'
|
||||
// / "i-enochian" ; These tags are all valid,
|
||||
// / "i-hak" ; but most are deprecated
|
||||
// / "i-klingon" ; in favor of more modern
|
||||
// / "i-lux" ; subtags or subtag
|
||||
// / "i-mingo" ; combination
|
||||
// / "i-navajo"
|
||||
// / "i-pwn"
|
||||
// / "i-tao"
|
||||
// / "i-tay"
|
||||
// / "i-tsu"
|
||||
// / "sgn-BE-FR"
|
||||
// / "sgn-BE-NL"
|
||||
// / "sgn-CH-DE"
|
||||
case "en-gb-oed":
|
||||
case "i-ami":
|
||||
case "i-bnn":
|
||||
case "i-default":
|
||||
case "i-enochian":
|
||||
case "i-hak":
|
||||
case "i-klingon":
|
||||
case "i-lux":
|
||||
case "i-mingo":
|
||||
case "i-navajo":
|
||||
case "i-pwn":
|
||||
case "i-tao":
|
||||
case "i-tay":
|
||||
case "i-tsu":
|
||||
case "sgn-be-fr":
|
||||
case "sgn-be-nl":
|
||||
case "sgn-ch-de":
|
||||
return { locale: localeLowercase, grandfathered: true };
|
||||
|
||||
default:
|
||||
return null;
|
||||
// irregular = "en-GB-oed" ; irregular tags do not match
|
||||
// / "i-ami" ; the 'langtag' production and
|
||||
// / "i-bnn" ; would not otherwise be
|
||||
// / "i-default" ; considered 'well-formed'
|
||||
// / "i-enochian" ; These tags are all valid,
|
||||
// / "i-hak" ; but most are deprecated
|
||||
// / "i-klingon" ; in favor of more modern
|
||||
// / "i-lux" ; subtags or subtag
|
||||
// / "i-mingo" ; combination
|
||||
// / "i-navajo"
|
||||
// / "i-pwn"
|
||||
// / "i-tao"
|
||||
// / "i-tay"
|
||||
// / "i-tsu"
|
||||
// / "sgn-BE-FR"
|
||||
// / "sgn-BE-NL"
|
||||
// / "sgn-CH-DE"
|
||||
// regular = "art-lojban" ; these tags match the 'langtag'
|
||||
// / "cel-gaulish" ; production, but their subtags
|
||||
// / "no-bok" ; are not extended language
|
||||
// / "no-nyn" ; or variant subtags: their meaning
|
||||
// / "zh-guoyu" ; is defined by their registration
|
||||
// / "zh-hakka" ; and all of these are deprecated
|
||||
// / "zh-min" ; in favor of a more modern
|
||||
// / "zh-min-nan" ; subtag or sequence of subtags
|
||||
// / "zh-xiang"
|
||||
if (hasOwn(localeLowercase, grandfatheredMappings)) {
|
||||
return {
|
||||
locale: grandfatheredMappings[localeLowercase],
|
||||
grandfathered: true
|
||||
};
|
||||
}
|
||||
|
||||
return null;
|
||||
|
||||
#undef NONE
|
||||
#undef ALPHA
|
||||
#undef DIGIT
|
||||
|
@ -560,16 +539,12 @@ function IsStructurallyValidLanguageTag(locale) {
|
|||
function CanonicalizeLanguageTagFromObject(localeObj) {
|
||||
assert(IsObject(localeObj), "CanonicalizeLanguageTagFromObject");
|
||||
|
||||
var {locale} = localeObj;
|
||||
assert(locale === callFunction(std_String_toLowerCase, locale),
|
||||
"expected lower-case form for locale string");
|
||||
// Handle grandfathered language tags.
|
||||
if (hasOwn("grandfathered", localeObj))
|
||||
return localeObj.locale;
|
||||
|
||||
// Handle mappings for complete tags.
|
||||
if (hasOwn(locale, langTagMappings))
|
||||
return langTagMappings[locale];
|
||||
|
||||
assert(!hasOwn("grandfathered", localeObj),
|
||||
"grandfathered tags should be mapped completely");
|
||||
// Update mappings for complete tags.
|
||||
updateLangTagMappings(localeObj);
|
||||
|
||||
var {
|
||||
language,
|
||||
|
@ -630,25 +605,25 @@ function CanonicalizeLanguageTagFromObject(localeObj) {
|
|||
if (extlang3)
|
||||
canonical += "-" + extlang3;
|
||||
|
||||
// No script replacements are currently present, so append as is.
|
||||
if (script) {
|
||||
// The first character of a script code needs to be capitalized.
|
||||
// "hans" -> "Hans"
|
||||
script = callFunction(std_String_toUpperCase, script[0]) +
|
||||
Substring(script, 1, script.length - 1);
|
||||
|
||||
// No script replacements are currently present, so append as is.
|
||||
assert(script.length === 4 &&
|
||||
script ===
|
||||
callFunction(std_String_toUpperCase, script[0]) +
|
||||
callFunction(std_String_toLowerCase, Substring(script, 1, script.length - 1)),
|
||||
"script must be [A-Z][a-z]{3}");
|
||||
canonical += "-" + script;
|
||||
}
|
||||
|
||||
if (region) {
|
||||
// Region codes need to be in upper-case. "bu" -> "BU"
|
||||
region = callFunction(std_String_toUpperCase, region);
|
||||
|
||||
// Replace deprecated subtags with their preferred values.
|
||||
// "BU" -> "MM"
|
||||
if (hasOwn(region, regionMappings))
|
||||
region = regionMappings[region];
|
||||
|
||||
assert((2 <= region.length && region.length <= 3) &&
|
||||
region === callFunction(std_String_toUpperCase, region),
|
||||
"region must be [A-Z]{2} or [0-9]{3}");
|
||||
canonical += "-" + region;
|
||||
}
|
||||
|
||||
|
@ -733,9 +708,9 @@ function ValidateAndCanonicalizeLanguageTag(locale) {
|
|||
// The language subtag is canonicalized to lower case.
|
||||
locale = callFunction(std_String_toLowerCase, locale);
|
||||
|
||||
// langTagMappings doesn't contain any 2*3ALPHA keys, so we don't need
|
||||
// to check for possible replacements in this map.
|
||||
assert(!hasOwn(locale, langTagMappings), "langTagMappings contains no 2*3ALPHA mappings");
|
||||
// updateLangTagMappings doesn't modify tags containing only
|
||||
// |language| subtags, so we don't need to call it for possible
|
||||
// replacements.
|
||||
|
||||
// Replace deprecated subtags with their preferred values.
|
||||
locale = hasOwn(locale, languageMappings)
|
||||
|
|
|
@ -1,9 +1,321 @@
|
|||
// Generated by make_intl_data.py. DO NOT EDIT.
|
||||
|
||||
/* eslint-disable complexity */
|
||||
// Mappings from complete tags to preferred values.
|
||||
// Derived from IANA Language Subtag Registry, file date 2018-03-20.
|
||||
// Derived from IANA Language Subtag Registry, file date 2018-03-30.
|
||||
// https://www.iana.org/assignments/language-subtag-registry
|
||||
var langTagMappings = {
|
||||
/**
 * Rewrites |tag| in place when it matches one of the complete-tag
 * replacements handled below:
 *
 *   hy-arevela             -> hy
 *   hy-arevmda             -> hyw
 *   ja-Latn-hepburn-heploc -> ja-Latn-alalc97
 *   sgn-<region>           -> dedicated sign language subtag
 *
 * |tag| must be a parsed, non-grandfathered language tag object. Tags that
 * match none of the replacements are left untouched.
 */
function updateLangTagMappings(tag) {
    assert(IsObject(tag), "tag is an object");
    assert(!hasOwn("grandfathered", tag), "tag is not a grandfathered tag");

    var language = tag.language;
    var remaining, idx, current;

    if (language === "hy") {
        // Decide which obsolete variant (if any) has to be dropped.
        var obsolete;
        if (tag.variants.length >= 1 &&
            callFunction(ArrayIndexOf, tag.variants, "arevela") > -1)
        {
            // hy-arevela -> hy
            obsolete = "arevela";
        } else if (tag.variants.length >= 1 &&
                   callFunction(ArrayIndexOf, tag.variants, "arevmda") > -1)
        {
            // hy-arevmda -> hyw
            obsolete = "arevmda";
            tag.language = "hyw";
        } else {
            return;
        }

        // Copy all variants except the obsolete one.
        remaining = [];
        for (idx = 0; idx < tag.variants.length; idx++) {
            current = tag.variants[idx];
            if (current !== obsolete)
                _DefineDataProperty(remaining, remaining.length, current);
        }
        tag.variants = remaining;
        return;
    }

    if (language === "ja") {
        // ja-Latn-hepburn-heploc -> ja-Latn-alalc97
        if (tag.script !== "Latn" || tag.variants.length < 2)
            return;

        // "heploc" only counts when it appears after "hepburn".
        var hepburnIndex = callFunction(ArrayIndexOf, tag.variants, "hepburn");
        if (hepburnIndex < 0)
            return;
        if (callFunction(ArrayIndexOf, tag.variants, "heploc", hepburnIndex + 1) < 0)
            return;

        remaining = [];
        for (idx = 0; idx < tag.variants.length; idx++) {
            current = tag.variants[idx];
            if (current === "hepburn" || current === "heploc")
                continue;
            _DefineDataProperty(remaining, remaining.length, current);
        }

        // Append the replacement variant unless it is already present.
        if (callFunction(ArrayIndexOf, remaining, "alalc97") < 0)
            _DefineDataProperty(remaining, remaining.length, "alalc97");
        tag.variants = remaining;
        return;
    }

    if (language !== "sgn")
        return;

    // Look up the replacement language for sgn-<region>.
    var preferred;
    switch (tag.region) {
      case "BR": preferred = "bzs"; break; // sgn-BR -> bzs
      case "CO": preferred = "csn"; break; // sgn-CO -> csn
      case "DE": preferred = "gsg"; break; // sgn-DE -> gsg
      case "DK": preferred = "dsl"; break; // sgn-DK -> dsl
      case "ES": preferred = "ssp"; break; // sgn-ES -> ssp
      case "FR": preferred = "fsl"; break; // sgn-FR -> fsl
      case "GB": preferred = "bfi"; break; // sgn-GB -> bfi
      case "GR": preferred = "gss"; break; // sgn-GR -> gss
      case "IE": preferred = "isg"; break; // sgn-IE -> isg
      case "IT": preferred = "ise"; break; // sgn-IT -> ise
      case "JP": preferred = "jsl"; break; // sgn-JP -> jsl
      case "MX": preferred = "mfs"; break; // sgn-MX -> mfs
      case "NI": preferred = "ncs"; break; // sgn-NI -> ncs
      case "NL": preferred = "dse"; break; // sgn-NL -> dse
      case "NO": preferred = "nsl"; break; // sgn-NO -> nsl
      case "PT": preferred = "psr"; break; // sgn-PT -> psr
      case "SE": preferred = "swl"; break; // sgn-SE -> swl
      case "US": preferred = "ase"; break; // sgn-US -> ase
      case "ZA": preferred = "sfs"; break; // sgn-ZA -> sfs
      default:
        return;
    }

    // The replacement only applies when the tag is exactly "sgn-<region>",
    // i.e. no other subtag is present.
    if (tag.extlang1 === undefined &&
        tag.extlang2 === undefined &&
        tag.extlang3 === undefined &&
        tag.script === undefined &&
        tag.variants.length === 0 &&
        tag.extensions.length === 0 &&
        tag.privateuse === undefined)
    {
        tag.language = preferred;
        tag.region = undefined;
    }
}
|
||||
/* eslint-enable complexity */
|
||||
|
||||
// Mappings from grandfathered tags to preferred values.
|
||||
// Derived from IANA Language Subtag Registry, file date 2018-03-30.
|
||||
// https://www.iana.org/assignments/language-subtag-registry
|
||||
var grandfatheredMappings = {
|
||||
"art-lojban": "jbo",
|
||||
"cel-gaulish": "cel-gaulish",
|
||||
"en-gb-oed": "en-GB-oxendict",
|
||||
|
@ -20,46 +332,20 @@ var langTagMappings = {
|
|||
"i-tao": "tao",
|
||||
"i-tay": "tay",
|
||||
"i-tsu": "tsu",
|
||||
"ja-latn-hepburn-heploc": "ja-Latn-alalc97",
|
||||
"no-bok": "nb",
|
||||
"no-nyn": "nn",
|
||||
"sgn-be-fr": "sfb",
|
||||
"sgn-be-nl": "vgt",
|
||||
"sgn-br": "bzs",
|
||||
"sgn-ch-de": "sgg",
|
||||
"sgn-co": "csn",
|
||||
"sgn-de": "gsg",
|
||||
"sgn-dk": "dsl",
|
||||
"sgn-es": "ssp",
|
||||
"sgn-fr": "fsl",
|
||||
"sgn-gb": "bfi",
|
||||
"sgn-gr": "gss",
|
||||
"sgn-ie": "isg",
|
||||
"sgn-it": "ise",
|
||||
"sgn-jp": "jsl",
|
||||
"sgn-mx": "mfs",
|
||||
"sgn-ni": "ncs",
|
||||
"sgn-nl": "dse",
|
||||
"sgn-no": "nsl",
|
||||
"sgn-pt": "psr",
|
||||
"sgn-se": "swl",
|
||||
"sgn-us": "ase",
|
||||
"sgn-za": "sfs",
|
||||
"zh-cmn": "cmn",
|
||||
"zh-cmn-hans": "cmn-Hans",
|
||||
"zh-cmn-hant": "cmn-Hant",
|
||||
"zh-gan": "gan",
|
||||
"zh-guoyu": "cmn",
|
||||
"zh-hakka": "hak",
|
||||
"zh-min": "zh-min",
|
||||
"zh-min-nan": "nan",
|
||||
"zh-wuu": "wuu",
|
||||
"zh-xiang": "hsn",
|
||||
"zh-yue": "yue",
|
||||
};
|
||||
|
||||
// Mappings from language subtags to preferred values.
|
||||
// Derived from IANA Language Subtag Registry, file date 2018-03-20.
|
||||
// Derived from IANA Language Subtag Registry, file date 2018-03-30.
|
||||
// https://www.iana.org/assignments/language-subtag-registry
|
||||
var languageMappings = {
|
||||
"aam": "aas",
|
||||
|
@ -143,7 +429,7 @@ var languageMappings = {
|
|||
};
|
||||
|
||||
// Mappings from region subtags to preferred values.
|
||||
// Derived from IANA Language Subtag Registry, file date 2018-03-20.
|
||||
// Derived from IANA Language Subtag Registry, file date 2018-03-30.
|
||||
// https://www.iana.org/assignments/language-subtag-registry
|
||||
var regionMappings = {
|
||||
"BU": "MM",
|
||||
|
@ -158,7 +444,7 @@ var regionMappings = {
|
|||
// All current deprecated extlang subtags have the form `<prefix>-<extlang>`
|
||||
// and their preferred value is exactly equal to `<extlang>`. So each key in
|
||||
// extlangMappings acts both as the extlang subtag and its preferred value.
|
||||
// Derived from IANA Language Subtag Registry, file date 2018-03-20.
|
||||
// Derived from IANA Language Subtag Registry, file date 2018-03-30.
|
||||
// https://www.iana.org/assignments/language-subtag-registry
|
||||
var extlangMappings = {
|
||||
"aao": "ar",
|
||||
|
|
|
@ -69,30 +69,34 @@ def readRegistryRecord(registry):
|
|||
yield record
|
||||
return
|
||||
|
||||
|
||||
def readRegistry(registry):
|
||||
""" Reads IANA Language Subtag Registry and extracts information for Intl.js.
|
||||
|
||||
Information extracted:
|
||||
- langTagMappings: mappings from complete language tags to preferred
|
||||
- grandfatheredMappings: mappings from grandfathered tags to preferred
|
||||
complete language tags
|
||||
- redundantMappings: mappings from redundant tags to preferred complete
|
||||
language tags
|
||||
- languageMappings: mappings from language subtags to preferred subtags
|
||||
- regionMappings: mappings from region subtags to preferred subtags
|
||||
- variantMappings: mappings from complete language tags to preferred
|
||||
complete language tags
|
||||
- extlangMappings: mappings from extlang subtags to preferred subtags,
|
||||
with prefix to be removed
|
||||
Returns these four mappings as dictionaries, along with the registry's
|
||||
Returns these six mappings as dictionaries, along with the registry's
|
||||
file date.
|
||||
|
||||
We also check that extlang mappings don't generate preferred values
|
||||
which in turn are subject to language subtag mappings, so that
|
||||
CanonicalizeLanguageTag can process subtags sequentially.
|
||||
"""
|
||||
langTagMappings = {}
|
||||
grandfatheredMappings = {}
|
||||
redundantMappings = {}
|
||||
languageMappings = {}
|
||||
regionMappings = {}
|
||||
variantMappings = {}
|
||||
extlangMappings = {}
|
||||
languageSubtags = set()
|
||||
extlangSubtags = set()
|
||||
extlangSubtags = []
|
||||
|
||||
for record in readRegistryRecord(registry):
|
||||
if "File-Date" in record:
|
||||
|
@ -103,23 +107,22 @@ def readRegistry(registry):
|
|||
# Grandfathered tags don't use standard syntax, so
|
||||
# CanonicalizeLanguageTag expects the mapping table to provide
|
||||
# the final form for all.
|
||||
# For langTagMappings, keys must be in lower case; values in
|
||||
# For grandfatheredMappings, keys must be in lower case; values in
|
||||
# the case used in the registry.
|
||||
tag = record["Tag"]
|
||||
if "Preferred-Value" in record:
|
||||
langTagMappings[tag.lower()] = record["Preferred-Value"]
|
||||
grandfatheredMappings[tag.lower()] = record["Preferred-Value"]
|
||||
else:
|
||||
langTagMappings[tag.lower()] = tag
|
||||
grandfatheredMappings[tag.lower()] = tag
|
||||
elif record["Type"] == "redundant":
|
||||
# For langTagMappings, keys must be in lower case; values in
|
||||
# the case used in the registry.
|
||||
# For redundantMappings, keys and values must be in the case used
|
||||
# in the registry.
|
||||
if "Preferred-Value" in record:
|
||||
langTagMappings[record["Tag"].lower()] = record["Preferred-Value"]
|
||||
redundantMappings[record["Tag"]] = record["Preferred-Value"]
|
||||
elif record["Type"] == "language":
|
||||
# For languageMappings, keys and values must be in the case used
|
||||
# in the registry.
|
||||
subtag = record["Subtag"]
|
||||
languageSubtags.add(subtag)
|
||||
if "Preferred-Value" in record:
|
||||
# The 'Prefix' field is not allowed for language records.
|
||||
# https://tools.ietf.org/html/rfc5646#section-3.1.2
|
||||
|
@ -139,21 +142,19 @@ def readRegistry(registry):
|
|||
# The registry currently doesn't contain mappings for scripts.
|
||||
raise Exception("Unexpected mapping for script subtags")
|
||||
elif record["Type"] == "variant":
|
||||
subtag = record["Subtag"]
|
||||
# For variantMappings, keys and values must be in the case used in
|
||||
# the registry.
|
||||
if "Preferred-Value" in record:
|
||||
if subtag == "heploc":
|
||||
# The entry for heploc is unique in its complexity; handle
|
||||
# it as special case below.
|
||||
continue
|
||||
# The registry currently doesn't contain mappings for variants,
|
||||
# except for heploc which is already handled above.
|
||||
raise Exception("Unexpected mapping for variant subtags")
|
||||
if "Prefix" not in record:
|
||||
raise Exception("Unexpected mapping for variant subtags")
|
||||
tag = "{}-{}".format(record["Prefix"], record["Subtag"])
|
||||
variantMappings[tag] = record["Preferred-Value"]
|
||||
elif record["Type"] == "extlang":
|
||||
# For extlangMappings, keys must be in the case used in the
|
||||
# registry; values are records with the preferred value and the
|
||||
# prefix to be removed.
|
||||
subtag = record["Subtag"]
|
||||
extlangSubtags.add(subtag)
|
||||
extlangSubtags.append(subtag)
|
||||
if "Preferred-Value" in record:
|
||||
preferred = record["Preferred-Value"]
|
||||
# The 'Preferred-Value' and 'Subtag' fields MUST be identical.
|
||||
|
@ -173,57 +174,349 @@ def readRegistry(registry):
|
|||
raise Exception("Conflict: extlang with lang mapping: " + extlang)
|
||||
|
||||
# Special case for heploc.
|
||||
langTagMappings["ja-latn-hepburn-heploc"] = "ja-Latn-alalc97"
|
||||
assert variantMappings["ja-Latn-hepburn-heploc"] == "alalc97"
|
||||
variantMappings["ja-Latn-hepburn-heploc"] = "ja-Latn-alalc97"
|
||||
|
||||
# ValidateAndCanonicalizeLanguageTag in CommonFunctions.js expects
|
||||
# langTagMappings contains no 2*3ALPHA.
|
||||
assert all(len(lang) > 3 for lang in langTagMappings.iterkeys())
|
||||
# redundantMappings contains no 2*3ALPHA.
|
||||
assert all(len(lang) > 3 for lang in redundantMappings.iterkeys())
|
||||
|
||||
return {"fileDate": fileDate,
|
||||
"langTagMappings": langTagMappings,
|
||||
"grandfatheredMappings": grandfatheredMappings,
|
||||
"redundantMappings": redundantMappings,
|
||||
"languageMappings": languageMappings,
|
||||
"regionMappings": regionMappings,
|
||||
"variantMappings": variantMappings,
|
||||
"extlangMappings": extlangMappings}
|
||||
|
||||
|
||||
def writeMappingsVar(intlData, dict, name, description, fileDate, url):
|
||||
""" Writes a variable definition with a mapping table to file intlData.
|
||||
|
||||
Writes the contents of dictionary dict to file intlData with the given
|
||||
variable name and a comment with description, fileDate, and URL.
|
||||
"""
|
||||
intlData.write("\n")
|
||||
def writeMappingHeader(println, description, fileDate, url):
|
||||
if type(description) is not list:
|
||||
description = [description]
|
||||
for desc in description:
|
||||
intlData.write("// {0}\n".format(desc))
|
||||
intlData.write("// Derived from IANA Language Subtag Registry, file date {0}.\n".format(fileDate))
|
||||
intlData.write("// {0}\n".format(url))
|
||||
intlData.write("var {0} = {{\n".format(name))
|
||||
keys = sorted(dict)
|
||||
for key in keys:
|
||||
if isinstance(dict[key], basestring):
|
||||
value = '"{0}"'.format(dict[key])
|
||||
println(u"// {0}".format(desc))
|
||||
println(u"// Derived from IANA Language Subtag Registry, file date {0}.".format(fileDate))
|
||||
println(u"// {0}".format(url))
|
||||
|
||||
def writeMappingsVar(println, mapping, name, description, fileDate, url):
|
||||
""" Writes a variable definition with a mapping table.
|
||||
|
||||
Writes the contents of dictionary |mapping| through the |println|
|
||||
function with the given variable name and a comment with description,
|
||||
fileDate, and URL.
|
||||
"""
|
||||
println(u"")
|
||||
writeMappingHeader(println, description, fileDate, url)
|
||||
println(u"var {0} = {{".format(name))
|
||||
for key in sorted(mapping):
|
||||
if isinstance(mapping[key], basestring):
|
||||
value = '"{0}"'.format(mapping[key])
|
||||
else:
|
||||
preferred = dict[key]["preferred"]
|
||||
prefix = dict[key]["prefix"]
|
||||
preferred = mapping[key]["preferred"]
|
||||
prefix = mapping[key]["prefix"]
|
||||
if key != preferred:
|
||||
raise Exception("Expected '{0}' matches preferred locale '{1}'".format(key, preferred))
|
||||
value = '"{0}"'.format(prefix)
|
||||
intlData.write(' "{0}": {1},\n'.format(key, value))
|
||||
intlData.write("};\n")
|
||||
println(u' "{0}": {1},'.format(key, value))
|
||||
println(u"};")
|
||||
|
||||
def writeMappingsFunction(println, variantMappings, redundantMappings, extlangMappings, description, fileDate, url):
|
||||
""" Writes a function definition which performs language tag mapping.
|
||||
|
||||
def writeLanguageTagData(intlData, fileDate, url, langTagMappings, languageMappings,
|
||||
regionMappings, extlangMappings):
|
||||
Processes the contents of dictionaries |variantMappings| and
|
||||
|redundantMappings| through the |println| function with the given
|
||||
function name and a comment with description, fileDate, and URL.
|
||||
"""
|
||||
|
||||
class Subtag:
|
||||
Language, ExtLang, Script, Region, Variant = range(5)
|
||||
Invalid = -1
|
||||
|
||||
def splitSubtags(tag):
    """ Splits |tag| at "-" and yields one (kind, subtag) pair per subtag.

        |kind| is one of the Subtag constants Language, ExtLang, Script,
        Region, or Variant. Only the first subtag matching the language
        production is classified as the language subtag.

        Fails with an AssertionError naming |tag| when a subtag matches none
        of the supported productions (for example extension or private-use
        subtags).
    """
    seenLanguage = False
    for subtag in tag.split("-"):
        # language = 2*3ALPHA / 4ALPHA / 5*8ALPHA
        if len(subtag) in range(2, 8+1) and subtag.isalpha() and not seenLanguage:
            seenLanguage = True
            kind = Subtag.Language

        # extlang = 3ALPHA
        elif len(subtag) == 3 and subtag.isalpha() and seenLanguage:
            kind = Subtag.ExtLang

        # script = 4ALPHA
        elif len(subtag) == 4 and subtag.isalpha():
            kind = Subtag.Script

        # region = 2ALPHA / 3DIGIT
        elif ((len(subtag) == 2 and subtag.isalpha()) or
              (len(subtag) == 3 and subtag.isdigit())):
            kind = Subtag.Region

        # variant = 5*8alphanum / (DIGIT 3alphanum)
        elif ((len(subtag) in range(5, 8+1) and subtag.isalnum()) or
              (len(subtag) == 4 and subtag[0].isdigit() and subtag[1:].isalnum())):
            kind = Subtag.Variant

        else:
            # Report the offending language tag. The message expression is
            # evaluated when the assertion fails, so it must only use names
            # which are in scope here.
            assert False, "unexpected language tag '{}'".format(tag)

        yield (kind, subtag)
|
||||
|
||||
def language(tag):
    """ Returns the first subtag of |tag|, which must be a language subtag. """
    first = next(splitSubtags(tag))
    assert first[0] == Subtag.Language
    return first[1]
|
||||
|
||||
def variants(tag):
    """ Returns the list of variant subtags of |tag| in order of appearance. """
    found = []
    for (kind, subtag) in splitSubtags(tag):
        if kind == Subtag.Variant:
            found.append(subtag)
    return found
|
||||
|
||||
def emitCompare(tag, preferred, isFirstLanguageTag):
    """ Emits the JS code which replaces |tag| with |preferred|.

        Prints, through |println|, a comment naming the mapping followed by
        one JS `if` statement (`else if` unless |isFirstLanguageTag| is true).
        The condition tests the properties of the JS object `tag` for the
        non-language subtags of |tag|. The body assigns the subtags of
        |preferred| to that object.
    """
    # Prints |args| preceded by a run of (4 * level - 1) spaces which is
    # passed to |println| as a separate leading argument.
    # NOTE(review): presumably |println| joins its arguments with a single
    # space like print(), giving 4 * level columns of indentation - confirm.
    def println_indent(level, *args):
        println(u" " * (4 * level - 1), *args)
    println2 = partial(println_indent, 2)
    println3 = partial(println_indent, 3)

    # Returns the next element of |it|, or an (Invalid, "") pair when the
    # iterator is exhausted.
    def maybeNext(it):
        dummy = (Subtag.Invalid, "")
        return next(it, dummy)

    # Add a comment for the language tag mapping.
    println2(u"// {} -> {}".format(tag, preferred))

    # Compare the input language tag with the current language tag.
    cond = []
    extlangIndex = 1
    lastVariant = None
    for (kind, subtag) in splitSubtags(tag):
        # No condition is emitted for the language subtag itself.
        if kind == Subtag.Language:
            continue

        if kind == Subtag.ExtLang:
            assert extlangIndex in [1, 2, 3],\
                   "Language-Tag permits no more than three extlang subtags"
            cond.append('tag.extlang{} === "{}"'.format(extlangIndex, subtag))
            extlangIndex += 1
        elif kind == Subtag.Script:
            cond.append('tag.script === "{}"'.format(subtag))
        elif kind == Subtag.Region:
            cond.append('tag.region === "{}"'.format(subtag))
        else:
            assert kind == Subtag.Variant
            if lastVariant is None:
                # First variant: the object needs at least as many variants
                # as |tag| has, and must contain this one.
                cond.append("tag.variants.length >= {}".format(len(variants(tag))))
                cond.append('callFunction(ArrayIndexOf, tag.variants, "{}") > -1'.format(subtag))
            else:
                # Later variants: search starts one past the index of the
                # previous variant (presumably the fromIndex argument of the
                # ArrayIndexOf helper - confirm).
                cond.append('callFunction(ArrayIndexOf, tag.variants, "{}", callFunction(ArrayIndexOf, tag.variants, "{}") + 1) > -1'.format(subtag, lastVariant))
            lastVariant = subtag

    # Require exact matches for redundant language tags.
    if tag in redundantMappings:
        tag_it = splitSubtags(tag)
        tag_next = partial(maybeNext, tag_it)
        (tag_kind, _) = tag_next()

        assert tag_kind == Subtag.Language
        (tag_kind, _) = tag_next()

        # Every extlang/script/region slot which |tag| does not use must be
        # undefined on the JS object.
        subtags = ([(Subtag.ExtLang, "extlang{}".format(i)) for i in range(1, 3+1)] +
                   [(Subtag.Script, "script"), (Subtag.Region, "region")])
        for kind, prop_name in subtags:
            if tag_kind == kind:
                (tag_kind, _) = tag_next()
            else:
                cond.append("tag.{} === undefined".format(prop_name))

        # The number of variants must match exactly; skip over the variants
        # of |tag| so the iterator is exhausted afterwards.
        cond.append("tag.variants.length === {}".format(len(variants(tag))))
        while tag_kind == Subtag.Variant:
            (tag_kind, _) = tag_next()

        cond.append("tag.extensions.length === 0")
        cond.append("tag.privateuse === undefined")
        assert list(tag_it) == [], "unhandled tag subtags"

    # Emit either:
    #
    #   if (cond) {
    #
    # or:
    #
    #   if (cond_1 &&
    #       cond_2 &&
    #       ...
    #       cond_n)
    #   {
    #
    # depending on the number of conditions.
    ifOrElseIf = "if" if isFirstLanguageTag else "else if"
    assert len(cond) > 0, "expect at least one subtag condition"
    if len(cond) == 1:
        println2(u"{} ({}) {{".format(ifOrElseIf, cond[0]))
    else:
        println2(u"{} ({} &&".format(ifOrElseIf, cond[0]))
        # Continuation lines are padded by the width of "if (" / "else if (".
        for c in cond[1:-1]:
            println2(u"{}{} &&".format(" " * (len(ifOrElseIf) + 2), c))
        println2(u"{}{})".format(" " * (len(ifOrElseIf) + 2), cond[-1]))
        println2(u"{")

    # Iterate over all subtags of |tag| and |preferred| and update |tag|
    # with |preferred| in the process. |tag| is modified in-place to use
    # the preferred values.
    tag_it = splitSubtags(tag)
    tag_next = partial(maybeNext, tag_it)
    (tag_kind, tag_subtag) = tag_next()

    preferred_it = splitSubtags(preferred)
    preferred_next = partial(maybeNext, preferred_it)
    (preferred_kind, preferred_subtag) = preferred_next()

    # Update the language subtag.
    assert tag_kind == Subtag.Language and preferred_kind == Subtag.Language
    if tag_subtag != preferred_subtag:
        println3(u'tag.language = "{}";'.format(preferred_subtag))
    (tag_kind, tag_subtag) = tag_next()
    (preferred_kind, preferred_subtag) = preferred_next()

    # Remove any extlang subtags per RFC 5646, 4.5:
    # 'The canonical form contains no 'extlang' subtags.'
    # https://tools.ietf.org/html/rfc5646#section-4.5
    assert preferred_kind != Subtag.ExtLang
    extlangIndex = 1
    while tag_kind == Subtag.ExtLang:
        assert extlangIndex in [1, 2, 3],\
               "Language-Tag permits no more than three extlang subtags"
        println3(u"tag.extlang{} = undefined;".format(extlangIndex))
        extlangIndex += 1
        (tag_kind, tag_subtag) = tag_next()

    # Update the script and region subtags.
    for kind, prop_name in [(Subtag.Script, "script"), (Subtag.Region, "region")]:
        if tag_kind == kind and preferred_kind == kind:
            # Present in both: only emit an assignment when the value differs.
            if tag_subtag != preferred_subtag:
                println3(u'tag.{} = "{}";'.format(prop_name, preferred_subtag))
            (tag_kind, tag_subtag) = tag_next()
            (preferred_kind, preferred_subtag) = preferred_next()
        elif tag_kind == kind:
            # Only present in |tag|: clear the property.
            println3(u"tag.{} = undefined;".format(prop_name))
            (tag_kind, tag_subtag) = tag_next()
        elif preferred_kind == kind:
            # Only present in |preferred|: set the property.
            println3(u'tag.{} = "{}";'.format(prop_name, preferred_subtag))
            (preferred_kind, preferred_subtag) = preferred_next()

    # Update variant subtags.
    if tag_kind == Subtag.Variant or preferred_kind == Subtag.Variant:
        # JS doesn't provide an easy way to remove elements from an array
        # which doesn't trigger Symbol.species, so we need to create a new
        # array and copy all elements.
        println3(u"var newVariants = [];")

        # Copy all variant subtags, ignoring those which should be removed.
        # NOTE(review): the JS snippets below carry their own leading
        # whitespace inside the literals - verify the amounts against the
        # generated file.
        println3(u"for (var i = 0; i < tag.variants.length; i++) {")
        println3(u" var variant = tag.variants[i];")
        while tag_kind == Subtag.Variant:
            println3(u' if (variant === "{}")'.format(tag_subtag))
            println3(u" continue;")
            (tag_kind, tag_subtag) = tag_next()
        println3(u" _DefineDataProperty(newVariants, newVariants.length, variant);")
        println3(u"}")

        # Add the new variants, unless already present.
        while preferred_kind == Subtag.Variant:
            println3(u'if (callFunction(ArrayIndexOf, newVariants, "{}") < 0)'.format(preferred_subtag))
            println3(u' _DefineDataProperty(newVariants, newVariants.length, "{}");'.format(preferred_subtag))
            (preferred_kind, preferred_subtag) = preferred_next()

        # Update the property.
        println3(u"tag.variants = newVariants;")

    # Ensure both language tags were completely processed.
    assert list(tag_it) == [], "unhandled tag subtags"
    assert list(preferred_it) == [], "unhandled preferred subtags"

    println2(u"}")
|
||||
|
||||
# Remove mappings for redundant language tags which are from our point of
|
||||
# view, wait for it, redundant, because there is an equivalent extlang
|
||||
# mapping.
|
||||
#
|
||||
# For example this entry for the redundant tag "zh-cmn":
|
||||
#
|
||||
# Type: redundant
|
||||
# Tag: zh-cmn
|
||||
# Preferred-Value: cmn
|
||||
#
|
||||
# Can also be expressed through the extlang mapping for "cmn":
|
||||
#
|
||||
# Type: extlang
|
||||
# Subtag: cmn
|
||||
# Preferred-Value: cmn
|
||||
# Prefix: zh
|
||||
#
|
||||
def hasExtlangMapping(tag, preferred):
    """ Tests whether the mapping |tag| -> |preferred| is already covered by
        an entry in |extlangMappings|.

        This is the case when the second subtag of |tag| is an extlang subtag,
        |extlangMappings| maps that extlang to the language of |preferred|
        with the language of |tag| as its prefix, and all subtags following
        the extlang are equal to the subtags following the language of
        |preferred|.
    """
    rest_of_tag = splitSubtags(tag)
    (_, tag_lang) = next(rest_of_tag)
    (second_kind, second_subtag) = next(rest_of_tag)

    rest_of_preferred = splitSubtags(preferred)
    (_, preferred_lang) = next(rest_of_preferred)

    # Only mappings for extlang language tags can be covered.
    if second_kind != Subtag.ExtLang:
        return False

    # The extlang table must contain the equivalent entry.
    entry = (second_subtag, {"preferred": preferred_lang, "prefix": tag_lang})
    if entry not in extlangMappings.items():
        return False

    # Any trailing subtags, if present, must be the same.
    return list(rest_of_tag) == list(rest_of_preferred)
|
||||
|
||||
# Create a single mapping for variant and redundant tags, ignoring the
|
||||
# entries which are also covered through extlang mappings.
|
||||
langTagMappings = {tag: preferred
|
||||
for mapping in [variantMappings, redundantMappings]
|
||||
for (tag, preferred) in mapping.items()
|
||||
if not hasExtlangMapping(tag, preferred)}
|
||||
|
||||
println(u"")
|
||||
println(u"/* eslint-disable complexity */")
|
||||
writeMappingHeader(println, description, fileDate, url)
|
||||
println(u"function updateLangTagMappings(tag) {")
|
||||
println(u' assert(IsObject(tag), "tag is an object");')
|
||||
println(u' assert(!hasOwn("grandfathered", tag), "tag is not a grandfathered tag");')
|
||||
println(u"")
|
||||
|
||||
# Switch on the language subtag.
|
||||
println(u" switch (tag.language) {")
|
||||
for lang in sorted(set(language(tag) for tag in langTagMappings)):
|
||||
println(u' case "{}":'.format(lang))
|
||||
isFirstLanguageTag = True
|
||||
for tag in sorted(tag for tag in langTagMappings if language(tag) == lang):
|
||||
assert not isinstance(langTagMappings[tag], dict),\
|
||||
"only supports complete language tags"
|
||||
emitCompare(tag, langTagMappings[tag], isFirstLanguageTag)
|
||||
isFirstLanguageTag = False
|
||||
println(u" break;")
|
||||
println(u" }")
|
||||
|
||||
println(u"}")
|
||||
println(u"/* eslint-enable complexity */")
|
||||
|
||||
def writeLanguageTagData(println, data, url):
|
||||
""" Writes the language tag data to the Intl data file. """
|
||||
writeMappingsVar(intlData, langTagMappings, "langTagMappings",
|
||||
"Mappings from complete tags to preferred values.", fileDate, url)
|
||||
writeMappingsVar(intlData, languageMappings, "languageMappings",
|
||||
|
||||
fileDate = data["fileDate"]
|
||||
grandfatheredMappings = data["grandfatheredMappings"]
|
||||
redundantMappings = data["redundantMappings"]
|
||||
languageMappings = data["languageMappings"]
|
||||
regionMappings = data["regionMappings"]
|
||||
variantMappings = data["variantMappings"]
|
||||
extlangMappings = data["extlangMappings"]
|
||||
|
||||
writeMappingsFunction(println, variantMappings, redundantMappings, extlangMappings,
|
||||
"Mappings from complete tags to preferred values.", fileDate, url)
|
||||
writeMappingsVar(println, grandfatheredMappings, "grandfatheredMappings",
|
||||
"Mappings from grandfathered tags to preferred values.", fileDate, url)
|
||||
writeMappingsVar(println, languageMappings, "languageMappings",
|
||||
"Mappings from language subtags to preferred values.", fileDate, url)
|
||||
writeMappingsVar(intlData, regionMappings, "regionMappings",
|
||||
writeMappingsVar(println, regionMappings, "regionMappings",
|
||||
"Mappings from region subtags to preferred values.", fileDate, url)
|
||||
writeMappingsVar(intlData, extlangMappings, "extlangMappings",
|
||||
writeMappingsVar(println, extlangMappings, "extlangMappings",
|
||||
["Mappings from extlang subtags to preferred values.",
|
||||
"All current deprecated extlang subtags have the form `<prefix>-<extlang>`",
|
||||
"and their preferred value is exactly equal to `<extlang>`. So each key in",
|
||||
|
@ -256,17 +549,13 @@ def updateLangTags(args):
|
|||
print("Processing IANA Language Subtag Registry...")
|
||||
with closing(registry) as reg:
|
||||
data = readRegistry(reg)
|
||||
fileDate = data["fileDate"]
|
||||
langTagMappings = data["langTagMappings"]
|
||||
languageMappings = data["languageMappings"]
|
||||
regionMappings = data["regionMappings"]
|
||||
extlangMappings = data["extlangMappings"]
|
||||
|
||||
print("Writing Intl data...")
|
||||
with codecs.open(out, "w", encoding="utf-8") as intlData:
|
||||
intlData.write("// Generated by make_intl_data.py. DO NOT EDIT.\n")
|
||||
writeLanguageTagData(intlData, fileDate, url, langTagMappings, languageMappings,
|
||||
regionMappings, extlangMappings)
|
||||
with io.open(out, mode="w", encoding="utf-8", newline="") as f:
|
||||
println = partial(print, file=f)
|
||||
|
||||
println(u"// Generated by make_intl_data.py. DO NOT EDIT.")
|
||||
writeLanguageTagData(println, data, url)
|
||||
|
||||
def flines(filepath, encoding="utf-8"):
|
||||
""" Open filepath and iterate over its content. """
|
||||
|
@ -746,11 +1035,11 @@ def processTimeZones(tzdataDir, icuDir, icuTzDir, version, ignoreBackzone, ignor
|
|||
|
||||
println(u"// Format:")
|
||||
println(u'// "LinkName", "Target" // ICU-Target [time zone file]')
|
||||
println(u"struct LinkAndTarget");
|
||||
println(u"{");
|
||||
println(u" const char* const link;");
|
||||
println(u" const char* const target;");
|
||||
println(u"};");
|
||||
println(u"struct LinkAndTarget")
|
||||
println(u"{")
|
||||
println(u" const char* const link;")
|
||||
println(u" const char* const target;")
|
||||
println(u"};")
|
||||
println(u"")
|
||||
println(u"const LinkAndTarget ianaLinksCanonicalizedDifferentlyByICU[] = {")
|
||||
for (zone, target, icuTarget) in incorrectLinks:
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
// |reftest| skip-if(!this.hasOwnProperty("Intl"))
|
||||
|
||||
const languageTags = {
|
||||
// The preferred value of "hy-arevela" is "hy".
|
||||
"hy-arevela": "hy",
|
||||
"hy-Armn-arevela": "hy-Armn",
|
||||
"hy-AM-arevela": "hy-AM",
|
||||
"hy-arevela-fonipa": "hy-fonipa",
|
||||
"hy-fonipa-arevela": "hy-fonipa",
|
||||
|
||||
// The preferred value of "hy-arevmda" is "hyw".
|
||||
"hy-arevmda": "hyw",
|
||||
"hy-Armn-arevmda": "hyw-Armn",
|
||||
"hy-AM-arevmda": "hyw-AM",
|
||||
"hy-arevmda-fonipa": "hyw-fonipa",
|
||||
"hy-fonipa-arevmda": "hyw-fonipa",
|
||||
|
||||
// The preferred value of "ja-Latn-hepburn-heploc" is "ja-Latn-alalc97".
|
||||
"ja-Latn-hepburn-heploc": "ja-Latn-alalc97",
|
||||
"ja-Latn-JP-hepburn-heploc": "ja-Latn-JP-alalc97",
|
||||
|
||||
// Ensure we don't emit "alalc97" when it is already present.
|
||||
"ja-Latn-alalc97-hepburn-heploc": "ja-Latn-alalc97",
|
||||
"ja-Latn-hepburn-alalc97-heploc": "ja-Latn-alalc97",
|
||||
"ja-Latn-hepburn-heploc-alalc97": "ja-Latn-alalc97",
|
||||
|
||||
// No replacement when "heploc" appears before "hepburn".
|
||||
"ja-Latn-heploc-hepburn": "ja-Latn-heploc-hepburn",
|
||||
};
|
||||
|
||||
for (let [tag, canonical] of Object.entries(languageTags)) {
|
||||
assertEq(Intl.getCanonicalLocales(tag)[0], canonical);
|
||||
}
|
||||
|
||||
if (typeof reportCompare === "function")
|
||||
reportCompare(0, 0);
|
Загрузка…
Ссылка в новой задаче