Bug 1522070 - Part 1: Remove support for extlang subtags. r=jwalden

https://github.com/tc39/ecma402/pull/289 changed ECMA-402 to define language
tags as Unicode BCP 47 locale identifiers instead of BCP 47 language tags. As a
result, extlang subtags are no longer supported in language tags.

Differential Revision: https://phabricator.services.mozilla.com/D23536

--HG--
extra : moz-landing-system : lando
This commit is contained in:
André Bargull 2019-04-09 09:16:13 +00:00
Родитель 423efc24b8
Коммит 556ae927e7
5 изменённых файлов: 65 добавлений и 459 удалений

Просмотреть файл

@ -136,6 +136,17 @@ function getUnicodeExtensions(locale) {
/** /**
* Parser for BCP 47 language tags. * Parser for BCP 47 language tags.
* *
* ---------------------------------------------------------------------------
* The following features were removed because the spec was changed to use
* Unicode BCP 47 locale identifiers instead:
* - extlang subtags
*
* The removed features may still be referenced in some comments. This will be
* cleaned up when everything has been updated to follow the new specification.
*
* Ref: https://github.com/tc39/ecma402/pull/289
* ---------------------------------------------------------------------------
*
* Returns null if |locale| can't be parsed as a Language-Tag. If the input is * Returns null if |locale| can't be parsed as a Language-Tag. If the input is
* a grandfathered language tag, the object * a grandfathered language tag, the object
* *
@ -147,10 +158,7 @@ function getUnicodeExtensions(locale) {
* is returned. Otherwise the returned object has the following structure: * is returned. Otherwise the returned object has the following structure:
* *
* { * {
* language: language subtag without extlang / undefined, * language: language subtag / undefined,
* extlang1: first extlang subtag / undefined,
* extlang2: second extlang subtag / undefined,
* extlang3: third extlang subtag / undefined,
* script: script subtag / undefined, * script: script subtag / undefined,
* region: region subtag / undefined, * region: region subtag / undefined,
* variants: array of variant subtags, * variants: array of variant subtags,
@ -269,7 +277,7 @@ function parseLanguageTag(locale) {
assert(tokenLength > 0, "token length is not zero if type is ALPHA"); assert(tokenLength > 0, "token length is not zero if type is ALPHA");
var language, extlang1, extlang2, extlang3, script, region, privateuse; var language, script, region, privateuse;
var variants = []; var variants = [];
var extensions = []; var extensions = [];
@ -281,32 +289,12 @@ function parseLanguageTag(locale) {
// ["-" privateuse] // ["-" privateuse]
if (tokenLength > 1) { if (tokenLength > 1) {
// language = 2*3ALPHA ; shortest ISO 639 code // language = 2*3ALPHA ; shortest ISO 639 code
// ["-" extlang] ; sometimes followed by
// ; extended language subtags
// / 4ALPHA ; or reserved for future use // / 4ALPHA ; or reserved for future use
// / 5*8ALPHA ; or registered language subtag // / 5*8ALPHA ; or registered language subtag
if (tokenLength <= 3) { if (tokenLength <= 3) {
language = tokenStringLower(); language = tokenStringLower();
if (!nextToken()) if (!nextToken())
return null; return null;
// extlang = 3ALPHA ; selected ISO 639 codes
// *2("-" 3ALPHA) ; permanently reserved
if (token === ALPHA && tokenLength === 3) {
extlang1 = tokenStringLower();
if (!nextToken())
return null;
if (token === ALPHA && tokenLength === 3) {
extlang2 = tokenStringLower();
if (!nextToken())
return null;
if (token === ALPHA && tokenLength === 3) {
extlang3 = tokenStringLower();
if (!nextToken())
return null;
}
}
}
} else { } else {
assert(4 <= tokenLength && tokenLength <= 8, "reserved/registered language subtags"); assert(4 <= tokenLength && tokenLength <= 8, "reserved/registered language subtags");
language = tokenStringLower(); language = tokenStringLower();
@ -443,9 +431,6 @@ function parseLanguageTag(locale) {
if (token === NONE && !hasOwn(localeLowercase, grandfatheredMappings)) { if (token === NONE && !hasOwn(localeLowercase, grandfatheredMappings)) {
return { return {
language, language,
extlang1,
extlang2,
extlang3,
script, script,
region, region,
variants, variants,
@ -531,10 +516,9 @@ function IsStructurallyValidLanguageTag(locale) {
/** /**
* Canonicalizes the given structurally valid BCP 47 language tag, including * Canonicalizes the given structurally valid BCP 47 language tag, including
* regularized case of subtags. For example, the language tag * regularized case of subtags. For example, the language tag
* Zh-NAN-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE, where * Zh-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE, where
* *
* Zh ; 2*3ALPHA * Zh ; 2*3ALPHA
* -NAN ; ["-" extlang]
* -haNS ; ["-" script] * -haNS ; ["-" script]
* -bu ; ["-" region] * -bu ; ["-" region]
* -variant2 ; *("-" variant) * -variant2 ; *("-" variant)
@ -543,7 +527,7 @@ function IsStructurallyValidLanguageTag(locale) {
* -t-Zh-laTN * -t-Zh-laTN
* -x-PRIVATE ; ["-" privateuse] * -x-PRIVATE ; ["-" privateuse]
* *
* becomes nan-Hans-mm-variant2-variant1-t-zh-latn-u-ca-chinese-x-private * becomes zh-Hans-mm-variant2-variant1-t-zh-latn-u-ca-chinese-x-private
* *
* Spec: ECMAScript Internationalization API Specification, 6.2.3. * Spec: ECMAScript Internationalization API Specification, 6.2.3.
* Spec: RFC 5646, section 4.5. * Spec: RFC 5646, section 4.5.
@ -560,9 +544,6 @@ function CanonicalizeLanguageTagFromObject(localeObj) {
var { var {
language, language,
extlang1,
extlang2,
extlang3,
script, script,
region, region,
variants, variants,
@ -583,40 +564,6 @@ function CanonicalizeLanguageTagFromObject(localeObj) {
var canonical = language; var canonical = language;
if (extlang1) {
// When an extlang subtag is encountered with its corresponding
// primary language tag prefix, replace the combination with the
// preferred value -- which MUST be the unadorned extlang subtag.
// For example, this entry
//
// Type: extlang
// Subtag: nan
// Description: Min Nan Chinese
// Added: 2009-07-29
// Preferred-Value: nan
// Prefix: zh
// Macrolanguage: zh
//
// is interpreted to say that if a "nan" extlang appears after a "zh"
// primary language prefix, the extlang and its prefix must be
// replaced by its preferred value, so "zh-nan" must be replaced by
// the preferred value "nan". (RFC 5646 section 2.2.2)
if (hasOwn(extlang1, extlangMappings) && extlangMappings[extlang1] === language)
canonical = extlang1;
else
canonical += "-" + extlang1;
}
// The second extlang subtag will always be left as is.
// (RFC 5646 section 2.2.2)
if (extlang2)
canonical += "-" + extlang2;
// The third extlang subtag will always be left as is.
// (RFC 5646 section 2.2.2)
if (extlang3)
canonical += "-" + extlang3;
// No script replacements are currently present, so append as is. // No script replacements are currently present, so append as is.
if (script) { if (script) {
assert(script.length === 4 && assert(script.length === 4 &&
@ -833,10 +780,9 @@ function CanonicalizeUnicodeExtension(attributes, keywords) {
/** /**
* Canonicalizes the given structurally valid BCP 47 language tag, including * Canonicalizes the given structurally valid BCP 47 language tag, including
* regularized case of subtags. For example, the language tag * regularized case of subtags. For example, the language tag
* Zh-NAN-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE, where * Zh-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE, where
* *
* Zh ; 2*3ALPHA * Zh ; 2*3ALPHA
* -NAN ; ["-" extlang]
* -haNS ; ["-" script] * -haNS ; ["-" script]
* -bu ; ["-" region] * -bu ; ["-" region]
* -variant2 ; *("-" variant) * -variant2 ; *("-" variant)
@ -845,7 +791,7 @@ function CanonicalizeUnicodeExtension(attributes, keywords) {
* -t-Zh-laTN * -t-Zh-laTN
* -x-PRIVATE ; ["-" privateuse] * -x-PRIVATE ; ["-" privateuse]
* *
* becomes nan-Hans-mm-variant2-variant1-t-zh-latn-u-ca-chinese-x-private * becomes zh-Hans-mm-variant2-variant1-t-zh-latn-u-ca-chinese-x-private
* *
* Spec: ECMAScript Internationalization API Specification, 6.2.3. * Spec: ECMAScript Internationalization API Specification, 6.2.3.
* Spec: RFC 5646, section 4.5. * Spec: RFC 5646, section 4.5.

Просмотреть файл

@ -62,9 +62,6 @@ function updateLangTagMappings(tag) {
case "sgn": case "sgn":
// sgn-BR -> bzs // sgn-BR -> bzs
if (tag.region === "BR" && if (tag.region === "BR" &&
tag.extlang1 === undefined &&
tag.extlang2 === undefined &&
tag.extlang3 === undefined &&
tag.script === undefined && tag.script === undefined &&
tag.variants.length === 0 && tag.variants.length === 0 &&
tag.extensions.length === 0 && tag.extensions.length === 0 &&
@ -75,9 +72,6 @@ function updateLangTagMappings(tag) {
} }
// sgn-CO -> csn // sgn-CO -> csn
else if (tag.region === "CO" && else if (tag.region === "CO" &&
tag.extlang1 === undefined &&
tag.extlang2 === undefined &&
tag.extlang3 === undefined &&
tag.script === undefined && tag.script === undefined &&
tag.variants.length === 0 && tag.variants.length === 0 &&
tag.extensions.length === 0 && tag.extensions.length === 0 &&
@ -88,9 +82,6 @@ function updateLangTagMappings(tag) {
} }
// sgn-DE -> gsg // sgn-DE -> gsg
else if (tag.region === "DE" && else if (tag.region === "DE" &&
tag.extlang1 === undefined &&
tag.extlang2 === undefined &&
tag.extlang3 === undefined &&
tag.script === undefined && tag.script === undefined &&
tag.variants.length === 0 && tag.variants.length === 0 &&
tag.extensions.length === 0 && tag.extensions.length === 0 &&
@ -101,9 +92,6 @@ function updateLangTagMappings(tag) {
} }
// sgn-DK -> dsl // sgn-DK -> dsl
else if (tag.region === "DK" && else if (tag.region === "DK" &&
tag.extlang1 === undefined &&
tag.extlang2 === undefined &&
tag.extlang3 === undefined &&
tag.script === undefined && tag.script === undefined &&
tag.variants.length === 0 && tag.variants.length === 0 &&
tag.extensions.length === 0 && tag.extensions.length === 0 &&
@ -114,9 +102,6 @@ function updateLangTagMappings(tag) {
} }
// sgn-ES -> ssp // sgn-ES -> ssp
else if (tag.region === "ES" && else if (tag.region === "ES" &&
tag.extlang1 === undefined &&
tag.extlang2 === undefined &&
tag.extlang3 === undefined &&
tag.script === undefined && tag.script === undefined &&
tag.variants.length === 0 && tag.variants.length === 0 &&
tag.extensions.length === 0 && tag.extensions.length === 0 &&
@ -127,9 +112,6 @@ function updateLangTagMappings(tag) {
} }
// sgn-FR -> fsl // sgn-FR -> fsl
else if (tag.region === "FR" && else if (tag.region === "FR" &&
tag.extlang1 === undefined &&
tag.extlang2 === undefined &&
tag.extlang3 === undefined &&
tag.script === undefined && tag.script === undefined &&
tag.variants.length === 0 && tag.variants.length === 0 &&
tag.extensions.length === 0 && tag.extensions.length === 0 &&
@ -140,9 +122,6 @@ function updateLangTagMappings(tag) {
} }
// sgn-GB -> bfi // sgn-GB -> bfi
else if (tag.region === "GB" && else if (tag.region === "GB" &&
tag.extlang1 === undefined &&
tag.extlang2 === undefined &&
tag.extlang3 === undefined &&
tag.script === undefined && tag.script === undefined &&
tag.variants.length === 0 && tag.variants.length === 0 &&
tag.extensions.length === 0 && tag.extensions.length === 0 &&
@ -153,9 +132,6 @@ function updateLangTagMappings(tag) {
} }
// sgn-GR -> gss // sgn-GR -> gss
else if (tag.region === "GR" && else if (tag.region === "GR" &&
tag.extlang1 === undefined &&
tag.extlang2 === undefined &&
tag.extlang3 === undefined &&
tag.script === undefined && tag.script === undefined &&
tag.variants.length === 0 && tag.variants.length === 0 &&
tag.extensions.length === 0 && tag.extensions.length === 0 &&
@ -166,9 +142,6 @@ function updateLangTagMappings(tag) {
} }
// sgn-IE -> isg // sgn-IE -> isg
else if (tag.region === "IE" && else if (tag.region === "IE" &&
tag.extlang1 === undefined &&
tag.extlang2 === undefined &&
tag.extlang3 === undefined &&
tag.script === undefined && tag.script === undefined &&
tag.variants.length === 0 && tag.variants.length === 0 &&
tag.extensions.length === 0 && tag.extensions.length === 0 &&
@ -179,9 +152,6 @@ function updateLangTagMappings(tag) {
} }
// sgn-IT -> ise // sgn-IT -> ise
else if (tag.region === "IT" && else if (tag.region === "IT" &&
tag.extlang1 === undefined &&
tag.extlang2 === undefined &&
tag.extlang3 === undefined &&
tag.script === undefined && tag.script === undefined &&
tag.variants.length === 0 && tag.variants.length === 0 &&
tag.extensions.length === 0 && tag.extensions.length === 0 &&
@ -192,9 +162,6 @@ function updateLangTagMappings(tag) {
} }
// sgn-JP -> jsl // sgn-JP -> jsl
else if (tag.region === "JP" && else if (tag.region === "JP" &&
tag.extlang1 === undefined &&
tag.extlang2 === undefined &&
tag.extlang3 === undefined &&
tag.script === undefined && tag.script === undefined &&
tag.variants.length === 0 && tag.variants.length === 0 &&
tag.extensions.length === 0 && tag.extensions.length === 0 &&
@ -205,9 +172,6 @@ function updateLangTagMappings(tag) {
} }
// sgn-MX -> mfs // sgn-MX -> mfs
else if (tag.region === "MX" && else if (tag.region === "MX" &&
tag.extlang1 === undefined &&
tag.extlang2 === undefined &&
tag.extlang3 === undefined &&
tag.script === undefined && tag.script === undefined &&
tag.variants.length === 0 && tag.variants.length === 0 &&
tag.extensions.length === 0 && tag.extensions.length === 0 &&
@ -218,9 +182,6 @@ function updateLangTagMappings(tag) {
} }
// sgn-NI -> ncs // sgn-NI -> ncs
else if (tag.region === "NI" && else if (tag.region === "NI" &&
tag.extlang1 === undefined &&
tag.extlang2 === undefined &&
tag.extlang3 === undefined &&
tag.script === undefined && tag.script === undefined &&
tag.variants.length === 0 && tag.variants.length === 0 &&
tag.extensions.length === 0 && tag.extensions.length === 0 &&
@ -231,9 +192,6 @@ function updateLangTagMappings(tag) {
} }
// sgn-NL -> dse // sgn-NL -> dse
else if (tag.region === "NL" && else if (tag.region === "NL" &&
tag.extlang1 === undefined &&
tag.extlang2 === undefined &&
tag.extlang3 === undefined &&
tag.script === undefined && tag.script === undefined &&
tag.variants.length === 0 && tag.variants.length === 0 &&
tag.extensions.length === 0 && tag.extensions.length === 0 &&
@ -244,9 +202,6 @@ function updateLangTagMappings(tag) {
} }
// sgn-NO -> nsl // sgn-NO -> nsl
else if (tag.region === "NO" && else if (tag.region === "NO" &&
tag.extlang1 === undefined &&
tag.extlang2 === undefined &&
tag.extlang3 === undefined &&
tag.script === undefined && tag.script === undefined &&
tag.variants.length === 0 && tag.variants.length === 0 &&
tag.extensions.length === 0 && tag.extensions.length === 0 &&
@ -257,9 +212,6 @@ function updateLangTagMappings(tag) {
} }
// sgn-PT -> psr // sgn-PT -> psr
else if (tag.region === "PT" && else if (tag.region === "PT" &&
tag.extlang1 === undefined &&
tag.extlang2 === undefined &&
tag.extlang3 === undefined &&
tag.script === undefined && tag.script === undefined &&
tag.variants.length === 0 && tag.variants.length === 0 &&
tag.extensions.length === 0 && tag.extensions.length === 0 &&
@ -270,9 +222,6 @@ function updateLangTagMappings(tag) {
} }
// sgn-SE -> swl // sgn-SE -> swl
else if (tag.region === "SE" && else if (tag.region === "SE" &&
tag.extlang1 === undefined &&
tag.extlang2 === undefined &&
tag.extlang3 === undefined &&
tag.script === undefined && tag.script === undefined &&
tag.variants.length === 0 && tag.variants.length === 0 &&
tag.extensions.length === 0 && tag.extensions.length === 0 &&
@ -283,9 +232,6 @@ function updateLangTagMappings(tag) {
} }
// sgn-US -> ase // sgn-US -> ase
else if (tag.region === "US" && else if (tag.region === "US" &&
tag.extlang1 === undefined &&
tag.extlang2 === undefined &&
tag.extlang3 === undefined &&
tag.script === undefined && tag.script === undefined &&
tag.variants.length === 0 && tag.variants.length === 0 &&
tag.extensions.length === 0 && tag.extensions.length === 0 &&
@ -296,9 +242,6 @@ function updateLangTagMappings(tag) {
} }
// sgn-ZA -> sfs // sgn-ZA -> sfs
else if (tag.region === "ZA" && else if (tag.region === "ZA" &&
tag.extlang1 === undefined &&
tag.extlang2 === undefined &&
tag.extlang3 === undefined &&
tag.script === undefined && tag.script === undefined &&
tag.variants.length === 0 && tag.variants.length === 0 &&
tag.extensions.length === 0 && tag.extensions.length === 0 &&
@ -439,247 +382,3 @@ var regionMappings = {
"YD": "YE", "YD": "YE",
"ZR": "CD", "ZR": "CD",
}; };
// Mappings from extlang subtags to preferred values.
// All current deprecated extlang subtags have the form `<prefix>-<extlang>`
// and their preferred value is exactly equal to `<extlang>`. So each key in
// extlangMappings acts both as the extlang subtag and its preferred value.
// Derived from IANA Language Subtag Registry, file date 2019-02-20.
// https://www.iana.org/assignments/language-subtag-registry
var extlangMappings = {
"aao": "ar",
"abh": "ar",
"abv": "ar",
"acm": "ar",
"acq": "ar",
"acw": "ar",
"acx": "ar",
"acy": "ar",
"adf": "ar",
"ads": "sgn",
"aeb": "ar",
"aec": "ar",
"aed": "sgn",
"aen": "sgn",
"afb": "ar",
"afg": "sgn",
"ajp": "ar",
"apc": "ar",
"apd": "ar",
"arb": "ar",
"arq": "ar",
"ars": "ar",
"ary": "ar",
"arz": "ar",
"ase": "sgn",
"asf": "sgn",
"asp": "sgn",
"asq": "sgn",
"asw": "sgn",
"auz": "ar",
"avl": "ar",
"ayh": "ar",
"ayl": "ar",
"ayn": "ar",
"ayp": "ar",
"bbz": "ar",
"bfi": "sgn",
"bfk": "sgn",
"bjn": "ms",
"bog": "sgn",
"bqn": "sgn",
"bqy": "sgn",
"btj": "ms",
"bve": "ms",
"bvl": "sgn",
"bvu": "ms",
"bzs": "sgn",
"cdo": "zh",
"cds": "sgn",
"cjy": "zh",
"cmn": "zh",
"coa": "ms",
"cpx": "zh",
"csc": "sgn",
"csd": "sgn",
"cse": "sgn",
"csf": "sgn",
"csg": "sgn",
"csl": "sgn",
"csn": "sgn",
"csq": "sgn",
"csr": "sgn",
"czh": "zh",
"czo": "zh",
"doq": "sgn",
"dse": "sgn",
"dsl": "sgn",
"dup": "ms",
"ecs": "sgn",
"esl": "sgn",
"esn": "sgn",
"eso": "sgn",
"eth": "sgn",
"fcs": "sgn",
"fse": "sgn",
"fsl": "sgn",
"fss": "sgn",
"gan": "zh",
"gds": "sgn",
"gom": "kok",
"gse": "sgn",
"gsg": "sgn",
"gsm": "sgn",
"gss": "sgn",
"gus": "sgn",
"hab": "sgn",
"haf": "sgn",
"hak": "zh",
"hds": "sgn",
"hji": "ms",
"hks": "sgn",
"hos": "sgn",
"hps": "sgn",
"hsh": "sgn",
"hsl": "sgn",
"hsn": "zh",
"icl": "sgn",
"iks": "sgn",
"ils": "sgn",
"inl": "sgn",
"ins": "sgn",
"ise": "sgn",
"isg": "sgn",
"isr": "sgn",
"jak": "ms",
"jax": "ms",
"jcs": "sgn",
"jhs": "sgn",
"jls": "sgn",
"jos": "sgn",
"jsl": "sgn",
"jus": "sgn",
"kgi": "sgn",
"knn": "kok",
"kvb": "ms",
"kvk": "sgn",
"kvr": "ms",
"kxd": "ms",
"lbs": "sgn",
"lce": "ms",
"lcf": "ms",
"liw": "ms",
"lls": "sgn",
"lsg": "sgn",
"lsl": "sgn",
"lso": "sgn",
"lsp": "sgn",
"lst": "sgn",
"lsy": "sgn",
"ltg": "lv",
"lvs": "lv",
"lws": "sgn",
"lzh": "zh",
"max": "ms",
"mdl": "sgn",
"meo": "ms",
"mfa": "ms",
"mfb": "ms",
"mfs": "sgn",
"min": "ms",
"mnp": "zh",
"mqg": "ms",
"mre": "sgn",
"msd": "sgn",
"msi": "ms",
"msr": "sgn",
"mui": "ms",
"mzc": "sgn",
"mzg": "sgn",
"mzy": "sgn",
"nan": "zh",
"nbs": "sgn",
"ncs": "sgn",
"nsi": "sgn",
"nsl": "sgn",
"nsp": "sgn",
"nsr": "sgn",
"nzs": "sgn",
"okl": "sgn",
"orn": "ms",
"ors": "ms",
"pel": "ms",
"pga": "ar",
"pgz": "sgn",
"pks": "sgn",
"prl": "sgn",
"prz": "sgn",
"psc": "sgn",
"psd": "sgn",
"pse": "ms",
"psg": "sgn",
"psl": "sgn",
"pso": "sgn",
"psp": "sgn",
"psr": "sgn",
"pys": "sgn",
"rms": "sgn",
"rsi": "sgn",
"rsl": "sgn",
"rsm": "sgn",
"sdl": "sgn",
"sfb": "sgn",
"sfs": "sgn",
"sgg": "sgn",
"sgx": "sgn",
"shu": "ar",
"slf": "sgn",
"sls": "sgn",
"sqk": "sgn",
"sqs": "sgn",
"ssh": "ar",
"ssp": "sgn",
"ssr": "sgn",
"svk": "sgn",
"swc": "sw",
"swh": "sw",
"swl": "sgn",
"syy": "sgn",
"szs": "sgn",
"tmw": "ms",
"tse": "sgn",
"tsm": "sgn",
"tsq": "sgn",
"tss": "sgn",
"tsy": "sgn",
"tza": "sgn",
"ugn": "sgn",
"ugy": "sgn",
"ukl": "sgn",
"uks": "sgn",
"urk": "ms",
"uzn": "uz",
"uzs": "uz",
"vgt": "sgn",
"vkk": "ms",
"vkt": "ms",
"vsi": "sgn",
"vsl": "sgn",
"vsv": "sgn",
"wbs": "sgn",
"wuu": "zh",
"xki": "sgn",
"xml": "sgn",
"xmm": "ms",
"xms": "sgn",
"yds": "sgn",
"ygs": "sgn",
"yhs": "sgn",
"ysl": "sgn",
"yue": "zh",
"zib": "sgn",
"zlm": "ms",
"zmi": "ms",
"zsl": "sgn",
"zsm": "ms",
};

Просмотреть файл

@ -90,22 +90,14 @@ def readRegistry(registry):
- regionMappings: mappings from region subtags to preferred subtags - regionMappings: mappings from region subtags to preferred subtags
- variantMappings: mappings from complete language tags to preferred - variantMappings: mappings from complete language tags to preferred
complete language tags complete language tags
- extlangMappings: mappings from extlang subtags to preferred subtags, Returns these five mappings as dictionaries, along with the registry's
with prefix to be removed
Returns these six mappings as dictionaries, along with the registry's
file date. file date.
We also check that extlang mappings don't generate preferred values
which in turn are subject to language subtag mappings, so that
CanonicalizeLanguageTag can process subtags sequentially.
""" """
grandfatheredMappings = {} grandfatheredMappings = {}
redundantMappings = {} redundantMappings = {}
languageMappings = {} languageMappings = {}
regionMappings = {} regionMappings = {}
variantMappings = {} variantMappings = {}
extlangMappings = {}
extlangSubtags = []
# Set of language tags which require special handling. # Set of language tags which require special handling.
SpecialCase = namedtuple("SpecialCase", ["Type", "Subtag", "Prefix", "Preferred_Value"]) SpecialCase = namedtuple("SpecialCase", ["Type", "Subtag", "Prefix", "Preferred_Value"])
@ -184,29 +176,14 @@ def readRegistry(registry):
tag = "{}-{}".format(record["Prefix"], record["Subtag"]) tag = "{}-{}".format(record["Prefix"], record["Subtag"])
variantMappings[tag] = record["Preferred-Value"] variantMappings[tag] = record["Preferred-Value"]
elif record["Type"] == "extlang": elif record["Type"] == "extlang":
# For extlangMappings, keys must be in the case used in the # extlang subtags are not allowed in Unicode BCP 47 locale identifiers,
# registry; values are records with the preferred value and the # so ignore any replacements.
# prefix to be removed. pass
subtag = record["Subtag"]
extlangSubtags.append(subtag)
if "Preferred-Value" in record:
preferred = record["Preferred-Value"]
# The 'Preferred-Value' and 'Subtag' fields MUST be identical.
# https://tools.ietf.org/html/rfc5646#section-2.2.2
assert preferred == subtag, "{0} = {1}".format(preferred, subtag)
prefix = record["Prefix"]
extlangMappings[subtag] = {"preferred": preferred, "prefix": prefix}
else: else:
# No other types are allowed by # No other types are allowed by
# https://tools.ietf.org/html/rfc5646#section-3.1.3 # https://tools.ietf.org/html/rfc5646#section-3.1.3
assert False, "Unrecognized Type: {0}".format(record["Type"]) assert False, "Unrecognized Type: {0}".format(record["Type"])
# Check that mappings for language subtags and extlang subtags don't affect
# each other.
for extlang in extlangSubtags:
if extlang in languageMappings:
raise Exception("Conflict: extlang with lang mapping: " + extlang)
# Check all known special cases were processed. # Check all known special cases were processed.
for elem in knownSpecialCases: for elem in knownSpecialCases:
tag = "{}-{}".format(elem.Prefix, elem.Subtag) tag = "{}-{}".format(elem.Prefix, elem.Subtag)
@ -224,8 +201,7 @@ def readRegistry(registry):
"redundantMappings": redundantMappings, "redundantMappings": redundantMappings,
"languageMappings": languageMappings, "languageMappings": languageMappings,
"regionMappings": regionMappings, "regionMappings": regionMappings,
"variantMappings": variantMappings, "variantMappings": variantMappings}
"extlangMappings": extlangMappings}
def writeMappingHeader(println, description, fileDate, url): def writeMappingHeader(println, description, fileDate, url):
@ -261,8 +237,8 @@ def writeMappingsVar(println, mapping, name, description, fileDate, url):
println(u"};") println(u"};")
def writeMappingsFunction(println, variantMappings, redundantMappings, extlangMappings, def writeMappingsFunction(println, variantMappings, redundantMappings, description,
description, fileDate, url): fileDate, url):
""" Writes a function definition which performs language tag mapping. """ Writes a function definition which performs language tag mapping.
Processes the contents of dictionaries |variantMappings| and Processes the contents of dictionaries |variantMappings| and
@ -328,18 +304,12 @@ def writeMappingsFunction(println, variantMappings, redundantMappings, extlangMa
# Compare the input language tag with the current language tag. # Compare the input language tag with the current language tag.
cond = [] cond = []
extlangIndex = 1
lastVariant = None lastVariant = None
for (kind, subtag) in splitSubtags(tag): for (kind, subtag) in splitSubtags(tag):
if kind == Subtag.Language: if kind == Subtag.Language:
continue continue
if kind == Subtag.ExtLang: if kind == Subtag.Script:
assert extlangIndex in [1, 2, 3], \
"Language-Tag permits no more than three extlang subtags"
cond.append('tag.extlang{} === "{}"'.format(extlangIndex, subtag))
extlangIndex += 1
elif kind == Subtag.Script:
cond.append('tag.script === "{}"'.format(subtag)) cond.append('tag.script === "{}"'.format(subtag))
elif kind == Subtag.Region: elif kind == Subtag.Region:
cond.append('tag.region === "{}"'.format(subtag)) cond.append('tag.region === "{}"'.format(subtag))
@ -363,9 +333,7 @@ def writeMappingsFunction(println, variantMappings, redundantMappings, extlangMa
assert tag_kind == Subtag.Language assert tag_kind == Subtag.Language
(tag_kind, _) = tag_next() (tag_kind, _) = tag_next()
subtags = ([(Subtag.ExtLang, "extlang{}".format(i)) for i in range(1, 3+1)] + for kind, prop_name in ((Subtag.Script, "script"), (Subtag.Region, "region")):
[(Subtag.Script, "script"), (Subtag.Region, "region")])
for kind, prop_name in subtags:
if tag_kind == kind: if tag_kind == kind:
(tag_kind, _) = tag_next() (tag_kind, _) = tag_next()
else: else:
@ -421,20 +389,8 @@ def writeMappingsFunction(println, variantMappings, redundantMappings, extlangMa
(tag_kind, tag_subtag) = tag_next() (tag_kind, tag_subtag) = tag_next()
(preferred_kind, preferred_subtag) = preferred_next() (preferred_kind, preferred_subtag) = preferred_next()
# Remove any extlang subtags per RFC 5646, 4.5:
# 'The canonical form contains no 'extlang' subtags.'
# https://tools.ietf.org/html/rfc5646#section-4.5
assert preferred_kind != Subtag.ExtLang
extlangIndex = 1
while tag_kind == Subtag.ExtLang:
assert extlangIndex in [1, 2, 3], \
"Language-Tag permits no more than three extlang subtags"
println3(u"tag.extlang{} = undefined;".format(extlangIndex))
extlangIndex += 1
(tag_kind, tag_subtag) = tag_next()
# Update the script and region subtags. # Update the script and region subtags.
for kind, prop_name in [(Subtag.Script, "script"), (Subtag.Region, "region")]: for kind, prop_name in ((Subtag.Script, "script"), (Subtag.Region, "region")):
if tag_kind == kind and preferred_kind == kind: if tag_kind == kind and preferred_kind == kind:
if tag_subtag != preferred_subtag: if tag_subtag != preferred_subtag:
println3(u'tag.{} = "{}";'.format(prop_name, preferred_subtag)) println3(u'tag.{} = "{}";'.format(prop_name, preferred_subtag))
@ -481,9 +437,8 @@ def writeMappingsFunction(println, variantMappings, redundantMappings, extlangMa
println2(u"}") println2(u"}")
# Remove mappings for redundant language tags which are from our point of # Remove mappings for redundant language tags which contain extlang subtags,
# view, wait for it, redundant, because there is an equivalent extlang # because extlangs are not supported in Unicode BCP 47 locale identifiers.
# mapping.
# #
# For example this entry for the redundant tag "zh-cmn": # For example this entry for the redundant tag "zh-cmn":
# #
@ -491,35 +446,21 @@ def writeMappingsFunction(println, variantMappings, redundantMappings, extlangMa
# Tag: zh-cmn # Tag: zh-cmn
# Preferred-Value: cmn # Preferred-Value: cmn
# #
# Can also be expressed through the extlang mapping for "cmn": # Can be omitted because "zh-cmn" is already rejected in the parser.
# def hasExtlangSubtag(tag):
# Type: extlang
# Subtag: cmn
# Preferred-Value: cmn
# Prefix: zh
#
def hasExtlangMapping(tag, preferred):
tag_it = splitSubtags(tag) tag_it = splitSubtags(tag)
(_, tag_lang) = next(tag_it) (_, _) = next(tag_it)
(tag_kind, tag_extlang) = next(tag_it) (tag_kind, _) = next(tag_it)
preferred_it = splitSubtags(preferred) # Return true if |tag| contains an extlang subtag.
(_, preferred_lang) = next(preferred_it) return tag_kind == Subtag.ExtLang
# Return true if the mapping is for an extlang language and the extlang
# mapping table contains an equivalent entry and any trailing elements,
# if present, are the same.
return (tag_kind == Subtag.ExtLang and
(tag_extlang, {"preferred": preferred_lang, "prefix": tag_lang}) in
extlangMappings.items() and
list(tag_it) == list(preferred_it))
# Create a single mapping for variant and redundant tags, ignoring the # Create a single mapping for variant and redundant tags, ignoring the
# entries which are also covered through extlang mappings. # entries which contain extlang subtags.
langTagMappings = {tag: preferred langTagMappings = {tag: preferred
for mapping in [variantMappings, redundantMappings] for mapping in [variantMappings, redundantMappings]
for (tag, preferred) in mapping.items() for (tag, preferred) in mapping.items()
if not hasExtlangMapping(tag, preferred)} if not hasExtlangSubtag(tag)}
println(u"") println(u"")
println(u"/* eslint-disable complexity */") println(u"/* eslint-disable complexity */")
@ -555,9 +496,8 @@ def writeLanguageTagData(println, data, url):
languageMappings = data["languageMappings"] languageMappings = data["languageMappings"]
regionMappings = data["regionMappings"] regionMappings = data["regionMappings"]
variantMappings = data["variantMappings"] variantMappings = data["variantMappings"]
extlangMappings = data["extlangMappings"]
writeMappingsFunction(println, variantMappings, redundantMappings, extlangMappings, writeMappingsFunction(println, variantMappings, redundantMappings,
"Mappings from complete tags to preferred values.", fileDate, url) "Mappings from complete tags to preferred values.", fileDate, url)
writeMappingsVar(println, grandfatheredMappings, "grandfatheredMappings", writeMappingsVar(println, grandfatheredMappings, "grandfatheredMappings",
"Mappings from grandfathered tags to preferred values.", fileDate, url) "Mappings from grandfathered tags to preferred values.", fileDate, url)
@ -565,12 +505,6 @@ def writeLanguageTagData(println, data, url):
"Mappings from language subtags to preferred values.", fileDate, url) "Mappings from language subtags to preferred values.", fileDate, url)
writeMappingsVar(println, regionMappings, "regionMappings", writeMappingsVar(println, regionMappings, "regionMappings",
"Mappings from region subtags to preferred values.", fileDate, url) "Mappings from region subtags to preferred values.", fileDate, url)
writeMappingsVar(println, extlangMappings, "extlangMappings",
["Mappings from extlang subtags to preferred values.",
"All current deprecated extlang subtags have the form `<prefix>-<extlang>`",
"and their preferred value is exactly equal to `<extlang>`. So each key in",
"extlangMappings acts both as the extlang subtag and its preferred value."],
fileDate, url)
def updateLangTags(args): def updateLangTags(args):

Просмотреть файл

@ -717,3 +717,9 @@ skip script test262/harness/detachArrayBuffer.js
# shouldn't require that zh-Hans is explicitly supported without a fallback to # shouldn't require that zh-Hans is explicitly supported without a fallback to
# zh. # zh.
skip script test262/intl402/fallback-locales-are-supported.js skip script test262/intl402/fallback-locales-are-supported.js
# https://github.com/tc39/test262/pull/2097
#
# "zh-hak-CN" no longer allowed, because it contains an extlang subtag.
skip script test262/intl402/language-tags-canonicalized.js
skip script test262/intl402/Intl/getCanonicalLocales/canonicalized-tags.js

Просмотреть файл

@ -0,0 +1,21 @@
// |reftest| skip-if(!this.hasOwnProperty("Intl"))
// Extlang subtags are not part of Unicode BCP 47 locale identifiers, so every
// tag listed here must be rejected with a RangeError.
const invalid = [
    // One, two, and three extlang-shaped subtags after a two letter language code.
    "en-abc",
    "en-abc-def",
    "en-abc-def-ghi",
    // The same three shapes after a three letter language code.
    "und-abc",
    "und-abc-def",
    "und-abc-def-ghi",
];
invalid.forEach(tag => {
    // Canonicalization has to throw rather than silently drop or remap the extlang.
    const canonicalize = () => Intl.getCanonicalLocales(tag);
    assertThrowsInstanceOf(canonicalize, RangeError);
});
// Signal success to the reftest harness when it is present.
if (typeof reportCompare === "function") {
    reportCompare(true, true);
}