diff --git a/js/src/builtin/intl/LanguageTag.cpp b/js/src/builtin/intl/LanguageTag.cpp index 18f7a35bef61..ba74037dfdfc 100644 --- a/js/src/builtin/intl/LanguageTag.cpp +++ b/js/src/builtin/intl/LanguageTag.cpp @@ -262,9 +262,9 @@ static bool SortAlphabetically(JSContext* cx, } bool LanguageTag::canonicalizeBaseName(JSContext* cx) { - // Per UTS 35, 3.3.1, the very first step is to canonicalize the syntax by - // normalizing the case and ordering all subtags. The canonical syntax form - // itself is specified in UTS 35, 3.2.1. + // Per 6.2.3 CanonicalizeUnicodeLocaleId, the very first step is to + // canonicalize the syntax by normalizing the case and ordering all subtags. + // The canonical syntax form is specified in UTS 35, 3.2.1. // Language codes need to be in lower case. "JA" -> "ja" language_.toLowerCase(); @@ -316,10 +316,24 @@ bool LanguageTag::canonicalizeBaseName(JSContext* cx) { } // 2. Any extensions are in alphabetical order by their singleton. - // - A subsequent call to canonicalizeExtensions() will perform this. + // 3. All attributes are sorted in alphabetical order. + // 4. All keywords and tfields are sorted by alphabetical order of their keys, + // within their respective extensions. + // 5. Any type or tfield value "true" is removed. + // - A subsequent call to canonicalizeExtensions() will perform these steps. - // The next two steps in 3.3.1 replace deprecated language and region - // subtags with their preferred mappings. + // 6.2.3 CanonicalizeUnicodeLocaleId, step 2 transforms the locale identifier + // into its canonical form per UTS 35, 3.2.1. + + // 1. Use the bcp47 data to replace keys, types, tfields, and tvalues by their + // canonical forms. + // - A subsequent call to canonicalizeExtensions() will perform this step. + + // 2. Replace aliases in the unicode_language_id and tlang (if any). + // - tlang is handled in canonicalizeExtensions(). 
+ + // Replace deprecated language, region, and variant subtags with their + // preferred mappings. if (!updateGrandfatheredMappings(cx)) { return false; @@ -343,9 +357,8 @@ bool LanguageTag::canonicalizeBaseName(JSContext* cx) { // No extension replacements are currently present. // Private use sequences are left as is. - // The two final steps in 3.3.1, handling irregular grandfathered and - // private-use only language tags, don't apply, because these two forms - // can't occur in Unicode BCP 47 locale identifiers. + // 3. Replace aliases in special key values. + // - A subsequent call to canonicalizeExtensions() will perform this step. return true; } diff --git a/js/src/builtin/intl/LanguageTag.h b/js/src/builtin/intl/LanguageTag.h index b4ccff58c8f9..14540053f032 100644 --- a/js/src/builtin/intl/LanguageTag.h +++ b/js/src/builtin/intl/LanguageTag.h @@ -369,10 +369,9 @@ class MOZ_STACK_CLASS LanguageTag final { * * UTS 35 specifies two different canonicalization algorithms. There's one to * canonicalize BCP 47 language tags and other one to canonicalize Unicode - * locale identifiers. The latter one wasn't present when ECMA-402 was changed - * to use Unicode BCP 47 locale identifiers instead of BCP 47 language tags, - * so ECMA-402 currently only uses the former to canonicalize Unicode BCP 47 - * locale identifiers. + * locale identifiers. ECMA-402 was previously using the former, but has since + * been changed to use the latter to canonicalize Unicode BCP 47 locale + * identifiers. * * Spec: ECMAScript Internationalization API Specification, 6.2.3. * Spec: