Bug 1570370 - Part 1: Port Unicode BCP 47 locale identifier parser to C++. r=jwalden

Differential Revision: https://phabricator.services.mozilla.com/D40067 --HG-- extra : moz-landing-system : lando
2019-10-11 20:05:43 +00:00 · 2019-10-11 20:05:43 +00:00 · 4a8f76c4de
--- a/.clang-format-ignore
+++ b/.clang-format-ignore
@ -4,7 +4,9 @@ build/clang-plugin/.*
 config/gcc-stl-wrapper.template.h
 config/msvc-stl-wrapper.template.h
 # Generated code
 js/src/builtin/intl/LanguageTagGenerated.cpp
 js/src/builtin/intl/TimeZoneDataGenerated.h
 js/src/builtin/intl/UnicodeExtensionsGenerated.cpp
 # Don't want to reformat irregexp. bug 1510128
 js/src/irregexp/.*
--- a/js/src/builtin/intl/LanguageTag.cpp
+++ b/js/src/builtin/intl/LanguageTag.cpp
--- a/js/src/builtin/intl/LanguageTag.h
+++ b/js/src/builtin/intl/LanguageTag.h
@ -0,0 +1,689 @@
 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 /* Structured representation of Unicode locale IDs used with Intl functions. */
 #ifndef builtin_intl_LanguageTag_h
 #define builtin_intl_LanguageTag_h
 #include "mozilla/Assertions.h"
 #include "mozilla/Range.h"
 #include "mozilla/TextUtils.h"
 #include "mozilla/TypedEnumBits.h"
 #include "mozilla/Variant.h"
 #include <algorithm>
 #include <stddef.h>
 #include <stdint.h>
 #include <string.h>
 #include <utility>
 #include "js/AllocPolicy.h"
 #include "js/GCAPI.h"
 #include "js/Result.h"
 #include "js/Utility.h"
 #include "js/Vector.h"
 struct JSContext;
 class JSLinearString;
 class JSString;
 namespace js {
 class StringBuffer;
 namespace intl {
 #ifdef DEBUG
 /**
 * Return true if |language| is a valid, case-normalized language subtag.
 */
 template <typename CharT>
 bool IsStructurallyValidLanguageTag(
    const mozilla::Range<const CharT>& language);
 /**
 * Return true if |script| is a valid, case-normalized script subtag.
 */
 template <typename CharT>
 bool IsStructurallyValidScriptTag(const mozilla::Range<const CharT>& script);
 /**
 * Return true if |region| is a valid, case-normalized region subtag.
 */
 template <typename CharT>
 bool IsStructurallyValidRegionTag(const mozilla::Range<const CharT>& region);
 /**
 * Return true if |variant| is a valid, case-normalized variant subtag.
 */
 bool IsStructurallyValidVariantTag(const mozilla::Range<const char>& variant);
 /**
 * Return true if |extension| is a valid, case-normalized Unicode extension
 * subtag.
 */
 bool IsStructurallyValidUnicodeExtensionTag(
    const mozilla::Range<const char>& extension);
 /**
 * Return true if |privateUse| is a valid, case-normalized private-use subtag.
 */
 bool IsStructurallyValidPrivateUseTag(
    const mozilla::Range<const char>& privateUse);
 #endif
 // Lower-case a single ASCII code unit. The caller must pass an ASCII value;
 // this is asserted.
 template <typename CharT>
 char AsciiToLowerCase(CharT c) {
  MOZ_ASSERT(mozilla::IsAscii(c));
  // Setting bit 0x20 maps 'A'..'Z' onto 'a'..'z'; any other code unit is
  // returned unchanged.
  if (mozilla::IsAsciiUppercaseAlpha(c)) {
    return c | 0x20;
  }
  return c;
 }
 // Upper-case a single ASCII code unit. The caller must pass an ASCII value;
 // this is asserted.
 template <typename CharT>
 char AsciiToUpperCase(CharT c) {
  MOZ_ASSERT(mozilla::IsAscii(c));
  // Clearing bit 0x20 maps 'a'..'z' onto 'A'..'Z'; any other code unit is
  // returned unchanged.
  if (mozilla::IsAsciiLowercaseAlpha(c)) {
    return c & ~0x20;
  }
  return c;
 }
 // Write the lower-cased form of the |length| ASCII code units starting at
 // |chars| into |dest|.
 template <typename CharT>
 void AsciiToLowerCase(CharT* chars, size_t length, char* dest) {
  // Tell the analysis the |std::transform| function can't GC.
  JS::AutoSuppressGCAnalysis nogc;
  CharT* const end = chars + length;
  std::transform(chars, end, dest,
                 [](CharT unit) -> char { return AsciiToLowerCase(unit); });
 }
 // Write the upper-cased form of the |length| ASCII code units starting at
 // |chars| into |dest|.
 template <typename CharT>
 void AsciiToUpperCase(CharT* chars, size_t length, char* dest) {
  // Tell the analysis the |std::transform| function can't GC.
  JS::AutoSuppressGCAnalysis nogc;
  CharT* const end = chars + length;
  std::transform(chars, end, dest,
                 [](CharT unit) -> char { return AsciiToUpperCase(unit); });
 }
 // Write the title-cased form of |chars| into |dest|: the first code unit is
 // upper-cased and the remaining |length - 1| are lower-cased. Does nothing
 // for empty input.
 template <typename CharT>
 void AsciiToTitleCase(CharT* chars, size_t length, char* dest) {
  if (length == 0) {
    return;
  }
  AsciiToUpperCase(chars, 1, dest);
  AsciiToLowerCase(chars + 1, length - 1, dest + 1);
 }
 // Maximum lengths, in characters, of the fixed-size locale identifier parts.
 namespace LanguageTagLimits {
 // unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
 static constexpr size_t LanguageLength = 8;
 // unicode_script_subtag = alpha{4} ;
 static constexpr size_t ScriptLength = 4;
 // unicode_region_subtag = (alpha{2} | digit{3}) ;
 static constexpr size_t AlphaRegionLength = 2;
 static constexpr size_t DigitRegionLength = 3;
 // A region subtag is as long as the longer of its two forms.
 static constexpr size_t RegionLength =
    AlphaRegionLength > DigitRegionLength ? AlphaRegionLength
                                          : DigitRegionLength;
 // key = alphanum alpha ;
 static constexpr size_t UnicodeKeyLength = 2;
 // tkey = alpha digit ;
 static constexpr size_t TransformKeyLength = 2;
 }  // namespace LanguageTagLimits
 // Fixed size language subtag which is stored inline in LanguageTag.
 //
 // Holds up to |Length| characters together with an explicit length; the
 // characters are not null-terminated. Instances are neither copyable nor
 // assignable; use |set()| to replace the contents.
 template <size_t Length>
 class LanguageTagSubtag final {
  // |length_| is a single byte, so larger capacities could not be represented.
  static_assert(Length <= UINT8_MAX,
                "subtag capacity must be representable in a uint8_t length");
  uint8_t length_ = 0;
  // Zero-initialized so a freshly constructed subtag never exposes
  // indeterminate bytes, whichever form of initialization created it.
  char chars_[Length] = {};
 public:
  LanguageTagSubtag() = default;
  LanguageTagSubtag(const LanguageTagSubtag&) = delete;
  LanguageTagSubtag& operator=(const LanguageTagSubtag&) = delete;
  // Number of characters currently stored.
  size_t length() const { return length_; }
  // View over the stored characters; it borrows from this object.
  mozilla::Range<const char> range() const { return {chars_, length_}; }
  // Replace the contents with a copy of |str|, which must hold at most
  // |Length| characters (asserted). Each code unit is converted to |char|.
  template <typename CharT>
  void set(const mozilla::Range<const CharT>& str) {
    MOZ_ASSERT(str.length() <= Length);
    std::copy_n(str.begin().get(), str.length(), chars_);
    length_ = static_cast<uint8_t>(str.length());
  }
  // In-place ASCII case conversions of the stored characters.
  void toLowerCase() { AsciiToLowerCase(chars_, length(), chars_); }
  void toUpperCase() { AsciiToUpperCase(chars_, length(), chars_); }
  void toTitleCase() { AsciiToTitleCase(chars_, length(), chars_); }
  // Return true if the stored characters equal the string literal |str|.
  // |N| includes the literal's terminating null character.
  template <size_t N>
  bool equalTo(const char (&str)[N]) const {
    static_assert(N - 1 <= Length,
                  "subtag literals must not exceed the maximum subtag length");
    return length_ == N - 1 && memcmp(chars_, str, N - 1) == 0;
  }
 };
 using LanguageSubtag = LanguageTagSubtag<LanguageTagLimits::LanguageLength>;
 using ScriptSubtag = LanguageTagSubtag<LanguageTagLimits::ScriptLength>;
 using RegionSubtag = LanguageTagSubtag<LanguageTagLimits::RegionLength>;
 /**
 * Object representing a language tag.
 *
 * All subtags are already in canonicalized case.
 */
 class MOZ_STACK_CLASS LanguageTag final {
  LanguageSubtag language_ = {};
  ScriptSubtag script_ = {};
  RegionSubtag region_ = {};
  using VariantsVector = Vector<JS::UniqueChars, 2>;
  using ExtensionsVector = Vector<JS::UniqueChars, 2>;
  VariantsVector variants_;
  ExtensionsVector extensions_;
  JS::UniqueChars privateuse_ = nullptr;
  friend class LanguageTagParser;
 public:
  // Flag to request canonicalized Unicode extensions.
  enum class UnicodeExtensionCanonicalForm : bool { No, Yes };
 private:
  bool canonicalizeUnicodeExtension(
      JSContext* cx, JS::UniqueChars& unicodeExtension,
      UnicodeExtensionCanonicalForm canonicalForm);
  bool canonicalizeTransformExtension(JSContext* cx,
                                      JS::UniqueChars& transformExtension);
 public:
  static bool languageMapping(LanguageSubtag& language);
  static bool complexLanguageMapping(const LanguageSubtag& language);
 private:
  static bool regionMapping(RegionSubtag& region);
  static bool complexRegionMapping(const RegionSubtag& region);
  void performComplexLanguageMappings();
  void performComplexRegionMappings();
  MOZ_MUST_USE bool updateGrandfatheredMappings(JSContext* cx);
  static const char* replaceUnicodeExtensionType(
      const mozilla::Range<const char>& key,
      const mozilla::Range<const char>& type);
 public:
  // Construct an empty language tag. Both vectors are constructed with |cx|
  // (presumably as their allocation policy -- confirm against js::Vector).
  explicit LanguageTag(JSContext* cx) : variants_(cx), extensions_(cx) {}
  // Language tags are neither copyable nor assignable.
  LanguageTag(const LanguageTag&) = delete;
  LanguageTag& operator=(const LanguageTag&) = delete;
  // Read-only access to the individual components. The returned references
  // and pointer borrow from this object.
  const LanguageSubtag& language() const { return language_; }
  const ScriptSubtag& script() const { return script_; }
  const RegionSubtag& region() const { return region_; }
  const auto& variants() const { return variants_; }
  const auto& extensions() const { return extensions_; }
  // Null when no private-use subtag has been set.
  const char* privateuse() const { return privateuse_.get(); }
  /**
   * Set the language subtag. The input must be a valid, case-normalized
   * language subtag.
   */
  template <size_t N>
  void setLanguage(const char (&language)[N]) {
    mozilla::Range<const char> range(language, N - 1);
    MOZ_ASSERT(IsStructurallyValidLanguageTag(range));
    language_.set(range);
  }
  /**
   * Set the language subtag. The input must be a valid, case-normalized
   * language subtag.
   */
  void setLanguage(const LanguageSubtag& language) {
    MOZ_ASSERT(IsStructurallyValidLanguageTag(language.range()));
    language_.set(language.range());
  }
  /**
   * Set the script subtag. The input must be a valid, case-normalized
   * script subtag or the empty string.
   *
   * |N| counts the terminating null character of the literal, so the subtag
   * itself is |N - 1| characters long.
   */
  template <size_t N>
  void setScript(const char (&script)[N]) {
    mozilla::Range<const char> range(script, N - 1);
    // The empty string is explicitly allowed and resets the script subtag, so
    // it must bypass structural validation just like in the |ScriptSubtag|
    // overload.
    MOZ_ASSERT(range.length() == 0 || IsStructurallyValidScriptTag(range));
    script_.set(range);
  }
  /**
   * Set the script subtag. The input must be a valid, case-normalized
   * script subtag or the empty string.
   */
  void setScript(const ScriptSubtag& script) {
    MOZ_ASSERT(script.length() == 0 ||
               IsStructurallyValidScriptTag(script.range()));
    script_.set(script.range());
  }
  /**
   * Set the region subtag. The input must be a valid, case-normalized
   * region subtag or the empty string.
   *
   * |N| counts the terminating null character of the literal, so the subtag
   * itself is |N - 1| characters long.
   */
  template <size_t N>
  void setRegion(const char (&region)[N]) {
    mozilla::Range<const char> range(region, N - 1);
    // The empty string is explicitly allowed and resets the region subtag, so
    // it must bypass structural validation just like in the |RegionSubtag|
    // overload.
    MOZ_ASSERT(range.length() == 0 || IsStructurallyValidRegionTag(range));
    region_.set(range);
  }
  /**
   * Set the region subtag. The input must be a valid, case-normalized
   * region subtag or the empty string.
   */
  void setRegion(const RegionSubtag& region) {
    MOZ_ASSERT(region.length() == 0 ||
               IsStructurallyValidRegionTag(region.range()));
    region_.set(region.range());
  }
  /**
   * Removes all variant subtags.
   *
   * NOTE(review): |clearAndFree| presumably also releases the vector's heap
   * storage rather than only resetting its length -- confirm against
   * js::Vector.
   */
  void clearVariants() { variants_.clearAndFree(); }
  /**
   * Set the Unicode extension subtag. The input must be a valid,
   * case-normalized Unicode extension subtag.
   */
  bool setUnicodeExtension(JS::UniqueChars extension);
  /**
   * Set the private-use subtag, taking ownership of |privateuse|. The input
   * must be either null or a valid, case-normalized private-use subtag.
   *
   * A null pointer skips validation and leaves the tag without a private-use
   * subtag. Any non-null pointer, including one to an empty string, is passed
   * to the structural validator.
   */
  void setPrivateuse(JS::UniqueChars privateuse) {
    MOZ_ASSERT(!privateuse ||
               IsStructurallyValidPrivateUseTag(
                   {privateuse.get(), strlen(privateuse.get())}));
    privateuse_ = std::move(privateuse);
  }
  /**
   * Canonicalize the base-name subtags, that means the language, script,
   * region, and variant subtags.
   */
  bool canonicalizeBaseName(JSContext* cx);
  /**
   * Canonicalize all extension subtags.
   */
  bool canonicalizeExtensions(JSContext* cx,
                              UnicodeExtensionCanonicalForm canonicalForm);
  /**
   * Canonicalizes the given structurally valid Unicode BCP 47 locale
   * identifier, including regularized case of subtags. For example, the
   * language tag Zh-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE,
   * where
   *
   *     Zh             ; 2*3ALPHA
   *     -haNS          ; ["-" script]
   *     -bu            ; ["-" region]
   *     -variant2      ; *("-" variant)
   *     -Variant1
   *     -u-ca-chinese  ; *("-" extension)
   *     -t-Zh-laTN
   *     -x-PRIVATE     ; ["-" privateuse]
   *
   * becomes zh-Hans-MM-variant1-variant2-t-zh-latn-u-ca-chinese-x-private
   *
   * UTS 35 specifies two different canonicalization algorithms. There's one to
   * canonicalize BCP 47 language tags and another one to canonicalize Unicode
   * locale identifiers. The latter one wasn't present when ECMA-402 was changed
   * to use Unicode BCP 47 locale identifiers instead of BCP 47 language tags,
   * so ECMA-402 currently only uses the former to canonicalize Unicode BCP 47
   * locale identifiers.
   *
   * Spec: ECMAScript Internationalization API Specification, 6.2.3.
   * Spec:
   * https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers
   * Spec: https://unicode.org/reports/tr35/#BCP_47_Language_Tag_Conversion
   */
  bool canonicalize(JSContext* cx,
                    UnicodeExtensionCanonicalForm canonicalForm) {
    // The base name is canonicalized first; extensions are only processed when
    // that step succeeded.
    if (!canonicalizeBaseName(cx)) {
      return false;
    }
    return canonicalizeExtensions(cx, canonicalForm);
  }
  /**
   * Append the string representation of this language tag to the given
   * string buffer.
   */
  bool appendTo(JSContext* cx, StringBuffer& sb) const;
  /**
   * Add likely-subtags to the language tag.
   *
   * Spec: <https://www.unicode.org/reports/tr35/#Likely_Subtags>
   */
  bool addLikelySubtags(JSContext* cx);
  /**
   * Remove likely-subtags from the language tag.
   *
   * Spec: <https://www.unicode.org/reports/tr35/#Likely_Subtags>
   */
  bool removeLikelySubtags(JSContext* cx);
 };
 /**
 * Parser for Unicode BCP 47 locale identifiers.
 *
 * <https://unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers>
 */
 class MOZ_STACK_CLASS LanguageTagParser final {
 public:
  // Exposed as |public| for |MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS|.
  //
  // The values form a bit set: |AlphaDigit| is the bitwise union of |Alpha|
  // and |Digit|, while |Error| occupies a bit of its own.
  enum class TokenKind : uint8_t {
    None = 0,
    Alpha = 1 << 0,
    Digit = 1 << 1,
    AlphaDigit = Alpha | Digit,
    Error = 1 << 2
  };
 private:
  class Token final {
    size_t index_;
    size_t length_;
    TokenKind kind_;
   public:
    Token(TokenKind kind, size_t index, size_t length)
        : index_(index), length_(length), kind_(kind) {}
    TokenKind kind() const { return kind_; }
    size_t index() const { return index_; }
    size_t length() const { return length_; }
    bool isError() const { return kind_ == TokenKind::Error; }
    bool isNone() const { return kind_ == TokenKind::None; }
    bool isAlpha() const { return kind_ == TokenKind::Alpha; }
    bool isDigit() const { return kind_ == TokenKind::Digit; }
    bool isAlphaDigit() const { return kind_ == TokenKind::AlphaDigit; }
  };
  // The input characters, held either as Latin-1 or as two-byte code units.
  using LocaleChars = mozilla::Variant<const JS::Latin1Char*, const char16_t*>;
  // Stored by reference: the variant passed to the constructor must outlive
  // this parser.
  const LocaleChars& locale_;
  // NOTE(review): presumably the number of code units in the input -- confirm
  // against |nextToken()|.
  size_t length_;
  // NOTE(review): presumably the current read position within the input --
  // confirm against |nextToken()|.
  size_t index_ = 0;
  // Private: parsers are only created by the static entry points of this class
  // and by the befriended |LanguageTag|.
  LanguageTagParser(const LocaleChars& locale, size_t length)
      : locale_(locale), length_(length) {}
  // Read the code unit at |index| from whichever character representation the
  // input uses. No bounds or ASCII checks are performed here.
  char16_t charAtUnchecked(size_t index) const {
    if (!locale_.is<const JS::Latin1Char*>()) {
      return locale_.as<const char16_t*>()[index];
    }
    return locale_.as<const JS::Latin1Char*>()[index];
  }
  // Read the code unit at |index| as a |char|. The code unit must be ASCII;
  // this is asserted.
  char charAt(size_t index) const {
    const char16_t unit = charAtUnchecked(index);
    MOZ_ASSERT(mozilla::IsAscii(unit));
    return static_cast<char>(unit);
  }
  // Copy the token characters into |subtag|.
  template <size_t N>
  void copyChars(const Token& tok, LanguageTagSubtag<N>& subtag) const {
    size_t index = tok.index();
    size_t length = tok.length();
    if (locale_.is<const JS::Latin1Char*>()) {
      using T = const JS::Latin1Char;
      subtag.set(mozilla::Range<T>(locale_.as<T*>() + index, length));
    } else {
      using T = const char16_t;
      subtag.set(mozilla::Range<T>(locale_.as<T*>() + index, length));
    }
  }
  // Create a string copy of |length| characters starting at |index|.
  JS::UniqueChars chars(JSContext* cx, size_t index, size_t length) const;
  // Create a string copy of the token characters.
  JS::UniqueChars chars(JSContext* cx, const Token& tok) const {
    // Forwards the token's start index and length to the index-based overload.
    return chars(cx, tok.index(), tok.length());
  }
  Token nextToken();
  JS::UniqueChars extension(JSContext* cx, const Token& start,
                            const Token& end) const;
  // unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
  //
  // Four character language subtags are not allowed in Unicode BCP 47 locale
  // identifiers. Also see the comparison to Unicode CLDR locale identifiers in
  // <https://unicode.org/reports/tr35/#BCP_47_Conformance>.
  bool isLanguage(const Token& tok) const {
    if (!tok.isAlpha()) {
      return false;
    }
    // Lengths 2-3 and 5-8 are accepted; length 4 is deliberately excluded.
    const size_t len = tok.length();
    return len == 2 || len == 3 || (len >= 5 && len <= 8);
  }
  // unicode_script_subtag = alpha{4} ;
  bool isScript(const Token& tok) const {
    if (!tok.isAlpha()) {
      return false;
    }
    return tok.length() == 4;
  }
  // unicode_region_subtag = (alpha{2} | digit{3}) ;
  bool isRegion(const Token& tok) const {
    // Two letters or three digits; the length selects which form to check.
    switch (tok.length()) {
      case 2:
        return tok.isAlpha();
      case 3:
        return tok.isDigit();
      default:
        return false;
    }
  }
  // unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
  bool isVariant(const Token& tok) const {
    const size_t len = tok.length();
    if (len == 4) {
      // The four character form must start with a digit.
      return mozilla::IsAsciiDigit(charAt(tok.index()));
    }
    return len >= 5 && len <= 8;
  }
  // Returns the code unit of the first character at the given singleton token.
  // Always returns the lower case form of an alphabetical character.
  char singletonKey(const Token& tok) const {
    MOZ_ASSERT(tok.length() == 1);
    char c = charAt(tok.index());
    return mozilla::IsAsciiUppercaseAlpha(c) ? (c | 0x20) : c;
  }
  // extensions = unicode_locale_extensions |
  //              transformed_extensions |
  //              other_extensions ;
  //
  // unicode_locale_extensions = sep [uU] ((sep keyword)+ |
  //                                       (sep attribute)+ (sep keyword)*) ;
  //
  // transformed_extensions = sep [tT] ((sep tlang (sep tfield)*) |
  //                                    (sep tfield)+) ;
  //
  // other_extensions = sep [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ;
  bool isExtensionStart(const Token& tok) const {
    // Check the length first: |singletonKey| requires a one character token.
    if (tok.length() != 1) {
      return false;
    }
    return singletonKey(tok) != 'x';
  }
  // other_extensions = sep [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ;
  bool isOtherExtensionPart(const Token& tok) const {
    const size_t len = tok.length();
    return len >= 2 && len <= 8;
  }
  // unicode_locale_extensions = sep [uU] ((sep keyword)+ |
  //                                       (sep attribute)+ (sep keyword)*) ;
  // keyword = key (sep type)? ;
  bool isUnicodeExtensionPart(const Token& tok) const {
    // Any of the three part kinds qualifies; they are tried in the order key,
    // type, attribute.
    if (isUnicodeExtensionKey(tok)) {
      return true;
    }
    if (isUnicodeExtensionType(tok)) {
      return true;
    }
    return isUnicodeExtensionAttribute(tok);
  }
  // attribute = alphanum{3,8} ;
  bool isUnicodeExtensionAttribute(const Token& tok) const {
    const size_t len = tok.length();
    return len >= 3 && len <= 8;
  }
  // key = alphanum alpha ;
  bool isUnicodeExtensionKey(const Token& tok) const {
    if (tok.length() != 2) {
      return false;
    }
    // Only the second character is restricted to letters.
    return mozilla::IsAsciiAlpha(charAt(tok.index() + 1));
  }
  // type = alphanum{3,8} (sep alphanum{3,8})* ;
  bool isUnicodeExtensionType(const Token& tok) const {
    const size_t len = tok.length();
    return len >= 3 && len <= 8;
  }
  // tkey = alpha digit ;
  bool isTransformExtensionKey(const Token& tok) const {
    if (tok.length() != 2) {
      return false;
    }
    // A letter followed by a digit.
    const size_t first = tok.index();
    if (!mozilla::IsAsciiAlpha(charAt(first))) {
      return false;
    }
    return mozilla::IsAsciiDigit(charAt(first + 1));
  }
  // tvalue = (sep alphanum{3,8})+ ;
  bool isTransformExtensionPart(const Token& tok) const {
    const size_t len = tok.length();
    return len >= 3 && len <= 8;
  }
  // pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
  bool isPrivateUseStart(const Token& tok) const {
    // Check the length first: |singletonKey| requires a one character token.
    if (tok.length() != 1) {
      return false;
    }
    return singletonKey(tok) == 'x';
  }
  // pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
  bool isPrivateUsePart(const Token& tok) const {
    const size_t len = tok.length();
    return len != 0 && len <= 8;
  }
  enum class BaseNameParsing : bool { Normal, WithinTransformExtension };
  // Helper function for use in |parseBaseName| and
  // |parseTlangInTransformExtension|.  Do not use this directly!
  static JS::Result<bool> internalParseBaseName(JSContext* cx,
                                                LanguageTagParser& ts,
                                                LanguageTag& tag, Token& tok,
                                                BaseNameParsing parseType);
  // Parse the `unicode_language_id` production, i.e. the
  // language/script/region/variants portion of a language tag, into |tag|,
  // which will be filled with canonical-cased components (lowercase language,
  // titlecase script, uppercase region, lowercased and alphabetized and
  // deduplicated variants). |tok| must be the current token.
  static JS::Result<bool> parseBaseName(JSContext* cx, LanguageTagParser& ts,
                                        LanguageTag& tag, Token& tok) {
    // Regular (non-`tlang`) parsing of the shared base-name implementation.
    constexpr BaseNameParsing mode = BaseNameParsing::Normal;
    return internalParseBaseName(cx, ts, tag, tok, mode);
  }
  // Parse the `tlang` production within a parsed 't' transform extension.
  // The precise requirements for "previously parsed" are:
  //
  //   * the input begins from current token |tok| with a valid `tlang`
  //   * the `tlang` is wholly lowercase (*not* canonical case)
  //   * variant subtags in the `tlang` may contain duplicates and be
  //     unordered
  //
  // Return an error on internal failure. Otherwise, return a success value. If
  // there was no `tlang`, then |tag.language().missing()|. But if there was a
  // `tlang`, then |tag| is filled with subtags exactly as they appeared in the
  // parse input: fully lowercase, variants in alphabetical order without
  // duplicates.
  //
  // NOTE(review): |LanguageTagSubtag| as declared in this header has no
  // |missing()| member; an absent language is presumably one whose |length()|
  // is zero -- confirm. Also, "exactly as they appeared" sits oddly next to
  // "alphabetical order without duplicates", given that the input variants may
  // be unordered and duplicated; confirm which of the two
  // |internalParseBaseName| implements for |WithinTransformExtension|.
  static JS::Result<JS::Ok> parseTlangInTransformExtension(
      JSContext* cx, LanguageTagParser& ts, LanguageTag& tag, Token& tok) {
    // Callers must already be positioned on a language token.
    MOZ_ASSERT(ts.isLanguage(tok));
    return internalParseBaseName(cx, ts, tag, tok,
                                 BaseNameParsing::WithinTransformExtension)
        .map([](bool parsed) {
          // Given the precondition above, a base name is expected to have been
          // parsed; the boolean is then dropped in favor of a plain success.
          MOZ_ASSERT(parsed);
          return JS::Ok();
        });
  }
  friend class LanguageTag;
  // A (start offset, length) pair that is resolved against a base pointer
  // supplied later, so it stores no pointer of its own.
  class Range final {
    size_t begin_;
    size_t length_;
   public:
    Range(size_t begin, size_t length) : begin_(begin), length_(length) {}
    // Return |ptr| advanced by the stored start offset.
    template <typename T>
    T* begin(T* ptr) const {
      T* const start = ptr + begin_;
      return start;
    }
    size_t length() const { return length_; }
  };
  using TFieldVector = js::Vector<Range, 8>;
  using AttributesVector = js::Vector<Range, 8>;
  using KeywordsVector = js::Vector<Range, 8>;
  // Parse |extension|, which must be a validated, fully lowercase
  // `transformed_extensions` subtag, and fill |tag| and |fields| from the
  // `tlang` and `tfield` components. Data in |tag| is lowercase, consistent
  // with |extension|.
  static JS::Result<bool> parseTransformExtension(
      JSContext* cx, mozilla::Range<const char> extension, LanguageTag& tag,
      TFieldVector& fields);
  // Parse |extension|, which must be a validated, fully lowercase
  // `unicode_locale_extensions` subtag, and fill |attributes| and |keywords|
  // from the `attribute` and `keyword` components.
  static JS::Result<bool> parseUnicodeExtension(
      JSContext* cx, mozilla::Range<const char> extension,
      AttributesVector& attributes, KeywordsVector& keywords);
 public:
  // Parse the input string as a language tag. Reports an error to the context
  // if the input can't be parsed completely.
  static bool parse(JSContext* cx, JSLinearString* locale, LanguageTag& tag);
  // Parse the input string as a language tag. Returns Ok(true) if the input
  // could be completely parsed, Ok(false) if the input couldn't be parsed,
  // or Err() in case of internal error.
  static JS::Result<bool> tryParse(JSContext* cx, JSLinearString* locale,
                                   LanguageTag& tag);
  // Parse the input string as the base-name parts (language, script, region,
  // variants) of a language tag. Ignores any trailing characters.
  static bool parseBaseName(JSContext* cx, mozilla::Range<const char> locale,
                            LanguageTag& tag);
  // Return true iff |extension| can be parsed as a Unicode extension subtag.
  static bool canParseUnicodeExtension(mozilla::Range<const char> extension);
  // Return true iff |unicodeType| can be parsed as a Unicode extension type.
  static bool canParseUnicodeExtensionType(JSLinearString* unicodeType);
 };
 MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(LanguageTagParser::TokenKind)
 }  // namespace intl
 }  // namespace js
 #endif /* builtin_intl_LanguageTag_h */
--- a/js/src/builtin/intl/LanguageTagGenerated.cpp
+++ b/js/src/builtin/intl/LanguageTagGenerated.cpp
@ -0,0 +1,615 @@
 // Generated by make_intl_data.py. DO NOT EDIT.
 #include "mozilla/Assertions.h"
 #include "mozilla/Range.h"
 #include "mozilla/TextUtils.h"
 #include <algorithm>
 #include <cstdint>
 #include <cstring>
 #include <iterator>
 #include <type_traits>
 #include "builtin/intl/LanguageTag.h"
 #include "util/Text.h"
 #include "vm/JSContext.h"
 using ConstCharRange = mozilla::Range<const char>;
 // Report whether |subtag| is one of the entries of |subtags|.
 //
 // |subtags| must be sorted, because the lookup is a binary search. Every row
 // holds TagLength - 1 significant characters followed by a terminator, so only
 // the first TagLength - 1 bytes take part in the comparison.
 template <size_t Length, size_t TagLength, size_t SubtagLength>
 static inline bool HasReplacement(
    const char (&subtags)[Length][TagLength],
    const js::intl::LanguageTagSubtag<SubtagLength>& subtag) {
  MOZ_ASSERT(subtag.length() == TagLength - 1,
             "subtag must have the same length as the list of subtags");

  // Orders two fixed-width entries by their raw bytes.
  auto sortsBefore = [](const char* lhs, const char* rhs) {
    return memcmp(lhs, rhs, TagLength - 1) < 0;
  };

  const char* needle = subtag.range().begin().get();
  return std::binary_search(std::begin(subtags), std::end(subtags), needle,
                            sortsBefore);
 }
 // Look |subtag| up in the sorted table |subtags| and return the string stored
 // at the same index of |aliases|. Returns nullptr when |subtag| isn't listed.
 //
 // Only the first TagLength - 1 bytes of each table row are compared; the
 // remaining byte is the row's terminator.
 template <size_t Length, size_t TagLength, size_t SubtagLength>
 static inline const char* SearchReplacement(
    const char (&subtags)[Length][TagLength],
    const char* (&aliases)[Length],
    const js::intl::LanguageTagSubtag<SubtagLength>& subtag) {
  MOZ_ASSERT(subtag.length() == TagLength - 1,
             "subtag must have the same length as the list of subtags");

  // Orders two fixed-width entries by their raw bytes.
  auto sortsBefore = [](const char* lhs, const char* rhs) {
    return memcmp(lhs, rhs, TagLength - 1) < 0;
  };

  const char* needle = subtag.range().begin().get();
  auto found = std::lower_bound(std::begin(subtags), std::end(subtags), needle,
                                sortsBefore);

  // lower_bound only yields the first entry that is not less than |needle|,
  // so an exact comparison is still needed to confirm a hit.
  if (found == std::end(subtags) || memcmp(*found, needle, TagLength - 1) != 0) {
    return nullptr;
  }
  return aliases[std::distance(std::begin(subtags), found)];
 }
 // Mappings from language subtags to preferred values.
 // Derived from CLDR Supplemental Data, version 35.1.
 // https://github.com/unicode-org/cldr.git
 //
 // If |language| appears in one of the tables below, it is overwritten in
 // place with its replacement and true is returned. Otherwise |language| is
 // left untouched and false is returned. Only two- and three-letter subtags
 // are looked up; any other length returns false.
 //
 // In each table pair, |aliases[i]| is the replacement for |languages[i]|, and
 // |languages| is sorted because SearchReplacement performs a binary search.
 bool js::intl::LanguageTag::languageMapping(LanguageSubtag& language) {
  MOZ_ASSERT(IsStructurallyValidLanguageTag(language.range()));
  // Two-letter language subtags.
  if (language.length() == 2) {
    static const char languages[9][3] = {
      "bh", "in", "iw", "ji", "jw", "mo", "no", "tl", "tw",
    };
    static const char* aliases[9] = {
      "bho", "id", "he", "yi", "jv", "ro", "nb", "fil", "ak",
    };
    if (const char* replacement = SearchReplacement(languages, aliases, language)) {
      // The replacement may have a different length than the original subtag.
      language.set(ConstCharRange(replacement, strlen(replacement)));
      return true;
    }
    return false;
  }
  // Three-letter language subtags.
  if (language.length() == 3) {
    static const char languages[340][4] = {
      "aam", "aar", "abk", "adp", "afr", "aju", "aka", "alb", "als", "amh",
      "ara", "arb", "arg", "arm", "asm", "aue", "ava", "ave", "aym", "ayr",
      "ayx", "aze", "azj", "bak", "bam", "baq", "bcc", "bcl", "bel", "ben",
      "bgm", "bih", "bis", "bjd", "bod", "bos", "bre", "bul", "bur", "bxk",
      "bxr", "cat", "ccq", "ces", "cha", "che", "chi", "chu", "chv", "cjr",
      "cka", "cld", "cmk", "cmn", "cor", "cos", "coy", "cqu", "cre", "cwd",
      "cym", "cze", "dan", "deu", "dgo", "dhd", "dik", "diq", "div", "drh",
      "dut", "dzo", "ekk", "ell", "emk", "eng", "epo", "esk", "est", "eus",
      "ewe", "fao", "fas", "fat", "fij", "fin", "fra", "fre", "fry", "fuc",
      "ful", "gav", "gaz", "gbo", "geo", "ger", "gfx", "ggn", "gla", "gle",
      "glg", "glv", "gno", "gre", "grn", "gti", "gug", "guj", "guv", "gya",
      "hat", "hau", "hdn", "hea", "heb", "her", "him", "hin", "hmo", "hrr",
      "hrv", "hun", "hye", "ibi", "ibo", "ice", "ido", "iii", "ike", "iku",
      "ile", "ilw", "ina", "ind", "ipk", "isl", "ita", "jav", "jeg", "jpn",
      "kal", "kan", "kas", "kat", "kau", "kaz", "kgc", "kgh", "khk", "khm",
      "kik", "kin", "kir", "kmr", "knc", "kng", "knn", "koj", "kom", "kon",
      "kor", "kpv", "krm", "ktr", "kua", "kur", "kvs", "kwq", "kxe", "kzj",
      "kzt", "lao", "lat", "lav", "lbk", "lii", "lim", "lin", "lit", "lmm",
      "ltz", "lub", "lug", "lvs", "mac", "mah", "mal", "mao", "mar", "may",
      "meg", "mhr", "mkd", "mlg", "mlt", "mnk", "mol", "mon", "mri", "msa",
      "mst", "mup", "mwj", "mya", "myt", "nad", "nau", "nav", "nbl", "ncp",
      "nde", "ndo", "nep", "nld", "nno", "nnx", "nob", "nor", "npi", "nts",
      "nya", "oci", "ojg", "oji", "ori", "orm", "ory", "oss", "oun", "pan",
      "pbu", "pcr", "per", "pes", "pli", "plt", "pmc", "pmu", "pnb", "pol",
      "por", "ppa", "ppr", "pry", "pus", "puz", "que", "quz", "rmy", "roh",
      "ron", "rum", "run", "rus", "sag", "san", "sca", "scc", "scr", "sin",
      "skk", "slk", "slo", "slv", "sme", "smo", "sna", "snd", "som", "sot",
      "spa", "spy", "sqi", "src", "srd", "srp", "ssw", "sun", "swa", "swe",
      "swh", "tah", "tam", "tat", "tdu", "tel", "tgk", "tgl", "tha", "thc",
      "thx", "tib", "tie", "tir", "tkk", "tlw", "tmp", "tne", "ton", "tsf",
      "tsn", "tso", "ttq", "tuk", "tur", "twi", "uig", "ukr", "umu", "uok",
      "urd", "uzb", "uzn", "ven", "vie", "vol", "wel", "wln", "wol", "xba",
      "xho", "xia", "xkh", "xpe", "xsj", "xsl", "ybd", "ydd", "yid", "yma",
      "ymt", "yor", "yos", "yuu", "zai", "zha", "zho", "zsm", "zul", "zyb",
    };
    static const char* aliases[340] = {
      "aas",  "aa",  "ab",  "dz",  "af", "jrb",  "ak",  "sq",  "sq",  "am",
       "ar",  "ar",  "an",  "hy",  "as", "ktz",  "av",  "ae",  "ay",  "ay",
      "nun",  "az",  "az",  "ba",  "bm",  "eu", "bal", "bik",  "be",  "bn",
      "bcg", "bho",  "bi", "drl",  "bo",  "bs",  "br",  "bg",  "my", "luy",
      "bua",  "ca", "rki",  "cs",  "ch",  "ce",  "zh",  "cu",  "cv", "mom",
      "cmr", "syr", "xch",  "zh",  "kw",  "co", "pij", "quh",  "cr",  "cr",
       "cy",  "cs",  "da",  "de", "doi", "mwr", "din", "zza",  "dv",  "mn",
       "nl",  "dz",  "et",  "el", "man",  "en",  "eo",  "ik",  "et",  "eu",
       "ee",  "fo",  "fa",  "ak",  "fj",  "fi",  "fr",  "fr",  "fy",  "ff",
       "ff", "dev",  "om", "grb",  "ka",  "de", "vaj", "gvr",  "gd",  "ga",
       "gl",  "gv", "gon",  "el",  "gn", "nyc",  "gn",  "gu", "duz", "gba",
       "ht",  "ha", "hai", "hmn",  "he",  "hz", "srx",  "hi",  "ho", "jal",
       "hr",  "hu",  "hy", "opa",  "ig",  "is",  "io",  "ii",  "iu",  "iu",
       "ie", "gal",  "ia",  "id",  "ik",  "is",  "it",  "jv", "oyb",  "ja",
       "kl",  "kn",  "ks",  "ka",  "kr",  "kk", "tdf", "kml",  "mn",  "km",
       "ki",  "rw",  "ky",  "ku",  "kr",  "kg", "kok", "kwv",  "kv",  "kg",
       "ko",  "kv", "bmf", "dtp",  "kj",  "ku", "gdj", "yam", "tvd", "dtp",
      "dtp",  "lo",  "la",  "lv", "bnc", "raq",  "li",  "ln",  "lt", "rmx",
       "lb",  "lu",  "lg",  "lv",  "mk",  "mh",  "ml",  "mi",  "mr",  "ms",
      "cir", "chm",  "mk",  "mg",  "mt", "man",  "ro",  "mn",  "mi",  "ms",
      "mry", "raj", "vaj",  "my", "mry", "xny",  "na",  "nv",  "nr", "kdz",
       "nd",  "ng",  "ne",  "nl",  "nn", "ngv",  "nb",  "nb",  "ne", "pij",
       "ny",  "oc",  "oj",  "oj",  "or",  "om",  "or",  "os", "vaj",  "pa",
       "ps", "adx",  "fa",  "fa",  "pi",  "mg", "huw", "phr", "lah",  "pl",
       "pt", "bfy", "lcq", "prt",  "ps", "pub",  "qu",  "qu", "rom",  "rm",
       "ro",  "ro",  "rn",  "ru",  "sg",  "sa", "hle",  "sr",  "hr",  "si",
      "oyb",  "sk",  "sk",  "sl",  "se",  "sm",  "sn",  "sd",  "so",  "st",
       "es", "kln",  "sq",  "sc",  "sc",  "sr",  "ss",  "su",  "sw",  "sv",
       "sw",  "ty",  "ta",  "tt", "dtp",  "te",  "tg", "fil",  "th", "tpo",
      "oyb",  "bo", "ras",  "ti", "twm", "weo", "tyj", "kak",  "to", "taj",
       "tn",  "ts", "tmh",  "tk",  "tr",  "ak",  "ug",  "uk", "del", "ema",
       "ur",  "uz",  "uz",  "ve",  "vi",  "vo",  "cy",  "wa",  "wo", "cax",
       "xh", "acn", "waw", "kpe", "suj", "den", "rki",  "yi",  "yi", "lrr",
      "mtm",  "yo", "zom", "yug", "zap",  "za",  "zh",  "ms",  "zu",  "za",
    };
    if (const char* replacement = SearchReplacement(languages, aliases, language)) {
      // The replacement may have a different length than the original subtag.
      language.set(ConstCharRange(replacement, strlen(replacement)));
      return true;
    }
    return false;
  }
  // Subtags of any other length have no entry in the tables above.
  return false;
 }
 // Language subtags with complex mappings.
 // Derived from CLDR Supplemental Data, version 35.1.
 // https://github.com/unicode-org/cldr.git
 //
 // Returns true iff |language| is one of the listed subtags. Only two- and
 // three-letter subtags can match.
 bool js::intl::LanguageTag::complexLanguageMapping(const LanguageSubtag& language) {
  MOZ_ASSERT(IsStructurallyValidLanguageTag(language.range()));

  switch (language.length()) {
    case 2:
      // "sh" is the only two-letter entry.
      return language.equalTo("sh");

    case 3: {
      // Sorted, as required by the binary search in HasReplacement.
      static const char languages[6][4] = {
        "cnr", "drw", "hbs", "prs", "swc", "tnf",
      };
      return HasReplacement(languages, language);
    }

    default:
      return false;
  }
 }
 // Mappings from region subtags to preferred values.
 // Derived from CLDR Supplemental Data, version 35.1.
 // https://github.com/unicode-org/cldr.git
 //
 // If |region| appears in one of the tables below, it is overwritten in place
 // with its replacement and true is returned. Otherwise |region| is left
 // untouched and false is returned.
 //
 // In each table pair, |aliases[i]| is the replacement for |regions[i]|, and
 // |regions| is sorted because SearchReplacement performs a binary search.
 bool js::intl::LanguageTag::regionMapping(RegionSubtag& region) {
  MOZ_ASSERT(IsStructurallyValidRegionTag(region.range()));
  // Two-letter (alphabetic) region subtags.
  if (region.length() == 2) {
    static const char regions[23][3] = {
      "BU", "CS", "CT", "DD", "DY", "FQ", "FX", "HV", "JT", "MI",
      "NH", "NQ", "PU", "PZ", "QU", "RH", "TP", "UK", "VD", "WK",
      "YD", "YU", "ZR",
    };
    static const char* aliases[23] = {
      "MM", "RS", "KI", "DE", "BJ", "AQ", "FR", "BF", "UM", "UM",
      "VU", "AQ", "UM", "PA", "EU", "ZW", "TL", "GB", "VN", "UM",
      "YE", "RS", "CD",
    };
    if (const char* replacement = SearchReplacement(regions, aliases, region)) {
      region.set(ConstCharRange(replacement, strlen(replacement)));
      return true;
    }
    return false;
  }
  // Every remaining region is looked up in the three-digit table;
  // SearchReplacement asserts that |region| has exactly three characters.
  {
    static const char regions[300][4] = {
      "004", "008", "010", "012", "016", "020", "024", "028", "031", "032",
      "036", "040", "044", "048", "050", "051", "052", "056", "060", "062",
      "064", "068", "070", "072", "074", "076", "084", "086", "090", "092",
      "096", "100", "104", "108", "112", "116", "120", "124", "132", "136",
      "140", "144", "148", "152", "156", "158", "162", "166", "170", "174",
      "175", "178", "180", "184", "188", "191", "192", "196", "203", "204",
      "208", "212", "214", "218", "222", "226", "230", "231", "232", "233",
      "234", "238", "239", "242", "246", "248", "249", "250", "254", "258",
      "260", "262", "266", "268", "270", "275", "276", "278", "280", "288",
      "292", "296", "300", "304", "308", "312", "316", "320", "324", "328",
      "332", "334", "336", "340", "344", "348", "352", "356", "360", "364",
      "368", "372", "376", "380", "384", "388", "392", "398", "400", "404",
      "408", "410", "414", "417", "418", "422", "426", "428", "430", "434",
      "438", "440", "442", "446", "450", "454", "458", "462", "466", "470",
      "474", "478", "480", "484", "492", "496", "498", "499", "500", "504",
      "508", "512", "516", "520", "524", "528", "531", "533", "534", "535",
      "540", "548", "554", "558", "562", "566", "570", "574", "578", "580",
      "581", "583", "584", "585", "586", "591", "598", "600", "604", "608",
      "612", "616", "620", "624", "626", "630", "634", "638", "642", "643",
      "646", "652", "654", "659", "660", "662", "663", "666", "670", "674",
      "678", "682", "686", "688", "690", "694", "702", "703", "704", "705",
      "706", "710", "716", "720", "724", "728", "729", "732", "736", "740",
      "744", "748", "752", "756", "760", "762", "764", "768", "772", "776",
      "780", "784", "788", "792", "795", "796", "798", "800", "804", "807",
      "818", "826", "830", "831", "832", "833", "834", "840", "850", "854",
      "858", "860", "862", "876", "882", "886", "887", "891", "894", "958",
      "959", "960", "962", "963", "964", "965", "966", "967", "968", "969",
      "970", "971", "972", "973", "974", "975", "976", "977", "978", "979",
      "980", "981", "982", "983", "984", "985", "986", "987", "988", "989",
      "990", "991", "992", "993", "994", "995", "996", "997", "998", "999",
    };
    static const char* aliases[300] = {
       "AF",  "AL",  "AQ",  "DZ",  "AS",  "AD",  "AO",  "AG",  "AZ",  "AR",
       "AU",  "AT",  "BS",  "BH",  "BD",  "AM",  "BB",  "BE",  "BM", "034",
       "BT",  "BO",  "BA",  "BW",  "BV",  "BR",  "BZ",  "IO",  "SB",  "VG",
       "BN",  "BG",  "MM",  "BI",  "BY",  "KH",  "CM",  "CA",  "CV",  "KY",
       "CF",  "LK",  "TD",  "CL",  "CN",  "TW",  "CX",  "CC",  "CO",  "KM",
       "YT",  "CG",  "CD",  "CK",  "CR",  "HR",  "CU",  "CY",  "CZ",  "BJ",
       "DK",  "DM",  "DO",  "EC",  "SV",  "GQ",  "ET",  "ET",  "ER",  "EE",
       "FO",  "FK",  "GS",  "FJ",  "FI",  "AX",  "FR",  "FR",  "GF",  "PF",
       "TF",  "DJ",  "GA",  "GE",  "GM",  "PS",  "DE",  "DE",  "DE",  "GH",
       "GI",  "KI",  "GR",  "GL",  "GD",  "GP",  "GU",  "GT",  "GN",  "GY",
       "HT",  "HM",  "VA",  "HN",  "HK",  "HU",  "IS",  "IN",  "ID",  "IR",
       "IQ",  "IE",  "IL",  "IT",  "CI",  "JM",  "JP",  "KZ",  "JO",  "KE",
       "KP",  "KR",  "KW",  "KG",  "LA",  "LB",  "LS",  "LV",  "LR",  "LY",
       "LI",  "LT",  "LU",  "MO",  "MG",  "MW",  "MY",  "MV",  "ML",  "MT",
       "MQ",  "MR",  "MU",  "MX",  "MC",  "MN",  "MD",  "ME",  "MS",  "MA",
       "MZ",  "OM",  "NA",  "NR",  "NP",  "NL",  "CW",  "AW",  "SX",  "BQ",
       "NC",  "VU",  "NZ",  "NI",  "NE",  "NG",  "NU",  "NF",  "NO",  "MP",
       "UM",  "FM",  "MH",  "PW",  "PK",  "PA",  "PG",  "PY",  "PE",  "PH",
       "PN",  "PL",  "PT",  "GW",  "TL",  "PR",  "QA",  "RE",  "RO",  "RU",
       "RW",  "BL",  "SH",  "KN",  "AI",  "LC",  "MF",  "PM",  "VC",  "SM",
       "ST",  "SA",  "SN",  "RS",  "SC",  "SL",  "SG",  "SK",  "VN",  "SI",
       "SO",  "ZA",  "ZW",  "YE",  "ES",  "SS",  "SD",  "EH",  "SD",  "SR",
       "SJ",  "SZ",  "SE",  "CH",  "SY",  "TJ",  "TH",  "TG",  "TK",  "TO",
       "TT",  "AE",  "TN",  "TR",  "TM",  "TC",  "TV",  "UG",  "UA",  "MK",
       "EG",  "GB",  "JE",  "GG",  "JE",  "IM",  "TZ",  "US",  "VI",  "BF",
       "UY",  "UZ",  "VE",  "WF",  "WS",  "YE",  "YE",  "RS",  "ZM",  "AA",
       "QM",  "QN",  "QP",  "QQ",  "QR",  "QS",  "QT",  "EU",  "QV",  "QW",
       "QX",  "QY",  "QZ",  "XA",  "XB",  "XC",  "XD",  "XE",  "XF",  "XG",
       "XH",  "XI",  "XJ",  "XK",  "XL",  "XM",  "XN",  "XO",  "XP",  "XQ",
       "XR",  "XS",  "XT",  "XU",  "XV",  "XW",  "XX",  "XY",  "XZ",  "ZZ",
    };
    if (const char* replacement = SearchReplacement(regions, aliases, region)) {
      region.set(ConstCharRange(replacement, strlen(replacement)));
      return true;
    }
    return false;
  }
 }
 // Region subtags with complex mappings.
 // Derived from CLDR Supplemental Data, version 35.1.
 // https://github.com/unicode-org/cldr.git
 //
 // Returns true iff |region| is one of the listed subtags.
 bool js::intl::LanguageTag::complexRegionMapping(const RegionSubtag& region) {
  MOZ_ASSERT(IsStructurallyValidRegionTag(region.range()));

  if (region.length() != 2) {
    // Sorted, as required by the binary search in HasReplacement, which also
    // asserts that |region| is exactly three characters long.
    static const char regions[8][4] = {
      "172", "200", "530", "532", "536", "582", "810", "890",
    };
    return HasReplacement(regions, region);
  }

  // Two-letter region codes.
  const bool isListed = region.equalTo("AN") ||
                        region.equalTo("NT") ||
                        region.equalTo("PC") ||
                        region.equalTo("SU");
  return isListed;
 }
 // Language subtags with complex mappings.
 // Derived from CLDR Supplemental Data, version 35.1.
 // https://github.com/unicode-org/cldr.git
 //
 // Replaces the language of this tag and, when the tag has none yet, also
 // fills in the region or script that belongs to the replaced language. Tags
 // whose language isn't handled below are left unchanged.
 void js::intl::LanguageTag::performComplexLanguageMappings() {
  MOZ_ASSERT(IsStructurallyValidLanguageTag(language().range()));

  // cnr -> sr, with ME as the default region.
  if (language().equalTo("cnr")) {
    setLanguage("sr");
    if (region().length() == 0) {
      setRegion("ME");
    }
    return;
  }

  // drw, prs, tnf -> fa, with AF as the default region.
  if (language().equalTo("drw") ||
      language().equalTo("prs") ||
      language().equalTo("tnf")) {
    setLanguage("fa");
    if (region().length() == 0) {
      setRegion("AF");
    }
    return;
  }

  // hbs, sh -> sr, with Latn as the default script.
  if (language().equalTo("hbs") ||
      language().equalTo("sh")) {
    setLanguage("sr");
    if (script().length() == 0) {
      setScript("Latn");
    }
    return;
  }

  // swc -> sw, with CD as the default region.
  if (language().equalTo("swc")) {
    setLanguage("sw");
    if (region().length() == 0) {
      setRegion("CD");
    }
  }
 }
 // Region subtags with complex mappings.
 // Derived from CLDR Supplemental Data, version 35.1.
 // https://github.com/unicode-org/cldr.git
 //
 // Replaces the region of this tag for regions whose replacement depends on
 // the tag's language (and, for some entries, its script). Each region group
 // below tests the language/script combinations in order and ends with a
 // default replacement. A region that matches none of the groups is left
 // unchanged.
 void js::intl::LanguageTag::performComplexRegionMappings() {
  MOZ_ASSERT(IsStructurallyValidLanguageTag(language().range()));
  MOZ_ASSERT(IsStructurallyValidRegionTag(region().range()));
  // Region 172: default replacement is RU.
  if (region().equalTo("172")) {
    if (language().equalTo("hy") ||
        (language().equalTo("und") && script().equalTo("Armn"))) {
      setRegion("AM");
    }
    else if (language().equalTo("az") ||
             language().equalTo("tkr") ||
             language().equalTo("tly") ||
             language().equalTo("ttt")) {
      setRegion("AZ");
    }
    else if (language().equalTo("be")) {
      setRegion("BY");
    }
    else if (language().equalTo("ab") ||
             language().equalTo("ka") ||
             language().equalTo("os") ||
             (language().equalTo("und") && script().equalTo("Geor")) ||
             language().equalTo("xmf")) {
      setRegion("GE");
    }
    else if (language().equalTo("ky")) {
      setRegion("KG");
    }
    else if (language().equalTo("kk") ||
             (language().equalTo("ug") && script().equalTo("Cyrl"))) {
      setRegion("KZ");
    }
    else if (language().equalTo("gag")) {
      setRegion("MD");
    }
    else if (language().equalTo("tg")) {
      setRegion("TJ");
    }
    else if (language().equalTo("tk")) {
      setRegion("TM");
    }
    else if (language().equalTo("crh") ||
             language().equalTo("got") ||
             language().equalTo("ji") ||
             language().equalTo("rue") ||
             language().equalTo("uk") ||
             (language().equalTo("und") && script().equalTo("Goth"))) {
      setRegion("UA");
    }
    else if (language().equalTo("kaa") ||
             language().equalTo("sog") ||
             (language().equalTo("und") && script().equalTo("Sogd")) ||
             (language().equalTo("und") && script().equalTo("Sogo")) ||
             language().equalTo("uz")) {
      setRegion("UZ");
    }
    else {
      setRegion("RU");
    }
  }
  // Region 200: default replacement is CZ.
  else if (region().equalTo("200")) {
    if (language().equalTo("sk")) {
      setRegion("SK");
    }
    else {
      setRegion("CZ");
    }
  }
  // Regions 530, 532 and AN: default replacement is CW.
  else if (region().equalTo("530") ||
           region().equalTo("532") ||
           region().equalTo("AN")) {
    if (language().equalTo("vic")) {
      setRegion("SX");
    }
    else {
      setRegion("CW");
    }
  }
  // Regions 536 and NT: default replacement is SA.
  else if (region().equalTo("536") ||
           region().equalTo("NT")) {
    if (language().equalTo("akk") ||
        language().equalTo("ckb") ||
        (language().equalTo("ku") && script().equalTo("Arab")) ||
        language().equalTo("mis") ||
        language().equalTo("syr") ||
        (language().equalTo("und") && script().equalTo("Xsux")) ||
        (language().equalTo("und") && script().equalTo("Hatr")) ||
        (language().equalTo("und") && script().equalTo("Syrc"))) {
      setRegion("IQ");
    }
    else {
      setRegion("SA");
    }
  }
  // Regions 582 and PC: default replacement is FM.
  else if (region().equalTo("582") ||
           region().equalTo("PC")) {
    if (language().equalTo("mh")) {
      setRegion("MH");
    }
    else if (language().equalTo("pau")) {
      setRegion("PW");
    }
    else {
      setRegion("FM");
    }
  }
  // Regions 810 and SU: default replacement is RU. This list extends the one
  // for region 172 with the EE, LT and LV entries.
  else if (region().equalTo("810") ||
           region().equalTo("SU")) {
    if (language().equalTo("hy") ||
        (language().equalTo("und") && script().equalTo("Armn"))) {
      setRegion("AM");
    }
    else if (language().equalTo("az") ||
             language().equalTo("tkr") ||
             language().equalTo("tly") ||
             language().equalTo("ttt")) {
      setRegion("AZ");
    }
    else if (language().equalTo("be")) {
      setRegion("BY");
    }
    else if (language().equalTo("et") ||
             language().equalTo("vro")) {
      setRegion("EE");
    }
    else if (language().equalTo("ab") ||
             language().equalTo("ka") ||
             language().equalTo("os") ||
             (language().equalTo("und") && script().equalTo("Geor")) ||
             language().equalTo("xmf")) {
      setRegion("GE");
    }
    else if (language().equalTo("ky")) {
      setRegion("KG");
    }
    else if (language().equalTo("kk") ||
             (language().equalTo("ug") && script().equalTo("Cyrl"))) {
      setRegion("KZ");
    }
    else if (language().equalTo("lt") ||
             language().equalTo("sgs")) {
      setRegion("LT");
    }
    else if (language().equalTo("ltg") ||
             language().equalTo("lv")) {
      setRegion("LV");
    }
    else if (language().equalTo("gag")) {
      setRegion("MD");
    }
    else if (language().equalTo("tg")) {
      setRegion("TJ");
    }
    else if (language().equalTo("tk")) {
      setRegion("TM");
    }
    else if (language().equalTo("crh") ||
             language().equalTo("got") ||
             language().equalTo("ji") ||
             language().equalTo("rue") ||
             language().equalTo("uk") ||
             (language().equalTo("und") && script().equalTo("Goth"))) {
      setRegion("UA");
    }
    else if (language().equalTo("kaa") ||
             language().equalTo("sog") ||
             (language().equalTo("und") && script().equalTo("Sogd")) ||
             (language().equalTo("und") && script().equalTo("Sogo")) ||
             language().equalTo("uz")) {
      setRegion("UZ");
    }
    else {
      setRegion("RU");
    }
  }
  // Region 890: default replacement is RS.
  else if (region().equalTo("890")) {
    if (language().equalTo("bs")) {
      setRegion("BA");
    }
    else if (language().equalTo("hr")) {
      setRegion("HR");
    }
    else if (language().equalTo("mk")) {
      setRegion("MK");
    }
    else if (language().equalTo("sl")) {
      setRegion("SI");
    }
    else {
      setRegion("RS");
    }
  }
 }
 // Canonicalize grandfathered locale identifiers.
 // Derived from CLDR Supplemental Data, version 35.1.
 // https://github.com/unicode-org/cldr.git
 //
 // Returns false only when duplicating the private-use string fails; every
 // other path, including "nothing to map", returns true.
 bool js::intl::LanguageTag::updateGrandfatheredMappings(JSContext* cx) {
  // We're mapping regular grandfathered tags to non-grandfathered form here.
  // Other tags remain unchanged.
  //
  // regular       = "art-lojban"
  //               / "cel-gaulish"
  //               / "no-bok"
  //               / "no-nyn"
  //               / "zh-guoyu"
  //               / "zh-hakka"
  //               / "zh-min"
  //               / "zh-min-nan"
  //               / "zh-xiang"
  //
  // Most tags can be ruled out quickly, because every regular grandfathered
  // (RG) tag shares these traits when viewed as a |unicode_locale_id|:
  //
  //   * No RG tag has a "script"-looking component.
  //   * No RG tag has a "region"-looking component.
  //   * The RG tags that match |unicode_locale_id| (art-lojban, cel-gaulish,
  //     zh-guoyu, zh-hakka, zh-xiang) have exactly one "variant". (no-bok,
  //     no-nyn, zh-min, and zh-min-nan require BCP47's extlang subtag
  //     that |unicode_locale_id| doesn't support.)
  //   * No RG tag contains |extensions| or |pu_extensions|.
  //
  // Real-world |unicode_language_subtag|s are all two or three letters, so a
  // |language.length > 3| fast-path wouldn't pay off and is omitted.
  const bool hasGrandfatheredShape = script().length() == 0 &&
                                     region().length() == 0 &&
                                     variants().length() == 1 &&
                                     extensions().length() == 0 &&
                                     !privateuse();
  if (!hasGrandfatheredShape) {
    return true;
  }

  // Exactly one variant is present at this point. The pointer is only read
  // before clearVariants() is called.
  const char* const onlyVariant = variants()[0].get();
  auto hasVariant = [onlyVariant](const char* expected) {
    return strcmp(onlyVariant, expected) == 0;
  };

  if (language().equalTo("art")) {
    // art-lojban -> jbo
    if (hasVariant("lojban")) {
      setLanguage("jbo");
      clearVariants();
    }
    return true;
  }

  if (language().equalTo("cel")) {
    // cel-gaulish -> xtg-x-cel-gaulish
    if (hasVariant("gaulish")) {
      setLanguage("xtg");
      clearVariants();
      auto privateuse = DuplicateString(cx, "x-cel-gaulish");
      if (!privateuse) {
        return false;
      }
      setPrivateuse(std::move(privateuse));
    }
    return true;
  }

  if (language().equalTo("zh")) {
    if (hasVariant("guoyu")) {
      // zh-guoyu -> zh
      setLanguage("zh");
      clearVariants();
    } else if (hasVariant("hakka")) {
      // zh-hakka -> hak
      setLanguage("hak");
      clearVariants();
    } else if (hasVariant("xiang")) {
      // zh-xiang -> hsn
      setLanguage("hsn");
      clearVariants();
    }
  }

  return true;
 }
--- a/js/src/builtin/intl/UnicodeExtensionsGenerated.cpp
+++ b/js/src/builtin/intl/UnicodeExtensionsGenerated.cpp
@ -0,0 +1,188 @@
 // Generated by make_intl_data.py. DO NOT EDIT.
 // Version: CLDR-35.1
 // URL: https://unicode.org/Public/cldr/35.1/core.zip
 #include "mozilla/Assertions.h"
 #include "mozilla/Range.h"
 #include "mozilla/TextUtils.h"
 #include <algorithm>
 #include <cstdint>
 #include <cstring>
 #include "builtin/intl/LanguageTag.h"
 using namespace js::intl::LanguageTagLimits;
 using ConstCharRange = mozilla::Range<const char>;
 // Return true if the leading characters of |key| equal the string literal
 // |str|. |key|'s length is not checked here, so the caller has to make sure
 // that |key| holds at least UnicodeKeyLength characters.
 template <size_t Length>
 static inline bool IsUnicodeKey(const ConstCharRange& key,
                                const char (&str)[Length]) {
  static_assert(Length == UnicodeKeyLength + 1,
                "Unicode extension key is two characters long");

  // Length counts the literal's terminator, which isn't part of the key.
  const char* keyChars = key.begin().get();
  const bool sameChars = memcmp(keyChars, str, Length - 1) == 0;
  return sameChars;
 }
 // Return true if |type| is exactly the string literal |str|, i.e. both have
 // the same length and the same characters.
 template <size_t Length>
 static inline bool IsUnicodeType(const ConstCharRange& type,
                                 const char (&str)[Length]) {
  static_assert(Length > UnicodeKeyLength + 1,
                "Unicode extension type contains more than two characters");

  // Length counts the literal's terminator, which isn't part of the type.
  if (type.length() != Length - 1) {
    return false;
  }
  return memcmp(type.begin().get(), str, Length - 1) == 0;
 }
 // Three-way comparison of the zero-terminated string |a| with the character
 // range |b|, which must not contain a null character.
 //
 // Returns a negative number if |a| sorts before |b|, zero if both are equal,
 // and a positive number if |a| sorts after |b|. Characters are compared as
 // unsigned values.
 //
 // The sign must be consistent with the sort order of the lookup tables,
 // because this function is used as the ordering predicate for
 // std::lower_bound. In particular, when |b| is a proper prefix of |a|, then
 // |a| is the longer string and sorts after |b|, so the result has to be
 // positive. Returning a negative number in that case would break the
 // partitioning std::lower_bound relies on whenever a table contains both a
 // string and a longer string starting with it.
 static int32_t CompareUnicodeType(const char* a, const ConstCharRange& b) {
 #ifdef DEBUG
  auto isNull = [](char c) {
    return c == '\0';
  };
 #endif
  MOZ_ASSERT(std::none_of(b.begin().get(), b.end().get(), isNull),
             "unexpected null-character in string");
  using UnsignedChar = unsigned char;
  for (size_t i = 0; i < b.length(); i++) {
    // |a| is zero-terminated and |b| doesn't contain a null-terminator. So if
    // we've reached the end of |a|, the below if-statement will always be true.
    // That ensures we don't read past the end of |a|.
    if (int32_t r = UnsignedChar(a[i]) - UnsignedChar(b[i])) {
      return r;
    }
  }
  // All of |b| matched the start of |a|. Return zero if both strings are equal
  // (|a| ends here, too) or a positive number if |b| is a proper prefix of
  // |a|.
  return int32_t(UnsignedChar(a[b.length()]));
 }
 // Binary-search the sorted table |types| for |type| and return the string
 // stored at the same index of |aliases|. Returns nullptr when |type| isn't
 // listed.
 template <size_t Length>
 static inline const char* SearchReplacement(const char* (&types)[Length],
                                            const char* (&aliases)[Length],
                                            const ConstCharRange& type) {
  auto sortsBefore = [](const auto& entry, const auto& needle) {
    return CompareUnicodeType(entry, needle) < 0;
  };

  auto candidate = std::lower_bound(std::begin(types), std::end(types), type,
                                    sortsBefore);

  // lower_bound only yields the first entry that is not less than |type|, so
  // an exact comparison is still needed to confirm a hit.
  if (candidate == std::end(types) || CompareUnicodeType(*candidate, type) != 0) {
    return nullptr;
  }
  return aliases[std::distance(std::begin(types), candidate)];
 }
 /**
 * Mapping from deprecated BCP 47 Unicode extension types to their preferred
 * values.
 *
 * Returns the replacement for |type| when it is used with the Unicode
 * extension key |key|, or nullptr if there is no replacement for this
 * key/type combination.
 *
 * |key| must be exactly UnicodeKeyLength characters long and |type| must be
 * longer than that. Both are asserted to consist of lowercase ASCII letters
 * and digits; |type| may additionally contain '-'.
 *
 * Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
 */
 const char* js::intl::LanguageTag::replaceUnicodeExtensionType(
    const ConstCharRange& key, const ConstCharRange& type) {
 #ifdef DEBUG
  // Character predicates that are used only by the assertions below.
  static auto isAsciiLowercaseAlphanumeric = [](char c) {
    return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c);
  };
  static auto isAsciiLowercaseAlphanumericOrDash = [](char c) {
    return isAsciiLowercaseAlphanumeric(c) || c == '-';
  };
 #endif
  MOZ_ASSERT(key.length() == UnicodeKeyLength);
  MOZ_ASSERT(std::all_of(key.begin().get(), key.end().get(),
                         isAsciiLowercaseAlphanumeric));
  MOZ_ASSERT(type.length() > UnicodeKeyLength);
  MOZ_ASSERT(std::all_of(type.begin().get(), type.end().get(),
                         isAsciiLowercaseAlphanumericOrDash));
  // Keys with only a few replacements are handled by direct comparisons.
  if (IsUnicodeKey(key, "ca")) {
    if (IsUnicodeType(type, "ethiopic-amete-alem")) {
      return "ethioaa";
    }
    if (IsUnicodeType(type, "islamicc")) {
      return "islamic-civil";
    }
  }
  else if (IsUnicodeKey(key, "kb") ||
           IsUnicodeKey(key, "kc") ||
           IsUnicodeKey(key, "kh") ||
           IsUnicodeKey(key, "kk") ||
           IsUnicodeKey(key, "kn")) {
    if (IsUnicodeType(type, "yes")) {
      return "true";
    }
  }
  else if (IsUnicodeKey(key, "ks")) {
    if (IsUnicodeType(type, "primary")) {
      return "level1";
    }
    if (IsUnicodeType(type, "tertiary")) {
      return "level3";
    }
  }
  else if (IsUnicodeKey(key, "ms")) {
    if (IsUnicodeType(type, "imperial")) {
      return "uksystem";
    }
  }
  // Keys with many replacements use a pair of parallel tables: |types| is
  // sorted for the binary search in SearchReplacement and |aliases[i]| is the
  // replacement for |types[i]|.
  else if (IsUnicodeKey(key, "rg") ||
           IsUnicodeKey(key, "sd")) {
    static const char* types[116] = {
       "cn11",  "cn12",  "cn13",  "cn14",  "cn15",  "cn21",  "cn22",  "cn23",
       "cn31",  "cn32",  "cn33",  "cn34",  "cn35",  "cn36",  "cn37",  "cn41",
       "cn42",  "cn43",  "cn44",  "cn45",  "cn46",  "cn50",  "cn51",  "cn52",
       "cn53",  "cn54",  "cn61",  "cn62",  "cn63",  "cn64",  "cn65", "cz10a",
      "cz10b", "cz10c", "cz10d", "cz10e", "cz10f", "cz611", "cz612", "cz613",
      "cz614", "cz615", "cz621", "cz622", "cz623", "cz624", "cz626", "cz627",
       "czjc",  "czjm",  "czka",  "czkr",  "czli",  "czmo",  "czol",  "czpa",
       "czpl",  "czpr",  "czst",  "czus",  "czvy",  "czzl",   "fra",   "frb",
        "frc",   "frd",   "fre",   "frf",   "frg",   "frh",   "fri",   "frj",
        "frk",   "frl",   "frm",   "frn",   "fro",   "frp",   "frq",   "frr",
        "frs",   "frt",   "fru",   "frv",  "laxn",   "lud",   "lug",   "lul",
      "mrnkc",   "nzn",   "nzs",  "omba",  "omsh",  "plds",  "plkp",  "pllb",
       "plld",  "pllu",  "plma",  "plmz",  "plop",  "plpd",  "plpk",  "plpm",
       "plsk",  "plsl",  "plwn",  "plwp",  "plzp", "tteto", "ttrcm", "ttwto",
      "twkhq", "twtnq", "twtpq", "twtxq",
    };
    static const char* aliases[116] = {
       "cnbj",  "cntj",  "cnhe",  "cnsx",  "cnmn",  "cnln",  "cnjl",  "cnhl",
       "cnsh",  "cnjs",  "cnzj",  "cnah",  "cnfj",  "cnjx",  "cnsd",  "cnha",
       "cnhb",  "cnhn",  "cngd",  "cngx",  "cnhi",  "cncq",  "cnsc",  "cngz",
       "cnyn",  "cnxz",  "cnsn",  "cngs",  "cnqh",  "cnnx",  "cnxj", "cz110",
      "cz111", "cz112", "cz113", "cz114", "cz115", "cz663", "cz632", "cz633",
      "cz634", "cz635", "cz641", "cz642", "cz643", "cz644", "cz646", "cz647",
       "cz31",  "cz64",  "cz41",  "cz52",  "cz51",  "cz80",  "cz71",  "cz53",
       "cz32",  "cz10",  "cz20",  "cz42",  "cz63",  "cz72", "frges", "frnaq",
      "frara", "frbfc", "frbre", "frcvl", "frges", "frcor", "frbfc", "fridf",
      "frocc", "frnaq", "frges", "frocc", "frhdf", "frnor", "frnor", "frpdl",
      "frhdf", "frnaq", "frpac", "frara",  "laxs",  "lucl",  "luec",  "luca",
       "mr13", "nzauk", "nzcan",  "ombj",  "omsj",  "pl02",  "pl04",  "pl08",
       "pl10",  "pl06",  "pl12",  "pl14",  "pl16",  "pl20",  "pl18",  "pl22",
       "pl26",  "pl24",  "pl28",  "pl30",  "pl32", "tttob", "ttmrc", "tttob",
      "twkhh", "twtnn", "twnwt", "twtxg",
    };
    return SearchReplacement(types, aliases, type);
  }
  else if (IsUnicodeKey(key, "tz")) {
    static const char* types[28] = {
         "aqams",    "cnckg",    "cnhrb",    "cnkhg",     "cuba",    "egypt",
          "eire",      "est",     "gmt0", "hongkong",      "hst",  "iceland",
          "iran",   "israel",  "jamaica",    "japan",    "libya",      "mst",
        "navajo",   "poland", "portugal",      "prc",      "roc",      "rok",
        "turkey",      "uct", "usnavajo",     "zulu",
    };
    static const char* aliases[28] = {
         "nzakl",    "cnsha",    "cnsha",    "cnurc",    "cuhav",    "egcai",
         "iedub",   "utcw05",      "gmt",    "hkhkg",   "utcw10",    "isrey",
         "irthr",  "jeruslm",    "jmkin",    "jptyo",    "lytip",   "utcw07",
         "usden",    "plwaw",    "ptlis",    "cnsha",    "twtpe",    "krsel",
         "trist",      "utc",    "usden",      "utc",
    };
    return SearchReplacement(types, aliases, type);
  }
  // Either the key has no replacements at all or |type| isn't one of the
  // directly compared values for its key.
  return nullptr;
 }
--- a/js/src/builtin/intl/make_intl_data.py
+++ b/js/src/builtin/intl/make_intl_data.py
@ -50,15 +50,24 @@ from operator import attrgetter, itemgetter
 from zipfile import ZipFile
 if sys.version_info.major == 2:
-    from itertools import ifilter as filter, ifilterfalse as filterfalse, imap as map
+    from itertools import ifilter as filter, ifilterfalse as filterfalse, imap as map,\
                          izip_longest as zip_longest
    from urllib2 import urlopen, Request as UrlRequest
    from urlparse import urlsplit, urlunsplit
 else:
-    from itertools import filterfalse
+    from itertools import filterfalse, zip_longest
    from urllib.request import urlopen, Request as UrlRequest
    from urllib.parse import urlsplit, urlunsplit
 # From https://docs.python.org/3/library/itertools.html
 def grouper(iterable, n, fillvalue=None):
    """Split |iterable| into consecutive tuples of exactly |n| items.

    The final tuple is padded with |fillvalue| when the input runs out, e.g.
    grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
    """
    # Every slot of a tuple pulls from the very same iterator, so zipping |n|
    # references to it hands the items out in consecutive runs of |n|.
    shared = iter(iterable)
    return zip_longest(*((shared,) * n), fillvalue=fillvalue)
 def writeMappingHeader(println, description, source, url):
    if type(description) is not list:
        description = [description]
@ -383,6 +392,419 @@ function updateGrandfatheredMappings(tag) {
 }""".lstrip("\n"))
 def writeMappingsBinarySearch(println, fn_name, type_name, name, validate_fn, mappings,
                               tag_maxlength, description, source, url):
    """ Emit a C++ |js::intl::LanguageTag| member function for subtag lookups.

        |mappings| is either a dictionary (subtag -> replacement) or a plain
        collection of subtags. For a dictionary the emitted function replaces a
        matching subtag in place and returns true; otherwise it only reports
        whether the subtag is present. Subtags whose length equals
        |tag_maxlength| are matched without an explicit length check.
    """
    # Only a real dictionary carries replacement values.
    has_replacements = type(mappings) == dict

    def equal_to(subtag):
        # C++ expression comparing the function argument against |subtag|.
        return u"""{}.equalTo("{}")""".format(name, subtag)

    def write_array(subtags, name, length, fixed):
        count = len(subtags)
        if not fixed:
            declaration = u"    static const char* {}[{}] = {{".format(name, count)
        else:
            declaration = u"    static const char {}[{}][{}] = {{".format(name, count,
                                                                          length + 1)
        println(declaration)

        # Ten entries per row keeps the generated lines within 80 columns.
        for row in grouper(subtags, 10):
            # |grouper| pads the last row with None; drop the padding.
            quoted = [u"\"{}\"".format(tag).rjust(length + 2)
                      for tag in row if tag is not None]
            println(u"      {},".format(u", ".join(quoted)))

        println(u"    };")

    println(u"")
    writeMappingHeader(println, description, source, url)
    println(u"""
 bool js::intl::LanguageTag::{0}({1} {2}) {{
  MOZ_ASSERT({3}({2}.range()));
 """.format(fn_name, type_name, name, validate_fn).strip())

    # Cleared as soon as an unconditional block (maximum length) is emitted,
    # because every path through such a block already returns.
    needs_trailing_return = True

    if has_replacements:
        candidates = mappings.keys()
    else:
        candidates = mappings

    # Handle the subtags one length at a time. Within a group all entries have
    # the same size, so the emitted binary search can compare them with a
    # fixed-length |memcmp|.
    for (length, group) in groupby(sorted(candidates, key=len), len):
        if length == tag_maxlength:
            # No subtag can be longer, so the length check is omitted.
            needs_trailing_return = False
            println(u"""
  {
 """.rstrip("\n"))
        else:
            println(u"""
  if ({}.length() == {}) {{
 """.format(name, length).rstrip("\n"))

        # Binary search requires sorted input.
        ordered = sorted(group)
        count = len(ordered)

        if count > 4:
            # Long list: emit lookup tables plus a binary search.
            write_array(ordered, name + "s", length, True)

            if has_replacements:
                write_array([mappings[k] for k in ordered], u"aliases", length, False)

                println(u"""
    if (const char* replacement = SearchReplacement({0}s, aliases, {0})) {{
      {0}.set(ConstCharRange(replacement, strlen(replacement)));
      return true;
    }}
    return false;
 """.format(name).rstrip())
            else:
                println(u"""
    return HasReplacement({0}s, {0});
 """.format(name).rstrip())
        elif count > 1:
            # Short list: a handful of direct comparisons.
            if has_replacements:
                for subtag in ordered:
                    println(u"""
    if ({}) {{
      {}.set("{}");
      return true;
    }}
 """.format(equal_to(subtag), name, mappings[subtag]).strip("\n"))

                println(u"""
    return false;
 """.strip("\n"))
            else:
                joiner = u" ||\n" + u" " * (4 + len("return "))
                cond = joiner.join(equal_to(subtag) for subtag in ordered)
                println(u"""
    return {};
 """.format(cond).strip("\n"))
        else:
            # Exactly one subtag of this length.
            only = ordered[0]
            if has_replacements:
                println(u"""
    if ({}) {{
      {}.set("{}");
      return true;
    }}
    return false;
 """.format(equal_to(only), name, mappings[only]).strip("\n"))
            else:
                println(u"""
    return {};
 """.format(equal_to(only)).strip("\n"))

        println(u"""
  }
 """.strip("\n"))

    if needs_trailing_return:
        println(u"""
  return false;""")

    println(u"""
 }""".lstrip("\n"))
 def writeComplexLanguageTagMappingsNative(println, complex_language_mappings,
                                           description, source, url):
    """ Emit |js::intl::LanguageTag::performComplexLanguageMappings()|.

        |complex_language_mappings| maps a language subtag to a
        (language, script, region) triple. Subtags sharing the same triple are
        folded into one condition. The emitted code always sets the language,
        and sets script/region only when the triple provides one and the tag's
        own script/region is empty.
    """
    println(u"")
    writeMappingHeader(println, description, source, url)
    println(u"""
 void js::intl::LanguageTag::performComplexLanguageMappings() {
  MOZ_ASSERT(IsStructurallyValidLanguageTag(language().range()));
 """.lstrip())

    ordered_mappings = sorted(complex_language_mappings.items(), key=itemgetter(0))

    # For every replacement triple, collect the subtags which map to it but
    # aren't the first (in sorted order) to do so.
    followers = {}
    for (deprecated_language, (language, script, region)) in ordered_mappings:
        target = (language, script, region)
        if target in followers:
            followers[target].append(deprecated_language)
        else:
            followers[target] = []

    emitted_branch = False
    for (deprecated_language, (language, script, region)) in ordered_mappings:
        same_target = followers[(language, script, region)]

        # Followers were already covered by the branch of their leading subtag.
        if deprecated_language in same_target:
            continue

        if emitted_branch:
            if_kind = u"else if"
        else:
            if_kind = u"if"
        emitted_branch = True

        # Continuation lines are aligned below the first comparison.
        joiner = u" ||\n" + u" " * (2 + len(if_kind) + 2)
        cond = joiner.join([u"language().equalTo(\"{}\")".format(lang)
                            for lang in [deprecated_language] + same_target])

        println(u"""
  {} ({}) {{""".format(if_kind, cond).strip("\n"))

        println(u"""
    setLanguage("{}");""".format(language).strip("\n"))

        if script is not None:
            println(u"""
    if (script().length() == 0) {{
      setScript("{}");
    }}""".format(script).strip("\n"))

        if region is not None:
            println(u"""
    if (region().length() == 0) {{
      setRegion("{}");
    }}""".format(region).strip("\n"))

        println(u"""
  }""".strip("\n"))

    println(u"""
 }
 """.strip("\n"))
 def writeComplexRegionTagMappingsNative(println, complex_region_mappings,
                                         description, source, url):
    """ Emit |js::intl::LanguageTag::performComplexRegionMappings()|.

        |complex_region_mappings| maps a region subtag to a pair of
        (default region, non-default replacements), where each non-default
        replacement is a (language, script, region) triple. Regions with equal
        replacement data are folded into one condition. Inside it, the emitted
        code picks a non-default region when language (and script, if given)
        match, and falls back to the default region otherwise.
    """
    println(u"")
    writeMappingHeader(println, description, source, url)
    println(u"""
 void js::intl::LanguageTag::performComplexRegionMappings() {
  MOZ_ASSERT(IsStructurallyValidLanguageTag(language().range()));
  MOZ_ASSERT(IsStructurallyValidRegionTag(region().range()));
 """.lstrip())

    def hash_key(default, non_default_replacements):
        # The replacement list itself isn't hashable, so use the string form
        # of its sorted, stringified members instead.
        rendered = [str(v) for v in non_default_replacements]
        rendered.sort()
        return (default, str(rendered))

    def compare_tags(language, script):
        # C++ condition matching |language| and, when present, |script|.
        if script is not None:
            return u"(language().equalTo(\"{}\") && script().equalTo(\"{}\"))".format(
                language, script)
        return u"language().equalTo(\"{}\")".format(language)

    ordered_mappings = sorted(complex_region_mappings.items(), key=itemgetter(0))

    # For every distinct replacement data set, collect the regions which use
    # it but aren't the first (in sorted order) to do so.
    followers = {}
    for (deprecated_region, (default, non_default_replacements)) in ordered_mappings:
        key = hash_key(default, non_default_replacements)
        if key in followers:
            followers[key].append(deprecated_region)
        else:
            followers[key] = []

    emitted_branch = False
    for (deprecated_region, (default, non_default_replacements)) in ordered_mappings:
        same_data = followers[hash_key(default, non_default_replacements)]

        # Followers were already covered by the branch of their leading region.
        if deprecated_region in same_data:
            continue

        if emitted_branch:
            if_kind = u"else if"
        else:
            if_kind = u"if"
        emitted_branch = True

        joiner = u" ||\n" + u" " * (2 + len(if_kind) + 2)
        cond = joiner.join([u"region().equalTo(\"{}\")".format(name)
                            for name in [deprecated_region] + same_data])

        println(u"""
  {} ({}) {{""".format(if_kind, cond).strip("\n"))

        targets = sorted(set(target for (_, _, target) in non_default_replacements))

        emitted_case = False
        for replacement_region in targets:
            # All (language, script) pairs selecting this region, ordered by
            # language only.
            pairs = [(language, script)
                     for (language, script, target) in non_default_replacements
                     if target == replacement_region]
            pairs.sort(key=itemgetter(0))

            if emitted_case:
                if_kind = u"else if"
            else:
                if_kind = u"if"
            emitted_case = True

            joiner = u" ||\n" + u" " * (4 + len(if_kind) + 2)
            cond = joiner.join([compare_tags(language, script)
                                for (language, script) in pairs])

            println(u"""
    {} ({}) {{
      setRegion("{}");
    }}""".format(if_kind, cond, replacement_region).rstrip().strip("\n"))

        println(u"""
    else {{
      setRegion("{}");
    }}
  }}""".format(default).rstrip().strip("\n"))

    println(u"""
 }
 """.strip("\n"))
 def writeGrandfatheredMappingsFunctionNative(println, grandfathered_mappings,
                                              description, source, url):
    """ Writes a function definition that maps grandfathered language tags.

        Emits |js::intl::LanguageTag::updateGrandfatheredMappings(JSContext*)|
        through |println|. |grandfathered_mappings| maps each grandfathered tag
        to its modern form; both sides are parsed with a local regular
        expression and one C++ branch is emitted per tag (in sorted tag order).
        |description|, |source|, and |url| are forwarded to
        |writeMappingHeader|.

        Asserts that every grandfathered tag consists of a language and exactly
        one variant, and that no modern form contains variant subtags.
    """
    println(u"")
    writeMappingHeader(println, description, source, url)
    # Function prologue: the fast-path rejecting tags which can't be regular
    # grandfathered tags, followed by the |variantEqualTo| helper lambda used
    # by the per-tag branches emitted below.
    println(u"""\
 bool js::intl::LanguageTag::updateGrandfatheredMappings(JSContext* cx) {
  // We're mapping regular grandfathered tags to non-grandfathered form here.
  // Other tags remain unchanged.
  //
  // regular       = "art-lojban"
  //               / "cel-gaulish"
  //               / "no-bok"
  //               / "no-nyn"
  //               / "zh-guoyu"
  //               / "zh-hakka"
  //               / "zh-min"
  //               / "zh-min-nan"
  //               / "zh-xiang"
  //
  // Therefore we can quickly exclude most tags by checking every
  // |unicode_locale_id| subcomponent for characteristics not shared by any of
  // the regular grandfathered (RG) tags:
  //
  //   * Real-world |unicode_language_subtag|s are all two or three letters,
  //     so don't waste time running a useless |language.length > 3| fast-path.
  //   * No RG tag has a "script"-looking component.
  //   * No RG tag has a "region"-looking component.
  //   * The RG tags that match |unicode_locale_id| (art-lojban, cel-gaulish,
  //     zh-guoyu, zh-hakka, zh-xiang) have exactly one "variant". (no-bok,
  //     no-nyn, zh-min, and zh-min-nan require BCP47's extlang subtag
  //     that |unicode_locale_id| doesn't support.)
  //   * No RG tag contains |extensions| or |pu_extensions|.
  if (script().length() != 0 ||
      region().length() != 0 ||
      variants().length() != 1 ||
      extensions().length() != 0 ||
      privateuse()) {
    return true;
  }
  auto variantEqualTo = [this](const char* variant) {
    return strcmp(variants()[0].get(), variant) == 0;
  };""")
    # From Unicode BCP 47 locale identifier <https://unicode.org/reports/tr35/>.
    #
    # Doesn't allow any 'extensions' subtags.
    re_unicode_locale_id = re.compile(
        r"""
        ^
        # unicode_language_id = unicode_language_subtag
        #     unicode_language_subtag = alpha{2,3} | alpha{5,8}
        (?P<language>[a-z]{2,3}|[a-z]{5,8})
        # (sep unicode_script_subtag)?
        #     unicode_script_subtag = alpha{4}
        (?:-(?P<script>[a-z]{4}))?
        # (sep unicode_region_subtag)?
        #     unicode_region_subtag = (alpha{2} | digit{3})
        (?:-(?P<region>([a-z]{2}|[0-9]{3})))?
        # (sep unicode_variant_subtag)*
        #     unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3})
        (?P<variants>(-([a-z0-9]{5,8}|[0-9][a-z0-9]{3}))+)?
        # pu_extensions?
        #     pu_extensions = sep [xX] (sep alphanum{1,8})+
        (?:-(?P<privateuse>x(-[a-z0-9]{1,8})+))?
        $
        """, re.IGNORECASE | re.VERBOSE)
    # The first emitted branch is a plain |if|, all following ones |else if|.
    is_first = True
    for (tag, modern) in sorted(grandfathered_mappings.items(), key=itemgetter(0)):
        # Validate the grandfathered tag. Each assertion message states the
        # expectation which must hold, not the failure.
        tag_match = re_unicode_locale_id.match(tag)
        assert tag_match is not None
        tag_language = tag_match.group("language")
        assert tag_match.group("script") is None, (
               "{} does not contain a script subtag".format(tag))
        assert tag_match.group("region") is None, (
               "{} does not contain a region subtag".format(tag))
        tag_variants = tag_match.group("variants")
        assert tag_variants is not None, (
               "{} contains a variant subtag".format(tag))
        assert tag_match.group("privateuse") is None, (
               "{} does not contain a privateuse subtag".format(tag))
        # The |variants| group includes the leading separator; drop it. Any
        # remaining "-" would mean more than one variant was matched.
        tag_variant = tag_variants[1:]
        assert "-" not in tag_variant, (
               "{} contains only a single variant".format(tag))
        # Split the modern replacement into its subtags.
        modern_match = re_unicode_locale_id.match(modern)
        assert modern_match is not None
        modern_language = modern_match.group("language")
        modern_script = modern_match.group("script")
        modern_region = modern_match.group("region")
        modern_variants = modern_match.group("variants")
        modern_privateuse = modern_match.group("privateuse")
        # Comment line in the generated code documenting the mapping.
        println(u"""
  // {} -> {}
 """.format(tag, modern).rstrip())
        # Branch condition: language and the single variant must both match.
        println(u"""
  {}if (language().equalTo("{}") && variantEqualTo("{}")) {{
        """.format("" if is_first else "else ",
                   tag_language,
                   tag_variant).rstrip().strip("\n"))
        is_first = False
        println(u"""
    setLanguage("{}");
        """.format(modern_language).rstrip().strip("\n"))
        if modern_script is not None:
            println(u"""
    setScript("{}");
            """.format(modern_script).rstrip().strip("\n"))
        if modern_region is not None:
            println(u"""
    setRegion("{}");
            """.format(modern_region).rstrip().strip("\n"))
        # This check runs after the branch opening was already written, so a
        # failure here leaves partial output behind.
        assert modern_variants is None, (
            "all regular grandfathered tags' modern forms do not contain variant subtags")
        println(u"""
    clearVariants();
        """.rstrip().strip("\n"))
        if modern_privateuse is not None:
            # Only trailing whitespace is stripped from this template, so its
            # leading newline is kept and a blank line precedes the emitted
            # private-use code.
            println(u"""
    auto privateuse = DuplicateString(cx, "{}");
    if (!privateuse) {{
      return false;
    }}
    setPrivateuse(std::move(privateuse));
        """.format(modern_privateuse).rstrip().rstrip("\n"))
        println(u"""
    return true;
  }""".rstrip().strip("\n"))
    # Tags which matched none of the branches are left unchanged.
    println(u"""
  return true;
 }""")
@contextlib.contextmanager
 def TemporaryDirectory():
    tmpDir = tempfile.mkdtemp()
@ -674,6 +1096,106 @@ def writeCLDRLanguageTagData(println, data, url):
                                       source, url)
 def writeCLDRLanguageTagDataNative(println, data, url):
    """ Writes the language tag data to the Intl data file.

        Emits, in this order: the generated-file warning, the C++ includes and
        the |HasReplacement| / |SearchReplacement| lookup helpers, the four
        binary-search mapping functions (language, complex language, region,
        complex region), the two complex mapping functions, and finally the
        grandfathered tag mapping function.
    """
    println(generatedFileWarning)
    println(u"""
 #include "mozilla/Assertions.h"
 #include "mozilla/Range.h"
 #include "mozilla/TextUtils.h"
 #include <algorithm>
 #include <cstdint>
 #include <cstring>
 #include <iterator>
 #include <type_traits>
 #include "builtin/intl/LanguageTag.h"
 #include "util/Text.h"
 #include "vm/JSContext.h"
 using ConstCharRange = mozilla::Range<const char>;
 template <size_t Length, size_t TagLength, size_t SubtagLength>
 static inline bool HasReplacement(
    const char (&subtags)[Length][TagLength],
    const js::intl::LanguageTagSubtag<SubtagLength>& subtag) {
  MOZ_ASSERT(subtag.length() == TagLength - 1,
             "subtag must have the same length as the list of subtags");
  const char* ptr = subtag.range().begin().get();
  return std::binary_search(std::begin(subtags), std::end(subtags), ptr,
                            [](const char* a, const char* b) {
    return memcmp(a, b, TagLength - 1) < 0;
  });
 }
 template <size_t Length, size_t TagLength, size_t SubtagLength>
 static inline const char* SearchReplacement(
    const char (&subtags)[Length][TagLength],
    const char* (&aliases)[Length],
    const js::intl::LanguageTagSubtag<SubtagLength>& subtag) {
  MOZ_ASSERT(subtag.length() == TagLength - 1,
             "subtag must have the same length as the list of subtags");
  const char* ptr = subtag.range().begin().get();
  auto p = std::lower_bound(std::begin(subtags), std::end(subtags), ptr,
                            [](const char* a, const char* b) {
    return memcmp(a, b, TagLength - 1) < 0;
  });
  if (p != std::end(subtags) && memcmp(*p, ptr, TagLength - 1) == 0) {
    return aliases[std::distance(std::begin(subtags), p)];
  }
  return nullptr;
 }
 """.rstrip())

    source = u"CLDR Supplemental Data, version {}".format(data["version"])

    grandfathered = data["grandfatheredMappings"]
    languages = data["languageMappings"]
    complex_languages = data["complexLanguageMappings"]
    regions = data["regionMappings"]
    complex_regions = data["complexRegionMappings"]

    # unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
    longest_language = 8

    # unicode_region_subtag = (alpha{2} | digit{3}) ;
    longest_region = 3

    def emit_search(fn_name, type_name, name, validate_fn, mappings, maxlength,
                    description):
        # All lookup functions share the output sink and the provenance data.
        writeMappingsBinarySearch(println, fn_name, type_name, name, validate_fn,
                                  mappings, maxlength, description, source, url)

    emit_search("languageMapping", "LanguageSubtag&", "language",
                "IsStructurallyValidLanguageTag", languages, longest_language,
                "Mappings from language subtags to preferred values.")
    emit_search("complexLanguageMapping", "const LanguageSubtag&", "language",
                "IsStructurallyValidLanguageTag", complex_languages.keys(),
                longest_language,
                "Language subtags with complex mappings.")
    emit_search("regionMapping", "RegionSubtag&", "region",
                "IsStructurallyValidRegionTag", regions, longest_region,
                "Mappings from region subtags to preferred values.")
    emit_search("complexRegionMapping", "const RegionSubtag&", "region",
                "IsStructurallyValidRegionTag", complex_regions.keys(),
                longest_region,
                "Region subtags with complex mappings.")

    writeComplexLanguageTagMappingsNative(
        println, complex_languages,
        "Language subtags with complex mappings.", source, url)
    writeComplexRegionTagMappingsNative(
        println, complex_regions,
        "Region subtags with complex mappings.", source, url)

    writeGrandfatheredMappingsFunctionNative(
        println, grandfathered,
        "Canonicalize grandfathered locale identifiers.", source, url)
 def writeCLDRLanguageTagLikelySubtagsTest(println, data, url):
    """ Writes the likely-subtags test file. """
@ -886,6 +1408,13 @@ def updateCLDRLangTags(args):
        println(u"// Generated by make_intl_data.py. DO NOT EDIT.")
        writeCLDRLanguageTagData(println, data, url)
    print("Writing Intl data...")
    native_out = "LanguageTagGenerated.cpp"
    # native_out = os.path.splitext(out)[0] + ".cpp"
    with io.open(native_out, mode="w", encoding="utf-8", newline="") as f:
        println = partial(print, file=f)
        writeCLDRLanguageTagDataNative(println, data, url)
    print("Writing Intl test data...")
    test_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             "../../tests/non262/Intl/Locale/likely-subtags-generated.js")
@ -894,7 +1423,7 @@ def updateCLDRLangTags(args):
        println(u"// |reftest| skip-if(!this.hasOwnProperty('Intl')||"
                u"(!this.Intl.Locale&&!this.hasOwnProperty('addIntlExtras')))")
-        println(u"// Generated by make_intl_data.py. DO NOT EDIT.")
+        println(generatedFileWarning)
        writeCLDRLanguageTagLikelySubtagsTest(println, data, url)
@ -1780,6 +2309,179 @@ def writeUnicodeExtensionsFile(version, url, mapping, out):
            println(u"    },")
        println(u"};")
    with io.open(os.path.splitext(out)[0] + ".cpp", mode="w", encoding="utf-8", newline="") as f:
        println = partial(print, file=f)
        println(generatedFileWarning)
        println(u"// Version: CLDR-{}".format(version))
        println(u"// URL: {}".format(url))
        println(u"""
 #include "mozilla/Assertions.h"
 #include "mozilla/Range.h"
 #include "mozilla/TextUtils.h"
 #include <algorithm>
 #include <cstdint>
 #include <cstring>
 #include "builtin/intl/LanguageTag.h"
 using namespace js::intl::LanguageTagLimits;
 using ConstCharRange = mozilla::Range<const char>;
 template <size_t Length>
 static inline bool IsUnicodeKey(const ConstCharRange& key,
                                const char (&str)[Length]) {
  static_assert(Length == UnicodeKeyLength + 1,
                "Unicode extension key is two characters long");
  return memcmp(key.begin().get(), str, Length - 1) == 0;
 }
 template <size_t Length>
 static inline bool IsUnicodeType(const ConstCharRange& type,
                                 const char (&str)[Length]) {
  static_assert(Length > UnicodeKeyLength + 1,
                "Unicode extension type contains more than two characters");
  return type.length() == (Length - 1) &&
         memcmp(type.begin().get(), str, Length - 1) == 0;
 }
 static int32_t CompareUnicodeType(const char* a, const ConstCharRange& b) {
 #ifdef DEBUG
  auto isNull = [](char c) {
    return c == '\\0';
  };
 #endif
  MOZ_ASSERT(std::none_of(b.begin().get(), b.end().get(), isNull),
             "unexpected null-character in string");
  using UnsignedChar = unsigned char;
  for (size_t i = 0; i < b.length(); i++) {
    // |a| is zero-terminated and |b| doesn't contain a null-terminator. So if
    // we've reached the end of |a|, the below if-statement will always be true.
    // That ensures we don't read past the end of |a|.
    if (int32_t r = UnsignedChar(a[i]) - UnsignedChar(b[i])) {
      return r;
    }
  }
  // Return zero if both strings are equal or a negative number if |b| is a
  // prefix of |a|.
  return -int32_t(UnsignedChar(a[b.length()]));
 };
 template <size_t Length>
 static inline const char* SearchReplacement(const char* (&types)[Length],
                                            const char* (&aliases)[Length],
                                            const ConstCharRange& type) {
  auto p = std::lower_bound(std::begin(types), std::end(types), type,
                            [](const auto& a, const auto& b) {
    return CompareUnicodeType(a, b) < 0;
  });
  if (p != std::end(types) && CompareUnicodeType(*p, type) == 0) {
    return aliases[std::distance(std::begin(types), p)];
  }
  return nullptr;
 }
 """.rstrip("\n"))
        println(u"""
 /**
 * Mapping from deprecated BCP 47 Unicode extension types to their preferred
 * values.
 *
 * Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
 */
 const char* js::intl::LanguageTag::replaceUnicodeExtensionType(
    const ConstCharRange& key, const ConstCharRange& type) {
 #ifdef DEBUG
  static auto isAsciiLowercaseAlphanumeric = [](char c) {
    return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c);
  };
  static auto isAsciiLowercaseAlphanumericOrDash = [](char c) {
    return isAsciiLowercaseAlphanumeric(c) || c == '-';
  };
 #endif
  MOZ_ASSERT(key.length() == UnicodeKeyLength);
  MOZ_ASSERT(std::all_of(key.begin().get(), key.end().get(),
                         isAsciiLowercaseAlphanumeric));
  MOZ_ASSERT(type.length() > UnicodeKeyLength);
  MOZ_ASSERT(std::all_of(type.begin().get(), type.end().get(),
                         isAsciiLowercaseAlphanumericOrDash));
 """)
        def to_hash_key(replacements):
            """ Return a string which is equal for all |replacements|
                dictionaries containing the same (type, preferred value) pairs.
            """
            pairs = []
            for (deprecated, info) in replacements.items():
                pairs.append(str((deprecated, info["preferred"])))
            # Sort so that the dictionary's iteration order doesn't matter.
            pairs.sort()
            return str(pairs)
        def write_array(subtags, name, length):
            """ Emit |subtags| as a C++ |static const char*| array named |name|.

                |length| is the width the entries are right-aligned to; it also
                determines how many entries are packed into one row so that
                rows stay close to 80 columns.
            """
            # Always place at least one entry per row. Without the lower bound,
            # a |length| of 73 or more gives zero entries per row, for which
            # |grouper| produces no rows at all: the array would be emitted
            # empty although it is declared with |len(subtags)| elements.
            max_entries = max(1, (80 - len("    ")) // (length + len('"", ')))

            println(u"    static const char* {}[{}] = {{".format(name, len(subtags)))

            for entries in grouper(subtags, max_entries):
                # |grouper| pads the last row with None; skip the padding.
                entries = (u"\"{}\"".format(tag).rjust(length + 2)
                           for tag in entries if tag is not None)
                println(u"      {},".format(u", ".join(entries)))

            println(u"    };")
        # Merge duplicate keys.
        key_aliases = {}
        for (key, replacements) in sorted(mapping.items(), key=itemgetter(0)):
            hash_key = to_hash_key(replacements)
            if hash_key not in key_aliases:
                key_aliases[hash_key] = []
            else:
                key_aliases[hash_key].append(key)
        first_key = True
        for (key, replacements) in sorted(mapping.items(), key=itemgetter(0)):
            hash_key = to_hash_key(replacements)
            if key in key_aliases[hash_key]:
                continue
            cond = (u"IsUnicodeKey(key, \"{}\")".format(k) for k in [key] + key_aliases[hash_key])
            if_kind = u"if" if first_key else u"else if"
            cond = (u" ||\n" + u" " * (2 + len(if_kind) + 2)).join(cond)
            println(u"""
  {} ({}) {{""".format(if_kind, cond).strip("\n"))
            first_key = False
            replacements = sorted(replacements.items(), key=itemgetter(0))
            if len(replacements) > 4:
                types = [t for (t, _) in replacements]
                preferred = [r["preferred"] for (_, r) in replacements]
                max_len = max(len(k) for k in types + preferred)
                write_array(types, "types", max_len)
                write_array(preferred, "aliases", max_len)
                println(u"""
    return SearchReplacement(types, aliases, type);
 """.strip("\n"))
            else:
                for (type, replacement) in replacements:
                    println(u"""
    if (IsUnicodeType(type, "{}")) {{
      return "{}";
    }}""".format(type, replacement["preferred"]).strip("\n"))
            println(u"""
  }""".lstrip("\n"))
        println(u"""
  return nullptr;
 }
 """.strip("\n"))
 def updateUnicodeExtensions(args):
    """ Update the UnicodeExtensionsGenerated.js file. """
--- a/js/src/moz.build
+++ b/js/src/moz.build
@ -380,11 +380,14 @@ if CONFIG['ENABLE_INTL_API']:
        'builtin/intl/CommonFunctions.cpp',
        'builtin/intl/DateTimeFormat.cpp',
        'builtin/intl/IntlObject.cpp',
        'builtin/intl/LanguageTag.cpp',
        'builtin/intl/LanguageTagGenerated.cpp',
        'builtin/intl/Locale.cpp',
        'builtin/intl/NumberFormat.cpp',
        'builtin/intl/PluralRules.cpp',
        'builtin/intl/RelativeTimeFormat.cpp',
        'builtin/intl/SharedIntlData.cpp',
        'builtin/intl/UnicodeExtensionsGenerated.cpp',
    ]
 if CONFIG['MOZ_INSTRUMENTS']: