go.text/locale: moved package from go.exp.

R=r CC=golang-dev https://golang.org/cl/9893043
2013-05-31 14:31:13 +02:00 · 2013-05-31 14:31:13 +02:00 · 3942ae31cd
--- a/locale/Makefile
+++ b/locale/Makefile
@ -0,0 +1,16 @@
+# Copyright 2013 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+CLEANFILES+=maketables
+
+maketables: maketables.go
+	go build $^
+
+tables:	maketables
+	./maketables > tables.go
+	gofmt -w -s tables.go
+
+# Build (but do not run) maketables during testing,
+# just to make sure it still compiles.
+testshort: maketables
--- a/locale/examples_test.go
+++ b/locale/examples_test.go
@ -0,0 +1,90 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package locale_test
+
+import (
+	"code.google.com/p/go.text/locale"
+	"fmt"
+)
+
+func ExampleID_Canonicalize() {
+	p := func(id string) {
+		loc, _ := locale.Parse(id)
+		fmt.Printf("BCP47(%s) -> %s\n", id, loc.Canonicalize(locale.BCP47))
+		fmt.Printf("Macro(%s) -> %s\n", id, loc.Canonicalize(locale.Macro))
+	}
+	p("en-Latn")
+	p("zh-cmn")
+	p("bjd")
+	p("iw-Latn-fonipa-u-cu-usd")
+	// Output:
+	// BCP47(en-Latn) -> en
+	// Macro(en-Latn) -> en-Latn
+	// BCP47(zh-cmn) -> cmn
+	// Macro(zh-cmn) -> zh
+	// BCP47(bjd) -> drl
+	// Macro(bjd) -> bjd
+	// BCP47(iw-Latn-fonipa-u-cu-usd) -> he-Latn-fonipa-u-cu-usd
+	// Macro(iw-Latn-fonipa-u-cu-usd) -> iw-Latn-fonipa-u-cu-usd
+}
+
+func ExampleID_Parent() {
+	loc := locale.Make("sl-Latn-IT-nedis")
+	fmt.Println(loc.Parent())
+	// TODO:Output: sl-Latn-IT
+}
+
+func ExampleID_Written() {
+	loc := locale.Make("sl-Latn-IT-nedis")
+	fmt.Println(loc.Written())
+	// TODO:Output: sl-Latn
+}
+
+func ExampleID_Script() {
+	en := locale.Make("en")
+	sr := locale.Make("sr")
+	fmt.Println(en.Script())
+	fmt.Println(sr.Script())
+	// TODO:Output:
+	// Latn High
+	// Cyrl Low
+}
+
+func ExampleID_Part() {
+	loc := locale.Make("sr-RS")
+	script := loc.Part(locale.ScriptPart)
+	region := loc.Part(locale.RegionPart)
+	fmt.Printf("%q %q", script, region)
+	// TODO:Output: "" "RS"
+}
+
+func ExampleID_Scope() {
+	loc := locale.Make("sr")
+	set := loc.Scope()
+	fmt.Println(set.Locales())
+	fmt.Println(set.Languages())
+	fmt.Println(set.Scripts())
+	fmt.Println(set.Regions())
+	// TODO:Output:
+	// [sr_Cyrl sr_Cyrl_ME sr_Latn sr_Latn_ME sr_Cyrl_BA sr_Cyrl_RS sr_Latn_BA sr_Latn_RS]
+	// [sr]
+	// [Cyrl Latn]
+	// [BA ME RS]
+}
+
+func ExampleScript_Scope() {
+	loc := locale.Make("zen-Tfng")
+	script, _ := loc.Script()
+	set := script.Scope()
+	fmt.Println(set.Locales())
+	fmt.Println(set.Languages())
+	fmt.Println(set.Scripts())
+	fmt.Println(set.Regions())
+	// TODO:Output:
+	// [shi shi-Tfng shi-Tfng_MA tzm]
+	// [shi tzm zen]
+	// [Tfng]
+	// [MA]
+}
--- a/locale/locale.go
+++ b/locale/locale.go
@ -0,0 +1,319 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// NOTE: This package is still under development. Parts of it are not yet implemented,
+// and the API is subject to change.
+//
+// The locale package provides a type to represent BCP 47 locale identifiers.
+// It supports various canonicalizations defined in CLDR.
+package locale
+
+import "strings"
+
+var (
+	// Und represents the undefined language. It is also the root locale.
+	Und   = und
+	En    = en    // Default Locale for English.
+	En_US = en_US // Default locale for American English.
+	De    = de    // Default locale for German.
+	// TODO: list of most common language identifiers.
+)
+
+var (
+	Supported Set // All supported locales.
+	Common    Set // A selection of common locales.
+)
+
+var (
+	de    = ID{lang: getLangID([]byte("de")), region: unknownRegion, script: unknownScript}
+	en    = ID{lang: getLangID([]byte("en")), region: unknownRegion, script: unknownScript}
+	en_US = en
+	und   = ID{lang: unknownLang, region: unknownRegion, script: unknownScript}
+)
+
+// ID represents a BCP 47 locale identifier. It can be used to
+// select an instance for a specific locale. All Locale values are guaranteed
+// to be well-formed.
+type ID struct {
+	// In most cases, just lang, region and script will be needed.  In such cases
+	// str may be nil.
+	lang     langID
+	region   regionID
+	script   scriptID
+	pVariant byte   // offset in str
+	pExt     uint16 // offset of first extension
+	str      *string
+}
+
+// Make calls Parse and Canonicalize and returns the resulting ID.
+// Any errors are ignored and a sensible default is returned.
+// In most cases, locale IDs should be created using this method.
+func Make(id string) ID {
+	loc, _ := Parse(id)
+	return loc.Canonicalize(All)
+}
+
+// IsRoot returns true if loc is equal to locale "und".
+func (loc ID) IsRoot() bool {
+	if loc.str != nil {
+		n := len(*loc.str)
+		if n > 0 && loc.pExt > 0 && int(loc.pExt) < n {
+			return false
+		}
+		if uint16(loc.pVariant) != loc.pExt || strings.HasPrefix(*loc.str, "x-") {
+			return false
+		}
+		loc.str = nil
+	}
+	return loc == und
+}
+
+// CanonType can be used to enable or disable various types of canonicalization.
+type CanonType int
+
+const (
+	// Replace deprecated values with their preferred ones.
+	Deprecated CanonType = 1 << iota
+	// Remove redundant scripts.
+	SuppressScript
+	// Map the dominant language of macro language group to the macro language identifier.
+	// For example cmn -> zh.
+	Macro
+	// All canonicalizations prescribed by BCP 47.
+	BCP47 = Deprecated | SuppressScript
+	All   = BCP47 | Macro
+
+	// TODO: LikelyScript, LikelyRegion: suppress similar to ICU.
+)
+
+// Canonicalize replaces the identifier with its canonical equivalent.
+func (loc ID) Canonicalize(t CanonType) ID {
+	changed := false
+	if t&SuppressScript != 0 {
+		if loc.lang < langNoIndexOffset && uint8(loc.script) == suppressScript[loc.lang] {
+			loc.script = unknownScript
+			changed = true
+		}
+	}
+	if t&Deprecated != 0 {
+		l := normLang(langOldMap[:], loc.lang)
+		if l != loc.lang {
+			changed = true
+		}
+		loc.lang = l
+	}
+	if t&Macro != 0 {
+		l := normLang(langMacroMap[:], loc.lang)
+		if l != loc.lang {
+			changed = true
+		}
+		loc.lang = l
+	}
+	if changed && loc.str != nil {
+		ext := ""
+		if loc.pExt > 0 {
+			ext = (*loc.str)[loc.pExt+1:]
+		}
+		s := loc.makeString(loc.Part(VariantPart), ext)
+		loc.str = &s
+	}
+	return loc
+}
+
+// Parent returns the direct parent for this locale, which is the locale
+// from which this locale inherits any undefined values.
+func (loc ID) Parent() ID {
+	// TODO: implement
+	return und
+}
+
+// Written strips qualifiers from the identifier until the resulting identifier
+// inherits from root.
+func (loc ID) Written() ID {
+	// TODO: implement
+	return und
+}
+
+// Confidence indicates the level of certainty for a given return value.
+// For example, Serbian may be written in cyrillic or latin script.
+// The confidence level indicates whether a value was explicitly specified,
+// whether it is typically the only possible value, or whether there is
+// an ambiguity.
+type Confidence int
+
+const (
+	Not   Confidence = iota // full confidence that there was no match
+	Low                     // most likely value picked out of a set of alternatives
+	High                    // value inferred from a parent and is generally assumed to be the correct match
+	Exact                   // exact match or explicitly specified value
+)
+
+// makeString builds the string form of loc: the language, then the script
+// and region when they are known, then vars and ext, each preceded by '-'.
+// As a side effect it records in loc.pVariant the offset just past the '-'
+// that precedes vars, and in loc.pExt the offset of the '-' that precedes
+// ext (or the end of vars when ext is empty).
+//
+// NOTE(review): pVariant is only written when vars is non-empty and pExt only
+// when vars or ext is non-empty, so a caller that stores the result in
+// loc.str may be left with offsets that describe the previous string.
+// Confirm against Parse and Part that this cannot be observed.
+func (loc *ID) makeString(vars, ext string) string {
+	buf := [128]byte{}
+	n := loc.lang.stringToBuf(buf[:])
+	if loc.script != unknownScript {
+		n += copy(buf[n:], "-")
+		n += copy(buf[n:], loc.script.String())
+	}
+	if loc.region != unknownRegion {
+		n += copy(buf[n:], "-")
+		n += copy(buf[n:], loc.region.String())
+	}
+	// b aliases buf, so the appends below reuse the array until its
+	// capacity is exceeded.
+	b := buf[:n]
+	if vars != "" {
+		b = append(b, '-')
+		loc.pVariant = byte(len(b))
+		b = append(b, vars...)
+		loc.pExt = uint16(len(b))
+	}
+	if ext != "" {
+		loc.pExt = uint16(len(b))
+		b = append(b, '-')
+		b = append(b, ext...)
+	}
+	return string(b)
+}
+
+// String returns the canonical string representation of the locale.
+func (loc ID) String() string {
+	if loc.str == nil {
+		return loc.makeString("", "")
+	}
+	return *loc.str
+}
+
+// Language returns the language for the locale.
+func (loc ID) Language() Language {
+	// TODO: implement
+	return Language{0}
+}
+
+// Script infers the script for the locale.  If it was not explicitly given, it will infer
+// a most likely candidate from the parent locales.
+// If more than one script is commonly used for a language, the most likely one
+// is returned with a low confidence indication. For example, it returns (Cyrl, Low)
+// for Serbian.
+// Note that an inferred script is never guaranteed to be the correct one. Latn is
+// almost exclusively used for Afrikaans, but Arabic has been used for some texts
+// in the past.  Also, the script that is commonly used may change over time.
+func (loc ID) Script() (Script, Confidence) {
+	// TODO: implement
+	return Script{0}, Exact
+}
+
+// Region returns the region for l.  If it was not explicitly given, it will
+// infer a most likely candidate from the parent locales.
+func (loc ID) Region() (Region, Confidence) {
+	// TODO: implement
+	return Region{0}, Exact
+}
+
+// Variant returns the variant specified explicitly for this locale
+// or an empty Variant if no variant was specified.
+func (loc ID) Variant() Variant {
+	return Variant{""}
+}
+
+// Scope returns a Set that indicates the common variants for which the
+// locale may be applicable.
+// Locales will return all valid sublocales. Languages will return the language
+// for this locale.  Regions will return all regions for which a locale with
+// this language is defined.  And Scripts will return all scripts that are
+// commonly used for this locale.
+// If any of these properties is explicitly specified, the respective lists
+// will be constrained.  For example, for sr_Latn Scripts will return [Latn]
+// instead of [Cyrl Latn].
+func (loc ID) Scope() Set {
+	// TODO: implement
+	return nil
+}
+
+// TypeForKey returns the type associated with the given key, where key
+// is one of the allowed values defined for the Unicode locale extension ('u') in
+// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
+// TypeForKey will traverse the inheritance chain to get the correct value.
+func (loc ID) TypeForKey(key string) string {
+	// TODO: implement
+	return ""
+}
+
+// KeyValueString returns a string to be set with KeyValuePart.
+// Error handling is done by Compose.
+func KeyValueString(m map[string]string) (string, error) {
+	// TODO: implement
+	return "", nil
+}
+
+// SimplifyOptions removes options in loc that it would inherit
+// by default from its parent.
+func (loc ID) SimplifyOptions() ID {
+	// TODO: implement
+	return ID{}
+}
+
+// Language is an ISO 639 language identifier.
+type Language struct {
+	langID
+}
+
+// Scope returns a Set of all pre-defined sublocales for this language.
+func (l Language) Scope() Set {
+	// TODO: implement
+	return nil
+}
+
+// Script is a 4-letter ISO 15924 code for representing scripts.
+// It is idiomatically represented in title case.
+type Script struct {
+	scriptID
+}
+
+// Scope returns a Set of all pre-defined sublocales applicable to the script.
+func (s Script) Scope() Set {
+	// TODO: implement
+	return nil
+}
+
+// Region is an ISO 3166-1 or UN M.49 code for representing countries and regions.
+type Region struct {
+	regionID
+}
+
+// IsCountry returns whether this region is a country.
+func (r Region) IsCountry() bool {
+	// TODO: implement
+	return true
+}
+
+// Scope returns a Set of all pre-defined sublocales applicable to the region.
+func (r Region) Scope() Set {
+	// TODO: implement
+	return nil
+}
+
+// Variant represents a registered variant of a language as defined by BCP 47.
+type Variant struct {
+	// TODO: implement
+	variant string
+}
+
+// String returns the string representation of the variant.
+func (v Variant) String() string {
+	// TODO: implement
+	return v.variant
+}
+
+// Currency is an ISO 4217 currency designator.
+type Currency struct {
+	currencyID
+}
+
+// Set provides information about a set of locales.
+type Set interface {
+	Locales() []ID
+	Languages() []Language
+	Regions() []Region
+	Scripts() []Script
+	Currencies() []Currency
+}
--- a/locale/locale_test.go
+++ b/locale/locale_test.go
@ -0,0 +1,86 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package locale
+
+import (
+	"reflect"
+	"testing"
+)
+
+func TestIDSize(t *testing.T) {
+	id := ID{}
+	typ := reflect.TypeOf(id)
+	if typ.Size() > 16 {
+		t.Errorf("size of ID was %d; want 16", typ.Size())
+	}
+}
+
+func TestIsRoot(t *testing.T) {
+	for i, tt := range parseTests() {
+		loc, _ := Parse(tt.in)
+		undef := tt.lang == "und" && tt.script == "" && tt.region == "" && tt.ext == ""
+		if loc.IsRoot() != undef {
+			t.Errorf("%d: was %v; want %v", i, loc.IsRoot(), undef)
+		}
+	}
+}
+
+/*
+func TestParent(t *testing.T) {
+	tests := []struct {
+		in, out string
+	}{
+		{"und", "und"},
+		{"de-1994", "de"},
+		{"de-CH-1994", "de-CH"},
+		{"de-Cyrl-CH-1994", "de-Cyrl-CH"},
+		{"zh", "und"},
+		{"zh-HK-u-cu-usd", "zh"},
+		{"zh-Hans-HK-u-cu-usd", "zh-Hans"},
+		{"zh-u-cu-usd", "und"},
+		{"zh_Hans", "zh"},
+		{"zh_Hant", "und"},
+		{"vai", "und"},
+		{"vai_Latn", "und"},
+		{"nl_Cyrl", "nl"},
+		{"nl", "und"},
+		{"en_US", "en"},
+		{"en_150", "en-GB"},
+		{"en-SG", "en-GB"},
+		{"en_GB", "en"},
+	}
+	for i, tt := range tests {
+		test, _ := Parse(tt.in)
+		gold, _ := Parse(tt.out)
+		if p := test.Parent(); p.String() != gold.String() {
+			t.Errorf("%d:parent(%q): found %s; want %s", i, tt.in, p.String(), tt.out)
+		}
+	}
+}
+
+func TestWritten(t *testing.T) {
+	tests := []struct {
+		in, out string
+	}{
+		{"und", "und"},
+		{"zh-Hans", "zh"},
+		{"zh-Hant", "zh-Hant"},
+		{"vai", "vai"},
+		{"vai-Latn", "vai-Latn"},
+		{"nl-Cyrl", "nl-Cyrl"},
+		{"en-US", "en"},
+		{"en-150", "en"},
+		{"en-SG", "en"},
+		{"en-GB", "en"},
+	}
+	for i, tt := range tests {
+		test, _ := Parse(tt.in)
+		gold, _ := Parse(tt.out)
+		if test.Written() != gold {
+			t.Errorf("%d:parent(%q): found %s; want %s", i, tt.in, test.String(), tt.out)
+		}
+	}
+}
+*/
--- a/locale/lookup.go
+++ b/locale/lookup.go
@ -0,0 +1,348 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package locale
+
+import (
+	"fmt"
+	"sort"
+	"strconv"
+)
+
+// get returns the first n bytes of the id-th 4-byte entry of idx.
+func get(idx string, id, n int) string {
+	start := id << 2
+	return idx[start : start+n]
+}
+
+// cmp compares the string a with the byte slice b lexicographically.
+// It returns -1 if a sorts before b, 1 if a sorts after b and 0 if
+// both hold the same bytes.
+func cmp(a string, b []byte) int {
+	limit := len(a)
+	if len(b) < limit {
+		limit = len(b)
+	}
+	// The first differing byte within the common prefix decides.
+	for k := 0; k < limit; k++ {
+		if x, y := a[k], b[k]; x != y {
+			if x < y {
+				return -1
+			}
+			return 1
+		}
+	}
+	// Equal prefixes: the shorter operand sorts first.
+	if len(a) == len(b) {
+		return 0
+	}
+	if len(a) < len(b) {
+		return -1
+	}
+	return 1
+}
+
+// search searches for the insertion point of key in smap, which is a
+// string with consecutive 4-byte entries. Only the first len(key)
+// bytes from the start of the 4-byte entries will be considered.
+func search(smap string, key []byte) int {
+	n := len(key)
+	return sort.Search(len(smap)>>2, func(i int) bool {
+		return cmp(get(smap, i, n), key) != -1
+	}) << 2
+}
+
+// index returns the byte offset of the 4-byte entry in smap whose first
+// len(key) bytes equal key, or -1 if smap has no such entry.
+func index(smap string, key []byte) int {
+	i := search(smap, key)
+	// search yields len(smap) when key sorts after every entry. Guard the
+	// slice expression so that a table without a trailing sentinel entry
+	// reports "not found" instead of panicking.
+	if i+len(key) > len(smap) || cmp(smap[i:i+len(key)], key) != 0 {
+		return -1
+	}
+	return i
+}
+
+// searchUint returns the smallest index i for which imap[i] >= key, or
+// len(imap) if there is none. It uses binary search, so imap has to be
+// sorted in increasing order.
+func searchUint(imap []uint16, key uint16) int {
+	atOrAboveKey := func(pos int) bool {
+		return !(imap[pos] < key)
+	}
+	return sort.Search(len(imap), atOrAboveKey)
+}
+
+// fixCase rewrites b in place so that every byte has the same case as the
+// byte at the same position in pat. A pattern byte up to and including 'Z'
+// asks for upper case, any other pattern byte for lower case.
+// It returns false if b and pat differ in length or if b holds a byte that
+// is not an ASCII letter; bytes ahead of the offending one have already
+// been rewritten by then.
+func fixCase(pat string, b []byte) bool {
+	if len(b) != len(pat) {
+		return false
+	}
+	const caseGap = 'z' - 'Z'
+	for i := range b {
+		c := b[i]
+		if wantUpper := pat[i] <= 'Z'; wantUpper {
+			if c >= 'a' {
+				c -= caseGap
+			}
+			if c < 'A' || c > 'Z' {
+				return false
+			}
+		} else {
+			if c <= 'Z' {
+				c += caseGap
+			}
+			if c < 'a' || c > 'z' {
+				return false
+			}
+		}
+		b[i] = c
+	}
+	return true
+}
+
+type langID uint16
+
+// getLangID returns the langID of s if s is a canonical ID
+// or unknownLang if s is not a canonical langID.
+func getLangID(s []byte) langID {
+	if len(s) == 2 {
+		return getLangISO2(s)
+	}
+	return getLangISO3(s)
+}
+
+// normLang returns the langID that id maps to according to mapping m, or id
+// itself if m has no entry for it. m is binary-searched on its from field
+// and therefore has to be sorted by that field.
+func normLang(m []struct{ from, to uint16 }, id langID) langID {
+	k := sort.Search(len(m), func(i int) bool {
+		return m[i].from >= uint16(id)
+	})
+	// sort.Search returns len(m) when every from value is smaller than id
+	// (or when m is empty), so k has to be range-checked before m[k] is read.
+	if k < len(m) && m[k].from == uint16(id) {
+		return langID(m[k].to)
+	}
+	return id
+}
+
+// getLangISO2 returns the langID for the given 2-letter ISO language code
+// or unknownLang if this does not exist.
+func getLangISO2(s []byte) langID {
+	if len(s) == 2 && fixCase("zz", s) {
+		if i := index(lang, s); i != -1 && lang[i+3] != 0 {
+			return langID(i >> 2)
+		}
+	}
+	return unknownLang
+}
+
+const base = 'z' - 'a' + 1
+
+// strToInt interprets s as a number written in base 26 with 'a' as digit
+// zero and the most significant digit first. Every byte of s is expected to
+// be a lower-case ASCII letter.
+func strToInt(s []byte) uint {
+	var acc uint
+	for _, c := range s {
+		acc = acc*base + uint(c-'a')
+	}
+	return acc
+}
+
+// intToStr is the inverse of strToInt: it fills s, from its last byte to its
+// first, with the base-26 digits of v using 'a' as digit zero.
+// len(s) must equal the length of the string originally given to strToInt.
+func intToStr(v uint, s []byte) {
+	for pos := len(s); pos > 0; pos-- {
+		s[pos-1] = 'a' + byte(v%base)
+		v /= base
+	}
+}
+
+// getLangISO3 returns the langID for the given 3-letter ISO language code
+// or unknownLang if this does not exist.
+func getLangISO3(s []byte) langID {
+	if fixCase("und", s) {
+		// first try to match canonical 3-letter entries
+		for i := search(lang, s[:2]); cmp(lang[i:i+2], s[:2]) == 0; i += 4 {
+			if lang[i+3] == 0 && lang[i+2] == s[2] {
+				return langID(i >> 2)
+			}
+		}
+		if i := index(altLangISO3, s); i != -1 {
+			return langID(altLangISO3[i+3])
+		}
+		n := strToInt(s)
+		if langNoIndex[n/8]&(1<<(n%8)) != 0 {
+			return langID(n) + langNoIndexOffset
+		}
+		// Check for non-canonical uses of ISO3.
+		for i := search(lang, s[:1]); lang[i] == s[0]; i += 4 {
+			if cmp(lang[i+2:][:2], s[1:3]) == 0 {
+				return langID(i >> 2)
+			}
+		}
+	}
+	return unknownLang
+}
+
+// stringToBuf writes the string to b and returns the number of bytes
+// written.  cap(b) must be >= 3.
+func (id langID) stringToBuf(b []byte) int {
+	if id >= langNoIndexOffset {
+		intToStr(uint(id)-langNoIndexOffset, b[:3])
+		return 3
+	}
+	l := lang[id<<2:]
+	if l[3] == 0 {
+		return copy(b, l[:3])
+	}
+	return copy(b, l[:2])
+}
+
+// String returns the BCP 47 representation of the langID.
+func (id langID) String() string {
+	if id >= langNoIndexOffset {
+		id -= langNoIndexOffset
+		buf := [3]byte{}
+		intToStr(uint(id), buf[:])
+		return string(buf[:])
+	}
+	l := lang[id<<2:]
+	if l[3] == 0 {
+		return l[:3]
+	}
+	return l[:2]
+}
+
+// ISO3 returns the ISO 639-3 language code.
+func (id langID) ISO3() string {
+	if id >= langNoIndexOffset {
+		return id.String()
+	}
+	l := lang[id<<2:]
+	if l[3] == 0 {
+		return l[:3]
+	} else if l[2] == 0 {
+		return get(altLangISO3, int(l[3]), 3)
+	}
+	// This allocation will only happen for 3-letter ISO codes
+	// that are non-canonical BCP 47 language identifiers.
+	return l[0:1] + l[2:4]
+}
+
+type regionID uint16
+
+// getRegionID returns the region id for s if s is a valid 2-letter ISO code,
+// 3-letter ISO code or 3-digit UN M.49 code, or unknownRegion otherwise.
+func getRegionID(s []byte) regionID {
+	if len(s) == 3 {
+		if isAlpha(s[0]) {
+			return getRegionISO3(s)
+		}
+		if i, err := strconv.ParseUint(string(s), 10, 10); err == nil {
+			return getRegionM49(int(i))
+		}
+	}
+	return getRegionISO2(s)
+}
+
+// getRegionISO2 returns the regionID for the given 2-letter ISO country code
+// or unknownRegion if this does not exist.
+func getRegionISO2(s []byte) regionID {
+	if fixCase("ZZ", s) {
+		if i := index(regionISO, s); i != -1 {
+			return regionID(i>>2) + isoRegionOffset
+		}
+	}
+	return unknownRegion
+}
+
+// getRegionISO3 returns the regionID for the given 3-letter ISO country code
+// or unknownRegion if this does not exist.
+func getRegionISO3(s []byte) regionID {
+	if fixCase("ZZZ", s) {
+		for i := search(regionISO, s[:1]); regionISO[i] == s[0]; i += 4 {
+			if cmp(regionISO[i+2:][:2], s[1:3]) == 0 {
+				return regionID(i>>2) + isoRegionOffset
+			}
+		}
+		for i := 0; i < len(altRegionISO3); i += 3 {
+			if cmp(altRegionISO3[i:i+3], s) == 0 {
+				return regionID(altRegionIDs[i/3])
+			}
+		}
+	}
+	return unknownRegion
+}
+
+// getRegionM49 returns the regionID whose entry in the m49 table equals n.
+// It returns unknownRegion if n is zero, if n cannot be represented in the
+// table's 16-bit values, or if no entry matches.
+func getRegionM49(n int) regionID {
+	// These will mostly be group IDs, which are at the start of the list.
+	// For other values this may be a bit slow, as there are over 300 entries.
+	// TODO: group id is sorted!
+	//
+	// Reject values outside the uint16 range up front: the comparison below
+	// truncates n, so for example 65536+419 would otherwise match 419.
+	if n <= 0 || n > 0xffff {
+		return unknownRegion
+	}
+	for i, v := range m49 {
+		if v == uint16(n) {
+			return regionID(i)
+		}
+	}
+	return unknownRegion
+}
+
+// String returns the BCP 47 representation for the region.
+func (r regionID) String() string {
+	if r < isoRegionOffset {
+		return fmt.Sprintf("%03d", r.m49())
+	}
+	r -= isoRegionOffset
+	return get(regionISO, int(r), 2)
+}
+
+// The use of this is uncommon.
+// Note: not all regionIDs have corresponding 3-letter ISO codes!
+func (r regionID) iso3() string {
+	if r < isoRegionOffset {
+		return ""
+	}
+	r -= isoRegionOffset
+	reg := regionISO[r<<2:]
+	switch reg[2] {
+	case 0:
+		return altRegionISO3[reg[3]:][:3]
+	case ' ':
+		return ""
+	}
+	return reg[0:1] + reg[2:4]
+}
+
+func (r regionID) m49() uint16 {
+	return m49[r]
+}
+
+type scriptID uint8
+
+// getScriptID returns the script id for string s. It assumes that s
+// is of the format [A-Z][a-z]{3}.
+func getScriptID(idx string, s []byte) scriptID {
+	if fixCase("Zzzz", s) {
+		if i := index(idx, s); i != -1 {
+			return scriptID(i >> 2)
+		}
+	}
+	return unknownScript
+}
+
+// String returns the script code in title case.
+func (s scriptID) String() string {
+	return get(script, int(s), 4)
+}
+
+type currencyID uint16
+
+func getCurrencyID(idx string, s []byte) currencyID {
+	if fixCase("XXX", s) {
+		if i := index(idx, s); i != -1 {
+			return currencyID(i >> 2)
+		}
+	}
+	return unknownCurrency
+}
+
+// String returns the upper case representation of the currency.
+func (c currencyID) String() string {
+	return get(currency, int(c), 3)
+}
+
+func round(index string, c currencyID) int {
+	return int(index[c<<2+3] >> 2)
+}
+
+func decimals(index string, c currencyID) int {
+	return int(index[c<<2+3] & 0x03)
+}
--- a/locale/lookup_test.go
+++ b/locale/lookup_test.go
@ -0,0 +1,254 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package locale
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+)
+
+var strdata = []string{
+	"aa  ",
+	"aaa ",
+	"aaaa",
+	"aaab",
+	"aab ",
+	"ab  ",
+	"ba  ",
+	"xxxx",
+}
+
+func strtests() map[string]int {
+	return map[string]int{
+		"    ": 0,
+		"a":    0,
+		"aa":   0,
+		"aaa":  4,
+		"aa ":  0,
+		"aaaa": 8,
+		"aaab": 12,
+		"aaax": 16,
+		"b":    24,
+		"ba":   24,
+		"bbbb": 28,
+	}
+}
+
+func TestSearch(t *testing.T) {
+	for k, v := range strtests() {
+		if i := search(strings.Join(strdata, ""), []byte(k)); i != v {
+			t.Errorf("%s: found %d; want %d", k, i, v)
+		}
+	}
+}
+
+func TestIndex(t *testing.T) {
+	strtests := strtests()
+	strtests["    "] = -1
+	strtests["aaax"] = -1
+	strtests["bbbb"] = -1
+	for k, v := range strtests {
+		if i := index(strings.Join(strdata, ""), []byte(k)); i != v {
+			t.Errorf("%s: found %d; want %d", k, i, v)
+		}
+	}
+}
+
+func b(s string) []byte {
+	return []byte(s)
+}
+
+// TestFixCase checks fixCase against a flat list of (pattern, input, want)
+// triples. A triple whose want equals its pattern marks an input that
+// fixCase is expected to reject.
+func TestFixCase(t *testing.T) {
+	tests := []string{
+		"aaaa", "AbCD", "abcd",
+		"Zzzz", "AbCD", "Abcd",
+		"Zzzz", "AbC", "Zzzz",
+		"XXX", "ab ", "XXX",
+		"XXX", "usd", "USD",
+		"cmn", "AB ", "cmn",
+		"gsw", "CMN", "cmn",
+	}
+	// Use <= so that the final triple is exercised as well.
+	for i := 0; i+3 <= len(tests); i += 3 {
+		tt := tests[i:]
+		buf := [4]byte{}
+		b := buf[:copy(buf[:], tt[1])]
+		res := fixCase(tt[0], b)
+		if res && cmp(tt[2], b) != 0 || !res && tt[0] != tt[2] {
+			// Report the rewritten bytes; res is a bool and cannot be
+			// formatted with %q.
+			t.Errorf("%s+%s: found %q (ok=%v); want %q", tt[0], tt[1], b, res, tt[2])
+		}
+	}
+}
+
+func TestLangID(t *testing.T) {
+	tests := []struct{ id, bcp47, iso3, norm string }{
+		{id: "", bcp47: "und", iso3: "und"},
+		{id: "  ", bcp47: "und", iso3: "und"},
+		{id: "   ", bcp47: "und", iso3: "und"},
+		{id: "    ", bcp47: "und", iso3: "und"},
+		{id: "und", bcp47: "und", iso3: "und"},
+		{id: "aju", bcp47: "aju", iso3: "aju", norm: "jrb"},
+		{id: "jrb", bcp47: "jrb", iso3: "jrb"},
+		{id: "es", bcp47: "es", iso3: "spa"},
+		{id: "spa", bcp47: "es", iso3: "spa"},
+		{id: "ji", bcp47: "ji", iso3: "yid-", norm: "yi"},
+		{id: "jw", bcp47: "jw", iso3: "jav-", norm: "jv"},
+		{id: "ar", bcp47: "ar", iso3: "ara"},
+		{id: "arb", bcp47: "arb", iso3: "arb", norm: "ar"},
+		{id: "ar", bcp47: "ar", iso3: "ara"},
+		{id: "kur", bcp47: "ku", iso3: "kur"},
+		{id: "nl", bcp47: "nl", iso3: "nld"},
+		{id: "NL", bcp47: "nl", iso3: "nld"},
+		{id: "gsw", bcp47: "gsw", iso3: "gsw"},
+		{id: "gSW", bcp47: "gsw", iso3: "gsw"},
+		{id: "und", bcp47: "und", iso3: "und"},
+		{id: "sh", bcp47: "sh", iso3: "hbs", norm: "sr"},
+		{id: "hbs", bcp47: "sh", iso3: "hbs", norm: "sr"},
+		{id: "no", bcp47: "no", iso3: "nor", norm: "nb"},
+		{id: "nor", bcp47: "no", iso3: "nor", norm: "nb"},
+		{id: "cmn", bcp47: "cmn", iso3: "cmn", norm: "zh"},
+	}
+	for i, tt := range tests {
+		want := getLangID(b(tt.id))
+		if id := getLangISO2(b(tt.bcp47)); len(tt.bcp47) == 2 && want != id {
+			t.Errorf("%d:getISO2(%s): found %v; want %v", i, tt.bcp47, id, want)
+		}
+		if len(tt.iso3) == 3 {
+			if id := getLangISO3(b(tt.iso3)); want != id {
+				t.Errorf("%d:getISO3(%s): found %q; want %q", i, tt.iso3, id, want)
+			}
+			if id := getLangID(b(tt.iso3)); want != id {
+				t.Errorf("%d:getID3(%s): found %v; want %v", i, tt.iso3, id, want)
+			}
+		}
+		norm := want
+		if tt.norm != "" {
+			norm = getLangID(b(tt.norm))
+		}
+		id := normLang(langOldMap[:], want)
+		id = normLang(langMacroMap[:], id)
+		if id != norm {
+			t.Errorf("%d:norm(%s): found %v; want %v", i, tt.id, id, norm)
+		}
+		if id := want.String(); tt.bcp47 != id {
+			t.Errorf("%d:String(): found %s; want %s", i, id, tt.bcp47)
+		}
+		if id := want.ISO3(); tt.iso3[:3] != id {
+			t.Errorf("%d:iso3(): found %s; want %s", i, id, tt.iso3[:3])
+		}
+	}
+}
+
+func TestRegionID(t *testing.T) {
+	tests := []struct {
+		id, iso2, iso3 string
+		m49            int
+	}{
+		{"AA", "AA", "AAA", 958},
+		{"IC", "IC", "", 0},
+		{"ZZ", "ZZ", "ZZZ", 999},
+		{"EU", "EU", "QUU", 967},
+		{"419", "", "", 419},
+	}
+	for i, tt := range tests {
+		want := getRegionID(b(tt.id))
+		if id := getRegionISO2(b(tt.iso2)); len(tt.iso2) == 2 && want != id {
+			t.Errorf("%d:getISO2(%s): found %d; want %d", i, tt.iso2, id, want)
+		}
+		if id := getRegionISO3(b(tt.iso3)); len(tt.iso3) == 3 && want != id {
+			t.Errorf("%d:getISO3(%s): found %d; want %d", i, tt.iso3, id, want)
+		}
+		if id := getRegionID(b(tt.iso3)); len(tt.iso3) == 3 && want != id {
+			t.Errorf("%d:getID3(%s): found %d; want %d", i, tt.iso3, id, want)
+		}
+		if id := getRegionM49(tt.m49); tt.m49 != 0 && want != id {
+			t.Errorf("%d:getM49(%d): found %d; want %d", i, tt.m49, id, want)
+		}
+		if len(tt.iso2) == 2 {
+			if id := want.String(); tt.iso2 != id {
+				t.Errorf("%d:String(): found %s; want %s", i, id, tt.iso2)
+			}
+		} else {
+			if id := want.String(); fmt.Sprintf("%03d", tt.m49) != id {
+				t.Errorf("%d:String(): found %s; want %03d", i, id, tt.m49)
+			}
+		}
+		if id := want.iso3(); tt.iso3 != id {
+			t.Errorf("%d:iso3(): found %s; want %s", i, id, tt.iso3)
+		}
+		if id := int(want.m49()); tt.m49 != id {
+			t.Errorf("%d:m49(): found %d; want %d", i, id, tt.m49)
+		}
+	}
+}
+
+func TestScript(t *testing.T) {
+	idx := "BbbbDdddEeeeZzzz\xff\xff\xff\xff"
+	const und = unknownScript
+	tests := []struct {
+		in  string
+		out scriptID
+	}{
+		{"    ", und},
+		{"      ", und},
+		{"  ", und},
+		{"", und},
+		{"Bbbb", 0},
+		{"Dddd", 1},
+		{"dddd", 1},
+		{"dDDD", 1},
+		{"Eeee", 2},
+		{"Zzzz", 3},
+	}
+	for i, tt := range tests {
+		if id := getScriptID(idx, b(tt.in)); id != tt.out {
+			t.Errorf("%d:%s: found %d; want %d", i, tt.in, id, tt.out)
+		}
+	}
+}
+
+// TestCurrency checks getCurrencyID, round and decimals against a small
+// hand-built index of 4-byte entries: a 3-letter code followed by one info
+// byte, terminated by a sentinel entry of 0xff bytes.
+func TestCurrency(t *testing.T) {
+	// curInfo packs the rounding value and the number of decimals into the
+	// single info byte of an entry. The byte is built explicitly: converting
+	// an int straight to a string would encode it as a rune instead.
+	curInfo := func(round, dec int) string {
+		return string([]byte{byte(round<<2 + dec)})
+	}
+	idx := strings.Join([]string{
+		"BBB" + curInfo(5, 2),
+		"DDD\x00",
+		"XXX\x00",
+		"ZZZ\x00",
+		"\xff\xff\xff\xff",
+	}, "")
+	const und = unknownCurrency
+	tests := []struct {
+		in         string
+		out        currencyID
+		round, dec int
+	}{
+		{"   ", und, 0, 0},
+		{"     ", und, 0, 0},
+		{" ", und, 0, 0},
+		{"", und, 0, 0},
+		{"BBB", 0, 5, 2},
+		{"DDD", 1, 0, 0},
+		{"dDd", 1, 0, 0},
+		{"ddd", 1, 0, 0},
+		{"XXX", 2, 0, 0},
+		{"Zzz", 3, 0, 0},
+	}
+	for i, tt := range tests {
+		id := getCurrencyID(idx, b(tt.in))
+		if id != tt.out {
+			t.Errorf("%d:%s: found %d; want %d", i, tt.in, id, tt.out)
+		}
+		// Only the four real entries of idx carry an info byte to inspect.
+		if id <= 3 {
+			if d := decimals(idx, id); d != tt.dec {
+				t.Errorf("%d:dec(%s): found %d; want %d", i, tt.in, d, tt.dec)
+			}
+			if d := round(idx, id); d != tt.round {
+				t.Errorf("%d:round(%s): found %d; want %d", i, tt.in, d, tt.round)
+			}
+		}
+	}
+}
--- a/locale/maketables.go
+++ b/locale/maketables.go
@ -0,0 +1,931 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// Locale identifier table generator.
+// Data read from the web.
+
+package main
+
+import (
+	"bufio"
+	"code.google.com/p/go.text/cldr"
+	"flag"
+	"fmt"
+	"hash"
+	"hash/fnv"
+	"io"
+	"log"
+	"math"
+	"net/http"
+	"os"
+	"path"
+	"reflect"
+	"sort"
+	"strconv"
+	"strings"
+)
+
+// Command-line flags controlling where the generator reads its input from.
+var (
+	// url is the location of the CLDR core archive. Note that the flag is
+	// registered under the name "cldr"; the default is derived from
+	// cldr.Version.
+	url = flag.String("cldr",
+		"http://www.unicode.org/Public/cldr/"+cldr.Version+"/core.zip",
+		"URL of CLDR archive.")
+	// iana is the location of the IANA language subtag registry.
+	iana = flag.String("iana",
+		"http://www.iana.org/assignments/language-subtag-registry",
+		"URL of IANA language subtag registry.")
+	test = flag.Bool("test", false,
+		"test existing tables; can be used to compare web data with package data.")
+	// localFiles makes openReader look for the base name of each URL in
+	// the current working directory instead of fetching it from the web.
+	localFiles = flag.Bool("local", false,
+		"data files have been copied to the current directory; for debugging only.")
+)
+
+// comment holds the documentation that is emitted above the generated
+// tables. The first word of every entry is the name of the identifier it
+// documents; init uses that word as the lookup key in commentIndex, so it
+// must match the name passed to the write* methods exactly.
+var comment = []string{
+	`
+lang holds an alphabetically sorted list of BCP 47 language identifiers.
+All entries are 4 bytes. The index of the identifier (divided by 4) is the language ID.
+For 2-byte language identifiers, the two successive bytes have the following meaning:
+    - if the first letter of the 2- and 3-letter ISO codes are the same:
+      the second and third letter of the 3-letter ISO code.
+    - otherwise: a 0 and a by 2 bits right-shifted index into altLangISO3.
+For 3-byte language identifiers the 4th byte is 0.`,
+	`
+langNoIndex is a bit vector of all 3-letter language codes that are not used as an index
+in lookup tables. The language ids for these language codes are derived directly
+from the letters and are not consecutive.`,
+	`
+altLangISO3 holds an alphabetically sorted list of 3-letter language code alternatives
+to 2-letter language codes that cannot be derived using the method described above.
+Each 3-letter code is followed by its 1-byte langID.`,
+	`
+tagAlias holds a mapping from legacy and grandfathered tags to their locale ID.`,
+	`
+langOldMap maps deprecated langIDs to their suggested replacements.`,
+	`
+langMacroMap maps languages to their macro language replacement, if applicable.`,
+	`
+script is an alphabetically sorted list of ISO 15924 codes. The index
+of the script in the string, divided by 4, is the internal script ID.`,
+	`
+isoRegionOffset needs to be added to the index of regionISO to obtain the regionID
+for 2-letter ISO codes. (The first isoRegionOffset regionIDs are reserved for
+the UN.M49 codes used for groups.)`,
+	`
+regionISO holds a list of alphabetically sorted 2-letter ISO region codes.
+Each 2-letter code is followed by two bytes with the following meaning:
+    - [A-Z]{2}: the first letter of the 2-letter code plus these two
+                letters form the 3-letter ISO code.
+    - 0, n:     index into altRegionISO3.`,
+	`
+m49 maps regionIDs to UN.M49 codes. The first isoRegionOffset entries are
+codes indicating collections of regions.`,
+	`
+altRegionISO3 holds a list of 3-letter region codes that cannot be
+mapped to 2-letter codes using the default algorithm. This is a short list.`,
+	`
+altRegionIDs holds a list of regionIDs the positions of which match those
+of the 3-letter ISO codes in altRegionISO3.`,
+	`
+currency holds an alphabetically sorted list of canonical 3-letter currency identifiers.
+Each identifier is followed by a byte of which the 6 most significant bits
+indicate the rounding and the least 2 significant bits indicate the
+number of decimal positions.`,
+	`
+suppressScript is an index from langID to the dominant script for that language,
+if it exists.  If a script is given, it should be suppressed from the language tag.`,
+	`
+nRegionGroups is the number of region groups.  All regionIDs < nRegionGroups
+are groups.`,
+	`
+regionInclusion maps region identifiers to sets of regions in regionInclusionBits,
+where each set holds all groupings that are directly connected in a region
+containment graph.`,
+	`
+regionInclusionBits is an array of bit vectors where every vector represents
+a set of region groupings.  These sets are used to compute the distance
+between two regions for the purpose of locale matching.`,
+	`
+regionInclusionNext marks, for each entry in regionInclusionBits, the set of
+all groups that are reachable from the groups set in the respective entry.`,
+}
+
+// TODO: consider changing some of these structures to tries. This can reduce
+// memory, but may increase the need for memory allocations. This could be
+// mitigated if we can piggyback on locale strings for common cases.
+
+// failOnError logs e and panics when e is non-nil; a nil error is a no-op.
+func failOnError(e error) {
+	if e == nil {
+		return
+	}
+	log.Panic(e)
+}
+
+// setType identifies the way in which a stringSet is used. The zero value
+// means that no use has been recorded yet (see stringSet.setType).
+type setType int
+
+const (
+	Indexed setType = 1 + iota // all elements must be of same size
+	Linear
+)
+
+// stringSet is a set of strings kept in a slice that is sorted and
+// deduplicated on demand (see compact).
+type stringSet struct {
+	// s holds the elements; it may be unsorted and contain duplicates
+	// until compact has run.
+	s              []string
+	// sorted lets compact skip its work; frozen makes add and remove panic.
+	sorted, frozen bool
+
+	// We often need to update values after the creation of an index is completed.
+	// We include a convenience map for keeping track of this.
+	update map[string]string
+	typ    setType // used for checking.
+}
+
+// clone returns a copy of ss whose element slice has its own backing array,
+// so changing elements of the copy leaves ss untouched. The update map is
+// not duplicated and stays shared with ss.
+func (ss *stringSet) clone() stringSet {
+	dup := *ss
+	dup.s = append([]string(nil), ss.s...)
+	return dup
+}
+
+// setType records that ss is used as a set of type t. It panics if ss was
+// previously used as a set of a different type.
+func (ss *stringSet) setType(t setType) {
+	if ss.typ != t && ss.typ != 0 {
+		log.Panicf("type %d cannot be assigned as it was already %d", t, ss.typ)
+	}
+	// Remember the type; without this assignment typ would stay zero
+	// forever and the check above could never trigger.
+	ss.typ = t
+}
+
+// parse splits s on whitespace and adds every resulting word to ss.
+func (ss *stringSet) parse(s string) {
+	words := bufio.NewScanner(strings.NewReader(s))
+	words.Split(bufio.ScanWords)
+	for {
+		if !words.Scan() {
+			break
+		}
+		ss.add(words.Text())
+	}
+}
+
+// assertChangeable panics if ss has been frozen.
+func (ss *stringSet) assertChangeable() {
+	if !ss.frozen {
+		return
+	}
+	log.Panic("attempt to modify a frozen stringSet")
+}
+
+// add appends s to the set; it panics if the set is frozen. The set is
+// marked as needing to be sorted again.
+func (ss *stringSet) add(s string) {
+	ss.assertChangeable()
+	ss.sorted = ss.frozen
+	ss.s = append(ss.s, s)
+}
+
+// freeze sorts and deduplicates the set and then marks it as frozen, after
+// which add and remove panic.
+func (ss *stringSet) freeze() {
+	ss.compact()
+
+	ss.frozen = true
+}
+
+// compact sorts the elements of ss and removes duplicates in place. It is a
+// no-op when the set is already marked as sorted. The set is only marked as
+// sorted once it is frozen, so an unfrozen set is re-sorted on every call.
+func (ss *stringSet) compact() {
+	if ss.sorted {
+		return
+	}
+	a := ss.s
+	if len(a) == 0 {
+		// Nothing to sort or deduplicate. Without this guard the final
+		// a[:k+1] below would slice past the end of an empty slice.
+		ss.sorted = ss.frozen
+		return
+	}
+	sort.Strings(a)
+	// k is the index of the last unique element kept so far.
+	k := 0
+	for i := 1; i < len(a); i++ {
+		if a[k] != a[i] {
+			a[k+1] = a[i]
+			k++
+		}
+	}
+	ss.s = a[:k+1]
+	ss.sorted = ss.frozen
+}
+
+// funcSorter sorts the embedded string slice by a caller-supplied ordering.
+// Len and Swap come from sort.StringSlice; only Less is overridden.
+type funcSorter struct {
+	fn func(a, b string) bool
+	sort.StringSlice
+}
+
+// Less reports whether element i must sort before element j according to fn.
+func (s funcSorter) Less(i, j int) bool {
+	x, y := s.StringSlice[i], s.StringSlice[j]
+	return s.fn(x, y)
+}
+
+// sortFunc deduplicates the set and then orders its elements using f, which
+// reports whether a must sort before b.
+func (ss *stringSet) sortFunc(f func(a, b string) bool) {
+	ss.compact()
+	byFunc := funcSorter{fn: f, StringSlice: sort.StringSlice(ss.s)}
+	sort.Sort(byFunc)
+}
+
+// remove deletes s from the set if it is present; it panics if the set is
+// frozen.
+func (ss *stringSet) remove(s string) {
+	ss.assertChangeable()
+	i, ok := ss.find(s)
+	if !ok {
+		return
+	}
+	ss.s = append(ss.s[:i], ss.s[i+1:]...)
+}
+
+// replace overwrites the element ol with nu. It panics, via index, if ol is
+// not in the set. The set is marked as needing to be sorted again.
+func (ss *stringSet) replace(ol, nu string) {
+	pos := ss.index(ol)
+	ss.s[pos] = nu
+	ss.sorted = ss.frozen
+}
+
+// index returns the position of s in the sorted set and panics if s is not
+// an element. It records that the set is used as an Indexed set.
+func (ss *stringSet) index(s string) int {
+	ss.setType(Indexed)
+	pos, found := ss.find(s)
+	if found {
+		return pos
+	}
+	if pos < len(ss.s) {
+		log.Panicf("find: item %q is not in list. Closest match is %q.", s, ss.s[pos])
+	}
+	log.Panicf("find: item %q is not in list", s)
+	return pos
+}
+
+// find compacts the set and searches it for s. It returns the position at
+// which s is, or would be inserted, and whether s is actually present.
+func (ss *stringSet) find(s string) (int, bool) {
+	ss.compact()
+	pos := sort.SearchStrings(ss.s, s)
+	if pos == len(ss.s) {
+		return pos, false
+	}
+	return pos, ss.s[pos] == s
+}
+
+// slice returns the sorted, deduplicated elements of the set. The returned
+// slice is the set's own storage, not a copy.
+func (ss *stringSet) slice() []string {
+	ss.compact()
+	elems := ss.s
+	return elems
+}
+
+// updateLater records key as the value associated with v in the update map,
+// creating the map on first use.
+func (ss *stringSet) updateLater(v, key string) {
+	if ss.update == nil {
+		ss.update = make(map[string]string)
+	}
+	ss.update[v] = key
+}
+
+// join concatenates all elements, which must all have the same length, and
+// terminates the result with an entry of that length consisting of 0xff
+// bytes. The terminating entry is also appended to the set itself.
+func (ss *stringSet) join() string {
+	ss.setType(Indexed)
+	width := len(ss.s[0])
+	for i := range ss.s {
+		if len(ss.s[i]) != width {
+			log.Panic("join: not all entries are of the same length")
+		}
+	}
+	sentinel := strings.Repeat("\xff", width)
+	ss.s = append(ss.s, sentinel)
+	return strings.Join(ss.s, "")
+}
+
+// ianaEntry holds information for an entry in the IANA Language Subtag Repository.
+// All types use the same entry.
+// See http://tools.ietf.org/html/bcp47#section-5.1 for a description of the various
+// fields.
+//
+// parseRegistry fills each field from the registry field named in its
+// comment.
+type ianaEntry struct {
+	typ            string   // "Type:"
+	tag            string   // "Subtag:" or "Tag:"
+	description    []string // one element per "Description:" field
+	scope          string   // "Scope:"
+	added          string   // "Added:"
+	preferred      string   // "Preferred-Value:"
+	deprecated     string   // "Deprecated:"
+	suppressScript string   // "Suppress-Script:"
+	macro          string   // "Macrolanguage:"
+	prefix         []string // one element per "Prefix:" field
+}
+
+// builder holds the parsed input data and the state needed to write the
+// generated tables.
+type builder struct {
+	w      io.Writer   // multi writer
+	out    io.Writer   // set to Stdout
+	hash32 hash.Hash32 // for checking whether tables have changed.
+	// size is the running total of the sizes reported by addSize and
+	// addArraySize.
+	size   int
+	data   *cldr.CLDR
+	supp   *cldr.SupplementalData
+
+	// indices
+	locale      stringSet // common locales
+	lang        stringSet // canonical language ids (2 or 3 letter ISO codes) with data
+	langNoIndex stringSet // 3-letter ISO codes with no associated data
+	script      stringSet // 4-letter ISO codes
+	region      stringSet // 2-letter ISO or 3-digit UN M49 codes
+	currency    stringSet // 3-letter ISO currency codes
+
+	// langInfo
+	// registry maps each tag or subtag of the IANA registry to its entry.
+	registry map[string]*ianaEntry
+}
+
+// openReader opens the resource at *url and returns its body, which the
+// caller must close. When the -local flag is set, *url is first rewritten
+// to a file:// URL that points at a file with the same base name in the
+// current working directory. Any failure aborts the program.
+func openReader(url *string) io.ReadCloser {
+	if *localFiles {
+		// Without a working directory the file URL would silently point
+		// at the file system root, so fail loudly instead.
+		pwd, err := os.Getwd()
+		failOnError(err)
+		*url = "file://" + path.Join(pwd, path.Base(*url))
+	}
+	// Serve file:// URLs from the local file system.
+	t := &http.Transport{}
+	t.RegisterProtocol("file", http.NewFileTransport(http.Dir("/")))
+	c := &http.Client{Transport: t}
+	resp, err := c.Get(*url)
+	failOnError(err)
+	if resp.StatusCode != http.StatusOK {
+		resp.Body.Close()
+		log.Fatalf(`bad GET status for "%s": %s`, *url, resp.Status)
+	}
+	return resp.Body
+}
+
+// newBuilder decodes the "supplemental" directory of the CLDR archive and
+// parses the IANA registry, and returns a builder that writes to standard
+// output while feeding a hash of what it writes.
+func newBuilder() *builder {
+	archive := openReader(url)
+	defer archive.Close()
+
+	dec := &cldr.Decoder{}
+	dec.SetDirFilter("supplemental")
+	data, err := dec.DecodeZip(archive)
+	failOnError(err)
+
+	b := &builder{
+		out:    os.Stdout,
+		data:   data,
+		supp:   data.Supplemental(),
+		hash32: fnv.New32(),
+	}
+	b.w = io.MultiWriter(b.out, b.hash32)
+	b.parseRegistry()
+	return b
+}
+
+// parseRegistry reads the IANA language subtag registry and fills
+// b.registry with one ianaEntry per tag or subtag.
+//
+// The registry is consumed as a stream of whitespace-separated words. A
+// word is taken as a field name and the word after it as the field's
+// value. Words that are not a known field name are skipped one at a time,
+// which resynchronizes the scanner on the next known field name.
+func (b *builder) parseRegistry() {
+	r := openReader(iana)
+	defer r.Close()
+	b.registry = make(map[string]*ianaEntry)
+
+	scan := bufio.NewScanner(r)
+	scan.Split(bufio.ScanWords)
+	// record is the entry being filled; a "Type:" field starts a new one.
+	var record *ianaEntry
+	for more := scan.Scan(); more; {
+		key := scan.Text()
+		more = scan.Scan()
+		value := scan.Text()
+		switch key {
+		case "Type:":
+			record = &ianaEntry{typ: value}
+		case "Subtag:", "Tag:":
+			record.tag = value
+			if info, ok := b.registry[value]; ok {
+				// The only duplicate that is tolerated is an extlang that
+				// repeats an existing language subtag; the language entry
+				// is the one kept in the registry.
+				if info.typ != "language" || record.typ != "extlang" {
+					log.Fatalf("parseRegistry: tag %q already exists", value)
+				}
+			} else {
+				b.registry[value] = record
+			}
+		case "Suppress-Script:":
+			record.suppressScript = value
+		case "Added:":
+			record.added = value
+		case "Deprecated:":
+			record.deprecated = value
+		case "Macrolanguage:":
+			record.macro = value
+		case "Preferred-Value:":
+			record.preferred = value
+		case "Prefix:":
+			record.prefix = append(record.prefix, value)
+		case "Scope:":
+			record.scope = value
+		case "Description:":
+			// A description may consist of several words: collect words
+			// until one starts with '%' or ends in ':'.
+			buf := []byte(value)
+			for more = scan.Scan(); more; more = scan.Scan() {
+				b := scan.Bytes()
+				if b[0] == '%' || b[len(b)-1] == ':' {
+					break
+				}
+				buf = append(buf, ' ')
+				buf = append(buf, b...)
+			}
+			record.description = append(record.description, string(buf))
+			// The scanner is already positioned on the word that ended
+			// the description, so do not advance it again.
+			continue
+		default:
+			// Not a known field name: retry with the word that was read
+			// as its value.
+			continue
+		}
+		// Advance to the next field name.
+		more = scan.Scan()
+	}
+	if scan.Err() != nil {
+		log.Panic(scan.Err())
+	}
+}
+
+// commentIndex maps the first word of each entry in comment to that entry
+// with "// " inserted after every newline.
+var commentIndex = make(map[string]string)
+
+// init fills commentIndex from comment.
+func init() {
+	for _, text := range comment {
+		first := strings.SplitN(text, " ", 2)[0]
+		name := strings.TrimSpace(first)
+		commentIndex[name] = strings.Replace(text, "\n", "\n// ", -1)
+	}
+}
+
+// comment writes the documentation registered for name, or an empty line if
+// there is none, to b.out. It bypasses b.w, so the text is not fed to the
+// hash.
+func (b *builder) comment(name string) {
+	text := commentIndex[name]
+	fmt.Fprintln(b.out, text)
+}
+
+// pf writes the formatted text followed by a newline to b.w.
+func (b *builder) pf(f string, x ...interface{}) {
+	fmt.Fprintf(b.w, f, x...)
+	fmt.Fprintln(b.w)
+}
+
+// p writes its operands to b.w in the manner of fmt.Println.
+func (b *builder) p(x ...interface{}) {
+	args := x
+	fmt.Fprintln(b.w, args...)
+}
+
+// addSize adds s to the running size total and writes a comment that
+// reports s.
+func (b *builder) addSize(s int) {
+	b.size = b.size + s
+	b.pf("// Size: %d bytes", s)
+}
+
+// addArraySize adds s to the running size total and writes a comment that
+// reports s together with the element count n.
+func (b *builder) addArraySize(s, n int) {
+	b.size = b.size + s
+	b.pf("// Size: %d bytes, %d elements", s, n)
+}
+
+func (b *builder) writeConst(name string, x interface{}) {
+	b.comment(name)
+	b.pf("const %s = %v", name, x)
+}
+
+// writeSlice writes ss, which must be a slice or array, as an array
+// variable called name, preceded by its documentation and a size comment.
+// Struct elements are written one per line using Go syntax; other elements
+// are written as decimal numbers, twelve per line.
+func (b *builder) writeSlice(name string, ss interface{}) {
+	b.comment(name)
+	val := reflect.ValueOf(ss)
+	elem := val.Type().Elem()
+	n := val.Len()
+	b.addArraySize(n*int(elem.Size()), n)
+	fmt.Fprintf(b.w, `var %s = [%d]%s{`, name, n, elem)
+	isStruct := elem.Kind() == reflect.Struct
+	for i := 0; i < n; i++ {
+		item := val.Index(i).Interface()
+		if isStruct {
+			fmt.Fprintf(b.w, "\n\t%#v, ", item)
+			continue
+		}
+		if i%12 == 0 {
+			fmt.Fprintf(b.w, "\n\t")
+		}
+		fmt.Fprintf(b.w, "%d, ", item)
+	}
+	b.p("\n}")
+}
+
+// writeStringSlice writes ss as an array of strings called name, one quoted
+// element per line. The reported size covers the string headers as well as
+// the string data. This representation has a lot of overhead and should
+// typically only be used for debugging.
+// TODO: remove
+func (b *builder) writeStringSlice(name string, ss []string) {
+	b.comment(name)
+	elem := reflect.TypeOf(ss).Elem()
+	total := len(ss) * int(elem.Size())
+	for i := range ss {
+		total += len(ss[i])
+	}
+	b.addArraySize(total, len(ss))
+	b.pf(`var %s = [%d]%s{`, name, len(ss), elem)
+	for _, s := range ss {
+		b.pf("\t%q,", s)
+	}
+	b.p("}")
+}
+
+// writeString writes s as a string variable called name, preceded by its
+// documentation and a size comment that covers the string header and data.
+// Strings shorter than 40 bytes are written on a single line; longer ones
+// are written as a concatenation of quoted chunks, one per line.
+func (b *builder) writeString(name, s string) {
+	b.comment(name)
+	b.addSize(len(s) + int(reflect.TypeOf(s).Size()))
+	if len(s) < 40 {
+		b.pf(`var %s string = %q`, name, s)
+		return
+	}
+	// cpl is the maximum number of characters per line between the quotes.
+	const cpl = 60
+	b.pf(`var %s string = "" +`, name)
+	for {
+		n := cpl
+		if n > len(s) {
+			n = len(s)
+		}
+		// Quoting may expand bytes into escape sequences, so shorten the
+		// chunk until its quoted form, including the two quotes, fits.
+		var q string
+		for {
+			q = strconv.Quote(s[:n])
+			if len(q) <= cpl+2 {
+				break
+			}
+			n--
+		}
+		if n < len(s) {
+			b.pf(`	%s +`, q)
+			s = s[n:]
+		} else {
+			// Last chunk: no trailing concatenation operator.
+			b.pf(`	%s`, q)
+			break
+		}
+	}
+}
+
+// base is the number of letters in the range 'a' through 'z'.
+const base = 'z' - 'a' + 1
+
+// strToInt interprets s as a number in the given base whose digits are the
+// letters 'a' (zero) through 'z', most significant digit first.
+func strToInt(s string) uint {
+	var v uint
+	for _, c := range []byte(s) {
+		v = v*base + uint(c-'a')
+	}
+	return v
+}
+
+// writeBitVector writes a bit vector called name in which the bit at
+// position strToInt(s) is set for every s in ss. The vector has room for
+// every lower-case string of the same length as the first element of ss.
+func (b *builder) writeBitVector(name string, ss []string) {
+	nbits := math.Pow(base, float64(len(ss[0])))
+	vec := make([]uint8, int(math.Ceil(nbits/8)))
+	for _, s := range ss {
+		bit := strToInt(s)
+		vec[bit/8] |= 1 << (bit % 8)
+	}
+	b.writeSlice(name, vec)
+}
+
+// writeMapFunc writes m as a map[string]uint16 called name, preceded by its
+// documentation and a size comment. The keys of m are kept and written in
+// sorted order; every value of m is converted to a uint16 by f.
+//
+// The reported size is an estimate: a string header and a 2-byte value for
+// each entry, plus the bytes of each key.
+//
+// TODO: convert this type into a list or two-stage trie.
+func (b *builder) writeMapFunc(name string, m map[string]string, f func(string) uint16) {
+	b.comment(name)
+	v := reflect.ValueOf(m)
+	sz := v.Len() * (2 + int(v.Type().Key().Size()))
+	keys := make([]string, 0, len(m))
+	for k := range m {
+		// The generated map stores the keys, not the values of m, so it
+		// is the key lengths that count towards its size.
+		sz += len(k)
+		keys = append(keys, k)
+	}
+	b.addSize(sz)
+	sort.Strings(keys)
+	b.pf(`var %s = map[string]uint16{`, name)
+	for _, k := range keys {
+		b.pf("\t%q: %v,", k, f(m[k]))
+	}
+	b.p("}")
+}
+
+// langIndex returns the identifier for language code s: its position in
+// b.lang when it is listed there, and otherwise the numeric value of its
+// letters offset by the number of entries in b.lang.
+func (b *builder) langIndex(s string) uint16 {
+	i, ok := b.lang.find(s)
+	if !ok {
+		return uint16(strToInt(s)) + uint16(len(b.lang.s))
+	}
+	return uint16(i)
+}
+
+// inc advances the string to its lexicographical successor of the same
+// length. A trailing 'z' or 'Z' wraps around to 'a' or 'A' respectively
+// and carries into the position before it, so that inc("qaz") is "qba" and
+// inc("QZ") is "RA". It panics if every position would wrap around, as such
+// a string has no successor of the same length.
+func inc(s string) string {
+	b := []byte(s)
+	for i := len(b) - 1; i >= 0; i-- {
+		switch b[i] {
+		case 'z':
+			b[i] = 'a' // wrap around and carry
+		case 'Z':
+			b[i] = 'A' // wrap around and carry
+		default:
+			b[i]++
+			return string(b)
+		}
+	}
+	panic(fmt.Sprintf("inc: %q has no successor of the same length", s))
+}
+
+// parseIndices fills the builder's string sets: language, script and region
+// codes are taken from the IANA registry, currency codes and the list of
+// common locales from the CLDR supplemental data.
+func (b *builder) parseIndices() {
+	meta := b.supp.Metadata
+
+	for tag, entry := range b.registry {
+		var set *stringSet
+		switch entry.typ {
+		case "language":
+			// 2-letter codes, codes with a suppress-script and special
+			// codes go into lang; all other language codes into
+			// langNoIndex.
+			if len(tag) == 2 || entry.suppressScript != "" || entry.scope == "special" {
+				b.lang.add(tag)
+				continue
+			}
+			set = &b.langNoIndex
+		case "region":
+			set = &b.region
+		case "script":
+			set = &b.script
+		default:
+			continue
+		}
+		// A tag of the form "first..last" stands for a range of codes.
+		bounds := strings.SplitN(tag, "..", 2)
+		if len(bounds) == 1 {
+			set.add(tag)
+			continue
+		}
+		for code := bounds[0]; code <= bounds[1]; code = inc(code) {
+			set.add(code)
+		}
+	}
+
+	// currency codes
+	for _, region := range b.supp.CurrencyData.Region {
+		for _, cur := range region.Currency {
+			b.currency.add(cur.Iso4217)
+		}
+	}
+
+	// common locales
+	b.locale.parse(meta.DefaultContent.Locales)
+}
+
+// writeLanguage generates all tables needed for language canonicalization:
+// unknownLang, lang, langNoIndexOffset, langNoIndex, altLangISO3,
+// langOldMap, langMacroMap and tagAlias.
+func (b *builder) writeLanguage() {
+	meta := b.supp.Metadata
+
+	b.writeConst("unknownLang", b.lang.index("und"))
+
+	// Get language codes that need to be mapped (overlong 3-letter codes, deprecated
+	// 2-letter codes and grandfathered tags).
+	langOldMap := stringSet{}
+
+	// Mappings for macro languages
+	langMacroMap := stringSet{}
+
+	// altLangISO3 get the alternative ISO3 names that need to be mapped.
+	altLangISO3 := stringSet{}
+
+	// legacyTag maps from tag to language code.
+	legacyTag := make(map[string]string)
+
+	// Work on a copy so that the entries of b.lang keep their plain form;
+	// lang.update maps a 2-letter code to its 3-letter form.
+	lang := b.lang.clone()
+	for _, a := range meta.Alias.LanguageAlias {
+		if a.Replacement == "" {
+			a.Replacement = "und"
+		}
+		// TODO: support mapping to tags
+		repl := strings.SplitN(a.Replacement, "_", 2)[0]
+		if a.Reason == "overlong" {
+			// A 3-letter code that should be written as a 2-letter code.
+			if len(a.Replacement) == 2 && len(a.Type) == 3 {
+				lang.updateLater(a.Replacement, a.Type)
+			}
+		} else if len(a.Type) <= 3 {
+			// Deprecated codes are taken from the IANA registry below.
+			if a.Reason != "deprecated" {
+				langMacroMap.add(a.Type)
+				langMacroMap.updateLater(a.Type, repl)
+			}
+		} else {
+			// Anything longer than a language code is a legacy tag.
+			legacyTag[strings.Replace(a.Type, "_", "-", -1)] = repl
+		}
+	}
+	for k, v := range b.registry {
+		// Also add deprecated values for 3-letter ISO codes, which CLDR omits.
+		if v.typ == "language" && v.deprecated != "" && v.preferred != "" {
+			langOldMap.add(k)
+			langOldMap.updateLater(k, v.preferred)
+		}
+	}
+	// Fix CLDR mappings.
+	lang.updateLater("tl", "tgl")
+	lang.updateLater("sh", "hbs")
+	lang.updateLater("mo", "mol")
+	lang.updateLater("no", "nor")
+	lang.updateLater("tw", "twi")
+	lang.updateLater("nb", "nob")
+	lang.updateLater("ak", "aka")
+
+	// Ensure that each 2-letter code is matched with a 3-letter code.
+	for _, v := range lang.s {
+		s, ok := lang.update[v]
+		if !ok {
+			// Fall back to the 3-letter code of the preferred replacement
+			// of a deprecated code.
+			if s, ok = lang.update[langOldMap.update[v]]; !ok {
+				continue
+			}
+			lang.update[v] = s
+		}
+		// The 3-letter code cannot be derived from the 2-letter code if
+		// their first letters differ; such codes go into altLangISO3.
+		if v[0] != s[0] {
+			altLangISO3.add(s)
+			altLangISO3.updateLater(s, v)
+		}
+	}
+
+	// Complete canonicalized language tags.
+	lang.freeze()
+	for i, v := range lang.s {
+		// We can avoid these manual entries by using the IANA registry directly.
+		// Seems easier to update the list manually, as changes are rare.
+		// The panic in this loop will trigger if we miss an entry.
+		add := ""
+		if s, ok := lang.update[v]; ok {
+			if s[0] == v[0] {
+				// Same first letter: store the rest of the 3-letter code.
+				add = s[1:]
+			} else {
+				// Otherwise store a 0 and the index into altLangISO3.
+				add = string([]byte{0, byte(altLangISO3.index(s))})
+			}
+		} else if len(v) == 3 {
+			// Pad 3-letter codes to 4 bytes.
+			add = "\x00"
+		} else {
+			log.Panicf("no data for long form of %q", v)
+		}
+		lang.s[i] += add
+	}
+	b.writeString("lang", lang.join())
+
+	b.writeConst("langNoIndexOffset", len(b.lang.s))
+
+	// space of all valid 3-letter language identifiers.
+	b.writeBitVector("langNoIndex", b.langNoIndex.slice())
+
+	// Append to each alternative 3-letter code the index of the 2-letter
+	// code it belongs to.
+	for i, s := range altLangISO3.slice() {
+		idx := b.lang.index(altLangISO3.update[s])
+		altLangISO3.s[i] += string([]byte{byte(idx)})
+	}
+	b.writeString("altLangISO3", altLangISO3.join())
+
+	// makeMap writes the from/to pairs recorded in ss as a table called
+	// name, ordered by the language index of the from value.
+	makeMap := func(name string, ss *stringSet) {
+		ss.sortFunc(func(i, j string) bool {
+			return b.langIndex(i) < b.langIndex(j)
+		})
+		m := []struct{ from, to uint16 }{}
+		for _, s := range ss.s {
+			m = append(m, struct{ from, to uint16 }{
+				b.langIndex(s),
+				b.langIndex(ss.update[s]),
+			})
+		}
+		b.writeSlice(name, m)
+	}
+	makeMap("langOldMap", &langOldMap)
+	makeMap("langMacroMap", &langMacroMap)
+
+	b.writeMapFunc("tagAlias", legacyTag, func(s string) uint16 {
+		return uint16(b.langIndex(s))
+	})
+}
+
+// writeScript writes the unknownScript constant, the script table and the
+// suppressScript table. The latter holds, for every entry of b.lang, the
+// index of the script named in the registry's Suppress-Script field, or
+// the index of "Zzzz" if there is none.
+func (b *builder) writeScript() {
+	unknown := uint8(b.script.index("Zzzz"))
+	b.writeConst("unknownScript", unknown)
+	b.writeString("script", b.script.join())
+
+	langs := b.lang.slice()
+	supp := make([]uint8, len(langs))
+	for i, tag := range langs {
+		id := unknown
+		if sc := b.registry[tag].suppressScript; sc != "" {
+			id = uint8(b.script.index(sc))
+		}
+		supp[i] = id
+	}
+	b.writeSlice("suppressScript", supp)
+}
+
+// parseM49 converts the decimal string s to a number. The empty string
+// yields 0; a string that is not a valid number, or that needs more than
+// 10 bits, aborts the generator.
+func parseM49(s string) uint16 {
+	if s == "" {
+		return 0
+	}
+	n, err := strconv.ParseUint(s, 10, 10)
+	failOnError(err)
+	return uint16(n)
+}
+
+// writeRegion writes the tables used for region lookup: unknownRegion,
+// isoRegionOffset, regionISO, altRegionISO3, altRegionIDs and m49.
+func (b *builder) writeRegion() {
+	b.writeConst("unknownRegion", b.region.index("ZZ"))
+
+	// Everything before "AA" in the sorted region set is a numeric code.
+	isoOffset := b.region.index("AA")
+	m49map := make([]uint16, len(b.region.slice()))
+	altRegionISO3 := ""
+	altRegionIDs := []uint16{}
+
+	b.writeConst("isoRegionOffset", isoOffset)
+
+	// 2-letter region lookup and mapping to numeric codes.
+	regionISO := b.region.clone()
+	regionISO.s = regionISO.s[isoOffset:]
+	regionISO.sorted = false
+	for _, tc := range b.supp.CodeMappings.TerritoryCodes {
+		i := regionISO.index(tc.Type)
+		if len(tc.Alpha3) == 3 {
+			if tc.Alpha3[0] == tc.Type[0] {
+				// Same first letter: store the rest of the 3-letter code.
+				regionISO.s[i] += tc.Alpha3[1:]
+			} else {
+				// Otherwise store a 0 and the offset of the 3-letter code
+				// in altRegionISO3.
+				regionISO.s[i] += string([]byte{0, byte(len(altRegionISO3))})
+				altRegionISO3 += tc.Alpha3
+				altRegionIDs = append(altRegionIDs, uint16(isoOffset+i))
+			}
+		}
+		if d := m49map[isoOffset+i]; d != 0 {
+			log.Panicf("%s found as a duplicate UN.M49 code of %03d", tc.Numeric, d)
+		}
+		m49map[isoOffset+i] = parseM49(tc.Numeric)
+	}
+	// Pad entries for which no 3-letter code was added to 4 bytes.
+	for i, s := range regionISO.s {
+		if len(s) != 4 {
+			regionISO.s[i] = s + "  "
+		}
+	}
+	b.writeString("regionISO", regionISO.join())
+	b.writeString("altRegionISO3", altRegionISO3)
+	b.writeSlice("altRegionIDs", altRegionIDs)
+
+	// 3-digit region lookup, groupings.
+	for i := 0; i < isoOffset; i++ {
+		m49map[i] = parseM49(b.region.s[i])
+	}
+	b.writeSlice("m49", m49map)
+}
+
+// writeLocale writes the list of common locales as an array of strings
+// called locale.
+func (b *builder) writeLocale() {
+	locales := b.locale.slice()
+	b.writeStringSlice("locale", locales)
+}
+
+// writeLanguageInfo is a placeholder; it does not write anything yet.
+func (b *builder) writeLanguageInfo() {
+}
+
+// writeCurrencies writes the currency table and the unknownCurrency
+// constant. Each currency code is followed by a byte that holds the
+// rounding increment in the upper 6 bits and the number of decimal
+// positions in the lower 2 bits.
+func (b *builder) writeCurrencies() {
+	unknown := b.currency.index("XXX")
+	digits := map[string]uint64{}
+	rounding := map[string]uint64{}
+	for _, info := range b.supp.CurrencyData.Fractions[0].Info {
+		var err error
+		// The bit sizes passed to ParseUint reject values that would not
+		// fit in the 2 and 6 bits reserved for them.
+		digits[info.Iso4217], err = strconv.ParseUint(info.Digits, 10, 2)
+		failOnError(err)
+		rounding[info.Iso4217], err = strconv.ParseUint(info.Rounding, 10, 6)
+		failOnError(err)
+	}
+	for i, cur := range b.currency.slice() {
+		d := uint64(2) // default number of decimal positions
+		if dd, ok := digits[cur]; ok {
+			d = dd
+		}
+		var r uint64
+		if r = rounding[cur]; r == 0 {
+			r = 1 // default rounding increment in units 10^(-digits)
+		}
+		b.currency.s[i] += string([]byte{byte(r<<2 + d)})
+	}
+	b.writeString("currency", b.currency.join())
+	// Hack alert: gofmt indents a trailing comment after an indented string.
+	// Write this constant after currency to force a proper indentation of
+	// the final comment.
+	b.writeConst("unknownCurrency", unknown)
+}
+
+// writeRegionInclusionData writes the tables that describe the region
+// containment graph: nRegionGroups, regionInclusion, regionInclusionBits
+// and regionInclusionNext.
+func (b *builder) writeRegionInclusionData() {
+	type index uint
+	// groups maps the regionID of each group to its bit position.
+	groups := make(map[int]index)
+	// Create group indices.
+	for i := 0; b.region.s[i][0] < 'A'; i++ { // Base M49 indices on regionID.
+		groups[i] = index(i)
+	}
+	// Containment groups that are not among the leading numeric codes get
+	// the next free bit position.
+	for _, g := range b.supp.TerritoryContainment.Group {
+		group := b.region.index(g.Type)
+		if _, ok := groups[group]; !ok {
+			groups[group] = index(len(groups))
+		}
+	}
+	// Each group needs its own bit in a uint32.
+	if len(groups) > 32 {
+		log.Fatalf("only 32 groups supported, found %d", len(groups))
+	}
+	b.writeConst("nRegionGroups", len(groups))
+	// mm maps a regionID to the bit positions of the groups it is directly
+	// connected to: the groups that contain it and, if it is a group
+	// itself, the groups it contains.
+	mm := make(map[int][]index)
+	for _, g := range b.supp.TerritoryContainment.Group {
+		group := b.region.index(g.Type)
+		for _, mem := range strings.Split(g.Contains, " ") {
+			r := b.region.index(mem)
+			mm[r] = append(mm[r], groups[group])
+			if g, ok := groups[r]; ok {
+				mm[group] = append(mm[group], g)
+			}
+		}
+	}
+	regionInclusion := make([]uint8, len(b.region.s))
+	// bvs maps a bit vector to its position in regionInclusionBits.
+	bvs := make(map[uint32]index)
+	// Make the first bitvector positions correspond with the groups.
+	for r, i := range groups {
+		// A group's vector always includes the group's own bit.
+		bv := uint32(1 << i)
+		for _, g := range mm[r] {
+			bv |= 1 << g
+		}
+		bvs[bv] = i
+		regionInclusion[r] = uint8(bvs[bv])
+	}
+	// Regions that are not groups share a vector with every other region
+	// that has the same set of connected groups.
+	for r := 0; r < len(b.region.s); r++ {
+		if _, ok := groups[r]; !ok {
+			bv := uint32(0)
+			for _, g := range mm[r] {
+				bv |= 1 << g
+			}
+			if bv == 0 {
+				// Pick the world for unspecified regions.
+				bv = 1 << groups[b.region.index("001")]
+			}
+			if _, ok := bvs[bv]; !ok {
+				bvs[bv] = index(len(bvs))
+			}
+			regionInclusion[r] = uint8(bvs[bv])
+		}
+	}
+	b.writeSlice("regionInclusion", regionInclusion)
+	regionInclusionBits := make([]uint32, len(bvs))
+	for k, v := range bvs {
+		regionInclusionBits[v] = uint32(k)
+	}
+	// Add bit vectors for increasingly large distances until a fixed point is reached.
+	// The slice may grow while it is being iterated over; the loop ends
+	// once an iteration no longer produces a vector that is new.
+	regionInclusionNext := []uint8{}
+	for i := 0; i < len(regionInclusionBits); i++ {
+		bits := regionInclusionBits[i]
+		next := bits
+		// Merge in the vectors of all groups set in bits. Note that this
+		// i shadows the index of the outer loop.
+		for i := uint(0); i < uint(len(groups)); i++ {
+			if bits&(1<<i) != 0 {
+				next |= regionInclusionBits[i]
+			}
+		}
+		if _, ok := bvs[next]; !ok {
+			bvs[next] = index(len(bvs))
+			regionInclusionBits = append(regionInclusionBits, next)
+		}
+		regionInclusionNext = append(regionInclusionNext, uint8(bvs[next]))
+	}
+	b.writeSlice("regionInclusionBits", regionInclusionBits)
+	b.writeSlice("regionInclusionNext", regionInclusionNext)
+}
+
+// header is the format string for the preamble of the generated file. Its
+// two verbs receive the CLDR and IANA URLs. The recorded command line uses
+// the names under which the flags are registered, -cldr and -iana, so that
+// it can be run as written.
+var header = `// Generated by running
+//		maketables -cldr=%s -iana=%s
+// DO NOT EDIT
+
+package locale
+`
+
+// main reads the input data and writes the generated tables, followed by a
+// summary of their total size and a checksum, to standard output.
+func main() {
+	flag.Parse()
+	b := newBuilder()
+	fmt.Fprintf(b.out, header, *url, *iana)
+
+	b.parseIndices()
+	// TODO: add b.writeLocale to the steps below.
+	steps := []func(){
+		b.writeLanguage,
+		b.writeScript,
+		b.writeRegion,
+		b.writeCurrencies,
+		b.writeRegionInclusionData,
+	}
+	for _, step := range steps {
+		step()
+	}
+
+	kb := float32(b.size) / 1024
+	fmt.Fprintf(b.out, "\n// Size: %.1fK (%d bytes); Check: %X\n", kb, b.size, b.hash32.Sum32())
+}
--- a/locale/match.go
+++ b/locale/match.go
@ -0,0 +1,29 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package locale
+
+// regionDistance returns the distance between regions a and b in the graph
+// of region containments as defined in CLDR. Identical regions have
+// distance 0 and a region and a group it is directly connected to have
+// distance 1. Otherwise the set of groups reachable from one region is
+// widened step by step, adding one to the distance each time, until it has
+// a group in common with the set of the other region.
+func regionDistance(a, b regionID) int {
+	if a == b {
+		return 0
+	}
+	from, to := regionInclusion[a], regionInclusion[b]
+	if from < nRegionGroups {
+		from, to = to, from
+	}
+	if to < nRegionGroups && regionInclusionBits[from]&(1<<to) != 0 {
+		return 1
+	}
+	target := regionInclusionBits[to]
+	dist := 2
+	for regionInclusionBits[from]&target == 0 {
+		from = regionInclusionNext[from]
+		dist++
+	}
+	return dist
+}
--- a/locale/match_test.go
+++ b/locale/match_test.go
@ -0,0 +1,36 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package locale
+
+import "testing"
+
+func TestRegionDistance(t *testing.T) {
+	tests := []struct {
+		a, b string
+		d    int
+	}{
+		{"NL", "NL", 0},
+		{"NL", "EU", 1},
+		{"EU", "NL", 1},
+		{"005", "005", 0},
+		{"NL", "BE", 2},
+		{"CO", "005", 1},
+		{"005", "CO", 1},
+		{"CO", "419", 2},
+		{"419", "CO", 2},
+		{"005", "419", 1},
+		{"419", "005", 1},
+		{"001", "013", 2},
+		{"013", "001", 2},
+		{"CO", "CW", 4},
+		{"CO", "PW", 6},
+		{"CO", "BV", 6},
+	}
+	for i, tt := range tests {
+		if d := regionDistance(getRegionID([]byte(tt.a)), getRegionID([]byte(tt.b))); d != tt.d {
+			t.Errorf("%d: d(%s, %s) = %v; want %v", i, tt.a, tt.b, d, tt.d)
+		}
+	}
+}
--- a/locale/parse.go
+++ b/locale/parse.go
@ -0,0 +1,557 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package locale
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"sort"
+	"strings"
+)
+
+// isAlpha reports whether b is a letter rather than a digit.
+// b must already be known to be an ASCII letter or digit; under that
+// precondition every letter compares greater than '9'.
+func isAlpha(b byte) bool {
+	return '9' < b
+}
+
+// isAlphaNum reports whether s consists solely of ASCII letters and digits.
+// An empty slice is considered alphanumeric.
+func isAlphaNum(s []byte) bool {
+	for i := 0; i < len(s); i++ {
+		switch c := s[i]; {
+		case 'a' <= c && c <= 'z':
+		case 'A' <= c && c <= 'Z':
+		case '0' <= c && c <= '9':
+		default:
+			return false
+		}
+	}
+	return true
+}
+
+// Errors reported while scanning and parsing locale identifiers.
+var (
+	// errEmpty is returned by Parse for the empty string.
+	errEmpty    = errors.New("locale: empty locale identifier")
+	// errInvalid is returned when the first token has four or more characters.
+	errInvalid  = errors.New("locale: invalid")
+	// errTrailSep is recorded by scan when the input ends in a separator.
+	errTrailSep = errors.New("locale: trailing separator")
+)
+
+// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
+// The buffer is normalized in place while scanning: init rewrites '_' to '-'
+// and scan removes invalid tokens.
+type scanner struct {
+	b     []byte   // the input being scanned and rewritten
+	bytes [64]byte // small buffer to cover most common cases
+	token []byte   // the current token; nil if scan found none
+	start int // start position of the current token
+	end   int // end position of the current token
+	next  int // next point for scan
+	err   error // the first error recorded by setError
+	done  bool  // set by scan once the input is exhausted
+}
+
+// makeScannerString returns a scanner over a copy of s with the first token
+// already scanned. Inputs that fit use the scanner's embedded byte array;
+// longer inputs are copied into a newly allocated slice.
+func makeScannerString(s string) scanner {
+	var scan scanner
+	if len(s) > len(scan.bytes) {
+		scan.b = []byte(s)
+	} else {
+		n := copy(scan.bytes[:], s)
+		scan.b = scan.bytes[:n]
+	}
+	scan.init()
+	return scan
+}
+
+// init normalizes every '_' separator in the buffer to '-' and then scans
+// the first token.
+func (s *scanner) init() {
+	for i := range s.b {
+		if s.b[i] == '_' {
+			s.b[i] = '-'
+		}
+	}
+	s.scan()
+}
+
+// toLower converts the ASCII upper case letters of the buffer in the range
+// [start, end) to lower case, in place.
+func (s *scanner) toLower(start, end int) {
+	for i := start; i < end; i++ {
+		if c := s.b[i]; c >= 'A' && c <= 'Z' {
+			s.b[i] = c + ('a' - 'A')
+		}
+	}
+}
+
+// setError records e as the scanner's error unless an earlier error has
+// already been recorded; only the first error is kept.
+func (s *scanner) setError(e error) {
+	if s.err != nil {
+		return
+	}
+	s.err = e
+}
+
+// setErrorf formats an error according to f and x and passes it to setError.
+func (s *scanner) setErrorf(f string, x ...interface{}) {
+	err := fmt.Errorf(f, x...)
+	s.setError(err)
+}
+
+// replace replaces the current token with repl. If repl has a different
+// length than the token, the buffer is rebuilt around it and end and next
+// are shifted so that scanning can continue after the replaced token.
+func (s *scanner) replace(repl string) {
+	if end := s.start + len(repl); end != s.end {
+		// Always build a fresh buffer. The previous fallback branch used
+		// append(s.b[end:], ...), which discarded every byte before end
+		// and so corrupted the identifier. Allocating also leaves slices
+		// into the old buffer untouched.
+		diff := end - s.end
+		b := make([]byte, len(s.b)+diff)
+		copy(b, s.b[:s.start])
+		copy(b[end:], s.b[s.end:])
+		s.b = b
+		s.next += diff
+		s.end = end
+	}
+	copy(s.b[s.start:], repl)
+}
+
+// gobble removes the current token from the input. For a token that is not
+// at the start of the buffer, the separator preceding it is removed as well;
+// for a token at the start, everything up to next is removed.
+// Caller must call scan after calling gobble.
+func (s *scanner) gobble() {
+	if s.start > 0 {
+		cut := s.start - 1
+		n := copy(s.b[cut:], s.b[s.end:])
+		s.b = s.b[:cut+n]
+		s.end = cut
+	} else {
+		n := copy(s.b, s.b[s.next:])
+		s.b = s.b[:n]
+		s.end = 0
+	}
+	s.next = s.start
+}
+
+// scan parses the next token of a BCP 47 string. Tokens that are empty,
+// larger than 8 characters or include non-alphanumeric characters result in
+// an error and are gobbled and removed from the output.
+// On success token, start, end and next describe the new token. If no token
+// is left, token is nil and done is set.
+// It returns the end position of the last token consumed, that is, the
+// value of end before this call.
+func (s *scanner) scan() (end int) {
+	end = s.end
+	s.token = nil
+	for s.start = s.next; s.next < len(s.b); {
+		// i is the length of the candidate token starting at s.next.
+		i := bytes.IndexByte(s.b[s.next:], '-')
+		if i == -1 {
+			// No further separator: the token runs to the end of the buffer.
+			s.end = len(s.b)
+			s.next = len(s.b)
+			i = s.end - s.start
+		} else {
+			s.end = s.next + i
+			s.next = s.end + 1
+		}
+		token := s.b[s.start:s.end]
+		if i < 1 || i > 8 || !isAlphaNum(token) {
+			s.setErrorf("locale: invalid token %q", token)
+			// gobble resets s.next to s.start, so the next iteration
+			// retries at the same position on the shortened buffer.
+			s.gobble()
+			continue
+		}
+		s.token = token
+		return end
+	}
+	// Input exhausted: strip a trailing separator, if any, and report it.
+	if n := len(s.b); n > 0 && s.b[n-1] == '-' {
+		s.setError(errTrailSep)
+		s.b = s.b[:len(s.b)-1]
+	}
+	s.done = true
+	return end
+}
+
+// acceptMinSize advances past the current token and then consumes all
+// following tokens that are at least min bytes long.
+// It returns the end position of the last token consumed, or the end of
+// the token that was current on entry if none qualified.
+func (s *scanner) acceptMinSize(min int) (end int) {
+	end = s.end
+	for s.scan(); len(s.token) >= min; s.scan() {
+		end = s.end
+	}
+	return end
+}
+
+// Parse parses the given BCP 47 string and returns a valid ID.
+// If parsing failed it returns an error and any part of the identifier
+// that could be parsed.
+// If parsing succeeded but an unknown option was found, it
+// returns the valid ID and an error.
+// The empty string yields errEmpty. Strings found in tagAlias map directly
+// to a language. An identifier whose first token has four or more characters
+// is only accepted if the whole string equals "root", ignoring case.
+// It accepts identifiers in the BCP 47 format and extensions to this standard
+// defined in
+// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
+func Parse(s string) (loc ID, err error) {
+	// TODO: consider supporting old-style locale key-value pairs.
+	if s == "" {
+		return und, errEmpty
+	}
+	if lang, ok := tagAlias[s]; ok {
+		loc = und
+		loc.lang = langID(lang)
+		return loc, nil
+	}
+	scan := makeScannerString(s)
+	if len(scan.token) < 4 {
+		return parse(&scan, s)
+	}
+	if strings.EqualFold(s, "root") {
+		return und, nil
+	}
+	return und, errInvalid
+}
+
+// parse parses the identifier held by scan, whose first token must already
+// have been scanned. s is the original input string, or "" if there is none;
+// it is reused as the ID's string when it matches the normalized buffer.
+// It returns the parsed ID together with the first error recorded by scan.
+func parse(scan *scanner, s string) (loc ID, err error) {
+	loc = und
+	var end int
+	private := false
+	if n := len(scan.token); n <= 1 {
+		// No language subtag: the input may only be a private use tag.
+		scan.toLower(0, len(scan.b))
+		end = parsePrivate(scan)
+		private = end > 0
+	} else if n >= 4 {
+		return und, errInvalid
+	} else { // the usual case
+		loc, end = parseTag(scan)
+		if n := len(scan.token); n == 1 {
+			// A single-character token starts the extensions.
+			loc.pExt = uint16(end)
+			end = parseExtensions(scan)
+			// Nothing beyond a separator was kept: no extensions.
+			if end-int(loc.pExt) <= 1 {
+				loc.pExt = 0
+			}
+		}
+	}
+	// Anything left after the parsed part is reported and cut off.
+	if end < len(scan.b) {
+		scan.setErrorf("locale: invalid parts %q", scan.b[end:])
+		scan.b = scan.b[:end]
+	}
+	if len(scan.b) <= len(s) {
+		s = s[:len(scan.b)]
+	}
+	// Use the (possibly truncated) input string if it equals the normalized
+	// buffer. Otherwise a new string is only stored if there are variants,
+	// extensions or a private use tag.
+	// NOTE(review): cmp is defined elsewhere; presumably it compares a
+	// string with a byte slice and returns 0 when they are equal.
+	if len(s) > 0 && cmp(s, scan.b) == 0 {
+		loc.str = &s
+	} else if loc.pVariant > 0 || loc.pExt > 0 || private {
+		s = string(scan.b)
+		loc.str = &s
+	}
+	return loc, scan.err
+}
+
+// parseTag parses language, script, region and variants.
+// The current token of scan must be the language subtag. The buffer is
+// normalized in place: the language and region tokens are replaced by the
+// String form of their IDs, unknown scripts and regions are removed, and
+// everything from the first variant on is lower-cased.
+// It returns an ID and the end position in the input that was parsed.
+func parseTag(scan *scanner) (ID, int) {
+	loc := und
+	// TODO: set an error if an unknown lang, script or region is encountered.
+	loc.lang = getLangID(scan.token)
+	scan.replace(loc.lang.String())
+	langStart := scan.start
+	end := scan.scan()
+	// Three-letter alphabetic tokens following the language are extlangs.
+	for len(scan.token) == 3 && isAlpha(scan.token[0]) {
+		// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
+		// to a tag of the form <extlang>.
+		if lang := getLangID(scan.token); lang != unknownLang {
+			// Overwrite the language in the buffer with the extlang and
+			// move start so that gobble removes the remainder.
+			loc.lang = lang
+			copy(scan.b[langStart:], lang.String())
+			scan.b[langStart+3] = '-'
+			scan.start = langStart + 4
+		}
+		scan.gobble()
+		end = scan.scan()
+	}
+	// A four-letter alphabetic token is a script.
+	if len(scan.token) == 4 && isAlpha(scan.token[0]) {
+		loc.script = getScriptID(script, scan.token)
+		if loc.script == unknownScript {
+			scan.gobble()
+		}
+		end = scan.scan()
+	}
+	// A token of two or three characters is a region.
+	if n := len(scan.token); n >= 2 && n <= 3 {
+		loc.region = getRegionID(scan.token)
+		if loc.region == unknownRegion {
+			scan.gobble()
+		} else {
+			scan.replace(loc.region.String())
+		}
+		end = scan.scan()
+	}
+	scan.toLower(scan.start, len(scan.b))
+	start := scan.start
+	end = parseVariants(scan, end)
+	// Record the variant range only if at least one variant was consumed.
+	if start < end {
+		loc.pVariant = byte(start)
+		loc.pExt = uint16(end)
+	}
+	return loc, end
+}
+
+// parseVariants scans tokens as long as each token is a valid variant string.
+// Any token of four or more characters is currently accepted as a variant.
+// Duplicate variants are removed.
+// end is the end position of the tag parsed so far; it is returned unchanged
+// if no variant is consumed, otherwise the end of the last variant is
+// returned.
+func parseVariants(scan *scanner, end int) int {
+	start := scan.start
+	for ; len(scan.token) >= 4; scan.scan() {
+		// TODO: validate and sort variants
+		// A token already present in the variants seen so far is dropped.
+		// Note that this is a substring search over the preceding bytes.
+		if bytes.Index(scan.b[start:scan.start], scan.token) != -1 {
+			scan.gobble()
+			continue
+		}
+		end = scan.end
+		const maxVariantSize = 60000 // more than enough, ensures pExt will be valid.
+		if end > maxVariantSize {
+			break
+		}
+	}
+	return end
+}
+
+// bytesSort orders byte slices lexicographically; it provides the
+// Len, Swap and Less methods used by sort.Sort.
+type bytesSort [][]byte
+
+// Len returns the number of byte slices.
+func (b bytesSort) Len() int { return len(b) }
+
+// Swap exchanges the slices at positions i and j.
+func (b bytesSort) Swap(i, j int) {
+	tmp := b[i]
+	b[i] = b[j]
+	b[j] = tmp
+}
+
+// Less reports whether the slice at i sorts before the slice at j.
+func (b bytesSort) Less(i, j int) bool {
+	return bytes.Compare(b[i], b[j]) < 0
+}
+
+// parseExtensions parses and normalizes the extensions in the buffer.
+// The current token of scan must be the single-character name of the first
+// extension. Extensions are sorted, with a private use ('x') extension kept
+// last; within a 'u' extension, keys that are out of order are sorted.
+// Empty extensions are reported as an error and dropped.
+// It returns the last position of scan.b that is part of any extension.
+func parseExtensions(scan *scanner) int {
+	start := scan.start
+	exts := [][]byte{}
+	private := []byte{}
+	end := scan.end
+	for len(scan.token) == 1 {
+		// This start shadows the outer one: it marks the beginning of the
+		// extension currently being parsed.
+		start := scan.start
+		extension := []byte{}
+		ext := scan.token[0]
+		switch ext {
+		case 'u':
+			// Attributes are tokens of three or more characters that
+			// precede the first two-character key.
+			attrEnd := scan.acceptMinSize(3)
+			end = attrEnd
+			var key []byte
+			for last := []byte{}; len(scan.token) == 2; last = key {
+				key = scan.token
+				end = scan.acceptMinSize(3)
+				// TODO: check key value validity
+				// A key that does not sort after its predecessor: rescan
+				// all keys from the end of the attributes, sort them and
+				// write them back in place.
+				if bytes.Compare(key, last) != 1 {
+					p := attrEnd + 1
+					scan.next = p
+					keys := [][]byte{}
+					for scan.scan(); len(scan.token) == 2; {
+						keyStart := scan.start
+						end = scan.acceptMinSize(3)
+						keys = append(keys, scan.b[keyStart:end])
+					}
+					sort.Sort(bytesSort(keys))
+					copy(scan.b[p:], bytes.Join(keys, []byte{'-'}))
+					break
+				}
+			}
+		case 't':
+			scan.scan()
+			// An optional language tag, lower-cased, followed by fields
+			// whose two-character key ends in a digit.
+			if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
+				_, end = parseTag(scan)
+				scan.toLower(start, end)
+			}
+			for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
+				end = scan.acceptMinSize(3)
+			}
+		case 'x':
+			// Private use: accepts all remaining tokens.
+			end = scan.acceptMinSize(1)
+		default:
+			end = scan.acceptMinSize(2)
+		}
+		extension = scan.b[start:end]
+		if len(extension) < 3 {
+			scan.setErrorf("locale: empty extension %q", string(ext))
+			continue
+		} else if len(exts) == 0 && (ext == 'x' || scan.next >= len(scan.b)) {
+			// A single extension needs no sorting: leave the buffer as is.
+			return end
+		} else if ext == 'x' {
+			// Private use ends the extensions; this break leaves the loop.
+			private = extension
+			break
+		}
+		exts = append(exts, extension)
+	}
+	if scan.next < len(scan.b) {
+		scan.setErrorf("locale: invalid trailing characters %q", scan.b[scan.end:])
+	}
+	sort.Sort(bytesSort(exts))
+	if len(private) > 0 {
+		exts = append(exts, private)
+	}
+	// Rewrite the buffer from the first extension on with the sorted list.
+	scan.b = append(scan.b[:start], bytes.Join(exts, []byte{'-'})...)
+	return len(scan.b)
+}
+
+// parsePrivate parses an identifier that consists of a private use tag only.
+// If the current token does not start with 'x', it records an error and
+// returns the start of the current token; otherwise it returns the result
+// of parseExtensions.
+func parsePrivate(scan *scanner) int {
+	if len(scan.token) > 0 && scan.token[0] == 'x' {
+		return parseExtensions(scan)
+	}
+	scan.setErrorf("locale: invalid locale %q", scan.b)
+	return scan.start
+}
+
+// A Part identifies a part of the locale identifier string.
+// Values up to VariantPart name the parts of the tag itself. Larger values
+// are the byte of an extension's single-character name; see Extension.
+type Part byte
+
+const (
+	TagPart Part = iota // The identifier excluding extensions.
+	LanguagePart
+	ScriptPart
+	RegionPart
+	VariantPart
+)
+
+// partNames holds the names of the parts from TagPart through VariantPart,
+// indexed by Part value.
+var partNames = []string{"Tag", "Language", "Script", "Region", "Variant"}
+
+// String returns the name of a tag part, or, for an extension, a string
+// holding the extension's character.
+func (p Part) String() string {
+	if p <= VariantPart {
+		return partNames[p]
+	}
+	return string(p)
+}
+
+// Extension returns the Part identifier for extension e, which must be 0-9 or a-z.
+// The identifier is simply the byte value of e.
+func Extension(e byte) Part {
+	part := Part(e)
+	return part
+}
+
+// Errors for ill-formed language, script and region parts.
+// NOTE(review): not referenced by the code visible in this part of the
+// file; confirm whether they are used elsewhere in the package.
+var (
+	errLang   = errors.New("locale: invalid Language")
+	errScript = errors.New("locale: invalid Script")
+	errRegion = errors.New("locale: invalid Region")
+)
+
+// Compose returns a Locale composed from the given parts or an error
+// if any of the strings for the parts are ill-formed.
+//
+// The parts are joined with '-' in the order Tag, Language, Script, Region,
+// Variant, followed by the extensions '0'-'9' and 'a'-'z', with the private
+// use extension 'x' always last. Each extension is prefixed with its
+// single-character name. The result is then parsed like any other
+// identifier. It is an error to specify a non-empty TagPart together with
+// any of LanguagePart, ScriptPart, RegionPart or VariantPart.
+func Compose(m map[Part]string) (loc ID, err error) {
+	var scan scanner
+	scan.b = scan.bytes[:0]
+	// add appends part p, if present in m, to the buffer.
+	add := func(p Part) {
+		s, ok := m[p]
+		if !ok {
+			return
+		}
+		if len(scan.b) > 0 {
+			scan.b = append(scan.b, '-')
+		}
+		if p > VariantPart {
+			scan.b = append(scan.b, byte(p), '-')
+		}
+		scan.b = append(scan.b, s...)
+	}
+	if m[TagPart] != "" {
+		for i := LanguagePart; i <= VariantPart; i++ {
+			if _, ok := m[i]; ok {
+				return und, fmt.Errorf("locale: cannot specify both Tag and %s", partNames[i])
+			}
+		}
+	}
+	for p := TagPart; p <= VariantPart; p++ {
+		add(p)
+	}
+	// The bounds are inclusive: the previous exclusive bounds silently
+	// dropped the extensions '9', 'w' and 'z'.
+	for p := Part('0'); p <= Part('9'); p++ {
+		add(p)
+	}
+	for p := Part('a'); p <= Part('z'); p++ {
+		if p != Part('x') {
+			add(p)
+		}
+	}
+	// Private use must come last.
+	add(Part('x'))
+	scan.init()
+	return parse(&scan, "")
+}
+
+// Part returns the part of the locale identifier indicated by p.
+// The one-letter section identifier, if applicable, is not included.
+// Components are separated by a '-'.
+// It returns the empty string if the script, region, variant or extension
+// asked for is not present.
+func (loc ID) Part(p Part) string {
+	s := ""
+	switch p {
+	case TagPart:
+		s = loc.String()
+		// Cut off the extensions, which start at pExt.
+		if loc.pExt > 0 {
+			s = s[:loc.pExt]
+		}
+	case LanguagePart:
+		s = loc.lang.String()
+	case ScriptPart:
+		if loc.script != unknownScript {
+			s = loc.script.String()
+		}
+	case RegionPart:
+		if loc.region != unknownRegion {
+			s = loc.region.String()
+		}
+	case VariantPart:
+		if loc.pVariant > 0 {
+			s = (*loc.str)[loc.pVariant:loc.pExt]
+		}
+	default:
+		// p names an extension: walk the extensions until one matches.
+		if loc.pExt > 0 {
+			str := *loc.str
+			for i := int(loc.pExt); i < len(str); {
+				end, name, ext := getExtension(str, i)
+				if name == byte(p) {
+					return ext
+				}
+				i = end
+			}
+		} else if p == 'x' && loc.str != nil && strings.HasPrefix(*loc.str, "x-") {
+			// The identifier consists of a private use tag only.
+			return (*loc.str)[2:]
+		}
+	}
+	return s
+}
+
+// Parts returns all parts of the locale identifier in a map.
+// The language is always included; script, region, variants and extensions
+// are only included if present. TagPart is never set. Extensions are keyed
+// by Extension(name) and do not include their one-letter identifier.
+func (loc ID) Parts() map[Part]string {
+	m := make(map[Part]string)
+	m[LanguagePart] = loc.lang.String()
+	if loc.script != unknownScript {
+		m[ScriptPart] = loc.script.String()
+	}
+	if loc.region != unknownRegion {
+		m[RegionPart] = loc.region.String()
+	}
+	if loc.str != nil {
+		s := *loc.str
+		if strings.HasPrefix(s, "x-") {
+			// The identifier consists of a private use tag only.
+			m[Extension('x')] = s[2:]
+		} else if loc.pExt > 0 {
+			i := int(loc.pExt)
+			if int(loc.pVariant) != i && loc.pVariant > 0 {
+				m[VariantPart] = s[loc.pVariant:i]
+			}
+			// Walk the extensions, which start at pExt.
+			for i < len(s) {
+				end, name, ext := getExtension(s, i)
+				m[Extension(name)] = ext
+				i = end
+			}
+		}
+	}
+	return m
+}
+
+// getExtension returns the end position, name and body of the extension
+// whose preceding separator is at position p of s. The body excludes the
+// name and the separator following it. A private use extension ('x')
+// always extends to the end of s.
+func getExtension(s string, p int) (end int, name byte, ext string) {
+	p++ // skip the separator in front of the extension name
+	name = s[p]
+	if name == 'x' {
+		return len(s), name, s[p+2:]
+	}
+	end = nextExtension(s, p)
+	return end, name, s[p+2 : end]
+}
+
+// nextExtension finds the next extension within the string, searching
+// for the -<char>- pattern from position p. It returns the position of the
+// first '-' of the pattern, or len(s) if there is no further extension.
+// In the vast majority of cases, locale identifiers will have at most
+// one extension and extensions tend to be small.
+func nextExtension(s string, p int) int {
+	limit := len(s) - 3
+	for p < limit {
+		if s[p] != '-' {
+			p++
+			continue
+		}
+		if s[p+2] == '-' {
+			return p
+		}
+		// A separator that does not start an extension: the next two
+		// bytes cannot start one either.
+		p += 3
+	}
+	return len(s)
+}
--- a/locale/parse_test.go
+++ b/locale/parse_test.go
@ -0,0 +1,354 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package locale
+
+import (
+	"bytes"
+	"fmt"
+	"strings"
+	"testing"
+)
+
+// scanTest describes an input for the scanner together with the tokens
+// that scanning it is expected to produce.
+type scanTest struct {
+	ok  bool // true if scanning does not result in an error
+	in  string
+	tok []string // the expected tokens
+}
+
+// tests holds the scanner test cases shared by TestScan and
+// TestAcceptMinSize. Invalid tokens are expected to be removed, so tok
+// lists only the tokens that survive scanning.
+var tests = []scanTest{
+	{true, "", []string{}},
+	{true, "1", []string{"1"}},
+	{true, "en", []string{"en"}},
+	{true, "root", []string{"root"}},
+	{true, "maxchars", []string{"maxchars"}},
+	{false, "bad/", []string{}},
+	{false, "morethan8", []string{}},
+	{false, "-", []string{}},
+	{false, "----", []string{}},
+	{false, "_", []string{}},
+	{true, "en-US", []string{"en", "US"}},
+	{true, "en_US", []string{"en", "US"}},
+	{false, "en-US-", []string{"en", "US"}},
+	{false, "en-US--", []string{"en", "US"}},
+	{false, "en-US---", []string{"en", "US"}},
+	{false, "en--US", []string{"en", "US"}},
+	{false, "-en-US", []string{"en", "US"}},
+	{false, "-en--US-", []string{"en", "US"}},
+	{false, "-en--US-", []string{"en", "US"}},
+	{false, "en-.-US", []string{"en", "US"}},
+	{false, ".-en--US-.", []string{"en", "US"}},
+	{false, "en-u.-US", []string{"en", "US"}},
+	{true, "en-u1-US", []string{"en", "u1", "US"}},
+	{true, "maxchar1_maxchar2-maxchar3", []string{"maxchar1", "maxchar2", "maxchar3"}},
+	{false, "moreThan8-moreThan8-e", []string{"e"}},
+}
+
+// TestScan checks, for each entry of tests, the sequence of tokens produced
+// by the scanner, the contents of the buffer after scanning and whether an
+// error was recorded.
+func TestScan(t *testing.T) {
+	for i, tt := range tests {
+		scan := makeScannerString(tt.in)
+		for j := 0; !scan.done; j++ {
+			if j >= len(tt.tok) {
+				t.Errorf("%d: extra token %q", i, scan.token)
+			} else if cmp(tt.tok[j], scan.token) != 0 {
+				t.Errorf("%d: token %d: found %q; want %q", i, j, scan.token, tt.tok[j])
+				break
+			}
+			scan.scan()
+		}
+		// After scanning, the buffer should hold the valid tokens only.
+		if s := strings.Join(tt.tok, "-"); cmp(s, bytes.Replace(scan.b, b("_"), b("-"), -1)) != 0 {
+			t.Errorf("%d: input: found %q; want %q", i, scan.b, s)
+		}
+		if (scan.err == nil) != tt.ok {
+			t.Errorf("%d: ok: found %v; want %v", i, scan.err == nil, tt.ok)
+		}
+	}
+}
+
+// TestAcceptMinSize checks, for every minimum size from 1 through 8, that
+// acceptMinSize run from the start of the input returns the length of the
+// leading run of expected tokens that are at least that long.
+func TestAcceptMinSize(t *testing.T) {
+	for i, tt := range tests {
+		for minSize := 1; minSize <= 8; minSize++ {
+			scan := makeScannerString(tt.in)
+			scan.end, scan.next = 0, 0
+			got := scan.acceptMinSize(minSize)
+			// Length of the successive leading tokens of sufficient
+			// size, including the separators between them.
+			want := 0
+			for k, tok := range tt.tok {
+				if len(tok) < minSize {
+					break
+				}
+				if k > 0 {
+					want++
+				}
+				want += len(tok)
+			}
+			if got != want {
+				t.Errorf("%d:%d: found len %d; want %d", i, minSize, got, want)
+			}
+		}
+	}
+}
+
+// parseTest describes an identifier to parse and the parts that are
+// expected to result from parsing it.
+type parseTest struct {
+	i                    int // the index of this test
+	in                   string
+	lang, script, region string
+	variants, ext        string
+	extList              []string // only used when more than one extension is present
+	invalid              bool
+	rewrite              bool // special rewrite not handled by parseTag
+	changed              bool // string needed to be reformatted
+}
+
+// parseTests returns the table of parse test cases. Before returning, it
+// sets the index i of each case and makes ext and extList consistent:
+// ext becomes the '-'-joined extList if one is given, and a lone ext
+// becomes a one-element extList.
+func parseTests() []parseTest {
+	// manyVars is a long list of distinct variants: -abc00-abc01-...-abc49.
+	var manyVars string
+	for i := 0; i < 50; i++ {
+		manyVars += fmt.Sprintf("-abc%02d", i)
+	}
+	tests := []parseTest{
+		{in: "root", lang: "und", changed: true},
+		{in: "und", lang: "und"},
+		{in: "en", lang: "en"},
+		{in: "xy", lang: "und", changed: true},
+		{in: "gsw", lang: "gsw"},
+		{in: "sr_Latn", lang: "sr", script: "Latn", changed: true},
+		{in: "af-Arab", lang: "af", script: "Arab"},
+		{in: "nl-BE", lang: "nl", region: "BE"},
+		{in: "es-419", lang: "es", region: "419"},
+		{in: "und-001", lang: "und", region: "001"},
+		{in: "de-latn-be", lang: "de", script: "Latn", region: "BE", changed: true},
+		{in: "de-1994", lang: "de", variants: "1994"},
+		{in: "nl-abcde-abcde", lang: "nl", variants: "abcde"},
+		{in: "nl" + manyVars, lang: "nl", variants: manyVars[1:]},
+		{in: "nl" + manyVars + manyVars, lang: "nl", variants: manyVars[1:]},
+		{in: "EN_CYRL", lang: "en", script: "Cyrl", changed: true},
+		// private use and extensions
+		{in: "x-a-b-c-d", ext: "x-a-b-c-d"},
+		{in: "x_A.-B-C_D", ext: "x-b-c-d", invalid: true, changed: true},
+		{in: "x-aa-bbbb-cccccccc-d", ext: "x-aa-bbbb-cccccccc-d"},
+		{in: "en-c_cc-b-bbb-a-aaa", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc"}},
+		{in: "en-x_cc-b-bbb-a-aaa", lang: "en", ext: "x-cc-b-bbb-a-aaa", changed: true},
+		{in: "en-c_cc-b-bbb-a-aaa-x-x", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc", "x-x"}},
+		{in: "en-u-co-phonebk", lang: "en", ext: "u-co-phonebk"},
+		{in: "en-Cyrl-u-co-phonebk", lang: "en", script: "Cyrl", ext: "u-co-phonebk"},
+		{in: "en-US-u-co-phonebk", lang: "en", region: "US", ext: "u-co-phonebk"},
+		{in: "en-US-u-co-phonebk-cu-xau", lang: "en", region: "US", ext: "u-co-phonebk-cu-xau"},
+		{in: "en-nedix-u-co-phonebk", lang: "en", variants: "nedix", ext: "u-co-phonebk"},
+		{in: "en-u-cu-xua-co-phonebk", lang: "en", ext: "u-co-phonebk-cu-xua", changed: true},
+		{in: "en-u-def-abc-cu-xua-co-phonebk", lang: "en", ext: "u-def-abc-co-phonebk-cu-xua", changed: true},
+		{in: "en-u-def-abc", lang: "en", ext: "u-def-abc"},
+		{in: "en-u-cu-xua-co-phonebk-a-cd", lang: "en", extList: []string{"a-cd", "u-co-phonebk-cu-xua"}, changed: true},
+		{in: "en-t-en-Cyrl-NL-1994", lang: "en", ext: "t-en-cyrl-nl-1994", changed: true},
+		{in: "en-t-en-Cyrl-NL-1994-t0-abc-def", lang: "en", ext: "t-en-cyrl-nl-1994-t0-abc-def", changed: true},
+		{in: "en-t-t0-abcd", lang: "en", ext: "t-t0-abcd"},
+		// Not necessary to have changed here.
+		{in: "en-t-nl-abcd", lang: "en", ext: "t-nl"},
+		{in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"},
+		{in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}},
+		// invalid
+		{in: "", lang: "und", invalid: true, changed: true},
+		{in: "-", lang: "und", invalid: true, changed: true},
+		{in: "x", lang: "und", invalid: true, changed: true},
+		{in: "x-", lang: "und", invalid: true, changed: true},
+		{in: "x--", lang: "und", invalid: true, changed: true},
+		{in: "a-a-b-c-d", lang: "und", invalid: true, changed: true},
+		{in: "en-", lang: "en", invalid: true},
+		{in: "enne-", lang: "und", invalid: true, changed: true},
+		{in: "en.", lang: "und", invalid: true, changed: true},
+		{in: "en.-latn", lang: "und", invalid: true, changed: true},
+		{in: "en.-en", lang: "en", invalid: true},
+		{in: "x-a-tooManyChars-c-d", ext: "x-a-c-d", invalid: true, changed: true},
+		{in: "a-tooManyChars-c-d", lang: "und", invalid: true, changed: true},
+		// TODO: check key-value validity
+		// { in: "en-u-cu-xd", lang: "en", ext: "u-cu-xd", invalid: true },
+		{in: "en-t-abcd", lang: "en", invalid: true},
+		{in: "en-Latn-US-en", lang: "en", script: "Latn", region: "US", invalid: true},
+		// rewrites
+		{in: "zh-min", lang: "und", rewrite: true, changed: true},
+		{in: "zh-min-nan", lang: "nan", changed: true},
+		{in: "zh-yue", lang: "yue", changed: true},
+		{in: "zh-xiang", lang: "hsn", rewrite: true, changed: true},
+		{in: "zh-guoyu", lang: "zh", rewrite: true, changed: true},
+		{in: "iw", lang: "iw", changed: false},
+		{in: "sgn-BE-FR", lang: "sfb", rewrite: true, changed: true},
+		{in: "i-klingon", lang: "tlh", rewrite: true, changed: true},
+	}
+	for i, tt := range tests {
+		tests[i].i = i
+		if tt.extList != nil {
+			tests[i].ext = strings.Join(tt.extList, "-")
+		}
+		if tt.ext != "" && tt.extList == nil {
+			tests[i].extList = []string{tt.ext}
+		}
+	}
+	return tests
+}
+
+// TestParseExtensions runs parseExtensions on the extension part of each
+// test case that has extensions and is not a special rewrite, and checks
+// the normalized extensions and whether they differ from the input.
+func TestParseExtensions(t *testing.T) {
+	for i, tt := range parseTests() {
+		if tt.ext == "" || tt.rewrite {
+			continue
+		}
+		scan := makeScannerString(tt.in)
+		// Unless the input starts with a single-character token, skip
+		// ahead to the first extension.
+		if len(scan.b) > 1 && scan.b[1] != '-' {
+			scan.end = nextExtension(string(scan.b), 0)
+			scan.next = scan.end + 1
+			scan.scan()
+		}
+		start := scan.start
+		scan.toLower(start, len(scan.b))
+		parseExtensions(&scan)
+		ext := string(scan.b[start:])
+		if ext != tt.ext {
+			t.Errorf("%d: ext was %v; want %v", i, ext, tt.ext)
+		}
+		if changed := !strings.HasPrefix(tt.in[start:], ext); changed != tt.changed {
+			t.Errorf("%d: changed was %v; want %v", i, changed, tt.changed)
+		}
+	}
+}
+
+// partChecks runs checks for each part by calling the function returned by f.
+// For every test case, f returns a function that maps a Part to the string
+// found for it, or nil to skip the case. The strings found for language,
+// script, region, variants and each expected extension are compared with
+// the test case, and the unused extension 'q' is checked to be empty.
+func partChecks(t *testing.T, f func(*parseTest) func(Part) string) {
+	for i, tt := range parseTests() {
+		get := f(&tt)
+		if get == nil {
+			continue
+		}
+		if s, g := get(LanguagePart), getLangID(b(tt.lang)).String(); s != g {
+			t.Errorf("%d: lang was %q; want %q", i, s, g)
+		}
+		if s, g := get(ScriptPart), tt.script; s != g {
+			t.Errorf("%d: script was %q; want %q", i, s, g)
+		}
+		if s, g := get(RegionPart), tt.region; s != g {
+			t.Errorf("%d: region was %q; want %q", i, s, g)
+		}
+		if s, g := get(VariantPart), tt.variants; s != g {
+			t.Errorf("%d: variants was %q; want %q", i, s, g)
+		}
+		for _, g := range tt.extList {
+			if s := get(Extension(g[0])); s != g[2:] {
+				t.Errorf("%d: extension '%c' was %q; want %q", i, g[0], s, g[2:])
+			}
+		}
+		if s := get(Extension('q')); s != "" {
+			// The test index was previously missing from the arguments,
+			// which garbled the message.
+			t.Errorf(`%d: unused extension 'q' was %q; want ""`, i, s)
+		}
+	}
+}
+
+// TestParseTag checks the parts of the ID returned by parseTag. Private use
+// identifiers and special rewrites are skipped, and extensions are not
+// checked because parseTag does not parse them.
+func TestParseTag(t *testing.T) {
+	partChecks(t, func(tt *parseTest) func(Part) string {
+		if strings.HasPrefix(tt.in, "x-") || tt.rewrite {
+			return nil
+		}
+		scan := makeScannerString(tt.in)
+		id, end := parseTag(&scan)
+		s := string(scan.b[:end])
+		if changed := !strings.HasPrefix(tt.in, s); changed != tt.changed && tt.ext == "" {
+			t.Errorf("%d: changed was %v; want %v", tt.i, changed, tt.changed)
+		}
+		id.str = &s
+		// Clear the expected extensions so that partChecks skips them.
+		tt.ext = ""
+		tt.extList = []string{}
+		return func(p Part) string {
+			return id.Part(p)
+		}
+	})
+}
+
+// TestParse checks the result of Parse for each test case: the extension
+// string, whether the identifier had to be reformatted, whether an error
+// was returned and, through partChecks, the individual parts.
+func TestParse(t *testing.T) {
+	partChecks(t, func(tt *parseTest) func(Part) string {
+		id, err := Parse(tt.in)
+		// Extract the extensions from the ID's string, if it has one.
+		ext := ""
+		if id.str != nil {
+			if strings.HasPrefix(*id.str, "x-") {
+				ext = *id.str
+			} else if int(id.pExt) < len(*id.str) && id.pExt > 0 {
+				ext = (*id.str)[id.pExt+1:]
+			}
+		}
+		if ext != tt.ext {
+			t.Errorf("%d: ext was %q; want %q", tt.i, ext, tt.ext)
+		}
+		changed := id.str == nil || !strings.HasPrefix(tt.in, *id.str)
+		if changed != tt.changed {
+			t.Errorf("%d: changed was %v; want %v", tt.i, changed, tt.changed)
+		}
+		if (err != nil) != tt.invalid {
+			t.Errorf("%d: invalid was %v; want %v. Error: %v", tt.i, err != nil, tt.invalid, err)
+		}
+		return func(p Part) string {
+			return id.Part(p)
+		}
+	})
+}
+
+// TestPart checks the strings returned by ID.Part for parsed identifiers.
+func TestPart(t *testing.T) {
+	partChecks(t, func(tt *parseTest) func(Part) string {
+		id, _ := Parse(tt.in)
+		return id.Part
+	})
+}
+
+// TestParts checks the map returned by ID.Parts for parsed identifiers.
+func TestParts(t *testing.T) {
+	partChecks(t, func(tt *parseTest) func(Part) string {
+		id, _ := Parse(tt.in)
+		parts := id.Parts()
+		return func(p Part) string { return parts[p] }
+	})
+}
+
+// TestCompose1 composes an ID from the individual, upper-cased parts of
+// each test case and checks the parts of the result. Invalid tokens and a
+// repetition of the variants are added to any variants, for which Compose
+// is expected to return an error.
+func TestCompose1(t *testing.T) {
+	partChecks(t, func(tt *parseTest) func(Part) string {
+		m := make(map[Part]string)
+		// set stores the upper-cased s for part p, unless s is empty.
+		set := func(p Part, s string) {
+			if s != "" {
+				m[p] = strings.ToUpper(s)
+			}
+		}
+		set(LanguagePart, tt.lang)
+		set(ScriptPart, tt.script)
+		set(RegionPart, tt.region)
+		if tt.variants != "" {
+			m[VariantPart] = tt.variants + "-tooManyChars-inv@lid-" + tt.variants
+		}
+		for _, ext := range tt.extList {
+			set(Extension(ext[0]), ext[2:])
+		}
+		id, err := Compose(m)
+		if tt.variants != "" && err == nil {
+			t.Errorf("%d: no error for invalid variant", tt.i)
+		}
+		return func(p Part) string {
+			return id.Part(p)
+		}
+	})
+}
+
+// TestCompose2 composes an ID from a TagPart, built from the language,
+// script, region and variants of each test case, plus its extensions, and
+// checks the parts of the result. An overlong token is appended to every
+// extension, for which Compose is expected to return an error.
+func TestCompose2(t *testing.T) {
+	partChecks(t, func(tt *parseTest) func(Part) string {
+		m := make(map[Part]string)
+		tag := tt.lang
+		for _, s := range []string{tt.script, tt.region, tt.variants} {
+			if s != "" {
+				tag += "-" + s
+			}
+		}
+		m[TagPart] = tag
+		for _, ext := range tt.extList {
+			m[Extension(ext[0])] = ext[2:] + "-tooManyChars"
+		}
+		id, err := Compose(m)
+		if len(tt.extList) > 0 && err == nil {
+			// The invalid token is injected into the extensions, not
+			// the variants.
+			t.Errorf("%d: no error for invalid extension", tt.i)
+		}
+		return func(p Part) string {
+			return id.Part(p)
+		}
+	})
+}
--- a/locale/tables.go
+++ b/locale/tables.go
@ -0,0 +1,612 @@
+// Generated by running
+//		maketables -url=http://www.unicode.org/Public/cldr/23/core.zip -iana=http://www.iana.org/assignments/language-subtag-registry
+// DO NOT EDIT
+
+package locale
+
+const unknownLang = 196
+
+// lang holds an alphabetically sorted list of BCP 47 language identifiers.
+// All entries are 4 bytes. The index of the identifier (divided by 4) is the language ID.
+// For 2-byte language identifiers, the two successive bytes have the following meaning:
+//     - if the first letters of the 2- and 3-letter ISO codes are the same:
+//       the second and third letters of the 3-letter ISO code.
+//     - otherwise: a 0 followed by an index into altLangISO3 that has been
+//       right-shifted by 2 bits.
+// For 3-byte language identifiers the 4th byte is 0.
+// Size: 868 bytes
+var lang string = "" +
+	"aaarabbkaeveaffrakkaammhanrgarraassmavvaayymazzebaakbeelbgul" +
+	"bhihbiisbmambnenboodbrrebsoscaatcehechhacooscrrecsescuhucvhv" +
+	"cyymdaandeeudsb\x00dvivdzzoeeweelllenngeopoes\x00\x04etsteuu" +
+	"sfaasffulfiinfjijfoaofrrafrr\x00frs\x00fyrygalegdlagllggnrng" +
+	"sw\x00guujgvlvhaauheebhiinhomohrrvhsb\x00htathuunhyyehzerian" +
+	"aidndieleigboiiiiikpkinndiodoisslittaiukuiw\x00\x02japnji" +
+	"\x00\x05jvavjwavkaatkgonkiikkjuakkazklalkmhmknankoorkok\x00k" +
+	"rauksaskuurkvomkw\x00\x00kyirlaatlbtzlgugliimlninloaoltitluu" +
+	"blvavmai\x00men\x00mglgmhahmirimis\x00mkkdmlalmnonmoolmrarms" +
+	"samtltmul\x00myyanaaunbobnddends\x00neepngdoniu\x00nlldnnnon" +
+	"oornqo\x00nrblnso\x00nvavnyyaocciojjiomrmorriossspaanpiliplo" +
+	"lpsusptorquuermohrnunroonruusrw\x00\x03saanscrdsdndsemesgags" +
+	"h\x00\x01siinsklksllvsmmosnnasoomsqqisrrpssswstotsuunsvwesww" +
+	"ataamteeltem\x00tggkthhatiirtkuktkl\x00tlgltmh\x00tnsntoontp" +
+	"i\x00trurtssottattvl\x00twwityahugigukkrund\x00urrduzzbveenv" +
+	"iievoolwalnwoolxhhoyiidyoorzahazbl\x00zhhozuulzxx\x00\xff" +
+	"\xff\xff\xff"
+
+const langNoIndexOffset = 212
+
+// langNoIndex is a bit vector of all 3-letter language codes that are not used as an index
+// in lookup tables. The language ids for these language codes are derived directly
+// from the letters and are not consecutive.
+// Size: 2197 bytes, 2197 elements
+var langNoIndex = [2197]uint8{
+	255, 253, 253, 254, 239, 255, 191, 219, 251, 255, 254, 250,
+	247, 31, 60, 87, 111, 151, 115, 248, 255, 255, 255, 112,
+	191, 3, 255, 255, 207, 5, 133, 98, 233, 255, 253, 127,
+	255, 255, 255, 119, 255, 255, 255, 255, 255, 255, 255, 227,
+	233, 255, 255, 255, 77, 184, 2, 122, 190, 255, 255, 255,
+	254, 255, 247, 255, 255, 255, 255, 223, 43, 244, 241, 240,
+	93, 231, 159, 20, 5, 32, 223, 237, 159, 63, 201, 33,
+	248, 191, 238, 255, 255, 255, 255, 255, 255, 127, 255, 255,
+	255, 255, 127, 253, 255, 255, 255, 247, 127, 255, 255, 255,
+	255, 255, 255, 231, 191, 255, 255, 223, 255, 239, 255, 255,
+	255, 255, 191, 255, 255, 255, 255, 223, 255, 255, 243, 255,
+	251, 47, 255, 255, 255, 254, 255, 255, 251, 255, 255, 247,
+	255, 255, 253, 255, 255, 255, 127, 223, 255, 255, 223, 254,
+	255, 255, 223, 255, 255, 223, 251, 255, 255, 254, 255, 255,
+	255, 255, 255, 247, 127, 191, 249, 213, 173, 127, 64, 255,
+	156, 193, 67, 44, 8, 36, 65, 0, 80, 68, 0, 128,
+	187, 255, 242, 159, 180, 66, 69, 214, 155, 52, 136, 244,
+	123, 231, 23, 86, 85, 125, 14, 28, 55, 113, 243, 239,
+	159, 255, 93, 40, 101, 8, 0, 16, 188, 255, 191, 255,
+	223, 247, 119, 55, 62, 135, 199, 223, 255, 0, 129, 0,
+	176, 5, 128, 0, 0, 0, 0, 3, 64, 0, 0, 146,
+	33, 208, 255, 125, 255, 222, 254, 94, 4, 0, 2, 100,
+	141, 25, 193, 223, 123, 34, 0, 0, 0, 223, 109, 222,
+	38, 229, 217, 241, 254, 255, 253, 207, 159, 20, 1, 12,
+	134, 0, 193, 0, 240, 197, 103, 91, 86, 137, 94, 183,
+	237, 239, 3, 0, 2, 0, 0, 0, 192, 119, 218, 87,
+	144, 105, 1, 44, 86, 123, 244, 255, 127, 127, 0, 0,
+	0, 1, 8, 70, 0, 0, 0, 176, 20, 7, 81, 18,
+	10, 0, 0, 0, 0, 0, 17, 73, 0, 0, 96, 16,
+	0, 0, 0, 16, 0, 0, 68, 4, 0, 16, 128, 4,
+	24, 0, 0, 4, 0, 128, 40, 4, 0, 0, 16, 213,
+	45, 16, 100, 53, 36, 83, 245, 212, 189, 194, 205, 1,
+	0, 128, 0, 64, 0, 0, 0, 0, 0, 4, 23, 57,
+	1, 217, 87, 137, 33, 152, 167, 0, 0, 1, 64, 130,
+	0, 0, 0, 4, 0, 0, 0, 2, 1, 64, 0, 64,
+	0, 0, 176, 254, 171, 57, 0, 2, 0, 0, 0, 4,
+	0, 0, 0, 0, 0, 32, 0, 64, 4, 0, 0, 0,
+	2, 0, 0, 0, 16, 129, 168, 5, 0, 0, 0, 0,
+	4, 32, 4, 166, 8, 4, 0, 8, 1, 80, 0, 0,
+	8, 49, 134, 64, 0, 0, 0, 0, 64, 0, 3, 117,
+	2, 16, 8, 4, 0, 0, 0, 224, 59, 179, 19, 0,
+	128, 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 255, 255, 255, 255, 255, 223, 206, 131, 162,
+	192, 255, 223, 37, 207, 31, 197, 3, 16, 32, 178, 197,
+	166, 69, 37, 155, 3, 79, 248, 223, 3, 148, 64, 16,
+	1, 14, 0, 227, 145, 84, 155, 56, 241, 125, 247, 109,
+	249, 255, 255, 125, 4, 8, 0, 1, 33, 18, 60, 95,
+	253, 15, 133, 79, 64, 64, 0, 0, 255, 253, 255, 214,
+	232, 27, 244, 55, 163, 13, 0, 0, 32, 123, 57, 2,
+	5, 132, 0, 240, 255, 127, 254, 0, 24, 4, 129, 0,
+	0, 0, 128, 16, 148, 28, 1, 0, 0, 0, 0, 0,
+	16, 64, 0, 4, 8, 180, 254, 165, 12, 64, 0, 0,
+	17, 4, 4, 108, 0, 96, 240, 255, 251, 127, 230, 24,
+	5, 159, 223, 110, 3, 0, 17, 0, 0, 0, 64, 4,
+	149, 166, 128, 40, 4, 0, 4, 81, 226, 255, 253, 63,
+	5, 9, 8, 5, 64, 0, 0, 0, 0, 16, 0, 0,
+	8, 0, 0, 0, 0, 161, 2, 108, 229, 72, 20, 136,
+	32, 192, 71, 128, 7, 0, 0, 0, 204, 80, 64, 36,
+	133, 71, 132, 64, 32, 16, 0, 0, 2, 80, 136, 17,
+	0, 209, 140, 238, 80, 19, 29, 17, 105, 6, 89, 235,
+	51, 8, 0, 32, 5, 64, 16, 0, 0, 0, 16, 68,
+	150, 73, 214, 93, 167, 129, 69, 151, 251, 0, 16, 0,
+	8, 0, 128, 0, 64, 69, 0, 1, 2, 0, 1, 64,
+	128, 0, 6, 8, 240, 235, 247, 57, 132, 153, 22, 0,
+	0, 12, 4, 1, 32, 32, 221, 162, 1, 0, 0, 0,
+	18, 68, 0, 0, 4, 16, 240, 157, 149, 19, 0, 128,
+	0, 0, 208, 18, 64, 0, 16, 240, 144, 98, 76, 210,
+	2, 1, 10, 0, 70, 4, 0, 8, 2, 0, 32, 192,
+	0, 128, 6, 0, 8, 0, 0, 0, 0, 240, 216, 239,
+	21, 2, 8, 0, 0, 1, 0, 0, 0, 0, 16, 1,
+	0, 16, 0, 0, 0, 255, 215, 227, 253, 255, 255, 255,
+	255, 255, 127, 255, 255, 254, 255, 255, 255, 255, 255, 255,
+	255, 255, 255, 223, 255, 251, 255, 255, 219, 253, 255, 255,
+	127, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+	253, 255, 223, 191, 220, 255, 255, 255, 255, 255, 255, 255,
+	255, 254, 251, 255, 255, 255, 255, 255, 255, 255, 254, 255,
+	253, 255, 255, 255, 255, 255, 255, 255, 239, 255, 255, 255,
+	255, 255, 255, 255, 255, 255, 255, 123, 253, 221, 223, 255,
+	188, 152, 5, 40, 255, 7, 240, 255, 255, 127, 0, 8,
+	0, 195, 61, 27, 6, 230, 114, 240, 255, 124, 63, 68,
+	34, 0, 159, 107, 14, 253, 255, 87, 242, 255, 63, 255,
+	242, 30, 133, 247, 255, 255, 71, 128, 1, 2, 0, 0,
+	64, 85, 159, 138, 217, 217, 14, 17, 133, 81, 208, 243,
+	255, 119, 0, 1, 5, 209, 88, 72, 0, 0, 0, 16,
+	4, 2, 0, 32, 10, 128, 123, 182, 253, 254, 254, 255,
+	255, 255, 255, 255, 255, 239, 255, 255, 223, 127, 255, 255,
+	255, 255, 255, 255, 255, 255, 255, 247, 255, 255, 219, 119,
+	255, 255, 127, 255, 255, 255, 239, 255, 189, 255, 255, 251,
+	255, 255, 255, 223, 127, 253, 255, 247, 255, 255, 247, 255,
+	255, 255, 251, 255, 239, 255, 255, 255, 255, 255, 127, 223,
+	247, 191, 239, 247, 255, 255, 255, 255, 255, 255, 255, 255,
+	254, 255, 255, 127, 255, 255, 255, 255, 255, 252, 255, 253,
+	127, 255, 255, 158, 190, 255, 238, 255, 127, 247, 127, 2,
+	130, 4, 255, 255, 255, 255, 215, 239, 255, 255, 247, 254,
+	226, 158, 231, 255, 247, 255, 86, 189, 201, 254, 255, 255,
+	255, 255, 239, 255, 253, 247, 125, 15, 167, 81, 4, 68,
+	3, 208, 85, 174, 166, 253, 189, 255, 67, 92, 91, 255,
+	255, 255, 63, 32, 20, 0, 87, 81, 130, 101, 245, 76,
+	226, 255, 255, 223, 64, 5, 197, 5, 0, 34, 0, 116,
+	105, 16, 8, 4, 65, 0, 1, 6, 0, 0, 0, 0,
+	0, 81, 96, 5, 4, 1, 0, 0, 6, 1, 32, 0,
+	24, 1, 146, 177, 253, 103, 75, 6, 148, 0, 87, 237,
+	251, 76, 157, 123, 131, 4, 98, 64, 0, 21, 66, 0,
+	0, 0, 84, 131, 249, 95, 16, 140, 201, 70, 223, 247,
+	19, 49, 0, 0, 0, 0, 0, 144, 0, 0, 0, 0,
+	0, 10, 16, 0, 1, 64, 0, 240, 223, 253, 191, 125,
+	186, 207, 255, 191, 66, 20, 132, 97, 176, 255, 93, 122,
+	4, 2, 0, 65, 45, 20, 37, 247, 237, 241, 191, 239,
+	63, 0, 0, 2, 199, 224, 30, 252, 187, 255, 253, 251,
+	247, 253, 117, 253, 255, 252, 245, 237, 71, 244, 127, 16,
+	1, 1, 196, 127, 255, 247, 221, 249, 95, 5, 134, 235,
+	245, 119, 189, 61, 0, 0, 0, 67, 112, 66, 0, 64,
+	0, 0, 1, 67, 25, 0, 8, 0, 255, 255, 255, 3,
+	0, 0, 8, 0, 0, 32, 0, 0, 128, 0, 0, 0,
+	2, 0, 0, 8, 0, 0, 32, 0, 0, 128, 0, 0,
+	0, 2, 0, 0, 8, 0, 0, 32, 0, 0, 128, 0,
+	0, 0, 2, 0, 0, 8, 0, 0, 32, 0, 0, 128,
+	0, 0, 0, 2, 0, 0, 8, 0, 0, 32, 0, 0,
+	128, 239, 189, 231, 87, 238, 19, 93, 9, 193, 64, 33,
+	250, 23, 1, 128, 0, 0, 0, 0, 240, 254, 255, 191,
+	0, 35, 0, 32, 0, 0, 8, 0, 0, 48, 181, 227,
+	16, 0, 0, 0, 17, 36, 22, 0, 1, 2, 16, 131,
+	163, 1, 80, 0, 1, 131, 17, 8, 0, 0, 0, 240,
+	223, 255, 127, 18, 170, 16, 127, 216, 82, 0, 128, 32,
+	0, 0, 0, 0, 64, 16, 2, 2, 9, 0, 16, 66,
+	0, 97, 95, 156, 49, 0, 0, 0, 1, 84, 2, 0,
+	0, 0, 0, 0, 66, 1, 0, 0, 0, 191, 223, 255,
+	255, 255, 255, 63, 223, 94, 207, 189, 191, 175, 255, 255,
+	127, 75, 64, 16, 241, 253, 239, 253, 247, 255, 255, 251,
+	223, 255, 111, 241, 123, 241, 127, 255, 127, 255, 238, 247,
+	239, 191, 255, 219, 255, 223, 255, 253, 126, 191, 87, 247,
+	111, 129, 118, 31, 220, 247, 253, 255, 255, 255, 251, 254,
+	255, 31, 87, 31, 239, 95, 16, 24, 98, 254, 255, 159,
+	21, 159, 21, 15, 125, 70, 125, 161, 130, 241, 247, 126,
+	255, 255, 255, 255, 255, 253, 221, 255, 191, 253, 246, 95,
+	254, 31, 64, 152, 2, 255, 227, 255, 243, 246, 254, 223,
+	255, 223, 127, 80, 30, 5, 123, 180, 223, 190, 255, 255,
+	247, 247, 255, 247, 127, 255, 255, 254, 219, 247, 215, 249,
+	239, 47, 128, 191, 197, 255, 255, 255, 255, 159, 255, 255,
+	255, 255, 253, 191, 223, 127, 6, 29, 87, 255, 248, 219,
+	93, 199, 125, 22, 185, 234, 107, 160, 28, 32, 0, 48,
+	2, 4, 36, 72, 4, 0, 0, 64, 212, 6, 4, 0,
+	0, 4, 0, 4, 0, 48, 1, 6, 80, 0, 8, 0,
+	0, 0, 36, 0, 4, 0, 16, 140, 88, 213, 73, 15,
+	20, 79, 241, 22, 68, 81, 10, 10, 64, 0, 0, 64,
+	0, 8, 0, 0, 0, 220, 255, 235, 31, 88, 8, 65,
+	4, 160, 4, 0, 48, 18, 64, 34, 0, 16, 0, 0,
+	0, 0, 0, 0, 1, 0, 0, 0, 128, 16, 16, 191,
+	111, 147, 0, 1, 0, 0, 0, 0, 0, 0, 0, 192,
+	128, 45, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0,
+	192, 134, 194, 2, 0, 0, 0, 1, 223, 24, 0, 0,
+	18, 240, 255, 121, 63, 0, 37, 0, 0, 0, 10, 0,
+	0, 0, 0, 0, 0, 64, 0, 16, 3, 0, 9, 32,
+	0, 0, 1, 0, 0, 131, 0, 0, 0, 0, 1, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255,
+	255, 255, 207, 126, 174, 17, 16, 0, 0, 146, 0, 4,
+	141, 241, 94, 0, 1, 0, 48, 20, 4, 85, 16, 1,
+	4, 246, 63, 122, 5, 4, 0, 176, 128, 0, 69, 85,
+	151, 125, 159, 113, 204, 120, 85, 67, 244, 87, 103, 20,
+	1, 0, 0, 0, 0, 0, 44, 247, 219, 31, 80, 96,
+	3, 72, 5, 16, 139, 56, 186, 1, 0, 0, 48, 0,
+	36, 68, 0, 0, 0, 3, 16, 2, 1, 0, 0, 240,
+	149, 255, 215, 65, 156, 48, 214, 120, 122, 17, 64, 0,
+	164, 132, 233, 65, 0, 0, 0, 35, 40, 18, 116, 0,
+	232, 48, 144, 42, 18, 0, 0, 0, 255, 239, 255, 127,
+	133, 83, 244, 239, 255, 255, 50, 152, 131, 76, 245, 66,
+	80, 221, 95, 20, 0, 128, 192, 68, 140, 22, 159, 251,
+	55, 125, 237, 127, 189, 36, 175, 1, 68, 24, 1, 85,
+	72, 2, 8, 16, 40, 0, 128, 0, 16, 32, 36, 0,
+	255, 255, 255, 111, 254, 1, 6, 136, 10, 0, 22, 1,
+	1, 21, 43, 62, 1, 0, 0, 16, 128, 41, 68, 2,
+	2, 0, 225, 191, 191, 3, 0, 0, 16, 212, 167, 209,
+	84, 158, 68, 223, 253, 143, 102, 179, 85, 32, 212, 195,
+	216, 48, 61, 128, 0, 0, 0, 76, 180, 16, 193, 132,
+	110, 80, 0, 34, 16, 127, 191, 219, 7, 0, 32, 16,
+	128, 178, 5, 16, 0, 64, 0, 0, 16, 2, 17, 0,
+	240, 255, 253, 63, 5, 0, 18, 129, 0, 0, 0, 8,
+	0, 16, 12, 2, 0, 0, 0, 0, 131, 48, 2, 40,
+	132, 0, 51, 192, 35, 36, 0, 0, 0, 203, 228, 58,
+	66, 200, 20, 241, 255, 255, 127, 22, 1, 1, 132, 80,
+	7, 252, 255, 255, 15, 1, 0, 64, 16, 56, 1, 1,
+	28, 18, 64, 225, 118, 22, 8, 3, 16, 0, 0, 0,
+	1, 0, 0, 0, 0, 0, 32, 36, 10, 64, 128, 0,
+	0,
+}
+
+// altLangISO3 holds an alphabetically sorted list of 3-letter language code alternatives
+// to 2-letter language codes that cannot be derived using the method described above.
+// Each 3-letter code is followed by its 1-byte langID.
+// Size: 44 bytes
+var altLangISO3 string = "corchbs\xa1hebPkin\x9bspa(yidR\xff\xff\xff\xff"
+
+// langOldMap maps deprecated langIDs to their suggested replacements.
+// Size: 108 bytes, 27 elements
+var langOldMap = [27]struct {
+	from uint16
+	to   uint16
+}{
+	{from: 0x4b, to: 0x46},
+	{from: 0x50, to: 0x3c},
+	{from: 0x52, to: 0xcd},
+	{from: 0x54, to: 0x53},
+	{from: 0x77, to: 0x99},
+	{from: 0x35b, to: 0x253d},
+	{from: 0x465, to: 0xa85},
+	{from: 0x660, to: 0x2ec4},
+	{from: 0x717, to: 0x21fc},
+	{from: 0x720, to: 0x765},
+	{from: 0x75e, to: 0x3dcb},
+	{from: 0xa81, to: 0x1bfc},
+	{from: 0xa90, to: 0x2a3c},
+	{from: 0x10c1, to: 0x93d},
+	{from: 0x151b, to: 0x18a3},
+	{from: 0x1616, to: 0x2752},
+	{from: 0x1bdf, to: 0x1c7f},
+	{from: 0x1e24, to: 0x2a07},
+	{from: 0x226b, to: 0x2256},
+	{from: 0x2307, to: 0x2256},
+	{from: 0x3090, to: 0x1472},
+	{from: 0x33d4, to: 0x2dca},
+	{from: 0x340e, to: 0x3548},
+	{from: 0x3434, to: 0x3b62},
+	{from: 0x3457, to: 0x2a3c},
+	{from: 0x4051, to: 0x2ec4},
+	{from: 0x416c, to: 0x1fab},
+}
+
+// langMacroMap maps languages to their macro language replacement, if applicable.
+// Size: 260 bytes, 65 elements
+var langMacroMap = [65]struct {
+	from uint16
+	to   uint16
+}{
+	{from: 0x86, to: 0x7e},
+	{from: 0xa1, to: 0xa9},
+	{from: 0xb7, to: 0xee3},
+	{from: 0xc0, to: 0x4},
+	{from: 0x1d2, to: 0x1a53},
+	{from: 0x204, to: 0xa8},
+	{from: 0x28f, to: 0x7},
+	{from: 0x355, to: 0xa},
+	{from: 0x367, to: 0xb},
+	{from: 0x3ae, to: 0x383},
+	{from: 0x3b7, to: 0x452},
+	{from: 0x5d8, to: 0x2000},
+	{from: 0x5df, to: 0x580},
+	{from: 0x73d, to: 0x32dd},
+	{from: 0x761, to: 0xd1},
+	{from: 0x85b, to: 0x1a},
+	{from: 0x96a, to: 0xa34},
+	{from: 0x979, to: 0x22d1},
+	{from: 0x99a, to: 0x99d},
+	{from: 0x9a0, to: 0x4562},
+	{from: 0xc72, to: 0x29},
+	{from: 0xca6, to: 0x2091},
+	{from: 0xd42, to: 0x4a},
+	{from: 0xe1b, to: 0x4},
+	{from: 0x1012, to: 0x2c},
+	{from: 0x10c5, to: 0x8e},
+	{from: 0x10d4, to: 0x1267},
+	{from: 0x120c, to: 0x1225},
+	{from: 0x12ba, to: 0x37},
+	{from: 0x131c, to: 0x10c6},
+	{from: 0x13ab, to: 0x1358},
+	{from: 0x13b8, to: 0x1495},
+	{from: 0x142c, to: 0x322d},
+	{from: 0x16fc, to: 0x4f},
+	{from: 0x1bfc, to: 0x76},
+	{from: 0x1c85, to: 0x61},
+	{from: 0x1c90, to: 0x5f},
+	{from: 0x1c94, to: 0x56},
+	{from: 0x1c9b, to: 0x5e},
+	{from: 0x1cd7, to: 0x62},
+	{from: 0x1e04, to: 0x4cc},
+	{from: 0x2014, to: 0x6d},
+	{from: 0x214b, to: 0x6de},
+	{from: 0x229b, to: 0x2dc1},
+	{from: 0x24b6, to: 0x81},
+	{from: 0x26bc, to: 0x8d},
+	{from: 0x279e, to: 0x8f},
+	{from: 0x289e, to: 0x94},
+	{from: 0x28ea, to: 0x2b},
+	{from: 0x29a1, to: 0x70},
+	{from: 0x29c3, to: 0x1de7},
+	{from: 0x2d79, to: 0x96},
+	{from: 0x2f08, to: 0x2f30},
+	{from: 0x31fa, to: 0x1c67},
+	{from: 0x3218, to: 0x9d},
+	{from: 0x329f, to: 0xae},
+	{from: 0x34fe, to: 0xb8},
+	{from: 0x36f0, to: 0x933},
+	{from: 0x383b, to: 0xc6},
+	{from: 0x3f1a, to: 0x1cc6},
+	{from: 0x3f6f, to: 0x935},
+	{from: 0x4085, to: 0xcd},
+	{from: 0x42e0, to: 0x42e7},
+	{from: 0x44b8, to: 0x79},
+	{from: 0x4549, to: 0xcf},
+}
+
+// tagAlias holds a mapping from legacy and grandfathered tags to their locale ID.
+// Size: 497 bytes
+var tagAlias = map[string]uint16{
+	"aa-SAAHO":   12872,
+	"art-lojban": 6336,
+	"i-ami":      532,
+	"i-bnn":      1239,
+	"i-hak":      4954,
+	"i-klingon":  13349,
+	"i-lux":      102,
+	"i-navajo":   138,
+	"i-pwn":      10937,
+	"i-tao":      13070,
+	"i-tay":      13080,
+	"i-tsu":      13544,
+	"no-BOKMAL":  126,
+	"no-NYNORSK": 133,
+	"no-bok":     126,
+	"no-nyn":     133,
+	"sgn-BE-FR":  12511,
+	"sgn-BE-NL":  14583,
+	"sgn-CH-DE":  12542,
+	"zh-guoyu":   209,
+	"zh-hakka":   4954,
+	"zh-min":     196,
+	"zh-min-nan": 9013,
+	"zh-xiang":   5425,
+}
+
+const unknownScript = 186
+
+// script is an alphabetically sorted list of ISO 15924 codes. The index
+// of the script in the string, divided by 4, is the internal script ID.
+// Size: 768 bytes
+var script string = "" +
+	"AfakAghbArabArmiArmnAvstBaliBamuBassBatkBengBlisBopoBrahBrai" +
+	"BugiBuhdCakmCansCariChamCherCirtCoptCprtCyrlCyrsDevaDsrtDupl" +
+	"EgydEgyhEgypElbaEthiGeokGeorGlagGothGranGrekGujrGuruHangHani" +
+	"HanoHansHantHebrHiraHluwHmngHrktHungIndsItalJavaJpanJurcKali" +
+	"KanaKharKhmrKhojKndaKoreKpelKthiLanaLaooLatfLatgLatnLepcLimb" +
+	"LinaLinbLisuLomaLyciLydiMahjMandManiMayaMendMercMeroMlymMong" +
+	"MoonMrooMteiMymrNarbNbatNkgbNkooNshuOgamOlckOrkhOryaOsmaPalm" +
+	"PermPhagPhliPhlpPhlvPhnxPlrdPrtiQaaaQaabQaacQaadQaaeQaafQaag" +
+	"QaahQaaiQaajQaakQaalQaamQaanQaaoQaapQaaqQaarQaasQaatQaauQaav" +
+	"QaawQaaxQaayQaazRjngRoroRunrSamrSaraSarbSaurSgnwShawShrdSind" +
+	"SinhSoraSundSyloSyrcSyreSyrjSyrnTagbTakrTaleTaluTamlTangTavt" +
+	"TeluTengTfngTglgThaaThaiTibtTirhUgarVaiiVispWaraWoleXpeoXsux" +
+	"YiiiZinhZmthZsymZxxxZyyyZzzz\xff\xff\xff\xff"
+
+// suppressScript is an index from langID to the dominant script for that language,
+// if it exists.  If a script is given, it should be suppressed from the language tag.
+// Size: 212 bytes, 212 elements
+var suppressScript = [212]uint8{
+	186, 25, 186, 72, 186, 34, 186, 2, 10, 186, 72, 186,
+	186, 25, 25, 186, 186, 186, 10, 186, 186, 72, 72, 186,
+	72, 186, 186, 72, 186, 186, 72, 72, 72, 72, 169, 171,
+	186, 40, 72, 72, 72, 72, 72, 2, 186, 72, 72, 72,
+	72, 72, 72, 72, 72, 186, 72, 72, 72, 41, 72, 186,
+	48, 27, 186, 72, 72, 72, 72, 4, 186, 186, 72, 186,
+	186, 186, 186, 72, 186, 72, 72, 186, 48, 57, 186, 186,
+	186, 36, 186, 186, 186, 25, 72, 62, 64, 65, 27, 186,
+	186, 186, 186, 186, 186, 72, 72, 186, 186, 72, 69, 72,
+	186, 72, 27, 72, 72, 72, 186, 186, 25, 88, 186, 72,
+	27, 72, 72, 186, 93, 72, 72, 72, 72, 27, 186, 72,
+	72, 72, 72, 97, 72, 72, 186, 72, 186, 186, 72, 102,
+	186, 42, 186, 72, 2, 72, 72, 72, 72, 72, 25, 72,
+	186, 186, 186, 186, 72, 186, 150, 72, 72, 72, 186, 72,
+	72, 186, 72, 72, 186, 72, 72, 162, 165, 72, 186, 170,
+	34, 186, 72, 72, 72, 72, 72, 72, 72, 72, 186, 72,
+	186, 186, 186, 25, 186, 2, 186, 72, 72, 186, 186, 186,
+	72, 48, 186, 186, 11, 186, 72, 186,
+}
+
+const unknownRegion = 338
+
+// isoRegionOffset needs to be added to the index of regionISO to obtain the regionID
+// for 2-letter ISO codes. (The first isoRegionOffset regionIDs are reserved for
+// the UN.M49 codes used for groups.)
+const isoRegionOffset = 30
+
+// regionISO holds a list of alphabetically sorted 2-letter ISO region codes.
+// Each 2-letter code is followed by two bytes with the following meaning:
+//     - [A-Z]{2}: the first letter of the 2-letter code plus these two
+//                 letters form the 3-letter ISO code.
+//     - 0, n:     index into altRegionISO3.
+// Size: 1256 bytes
+var regionISO string = "" +
+	"AAAAACSCADNDAEREAFFGAGTGAIIAALLBAMRMANNTAOGOAQTAARRGASSMATUT" +
+	"AUUSAWBWAXLAAZZEBAIHBBRBBDGDBEELBFFABGGRBHHRBIDIBJENBLLMBMMU" +
+	"BNRNBOOLBQESBRRABSHSBTTNBUURBVVTBWWABYLRBZLZCAANCCCKCDODCFAF" +
+	"CGOGCHHECIIVCKOKCLHLCMMRCNHNCOOLCPPTCRRICS\x00\x00CUUBCVPVCW" +
+	"UWCXXRCYYPCZZEDDDRDEEUDGGADJJIDKNKDMMADOOMDZZAEA  ECCUEESTEG" +
+	"GYEHSHERRIESSPETTHEU\x00\x03FIINFJJIFKLKFMSMFOROFRRAFXXXGAAB" +
+	"GBBRGDRDGEEOGFUFGGGYGHHAGIIBGLRLGMMBGNINGPLPGQNQGRRCGS\x00" +
+	"\x06GTTMGUUMGWNBGYUYHKKGHMMDHNNDHRRVHTTIHUUNIC  IDDNIERLILSR" +
+	"IMMNINNDIOOTIQRQIRRNISSLITTAJEEYJMAMJOORJPPNKEENKGGZKHHMKIIR" +
+	"KM\x00\tKNNAKP\x00\fKRORKWWTKY\x00\x0fKZAZLAAOLBBNLCCALIIELK" +
+	"KALRBRLSSOLTTULUUXLVVALYBYMAARMCCOMDDAMENEMFAFMGDGMHHLMKKDML" +
+	"LIMMMRMNNGMOACMPNPMQTQMRRTMSSRMTLTMUUSMVDVMWWIMXEXMYYSMZOZNA" +
+	"AMNCCLNEERNFFKNGGANIICNLLDNOORNPPLNRRUNTTZNUIUNZZLOMMNPAANPE" +
+	"ERPFYFPGNGPHHLPKAKPLOLPM\x00\x12PNCNPRRIPSSEPTRTPWLWPYRYQAAT" +
+	"QMMMQNNNQOOOQPPPQQQQQRRRQSSSQTTTQU  QVVVQWWWQXXXQYYYQZZZREEU" +
+	"ROOURS\x00\x15RUUSRWWASAAUSBLBSCYCSDDNSEWESGGPSHHNSIVNSJJMSK" +
+	"VKSLLESMMRSNENSOOMSRURSSSDSTTPSUUNSVLVSXXMSYYRSZWZTAAATCCATD" +
+	"CDTF\x00\x18TGGOTHHATJJKTKKLTLLSTMKMTNUNTOONTPMPTRURTTTOTVUV" +
+	"TWWNTZZAUAKRUGGAUMMIUSSAUYRYUZZBVAATVCCTVEENVGGBVIIRVNNMVUUT" +
+	"WFLFWSSMXAAAXBBBXCCCXDDDXEEEXFFFXGGGXHHHXIIIXJJJXKKKXLLLXMMM" +
+	"XNNNXOOOXPPPXQQQXRRRXSSSXTTTXUUUXVVVXWWWXXXXXYYYXZZZYDMDYEEM" +
+	"YT\x00\x1bYUUGZAAFZMMBZRARZWWEZZZZ\xff\xff\xff\xff"
+
+// altRegionISO3 holds a list of 3-letter region codes that cannot be
+// mapped to 2-letter codes using the default algorithm. This is a short list.
+// Size: 46 bytes
+var altRegionISO3 string = "SCGQUUSGSCOMPRKCYMSPMSRBATFMYT"
+
+// altRegionIDs holds a list of regionIDs the positions of which match those
+// of the 3-letter ISO codes in altRegionISO3.
+// Size: 20 bytes, 10 elements
+var altRegionIDs = [10]uint16{
+	85, 108, 130, 160, 162, 165, 222, 246, 274, 332,
+}
+
+// m49 maps regionIDs to UN.M49 codes. The first isoRegionOffset entries are
+// codes indicating collections of regions.
+// Size: 678 bytes, 339 elements
+var m49 = [339]uint16{
+	1, 2, 3, 5, 9, 11, 13, 14, 15, 17, 18, 19,
+	21, 29, 30, 34, 35, 39, 53, 54, 57, 61, 142, 143,
+	145, 150, 151, 154, 155, 419, 958, 0, 20, 784, 4, 28,
+	660, 8, 51, 530, 24, 10, 32, 16, 40, 36, 533, 248,
+	31, 70, 52, 50, 56, 854, 100, 48, 108, 204, 652, 60,
+	96, 68, 535, 76, 44, 64, 104, 74, 72, 112, 84, 124,
+	166, 180, 140, 178, 756, 384, 184, 152, 120, 156, 170, 0,
+	188, 891, 192, 132, 531, 162, 196, 203, 278, 276, 0, 262,
+	208, 212, 214, 12, 0, 218, 233, 818, 732, 232, 724, 231,
+	967, 246, 242, 238, 583, 234, 250, 249, 266, 826, 308, 268,
+	254, 831, 288, 292, 304, 270, 324, 312, 226, 300, 239, 320,
+	316, 624, 328, 344, 334, 340, 191, 332, 348, 0, 360, 372,
+	376, 833, 356, 86, 368, 364, 352, 380, 832, 388, 400, 392,
+	404, 417, 116, 296, 174, 659, 408, 410, 414, 136, 398, 418,
+	422, 662, 438, 144, 430, 426, 440, 442, 428, 434, 504, 492,
+	498, 499, 663, 450, 584, 807, 466, 104, 496, 446, 580, 474,
+	478, 500, 470, 480, 462, 454, 484, 458, 508, 516, 540, 562,
+	574, 566, 558, 528, 578, 524, 520, 536, 570, 554, 512, 591,
+	604, 258, 598, 608, 586, 616, 666, 612, 630, 275, 620, 585,
+	600, 634, 959, 960, 961, 962, 963, 964, 965, 966, 0, 968,
+	969, 970, 971, 972, 638, 642, 688, 643, 646, 682, 90, 690,
+	729, 752, 702, 654, 705, 744, 703, 694, 674, 686, 706, 740,
+	728, 678, 810, 222, 534, 760, 748, 0, 796, 148, 260, 768,
+	764, 762, 772, 626, 795, 788, 776, 626, 792, 780, 798, 158,
+	834, 804, 800, 581, 840, 858, 860, 336, 670, 862, 92, 850,
+	704, 548, 876, 882, 973, 974, 975, 976, 977, 978, 979, 980,
+	981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992,
+	993, 994, 995, 996, 997, 998, 720, 887, 175, 891, 710, 894,
+	180, 716, 999,
+}
+
+// currency holds an alphabetically sorted list of canonical 3-letter currency identifiers.
+// Each identifier is followed by a byte of which the 6 most significant bits
+// indicate the rounding and the 2 least significant bits indicate the
+// number of decimal positions.
+// Size: 1208 bytes
+var currency string = "" +
+	"ADP\x04AED\x06AFA\x06AFN\x04ALK\x06ALL\x04AMD\x04ANG\x06AOA" +
+	"\x06AOK\x06AON\x06AOR\x06ARA\x06ARL\x06ARM\x06ARP\x06ARS\x06" +
+	"ATS\x06AUD\x06AWG\x06AZM\x06AZN\x06BAD\x06BAM\x06BAN\x06BBD" +
+	"\x06BDT\x06BEC\x06BEF\x06BEL\x06BGL\x06BGM\x06BGN\x06BGO\x06" +
+	"BHD\aBIF\x04BMD\x06BND\x06BOB\x06BOL\x06BOP\x06BOV\x06BRB" +
+	"\x06BRC\x06BRE\x06BRL\x06BRN\x06BRR\x06BRZ\x06BSD\x06BTN\x06" +
+	"BUK\x06BWP\x06BYB\x06BYR\x04BZD\x06CAD\x06CDF\x06CHE\x06CHF" +
+	"\x06CHW\x06CLE\x06CLF\x04CLP\x04CNX\x06CNY\x06COP\x04COU\x06" +
+	"CRC\x04CSD\x06CSK\x06CUC\x06CUP\x06CVE\x06CYP\x06CZK\x06DDM" +
+	"\x06DEM\x06DJF\x04DKK\x06DOP\x06DZD\x06ECS\x06ECV\x06EEK\x06" +
+	"EGP\x06ERN\x06ESA\x06ESB\x06ESP\x04ETB\x06EUR\x06FIM\x06FJD" +
+	"\x06FKP\x06FRF\x06GBP\x06GEK\x06GEL\x06GHC\x06GHS\x06GIP\x06" +
+	"GMD\x06GNF\x04GNS\x06GQE\x06GRD\x06GTQ\x06GWE\x06GWP\x06GYD" +
+	"\x04HKD\x06HNL\x06HRD\x06HRK\x06HTG\x06HUF\x04IDR\x04IEP\x06" +
+	"ILP\x06ILR\x06ILS\x06INR\x06IQD\x04IRR\x04ISJ\x06ISK\x04ITL" +
+	"\x04JMD\x06JOD\aJPY\x04KES\x06KGS\x06KHR\x06KMF\x04KPW\x04KR" +
+	"H\x06KRO\x06KRW\x04KWD\aKYD\x06KZT\x06LAK\x04LBP\x04LKR\x06L" +
+	"RD\x06LSL\x06LTL\x06LTT\x06LUC\x06LUF\x04LUL\x06LVL\x06LVR" +
+	"\x06LYD\aMAD\x06MAF\x06MCF\x06MDC\x06MDL\x06MGA\x04MGF\x04MK" +
+	"D\x06MKN\x06MLF\x06MMK\x04MNT\x04MOP\x06MRO\x04MTL\x06MTP" +
+	"\x06MUR\x04MVP\x06MVR\x06MWK\x06MXN\x06MXP\x06MXV\x06MYR\x06" +
+	"MZE\x06MZM\x06MZN\x06NAD\x06NGN\x06NIC\x06NIO\x06NLG\x06NOK" +
+	"\x06NPR\x06NZD\x06OMR\aPAB\x06PEI\x06PEN\x06PES\x06PGK\x06PH" +
+	"P\x06PKR\x04PLN\x06PLZ\x06PTE\x06PYG\x04QAR\x06RHD\x06ROL" +
+	"\x06RON\x06RSD\x04RUB\x06RUR\x06RWF\x04SAR\x06SBD\x06SCR\x06" +
+	"SDD\x06SDG\x06SDP\x06SEK\x06SGD\x06SHP\x06SIT\x06SKK\x06SLL" +
+	"\x04SOS\x04SRD\x06SRG\x06SSP\x06STD\x04SUR\x06SVC\x06SYP\x04" +
+	"SZL\x06THB\x06TJR\x06TJS\x06TMM\x04TMT\x06TND\aTOP\x06TPE" +
+	"\x06TRL\x04TRY\x06TTD\x06TWD\x06TZS\x04UAH\x06UAK\x06UGS\x06" +
+	"UGX\x04USD\x06USN\x06USS\x06UYI\x06UYP\x06UYU\x06UZS\x04VEB" +
+	"\x06VEF\x06VND\x04VNN\x06VUV\x04WST\x06XAF\x04XAG\x06XAU\x06" +
+	"XBA\x06XBB\x06XBC\x06XBD\x06XCD\x06XDR\x06XEU\x06XFO\x06XFU" +
+	"\x06XOF\x04XPD\x06XPF\x04XPT\x06XRE\x06XSU\x06XTS\x06XUA\x06" +
+	"XXX\x06YDD\x06YER\x04YUD\x06YUM\x06YUN\x06YUR\x06ZAL\x06ZAR" +
+	"\x06ZMK\x04ZMW\x06ZRN\x06ZRZ\x06ZWD\x04ZWL\x06ZWR\x06\xff" +
+	"\xff\xff\xff"
+
+const unknownCurrency = 281
+
+// nRegionGroups is the number of region groups.  All regionIDs < nRegionGroups
+// are groups.
+const nRegionGroups = 32
+
+// regionInclusion maps region identifiers to sets of regions in regionInclusionBits,
+// where each set holds all groupings that are directly connected in a region
+// containment graph.
+// Size: 339 bytes, 339 elements
+var regionInclusion = [339]uint8{
+	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+	12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+	24, 25, 26, 27, 28, 29, 32, 33, 34, 35, 36, 37,
+	37, 34, 35, 37, 38, 33, 39, 40, 41, 42, 37, 43,
+	35, 34, 37, 36, 41, 44, 45, 35, 46, 44, 37, 47,
+	48, 39, 37, 39, 37, 36, 48, 33, 49, 50, 51, 47,
+	33, 38, 38, 38, 52, 44, 40, 39, 38, 53, 39, 33,
+	51, 34, 37, 44, 37, 33, 54, 45, 52, 41, 33, 46,
+	55, 37, 37, 56, 56, 39, 55, 56, 56, 46, 57, 46,
+	31, 55, 58, 39, 59, 43, 41, 52, 38, 55, 37, 35,
+	39, 43, 44, 34, 47, 44, 44, 37, 38, 57, 33, 51,
+	59, 44, 39, 53, 33, 51, 34, 37, 45, 56, 48, 55,
+	35, 43, 36, 33, 35, 36, 43, 57, 43, 37, 35, 53,
+	46, 60, 48, 59, 46, 37, 53, 53, 35, 37, 60, 48,
+	35, 37, 52, 36, 44, 49, 55, 41, 55, 56, 56, 52,
+	50, 34, 37, 46, 59, 34, 44, 48, 53, 53, 59, 37,
+	44, 37, 57, 46, 36, 46, 51, 48, 46, 49, 58, 44,
+	42, 44, 51, 41, 43, 36, 59, 35, 40, 42, 35, 51,
+	39, 40, 58, 48, 36, 45, 47, 40, 37, 35, 57, 59,
+	39, 35, 32, 32, 30, 32, 32, 32, 32, 32, 61, 32,
+	32, 32, 32, 32, 46, 45, 34, 50, 46, 35, 58, 46,
+	56, 55, 48, 44, 57, 43, 45, 44, 34, 44, 46, 39,
+	56, 38, 50, 51, 37, 35, 49, 33, 37, 38, 33, 44,
+	48, 60, 40, 48, 60, 56, 40, 48, 35, 37, 40, 53,
+	46, 50, 46, 33, 47, 39, 60, 34, 37, 39, 37, 37,
+	48, 58, 40, 40, 32, 32, 32, 32, 32, 32, 32, 32,
+	32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+	32, 32, 32, 32, 32, 32, 35, 35, 46, 34, 49, 46,
+	38, 46, 32,
+}
+
+// regionInclusionBits is an array of bit vectors where every vector represents
+// a set of region groupings.  These sets are used to compute the distance
+// between two regions for the purpose of locale matching.
+// Size: 300 bytes, 75 elements
+var regionInclusionBits = [75]uint32{
+	37750803, 1955, 14404, 536872968, 1077674001, 34, 536873028, 130, 258, 514, 1026, 536885325,
+	6148, 536881156, 4210688, 4227072, 4259840, 33685504, 262160, 524304, 1048592, 2097168, 29474817, 12582912,
+	20971520, 2650931201, 100663296, 167772160, 301989888, 536881224, 1073741840, 2181038080, 1, 1073741824, 131072, 16777216,
+	32768, 8192, 512, 8, 2097152, 2415919104, 262144, 134217728, 32, 2214592512, 128, 4096,
+	65536, 1024, 67108864, 64, 268435456, 16384, 2164260864, 2281701376, 256, 2147614720, 524288, 1048576,
+	8388608, 33554432, 4294967295, 37752755, 1115424787, 574634079, 63031315, 2655127571, 2449473536, 2248146944, 2202009600, 2315255808,
+	2181169152, 2680406017, 2680408083,
+}
+
+// regionInclusionNext marks, for each entry in regionInclusionBits, the set of
+// all groups that are reachable from the groups set in the respective entry.
+// Size: 75 bytes, 75 elements
+var regionInclusionNext = [75]uint8{
+	62, 63, 11, 11, 64, 1, 11, 1, 1, 1, 1, 65,
+	11, 11, 22, 22, 22, 25, 4, 4, 4, 4, 66, 22,
+	22, 67, 25, 25, 25, 11, 4, 25, 0, 30, 17, 24,
+	15, 13, 9, 3, 21, 68, 18, 27, 5, 69, 7, 12,
+	16, 10, 26, 6, 28, 14, 70, 71, 8, 72, 19, 20,
+	23, 25, 62, 62, 62, 62, 62, 62, 25, 25, 73, 25,
+	25, 74, 62,
+}
+
+// Size: 8.7K (8876 bytes); Check: D7ACA2A7