go.text/locale: moved package from go.exp.

R=r
CC=golang-dev
https://golang.org/cl/9893043
This commit is contained in:
Marcel van Lohuizen 2013-05-31 14:31:13 +02:00
Родитель 809f98b91c
Коммит 3942ae31cd
12 изменённых файлов: 3632 добавлений и 0 удалений

16
locale/Makefile Normal file
Просмотреть файл

@ -0,0 +1,16 @@
# Copyright 2013 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
CLEANFILES+=maketables
maketables: maketables.go
go build $^
tables: maketables
./maketables > tables.go
gofmt -w -s tables.go
# Build (but do not run) maketables during testing,
# just to make sure it still compiles.
testshort: maketables

90
locale/examples_test.go Normal file
Просмотреть файл

@ -0,0 +1,90 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package locale_test
import (
"code.google.com/p/go.text/locale"
"fmt"
)
// ExampleID_Canonicalize prints every sample identifier canonicalized once
// with the BCP47 rule set and once with the Macro rule.
func ExampleID_Canonicalize() {
	samples := []string{
		"en-Latn",
		"zh-cmn",
		"bjd",
		"iw-Latn-fonipa-u-cu-usd",
	}
	for _, id := range samples {
		// Parse errors are ignored; the example only shows canonicalization.
		loc, _ := locale.Parse(id)
		fmt.Printf("BCP47(%s) -> %s\n", id, loc.Canonicalize(locale.BCP47))
		fmt.Printf("Macro(%s) -> %s\n", id, loc.Canonicalize(locale.Macro))
	}
	// Output:
	// BCP47(en-Latn) -> en
	// Macro(en-Latn) -> en-Latn
	// BCP47(zh-cmn) -> cmn
	// Macro(zh-cmn) -> zh
	// BCP47(bjd) -> drl
	// Macro(bjd) -> bjd
	// BCP47(iw-Latn-fonipa-u-cu-usd) -> he-Latn-fonipa-u-cu-usd
	// Macro(iw-Latn-fonipa-u-cu-usd) -> iw-Latn-fonipa-u-cu-usd
}
// ExampleID_Parent prints the parent of a fully qualified identifier.
func ExampleID_Parent() {
	parent := locale.Make("sl-Latn-IT-nedis").Parent()
	fmt.Println(parent)
	// TODO:Output: sl-Latn-IT
}
// ExampleID_Written prints the result of Written for a fully qualified identifier.
func ExampleID_Written() {
	written := locale.Make("sl-Latn-IT-nedis").Written()
	fmt.Println(written)
	// TODO:Output: sl-Latn
}
// ExampleID_Script prints the script and confidence reported for two
// identifiers that do not name a script explicitly.
func ExampleID_Script() {
	english, serbian := locale.Make("en"), locale.Make("sr")
	fmt.Println(english.Script())
	fmt.Println(serbian.Script())
	// TODO:Output:
	// Latn High
	// Cyrl Low
}
// ExampleID_Part prints the script and region parts of an identifier that
// only specifies a language and a region.
func ExampleID_Part() {
	loc := locale.Make("sr-RS")
	fmt.Printf("%q %q", loc.Part(locale.ScriptPart), loc.Part(locale.RegionPart))
	// TODO:Output: "" "RS"
}
// ExampleID_Scope prints the locales, languages, scripts and regions in the
// scope of the identifier "sr".
func ExampleID_Scope() {
	scope := locale.Make("sr").Scope()
	fmt.Println(scope.Locales())
	fmt.Println(scope.Languages())
	fmt.Println(scope.Scripts())
	fmt.Println(scope.Regions())
	// TODO:Output:
	// [sr_Cyrl sr_Cyrl_ME sr_Latn sr_Latn_ME sr_Cyrl_BA sr_Cyrl_RS sr_Latn_BA sr_Latn_RS]
	// [sr]
	// [Cyrl Latn]
	// [BA ME RS]
}
// ExampleScript_Scope prints the scope of the script taken from "zen-Tfng".
func ExampleScript_Scope() {
	// The confidence value is not needed here.
	tfng, _ := locale.Make("zen-Tfng").Script()
	scope := tfng.Scope()
	fmt.Println(scope.Locales())
	fmt.Println(scope.Languages())
	fmt.Println(scope.Scripts())
	fmt.Println(scope.Regions())
	// TODO:Output:
	// [shi shi-Tfng shi-Tfng_MA tzm]
	// [shi tzm zen]
	// [Tfng]
	// [MA]
}

319
locale/locale.go Normal file
Просмотреть файл

@ -0,0 +1,319 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// NOTE: This package is still under development. Parts of it are not yet implemented,
// and the API is subject to change.
//
// Package locale provides a type to represent BCP 47 locale identifiers.
// It supports various canonicalizations defined in CLDR.
package locale
import "strings"
// Predefined locale identifiers, exported for convenience.
var (
// Und represents the undefined language. It is also the root locale.
Und = und
En = en // Default locale for English.
En_US = en_US // Default locale for American English.
De = de // Default locale for German.
// TODO: list of most common language identifiers.
)
// Predefined locale sets.
// NOTE(review): neither variable is assigned in the visible part of this
// file, so both may be nil; confirm where they are initialized.
var (
Supported Set // All supported locales.
Common Set // A selection of common locales.
)
// Backing values for the exported identifiers. Each one sets only the
// language, leaving region and script unknown.
var (
de = ID{lang: getLangID([]byte("de")), region: unknownRegion, script: unknownScript}
en = ID{lang: getLangID([]byte("en")), region: unknownRegion, script: unknownScript}
// NOTE(review): en_US is a plain copy of en and carries no region;
// confirm whether this is intentional.
en_US = en
und = ID{lang: unknownLang, region: unknownRegion, script: unknownScript}
)
// ID represents a BCP 47 locale identifier. It can be used to
// select an instance for a specific locale. All ID values are guaranteed
// to be well-formed.
type ID struct {
// In most cases, just lang, region and script will be needed. In such cases
// str may be nil.
lang langID
region regionID
script scriptID
// pVariant and pExt are byte offsets into *str; makeString sets them
// whenever it writes variants or extensions.
pVariant byte // offset in str
pExt uint16 // offset of first extension
// str, when non-nil, holds the full string form of the identifier.
str *string
}
// Make parses id and applies every canonicalization (All) to the result.
// Parse errors are deliberately dropped; whatever ID Parse returned is
// canonicalized and handed back. In most cases, locale IDs should be created
// using this function.
func Make(id string) ID {
	parsed, _ := Parse(id)
	canonical := parsed.Canonicalize(All)
	return canonical
}
// IsRoot returns true if loc is equal to locale "und".
//
// When loc carries a backing string, it reports false if the extension
// offset lies strictly inside that string, if the variant and extension
// offsets differ, or if the string starts with "x-".
func (loc ID) IsRoot() bool {
if loc.str != nil {
n := len(*loc.str)
if n > 0 && loc.pExt > 0 && int(loc.pExt) < n {
return false
}
if uint16(loc.pVariant) != loc.pExt || strings.HasPrefix(*loc.str, "x-") {
return false
}
// loc is a copy (value receiver), so clearing str only affects the
// struct comparison below, not the caller's ID.
loc.str = nil
}
return loc == und
}
// CanonType can be used to enable or disable various types of canonicalization.
// Values are bit flags and may be combined with |.
type CanonType int
const (
// Replace deprecated values with their preferred ones.
Deprecated CanonType = 1 << iota
// Remove redundant scripts.
SuppressScript
// Map the dominant language of macro language group to the macro language identifier.
// For example cmn -> zh.
Macro
// All canonicalizations prescribed by BCP 47.
BCP47 = Deprecated | SuppressScript
// All enables every canonicalization defined above.
All = BCP47 | Macro
// TODO: LikelyScript, LikelyRegion: suppress similar to ICU.
)
// Canonicalize replaces the identifier with its canonical equivalent.
// Only the canonicalizations whose bits are set in t are applied, in the
// order SuppressScript, Deprecated, Macro. The receiver is a copy; the
// canonicalized ID is returned.
func (loc ID) Canonicalize(t CanonType) ID {
changed := false
if t&SuppressScript != 0 {
// suppressScript is only indexed for languages below langNoIndexOffset.
if loc.lang < langNoIndexOffset && uint8(loc.script) == suppressScript[loc.lang] {
loc.script = unknownScript
changed = true
}
}
if t&Deprecated != 0 {
l := normLang(langOldMap[:], loc.lang)
if l != loc.lang {
changed = true
}
loc.lang = l
}
if t&Macro != 0 {
l := normLang(langMacroMap[:], loc.lang)
if l != loc.lang {
changed = true
}
loc.lang = l
}
// The cached string is stale once any field changed; rebuild it while
// carrying over the variants and extensions.
if changed && loc.str != nil {
ext := ""
if loc.pExt > 0 {
// Skip the byte at pExt itself (makeString puts the '-' separator there).
ext = (*loc.str)[loc.pExt+1:]
}
s := loc.makeString(loc.Part(VariantPart), ext)
loc.str = &s
}
return loc
}
// Parent returns the direct parent for this locale, which is the locale
// from which this locale inherits any undefined values.
//
// NOTE: not yet implemented; it currently always returns und.
func (loc ID) Parent() ID {
// TODO: implement
return und
}
// Written strips qualifiers from the identifier until the resulting identifier
// inherits from root.
//
// NOTE: not yet implemented; it currently always returns und.
func (loc ID) Written() ID {
// TODO: implement
return und
}
// Confidence indicates the level of certainty for a given return value.
// For example, Serbian may be written in Cyrillic or Latin script.
// The confidence level indicates whether a value was explicitly specified,
// whether it is typically the only possible value, or whether there is
// an ambiguity.
type Confidence int
// Confidence levels, ordered from least (Not) to most (Exact) certain.
const (
Not Confidence = iota // full confidence that there was no match
Low // most likely value picked out of a set of alternatives
High // value inferred from a parent and is generally assumed to be the correct match
Exact // exact match or explicitly specified value
)
// makeString builds the string form "lang[-script][-region][-vars][-ext]"
// for loc and returns it. As a side effect it updates loc.pVariant and
// loc.pExt whenever vars or ext are non-empty, which is why it takes a
// pointer receiver.
func (loc *ID) makeString(vars, ext string) string {
// Fixed-size scratch buffer for the common case; the appends below
// may grow past it if needed.
buf := [128]byte{}
n := loc.lang.stringToBuf(buf[:])
if loc.script != unknownScript {
n += copy(buf[n:], "-")
n += copy(buf[n:], loc.script.String())
}
if loc.region != unknownRegion {
n += copy(buf[n:], "-")
n += copy(buf[n:], loc.region.String())
}
b := buf[:n]
if vars != "" {
b = append(b, '-')
// pVariant is the offset of the first variant byte, after the '-'.
loc.pVariant = byte(len(b))
b = append(b, vars...)
// Without extensions, pExt ends up at the end of the variants.
loc.pExt = uint16(len(b))
}
if ext != "" {
// pExt is the offset of the '-' that precedes the extensions.
loc.pExt = uint16(len(b))
b = append(b, '-')
b = append(b, ext...)
}
return string(b)
}
// String returns the canonical string representation of the locale.
// The cached string is used when present; otherwise the string is built
// from the language, script and region fields alone.
func (loc ID) String() string {
	if loc.str != nil {
		return *loc.str
	}
	return loc.makeString("", "")
}
// Language returns the language for the locale.
//
// NOTE: not yet implemented; it currently always returns Language{0}.
func (loc ID) Language() Language {
// TODO: implement
return Language{0}
}
// Script infers the script for the locale. If it was not explicitly given, it will infer
// a most likely candidate from the parent locales.
// If more than one script is commonly used for a language, the most likely one
// is returned with a low confidence indication. For example, it returns (Cyrl, Low)
// for Serbian.
// Note that an inferred script is never guaranteed to be the correct one. Latn is
// almost exclusively used for Afrikaans, but Arabic has been used for some texts
// in the past. Also, the script that is commonly used may change over time.
//
// NOTE: not yet implemented; it currently always returns (Script{0}, Exact).
func (loc ID) Script() (Script, Confidence) {
// TODO: implement
return Script{0}, Exact
}
// Region returns the region for loc. If it was not explicitly given, it will
// infer a most likely candidate from the parent locales.
//
// NOTE: not yet implemented; it currently always returns (Region{0}, Exact).
func (loc ID) Region() (Region, Confidence) {
// TODO: implement
return Region{0}, Exact
}
// Variant returns the variant specified explicitly for this locale.
//
// NOTE: not yet implemented; it currently always returns a Variant with an
// empty string, whether or not a variant was specified.
func (loc ID) Variant() Variant {
return Variant{""}
}
// Scope returns a Set that indicates the common variants for which the
// locale may be applicable.
// Locales will return all valid sublocales. Languages will return the language
// for this locale. Regions will return all regions for which a locale with
// this language is defined. And Scripts will return all scripts that are
// commonly used for this locale.
// If any of these properties is explicitly specified, the respective lists
// will be constrained. For example, for sr_Latn Scripts will return [Latn]
// instead of [Cyrl Latn].
//
// NOTE: not yet implemented; it currently always returns nil.
func (loc ID) Scope() Set {
// TODO: implement
return nil
}
// TypeForKey returns the type associated with the given key, where key
// is one of the allowed values defined for the Unicode locale extension ('u') in
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// TypeForKey will traverse the inheritance chain to get the correct value.
//
// NOTE: not yet implemented; it currently always returns "".
func (loc ID) TypeForKey(key string) string {
// TODO: implement
return ""
}
// KeyValueString returns a string to be set with KeyValuePart.
// Error handling is done by Compose.
//
// NOTE: not yet implemented; it currently ignores m and returns ("", nil).
func KeyValueString(m map[string]string) (string, error) {
// TODO: implement
return "", nil
}
// SimplifyOptions removes options in loc that it would inherit
// by default from its parent.
//
// NOTE: not yet implemented; it currently always returns the zero ID.
func (loc ID) SimplifyOptions() ID {
// TODO: implement
return ID{}
}
// Language is an ISO 639 language identifier.
// It wraps the internal langID.
type Language struct {
langID
}
// Scope returns a Set of all pre-defined sublocales for this language.
//
// NOTE: not yet implemented; it currently always returns nil.
func (l Language) Scope() Set {
// TODO: implement
return nil
}
// Script is a 4-letter ISO 15924 code for representing scripts.
// It is idiomatically represented in title case.
// It wraps the internal scriptID.
type Script struct {
scriptID
}
// Scope returns a Set of all pre-defined sublocales applicable to the script.
//
// NOTE: not yet implemented; it currently always returns nil.
func (s Script) Scope() Set {
// TODO: implement
return nil
}
// Region is an ISO 3166-1 or UN M.49 code for representing countries and regions.
// It wraps the internal regionID.
type Region struct {
regionID
}
// IsCountry returns whether this region is a country.
//
// NOTE: not yet implemented; it currently always returns true.
func (r Region) IsCountry() bool {
// TODO: implement
return true
}
// Scope returns a Set of all pre-defined sublocales applicable to the region.
//
// NOTE: not yet implemented; it currently always returns nil.
func (r Region) Scope() Set {
// TODO: implement
return nil
}
// Variant represents a registered variant of a language as defined by BCP 47.
type Variant struct {
// TODO: implement
// variant holds the variant as a plain string for now.
variant string
}
// String returns the string representation of the variant, which is the
// stored string as is.
func (v Variant) String() string {
// TODO: implement
return v.variant
}
// Currency is an ISO 4217 currency designator.
// It wraps the internal currencyID.
type Currency struct {
currencyID
}
// Set provides information about a set of locales.
// Each method returns the members of the set for one kind of identifier.
type Set interface {
Locales() []ID
Languages() []Language
Regions() []Region
Scripts() []Script
Currencies() []Currency
}

86
locale/locale_test.go Normal file
Просмотреть файл

@ -0,0 +1,86 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package locale
import (
"reflect"
"testing"
)
// TestIDSize guards against ID growing beyond 16 bytes.
func TestIDSize(t *testing.T) {
	size := reflect.TypeOf(ID{}).Size()
	if size <= 16 {
		return
	}
	t.Errorf("size of ID was %d; want 16", size)
}
// TestIsRoot checks that IsRoot is true exactly for parse test cases whose
// language is "und" and that have no script, region or extension.
func TestIsRoot(t *testing.T) {
	for i, tt := range parseTests() {
		loc, _ := Parse(tt.in)
		want := tt.lang == "und" && tt.script == "" && tt.region == "" && tt.ext == ""
		if got := loc.IsRoot(); got != want {
			t.Errorf("%d: was %v; want %v", i, got, want)
		}
	}
}
/*
func TestParent(t *testing.T) {
tests := []struct {
in, out string
}{
{"und", "und"},
{"de-1994", "de"},
{"de-CH-1994", "de-CH"},
{"de-Cyrl-CH-1994", "de-Cyrl-CH"},
{"zh", "und"},
{"zh-HK-u-cu-usd", "zh"},
{"zh-Hans-HK-u-cu-usd", "zh-Hans"},
{"zh-u-cu-usd", "und"},
{"zh_Hans", "zh"},
{"zh_Hant", "und"},
{"vai", "und"},
{"vai_Latn", "und"},
{"nl_Cyrl", "nl"},
{"nl", "und"},
{"en_US", "en"},
{"en_150", "en-GB"},
{"en-SG", "en-GB"},
{"en_GB", "en"},
}
for i, tt := range tests {
test, _ := Parse(tt.in)
gold, _ := Parse(tt.out)
if p := test.Parent(); p.String() != gold.String() {
t.Errorf("%d:parent(%q): found %s; want %s", i, tt.in, p.String(), tt.out)
}
}
}
func TestWritten(t *testing.T) {
tests := []struct {
in, out string
}{
{"und", "und"},
{"zh-Hans", "zh"},
{"zh-Hant", "zh-Hant"},
{"vai", "vai"},
{"vai-Latn", "vai-Latn"},
{"nl-Cyrl", "nl-Cyrl"},
{"en-US", "en"},
{"en-150", "en"},
{"en-SG", "en"},
{"en-GB", "en"},
}
for i, tt := range tests {
test, _ := Parse(tt.in)
gold, _ := Parse(tt.out)
if w := test.Written(); w != gold {
t.Errorf("%d:written(%q): found %s; want %s", i, tt.in, w.String(), tt.out)
}
}
}
*/

348
locale/lookup.go Normal file
Просмотреть файл

@ -0,0 +1,348 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package locale
import (
"fmt"
"sort"
"strconv"
)
// get returns the first n bytes of entry number id in idx, where idx is a
// string made of consecutive 4-byte entries.
func get(idx string, id, n int) string {
	start := id << 2
	return idx[start : start+n]
}
// cmp compares a and b bytewise and returns -1, 0 or 1 when a sorts before,
// equal to, or after b. If one is a prefix of the other, the shorter sorts first.
func cmp(a string, b []byte) int {
	limit := len(b)
	if len(a) < limit {
		limit = len(a)
	}
	for i := 0; i < limit; i++ {
		if a[i] != b[i] {
			if a[i] < b[i] {
				return -1
			}
			return 1
		}
	}
	// Common prefix is identical; the lengths decide.
	if len(a) == len(b) {
		return 0
	}
	if len(a) < len(b) {
		return -1
	}
	return 1
}
// search returns the byte offset of the insertion point of key in smap,
// which is a string of consecutive 4-byte entries. Only the first len(key)
// bytes of each entry take part in the comparison.
func search(smap string, key []byte) int {
	width := len(key)
	entries := len(smap) >> 2
	pos := sort.Search(entries, func(i int) bool {
		return cmp(get(smap, i, width), key) >= 0
	})
	return pos << 2
}
// index returns the byte offset of the first entry in smap whose leading
// len(key) bytes equal key, or -1 if there is no such entry. smap is a
// string of consecutive 4-byte entries, as expected by search.
func index(smap string, key []byte) int {
	i := search(smap, key)
	end := i + len(key)
	// search returns len(smap) when key sorts after every entry; guard the
	// slice so a missing key yields -1 instead of an out-of-range panic.
	if end > len(smap) || cmp(smap[i:end], key) != 0 {
		return -1
	}
	return i
}
// searchUint returns the smallest index i for which imap[i] >= key, or
// len(imap) if there is none. imap is expected to be sorted ascending.
func searchUint(imap []uint16, key uint16) int {
	atOrAbove := func(i int) bool {
		return !(imap[i] < key)
	}
	return sort.Search(len(imap), atOrAbove)
}
// fixCase rewrites b in place so that its letter case follows pat: where pat
// has a byte <= 'Z' the result is upper case, elsewhere lower case.
// It returns false if the lengths differ or if b contains a byte that is not
// an ASCII letter; in the latter case the bytes before the offending one
// have already been rewritten.
func fixCase(pat string, b []byte) bool {
	if len(b) != len(pat) {
		return false
	}
	const caseDelta = 'z' - 'Z'
	for i := 0; i < len(b); i++ {
		ch := b[i]
		if pat[i] <= 'Z' {
			// Upper case wanted.
			if ch >= 'a' {
				ch -= caseDelta
			}
			if ch < 'A' || ch > 'Z' {
				return false
			}
		} else {
			// Lower case wanted.
			if ch <= 'Z' {
				ch += caseDelta
			}
			if ch < 'a' || ch > 'z' {
				return false
			}
		}
		b[i] = ch
	}
	return true
}
// langID is the compact internal identifier of a language.
type langID uint16

// getLangID returns the langID for s, dispatching on its length: 2-byte
// input is looked up as an ISO 2-letter code, anything else as a 3-letter
// code. It returns unknownLang if s is not recognized.
func getLangID(s []byte) langID {
	switch len(s) {
	case 2:
		return getLangISO2(s)
	default:
		return getLangISO3(s)
	}
}
// normLang returns the langID that id maps to according to m, or id itself
// if m has no entry for it. m must be sorted ascending by its from field.
func normLang(m []struct{ from, to uint16 }, id langID) langID {
	k := sort.Search(len(m), func(i int) bool {
		return m[i].from >= uint16(id)
	})
	// sort.Search returns len(m) when every from value is smaller than id;
	// check the bound before indexing to avoid a panic.
	if k < len(m) && m[k].from == uint16(id) {
		return langID(m[k].to)
	}
	return id
}
// getLangISO2 returns the langID for the given 2-letter ISO language code
// or unknownLang if this does not exist. s is lower-cased in place.
func getLangISO2(s []byte) langID {
	if len(s) != 2 || !fixCase("zz", s) {
		return unknownLang
	}
	i := index(lang, s)
	// Entries whose 4th byte is 0 are skipped here.
	if i == -1 || lang[i+3] == 0 {
		return unknownLang
	}
	return langID(i >> 2)
}
// base is the number of letters in the alphabet used by strToInt and intToStr.
const base = 'z' - 'a' + 1

// strToInt interprets s as a base-26 number in which 'a' is digit 0 and 'z'
// is digit 25, most significant digit first.
func strToInt(s []byte) uint {
	var v uint
	for _, c := range s {
		v = v*base + uint(c-'a')
	}
	return v
}
// intToStr is the inverse of strToInt: it writes v into s as base-26 digits
// 'a'..'z', least significant digit last. len(s) fixes the number of
// characters produced and must match the length of the original string.
func intToStr(v uint, s []byte) {
	for pos := len(s); pos > 0; pos-- {
		s[pos-1] = byte(v%base) + 'a'
		v /= base
	}
}
// getLangISO3 returns the langID for the given 3-letter ISO language code
// or unknownLang if this does not exist. s is lower-cased in place.
// Four lookups are tried in order: canonical 3-letter entries in lang,
// altLangISO3, the langNoIndex bit vector, and finally 2-letter entries
// whose 3-letter form is stored in their trailing two bytes.
func getLangISO3(s []byte) langID {
if fixCase("und", s) {
// first try to match canonical 3-letter entries
// NOTE(review): this loop and the last one stop only on a prefix
// mismatch; they presumably rely on the table ending in a sentinel
// entry. Verify against the generated tables.
for i := search(lang, s[:2]); cmp(lang[i:i+2], s[:2]) == 0; i += 4 {
if lang[i+3] == 0 && lang[i+2] == s[2] {
return langID(i >> 2)
}
}
if i := index(altLangISO3, s); i != -1 {
return langID(altLangISO3[i+3])
}
// Codes without a table entry get an ID derived from their letters.
n := strToInt(s)
if langNoIndex[n/8]&(1<<(n%8)) != 0 {
return langID(n) + langNoIndexOffset
}
// Check for non-canonical uses of ISO3.
for i := search(lang, s[:1]); lang[i] == s[0]; i += 4 {
if cmp(lang[i+2:][:2], s[1:3]) == 0 {
return langID(i >> 2)
}
}
}
return unknownLang
}
// stringToBuf writes the BCP 47 form of id to b and returns the number of
// bytes written. cap(b) must be >= 3.
func (id langID) stringToBuf(b []byte) int {
	if id >= langNoIndexOffset {
		// IDs in this range encode the three letters directly.
		intToStr(uint(id)-langNoIndexOffset, b[:3])
		return 3
	}
	entry := lang[id<<2:]
	width := 2
	if entry[3] == 0 {
		// A zero 4th byte marks a 3-letter code.
		width = 3
	}
	return copy(b, entry[:width])
}
// String returns the BCP 47 representation of the langID.
func (id langID) String() string {
	if id >= langNoIndexOffset {
		// IDs in this range encode the three letters directly.
		var letters [3]byte
		intToStr(uint(id-langNoIndexOffset), letters[:])
		return string(letters[:])
	}
	entry := lang[id<<2:]
	if entry[3] != 0 {
		return entry[:2]
	}
	// A zero 4th byte marks a 3-letter code.
	return entry[:3]
}
// ISO3 returns the ISO 639-3 language code.
func (id langID) ISO3() string {
	if id >= langNoIndexOffset {
		return id.String()
	}
	entry := lang[id<<2:]
	switch {
	case entry[3] == 0:
		// The entry itself is a 3-letter code.
		return entry[:3]
	case entry[2] == 0:
		// The 4th byte is an entry number into altLangISO3.
		return get(altLangISO3, int(entry[3]), 3)
	}
	// First letter of the 2-letter code plus the two stored letters.
	// This concatenation allocates, but only for 3-letter ISO codes that
	// are non-canonical BCP 47 language identifiers.
	return entry[0:1] + entry[2:4]
}
type regionID uint16
// getRegionID returns the region id for s if s is a valid 2-letter region code
// or unknownRegion.
func getRegionID(s []byte) regionID {
if len(s) == 3 {
if isAlpha(s[0]) {
return getRegionISO3(s)
}
if i, err := strconv.ParseUint(string(s), 10, 10); err == nil {
return getRegionM49(int(i))
}
}
return getRegionISO2(s)
}
// getRegionISO2 returns the regionID for the given 2-letter ISO country code
// or unknownRegion if this does not exist. s is upper-cased in place.
func getRegionISO2(s []byte) regionID {
	if !fixCase("ZZ", s) {
		return unknownRegion
	}
	i := index(regionISO, s)
	if i == -1 {
		return unknownRegion
	}
	return regionID(i>>2) + isoRegionOffset
}
// getRegionISO3 returns the regionID for the given 3-letter ISO country code
// or unknownRegion if this does not exist. s is upper-cased in place.
func getRegionISO3(s []byte) regionID {
if fixCase("ZZZ", s) {
// Entries sharing the first letter store the 2nd and 3rd letters of
// the 3-letter code in their trailing two bytes.
// NOTE(review): the loop stops only when the first letter changes;
// it presumably relies on a sentinel entry at the end of regionISO.
for i := search(regionISO, s[:1]); regionISO[i] == s[0]; i += 4 {
if cmp(regionISO[i+2:][:2], s[1:3]) == 0 {
return regionID(i>>2) + isoRegionOffset
}
}
// Fall back to a linear scan of the 3-byte entries in altRegionISO3.
for i := 0; i < len(altRegionISO3); i += 3 {
if cmp(altRegionISO3[i:i+3], s) == 0 {
return regionID(altRegionIDs[i/3])
}
}
}
return unknownRegion
}
// getRegionM49 returns the regionID whose m49 entry equals n, or
// unknownRegion if n is 0 or not listed.
func getRegionM49(n int) regionID {
	// These will mostly be group IDs, which are at the start of the list.
	// For other values this may be a bit slow, as there are over 300 entries.
	// TODO: group id is sorted!
	if n != 0 {
		code := uint16(n)
		for id := range m49 {
			if m49[id] == code {
				return regionID(id)
			}
		}
	}
	return unknownRegion
}
// String returns the BCP 47 representation for the region: the 2-letter
// code for IDs at or above isoRegionOffset, otherwise the zero-padded
// 3-digit m49 code.
func (r regionID) String() string {
	if r >= isoRegionOffset {
		return get(regionISO, int(r-isoRegionOffset), 2)
	}
	return fmt.Sprintf("%03d", r.m49())
}
// iso3 returns the 3-letter ISO code for the region, or "" if there is none.
// The use of this is uncommon.
// Note: not all regionIDs have corresponding 3-letter ISO codes!
func (r regionID) iso3() string {
	if r < isoRegionOffset {
		return ""
	}
	entry := regionISO[(r-isoRegionOffset)<<2:]
	if entry[2] == 0 {
		// The 4th byte is an offset into altRegionISO3.
		return altRegionISO3[entry[3]:][:3]
	}
	if entry[2] == ' ' {
		return ""
	}
	// First letter of the 2-letter code plus the two stored letters.
	return entry[0:1] + entry[2:4]
}
// m49 returns the entry of the m49 table for r.
func (r regionID) m49() uint16 {
return m49[r]
}
// scriptID is the compact internal identifier of a script.
type scriptID uint8

// getScriptID returns the script id for s, looked up in the 4-byte index
// idx, or unknownScript if s is not found. s must be four ASCII letters;
// it is converted to title case in place.
func getScriptID(idx string, s []byte) scriptID {
	if !fixCase("Zzzz", s) {
		return unknownScript
	}
	i := index(idx, s)
	if i == -1 {
		return unknownScript
	}
	return scriptID(i >> 2)
}
// String returns the script code in title case, read as the 4-byte entry
// for s in the script table.
func (s scriptID) String() string {
return get(script, int(s), 4)
}
// currencyID is the compact internal identifier of a currency.
type currencyID uint16

// getCurrencyID returns the currency id for s, looked up in the 4-byte
// index idx, or unknownCurrency if s is not found. s must be three ASCII
// letters; it is upper-cased in place.
func getCurrencyID(idx string, s []byte) currencyID {
	if !fixCase("XXX", s) {
		return unknownCurrency
	}
	i := index(idx, s)
	if i == -1 {
		return unknownCurrency
	}
	return currencyID(i >> 2)
}
// String returns the upper case representation of the currency, read as the
// first 3 bytes of the entry for c in the currency table.
func (c currencyID) String() string {
return get(currency, int(c), 3)
}
// round returns the rounding value for currency c: the upper six bits of
// the 4th byte of c's entry in index.
func round(index string, c currencyID) int {
	info := index[c<<2+3]
	return int(info >> 2)
}
// decimals returns the number of decimals for currency c: the lower two
// bits of the 4th byte of c's entry in index.
func decimals(index string, c currencyID) int {
	info := index[c<<2+3]
	return int(info & 0x03)
}

254
locale/lookup_test.go Normal file
Просмотреть файл

@ -0,0 +1,254 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package locale
import (
"fmt"
"strings"
"testing"
)
// strdata is the sorted test index for search and index. Every entry must be
// exactly 4 bytes wide (shorter keys are padded with spaces) because the
// lookup functions address entries in 4-byte steps; the offsets expected by
// strtests (0, 4, 8, ...) depend on this.
var strdata = []string{
	"aa  ",
	"aaa ",
	"aaaa",
	"aaab",
	"aab ",
	"ab  ",
	"ba  ",
	"xxxx",
}
// strtests returns a fresh map from search key to the byte offset that
// TestSearch expects search to return for that key in the joined strdata.
// A new map is built on every call, so callers may modify the result.
func strtests() map[string]int {
return map[string]int{
" ": 0,
"a": 0,
"aa": 0,
"aaa": 4,
"aa ": 0,
"aaaa": 8,
"aaab": 12,
"aaax": 16,
"b": 24,
"ba": 24,
"bbbb": 28,
}
}
func TestSearch(t *testing.T) {
for k, v := range strtests() {
if i := search(strings.Join(strdata, ""), []byte(k)); i != v {
t.Errorf("%s: found %d; want %d", k, i, v)
}
}
}
// TestIndex reuses the strtests expectations, except that keys which are
// not a prefix of any entry must yield -1.
func TestIndex(t *testing.T) {
	want := strtests()
	for _, missing := range []string{" ", "aaax", "bbbb"} {
		want[missing] = -1
	}
	smap := strings.Join(strdata, "")
	for key, offset := range want {
		if got := index(smap, []byte(key)); got != offset {
			t.Errorf("%s: found %d; want %d", key, got, offset)
		}
	}
}
// b converts s to a new byte slice; it is shorthand for the tests below.
func b(s string) []byte {
return []byte(s)
}
// TestFixCase runs fixCase over triples of (pattern, input, want). When
// fixCase is expected to fail, want repeats the pattern.
func TestFixCase(t *testing.T) {
	tests := []string{
		"aaaa", "AbCD", "abcd",
		"Zzzz", "AbCD", "Abcd",
		"Zzzz", "AbC", "Zzzz",
		"XXX", "ab ", "XXX",
		"XXX", "usd", "USD",
		"cmn", "AB ", "cmn",
		"gsw", "CMN", "cmn",
	}
	// Use <= so that the final triple is exercised as well.
	for i := 0; i+3 <= len(tests); i += 3 {
		pat, in, want := tests[i], tests[i+1], tests[i+2]
		buf := [4]byte{}
		b := buf[:copy(buf[:], in)]
		ok := fixCase(pat, b)
		if ok && cmp(want, b) != 0 || !ok && pat != want {
			// Report the rewritten bytes; the boolean alone cannot be
			// formatted with %q.
			t.Errorf("%s+%s: found %q (ok=%v); want %q", pat, in, b, ok, want)
		}
	}
}
// TestLangID checks language lookup by 2- and 3-letter code, normalization
// through langOldMap and langMacroMap, and the String and ISO3 forms.
// An iso3 value that is not exactly 3 bytes long (e.g. "yid-") disables the
// reverse lookups for that row; only its first 3 bytes are compared.
func TestLangID(t *testing.T) {
	tests := []struct{ id, bcp47, iso3, norm string }{
		{id: "", bcp47: "und", iso3: "und"},
		{id: " ", bcp47: "und", iso3: "und"},
		{id: " ", bcp47: "und", iso3: "und"},
		{id: " ", bcp47: "und", iso3: "und"},
		{id: "und", bcp47: "und", iso3: "und"},
		{id: "aju", bcp47: "aju", iso3: "aju", norm: "jrb"},
		{id: "jrb", bcp47: "jrb", iso3: "jrb"},
		{id: "es", bcp47: "es", iso3: "spa"},
		{id: "spa", bcp47: "es", iso3: "spa"},
		{id: "ji", bcp47: "ji", iso3: "yid-", norm: "yi"},
		{id: "jw", bcp47: "jw", iso3: "jav-", norm: "jv"},
		{id: "ar", bcp47: "ar", iso3: "ara"},
		{id: "arb", bcp47: "arb", iso3: "arb", norm: "ar"},
		{id: "ar", bcp47: "ar", iso3: "ara"},
		{id: "kur", bcp47: "ku", iso3: "kur"},
		{id: "nl", bcp47: "nl", iso3: "nld"},
		{id: "NL", bcp47: "nl", iso3: "nld"},
		{id: "gsw", bcp47: "gsw", iso3: "gsw"},
		{id: "gSW", bcp47: "gsw", iso3: "gsw"},
		{id: "und", bcp47: "und", iso3: "und"},
		{id: "sh", bcp47: "sh", iso3: "hbs", norm: "sr"},
		{id: "hbs", bcp47: "sh", iso3: "hbs", norm: "sr"},
		{id: "no", bcp47: "no", iso3: "nor", norm: "nb"},
		{id: "nor", bcp47: "no", iso3: "nor", norm: "nb"},
		{id: "cmn", bcp47: "cmn", iso3: "cmn", norm: "zh"},
	}
	for i, tt := range tests {
		want := getLangID(b(tt.id))
		if id := getLangISO2(b(tt.bcp47)); len(tt.bcp47) == 2 && want != id {
			t.Errorf("%d:getISO2(%s): found %v; want %v", i, tt.bcp47, id, want)
		}
		if len(tt.iso3) == 3 {
			// langIDs are numeric; print them with %v like the other checks.
			if id := getLangISO3(b(tt.iso3)); want != id {
				t.Errorf("%d:getISO3(%s): found %v; want %v", i, tt.iso3, id, want)
			}
			if id := getLangID(b(tt.iso3)); want != id {
				t.Errorf("%d:getID3(%s): found %v; want %v", i, tt.iso3, id, want)
			}
		}
		norm := want
		if tt.norm != "" {
			norm = getLangID(b(tt.norm))
		}
		id := normLang(langOldMap[:], want)
		id = normLang(langMacroMap[:], id)
		if id != norm {
			t.Errorf("%d:norm(%s): found %v; want %v", i, tt.id, id, norm)
		}
		if id := want.String(); tt.bcp47 != id {
			t.Errorf("%d:String(): found %s; want %s", i, id, tt.bcp47)
		}
		if id := want.ISO3(); tt.iso3[:3] != id {
			t.Errorf("%d:iso3(): found %s; want %s", i, id, tt.iso3[:3])
		}
	}
}
// TestRegionID checks region lookup by 2-letter, 3-letter and M.49 code and
// the String, iso3 and m49 forms. An empty iso2 or iso3, or a zero m49,
// disables the corresponding reverse lookup for that row.
func TestRegionID(t *testing.T) {
tests := []struct {
id, iso2, iso3 string
m49 int
}{
{"AA", "AA", "AAA", 958},
{"IC", "IC", "", 0},
{"ZZ", "ZZ", "ZZZ", 999},
{"EU", "EU", "QUU", 967},
{"419", "", "", 419},
}
for i, tt := range tests {
want := getRegionID(b(tt.id))
if id := getRegionISO2(b(tt.iso2)); len(tt.iso2) == 2 && want != id {
t.Errorf("%d:getISO2(%s): found %d; want %d", i, tt.iso2, id, want)
}
if id := getRegionISO3(b(tt.iso3)); len(tt.iso3) == 3 && want != id {
t.Errorf("%d:getISO3(%s): found %d; want %d", i, tt.iso3, id, want)
}
if id := getRegionID(b(tt.iso3)); len(tt.iso3) == 3 && want != id {
t.Errorf("%d:getID3(%s): found %d; want %d", i, tt.iso3, id, want)
}
if id := getRegionM49(tt.m49); tt.m49 != 0 && want != id {
t.Errorf("%d:getM49(%d): found %d; want %d", i, tt.m49, id, want)
}
// Regions without a 2-letter code are expected to print as their
// zero-padded M.49 number.
if len(tt.iso2) == 2 {
if id := want.String(); tt.iso2 != id {
t.Errorf("%d:String(): found %s; want %s", i, id, tt.iso2)
}
} else {
if id := want.String(); fmt.Sprintf("%03d", tt.m49) != id {
t.Errorf("%d:String(): found %s; want %03d", i, id, tt.m49)
}
}
if id := want.iso3(); tt.iso3 != id {
t.Errorf("%d:iso3(): found %s; want %s", i, id, tt.iso3)
}
if id := int(want.m49()); tt.m49 != id {
t.Errorf("%d:m49(): found %d; want %d", i, id, tt.m49)
}
}
}
// TestScript looks up scripts in a small hand-built index that ends in a
// 0xff sentinel entry. Malformed input must yield unknownScript, and input
// in any letter case must resolve to the title-case entry.
func TestScript(t *testing.T) {
	const und = unknownScript
	idx := "BbbbDdddEeeeZzzz\xff\xff\xff\xff"
	type testCase struct {
		in  string
		out scriptID
	}
	tests := []testCase{
		{" ", und},
		{" ", und},
		{" ", und},
		{"", und},
		{"Bbbb", 0},
		{"Dddd", 1},
		{"dddd", 1},
		{"dDDD", 1},
		{"Eeee", 2},
		{"Zzzz", 3},
	}
	for i, tt := range tests {
		id := getScriptID(idx, b(tt.in))
		if id == tt.out {
			continue
		}
		t.Errorf("%d:%s: found %d; want %d", i, tt.in, id, tt.out)
	}
}
// TestCurrency looks up currencies in a small hand-built index that ends in
// a 0xff sentinel entry, and checks the rounding and decimals packed into
// the 4th byte of each entry.
func TestCurrency(t *testing.T) {
	// curInfo packs round (upper 6 bits) and dec (lower 2 bits) into the
	// single info byte of an entry. The byte is built explicitly: converting
	// an int to a string would yield the UTF-8 encoding of a rune, which is
	// more than one byte for values >= 128 and is rejected by go vet.
	curInfo := func(round, dec int) string {
		return string([]byte{byte(round<<2 + dec)})
	}
	idx := strings.Join([]string{
		"BBB" + curInfo(5, 2),
		"DDD\x00",
		"XXX\x00",
		"ZZZ\x00",
		"\xff\xff\xff\xff",
	}, "")
	const und = unknownCurrency
	tests := []struct {
		in         string
		out        currencyID
		round, dec int
	}{
		{" ", und, 0, 0},
		{" ", und, 0, 0},
		{" ", und, 0, 0},
		{"", und, 0, 0},
		{"BBB", 0, 5, 2},
		{"DDD", 1, 0, 0},
		{"dDd", 1, 0, 0},
		{"ddd", 1, 0, 0},
		{"XXX", 2, 0, 0},
		{"Zzz", 3, 0, 0},
	}
	for i, tt := range tests {
		id := getCurrencyID(idx, b(tt.in))
		if id != tt.out {
			t.Errorf("%d:%s: found %d; want %d", i, tt.in, id, tt.out)
		}
		// Only the four real entries carry rounding/decimals information.
		if id <= 3 {
			if d := decimals(idx, id); d != tt.dec {
				t.Errorf("%d:dec(%s): found %d; want %d", i, tt.in, d, tt.dec)
			}
			if d := round(idx, id); d != tt.round {
				t.Errorf("%d:round(%s): found %d; want %d", i, tt.in, d, tt.round)
			}
		}
	}
}

931
locale/maketables.go Normal file
Просмотреть файл

@ -0,0 +1,931 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// Locale identifier table generator.
// Data read from the web.
package main
import (
"bufio"
"code.google.com/p/go.text/cldr"
"flag"
"fmt"
"hash"
"hash/fnv"
"io"
"log"
"math"
"net/http"
"os"
"path"
"reflect"
"sort"
"strconv"
"strings"
)
var (
url = flag.String("cldr",
"http://www.unicode.org/Public/cldr/"+cldr.Version+"/core.zip",
"URL of CLDR archive.")
iana = flag.String("iana",
"http://www.iana.org/assignments/language-subtag-registry",
"URL of IANA language subtag registry.")
test = flag.Bool("test", false,
"test existing tables; can be used to compare web data with package data.")
localFiles = flag.Bool("local", false,
"data files have been copied to the current directory; for debugging only.")
)
var comment = []string{
`
lang holds an alphabetically sorted list of BCP 47 language identifiers.
All entries are 4 bytes. The index of the identifier (divided by 4) is the language ID.
For 2-byte language identifiers, the two successive bytes have the following meaning:
- if the first letter of the 2- and 3-letter ISO codes are the same:
the second and third letter of the 3-letter ISO code.
- otherwise: a 0 and a by 2 bits right-shifted index into altLangISO3.
For 3-byte language identifiers the 4th byte is 0.`,
`
langNoIndex is a bit vector of all 3-letter language codes that are not used as an index
in lookup tables. The language ids for these language codes are derived directly
from the letters and are not consecutive.`,
`
altLangISO3 holds an alphabetically sorted list of 3-letter language code alternatives
to 2-letter language codes that cannot be derived using the method described above.
Each 3-letter code is followed by its 1-byte langID.`,
`
tagAlias holds a mapping from legacy and grandfathered tags to their locale ID.`,
`
langOldMap maps deprecated langIDs to their suggested replacements.`,
`
langMacroMap maps languages to their macro language replacement, if applicable.`,
`
script is an alphabetically sorted list of ISO 15924 codes. The index
of the script in the string, divided by 4, is the internal script ID.`,
`
isoRegionOffset needs to be added to the index of regionISO to obtain the regionID
for 2-letter ISO codes. (The first isoRegionOffset regionIDs are reserved for
the UN.M49 codes used for groups.)`,
`
regionISO holds a list of alphabetically sorted 2-letter ISO region codes.
Each 2-letter codes is followed by two bytes with the following meaning:
- [A-Z}{2}: the first letter of the 2-letter code plus these two
letters form the 3-letter ISO code.
- 0, n: index into altRegionISO3.`,
`
m49 maps regionIDs to UN.M49 codes. The first isoRegionOffset entries are
codes indicating collections of regions.`,
`
altRegionISO3 holds a list of 3-letter region codes that cannot be
mapped to 2-letter codes using the default algorithm. This is a short list.`,
`
altRegionIDs holsd a list of regionIDs the positions of which match those
of the 3-letter ISO codes in altRegionISO3.`,
`
currency holds an alphabetically sorted list of canonical 3-letter currency identifiers.
Each identifier is followed by a byte of which the 6 most significant bits
indicated the rounding and the least 2 significant bits indicate the
number of decimal positions.`,
`
suppressScript is an index from langID to the dominant script for that language,
if it exists. If a script is given, it should be suppressed from the language tag.`,
`
nRegionGroups is the number of region groups. All regionIDs < nRegionGroups
are groups.`,
`
regionInclusion maps region identifiers to sets of regions in regionInclusionBits,
where each set holds all groupings that are directly connected in a region
containment graph.`,
`
regionInclusionBits is an array of bit vectors where every vector represents
a set of region groupings. These sets are used to compute the distance
between two regions for the purpos of locale matching.`,
`
regionInclusionNext marks, for each entry in regionInclusionBits, the set of
all groups that are reachable from the groups set in the respective entry.`,
}
// TODO: consider changing some of these structures to tries. This can reduce
// memory, but may increase the need for memory allocations. This could be
// mitigated if we can piggyback on locale strings for common cases.
// failOnError logs e and panics if e is non-nil; it does nothing otherwise.
func failOnError(e error) {
	if e == nil {
		return
	}
	log.Panic(e)
}
// setType describes how a stringSet is meant to be used. The zero value
// means that no type has been assigned.
type setType int
const (
Indexed setType = 1 + iota // all elements must be of same size
Linear
)
// stringSet is a collection of strings that is sorted and de-duplicated on
// demand (see compact) and can be frozen against further additions.
type stringSet struct {
s []string
sorted, frozen bool
// We often need to update values after the creation of an index is completed.
// We include a convenience map for keeping track of this.
update map[string]string
typ setType // used for checking.
}
// clone returns a copy of ss whose element slice has its own backing array.
// All other fields are copied as is, so an existing update map is shared
// between the original and the copy.
func (ss *stringSet) clone() stringSet {
	elems := append([]string(nil), ss.s...)
	dup := *ss
	dup.s = elems
	return dup
}
// setType assigns access type t to the set. It panics if the set was already
// assigned a different type.
func (ss *stringSet) setType(t setType) {
	if ss.typ != t && ss.typ != 0 {
		log.Panicf("type %d cannot be assigned as it was already %d", t, ss.typ)
	}
	// Record the type; without this assignment the check above can never fire.
	ss.typ = t
}
// parse splits s on whitespace and adds every resulting word to ss.
func (ss *stringSet) parse(s string) {
	words := bufio.NewScanner(strings.NewReader(s))
	words.Split(bufio.ScanWords)
	for words.Scan() {
		word := words.Text()
		ss.add(word)
	}
}
// assertChangeable panics if the set has been frozen.
func (ss *stringSet) assertChangeable() {
	if !ss.frozen {
		return
	}
	log.Panic("attempt to modify a frozen stringSet")
}
// add appends s to the set. It panics (via assertChangeable) if the set is
// frozen. Duplicates are not removed here; that is left to compact.
func (ss *stringSet) add(s string) {
ss.assertChangeable()
ss.s = append(ss.s, s)
// frozen is necessarily false here, so this marks the set as unsorted.
ss.sorted = ss.frozen
}
// freeze compacts the set and marks it as frozen. After this, add and remove
// panic through assertChangeable.
func (ss *stringSet) freeze() {
ss.compact()
ss.frozen = true
}
// compact sorts the elements of ss and removes duplicates in place.
// It does nothing if the set is already marked as sorted. An empty set is
// left as is rather than panicking on the final reslice.
func (ss *stringSet) compact() {
	if ss.sorted {
		return
	}
	a := ss.s
	if len(a) == 0 {
		// Nothing to sort; a[:k+1] below would be out of range.
		ss.sorted = ss.frozen
		return
	}
	sort.Strings(a)
	// k is the index of the last unique element kept so far.
	k := 0
	for i := 1; i < len(a); i++ {
		if a[k] != a[i] {
			a[k+1] = a[i]
			k++
		}
	}
	ss.s = a[:k+1]
	// The sorted state is only cached for frozen sets.
	ss.sorted = ss.frozen
}
// funcSorter adapts a string slice to sort.Interface, ordering the elements
// with the user-supplied less-than function fn. Len and Swap come from the
// embedded sort.StringSlice.
type funcSorter struct {
	fn func(a, b string) bool
	sort.StringSlice
}

// Less reports whether element i orders before element j according to fn.
func (s funcSorter) Less(i, j int) bool {
	left, right := s.StringSlice[i], s.StringSlice[j]
	return s.fn(left, right)
}
// sortFunc compacts the set and then reorders its elements using f as the
// less-than function.
func (ss *stringSet) sortFunc(f func(a, b string) bool) {
ss.compact()
sort.Sort(funcSorter{f, sort.StringSlice(ss.s)})
}
// remove deletes s from the set if it is present. It panics if the set is
// frozen.
func (ss *stringSet) remove(s string) {
	ss.assertChangeable()
	pos, found := ss.find(s)
	if !found {
		return
	}
	// Shift the tail one position to the left, dropping the element at pos.
	ss.s = append(ss.s[:pos], ss.s[pos+1:]...)
}
// replace overwrites the element equal to ol with nu. It panics (through
// index) if ol is not in the set.
// NOTE(review): unlike add and remove, this does not call assertChangeable.
func (ss *stringSet) replace(ol, nu string) {
ss.s[ss.index(ol)] = nu
ss.sorted = ss.frozen
}
// index returns the position of s in the compacted set. It panics if s is
// not present, naming the element at the insertion point when there is one.
func (ss *stringSet) index(s string) int {
	ss.setType(Indexed)
	pos, found := ss.find(s)
	if found {
		return pos
	}
	if pos < len(ss.s) {
		log.Panicf("find: item %q is not in list. Closest match is %q.", s, ss.s[pos])
	}
	log.Panicf("find: item %q is not in list", s)
	return pos
}
// find compacts the set and searches it for s. It returns the position at
// which s is, or would be inserted, and whether s is actually present.
func (ss *stringSet) find(s string) (int, bool) {
	ss.compact()
	pos := sort.SearchStrings(ss.s, s)
	if pos == len(ss.s) {
		return pos, false
	}
	return pos, ss.s[pos] == s
}
// slice compacts the set and returns its elements. The returned slice is the
// set's own storage, not a copy.
func (ss *stringSet) slice() []string {
ss.compact()
return ss.s
}
// updateLater records an association in the update map, allocating the map
// on first use. Note that v is used as the map key and key is stored as the
// map value.
func (ss *stringSet) updateLater(v, key string) {
if ss.update == nil {
ss.update = map[string]string{}
}
ss.update[v] = key
}
// join joins the string and ensures that all entries are of the same length.
// Before joining, an extra entry consisting of n 0xff bytes is appended to
// the set itself, where n is the common entry length, so calling join
// modifies ss.s.
// NOTE(review): the set is not compacted here and must hold at least one
// element, as ss.s[0] is read unconditionally.
func (ss *stringSet) join() string {
ss.setType(Indexed)
n := len(ss.s[0])
for _, s := range ss.s {
if len(s) != n {
log.Panic("join: not all entries are of the same length")
}
}
ss.s = append(ss.s, strings.Repeat("\xff", n))
return strings.Join(ss.s, "")
}
// ianaEntry holds information for an entry in the IANA Language Subtag Repository.
// All types use the same entry.
// See http://tools.ietf.org/html/bcp47#section-5.1 for a description of the various
// fields.
// The fields are filled in by parseRegistry from the registry field of the
// corresponding name; scope, for example, comes from "Scope:".
type ianaEntry struct {
typ string // value of "Type:"
tag string // value of "Subtag:" or "Tag:"
description []string // one element per "Description:" field
scope string
added string
preferred string // value of "Preferred-Value:"
deprecated string
suppressScript string // value of "Suppress-Script:"
macro string // value of "Macrolanguage:"
prefix []string // one element per "Prefix:" field
}
// builder holds the input data, the indices derived from it and the output
// state used while generating the tables.
type builder struct {
w io.Writer // multi writer
out io.Writer // set to Stdout
hash32 hash.Hash32 // for checking whether tables have changed.
size int // sum of the sizes reported through addSize and addArraySize
data *cldr.CLDR
supp *cldr.SupplementalData
// indices
locale stringSet // common locales
lang stringSet // canonical language ids (2 or 3 letter ISO codes) with data
langNoIndex stringSet // 3-letter ISO codes with no associated data
script stringSet // 4-letter ISO codes
region stringSet // 2-letter ISO or 3-digit UN M49 codes
currency stringSet // 3-letter ISO currency codes
// langInfo
// registry maps a tag or subtag to its IANA registry entry (see parseRegistry).
registry map[string]*ianaEntry
}
// openReader fetches the resource at *url and returns its body. The caller
// must close the returned reader.
//
// If the localFiles flag is set, *url is first rewritten to a file:// URL
// that points at a file with the same base name in the current working
// directory. Any failure, including a non-200 response, aborts the program.
func openReader(url *string) io.ReadCloser {
	if *localFiles {
		pwd, err := os.Getwd()
		// Do not silently build a bogus path if the working directory
		// cannot be determined.
		failOnError(err)
		*url = "file://" + path.Join(pwd, path.Base(*url))
	}
	// Serve file:// URLs from the local file system.
	t := &http.Transport{}
	t.RegisterProtocol("file", http.NewFileTransport(http.Dir("/")))
	c := &http.Client{Transport: t}
	resp, err := c.Get(*url)
	failOnError(err)
	if resp.StatusCode != 200 {
		log.Fatalf(`bad GET status for "%s": %s`, *url, resp.Status)
	}
	return resp.Body
}
// newBuilder reads the CLDR archive named by the url flag, decodes its
// "supplemental" directory and returns a builder that writes to standard
// output. It also loads the IANA registry through parseRegistry.
func newBuilder() *builder {
r := openReader(url)
defer r.Close()
d := &cldr.Decoder{}
d.SetDirFilter("supplemental")
data, err := d.DecodeZip(r)
failOnError(err)
b := builder{
out: os.Stdout,
data: data,
supp: data.Supplemental(),
hash32: fnv.New32(),
}
// Everything written to b.w goes to the output and is also hashed.
b.w = io.MultiWriter(b.out, b.hash32)
b.parseRegistry()
return &b
}
// parseRegistry reads the IANA language subtag registry named by the iana
// flag and fills b.registry with one ianaEntry per tag or subtag.
// The input is tokenized on whitespace. A token naming a known field is
// followed by its single-token value; any other token is skipped.
func (b *builder) parseRegistry() {
r := openReader(iana)
defer r.Close()
b.registry = make(map[string]*ianaEntry)
scan := bufio.NewScanner(r)
scan.Split(bufio.ScanWords)
var record *ianaEntry
for more := scan.Scan(); more; {
key := scan.Text()
more = scan.Scan()
value := scan.Text()
switch key {
case "Type:":
// A Type field starts a new record.
record = &ianaEntry{typ: value}
case "Subtag:", "Tag:":
record.tag = value
if info, ok := b.registry[value]; ok {
// The only duplicate that is tolerated is an extlang record whose tag
// is already registered as a language; the earlier entry is kept.
if info.typ != "language" || record.typ != "extlang" {
log.Fatalf("parseRegistry: tag %q already exists", value)
}
} else {
b.registry[value] = record
}
case "Suppress-Script:":
record.suppressScript = value
case "Added:":
record.added = value
case "Deprecated:":
record.deprecated = value
case "Macrolanguage:":
record.macro = value
case "Preferred-Value:":
record.preferred = value
case "Prefix:":
record.prefix = append(record.prefix, value)
case "Scope:":
record.scope = value
case "Description:":
// A description may consist of several tokens. Collect them until a
// token that starts with '%' or ends with ':' is found.
buf := []byte(value)
for more = scan.Scan(); more; more = scan.Scan() {
b := scan.Bytes()
if b[0] == '%' || b[len(b)-1] == ':' {
break
}
buf = append(buf, ' ')
buf = append(buf, b...)
}
record.description = append(record.description, string(buf))
// The current token has not been handled yet; reprocess it as a key.
continue
default:
// Not a known field: retry with the token that was read as the value.
continue
}
more = scan.Scan()
}
if scan.Err() != nil {
log.Panic(scan.Err())
}
}
// commentIndex maps the first word of each entry in comment to the entry's
// text, with every line break followed by a "// " marker.
var commentIndex = make(map[string]string)

// init fills commentIndex from the comment table.
func init() {
	for _, text := range comment {
		first := strings.SplitN(text, " ", 2)[0]
		name := strings.TrimSpace(first)
		commentIndex[name] = strings.Replace(text, "\n", "\n// ", -1)
	}
}
// comment writes the comment registered for name in commentIndex, followed
// by a newline. It writes to b.out rather than b.w, so the comment text is
// not included in the hash.
func (b *builder) comment(name string) {
fmt.Fprintln(b.out, commentIndex[name])
}
// pf writes the formatted text followed by a newline to b.w.
func (b *builder) pf(f string, x ...interface{}) {
fmt.Fprintf(b.w, f, x...)
fmt.Fprint(b.w, "\n")
}
// p writes its arguments to b.w in the manner of fmt.Println.
func (b *builder) p(x ...interface{}) {
fmt.Fprintln(b.w, x...)
}
// addSize adds s to the running total size and writes a comment reporting
// the size of the table that follows.
func (b *builder) addSize(s int) {
b.size += s
b.pf("// Size: %d bytes", s)
}
// addArraySize adds s to the running total size and writes a comment
// reporting both the size s in bytes and the number of elements n.
func (b *builder) addArraySize(s, n int) {
b.size += s
b.pf("// Size: %d bytes, %d elements", s, n)
}
// writeConst writes the comment for name followed by a constant declaration
// that gives name the value x, formatted with %v.
func (b *builder) writeConst(name string, x interface{}) {
b.comment(name)
b.pf("const %s = %v", name, x)
}
// writeSlice writes the slice ss as an array variable called name, preceded
// by its comment and size. Struct elements are written one per line using
// %#v; all other elements are written with %d, twelve per line.
func (b *builder) writeSlice(name string, ss interface{}) {
	b.comment(name)
	val := reflect.ValueOf(ss)
	elem := val.Type().Elem()
	n := val.Len()
	b.addArraySize(n*int(elem.Size()), n)
	fmt.Fprintf(b.w, `var %s = [%d]%s{`, name, n, elem)
	isStruct := elem.Kind() == reflect.Struct
	for i := 0; i < n; i++ {
		item := val.Index(i).Interface()
		if isStruct {
			fmt.Fprintf(b.w, "\n\t%#v, ", item)
			continue
		}
		if i%12 == 0 {
			fmt.Fprintf(b.w, "\n\t")
		}
		fmt.Fprintf(b.w, "%d, ", item)
	}
	b.p("\n}")
}
// writeStringSlice writes ss as an array of strings called name, one quoted
// element per line. The reported size covers the string headers plus the
// bytes of every element. This representation has a lot of overhead and is
// mainly meant for debugging.
// TODO: remove
func (b *builder) writeStringSlice(name string, ss []string) {
	b.comment(name)
	elem := reflect.TypeOf(ss).Elem()
	total := len(ss) * int(elem.Size())
	for i := range ss {
		total += len(ss[i])
	}
	b.addArraySize(total, len(ss))
	b.pf(`var %s = [%d]%s{`, name, len(ss), elem)
	for _, s := range ss {
		b.pf("\t%q,", s)
	}
	b.p("}")
}
// writeString writes s as a string variable called name, preceded by its
// comment and size. Strings shorter than 40 bytes are written on a single
// line; longer strings are written as a concatenation of quoted pieces.
func (b *builder) writeString(name, s string) {
b.comment(name)
b.addSize(len(s) + int(reflect.TypeOf(s).Size()))
if len(s) < 40 {
b.pf(`var %s string = %q`, name, s)
return
}
// cpl bounds the number of characters of quoted text per line.
const cpl = 60
b.pf(`var %s string = "" +`, name)
for {
n := cpl
if n > len(s) {
n = len(s)
}
var q string
// Shrink the piece until its quoted form, including the two quote
// characters, fits within cpl+2 bytes.
for {
q = strconv.Quote(s[:n])
if len(q) <= cpl+2 {
break
}
n--
}
if n < len(s) {
b.pf(` %s +`, q)
s = s[n:]
} else {
b.pf(` %s`, q)
break
}
}
}
// base is the number of letters in the range 'a' through 'z'.
const base = 'z' - 'a' + 1

// strToInt interprets s as a number in the given base, with 'a' as digit 0,
// and returns its value.
func strToInt(s string) uint {
	var v uint
	for _, c := range []byte(s) {
		v = v*base + uint(c-'a')
	}
	return v
}
// writeBitVector writes a byte array called name in which the bit at
// position strToInt(s) is set for every s in ss. The vector is sized for
// all strings that have the length of ss[0], so ss must not be empty.
func (b *builder) writeBitVector(name string, ss []string) {
vec := make([]uint8, int(math.Ceil(math.Pow(base, float64(len(ss[0])))/8)))
for _, s := range ss {
v := strToInt(s)
vec[v/8] |= 1 << (v % 8)
}
b.writeSlice(name, vec)
}
// TODO: convert this type into a list or two-stage trie.

// writeMapFunc writes a map[string]uint16 variable called name. It has the
// keys of m, written in sorted order, and each value is the result of
// applying f to the corresponding value in m.
// The reported size counts, per entry, two bytes for the value, the string
// header of the key and the bytes of the key itself.
func (b *builder) writeMapFunc(name string, m map[string]string, f func(string) uint16) {
	b.comment(name)
	v := reflect.ValueOf(m)
	sz := v.Len() * (2 + int(v.Type().Key().Size()))
	keys := make([]string, 0, len(m))
	for k := range m {
		// Only the keys of m are stored in the generated map; its values
		// are replaced by f's uint16 result.
		sz += len(k)
		keys = append(keys, k)
	}
	b.addSize(sz)
	b.pf(`var %s = map[string]uint16{`, name)
	sort.Strings(keys)
	for _, k := range keys {
		b.pf("\t%q: %v,", k, f(m[k]))
	}
	b.p("}")
}
// langIndex returns the numeric ID for language s: its position in b.lang
// if it is listed there, and otherwise strToInt(s) offset by the number of
// elements in b.lang.
func (b *builder) langIndex(s string) uint16 {
	pos, found := b.lang.find(s)
	if !found {
		return uint16(strToInt(s)) + uint16(len(b.lang.s))
	}
	return uint16(pos)
}
// inc advances the string to its lexicographical successor of the same
// length: the last byte below 'z' is incremented and every byte after it is
// reset to 'a', so "qaz" becomes "qba".
// If no byte can be incremented, s has no successor of the same length and
// s+"a" is returned, which still compares greater than s so that loops of
// the form "for a := lo; a <= hi; a = inc(a)" terminate.
func inc(s string) string {
	b := []byte(s)
	for i := len(b) - 1; i >= 0; i-- {
		if b[i] < 'z' {
			b[i]++
			return string(b)
		}
		// Carry: wrap this position and continue with the one before it.
		b[i] = 'a'
	}
	return s + "a"
}
// parseIndices fills the language, script, region, currency and locale
// indices of b from the IANA registry and the CLDR supplemental data.
func (b *builder) parseIndices() {
meta := b.supp.Metadata
for k, v := range b.registry {
var ss *stringSet
switch v.typ {
case "language":
// Languages with a 2-letter code, a suppress-script or the "special"
// scope are indexed; all others only go into langNoIndex.
if len(k) == 2 || v.suppressScript != "" || v.scope == "special" {
b.lang.add(k)
continue
} else {
ss = &b.langNoIndex
}
case "region":
ss = &b.region
case "script":
ss = &b.script
default:
continue
}
// A key of the form "lo..hi" denotes a range; add every element of it.
if s := strings.SplitN(k, "..", 2); len(s) > 1 {
for a := s[0]; a <= s[1]; a = inc(a) {
ss.add(a)
}
} else {
ss.add(k)
}
}
// currency codes
for _, reg := range b.supp.CurrencyData.Region {
for _, cur := range reg.Currency {
b.currency.add(cur.Iso4217)
}
}
// common locales
b.locale.parse(meta.DefaultContent.Locales)
}
// writeLanguage generates all tables needed for language canonicalization.
// It writes unknownLang, lang, langNoIndexOffset, langNoIndex, altLangISO3,
// langOldMap, langMacroMap and tagAlias, in that order.
func (b *builder) writeLanguage() {
meta := b.supp.Metadata
b.writeConst("unknownLang", b.lang.index("und"))
// Get language codes that need to be mapped (overlong 3-letter codes, deprecated
// 2-letter codes and grandfathered tags).
langOldMap := stringSet{}
// Mappings for macro languages
langMacroMap := stringSet{}
// altLangISO3 get the alternative ISO3 names that need to be mapped.
altLangISO3 := stringSet{}
// legacyTag maps from tag to language code.
legacyTag := make(map[string]string)
// Work on a copy so that b.lang keeps the plain language codes.
lang := b.lang.clone()
for _, a := range meta.Alias.LanguageAlias {
if a.Replacement == "" {
a.Replacement = "und"
}
// TODO: support mapping to tags
repl := strings.SplitN(a.Replacement, "_", 2)[0]
if a.Reason == "overlong" {
if len(a.Replacement) == 2 && len(a.Type) == 3 {
lang.updateLater(a.Replacement, a.Type)
}
} else if len(a.Type) <= 3 {
if a.Reason != "deprecated" {
langMacroMap.add(a.Type)
langMacroMap.updateLater(a.Type, repl)
}
} else {
legacyTag[strings.Replace(a.Type, "_", "-", -1)] = repl
}
}
for k, v := range b.registry {
// Also add deprecated values for 3-letter ISO codes, which CLDR omits.
if v.typ == "language" && v.deprecated != "" && v.preferred != "" {
langOldMap.add(k)
langOldMap.updateLater(k, v.preferred)
}
}
// Fix CLDR mappings.
lang.updateLater("tl", "tgl")
lang.updateLater("sh", "hbs")
lang.updateLater("mo", "mol")
lang.updateLater("no", "nor")
lang.updateLater("tw", "twi")
lang.updateLater("nb", "nob")
lang.updateLater("ak", "aka")
// Ensure that each 2-letter code is matched with a 3-letter code.
for _, v := range lang.s {
s, ok := lang.update[v]
if !ok {
// Fall back to the 3-letter code of the preferred value of v, if any.
if s, ok = lang.update[langOldMap.update[v]]; !ok {
continue
}
lang.update[v] = s
}
// A 3-letter code that does not start with the same letter as the
// 2-letter code cannot be stored as a suffix and goes into altLangISO3.
if v[0] != s[0] {
altLangISO3.add(s)
altLangISO3.updateLater(s, v)
}
}
// Complete canonicalized language tags.
lang.freeze()
for i, v := range lang.s {
// We can avoid these manual entries by using the IANA registry directly.
// Seems easier to update the list manually, as changes are rare.
// The panic in this loop will trigger if we miss an entry.
add := ""
if s, ok := lang.update[v]; ok {
if s[0] == v[0] {
// Same first letter: store the remaining two letters.
add = s[1:]
} else {
// Otherwise store a zero byte followed by the index into altLangISO3.
add = string([]byte{0, byte(altLangISO3.index(s))})
}
} else if len(v) == 3 {
add = "\x00"
} else {
log.Panicf("no data for long form of %q", v)
}
lang.s[i] += add
}
b.writeString("lang", lang.join())
b.writeConst("langNoIndexOffset", len(b.lang.s))
// space of all valid 3-letter language identifiers.
b.writeBitVector("langNoIndex", b.langNoIndex.slice())
// Append to each alternative 3-letter code the index of its language.
for i, s := range altLangISO3.slice() {
idx := b.lang.index(altLangISO3.update[s])
altLangISO3.s[i] += string([]byte{byte(idx)})
}
b.writeString("altLangISO3", altLangISO3.join())
// makeMap writes the pairs (element, update[element]) of ss as a slice of
// language index pairs, ordered by the index of the element.
makeMap := func(name string, ss *stringSet) {
ss.sortFunc(func(i, j string) bool {
return b.langIndex(i) < b.langIndex(j)
})
m := []struct{ from, to uint16 }{}
for _, s := range ss.s {
m = append(m, struct{ from, to uint16 }{
b.langIndex(s),
b.langIndex(ss.update[s]),
})
}
b.writeSlice(name, m)
}
makeMap("langOldMap", &langOldMap)
makeMap("langMacroMap", &langMacroMap)
b.writeMapFunc("tagAlias", legacyTag, func(s string) uint16 {
return uint16(b.langIndex(s))
})
}
// writeScript writes the unknownScript constant, the script string table
// and the suppressScript table. The latter holds, for each language in
// b.lang, the index of its suppress-script from the registry, or the index
// of "Zzzz" if it has none.
func (b *builder) writeScript() {
	unknown := uint8(b.script.index("Zzzz"))
	b.writeConst("unknownScript", unknown)
	b.writeString("script", b.script.join())

	langs := b.lang.slice()
	supp := make([]uint8, len(langs))
	for i, tag := range langs {
		if sc := b.registry[tag].suppressScript; sc != "" {
			supp[i] = uint8(b.script.index(sc))
		} else {
			supp[i] = unknown
		}
	}
	b.writeSlice("suppressScript", supp)
}
// parseM49 converts the decimal string s to a number that must fit in 10
// bits. The empty string yields 0; any other malformed input aborts the
// program through failOnError.
func parseM49(s string) uint16 {
	if s == "" {
		return 0
	}
	code, err := strconv.ParseUint(s, 10, 10)
	failOnError(err)
	return uint16(code)
}
// writeRegion writes the unknownRegion and isoRegionOffset constants and the
// regionISO, altRegionISO3, altRegionIDs and m49 tables.
func (b *builder) writeRegion() {
b.writeConst("unknownRegion", b.region.index("ZZ"))
// Regions that sort before "AA" are treated as numeric codes below.
isoOffset := b.region.index("AA")
m49map := make([]uint16, len(b.region.slice()))
altRegionISO3 := ""
altRegionIDs := []uint16{}
b.writeConst("isoRegionOffset", isoOffset)
// 2-letter region lookup and mapping to numeric codes.
regionISO := b.region.clone()
regionISO.s = regionISO.s[isoOffset:]
regionISO.sorted = false
for _, tc := range b.supp.CodeMappings.TerritoryCodes {
i := regionISO.index(tc.Type)
if len(tc.Alpha3) == 3 {
if tc.Alpha3[0] == tc.Type[0] {
// Same first letter: append the remaining two letters.
regionISO.s[i] += tc.Alpha3[1:]
} else {
// Otherwise append a zero byte and the offset of the 3-letter code
// within altRegionISO3.
regionISO.s[i] += string([]byte{0, byte(len(altRegionISO3))})
altRegionISO3 += tc.Alpha3
altRegionIDs = append(altRegionIDs, uint16(isoOffset+i))
}
}
if d := m49map[isoOffset+i]; d != 0 {
log.Panicf("%s found as a duplicate UN.M49 code of %03d", tc.Numeric, d)
}
m49map[isoOffset+i] = parseM49(tc.Numeric)
}
// Pad entries without a 3-letter code so that all entries have length 4.
for i, s := range regionISO.s {
if len(s) != 4 {
regionISO.s[i] = s + " "
}
}
b.writeString("regionISO", regionISO.join())
b.writeString("altRegionISO3", altRegionISO3)
b.writeSlice("altRegionIDs", altRegionIDs)
// 3-digit region lookup, groupings.
for i := 0; i < isoOffset; i++ {
m49map[i] = parseM49(b.region.s[i])
}
b.writeSlice("m49", m49map)
}
// writeLocale writes the list of common locales as a string array called
// "locale".
func (b *builder) writeLocale() {
b.writeStringSlice("locale", b.locale.slice())
}
// writeLanguageInfo currently does nothing.
func (b *builder) writeLanguageInfo() {
}
// writeCurrencies writes the currency string table and the unknownCurrency
// constant. Each currency code is followed by one byte that holds the
// rounding increment in its upper 6 bits and the number of decimal
// positions in its lower 2 bits.
func (b *builder) writeCurrencies() {
unknown := b.currency.index("XXX")
digits := map[string]uint64{}
rounding := map[string]uint64{}
for _, info := range b.supp.CurrencyData.Fractions[0].Info {
var err error
// The bit sizes make parsing fail for values that would not fit in
// the packed byte.
digits[info.Iso4217], err = strconv.ParseUint(info.Digits, 10, 2)
failOnError(err)
rounding[info.Iso4217], err = strconv.ParseUint(info.Rounding, 10, 6)
failOnError(err)
}
for i, cur := range b.currency.slice() {
d := uint64(2) // default number of decimal positions
if dd, ok := digits[cur]; ok {
d = dd
}
var r uint64
if r = rounding[cur]; r == 0 {
r = 1 // default rounding increment in units 10^(-digits)
}
b.currency.s[i] += string([]byte{byte(r<<2 + d)})
}
b.writeString("currency", b.currency.join())
// Hack alert: gofmt indents a trailing comment after an indented string.
// Write this constant after currency to force a proper indentation of
// the final comment.
b.writeConst("unknownCurrency", unknown)
}
// writeRegionInclusionData writes the nRegionGroups constant and the
// regionInclusion, regionInclusionBits and regionInclusionNext tables, which
// are derived from the CLDR territory containment data.
func (b *builder) writeRegionInclusionData() {
type index uint
// groups maps the region index of a group to its bit position.
groups := make(map[int]index)
// Create group indices.
for i := 0; b.region.s[i][0] < 'A'; i++ { // Base M49 indices on regionID.
groups[i] = index(i)
}
for _, g := range b.supp.TerritoryContainment.Group {
group := b.region.index(g.Type)
if _, ok := groups[group]; !ok {
groups[group] = index(len(groups))
}
}
// Sets of groups are stored in 32-bit vectors.
if len(groups) > 32 {
log.Fatalf("only 32 groups supported, found %d", len(groups))
}
b.writeConst("nRegionGroups", len(groups))
// mm maps a region index to the bit positions of the groups it is directly
// connected to: the groups that contain it and, for a group, the groups it
// contains.
mm := make(map[int][]index)
for _, g := range b.supp.TerritoryContainment.Group {
group := b.region.index(g.Type)
for _, mem := range strings.Split(g.Contains, " ") {
r := b.region.index(mem)
mm[r] = append(mm[r], groups[group])
if g, ok := groups[r]; ok {
mm[group] = append(mm[group], g)
}
}
}
regionInclusion := make([]uint8, len(b.region.s))
// bvs maps a bit vector to its position in regionInclusionBits.
bvs := make(map[uint32]index)
// Make the first bitvector positions correspond with the groups.
for r, i := range groups {
bv := uint32(1 << i)
for _, g := range mm[r] {
bv |= 1 << g
}
bvs[bv] = i
regionInclusion[r] = uint8(bvs[bv])
}
for r := 0; r < len(b.region.s); r++ {
if _, ok := groups[r]; !ok {
bv := uint32(0)
for _, g := range mm[r] {
bv |= 1 << g
}
if bv == 0 {
// Pick the world for unspecified regions.
bv = 1 << groups[b.region.index("001")]
}
if _, ok := bvs[bv]; !ok {
bvs[bv] = index(len(bvs))
}
regionInclusion[r] = uint8(bvs[bv])
}
}
b.writeSlice("regionInclusion", regionInclusion)
regionInclusionBits := make([]uint32, len(bvs))
for k, v := range bvs {
regionInclusionBits[v] = uint32(k)
}
// Add bit vectors for increasingly large distances until a fixed point is reached.
regionInclusionNext := []uint8{}
// regionInclusionBits may grow inside the loop; the new entries are
// processed as well.
for i := 0; i < len(regionInclusionBits); i++ {
bits := regionInclusionBits[i]
next := bits
for i := uint(0); i < uint(len(groups)); i++ {
if bits&(1<<i) != 0 {
next |= regionInclusionBits[i]
}
}
if _, ok := bvs[next]; !ok {
bvs[next] = index(len(bvs))
regionInclusionBits = append(regionInclusionBits, next)
}
regionInclusionNext = append(regionInclusionNext, uint8(bvs[next]))
}
b.writeSlice("regionInclusionBits", regionInclusionBits)
b.writeSlice("regionInclusionNext", regionInclusionNext)
}
// header is the format string for the first lines of the generated file.
// Its two verbs receive the values of the url and iana flags (see main).
var header = `// Generated by running
// maketables -url=%s -iana=%s
// DO NOT EDIT
package locale
`
// main generates the tables and writes them to standard output, followed by
// a final comment with the total size and a checksum of the written tables.
func main() {
flag.Parse()
b := newBuilder()
// The header goes to b.out and is therefore not part of the checksum.
fmt.Fprintf(b.out, header, *url, *iana)
b.parseIndices()
b.writeLanguage()
b.writeScript()
b.writeRegion()
// TODO: b.writeLocale()
b.writeCurrencies()
b.writeRegionInclusionData()
fmt.Fprintf(b.out, "\n// Size: %.1fK (%d bytes); Check: %X\n", float32(b.size)/1024, b.size, b.hash32.Sum32())
}

29
locale/match.go Normal file
Просмотреть файл

@ -0,0 +1,29 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package locale
// regionDistance computes the distance between two regions based
// on the distance in the graph of region containments as defined in CLDR.
// It iterates over increasingly inclusive sets of groups, represented as
// bit vectors, until the source bit vector has bits in common with the
// destination vector.
// Identical regions have distance 0.
func regionDistance(a, b regionID) int {
if a == b {
return 0
}
p, q := regionInclusion[a], regionInclusion[b]
// If p is below nRegionGroups, swap so that q holds it.
if p < nRegionGroups {
p, q = q, p
}
set := regionInclusionBits
// Distance 1: q is below nRegionGroups and its bit is set in the vector of p.
if q < nRegionGroups && set[p]&(1<<q) != 0 {
return 1
}
d := 2
// Widen the source set step by step until it overlaps with the goal.
for goal := set[q]; set[p]&goal == 0; p = regionInclusionNext[p] {
d++
}
return d
}

36
locale/match_test.go Normal file
Просмотреть файл

@ -0,0 +1,36 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package locale
import "testing"
// TestRegionDistance checks regionDistance against a table of region pairs
// and their expected distances.
func TestRegionDistance(t *testing.T) {
tests := []struct {
a, b string
d int
}{
{"NL", "NL", 0},
{"NL", "EU", 1},
{"EU", "NL", 1},
{"005", "005", 0},
{"NL", "BE", 2},
{"CO", "005", 1},
{"005", "CO", 1},
{"CO", "419", 2},
{"419", "CO", 2},
{"005", "419", 1},
{"419", "005", 1},
{"001", "013", 2},
{"013", "001", 2},
{"CO", "CW", 4},
{"CO", "PW", 6},
{"CO", "BV", 6},
}
for i, tt := range tests {
if d := regionDistance(getRegionID([]byte(tt.a)), getRegionID([]byte(tt.b))); d != tt.d {
t.Errorf("%d: d(%s, %s) = %v; want %v", i, tt.a, tt.b, d, tt.d)
}
}
}

557
locale/parse.go Normal file
Просмотреть файл

@ -0,0 +1,557 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package locale
import (
"bytes"
"errors"
"fmt"
"sort"
"strings"
)
// isAlpha reports whether b is a letter rather than a digit.
// b must be an ASCII letter or digit; the check only tests that b sorts
// after '9'.
func isAlpha(b byte) bool {
	const lastDigit = '9'
	return b > lastDigit
}
// isAlphaNum reports whether s consists solely of ASCII letters and digits.
// It returns true for an empty s.
func isAlphaNum(s []byte) bool {
	for _, c := range s {
		switch {
		case 'a' <= c && c <= 'z':
		case 'A' <= c && c <= 'Z':
		case '0' <= c && c <= '9':
		default:
			return false
		}
	}
	return true
}
// Errors reported while scanning and parsing locale identifiers.
var (
errEmpty = errors.New("locale: empty locale identifier")
errInvalid = errors.New("locale: invalid")
errTrailSep = errors.New("locale: trailing separator")
)
// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
// The input is normalized in place while scanning: underscores become
// hyphens and invalid tokens are removed.
type scanner struct {
b []byte // the input, modified in place
bytes [64]byte // small buffer to cover most common cases
token []byte // the current token; nil if scanning found no further token
start int // start position of the current token
end int // end position of the current token
next int // next point for scan
err error // the first error that was set
done bool // set once scan has run out of input
}
// makeScannerString returns a scanner for s that is positioned at the first
// token. Input that fits in the scanner's fixed buffer is copied there;
// longer input gets a newly allocated byte slice.
func makeScannerString(s string) scanner {
	var scan scanner
	if len(s) > len(scan.bytes) {
		scan.b = []byte(s)
	} else {
		n := copy(scan.bytes[:], s)
		scan.b = scan.bytes[:n]
	}
	scan.init()
	return scan
}
// init replaces every '_' in the input by '-' and scans the first token.
func (s *scanner) init() {
	for i := range s.b {
		if s.b[i] == '_' {
			s.b[i] = '-'
		}
	}
	s.scan()
}
// toLower converts the ASCII upper case letters of the input between
// positions start and end to lower case, in place. Nothing happens if
// start >= end.
func (s *scanner) toLower(start, end int) {
	for i := start; i < end; i++ {
		if c := s.b[i]; 'A' <= c && c <= 'Z' {
			s.b[i] = c + ('a' - 'A')
		}
	}
}
// setError records e as the scanner's error unless an earlier error has
// already been recorded; only the first error is kept.
func (s *scanner) setError(e error) {
	if s.err != nil {
		return
	}
	s.err = e
}
// setErrorf is like setError, but builds the error from a format string and
// arguments in the manner of fmt.Errorf.
func (s *scanner) setErrorf(f string, x ...interface{}) {
s.setError(fmt.Errorf(f, x...))
}
// replace replaces the current token with repl. If repl and the token differ
// in length, the input is rebuilt with a gap of the right size, and end and
// next are shifted by the difference. start is left unchanged.
func (s *scanner) replace(repl string) {
	if end := s.start + len(repl); end != s.end {
		diff := end - s.end
		// Always rebuild into a fresh buffer. The original appended to
		// s.b[end:] when end >= cap(s.b), which dropped everything before
		// the token (or panicked when end > cap(s.b)).
		b := make([]byte, len(s.b)+diff)
		copy(b, s.b[:s.start])
		copy(b[end:], s.b[s.end:])
		s.b = b
		s.next += diff
		s.end = end
	}
	copy(s.b[s.start:], repl)
}
// gobble removes the current token from the input.
// Caller must call scan after calling gobble.
func (s *scanner) gobble() {
if s.start == 0 {
// First token: move everything from next onward to the front, which
// also drops the separator that follows the token.
s.b = s.b[:+copy(s.b, s.b[s.next:])]
s.end = 0
} else {
// Otherwise drop the token together with the separator before it.
s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])]
s.end = s.start - 1
}
// The following scan resumes at the position where the token started.
s.next = s.start
}
// scan parses the next token of a BCP 47 string. Tokens that are larger
// than 8 characters or include non-alphanumeric characters result in an error
// and are gobbled and removed from the output.
// It returns the end position of the last token consumed.
// If there are no further tokens, token is nil and done is set.
func (s *scanner) scan() (end int) {
end = s.end
s.token = nil
for s.start = s.next; s.next < len(s.b); {
i := bytes.IndexByte(s.b[s.next:], '-')
if i == -1 {
// No further separator: the token extends to the end of the input.
s.end = len(s.b)
s.next = len(s.b)
i = s.end - s.start
} else {
s.end = s.next + i
s.next = s.end + 1
}
token := s.b[s.start:s.end]
// Here i is the length of the token.
if i < 1 || i > 8 || !isAlphaNum(token) {
s.setErrorf("locale: invalid token %q", token)
s.gobble()
continue
}
s.token = token
return end
}
// Out of input: strip a trailing separator, if any.
if n := len(s.b); n > 0 && s.b[n-1] == '-' {
s.setError(errTrailSep)
s.b = s.b[:len(s.b)-1]
}
s.done = true
return end
}
// acceptMinSize skips the current token and then consumes all following
// tokens for as long as they have at least min characters.
// It returns the end position of the last token consumed.
func (s *scanner) acceptMinSize(min int) (end int) {
	end = s.end
	for s.scan(); len(s.token) >= min; s.scan() {
		end = s.end
	}
	return end
}
// Parse parses the given BCP 47 string and returns a valid ID.
// If parsing fails, it returns an error together with any part of the
// identifier that could be parsed. If parsing succeeds but an unknown option
// is found, it returns the valid Locale and an error.
// The empty string is rejected. A string listed in tagAlias is mapped
// directly to its language, and "root", in any case, yields und.
// It accepts identifiers in the BCP 47 format and extensions to this standard
// defined in
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
func Parse(s string) (loc ID, err error) {
	// TODO: consider supporting old-style locale key-value pairs.
	if len(s) == 0 {
		return und, errEmpty
	}
	if alias, found := tagAlias[s]; found {
		loc = und
		loc.lang = langID(alias)
		return loc, nil
	}
	scan := makeScannerString(s)
	if len(scan.token) < 4 {
		return parse(&scan, s)
	}
	// A first token of 4 or more characters is only valid for "root".
	if strings.EqualFold(s, "root") {
		return und, nil
	}
	return und, errInvalid
}
// parse parses the input of scan, which must be positioned at the first
// token, and returns the resulting ID together with the first error that
// was recorded by the scanner. s is the original input string; it is reused
// for the string form of the ID if it matches the normalized input.
func parse(scan *scanner, s string) (loc ID, err error) {
loc = und
var end int
private := false
if n := len(scan.token); n <= 1 {
// A first token of at most one character is only valid as the start of
// a private use identifier (see parsePrivate).
scan.toLower(0, len(scan.b))
end = parsePrivate(scan)
private = end > 0
} else if n >= 4 {
return und, errInvalid
} else { // the usual case
loc, end = parseTag(scan)
if n := len(scan.token); n == 1 {
loc.pExt = uint16(end)
end = parseExtensions(scan)
if end-int(loc.pExt) <= 1 {
loc.pExt = 0
}
}
}
// Anything that remains after the parsed part is invalid and is dropped.
if end < len(scan.b) {
scan.setErrorf("locale: invalid parts %q", scan.b[end:])
scan.b = scan.b[:end]
}
if len(scan.b) <= len(s) {
s = s[:len(scan.b)]
}
// Only keep a string if the input was already in normalized form or if
// there are parts that are not captured by the other fields.
if len(s) > 0 && cmp(s, scan.b) == 0 {
loc.str = &s
} else if loc.pVariant > 0 || loc.pExt > 0 || private {
s = string(scan.b)
loc.str = &s
}
return loc, scan.err
}
// parseTag parses language, script, region and variants.
// It returns an ID and the end position in the input that was parsed.
// Unknown scripts and regions are removed from the input.
func parseTag(scan *scanner) (ID, int) {
loc := und
// TODO: set an error if an unknown lang, script or region is encountered.
loc.lang = getLangID(scan.token)
scan.replace(loc.lang.String())
langStart := scan.start
end := scan.scan()
for len(scan.token) == 3 && isAlpha(scan.token[0]) {
// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
// to a tag of the form <extlang>.
if lang := getLangID(scan.token); lang != unknownLang {
loc.lang = lang
// Overwrite the language with the extlang; the gobble below then
// removes what is left of the two tokens.
copy(scan.b[langStart:], lang.String())
scan.b[langStart+3] = '-'
scan.start = langStart + 4
}
scan.gobble()
end = scan.scan()
}
if len(scan.token) == 4 && isAlpha(scan.token[0]) {
loc.script = getScriptID(script, scan.token)
if loc.script == unknownScript {
scan.gobble()
}
end = scan.scan()
}
if n := len(scan.token); n >= 2 && n <= 3 {
loc.region = getRegionID(scan.token)
if loc.region == unknownRegion {
scan.gobble()
} else {
scan.replace(loc.region.String())
}
end = scan.scan()
}
// Everything from the current token onward is lower-cased.
scan.toLower(scan.start, len(scan.b))
start := scan.start
end = parseVariants(scan, end)
if start < end {
loc.pVariant = byte(start)
loc.pExt = uint16(end)
}
return loc, end
}
// parseVariants scans tokens as long as each token is a valid variant string.
// Duplicate variants are removed.
// Currently any token of at least 4 characters is accepted as a variant.
// It returns the end position of the last variant kept, or the given end if
// there is none.
func parseVariants(scan *scanner, end int) int {
start := scan.start
for ; len(scan.token) >= 4; scan.scan() {
// TODO: validate and sort variants
// Drop the token if its text already occurs among the earlier variants.
if bytes.Index(scan.b[start:scan.start], scan.token) != -1 {
scan.gobble()
continue
}
end = scan.end
const maxVariantSize = 60000 // more than enough, ensures pExt will be valid.
if end > maxVariantSize {
break
}
}
return end
}
// bytesSort implements sort.Interface for a slice of byte slices, ordering
// the elements bytewise.
type bytesSort [][]byte

// Len returns the number of elements.
func (b bytesSort) Len() int { return len(b) }

// Swap exchanges the elements at positions i and j.
func (b bytesSort) Swap(i, j int) {
	tmp := b[i]
	b[i] = b[j]
	b[j] = tmp
}

// Less reports whether element i sorts strictly before element j.
func (b bytesSort) Less(i, j int) bool {
	return bytes.Compare(b[i], b[j]) < 0
}
// parseExtensions parses and normalizes the extensions in the buffer.
// It returns the last position of scan.b that is part of any extension.
// When there is more than one extension, they are rewritten in sorted
// order, with a private use ('x') extension, if any, placed last.
func parseExtensions(scan *scanner) int {
start := scan.start
exts := [][]byte{}
private := []byte{}
end := scan.end
for len(scan.token) == 1 {
// This start shadows the outer one and marks the current extension.
start := scan.start
extension := []byte{}
ext := scan.token[0]
switch ext {
case 'u':
// Attributes come first; they are followed by 2-letter keys, each
// with values of at least 3 characters.
attrEnd := scan.acceptMinSize(3)
end = attrEnd
var key []byte
for last := []byte{}; len(scan.token) == 2; last = key {
key = scan.token
end = scan.acceptMinSize(3)
// TODO: check key value validity
// If a key is not strictly greater than its predecessor, rescan all
// keys from the end of the attributes and write them back sorted.
if bytes.Compare(key, last) != 1 {
p := attrEnd + 1
scan.next = p
keys := [][]byte{}
for scan.scan(); len(scan.token) == 2; {
keyStart := scan.start
end = scan.acceptMinSize(3)
keys = append(keys, scan.b[keyStart:end])
}
sort.Sort(bytesSort(keys))
copy(scan.b[p:], bytes.Join(keys, []byte{'-'}))
break
}
}
case 't':
scan.scan()
if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
_, end = parseTag(scan)
scan.toLower(start, end)
}
for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
end = scan.acceptMinSize(3)
}
case 'x':
// A private use extension takes all remaining tokens.
end = scan.acceptMinSize(1)
default:
end = scan.acceptMinSize(2)
}
extension = scan.b[start:end]
if len(extension) < 3 {
scan.setErrorf("locale: empty extension %q", string(ext))
continue
} else if len(exts) == 0 && (ext == 'x' || scan.next >= len(scan.b)) {
// A single extension needs no reordering.
return end
} else if ext == 'x' {
private = extension
break
}
exts = append(exts, extension)
}
if scan.next < len(scan.b) {
scan.setErrorf("locale: invalid trailing characters %q", scan.b[scan.end:])
}
sort.Sort(bytesSort(exts))
if len(private) > 0 {
exts = append(exts, private)
}
// Replace the extensions in the input by their sorted form.
scan.b = append(scan.b[:start], bytes.Join(exts, []byte{'-'})...)
return len(scan.b)
}
// parsePrivate parses an identifier that starts with the private use
// extension 'x'. If the current token does not start with 'x', it sets an
// error and returns the start position of the current token.
func parsePrivate(scan *scanner) int {
	if len(scan.token) > 0 && scan.token[0] == 'x' {
		return parseExtensions(scan)
	}
	scan.setErrorf("locale: invalid locale %q", scan.b)
	return scan.start
}
// A Part identifies a part of the locale identifier string.
// Values above VariantPart identify an extension by its one-letter name
// (see Extension).
type Part byte

const (
	TagPart Part = iota // The identifier excluding extensions.
	LanguagePart
	ScriptPart
	RegionPart
	VariantPart
)

// partNames holds the names of the parts up to and including VariantPart,
// indexed by Part.
var partNames = []string{"Tag", "Language", "Script", "Region", "Variant"}

// String returns the name of the part. For an extension this is its
// one-letter name.
func (p Part) String() string {
	if p > VariantPart {
		// Convert through rune: a direct integer-to-string conversion of a
		// named byte type is rejected by go vet.
		return string(rune(p))
	}
	return partNames[p]
}
// Extension returns the Part identifier for extension e, which must be 0-9 or a-z.
// The identifier has the same numeric value as e; e is not validated.
func Extension(e byte) Part {
return Part(e)
}
// Errors for ill-formed language, script and region parts.
// NOTE(review): these are not referenced in the visible part of this file.
var (
errLang = errors.New("locale: invalid Language")
errScript = errors.New("locale: invalid Script")
errRegion = errors.New("locale: invalid Region")
)
// Compose returns a Locale composed from the given parts or an error
// if any of the strings for the parts are ill-formed.
// The parts are joined in the order tag, language, script, region and
// variant, followed by the extensions '0' through '9' and 'a' through 'z'
// with the private use extension 'x' last, and the result is then parsed.
// It is an error to specify a non-empty TagPart together with any of the
// language, script, region or variant parts.
func Compose(m map[Part]string) (loc ID, err error) {
	loc = und
	var scan scanner
	scan.b = scan.bytes[:0]
	// add appends part p, if present in m, to the identifier being built.
	// Extensions are prefixed with their one-letter name.
	add := func(p Part) {
		if s, ok := m[p]; ok {
			if len(scan.b) > 0 {
				scan.b = append(scan.b, '-')
			}
			if p > VariantPart {
				scan.b = append(scan.b, byte(p), '-')
			}
			scan.b = append(scan.b, s...)
		}
	}
	for p := TagPart; p <= VariantPart; p++ {
		if p == TagPart && m[p] != "" {
			for i := LanguagePart; i <= VariantPart; i++ {
				if _, ok := m[i]; ok {
					return und, fmt.Errorf("locale: cannot specify both Tag and %s", partNames[i])
				}
			}
		}
		add(p)
	}
	// The bounds are inclusive so that the extensions '9', 'w' and 'z' are
	// not silently dropped. Only 'x' is skipped here, as it must come last.
	for p := Part('0'); p <= Part('9'); p++ {
		add(p)
	}
	for p := Part('a'); p < Part('x'); p++ {
		add(p)
	}
	for p := Part('y'); p <= Part('z'); p++ {
		add(p)
	}
	add(Part('x'))
	scan.init()
	return parse(&scan, "")
}
// Part returns the part of the locale identifier indicated by p.
// The one-letter section identifier, if applicable, is not included.
// Components are separated by a '-'.
// It returns the empty string if the part is not present.
func (loc ID) Part(p Part) string {
s := ""
switch p {
case TagPart:
s = loc.String()
// pExt, if set, is the position at which the extensions start.
if loc.pExt > 0 {
s = s[:loc.pExt]
}
case LanguagePart:
s = loc.lang.String()
case ScriptPart:
if loc.script != unknownScript {
s = loc.script.String()
}
case RegionPart:
if loc.region != unknownRegion {
s = loc.region.String()
}
case VariantPart:
if loc.pVariant > 0 {
s = (*loc.str)[loc.pVariant:loc.pExt]
}
default:
// Any other value is the one-letter name of an extension.
if loc.pExt > 0 {
str := *loc.str
for i := int(loc.pExt); i < len(str); {
end, name, ext := getExtension(str, i)
if name == byte(p) {
return ext
}
i = end
}
} else if p == 'x' && loc.str != nil && strings.HasPrefix(*loc.str, "x-") {
// The identifier consists of a private use extension only.
return (*loc.str)[2:]
}
}
return s
}
// Parts returns all parts of the locale identifier in a map.
// The language is always included. The script, region, variant and
// extension parts are only included if present. TagPart is never included.
func (loc ID) Parts() map[Part]string {
m := make(map[Part]string)
m[LanguagePart] = loc.lang.String()
if loc.script != unknownScript {
m[ScriptPart] = loc.script.String()
}
if loc.region != unknownRegion {
m[RegionPart] = loc.region.String()
}
if loc.str != nil {
s := *loc.str
if strings.HasPrefix(s, "x-") {
// The identifier consists of a private use extension only.
m[Extension('x')] = s[2:]
} else if loc.pExt > 0 {
i := int(loc.pExt)
if int(loc.pVariant) != i && loc.pVariant > 0 {
m[VariantPart] = s[loc.pVariant:i]
}
for i < len(s) {
end, name, ext := getExtension(s, i)
m[Extension(name)] = ext
i = end
}
}
}
return m
}
// getExtension returns the end position, the name and the body of the
// extension that starts at position p of s, where p is the position of the
// separator that precedes the extension's name. A private use extension
// ('x') extends to the end of s.
func getExtension(s string, p int) (end int, name byte, ext string) {
	p++ // skip the separator
	name = s[p]
	if name == 'x' {
		return len(s), name, s[p+2:]
	}
	end = nextExtension(s, p)
	return end, name, s[p+2 : end]
}
// nextExtension returns the index of the next extension in s, scanning
// forward from position p for the pattern -<char>-. The returned index is
// that of the leading '-'. If no such pattern is found, len(s) is returned.
// In the vast majority of cases, locale identifiers have at most one
// extension and extensions tend to be small, so a linear scan suffices.
func nextExtension(s string, p int) int {
	limit := len(s) - 3
	for p < limit {
		switch {
		case s[p] != '-':
			p++
		case s[p+2] == '-':
			return p
		default:
			// "-cc": the next separator cannot occur before p+3.
			p += 3
		}
	}
	return len(s)
}

354
locale/parse_test.go Normal file
Просмотреть файл

@ -0,0 +1,354 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package locale
import (
"bytes"
"fmt"
"strings"
"testing"
)
// scanTest describes a single scanner test case: the input to scan, the
// tokens expected from it and whether scanning should succeed.
type scanTest struct {
ok bool // true if scanning does not result in an error
in string // the input handed to the scanner
tok []string // the expected tokens
}
// tests holds the scanner test cases used by TestScan and TestAcceptMinSize.
var tests = []scanTest{
{true, "", []string{}},
{true, "1", []string{"1"}},
{true, "en", []string{"en"}},
{true, "root", []string{"root"}},
{true, "maxchars", []string{"maxchars"}},
{false, "bad/", []string{}},
{false, "morethan8", []string{}},
{false, "-", []string{}},
{false, "----", []string{}},
{false, "_", []string{}},
{true, "en-US", []string{"en", "US"}},
{true, "en_US", []string{"en", "US"}},
{false, "en-US-", []string{"en", "US"}},
{false, "en-US--", []string{"en", "US"}},
{false, "en-US---", []string{"en", "US"}},
{false, "en--US", []string{"en", "US"}},
{false, "-en-US", []string{"en", "US"}},
{false, "-en--US-", []string{"en", "US"}},
// NOTE(review): this entry is identical to the previous one; possibly a
// different input was intended.
{false, "-en--US-", []string{"en", "US"}},
{false, "en-.-US", []string{"en", "US"}},
{false, ".-en--US-.", []string{"en", "US"}},
{false, "en-u.-US", []string{"en", "US"}},
{true, "en-u1-US", []string{"en", "u1", "US"}},
{true, "maxchar1_maxchar2-maxchar3", []string{"maxchar1", "maxchar2", "maxchar3"}},
{false, "moreThan8-moreThan8-e", []string{"e"}},
}
// TestScan runs the scanner over every entry of tests and verifies the
// produced tokens, the resulting buffer contents and the error state.
func TestScan(t *testing.T) {
	for i, tt := range tests {
		scan := makeScannerString(tt.in)
		n := 0
		for !scan.done {
			if n < len(tt.tok) {
				if want := tt.tok[n]; cmp(want, scan.token) != 0 {
					t.Errorf("%d: token %d: found %q; want %q", i, n, scan.token, want)
					break
				}
			} else {
				t.Errorf("%d: extra token %q", i, scan.token)
			}
			scan.scan()
			n++
		}
		// The buffer should equal the expected tokens joined by '-', once
		// any '_' in the buffer is replaced by '-'.
		want := strings.Join(tt.tok, "-")
		got := bytes.Replace(scan.b, b("_"), b("-"), -1)
		if cmp(want, got) != 0 {
			t.Errorf("%d: input: found %q; want %q", i, scan.b, want)
		}
		if valid := scan.err == nil; valid != tt.ok {
			t.Errorf("%d: ok: found %v; want %v", i, valid, tt.ok)
		}
	}
}
// TestAcceptMinSize checks, for every entry of tests and every minimum size
// from 1 through 8, that acceptMinSize returns the combined length of the
// leading run of expected tokens that are at least that long.
func TestAcceptMinSize(t *testing.T) {
	for i, tt := range tests {
		for sz := 1; sz <= 8; sz++ {
			scan := makeScannerString(tt.in)
			scan.end, scan.next = 0, 0
			got := scan.acceptMinSize(sz)

			// Expected length: successive tokens of at least sz bytes,
			// plus one separator between each pair of them.
			want := 0
			for k, tok := range tt.tok {
				if len(tok) < sz {
					break
				}
				if k > 0 {
					want++
				}
				want += len(tok)
			}
			if got != want {
				t.Errorf("%d:%d: found len %d; want %d", i, sz, got, want)
			}
		}
	}
}
// parseTest describes a single parse test case: the input string and the
// parts, extensions and flags expected after parsing it.
type parseTest struct {
i int // the index of this test
in string // the input to parse
lang, script, region string // the expected language, script and region parts
variants, ext string // the expected variants and extensions (extensions joined by "-")
extList []string // only used when more than one extension is present
invalid bool // true if parsing is expected to report an error
rewrite bool // special rewrite not handled by parseTag
changed bool // string needed to be reformatted
}
// parseTests returns the table of parse test cases. After building the
// table it stores each entry's index in its i field and makes ext and
// extList consistent: ext becomes extList joined by "-" when extList is set,
// and extList becomes a one-element list holding ext when only ext is set.
func parseTests() []parseTest {
// manyVars is a run of 50 distinct variants: "-abc00" through "-abc49".
var manyVars string
for i := 0; i < 50; i++ {
manyVars += fmt.Sprintf("-abc%02d", i)
}
tests := []parseTest{
{in: "root", lang: "und", changed: true},
{in: "und", lang: "und"},
{in: "en", lang: "en"},
{in: "xy", lang: "und", changed: true},
{in: "gsw", lang: "gsw"},
{in: "sr_Latn", lang: "sr", script: "Latn", changed: true},
{in: "af-Arab", lang: "af", script: "Arab"},
{in: "nl-BE", lang: "nl", region: "BE"},
{in: "es-419", lang: "es", region: "419"},
{in: "und-001", lang: "und", region: "001"},
{in: "de-latn-be", lang: "de", script: "Latn", region: "BE", changed: true},
{in: "de-1994", lang: "de", variants: "1994"},
{in: "nl-abcde-abcde", lang: "nl", variants: "abcde"},
{in: "nl" + manyVars, lang: "nl", variants: manyVars[1:]},
{in: "nl" + manyVars + manyVars, lang: "nl", variants: manyVars[1:]},
{in: "EN_CYRL", lang: "en", script: "Cyrl", changed: true},
// private use and extensions
{in: "x-a-b-c-d", ext: "x-a-b-c-d"},
{in: "x_A.-B-C_D", ext: "x-b-c-d", invalid: true, changed: true},
{in: "x-aa-bbbb-cccccccc-d", ext: "x-aa-bbbb-cccccccc-d"},
{in: "en-c_cc-b-bbb-a-aaa", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc"}},
{in: "en-x_cc-b-bbb-a-aaa", lang: "en", ext: "x-cc-b-bbb-a-aaa", changed: true},
{in: "en-c_cc-b-bbb-a-aaa-x-x", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc", "x-x"}},
{in: "en-u-co-phonebk", lang: "en", ext: "u-co-phonebk"},
{in: "en-Cyrl-u-co-phonebk", lang: "en", script: "Cyrl", ext: "u-co-phonebk"},
{in: "en-US-u-co-phonebk", lang: "en", region: "US", ext: "u-co-phonebk"},
{in: "en-US-u-co-phonebk-cu-xau", lang: "en", region: "US", ext: "u-co-phonebk-cu-xau"},
{in: "en-nedix-u-co-phonebk", lang: "en", variants: "nedix", ext: "u-co-phonebk"},
{in: "en-u-cu-xua-co-phonebk", lang: "en", ext: "u-co-phonebk-cu-xua", changed: true},
{in: "en-u-def-abc-cu-xua-co-phonebk", lang: "en", ext: "u-def-abc-co-phonebk-cu-xua", changed: true},
{in: "en-u-def-abc", lang: "en", ext: "u-def-abc"},
{in: "en-u-cu-xua-co-phonebk-a-cd", lang: "en", extList: []string{"a-cd", "u-co-phonebk-cu-xua"}, changed: true},
{in: "en-t-en-Cyrl-NL-1994", lang: "en", ext: "t-en-cyrl-nl-1994", changed: true},
{in: "en-t-en-Cyrl-NL-1994-t0-abc-def", lang: "en", ext: "t-en-cyrl-nl-1994-t0-abc-def", changed: true},
{in: "en-t-t0-abcd", lang: "en", ext: "t-t0-abcd"},
// Not necessary to have changed here.
{in: "en-t-nl-abcd", lang: "en", ext: "t-nl"},
{in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"},
{in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}},
// invalid
{in: "", lang: "und", invalid: true, changed: true},
{in: "-", lang: "und", invalid: true, changed: true},
{in: "x", lang: "und", invalid: true, changed: true},
{in: "x-", lang: "und", invalid: true, changed: true},
{in: "x--", lang: "und", invalid: true, changed: true},
{in: "a-a-b-c-d", lang: "und", invalid: true, changed: true},
{in: "en-", lang: "en", invalid: true},
{in: "enne-", lang: "und", invalid: true, changed: true},
{in: "en.", lang: "und", invalid: true, changed: true},
{in: "en.-latn", lang: "und", invalid: true, changed: true},
{in: "en.-en", lang: "en", invalid: true},
{in: "x-a-tooManyChars-c-d", ext: "x-a-c-d", invalid: true, changed: true},
{in: "a-tooManyChars-c-d", lang: "und", invalid: true, changed: true},
// TODO: check key-value validity
// { in: "en-u-cu-xd", lang: "en", ext: "u-cu-xd", invalid: true },
{in: "en-t-abcd", lang: "en", invalid: true},
{in: "en-Latn-US-en", lang: "en", script: "Latn", region: "US", invalid: true},
// rewrites
{in: "zh-min", lang: "und", rewrite: true, changed: true},
{in: "zh-min-nan", lang: "nan", changed: true},
{in: "zh-yue", lang: "yue", changed: true},
{in: "zh-xiang", lang: "hsn", rewrite: true, changed: true},
{in: "zh-guoyu", lang: "zh", rewrite: true, changed: true},
{in: "iw", lang: "iw", changed: false},
{in: "sgn-BE-FR", lang: "sfb", rewrite: true, changed: true},
{in: "i-klingon", lang: "tlh", rewrite: true, changed: true},
}
// Post-process the table: record indices and keep ext and extList in sync.
for i, tt := range tests {
tests[i].i = i
if tt.extList != nil {
tests[i].ext = strings.Join(tt.extList, "-")
}
if tt.ext != "" && tt.extList == nil {
tests[i].extList = []string{tt.ext}
}
}
return tests
}
// TestParseExtensions runs parseExtensions on the extension portion of every
// parse test case that expects extensions and is not a special rewrite. It
// checks both the resulting extension string and whether it differs from
// the corresponding part of the input.
func TestParseExtensions(t *testing.T) {
	for idx, tc := range parseTests() {
		if tc.rewrite || tc.ext == "" {
			continue
		}
		sc := makeScannerString(tc.in)
		// Unless the input starts with a one-letter subtag, move the scanner
		// to the position reported by nextExtension.
		if startsWithTag := len(sc.b) > 1 && sc.b[1] != '-'; startsWithTag {
			sc.end = nextExtension(string(sc.b), 0)
			sc.next = sc.end + 1
			sc.scan()
		}
		begin := sc.start
		sc.toLower(begin, len(sc.b))
		parseExtensions(&sc)

		got := string(sc.b[begin:])
		if got != tc.ext {
			t.Errorf("%d: ext was %v; want %v", idx, got, tc.ext)
		}
		changed := !strings.HasPrefix(tc.in[begin:], got)
		if changed != tc.changed {
			t.Errorf("%d: changed was %v; want %v", idx, changed, tc.changed)
		}
	}
}
// partChecks runs checks for each part of every entry returned by
// parseTests. For each entry it calls f to obtain a getter that maps a Part
// to its string value; a nil getter skips the entry. The getter's results
// are compared against the language, script, region, variants and
// extensions expected by the entry. The unused extension 'q' must always
// yield the empty string.
func partChecks(t *testing.T, f func(*parseTest) func(Part) string) {
	for i, tt := range parseTests() {
		get := f(&tt)
		if get == nil {
			continue
		}
		if s, g := get(LanguagePart), getLangID(b(tt.lang)).String(); s != g {
			t.Errorf("%d: lang was %q; want %q", i, s, g)
		}
		if s, g := get(ScriptPart), tt.script; s != g {
			t.Errorf("%d: script was %q; want %q", i, s, g)
		}
		if s, g := get(RegionPart), tt.region; s != g {
			t.Errorf("%d: region was %q; want %q", i, s, g)
		}
		if s, g := get(VariantPart), tt.variants; s != g {
			t.Errorf("%d: variants was %q; want %q", i, s, g)
		}
		// Each expected extension has the form "<name>-<body>".
		for _, g := range tt.extList {
			if s := get(Extension(g[0])); s != g[2:] {
				t.Errorf("%d: extension '%c' was %q; want %q", i, g[0], s, g[2:])
			}
		}
		if s := get(Extension('q')); s != "" {
			// The test index must be passed for %d; the original call
			// omitted it and so produced a malformed message.
			t.Errorf(`%d: unused extension 'q' was %q; want ""`, i, s)
		}
	}
}
// TestParseTag checks the parts produced by parseTag. Inputs starting with
// "x-" and special rewrites are skipped. The extension expectations are
// cleared before the part checks run, so only language, script, region and
// variants are compared.
func TestParseTag(t *testing.T) {
	partChecks(t, func(tt *parseTest) func(Part) string {
		if tt.rewrite || strings.HasPrefix(tt.in, "x-") {
			return nil
		}
		scan := makeScannerString(tt.in)
		id, end := parseTag(&scan)
		tag := string(scan.b[:end])
		// The changed flag is only verified for inputs without extensions.
		if tt.ext == "" {
			if changed := !strings.HasPrefix(tt.in, tag); changed != tt.changed {
				t.Errorf("%d: changed was %v; want %v", tt.i, changed, tt.changed)
			}
		}
		id.str = &tag
		tt.ext, tt.extList = "", []string{}
		return id.Part
	})
}
// TestParse checks Parse against every parse test case: the extension
// string stored in the resulting ID, whether the input needed reformatting,
// whether an error was reported, and finally each individual part.
func TestParse(t *testing.T) {
	partChecks(t, func(tt *parseTest) func(Part) string {
		id, err := Parse(tt.in)

		// Recover the extension string from the ID's internal state.
		var ext string
		if str := id.str; str != nil {
			switch {
			case strings.HasPrefix(*str, "x-"):
				ext = *str
			case id.pExt > 0 && int(id.pExt) < len(*str):
				ext = (*str)[id.pExt+1:]
			}
		}
		if ext != tt.ext {
			t.Errorf("%d: ext was %q; want %q", tt.i, ext, tt.ext)
		}

		changed := !(id.str != nil && strings.HasPrefix(tt.in, *id.str))
		if changed != tt.changed {
			t.Errorf("%d: changed was %v; want %v", tt.i, changed, tt.changed)
		}
		if invalid := err != nil; invalid != tt.invalid {
			t.Errorf("%d: invalid was %v; want %v. Error: %v", tt.i, invalid, tt.invalid, err)
		}
		return id.Part
	})
}
// TestPart checks that ID.Part returns the expected value for each part of
// every parsed test input.
func TestPart(t *testing.T) {
	partChecks(t, func(tt *parseTest) func(Part) string {
		// The parse error is ignored here; only the parts are compared.
		id, _ := Parse(tt.in)
		return id.Part
	})
}
func TestParts(t *testing.T) {
partChecks(t, func(tt *parseTest) func(Part) string {
id, _ := Parse(tt.in)
m := id.Parts()
return func(p Part) string {
return m[p]
}
})
}
// TestCompose1 builds an ID with Compose from the individual, upper-cased
// parts of each test case and checks the resulting parts. When variants are
// expected, invalid variants are mixed in and Compose must report an error.
func TestCompose1(t *testing.T) {
	partChecks(t, func(tt *parseTest) func(Part) string {
		parts := make(map[Part]string)
		// put stores the upper-cased value for p, skipping empty values.
		put := func(p Part, val string) {
			if val == "" {
				return
			}
			parts[p] = strings.ToUpper(val)
		}
		put(LanguagePart, tt.lang)
		put(ScriptPart, tt.script)
		put(RegionPart, tt.region)
		hasVariants := tt.variants != ""
		if hasVariants {
			parts[VariantPart] = tt.variants + "-tooManyChars-inv@lid-" + tt.variants
		}
		for _, e := range tt.extList {
			put(Extension(e[0]), e[2:])
		}
		id, err := Compose(parts)
		if hasVariants && err == nil {
			t.Errorf("%d: no error for invalid variant", tt.i)
		}
		return id.Part
	})
}
// TestCompose2 builds an ID with Compose from a complete tag (language,
// script, region and variants joined by '-') plus the extensions of each
// test case, and checks the resulting parts. Every extension has an
// over-long subtag appended, so Compose must report an error whenever
// extensions are present.
func TestCompose2(t *testing.T) {
	partChecks(t, func(tt *parseTest) func(Part) string {
		m := make(map[Part]string)
		tag := tt.lang
		for _, s := range []string{tt.script, tt.region, tt.variants} {
			if s != "" {
				tag += "-" + s
			}
		}
		m[TagPart] = tag
		for _, ext := range tt.extList {
			m[Extension(ext[0])] = ext[2:] + "-tooManyChars"
		}
		id, err := Compose(m)
		if len(tt.extList) > 0 && err == nil {
			// It is the extensions, not the variants, that were made
			// invalid above; the message now says so.
			t.Errorf("%d: no error for invalid extension", tt.i)
		}
		return func(p Part) string {
			return id.Part(p)
		}
	})
}

612
locale/tables.go Normal file
Просмотреть файл

@ -0,0 +1,612 @@
// Generated by running
// maketables -url=http://www.unicode.org/Public/cldr/23/core.zip -iana=http://www.iana.org/assignments/language-subtag-registry
// DO NOT EDIT
package locale
const unknownLang = 196
// lang holds an alphabetically sorted list of BCP 47 language identifiers.
// All entries are 4 bytes. The index of the identifier (divided by 4) is the language ID.
// For 2-byte language identifiers, the two successive bytes have the following meaning:
// - if the first letter of the 2- and 3-letter ISO codes are the same:
// the second and third letter of the 3-letter ISO code.
// - otherwise: a 0 followed by an index into altLangISO3 that has been right-shifted by 2 bits.
// For 3-byte language identifiers the 4th byte is 0.
// Size: 868 bytes
var lang string = "" +
"aaarabbkaeveaffrakkaammhanrgarraassmavvaayymazzebaakbeelbgul" +
"bhihbiisbmambnenboodbrrebsoscaatcehechhacooscrrecsescuhucvhv" +
"cyymdaandeeudsb\x00dvivdzzoeeweelllenngeopoes\x00\x04etsteuu" +
"sfaasffulfiinfjijfoaofrrafrr\x00frs\x00fyrygalegdlagllggnrng" +
"sw\x00guujgvlvhaauheebhiinhomohrrvhsb\x00htathuunhyyehzerian" +
"aidndieleigboiiiiikpkinndiodoisslittaiukuiw\x00\x02japnji" +
"\x00\x05jvavjwavkaatkgonkiikkjuakkazklalkmhmknankoorkok\x00k" +
"rauksaskuurkvomkw\x00\x00kyirlaatlbtzlgugliimlninloaoltitluu" +
"blvavmai\x00men\x00mglgmhahmirimis\x00mkkdmlalmnonmoolmrarms" +
"samtltmul\x00myyanaaunbobnddends\x00neepngdoniu\x00nlldnnnon" +
"oornqo\x00nrblnso\x00nvavnyyaocciojjiomrmorriossspaanpiliplo" +
"lpsusptorquuermohrnunroonruusrw\x00\x03saanscrdsdndsemesgags" +
"h\x00\x01siinsklksllvsmmosnnasoomsqqisrrpssswstotsuunsvwesww" +
"ataamteeltem\x00tggkthhatiirtkuktkl\x00tlgltmh\x00tnsntoontp" +
"i\x00trurtssottattvl\x00twwityahugigukkrund\x00urrduzzbveenv" +
"iievoolwalnwoolxhhoyiidyoorzahazbl\x00zhhozuulzxx\x00\xff" +
"\xff\xff\xff"
const langNoIndexOffset = 212
// langNoIndex is a bit vector of all 3-letter language codes that are not used as an index
// in lookup tables. The language ids for these language codes are derived directly
// from the letters and are not consecutive.
// Size: 2197 bytes, 2197 elements
var langNoIndex = [2197]uint8{
255, 253, 253, 254, 239, 255, 191, 219, 251, 255, 254, 250,
247, 31, 60, 87, 111, 151, 115, 248, 255, 255, 255, 112,
191, 3, 255, 255, 207, 5, 133, 98, 233, 255, 253, 127,
255, 255, 255, 119, 255, 255, 255, 255, 255, 255, 255, 227,
233, 255, 255, 255, 77, 184, 2, 122, 190, 255, 255, 255,
254, 255, 247, 255, 255, 255, 255, 223, 43, 244, 241, 240,
93, 231, 159, 20, 5, 32, 223, 237, 159, 63, 201, 33,
248, 191, 238, 255, 255, 255, 255, 255, 255, 127, 255, 255,
255, 255, 127, 253, 255, 255, 255, 247, 127, 255, 255, 255,
255, 255, 255, 231, 191, 255, 255, 223, 255, 239, 255, 255,
255, 255, 191, 255, 255, 255, 255, 223, 255, 255, 243, 255,
251, 47, 255, 255, 255, 254, 255, 255, 251, 255, 255, 247,
255, 255, 253, 255, 255, 255, 127, 223, 255, 255, 223, 254,
255, 255, 223, 255, 255, 223, 251, 255, 255, 254, 255, 255,
255, 255, 255, 247, 127, 191, 249, 213, 173, 127, 64, 255,
156, 193, 67, 44, 8, 36, 65, 0, 80, 68, 0, 128,
187, 255, 242, 159, 180, 66, 69, 214, 155, 52, 136, 244,
123, 231, 23, 86, 85, 125, 14, 28, 55, 113, 243, 239,
159, 255, 93, 40, 101, 8, 0, 16, 188, 255, 191, 255,
223, 247, 119, 55, 62, 135, 199, 223, 255, 0, 129, 0,
176, 5, 128, 0, 0, 0, 0, 3, 64, 0, 0, 146,
33, 208, 255, 125, 255, 222, 254, 94, 4, 0, 2, 100,
141, 25, 193, 223, 123, 34, 0, 0, 0, 223, 109, 222,
38, 229, 217, 241, 254, 255, 253, 207, 159, 20, 1, 12,
134, 0, 193, 0, 240, 197, 103, 91, 86, 137, 94, 183,
237, 239, 3, 0, 2, 0, 0, 0, 192, 119, 218, 87,
144, 105, 1, 44, 86, 123, 244, 255, 127, 127, 0, 0,
0, 1, 8, 70, 0, 0, 0, 176, 20, 7, 81, 18,
10, 0, 0, 0, 0, 0, 17, 73, 0, 0, 96, 16,
0, 0, 0, 16, 0, 0, 68, 4, 0, 16, 128, 4,
24, 0, 0, 4, 0, 128, 40, 4, 0, 0, 16, 213,
45, 16, 100, 53, 36, 83, 245, 212, 189, 194, 205, 1,
0, 128, 0, 64, 0, 0, 0, 0, 0, 4, 23, 57,
1, 217, 87, 137, 33, 152, 167, 0, 0, 1, 64, 130,
0, 0, 0, 4, 0, 0, 0, 2, 1, 64, 0, 64,
0, 0, 176, 254, 171, 57, 0, 2, 0, 0, 0, 4,
0, 0, 0, 0, 0, 32, 0, 64, 4, 0, 0, 0,
2, 0, 0, 0, 16, 129, 168, 5, 0, 0, 0, 0,
4, 32, 4, 166, 8, 4, 0, 8, 1, 80, 0, 0,
8, 49, 134, 64, 0, 0, 0, 0, 64, 0, 3, 117,
2, 16, 8, 4, 0, 0, 0, 224, 59, 179, 19, 0,
128, 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 255, 255, 255, 255, 255, 223, 206, 131, 162,
192, 255, 223, 37, 207, 31, 197, 3, 16, 32, 178, 197,
166, 69, 37, 155, 3, 79, 248, 223, 3, 148, 64, 16,
1, 14, 0, 227, 145, 84, 155, 56, 241, 125, 247, 109,
249, 255, 255, 125, 4, 8, 0, 1, 33, 18, 60, 95,
253, 15, 133, 79, 64, 64, 0, 0, 255, 253, 255, 214,
232, 27, 244, 55, 163, 13, 0, 0, 32, 123, 57, 2,
5, 132, 0, 240, 255, 127, 254, 0, 24, 4, 129, 0,
0, 0, 128, 16, 148, 28, 1, 0, 0, 0, 0, 0,
16, 64, 0, 4, 8, 180, 254, 165, 12, 64, 0, 0,
17, 4, 4, 108, 0, 96, 240, 255, 251, 127, 230, 24,
5, 159, 223, 110, 3, 0, 17, 0, 0, 0, 64, 4,
149, 166, 128, 40, 4, 0, 4, 81, 226, 255, 253, 63,
5, 9, 8, 5, 64, 0, 0, 0, 0, 16, 0, 0,
8, 0, 0, 0, 0, 161, 2, 108, 229, 72, 20, 136,
32, 192, 71, 128, 7, 0, 0, 0, 204, 80, 64, 36,
133, 71, 132, 64, 32, 16, 0, 0, 2, 80, 136, 17,
0, 209, 140, 238, 80, 19, 29, 17, 105, 6, 89, 235,
51, 8, 0, 32, 5, 64, 16, 0, 0, 0, 16, 68,
150, 73, 214, 93, 167, 129, 69, 151, 251, 0, 16, 0,
8, 0, 128, 0, 64, 69, 0, 1, 2, 0, 1, 64,
128, 0, 6, 8, 240, 235, 247, 57, 132, 153, 22, 0,
0, 12, 4, 1, 32, 32, 221, 162, 1, 0, 0, 0,
18, 68, 0, 0, 4, 16, 240, 157, 149, 19, 0, 128,
0, 0, 208, 18, 64, 0, 16, 240, 144, 98, 76, 210,
2, 1, 10, 0, 70, 4, 0, 8, 2, 0, 32, 192,
0, 128, 6, 0, 8, 0, 0, 0, 0, 240, 216, 239,
21, 2, 8, 0, 0, 1, 0, 0, 0, 0, 16, 1,
0, 16, 0, 0, 0, 255, 215, 227, 253, 255, 255, 255,
255, 255, 127, 255, 255, 254, 255, 255, 255, 255, 255, 255,
255, 255, 255, 223, 255, 251, 255, 255, 219, 253, 255, 255,
127, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
253, 255, 223, 191, 220, 255, 255, 255, 255, 255, 255, 255,
255, 254, 251, 255, 255, 255, 255, 255, 255, 255, 254, 255,
253, 255, 255, 255, 255, 255, 255, 255, 239, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 123, 253, 221, 223, 255,
188, 152, 5, 40, 255, 7, 240, 255, 255, 127, 0, 8,
0, 195, 61, 27, 6, 230, 114, 240, 255, 124, 63, 68,
34, 0, 159, 107, 14, 253, 255, 87, 242, 255, 63, 255,
242, 30, 133, 247, 255, 255, 71, 128, 1, 2, 0, 0,
64, 85, 159, 138, 217, 217, 14, 17, 133, 81, 208, 243,
255, 119, 0, 1, 5, 209, 88, 72, 0, 0, 0, 16,
4, 2, 0, 32, 10, 128, 123, 182, 253, 254, 254, 255,
255, 255, 255, 255, 255, 239, 255, 255, 223, 127, 255, 255,
255, 255, 255, 255, 255, 255, 255, 247, 255, 255, 219, 119,
255, 255, 127, 255, 255, 255, 239, 255, 189, 255, 255, 251,
255, 255, 255, 223, 127, 253, 255, 247, 255, 255, 247, 255,
255, 255, 251, 255, 239, 255, 255, 255, 255, 255, 127, 223,
247, 191, 239, 247, 255, 255, 255, 255, 255, 255, 255, 255,
254, 255, 255, 127, 255, 255, 255, 255, 255, 252, 255, 253,
127, 255, 255, 158, 190, 255, 238, 255, 127, 247, 127, 2,
130, 4, 255, 255, 255, 255, 215, 239, 255, 255, 247, 254,
226, 158, 231, 255, 247, 255, 86, 189, 201, 254, 255, 255,
255, 255, 239, 255, 253, 247, 125, 15, 167, 81, 4, 68,
3, 208, 85, 174, 166, 253, 189, 255, 67, 92, 91, 255,
255, 255, 63, 32, 20, 0, 87, 81, 130, 101, 245, 76,
226, 255, 255, 223, 64, 5, 197, 5, 0, 34, 0, 116,
105, 16, 8, 4, 65, 0, 1, 6, 0, 0, 0, 0,
0, 81, 96, 5, 4, 1, 0, 0, 6, 1, 32, 0,
24, 1, 146, 177, 253, 103, 75, 6, 148, 0, 87, 237,
251, 76, 157, 123, 131, 4, 98, 64, 0, 21, 66, 0,
0, 0, 84, 131, 249, 95, 16, 140, 201, 70, 223, 247,
19, 49, 0, 0, 0, 0, 0, 144, 0, 0, 0, 0,
0, 10, 16, 0, 1, 64, 0, 240, 223, 253, 191, 125,
186, 207, 255, 191, 66, 20, 132, 97, 176, 255, 93, 122,
4, 2, 0, 65, 45, 20, 37, 247, 237, 241, 191, 239,
63, 0, 0, 2, 199, 224, 30, 252, 187, 255, 253, 251,
247, 253, 117, 253, 255, 252, 245, 237, 71, 244, 127, 16,
1, 1, 196, 127, 255, 247, 221, 249, 95, 5, 134, 235,
245, 119, 189, 61, 0, 0, 0, 67, 112, 66, 0, 64,
0, 0, 1, 67, 25, 0, 8, 0, 255, 255, 255, 3,
0, 0, 8, 0, 0, 32, 0, 0, 128, 0, 0, 0,
2, 0, 0, 8, 0, 0, 32, 0, 0, 128, 0, 0,
0, 2, 0, 0, 8, 0, 0, 32, 0, 0, 128, 0,
0, 0, 2, 0, 0, 8, 0, 0, 32, 0, 0, 128,
0, 0, 0, 2, 0, 0, 8, 0, 0, 32, 0, 0,
128, 239, 189, 231, 87, 238, 19, 93, 9, 193, 64, 33,
250, 23, 1, 128, 0, 0, 0, 0, 240, 254, 255, 191,
0, 35, 0, 32, 0, 0, 8, 0, 0, 48, 181, 227,
16, 0, 0, 0, 17, 36, 22, 0, 1, 2, 16, 131,
163, 1, 80, 0, 1, 131, 17, 8, 0, 0, 0, 240,
223, 255, 127, 18, 170, 16, 127, 216, 82, 0, 128, 32,
0, 0, 0, 0, 64, 16, 2, 2, 9, 0, 16, 66,
0, 97, 95, 156, 49, 0, 0, 0, 1, 84, 2, 0,
0, 0, 0, 0, 66, 1, 0, 0, 0, 191, 223, 255,
255, 255, 255, 63, 223, 94, 207, 189, 191, 175, 255, 255,
127, 75, 64, 16, 241, 253, 239, 253, 247, 255, 255, 251,
223, 255, 111, 241, 123, 241, 127, 255, 127, 255, 238, 247,
239, 191, 255, 219, 255, 223, 255, 253, 126, 191, 87, 247,
111, 129, 118, 31, 220, 247, 253, 255, 255, 255, 251, 254,
255, 31, 87, 31, 239, 95, 16, 24, 98, 254, 255, 159,
21, 159, 21, 15, 125, 70, 125, 161, 130, 241, 247, 126,
255, 255, 255, 255, 255, 253, 221, 255, 191, 253, 246, 95,
254, 31, 64, 152, 2, 255, 227, 255, 243, 246, 254, 223,
255, 223, 127, 80, 30, 5, 123, 180, 223, 190, 255, 255,
247, 247, 255, 247, 127, 255, 255, 254, 219, 247, 215, 249,
239, 47, 128, 191, 197, 255, 255, 255, 255, 159, 255, 255,
255, 255, 253, 191, 223, 127, 6, 29, 87, 255, 248, 219,
93, 199, 125, 22, 185, 234, 107, 160, 28, 32, 0, 48,
2, 4, 36, 72, 4, 0, 0, 64, 212, 6, 4, 0,
0, 4, 0, 4, 0, 48, 1, 6, 80, 0, 8, 0,
0, 0, 36, 0, 4, 0, 16, 140, 88, 213, 73, 15,
20, 79, 241, 22, 68, 81, 10, 10, 64, 0, 0, 64,
0, 8, 0, 0, 0, 220, 255, 235, 31, 88, 8, 65,
4, 160, 4, 0, 48, 18, 64, 34, 0, 16, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 128, 16, 16, 191,
111, 147, 0, 1, 0, 0, 0, 0, 0, 0, 0, 192,
128, 45, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0,
192, 134, 194, 2, 0, 0, 0, 1, 223, 24, 0, 0,
18, 240, 255, 121, 63, 0, 37, 0, 0, 0, 10, 0,
0, 0, 0, 0, 0, 64, 0, 16, 3, 0, 9, 32,
0, 0, 1, 0, 0, 131, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255,
255, 255, 207, 126, 174, 17, 16, 0, 0, 146, 0, 4,
141, 241, 94, 0, 1, 0, 48, 20, 4, 85, 16, 1,
4, 246, 63, 122, 5, 4, 0, 176, 128, 0, 69, 85,
151, 125, 159, 113, 204, 120, 85, 67, 244, 87, 103, 20,
1, 0, 0, 0, 0, 0, 44, 247, 219, 31, 80, 96,
3, 72, 5, 16, 139, 56, 186, 1, 0, 0, 48, 0,
36, 68, 0, 0, 0, 3, 16, 2, 1, 0, 0, 240,
149, 255, 215, 65, 156, 48, 214, 120, 122, 17, 64, 0,
164, 132, 233, 65, 0, 0, 0, 35, 40, 18, 116, 0,
232, 48, 144, 42, 18, 0, 0, 0, 255, 239, 255, 127,
133, 83, 244, 239, 255, 255, 50, 152, 131, 76, 245, 66,
80, 221, 95, 20, 0, 128, 192, 68, 140, 22, 159, 251,
55, 125, 237, 127, 189, 36, 175, 1, 68, 24, 1, 85,
72, 2, 8, 16, 40, 0, 128, 0, 16, 32, 36, 0,
255, 255, 255, 111, 254, 1, 6, 136, 10, 0, 22, 1,
1, 21, 43, 62, 1, 0, 0, 16, 128, 41, 68, 2,
2, 0, 225, 191, 191, 3, 0, 0, 16, 212, 167, 209,
84, 158, 68, 223, 253, 143, 102, 179, 85, 32, 212, 195,
216, 48, 61, 128, 0, 0, 0, 76, 180, 16, 193, 132,
110, 80, 0, 34, 16, 127, 191, 219, 7, 0, 32, 16,
128, 178, 5, 16, 0, 64, 0, 0, 16, 2, 17, 0,
240, 255, 253, 63, 5, 0, 18, 129, 0, 0, 0, 8,
0, 16, 12, 2, 0, 0, 0, 0, 131, 48, 2, 40,
132, 0, 51, 192, 35, 36, 0, 0, 0, 203, 228, 58,
66, 200, 20, 241, 255, 255, 127, 22, 1, 1, 132, 80,
7, 252, 255, 255, 15, 1, 0, 64, 16, 56, 1, 1,
28, 18, 64, 225, 118, 22, 8, 3, 16, 0, 0, 0,
1, 0, 0, 0, 0, 0, 32, 36, 10, 64, 128, 0,
0,
}
// altLangISO3 holds an alphabetically sorted list of 3-letter language code alternatives
// to 2-letter language codes that cannot be derived using the method described above.
// Each 3-letter code is followed by its 1-byte langID.
// Size: 44 bytes
var altLangISO3 string = "corchbs\xa1hebPkin\x9bspa(yidR\xff\xff\xff\xff"
// langOldMap maps deprecated langIDs to their suggested replacements.
// Size: 108 bytes, 27 elements
var langOldMap = [27]struct {
from uint16
to uint16
}{
{from: 0x4b, to: 0x46},
{from: 0x50, to: 0x3c},
{from: 0x52, to: 0xcd},
{from: 0x54, to: 0x53},
{from: 0x77, to: 0x99},
{from: 0x35b, to: 0x253d},
{from: 0x465, to: 0xa85},
{from: 0x660, to: 0x2ec4},
{from: 0x717, to: 0x21fc},
{from: 0x720, to: 0x765},
{from: 0x75e, to: 0x3dcb},
{from: 0xa81, to: 0x1bfc},
{from: 0xa90, to: 0x2a3c},
{from: 0x10c1, to: 0x93d},
{from: 0x151b, to: 0x18a3},
{from: 0x1616, to: 0x2752},
{from: 0x1bdf, to: 0x1c7f},
{from: 0x1e24, to: 0x2a07},
{from: 0x226b, to: 0x2256},
{from: 0x2307, to: 0x2256},
{from: 0x3090, to: 0x1472},
{from: 0x33d4, to: 0x2dca},
{from: 0x340e, to: 0x3548},
{from: 0x3434, to: 0x3b62},
{from: 0x3457, to: 0x2a3c},
{from: 0x4051, to: 0x2ec4},
{from: 0x416c, to: 0x1fab},
}
// langMacroMap maps languages to their macro language replacement, if applicable.
// Size: 260 bytes, 65 elements
var langMacroMap = [65]struct {
from uint16
to uint16
}{
{from: 0x86, to: 0x7e},
{from: 0xa1, to: 0xa9},
{from: 0xb7, to: 0xee3},
{from: 0xc0, to: 0x4},
{from: 0x1d2, to: 0x1a53},
{from: 0x204, to: 0xa8},
{from: 0x28f, to: 0x7},
{from: 0x355, to: 0xa},
{from: 0x367, to: 0xb},
{from: 0x3ae, to: 0x383},
{from: 0x3b7, to: 0x452},
{from: 0x5d8, to: 0x2000},
{from: 0x5df, to: 0x580},
{from: 0x73d, to: 0x32dd},
{from: 0x761, to: 0xd1},
{from: 0x85b, to: 0x1a},
{from: 0x96a, to: 0xa34},
{from: 0x979, to: 0x22d1},
{from: 0x99a, to: 0x99d},
{from: 0x9a0, to: 0x4562},
{from: 0xc72, to: 0x29},
{from: 0xca6, to: 0x2091},
{from: 0xd42, to: 0x4a},
{from: 0xe1b, to: 0x4},
{from: 0x1012, to: 0x2c},
{from: 0x10c5, to: 0x8e},
{from: 0x10d4, to: 0x1267},
{from: 0x120c, to: 0x1225},
{from: 0x12ba, to: 0x37},
{from: 0x131c, to: 0x10c6},
{from: 0x13ab, to: 0x1358},
{from: 0x13b8, to: 0x1495},
{from: 0x142c, to: 0x322d},
{from: 0x16fc, to: 0x4f},
{from: 0x1bfc, to: 0x76},
{from: 0x1c85, to: 0x61},
{from: 0x1c90, to: 0x5f},
{from: 0x1c94, to: 0x56},
{from: 0x1c9b, to: 0x5e},
{from: 0x1cd7, to: 0x62},
{from: 0x1e04, to: 0x4cc},
{from: 0x2014, to: 0x6d},
{from: 0x214b, to: 0x6de},
{from: 0x229b, to: 0x2dc1},
{from: 0x24b6, to: 0x81},
{from: 0x26bc, to: 0x8d},
{from: 0x279e, to: 0x8f},
{from: 0x289e, to: 0x94},
{from: 0x28ea, to: 0x2b},
{from: 0x29a1, to: 0x70},
{from: 0x29c3, to: 0x1de7},
{from: 0x2d79, to: 0x96},
{from: 0x2f08, to: 0x2f30},
{from: 0x31fa, to: 0x1c67},
{from: 0x3218, to: 0x9d},
{from: 0x329f, to: 0xae},
{from: 0x34fe, to: 0xb8},
{from: 0x36f0, to: 0x933},
{from: 0x383b, to: 0xc6},
{from: 0x3f1a, to: 0x1cc6},
{from: 0x3f6f, to: 0x935},
{from: 0x4085, to: 0xcd},
{from: 0x42e0, to: 0x42e7},
{from: 0x44b8, to: 0x79},
{from: 0x4549, to: 0xcf},
}
// tagAlias holds a mapping from legacy and grandfathered tags to their locale ID.
// Size: 497 bytes
var tagAlias = map[string]uint16{
"aa-SAAHO": 12872,
"art-lojban": 6336,
"i-ami": 532,
"i-bnn": 1239,
"i-hak": 4954,
"i-klingon": 13349,
"i-lux": 102,
"i-navajo": 138,
"i-pwn": 10937,
"i-tao": 13070,
"i-tay": 13080,
"i-tsu": 13544,
"no-BOKMAL": 126,
"no-NYNORSK": 133,
"no-bok": 126,
"no-nyn": 133,
"sgn-BE-FR": 12511,
"sgn-BE-NL": 14583,
"sgn-CH-DE": 12542,
"zh-guoyu": 209,
"zh-hakka": 4954,
"zh-min": 196,
"zh-min-nan": 9013,
"zh-xiang": 5425,
}
const unknownScript = 186
// script is an alphabetically sorted list of ISO 15924 codes. The index
// of the script in the string, divided by 4, is the internal script ID.
// Size: 768 bytes
var script string = "" +
"AfakAghbArabArmiArmnAvstBaliBamuBassBatkBengBlisBopoBrahBrai" +
"BugiBuhdCakmCansCariChamCherCirtCoptCprtCyrlCyrsDevaDsrtDupl" +
"EgydEgyhEgypElbaEthiGeokGeorGlagGothGranGrekGujrGuruHangHani" +
"HanoHansHantHebrHiraHluwHmngHrktHungIndsItalJavaJpanJurcKali" +
"KanaKharKhmrKhojKndaKoreKpelKthiLanaLaooLatfLatgLatnLepcLimb" +
"LinaLinbLisuLomaLyciLydiMahjMandManiMayaMendMercMeroMlymMong" +
"MoonMrooMteiMymrNarbNbatNkgbNkooNshuOgamOlckOrkhOryaOsmaPalm" +
"PermPhagPhliPhlpPhlvPhnxPlrdPrtiQaaaQaabQaacQaadQaaeQaafQaag" +
"QaahQaaiQaajQaakQaalQaamQaanQaaoQaapQaaqQaarQaasQaatQaauQaav" +
"QaawQaaxQaayQaazRjngRoroRunrSamrSaraSarbSaurSgnwShawShrdSind" +
"SinhSoraSundSyloSyrcSyreSyrjSyrnTagbTakrTaleTaluTamlTangTavt" +
"TeluTengTfngTglgThaaThaiTibtTirhUgarVaiiVispWaraWoleXpeoXsux" +
"YiiiZinhZmthZsymZxxxZyyyZzzz\xff\xff\xff\xff"
// suppressScript is an index from langID to the dominant script for that language,
// if it exists. If a script is given, it should be suppressed from the language tag.
// Size: 212 bytes, 212 elements
var suppressScript = [212]uint8{
186, 25, 186, 72, 186, 34, 186, 2, 10, 186, 72, 186,
186, 25, 25, 186, 186, 186, 10, 186, 186, 72, 72, 186,
72, 186, 186, 72, 186, 186, 72, 72, 72, 72, 169, 171,
186, 40, 72, 72, 72, 72, 72, 2, 186, 72, 72, 72,
72, 72, 72, 72, 72, 186, 72, 72, 72, 41, 72, 186,
48, 27, 186, 72, 72, 72, 72, 4, 186, 186, 72, 186,
186, 186, 186, 72, 186, 72, 72, 186, 48, 57, 186, 186,
186, 36, 186, 186, 186, 25, 72, 62, 64, 65, 27, 186,
186, 186, 186, 186, 186, 72, 72, 186, 186, 72, 69, 72,
186, 72, 27, 72, 72, 72, 186, 186, 25, 88, 186, 72,
27, 72, 72, 186, 93, 72, 72, 72, 72, 27, 186, 72,
72, 72, 72, 97, 72, 72, 186, 72, 186, 186, 72, 102,
186, 42, 186, 72, 2, 72, 72, 72, 72, 72, 25, 72,
186, 186, 186, 186, 72, 186, 150, 72, 72, 72, 186, 72,
72, 186, 72, 72, 186, 72, 72, 162, 165, 72, 186, 170,
34, 186, 72, 72, 72, 72, 72, 72, 72, 72, 186, 72,
186, 186, 186, 25, 186, 2, 186, 72, 72, 186, 186, 186,
72, 48, 186, 186, 11, 186, 72, 186,
}
const unknownRegion = 338
// isoRegionOffset needs to be added to the index of regionISO to obtain the regionID
// for 2-letter ISO codes. (The first isoRegionOffset regionIDs are reserved for
// the UN.M49 codes used for groups.)
const isoRegionOffset = 30
// regionISO holds a list of alphabetically sorted 2-letter ISO region codes.
// Each 2-letter code is followed by two bytes with the following meaning:
// - [A-Z]{2}: the first letter of the 2-letter code plus these two
// letters form the 3-letter ISO code.
// - 0, n: index into altRegionISO3.
// Size: 1256 bytes
var regionISO string = "" +
"AAAAACSCADNDAEREAFFGAGTGAIIAALLBAMRMANNTAOGOAQTAARRGASSMATUT" +
"AUUSAWBWAXLAAZZEBAIHBBRBBDGDBEELBFFABGGRBHHRBIDIBJENBLLMBMMU" +
"BNRNBOOLBQESBRRABSHSBTTNBUURBVVTBWWABYLRBZLZCAANCCCKCDODCFAF" +
"CGOGCHHECIIVCKOKCLHLCMMRCNHNCOOLCPPTCRRICS\x00\x00CUUBCVPVCW" +
"UWCXXRCYYPCZZEDDDRDEEUDGGADJJIDKNKDMMADOOMDZZAEA ECCUEESTEG" +
"GYEHSHERRIESSPETTHEU\x00\x03FIINFJJIFKLKFMSMFOROFRRAFXXXGAAB" +
"GBBRGDRDGEEOGFUFGGGYGHHAGIIBGLRLGMMBGNINGPLPGQNQGRRCGS\x00" +
"\x06GTTMGUUMGWNBGYUYHKKGHMMDHNNDHRRVHTTIHUUNIC IDDNIERLILSR" +
"IMMNINNDIOOTIQRQIRRNISSLITTAJEEYJMAMJOORJPPNKEENKGGZKHHMKIIR" +
"KM\x00\tKNNAKP\x00\fKRORKWWTKY\x00\x0fKZAZLAAOLBBNLCCALIIELK" +
"KALRBRLSSOLTTULUUXLVVALYBYMAARMCCOMDDAMENEMFAFMGDGMHHLMKKDML" +
"LIMMMRMNNGMOACMPNPMQTQMRRTMSSRMTLTMUUSMVDVMWWIMXEXMYYSMZOZNA" +
"AMNCCLNEERNFFKNGGANIICNLLDNOORNPPLNRRUNTTZNUIUNZZLOMMNPAANPE" +
"ERPFYFPGNGPHHLPKAKPLOLPM\x00\x12PNCNPRRIPSSEPTRTPWLWPYRYQAAT" +
"QMMMQNNNQOOOQPPPQQQQQRRRQSSSQTTTQU QVVVQWWWQXXXQYYYQZZZREEU" +
"ROOURS\x00\x15RUUSRWWASAAUSBLBSCYCSDDNSEWESGGPSHHNSIVNSJJMSK" +
"VKSLLESMMRSNENSOOMSRURSSSDSTTPSUUNSVLVSXXMSYYRSZWZTAAATCCATD" +
"CDTF\x00\x18TGGOTHHATJJKTKKLTLLSTMKMTNUNTOONTPMPTRURTTTOTVUV" +
"TWWNTZZAUAKRUGGAUMMIUSSAUYRYUZZBVAATVCCTVEENVGGBVIIRVNNMVUUT" +
"WFLFWSSMXAAAXBBBXCCCXDDDXEEEXFFFXGGGXHHHXIIIXJJJXKKKXLLLXMMM" +
"XNNNXOOOXPPPXQQQXRRRXSSSXTTTXUUUXVVVXWWWXXXXXYYYXZZZYDMDYEEM" +
"YT\x00\x1bYUUGZAAFZMMBZRARZWWEZZZZ\xff\xff\xff\xff"
// altRegionISO3 holds a list of 3-letter region codes that cannot be
// mapped to 2-letter codes using the default algorithm. This is a short list.
// Size: 46 bytes
var altRegionISO3 string = "SCGQUUSGSCOMPRKCYMSPMSRBATFMYT"
// altRegionIDs holds a list of regionIDs, the positions of which match those
// of the 3-letter ISO codes in altRegionISO3.
// Size: 20 bytes, 10 elements
var altRegionIDs = [10]uint16{
85, 108, 130, 160, 162, 165, 222, 246, 274, 332,
}
// m49 maps regionIDs to UN.M49 codes. The first isoRegionOffset entries are
// codes indicating collections of regions.
// NOTE(review): a number of entries are 0; presumably these are regionIDs
// for which no UN.M49 code is assigned -- confirm against the table generator.
// Size: 678 bytes, 339 elements
var m49 = [339]uint16{
1, 2, 3, 5, 9, 11, 13, 14, 15, 17, 18, 19,
21, 29, 30, 34, 35, 39, 53, 54, 57, 61, 142, 143,
145, 150, 151, 154, 155, 419, 958, 0, 20, 784, 4, 28,
660, 8, 51, 530, 24, 10, 32, 16, 40, 36, 533, 248,
31, 70, 52, 50, 56, 854, 100, 48, 108, 204, 652, 60,
96, 68, 535, 76, 44, 64, 104, 74, 72, 112, 84, 124,
166, 180, 140, 178, 756, 384, 184, 152, 120, 156, 170, 0,
188, 891, 192, 132, 531, 162, 196, 203, 278, 276, 0, 262,
208, 212, 214, 12, 0, 218, 233, 818, 732, 232, 724, 231,
967, 246, 242, 238, 583, 234, 250, 249, 266, 826, 308, 268,
254, 831, 288, 292, 304, 270, 324, 312, 226, 300, 239, 320,
316, 624, 328, 344, 334, 340, 191, 332, 348, 0, 360, 372,
376, 833, 356, 86, 368, 364, 352, 380, 832, 388, 400, 392,
404, 417, 116, 296, 174, 659, 408, 410, 414, 136, 398, 418,
422, 662, 438, 144, 430, 426, 440, 442, 428, 434, 504, 492,
498, 499, 663, 450, 584, 807, 466, 104, 496, 446, 580, 474,
478, 500, 470, 480, 462, 454, 484, 458, 508, 516, 540, 562,
574, 566, 558, 528, 578, 524, 520, 536, 570, 554, 512, 591,
604, 258, 598, 608, 586, 616, 666, 612, 630, 275, 620, 585,
600, 634, 959, 960, 961, 962, 963, 964, 965, 966, 0, 968,
969, 970, 971, 972, 638, 642, 688, 643, 646, 682, 90, 690,
729, 752, 702, 654, 705, 744, 703, 694, 674, 686, 706, 740,
728, 678, 810, 222, 534, 760, 748, 0, 796, 148, 260, 768,
764, 762, 772, 626, 795, 788, 776, 626, 792, 780, 798, 158,
834, 804, 800, 581, 840, 858, 860, 336, 670, 862, 92, 850,
704, 548, 876, 882, 973, 974, 975, 976, 977, 978, 979, 980,
981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992,
993, 994, 995, 996, 997, 998, 720, 887, 175, 891, 710, 894,
180, 716, 999,
}
// currency holds an alphabetically sorted list of canonical 3-letter currency identifiers.
// Each identifier is followed by a byte of which the 6 most significant bits
// indicate the rounding and the 2 least significant bits indicate the
// number of decimal positions, so every entry is 4 bytes wide.
// NOTE(review): the string ends with four 0xff bytes; presumably a sentinel
// that terminates the list -- confirm against the lookup code.
// Size: 1208 bytes
var currency string = "" +
"ADP\x04AED\x06AFA\x06AFN\x04ALK\x06ALL\x04AMD\x04ANG\x06AOA" +
"\x06AOK\x06AON\x06AOR\x06ARA\x06ARL\x06ARM\x06ARP\x06ARS\x06" +
"ATS\x06AUD\x06AWG\x06AZM\x06AZN\x06BAD\x06BAM\x06BAN\x06BBD" +
"\x06BDT\x06BEC\x06BEF\x06BEL\x06BGL\x06BGM\x06BGN\x06BGO\x06" +
"BHD\aBIF\x04BMD\x06BND\x06BOB\x06BOL\x06BOP\x06BOV\x06BRB" +
"\x06BRC\x06BRE\x06BRL\x06BRN\x06BRR\x06BRZ\x06BSD\x06BTN\x06" +
"BUK\x06BWP\x06BYB\x06BYR\x04BZD\x06CAD\x06CDF\x06CHE\x06CHF" +
"\x06CHW\x06CLE\x06CLF\x04CLP\x04CNX\x06CNY\x06COP\x04COU\x06" +
"CRC\x04CSD\x06CSK\x06CUC\x06CUP\x06CVE\x06CYP\x06CZK\x06DDM" +
"\x06DEM\x06DJF\x04DKK\x06DOP\x06DZD\x06ECS\x06ECV\x06EEK\x06" +
"EGP\x06ERN\x06ESA\x06ESB\x06ESP\x04ETB\x06EUR\x06FIM\x06FJD" +
"\x06FKP\x06FRF\x06GBP\x06GEK\x06GEL\x06GHC\x06GHS\x06GIP\x06" +
"GMD\x06GNF\x04GNS\x06GQE\x06GRD\x06GTQ\x06GWE\x06GWP\x06GYD" +
"\x04HKD\x06HNL\x06HRD\x06HRK\x06HTG\x06HUF\x04IDR\x04IEP\x06" +
"ILP\x06ILR\x06ILS\x06INR\x06IQD\x04IRR\x04ISJ\x06ISK\x04ITL" +
"\x04JMD\x06JOD\aJPY\x04KES\x06KGS\x06KHR\x06KMF\x04KPW\x04KR" +
"H\x06KRO\x06KRW\x04KWD\aKYD\x06KZT\x06LAK\x04LBP\x04LKR\x06L" +
"RD\x06LSL\x06LTL\x06LTT\x06LUC\x06LUF\x04LUL\x06LVL\x06LVR" +
"\x06LYD\aMAD\x06MAF\x06MCF\x06MDC\x06MDL\x06MGA\x04MGF\x04MK" +
"D\x06MKN\x06MLF\x06MMK\x04MNT\x04MOP\x06MRO\x04MTL\x06MTP" +
"\x06MUR\x04MVP\x06MVR\x06MWK\x06MXN\x06MXP\x06MXV\x06MYR\x06" +
"MZE\x06MZM\x06MZN\x06NAD\x06NGN\x06NIC\x06NIO\x06NLG\x06NOK" +
"\x06NPR\x06NZD\x06OMR\aPAB\x06PEI\x06PEN\x06PES\x06PGK\x06PH" +
"P\x06PKR\x04PLN\x06PLZ\x06PTE\x06PYG\x04QAR\x06RHD\x06ROL" +
"\x06RON\x06RSD\x04RUB\x06RUR\x06RWF\x04SAR\x06SBD\x06SCR\x06" +
"SDD\x06SDG\x06SDP\x06SEK\x06SGD\x06SHP\x06SIT\x06SKK\x06SLL" +
"\x04SOS\x04SRD\x06SRG\x06SSP\x06STD\x04SUR\x06SVC\x06SYP\x04" +
"SZL\x06THB\x06TJR\x06TJS\x06TMM\x04TMT\x06TND\aTOP\x06TPE" +
"\x06TRL\x04TRY\x06TTD\x06TWD\x06TZS\x04UAH\x06UAK\x06UGS\x06" +
"UGX\x04USD\x06USN\x06USS\x06UYI\x06UYP\x06UYU\x06UZS\x04VEB" +
"\x06VEF\x06VND\x04VNN\x06VUV\x04WST\x06XAF\x04XAG\x06XAU\x06" +
"XBA\x06XBB\x06XBC\x06XBD\x06XCD\x06XDR\x06XEU\x06XFO\x06XFU" +
"\x06XOF\x04XPD\x06XPF\x04XPT\x06XRE\x06XSU\x06XTS\x06XUA\x06" +
"XXX\x06YDD\x06YER\x04YUD\x06YUM\x06YUN\x06YUR\x06ZAL\x06ZAR" +
"\x06ZMK\x04ZMW\x06ZRN\x06ZRZ\x06ZWD\x04ZWL\x06ZWR\x06\xff" +
"\xff\xff\xff"
// unknownCurrency equals the zero-based position of the "XXX" entry in
// currency, counted in 4-byte entries (3-letter code plus one info byte).
// NOTE(review): presumably this serves as the ID of the unknown currency --
// confirm against the callers.
const unknownCurrency = 281
// nRegionGroups is the number of region groups. Every regionID that is
// strictly less than nRegionGroups identifies a group.
const nRegionGroups = 32
// regionInclusion maps region identifiers to sets of regions in regionInclusionBits,
// where each set holds all groupings that are directly connected in a region
// containment graph. Each element is the index of the corresponding bit
// vector in regionInclusionBits.
// Size: 339 bytes, 339 elements
var regionInclusion = [339]uint8{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 32, 33, 34, 35, 36, 37,
37, 34, 35, 37, 38, 33, 39, 40, 41, 42, 37, 43,
35, 34, 37, 36, 41, 44, 45, 35, 46, 44, 37, 47,
48, 39, 37, 39, 37, 36, 48, 33, 49, 50, 51, 47,
33, 38, 38, 38, 52, 44, 40, 39, 38, 53, 39, 33,
51, 34, 37, 44, 37, 33, 54, 45, 52, 41, 33, 46,
55, 37, 37, 56, 56, 39, 55, 56, 56, 46, 57, 46,
31, 55, 58, 39, 59, 43, 41, 52, 38, 55, 37, 35,
39, 43, 44, 34, 47, 44, 44, 37, 38, 57, 33, 51,
59, 44, 39, 53, 33, 51, 34, 37, 45, 56, 48, 55,
35, 43, 36, 33, 35, 36, 43, 57, 43, 37, 35, 53,
46, 60, 48, 59, 46, 37, 53, 53, 35, 37, 60, 48,
35, 37, 52, 36, 44, 49, 55, 41, 55, 56, 56, 52,
50, 34, 37, 46, 59, 34, 44, 48, 53, 53, 59, 37,
44, 37, 57, 46, 36, 46, 51, 48, 46, 49, 58, 44,
42, 44, 51, 41, 43, 36, 59, 35, 40, 42, 35, 51,
39, 40, 58, 48, 36, 45, 47, 40, 37, 35, 57, 59,
39, 35, 32, 32, 30, 32, 32, 32, 32, 32, 61, 32,
32, 32, 32, 32, 46, 45, 34, 50, 46, 35, 58, 46,
56, 55, 48, 44, 57, 43, 45, 44, 34, 44, 46, 39,
56, 38, 50, 51, 37, 35, 49, 33, 37, 38, 33, 44,
48, 60, 40, 48, 60, 56, 40, 48, 35, 37, 40, 53,
46, 50, 46, 33, 47, 39, 60, 34, 37, 39, 37, 37,
48, 58, 40, 40, 32, 32, 32, 32, 32, 32, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 32, 32, 32, 32, 32, 35, 35, 46, 34, 49, 46,
38, 46, 32,
}
// regionInclusionBits is an array of bit vectors where every vector represents
// a set of region groupings. These sets are used to compute the distance
// between two regions for the purpose of locale matching.
// Size: 300 bytes, 75 elements
var regionInclusionBits = [75]uint32{
37750803, 1955, 14404, 536872968, 1077674001, 34, 536873028, 130, 258, 514, 1026, 536885325,
6148, 536881156, 4210688, 4227072, 4259840, 33685504, 262160, 524304, 1048592, 2097168, 29474817, 12582912,
20971520, 2650931201, 100663296, 167772160, 301989888, 536881224, 1073741840, 2181038080, 1, 1073741824, 131072, 16777216,
32768, 8192, 512, 8, 2097152, 2415919104, 262144, 134217728, 32, 2214592512, 128, 4096,
65536, 1024, 67108864, 64, 268435456, 16384, 2164260864, 2281701376, 256, 2147614720, 524288, 1048576,
8388608, 33554432, 4294967295, 37752755, 1115424787, 574634079, 63031315, 2655127571, 2449473536, 2248146944, 2202009600, 2315255808,
2181169152, 2680406017, 2680408083,
}
// regionInclusionNext marks, for each entry in regionInclusionBits, the set of
// all groups that are reachable from the groups set in the respective entry.
// The two arrays have the same length, so entry i here describes entry i of
// regionInclusionBits.
// NOTE(review): the values look like indices back into regionInclusionBits
// (all are below 75) -- confirm against the code that consumes this table.
// Size: 75 bytes, 75 elements
var regionInclusionNext = [75]uint8{
62, 63, 11, 11, 64, 1, 11, 1, 1, 1, 1, 65,
11, 11, 22, 22, 22, 25, 4, 4, 4, 4, 66, 22,
22, 67, 25, 25, 25, 11, 4, 25, 0, 30, 17, 24,
15, 13, 9, 3, 21, 68, 18, 27, 5, 69, 7, 12,
16, 10, 26, 6, 28, 14, 70, 71, 8, 72, 19, 20,
23, 25, 62, 62, 62, 62, 62, 62, 25, 25, 73, 25,
25, 74, 62,
}
// Size: 8.7K (8876 bytes); Check: D7ACA2A7