зеркало из https://github.com/golang/text.git
go.text/locale: moved package from go.exp.
R=r CC=golang-dev https://golang.org/cl/9893043
This commit is contained in:
Родитель
809f98b91c
Коммит
3942ae31cd
|
@ -0,0 +1,16 @@
|
|||
# Copyright 2013 The Go Authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
CLEANFILES+=maketables
|
||||
|
||||
maketables: maketables.go
|
||||
go build $^
|
||||
|
||||
tables: maketables
|
||||
./maketables > tables.go
|
||||
gofmt -w -s tables.go
|
||||
|
||||
# Build (but do not run) maketables during testing,
|
||||
# just to make sure it still compiles.
|
||||
testshort: maketables
|
|
@ -0,0 +1,90 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package locale_test
|
||||
|
||||
import (
|
||||
"code.google.com/p/go.text/locale"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
func ExampleID_Canonicalize() {
|
||||
p := func(id string) {
|
||||
loc, _ := locale.Parse(id)
|
||||
fmt.Printf("BCP47(%s) -> %s\n", id, loc.Canonicalize(locale.BCP47))
|
||||
fmt.Printf("Macro(%s) -> %s\n", id, loc.Canonicalize(locale.Macro))
|
||||
}
|
||||
p("en-Latn")
|
||||
p("zh-cmn")
|
||||
p("bjd")
|
||||
p("iw-Latn-fonipa-u-cu-usd")
|
||||
// Output:
|
||||
// BCP47(en-Latn) -> en
|
||||
// Macro(en-Latn) -> en-Latn
|
||||
// BCP47(zh-cmn) -> cmn
|
||||
// Macro(zh-cmn) -> zh
|
||||
// BCP47(bjd) -> drl
|
||||
// Macro(bjd) -> bjd
|
||||
// BCP47(iw-Latn-fonipa-u-cu-usd) -> he-Latn-fonipa-u-cu-usd
|
||||
// Macro(iw-Latn-fonipa-u-cu-usd) -> iw-Latn-fonipa-u-cu-usd
|
||||
}
|
||||
|
||||
func ExampleID_Parent() {
|
||||
loc := locale.Make("sl-Latn-IT-nedis")
|
||||
fmt.Println(loc.Parent())
|
||||
// TODO:Output: sl-Latn-IT
|
||||
}
|
||||
|
||||
func ExampleID_Written() {
|
||||
loc := locale.Make("sl-Latn-IT-nedis")
|
||||
fmt.Println(loc.Written())
|
||||
// TODO:Output: sl-Latn
|
||||
}
|
||||
|
||||
func ExampleID_Script() {
|
||||
en := locale.Make("en")
|
||||
sr := locale.Make("sr")
|
||||
fmt.Println(en.Script())
|
||||
fmt.Println(sr.Script())
|
||||
// TODO:Output:
|
||||
// Latn High
|
||||
// Cyrl Low
|
||||
}
|
||||
|
||||
func ExampleID_Part() {
|
||||
loc := locale.Make("sr-RS")
|
||||
script := loc.Part(locale.ScriptPart)
|
||||
region := loc.Part(locale.RegionPart)
|
||||
fmt.Printf("%q %q", script, region)
|
||||
// TODO:Output: "" "RS"
|
||||
}
|
||||
|
||||
func ExampleID_Scope() {
|
||||
loc := locale.Make("sr")
|
||||
set := loc.Scope()
|
||||
fmt.Println(set.Locales())
|
||||
fmt.Println(set.Languages())
|
||||
fmt.Println(set.Scripts())
|
||||
fmt.Println(set.Regions())
|
||||
// TODO:Output:
|
||||
// [sr_Cyrl sr_Cyrl_ME sr_Latn sr_Latn_ME sr_Cyrl_BA sr_Cyrl_RS sr_Latn_BA sr_Latn_RS]
|
||||
// [sr]
|
||||
// [Cyrl Latn]
|
||||
// [BA ME RS]
|
||||
}
|
||||
|
||||
func ExampleScript_Scope() {
|
||||
loc := locale.Make("zen-Tfng")
|
||||
script, _ := loc.Script()
|
||||
set := script.Scope()
|
||||
fmt.Println(set.Locales())
|
||||
fmt.Println(set.Languages())
|
||||
fmt.Println(set.Scripts())
|
||||
fmt.Println(set.Regions())
|
||||
// TODO:Output:
|
||||
// [shi shi-Tfng shi-Tfng_MA tzm]
|
||||
// [shi tzm zen]
|
||||
// [Tfng]
|
||||
// [MA]
|
||||
}
|
|
@ -0,0 +1,319 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// NOTE: This package is still under development. Parts of it are not yet implemented,
|
||||
// and the API is subject to change.
|
||||
//
|
||||
// The locale package provides a type to represent BCP 47 locale identifiers.
|
||||
// It supports various canonicalizations defined in CLDR.
|
||||
package locale
|
||||
|
||||
import "strings"
|
||||
|
||||
var (
|
||||
// Und represents the undefined language. It is also the root locale.
|
||||
Und = und
|
||||
En = en // Default Locale for English.
|
||||
En_US = en_US // Default locale for American English.
|
||||
De = de // Default locale for German.
|
||||
// TODO: list of most common language identifiers.
|
||||
)
|
||||
|
||||
var (
|
||||
Supported Set // All supported locales.
|
||||
Common Set // A selection of common locales.
|
||||
)
|
||||
|
||||
var (
|
||||
de = ID{lang: getLangID([]byte("de")), region: unknownRegion, script: unknownScript}
|
||||
en = ID{lang: getLangID([]byte("en")), region: unknownRegion, script: unknownScript}
|
||||
en_US = en
|
||||
und = ID{lang: unknownLang, region: unknownRegion, script: unknownScript}
|
||||
)
|
||||
|
||||
// ID represents a BCP 47 locale identifier. It can be used to
|
||||
// select an instance for a specific locale. All Locale values are guaranteed
|
||||
// to be well-formed.
|
||||
type ID struct {
|
||||
// In most cases, just lang, region and script will be needed. In such cases
|
||||
// str may be nil.
|
||||
lang langID
|
||||
region regionID
|
||||
script scriptID
|
||||
pVariant byte // offset in str
|
||||
pExt uint16 // offset of first extension
|
||||
str *string
|
||||
}
|
||||
|
||||
// Make calls Parse and Canonicalize and returns the resulting ID.
|
||||
// Any errors are ignored and a sensible default is returned.
|
||||
// In most cases, locale IDs should be created using this method.
|
||||
func Make(id string) ID {
|
||||
loc, _ := Parse(id)
|
||||
return loc.Canonicalize(All)
|
||||
}
|
||||
|
||||
// IsRoot returns true if loc is equal to locale "und".
|
||||
func (loc ID) IsRoot() bool {
|
||||
if loc.str != nil {
|
||||
n := len(*loc.str)
|
||||
if n > 0 && loc.pExt > 0 && int(loc.pExt) < n {
|
||||
return false
|
||||
}
|
||||
if uint16(loc.pVariant) != loc.pExt || strings.HasPrefix(*loc.str, "x-") {
|
||||
return false
|
||||
}
|
||||
loc.str = nil
|
||||
}
|
||||
return loc == und
|
||||
}
|
||||
|
||||
// CanonType can be used to enable or disable various types of canonicalization.
|
||||
type CanonType int
|
||||
|
||||
const (
|
||||
// Replace deprecated values with their preferred ones.
|
||||
Deprecated CanonType = 1 << iota
|
||||
// Remove redundant scripts.
|
||||
SuppressScript
|
||||
// Map the dominant language of macro language group to the macro language identifier.
|
||||
// For example cmn -> zh.
|
||||
Macro
|
||||
// All canonicalizations prescribed by BCP 47.
|
||||
BCP47 = Deprecated | SuppressScript
|
||||
All = BCP47 | Macro
|
||||
|
||||
// TODO: LikelyScript, LikelyRegion: suppress similar to ICU.
|
||||
)
|
||||
|
||||
// Canonicalize replaces the identifier with its canonical equivalent.
|
||||
func (loc ID) Canonicalize(t CanonType) ID {
|
||||
changed := false
|
||||
if t&SuppressScript != 0 {
|
||||
if loc.lang < langNoIndexOffset && uint8(loc.script) == suppressScript[loc.lang] {
|
||||
loc.script = unknownScript
|
||||
changed = true
|
||||
}
|
||||
}
|
||||
if t&Deprecated != 0 {
|
||||
l := normLang(langOldMap[:], loc.lang)
|
||||
if l != loc.lang {
|
||||
changed = true
|
||||
}
|
||||
loc.lang = l
|
||||
}
|
||||
if t&Macro != 0 {
|
||||
l := normLang(langMacroMap[:], loc.lang)
|
||||
if l != loc.lang {
|
||||
changed = true
|
||||
}
|
||||
loc.lang = l
|
||||
}
|
||||
if changed && loc.str != nil {
|
||||
ext := ""
|
||||
if loc.pExt > 0 {
|
||||
ext = (*loc.str)[loc.pExt+1:]
|
||||
}
|
||||
s := loc.makeString(loc.Part(VariantPart), ext)
|
||||
loc.str = &s
|
||||
}
|
||||
return loc
|
||||
}
|
||||
|
||||
// Parent returns the direct parent for this locale, which is the locale
|
||||
// from which this locale inherits any undefined values.
|
||||
func (loc ID) Parent() ID {
|
||||
// TODO: implement
|
||||
return und
|
||||
}
|
||||
|
||||
// Written strips qualifiers from the identifier until the resulting identifier
|
||||
// inherits from root.
|
||||
func (loc ID) Written() ID {
|
||||
// TODO: implement
|
||||
return und
|
||||
}
|
||||
|
||||
// Confidence indicates the level of certainty for a given return value.
|
||||
// For example, Serbian may be written in cyrillic or latin script.
|
||||
// The confidence level indicates whether a value was explicitly specified,
|
||||
// whether it is typically the only possible value, or whether there is
|
||||
// an ambiguity.
|
||||
type Confidence int
|
||||
|
||||
const (
|
||||
Not Confidence = iota // full confidence that there was no match
|
||||
Low // most likely value picked out of a set of alternatives
|
||||
High // value inferred from a parent and is generally assumed to be the correct match
|
||||
Exact // exact match or explicitly specified value
|
||||
)
|
||||
|
||||
func (loc *ID) makeString(vars, ext string) string {
|
||||
buf := [128]byte{}
|
||||
n := loc.lang.stringToBuf(buf[:])
|
||||
if loc.script != unknownScript {
|
||||
n += copy(buf[n:], "-")
|
||||
n += copy(buf[n:], loc.script.String())
|
||||
}
|
||||
if loc.region != unknownRegion {
|
||||
n += copy(buf[n:], "-")
|
||||
n += copy(buf[n:], loc.region.String())
|
||||
}
|
||||
b := buf[:n]
|
||||
if vars != "" {
|
||||
b = append(b, '-')
|
||||
loc.pVariant = byte(len(b))
|
||||
b = append(b, vars...)
|
||||
loc.pExt = uint16(len(b))
|
||||
}
|
||||
if ext != "" {
|
||||
loc.pExt = uint16(len(b))
|
||||
b = append(b, '-')
|
||||
b = append(b, ext...)
|
||||
}
|
||||
return string(b)
|
||||
}
|
||||
|
||||
// String returns the canonical string representation of the locale.
|
||||
func (loc ID) String() string {
|
||||
if loc.str == nil {
|
||||
return loc.makeString("", "")
|
||||
}
|
||||
return *loc.str
|
||||
}
|
||||
|
||||
// Language returns the language for the locale.
|
||||
func (loc ID) Language() Language {
|
||||
// TODO: implement
|
||||
return Language{0}
|
||||
}
|
||||
|
||||
// Script infers the script for the locale. If it was not explicitly given, it will infer
|
||||
// a most likely candidate from the parent locales.
|
||||
// If more than one script is commonly used for a language, the most likely one
|
||||
// is returned with a low confidence indication. For example, it returns (Cyrl, Low)
|
||||
// for Serbian.
|
||||
// Note that an inferred script is never guaranteed to be the correct one. Latn is
|
||||
// almost exclusively used for Afrikaans, but Arabic has been used for some texts
|
||||
// in the past. Also, the script that is commonly used may change over time.
|
||||
func (loc ID) Script() (Script, Confidence) {
|
||||
// TODO: implement
|
||||
return Script{0}, Exact
|
||||
}
|
||||
|
||||
// Region returns the region for l. If it was not explicitly given, it will
|
||||
// infer a most likely candidate from the parent locales.
|
||||
func (loc ID) Region() (Region, Confidence) {
|
||||
// TODO: implement
|
||||
return Region{0}, Exact
|
||||
}
|
||||
|
||||
// Variant returns the variant specified explicitly for this locale
|
||||
// or nil if no variant was specified.
|
||||
func (loc ID) Variant() Variant {
|
||||
return Variant{""}
|
||||
}
|
||||
|
||||
// Scope returns a Set that indicates the common variants for which the
|
||||
// locale may be applicable.
|
||||
// Locales will return all valid sublocales. Languages will return the language
|
||||
// for this locale. Regions will return all regions for which a locale with
|
||||
// this language is defined. And Scripts will return all scripts that are
|
||||
// commonly used for this locale.
|
||||
// If any of these properties is explicitly specified, the respective lists
|
||||
// will be constrained. For example, for sr_Latn Scripts will return [Latn]
|
||||
// instead of [Cyrl Latn].
|
||||
func (loc ID) Scope() Set {
|
||||
// TODO: implement
|
||||
return nil
|
||||
}
|
||||
|
||||
// TypeForKey returns the type associated with the given key, where key
|
||||
// is one of the allowed values defined for the Unicode locale extension ('u') in
|
||||
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
// TypeForKey will traverse the inheritance chain to get the correct value.
|
||||
func (loc ID) TypeForKey(key string) string {
|
||||
// TODO: implement
|
||||
return ""
|
||||
}
|
||||
|
||||
// KeyValueString returns a string to be set with KeyValuePart.
|
||||
// Error handling is done by Compose.
|
||||
func KeyValueString(m map[string]string) (string, error) {
|
||||
// TODO: implement
|
||||
return "", nil
|
||||
}
|
||||
|
||||
// SimplifyOptions removes options in loc that it would inherit
|
||||
// by default from its parent.
|
||||
func (loc ID) SimplifyOptions() ID {
|
||||
// TODO: implement
|
||||
return ID{}
|
||||
}
|
||||
|
||||
// Language is an ISO 639 language identifier.
|
||||
type Language struct {
|
||||
langID
|
||||
}
|
||||
|
||||
// Scope returns a Set of all pre-defined sublocales for this language.
|
||||
func (l Language) Scope() Set {
|
||||
// TODO: implement
|
||||
return nil
|
||||
}
|
||||
|
||||
// Script is a 4-letter ISO 15924 code for representing scripts.
|
||||
// It is idiomatically represented in title case.
|
||||
type Script struct {
|
||||
scriptID
|
||||
}
|
||||
|
||||
// Scope returns a Set of all pre-defined sublocales applicable to the script.
|
||||
func (s Script) Scope() Set {
|
||||
// TODO: implement
|
||||
return nil
|
||||
}
|
||||
|
||||
// Region is an ISO 3166-1 or UN M.49 code for representing countries and regions.
|
||||
type Region struct {
|
||||
regionID
|
||||
}
|
||||
|
||||
// IsCountry returns whether this region is a country.
|
||||
func (r Region) IsCountry() bool {
|
||||
// TODO: implement
|
||||
return true
|
||||
}
|
||||
|
||||
// Scope returns a Set of all pre-defined sublocales applicable to the region.
|
||||
func (r Region) Scope() Set {
|
||||
// TODO: implement
|
||||
return nil
|
||||
}
|
||||
|
||||
// Variant represents a registered variant of a language as defined by BCP 47.
|
||||
type Variant struct {
|
||||
// TODO: implement
|
||||
variant string
|
||||
}
|
||||
|
||||
// String returns the string representation of the variant.
|
||||
func (v Variant) String() string {
|
||||
// TODO: implement
|
||||
return v.variant
|
||||
}
|
||||
|
||||
// Currency is an ISO 4217 currency designator.
|
||||
type Currency struct {
|
||||
currencyID
|
||||
}
|
||||
|
||||
// Set provides information about a set of locales.
|
||||
type Set interface {
|
||||
Locales() []ID
|
||||
Languages() []Language
|
||||
Regions() []Region
|
||||
Scripts() []Script
|
||||
Currencies() []Currency
|
||||
}
|
|
@ -0,0 +1,86 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package locale
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestIDSize(t *testing.T) {
|
||||
id := ID{}
|
||||
typ := reflect.TypeOf(id)
|
||||
if typ.Size() > 16 {
|
||||
t.Errorf("size of ID was %d; want 16", typ.Size())
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsRoot(t *testing.T) {
|
||||
for i, tt := range parseTests() {
|
||||
loc, _ := Parse(tt.in)
|
||||
undef := tt.lang == "und" && tt.script == "" && tt.region == "" && tt.ext == ""
|
||||
if loc.IsRoot() != undef {
|
||||
t.Errorf("%d: was %v; want %v", i, loc.IsRoot(), undef)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
func TestParent(t *testing.T) {
|
||||
tests := []struct {
|
||||
in, out string
|
||||
}{
|
||||
{"und", "und"},
|
||||
{"de-1994", "de"},
|
||||
{"de-CH-1994", "de-CH"},
|
||||
{"de-Cyrl-CH-1994", "de-Cyrl-CH"},
|
||||
{"zh", "und"},
|
||||
{"zh-HK-u-cu-usd", "zh"},
|
||||
{"zh-Hans-HK-u-cu-usd", "zh-Hans"},
|
||||
{"zh-u-cu-usd", "und"},
|
||||
{"zh_Hans", "zh"},
|
||||
{"zh_Hant", "und"},
|
||||
{"vai", "und"},
|
||||
{"vai_Latn", "und"},
|
||||
{"nl_Cyrl", "nl"},
|
||||
{"nl", "und"},
|
||||
{"en_US", "en"},
|
||||
{"en_150", "en-GB"},
|
||||
{"en-SG", "en-GB"},
|
||||
{"en_GB", "en"},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
test, _ := Parse(tt.in)
|
||||
gold, _ := Parse(tt.out)
|
||||
if p := test.Parent(); p.String() != gold.String() {
|
||||
t.Errorf("%d:parent(%q): found %s; want %s", i, tt.in, p.String(), tt.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestWritten(t *testing.T) {
|
||||
tests := []struct {
|
||||
in, out string
|
||||
}{
|
||||
{"und", "und"},
|
||||
{"zh-Hans", "zh"},
|
||||
{"zh-Hant", "zh-Hant"},
|
||||
{"vai", "vai"},
|
||||
{"vai-Latn", "vai-Latn"},
|
||||
{"nl-Cyrl", "nl-Cyrl"},
|
||||
{"en-US", "en"},
|
||||
{"en-150", "en"},
|
||||
{"en-SG", "en"},
|
||||
{"en-GB", "en"},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
test, _ := Parse(tt.in)
|
||||
gold, _ := Parse(tt.out)
|
||||
if test.Written() != gold {
|
||||
t.Errorf("%d:parent(%q): found %s; want %s", i, tt.in, test.String(), tt.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
|
@ -0,0 +1,348 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package locale
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// get returns the first n bytes of entry number id in idx, where idx is a
// string of consecutive 4-byte entries.
func get(idx string, id, n int) string {
	start := id << 2
	return idx[start : start+n]
}
|
||||
|
||||
// cmp compares a and b lexicographically, byte by byte, and returns -1, 0
// or 1. When one is a prefix of the other, the shorter one sorts first.
func cmp(a string, b []byte) int {
	limit := len(b)
	if len(a) < limit {
		limit = len(a)
	}
	for i := 0; i < limit; i++ {
		if a[i] == b[i] {
			continue
		}
		if a[i] < b[i] {
			return -1
		}
		return 1
	}
	// The common prefix is equal; the lengths decide.
	if len(a) < len(b) {
		return -1
	}
	if len(a) > len(b) {
		return 1
	}
	return 0
}
|
||||
|
||||
// search searchs for the insertion point of key in smap, which is a
|
||||
// string with consecutive 4-byte entries. Only the first len(key)
|
||||
// bytes from the start of the 4-byte entries will be considered.
|
||||
func search(smap string, key []byte) int {
|
||||
n := len(key)
|
||||
return sort.Search(len(smap)>>2, func(i int) bool {
|
||||
return cmp(get(smap, i, n), key) != -1
|
||||
}) << 2
|
||||
}
|
||||
|
||||
func index(smap string, key []byte) int {
|
||||
i := search(smap, key)
|
||||
if cmp(smap[i:i+len(key)], key) != 0 {
|
||||
return -1
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
// searchUint returns the smallest index i for which imap[i] >= key, or
// len(imap) if there is none. It uses a binary search, so imap is expected
// to be sorted in increasing order.
func searchUint(imap []uint16, key uint16) int {
	atLeastKey := func(i int) bool { return imap[i] >= key }
	return sort.Search(len(imap), atLeastKey)
}
|
||||
|
||||
// fixCase rewrites b in place so that each byte has the same case as the
// byte at the same position in pat. It returns false if b and pat differ in
// length or if a byte of b is not an ASCII letter; in the latter case the
// bytes before the offending one have already been rewritten.
func fixCase(pat string, b []byte) bool {
	if len(b) != len(pat) {
		return false
	}
	const caseDelta = 'z' - 'Z'
	for i := range b {
		ch := b[i]
		if pat[i] <= 'Z' {
			// Upper-case position.
			if ch >= 'a' {
				ch -= caseDelta
			}
			if ch < 'A' || ch > 'Z' {
				return false
			}
		} else {
			// Lower-case position.
			if ch <= 'Z' {
				ch += caseDelta
			}
			if ch < 'a' || ch > 'z' {
				return false
			}
		}
		b[i] = ch
	}
	return true
}
|
||||
|
||||
type langID uint16
|
||||
|
||||
// getLangID returns the langID of s if s is a canonical ID
|
||||
// or langUnknown if s is not a canonical langID.
|
||||
func getLangID(s []byte) langID {
|
||||
if len(s) == 2 {
|
||||
return getLangISO2(s)
|
||||
}
|
||||
return getLangISO3(s)
|
||||
}
|
||||
|
||||
// mapLang returns the mapped langID of id according to mapping m.
|
||||
func normLang(m []struct{ from, to uint16 }, id langID) langID {
|
||||
k := sort.Search(len(m), func(i int) bool {
|
||||
return m[i].from >= uint16(id)
|
||||
})
|
||||
if m[k].from == uint16(id) {
|
||||
return langID(m[k].to)
|
||||
}
|
||||
return id
|
||||
}
|
||||
|
||||
// getLangISO2 returns the langID for the given 2-letter ISO language code
|
||||
// or unknownLang if this does not exist.
|
||||
func getLangISO2(s []byte) langID {
|
||||
if len(s) == 2 && fixCase("zz", s) {
|
||||
if i := index(lang, s); i != -1 && lang[i+3] != 0 {
|
||||
return langID(i >> 2)
|
||||
}
|
||||
}
|
||||
return unknownLang
|
||||
}
|
||||
|
||||
// base is the number of letters in the range 'a' through 'z'.
const base = 'z' - 'a' + 1

// strToInt folds s into an integer, treating each byte's offset from 'a'
// as one digit of a number written in the given base, most significant
// digit first.
func strToInt(s []byte) uint {
	var v uint
	for _, c := range s {
		v = v*base + uint(c-'a')
	}
	return v
}

// intToStr is the inverse of strToInt: it fills s with the digits of v,
// least significant digit last. len(s) must equal the length of the string
// originally passed to strToInt.
func intToStr(v uint, s []byte) {
	for i := len(s); i > 0; i-- {
		s[i-1] = byte(v%base) + 'a'
		v /= base
	}
}
|
||||
|
||||
// getLangISO3 returns the langID for the given 3-letter ISO language code
|
||||
// or unknownLang if this does not exist.
|
||||
func getLangISO3(s []byte) langID {
|
||||
if fixCase("und", s) {
|
||||
// first try to match canonical 3-letter entries
|
||||
for i := search(lang, s[:2]); cmp(lang[i:i+2], s[:2]) == 0; i += 4 {
|
||||
if lang[i+3] == 0 && lang[i+2] == s[2] {
|
||||
return langID(i >> 2)
|
||||
}
|
||||
}
|
||||
if i := index(altLangISO3, s); i != -1 {
|
||||
return langID(altLangISO3[i+3])
|
||||
}
|
||||
n := strToInt(s)
|
||||
if langNoIndex[n/8]&(1<<(n%8)) != 0 {
|
||||
return langID(n) + langNoIndexOffset
|
||||
}
|
||||
// Check for non-canonical uses of ISO3.
|
||||
for i := search(lang, s[:1]); lang[i] == s[0]; i += 4 {
|
||||
if cmp(lang[i+2:][:2], s[1:3]) == 0 {
|
||||
return langID(i >> 2)
|
||||
}
|
||||
}
|
||||
}
|
||||
return unknownLang
|
||||
}
|
||||
|
||||
// stringToBuf writes the string to b and returns the number of bytes
|
||||
// written. cap(b) must be >= 3.
|
||||
func (id langID) stringToBuf(b []byte) int {
|
||||
if id >= langNoIndexOffset {
|
||||
intToStr(uint(id)-langNoIndexOffset, b[:3])
|
||||
return 3
|
||||
}
|
||||
l := lang[id<<2:]
|
||||
if l[3] == 0 {
|
||||
return copy(b, l[:3])
|
||||
}
|
||||
return copy(b, l[:2])
|
||||
}
|
||||
|
||||
// String returns the BCP 47 representation of the langID.
|
||||
func (id langID) String() string {
|
||||
if id >= langNoIndexOffset {
|
||||
id -= langNoIndexOffset
|
||||
buf := [3]byte{}
|
||||
intToStr(uint(id), buf[:])
|
||||
return string(buf[:])
|
||||
}
|
||||
l := lang[id<<2:]
|
||||
if l[3] == 0 {
|
||||
return l[:3]
|
||||
}
|
||||
return l[:2]
|
||||
}
|
||||
|
||||
// ISO3 returns the ISO 639-3 language code.
|
||||
func (id langID) ISO3() string {
|
||||
if id >= langNoIndexOffset {
|
||||
return id.String()
|
||||
}
|
||||
l := lang[id<<2:]
|
||||
if l[3] == 0 {
|
||||
return l[:3]
|
||||
} else if l[2] == 0 {
|
||||
return get(altLangISO3, int(l[3]), 3)
|
||||
}
|
||||
// This allocation will only happen for 3-letter ISO codes
|
||||
// that are non-canonical BCP 47 language identifiers.
|
||||
return l[0:1] + l[2:4]
|
||||
}
|
||||
|
||||
type regionID uint16
|
||||
|
||||
// getRegionID returns the region id for s if s is a valid 2-letter region code
|
||||
// or unknownRegion.
|
||||
func getRegionID(s []byte) regionID {
|
||||
if len(s) == 3 {
|
||||
if isAlpha(s[0]) {
|
||||
return getRegionISO3(s)
|
||||
}
|
||||
if i, err := strconv.ParseUint(string(s), 10, 10); err == nil {
|
||||
return getRegionM49(int(i))
|
||||
}
|
||||
}
|
||||
return getRegionISO2(s)
|
||||
}
|
||||
|
||||
// getRegionISO2 returns the regionID for the given 2-letter ISO country code
|
||||
// or unknownRegion if this does not exist.
|
||||
func getRegionISO2(s []byte) regionID {
|
||||
if fixCase("ZZ", s) {
|
||||
if i := index(regionISO, s); i != -1 {
|
||||
return regionID(i>>2) + isoRegionOffset
|
||||
}
|
||||
}
|
||||
return unknownRegion
|
||||
}
|
||||
|
||||
// getRegionISO3 returns the regionID for the given 3-letter ISO country code
|
||||
// or unknownRegion if this does not exist.
|
||||
func getRegionISO3(s []byte) regionID {
|
||||
if fixCase("ZZZ", s) {
|
||||
for i := search(regionISO, s[:1]); regionISO[i] == s[0]; i += 4 {
|
||||
if cmp(regionISO[i+2:][:2], s[1:3]) == 0 {
|
||||
return regionID(i>>2) + isoRegionOffset
|
||||
}
|
||||
}
|
||||
for i := 0; i < len(altRegionISO3); i += 3 {
|
||||
if cmp(altRegionISO3[i:i+3], s) == 0 {
|
||||
return regionID(altRegionIDs[i/3])
|
||||
}
|
||||
}
|
||||
}
|
||||
return unknownRegion
|
||||
}
|
||||
|
||||
func getRegionM49(n int) regionID {
|
||||
// These will mostly be group IDs, which are at the start of the list.
|
||||
// For other values this may be a bit slow, as there are over 300 entries.
|
||||
// TODO: group id is sorted!
|
||||
if n == 0 {
|
||||
return unknownRegion
|
||||
}
|
||||
for i, v := range m49 {
|
||||
if v == uint16(n) {
|
||||
return regionID(i)
|
||||
}
|
||||
}
|
||||
return unknownRegion
|
||||
}
|
||||
|
||||
// String returns the BCP 47 representation for the region.
|
||||
func (r regionID) String() string {
|
||||
if r < isoRegionOffset {
|
||||
return fmt.Sprintf("%03d", r.m49())
|
||||
}
|
||||
r -= isoRegionOffset
|
||||
return get(regionISO, int(r), 2)
|
||||
}
|
||||
|
||||
// The use of this is uncommon.
|
||||
// Note: not all regionIDs have corresponding 3-letter ISO codes!
|
||||
func (r regionID) iso3() string {
|
||||
if r < isoRegionOffset {
|
||||
return ""
|
||||
}
|
||||
r -= isoRegionOffset
|
||||
reg := regionISO[r<<2:]
|
||||
switch reg[2] {
|
||||
case 0:
|
||||
return altRegionISO3[reg[3]:][:3]
|
||||
case ' ':
|
||||
return ""
|
||||
}
|
||||
return reg[0:1] + reg[2:4]
|
||||
}
|
||||
|
||||
func (r regionID) m49() uint16 {
|
||||
return m49[r]
|
||||
}
|
||||
|
||||
type scriptID uint8
|
||||
|
||||
// getScriptID returns the script id for string s. It assumes that s
|
||||
// is of the format [A-Z][a-z]{3}.
|
||||
func getScriptID(idx string, s []byte) scriptID {
|
||||
if fixCase("Zzzz", s) {
|
||||
if i := index(idx, s); i != -1 {
|
||||
return scriptID(i >> 2)
|
||||
}
|
||||
}
|
||||
return unknownScript
|
||||
}
|
||||
|
||||
// String returns the script code in title case.
|
||||
func (s scriptID) String() string {
|
||||
return get(script, int(s), 4)
|
||||
}
|
||||
|
||||
type currencyID uint16
|
||||
|
||||
func getCurrencyID(idx string, s []byte) currencyID {
|
||||
if fixCase("XXX", s) {
|
||||
if i := index(idx, s); i != -1 {
|
||||
return currencyID(i >> 2)
|
||||
}
|
||||
}
|
||||
return unknownCurrency
|
||||
}
|
||||
|
||||
// String returns the upper case representation of the currency.
|
||||
func (c currencyID) String() string {
|
||||
return get(currency, int(c), 3)
|
||||
}
|
||||
|
||||
func round(index string, c currencyID) int {
|
||||
return int(index[c<<2+3] >> 2)
|
||||
}
|
||||
|
||||
func decimals(index string, c currencyID) int {
|
||||
return int(index[c<<2+3] & 0x03)
|
||||
}
|
|
@ -0,0 +1,254 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package locale
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
var strdata = []string{
|
||||
"aa ",
|
||||
"aaa ",
|
||||
"aaaa",
|
||||
"aaab",
|
||||
"aab ",
|
||||
"ab ",
|
||||
"ba ",
|
||||
"xxxx",
|
||||
}
|
||||
|
||||
func strtests() map[string]int {
|
||||
return map[string]int{
|
||||
" ": 0,
|
||||
"a": 0,
|
||||
"aa": 0,
|
||||
"aaa": 4,
|
||||
"aa ": 0,
|
||||
"aaaa": 8,
|
||||
"aaab": 12,
|
||||
"aaax": 16,
|
||||
"b": 24,
|
||||
"ba": 24,
|
||||
"bbbb": 28,
|
||||
}
|
||||
}
|
||||
|
||||
func TestSearch(t *testing.T) {
|
||||
for k, v := range strtests() {
|
||||
if i := search(strings.Join(strdata, ""), []byte(k)); i != v {
|
||||
t.Errorf("%s: found %d; want %d", k, i, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIndex(t *testing.T) {
|
||||
strtests := strtests()
|
||||
strtests[" "] = -1
|
||||
strtests["aaax"] = -1
|
||||
strtests["bbbb"] = -1
|
||||
for k, v := range strtests {
|
||||
if i := index(strings.Join(strdata, ""), []byte(k)); i != v {
|
||||
t.Errorf("%s: found %d; want %d", k, i, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func b(s string) []byte {
|
||||
return []byte(s)
|
||||
}
|
||||
|
||||
func TestFixCase(t *testing.T) {
|
||||
tests := []string{
|
||||
"aaaa", "AbCD", "abcd",
|
||||
"Zzzz", "AbCD", "Abcd",
|
||||
"Zzzz", "AbC", "Zzzz",
|
||||
"XXX", "ab ", "XXX",
|
||||
"XXX", "usd", "USD",
|
||||
"cmn", "AB ", "cmn",
|
||||
"gsw", "CMN", "cmn",
|
||||
}
|
||||
for i := 0; i+3 < len(tests); i += 3 {
|
||||
tt := tests[i:]
|
||||
buf := [4]byte{}
|
||||
b := buf[:copy(buf[:], tt[1])]
|
||||
res := fixCase(tt[0], b)
|
||||
if res && cmp(tt[2], b) != 0 || !res && tt[0] != tt[2] {
|
||||
t.Errorf("%s+%s: found %q; want %q", tt[0], tt[1], res, tt[2])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestLangID(t *testing.T) {
|
||||
tests := []struct{ id, bcp47, iso3, norm string }{
|
||||
{id: "", bcp47: "und", iso3: "und"},
|
||||
{id: " ", bcp47: "und", iso3: "und"},
|
||||
{id: " ", bcp47: "und", iso3: "und"},
|
||||
{id: " ", bcp47: "und", iso3: "und"},
|
||||
{id: "und", bcp47: "und", iso3: "und"},
|
||||
{id: "aju", bcp47: "aju", iso3: "aju", norm: "jrb"},
|
||||
{id: "jrb", bcp47: "jrb", iso3: "jrb"},
|
||||
{id: "es", bcp47: "es", iso3: "spa"},
|
||||
{id: "spa", bcp47: "es", iso3: "spa"},
|
||||
{id: "ji", bcp47: "ji", iso3: "yid-", norm: "yi"},
|
||||
{id: "jw", bcp47: "jw", iso3: "jav-", norm: "jv"},
|
||||
{id: "ar", bcp47: "ar", iso3: "ara"},
|
||||
{id: "arb", bcp47: "arb", iso3: "arb", norm: "ar"},
|
||||
{id: "ar", bcp47: "ar", iso3: "ara"},
|
||||
{id: "kur", bcp47: "ku", iso3: "kur"},
|
||||
{id: "nl", bcp47: "nl", iso3: "nld"},
|
||||
{id: "NL", bcp47: "nl", iso3: "nld"},
|
||||
{id: "gsw", bcp47: "gsw", iso3: "gsw"},
|
||||
{id: "gSW", bcp47: "gsw", iso3: "gsw"},
|
||||
{id: "und", bcp47: "und", iso3: "und"},
|
||||
{id: "sh", bcp47: "sh", iso3: "hbs", norm: "sr"},
|
||||
{id: "hbs", bcp47: "sh", iso3: "hbs", norm: "sr"},
|
||||
{id: "no", bcp47: "no", iso3: "nor", norm: "nb"},
|
||||
{id: "nor", bcp47: "no", iso3: "nor", norm: "nb"},
|
||||
{id: "cmn", bcp47: "cmn", iso3: "cmn", norm: "zh"},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
want := getLangID(b(tt.id))
|
||||
if id := getLangISO2(b(tt.bcp47)); len(tt.bcp47) == 2 && want != id {
|
||||
t.Errorf("%d:getISO2(%s): found %v; want %v", i, tt.bcp47, id, want)
|
||||
}
|
||||
if len(tt.iso3) == 3 {
|
||||
if id := getLangISO3(b(tt.iso3)); want != id {
|
||||
t.Errorf("%d:getISO3(%s): found %q; want %q", i, tt.iso3, id, want)
|
||||
}
|
||||
if id := getLangID(b(tt.iso3)); want != id {
|
||||
t.Errorf("%d:getID3(%s): found %v; want %v", i, tt.iso3, id, want)
|
||||
}
|
||||
}
|
||||
norm := want
|
||||
if tt.norm != "" {
|
||||
norm = getLangID(b(tt.norm))
|
||||
}
|
||||
id := normLang(langOldMap[:], want)
|
||||
id = normLang(langMacroMap[:], id)
|
||||
if id != norm {
|
||||
t.Errorf("%d:norm(%s): found %v; want %v", i, tt.id, id, norm)
|
||||
}
|
||||
if id := want.String(); tt.bcp47 != id {
|
||||
t.Errorf("%d:String(): found %s; want %s", i, id, tt.bcp47)
|
||||
}
|
||||
if id := want.ISO3(); tt.iso3[:3] != id {
|
||||
t.Errorf("%d:iso3(): found %s; want %s", i, id, tt.iso3[:3])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegionID(t *testing.T) {
|
||||
tests := []struct {
|
||||
id, iso2, iso3 string
|
||||
m49 int
|
||||
}{
|
||||
{"AA", "AA", "AAA", 958},
|
||||
{"IC", "IC", "", 0},
|
||||
{"ZZ", "ZZ", "ZZZ", 999},
|
||||
{"EU", "EU", "QUU", 967},
|
||||
{"419", "", "", 419},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
want := getRegionID(b(tt.id))
|
||||
if id := getRegionISO2(b(tt.iso2)); len(tt.iso2) == 2 && want != id {
|
||||
t.Errorf("%d:getISO2(%s): found %d; want %d", i, tt.iso2, id, want)
|
||||
}
|
||||
if id := getRegionISO3(b(tt.iso3)); len(tt.iso3) == 3 && want != id {
|
||||
t.Errorf("%d:getISO3(%s): found %d; want %d", i, tt.iso3, id, want)
|
||||
}
|
||||
if id := getRegionID(b(tt.iso3)); len(tt.iso3) == 3 && want != id {
|
||||
t.Errorf("%d:getID3(%s): found %d; want %d", i, tt.iso3, id, want)
|
||||
}
|
||||
if id := getRegionM49(tt.m49); tt.m49 != 0 && want != id {
|
||||
t.Errorf("%d:getM49(%d): found %d; want %d", i, tt.m49, id, want)
|
||||
}
|
||||
if len(tt.iso2) == 2 {
|
||||
if id := want.String(); tt.iso2 != id {
|
||||
t.Errorf("%d:String(): found %s; want %s", i, id, tt.iso2)
|
||||
}
|
||||
} else {
|
||||
if id := want.String(); fmt.Sprintf("%03d", tt.m49) != id {
|
||||
t.Errorf("%d:String(): found %s; want %03d", i, id, tt.m49)
|
||||
}
|
||||
}
|
||||
if id := want.iso3(); tt.iso3 != id {
|
||||
t.Errorf("%d:iso3(): found %s; want %s", i, id, tt.iso3)
|
||||
}
|
||||
if id := int(want.m49()); tt.m49 != id {
|
||||
t.Errorf("%d:m49(): found %d; want %d", i, id, tt.m49)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestScript(t *testing.T) {
|
||||
idx := "BbbbDdddEeeeZzzz\xff\xff\xff\xff"
|
||||
const und = unknownScript
|
||||
tests := []struct {
|
||||
in string
|
||||
out scriptID
|
||||
}{
|
||||
{" ", und},
|
||||
{" ", und},
|
||||
{" ", und},
|
||||
{"", und},
|
||||
{"Bbbb", 0},
|
||||
{"Dddd", 1},
|
||||
{"dddd", 1},
|
||||
{"dDDD", 1},
|
||||
{"Eeee", 2},
|
||||
{"Zzzz", 3},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
if id := getScriptID(idx, b(tt.in)); id != tt.out {
|
||||
t.Errorf("%d:%s: found %d; want %d", i, tt.in, id, tt.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCurrency(t *testing.T) {
|
||||
curInfo := func(round, dec int) string {
|
||||
return string(round<<2 + dec)
|
||||
}
|
||||
idx := strings.Join([]string{
|
||||
"BBB" + curInfo(5, 2),
|
||||
"DDD\x00",
|
||||
"XXX\x00",
|
||||
"ZZZ\x00",
|
||||
"\xff\xff\xff\xff",
|
||||
}, "")
|
||||
const und = unknownCurrency
|
||||
tests := []struct {
|
||||
in string
|
||||
out currencyID
|
||||
round, dec int
|
||||
}{
|
||||
{" ", und, 0, 0},
|
||||
{" ", und, 0, 0},
|
||||
{" ", und, 0, 0},
|
||||
{"", und, 0, 0},
|
||||
{"BBB", 0, 5, 2},
|
||||
{"DDD", 1, 0, 0},
|
||||
{"dDd", 1, 0, 0},
|
||||
{"ddd", 1, 0, 0},
|
||||
{"XXX", 2, 0, 0},
|
||||
{"Zzz", 3, 0, 0},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
id := getCurrencyID(idx, b(tt.in))
|
||||
if id != tt.out {
|
||||
t.Errorf("%d:%s: found %d; want %d", i, tt.in, id, tt.out)
|
||||
}
|
||||
if id <= 3 {
|
||||
if d := decimals(idx, id); d != tt.dec {
|
||||
t.Errorf("%d:dec(%s): found %d; want %d", i, tt.in, d, tt.dec)
|
||||
}
|
||||
if d := round(idx, id); d != tt.round {
|
||||
t.Errorf("%d:round(%s): found %d; want %d", i, tt.in, d, tt.round)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,931 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
// Locale identifier table generator.
|
||||
// Data read from the web.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"code.google.com/p/go.text/cldr"
|
||||
"flag"
|
||||
"fmt"
|
||||
"hash"
|
||||
"hash/fnv"
|
||||
"io"
|
||||
"log"
|
||||
"math"
|
||||
"net/http"
|
||||
"os"
|
||||
"path"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var (
|
||||
url = flag.String("cldr",
|
||||
"http://www.unicode.org/Public/cldr/"+cldr.Version+"/core.zip",
|
||||
"URL of CLDR archive.")
|
||||
iana = flag.String("iana",
|
||||
"http://www.iana.org/assignments/language-subtag-registry",
|
||||
"URL of IANA language subtag registry.")
|
||||
test = flag.Bool("test", false,
|
||||
"test existing tables; can be used to compare web data with package data.")
|
||||
localFiles = flag.Bool("local", false,
|
||||
"data files have been copied to the current directory; for debugging only.")
|
||||
)
|
||||
|
||||
// comment holds the documentation that is emitted in front of the generated
// declarations. Each entry starts with a newline followed by the name of the
// declaration it documents; init indexes the entries by that first word.
var comment = []string{
	`
lang holds an alphabetically sorted list of BCP 47 language identifiers.
All entries are 4 bytes. The index of the identifier (divided by 4) is the language ID.
For 2-byte language identifiers, the two successive bytes have the following meaning:
- if the first letter of the 2- and 3-letter ISO codes are the same:
the second and third letter of the 3-letter ISO code.
- otherwise: a 0 and a by 2 bits right-shifted index into altLangISO3.
For 3-byte language identifiers the 4th byte is 0.`,
	`
langNoIndex is a bit vector of all 3-letter language codes that are not used as an index
in lookup tables. The language ids for these language codes are derived directly
from the letters and are not consecutive.`,
	`
altLangISO3 holds an alphabetically sorted list of 3-letter language code alternatives
to 2-letter language codes that cannot be derived using the method described above.
Each 3-letter code is followed by its 1-byte langID.`,
	`
tagAlias holds a mapping from legacy and grandfathered tags to their locale ID.`,
	`
langOldMap maps deprecated langIDs to their suggested replacements.`,
	`
langMacroMap maps languages to their macro language replacement, if applicable.`,
	`
script is an alphabetically sorted list of ISO 15924 codes. The index
of the script in the string, divided by 4, is the internal script ID.`,
	`
isoRegionOffset needs to be added to the index of regionISO to obtain the regionID
for 2-letter ISO codes. (The first isoRegionOffset regionIDs are reserved for
the UN.M49 codes used for groups.)`,
	`
regionISO holds a list of alphabetically sorted 2-letter ISO region codes.
Each 2-letter code is followed by two bytes with the following meaning:
- [A-Z]{2}: the first letter of the 2-letter code plus these two
letters form the 3-letter ISO code.
- 0, n: index into altRegionISO3.`,
	`
m49 maps regionIDs to UN.M49 codes. The first isoRegionOffset entries are
codes indicating collections of regions.`,
	`
altRegionISO3 holds a list of 3-letter region codes that cannot be
mapped to 2-letter codes using the default algorithm. This is a short list.`,
	`
altRegionIDs holds a list of regionIDs the positions of which match those
of the 3-letter ISO codes in altRegionISO3.`,
	`
currency holds an alphabetically sorted list of canonical 3-letter currency identifiers.
Each identifier is followed by a byte of which the 6 most significant bits
indicate the rounding and the least 2 significant bits indicate the
number of decimal positions.`,
	`
suppressScript is an index from langID to the dominant script for that language,
if it exists. If a script is given, it should be suppressed from the language tag.`,
	`
nRegionGroups is the number of region groups. All regionIDs < nRegionGroups
are groups.`,
	`
regionInclusion maps region identifiers to sets of regions in regionInclusionBits,
where each set holds all groupings that are directly connected in a region
containment graph.`,
	`
regionInclusionBits is an array of bit vectors where every vector represents
a set of region groupings. These sets are used to compute the distance
between two regions for the purpose of locale matching.`,
	`
regionInclusionNext marks, for each entry in regionInclusionBits, the set of
all groups that are reachable from the groups set in the respective entry.`,
}
|
||||
|
||||
// TODO: consider changing some of these structures to tries. This can reduce
|
||||
// memory, but may increase the need for memory allocations. This could be
|
||||
// mitigated if we can piggyback on locale strings for common cases.
|
||||
|
||||
// failOnError logs e and panics if e is non-nil; it does nothing otherwise.
func failOnError(e error) {
	if e == nil {
		return
	}
	log.Panic(e)
}
|
||||
|
||||
// setType records how a stringSet is being used. The zero value means that no
// use has been recorded yet.
type setType int

const (
	Indexed setType = 1 + iota // all elements must be of same size
	Linear
)

// stringSet is a collection of strings that is sorted and de-duplicated on
// demand (see compact).
type stringSet struct {
	s              []string
	sorted, frozen bool

	// We often need to update values after the creation of an index is completed.
	// We include a convenience map for keeping track of this.
	update map[string]string
	typ    setType // used for checking.
}

// clone returns a copy of ss with its own copy of the element slice. The
// update map is not copied and remains shared with ss.
func (ss *stringSet) clone() stringSet {
	c := *ss
	c.s = append([]string(nil), c.s...)
	return c
}

// setType records that ss is used as a set of type t. It panics if ss was
// previously used as a set of a different type.
func (ss *stringSet) setType(t setType) {
	if ss.typ != t && ss.typ != 0 {
		log.Panicf("type %d cannot be assigned as it was already %d", t, ss.typ)
	}
	// Remember the type; without this the check above could never trigger.
	ss.typ = t
}

// parse parses a whitespace-separated string and initializes ss with its
// components.
func (ss *stringSet) parse(s string) {
	scan := bufio.NewScanner(strings.NewReader(s))
	scan.Split(bufio.ScanWords)
	for scan.Scan() {
		ss.add(scan.Text())
	}
}

// assertChangeable panics if ss has been frozen.
func (ss *stringSet) assertChangeable() {
	if ss.frozen {
		log.Panic("attempt to modify a frozen stringSet")
	}
}

// add appends s to the set. The set must not be frozen.
func (ss *stringSet) add(s string) {
	ss.assertChangeable()
	ss.s = append(ss.s, s)
	ss.sorted = ss.frozen
}

// freeze compacts the set and marks it as frozen; add and remove panic on a
// frozen set.
func (ss *stringSet) freeze() {
	ss.compact()
	ss.frozen = true
}

// compact sorts the elements and removes duplicates. It is a no-op if the set
// is marked as sorted. An empty set is left untouched.
func (ss *stringSet) compact() {
	if ss.sorted {
		return
	}
	a := ss.s
	sort.Strings(a)
	if len(a) > 0 { // a[:k+1] below needs at least one element
		k := 0
		for i := 1; i < len(a); i++ {
			if a[k] != a[i] {
				a[k+1] = a[i]
				k++
			}
		}
		ss.s = a[:k+1]
	}
	ss.sorted = ss.frozen
}

// funcSorter sorts a string slice using fn as the less function.
type funcSorter struct {
	fn func(a, b string) bool
	sort.StringSlice
}

// Less reports whether element i sorts before element j according to fn.
func (s funcSorter) Less(i, j int) bool {
	return s.fn(s.StringSlice[i], s.StringSlice[j])
}

// sortFunc compacts the set and then reorders its elements using f as the
// less function.
func (ss *stringSet) sortFunc(f func(a, b string) bool) {
	ss.compact()
	sort.Sort(funcSorter{f, sort.StringSlice(ss.s)})
}

// remove deletes s from the set if it is present. The set must not be frozen.
func (ss *stringSet) remove(s string) {
	ss.assertChangeable()
	if i, ok := ss.find(s); ok {
		copy(ss.s[i:], ss.s[i+1:])
		ss.s = ss.s[:len(ss.s)-1]
	}
}

// replace substitutes nu for ol. It panics if ol is not in the set.
func (ss *stringSet) replace(ol, nu string) {
	ss.s[ss.index(ol)] = nu
	ss.sorted = ss.frozen
}

// index returns the position of s in the compacted set and marks the set as
// Indexed. It panics if s is not in the set.
func (ss *stringSet) index(s string) int {
	ss.setType(Indexed)
	i, ok := ss.find(s)
	if !ok {
		if i < len(ss.s) {
			log.Panicf("find: item %q is not in list. Closest match is %q.", s, ss.s[i])
		}
		log.Panicf("find: item %q is not in list", s)
	}
	return i
}

// find compacts the set and returns the position at which s is, or would be
// inserted, together with a flag reporting whether s is present.
func (ss *stringSet) find(s string) (int, bool) {
	ss.compact()
	i := sort.SearchStrings(ss.s, s)
	return i, i != len(ss.s) && ss.s[i] == s
}

// slice compacts the set and returns its elements. The returned slice is the
// set's own storage, not a copy.
func (ss *stringSet) slice() []string {
	ss.compact()
	return ss.s
}

// updateLater records key as the value to be associated with v in the update
// map.
func (ss *stringSet) updateLater(v, key string) {
	if ss.update == nil {
		ss.update = map[string]string{}
	}
	ss.update[v] = key
}

// join joins the string and ensures that all entries are of the same length.
// A sentinel entry consisting of 0xff bytes is appended to the set before
// joining. The set must not be empty.
func (ss *stringSet) join() string {
	ss.setType(Indexed)
	n := len(ss.s[0])
	for _, s := range ss.s {
		if len(s) != n {
			log.Panic("join: not all entries are of the same length")
		}
	}
	ss.s = append(ss.s, strings.Repeat("\xff", n))
	return strings.Join(ss.s, "")
}
|
||||
|
||||
// ianaEntry holds the fields of one record of the IANA Language Subtag
// Registry. Records of every type share this representation.
// See http://tools.ietf.org/html/bcp47#section-5.1 for a description of the
// various fields.
type ianaEntry struct {
	typ            string   // value of the "Type:" field
	tag            string   // value of the "Subtag:" or "Tag:" field
	description    []string // one element per "Description:" field
	scope          string   // value of the "Scope:" field
	added          string   // value of the "Added:" field
	preferred      string   // value of the "Preferred-Value:" field
	deprecated     string   // value of the "Deprecated:" field
	suppressScript string   // value of the "Suppress-Script:" field
	macro          string   // value of the "Macrolanguage:" field
	prefix         []string // one element per "Prefix:" field
}
|
||||
|
||||
type builder struct {
|
||||
w io.Writer // multi writer
|
||||
out io.Writer // set to Stdout
|
||||
hash32 hash.Hash32 // for checking whether tables have changed.
|
||||
size int
|
||||
data *cldr.CLDR
|
||||
supp *cldr.SupplementalData
|
||||
|
||||
// indices
|
||||
locale stringSet // common locales
|
||||
lang stringSet // canonical language ids (2 or 3 letter ISO codes) with data
|
||||
langNoIndex stringSet // 3-letter ISO codes with no associated data
|
||||
script stringSet // 4-letter ISO codes
|
||||
region stringSet // 2-letter ISO or 3-digit UN M49 codes
|
||||
currency stringSet // 3-letter ISO currency codes
|
||||
|
||||
// langInfo
|
||||
registry map[string]*ianaEntry
|
||||
}
|
||||
|
||||
func openReader(url *string) io.ReadCloser {
|
||||
if *localFiles {
|
||||
pwd, _ := os.Getwd()
|
||||
*url = "file://" + path.Join(pwd, path.Base(*url))
|
||||
}
|
||||
t := &http.Transport{}
|
||||
t.RegisterProtocol("file", http.NewFileTransport(http.Dir("/")))
|
||||
c := &http.Client{Transport: t}
|
||||
resp, err := c.Get(*url)
|
||||
failOnError(err)
|
||||
if resp.StatusCode != 200 {
|
||||
log.Fatalf(`bad GET status for "%s": %s`, *url, resp.Status)
|
||||
}
|
||||
return resp.Body
|
||||
}
|
||||
|
||||
func newBuilder() *builder {
|
||||
r := openReader(url)
|
||||
defer r.Close()
|
||||
d := &cldr.Decoder{}
|
||||
d.SetDirFilter("supplemental")
|
||||
data, err := d.DecodeZip(r)
|
||||
failOnError(err)
|
||||
b := builder{
|
||||
out: os.Stdout,
|
||||
data: data,
|
||||
supp: data.Supplemental(),
|
||||
hash32: fnv.New32(),
|
||||
}
|
||||
b.w = io.MultiWriter(b.out, b.hash32)
|
||||
b.parseRegistry()
|
||||
return &b
|
||||
}
|
||||
|
||||
func (b *builder) parseRegistry() {
|
||||
r := openReader(iana)
|
||||
defer r.Close()
|
||||
b.registry = make(map[string]*ianaEntry)
|
||||
|
||||
scan := bufio.NewScanner(r)
|
||||
scan.Split(bufio.ScanWords)
|
||||
var record *ianaEntry
|
||||
for more := scan.Scan(); more; {
|
||||
key := scan.Text()
|
||||
more = scan.Scan()
|
||||
value := scan.Text()
|
||||
switch key {
|
||||
case "Type:":
|
||||
record = &ianaEntry{typ: value}
|
||||
case "Subtag:", "Tag:":
|
||||
record.tag = value
|
||||
if info, ok := b.registry[value]; ok {
|
||||
if info.typ != "language" || record.typ != "extlang" {
|
||||
log.Fatalf("parseRegistry: tag %q already exists", value)
|
||||
}
|
||||
} else {
|
||||
b.registry[value] = record
|
||||
}
|
||||
case "Suppress-Script:":
|
||||
record.suppressScript = value
|
||||
case "Added:":
|
||||
record.added = value
|
||||
case "Deprecated:":
|
||||
record.deprecated = value
|
||||
case "Macrolanguage:":
|
||||
record.macro = value
|
||||
case "Preferred-Value:":
|
||||
record.preferred = value
|
||||
case "Prefix:":
|
||||
record.prefix = append(record.prefix, value)
|
||||
case "Scope:":
|
||||
record.scope = value
|
||||
case "Description:":
|
||||
buf := []byte(value)
|
||||
for more = scan.Scan(); more; more = scan.Scan() {
|
||||
b := scan.Bytes()
|
||||
if b[0] == '%' || b[len(b)-1] == ':' {
|
||||
break
|
||||
}
|
||||
buf = append(buf, ' ')
|
||||
buf = append(buf, b...)
|
||||
}
|
||||
record.description = append(record.description, string(buf))
|
||||
continue
|
||||
default:
|
||||
continue
|
||||
}
|
||||
more = scan.Scan()
|
||||
}
|
||||
if scan.Err() != nil {
|
||||
log.Panic(scan.Err())
|
||||
}
|
||||
}
|
||||
|
||||
var commentIndex = make(map[string]string)
|
||||
|
||||
func init() {
|
||||
for _, s := range comment {
|
||||
key := strings.TrimSpace(strings.SplitN(s, " ", 2)[0])
|
||||
commentIndex[key] = strings.Replace(s, "\n", "\n// ", -1)
|
||||
}
|
||||
}
|
||||
|
||||
func (b *builder) comment(name string) {
|
||||
fmt.Fprintln(b.out, commentIndex[name])
|
||||
}
|
||||
|
||||
func (b *builder) pf(f string, x ...interface{}) {
|
||||
fmt.Fprintf(b.w, f, x...)
|
||||
fmt.Fprint(b.w, "\n")
|
||||
}
|
||||
|
||||
func (b *builder) p(x ...interface{}) {
|
||||
fmt.Fprintln(b.w, x...)
|
||||
}
|
||||
|
||||
func (b *builder) addSize(s int) {
|
||||
b.size += s
|
||||
b.pf("// Size: %d bytes", s)
|
||||
}
|
||||
|
||||
func (b *builder) addArraySize(s, n int) {
|
||||
b.size += s
|
||||
b.pf("// Size: %d bytes, %d elements", s, n)
|
||||
}
|
||||
|
||||
func (b *builder) writeConst(name string, x interface{}) {
|
||||
b.comment(name)
|
||||
b.pf("const %s = %v", name, x)
|
||||
}
|
||||
|
||||
func (b *builder) writeSlice(name string, ss interface{}) {
|
||||
b.comment(name)
|
||||
v := reflect.ValueOf(ss)
|
||||
t := v.Type().Elem()
|
||||
b.addArraySize(v.Len()*int(t.Size()), v.Len())
|
||||
fmt.Fprintf(b.w, `var %s = [%d]%s{`, name, v.Len(), t)
|
||||
for i := 0; i < v.Len(); i++ {
|
||||
if t.Kind() == reflect.Struct {
|
||||
fmt.Fprintf(b.w, "\n\t%#v, ", v.Index(i).Interface())
|
||||
} else {
|
||||
if i%12 == 0 {
|
||||
fmt.Fprintf(b.w, "\n\t")
|
||||
}
|
||||
fmt.Fprintf(b.w, "%d, ", v.Index(i).Interface())
|
||||
}
|
||||
}
|
||||
b.p("\n}")
|
||||
}
|
||||
|
||||
// writeStringSlice writes a slice of strings. This produces a lot
|
||||
// of overhead. It should typically only be used for debugging.
|
||||
// TODO: remove
|
||||
func (b *builder) writeStringSlice(name string, ss []string) {
|
||||
b.comment(name)
|
||||
t := reflect.TypeOf(ss).Elem()
|
||||
sz := len(ss) * int(t.Size())
|
||||
for _, s := range ss {
|
||||
sz += len(s)
|
||||
}
|
||||
b.addArraySize(sz, len(ss))
|
||||
b.pf(`var %s = [%d]%s{`, name, len(ss), t)
|
||||
for i := 0; i < len(ss); i++ {
|
||||
b.pf("\t%q,", ss[i])
|
||||
}
|
||||
b.p("}")
|
||||
}
|
||||
|
||||
func (b *builder) writeString(name, s string) {
|
||||
b.comment(name)
|
||||
b.addSize(len(s) + int(reflect.TypeOf(s).Size()))
|
||||
if len(s) < 40 {
|
||||
b.pf(`var %s string = %q`, name, s)
|
||||
return
|
||||
}
|
||||
const cpl = 60
|
||||
b.pf(`var %s string = "" +`, name)
|
||||
for {
|
||||
n := cpl
|
||||
if n > len(s) {
|
||||
n = len(s)
|
||||
}
|
||||
var q string
|
||||
for {
|
||||
q = strconv.Quote(s[:n])
|
||||
if len(q) <= cpl+2 {
|
||||
break
|
||||
}
|
||||
n--
|
||||
}
|
||||
if n < len(s) {
|
||||
b.pf(` %s +`, q)
|
||||
s = s[n:]
|
||||
} else {
|
||||
b.pf(` %s`, q)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// base is the number of letters in the range 'a' to 'z'.
const base = 'z' - 'a' + 1

// strToInt interprets s as a number written in base 26, where the byte 'a'
// is the digit 0, and returns its value.
func strToInt(s string) uint {
	var n uint
	for _, c := range []byte(s) {
		n = n*base + uint(c-'a')
	}
	return n
}
|
||||
|
||||
func (b *builder) writeBitVector(name string, ss []string) {
|
||||
vec := make([]uint8, int(math.Ceil(math.Pow(base, float64(len(ss[0])))/8)))
|
||||
for _, s := range ss {
|
||||
v := strToInt(s)
|
||||
vec[v/8] |= 1 << (v % 8)
|
||||
}
|
||||
b.writeSlice(name, vec)
|
||||
}
|
||||
|
||||
// TODO: convert this type into a list or two-stage trie.
|
||||
func (b *builder) writeMapFunc(name string, m map[string]string, f func(string) uint16) {
|
||||
b.comment(name)
|
||||
v := reflect.ValueOf(m)
|
||||
sz := v.Len() * (2 + int(v.Type().Key().Size()))
|
||||
for _, k := range m {
|
||||
sz += len(k)
|
||||
}
|
||||
b.addSize(sz)
|
||||
keys := []string{}
|
||||
b.pf(`var %s = map[string]uint16{`, name)
|
||||
for k := range m {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
for _, k := range keys {
|
||||
b.pf("\t%q: %v,", k, f(m[k]))
|
||||
}
|
||||
b.p("}")
|
||||
}
|
||||
|
||||
func (b *builder) langIndex(s string) uint16 {
|
||||
if i, ok := b.lang.find(s); ok {
|
||||
return uint16(i)
|
||||
}
|
||||
return uint16(strToInt(s)) + uint16(len(b.lang.s))
|
||||
}
|
||||
|
||||
// inc advances the string to its lexicographical successor among the strings
// of the same length and letter case pattern. A 'z' or 'Z' wraps around to
// 'a' or 'A' and carries into the position to its left; any other byte is
// simply incremented.
//
// If every position wraps, there is no successor of the same shape. In that
// case the last byte of s is incremented past the alphabet, so that the
// result still compares greater than s and range loops terminate.
// s must not be empty.
func inc(s string) string {
	buf := []byte(s)
	for i := len(buf) - 1; i >= 0; i-- {
		switch buf[i] {
		case 'z':
			buf[i] = 'a' // wrap and carry
		case 'Z':
			buf[i] = 'A' // wrap and carry
		default:
			buf[i]++
			return string(buf)
		}
	}
	last := len(s) - 1
	return string(append([]byte(s[:last]), s[last]+1))
}
|
||||
|
||||
func (b *builder) parseIndices() {
|
||||
meta := b.supp.Metadata
|
||||
|
||||
for k, v := range b.registry {
|
||||
var ss *stringSet
|
||||
switch v.typ {
|
||||
case "language":
|
||||
if len(k) == 2 || v.suppressScript != "" || v.scope == "special" {
|
||||
b.lang.add(k)
|
||||
continue
|
||||
} else {
|
||||
ss = &b.langNoIndex
|
||||
}
|
||||
case "region":
|
||||
ss = &b.region
|
||||
case "script":
|
||||
ss = &b.script
|
||||
default:
|
||||
continue
|
||||
}
|
||||
if s := strings.SplitN(k, "..", 2); len(s) > 1 {
|
||||
for a := s[0]; a <= s[1]; a = inc(a) {
|
||||
ss.add(a)
|
||||
}
|
||||
} else {
|
||||
ss.add(k)
|
||||
}
|
||||
}
|
||||
|
||||
// currency codes
|
||||
for _, reg := range b.supp.CurrencyData.Region {
|
||||
for _, cur := range reg.Currency {
|
||||
b.currency.add(cur.Iso4217)
|
||||
}
|
||||
}
|
||||
|
||||
// common locales
|
||||
b.locale.parse(meta.DefaultContent.Locales)
|
||||
}
|
||||
|
||||
// writeLanguage generates all tables needed for language canonicalization.
|
||||
func (b *builder) writeLanguage() {
|
||||
meta := b.supp.Metadata
|
||||
|
||||
b.writeConst("unknownLang", b.lang.index("und"))
|
||||
|
||||
// Get language codes that need to be mapped (overlong 3-letter codes, deprecated
|
||||
// 2-letter codes and grandfathered tags.
|
||||
langOldMap := stringSet{}
|
||||
|
||||
// Mappings for macro languages
|
||||
langMacroMap := stringSet{}
|
||||
|
||||
// altLangISO3 get the alternative ISO3 names that need to be mapped.
|
||||
altLangISO3 := stringSet{}
|
||||
|
||||
// legacyTag maps from tag to language code.
|
||||
legacyTag := make(map[string]string)
|
||||
|
||||
lang := b.lang.clone()
|
||||
for _, a := range meta.Alias.LanguageAlias {
|
||||
if a.Replacement == "" {
|
||||
a.Replacement = "und"
|
||||
}
|
||||
// TODO: support mapping to tags
|
||||
repl := strings.SplitN(a.Replacement, "_", 2)[0]
|
||||
if a.Reason == "overlong" {
|
||||
if len(a.Replacement) == 2 && len(a.Type) == 3 {
|
||||
lang.updateLater(a.Replacement, a.Type)
|
||||
}
|
||||
} else if len(a.Type) <= 3 {
|
||||
if a.Reason != "deprecated" {
|
||||
langMacroMap.add(a.Type)
|
||||
langMacroMap.updateLater(a.Type, repl)
|
||||
}
|
||||
} else {
|
||||
legacyTag[strings.Replace(a.Type, "_", "-", -1)] = repl
|
||||
}
|
||||
}
|
||||
for k, v := range b.registry {
|
||||
// Also add deprecated values for 3-letter ISO codes, which CLDR omits.
|
||||
if v.typ == "language" && v.deprecated != "" && v.preferred != "" {
|
||||
langOldMap.add(k)
|
||||
langOldMap.updateLater(k, v.preferred)
|
||||
}
|
||||
}
|
||||
// Fix CLDR mappings.
|
||||
lang.updateLater("tl", "tgl")
|
||||
lang.updateLater("sh", "hbs")
|
||||
lang.updateLater("mo", "mol")
|
||||
lang.updateLater("no", "nor")
|
||||
lang.updateLater("tw", "twi")
|
||||
lang.updateLater("nb", "nob")
|
||||
lang.updateLater("ak", "aka")
|
||||
|
||||
// Ensure that each 2-letter code is matched with a 3-letter code.
|
||||
for _, v := range lang.s {
|
||||
s, ok := lang.update[v]
|
||||
if !ok {
|
||||
if s, ok = lang.update[langOldMap.update[v]]; !ok {
|
||||
continue
|
||||
}
|
||||
lang.update[v] = s
|
||||
}
|
||||
if v[0] != s[0] {
|
||||
altLangISO3.add(s)
|
||||
altLangISO3.updateLater(s, v)
|
||||
}
|
||||
}
|
||||
|
||||
// Complete canonialized language tags.
|
||||
lang.freeze()
|
||||
for i, v := range lang.s {
|
||||
// We can avoid these manual entries by using the IANI registry directly.
|
||||
// Seems easier to update the list manually, as changes are rare.
|
||||
// The panic in this loop will trigger if we miss an entry.
|
||||
add := ""
|
||||
if s, ok := lang.update[v]; ok {
|
||||
if s[0] == v[0] {
|
||||
add = s[1:]
|
||||
} else {
|
||||
add = string([]byte{0, byte(altLangISO3.index(s))})
|
||||
}
|
||||
} else if len(v) == 3 {
|
||||
add = "\x00"
|
||||
} else {
|
||||
log.Panicf("no data for long form of %q", v)
|
||||
}
|
||||
lang.s[i] += add
|
||||
}
|
||||
b.writeString("lang", lang.join())
|
||||
|
||||
b.writeConst("langNoIndexOffset", len(b.lang.s))
|
||||
|
||||
// space of all valid 3-letter language identifiers.
|
||||
b.writeBitVector("langNoIndex", b.langNoIndex.slice())
|
||||
|
||||
for i, s := range altLangISO3.slice() {
|
||||
idx := b.lang.index(altLangISO3.update[s])
|
||||
altLangISO3.s[i] += string([]byte{byte(idx)})
|
||||
}
|
||||
b.writeString("altLangISO3", altLangISO3.join())
|
||||
|
||||
makeMap := func(name string, ss *stringSet) {
|
||||
ss.sortFunc(func(i, j string) bool {
|
||||
return b.langIndex(i) < b.langIndex(j)
|
||||
})
|
||||
m := []struct{ from, to uint16 }{}
|
||||
for _, s := range ss.s {
|
||||
m = append(m, struct{ from, to uint16 }{
|
||||
b.langIndex(s),
|
||||
b.langIndex(ss.update[s]),
|
||||
})
|
||||
}
|
||||
b.writeSlice(name, m)
|
||||
}
|
||||
makeMap("langOldMap", &langOldMap)
|
||||
makeMap("langMacroMap", &langMacroMap)
|
||||
|
||||
b.writeMapFunc("tagAlias", legacyTag, func(s string) uint16 {
|
||||
return uint16(b.langIndex(s))
|
||||
})
|
||||
}
|
||||
|
||||
func (b *builder) writeScript() {
|
||||
unknown := uint8(b.script.index("Zzzz"))
|
||||
b.writeConst("unknownScript", unknown)
|
||||
b.writeString("script", b.script.join())
|
||||
|
||||
supp := make([]uint8, len(b.lang.slice()))
|
||||
for i, v := range b.lang.slice() {
|
||||
supp[i] = unknown
|
||||
if sc := b.registry[v].suppressScript; sc != "" {
|
||||
supp[i] = uint8(b.script.index(sc))
|
||||
}
|
||||
}
|
||||
b.writeSlice("suppressScript", supp)
|
||||
}
|
||||
|
||||
func parseM49(s string) uint16 {
|
||||
if len(s) == 0 {
|
||||
return 0
|
||||
}
|
||||
v, err := strconv.ParseUint(s, 10, 10)
|
||||
failOnError(err)
|
||||
return uint16(v)
|
||||
}
|
||||
|
||||
func (b *builder) writeRegion() {
|
||||
b.writeConst("unknownRegion", b.region.index("ZZ"))
|
||||
|
||||
isoOffset := b.region.index("AA")
|
||||
m49map := make([]uint16, len(b.region.slice()))
|
||||
altRegionISO3 := ""
|
||||
altRegionIDs := []uint16{}
|
||||
|
||||
b.writeConst("isoRegionOffset", isoOffset)
|
||||
|
||||
// 2-letter region lookup and mapping to numeric codes.
|
||||
regionISO := b.region.clone()
|
||||
regionISO.s = regionISO.s[isoOffset:]
|
||||
regionISO.sorted = false
|
||||
for _, tc := range b.supp.CodeMappings.TerritoryCodes {
|
||||
i := regionISO.index(tc.Type)
|
||||
if len(tc.Alpha3) == 3 {
|
||||
if tc.Alpha3[0] == tc.Type[0] {
|
||||
regionISO.s[i] += tc.Alpha3[1:]
|
||||
} else {
|
||||
regionISO.s[i] += string([]byte{0, byte(len(altRegionISO3))})
|
||||
altRegionISO3 += tc.Alpha3
|
||||
altRegionIDs = append(altRegionIDs, uint16(isoOffset+i))
|
||||
}
|
||||
}
|
||||
if d := m49map[isoOffset+i]; d != 0 {
|
||||
log.Panicf("%s found as a duplicate UN.M49 code of %03d", tc.Numeric, d)
|
||||
}
|
||||
m49map[isoOffset+i] = parseM49(tc.Numeric)
|
||||
}
|
||||
for i, s := range regionISO.s {
|
||||
if len(s) != 4 {
|
||||
regionISO.s[i] = s + " "
|
||||
}
|
||||
}
|
||||
b.writeString("regionISO", regionISO.join())
|
||||
b.writeString("altRegionISO3", altRegionISO3)
|
||||
b.writeSlice("altRegionIDs", altRegionIDs)
|
||||
|
||||
// 3-digit region lookup, groupings.
|
||||
for i := 0; i < isoOffset; i++ {
|
||||
m49map[i] = parseM49(b.region.s[i])
|
||||
}
|
||||
b.writeSlice("m49", m49map)
|
||||
}
|
||||
|
||||
func (b *builder) writeLocale() {
|
||||
b.writeStringSlice("locale", b.locale.slice())
|
||||
}
|
||||
|
||||
func (b *builder) writeLanguageInfo() {
|
||||
}
|
||||
|
||||
func (b *builder) writeCurrencies() {
|
||||
unknown := b.currency.index("XXX")
|
||||
digits := map[string]uint64{}
|
||||
rounding := map[string]uint64{}
|
||||
for _, info := range b.supp.CurrencyData.Fractions[0].Info {
|
||||
var err error
|
||||
digits[info.Iso4217], err = strconv.ParseUint(info.Digits, 10, 2)
|
||||
failOnError(err)
|
||||
rounding[info.Iso4217], err = strconv.ParseUint(info.Rounding, 10, 6)
|
||||
failOnError(err)
|
||||
}
|
||||
for i, cur := range b.currency.slice() {
|
||||
d := uint64(2) // default number of decimal positions
|
||||
if dd, ok := digits[cur]; ok {
|
||||
d = dd
|
||||
}
|
||||
var r uint64
|
||||
if r = rounding[cur]; r == 0 {
|
||||
r = 1 // default rounding increment in units 10^{-digits)
|
||||
}
|
||||
b.currency.s[i] += string([]byte{byte(r<<2 + d)})
|
||||
}
|
||||
b.writeString("currency", b.currency.join())
|
||||
// Hack alert: gofmt indents a trailing comment after an indented string.
|
||||
// Write this constant after currency to force a proper indentation of
|
||||
// the final comment.
|
||||
b.writeConst("unknownCurrency", unknown)
|
||||
}
|
||||
|
||||
func (b *builder) writeRegionInclusionData() {
|
||||
type index uint
|
||||
groups := make(map[int]index)
|
||||
// Create group indices.
|
||||
for i := 0; b.region.s[i][0] < 'A'; i++ { // Base M49 indices on regionID.
|
||||
groups[i] = index(i)
|
||||
}
|
||||
for _, g := range b.supp.TerritoryContainment.Group {
|
||||
group := b.region.index(g.Type)
|
||||
if _, ok := groups[group]; !ok {
|
||||
groups[group] = index(len(groups))
|
||||
}
|
||||
}
|
||||
if len(groups) > 32 {
|
||||
log.Fatalf("only 32 groups supported, found %d", len(groups))
|
||||
}
|
||||
b.writeConst("nRegionGroups", len(groups))
|
||||
mm := make(map[int][]index)
|
||||
for _, g := range b.supp.TerritoryContainment.Group {
|
||||
group := b.region.index(g.Type)
|
||||
for _, mem := range strings.Split(g.Contains, " ") {
|
||||
r := b.region.index(mem)
|
||||
mm[r] = append(mm[r], groups[group])
|
||||
if g, ok := groups[r]; ok {
|
||||
mm[group] = append(mm[group], g)
|
||||
}
|
||||
}
|
||||
}
|
||||
regionInclusion := make([]uint8, len(b.region.s))
|
||||
bvs := make(map[uint32]index)
|
||||
// Make the first bitvector positions correspond with the groups.
|
||||
for r, i := range groups {
|
||||
bv := uint32(1 << i)
|
||||
for _, g := range mm[r] {
|
||||
bv |= 1 << g
|
||||
}
|
||||
bvs[bv] = i
|
||||
regionInclusion[r] = uint8(bvs[bv])
|
||||
}
|
||||
for r := 0; r < len(b.region.s); r++ {
|
||||
if _, ok := groups[r]; !ok {
|
||||
bv := uint32(0)
|
||||
for _, g := range mm[r] {
|
||||
bv |= 1 << g
|
||||
}
|
||||
if bv == 0 {
|
||||
// Pick the world for unspecified regions.
|
||||
bv = 1 << groups[b.region.index("001")]
|
||||
}
|
||||
if _, ok := bvs[bv]; !ok {
|
||||
bvs[bv] = index(len(bvs))
|
||||
}
|
||||
regionInclusion[r] = uint8(bvs[bv])
|
||||
}
|
||||
}
|
||||
b.writeSlice("regionInclusion", regionInclusion)
|
||||
regionInclusionBits := make([]uint32, len(bvs))
|
||||
for k, v := range bvs {
|
||||
regionInclusionBits[v] = uint32(k)
|
||||
}
|
||||
// Add bit vectors for increasingly large distances until a fixed point is reached.
|
||||
regionInclusionNext := []uint8{}
|
||||
for i := 0; i < len(regionInclusionBits); i++ {
|
||||
bits := regionInclusionBits[i]
|
||||
next := bits
|
||||
for i := uint(0); i < uint(len(groups)); i++ {
|
||||
if bits&(1<<i) != 0 {
|
||||
next |= regionInclusionBits[i]
|
||||
}
|
||||
}
|
||||
if _, ok := bvs[next]; !ok {
|
||||
bvs[next] = index(len(bvs))
|
||||
regionInclusionBits = append(regionInclusionBits, next)
|
||||
}
|
||||
regionInclusionNext = append(regionInclusionNext, uint8(bvs[next]))
|
||||
}
|
||||
b.writeSlice("regionInclusionBits", regionInclusionBits)
|
||||
b.writeSlice("regionInclusionNext", regionInclusionNext)
|
||||
}
|
||||
|
||||
// header is the preamble of the generated file. main uses it as a format
// string; the two %s verbs receive the values of *url and *iana.
var header = `// Generated by running
// maketables -url=%s -iana=%s
// DO NOT EDIT

package locale
`
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
b := newBuilder()
|
||||
fmt.Fprintf(b.out, header, *url, *iana)
|
||||
|
||||
b.parseIndices()
|
||||
b.writeLanguage()
|
||||
b.writeScript()
|
||||
b.writeRegion()
|
||||
// TODO: b.writeLocale()
|
||||
b.writeCurrencies()
|
||||
b.writeRegionInclusionData()
|
||||
|
||||
fmt.Fprintf(b.out, "\n// Size: %.1fK (%d bytes); Check: %X\n", float32(b.size)/1024, b.size, b.hash32.Sum32())
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package locale
|
||||
|
||||
// regionDistance computes the distance between two regions based
|
||||
// on the distance in the graph of region containments as defined in CLDR.
|
||||
// It iterates over increasingly inclusive sets of groups, represented as
|
||||
// bit vectors, until the source bit vector has bits in common with the
|
||||
// destination vector.
|
||||
func regionDistance(a, b regionID) int {
|
||||
if a == b {
|
||||
return 0
|
||||
}
|
||||
p, q := regionInclusion[a], regionInclusion[b]
|
||||
if p < nRegionGroups {
|
||||
p, q = q, p
|
||||
}
|
||||
set := regionInclusionBits
|
||||
if q < nRegionGroups && set[p]&(1<<q) != 0 {
|
||||
return 1
|
||||
}
|
||||
d := 2
|
||||
for goal := set[q]; set[p]&goal == 0; p = regionInclusionNext[p] {
|
||||
d++
|
||||
}
|
||||
return d
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package locale
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestRegionDistance(t *testing.T) {
|
||||
tests := []struct {
|
||||
a, b string
|
||||
d int
|
||||
}{
|
||||
{"NL", "NL", 0},
|
||||
{"NL", "EU", 1},
|
||||
{"EU", "NL", 1},
|
||||
{"005", "005", 0},
|
||||
{"NL", "BE", 2},
|
||||
{"CO", "005", 1},
|
||||
{"005", "CO", 1},
|
||||
{"CO", "419", 2},
|
||||
{"419", "CO", 2},
|
||||
{"005", "419", 1},
|
||||
{"419", "005", 1},
|
||||
{"001", "013", 2},
|
||||
{"013", "001", 2},
|
||||
{"CO", "CW", 4},
|
||||
{"CO", "PW", 6},
|
||||
{"CO", "BV", 6},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
if d := regionDistance(getRegionID([]byte(tt.a)), getRegionID([]byte(tt.b))); d != tt.d {
|
||||
t.Errorf("%d: d(%s, %s) = %v; want %v", i, tt.a, tt.b, d, tt.d)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,557 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package locale
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// isAlpha reports whether b is a letter rather than a digit.
// It only compares b against '9', so the answer is meaningful only if b is
// already known to be an ASCII letter or digit.
func isAlpha(b byte) bool {
	const lastDigit = '9'
	return lastDigit < b
}
|
||||
|
||||
// isAlphaNum reports whether s consists solely of ASCII letters and digits.
// An empty s yields true.
func isAlphaNum(s []byte) bool {
	for _, c := range s {
		switch {
		case 'a' <= c && c <= 'z':
		case 'A' <= c && c <= 'Z':
		case '0' <= c && c <= '9':
		default:
			return false
		}
	}
	return true
}
|
||||
|
||||
// Errors reported while parsing locale identifiers.
var (
	errEmpty    = errors.New("locale: empty locale identifier") // returned by Parse for the empty string
	errInvalid  = errors.New("locale: invalid")                 // returned when the leading token is too long to be a language
	errTrailSep = errors.New("locale: trailing separator")      // recorded by scan when the input ends in '-'
)
|
||||
|
||||
// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
type scanner struct {
	b     []byte   // input being scanned; init rewrites '_' to '-' in place
	bytes [64]byte // small buffer to cover most common cases
	token []byte   // current token; nil when scan found no further token
	start int      // start position of the current token
	end   int      // end position of the current token
	next  int      // next point for scan
	err   error    // first error recorded through setError
	done  bool     // set by scan once the input is exhausted
}
|
||||
|
||||
func makeScannerString(s string) scanner {
|
||||
scan := scanner{}
|
||||
if len(s) <= len(scan.bytes) {
|
||||
scan.b = scan.bytes[:copy(scan.bytes[:], s)]
|
||||
} else {
|
||||
scan.b = []byte(s)
|
||||
}
|
||||
scan.init()
|
||||
return scan
|
||||
}
|
||||
|
||||
func (s *scanner) init() {
|
||||
for i, c := range s.b {
|
||||
if c == '_' {
|
||||
s.b[i] = '-'
|
||||
}
|
||||
}
|
||||
s.scan()
|
||||
}
|
||||
|
||||
// restToLower converts the string between start and end to lower case.
|
||||
func (s *scanner) toLower(start, end int) {
|
||||
for i := start; i < end; i++ {
|
||||
c := s.b[i]
|
||||
if 'A' <= c && c <= 'Z' {
|
||||
s.b[i] += 'a' - 'A'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *scanner) setError(e error) {
|
||||
if s.err == nil {
|
||||
s.err = e
|
||||
}
|
||||
}
|
||||
|
||||
func (s *scanner) setErrorf(f string, x ...interface{}) {
|
||||
s.setError(fmt.Errorf(f, x...))
|
||||
}
|
||||
|
||||
// replace replaces the current token with repl.
// When repl and the token differ in length the buffer is adjusted first and
// next and end are shifted by the difference; the bytes starting at start are
// then overwritten with repl.
func (s *scanner) replace(repl string) {
	if end := s.start + len(repl); end != s.end {
		diff := end - s.end // length change; negative when repl is shorter
		if end < cap(s.b) {
			// Build a fresh buffer with room for repl between the text
			// before the token and the text after it.
			b := make([]byte, len(s.b)+diff)
			copy(b, s.b[:s.start])
			copy(b[end:], s.b[s.end:])
			s.b = b
		} else {
			// NOTE(review): this branch re-slices from end, which drops the
			// bytes before end from s.b — confirm this is intended.
			s.b = append(s.b[end:], s.b[s.end:]...)
		}
		s.next += diff
		s.end = end
	}
	copy(s.b[s.start:], repl)
}
|
||||
|
||||
// gobble removes the current token from the input.
// Caller must call scan after calling gobble.
func (s *scanner) gobble() {
	if s.start == 0 {
		// Token at the very beginning: shift everything from next to the
		// front of the buffer.
		s.b = s.b[:+copy(s.b, s.b[s.next:])]
		s.end = 0
	} else {
		// Otherwise also remove the byte just before the token, so the
		// remainder is moved to start-1.
		s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])]
		s.end = s.start - 1
	}
	// Scanning resumes at the position the removed token occupied.
	s.next = s.start
}
|
||||
|
||||
// scan parses the next token of a BCP 47 string. Tokens that are larger
// than 8 characters or include non-alphanumeric characters result in an error
// and are gobbled and removed from the output.
// It returns the end position of the last token consumed.
// When no further token is found, token is left nil and done is set.
func (s *scanner) scan() (end int) {
	end = s.end
	s.token = nil
	for s.start = s.next; s.next < len(s.b); {
		i := bytes.IndexByte(s.b[s.next:], '-')
		if i == -1 {
			// No further separator: the token runs to the end of the input.
			s.end = len(s.b)
			s.next = len(s.b)
			i = s.end - s.start
		} else {
			s.end = s.next + i
			s.next = s.end + 1
		}
		token := s.b[s.start:s.end]
		if i < 1 || i > 8 || !isAlphaNum(token) {
			// Invalid token: record the error, remove it and try again.
			s.setErrorf("locale: invalid token %q", token)
			s.gobble()
			continue
		}
		s.token = token
		return end
	}
	// Input exhausted: strip a trailing separator, if any, and report it.
	if n := len(s.b); n > 0 && s.b[n-1] == '-' {
		s.setError(errTrailSep)
		s.b = s.b[:len(s.b)-1]
	}
	s.done = true
	return end
}
|
||||
|
||||
// acceptMinSize parses multiple tokens of the given size or greater.
|
||||
// It returns the end position of the last token consumed.
|
||||
func (s *scanner) acceptMinSize(min int) (end int) {
|
||||
end = s.end
|
||||
s.scan()
|
||||
for ; len(s.token) >= min; s.scan() {
|
||||
end = s.end
|
||||
}
|
||||
return end
|
||||
}
|
||||
|
||||
// Parse parses the given BCP 47 string and returns a valid ID.
|
||||
// If parsing failed it returns an error and any part of the identifier
|
||||
// that could be parsed.
|
||||
// If parsing succeeded but an unknown option was found, it
|
||||
// returns the valid Locale and an error.
|
||||
// It accepts identifiers in the BCP 47 format and extensions to this standard
|
||||
// defined in
|
||||
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
func Parse(s string) (loc ID, err error) {
|
||||
// TODO: consider supporting old-style locale key-value pairs.
|
||||
if s == "" {
|
||||
return und, errEmpty
|
||||
}
|
||||
loc = und
|
||||
if lang, ok := tagAlias[s]; ok {
|
||||
loc.lang = langID(lang)
|
||||
return
|
||||
}
|
||||
scan := makeScannerString(s)
|
||||
if len(scan.token) >= 4 {
|
||||
if !strings.EqualFold(s, "root") {
|
||||
return und, errInvalid
|
||||
}
|
||||
return und, nil
|
||||
}
|
||||
return parse(&scan, s)
|
||||
}
|
||||
|
||||
func parse(scan *scanner, s string) (loc ID, err error) {
|
||||
loc = und
|
||||
var end int
|
||||
private := false
|
||||
if n := len(scan.token); n <= 1 {
|
||||
scan.toLower(0, len(scan.b))
|
||||
end = parsePrivate(scan)
|
||||
private = end > 0
|
||||
} else if n >= 4 {
|
||||
return und, errInvalid
|
||||
} else { // the usual case
|
||||
loc, end = parseTag(scan)
|
||||
if n := len(scan.token); n == 1 {
|
||||
loc.pExt = uint16(end)
|
||||
end = parseExtensions(scan)
|
||||
if end-int(loc.pExt) <= 1 {
|
||||
loc.pExt = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
if end < len(scan.b) {
|
||||
scan.setErrorf("locale: invalid parts %q", scan.b[end:])
|
||||
scan.b = scan.b[:end]
|
||||
}
|
||||
if len(scan.b) <= len(s) {
|
||||
s = s[:len(scan.b)]
|
||||
}
|
||||
if len(s) > 0 && cmp(s, scan.b) == 0 {
|
||||
loc.str = &s
|
||||
} else if loc.pVariant > 0 || loc.pExt > 0 || private {
|
||||
s = string(scan.b)
|
||||
loc.str = &s
|
||||
}
|
||||
return loc, scan.err
|
||||
}
|
||||
|
||||
// parseTag parses language, script, region and variants.
// It returns an ID and the end position in the input that was parsed.
// The scanner buffer is normalized in place while parsing: recognized
// language and region subtags are replaced by their String forms,
// unrecognized script and region subtags are removed, and the remainder is
// lower-cased.
func parseTag(scan *scanner) (ID, int) {
	loc := und
	// TODO: set an error if an unknown lang, script or region is encountered.
	loc.lang = getLangID(scan.token)
	scan.replace(loc.lang.String())
	langStart := scan.start
	end := scan.scan()
	for len(scan.token) == 3 && isAlpha(scan.token[0]) {
		// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
		// to a tag of the form <extlang>.
		if lang := getLangID(scan.token); lang != unknownLang {
			// Overwrite the language at the front of the buffer with the
			// extlang before the extlang token itself is removed below.
			loc.lang = lang
			copy(scan.b[langStart:], lang.String())
			scan.b[langStart+3] = '-'
			scan.start = langStart + 4
		}
		scan.gobble()
		end = scan.scan()
	}
	if len(scan.token) == 4 && isAlpha(scan.token[0]) {
		loc.script = getScriptID(script, scan.token)
		if loc.script == unknownScript {
			scan.gobble()
		}
		end = scan.scan()
	}
	if n := len(scan.token); n >= 2 && n <= 3 {
		loc.region = getRegionID(scan.token)
		if loc.region == unknownRegion {
			scan.gobble()
		} else {
			scan.replace(loc.region.String())
		}
		end = scan.scan()
	}
	scan.toLower(scan.start, len(scan.b))
	start := scan.start
	end = parseVariants(scan, end)
	if start < end {
		// At least one variant was accepted: record where the variants
		// start and where they end.
		loc.pVariant = byte(start)
		loc.pExt = uint16(end)
	}
	return loc, end
}
|
||||
|
||||
// parseVariants scans tokens as long as each token is a valid variant string.
|
||||
// Duplicate variants are removed.
|
||||
func parseVariants(scan *scanner, end int) int {
|
||||
start := scan.start
|
||||
for ; len(scan.token) >= 4; scan.scan() {
|
||||
// TODO: validate and sort variants
|
||||
if bytes.Index(scan.b[start:scan.start], scan.token) != -1 {
|
||||
scan.gobble()
|
||||
continue
|
||||
}
|
||||
end = scan.end
|
||||
const maxVariantSize = 60000 // more than enough, ensures pExt will be valid.
|
||||
if end > maxVariantSize {
|
||||
break
|
||||
}
|
||||
}
|
||||
return end
|
||||
}
|
||||
|
||||
// bytesSort implements sort.Interface for a slice of byte slices, ordering
// the elements as bytes.Compare does.
type bytesSort [][]byte

// Len returns the number of elements.
func (b bytesSort) Len() int { return len(b) }

// Swap exchanges the elements at positions i and j.
func (b bytesSort) Swap(i, j int) {
	tmp := b[i]
	b[i] = b[j]
	b[j] = tmp
}

// Less reports whether element i sorts strictly before element j.
func (b bytesSort) Less(i, j int) bool {
	return bytes.Compare(b[i], b[j]) < 0
}
|
||||
|
||||
// parseExtensions parses and normalizes the extensions in the buffer.
// It returns the last position of scan.b that is part of any extension.
// The scanner must be positioned at a one-byte token that introduces an
// extension. Unless the single-extension fast path returns early, the
// extensions are sorted, with a private-use ('x') extension kept last, and
// the buffer is rewritten from the start of the first extension.
func parseExtensions(scan *scanner) int {
	start := scan.start
	exts := [][]byte{}
	private := []byte{}
	end := scan.end
	for len(scan.token) == 1 {
		start := scan.start // start of this extension; shadows the outer start
		extension := []byte{}
		ext := scan.token[0]
		switch ext {
		case 'u':
			// Tokens of three or more bytes come first, followed by
			// two-byte keys that are each followed by such tokens.
			attrEnd := scan.acceptMinSize(3)
			end = attrEnd
			var key []byte
			for last := []byte{}; len(scan.token) == 2; last = key {
				key = scan.token
				end = scan.acceptMinSize(3)
				// TODO: check key value validity
				if bytes.Compare(key, last) != 1 {
					// Keys are not in strictly increasing order: rescan all
					// keys from just after attrEnd, sort them and write them
					// back in place.
					p := attrEnd + 1
					scan.next = p
					keys := [][]byte{}
					for scan.scan(); len(scan.token) == 2; {
						keyStart := scan.start
						end = scan.acceptMinSize(3)
						keys = append(keys, scan.b[keyStart:end])
					}
					sort.Sort(bytesSort(keys))
					copy(scan.b[p:], bytes.Join(keys, []byte{'-'}))
					break
				}
			}
		case 't':
			scan.scan()
			// An optional tag may follow, which is lower-cased.
			if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
				_, end = parseTag(scan)
				scan.toLower(start, end)
			}
			// Then two-byte keys whose second byte is a digit, each followed
			// by tokens of three or more bytes.
			for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
				end = scan.acceptMinSize(3)
			}
		case 'x':
			end = scan.acceptMinSize(1)
		default:
			end = scan.acceptMinSize(2)
		}
		extension = scan.b[start:end]
		if len(extension) < 3 {
			scan.setErrorf("locale: empty extension %q", string(ext))
			continue
		} else if len(exts) == 0 && (ext == 'x' || scan.next >= len(scan.b)) {
			// A single extension needs no sorting; leave the buffer as is.
			return end
		} else if ext == 'x' {
			// Private use ends the list of extensions; this break leaves
			// the enclosing for loop.
			private = extension
			break
		}
		exts = append(exts, extension)
	}
	if scan.next < len(scan.b) {
		scan.setErrorf("locale: invalid trailing characters %q", scan.b[scan.end:])
	}
	sort.Sort(bytesSort(exts))
	if len(private) > 0 {
		exts = append(exts, private)
	}
	scan.b = append(scan.b[:start], bytes.Join(exts, []byte{'-'})...)
	return len(scan.b)
}
|
||||
|
||||
func parsePrivate(scan *scanner) int {
|
||||
if len(scan.token) == 0 || scan.token[0] != 'x' {
|
||||
scan.setErrorf("locale: invalid locale %q", scan.b)
|
||||
return scan.start
|
||||
}
|
||||
return parseExtensions(scan)
|
||||
}
|
||||
|
||||
// A Part identifies a part of the locale identifier string.
// Values up to VariantPart name the fixed parts; any larger value stands for
// the extension whose one-byte name has that value.
type Part byte

const (
	TagPart Part = iota // The identifier excluding extensions.
	LanguagePart
	ScriptPart
	RegionPart
	VariantPart
)

// partNames holds the names of the fixed parts, indexed by Part.
var partNames = []string{
	TagPart:      "Tag",
	LanguagePart: "Language",
	ScriptPart:   "Script",
	RegionPart:   "Region",
	VariantPart:  "Variant",
}

// String returns the name of a fixed part, or the one-character extension
// name for any other value.
func (p Part) String() string {
	if int(p) < len(partNames) {
		return partNames[p]
	}
	return string(rune(p))
}

// Extension returns the Part identifier for extension e, which must be 0-9 or a-z.
func Extension(e byte) Part {
	return Part(e)
}
|
||||
|
||||
// Errors for ill-formed language, script and region parts.
// NOTE(review): nothing in the visible part of this file uses these yet;
// presumably they are meant for Compose — confirm.
var (
	errLang   = errors.New("locale: invalid Language")
	errScript = errors.New("locale: invalid Script")
	errRegion = errors.New("locale: invalid Region")
)
|
||||
|
||||
// Compose returns a Locale composed from the given parts or an error
|
||||
// if any of the strings for the parts are ill-formed.
|
||||
func Compose(m map[Part]string) (loc ID, err error) {
|
||||
loc = und
|
||||
var scan scanner
|
||||
scan.b = scan.bytes[:0]
|
||||
add := func(p Part) {
|
||||
if s, ok := m[p]; ok {
|
||||
if len(scan.b) > 0 {
|
||||
scan.b = append(scan.b, '-')
|
||||
}
|
||||
if p > VariantPart {
|
||||
scan.b = append(scan.b, byte(p), '-')
|
||||
}
|
||||
scan.b = append(scan.b, s...)
|
||||
}
|
||||
}
|
||||
for p := TagPart; p <= VariantPart; p++ {
|
||||
if p == TagPart && m[p] != "" {
|
||||
for i := LanguagePart; i <= VariantPart; i++ {
|
||||
if _, ok := m[i]; ok {
|
||||
return und, fmt.Errorf("locale: cannot specify both Tag and %s", partNames[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
add(p)
|
||||
}
|
||||
for p := Part('0'); p < Part('9'); p++ {
|
||||
add(p)
|
||||
}
|
||||
for p := Part('a'); p < Part('w'); p++ {
|
||||
add(p)
|
||||
}
|
||||
for p := Part('y'); p < Part('z'); p++ {
|
||||
add(p)
|
||||
}
|
||||
add(Part('x'))
|
||||
scan.init()
|
||||
return parse(&scan, "")
|
||||
}
|
||||
|
||||
// Part returns the part of the locale identifer indicated by t.
|
||||
// The one-letter section identifier, if applicable, is not included.
|
||||
// Components are separated by a '-'.
|
||||
func (loc ID) Part(p Part) string {
|
||||
s := ""
|
||||
switch p {
|
||||
case TagPart:
|
||||
s = loc.String()
|
||||
if loc.pExt > 0 {
|
||||
s = s[:loc.pExt]
|
||||
}
|
||||
case LanguagePart:
|
||||
s = loc.lang.String()
|
||||
case ScriptPart:
|
||||
if loc.script != unknownScript {
|
||||
s = loc.script.String()
|
||||
}
|
||||
case RegionPart:
|
||||
if loc.region != unknownRegion {
|
||||
s = loc.region.String()
|
||||
}
|
||||
case VariantPart:
|
||||
if loc.pVariant > 0 {
|
||||
s = (*loc.str)[loc.pVariant:loc.pExt]
|
||||
}
|
||||
default:
|
||||
if loc.pExt > 0 {
|
||||
str := *loc.str
|
||||
for i := int(loc.pExt); i < len(str); {
|
||||
end, name, ext := getExtension(str, i)
|
||||
if name == byte(p) {
|
||||
return ext
|
||||
}
|
||||
i = end
|
||||
}
|
||||
} else if p == 'x' && loc.str != nil && strings.HasPrefix(*loc.str, "x-") {
|
||||
return (*loc.str)[2:]
|
||||
}
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// Parts returns all parts of the locale identifier in a map.
|
||||
func (loc ID) Parts() map[Part]string {
|
||||
m := make(map[Part]string)
|
||||
m[LanguagePart] = loc.lang.String()
|
||||
if loc.script != unknownScript {
|
||||
m[ScriptPart] = loc.script.String()
|
||||
}
|
||||
if loc.region != unknownRegion {
|
||||
m[RegionPart] = loc.region.String()
|
||||
}
|
||||
if loc.str != nil {
|
||||
s := *loc.str
|
||||
if strings.HasPrefix(s, "x-") {
|
||||
m[Extension('x')] = s[2:]
|
||||
} else if loc.pExt > 0 {
|
||||
i := int(loc.pExt)
|
||||
if int(loc.pVariant) != i && loc.pVariant > 0 {
|
||||
m[VariantPart] = s[loc.pVariant:i]
|
||||
}
|
||||
for i < len(s) {
|
||||
end, name, ext := getExtension(s, i)
|
||||
m[Extension(name)] = ext
|
||||
i = end
|
||||
}
|
||||
}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// getExtension returns the name, body and end position of the extension.
|
||||
func getExtension(s string, p int) (end int, name byte, ext string) {
|
||||
p++
|
||||
if s[p] == 'x' {
|
||||
return len(s), s[p], s[p+2:]
|
||||
}
|
||||
end = nextExtension(s, p)
|
||||
return end, s[p], s[p+2 : end]
|
||||
}
|
||||
|
||||
// nextExtension finds the next extension within the string, searching
// for the -<char>- pattern from position p. It returns the position of the
// leading '-' of that pattern, or len(s) if there is none.
// In the vast majority of cases, locale identifiers will have at most
// one extension and extensions tend to be small.
func nextExtension(s string, p int) int {
	limit := len(s) - 3
	for p < limit {
		switch {
		case s[p] != '-':
			p++
		case s[p+2] == '-':
			return p
		default:
			p += 3
		}
	}
	return len(s)
}
|
|
@ -0,0 +1,354 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package locale
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// scanTest describes one input for the scanner tests together with the
// tokens the scanner is expected to produce.
type scanTest struct {
	ok  bool     // true if scanning does not result in an error
	in  string   // input passed to makeScannerString
	tok []string // the expected tokens
}
|
||||
|
||||
// tests is the table shared by TestScan and TestAcceptMinSize.
var tests = []scanTest{
	{true, "", []string{}},
	{true, "1", []string{"1"}},
	{true, "en", []string{"en"}},
	{true, "root", []string{"root"}},
	{true, "maxchars", []string{"maxchars"}},
	{false, "bad/", []string{}},
	{false, "morethan8", []string{}},
	{false, "-", []string{}},
	{false, "----", []string{}},
	{false, "_", []string{}},
	{true, "en-US", []string{"en", "US"}},
	{true, "en_US", []string{"en", "US"}},
	{false, "en-US-", []string{"en", "US"}},
	{false, "en-US--", []string{"en", "US"}},
	{false, "en-US---", []string{"en", "US"}},
	{false, "en--US", []string{"en", "US"}},
	{false, "-en-US", []string{"en", "US"}},
	{false, "-en--US-", []string{"en", "US"}},
	{false, "-en--US-", []string{"en", "US"}},
	{false, "en-.-US", []string{"en", "US"}},
	{false, ".-en--US-.", []string{"en", "US"}},
	{false, "en-u.-US", []string{"en", "US"}},
	{true, "en-u1-US", []string{"en", "u1", "US"}},
	{true, "maxchar1_maxchar2-maxchar3", []string{"maxchar1", "maxchar2", "maxchar3"}},
	{false, "moreThan8-moreThan8-e", []string{"e"}},
}
|
||||
|
||||
func TestScan(t *testing.T) {
|
||||
for i, tt := range tests {
|
||||
scan := makeScannerString(tt.in)
|
||||
for j := 0; !scan.done; j++ {
|
||||
if j >= len(tt.tok) {
|
||||
t.Errorf("%d: extra token %q", i, scan.token)
|
||||
} else if cmp(tt.tok[j], scan.token) != 0 {
|
||||
t.Errorf("%d: token %d: found %q; want %q", i, j, scan.token, tt.tok[j])
|
||||
break
|
||||
}
|
||||
scan.scan()
|
||||
}
|
||||
if s := strings.Join(tt.tok, "-"); cmp(s, bytes.Replace(scan.b, b("_"), b("-"), -1)) != 0 {
|
||||
t.Errorf("%d: input: found %q; want %q", i, scan.b, s)
|
||||
}
|
||||
if (scan.err == nil) != tt.ok {
|
||||
t.Errorf("%d: ok: found %v; want %v", i, scan.err == nil, tt.ok)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestAcceptMinSize(t *testing.T) {
|
||||
for i, tt := range tests {
|
||||
// count number of successive tokens with a minimum size.
|
||||
for sz := 1; sz <= 8; sz++ {
|
||||
scan := makeScannerString(tt.in)
|
||||
scan.end, scan.next = 0, 0
|
||||
end := scan.acceptMinSize(sz)
|
||||
n := 0
|
||||
for i := 0; i < len(tt.tok) && len(tt.tok[i]) >= sz; i++ {
|
||||
n += len(tt.tok[i])
|
||||
if i > 0 {
|
||||
n++
|
||||
}
|
||||
}
|
||||
if end != n {
|
||||
t.Errorf("%d:%d: found len %d; want %d", i, sz, end, n)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// parseTest describes one identifier and the parts it is expected to parse
// into. parseTests fills in i and derives ext from extList or vice versa.
type parseTest struct {
	i                    int // the index of this test
	in                   string
	lang, script, region string
	variants, ext        string
	extList              []string // only used when more than one extension is present
	invalid              bool
	rewrite              bool // special rewrite not handled by parseTag
	changed              bool // string needed to be reformatted
}
|
||||
|
||||
func parseTests() []parseTest {
|
||||
var manyVars string
|
||||
for i := 0; i < 50; i++ {
|
||||
manyVars += fmt.Sprintf("-abc%02d", i)
|
||||
}
|
||||
tests := []parseTest{
|
||||
{in: "root", lang: "und", changed: true},
|
||||
{in: "und", lang: "und"},
|
||||
{in: "en", lang: "en"},
|
||||
{in: "xy", lang: "und", changed: true},
|
||||
{in: "gsw", lang: "gsw"},
|
||||
{in: "sr_Latn", lang: "sr", script: "Latn", changed: true},
|
||||
{in: "af-Arab", lang: "af", script: "Arab"},
|
||||
{in: "nl-BE", lang: "nl", region: "BE"},
|
||||
{in: "es-419", lang: "es", region: "419"},
|
||||
{in: "und-001", lang: "und", region: "001"},
|
||||
{in: "de-latn-be", lang: "de", script: "Latn", region: "BE", changed: true},
|
||||
{in: "de-1994", lang: "de", variants: "1994"},
|
||||
{in: "nl-abcde-abcde", lang: "nl", variants: "abcde"},
|
||||
{in: "nl" + manyVars, lang: "nl", variants: manyVars[1:]},
|
||||
{in: "nl" + manyVars + manyVars, lang: "nl", variants: manyVars[1:]},
|
||||
{in: "EN_CYRL", lang: "en", script: "Cyrl", changed: true},
|
||||
// private use and extensions
|
||||
{in: "x-a-b-c-d", ext: "x-a-b-c-d"},
|
||||
{in: "x_A.-B-C_D", ext: "x-b-c-d", invalid: true, changed: true},
|
||||
{in: "x-aa-bbbb-cccccccc-d", ext: "x-aa-bbbb-cccccccc-d"},
|
||||
{in: "en-c_cc-b-bbb-a-aaa", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc"}},
|
||||
{in: "en-x_cc-b-bbb-a-aaa", lang: "en", ext: "x-cc-b-bbb-a-aaa", changed: true},
|
||||
{in: "en-c_cc-b-bbb-a-aaa-x-x", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc", "x-x"}},
|
||||
{in: "en-u-co-phonebk", lang: "en", ext: "u-co-phonebk"},
|
||||
{in: "en-Cyrl-u-co-phonebk", lang: "en", script: "Cyrl", ext: "u-co-phonebk"},
|
||||
{in: "en-US-u-co-phonebk", lang: "en", region: "US", ext: "u-co-phonebk"},
|
||||
{in: "en-US-u-co-phonebk-cu-xau", lang: "en", region: "US", ext: "u-co-phonebk-cu-xau"},
|
||||
{in: "en-nedix-u-co-phonebk", lang: "en", variants: "nedix", ext: "u-co-phonebk"},
|
||||
{in: "en-u-cu-xua-co-phonebk", lang: "en", ext: "u-co-phonebk-cu-xua", changed: true},
|
||||
{in: "en-u-def-abc-cu-xua-co-phonebk", lang: "en", ext: "u-def-abc-co-phonebk-cu-xua", changed: true},
|
||||
{in: "en-u-def-abc", lang: "en", ext: "u-def-abc"},
|
||||
{in: "en-u-cu-xua-co-phonebk-a-cd", lang: "en", extList: []string{"a-cd", "u-co-phonebk-cu-xua"}, changed: true},
|
||||
{in: "en-t-en-Cyrl-NL-1994", lang: "en", ext: "t-en-cyrl-nl-1994", changed: true},
|
||||
{in: "en-t-en-Cyrl-NL-1994-t0-abc-def", lang: "en", ext: "t-en-cyrl-nl-1994-t0-abc-def", changed: true},
|
||||
{in: "en-t-t0-abcd", lang: "en", ext: "t-t0-abcd"},
|
||||
// Not necessary to have changed here.
|
||||
{in: "en-t-nl-abcd", lang: "en", ext: "t-nl"},
|
||||
{in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"},
|
||||
{in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}},
|
||||
// invalid
|
||||
{in: "", lang: "und", invalid: true, changed: true},
|
||||
{in: "-", lang: "und", invalid: true, changed: true},
|
||||
{in: "x", lang: "und", invalid: true, changed: true},
|
||||
{in: "x-", lang: "und", invalid: true, changed: true},
|
||||
{in: "x--", lang: "und", invalid: true, changed: true},
|
||||
{in: "a-a-b-c-d", lang: "und", invalid: true, changed: true},
|
||||
{in: "en-", lang: "en", invalid: true},
|
||||
{in: "enne-", lang: "und", invalid: true, changed: true},
|
||||
{in: "en.", lang: "und", invalid: true, changed: true},
|
||||
{in: "en.-latn", lang: "und", invalid: true, changed: true},
|
||||
{in: "en.-en", lang: "en", invalid: true},
|
||||
{in: "x-a-tooManyChars-c-d", ext: "x-a-c-d", invalid: true, changed: true},
|
||||
{in: "a-tooManyChars-c-d", lang: "und", invalid: true, changed: true},
|
||||
// TODO: check key-value validity
|
||||
// { in: "en-u-cu-xd", lang: "en", ext: "u-cu-xd", invalid: true },
|
||||
{in: "en-t-abcd", lang: "en", invalid: true},
|
||||
{in: "en-Latn-US-en", lang: "en", script: "Latn", region: "US", invalid: true},
|
||||
// rewrites
|
||||
{in: "zh-min", lang: "und", rewrite: true, changed: true},
|
||||
{in: "zh-min-nan", lang: "nan", changed: true},
|
||||
{in: "zh-yue", lang: "yue", changed: true},
|
||||
{in: "zh-xiang", lang: "hsn", rewrite: true, changed: true},
|
||||
{in: "zh-guoyu", lang: "zh", rewrite: true, changed: true},
|
||||
{in: "iw", lang: "iw", changed: false},
|
||||
{in: "sgn-BE-FR", lang: "sfb", rewrite: true, changed: true},
|
||||
{in: "i-klingon", lang: "tlh", rewrite: true, changed: true},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
tests[i].i = i
|
||||
if tt.extList != nil {
|
||||
tests[i].ext = strings.Join(tt.extList, "-")
|
||||
}
|
||||
if tt.ext != "" && tt.extList == nil {
|
||||
tests[i].extList = []string{tt.ext}
|
||||
}
|
||||
}
|
||||
return tests
|
||||
}
|
||||
|
||||
func TestParseExtensions(t *testing.T) {
|
||||
for i, tt := range parseTests() {
|
||||
if tt.ext == "" || tt.rewrite {
|
||||
continue
|
||||
}
|
||||
scan := makeScannerString(tt.in)
|
||||
if len(scan.b) > 1 && scan.b[1] != '-' {
|
||||
scan.end = nextExtension(string(scan.b), 0)
|
||||
scan.next = scan.end + 1
|
||||
scan.scan()
|
||||
}
|
||||
start := scan.start
|
||||
scan.toLower(start, len(scan.b))
|
||||
parseExtensions(&scan)
|
||||
ext := string(scan.b[start:])
|
||||
if ext != tt.ext {
|
||||
t.Errorf("%d: ext was %v; want %v", i, ext, tt.ext)
|
||||
}
|
||||
if changed := !strings.HasPrefix(tt.in[start:], ext); changed != tt.changed {
|
||||
t.Errorf("%d: changed was %v; want %v", i, changed, tt.changed)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// partChecks runs checks for each part by calling the function returned by f.
|
||||
func partChecks(t *testing.T, f func(*parseTest) func(Part) string) {
|
||||
for i, tt := range parseTests() {
|
||||
get := f(&tt)
|
||||
if get == nil {
|
||||
continue
|
||||
}
|
||||
if s, g := get(LanguagePart), getLangID(b(tt.lang)).String(); s != g {
|
||||
t.Errorf("%d: lang was %q; want %q", i, s, g)
|
||||
}
|
||||
if s, g := get(ScriptPart), tt.script; s != g {
|
||||
t.Errorf("%d: script was %q; want %q", i, s, g)
|
||||
}
|
||||
if s, g := get(RegionPart), tt.region; s != g {
|
||||
t.Errorf("%d: region was %q; want %q", i, s, g)
|
||||
}
|
||||
if s, g := get(VariantPart), tt.variants; s != g {
|
||||
t.Errorf("%d: variants was %q; want %q", i, s, g)
|
||||
}
|
||||
for _, g := range tt.extList {
|
||||
if s := get(Extension(g[0])); s != g[2:] {
|
||||
t.Errorf("%d: extension '%c' was %q; want %q", i, g[0], s, g[2:])
|
||||
}
|
||||
}
|
||||
if s := get(Extension('q')); s != "" {
|
||||
t.Errorf(`%d: unused extension 'q' was %q; want ""`, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseTag(t *testing.T) {
|
||||
partChecks(t, func(tt *parseTest) func(Part) string {
|
||||
if strings.HasPrefix(tt.in, "x-") || tt.rewrite {
|
||||
return nil
|
||||
}
|
||||
scan := makeScannerString(tt.in)
|
||||
id, end := parseTag(&scan)
|
||||
s := string(scan.b[:end])
|
||||
if changed := !strings.HasPrefix(tt.in, s); changed != tt.changed && tt.ext == "" {
|
||||
t.Errorf("%d: changed was %v; want %v", tt.i, changed, tt.changed)
|
||||
}
|
||||
id.str = &s
|
||||
tt.ext = ""
|
||||
tt.extList = []string{}
|
||||
return func(p Part) string {
|
||||
return id.Part(p)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestParse(t *testing.T) {
|
||||
partChecks(t, func(tt *parseTest) func(Part) string {
|
||||
id, err := Parse(tt.in)
|
||||
ext := ""
|
||||
if id.str != nil {
|
||||
if strings.HasPrefix(*id.str, "x-") {
|
||||
ext = *id.str
|
||||
} else if int(id.pExt) < len(*id.str) && id.pExt > 0 {
|
||||
ext = (*id.str)[id.pExt+1:]
|
||||
}
|
||||
}
|
||||
if ext != tt.ext {
|
||||
t.Errorf("%d: ext was %q; want %q", tt.i, ext, tt.ext)
|
||||
}
|
||||
changed := id.str == nil || !strings.HasPrefix(tt.in, *id.str)
|
||||
if changed != tt.changed {
|
||||
t.Errorf("%d: changed was %v; want %v", tt.i, changed, tt.changed)
|
||||
}
|
||||
if (err != nil) != tt.invalid {
|
||||
t.Errorf("%d: invalid was %v; want %v. Error: %v", tt.i, err != nil, tt.invalid, err)
|
||||
}
|
||||
return func(p Part) string {
|
||||
return id.Part(p)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestPart(t *testing.T) {
|
||||
partChecks(t, func(tt *parseTest) func(Part) string {
|
||||
id, _ := Parse(tt.in)
|
||||
return func(p Part) string {
|
||||
return id.Part(p)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestParts(t *testing.T) {
|
||||
partChecks(t, func(tt *parseTest) func(Part) string {
|
||||
id, _ := Parse(tt.in)
|
||||
m := id.Parts()
|
||||
return func(p Part) string {
|
||||
return m[p]
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestCompose1(t *testing.T) {
|
||||
partChecks(t, func(tt *parseTest) func(Part) string {
|
||||
m := make(map[Part]string)
|
||||
set := func(p Part, s string) {
|
||||
if s != "" {
|
||||
m[p] = strings.ToUpper(s)
|
||||
}
|
||||
}
|
||||
set(LanguagePart, tt.lang)
|
||||
set(ScriptPart, tt.script)
|
||||
set(RegionPart, tt.region)
|
||||
if tt.variants != "" {
|
||||
m[VariantPart] = tt.variants + "-tooManyChars-inv@lid-" + tt.variants
|
||||
}
|
||||
for _, ext := range tt.extList {
|
||||
set(Extension(ext[0]), ext[2:])
|
||||
}
|
||||
id, err := Compose(m)
|
||||
if tt.variants != "" && err == nil {
|
||||
t.Errorf("%d: no error for invalid variant", tt.i)
|
||||
}
|
||||
return func(p Part) string {
|
||||
return id.Part(p)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestCompose2(t *testing.T) {
|
||||
partChecks(t, func(tt *parseTest) func(Part) string {
|
||||
m := make(map[Part]string)
|
||||
tag := tt.lang
|
||||
for _, s := range []string{tt.script, tt.region, tt.variants} {
|
||||
if s != "" {
|
||||
tag += "-" + s
|
||||
}
|
||||
}
|
||||
m[TagPart] = tag
|
||||
for _, ext := range tt.extList {
|
||||
m[Extension(ext[0])] = ext[2:] + "-tooManyChars"
|
||||
}
|
||||
id, err := Compose(m)
|
||||
if len(tt.extList) > 0 && err == nil {
|
||||
t.Errorf("%d: no error for invalid variant", tt.i)
|
||||
}
|
||||
return func(p Part) string {
|
||||
return id.Part(p)
|
||||
}
|
||||
})
|
||||
}
|
|
@ -0,0 +1,612 @@
|
|||
// Generated by running
|
||||
// maketables -url=http://www.unicode.org/Public/cldr/23/core.zip -iana=http://www.iana.org/assignments/language-subtag-registry
|
||||
// DO NOT EDIT
|
||||
|
||||
package locale
|
||||
|
||||
const unknownLang = 196
|
||||
|
||||
// lang holds an alphabetically sorted list of BCP 47 language identifiers.
|
||||
// All entries are 4 bytes. The index of the identifier (divided by 4) is the language ID.
|
||||
// For 2-byte language identifiers, the two successive bytes have the following meaning:
|
||||
// - if the first letter of the 2- and 3-letter ISO codes are the same:
|
||||
// the second and third letter of the 3-letter ISO code.
|
||||
// - otherwise: a 0 followed by the index into altLangISO3, right-shifted by 2 bits.
|
||||
// For 3-byte language identifiers the 4th byte is 0.
|
||||
// Size: 868 bytes
|
||||
var lang string = "" +
|
||||
"aaarabbkaeveaffrakkaammhanrgarraassmavvaayymazzebaakbeelbgul" +
|
||||
"bhihbiisbmambnenboodbrrebsoscaatcehechhacooscrrecsescuhucvhv" +
|
||||
"cyymdaandeeudsb\x00dvivdzzoeeweelllenngeopoes\x00\x04etsteuu" +
|
||||
"sfaasffulfiinfjijfoaofrrafrr\x00frs\x00fyrygalegdlagllggnrng" +
|
||||
"sw\x00guujgvlvhaauheebhiinhomohrrvhsb\x00htathuunhyyehzerian" +
|
||||
"aidndieleigboiiiiikpkinndiodoisslittaiukuiw\x00\x02japnji" +
|
||||
"\x00\x05jvavjwavkaatkgonkiikkjuakkazklalkmhmknankoorkok\x00k" +
|
||||
"rauksaskuurkvomkw\x00\x00kyirlaatlbtzlgugliimlninloaoltitluu" +
|
||||
"blvavmai\x00men\x00mglgmhahmirimis\x00mkkdmlalmnonmoolmrarms" +
|
||||
"samtltmul\x00myyanaaunbobnddends\x00neepngdoniu\x00nlldnnnon" +
|
||||
"oornqo\x00nrblnso\x00nvavnyyaocciojjiomrmorriossspaanpiliplo" +
|
||||
"lpsusptorquuermohrnunroonruusrw\x00\x03saanscrdsdndsemesgags" +
|
||||
"h\x00\x01siinsklksllvsmmosnnasoomsqqisrrpssswstotsuunsvwesww" +
|
||||
"ataamteeltem\x00tggkthhatiirtkuktkl\x00tlgltmh\x00tnsntoontp" +
|
||||
"i\x00trurtssottattvl\x00twwityahugigukkrund\x00urrduzzbveenv" +
|
||||
"iievoolwalnwoolxhhoyiidyoorzahazbl\x00zhhozuulzxx\x00\xff" +
|
||||
"\xff\xff\xff"
|
||||
|
||||
const langNoIndexOffset = 212
|
||||
|
||||
// langNoIndex is a bit vector of all 3-letter language codes that are not used as an index
|
||||
// in lookup tables. The language ids for these language codes are derived directly
|
||||
// from the letters and are not consecutive.
|
||||
// Size: 2197 bytes, 2197 elements
|
||||
var langNoIndex = [2197]uint8{
|
||||
255, 253, 253, 254, 239, 255, 191, 219, 251, 255, 254, 250,
|
||||
247, 31, 60, 87, 111, 151, 115, 248, 255, 255, 255, 112,
|
||||
191, 3, 255, 255, 207, 5, 133, 98, 233, 255, 253, 127,
|
||||
255, 255, 255, 119, 255, 255, 255, 255, 255, 255, 255, 227,
|
||||
233, 255, 255, 255, 77, 184, 2, 122, 190, 255, 255, 255,
|
||||
254, 255, 247, 255, 255, 255, 255, 223, 43, 244, 241, 240,
|
||||
93, 231, 159, 20, 5, 32, 223, 237, 159, 63, 201, 33,
|
||||
248, 191, 238, 255, 255, 255, 255, 255, 255, 127, 255, 255,
|
||||
255, 255, 127, 253, 255, 255, 255, 247, 127, 255, 255, 255,
|
||||
255, 255, 255, 231, 191, 255, 255, 223, 255, 239, 255, 255,
|
||||
255, 255, 191, 255, 255, 255, 255, 223, 255, 255, 243, 255,
|
||||
251, 47, 255, 255, 255, 254, 255, 255, 251, 255, 255, 247,
|
||||
255, 255, 253, 255, 255, 255, 127, 223, 255, 255, 223, 254,
|
||||
255, 255, 223, 255, 255, 223, 251, 255, 255, 254, 255, 255,
|
||||
255, 255, 255, 247, 127, 191, 249, 213, 173, 127, 64, 255,
|
||||
156, 193, 67, 44, 8, 36, 65, 0, 80, 68, 0, 128,
|
||||
187, 255, 242, 159, 180, 66, 69, 214, 155, 52, 136, 244,
|
||||
123, 231, 23, 86, 85, 125, 14, 28, 55, 113, 243, 239,
|
||||
159, 255, 93, 40, 101, 8, 0, 16, 188, 255, 191, 255,
|
||||
223, 247, 119, 55, 62, 135, 199, 223, 255, 0, 129, 0,
|
||||
176, 5, 128, 0, 0, 0, 0, 3, 64, 0, 0, 146,
|
||||
33, 208, 255, 125, 255, 222, 254, 94, 4, 0, 2, 100,
|
||||
141, 25, 193, 223, 123, 34, 0, 0, 0, 223, 109, 222,
|
||||
38, 229, 217, 241, 254, 255, 253, 207, 159, 20, 1, 12,
|
||||
134, 0, 193, 0, 240, 197, 103, 91, 86, 137, 94, 183,
|
||||
237, 239, 3, 0, 2, 0, 0, 0, 192, 119, 218, 87,
|
||||
144, 105, 1, 44, 86, 123, 244, 255, 127, 127, 0, 0,
|
||||
0, 1, 8, 70, 0, 0, 0, 176, 20, 7, 81, 18,
|
||||
10, 0, 0, 0, 0, 0, 17, 73, 0, 0, 96, 16,
|
||||
0, 0, 0, 16, 0, 0, 68, 4, 0, 16, 128, 4,
|
||||
24, 0, 0, 4, 0, 128, 40, 4, 0, 0, 16, 213,
|
||||
45, 16, 100, 53, 36, 83, 245, 212, 189, 194, 205, 1,
|
||||
0, 128, 0, 64, 0, 0, 0, 0, 0, 4, 23, 57,
|
||||
1, 217, 87, 137, 33, 152, 167, 0, 0, 1, 64, 130,
|
||||
0, 0, 0, 4, 0, 0, 0, 2, 1, 64, 0, 64,
|
||||
0, 0, 176, 254, 171, 57, 0, 2, 0, 0, 0, 4,
|
||||
0, 0, 0, 0, 0, 32, 0, 64, 4, 0, 0, 0,
|
||||
2, 0, 0, 0, 16, 129, 168, 5, 0, 0, 0, 0,
|
||||
4, 32, 4, 166, 8, 4, 0, 8, 1, 80, 0, 0,
|
||||
8, 49, 134, 64, 0, 0, 0, 0, 64, 0, 3, 117,
|
||||
2, 16, 8, 4, 0, 0, 0, 224, 59, 179, 19, 0,
|
||||
128, 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 255, 255, 255, 255, 255, 223, 206, 131, 162,
|
||||
192, 255, 223, 37, 207, 31, 197, 3, 16, 32, 178, 197,
|
||||
166, 69, 37, 155, 3, 79, 248, 223, 3, 148, 64, 16,
|
||||
1, 14, 0, 227, 145, 84, 155, 56, 241, 125, 247, 109,
|
||||
249, 255, 255, 125, 4, 8, 0, 1, 33, 18, 60, 95,
|
||||
253, 15, 133, 79, 64, 64, 0, 0, 255, 253, 255, 214,
|
||||
232, 27, 244, 55, 163, 13, 0, 0, 32, 123, 57, 2,
|
||||
5, 132, 0, 240, 255, 127, 254, 0, 24, 4, 129, 0,
|
||||
0, 0, 128, 16, 148, 28, 1, 0, 0, 0, 0, 0,
|
||||
16, 64, 0, 4, 8, 180, 254, 165, 12, 64, 0, 0,
|
||||
17, 4, 4, 108, 0, 96, 240, 255, 251, 127, 230, 24,
|
||||
5, 159, 223, 110, 3, 0, 17, 0, 0, 0, 64, 4,
|
||||
149, 166, 128, 40, 4, 0, 4, 81, 226, 255, 253, 63,
|
||||
5, 9, 8, 5, 64, 0, 0, 0, 0, 16, 0, 0,
|
||||
8, 0, 0, 0, 0, 161, 2, 108, 229, 72, 20, 136,
|
||||
32, 192, 71, 128, 7, 0, 0, 0, 204, 80, 64, 36,
|
||||
133, 71, 132, 64, 32, 16, 0, 0, 2, 80, 136, 17,
|
||||
0, 209, 140, 238, 80, 19, 29, 17, 105, 6, 89, 235,
|
||||
51, 8, 0, 32, 5, 64, 16, 0, 0, 0, 16, 68,
|
||||
150, 73, 214, 93, 167, 129, 69, 151, 251, 0, 16, 0,
|
||||
8, 0, 128, 0, 64, 69, 0, 1, 2, 0, 1, 64,
|
||||
128, 0, 6, 8, 240, 235, 247, 57, 132, 153, 22, 0,
|
||||
0, 12, 4, 1, 32, 32, 221, 162, 1, 0, 0, 0,
|
||||
18, 68, 0, 0, 4, 16, 240, 157, 149, 19, 0, 128,
|
||||
0, 0, 208, 18, 64, 0, 16, 240, 144, 98, 76, 210,
|
||||
2, 1, 10, 0, 70, 4, 0, 8, 2, 0, 32, 192,
|
||||
0, 128, 6, 0, 8, 0, 0, 0, 0, 240, 216, 239,
|
||||
21, 2, 8, 0, 0, 1, 0, 0, 0, 0, 16, 1,
|
||||
0, 16, 0, 0, 0, 255, 215, 227, 253, 255, 255, 255,
|
||||
255, 255, 127, 255, 255, 254, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 223, 255, 251, 255, 255, 219, 253, 255, 255,
|
||||
127, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
253, 255, 223, 191, 220, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 254, 251, 255, 255, 255, 255, 255, 255, 255, 254, 255,
|
||||
253, 255, 255, 255, 255, 255, 255, 255, 239, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 123, 253, 221, 223, 255,
|
||||
188, 152, 5, 40, 255, 7, 240, 255, 255, 127, 0, 8,
|
||||
0, 195, 61, 27, 6, 230, 114, 240, 255, 124, 63, 68,
|
||||
34, 0, 159, 107, 14, 253, 255, 87, 242, 255, 63, 255,
|
||||
242, 30, 133, 247, 255, 255, 71, 128, 1, 2, 0, 0,
|
||||
64, 85, 159, 138, 217, 217, 14, 17, 133, 81, 208, 243,
|
||||
255, 119, 0, 1, 5, 209, 88, 72, 0, 0, 0, 16,
|
||||
4, 2, 0, 32, 10, 128, 123, 182, 253, 254, 254, 255,
|
||||
255, 255, 255, 255, 255, 239, 255, 255, 223, 127, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 247, 255, 255, 219, 119,
|
||||
255, 255, 127, 255, 255, 255, 239, 255, 189, 255, 255, 251,
|
||||
255, 255, 255, 223, 127, 253, 255, 247, 255, 255, 247, 255,
|
||||
255, 255, 251, 255, 239, 255, 255, 255, 255, 255, 127, 223,
|
||||
247, 191, 239, 247, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
254, 255, 255, 127, 255, 255, 255, 255, 255, 252, 255, 253,
|
||||
127, 255, 255, 158, 190, 255, 238, 255, 127, 247, 127, 2,
|
||||
130, 4, 255, 255, 255, 255, 215, 239, 255, 255, 247, 254,
|
||||
226, 158, 231, 255, 247, 255, 86, 189, 201, 254, 255, 255,
|
||||
255, 255, 239, 255, 253, 247, 125, 15, 167, 81, 4, 68,
|
||||
3, 208, 85, 174, 166, 253, 189, 255, 67, 92, 91, 255,
|
||||
255, 255, 63, 32, 20, 0, 87, 81, 130, 101, 245, 76,
|
||||
226, 255, 255, 223, 64, 5, 197, 5, 0, 34, 0, 116,
|
||||
105, 16, 8, 4, 65, 0, 1, 6, 0, 0, 0, 0,
|
||||
0, 81, 96, 5, 4, 1, 0, 0, 6, 1, 32, 0,
|
||||
24, 1, 146, 177, 253, 103, 75, 6, 148, 0, 87, 237,
|
||||
251, 76, 157, 123, 131, 4, 98, 64, 0, 21, 66, 0,
|
||||
0, 0, 84, 131, 249, 95, 16, 140, 201, 70, 223, 247,
|
||||
19, 49, 0, 0, 0, 0, 0, 144, 0, 0, 0, 0,
|
||||
0, 10, 16, 0, 1, 64, 0, 240, 223, 253, 191, 125,
|
||||
186, 207, 255, 191, 66, 20, 132, 97, 176, 255, 93, 122,
|
||||
4, 2, 0, 65, 45, 20, 37, 247, 237, 241, 191, 239,
|
||||
63, 0, 0, 2, 199, 224, 30, 252, 187, 255, 253, 251,
|
||||
247, 253, 117, 253, 255, 252, 245, 237, 71, 244, 127, 16,
|
||||
1, 1, 196, 127, 255, 247, 221, 249, 95, 5, 134, 235,
|
||||
245, 119, 189, 61, 0, 0, 0, 67, 112, 66, 0, 64,
|
||||
0, 0, 1, 67, 25, 0, 8, 0, 255, 255, 255, 3,
|
||||
0, 0, 8, 0, 0, 32, 0, 0, 128, 0, 0, 0,
|
||||
2, 0, 0, 8, 0, 0, 32, 0, 0, 128, 0, 0,
|
||||
0, 2, 0, 0, 8, 0, 0, 32, 0, 0, 128, 0,
|
||||
0, 0, 2, 0, 0, 8, 0, 0, 32, 0, 0, 128,
|
||||
0, 0, 0, 2, 0, 0, 8, 0, 0, 32, 0, 0,
|
||||
128, 239, 189, 231, 87, 238, 19, 93, 9, 193, 64, 33,
|
||||
250, 23, 1, 128, 0, 0, 0, 0, 240, 254, 255, 191,
|
||||
0, 35, 0, 32, 0, 0, 8, 0, 0, 48, 181, 227,
|
||||
16, 0, 0, 0, 17, 36, 22, 0, 1, 2, 16, 131,
|
||||
163, 1, 80, 0, 1, 131, 17, 8, 0, 0, 0, 240,
|
||||
223, 255, 127, 18, 170, 16, 127, 216, 82, 0, 128, 32,
|
||||
0, 0, 0, 0, 64, 16, 2, 2, 9, 0, 16, 66,
|
||||
0, 97, 95, 156, 49, 0, 0, 0, 1, 84, 2, 0,
|
||||
0, 0, 0, 0, 66, 1, 0, 0, 0, 191, 223, 255,
|
||||
255, 255, 255, 63, 223, 94, 207, 189, 191, 175, 255, 255,
|
||||
127, 75, 64, 16, 241, 253, 239, 253, 247, 255, 255, 251,
|
||||
223, 255, 111, 241, 123, 241, 127, 255, 127, 255, 238, 247,
|
||||
239, 191, 255, 219, 255, 223, 255, 253, 126, 191, 87, 247,
|
||||
111, 129, 118, 31, 220, 247, 253, 255, 255, 255, 251, 254,
|
||||
255, 31, 87, 31, 239, 95, 16, 24, 98, 254, 255, 159,
|
||||
21, 159, 21, 15, 125, 70, 125, 161, 130, 241, 247, 126,
|
||||
255, 255, 255, 255, 255, 253, 221, 255, 191, 253, 246, 95,
|
||||
254, 31, 64, 152, 2, 255, 227, 255, 243, 246, 254, 223,
|
||||
255, 223, 127, 80, 30, 5, 123, 180, 223, 190, 255, 255,
|
||||
247, 247, 255, 247, 127, 255, 255, 254, 219, 247, 215, 249,
|
||||
239, 47, 128, 191, 197, 255, 255, 255, 255, 159, 255, 255,
|
||||
255, 255, 253, 191, 223, 127, 6, 29, 87, 255, 248, 219,
|
||||
93, 199, 125, 22, 185, 234, 107, 160, 28, 32, 0, 48,
|
||||
2, 4, 36, 72, 4, 0, 0, 64, 212, 6, 4, 0,
|
||||
0, 4, 0, 4, 0, 48, 1, 6, 80, 0, 8, 0,
|
||||
0, 0, 36, 0, 4, 0, 16, 140, 88, 213, 73, 15,
|
||||
20, 79, 241, 22, 68, 81, 10, 10, 64, 0, 0, 64,
|
||||
0, 8, 0, 0, 0, 220, 255, 235, 31, 88, 8, 65,
|
||||
4, 160, 4, 0, 48, 18, 64, 34, 0, 16, 0, 0,
|
||||
0, 0, 0, 0, 1, 0, 0, 0, 128, 16, 16, 191,
|
||||
111, 147, 0, 1, 0, 0, 0, 0, 0, 0, 0, 192,
|
||||
128, 45, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0,
|
||||
192, 134, 194, 2, 0, 0, 0, 1, 223, 24, 0, 0,
|
||||
18, 240, 255, 121, 63, 0, 37, 0, 0, 0, 10, 0,
|
||||
0, 0, 0, 0, 0, 64, 0, 16, 3, 0, 9, 32,
|
||||
0, 0, 1, 0, 0, 131, 0, 0, 0, 0, 1, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255,
|
||||
255, 255, 207, 126, 174, 17, 16, 0, 0, 146, 0, 4,
|
||||
141, 241, 94, 0, 1, 0, 48, 20, 4, 85, 16, 1,
|
||||
4, 246, 63, 122, 5, 4, 0, 176, 128, 0, 69, 85,
|
||||
151, 125, 159, 113, 204, 120, 85, 67, 244, 87, 103, 20,
|
||||
1, 0, 0, 0, 0, 0, 44, 247, 219, 31, 80, 96,
|
||||
3, 72, 5, 16, 139, 56, 186, 1, 0, 0, 48, 0,
|
||||
36, 68, 0, 0, 0, 3, 16, 2, 1, 0, 0, 240,
|
||||
149, 255, 215, 65, 156, 48, 214, 120, 122, 17, 64, 0,
|
||||
164, 132, 233, 65, 0, 0, 0, 35, 40, 18, 116, 0,
|
||||
232, 48, 144, 42, 18, 0, 0, 0, 255, 239, 255, 127,
|
||||
133, 83, 244, 239, 255, 255, 50, 152, 131, 76, 245, 66,
|
||||
80, 221, 95, 20, 0, 128, 192, 68, 140, 22, 159, 251,
|
||||
55, 125, 237, 127, 189, 36, 175, 1, 68, 24, 1, 85,
|
||||
72, 2, 8, 16, 40, 0, 128, 0, 16, 32, 36, 0,
|
||||
255, 255, 255, 111, 254, 1, 6, 136, 10, 0, 22, 1,
|
||||
1, 21, 43, 62, 1, 0, 0, 16, 128, 41, 68, 2,
|
||||
2, 0, 225, 191, 191, 3, 0, 0, 16, 212, 167, 209,
|
||||
84, 158, 68, 223, 253, 143, 102, 179, 85, 32, 212, 195,
|
||||
216, 48, 61, 128, 0, 0, 0, 76, 180, 16, 193, 132,
|
||||
110, 80, 0, 34, 16, 127, 191, 219, 7, 0, 32, 16,
|
||||
128, 178, 5, 16, 0, 64, 0, 0, 16, 2, 17, 0,
|
||||
240, 255, 253, 63, 5, 0, 18, 129, 0, 0, 0, 8,
|
||||
0, 16, 12, 2, 0, 0, 0, 0, 131, 48, 2, 40,
|
||||
132, 0, 51, 192, 35, 36, 0, 0, 0, 203, 228, 58,
|
||||
66, 200, 20, 241, 255, 255, 127, 22, 1, 1, 132, 80,
|
||||
7, 252, 255, 255, 15, 1, 0, 64, 16, 56, 1, 1,
|
||||
28, 18, 64, 225, 118, 22, 8, 3, 16, 0, 0, 0,
|
||||
1, 0, 0, 0, 0, 0, 32, 36, 10, 64, 128, 0,
|
||||
0,
|
||||
}
|
||||
|
||||
// altLangISO3 holds an alphabetically sorted list of 3-letter language code alternatives
|
||||
// to 2-letter language codes that cannot be derived using the method described above.
|
||||
// Each 3-letter code is followed by its 1-byte langID.
|
||||
// Size: 44 bytes
|
||||
var altLangISO3 string = "corchbs\xa1hebPkin\x9bspa(yidR\xff\xff\xff\xff"
|
||||
|
||||
// langOldMap maps deprecated langIDs to their suggested replacements.
|
||||
// Size: 108 bytes, 27 elements
|
||||
var langOldMap = [27]struct {
|
||||
from uint16
|
||||
to uint16
|
||||
}{
|
||||
{from: 0x4b, to: 0x46},
|
||||
{from: 0x50, to: 0x3c},
|
||||
{from: 0x52, to: 0xcd},
|
||||
{from: 0x54, to: 0x53},
|
||||
{from: 0x77, to: 0x99},
|
||||
{from: 0x35b, to: 0x253d},
|
||||
{from: 0x465, to: 0xa85},
|
||||
{from: 0x660, to: 0x2ec4},
|
||||
{from: 0x717, to: 0x21fc},
|
||||
{from: 0x720, to: 0x765},
|
||||
{from: 0x75e, to: 0x3dcb},
|
||||
{from: 0xa81, to: 0x1bfc},
|
||||
{from: 0xa90, to: 0x2a3c},
|
||||
{from: 0x10c1, to: 0x93d},
|
||||
{from: 0x151b, to: 0x18a3},
|
||||
{from: 0x1616, to: 0x2752},
|
||||
{from: 0x1bdf, to: 0x1c7f},
|
||||
{from: 0x1e24, to: 0x2a07},
|
||||
{from: 0x226b, to: 0x2256},
|
||||
{from: 0x2307, to: 0x2256},
|
||||
{from: 0x3090, to: 0x1472},
|
||||
{from: 0x33d4, to: 0x2dca},
|
||||
{from: 0x340e, to: 0x3548},
|
||||
{from: 0x3434, to: 0x3b62},
|
||||
{from: 0x3457, to: 0x2a3c},
|
||||
{from: 0x4051, to: 0x2ec4},
|
||||
{from: 0x416c, to: 0x1fab},
|
||||
}
|
||||
|
||||
// langMacroMap maps languages to their macro language replacement, if applicable.
|
||||
// Size: 260 bytes, 65 elements
|
||||
var langMacroMap = [65]struct {
|
||||
from uint16
|
||||
to uint16
|
||||
}{
|
||||
{from: 0x86, to: 0x7e},
|
||||
{from: 0xa1, to: 0xa9},
|
||||
{from: 0xb7, to: 0xee3},
|
||||
{from: 0xc0, to: 0x4},
|
||||
{from: 0x1d2, to: 0x1a53},
|
||||
{from: 0x204, to: 0xa8},
|
||||
{from: 0x28f, to: 0x7},
|
||||
{from: 0x355, to: 0xa},
|
||||
{from: 0x367, to: 0xb},
|
||||
{from: 0x3ae, to: 0x383},
|
||||
{from: 0x3b7, to: 0x452},
|
||||
{from: 0x5d8, to: 0x2000},
|
||||
{from: 0x5df, to: 0x580},
|
||||
{from: 0x73d, to: 0x32dd},
|
||||
{from: 0x761, to: 0xd1},
|
||||
{from: 0x85b, to: 0x1a},
|
||||
{from: 0x96a, to: 0xa34},
|
||||
{from: 0x979, to: 0x22d1},
|
||||
{from: 0x99a, to: 0x99d},
|
||||
{from: 0x9a0, to: 0x4562},
|
||||
{from: 0xc72, to: 0x29},
|
||||
{from: 0xca6, to: 0x2091},
|
||||
{from: 0xd42, to: 0x4a},
|
||||
{from: 0xe1b, to: 0x4},
|
||||
{from: 0x1012, to: 0x2c},
|
||||
{from: 0x10c5, to: 0x8e},
|
||||
{from: 0x10d4, to: 0x1267},
|
||||
{from: 0x120c, to: 0x1225},
|
||||
{from: 0x12ba, to: 0x37},
|
||||
{from: 0x131c, to: 0x10c6},
|
||||
{from: 0x13ab, to: 0x1358},
|
||||
{from: 0x13b8, to: 0x1495},
|
||||
{from: 0x142c, to: 0x322d},
|
||||
{from: 0x16fc, to: 0x4f},
|
||||
{from: 0x1bfc, to: 0x76},
|
||||
{from: 0x1c85, to: 0x61},
|
||||
{from: 0x1c90, to: 0x5f},
|
||||
{from: 0x1c94, to: 0x56},
|
||||
{from: 0x1c9b, to: 0x5e},
|
||||
{from: 0x1cd7, to: 0x62},
|
||||
{from: 0x1e04, to: 0x4cc},
|
||||
{from: 0x2014, to: 0x6d},
|
||||
{from: 0x214b, to: 0x6de},
|
||||
{from: 0x229b, to: 0x2dc1},
|
||||
{from: 0x24b6, to: 0x81},
|
||||
{from: 0x26bc, to: 0x8d},
|
||||
{from: 0x279e, to: 0x8f},
|
||||
{from: 0x289e, to: 0x94},
|
||||
{from: 0x28ea, to: 0x2b},
|
||||
{from: 0x29a1, to: 0x70},
|
||||
{from: 0x29c3, to: 0x1de7},
|
||||
{from: 0x2d79, to: 0x96},
|
||||
{from: 0x2f08, to: 0x2f30},
|
||||
{from: 0x31fa, to: 0x1c67},
|
||||
{from: 0x3218, to: 0x9d},
|
||||
{from: 0x329f, to: 0xae},
|
||||
{from: 0x34fe, to: 0xb8},
|
||||
{from: 0x36f0, to: 0x933},
|
||||
{from: 0x383b, to: 0xc6},
|
||||
{from: 0x3f1a, to: 0x1cc6},
|
||||
{from: 0x3f6f, to: 0x935},
|
||||
{from: 0x4085, to: 0xcd},
|
||||
{from: 0x42e0, to: 0x42e7},
|
||||
{from: 0x44b8, to: 0x79},
|
||||
{from: 0x4549, to: 0xcf},
|
||||
}
|
||||
|
||||
// tagAlias holds a mapping from legacy and grandfathered tags to their locale ID.
|
||||
// Size: 497 bytes
|
||||
var tagAlias = map[string]uint16{
|
||||
"aa-SAAHO": 12872,
|
||||
"art-lojban": 6336,
|
||||
"i-ami": 532,
|
||||
"i-bnn": 1239,
|
||||
"i-hak": 4954,
|
||||
"i-klingon": 13349,
|
||||
"i-lux": 102,
|
||||
"i-navajo": 138,
|
||||
"i-pwn": 10937,
|
||||
"i-tao": 13070,
|
||||
"i-tay": 13080,
|
||||
"i-tsu": 13544,
|
||||
"no-BOKMAL": 126,
|
||||
"no-NYNORSK": 133,
|
||||
"no-bok": 126,
|
||||
"no-nyn": 133,
|
||||
"sgn-BE-FR": 12511,
|
||||
"sgn-BE-NL": 14583,
|
||||
"sgn-CH-DE": 12542,
|
||||
"zh-guoyu": 209,
|
||||
"zh-hakka": 4954,
|
||||
"zh-min": 196,
|
||||
"zh-min-nan": 9013,
|
||||
"zh-xiang": 5425,
|
||||
}
|
||||
|
||||
const unknownScript = 186
|
||||
|
||||
// script is an alphabetically sorted list of ISO 15924 codes. The index
|
||||
// of the script in the string, divided by 4, is the internal script ID.
|
||||
// Size: 768 bytes
|
||||
var script string = "" +
|
||||
"AfakAghbArabArmiArmnAvstBaliBamuBassBatkBengBlisBopoBrahBrai" +
|
||||
"BugiBuhdCakmCansCariChamCherCirtCoptCprtCyrlCyrsDevaDsrtDupl" +
|
||||
"EgydEgyhEgypElbaEthiGeokGeorGlagGothGranGrekGujrGuruHangHani" +
|
||||
"HanoHansHantHebrHiraHluwHmngHrktHungIndsItalJavaJpanJurcKali" +
|
||||
"KanaKharKhmrKhojKndaKoreKpelKthiLanaLaooLatfLatgLatnLepcLimb" +
|
||||
"LinaLinbLisuLomaLyciLydiMahjMandManiMayaMendMercMeroMlymMong" +
|
||||
"MoonMrooMteiMymrNarbNbatNkgbNkooNshuOgamOlckOrkhOryaOsmaPalm" +
|
||||
"PermPhagPhliPhlpPhlvPhnxPlrdPrtiQaaaQaabQaacQaadQaaeQaafQaag" +
|
||||
"QaahQaaiQaajQaakQaalQaamQaanQaaoQaapQaaqQaarQaasQaatQaauQaav" +
|
||||
"QaawQaaxQaayQaazRjngRoroRunrSamrSaraSarbSaurSgnwShawShrdSind" +
|
||||
"SinhSoraSundSyloSyrcSyreSyrjSyrnTagbTakrTaleTaluTamlTangTavt" +
|
||||
"TeluTengTfngTglgThaaThaiTibtTirhUgarVaiiVispWaraWoleXpeoXsux" +
|
||||
"YiiiZinhZmthZsymZxxxZyyyZzzz\xff\xff\xff\xff"
|
||||
|
||||
// suppressScript is an index from langID to the dominant script for that language,
|
||||
// if it exists. If a script is given, it should be suppressed from the language tag.
|
||||
// Size: 212 bytes, 212 elements
|
||||
var suppressScript = [212]uint8{
|
||||
186, 25, 186, 72, 186, 34, 186, 2, 10, 186, 72, 186,
|
||||
186, 25, 25, 186, 186, 186, 10, 186, 186, 72, 72, 186,
|
||||
72, 186, 186, 72, 186, 186, 72, 72, 72, 72, 169, 171,
|
||||
186, 40, 72, 72, 72, 72, 72, 2, 186, 72, 72, 72,
|
||||
72, 72, 72, 72, 72, 186, 72, 72, 72, 41, 72, 186,
|
||||
48, 27, 186, 72, 72, 72, 72, 4, 186, 186, 72, 186,
|
||||
186, 186, 186, 72, 186, 72, 72, 186, 48, 57, 186, 186,
|
||||
186, 36, 186, 186, 186, 25, 72, 62, 64, 65, 27, 186,
|
||||
186, 186, 186, 186, 186, 72, 72, 186, 186, 72, 69, 72,
|
||||
186, 72, 27, 72, 72, 72, 186, 186, 25, 88, 186, 72,
|
||||
27, 72, 72, 186, 93, 72, 72, 72, 72, 27, 186, 72,
|
||||
72, 72, 72, 97, 72, 72, 186, 72, 186, 186, 72, 102,
|
||||
186, 42, 186, 72, 2, 72, 72, 72, 72, 72, 25, 72,
|
||||
186, 186, 186, 186, 72, 186, 150, 72, 72, 72, 186, 72,
|
||||
72, 186, 72, 72, 186, 72, 72, 162, 165, 72, 186, 170,
|
||||
34, 186, 72, 72, 72, 72, 72, 72, 72, 72, 186, 72,
|
||||
186, 186, 186, 25, 186, 2, 186, 72, 72, 186, 186, 186,
|
||||
72, 48, 186, 186, 11, 186, 72, 186,
|
||||
}
|
||||
|
||||
const unknownRegion = 338
|
||||
|
||||
// isoRegionOffset needs to be added to the index of regionISO to obtain the regionID
|
||||
// for 2-letter ISO codes. (The first isoRegionOffset regionIDs are reserved for
|
||||
// the UN.M49 codes used for groups.)
|
||||
const isoRegionOffset = 30
|
||||
|
||||
// regionISO holds a list of alphabetically sorted 2-letter ISO region codes.
|
||||
// Each 2-letter code is followed by two bytes with the following meaning:
|
||||
// - [A-Z]{2}: the first letter of the 2-letter code plus these two
|
||||
// letters form the 3-letter ISO code.
|
||||
// - 0, n: index into altRegionISO3.
|
||||
// Size: 1256 bytes
|
||||
var regionISO string = "" +
|
||||
"AAAAACSCADNDAEREAFFGAGTGAIIAALLBAMRMANNTAOGOAQTAARRGASSMATUT" +
|
||||
"AUUSAWBWAXLAAZZEBAIHBBRBBDGDBEELBFFABGGRBHHRBIDIBJENBLLMBMMU" +
|
||||
"BNRNBOOLBQESBRRABSHSBTTNBUURBVVTBWWABYLRBZLZCAANCCCKCDODCFAF" +
|
||||
"CGOGCHHECIIVCKOKCLHLCMMRCNHNCOOLCPPTCRRICS\x00\x00CUUBCVPVCW" +
|
||||
"UWCXXRCYYPCZZEDDDRDEEUDGGADJJIDKNKDMMADOOMDZZAEA ECCUEESTEG" +
|
||||
"GYEHSHERRIESSPETTHEU\x00\x03FIINFJJIFKLKFMSMFOROFRRAFXXXGAAB" +
|
||||
"GBBRGDRDGEEOGFUFGGGYGHHAGIIBGLRLGMMBGNINGPLPGQNQGRRCGS\x00" +
|
||||
"\x06GTTMGUUMGWNBGYUYHKKGHMMDHNNDHRRVHTTIHUUNIC IDDNIERLILSR" +
|
||||
"IMMNINNDIOOTIQRQIRRNISSLITTAJEEYJMAMJOORJPPNKEENKGGZKHHMKIIR" +
|
||||
"KM\x00\tKNNAKP\x00\fKRORKWWTKY\x00\x0fKZAZLAAOLBBNLCCALIIELK" +
|
||||
"KALRBRLSSOLTTULUUXLVVALYBYMAARMCCOMDDAMENEMFAFMGDGMHHLMKKDML" +
|
||||
"LIMMMRMNNGMOACMPNPMQTQMRRTMSSRMTLTMUUSMVDVMWWIMXEXMYYSMZOZNA" +
|
||||
"AMNCCLNEERNFFKNGGANIICNLLDNOORNPPLNRRUNTTZNUIUNZZLOMMNPAANPE" +
|
||||
"ERPFYFPGNGPHHLPKAKPLOLPM\x00\x12PNCNPRRIPSSEPTRTPWLWPYRYQAAT" +
|
||||
"QMMMQNNNQOOOQPPPQQQQQRRRQSSSQTTTQU QVVVQWWWQXXXQYYYQZZZREEU" +
|
||||
"ROOURS\x00\x15RUUSRWWASAAUSBLBSCYCSDDNSEWESGGPSHHNSIVNSJJMSK" +
|
||||
"VKSLLESMMRSNENSOOMSRURSSSDSTTPSUUNSVLVSXXMSYYRSZWZTAAATCCATD" +
|
||||
"CDTF\x00\x18TGGOTHHATJJKTKKLTLLSTMKMTNUNTOONTPMPTRURTTTOTVUV" +
|
||||
"TWWNTZZAUAKRUGGAUMMIUSSAUYRYUZZBVAATVCCTVEENVGGBVIIRVNNMVUUT" +
|
||||
"WFLFWSSMXAAAXBBBXCCCXDDDXEEEXFFFXGGGXHHHXIIIXJJJXKKKXLLLXMMM" +
|
||||
"XNNNXOOOXPPPXQQQXRRRXSSSXTTTXUUUXVVVXWWWXXXXXYYYXZZZYDMDYEEM" +
|
||||
"YT\x00\x1bYUUGZAAFZMMBZRARZWWEZZZZ\xff\xff\xff\xff"
|
||||
|
||||
// altRegionISO3 holds a list of 3-letter region codes that cannot be
|
||||
// mapped to 2-letter codes using the default algorithm. This is a short list.
|
||||
// Size: 46 bytes
|
||||
var altRegionISO3 string = "SCGQUUSGSCOMPRKCYMSPMSRBATFMYT"
|
||||
|
||||
// altRegionIDs holds a list of regionIDs the positions of which match those
|
||||
// of the 3-letter ISO codes in altRegionISO3.
|
||||
// Size: 20 bytes, 10 elements
|
||||
var altRegionIDs = [10]uint16{
|
||||
85, 108, 130, 160, 162, 165, 222, 246, 274, 332,
|
||||
}
|
||||
|
||||
// m49 maps regionIDs to UN.M49 codes. The first isoRegionOffset entries are
|
||||
// codes indicating collections of regions.
|
||||
// Size: 678 bytes, 339 elements
|
||||
var m49 = [339]uint16{
|
||||
1, 2, 3, 5, 9, 11, 13, 14, 15, 17, 18, 19,
|
||||
21, 29, 30, 34, 35, 39, 53, 54, 57, 61, 142, 143,
|
||||
145, 150, 151, 154, 155, 419, 958, 0, 20, 784, 4, 28,
|
||||
660, 8, 51, 530, 24, 10, 32, 16, 40, 36, 533, 248,
|
||||
31, 70, 52, 50, 56, 854, 100, 48, 108, 204, 652, 60,
|
||||
96, 68, 535, 76, 44, 64, 104, 74, 72, 112, 84, 124,
|
||||
166, 180, 140, 178, 756, 384, 184, 152, 120, 156, 170, 0,
|
||||
188, 891, 192, 132, 531, 162, 196, 203, 278, 276, 0, 262,
|
||||
208, 212, 214, 12, 0, 218, 233, 818, 732, 232, 724, 231,
|
||||
967, 246, 242, 238, 583, 234, 250, 249, 266, 826, 308, 268,
|
||||
254, 831, 288, 292, 304, 270, 324, 312, 226, 300, 239, 320,
|
||||
316, 624, 328, 344, 334, 340, 191, 332, 348, 0, 360, 372,
|
||||
376, 833, 356, 86, 368, 364, 352, 380, 832, 388, 400, 392,
|
||||
404, 417, 116, 296, 174, 659, 408, 410, 414, 136, 398, 418,
|
||||
422, 662, 438, 144, 430, 426, 440, 442, 428, 434, 504, 492,
|
||||
498, 499, 663, 450, 584, 807, 466, 104, 496, 446, 580, 474,
|
||||
478, 500, 470, 480, 462, 454, 484, 458, 508, 516, 540, 562,
|
||||
574, 566, 558, 528, 578, 524, 520, 536, 570, 554, 512, 591,
|
||||
604, 258, 598, 608, 586, 616, 666, 612, 630, 275, 620, 585,
|
||||
600, 634, 959, 960, 961, 962, 963, 964, 965, 966, 0, 968,
|
||||
969, 970, 971, 972, 638, 642, 688, 643, 646, 682, 90, 690,
|
||||
729, 752, 702, 654, 705, 744, 703, 694, 674, 686, 706, 740,
|
||||
728, 678, 810, 222, 534, 760, 748, 0, 796, 148, 260, 768,
|
||||
764, 762, 772, 626, 795, 788, 776, 626, 792, 780, 798, 158,
|
||||
834, 804, 800, 581, 840, 858, 860, 336, 670, 862, 92, 850,
|
||||
704, 548, 876, 882, 973, 974, 975, 976, 977, 978, 979, 980,
|
||||
981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992,
|
||||
993, 994, 995, 996, 997, 998, 720, 887, 175, 891, 710, 894,
|
||||
180, 716, 999,
|
||||
}
|
||||
|
||||
// currency holds an alphabetically sorted list of canonical 3-letter currency identifiers.
|
||||
// Each identifier is followed by a byte of which the 6 most significant bits
|
||||
// indicate the rounding and the 2 least significant bits indicate the
|
||||
// number of decimal positions.
|
||||
// Size: 1208 bytes
|
||||
var currency string = "" +
|
||||
"ADP\x04AED\x06AFA\x06AFN\x04ALK\x06ALL\x04AMD\x04ANG\x06AOA" +
|
||||
"\x06AOK\x06AON\x06AOR\x06ARA\x06ARL\x06ARM\x06ARP\x06ARS\x06" +
|
||||
"ATS\x06AUD\x06AWG\x06AZM\x06AZN\x06BAD\x06BAM\x06BAN\x06BBD" +
|
||||
"\x06BDT\x06BEC\x06BEF\x06BEL\x06BGL\x06BGM\x06BGN\x06BGO\x06" +
|
||||
"BHD\aBIF\x04BMD\x06BND\x06BOB\x06BOL\x06BOP\x06BOV\x06BRB" +
|
||||
"\x06BRC\x06BRE\x06BRL\x06BRN\x06BRR\x06BRZ\x06BSD\x06BTN\x06" +
|
||||
"BUK\x06BWP\x06BYB\x06BYR\x04BZD\x06CAD\x06CDF\x06CHE\x06CHF" +
|
||||
"\x06CHW\x06CLE\x06CLF\x04CLP\x04CNX\x06CNY\x06COP\x04COU\x06" +
|
||||
"CRC\x04CSD\x06CSK\x06CUC\x06CUP\x06CVE\x06CYP\x06CZK\x06DDM" +
|
||||
"\x06DEM\x06DJF\x04DKK\x06DOP\x06DZD\x06ECS\x06ECV\x06EEK\x06" +
|
||||
"EGP\x06ERN\x06ESA\x06ESB\x06ESP\x04ETB\x06EUR\x06FIM\x06FJD" +
|
||||
"\x06FKP\x06FRF\x06GBP\x06GEK\x06GEL\x06GHC\x06GHS\x06GIP\x06" +
|
||||
"GMD\x06GNF\x04GNS\x06GQE\x06GRD\x06GTQ\x06GWE\x06GWP\x06GYD" +
|
||||
"\x04HKD\x06HNL\x06HRD\x06HRK\x06HTG\x06HUF\x04IDR\x04IEP\x06" +
|
||||
"ILP\x06ILR\x06ILS\x06INR\x06IQD\x04IRR\x04ISJ\x06ISK\x04ITL" +
|
||||
"\x04JMD\x06JOD\aJPY\x04KES\x06KGS\x06KHR\x06KMF\x04KPW\x04KR" +
|
||||
"H\x06KRO\x06KRW\x04KWD\aKYD\x06KZT\x06LAK\x04LBP\x04LKR\x06L" +
|
||||
"RD\x06LSL\x06LTL\x06LTT\x06LUC\x06LUF\x04LUL\x06LVL\x06LVR" +
|
||||
"\x06LYD\aMAD\x06MAF\x06MCF\x06MDC\x06MDL\x06MGA\x04MGF\x04MK" +
|
||||
"D\x06MKN\x06MLF\x06MMK\x04MNT\x04MOP\x06MRO\x04MTL\x06MTP" +
|
||||
"\x06MUR\x04MVP\x06MVR\x06MWK\x06MXN\x06MXP\x06MXV\x06MYR\x06" +
|
||||
"MZE\x06MZM\x06MZN\x06NAD\x06NGN\x06NIC\x06NIO\x06NLG\x06NOK" +
|
||||
"\x06NPR\x06NZD\x06OMR\aPAB\x06PEI\x06PEN\x06PES\x06PGK\x06PH" +
|
||||
"P\x06PKR\x04PLN\x06PLZ\x06PTE\x06PYG\x04QAR\x06RHD\x06ROL" +
|
||||
"\x06RON\x06RSD\x04RUB\x06RUR\x06RWF\x04SAR\x06SBD\x06SCR\x06" +
|
||||
"SDD\x06SDG\x06SDP\x06SEK\x06SGD\x06SHP\x06SIT\x06SKK\x06SLL" +
|
||||
"\x04SOS\x04SRD\x06SRG\x06SSP\x06STD\x04SUR\x06SVC\x06SYP\x04" +
|
||||
"SZL\x06THB\x06TJR\x06TJS\x06TMM\x04TMT\x06TND\aTOP\x06TPE" +
|
||||
"\x06TRL\x04TRY\x06TTD\x06TWD\x06TZS\x04UAH\x06UAK\x06UGS\x06" +
|
||||
"UGX\x04USD\x06USN\x06USS\x06UYI\x06UYP\x06UYU\x06UZS\x04VEB" +
|
||||
"\x06VEF\x06VND\x04VNN\x06VUV\x04WST\x06XAF\x04XAG\x06XAU\x06" +
|
||||
"XBA\x06XBB\x06XBC\x06XBD\x06XCD\x06XDR\x06XEU\x06XFO\x06XFU" +
|
||||
"\x06XOF\x04XPD\x06XPF\x04XPT\x06XRE\x06XSU\x06XTS\x06XUA\x06" +
|
||||
"XXX\x06YDD\x06YER\x04YUD\x06YUM\x06YUN\x06YUR\x06ZAL\x06ZAR" +
|
||||
"\x06ZMK\x04ZMW\x06ZRN\x06ZRZ\x06ZWD\x04ZWL\x06ZWR\x06\xff" +
|
||||
"\xff\xff\xff"
|
||||
|
||||
const unknownCurrency = 281
|
||||
|
||||
// nRegionGroups is the number of region groups. All regionIDs < nRegionGroups
|
||||
// are groups.
|
||||
const nRegionGroups = 32
|
||||
|
||||
// regionInclusion maps region identifiers to sets of regions in regionInclusionBits,
|
||||
// where each set holds all groupings that are directly connected in a region
|
||||
// containment graph.
|
||||
// Size: 339 bytes, 339 elements
|
||||
var regionInclusion = [339]uint8{
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
|
||||
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 32, 33, 34, 35, 36, 37,
|
||||
37, 34, 35, 37, 38, 33, 39, 40, 41, 42, 37, 43,
|
||||
35, 34, 37, 36, 41, 44, 45, 35, 46, 44, 37, 47,
|
||||
48, 39, 37, 39, 37, 36, 48, 33, 49, 50, 51, 47,
|
||||
33, 38, 38, 38, 52, 44, 40, 39, 38, 53, 39, 33,
|
||||
51, 34, 37, 44, 37, 33, 54, 45, 52, 41, 33, 46,
|
||||
55, 37, 37, 56, 56, 39, 55, 56, 56, 46, 57, 46,
|
||||
31, 55, 58, 39, 59, 43, 41, 52, 38, 55, 37, 35,
|
||||
39, 43, 44, 34, 47, 44, 44, 37, 38, 57, 33, 51,
|
||||
59, 44, 39, 53, 33, 51, 34, 37, 45, 56, 48, 55,
|
||||
35, 43, 36, 33, 35, 36, 43, 57, 43, 37, 35, 53,
|
||||
46, 60, 48, 59, 46, 37, 53, 53, 35, 37, 60, 48,
|
||||
35, 37, 52, 36, 44, 49, 55, 41, 55, 56, 56, 52,
|
||||
50, 34, 37, 46, 59, 34, 44, 48, 53, 53, 59, 37,
|
||||
44, 37, 57, 46, 36, 46, 51, 48, 46, 49, 58, 44,
|
||||
42, 44, 51, 41, 43, 36, 59, 35, 40, 42, 35, 51,
|
||||
39, 40, 58, 48, 36, 45, 47, 40, 37, 35, 57, 59,
|
||||
39, 35, 32, 32, 30, 32, 32, 32, 32, 32, 61, 32,
|
||||
32, 32, 32, 32, 46, 45, 34, 50, 46, 35, 58, 46,
|
||||
56, 55, 48, 44, 57, 43, 45, 44, 34, 44, 46, 39,
|
||||
56, 38, 50, 51, 37, 35, 49, 33, 37, 38, 33, 44,
|
||||
48, 60, 40, 48, 60, 56, 40, 48, 35, 37, 40, 53,
|
||||
46, 50, 46, 33, 47, 39, 60, 34, 37, 39, 37, 37,
|
||||
48, 58, 40, 40, 32, 32, 32, 32, 32, 32, 32, 32,
|
||||
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
||||
32, 32, 32, 32, 32, 32, 35, 35, 46, 34, 49, 46,
|
||||
38, 46, 32,
|
||||
}
|
||||
|
||||
// regionInclusionBits is an array of bit vectors where every vector represents
|
||||
// a set of region groupings. These sets are used to compute the distance
|
||||
// between two regions for the purpose of locale matching.
|
||||
// Size: 300 bytes, 75 elements
|
||||
var regionInclusionBits = [75]uint32{
|
||||
37750803, 1955, 14404, 536872968, 1077674001, 34, 536873028, 130, 258, 514, 1026, 536885325,
|
||||
6148, 536881156, 4210688, 4227072, 4259840, 33685504, 262160, 524304, 1048592, 2097168, 29474817, 12582912,
|
||||
20971520, 2650931201, 100663296, 167772160, 301989888, 536881224, 1073741840, 2181038080, 1, 1073741824, 131072, 16777216,
|
||||
32768, 8192, 512, 8, 2097152, 2415919104, 262144, 134217728, 32, 2214592512, 128, 4096,
|
||||
65536, 1024, 67108864, 64, 268435456, 16384, 2164260864, 2281701376, 256, 2147614720, 524288, 1048576,
|
||||
8388608, 33554432, 4294967295, 37752755, 1115424787, 574634079, 63031315, 2655127571, 2449473536, 2248146944, 2202009600, 2315255808,
|
||||
2181169152, 2680406017, 2680408083,
|
||||
}
|
||||
|
||||
// regionInclusionNext marks, for each entry in regionInclusionBits, the set of
|
||||
// all groups that are reachable from the groups set in the respective entry.
|
||||
// Size: 75 bytes, 75 elements
|
||||
var regionInclusionNext = [75]uint8{
|
||||
62, 63, 11, 11, 64, 1, 11, 1, 1, 1, 1, 65,
|
||||
11, 11, 22, 22, 22, 25, 4, 4, 4, 4, 66, 22,
|
||||
22, 67, 25, 25, 25, 11, 4, 25, 0, 30, 17, 24,
|
||||
15, 13, 9, 3, 21, 68, 18, 27, 5, 69, 7, 12,
|
||||
16, 10, 26, 6, 28, 14, 70, 71, 8, 72, 19, 20,
|
||||
23, 25, 62, 62, 62, 62, 62, 62, 25, 25, 73, 25,
|
||||
25, 74, 62,
|
||||
}
|
||||
|
||||
// Size: 8.7K (8876 bytes); Check: D7ACA2A7
|
Загрузка…
Ссылка в новой задаче