language/display: moved display package

The display package is now limited to contain only names for values
related to the language package. Moving it as a subpackage of
language makes both its purpose and discoverability clearer.

Sorry for the inconvenience.

Change-Id: I5321e7b81f0837f25f2523f6eb4811f82d0455b8
Reviewed-on: https://go-review.googlesource.com/17490
Reviewed-by: Hyang-Ah Hana Kim <hyangah@gmail.com>
This commit is contained in:
Marcel van Lohuizen 2015-12-05 12:05:49 +01:00
Родитель 698d493a66
Коммит b8e57db2c3
10 изменённых файлов: 45586 добавлений и 12 удалений

Просмотреть файл

@ -2,18 +2,14 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run maketables.go -output tables.go
// Package display provides display names for languages, scripts and regions in
// a requested language.
//
// The data is based on CLDR's localeDisplayNames. It includes the names of the
// draft level "contributed" or "approved". The resulting tables are quite
// large. The display package is designed so that users can reduce the linked-in
// table sizes by cherry picking the languages one wishes to support. There is a
// Dictionary defined for a selected set of common languages for this purpose.
// NOTICE: Package display has moved to golang.org/x/text/language/display.
// Use golang.org/x/text/language/display instead.
package display // import "golang.org/x/text/display"
// TODO: change package path reference to the following so that people get
// a warning of where the package has been moved to.
// package display // import "golang.org/x/text/language/display"
import (
"strings"

2
gen.go
Просмотреть файл

@ -76,7 +76,7 @@ func main() {
_ = generate("unicode/rangetable")
_ = generate("width")
_ = generate("currency", cldr, language, internal)
_ = generate("display", cldr, language)
_ = generate("language/display", cldr, language)
_ = generate("cases", norm)
_ = generate("collate", norm, cldr, language)
_ = generate("search", norm, cldr, language)

92
language/display/dict.go Normal file
Просмотреть файл

@ -0,0 +1,92 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package display
// This file contains sets of data for specific languages. Users can use these
// to create smaller collections of supported languages and reduce total table
// size.
// The variable names defined here correspond to those in package language.
// Several regional variants (for example AmericanEnglish, EuropeanSpanish or
// SimplifiedChinese) are aliases that point at the same Dictionary as their
// base language instead of at a table of their own.
var (
Afrikaans *Dictionary = &af // af
Amharic *Dictionary = &am // am
Arabic *Dictionary = &ar // ar
ModernStandardArabic *Dictionary = Arabic // ar-001
Azerbaijani *Dictionary = &az // az
Bulgarian *Dictionary = &bg // bg
Bengali *Dictionary = &bn // bn
Catalan *Dictionary = &ca // ca
Czech *Dictionary = &cs // cs
Danish *Dictionary = &da // da
German *Dictionary = &de // de
Greek *Dictionary = &el // el
English *Dictionary = &en // en
AmericanEnglish *Dictionary = English // en-US
BritishEnglish *Dictionary = English // en-GB
Spanish *Dictionary = &es // es
EuropeanSpanish *Dictionary = Spanish // es-ES
LatinAmericanSpanish *Dictionary = Spanish // es-419
Estonian *Dictionary = &et // et
Persian *Dictionary = &fa // fa
Finnish *Dictionary = &fi // fi
Filipino *Dictionary = &fil // fil
French *Dictionary = &fr // fr
Gujarati *Dictionary = &gu // gu
Hebrew *Dictionary = &he // he
Hindi *Dictionary = &hi // hi
Croatian *Dictionary = &hr // hr
Hungarian *Dictionary = &hu // hu
Armenian *Dictionary = &hy // hy
Indonesian *Dictionary = &id // id
Icelandic *Dictionary = &is // is
Italian *Dictionary = &it // it
Japanese *Dictionary = &ja // ja
Georgian *Dictionary = &ka // ka
Kazakh *Dictionary = &kk // kk
Khmer *Dictionary = &km // km
Kannada *Dictionary = &kn // kn
Korean *Dictionary = &ko // ko
Kirghiz *Dictionary = &ky // ky
Lao *Dictionary = &lo // lo
Lithuanian *Dictionary = &lt // lt
Latvian *Dictionary = &lv // lv
Macedonian *Dictionary = &mk // mk
Malayalam *Dictionary = &ml // ml
Mongolian *Dictionary = &mn // mn
Marathi *Dictionary = &mr // mr
Malay *Dictionary = &ms // ms
Burmese *Dictionary = &my // my
Nepali *Dictionary = &ne // ne
Dutch *Dictionary = &nl // nl
Norwegian *Dictionary = &no // no
Punjabi *Dictionary = &pa // pa
Polish *Dictionary = &pl // pl
Portuguese *Dictionary = &pt // pt
BrazilianPortuguese *Dictionary = Portuguese // pt-BR
EuropeanPortuguese *Dictionary = &ptPT // pt-PT
Romanian *Dictionary = &ro // ro
Russian *Dictionary = &ru // ru
Sinhala *Dictionary = &si // si
Slovak *Dictionary = &sk // sk
Slovenian *Dictionary = &sl // sl
Albanian *Dictionary = &sq // sq
Serbian *Dictionary = &sr // sr
SerbianLatin *Dictionary = &srLatn // sr-Latn
Swedish *Dictionary = &sv // sv
Swahili *Dictionary = &sw // sw
Tamil *Dictionary = &ta // ta
Telugu *Dictionary = &te // te
Thai *Dictionary = &th // th
Turkish *Dictionary = &tr // tr
Ukrainian *Dictionary = &uk // uk
Urdu *Dictionary = &ur // ur
Uzbek *Dictionary = &uz // uz
Vietnamese *Dictionary = &vi // vi
Chinese *Dictionary = &zh // zh
SimplifiedChinese *Dictionary = Chinese // zh-Hans
TraditionalChinese *Dictionary = &zhHant // zh-Hant
Zulu *Dictionary = &zu // zu
)

Просмотреть файл

@ -0,0 +1,39 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package display
import (
"fmt"
"testing"
"golang.org/x/text/internal/testtext"
)
// TestLinking checks that a program referencing only the English Dictionary
// is at least 1.5MB smaller than one using the generic Tags factory, i.e.
// that the unused display tables are dropped from the binary.
func TestLinking(t *testing.T) {
	base := getSize(t, `display.Tags(language.English).Name(language.English)`)
	compact := getSize(t, `display.English.Languages().Name(language.English)`)

	if d := base - compact; d < 1.5*1024*1024 {
		// Report the difference itself: the format has a single verb that
		// describes size(base)-size(compact).
		t.Errorf("size(base)-size(compact) was %d; want > 1.5MB", d)
	}
}
func getSize(t *testing.T, main string) int {
size, err := testtext.CodeSize(fmt.Sprintf(body, main))
if err != nil {
t.Skipf("skipping link size test; binary size could not be determined: %v", err)
}
return size
}
// body is the source template of the program whose size getSize measures.
// Its single %s verb is replaced by the statement placed inside main.
const body = `package main
import (
"golang.org/x/text/language"
"golang.org/x/text/language/display"
)
func main() {
%s
}
`

343
language/display/display.go Normal file
Просмотреть файл

@ -0,0 +1,343 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run maketables.go -output tables.go
// Package display provides display names for languages, scripts and regions in
// a requested language.
//
// The data is based on CLDR's localeDisplayNames. It includes the names of the
// draft level "contributed" or "approved". The resulting tables are quite
// large. The display package is designed so that users can reduce the linked-in
// table sizes by cherry picking the languages one wishes to support. There is a
// Dictionary defined for a selected set of common languages for this purpose.
package display // import "golang.org/x/text/language/display"
import (
"strings"
"golang.org/x/text/language"
)
/*
TODO:
All fairly low priority at the moment:
- Include alternative and variants as an option (using func options).
- Option for returning the empty string for undefined values.
- Support variants, currencies, time zones, option names and other data
provided in CLDR.
- Do various optimizations:
- Reduce size of offset tables.
- Consider compressing infrequently used languages and decompress on demand.
*/
// A Namer is used to get the name for a given value, such as a Tag, Language,
// Script or Region.
//
// Namers are obtained from the Languages, Scripts, Regions and Tags functions,
// from the methods of the same names on a Dictionary, or through Self.
type Namer interface {
// Name returns a display string for the given value. A Namer returns an
// empty string for values it does not support. A Namer may support naming
// an unspecified value. For example, when getting the name for a region for
// a tag that does not have a defined Region, it may return the name for an
// unknown region. It is up to the user to filter calls to Name for values
// for which one does not want to have a name string.
Name(x interface{}) string
}
// Package-level coverage information and the matcher shared by the Namer
// factories. All three are populated by init.
var (
// Supported lists the languages for which names are defined.
Supported language.Coverage
// The set of all possible values for which names are defined. Note that not
// all Namer implementations will cover all the values of a given type.
// A Namer will return the empty string for unsupported values.
Values language.Coverage
// matcher is built from the supported tags; the index it reports for a
// requested tag is used to select the tables to consult.
matcher language.Matcher
)
// init decodes the '|'-terminated list of supported tags and uses it to set
// up matcher and Supported. Values is assembled from the language tag set
// together with the supported scripts and regions.
func init() {
	tags := make([]language.Tag, numSupported)
	rest := supported
	for i := 0; i < len(tags); i++ {
		end := strings.IndexByte(rest, '|')
		// Both right-hand sides are evaluated against the old value of rest.
		tags[i], rest = language.Raw.Make(rest[:end]), rest[end+1:]
	}

	matcher = language.NewMatcher(tags)
	Supported = language.NewCoverage(tags)
	Values = language.NewCoverage(langTagSet.Tags, supportedScripts, supportedRegions)
}
// Languages returns a Namer for naming languages, or nil if there is no data
// for the given tag. The value passed to Name must be either a language.Base
// or a language.Tag. Note that the result may differ between passing a tag
// and passing its base language. For example, for English, passing "nl-BE"
// would return Flemish whereas passing "nl" returns "Dutch".
func Languages(t language.Tag) Namer {
	_, index, conf := matcher.Match(t)
	if conf == language.No {
		return nil
	}
	return languageNamer(index)
}
// languageNamer is a Namer for language names. Its value is the index passed
// to lookup to select the starting entry in langHeaders.
type languageNamer int
// name returns the name stored under index i, as resolved by lookup in
// langHeaders starting from n.
func (n languageNamer) name(i int) string {
return lookup(langHeaders[:], int(n), i)
}
// Name implements the Namer interface for language names.
func (n languageNamer) Name(x interface{}) string {
return nameLanguage(n, x)
}
// nonEmptyIndex follows the parent chain, starting at index, until it reaches
// a header that holds data. It returns the index of that header, or -1 if the
// chain ends before one is found.
func nonEmptyIndex(h []header, index int) int {
	for index != -1 {
		if h[index].data != "" {
			break
		}
		index = int(parents[index])
	}
	return index
}
// Scripts returns a Namer for naming scripts, or nil if there is no data for
// the given tag. The value passed to Name must be either a language.Script or
// a language.Tag. It will not attempt to infer a script for tags with an
// unspecified script.
func Scripts(t language.Tag) Namer {
	_, index, conf := matcher.Match(t)
	if conf == language.No {
		return nil
	}
	// Fall back to the closest ancestor that actually has script data.
	index = nonEmptyIndex(scriptHeaders[:], index)
	if index == -1 {
		return nil
	}
	return scriptNamer(index)
}
// scriptNamer is a Namer for script names. Its value is the index passed to
// lookup to select the starting entry in scriptHeaders.
type scriptNamer int
// name returns the name stored under index i, as resolved by lookup in
// scriptHeaders starting from n.
func (n scriptNamer) name(i int) string {
return lookup(scriptHeaders[:], int(n), i)
}
// Name implements the Namer interface for script names.
func (n scriptNamer) Name(x interface{}) string {
return nameScript(n, x)
}
// Regions returns a Namer for naming regions, or nil if there is no data for
// the given tag. The value passed to Name must be either a language.Region or
// a language.Tag. It will not attempt to infer a region for tags with an
// unspecified region.
func Regions(t language.Tag) Namer {
	_, index, conf := matcher.Match(t)
	if conf == language.No {
		return nil
	}
	// Fall back to the closest ancestor that actually has region data.
	index = nonEmptyIndex(regionHeaders[:], index)
	if index == -1 {
		return nil
	}
	return regionNamer(index)
}
// regionNamer is a Namer for region names. Its value is the index passed to
// lookup to select the starting entry in regionHeaders.
type regionNamer int
// name returns the name stored under index i, as resolved by lookup in
// regionHeaders starting from n.
func (n regionNamer) name(i int) string {
return lookup(regionHeaders[:], int(n), i)
}
// Name implements the Namer interface for region names.
func (n regionNamer) Name(x interface{}) string {
return nameRegion(n, x)
}
// Tags returns a Namer for giving a full description of a tag. The names of
// scripts and regions that are not already implied by the language name will
// be appended within parentheses. It returns nil if there is no data for the
// given tag. The value passed to Name must be a tag.
func Tags(t language.Tag) Namer {
	_, index, conf := matcher.Match(t)
	if conf == language.No {
		return nil
	}
	return tagNamer(index)
}
// tagNamer is a Namer for full tag descriptions. The same index is reused for
// the language, script and region namers it combines.
type tagNamer int
// Name implements the Namer interface for tag names.
func (n tagNamer) Name(x interface{}) string {
return nameTag(languageNamer(n), scriptNamer(n), regionNamer(n), x)
}
// lookup resolves entry want in a global table. It starts at header dict and
// moves on to the parent of the current header for as long as the entry is
// empty. It returns "" once the parent chain ends (index -1) without a match.
func lookup(table []header, dict, want int) string {
	for ; dict != -1; dict = int(parents[dict]) {
		if name := table[dict].name(want); name != "" {
			return name
		}
	}
	return ""
}
// A Dictionary holds a collection of Namers for a single language. One can
// reduce the amount of data linked in to a binary by only referencing
// Dictionaries for the languages one needs to support instead of using the
// generic Namer factories.
type Dictionary struct {
// parent is consulted when this Dictionary has no name for an entry; a nil
// parent ends the chain.
parent *Dictionary
// lang, script and region hold the name tables for the respective kinds
// of values.
lang header
script header
region header
}
// Tags returns a Namer for giving a full description of a tag. The names of
// scripts and regions that are not already implied by the language name will
// be appended within parentheses. The type passed to Name must be a tag.
// Unlike the package-level Tags function, this method always returns a
// non-nil Namer.
func (d *Dictionary) Tags() Namer {
return dictTags{d}
}
// dictTags is the Namer returned by Dictionary.Tags. It combines the
// language, script and region namers of the same Dictionary.
type dictTags struct {
d *Dictionary
}
// Name implements the Namer interface for tag names.
func (n dictTags) Name(x interface{}) string {
return nameTag(dictLanguages{n.d}, dictScripts{n.d}, dictRegions{n.d}, x)
}
// Languages returns a Namer for naming languages. The type passed to Name
// must be either language.Base or language.Tag. Note that the result may
// differ between passing a tag or its base language. For example, for
// English, passing "nl-BE" would return Flemish whereas passing "nl" returns
// "Dutch". Unlike the package-level Languages function, this method always
// returns a non-nil Namer.
func (d *Dictionary) Languages() Namer {
return dictLanguages{d}
}
// dictLanguages is the Namer returned by Dictionary.Languages.
type dictLanguages struct {
	d *Dictionary
}

// name returns the first non-empty language name stored under index i,
// searching the Dictionary and then each of its parents in turn. It returns
// "" if none of them has an entry.
func (n dictLanguages) name(i int) string {
	cur := n.d
	for cur != nil {
		if found := cur.lang.name(i); found != "" {
			return found
		}
		cur = cur.parent
	}
	return ""
}

// Name implements the Namer interface for language names.
func (n dictLanguages) Name(x interface{}) string {
	return nameLanguage(n, x)
}
// Scripts returns a Namer for naming scripts. The type passed to Name must be
// either a language.Script or a language.Tag. It will not attempt to infer a
// script for tags with an unspecified script. Unlike the package-level
// Scripts function, this method always returns a non-nil Namer.
func (d *Dictionary) Scripts() Namer {
return dictScripts{d}
}
// dictScripts is the Namer returned by Dictionary.Scripts.
type dictScripts struct {
	d *Dictionary
}

// name returns the first non-empty script name stored under index i,
// searching the Dictionary and then each of its parents in turn. It returns
// "" if none of them has an entry.
func (n dictScripts) name(i int) string {
	cur := n.d
	for cur != nil {
		if found := cur.script.name(i); found != "" {
			return found
		}
		cur = cur.parent
	}
	return ""
}

// Name implements the Namer interface for script names.
func (n dictScripts) Name(x interface{}) string {
	return nameScript(n, x)
}
// Regions returns a Namer for naming regions. The type passed to Name must be
// either a language.Region or a language.Tag. It will not attempt to infer a
// region for tags with an unspecified region. Unlike the package-level
// Regions function, this method always returns a non-nil Namer.
func (d *Dictionary) Regions() Namer {
return dictRegions{d}
}
// dictRegions is the Namer returned by Dictionary.Regions.
type dictRegions struct {
	d *Dictionary
}

// name returns the first non-empty region name stored under index i,
// searching the Dictionary and then each of its parents in turn. It returns
// "" if none of them has an entry.
func (n dictRegions) name(i int) string {
	cur := n.d
	for cur != nil {
		if found := cur.region.name(i); found != "" {
			return found
		}
		cur = cur.parent
	}
	return ""
}

// Name implements the Namer interface for region names.
func (n dictRegions) Name(x interface{}) string {
	return nameRegion(n, x)
}
// A SelfNamer implements a Namer that returns the name of a language in that
// same language. It provides a very compact mechanism to provide a
// comprehensive list of languages to users in their native language.
type SelfNamer struct {
// Supported defines the values supported by this Namer.
Supported language.Coverage
}
var (
// Self is a shared instance of a SelfNamer.
Self *SelfNamer = &self
// self backs Self; its coverage is taken from the tags of selfTagSet.
self = SelfNamer{language.NewCoverage(selfTagSet.Tags)}
)
// Name returns the name of a given language tag in the language identified by
// this tag. It supports both the language.Base and language.Tag types.
// It returns the empty string when selfTagSet has no entry for the value, or
// when the script reported for the entry differs from the script that
// getScript returns for the base language.
func (n SelfNamer) Name(x interface{}) string {
// The error from Compose is discarded; the resulting tag is used as is.
t, _ := language.All.Compose(x)
base, scr, reg := t.Raw()
baseScript := language.Script{}
if (scr == language.Script{} && reg != language.Region{}) {
// For looking up in the self dictionary, we need to select the
// maximized script. This is even the case if the script isn't
// specified.
s1, _ := t.Script()
// Only override scr when the script for the full tag differs from the
// script of the bare base language.
if baseScript = getScript(base); baseScript != s1 {
scr = s1
}
}
// Only i is newly declared here; scr and reg are overwritten with the
// values reported by index.
i, scr, reg := selfTagSet.index(base, scr, reg)
if i == -1 {
return ""
}
// Only return the display name if the script matches the expected script.
if (scr != language.Script{}) {
// baseScript is still unset if the block above was skipped.
if (baseScript == language.Script{}) {
baseScript = getScript(base)
}
if baseScript != scr {
return ""
}
}
return selfHeaders[0].name(i)
}
// getScript returns the maximized script for a base language.
// The error from Compose and the confidence reported by Script are both
// discarded.
func getScript(b language.Base) language.Script {
tag, _ := language.Raw.Compose(b)
scr, _ := tag.Script()
return scr
}

Просмотреть файл

@ -0,0 +1,621 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package display
import (
"reflect"
"testing"
"unicode"
"golang.org/x/text/language"
)
// TODO: test that tables are properly dropped by the linker for various use
// cases.
// Base languages and tags shared by the table-driven tests below.
// NOTE(review): the names suggest these are the first and last entries of the
// 2-letter, 3-letter and tag tables — confirm against the generated tables.
var (
firstLang2aa = language.MustParseBase("aa")
lastLang2zu = language.MustParseBase("zu")
firstLang3ace = language.MustParseBase("ace")
lastLang3zza = language.MustParseBase("zza")
firstTagAr001 = language.MustParse("ar-001")
lastTagZhHant = language.MustParse("zh-Hant")
)
// TestValues tests that for all languages, regions, and scripts in Values, at
// least one language has a name defined for it by checking it exists in
// English, which is assumed to be the most comprehensive. It is also tested
// that a Namer returns "" for unsupported values.
func TestValues(t *testing.T) {
// testcase pairs a Namer with a label used in failure messages.
type testcase struct {
kind string
n Namer
}
// checkDefined checks that a value exists in a Namer.
checkDefined := func(x interface{}, namers []testcase) {
for _, n := range namers {
if n.n.Name(x) == "" {
t.Errorf("%s.Name(%s): supported but no result", n.kind, x)
}
}
}
// checkUnsupported checks that a value does not exist in a Namer.
// Unlike checkDefined, it aborts the test on the first failure.
checkUnsupported := func(x interface{}, namers []testcase) {
for _, n := range namers {
if got := n.n.Name(x); got != "" {
t.Fatalf("%s.Name(%s): unsupported tag gave non-empty result: %q", n.kind, x, got)
}
}
}
// Phase 1: tags. Every tag in Values must be named; base languages known to
// package language but absent from Values must not be.
tags := map[language.Tag]bool{}
namers := []testcase{
{"Languages(en)", Languages(language.English)},
{"Tags(en)", Tags(language.English)},
{"English.Languages()", English.Languages()},
{"English.Tags()", English.Tags()},
}
for _, tag := range Values.Tags() {
checkDefined(tag, namers)
tags[tag] = true
}
for _, base := range language.Supported.BaseLanguages() {
tag, _ := language.All.Compose(base)
if !tags[tag] {
checkUnsupported(tag, namers)
}
}
// Phase 2: regions. namers is replaced with the region namers.
regions := map[language.Region]bool{}
namers = []testcase{
{"Regions(en)", Regions(language.English)},
{"English.Regions()", English.Regions()},
}
for _, r := range Values.Regions() {
checkDefined(r, namers)
regions[r] = true
}
for _, r := range language.Supported.Regions() {
if r = r.Canonicalize(); !regions[r] {
checkUnsupported(r, namers)
}
}
// Phase 3: scripts. namers is replaced with the script namers.
scripts := map[language.Script]bool{}
namers = []testcase{
{"Scripts(en)", Scripts(language.English)},
{"English.Scripts()", English.Scripts()},
}
for _, s := range Values.Scripts() {
checkDefined(s, namers)
scripts[s] = true
}
for _, s := range language.Supported.Scripts() {
// Canonicalize the script.
tag, _ := language.DeprecatedScript.Compose(s)
if _, s, _ = tag.Raw(); !scripts[s] {
checkUnsupported(s, namers)
}
}
}
// TestSupported tests that we have at least some Namers for languages that we
// claim to support. To test the claims in the documentation, it also verifies
// that if a Namer is returned, it will have at least some data.
func TestSupported(t *testing.T) {
supportedTags := Supported.Tags()
if len(supportedTags) != numSupported {
t.Errorf("number of supported was %d; want %d", len(supportedTags), numSupported)
}
// namerFuncs lists the package-level Namer factories; kind is also the key
// that defined switches on.
namerFuncs := []struct {
kind string
fn func(language.Tag) Namer
}{
{"Tags", Tags},
{"Languages", Languages},
{"Regions", Regions},
{"Scripts", Scripts},
}
// Verify that we have at least one Namer for all tags we claim to support.
tags := make(map[language.Tag]bool)
for _, tag := range supportedTags {
// Test we have at least one Namer for this supported Tag.
found := false
for _, kind := range namerFuncs {
if defined(t, kind.kind, kind.fn(tag), tag) {
found = true
}
}
if !found {
t.Errorf("%s: supported, but no data available", tag)
}
// tags records the tags seen so far to detect duplicates.
if tags[tag] {
t.Errorf("%s: included in Supported.Tags more than once", tag)
}
tags[tag] = true
}
// Verify that we have no Namers for tags we don't claim to support.
for _, base := range language.Supported.BaseLanguages() {
tag, _ := language.All.Compose(base)
// Skip tags that are supported after matching.
if _, _, conf := matcher.Match(tag); conf != language.No {
continue
}
// Test there are no Namers for this tag.
for _, kind := range namerFuncs {
if defined(t, kind.kind, kind.fn(tag), tag) {
t.Errorf("%[1]s(%[2]s) returns a Namer, but %[2]s is not in the set of supported Tags.", kind.kind, tag)
}
}
}
}
// defined reports whether n is a proper Namer: it must be non-nil and return
// a non-empty name for at least one of the values of the given kind listed in
// Values. A non-nil Namer without any content is reported as a test error.
// The tag is only used in that error message.
func defined(t *testing.T, kind string, n Namer, tag language.Tag) bool {
	if n == nil {
		return false
	}
	switch kind {
	case "Tags":
		for _, v := range Values.Tags() {
			if n.Name(v) == "" {
				continue
			}
			return true
		}
	case "Languages":
		for _, b := range Values.BaseLanguages() {
			if n.Name(b) == "" {
				continue
			}
			return true
		}
	case "Regions":
		for _, r := range Values.Regions() {
			if n.Name(r) == "" {
				continue
			}
			return true
		}
	case "Scripts":
		for _, s := range Values.Scripts() {
			if n.Name(s) == "" {
				continue
			}
			return true
		}
	}
	t.Errorf("%s(%s) returns non-nil Namer without content", kind, tag)
	return false
}
// TestCoverage checks that the English namers return a non-empty name for
// every value listed in Values, and that no two values of the same kind
// share a name.
func TestCoverage(t *testing.T) {
en := language.English
tests := []struct {
n Namer
x interface{}
}{
{Languages(en), Values.Tags()},
{Scripts(en), Values.Scripts()},
{Regions(en), Values.Regions()},
}
for i, tt := range tests {
// uniq maps each name seen so far to the value that produced it.
uniq := make(map[string]interface{})
// x holds slices of different element types, so reflection is used to
// iterate over them uniformly.
v := reflect.ValueOf(tt.x)
for j := 0; j < v.Len(); j++ {
x := v.Index(j).Interface()
s := tt.n.Name(x)
if s == "" {
t.Errorf("%d:%d:%s: missing content", i, j, x)
} else if uniq[s] != nil {
t.Errorf("%d:%d:%s: identical return value %q for %v and %v", i, j, x, s, x, uniq[s])
}
uniq[s] = x
}
}
}
// TestUpdate tests whether dictionary entries for certain languages need to be
// updated. For some languages, some of the headers may be empty or they may be
// identical to the parent. This code detects if such entries need to be updated
// after a table update.
func TestUpdate(t *testing.T) {
// Each entry pairs an alias Dictionary with the tag it stands for.
tests := []struct {
d *Dictionary
tag string
}{
{ModernStandardArabic, "ar-001"},
{AmericanEnglish, "en-US"},
{EuropeanSpanish, "es-ES"},
{BrazilianPortuguese, "pt-BR"},
{SimplifiedChinese, "zh-Hans"},
}
for _, tt := range tests {
// i is the index the matcher reports for the tag; it addresses the
// global header tables.
_, i, _ := matcher.Match(language.MustParse(tt.tag))
if !reflect.DeepEqual(tt.d.lang, langHeaders[i]) {
t.Errorf("%s: lang table update needed", tt.tag)
}
if !reflect.DeepEqual(tt.d.script, scriptHeaders[i]) {
t.Errorf("%s: script table update needed", tt.tag)
}
if !reflect.DeepEqual(tt.d.region, regionHeaders[i]) {
t.Errorf("%s: region table update needed", tt.tag)
}
}
}
// TestIndex checks tagIndex.index and tagIndex.len. In each test value the
// three strings hold concatenated keys of 2, 3 and 4 bytes respectively; the
// expected index of a key is its position when counting through the 2-byte
// keys first, then the 3-byte keys, then the 4-byte keys.
func TestIndex(t *testing.T) {
// notIn lists keys that appear in none of the test values.
notIn := []string{"aa", "xx", "zz", "aaa", "xxx", "zzz", "Aaaa", "Xxxx", "Zzzz"}
tests := []tagIndex{
{
"",
"",
"",
},
{
"bb",
"",
"",
},
{
"",
"bbb",
"",
},
{
"",
"",
"Bbbb",
},
{
"bb",
"bbb",
"Bbbb",
},
{
"bbccddyy",
"bbbcccdddyyy",
"BbbbCcccDdddYyyy",
},
}
for i, tt := range tests {
// Create the test set from the tagIndex.
cnt := 0
for sz := 2; sz <= 4; sz++ {
// Keys of size sz are stored in element sz-2.
a := tt[sz-2]
for j := 0; j < len(a); j += sz {
s := a[j : j+sz]
if idx := tt.index(s); idx != cnt {
t.Errorf("%d:%s: index was %d; want %d", i, s, idx, cnt)
}
cnt++
}
}
if n := tt.len(); n != cnt {
t.Errorf("%d: len was %d; want %d", i, n, cnt)
}
for _, x := range notIn {
if idx := tt.index(x); idx != -1 {
t.Errorf("%d:%s: index was %d; want -1", i, x, idx)
}
}
}
}
// TestTag checks the full tag descriptions produced by the Namer that Tags
// returns. dict selects the language in which names are given, tag is the
// value being named and name is the expected result.
func TestTag(t *testing.T) {
tests := []struct {
dict string
tag string
name string
}{
{"agq", "sr", ""}, // sr is in Value.Languages(), but is not supported by agq.
{"nl", "nl", "Nederlands"},
{"nl", "nl-BE", "Vlaams"},
{"en", "en", "English"},
{"en", "en-GB", "British English"},
{"en", "en-US", "American English"}, // American English in CLDR 24+
{"ru", "ru", "русский"},
{"ru", "ru-RU", "русский (Россия)"},
{"ru", "ru-Cyrl", "русский (кириллица)"},
{"en", lastLang2zu.String(), "Zulu"},
{"en", firstLang2aa.String(), "Afar"},
{"en", lastLang3zza.String(), "Zaza"},
{"en", firstLang3ace.String(), "Achinese"},
{"en", firstTagAr001.String(), "Modern Standard Arabic"},
{"en", lastTagZhHant.String(), "Traditional Chinese"},
{"en", "aaa", ""},
{"en", "zzj", ""},
// If full tag doesn't match, try without script or region.
{"en", "aa-Hans", "Afar (Simplified Han)"},
{"en", "af-Arab", "Afrikaans (Arabic)"},
{"en", "zu-Cyrl", "Zulu (Cyrillic)"},
{"en", "aa-GB", "Afar (United Kingdom)"},
{"en", "af-NA", "Afrikaans (Namibia)"},
{"en", "zu-BR", "Zulu (Brazil)"},
// Correct inheritance and language selection.
{"zh", "zh-TW", "中文 (台湾)"},
{"zh", "zh-Hant-TW", "繁体中文 (台湾)"},
{"zh-Hant", "zh-TW", "中文 (台灣)"},
{"zh-Hant", "zh-Hant-TW", "繁體中文 (台灣)"},
// Some rather arbitrary interpretations for Serbian. This is arguably
// correct and consistent with the way zh-[Hant-]TW is handled. It will
// also give results more in line with the expectations if users
// explicitly use "sh".
{"sr-Latn", "sr-ME", "srpski (Crna Gora)"},
{"sr-Latn", "sr-Latn-ME", "Srpskohrvatski (Crna Gora)"},
// Double script and region
{"nl", "en-Cyrl-BE", "Engels (Cyrillisch, België)"},
// Canonical equivalents.
{"ro", "ro-MD", "moldovenească"},
{"ro", "mo", "moldovenească"},
}
for i, tt := range tests {
d := Tags(language.MustParse(tt.dict))
// The tag is parsed with language.Raw before it is passed to Name.
if n := d.Name(language.Raw.MustParse(tt.tag)); n != tt.name {
// There are inconsistencies w.r.t. capitalization in the tests
// due to CLDR's update procedure which treats modern and other
// languages differently.
// See http://unicode.org/cldr/trac/ticket/8051.
// TODO: use language capitalization to sanitize the strings.
t.Errorf("%d:%s:%s: was %q; want %q", i, tt.dict, tt.tag, n, tt.name)
}
}
}
// TestLanguage checks the language names produced by the Namer that Languages
// returns. dict selects the language in which names are given, tag is the
// value being named and name is the expected result. Entries whose tag is at
// most three bytes long are additionally checked as a language.Base.
func TestLanguage(t *testing.T) {
tests := []struct {
dict string
tag string
name string
}{
{"agq", "sr", ""}, // sr is in Value.Languages(), but is not supported by agq.
{"nl", "nl", "Nederlands"},
{"nl", "nl-BE", "Vlaams"},
{"en", "pt", "Portuguese"},
{"en", "pt-PT", "European Portuguese"},
{"en", "pt-BR", "Brazilian Portuguese"},
{"en", "en", "English"},
{"en", "en-GB", "British English"},
{"en", "en-US", "American English"}, // American English in CLDR 24+
{"en", lastLang2zu.String(), "Zulu"},
{"en", firstLang2aa.String(), "Afar"},
{"en", lastLang3zza.String(), "Zaza"},
{"en", firstLang3ace.String(), "Achinese"},
{"en", firstTagAr001.String(), "Modern Standard Arabic"},
{"en", lastTagZhHant.String(), "Traditional Chinese"},
{"en", "aaa", ""},
{"en", "zzj", ""},
// If full tag doesn't match, try without script or region.
{"en", "aa-Hans", "Afar"},
{"en", "af-Arab", "Afrikaans"},
{"en", "zu-Cyrl", "Zulu"},
{"en", "aa-GB", "Afar"},
{"en", "af-NA", "Afrikaans"},
{"en", "zu-BR", "Zulu"},
{"agq", "zh-Hant", ""},
// Canonical equivalents.
{"ro", "ro-MD", "moldovenească"},
{"ro", "mo", "moldovenească"},
{"en", "sh", "Serbo-Croatian"},
{"en", "sr-Latn", "Serbo-Croatian"},
{"en", "sr", "Serbian"},
{"en", "sr-ME", "Serbian"},
{"en", "sr-Latn-ME", "Serbo-Croatian"}, // See comments in TestTag.
}
for i, tt := range tests {
d := Languages(language.Raw.MustParse(tt.dict))
if n := d.Name(language.Raw.MustParse(tt.tag)); n != tt.name {
t.Errorf("%d:%s:%s: was %q; want %q", i, tt.dict, tt.tag, n, tt.name)
}
// Strings of at most three bytes are bare base languages; verify that
// passing a language.Base gives the same result as passing the tag.
if len(tt.tag) <= 3 {
if n := d.Name(language.MustParseBase(tt.tag)); n != tt.name {
t.Errorf("%d:%s:base(%s): was %q; want %q", i, tt.dict, tt.tag, n, tt.name)
}
}
}
}
// TestScript checks the script names produced by the Namer that Scripts
// returns. An scr value starting with an upper-case letter is parsed as a
// script and checked both as a bare language.Script and as a tag composed
// from it; any other value is parsed as a tag.
func TestScript(t *testing.T) {
tests := []struct {
dict string
scr string
name string
}{
{"nl", "Arab", "Arabisch"},
{"en", "Arab", "Arabic"},
{"en", "Zzzz", "Unknown Script"},
{"zh-Hant", "Hang", "韓文字"},
{"zh-Hant-HK", "Hang", "韓文字"},
{"zh", "Arab", "阿拉伯文"},
{"zh-Hans-HK", "Arab", "阿拉伯文"}, // same as zh
{"zh-Hant", "Arab", "阿拉伯文"},
{"zh-Hant-HK", "Arab", "阿拉伯文"}, // same as zh
// Canonicalized form
{"en", "Qaai", "Inherited"}, // deprecated script, now is Zinh
{"en", "sh", "Unknown Script"}, // sh canonicalizes to sr-Latn
{"en", "en", "Unknown Script"},
// Don't introduce scripts with canonicalization.
{"en", "sh", "Unknown Script"}, // sh canonicalizes to sr-Latn
}
for i, tt := range tests {
d := Scripts(language.MustParse(tt.dict))
var x interface{}
if unicode.IsUpper(rune(tt.scr[0])) {
// Script: also check the tag composed from the script alone.
x = language.MustParseScript(tt.scr)
tag, _ := language.Raw.Compose(x)
if n := d.Name(tag); n != tt.name {
t.Errorf("%d:%s:%s: was %q; want %q", i, tt.dict, tt.scr, n, tt.name)
}
} else {
// Tag
x = language.Raw.MustParse(tt.scr)
}
if n := d.Name(x); n != tt.name {
t.Errorf("%d:%s:%s: was %q; want %q", i, tt.dict, tt.scr, n, tt.name)
}
}
}
// TestRegion checks the region names produced by the Namer that Regions
// returns. A reg value starting with an upper-case letter is parsed as a
// region and checked both as a bare language.Region and as a tag composed
// from it; any other value is parsed as a tag.
func TestRegion(t *testing.T) {
tests := []struct {
dict string
reg string
name string
}{
{"nl", "NL", "Nederland"},
{"en", "US", "United States"},
{"en", "ZZ", "Unknown Region"},
{"en", "UM", "U.S. Outlying Islands"},
{"en-GB", "UM", "U.S. Outlying Islands"},
{"en-GB", "NL", "Netherlands"},
// Canonical equivalents
{"en", "UK", "United Kingdom"},
// No region
{"en", "pt", "Unknown Region"},
{"en", "und", "Unknown Region"},
// Don't introduce regions with canonicalization.
{"en", "mo", "Unknown Region"},
}
for i, tt := range tests {
d := Regions(language.MustParse(tt.dict))
var x interface{}
if unicode.IsUpper(rune(tt.reg[0])) {
// Region
x = language.MustParseRegion(tt.reg)
tag, _ := language.Raw.Compose(x)
if n := d.Name(tag); n != tt.name {
t.Errorf("%d:%s:%s: was %q; want %q", i, tt.dict, tt.reg, n, tt.name)
}
} else {
// Tag
x = language.Raw.MustParse(tt.reg)
}
if n := d.Name(x); n != tt.name {
t.Errorf("%d:%s:%s: was %q; want %q", i, tt.dict, tt.reg, n, tt.name)
}
}
}
// TestSelf checks the names returned by Self, i.e. the name of each tag in
// its own language. An empty expected name means that no name is returned
// for the tag.
func TestSelf(t *testing.T) {
tests := []struct {
tag string
name string
}{
{"nl", "Nederlands"},
{"nl-BE", "Vlaams"},
{"en-GB", "British English"},
{lastLang2zu.String(), "isiZulu"},
{firstLang2aa.String(), ""}, // not defined
{lastLang3zza.String(), ""}, // not defined
{firstLang3ace.String(), ""}, // not defined
{firstTagAr001.String(), "العربية الرسمية الحديثة"},
{"ar", "العربية"},
{lastTagZhHant.String(), "繁體中文"},
{"aaa", ""},
{"zzj", ""},
// Drop entries that are not in the requested script, even if there is
// an entry for the language.
{"aa-Hans", ""},
{"af-Arab", ""},
{"zu-Cyrl", ""},
// Append the country name in the language of the matching language.
{"af-NA", "Afrikaans"},
{"zh", "中文"},
// zh-TW should match zh-Hant instead of zh!
{"zh-TW", "繁體中文"},
{"zh-Hant", "繁體中文"},
{"zh-Hans", "简体中文"},
{"zh-Hant-TW", "繁體中文"},
{"zh-Hans-TW", "简体中文"},
// Take the entry for sr which has the matching script.
// TODO: Capitalization changed as of CLDR 26, but change seems
// arbitrary. Revisit capitalization with revision 27. See
// http://unicode.org/cldr/trac/ticket/8051.
{"sr", "српски"},
// TODO: sr-ME should show up as Serbian or Montenegrin, not Serbo-
// Croatian. This is an artifact of the current algorithm, which is the
// way it is to have the preferred behavior for other languages such as
// Chinese. We can hardwire this case in the table generator or package
// code, but we first check if CLDR can be updated.
// {"sr-ME", "Srpski"}, // Is Srpskohrvatski
{"sr-Latn-ME", "Srpskohrvatski"},
{"sr-Cyrl-ME", "српски"},
{"sr-NL", "српски"},
// Canonical equivalents.
{"ro-MD", "moldovenească"},
{"mo", "moldovenească"},
// NOTE: kk is defined, but in Cyrillic script. For China, Arab is the
// dominant script. We do not have data for kk-Arab and we chose to not
// fall back in such cases.
{"kk-CN", ""},
}
for i, tt := range tests {
d := Self
// The tag is parsed with language.Raw before it is passed to Name.
if n := d.Name(language.Raw.MustParse(tt.tag)); n != tt.name {
t.Errorf("%d:%s: was %q; want %q", i, tt.tag, n, tt.name)
}
}
}
// TestDictionaryLang checks the language names produced by a Dictionary.
// Every case is checked through the Tags namer; when the tag consists of
// nothing but its base language, the Languages namer is checked as well.
func TestDictionaryLang(t *testing.T) {
	tests := []struct {
		d    *Dictionary
		tag  string
		name string
	}{
		{English, "en", "English"},
		{Portuguese, "af", "africâner"},
		{EuropeanPortuguese, "af", "africânder"},
		{English, "nl-BE", "Flemish"},
	}
	for i, test := range tests {
		tag := language.MustParse(test.tag)
		if got := test.d.Tags().Name(tag); got != test.name {
			t.Errorf("%d:%v: Tags().Name: got %q; want %q", i, tag, got, test.name)
		}
		// Rebuild a tag from the base language alone. Only when that equals
		// the full tag can the Languages namer be expected to return the
		// same name.
		if base, _ := language.Compose(tag.Base()); base == tag {
			if got := test.d.Languages().Name(base); got != test.name {
				// The message names the namer so that this failure can be
				// told apart from the Tags().Name failure above.
				t.Errorf("%d:%v: Languages().Name: got %q; want %q", i, tag, got, test.name)
			}
		}
	}
}
// TestDictionaryRegion checks the region names returned by the Regions namer
// of several dictionaries.
func TestDictionaryRegion(t *testing.T) {
	type regionCase struct {
		d      *Dictionary
		region string
		name   string
	}
	cases := []regionCase{
		{English, "FR", "France"},
		{Portuguese, "009", "Oceania"},
		{EuropeanPortuguese, "009", "Oceânia"},
	}
	for i, tc := range cases {
		region := language.MustParseRegion(tc.region)
		got := tc.d.Regions().Name(region)
		if got == tc.name {
			continue
		}
		t.Errorf("%d:%v: got %s; want %s", i, region, got, tc.name)
	}
}
// TestDictionaryScript checks the script names returned by the Scripts namer
// of several dictionaries.
func TestDictionaryScript(t *testing.T) {
	type scriptCase struct {
		d      *Dictionary
		script string
		name   string
	}
	cases := []scriptCase{
		{English, "Cyrl", "Cyrillic"},
		{Portuguese, "Gujr", "gujerati"},
		{EuropeanPortuguese, "Gujr", "guzerate"},
	}
	for i, tc := range cases {
		script := language.MustParseScript(tc.script)
		got := tc.d.Scripts().Name(script)
		if got == tc.name {
			continue
		}
		t.Errorf("%d:%v: got %s; want %s", i, script, got, tc.name)
	}
}

Просмотреть файл

@ -7,8 +7,8 @@ package display_test
import (
"fmt"
"golang.org/x/text/display"
"golang.org/x/text/language"
"golang.org/x/text/language/display"
)
func ExampleNamer() {

238
language/display/lookup.go Normal file
Просмотреть файл

@ -0,0 +1,238 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package display
// This file contains common lookup code that is shared between the various
// implementations of Namer and Dictionaries.
import (
"fmt"
"sort"
"strings"
"golang.org/x/text/language"
)
// namer is the lookup abstraction used by the name* helpers in this file: it
// turns the index of a language, script or region into a display string.
type namer interface {
// name gets the string for the given index. It should walk the
// inheritance chain if a value is not present in the base index.
name(idx int) string
}
// nameLanguage returns the name n holds for the language described by x.
// x is composed into a tag using the language.All canonicalization; an error
// from Compose is ignored. The tag is then located in langTagSet and the
// resulting index is handed to n.
func nameLanguage(n namer, x interface{}) string {
	tag, _ := language.All.Compose(x)
	base, script, region := tag.Raw()
	idx, _, _ := langTagSet.index(base, script, region)
	return n.name(idx)
}
// nameScript returns the name n holds for the script described by x.
// x is composed into a tag using language.DeprecatedScript (a Compose error
// is ignored); the script of that tag is looked up in scriptIndex.
func nameScript(n namer, x interface{}) string {
	tag, _ := language.DeprecatedScript.Compose(x)
	_, script, _ := tag.Raw()
	idx := scriptIndex.index(script.String())
	return n.name(idx)
}
// nameRegion returns the name n holds for the region described by x.
// x is composed into a tag using language.DeprecatedRegion (a Compose error
// is ignored); the region of that tag is looked up in regionIndex.
func nameRegion(n namer, x interface{}) string {
	tag, _ := language.DeprecatedRegion.Compose(x)
	_, _, region := tag.Raw()
	idx := regionIndex.index(region.String())
	return n.name(idx)
}
// nameTag returns a display name for the language tag x, built from three
// namers: langN for the language part, scrN for a script and regN for a
// region that the language entry does not already account for.
//
// It returns "" when x is not a language.Tag or when langTagSet has no entry
// for the tag. Otherwise the result is the language name, followed by the
// script and/or region name in parentheses when such a name is available.
func nameTag(langN, scrN, regN namer, x interface{}) string {
	tag, isTag := x.(language.Tag)
	if !isTag {
		return ""
	}
	// Canonicalize with everything except script suppression; the original
	// tag is kept when canonicalization fails.
	const form = language.All &^ language.SuppressScript
	if canon, err := form.Canonicalize(tag); err == nil {
		tag = canon
	}
	idx, scr, reg := langTagSet.index(tag.Raw())
	if idx == -1 {
		return ""
	}
	langName := langN.name(idx)

	// scr and reg are whatever the matched entry did not cover.
	var scrName, regName string
	if scr != (language.Script{}) {
		scrName = scrN.name(scriptIndex.index(scr.String()))
	}
	if reg != (language.Region{}) {
		regName = regN.name(regionIndex.index(reg.String()))
	}

	// TODO: use patterns in CLDR or at least confirm they are the same for
	// all languages.
	switch {
	case scrName != "" && regName != "":
		return fmt.Sprintf("%s (%s, %s)", langName, scrName, regName)
	case scrName != "" || regName != "":
		return fmt.Sprintf("%s (%s%s)", langName, scrName, regName)
	}
	return langName
}
// header holds the strings of a single namer in packed form.
//
// All strings are concatenated into data; index stores the offset at which
// each string starts, plus one final offset marking the end of the last
// string. As an example, a header with names for the languages de, el, en, fi
// and nl could look like this:
//
//	header{
//		data:  "GermanGreekEnglishDutch",
//		index: []uint16{0, 6, 11, 18, 18, 23},
//	}
//
// The string for the entry with index i is data[index[i]:index[i+1]], so index
// always has one more element than the number of entries the header covers.
// An empty string means that the name is undefined; in the example this is
// the case for fi (Finnish).
type header struct {
	data  string
	index []uint16
}

// name returns the string stored for index i, or "" when i does not refer to
// an entry of h.
func (h *header) name(i int) string {
	if i < 0 || i >= len(h.index)-1 {
		return ""
	}
	start, end := h.index[i], h.index[i+1]
	return h.data[start:end]
}
// tagSet is used to find the index of a language in a set of tags.
// Tags that consist of a single subtag are found through single; all other
// tags are listed in long. The index method numbers the entries of long after
// those of single.
type tagSet struct {
single tagIndex
long []string
}
var (
// langTagSet is the tag set used by nameLanguage and nameTag to index
// language names.
langTagSet = tagSet{
single: langIndex,
long: langTagsLong,
}
// selfTagSet is used for indexing the language strings in their own
// language.
selfTagSet = tagSet{
single: selfIndex,
long: selfTagsLong,
}
// zzzz and zz are the script "Zzzz" and the region "ZZ". tagSet.index
// treats them the same as an unspecified script or region.
zzzz = language.MustParseScript("Zzzz")
zz = language.MustParseRegion("ZZ")
)
// index looks up the tag made of base, scr and reg in ts and returns its
// index. When there is no entry for the full tag, the index of a parent entry
// is returned together with the script and region that this entry does not
// cover. The index is -1 when not even the base language is present.
//
// A script of Zzzz and a region of ZZ are treated as unspecified. Entries of
// ts.long are numbered after the entries of ts.single.
func (ts *tagSet) index(base language.Base, scr language.Script, reg language.Region) (int, language.Script, language.Region) {
	lang := base.String()
	if scr == (language.Script{}) && reg == (language.Region{}) {
		// Nothing but a base language: only the single-subtag index applies.
		return ts.single.index(lang), scr, reg
	}
	if scr == zzzz {
		scr = language.Script{}
	}
	if reg == zz {
		reg = language.Region{}
	}
	// All entries have either a script or a region and not both.
	wantScr, wantReg := scr.String(), reg.String()
	for i := sort.SearchStrings(ts.long, lang); i < len(ts.long) && strings.HasPrefix(ts.long[i], lang); i++ {
		// The part of the entry that follows the language and its separator.
		switch ts.long[i][len(lang)+1:] {
		case wantScr:
			// The entry accounts for the script; the region is left over.
			return i + ts.single.len(), language.Script{}, reg
		case wantReg:
			// The entry accounts for the region; the script is left over.
			return i + ts.single.len(), scr, language.Region{}
		}
	}
	return ts.single.index(lang), scr, reg
}
// Tags returns all tags of ts: first those of ts.single, in the order in
// which keys reports them, then those of ts.long. Every entry is parsed with
// language.Raw.MustParse.
func (ts *tagSet) Tags() []language.Tag {
	total := ts.single.len() + len(ts.long)
	result := make([]language.Tag, 0, total)
	add := func(s string) {
		result = append(result, language.Raw.MustParse(s))
	}
	ts.single.keys(add)
	for i := range ts.long {
		add(ts.long[i])
	}
	return result
}
// supportedScripts returns the scripts listed in scriptIndex, in the order in
// which scriptIndex.keys reports them.
func supportedScripts() []language.Script {
	scripts := make([]language.Script, 0, scriptIndex.len())
	collect := func(key string) {
		scripts = append(scripts, language.MustParseScript(key))
	}
	scriptIndex.keys(collect)
	return scripts
}
// supportedRegions returns the regions listed in regionIndex, in the order in
// which regionIndex.keys reports them.
func supportedRegions() []language.Region {
	regions := make([]language.Region, 0, regionIndex.len())
	collect := func(key string) {
		regions = append(regions, language.MustParseRegion(key))
	}
	regionIndex.keys(collect)
	return regions
}
// tagIndex holds concatenated lists of subtags of length 2 to 4, one string
// per length. Element 0 contains the 2-byte tags, element 1 the 3-byte tags
// and element 2 the 4-byte tags; index locates a tag by binary search, so the
// tags within one string must be sorted.
//
// For example, in
//
//	tagIndex{
//		"arenesfrruzh", // 6 2-byte tags.
//		"barwae",       // 2 3-byte tags.
//		"",
//	}
//
// the 2-byte tag "fr" has an index of 3 and the 3-byte tag "wae" has an index
// of 7.
type tagIndex [3]string

// index returns the position of s among all tags of t, counting the shorter
// tags first, or -1 if s is not present.
func (t *tagIndex) index(s string) int {
	width := len(s)
	if width < 2 || width > 4 {
		return -1
	}
	tags := t[width-2]
	count := len(tags) / width
	pos := sort.Search(count, func(k int) bool {
		return tags[k*width:(k+1)*width] >= s
	})
	if pos == count || tags[pos*width:(pos+1)*width] != s {
		return -1
	}
	// Shift by the number of tags that are shorter than s.
	for w := 2; w < width; w++ {
		pos += len(t[w-2]) / w
	}
	return pos
}

// len returns the number of tags that are contained in the tagIndex.
func (t *tagIndex) len() (n int) {
	for w := 2; w <= 4; w++ {
		n += len(t[w-2]) / w
	}
	return n
}

// keys calls f for each tag, shortest tags first and in stored order within
// one length.
func (t *tagIndex) keys(f func(key string)) {
	for i, tags := range *t {
		width := i + 2
		for off := 0; off < len(tags); off += width {
			f(tags[off : off+width])
		}
	}
}

Просмотреть файл

@ -0,0 +1,587 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// Generator for display name tables.
package main
import (
"bytes"
"flag"
"fmt"
"log"
"reflect"
"sort"
"strings"
"golang.org/x/text/cldr"
"golang.org/x/text/internal/gen"
"golang.org/x/text/language"
)
// Command line flags of the generator.
var (
// NOTE(review): test is declared but not read by any code visible in this
// file - confirm whether it is still needed.
test = flag.Bool("test", false,
"test existing tables; can be used to compare web data with package data.")
// outputFile is the name of the generated Go file.
outputFile = flag.String("output", "tables.go", "output file")
// NOTE(review): stats is declared but not read by any code visible in this
// file - confirm whether it is still needed.
stats = flag.Bool("stats", false, "prints statistics to stderr")
// short selects the "short" alternative of a name in builder.filter; when
// it is unset the "stand-alone" alternative is selected instead.
short = flag.Bool("short", false, `Use "short" alternatives, when available.`)
// draft is parsed with cldr.ParseDraft in builder.filter and passed to
// SelectDraft.
draft = flag.String("draft",
"contributed",
`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
// pkg is the package name for the generated file.
pkg = flag.String("package",
"display",
"the name of the package in which the generated file is to be included")
// tags restricts the tags for which data is collected. It starts out empty,
// which tagSet.contains treats as "every tag".
tags = newTagSet("tags",
[]language.Tag{},
"space-separated list of tags to include or empty for all")
// dict is the set of tags for which a Dictionary is written. It starts out
// with the tags returned by dictTags.
dict = newTagSet("dict",
dictTags(),
"space-separated list or tags for which to include a Dictionary. "+
`"" means the common list from go.text/language.`)
)
// dictTags returns the default tags for which a Dictionary is generated. The
// list is hard-coded here.
func dictTags() (tag []language.Tag) {
	// TODO: replace with language.Common.Tags() once supported.
	const str = "af am ar ar-001 az bg bn ca cs da de el en en-US en-GB " +
		"es es-ES es-419 et fa fi fil fr fr-CA gu he hi hr hu hy id is it ja " +
		"ka kk km kn ko ky lo lt lv mk ml mn mr ms my ne nl no pa pl pt pt-BR " +
		"pt-PT ro ru si sk sl sq sr sr-Latn sv sw ta te th tr uk ur uz vi " +
		"zh zh-Hans zh-Hant zu"
	codes := strings.Split(str, " ")
	tag = make([]language.Tag, 0, len(codes))
	for i := range codes {
		tag = append(tag, language.MustParse(codes[i]))
	}
	return tag
}
// main generates the display name tables: it decodes the localeDisplayNames
// section of the CLDR core data, lets a builder write all tables to a
// CodeWriter and finally writes the result to the file named by -output, in
// the package named by -package.
func main() {
	gen.Init()

	// Read the CLDR zip file.
	r := gen.OpenCLDRCoreZip()
	defer r.Close()

	d := &cldr.Decoder{}
	d.SetDirFilter("main", "supplemental")
	d.SetSectionFilter("localeDisplayNames")
	data, err := d.DecodeZip(r)
	if err != nil {
		log.Fatalf("DecodeZip: %v", err)
	}

	w := gen.NewCodeWriter()
	// Use the -package flag instead of a hard-coded name; its default is
	// "display", so the output is unchanged unless the flag is given. The
	// arguments of a deferred call are evaluated here, so both flags are read
	// at this point while the file is written when main returns.
	defer w.WriteGoFile(*outputFile, *pkg)

	gen.WriteCLDRVersion(w)

	b := builder{
		w:     w,
		data:  data,
		group: make(map[string]*group),
	}
	b.generate()
}
// tagForm is the canonicalization used by the generator whenever it parses a
// tag: flag values, CLDR locale names and the type attribute of language
// entries.
const tagForm = language.All
// tagSet is used to parse command line flags of tags. It implements the
// flag.Value interface.
type tagSet map[language.Tag]bool
// newTagSet creates a tagSet that initially contains tags and registers it as
// the command line flag with the given name and usage text.
func newTagSet(name string, tags []language.Tag, usage string) tagSet {
	set := make(tagSet)
	for i := range tags {
		set[tags[i]] = true
	}
	flag.Var(set, name, usage)
	return set
}
// String implements the String method of the flag.Value interface. It returns
// the tags of f, sorted and separated by single spaces.
func (f tagSet) String() string {
	names := []string{}
	for tag := range f {
		names = append(names, tag.String())
	}
	sort.Strings(names)
	return strings.Join(names, " ")
}
// Set implements Set from the flag.Value interface. s is a space-separated
// list of tags; each of them is parsed with tagForm and added to f. Empty
// fields are skipped. The first tag that fails to parse ends processing and
// its error is returned; tags added before that point stay in f.
func (f tagSet) Set(s string) error {
	for _, field := range strings.Split(s, " ") {
		if field == "" {
			continue
		}
		tag, err := tagForm.Parse(field)
		if err != nil {
			return err
		}
		f[tag] = true
	}
	return nil
}
// contains reports whether t is selected by f. An empty set selects every
// tag.
func (f tagSet) contains(t language.Tag) bool {
	return len(f) == 0 || f[t]
}
// builder is used to create all tables with display name information.
type builder struct {
// w receives the generated code.
w *gen.CodeWriter
// data is the decoded CLDR data the tables are built from.
data *cldr.CLDR
// NOTE(review): fromLocs, toTags and toTagIndex are reset by setData but
// not otherwise used by the code visible in this file.
fromLocs []string
// destination tags for the current locale.
toTags []string
toTagIndex map[string]int
// list of supported tags
supported []language.Tag
// key-value pairs per group
group map[string]*group
// statistics
sizeIndex int // total size of all indexes of headers
sizeData int // total size of all data of headers
totalSize int
}
// group collects the data of one kind of table; generate creates the groups
// "lang", "script", "region" and "self".
type group struct {
// Maps from a given language to the Namer data for this language.
lang map[language.Tag]keyValues
// headers holds one header per supported tag; it is filled by
// builder.writeGroup.
headers []header
// toTags is the sorted, de-duplicated list of all keys that occur in lang;
// it is filled by builder.writeGroup.
toTags []string
// NOTE(review): threeStart and fourPlusStart are not used by the code
// visible in this file.
threeStart int
fourPlusStart int
}
// set records name as the value of key typ for tag t. A value that is already
// present and non-empty is left untouched, so the first non-empty name wins.
func (g *group) set(t language.Tag, typ, name string) {
	values := g.lang[t]
	if values == nil {
		values = keyValues{}
		g.lang[t] = values
	}
	if values[typ] != "" {
		return
	}
	values[typ] = name
}
// keyValues maps a key (the string form of a language tag, script or region)
// to its display name.
type keyValues map[string]string
// header is the generator-side description of one table entry: the tag it
// belongs to, all names concatenated into data, and the offsets of the
// individual names within data.
type header struct {
tag language.Tag
data string
index []uint16
}
// versionInfo is the format string for the Version constant in the generated
// file; generate fills in cldr.Version.
var versionInfo = `// Version is deprecated. Use CLDRVersion.
const Version = %#v
`
// self is the tag ("mul") under which generate stores the names of languages
// in their own language.
var self = language.MustParse("mul")
// generate builds and writes all tables.
//
// It writes the version constant, filters the CLDR data, collects the
// language, script and region names per locale, and then writes the parents
// table, the three name tables, the list of supported tags and the
// dictionaries. Finally it builds and writes the "self" table, which holds
// the names of languages in their own language. The order of the calls
// determines the order of the generated output.
func (b *builder) generate() {
fmt.Fprintf(b.w, versionInfo, cldr.Version)
b.filter()
// Language names, keyed by the canonical string of the language tag. Only
// tags selected by the -tags flag are recorded.
b.setData("lang", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
if ldn.Languages != nil {
for _, v := range ldn.Languages.Language {
tag := tagForm.MustParse(v.Type)
if tags.contains(tag) {
g.set(loc, tag.String(), v.Data())
}
}
}
})
// Script names, keyed by the string of the parsed script.
b.setData("script", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
if ldn.Scripts != nil {
for _, v := range ldn.Scripts.Script {
g.set(loc, language.MustParseScript(v.Type).String(), v.Data())
}
}
})
// Region names, keyed by the string of the parsed region.
b.setData("region", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
if ldn.Territories != nil {
for _, v := range ldn.Territories.Territory {
g.set(loc, language.MustParseRegion(v.Type).String(), v.Data())
}
}
})
b.makeSupported()
b.writeParents()
b.writeGroup("lang")
b.writeGroup("script")
b.writeGroup("region")
b.w.WriteConst("numSupported", len(b.supported))
// The supported tags are written as one string in which every tag is
// followed by "|".
buf := bytes.Buffer{}
for _, tag := range b.supported {
fmt.Fprint(&buf, tag.String(), "|")
}
b.w.WriteConst("supported", buf.String())
b.writeDictionaries()
// From here on the only supported tag is self, so writeGroup("self")
// allocates a single header.
b.supported = []language.Tag{self}
// Compute the names of locales in their own language. Some of these names
// may be specified in their parent locales. We make four passes over the
// data (i = 0..3). In pass i the key of an entry is replaced by its i-th
// parent before it is compared with the locale, so a name that is found
// with fewer parent steps is recorded first; set does not overwrite it
// in a later pass.
for i := 0; i < 4; i++ {
b.setData("self", func(g *group, tag language.Tag, ldn *cldr.LocaleDisplayNames) {
// After the first pass, a locale that has a script but no region is
// also matched against the entry for its bare base language.
parent := tag
if b, s, r := tag.Raw(); i > 0 && (s != language.Script{} && r == language.Region{}) {
parent, _ = language.Raw.Compose(b)
}
if ldn.Languages != nil {
for _, v := range ldn.Languages.Language {
key := tagForm.MustParse(v.Type)
saved := key
// The locale names itself (or its base language, see above).
if key == parent {
g.set(self, tag.String(), v.Data())
}
// Walk i steps up the parent chain of the entry's key.
for k := 0; k < i; k++ {
key = key.Parent()
}
// The locale is the i-th parent of the entry: the name of the entry is
// given in the language of one of its parents.
if key == tag {
g.set(self, saved.String(), v.Data()) // set does not overwrite a value.
}
}
}
})
}
b.writeGroup("self")
}
// setData calls f for every CLDR locale that has locale display names and is
// selected by the -tags flag. f receives the group called name, which is
// created on first use, the parsed tag of the locale and the display names.
// The per-call bookkeeping fields of b are reset first.
func (b *builder) setData(name string, f func(*group, language.Tag, *cldr.LocaleDisplayNames)) {
	b.sizeIndex, b.sizeData = 0, 0
	b.toTags, b.fromLocs = nil, nil
	b.toTagIndex = make(map[string]int)

	g := b.group[name]
	if g == nil {
		g = &group{lang: make(map[language.Tag]keyValues)}
		b.group[name] = g
	}
	for _, loc := range b.data.Locales() {
		// We use RawLDML instead of LDML as we are managing our own inheritance
		// in this implementation.
		ldn := b.data.RawLDML(loc).LocaleDisplayNames

		// We do not support the POSIX variant (it is not a supported BCP 47
		// variant). This locale also doesn't happen to contain any data, so
		// we'll skip it by checking for this.
		tag, err := tagForm.Parse(loc)
		switch {
		case err != nil && ldn != nil:
			// A locale that cannot be parsed but does carry data is fatal.
			log.Fatalf("setData: %v", err)
		case err != nil:
			continue
		case ldn != nil && tags.contains(tag):
			f(g, tag, ldn)
		}
	}
}
// filter reduces the language, script and territory names of every locale to
// the entries that are wanted: one alternative per group (the "short" one if
// -short is set, the "stand-alone" one otherwise, with the entry without an
// alt attribute as fallback) and only entries that pass SelectDraft for the
// -draft level. A section that ends up empty is set to nil.
//
// The -draft flag is parsed once up front, so an invalid value is reported
// before any data is touched.
func (b *builder) filter() {
	minDraft, err := cldr.ParseDraft(*draft)
	if err != nil {
		log.Fatalf("filter: %v", err)
	}
	alt := "stand-alone"
	if *short {
		alt = "short"
	}
	filter := func(s *cldr.Slice) {
		s.SelectOnePerGroup("alt", []string{alt, ""})
		s.SelectDraft(minDraft)
	}
	for _, loc := range b.data.Locales() {
		ldn := b.data.RawLDML(loc).LocaleDisplayNames
		if ldn == nil {
			continue
		}
		// The slices wrap the address of the underlying field, so filtering
		// them is observed through the length checks below.
		if ldn.Languages != nil {
			s := cldr.MakeSlice(&ldn.Languages.Language)
			if filter(&s); len(ldn.Languages.Language) == 0 {
				ldn.Languages = nil
			}
		}
		if ldn.Scripts != nil {
			s := cldr.MakeSlice(&ldn.Scripts.Script)
			if filter(&s); len(ldn.Scripts.Script) == 0 {
				ldn.Scripts = nil
			}
		}
		if ldn.Territories != nil {
			s := cldr.MakeSlice(&ldn.Territories.Territory)
			if filter(&s); len(ldn.Territories.Territory) == 0 {
				ldn.Territories = nil
			}
		}
	}
}
// makeSupported creates a list of all supported locales: every tag that has
// data in at least one group, sorted by its string form and without
// duplicates.
func (b *builder) makeSupported() {
	// tags across groups
	for _, g := range b.group {
		for tag := range g.lang {
			b.supported = append(b.supported, tag)
		}
	}
	n := unique(tagsSorter(b.supported))
	b.supported = b.supported[:n]
}
// tagsSorter implements sort.Interface for a slice of tags, ordering them by
// their string form.
type tagsSorter []language.Tag

// Len returns the number of tags.
func (a tagsSorter) Len() int {
	return len(a)
}

// Swap exchanges the tags at positions i and j.
func (a tagsSorter) Swap(i, j int) {
	a[j], a[i] = a[i], a[j]
}

// Less reports whether the string of tag i sorts before the string of tag j.
func (a tagsSorter) Less(i, j int) bool {
	left, right := a[i].String(), a[j].String()
	return left < right
}
// writeGroup builds the headers of the group called name and writes its
// table.
//
// The keys of all locales of the group are collected into g.toTags (sorted by
// tagsBySize, duplicates removed). For every supported tag a header is built
// in which the names are concatenated in the order of g.toTags; supported
// tags without data in this group get a header that only carries the tag.
func (b *builder) writeGroup(name string) {
	g := b.group[name]

	for _, kv := range g.lang {
		for t := range kv {
			g.toTags = append(g.toTags, t)
		}
	}
	g.toTags = g.toTags[:unique(tagsBySize(g.toTags))]

	// Offsets into data are stored as uint16, so the data of a single header
	// must not exceed this size.
	const maxOffset = 1<<16 - 1

	// Allocate header per supported value.
	g.headers = make([]header, len(b.supported))
	for i, sup := range b.supported {
		kv, ok := g.lang[sup]
		if !ok {
			g.headers[i].tag = sup
			continue
		}
		data := []byte{}
		index := make([]uint16, len(g.toTags), len(g.toTags)+1)
		for j, t := range g.toTags {
			index[j] = uint16(len(data))
			data = append(data, kv[t]...)
		}
		// Offsets only grow, so checking the final length covers all of
		// them. Without this check an oversized table would silently wrap
		// around and produce corrupt offsets.
		if len(data) > maxOffset {
			log.Fatalf("writeGroup: %s data for %v is %d bytes; offsets must fit in a uint16", name, sup, len(data))
		}
		index = append(index, uint16(len(data)))

		// Trim the tail of the index.
		// TODO: indexes can be reduced in size quite a bit more.
		n := len(index)
		for ; n >= 2 && index[n-2] == index[n-1]; n-- {
		}
		index = index[:n]

		// Workaround for a bug in CLDR 26.
		// See http://unicode.org/cldr/trac/ticket/8042.
		// NOTE(review): the byte is removed after the index was computed, so
		// offsets that follow the removed quote are not adjusted - confirm
		// that this is intended.
		if cldr.Version == "26" && sup.String() == "hsb" {
			data = bytes.Replace(data, []byte{'"'}, nil, 1)
		}
		g.headers[i] = header{sup, string(data), index}
	}
	g.writeTable(b.w, name)
}
// tagsBySize implements sort.Interface for a list of tag strings. Strings of
// at most four bytes are ordered by length first, and every such string
// sorts before any longer one; strings of equal length, and strings that are
// both longer than four bytes, are ordered alphabetically.
type tagsBySize []string

// Len returns the number of strings.
func (l tagsBySize) Len() int {
	return len(l)
}

// Swap exchanges the strings at positions i and j.
func (l tagsBySize) Swap(i, j int) {
	l[j], l[i] = l[i], l[j]
}

// Less reports whether the string at position i sorts before the one at j.
func (l tagsBySize) Less(i, j int) bool {
	x, y := l[i], l[j]
	// Sort single-tag entries based on size first. Otherwise alphabetic.
	if len(x) == len(y) || (len(x) > 4 && len(y) > 4) {
		return x < y
	}
	return len(x) < len(y)
}
// parentIndices returns a slice a of len(tags) in which a[i] is the position
// within tags of the nearest ancestor of tags[i] that is itself contained in
// tags, or -1 when no ancestor below language.Und is contained in tags.
func parentIndices(tags []language.Tag) []int16 {
	position := make(map[language.Tag]int16)
	for i := range tags {
		position[tags[i]] = int16(i)
	}

	// Construct default parents.
	parents := make([]int16, len(tags))
	for i := range parents {
		parents[i] = -1
		ancestor := tags[i].Parent()
		for ancestor != language.Und {
			if j, found := position[ancestor]; found {
				parents[i] = j
				break
			}
			ancestor = ancestor.Parent()
		}
	}
	return parents
}
// writeParents writes the parents variable: for every supported tag, the
// index of its parent within the supported tags as computed by
// parentIndices.
func (b *builder) writeParents() {
	fmt.Fprint(b.w, "var parents = ")
	b.w.WriteArray(parentIndices(b.supported))
}
// writeKeys writes keys to a special index used by the display package.
// keys are assumed to be sorted by length.
//
// The leading keys of length 2, 3 and 4 are written as the three strings of
// the tagIndex variable <name>Index, each string being the concatenation of
// all keys of that length. Any keys that remain are written as the slice
// <name>TagsLong. The size of the written values is added to w.Size.
func writeKeys(w *gen.CodeWriter, name string, keys []string) {
	w.Size += int(3 * reflect.TypeOf("").Size())
	w.WriteComment("Number of keys: %d", len(keys))
	fmt.Fprintf(w, "var (\n\t%sIndex = tagIndex{\n", name)
	for width := 2; width <= 4; width++ {
		// Count the leading run of keys of the current width.
		n := 0
		for n < len(keys) && len(keys[n]) == width {
			n++
		}
		w.WriteString(strings.Join(keys[:n], ""))
		fmt.Fprint(w, ",\n")
		keys = keys[n:]
	}
	fmt.Fprintln(w, "\t}")
	if len(keys) > 0 {
		w.Size += int(reflect.TypeOf([]string{}).Size())
		fmt.Fprintf(w, "\t%sTagsLong = ", name)
		w.WriteSlice(keys)
	}
	// Fprint with an explicit blank line rather than Fprintln with an
	// argument that ends in a newline, which go vet reports as redundant.
	// The output is the same.
	fmt.Fprint(w, ")\n\n")
}
// identifier creates an identifier from the given tag by dropping every "-"
// from its string form.
func identifier(t language.Tag) string {
	parts := strings.Split(t.String(), "-")
	return strings.Join(parts, "")
}
// writeEntry writes h as one element of a headers array. There are three
// forms:
//   - for a tag that has a Dictionary, the element refers to the separately
//     written <tag><name>Str and <tag><name>Idx values (see writeSingle);
//   - for a header without data, an empty element is written;
//   - otherwise the data string and the index slice are written inline.
func (h *header) writeEntry(w *gen.CodeWriter, name string) {
	switch {
	case len(dict) > 0 && dict.contains(h.tag):
		id := identifier(h.tag)
		fmt.Fprintf(w, "\t{ // %s\n", h.tag)
		fmt.Fprintf(w, "\t\t%[1]s%[2]sStr,\n\t\t%[1]s%[2]sIdx,\n", id, name)
		fmt.Fprintln(w, "\t},")
	case len(h.data) == 0:
		fmt.Fprintln(w, "\t\t{}, //", h.tag)
	default:
		fmt.Fprintf(w, "\t{ // %s\n", h.tag)
		w.WriteString(h.data)
		fmt.Fprintln(w, ",")
		w.WriteSlice(h.index)
		fmt.Fprintln(w, ",\n\t},")
	}
}
// writeSingle writes the data and index of h as a separate constant and
// variable, named <tag><name>Str and <tag><name>Idx. This is only done for
// tags that have a Dictionary; for all other headers it does nothing.
func (h *header) writeSingle(w *gen.CodeWriter, name string) {
	if len(dict) == 0 || !dict.contains(h.tag) {
		return
	}
	prefix := identifier(h.tag) + name
	w.WriteConst(prefix+"Str", h.data)
	// Note that we create a slice instead of an array. If we use an array
	// we need to refer to it as a[:] in other tables, which will cause the
	// array to always be included by the linker. See Issue 7651.
	w.WriteVar(prefix+"Idx", h.index)
}
// writeTable writes the table for a single Namer: the key index (see
// writeKeys), the <name>Headers array with one element per header, the
// separately stored data of dictionary headers, and a comment with the total
// size that was added to w.Size.
func (g *group) writeTable(w *gen.CodeWriter, name string) {
	start := w.Size
	writeKeys(w, name, g.toTags)

	// Take the element size from the type rather than from g.headers[0], so
	// that a group without any headers does not cause a panic.
	w.Size += len(g.headers) * int(reflect.TypeOf(header{}).Size())

	fmt.Fprintf(w, "var %sHeaders = [%d]header{\n", name, len(g.headers))

	title := strings.Title(name)
	for _, h := range g.headers {
		h.writeEntry(w, title)
	}
	// Fprint with an explicit blank line rather than Fprintln with an
	// argument that ends in a newline, which go vet reports as redundant.
	// The output is the same.
	fmt.Fprint(w, "}\n\n")

	for _, h := range g.headers {
		h.writeSingle(w, title)
	}
	n := w.Size - start
	fmt.Fprintf(w, "// Total size for %s: %d bytes (%d KB)\n\n", name, n, n/1000)
}
// writeDictionaries writes a Dictionary variable for every supported tag that
// is selected by the -dict flag. Each Dictionary refers to the Dictionary of
// its parent tag (nil if there is none) and to the separately written string
// and index values of its language, script and region names. The size of the
// written values is added to b.w.Size.
func (b *builder) writeDictionaries() {
	fmt.Fprintln(b.w, "// Dictionary entries of frequent languages")
	fmt.Fprintln(b.w, "var (")
	parents := parentIndices(b.supported)

	for i, t := range b.supported {
		if !dict.contains(t) {
			continue
		}
		ident := identifier(t)
		fmt.Fprintf(b.w, "\t%s = Dictionary{ // %s\n", ident, t)
		if p := parents[i]; p != -1 {
			fmt.Fprintf(b.w, "\t\t&%s,\n", identifier(b.supported[p]))
		} else {
			fmt.Fprintln(b.w, "\t\tnil,")
		}
		fmt.Fprintf(b.w, "\t\theader{%[1]sLangStr, %[1]sLangIdx},\n", ident)
		fmt.Fprintf(b.w, "\t\theader{%[1]sScriptStr, %[1]sScriptIdx},\n", ident)
		fmt.Fprintf(b.w, "\t\theader{%[1]sRegionStr, %[1]sRegionIdx},\n", ident)
		fmt.Fprintln(b.w, "\t}")
	}
	fmt.Fprintln(b.w, ")")

	// Size of one Dictionary: three headers, each a string plus a slice of
	// uint16, and one pointer.
	var (
		str string
		idx []uint16
	)
	headerSize := reflect.TypeOf(str).Size() + reflect.TypeOf(idx).Size()
	entrySize := 3*headerSize + reflect.TypeOf(&idx).Size()

	n := int(entrySize) * len(dict)
	fmt.Fprintf(b.w, "// Total size for %d entries: %d bytes (%d KB)\n\n", len(dict), n, n/1000)
	b.w.Size += n
}
// unique sorts a and moves one representative of every distinct value to the
// front, swapping duplicates towards the end. It returns the number of
// distinct values k; after the call, the first k elements are the distinct
// values in sorted order.
func unique(a sort.Interface) int {
	if a.Len() == 0 {
		return 0
	}
	sort.Sort(a)
	last := 0 // position of the most recently kept element
	for i := 1; i < a.Len(); i++ {
		if !a.Less(last, i) {
			// Element i equals the last kept element: a duplicate.
			continue
		}
		last++
		if last != i {
			a.Swap(last, i)
		}
	}
	return last + 1
}

43658
language/display/tables.go Normal file

Разница между файлами не показана из-за своего большого размера Загрузить разницу