go.text/language: A few small changes:

- Added Tag methods to the Base, Script and Region types to convert them into a proper tag.
- Factored out part of Canonicalize that does not remake the string (used in upcoming matcher code).
- Added "nb" -> "no" conversion in the tables to allow more consistency for code using these tables directly.
- Changed the short name used in some methods for type Base so that it consistently appears as "b" in the documentation.

R=r
CC=golang-dev
https://golang.org/cl/13647043
This commit is contained in:
Marcel van Lohuizen 2013-09-23 11:03:22 +02:00
Родитель fd9ccd35d5
Коммит 4a56690205
5 изменённых файлов: 72 добавлений и 28 удалений

Просмотреть файл

@ -102,8 +102,9 @@ const (
// TODO: LikelyScript, LikelyRegion: suppress similar to ICU.
)
// Canonicalize replaces the tag with its canonical equivalent.
func (t Tag) Canonicalize(c CanonType) (Tag, error) {
// canonicalize returns the canonicalized equivalent of the tag and
// whether there was any change.
func (t Tag) canonicalize(c CanonType) (Tag, bool) {
changed := false
if c&SuppressScript != 0 {
if t.lang < langNoIndexOffset && uint8(t.script) == suppressScript[t.lang] {
@ -146,7 +147,6 @@ func (t Tag) Canonicalize(c CanonType) (Tag, error) {
}
}
if c&Macro != 0 {
l := normLang(langMacroMap[:], t.lang)
// We deviate here from CLDR. The mapping "nb" -> "no" qualifies as a typical
// Macro language mapping. However, for legacy reasons, CLDR maps "no",
// the macro language code for Norwegian, to the dominant variant "nb".
@ -154,15 +154,21 @@ func (t Tag) Canonicalize(c CanonType) (Tag, error) {
// See http://unicode.org/cldr/trac/ticket/2698 and also
// http://unicode.org/cldr/trac/ticket/1790 for some of the practical
// implications.
// TODO: this code could be removed if CLDR adopts this change.
if l == lang_nb && c&CLDR == 0 {
l = lang_no
}
if l != t.lang {
changed = true
t.lang = l
// TODO: this check could be removed if CLDR adopts this change.
if c&CLDR == 0 || t.lang != lang_nb {
l := normLang(langMacroMap[:], t.lang)
if l != t.lang {
changed = true
t.lang = l
}
}
}
return t, changed
}
// Canonicalize returns the canonicalized equivalent of the tag.
func (t Tag) Canonicalize(c CanonType) (Tag, error) {
t, changed := t.canonicalize(c)
if changed && t.str != nil {
t.remakeString()
}
@ -353,6 +359,11 @@ func ParseBase(s string) (Base, error) {
return Base{l}, err
}
// Tag wraps b in a Tag: the result carries this base language as its
// only subtag, with every other field left at its zero value.
func (b Base) Tag() Tag {
	var t Tag
	t.lang = b.langID
	return t
}
// Script is a 4-letter ISO 15924 code for representing scripts.
// It is idiomatically represented in title case.
type Script struct {
@ -370,6 +381,11 @@ func ParseScript(s string) (Script, error) {
return Script{sc}, err
}
// Tag wraps s in a Tag: the language is left undetermined and this
// script is the only subtag that is set.
func (s Script) Tag() Tag {
	var t Tag
	t.script = s.scriptID
	return t
}
// Region is an ISO 3166-1 or UN M.49 code for representing countries and regions.
type Region struct {
regionID
@ -393,6 +409,11 @@ func ParseRegion(s string) (Region, error) {
return Region{r}, err
}
// Tag wraps r in a Tag: the language is left undetermined and this
// region is the only subtag that is set.
func (r Region) Tag() Tag {
	var t Tag
	t.region = r.regionID
	return t
}
// IsCountry returns whether this region is a country or autonomous area.
func (r Region) IsCountry() bool {
if r.regionID < isoRegionOffset || r.IsPrivateUse() {

Просмотреть файл

@ -100,9 +100,14 @@ func TestParseBase(t *testing.T) {
{"aaaa", "und", false},
}
for i, tt := range tests {
if x, err := ParseBase(tt.in); x.String() != tt.out || err == nil != tt.ok {
x, err := ParseBase(tt.in)
if x.String() != tt.out || err == nil != tt.ok {
t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, x, err == nil, tt.out, tt.ok)
}
tag, _ := Parse(tt.out)
if err == nil && !tag.equalTags(x.Tag()) {
t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, x.Tag(), tt.out)
}
}
}
@ -147,9 +152,14 @@ func TestParseScript(t *testing.T) {
{"Zzzxx", "Zyyy", false},
}
for i, tt := range tests {
if x, err := ParseScript(tt.in); x.String() != tt.out || err == nil != tt.ok {
x, err := ParseScript(tt.in)
if x.String() != tt.out || err == nil != tt.ok {
t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, x, err == nil, tt.out, tt.ok)
}
tag, _ := Parse("und-" + tt.in)
if err == nil && !tag.equalTags(x.Tag()) {
t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, x.Tag(), tt.out)
}
}
}
@ -214,9 +224,14 @@ func TestParseRegion(t *testing.T) {
{"01", "ZZ", false},
}
for i, tt := range tests {
if r, err := ParseRegion(tt.in); r.String() != tt.out || err == nil != tt.ok {
r, err := ParseRegion(tt.in)
if r.String() != tt.out || err == nil != tt.ok {
t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, r, err == nil, tt.out, tt.ok)
}
tag, _ := Parse("und-" + tt.out)
if err == nil && !tag.equalTags(r.Tag()) {
t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, r.Tag(), tag)
}
}
}

Просмотреть файл

@ -196,16 +196,18 @@ func (id langID) stringToBuf(b []byte) int {
}
// String returns the BCP 47 representation of the langID.
func (id langID) String() string {
if id == 0 {
// Use b as variable name, instead of id, to ensure the variable
// used is consistent with that of Base in which this type is embedded.
func (b langID) String() string {
if b == 0 {
return "und"
} else if id >= langNoIndexOffset {
id -= langNoIndexOffset
} else if b >= langNoIndexOffset {
b -= langNoIndexOffset
buf := [3]byte{}
intToStr(uint(id), buf[:])
intToStr(uint(b), buf[:])
return string(buf[:])
}
l := lang[id<<2:]
l := lang[b<<2:]
if l[3] == 0 {
return l[:3]
}
@ -213,11 +215,11 @@ func (id langID) String() string {
}
// ISO3 returns the ISO 639-3 language code.
func (id langID) ISO3() string {
if id == 0 || id >= langNoIndexOffset {
return id.String()
func (b langID) ISO3() string {
if b == 0 || b >= langNoIndexOffset {
return b.String()
}
l := lang[id<<2:]
l := lang[b<<2:]
if l[3] == 0 {
return l[:3]
} else if l[2] == 0 {
@ -229,8 +231,8 @@ func (id langID) ISO3() string {
}
// IsPrivateUse reports whether this language code is reserved for private use.
func (id langID) IsPrivateUse() bool {
return langPrivateStart <= id && id <= langPrivateEnd
func (b langID) IsPrivateUse() bool {
return langPrivateStart <= b && b <= langPrivateEnd
}
type regionID uint16

Просмотреть файл

@ -717,6 +717,11 @@ func (b *builder) writeLanguage() {
legacyTag[strings.Replace(a.Type, "_", "-", -1)] = repl
}
}
// Manually add the mapping of "nb" (Norwegian) to its macro language.
// This can be removed if CLDR adopts this change.
langMacroMap.add("nb")
langMacroMap.updateLater("nb", "no")
for k, v := range b.registry {
// Also add deprecated values for 3-letter ISO codes, which CLDR omits.
if v.typ == "language" && v.deprecated != "" && v.preferred != "" {

Просмотреть файл

@ -319,8 +319,9 @@ var langOldMap = [27]fromTo{
}
// langMacroMap maps languages to their macro language replacement, if applicable.
// Size: 248 bytes, 62 elements
var langMacroMap = [62]fromTo{
// Size: 252 bytes, 63 elements
var langMacroMap = [63]fromTo{
{from: 0x117, to: 0x122},
{from: 0x195, to: 0x8},
{from: 0x2ba, to: 0x1b3b},
{from: 0x2ec, to: 0x169},
@ -1929,4 +1930,4 @@ var regionInclusionNext = [75]uint8{
25, 74, 62,
}
// Size: 14.5K (14895 bytes); Check: 1A91D521
// Size: 14.5K (14899 bytes); Check: AF0F1D21