зеркало из https://github.com/golang/text.git
go.text/language: A few small changes:
- Added Tag methods to the Base, Script and Region types to convert them into a proper tag. - Factored out the part of Canonicalize that does not remake the string (used in upcoming matcher code). - Added "nb" -> "no" conversion in the tables to allow more consistency for code using these tables directly. - Changed the short name used in some methods for type Base so that it consistently appears as "b" in the documentation. R=r CC=golang-dev https://golang.org/cl/13647043
This commit is contained in:
Родитель
fd9ccd35d5
Коммит
4a56690205
|
@ -102,8 +102,9 @@ const (
|
|||
// TODO: LikelyScript, LikelyRegion: suppress similar to ICU.
|
||||
)
|
||||
|
||||
// Canonicalize replaces the tag with its canonical equivalent.
|
||||
func (t Tag) Canonicalize(c CanonType) (Tag, error) {
|
||||
// canonicalize returns the canonicalized equivalent of the tag and
|
||||
// whether there was any change.
|
||||
func (t Tag) canonicalize(c CanonType) (Tag, bool) {
|
||||
changed := false
|
||||
if c&SuppressScript != 0 {
|
||||
if t.lang < langNoIndexOffset && uint8(t.script) == suppressScript[t.lang] {
|
||||
|
@ -146,7 +147,6 @@ func (t Tag) Canonicalize(c CanonType) (Tag, error) {
|
|||
}
|
||||
}
|
||||
if c&Macro != 0 {
|
||||
l := normLang(langMacroMap[:], t.lang)
|
||||
// We deviate here from CLDR. The mapping "nb" -> "no" qualifies as a typical
|
||||
// Macro language mapping. However, for legacy reasons, CLDR maps "no",
|
||||
// the macro language code for Norwegian, to the dominant variant "nb".
|
||||
|
@ -154,15 +154,21 @@ func (t Tag) Canonicalize(c CanonType) (Tag, error) {
|
|||
// See http://unicode.org/cldr/trac/ticket/2698 and also
|
||||
// http://unicode.org/cldr/trac/ticket/1790 for some of the practical
|
||||
// implications.
|
||||
// TODO: this code could be removed if CLDR adopts this change.
|
||||
if l == lang_nb && c&CLDR == 0 {
|
||||
l = lang_no
|
||||
}
|
||||
if l != t.lang {
|
||||
changed = true
|
||||
t.lang = l
|
||||
// TODO: this check could be removed if CLDR adopts this change.
|
||||
if c&CLDR == 0 || t.lang != lang_nb {
|
||||
l := normLang(langMacroMap[:], t.lang)
|
||||
if l != t.lang {
|
||||
changed = true
|
||||
t.lang = l
|
||||
}
|
||||
}
|
||||
}
|
||||
return t, changed
|
||||
}
|
||||
|
||||
// Canonicalize returns the canonicalized equivalent of the tag.
|
||||
func (t Tag) Canonicalize(c CanonType) (Tag, error) {
|
||||
t, changed := t.canonicalize(c)
|
||||
if changed && t.str != nil {
|
||||
t.remakeString()
|
||||
}
|
||||
|
@ -353,6 +359,11 @@ func ParseBase(s string) (Base, error) {
|
|||
return Base{l}, err
|
||||
}
|
||||
|
||||
// Tag returns a Tag with this base language as its only subtag.
|
||||
func (b Base) Tag() Tag {
|
||||
return Tag{lang: b.langID}
|
||||
}
|
||||
|
||||
// Script is a 4-letter ISO 15924 code for representing scripts.
|
||||
// It is idiomatically represented in title case.
|
||||
type Script struct {
|
||||
|
@ -370,6 +381,11 @@ func ParseScript(s string) (Script, error) {
|
|||
return Script{sc}, err
|
||||
}
|
||||
|
||||
// Tag returns a Tag with the undetermined language and this script as its only subtags.
|
||||
func (s Script) Tag() Tag {
|
||||
return Tag{script: s.scriptID}
|
||||
}
|
||||
|
||||
// Region is an ISO 3166-1 or UN M.49 code for representing countries and regions.
|
||||
type Region struct {
|
||||
regionID
|
||||
|
@ -393,6 +409,11 @@ func ParseRegion(s string) (Region, error) {
|
|||
return Region{r}, err
|
||||
}
|
||||
|
||||
// Tag returns a Tag with the undetermined language and this region as its only subtags.
|
||||
func (r Region) Tag() Tag {
|
||||
return Tag{region: r.regionID}
|
||||
}
|
||||
|
||||
// IsCountry returns whether this region is a country or autonomous area.
|
||||
func (r Region) IsCountry() bool {
|
||||
if r.regionID < isoRegionOffset || r.IsPrivateUse() {
|
||||
|
|
|
@ -100,9 +100,14 @@ func TestParseBase(t *testing.T) {
|
|||
{"aaaa", "und", false},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
if x, err := ParseBase(tt.in); x.String() != tt.out || err == nil != tt.ok {
|
||||
x, err := ParseBase(tt.in)
|
||||
if x.String() != tt.out || err == nil != tt.ok {
|
||||
t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, x, err == nil, tt.out, tt.ok)
|
||||
}
|
||||
tag, _ := Parse(tt.out)
|
||||
if err == nil && !tag.equalTags(x.Tag()) {
|
||||
t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, x.Tag(), tt.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -147,9 +152,14 @@ func TestParseScript(t *testing.T) {
|
|||
{"Zzzxx", "Zyyy", false},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
if x, err := ParseScript(tt.in); x.String() != tt.out || err == nil != tt.ok {
|
||||
x, err := ParseScript(tt.in)
|
||||
if x.String() != tt.out || err == nil != tt.ok {
|
||||
t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, x, err == nil, tt.out, tt.ok)
|
||||
}
|
||||
tag, _ := Parse("und-" + tt.in)
|
||||
if err == nil && !tag.equalTags(x.Tag()) {
|
||||
t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, x.Tag(), tt.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -214,9 +224,14 @@ func TestParseRegion(t *testing.T) {
|
|||
{"01", "ZZ", false},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
if r, err := ParseRegion(tt.in); r.String() != tt.out || err == nil != tt.ok {
|
||||
r, err := ParseRegion(tt.in)
|
||||
if r.String() != tt.out || err == nil != tt.ok {
|
||||
t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, r, err == nil, tt.out, tt.ok)
|
||||
}
|
||||
tag, _ := Parse("und-" + tt.out)
|
||||
if err == nil && !tag.equalTags(r.Tag()) {
|
||||
t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, r.Tag(), tag)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -196,16 +196,18 @@ func (id langID) stringToBuf(b []byte) int {
|
|||
}
|
||||
|
||||
// String returns the BCP 47 representation of the langID.
|
||||
func (id langID) String() string {
|
||||
if id == 0 {
|
||||
// Use b as the receiver name, instead of id, to ensure the variable
|
||||
// used is consistent with that of Base in which this type is embedded.
|
||||
func (b langID) String() string {
|
||||
if b == 0 {
|
||||
return "und"
|
||||
} else if id >= langNoIndexOffset {
|
||||
id -= langNoIndexOffset
|
||||
} else if b >= langNoIndexOffset {
|
||||
b -= langNoIndexOffset
|
||||
buf := [3]byte{}
|
||||
intToStr(uint(id), buf[:])
|
||||
intToStr(uint(b), buf[:])
|
||||
return string(buf[:])
|
||||
}
|
||||
l := lang[id<<2:]
|
||||
l := lang[b<<2:]
|
||||
if l[3] == 0 {
|
||||
return l[:3]
|
||||
}
|
||||
|
@ -213,11 +215,11 @@ func (id langID) String() string {
|
|||
}
|
||||
|
||||
// ISO3 returns the ISO 639-3 language code.
|
||||
func (id langID) ISO3() string {
|
||||
if id == 0 || id >= langNoIndexOffset {
|
||||
return id.String()
|
||||
func (b langID) ISO3() string {
|
||||
if b == 0 || b >= langNoIndexOffset {
|
||||
return b.String()
|
||||
}
|
||||
l := lang[id<<2:]
|
||||
l := lang[b<<2:]
|
||||
if l[3] == 0 {
|
||||
return l[:3]
|
||||
} else if l[2] == 0 {
|
||||
|
@ -229,8 +231,8 @@ func (id langID) ISO3() string {
|
|||
}
|
||||
|
||||
// IsPrivateUse reports whether this language code is reserved for private use.
|
||||
func (id langID) IsPrivateUse() bool {
|
||||
return langPrivateStart <= id && id <= langPrivateEnd
|
||||
func (b langID) IsPrivateUse() bool {
|
||||
return langPrivateStart <= b && b <= langPrivateEnd
|
||||
}
|
||||
|
||||
type regionID uint16
|
||||
|
|
|
@ -717,6 +717,11 @@ func (b *builder) writeLanguage() {
|
|||
legacyTag[strings.Replace(a.Type, "_", "-", -1)] = repl
|
||||
}
|
||||
}
|
||||
// Manually add the mapping of "nb" (Norwegian) to its macro language.
|
||||
// This can be removed if CLDR adopts this change.
|
||||
langMacroMap.add("nb")
|
||||
langMacroMap.updateLater("nb", "no")
|
||||
|
||||
for k, v := range b.registry {
|
||||
// Also add deprecated values for 3-letter ISO codes, which CLDR omits.
|
||||
if v.typ == "language" && v.deprecated != "" && v.preferred != "" {
|
||||
|
|
|
@ -319,8 +319,9 @@ var langOldMap = [27]fromTo{
|
|||
}
|
||||
|
||||
// langMacroMap maps languages to their macro language replacement, if applicable.
|
||||
// Size: 248 bytes, 62 elements
|
||||
var langMacroMap = [62]fromTo{
|
||||
// Size: 252 bytes, 63 elements
|
||||
var langMacroMap = [63]fromTo{
|
||||
{from: 0x117, to: 0x122},
|
||||
{from: 0x195, to: 0x8},
|
||||
{from: 0x2ba, to: 0x1b3b},
|
||||
{from: 0x2ec, to: 0x169},
|
||||
|
@ -1929,4 +1930,4 @@ var regionInclusionNext = [75]uint8{
|
|||
25, 74, 62,
|
||||
}
|
||||
|
||||
// Size: 14.5K (14895 bytes); Check: 1A91D521
|
||||
// Size: 14.5K (14899 bytes); Check: AF0F1D21
|
||||
|
|
Загрузка…
Ссылка в новой задаче