зеркало из https://github.com/golang/text.git
collate/colltab: moved package to internal
This is a followup of CL 28230. Change-Id: I471e2ca5f0a864f855d0a93f3eb3b5b3bb52b459 Reviewed-on: https://go-review.googlesource.com/28231 Run-TryBot: Marcel van Lohuizen <mpvl@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Crawshaw <crawshaw@golang.org> Reviewed-by: Nigel Tao <nigeltao@golang.org>
This commit is contained in:
Родитель
09c7ea1fcb
Коммит
ceefd2213e
|
@ -12,7 +12,7 @@ import (
|
|||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/collate/colltab"
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
|
|
@ -8,7 +8,7 @@ import (
|
|||
"fmt"
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/collate/colltab"
|
||||
"golang.org/x/text/internal/colltab"
|
||||
)
|
||||
|
||||
const (
|
||||
|
|
|
@ -7,7 +7,7 @@ package build
|
|||
import (
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/collate/colltab"
|
||||
"golang.org/x/text/internal/colltab"
|
||||
)
|
||||
|
||||
type ceTest struct {
|
||||
|
|
|
@ -11,7 +11,7 @@ import (
|
|||
"sort"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/collate/colltab"
|
||||
"golang.org/x/text/internal/colltab"
|
||||
)
|
||||
|
||||
// This file contains code for detecting contractions and generating
|
||||
|
|
|
@ -9,7 +9,7 @@ import (
|
|||
"sort"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/collate/colltab"
|
||||
"golang.org/x/text/internal/colltab"
|
||||
)
|
||||
|
||||
var largetosmall = []stridx{
|
||||
|
|
|
@ -11,7 +11,7 @@ import (
|
|||
"strings"
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/collate/colltab"
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ import (
|
|||
"strconv"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/collate/colltab"
|
||||
"golang.org/x/text/internal/colltab"
|
||||
)
|
||||
|
||||
type entryTest struct {
|
||||
|
|
|
@ -9,7 +9,7 @@ import (
|
|||
"io"
|
||||
"reflect"
|
||||
|
||||
"golang.org/x/text/collate/colltab"
|
||||
"golang.org/x/text/internal/colltab"
|
||||
)
|
||||
|
||||
// table is an intermediate structure that roughly resembles the table in collate.
|
||||
|
|
|
@ -15,8 +15,7 @@ import (
|
|||
"bytes"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/collate/colltab"
|
||||
newcolltab "golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
|
@ -56,7 +55,7 @@ var tags []language.Tag
|
|||
|
||||
// New returns a new Collator initialized for the given locale.
|
||||
func New(t language.Tag, o ...Option) *Collator {
|
||||
index := newcolltab.MatchLang(t, tags)
|
||||
index := colltab.MatchLang(t, tags)
|
||||
c := newCollator(getTable(locales[index]))
|
||||
|
||||
// Set options from the user-supplied tag.
|
||||
|
@ -236,7 +235,7 @@ func (c *Collator) getColElemsString(str string) []colltab.Elem {
|
|||
type iter struct {
|
||||
wa [512]colltab.Elem
|
||||
|
||||
newcolltab.Iter
|
||||
colltab.Iter
|
||||
pce int
|
||||
}
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ import (
|
|||
"bytes"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/collate/colltab"
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
|
|
|
@ -1,145 +0,0 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package colltab
|
||||
|
||||
import "unicode/utf8"
|
||||
|
||||
// For a description of ContractTrieSet, see exp/locale/collate/build/contract.go.
|
||||
|
||||
type ContractTrieSet []struct{ L, H, N, I uint8 }
|
||||
|
||||
// ctScanner is used to match a trie to an input sequence.
|
||||
// A contraction may match a non-contiguous sequence of bytes in an input string.
|
||||
// For example, if there is a contraction for <a, combining_ring>, it should match
|
||||
// the sequence <a, combining_cedilla, combining_ring>, as combining_cedilla does
|
||||
// not block combining_ring.
|
||||
// ctScanner does not automatically skip over non-blocking non-starters, but rather
|
||||
// retains the state of the last match and leaves it up to the user to continue
|
||||
// the match at the appropriate points.
|
||||
type ctScanner struct {
|
||||
states ContractTrieSet
|
||||
s []byte
|
||||
n int
|
||||
index int
|
||||
pindex int
|
||||
done bool
|
||||
}
|
||||
|
||||
type ctScannerString struct {
|
||||
states ContractTrieSet
|
||||
s string
|
||||
n int
|
||||
index int
|
||||
pindex int
|
||||
done bool
|
||||
}
|
||||
|
||||
func (t ContractTrieSet) scanner(index, n int, b []byte) ctScanner {
|
||||
return ctScanner{s: b, states: t[index:], n: n}
|
||||
}
|
||||
|
||||
func (t ContractTrieSet) scannerString(index, n int, str string) ctScannerString {
|
||||
return ctScannerString{s: str, states: t[index:], n: n}
|
||||
}
|
||||
|
||||
// result returns the offset i and bytes consumed p so far. If no suffix
|
||||
// matched, i and p will be 0.
|
||||
func (s *ctScanner) result() (i, p int) {
|
||||
return s.index, s.pindex
|
||||
}
|
||||
|
||||
func (s *ctScannerString) result() (i, p int) {
|
||||
return s.index, s.pindex
|
||||
}
|
||||
|
||||
const (
|
||||
final = 0
|
||||
noIndex = 0xFF
|
||||
)
|
||||
|
||||
// scan matches the longest suffix at the current location in the input
|
||||
// and returns the number of bytes consumed.
|
||||
func (s *ctScanner) scan(p int) int {
|
||||
pr := p // the p at the rune start
|
||||
str := s.s
|
||||
states, n := s.states, s.n
|
||||
for i := 0; i < n && p < len(str); {
|
||||
e := states[i]
|
||||
c := str[p]
|
||||
// TODO: a significant number of contractions are of a form that
|
||||
// cannot match discontiguous UTF-8 in a normalized string. We could let
|
||||
// a negative value of e.n mean that we can set s.done = true and avoid
|
||||
// the need for additional matches.
|
||||
if c >= e.L {
|
||||
if e.L == c {
|
||||
p++
|
||||
if e.I != noIndex {
|
||||
s.index = int(e.I)
|
||||
s.pindex = p
|
||||
}
|
||||
if e.N != final {
|
||||
i, states, n = 0, states[int(e.H)+n:], int(e.N)
|
||||
if p >= len(str) || utf8.RuneStart(str[p]) {
|
||||
s.states, s.n, pr = states, n, p
|
||||
}
|
||||
} else {
|
||||
s.done = true
|
||||
return p
|
||||
}
|
||||
continue
|
||||
} else if e.N == final && c <= e.H {
|
||||
p++
|
||||
s.done = true
|
||||
s.index = int(c-e.L) + int(e.I)
|
||||
s.pindex = p
|
||||
return p
|
||||
}
|
||||
}
|
||||
i++
|
||||
}
|
||||
return pr
|
||||
}
|
||||
|
||||
// scan is a verbatim copy of ctScanner.scan.
|
||||
func (s *ctScannerString) scan(p int) int {
|
||||
pr := p // the p at the rune start
|
||||
str := s.s
|
||||
states, n := s.states, s.n
|
||||
for i := 0; i < n && p < len(str); {
|
||||
e := states[i]
|
||||
c := str[p]
|
||||
// TODO: a significant number of contractions are of a form that
|
||||
// cannot match discontiguous UTF-8 in a normalized string. We could let
|
||||
// a negative value of e.n mean that we can set s.done = true and avoid
|
||||
// the need for additional matches.
|
||||
if c >= e.L {
|
||||
if e.L == c {
|
||||
p++
|
||||
if e.I != noIndex {
|
||||
s.index = int(e.I)
|
||||
s.pindex = p
|
||||
}
|
||||
if e.N != final {
|
||||
i, states, n = 0, states[int(e.H)+n:], int(e.N)
|
||||
if p >= len(str) || utf8.RuneStart(str[p]) {
|
||||
s.states, s.n, pr = states, n, p
|
||||
}
|
||||
} else {
|
||||
s.done = true
|
||||
return p
|
||||
}
|
||||
continue
|
||||
} else if e.N == final && c <= e.H {
|
||||
p++
|
||||
s.done = true
|
||||
s.index = int(c-e.L) + int(e.I)
|
||||
s.pindex = p
|
||||
return p
|
||||
}
|
||||
}
|
||||
i++
|
||||
}
|
||||
return pr
|
||||
}
|
|
@ -1,132 +0,0 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package colltab
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
type lookupStrings struct {
|
||||
str string
|
||||
offset int
|
||||
n int // bytes consumed from input
|
||||
}
|
||||
|
||||
type LookupTest struct {
|
||||
lookup []lookupStrings
|
||||
n int
|
||||
tries ContractTrieSet
|
||||
}
|
||||
|
||||
var lookupTests = []LookupTest{
|
||||
{[]lookupStrings{
|
||||
{"abc", 1, 3},
|
||||
{"a", 0, 0},
|
||||
{"b", 0, 0},
|
||||
{"c", 0, 0},
|
||||
{"d", 0, 0},
|
||||
},
|
||||
1,
|
||||
ContractTrieSet{
|
||||
{'a', 0, 1, 0xFF},
|
||||
{'b', 0, 1, 0xFF},
|
||||
{'c', 'c', 0, 1},
|
||||
},
|
||||
},
|
||||
{[]lookupStrings{
|
||||
{"abc", 1, 3},
|
||||
{"abd", 2, 3},
|
||||
{"abe", 3, 3},
|
||||
{"a", 0, 0},
|
||||
{"ab", 0, 0},
|
||||
{"d", 0, 0},
|
||||
{"f", 0, 0},
|
||||
},
|
||||
1,
|
||||
ContractTrieSet{
|
||||
{'a', 0, 1, 0xFF},
|
||||
{'b', 0, 1, 0xFF},
|
||||
{'c', 'e', 0, 1},
|
||||
},
|
||||
},
|
||||
{[]lookupStrings{
|
||||
{"abc", 1, 3},
|
||||
{"ab", 2, 2},
|
||||
{"a", 3, 1},
|
||||
{"abcd", 1, 3},
|
||||
{"abe", 2, 2},
|
||||
},
|
||||
1,
|
||||
ContractTrieSet{
|
||||
{'a', 0, 1, 3},
|
||||
{'b', 0, 1, 2},
|
||||
{'c', 'c', 0, 1},
|
||||
},
|
||||
},
|
||||
{[]lookupStrings{
|
||||
{"abc", 1, 3},
|
||||
{"abd", 2, 3},
|
||||
{"ab", 3, 2},
|
||||
{"ac", 4, 2},
|
||||
{"a", 5, 1},
|
||||
{"b", 6, 1},
|
||||
{"ba", 6, 1},
|
||||
},
|
||||
2,
|
||||
ContractTrieSet{
|
||||
{'b', 'b', 0, 6},
|
||||
{'a', 0, 2, 5},
|
||||
{'c', 'c', 0, 4},
|
||||
{'b', 0, 1, 3},
|
||||
{'c', 'd', 0, 1},
|
||||
},
|
||||
},
|
||||
{[]lookupStrings{
|
||||
{"bcde", 2, 4},
|
||||
{"bc", 7, 2},
|
||||
{"ab", 6, 2},
|
||||
{"bcd", 5, 3},
|
||||
{"abcd", 1, 4},
|
||||
{"abc", 4, 3},
|
||||
{"bcdf", 3, 4},
|
||||
},
|
||||
2,
|
||||
ContractTrieSet{
|
||||
{'b', 3, 1, 0xFF},
|
||||
{'a', 0, 1, 0xFF},
|
||||
{'b', 0, 1, 6},
|
||||
{'c', 0, 1, 4},
|
||||
{'d', 'd', 0, 1},
|
||||
{'c', 0, 1, 7},
|
||||
{'d', 0, 1, 5},
|
||||
{'e', 'f', 0, 2},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
func lookup(c *ContractTrieSet, nnode int, s []uint8) (i, n int) {
|
||||
scan := c.scanner(0, nnode, s)
|
||||
scan.scan(0)
|
||||
return scan.result()
|
||||
}
|
||||
|
||||
func TestLookupContraction(t *testing.T) {
|
||||
for i, tt := range lookupTests {
|
||||
cts := ContractTrieSet(tt.tries)
|
||||
for j, lu := range tt.lookup {
|
||||
str := lu.str
|
||||
for _, s := range []string{str, str + "X"} {
|
||||
const msg = `%d:%d: %s of "%s" %v; want %v`
|
||||
offset, n := lookup(&cts, tt.n, []byte(s))
|
||||
if offset != lu.offset {
|
||||
t.Errorf(msg, i, j, "offset", s, offset, lu.offset)
|
||||
}
|
||||
if n != lu.n {
|
||||
t.Errorf(msg, i, j, "bytes consumed", s, n, len(str))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -10,7 +10,7 @@ package collate
|
|||
import (
|
||||
"fmt"
|
||||
|
||||
"golang.org/x/text/collate/colltab"
|
||||
"golang.org/x/text/internal/colltab"
|
||||
)
|
||||
|
||||
const (
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
package collate
|
||||
|
||||
import "golang.org/x/text/collate/colltab"
|
||||
import "golang.org/x/text/internal/colltab"
|
||||
|
||||
const blockSize = 64
|
||||
|
||||
|
|
|
@ -27,7 +27,7 @@ import (
|
|||
|
||||
"golang.org/x/text/collate"
|
||||
"golang.org/x/text/collate/build"
|
||||
"golang.org/x/text/collate/colltab"
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/unicode/cldr"
|
||||
|
|
|
@ -7,7 +7,7 @@ package collate
|
|||
import (
|
||||
"sort"
|
||||
|
||||
"golang.org/x/text/collate/colltab"
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
|
|
@ -8,7 +8,7 @@ import (
|
|||
"strings"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/collate/colltab"
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ import (
|
|||
"testing"
|
||||
|
||||
"golang.org/x/text/collate/build"
|
||||
"golang.org/x/text/collate/colltab"
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
|
|
|
@ -6,9 +6,9 @@ package colltab
|
|||
|
||||
import "unicode/utf8"
|
||||
|
||||
// For a description of contractTrieSet, see text/collate/build/contract.go.
|
||||
// For a description of ContractTrieSet, see text/collate/build/contract.go.
|
||||
|
||||
type contractTrieSet []struct{ l, h, n, i uint8 }
|
||||
type ContractTrieSet []struct{ L, H, N, I uint8 }
|
||||
|
||||
// ctScanner is used to match a trie to an input sequence.
|
||||
// A contraction may match a non-contiguous sequence of bytes in an input string.
|
||||
|
@ -19,7 +19,7 @@ type contractTrieSet []struct{ l, h, n, i uint8 }
|
|||
// retains the state of the last match and leaves it up to the user to continue
|
||||
// the match at the appropriate points.
|
||||
type ctScanner struct {
|
||||
states contractTrieSet
|
||||
states ContractTrieSet
|
||||
s []byte
|
||||
n int
|
||||
index int
|
||||
|
@ -28,7 +28,7 @@ type ctScanner struct {
|
|||
}
|
||||
|
||||
type ctScannerString struct {
|
||||
states contractTrieSet
|
||||
states ContractTrieSet
|
||||
s string
|
||||
n int
|
||||
index int
|
||||
|
@ -36,11 +36,11 @@ type ctScannerString struct {
|
|||
done bool
|
||||
}
|
||||
|
||||
func (t contractTrieSet) scanner(index, n int, b []byte) ctScanner {
|
||||
func (t ContractTrieSet) scanner(index, n int, b []byte) ctScanner {
|
||||
return ctScanner{s: b, states: t[index:], n: n}
|
||||
}
|
||||
|
||||
func (t contractTrieSet) scannerString(index, n int, str string) ctScannerString {
|
||||
func (t ContractTrieSet) scannerString(index, n int, str string) ctScannerString {
|
||||
return ctScannerString{s: str, states: t[index:], n: n}
|
||||
}
|
||||
|
||||
|
@ -72,15 +72,15 @@ func (s *ctScanner) scan(p int) int {
|
|||
// cannot match discontiguous UTF-8 in a normalized string. We could let
|
||||
// a negative value of e.n mean that we can set s.done = true and avoid
|
||||
// the need for additional matches.
|
||||
if c >= e.l {
|
||||
if e.l == c {
|
||||
if c >= e.L {
|
||||
if e.L == c {
|
||||
p++
|
||||
if e.i != noIndex {
|
||||
s.index = int(e.i)
|
||||
if e.I != noIndex {
|
||||
s.index = int(e.I)
|
||||
s.pindex = p
|
||||
}
|
||||
if e.n != final {
|
||||
i, states, n = 0, states[int(e.h)+n:], int(e.n)
|
||||
if e.N != final {
|
||||
i, states, n = 0, states[int(e.H)+n:], int(e.N)
|
||||
if p >= len(str) || utf8.RuneStart(str[p]) {
|
||||
s.states, s.n, pr = states, n, p
|
||||
}
|
||||
|
@ -89,10 +89,10 @@ func (s *ctScanner) scan(p int) int {
|
|||
return p
|
||||
}
|
||||
continue
|
||||
} else if e.n == final && c <= e.h {
|
||||
} else if e.N == final && c <= e.H {
|
||||
p++
|
||||
s.done = true
|
||||
s.index = int(c-e.l) + int(e.i)
|
||||
s.index = int(c-e.L) + int(e.I)
|
||||
s.pindex = p
|
||||
return p
|
||||
}
|
||||
|
@ -114,15 +114,15 @@ func (s *ctScannerString) scan(p int) int {
|
|||
// cannot match discontiguous UTF-8 in a normalized string. We could let
|
||||
// a negative value of e.n mean that we can set s.done = true and avoid
|
||||
// the need for additional matches.
|
||||
if c >= e.l {
|
||||
if e.l == c {
|
||||
if c >= e.L {
|
||||
if e.L == c {
|
||||
p++
|
||||
if e.i != noIndex {
|
||||
s.index = int(e.i)
|
||||
if e.I != noIndex {
|
||||
s.index = int(e.I)
|
||||
s.pindex = p
|
||||
}
|
||||
if e.n != final {
|
||||
i, states, n = 0, states[int(e.h)+n:], int(e.n)
|
||||
if e.N != final {
|
||||
i, states, n = 0, states[int(e.H)+n:], int(e.N)
|
||||
if p >= len(str) || utf8.RuneStart(str[p]) {
|
||||
s.states, s.n, pr = states, n, p
|
||||
}
|
||||
|
@ -131,10 +131,10 @@ func (s *ctScannerString) scan(p int) int {
|
|||
return p
|
||||
}
|
||||
continue
|
||||
} else if e.n == final && c <= e.h {
|
||||
} else if e.N == final && c <= e.H {
|
||||
p++
|
||||
s.done = true
|
||||
s.index = int(c-e.l) + int(e.i)
|
||||
s.index = int(c-e.L) + int(e.I)
|
||||
s.pindex = p
|
||||
return p
|
||||
}
|
||||
|
|
|
@ -14,102 +14,98 @@ type lookupStrings struct {
|
|||
n int // bytes consumed from input
|
||||
}
|
||||
|
||||
var lookupTests = []struct {
|
||||
type LookupTest struct {
|
||||
lookup []lookupStrings
|
||||
n int
|
||||
tries contractTrieSet
|
||||
}{
|
||||
{
|
||||
[]lookupStrings{
|
||||
{"abc", 1, 3},
|
||||
{"a", 0, 0},
|
||||
{"b", 0, 0},
|
||||
{"c", 0, 0},
|
||||
{"d", 0, 0},
|
||||
},
|
||||
1,
|
||||
contractTrieSet{
|
||||
{'a', 0, 1, 0xFF},
|
||||
{'b', 0, 1, 0xFF},
|
||||
{'c', 'c', 0, 1},
|
||||
},
|
||||
},
|
||||
{
|
||||
[]lookupStrings{
|
||||
{"abc", 1, 3},
|
||||
{"abd", 2, 3},
|
||||
{"abe", 3, 3},
|
||||
{"a", 0, 0},
|
||||
{"ab", 0, 0},
|
||||
{"d", 0, 0},
|
||||
{"f", 0, 0},
|
||||
},
|
||||
1,
|
||||
contractTrieSet{
|
||||
{'a', 0, 1, 0xFF},
|
||||
{'b', 0, 1, 0xFF},
|
||||
{'c', 'e', 0, 1},
|
||||
},
|
||||
},
|
||||
{
|
||||
[]lookupStrings{
|
||||
{"abc", 1, 3},
|
||||
{"ab", 2, 2},
|
||||
{"a", 3, 1},
|
||||
{"abcd", 1, 3},
|
||||
{"abe", 2, 2},
|
||||
},
|
||||
1,
|
||||
contractTrieSet{
|
||||
{'a', 0, 1, 3},
|
||||
{'b', 0, 1, 2},
|
||||
{'c', 'c', 0, 1},
|
||||
},
|
||||
},
|
||||
{
|
||||
[]lookupStrings{
|
||||
{"abc", 1, 3},
|
||||
{"abd", 2, 3},
|
||||
{"ab", 3, 2},
|
||||
{"ac", 4, 2},
|
||||
{"a", 5, 1},
|
||||
{"b", 6, 1},
|
||||
{"ba", 6, 1},
|
||||
},
|
||||
2,
|
||||
contractTrieSet{
|
||||
{'b', 'b', 0, 6},
|
||||
{'a', 0, 2, 5},
|
||||
{'c', 'c', 0, 4},
|
||||
{'b', 0, 1, 3},
|
||||
{'c', 'd', 0, 1},
|
||||
},
|
||||
},
|
||||
{
|
||||
[]lookupStrings{
|
||||
{"bcde", 2, 4},
|
||||
{"bc", 7, 2},
|
||||
{"ab", 6, 2},
|
||||
{"bcd", 5, 3},
|
||||
{"abcd", 1, 4},
|
||||
{"abc", 4, 3},
|
||||
{"bcdf", 3, 4},
|
||||
},
|
||||
2,
|
||||
contractTrieSet{
|
||||
{'b', 3, 1, 0xFF},
|
||||
{'a', 0, 1, 0xFF},
|
||||
{'b', 0, 1, 6},
|
||||
{'c', 0, 1, 4},
|
||||
{'d', 'd', 0, 1},
|
||||
{'c', 0, 1, 7},
|
||||
{'d', 0, 1, 5},
|
||||
{'e', 'f', 0, 2},
|
||||
},
|
||||
},
|
||||
tries ContractTrieSet
|
||||
}
|
||||
|
||||
func lookup(c *contractTrieSet, nnode int, s []uint8) (i, n int) {
|
||||
var lookupTests = []LookupTest{{
|
||||
[]lookupStrings{
|
||||
{"abc", 1, 3},
|
||||
{"a", 0, 0},
|
||||
{"b", 0, 0},
|
||||
{"c", 0, 0},
|
||||
{"d", 0, 0},
|
||||
},
|
||||
1,
|
||||
ContractTrieSet{
|
||||
{'a', 0, 1, 0xFF},
|
||||
{'b', 0, 1, 0xFF},
|
||||
{'c', 'c', 0, 1},
|
||||
},
|
||||
}, {
|
||||
[]lookupStrings{
|
||||
{"abc", 1, 3},
|
||||
{"abd", 2, 3},
|
||||
{"abe", 3, 3},
|
||||
{"a", 0, 0},
|
||||
{"ab", 0, 0},
|
||||
{"d", 0, 0},
|
||||
{"f", 0, 0},
|
||||
},
|
||||
1,
|
||||
ContractTrieSet{
|
||||
{'a', 0, 1, 0xFF},
|
||||
{'b', 0, 1, 0xFF},
|
||||
{'c', 'e', 0, 1},
|
||||
},
|
||||
}, {
|
||||
[]lookupStrings{
|
||||
{"abc", 1, 3},
|
||||
{"ab", 2, 2},
|
||||
{"a", 3, 1},
|
||||
{"abcd", 1, 3},
|
||||
{"abe", 2, 2},
|
||||
},
|
||||
1,
|
||||
ContractTrieSet{
|
||||
{'a', 0, 1, 3},
|
||||
{'b', 0, 1, 2},
|
||||
{'c', 'c', 0, 1},
|
||||
},
|
||||
}, {
|
||||
[]lookupStrings{
|
||||
{"abc", 1, 3},
|
||||
{"abd", 2, 3},
|
||||
{"ab", 3, 2},
|
||||
{"ac", 4, 2},
|
||||
{"a", 5, 1},
|
||||
{"b", 6, 1},
|
||||
{"ba", 6, 1},
|
||||
},
|
||||
2,
|
||||
ContractTrieSet{
|
||||
{'b', 'b', 0, 6},
|
||||
{'a', 0, 2, 5},
|
||||
{'c', 'c', 0, 4},
|
||||
{'b', 0, 1, 3},
|
||||
{'c', 'd', 0, 1},
|
||||
},
|
||||
}, {
|
||||
[]lookupStrings{
|
||||
{"bcde", 2, 4},
|
||||
{"bc", 7, 2},
|
||||
{"ab", 6, 2},
|
||||
{"bcd", 5, 3},
|
||||
{"abcd", 1, 4},
|
||||
{"abc", 4, 3},
|
||||
{"bcdf", 3, 4},
|
||||
},
|
||||
2,
|
||||
ContractTrieSet{
|
||||
{'b', 3, 1, 0xFF},
|
||||
{'a', 0, 1, 0xFF},
|
||||
{'b', 0, 1, 6},
|
||||
{'c', 0, 1, 4},
|
||||
{'d', 'd', 0, 1},
|
||||
{'c', 0, 1, 7},
|
||||
{'d', 0, 1, 5},
|
||||
{'e', 'f', 0, 2},
|
||||
},
|
||||
}}
|
||||
|
||||
func lookup(c *ContractTrieSet, nnode int, s []uint8) (i, n int) {
|
||||
scan := c.scanner(0, nnode, s)
|
||||
scan.scan(0)
|
||||
return scan.result()
|
||||
|
@ -117,11 +113,11 @@ func lookup(c *contractTrieSet, nnode int, s []uint8) (i, n int) {
|
|||
|
||||
func TestLookupContraction(t *testing.T) {
|
||||
for i, tt := range lookupTests {
|
||||
cts := contractTrieSet(tt.tries)
|
||||
cts := ContractTrieSet(tt.tries)
|
||||
for j, lu := range tt.lookup {
|
||||
str := lu.str
|
||||
for _, s := range []string{str, str + "X"} {
|
||||
const msg = "%d:%d: %s of %q %v; want %v"
|
||||
const msg = `%d:%d: %s of "%s" %v; want %v`
|
||||
offset, n := lookup(&cts, tt.n, []byte(s))
|
||||
if offset != lu.offset {
|
||||
t.Errorf(msg, i, j, "offset", s, offset, lu.offset)
|
||||
|
|
|
@ -4,16 +4,12 @@
|
|||
|
||||
package colltab
|
||||
|
||||
import (
|
||||
"golang.org/x/text/collate/colltab"
|
||||
)
|
||||
|
||||
// An Iter incrementally converts chunks of the input text to collation
|
||||
// elements, while ensuring that the collation elements are in normalized order
|
||||
// (that is, they are in the order as if the input text were normalized first).
|
||||
type Iter struct {
|
||||
Weighter colltab.Weighter
|
||||
Elems []colltab.Elem
|
||||
Weighter Weighter
|
||||
Elems []Elem
|
||||
// N is the number of elements in Elems that will not be reordered on
|
||||
// subsequent iterations, N <= len(Elems).
|
||||
N int
|
||||
|
|
|
@ -6,22 +6,8 @@ package colltab
|
|||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/collate/colltab"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultSecondary = 0x20
|
||||
)
|
||||
|
||||
func makeCE(w []int) colltab.Elem {
|
||||
ce, err := colltab.MakeElem(w[0], w[1], w[2], uint8(w[3]))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return ce
|
||||
}
|
||||
|
||||
func TestDoNorm(t *testing.T) {
|
||||
const div = -1 // The insertion point of the next block.
|
||||
tests := []struct {
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package colltab // import "golang.org/x/text/collate/colltab"
|
||||
package colltab // import "golang.org/x/text/internal/colltab"
|
||||
|
||||
// A Weighter can be used as a source for Collator and Searcher.
|
||||
type Weighter interface {
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
package search
|
||||
|
||||
import "golang.org/x/text/collate/colltab"
|
||||
import "golang.org/x/text/internal/colltab"
|
||||
|
||||
const blockSize = 64
|
||||
|
||||
|
|
|
@ -5,8 +5,7 @@
|
|||
package search
|
||||
|
||||
import (
|
||||
"golang.org/x/text/collate/colltab"
|
||||
newcolltab "golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/internal/colltab"
|
||||
)
|
||||
|
||||
// TODO: handle variable primary weights?
|
||||
|
@ -44,7 +43,7 @@ func isIgnorable(m *Matcher, e colltab.Elem) bool {
|
|||
|
||||
// TODO: Use a Boyer-Moore-like algorithm (probably Sunday) for searching.
|
||||
|
||||
func (p *Pattern) forwardSearch(it *newcolltab.Iter) (start, end int) {
|
||||
func (p *Pattern) forwardSearch(it *colltab.Iter) (start, end int) {
|
||||
for start := 0; it.Next(); it.Reset(start) {
|
||||
nextStart := it.End()
|
||||
if end := p.searchOnce(it); end != -1 {
|
||||
|
@ -55,7 +54,7 @@ func (p *Pattern) forwardSearch(it *newcolltab.Iter) (start, end int) {
|
|||
return -1, -1
|
||||
}
|
||||
|
||||
func (p *Pattern) anchoredForwardSearch(it *newcolltab.Iter) (start, end int) {
|
||||
func (p *Pattern) anchoredForwardSearch(it *colltab.Iter) (start, end int) {
|
||||
if it.Next() {
|
||||
if end := p.searchOnce(it); end != -1 {
|
||||
return 0, end
|
||||
|
@ -90,7 +89,7 @@ func tertiary(e colltab.Elem) int {
|
|||
// to be filled with collation elements of the first segment, where n is the
|
||||
// number of source bytes consumed for this segment. It will return the end
|
||||
// position of the match or -1.
|
||||
func (p *Pattern) searchOnce(it *newcolltab.Iter) (end int) {
|
||||
func (p *Pattern) searchOnce(it *colltab.Iter) (end int) {
|
||||
var pLevel [4]int
|
||||
|
||||
m := p.m
|
||||
|
|
|
@ -18,8 +18,7 @@ package search // import "golang.org/x/text/search"
|
|||
import (
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/collate/colltab"
|
||||
newcolltab "golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
|
@ -76,7 +75,7 @@ func init() {
|
|||
// New returns a new Matcher for the given language and options.
|
||||
func New(t language.Tag, opts ...Option) *Matcher {
|
||||
m := &Matcher{
|
||||
w: getTable(locales[newcolltab.MatchLang(t, tags)]),
|
||||
w: getTable(locales[colltab.MatchLang(t, tags)]),
|
||||
}
|
||||
for _, f := range opts {
|
||||
f(m)
|
||||
|
@ -136,7 +135,7 @@ func (m *Matcher) EqualString(a, b string) bool {
|
|||
// Compile compiles and returns a pattern that can be used for faster searching.
|
||||
func (m *Matcher) Compile(b []byte) *Pattern {
|
||||
p := &Pattern{m: m}
|
||||
iter := newcolltab.Iter{Weighter: m.w}
|
||||
iter := colltab.Iter{Weighter: m.w}
|
||||
for iter.SetInput(b); iter.Next(); {
|
||||
}
|
||||
p.ce = iter.Elems
|
||||
|
@ -148,7 +147,7 @@ func (m *Matcher) Compile(b []byte) *Pattern {
|
|||
// searching.
|
||||
func (m *Matcher) CompileString(s string) *Pattern {
|
||||
p := &Pattern{m: m}
|
||||
iter := newcolltab.Iter{Weighter: m.w}
|
||||
iter := colltab.Iter{Weighter: m.w}
|
||||
for iter.SetInputString(s); iter.Next(); {
|
||||
}
|
||||
p.ce = iter.Elems
|
||||
|
@ -174,7 +173,7 @@ func (p *Pattern) Index(b []byte, opts ...IndexOption) (start, end int) {
|
|||
// and small enough to not cause too much overhead initializing.
|
||||
var buf [8]colltab.Elem
|
||||
|
||||
it := &newcolltab.Iter{
|
||||
it := &colltab.Iter{
|
||||
Weighter: p.m.w,
|
||||
Elems: buf[:0],
|
||||
}
|
||||
|
@ -204,7 +203,7 @@ func (p *Pattern) IndexString(s string, opts ...IndexOption) (start, end int) {
|
|||
// and small enough to not cause too much overhead initializing.
|
||||
var buf [8]colltab.Elem
|
||||
|
||||
it := &newcolltab.Iter{
|
||||
it := &colltab.Iter{
|
||||
Weighter: p.m.w,
|
||||
Elems: buf[:0],
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче