go.text/unicode/norm: as quickSpan now no longer uses reorderBuffer, we can

optimize a few more calls to not create a reorderBuffer unless necessary.
This will greatly improve performance for small strings if there is no need
to normalize.

Updated Bytes, String, IsNormal* and FirstBoundary*. The latter is
especially important as it will always only inspect a small piece of text.

Also added a few benchmarks.

R=r
CC=golang-dev
https://golang.org/cl/28260043
This commit is contained in:
Marcel van Lohuizen 2013-11-18 20:12:15 +01:00
Родитель de84dfdf8f
Коммит 9465bcffb0
2 изменённых файлов: 67 добавлений и 31 удалений

Просмотреть файл

@ -34,38 +34,41 @@ const (
// Bytes returns f(b). May return b if f(b) = b.
func (f Form) Bytes(b []byte) []byte {
rb := reorderBuffer{}
rb.init(f, b)
n, ok := rb.f.quickSpan(rb.src, 0, len(b), true)
src := inputBytes(b)
ft := formTable[f]
n, ok := ft.quickSpan(src, 0, len(b), true)
if ok {
return b
}
out := make([]byte, n, len(b))
copy(out, b[0:n])
rb := reorderBuffer{f: *ft, src: src, nsrc: len(b)}
return doAppend(&rb, out, n)
}
// String returns f(s).
func (f Form) String(s string) string {
rb := reorderBuffer{}
rb.initString(f, s)
n, ok := rb.f.quickSpan(rb.src, 0, len(s), true)
src := inputString(s)
ft := formTable[f]
n, ok := ft.quickSpan(src, 0, len(s), true)
if ok {
return s
}
out := make([]byte, n, len(s))
copy(out, s[0:n])
rb := reorderBuffer{f: *ft, src: src, nsrc: len(s)}
return string(doAppend(&rb, out, n))
}
// IsNormal returns true if b == f(b).
func (f Form) IsNormal(b []byte) bool {
rb := reorderBuffer{}
rb.init(f, b)
bp, ok := rb.f.quickSpan(rb.src, 0, len(b), true)
src := inputBytes(b)
ft := formTable[f]
bp, ok := ft.quickSpan(src, 0, len(b), true)
if ok {
return true
}
rb := reorderBuffer{f: *ft, src: src, nsrc: len(b)}
for bp < len(b) {
decomposeSegment(&rb, bp)
if rb.f.composing {
@ -93,12 +96,13 @@ func (f Form) IsNormal(b []byte) bool {
// IsNormalString returns true if s == f(s).
func (f Form) IsNormalString(s string) bool {
rb := reorderBuffer{}
rb.initString(f, s)
bp, ok := rb.f.quickSpan(rb.src, 0, len(s), true)
src := inputString(s)
ft := formTable[f]
bp, ok := ft.quickSpan(src, 0, len(s), true)
if ok {
return true
}
rb := reorderBuffer{f: *ft, src: src, nsrc: len(s)}
for bp < len(s) {
decomposeSegment(&rb, bp)
if rb.f.composing {
@ -160,11 +164,25 @@ func appendQuick(rb *reorderBuffer, dst []byte, i int) ([]byte, int) {
// Append returns f(append(out, b...)).
// The buffer out must be nil, empty, or equal to f(out).
func (f Form) Append(out []byte, src ...byte) []byte {
if len(src) == 0 {
return f.doAppend(out, inputBytes(src), len(src))
}
func (f Form) doAppend(out []byte, src input, n int) []byte {
if n == 0 {
return out
}
rb := reorderBuffer{}
rb.init(f, src)
ft := formTable[f]
// Attempt to do a quickSpan first so we can avoid initializing the reorderBuffer.
if len(out) == 0 {
p, _ := ft.quickSpan(src, 0, n, true)
out = src.appendSlice(out, 0, p)
if p == n {
return out
}
rb := reorderBuffer{f: *ft, src: src, nsrc: n}
return doAppendInner(&rb, out, p)
}
rb := reorderBuffer{f: *ft, src: src, nsrc: n}
return doAppend(&rb, out, 0)
}
@ -206,7 +224,12 @@ func doAppend(rb *reorderBuffer, out []byte, p int) []byte {
if rb.nrune == 0 {
out, p = appendQuick(rb, out, p)
}
for p < n {
return doAppendInner(rb, out, p)
}
func doAppendInner(rb *reorderBuffer, out []byte, p int) []byte {
fd := &rb.f
for n := rb.nsrc; p < n; {
p = decomposeSegment(rb, p)
if fd.composing {
rb.compose()
@ -220,12 +243,7 @@ func doAppend(rb *reorderBuffer, out []byte, p int) []byte {
// AppendString returns f(append(out, []byte(s))).
// The buffer out must be nil, empty, or equal to f(out).
func (f Form) AppendString(out []byte, src string) []byte {
if len(src) == 0 {
return out
}
rb := reorderBuffer{}
rb.initString(f, src)
return doAppend(&rb, out, 0)
return f.doAppend(out, inputString(src), len(src))
}
// QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]).
@ -306,18 +324,15 @@ func (f Form) QuickSpanString(s string) int {
// FirstBoundary returns the position i of the first boundary in b
// or -1 if b contains no boundary.
func (f Form) FirstBoundary(b []byte) int {
rb := reorderBuffer{}
rb.init(f, b)
return firstBoundary(&rb)
return f.firstBoundary(inputBytes(b), len(b))
}
func firstBoundary(rb *reorderBuffer) int {
src, nsrc := rb.src, rb.nsrc
func (f Form) firstBoundary(src input, nsrc int) int {
i := src.skipNonStarter(0)
if i >= nsrc {
return -1
}
fd := &rb.f
fd := formTable[f]
info := fd.info(src, i)
for n := 0; info.size != 0 && !info.BoundaryBefore(); {
i += int(info.size)
@ -341,9 +356,7 @@ func firstBoundary(rb *reorderBuffer) int {
// FirstBoundaryInString returns the position i of the first boundary in s
// or -1 if s contains no boundary.
func (f Form) FirstBoundaryInString(s string) int {
rb := reorderBuffer{}
rb.initString(f, s)
return firstBoundary(&rb)
return f.firstBoundary(inputString(s), len(s))
}
// LastBoundary returns the position i of the last boundary in b

Просмотреть файл

@ -505,6 +505,12 @@ func appendBench(f Form, in []byte) func() {
}
}
func bytesBench(f Form, in []byte) func() {
return func() {
f.Bytes(in)
}
}
func iterBench(f Form, in []byte) func() {
iter := Iter{}
return func() {
@ -602,6 +608,23 @@ func BenchmarkLowerCaseChange(b *testing.B) {
doSingle(b, lowerBench, smallChange)
}
func quickSpanBench(f Form, in []byte) func() {
return func() {
f.QuickSpan(in)
}
}
func BenchmarkQuickSpanChangeNFC(b *testing.B) {
doSingle(b, quickSpanBench, smallNoChange)
}
func BenchmarkBytesNoChangeNFC(b *testing.B) {
doSingle(b, bytesBench, smallNoChange)
}
func BenchmarkBytesChangeNFC(b *testing.B) {
doSingle(b, bytesBench, smallChange)
}
func BenchmarkAppendNoChangeNFC(b *testing.B) {
doSingle(b, appendBench, smallNoChange)
}