Merge pull request #1654 from sougou/v3

v3: revised vindexes
This commit is contained in:
sougou 2016-04-27 14:52:45 -07:00
Родитель da0b99b597 ef3f030c6e
Коммит 55b6665b35
13 изменённых файлов: 398 добавлений и 231 удалений

Просмотреть файл

@ -360,9 +360,6 @@ func (rtr *Router) resolveKeys(vals []interface{}, bindVars map[string]interface
return nil, fmt.Errorf("could not find bind var %s", v)
}
}
if v, ok := val.([]byte); ok {
val = string(v)
}
keys = append(keys, val)
}
return keys, nil

Просмотреть файл

@ -251,7 +251,7 @@ func TestInsertSharded(t *testing.T) {
BindVariables: map[string]interface{}{
"keyspace_id": "\x16k@\xb4J\xbaK\xd6",
"_id": int64(1),
"_name": "myname",
"_name": []byte("myname"),
"__seq": int64(1),
},
}}
@ -264,7 +264,7 @@ func TestInsertSharded(t *testing.T) {
wantQueries = []querytypes.BoundQuery{{
Sql: "insert into name_user_map(name, user_id) values (:name, :user_id)",
BindVariables: map[string]interface{}{
"name": "myname",
"name": []byte("myname"),
"user_id": int64(1),
},
}}
@ -284,7 +284,7 @@ func TestInsertSharded(t *testing.T) {
"keyspace_id": "N\xb1\x90ɢ\xfa\x16\x9c",
"_id": int64(3),
"__seq": int64(3),
"_name": "myname2",
"_name": []byte("myname2"),
},
}}
if !reflect.DeepEqual(sbc2.Queries, wantQueries) {
@ -296,7 +296,7 @@ func TestInsertSharded(t *testing.T) {
wantQueries = []querytypes.BoundQuery{{
Sql: "insert into name_user_map(name, user_id) values (:name, :user_id)",
BindVariables: map[string]interface{}{
"name": "myname2",
"name": []byte("myname2"),
"user_id": int64(3),
},
}}
@ -325,7 +325,7 @@ func TestInsertGenerator(t *testing.T) {
"keyspace_id": "\x16k@\xb4J\xbaK\xd6",
"_id": int64(1),
"__seq": int64(1),
"_name": "myname",
"_name": []byte("myname"),
},
}}
if !reflect.DeepEqual(sbc.Queries, wantQueries) {
@ -337,7 +337,7 @@ func TestInsertGenerator(t *testing.T) {
}, {
Sql: "insert into name_user_map(name, user_id) values (:name, :user_id)",
BindVariables: map[string]interface{}{
"name": "myname",
"name": []byte("myname"),
"user_id": int64(1),
},
}}

Просмотреть файл

@ -180,7 +180,7 @@ func TestSelectBindvars(t *testing.T) {
BindVariables: map[string]interface{}{
"name1": []byte("foo1"),
"name2": []byte("foo2"),
"__vals": []interface{}{"foo1", "foo2"},
"__vals": []interface{}{[]byte("foo1"), []byte("foo2")},
},
}}
if !reflect.DeepEqual(sbc1.Queries, wantQueries) {
@ -259,7 +259,7 @@ func TestSelectEqual(t *testing.T) {
wantQueries = []querytypes.BoundQuery{{
Sql: "select user_id from name_user_map where name = :name",
BindVariables: map[string]interface{}{
"name": "foo",
"name": []byte("foo"),
},
}}
if !reflect.DeepEqual(sbclookup.Queries, wantQueries) {
@ -419,7 +419,7 @@ func TestSelectIN(t *testing.T) {
wantQueries = []querytypes.BoundQuery{{
Sql: "select user_id from name_user_map where name = :name",
BindVariables: map[string]interface{}{
"name": "foo",
"name": []byte("foo"),
},
}}
if !reflect.DeepEqual(sbclookup.Queries, wantQueries) {
@ -470,7 +470,7 @@ func TestStreamSelectIN(t *testing.T) {
wantQueries := []querytypes.BoundQuery{{
Sql: "select user_id from name_user_map where name = :name",
BindVariables: map[string]interface{}{
"name": "foo",
"name": []byte("foo"),
},
}}
if !reflect.DeepEqual(sbclookup.Queries, wantQueries) {

Просмотреть файл

@ -0,0 +1,66 @@
package vindexes
import (
"bytes"
"crypto/md5"
"fmt"
)
// BinaryMD5 is a vindex that maps a raw byte-slice id to a keyspace id
// by taking the MD5 hash of the bytes.
type BinaryMD5 struct {
// name is the vindex name returned by String.
name string
}
// NewBinaryMD5 builds a BinaryMD5 vindex carrying the given name.
// The parameter map is ignored.
func NewBinaryMD5(name string, _ map[string]interface{}) (Vindex, error) {
	vind := &BinaryMD5{name: name}
	return vind, nil
}
// String reports the name this vindex was created with.
func (vind *BinaryMD5) String() string { return vind.name }
// Cost reports the cost of this vindex, which is always 1.
func (vind *BinaryMD5) Cost() int { return 1 }
// Verify reports whether id hashes to ksid.
// id must be a []byte; any other type yields an error and a false result.
func (vind *BinaryMD5) Verify(_ VCursor, id interface{}, ksid []byte) (bool, error) {
	data, err := binHashKey(id)
	if err != nil {
		// The prefix names the type as declared (BinaryMD5).
		return false, fmt.Errorf("BinaryMD5.Verify: %v", err)
	}
	return bytes.Equal(data, ksid), nil
}
// Map returns the keyspace id for each of the given ids, in the same order.
// Every id must be a []byte; the first id of any other type aborts the call
// with an error and a nil result.
func (vind *BinaryMD5) Map(_ VCursor, ids []interface{}) ([][]byte, error) {
	out := make([][]byte, 0, len(ids))
	for _, id := range ids {
		data, err := binHashKey(id)
		if err != nil {
			// The prefix names the type as declared (BinaryMD5).
			return nil, fmt.Errorf("BinaryMD5.Map: %v", err)
		}
		out = append(out, data)
	}
	return out, nil
}
// binHashKey hashes key with binHash. key must be a []byte; any other
// dynamic type produces an error that reports the offending type.
func binHashKey(key interface{}) ([]byte, error) {
	if source, ok := key.([]byte); ok {
		return binHash(source), nil
	}
	return nil, fmt.Errorf("unexpected data type for binHash: %T", key)
}
// binHash returns the 16-byte MD5 digest of source.
func binHash(source []byte) []byte {
	hasher := md5.New()
	// Writes to an md5 hash never fail, so the result is not checked.
	hasher.Write(source)
	return hasher.Sum(nil)
}
// init registers the BinaryMD5 constructor under the name "binary_md5".
func init() { Register("binary_md5", NewBinaryMD5) }

Просмотреть файл

@ -0,0 +1,47 @@
package vindexes
import "testing"
// binVindex is the binary_md5 vindex shared by the tests in this file.
var binVindex Vindex

// init creates binVindex. A creation failure panics immediately instead of
// leaving a nil Vindex for the tests to dereference later.
func init() {
	vind, err := CreateVindex("binary_md5", "vch", nil)
	if err != nil {
		panic(err)
	}
	binVindex = vind
}
// TestBinaryMD5Cost checks that the binary_md5 vindex reports a cost of 1.
func TestBinaryMD5Cost(t *testing.T) {
	if got := binVindex.Cost(); got != 1 {
		t.Errorf("Cost(): %d, want 1", got)
	}
}
// TestBinaryMD5 checks that Map produces the expected hash for each input
// and that Verify accepts the same input/hash pair.
func TestBinaryMD5(t *testing.T) {
	tcases := []struct {
		in, out string
	}{{
		in:  "Test",
		out: "\f\xbcf\x11\xf5T\vЀ\x9a8\x8d\xc9Za[",
	}, {
		in:  "TEST",
		out: "\x03;\xd9K\x11h\xd7\xe4\xf0\xd6D\xc3\xc9^5\xbf",
	}, {
		// Replaces a duplicate of the first case: a lower-case input
		// confirms that the hash is case-sensitive.
		in:  "test",
		out: "\x09\x8f\x6b\xcd\x46\x21\xd3\x73\xca\xde\x4e\x83\x26\x27\xb4\xf6",
	}}
	for _, tcase := range tcases {
		got, err := binVindex.(Unique).Map(nil, []interface{}{[]byte(tcase.in)})
		if err != nil {
			// got is nil on error; skip the case instead of indexing it.
			t.Errorf("Map(%#v): %v", tcase.in, err)
			continue
		}
		out := string(got[0])
		if out != tcase.out {
			t.Errorf("Map(%#v): %#v, want %#v", tcase.in, out, tcase.out)
		}
		ok, err := binVindex.Verify(nil, []byte(tcase.in), []byte(tcase.out))
		if err != nil {
			t.Error(err)
		}
		if !ok {
			t.Errorf("Verify(%#v): false, want true", tcase.in)
		}
	}
}

Просмотреть файл

@ -66,7 +66,6 @@ func (vind *Hash) ReverseMap(_ VCursor, ksid []byte) (interface{}, error) {
}
func getNumber(v interface{}) (int64, error) {
// Failsafe check: v will never be a []byte.
if val, ok := v.([]byte); ok {
v = string(val)
}

Просмотреть файл

@ -0,0 +1,81 @@
package vindexes
import (
"bytes"
"fmt"
"golang.org/x/text/collate"
"golang.org/x/text/language"
)
// UnicodeLooseMD5 is a vindex that normalizes and hashes unicode strings
// to a keyspace id. It conservatively converts the string to its base
// characters before hashing. This is also known as UCA level 1.
// Ref: http://www.unicode.org/reports/tr10/#Multi_Level_Comparison.
// This is compatible with MySQL's utf8_unicode_ci collation.
// Trailing spaces are stripped from the input before normalization.
type UnicodeLooseMD5 struct {
// name is the vindex name returned by String.
name string
}
// NewUnicodeLooseMD5 creates a new UnicodeLooseMD5 with the given name.
// The parameter map is ignored.
func NewUnicodeLooseMD5(name string, _ map[string]interface{}) (Vindex, error) {
	return &UnicodeLooseMD5{name: name}, nil
}

// MewUnicodeLooseMD5 is the original, misspelled name of NewUnicodeLooseMD5.
// It is kept so that existing references continue to compile.
//
// Deprecated: use NewUnicodeLooseMD5 instead.
func MewUnicodeLooseMD5(name string, params map[string]interface{}) (Vindex, error) {
	return NewUnicodeLooseMD5(name, params)
}
// String reports the name this vindex was created with.
func (vind *UnicodeLooseMD5) String() string { return vind.name }
// Cost reports the cost of this vindex, which is always 1.
func (vind *UnicodeLooseMD5) Cost() int { return 1 }
// Verify reports whether the normalized hash of id equals ksid.
// id must be a []byte; any other type yields an error and a false result.
func (vind *UnicodeLooseMD5) Verify(_ VCursor, id interface{}, ksid []byte) (bool, error) {
	data, err := unicodeHash(id)
	if err != nil {
		return false, fmt.Errorf("UnicodeLooseMD5.Verify: %v", err)
	}
	return bytes.Equal(data, ksid), nil
}
// Map returns the keyspace id for each of the given ids, in the same order.
// Every id must be a []byte; the first id of any other type aborts the call
// with an error and a nil result.
func (vind *UnicodeLooseMD5) Map(_ VCursor, ids []interface{}) ([][]byte, error) {
	out := make([][]byte, 0, len(ids))
	for _, id := range ids {
		data, err := unicodeHash(id)
		if err != nil {
			// Same "Type.Method: cause" layout that Verify uses.
			return nil, fmt.Errorf("UnicodeLooseMD5.Map: %v", err)
		}
		out = append(out, data)
	}
	return out, nil
}
// unicodeHash normalizes key and hashes the result with binHash.
// key must be a []byte; any other dynamic type produces an error that
// reports the offending type.
func unicodeHash(key interface{}) ([]byte, error) {
	if source, ok := key.([]byte); ok {
		return binHash(normalize(source)), nil
	}
	return nil, fmt.Errorf("unexpected data type for binHash: %T", key)
}
func normalize(in []byte) []byte {
in = bytes.TrimRight(in, " ")
// We use the collation key which can be used to
// perform lexical comparisons.
return normalizer.Key(new(collate.Buffer), in)
}
// normalizer is the collator used by normalize. Specifying the locale as
// english makes the collator work with no language-specific rules, and
// collate.Loose makes it normalize characters to their base versions,
// that is without diacritics, capitals, or widths.
var normalizer = collate.New(language.English, collate.Loose)

// init registers the vindex constructor under the name "unicode_loose_md5".
func init() {
	Register("unicode_loose_md5", MewUnicodeLooseMD5)
}

Просмотреть файл

@ -0,0 +1,110 @@
package vindexes
import "testing"
// charVindex is the unicode_loose_md5 vindex shared by the tests in this file.
var charVindex Vindex

// init creates charVindex. A creation failure panics immediately instead of
// leaving a nil Vindex for the tests to dereference later.
func init() {
	vind, err := CreateVindex("unicode_loose_md5", "utf8ch", nil)
	if err != nil {
		panic(err)
	}
	charVindex = vind
}
// TestUnicodeLosseMD5Cost checks that the unicode_loose_md5 vindex reports
// a cost of 1.
func TestUnicodeLosseMD5Cost(t *testing.T) {
	if got := charVindex.Cost(); got != 1 {
		t.Errorf("Cost(): %d, want 1", got)
	}
}
// TestUnicodeLosseMD5 checks that Map produces the expected hash for each
// input and that Verify accepts the same input/hash pair. Inputs that differ
// only in case, accents or trailing spaces are expected to share a hash.
func TestUnicodeLosseMD5(t *testing.T) {
	tcases := []struct {
		in, out string
	}{{
		in:  "Test",
		out: "\v^۴\x01\xfdu$96\x90I\x1dd\xf1\xf5",
	}, {
		in:  "TEST",
		out: "\v^۴\x01\xfdu$96\x90I\x1dd\xf1\xf5",
	}, {
		in:  "Te\u0301st",
		out: "\v^۴\x01\xfdu$96\x90I\x1dd\xf1\xf5",
	}, {
		in:  "Tést",
		out: "\v^۴\x01\xfdu$96\x90I\x1dd\xf1\xf5",
	}, {
		in:  "Bést",
		out: "²3.Os\xd0\aA\x02bIpo/\xb6",
	}, {
		in:  "Test ",
		out: "\v^۴\x01\xfdu$96\x90I\x1dd\xf1\xf5",
	}, {
		in:  " Test",
		out: "\xa2\xe3Q\\~\x8d\xf1\xff\xd2\xcc\xfc\x11Ʊ\x9d\xd1",
	}, {
		in:  "Test\t",
		out: "\x82Em\xd8z\x9cz\x02\xb1\xc2\x05kZ\xba\xa2r",
	}, {
		in:  "TéstLooong",
		out: "\x96\x83\xe1+\x80C\f\xd4S\xf5\xdfߺ\x81ɥ",
	}, {
		in:  "T",
		out: "\xac\x0f\x91y\xf5\x1d\xb8\u007f\xe8\xec\xc0\xcf@ʹz",
	}}
	for _, tcase := range tcases {
		got, err := charVindex.(Unique).Map(nil, []interface{}{[]byte(tcase.in)})
		if err != nil {
			// got is nil on error; skip the case instead of indexing it.
			t.Errorf("Map(%#v): %v", tcase.in, err)
			continue
		}
		out := string(got[0])
		if out != tcase.out {
			t.Errorf("Map(%#v): %#v, want %#v", tcase.in, out, tcase.out)
		}
		ok, err := charVindex.Verify(nil, []byte(tcase.in), []byte(tcase.out))
		if err != nil {
			t.Error(err)
		}
		if !ok {
			t.Errorf("Verify(%#v): false, want true", tcase.in)
		}
	}
}
// TestNormalization checks the collation key that normalize produces for a
// set of inputs differing in case, accents and surrounding whitespace.
func TestNormalization(t *testing.T) {
	type testCase struct {
		in, out string
	}
	tests := []testCase{
		{in: "Test", out: "\x18\x16\x16L\x17\xf3\x18\x16"},
		{in: "TEST", out: "\x18\x16\x16L\x17\xf3\x18\x16"},
		{in: "Te\u0301st", out: "\x18\x16\x16L\x17\xf3\x18\x16"},
		{in: "Tést", out: "\x18\x16\x16L\x17\xf3\x18\x16"},
		{in: "Bést", out: "\x16\x05\x16L\x17\xf3\x18\x16"},
		{in: "Test ", out: "\x18\x16\x16L\x17\xf3\x18\x16"},
		{in: " Test", out: "\x01\t\x18\x16\x16L\x17\xf3\x18\x16"},
		{in: "Test\t", out: "\x18\x16\x16L\x17\xf3\x18\x16\x01\x00"},
		{in: "TéstLooong", out: "\x18\x16\x16L\x17\xf3\x18\x16\x17\x11\x17q\x17q\x17q\x17O\x16\x91"},
		{in: "T", out: "\x18\x16"},
	}
	for i := range tests {
		tc := tests[i]
		got := string(normalize([]byte(tc.in)))
		if got == tc.out {
			continue
		}
		t.Errorf("normalize(%#v): %#v, want %#v", tc.in, got, tc.out)
	}
}

Просмотреть файл

@ -1,68 +0,0 @@
package vindexes
import (
"bytes"
"fmt"
)
// UTF8cihash defines a vindex that hashes a varchar to a KeyspaceId.
// The input is lower-cased with bytes.ToLower before it is hashed.
type UTF8cihash struct {
// name is the vindex name returned by String.
name string
}
// Newutf8cihash builds a UTF8cihash vindex carrying the given name.
// The parameter map m is not used.
func Newutf8cihash(name string, m map[string]interface{}) (Vindex, error) {
	vind := &UTF8cihash{name: name}
	return vind, nil
}
// String reports the name this vindex was created with.
func (vind *UTF8cihash) String() string { return vind.name }
// Cost reports the cost of this vindex, which is always 1.
func (vind *UTF8cihash) Cost() int { return 1 }
// Verify reports whether the case-insensitive hash of id equals ksid.
// id must be a []byte; any other type yields an error and a false result.
func (vind *UTF8cihash) Verify(_ VCursor, id interface{}, ksid []byte) (bool, error) {
	data, err := getutf8cihash(id)
	if err != nil {
		return false, fmt.Errorf("utf8cihash.Verify: %v", err)
	}
	return bytes.Equal(data, ksid), nil
}
// getutf8cihash lower-cases key, which must be a []byte, and hashes the
// result with binHash. Any other dynamic type produces an error that
// reports the offending type.
func getutf8cihash(key interface{}) ([]byte, error) {
	source, ok := key.([]byte)
	if !ok {
		return nil, fmt.Errorf("unexpected data type for binHash: %T", key)
	}
	// Pass binHash's results straight through rather than storing them in
	// a local named "error", which would shadow the builtin type.
	return binHash(bytes.ToLower(source))
}
// binHash encrypts source with block3DES into a freshly allocated slice of
// the same length as source and returns it. The error result is always nil.
// NOTE(review): block3DES is declared outside this file; presumably it is a
// cipher.Block, whose Encrypt processes exactly one block. If so, inputs
// shorter than the block size would panic and any bytes past the first
// block would stay zero. Confirm against the block3DES declaration.
func binHash(source []byte) ([]byte, error) {
dest := make([]byte, len(source))
block3DES.Encrypt(dest, source)
return dest, nil
}
// Map returns the KeyspaceId for each of the given ids, in the same order.
// Every id must be a []byte; the first id of any other type aborts the call
// with an error and a nil result.
func (vind *UTF8cihash) Map(_ VCursor, ids []interface{}) ([][]byte, error) {
	out := make([][]byte, 0, len(ids))
	for _, id := range ids {
		data, err := getutf8cihash(id)
		if err != nil {
			// Same "name.Method: cause" layout that Verify uses.
			return nil, fmt.Errorf("utf8cihash.Map: %v", err)
		}
		out = append(out, data)
	}
	return out, nil
}
// init registers the UTF8cihash constructor under the name "utf8cihash".
func init() { Register("utf8cihash", Newutf8cihash) }

Просмотреть файл

@ -1,46 +0,0 @@
package vindexes
import (
"reflect"
"testing"
)
// utf8cihash is the utf8cihash vindex shared by the tests in this file.
var utf8cihash Vindex

// init creates utf8cihash. A creation failure panics immediately instead of
// leaving a nil Vindex for the tests to dereference later.
func init() {
	vind, err := CreateVindex("utf8cihash", "utf8ch", nil)
	if err != nil {
		panic(err)
	}
	utf8cihash = vind
}
// TestVarcharHashCost checks that the utf8cihash vindex reports a cost of 1.
func TestVarcharHashCost(t *testing.T) {
	if got := utf8cihash.Cost(); got != 1 {
		t.Errorf("Cost(): %d, want 1", got)
	}
}
// TestVarcharMap checks that upper-case, lower-case and mixed-case spellings
// of the same 8-byte string all map to the same hash.
func TestVarcharMap(t *testing.T) {
	ids := []interface{}{
		[]byte("\x55\x45\x54\x55\x55\x45\x54\x55"),
		[]byte("\x75\x65\x74\x75\x75\x65\x74\x75"),
		[]byte("\x55\x65\x54\x75\x75\x65\x54\x55"),
	}
	got, err := utf8cihash.(Unique).Map(nil, ids)
	if err != nil {
		t.Error(err)
	}
	var want [][]byte
	for range ids {
		want = append(want, []byte("\xf7\xaa\x9a\x46\xc9\x20\x85\x65"))
	}
	if reflect.DeepEqual(got, want) {
		return
	}
	t.Errorf("Map(): %#v, want %+v", got, want)
}
// TestVarCharVerify checks that Verify accepts a known id/keyspace-id pair.
func TestVarCharVerify(t *testing.T) {
	id := []byte("\x55\x45\x54\x55\x55\x45\x54\x55")
	ksid := []byte("\xf7\xaa\x9a\x46\xc9\x20\x85\x65")
	success, err := utf8cihash.Verify(nil, id, ksid)
	if err != nil {
		t.Error(err)
	}
	if !success {
		t.Errorf("Verify(): %+v, want true", success)
	}
}

Просмотреть файл

@ -1,62 +0,0 @@
package vindexes
import (
"bytes"
"fmt"
)
// Varbinary defines a vindex that hashes a varbinary to a KeyspaceId
// by passing the raw bytes, unmodified, through binHash.
type Varbinary struct {
// name is the vindex name returned by String.
name string
}
// NewVarbinary builds a Varbinary vindex carrying the given name.
// The parameter map m is not used.
func NewVarbinary(name string, m map[string]interface{}) (Vindex, error) {
	vind := &Varbinary{name: name}
	return vind, nil
}
// String reports the name this vindex was created with.
func (vind *Varbinary) String() string { return vind.name }
// Cost reports the cost of this vindex, which is always 1.
func (vind *Varbinary) Cost() int { return 1 }
// getVarbinaryHash hashes key, which must be a []byte, with binHash.
// Any other dynamic type produces an error that reports the offending type.
func getVarbinaryHash(key interface{}) ([]byte, error) {
	source, ok := key.([]byte)
	if !ok {
		return nil, fmt.Errorf("unexpected data type for binHash: %T", key)
	}
	// Pass binHash's results straight through rather than storing them in
	// a local named "error", which would shadow the builtin type.
	return binHash(source)
}
// Verify reports whether id hashes to ksid.
// id must be a []byte; any other type yields an error and a false result.
func (vind *Varbinary) Verify(_ VCursor, id interface{}, ksid []byte) (bool, error) {
	data, err := getVarbinaryHash(id)
	if err != nil {
		// The prefix names the type as declared (Varbinary).
		return false, fmt.Errorf("Varbinary.Verify: %v", err)
	}
	return bytes.Equal(data, ksid), nil
}
// Map returns the KeyspaceId for each of the given ids, in the same order.
// Every id must be a []byte; the first id of any other type aborts the call
// with an error and a nil result.
func (vind *Varbinary) Map(_ VCursor, ids []interface{}) ([][]byte, error) {
	out := make([][]byte, 0, len(ids))
	for _, id := range ids {
		data, err := getVarbinaryHash(id)
		if err != nil {
			// The prefix names the type as declared (Varbinary).
			return nil, fmt.Errorf("Varbinary.Map: %v", err)
		}
		out = append(out, data)
	}
	return out, nil
}
// init registers the Varbinary constructor under the name "varbinaryHash".
func init() { Register("varbinaryHash", NewVarbinary) }

Просмотреть файл

@ -1,41 +0,0 @@
package vindexes
import (
"reflect"
"testing"
)
// varbinaryHash is the varbinaryHash vindex shared by the tests in this file.
var varbinaryHash Vindex

// init creates varbinaryHash. A creation failure panics immediately instead
// of leaving a nil Vindex for the tests to dereference later.
func init() {
	vind, err := CreateVindex("varbinaryHash", "vch", nil)
	if err != nil {
		panic(err)
	}
	varbinaryHash = vind
}
// TestVarbinaryHashCost checks that the varbinaryHash vindex reports a cost
// of 1.
func TestVarbinaryHashCost(t *testing.T) {
	if got := varbinaryHash.Cost(); got != 1 {
		t.Errorf("Cost(): %d, want 1", got)
	}
}
// TestVarBinaryMap checks that Map produces the expected hash for a known
// 8-byte input.
func TestVarBinaryMap(t *testing.T) {
	ids := []interface{}{[]byte("\x74\x65\x73\x74\x74\x65\x73\x74")}
	got, err := varbinaryHash.(Unique).Map(nil, ids)
	if err != nil {
		t.Error(err)
	}
	want := [][]byte{[]byte("\x45\x23\x56\x06\x86\xef\x04\x91")}
	if reflect.DeepEqual(got, want) {
		return
	}
	t.Errorf("Map(): %#v, want %+v", got, want)
}
// TestVarBinaryVerify checks that Verify accepts a known id/keyspace-id pair.
func TestVarBinaryVerify(t *testing.T) {
	id := []byte("\x74\x65\x73\x74\x74\x65\x73\x74")
	ksid := []byte("\x45\x23\x56\x06\x86\xef\x04\x91")
	success, err := varbinaryHash.Verify(nil, id, ksid)
	if err != nil {
		t.Error(err)
	}
	if !success {
		t.Errorf("Verify(): %+v, want true", success)
	}
}

84
vendor/vendor.json поставляемый
Просмотреть файл

@ -180,6 +180,90 @@
"revision": "b0e2337fe6ec0c637fa4f123268b972f334504eb",
"revisionTime": "2016-04-08T01:06:28Z"
},
{
"checksumSHA1": "ws64q6/pPfBc7cgqvLKSJUOtqR0=",
"path": "golang.org/x/text/collate",
"revision": "3100578f0f8093e37883ba48c9187fe51367ad05",
"revisionTime": "2016-04-17T05:19:09Z"
},
{
"checksumSHA1": "jfnVwE5Y1HfpNJRtq7hJQuVrMxg=",
"path": "golang.org/x/text/collate/build",
"revision": "3100578f0f8093e37883ba48c9187fe51367ad05",
"revisionTime": "2016-04-17T05:19:09Z"
},
{
"checksumSHA1": "2OkXyGpDfSvE/skvLaogkiM/W0g=",
"path": "golang.org/x/text/collate/colltab",
"revision": "3100578f0f8093e37883ba48c9187fe51367ad05",
"revisionTime": "2016-04-17T05:19:09Z"
},
{
"checksumSHA1": "LTSp+1qcsKU9BMCs5kq+cE9fwM8=",
"path": "golang.org/x/text/internal/colltab",
"revision": "3100578f0f8093e37883ba48c9187fe51367ad05",
"revisionTime": "2016-04-17T05:19:09Z"
},
{
"checksumSHA1": "b4IcLhsmpoUaNiqOsBvb0L8FGGA=",
"path": "golang.org/x/text/internal/gen",
"revision": "3100578f0f8093e37883ba48c9187fe51367ad05",
"revisionTime": "2016-04-17T05:19:09Z"
},
{
"checksumSHA1": "uDusH6hHn2VFrYyJV4vRWe1PeLQ=",
"path": "golang.org/x/text/internal/tag",
"revision": "3100578f0f8093e37883ba48c9187fe51367ad05",
"revisionTime": "2016-04-17T05:19:09Z"
},
{
"checksumSHA1": "4IBXRph31MEiRjxWBR4qIu9R0S4=",
"path": "golang.org/x/text/internal/testtext",
"revision": "3100578f0f8093e37883ba48c9187fe51367ad05",
"revisionTime": "2016-04-17T05:19:09Z"
},
{
"checksumSHA1": "gKFK/QdtO0abkFjDiEyOBgLxWoM=",
"path": "golang.org/x/text/internal/triegen",
"revision": "3100578f0f8093e37883ba48c9187fe51367ad05",
"revisionTime": "2016-04-17T05:19:09Z"
},
{
"checksumSHA1": "JMVu4I9XB/6rTzYfElrURJCa5e0=",
"path": "golang.org/x/text/internal/ucd",
"revision": "3100578f0f8093e37883ba48c9187fe51367ad05",
"revisionTime": "2016-04-17T05:19:09Z"
},
{
"checksumSHA1": "tto7rGiI0WH+YSdVWfNjoo7NO0M=",
"path": "golang.org/x/text/language",
"revision": "3100578f0f8093e37883ba48c9187fe51367ad05",
"revisionTime": "2016-04-17T05:19:09Z"
},
{
"checksumSHA1": "fZsqw7apYN3t1/KiGJEMjWiKSog=",
"path": "golang.org/x/text/transform",
"revision": "3100578f0f8093e37883ba48c9187fe51367ad05",
"revisionTime": "2016-04-17T05:19:09Z"
},
{
"checksumSHA1": "1DKV831o6BguQTabt99CBmqDThE=",
"path": "golang.org/x/text/unicode/cldr",
"revision": "3100578f0f8093e37883ba48c9187fe51367ad05",
"revisionTime": "2016-04-17T05:19:09Z"
},
{
"checksumSHA1": "AD6yJilMhK5URyBNcpNSWmpflH0=",
"path": "golang.org/x/text/unicode/norm",
"revision": "3100578f0f8093e37883ba48c9187fe51367ad05",
"revisionTime": "2016-04-17T05:19:09Z"
},
{
"checksumSHA1": "Yzhgp3P/Yv+IGoWPWRDvQRWwE8o=",
"path": "golang.org/x/text/unicode/rangetable",
"revision": "3100578f0f8093e37883ba48c9187fe51367ad05",
"revisionTime": "2016-04-17T05:19:09Z"
},
{
"checksumSHA1": "GPy9lvgd0AOA8b4GJpkpG/BiCB8=",
"path": "google.golang.org/api/gensupport",