appengine/search: Introduce DocumentMetadata.

This changes FieldLoadSaver to handle document metadata,
and may break existing code.

Change-Id: I628d3b98d18c66d71b0a1fc953ad07fee3f0a23b
This commit is contained in:
Dave Day 2014-07-15 20:39:00 +10:00 коммит произвёл David Symonds
Родитель 83932124e7
Коммит b64824881a
4 изменённых файлов: 182 добавлений и 68 удалений

Просмотреть файл

@ -49,6 +49,7 @@ This list summarises the differences:
* `appengine.Datacenter` now takes an `appengine.Context` argument.
* `datastore.PropertyLoadSaver` has been simplified to use slices in place of channels.
* `search.FieldLoadSaver` now handles document metadata.
* `taskqueue.QueueStats` no longer takes a maxTasks argument. That argument has been
deprecated and unused for a long time.
* `appengine/aetest`, `appengine/blobstore`, `appengine/cloudsql`

Просмотреть файл

@ -28,10 +28,19 @@ type Field struct {
Language string
}
// FieldLoadSaver can be converted from and to a slice of Fields.
// DocumentMetadata is a struct containing information describing a given document.
type DocumentMetadata struct {
// Rank is an integer specifying the order the document will be returned in
// search results. If zero, the rank will be set to the number of seconds since
// 2011-01-01 00:00:00 UTC when being Put into an index.
Rank int
}
// FieldLoadSaver can be converted from and to a slice of Fields
// with additional document metadata.
type FieldLoadSaver interface {
Load([]Field) error
Save() ([]Field, error)
Load([]Field, *DocumentMetadata) error
Save() ([]Field, *DocumentMetadata, error)
}
// FieldList converts a []Field to implement FieldLoadSaver.
@ -39,22 +48,24 @@ type FieldList []Field
// Load loads all of the provided fields into l.
// It does not first reset *l to an empty slice.
func (l *FieldList) Load(f []Field) error {
func (l *FieldList) Load(f []Field, _ *DocumentMetadata) error {
*l = append(*l, f...)
return nil
}
// Save returns all of l's fields as a slice of Fields.
func (l *FieldList) Save() ([]Field, error) {
return *l, nil
func (l *FieldList) Save() ([]Field, *DocumentMetadata, error) {
return *l, nil, nil
}
var _ FieldLoadSaver = (*FieldList)(nil)
// structFLS adapts a struct to be a FieldLoadSaver.
type structFLS struct {
reflect.Value
}
func (s structFLS) Load(fields []Field) (err error) {
func (s structFLS) Load(fields []Field, _ *DocumentMetadata) (err error) {
for _, field := range fields {
f := s.FieldByName(field.Name)
if !f.IsValid() {
@ -84,7 +95,7 @@ func (s structFLS) Load(fields []Field) (err error) {
return err
}
func (s structFLS) Save() ([]Field, error) {
func (s structFLS) Save() ([]Field, *DocumentMetadata, error) {
fields := make([]Field, 0, s.NumField())
for i := 0; i < s.NumField(); i++ {
f := s.Field(i)
@ -96,7 +107,7 @@ func (s structFLS) Save() ([]Field, error) {
Value: f.Interface(),
})
}
return fields, nil
return fields, nil, nil
}
// newStructFLS returns a FieldLoadSaver for the struct pointer p.
@ -114,7 +125,7 @@ func LoadStruct(dst interface{}, f []Field) error {
if err != nil {
return err
}
return x.Load(f)
return x.Load(f, nil)
}
// SaveStruct returns the fields from src as a slice of Field.
@ -124,5 +135,6 @@ func SaveStruct(src interface{}) ([]Field, error) {
if err != nil {
return nil, err
}
return x.Save()
fs, _, err := x.Save()
return fs, err
}

Просмотреть файл

@ -163,6 +163,11 @@ func validFieldName(s string) bool {
return len(s) <= 500 && fieldNameRE.MatchString(s)
}
// validDocRank checks that the ranks is in the range [0, 2^31).
func validDocRank(r int) bool {
return 0 <= r && r <= (1<<31-1)
}
// validLanguage checks that a language looks like ISO 639-1.
func validLanguage(s string) bool {
return languageRE.MatchString(s)
@ -202,16 +207,22 @@ func Open(name string) (*Index, error) {
// src must be a non-nil struct pointer or implement the FieldLoadSaver
// interface.
func (x *Index) Put(c appengine.Context, id string, src interface{}) (string, error) {
fields, err := saveFields(src)
fields, meta, err := saveDoc(src)
if err != nil {
return "", err
}
d := &pb.Document{
Field: fields,
// TODO(davidday): support developers providing an explicit Rank for
// documents.
Field: fields,
OrderId: proto.Int32(int32(time.Since(orderIDEpoch).Seconds())),
}
if meta != nil {
if meta.Rank != 0 {
if !validDocRank(meta.Rank) {
return "", fmt.Errorf("search: invalid rank %d, must be [0, 2^31)", meta.Rank)
}
*d.OrderId = int32(meta.Rank)
}
}
if id != "" {
if !validIndexNameOrDocID(id) {
return "", fmt.Errorf("search: invalid ID %q", id)
@ -272,7 +283,10 @@ func (x *Index) Get(c appengine.Context, id string, dst interface{}) error {
if len(res.Document) != 1 || res.Document[0].GetId() != id {
return ErrNoSuchDocument
}
return loadFields(dst, res.Document[0].Field)
metadata := &DocumentMetadata{
Rank: int(res.Document[0].GetOrderId()),
}
return loadDoc(dst, res.Document[0].Field, metadata)
}
// Delete deletes a document from the index.
@ -493,7 +507,10 @@ func (t *Iterator) Next(dst interface{}) (string, error) {
return "", errors.New("search: internal error: no document returned")
}
if !t.idsOnly && dst != nil {
if err := loadFields(dst, doc.Field); err != nil {
metadata := &DocumentMetadata{
Rank: int(doc.GetOrderId()),
}
if err := loadDoc(dst, doc.Field, metadata); err != nil {
return "", err
}
}
@ -506,19 +523,22 @@ func (t *Iterator) Next(dst interface{}) (string, error) {
return doc.GetId(), nil
}
// saveFields converts from a struct pointer or FieldLoadSaver to protobufs.
func saveFields(src interface{}) ([]*pb.Field, error) {
// saveDoc converts from a struct pointer or FieldLoadSaver to protobufs.
func saveDoc(src interface{}) ([]*pb.Field, *DocumentMetadata, error) {
var err error
var fields []Field
if x, ok := src.(FieldLoadSaver); ok {
fields, err = x.Save()
} else {
var meta *DocumentMetadata
switch x := src.(type) {
case FieldLoadSaver:
fields, meta, err = x.Save()
default:
fields, err = SaveStruct(src)
}
if err != nil {
return nil, err
return nil, nil, err
}
return fieldsToProto(fields)
f, err := fieldsToProto(fields)
return f, meta, err
}
func fieldsToProto(src []Field) ([]*pb.Field, error) {
@ -590,16 +610,18 @@ func fieldsToProto(src []Field) ([]*pb.Field, error) {
return dst, nil
}
// loadFields converts from protobufs to a struct pointer or FieldLoadSaver.
func loadFields(dst interface{}, src []*pb.Field) (err error) {
// loadDoc converts from protobufs and document metadata to a struct pointer or FieldLoadSaver.
func loadDoc(dst interface{}, src []*pb.Field, meta *DocumentMetadata) (err error) {
fields, err := protoToFields(src)
if err != nil {
return err
}
if x, ok := dst.(FieldLoadSaver); ok {
return x.Load(fields)
switch x := dst.(type) {
case FieldLoadSaver:
return x.Load(fields, meta)
default:
return LoadStruct(dst, fields)
}
return LoadStruct(dst, fields)
}
func protoToFields(fields []*pb.Field) ([]Field, error) {

Просмотреть файл

@ -28,6 +28,24 @@ type TestDoc struct {
Time time.Time
}
type FieldListWithMeta struct {
Fields FieldList
Meta *DocumentMetadata
}
func (f *FieldListWithMeta) Load(fields []Field, meta *DocumentMetadata) error {
f.Meta = meta
return f.Fields.Load(fields, nil)
}
func (f *FieldListWithMeta) Save() ([]Field, *DocumentMetadata, error) {
fields, _, err := f.Fields.Save()
return fields, f.Meta, err
}
// Assert that FieldListWithMeta satisfies FieldLoadSaver
var _ FieldLoadSaver = &FieldListWithMeta{}
var (
float = 3.14159
floatOut = "3.14159e+00"
@ -37,7 +55,10 @@ var (
testString = "foo<b>bar"
testTime = time.Unix(1337324400, 0)
testTimeOut = "1337324400000"
searchDoc = TestDoc{
searchMeta = &DocumentMetadata{
Rank: 42,
}
searchDoc = TestDoc{
String: testString,
Atom: Atom(testString),
HTML: HTML(testString),
@ -53,7 +74,10 @@ var (
Field{Name: "Location", Value: testGeo},
Field{Name: "Time", Value: testTime},
}
protoFields = []*pb.Field{
// searchFieldsWithLang is a copy of the searchFields with the Language field
// set on text/HTML Fields.
searchFieldsWithLang = FieldList{}
protoFields = []*pb.Field{
newStringValueField("String", testString, pb.FieldValue_TEXT),
newStringValueField("Atom", testString, pb.FieldValue_ATOM),
newStringValueField("HTML", testString, pb.FieldValue_HTML),
@ -72,6 +96,15 @@ var (
}
)
func init() {
for _, f := range searchFields {
if f.Name == "String" || f.Name == "HTML" {
f.Language = "en"
}
searchFieldsWithLang = append(searchFieldsWithLang, f)
}
}
func newStringValueField(name, value string, valueType pb.FieldValue_ContentType) *pb.Field {
return &pb.Field{
Name: proto.String(name),
@ -105,20 +138,20 @@ func TestValidIndexNameOrDocID(t *testing.T) {
}
}
func TestLoadFields(t *testing.T) {
func TestLoadDoc(t *testing.T) {
got, want := TestDoc{}, searchDoc
if err := loadFields(&got, protoFields); err != nil {
t.Fatalf("loadFields: %v", err)
if err := loadDoc(&got, protoFields, nil); err != nil {
t.Fatalf("loadDoc: %v", err)
}
if got != want {
t.Errorf("\ngot %v\nwant %v", got, want)
t.Errorf("loadDoc: got %v, wanted %v", got, want)
}
}
func TestSaveFields(t *testing.T) {
got, err := saveFields(&searchDoc)
func TestSaveDoc(t *testing.T) {
got, _, err := saveDoc(&searchDoc)
if err != nil {
t.Fatalf("saveFields: %v", err)
t.Fatalf("saveDoc: %v", err)
}
want := protoFields
if !reflect.DeepEqual(got, want) {
@ -127,18 +160,10 @@ func TestSaveFields(t *testing.T) {
}
func TestLoadFieldList(t *testing.T) {
var got, want FieldList
// Make a shallow copy of searchFields, since we need to set the default
// language "en" on the text and HTML fields.
want = append(want, searchFields...)
for i, f := range want {
if f.Name == "String" || f.Name == "HTML" {
want[i].Language = "en"
}
}
err := loadFields(&got, protoFields)
if err != nil {
t.Fatalf("loadFields: %v", err)
var got FieldList
want := searchFieldsWithLang
if err := loadDoc(&got, protoFields, nil); err != nil {
t.Fatalf("loadDoc: %v", err)
}
if !reflect.DeepEqual(got, want) {
t.Errorf("\ngot %v\nwant %v", got, want)
@ -146,9 +171,9 @@ func TestLoadFieldList(t *testing.T) {
}
func TestSaveFieldList(t *testing.T) {
got, err := saveFields(&searchFields)
got, _, err := saveDoc(&searchFields)
if err != nil {
t.Fatalf("saveFields: %v", err)
t.Fatalf("saveDoc: %v", err)
}
want := protoFields
if !reflect.DeepEqual(got, want) {
@ -156,6 +181,36 @@ func TestSaveFieldList(t *testing.T) {
}
}
func TestLoadMeta(t *testing.T) {
var got FieldListWithMeta
want := FieldListWithMeta{
Meta: searchMeta,
Fields: searchFieldsWithLang,
}
if err := loadDoc(&got, protoFields, searchMeta); err != nil {
t.Fatalf("loadDoc: %v", err)
}
if !reflect.DeepEqual(got, want) {
t.Errorf("got %v\nwant %v", got, want)
}
}
func TestSaveMeta(t *testing.T) {
got, gotMeta, err := saveDoc(&FieldListWithMeta{
Meta: searchMeta,
Fields: searchFields,
})
if err != nil {
t.Fatalf("saveDoc: %v", err)
}
if want := protoFields; !reflect.DeepEqual(got, want) {
t.Errorf("\ngot %v\nwant %v", got, want)
}
if want := searchMeta; !reflect.DeepEqual(gotMeta, want) {
t.Errorf("\ngot %v\nwant %v", gotMeta, want)
}
}
func TestValidFieldNames(t *testing.T) {
testCases := []struct {
name string
@ -174,7 +229,7 @@ func TestValidFieldNames(t *testing.T) {
}
for _, tc := range testCases {
_, err := saveFields(&FieldList{
_, _, err := saveDoc(&FieldList{
Field{Name: tc.name, Value: "val"},
})
if err != nil && !strings.Contains(err.Error(), "invalid field name") {
@ -201,9 +256,9 @@ func TestValidLangs(t *testing.T) {
}
for _, tt := range testCases {
_, err := saveFields(&FieldList{tt.field})
_, _, err := saveDoc(&FieldList{tt.field})
if err == nil != tt.valid {
t.Errorf("Field %v, got error %v, wanted err %t", tt.field, err, tt.valid)
t.Errorf("Field %v, got error %v, wanted valid %t", tt.field, err, tt.valid)
}
}
}
@ -238,7 +293,7 @@ func TestDuplicateFields(t *testing.T) {
},
}
for _, tc := range testCases {
_, err := saveFields(&tc.fields)
_, _, err := saveDoc(&tc.fields)
if (err == nil) != (tc.errMsg == "") || (err != nil && !strings.Contains(err.Error(), tc.errMsg)) {
t.Errorf("%s: got err %v, wanted %q", tc.desc, err, tc.errMsg)
}
@ -281,7 +336,7 @@ func TestLoadErrFieldMismatch(t *testing.T) {
},
}
for _, tc := range testCases {
err := loadFields(tc.dst, tc.src)
err := loadDoc(tc.dst, tc.src, nil)
if !reflect.DeepEqual(err, tc.err) {
t.Errorf("%s, got err %v, wanted %v", tc.desc, err, tc.err)
}
@ -336,22 +391,13 @@ func TestPut(t *testing.T) {
expectedIn := &pb.IndexDocumentRequest{
Params: &pb.IndexDocumentParams{
Document: []*pb.Document{
{Field: protoFields},
// Omit OrderId since we'll check it explicitly.
{Field: protoFields, OrderId: proto.Int32(42)},
},
IndexSpec: &pb.IndexSpec{
Name: proto.String("Doc"),
},
},
}
if len(in.Params.GetDocument()) < 1 {
return fmt.Errorf("expected at least one Document, got %v", in)
}
got, want := in.Params.Document[0].GetOrderId(), int32(time.Since(orderIDEpoch).Seconds())
if d := got - want; -5 > d || d > 5 {
return fmt.Errorf("got OrderId %d, want near %d", got, want)
}
in.Params.Document[0].OrderId = nil
if !proto.Equal(in, expectedIn) {
return fmt.Errorf("unsupported argument:\ngot %v\nwant %v", in, expectedIn)
}
@ -366,7 +412,10 @@ func TestPut(t *testing.T) {
return nil
})
id, err := index.Put(c, "", &searchDoc)
id, err := index.Put(c, "", &FieldListWithMeta{
Meta: searchMeta,
Fields: searchFields,
})
if err != nil {
t.Fatal(err)
}
@ -374,3 +423,33 @@ func TestPut(t *testing.T) {
t.Errorf("Got doc ID %q, want %q", id, want)
}
}
func TestPutAutoOrderID(t *testing.T) {
index, err := Open("Doc")
if err != nil {
t.Fatalf("err from Open: %v", err)
}
c := aetesting.FakeSingleContext(t, "search", "IndexDocument", func(in *pb.IndexDocumentRequest, out *pb.IndexDocumentResponse) error {
if len(in.Params.GetDocument()) < 1 {
return fmt.Errorf("expected at least one Document, got %v", in)
}
got, want := in.Params.Document[0].GetOrderId(), int32(time.Since(orderIDEpoch).Seconds())
if d := got - want; -5 > d || d > 5 {
return fmt.Errorf("got OrderId %d, want near %d", got, want)
}
*out = pb.IndexDocumentResponse{
Status: []*pb.RequestStatus{
{Code: pb.SearchServiceError_OK.Enum()},
},
DocId: []string{
"doc_id",
},
}
return nil
})
if _, err := index.Put(c, "", &searchFields); err != nil {
t.Fatal(err)
}
}