From b64824881a27332fa36479b70d919cc3181d88f2 Mon Sep 17 00:00:00 2001 From: Dave Day Date: Tue, 15 Jul 2014 20:39:00 +1000 Subject: [PATCH] appengine/search: Introduce DocumentMetadata. This changes FieldLoadSaver to handle document metadata, and may break existing code. Change-Id: I628d3b98d18c66d71b0a1fc953ad07fee3f0a23b --- README.md | 1 + search/field.go | 34 ++++++--- search/search.go | 58 +++++++++++----- search/search_test.go | 157 +++++++++++++++++++++++++++++++----------- 4 files changed, 182 insertions(+), 68 deletions(-) diff --git a/README.md b/README.md index 6c6395f..9ca43b6 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,7 @@ This list summarises the differences: * `appengine.Datacenter` now takes an `appengine.Context` argument. * `datastore.PropertyLoadSaver` has been simplified to use slices in place of channels. +* `search.FieldLoadSaver` now handles document metadata. * `taskqueue.QueueStats` no longer takes a maxTasks argument. That argument has been deprecated and unused for a long time. * `appengine/aetest`, `appengine/blobstore`, `appengine/cloudsql` diff --git a/search/field.go b/search/field.go index b974a0c..19ad542 100644 --- a/search/field.go +++ b/search/field.go @@ -28,10 +28,19 @@ type Field struct { Language string } -// FieldLoadSaver can be converted from and to a slice of Fields. +// DocumentMetadata is a struct containing information describing a given document. +type DocumentMetadata struct { + // Rank is an integer specifying the order the document will be returned in + // search results. If zero, the rank will be set to the number of seconds since + // 2011-01-01 00:00:00 UTC when being Put into an index. + Rank int +} + +// FieldLoadSaver can be converted from and to a slice of Fields +// with additional document metadata. type FieldLoadSaver interface { - Load([]Field) error - Save() ([]Field, error) + Load([]Field, *DocumentMetadata) error + Save() ([]Field, *DocumentMetadata, error) } // FieldList converts a []Field to implement FieldLoadSaver. @@ -39,22 +48,24 @@ type FieldList []Field // Load loads all of the provided fields into l. // It does not first reset *l to an empty slice. -func (l *FieldList) Load(f []Field) error { +func (l *FieldList) Load(f []Field, _ *DocumentMetadata) error { *l = append(*l, f...) return nil } // Save returns all of l's fields as a slice of Fields. -func (l *FieldList) Save() ([]Field, error) { - return *l, nil +func (l *FieldList) Save() ([]Field, *DocumentMetadata, error) { + return *l, nil, nil } +var _ FieldLoadSaver = (*FieldList)(nil) + // structFLS adapts a struct to be a FieldLoadSaver. type structFLS struct { reflect.Value } -func (s structFLS) Load(fields []Field) (err error) { +func (s structFLS) Load(fields []Field, _ *DocumentMetadata) (err error) { for _, field := range fields { f := s.FieldByName(field.Name) if !f.IsValid() { @@ -84,7 +95,7 @@ func (s structFLS) Load(fields []Field) (err error) { return err } -func (s structFLS) Save() ([]Field, error) { +func (s structFLS) Save() ([]Field, *DocumentMetadata, error) { fields := make([]Field, 0, s.NumField()) for i := 0; i < s.NumField(); i++ { f := s.Field(i) @@ -96,7 +107,7 @@ func (s structFLS) Save() ([]Field, error) { Value: f.Interface(), }) } - return fields, nil + return fields, nil, nil } // newStructFLS returns a FieldLoadSaver for the struct pointer p. @@ -114,7 +125,7 @@ func LoadStruct(dst interface{}, f []Field) error { if err != nil { return err } - return x.Load(f) + return x.Load(f, nil) } // SaveStruct returns the fields from src as a slice of Field. @@ -124,5 +135,6 @@ func SaveStruct(src interface{}) ([]Field, error) { if err != nil { return nil, err } - return x.Save() + fs, _, err := x.Save() + return fs, err } diff --git a/search/search.go b/search/search.go index 93610cd..0027627 100644 --- a/search/search.go +++ b/search/search.go @@ -163,6 +163,11 @@ func validFieldName(s string) bool { return len(s) <= 500 && fieldNameRE.MatchString(s) } +// validDocRank checks that the ranks is in the range [0, 2^31). +func validDocRank(r int) bool { + return 0 <= r && r <= (1<<31-1) +} + // validLanguage checks that a language looks like ISO 639-1. func validLanguage(s string) bool { return languageRE.MatchString(s) @@ -202,16 +207,22 @@ func Open(name string) (*Index, error) { // src must be a non-nil struct pointer or implement the FieldLoadSaver // interface. func (x *Index) Put(c appengine.Context, id string, src interface{}) (string, error) { - fields, err := saveFields(src) + fields, meta, err := saveDoc(src) if err != nil { return "", err } d := &pb.Document{ - Field: fields, - // TODO(davidday): support developers providing an explicit Rank for - // documents. + Field: fields, OrderId: proto.Int32(int32(time.Since(orderIDEpoch).Seconds())), } + if meta != nil { + if meta.Rank != 0 { + if !validDocRank(meta.Rank) { + return "", fmt.Errorf("search: invalid rank %d, must be [0, 2^31)", meta.Rank) + } + *d.OrderId = int32(meta.Rank) + } + } if id != "" { if !validIndexNameOrDocID(id) { return "", fmt.Errorf("search: invalid ID %q", id) @@ -272,7 +283,10 @@ func (x *Index) Get(c appengine.Context, id string, dst interface{}) error { if len(res.Document) != 1 || res.Document[0].GetId() != id { return ErrNoSuchDocument } - return loadFields(dst, res.Document[0].Field) + metadata := &DocumentMetadata{ + Rank: int(res.Document[0].GetOrderId()), + } + return loadDoc(dst, res.Document[0].Field, metadata) } // Delete deletes a document from the index. @@ -493,7 +507,10 @@ func (t *Iterator) Next(dst interface{}) (string, error) { return "", errors.New("search: internal error: no document returned") } if !t.idsOnly && dst != nil { - if err := loadFields(dst, doc.Field); err != nil { + metadata := &DocumentMetadata{ + Rank: int(doc.GetOrderId()), + } + if err := loadDoc(dst, doc.Field, metadata); err != nil { return "", err } } @@ -506,19 +523,22 @@ func (t *Iterator) Next(dst interface{}) (string, error) { return doc.GetId(), nil } -// saveFields converts from a struct pointer or FieldLoadSaver to protobufs. -func saveFields(src interface{}) ([]*pb.Field, error) { +// saveDoc converts from a struct pointer or FieldLoadSaver to protobufs. +func saveDoc(src interface{}) ([]*pb.Field, *DocumentMetadata, error) { var err error var fields []Field - if x, ok := src.(FieldLoadSaver); ok { - fields, err = x.Save() - } else { + var meta *DocumentMetadata + switch x := src.(type) { + case FieldLoadSaver: + fields, meta, err = x.Save() + default: fields, err = SaveStruct(src) } if err != nil { - return nil, err + return nil, nil, err } - return fieldsToProto(fields) + f, err := fieldsToProto(fields) + return f, meta, err } func fieldsToProto(src []Field) ([]*pb.Field, error) { @@ -590,16 +610,18 @@ func fieldsToProto(src []Field) ([]*pb.Field, error) { return dst, nil } -// loadFields converts from protobufs to a struct pointer or FieldLoadSaver. -func loadFields(dst interface{}, src []*pb.Field) (err error) { +// loadDoc converts from protobufs and document metadata to a struct pointer or FieldLoadSaver. +func loadDoc(dst interface{}, src []*pb.Field, meta *DocumentMetadata) (err error) { fields, err := protoToFields(src) if err != nil { return err } - if x, ok := dst.(FieldLoadSaver); ok { - return x.Load(fields) + switch x := dst.(type) { + case FieldLoadSaver: + return x.Load(fields, meta) + default: + return LoadStruct(dst, fields) } - return LoadStruct(dst, fields) } func protoToFields(fields []*pb.Field) ([]Field, error) { diff --git a/search/search_test.go b/search/search_test.go index ed91a0f..518ef63 100644 --- a/search/search_test.go +++ b/search/search_test.go @@ -28,6 +28,24 @@ type TestDoc struct { Time time.Time } +type FieldListWithMeta struct { + Fields FieldList + Meta *DocumentMetadata +} + +func (f *FieldListWithMeta) Load(fields []Field, meta *DocumentMetadata) error { + f.Meta = meta + return f.Fields.Load(fields, nil) +} + +func (f *FieldListWithMeta) Save() ([]Field, *DocumentMetadata, error) { + fields, _, err := f.Fields.Save() + return fields, f.Meta, err +} + +// Assert that FieldListWithMeta satisfies FieldLoadSaver +var _ FieldLoadSaver = &FieldListWithMeta{} + var ( float = 3.14159 floatOut = "3.14159e+00" @@ -37,7 +55,10 @@ var ( testString = "foobar" testTime = time.Unix(1337324400, 0) testTimeOut = "1337324400000" - searchDoc = TestDoc{ + searchMeta = &DocumentMetadata{ + Rank: 42, + } + searchDoc = TestDoc{ String: testString, Atom: Atom(testString), HTML: HTML(testString), @@ -53,7 +74,10 @@ var ( Field{Name: "Location", Value: testGeo}, Field{Name: "Time", Value: testTime}, } - protoFields = []*pb.Field{ + // searchFieldsWithLang is a copy of the searchFields with the Language field + // set on text/HTML Fields. + searchFieldsWithLang = FieldList{} + protoFields = []*pb.Field{ newStringValueField("String", testString, pb.FieldValue_TEXT), newStringValueField("Atom", testString, pb.FieldValue_ATOM), newStringValueField("HTML", testString, pb.FieldValue_HTML), @@ -72,6 +96,15 @@ var ( } ) +func init() { + for _, f := range searchFields { + if f.Name == "String" || f.Name == "HTML" { + f.Language = "en" + } + searchFieldsWithLang = append(searchFieldsWithLang, f) + } +} + func newStringValueField(name, value string, valueType pb.FieldValue_ContentType) *pb.Field { return &pb.Field{ Name: proto.String(name), @@ -105,20 +138,20 @@ func TestValidIndexNameOrDocID(t *testing.T) { } } -func TestLoadFields(t *testing.T) { +func TestLoadDoc(t *testing.T) { got, want := TestDoc{}, searchDoc - if err := loadFields(&got, protoFields); err != nil { - t.Fatalf("loadFields: %v", err) + if err := loadDoc(&got, protoFields, nil); err != nil { + t.Fatalf("loadDoc: %v", err) } if got != want { - t.Errorf("\ngot %v\nwant %v", got, want) + t.Errorf("loadDoc: got %v, wanted %v", got, want) } } -func TestSaveFields(t *testing.T) { - got, err := saveFields(&searchDoc) +func TestSaveDoc(t *testing.T) { + got, _, err := saveDoc(&searchDoc) if err != nil { - t.Fatalf("saveFields: %v", err) + t.Fatalf("saveDoc: %v", err) } want := protoFields if !reflect.DeepEqual(got, want) { @@ -127,18 +160,10 @@ func TestSaveFields(t *testing.T) { } func TestLoadFieldList(t *testing.T) { - var got, want FieldList - // Make a shallow copy of searchFields, since we need to set the default - // language "en" on the text and HTML fields. - want = append(want, searchFields...) - for i, f := range want { - if f.Name == "String" || f.Name == "HTML" { - want[i].Language = "en" - } - } - err := loadFields(&got, protoFields) - if err != nil { - t.Fatalf("loadFields: %v", err) + var got FieldList + want := searchFieldsWithLang + if err := loadDoc(&got, protoFields, nil); err != nil { + t.Fatalf("loadDoc: %v", err) } if !reflect.DeepEqual(got, want) { t.Errorf("\ngot %v\nwant %v", got, want) @@ -146,9 +171,9 @@ func TestLoadFieldList(t *testing.T) { } func TestSaveFieldList(t *testing.T) { - got, err := saveFields(&searchFields) + got, _, err := saveDoc(&searchFields) if err != nil { - t.Fatalf("saveFields: %v", err) + t.Fatalf("saveDoc: %v", err) } want := protoFields if !reflect.DeepEqual(got, want) { @@ -156,6 +181,36 @@ func TestSaveFieldList(t *testing.T) { } } +func TestLoadMeta(t *testing.T) { + var got FieldListWithMeta + want := FieldListWithMeta{ + Meta: searchMeta, + Fields: searchFieldsWithLang, + } + if err := loadDoc(&got, protoFields, searchMeta); err != nil { + t.Fatalf("loadDoc: %v", err) + } + if !reflect.DeepEqual(got, want) { + t.Errorf("got %v\nwant %v", got, want) + } +} + +func TestSaveMeta(t *testing.T) { + got, gotMeta, err := saveDoc(&FieldListWithMeta{ + Meta: searchMeta, + Fields: searchFields, + }) + if err != nil { + t.Fatalf("saveDoc: %v", err) + } + if want := protoFields; !reflect.DeepEqual(got, want) { + t.Errorf("\ngot %v\nwant %v", got, want) + } + if want := searchMeta; !reflect.DeepEqual(gotMeta, want) { + t.Errorf("\ngot %v\nwant %v", gotMeta, want) + } +} + func TestValidFieldNames(t *testing.T) { testCases := []struct { name string @@ -174,7 +229,7 @@ func TestValidFieldNames(t *testing.T) { } for _, tc := range testCases { - _, err := saveFields(&FieldList{ + _, _, err := saveDoc(&FieldList{ Field{Name: tc.name, Value: "val"}, }) if err != nil && !strings.Contains(err.Error(), "invalid field name") { @@ -201,9 +256,9 @@ func TestValidLangs(t *testing.T) { } for _, tt := range testCases { - _, err := saveFields(&FieldList{tt.field}) + _, _, err := saveDoc(&FieldList{tt.field}) if err == nil != tt.valid { - t.Errorf("Field %v, got error %v, wanted err %t", tt.field, err, tt.valid) + t.Errorf("Field %v, got error %v, wanted valid %t", tt.field, err, tt.valid) } } } @@ -238,7 +293,7 @@ func TestDuplicateFields(t *testing.T) { }, } for _, tc := range testCases { - _, err := saveFields(&tc.fields) + _, _, err := saveDoc(&tc.fields) if (err == nil) != (tc.errMsg == "") || (err != nil && !strings.Contains(err.Error(), tc.errMsg)) { t.Errorf("%s: got err %v, wanted %q", tc.desc, err, tc.errMsg) } @@ -281,7 +336,7 @@ func TestLoadErrFieldMismatch(t *testing.T) { }, } for _, tc := range testCases { - err := loadFields(tc.dst, tc.src) + err := loadDoc(tc.dst, tc.src, nil) if !reflect.DeepEqual(err, tc.err) { t.Errorf("%s, got err %v, wanted %v", tc.desc, err, tc.err) } @@ -336,22 +391,13 @@ func TestPut(t *testing.T) { expectedIn := &pb.IndexDocumentRequest{ Params: &pb.IndexDocumentParams{ Document: []*pb.Document{ - {Field: protoFields}, - // Omit OrderId since we'll check it explicitly. + {Field: protoFields, OrderId: proto.Int32(42)}, }, IndexSpec: &pb.IndexSpec{ Name: proto.String("Doc"), }, }, } - if len(in.Params.GetDocument()) < 1 { - return fmt.Errorf("expected at least one Document, got %v", in) - } - got, want := in.Params.Document[0].GetOrderId(), int32(time.Since(orderIDEpoch).Seconds()) - if d := got - want; -5 > d || d > 5 { - return fmt.Errorf("got OrderId %d, want near %d", got, want) - } - in.Params.Document[0].OrderId = nil if !proto.Equal(in, expectedIn) { return fmt.Errorf("unsupported argument:\ngot %v\nwant %v", in, expectedIn) } @@ -366,7 +412,10 @@ func TestPut(t *testing.T) { return nil }) - id, err := index.Put(c, "", &searchDoc) + id, err := index.Put(c, "", &FieldListWithMeta{ + Meta: searchMeta, + Fields: searchFields, + }) if err != nil { t.Fatal(err) } @@ -374,3 +423,33 @@ func TestPut(t *testing.T) { t.Errorf("Got doc ID %q, want %q", id, want) } } + +func TestPutAutoOrderID(t *testing.T) { + index, err := Open("Doc") + if err != nil { + t.Fatalf("err from Open: %v", err) + } + + c := aetesting.FakeSingleContext(t, "search", "IndexDocument", func(in *pb.IndexDocumentRequest, out *pb.IndexDocumentResponse) error { + if len(in.Params.GetDocument()) < 1 { + return fmt.Errorf("expected at least one Document, got %v", in) + } + got, want := in.Params.Document[0].GetOrderId(), int32(time.Since(orderIDEpoch).Seconds()) + if d := got - want; -5 > d || d > 5 { + return fmt.Errorf("got OrderId %d, want near %d", got, want) + } + *out = pb.IndexDocumentResponse{ + Status: []*pb.RequestStatus{ + {Code: pb.SearchServiceError_OK.Enum()}, + }, + DocId: []string{ + "doc_id", + }, + } + return nil + }) + + if _, err := index.Put(c, "", &searchFields); err != nil { + t.Fatal(err) + } +}