internal/{postgres,frontend}: group search results

Group search results by module. Show the highest-ranking package of
each module, and display the other packages as sub-results, in the
same snippet as the winning package but smaller.

Also, display packages from later major module versions before earlier
ones, even if they have a lower score.

All this is protected by an experiment.

Change-Id: Iabc7fcf4e916289617b6c1c755904a27163ad554
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/316471
Trust: Jonathan Amsterdam <jba@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
TryBot-Result: kokoro <noreply+kokoro@google.com>
Reviewed-by: Julie Qiu <julie@golang.org>
This commit is contained in:
Jonathan Amsterdam 2021-05-03 09:53:17 -04:00
Родитель 32ea36f178
Коммит 9952931af0
10 изменённых файлов: 249 добавлений и 47 удалений

Просмотреть файл

@ -54,6 +54,10 @@
font-size: 0.875rem;
line-height: 1.375rem;
}
.SearchSnippet-sub {
font-size: 0.875rem;
margin-top: 0.3125rem;
}
.Pagination-nav {
display: flex;

Просмотреть файл

@ -56,9 +56,26 @@
{{end}}
</span>
</div>
</div>
{{end}}
{{end}}
{{with .SameModule}}
<div class="SearchSnippet-sub">
<span>{{.Heading}}</span>
{{range $i, $v := .Links}}
<a href="/{{$v.Href}}" data-gtmc="search result" data-gtmv="{{$i}}">{{$v.Body}}</a>
{{end}}
<span>{{.Suffix}}</span>
</div>
{{end}}
{{with .LowerMajor}}
<div class="SearchSnippet-sub">
<span>{{.Heading}}</span>
{{range $i, $v := .Links}}
<a href="/{{$v.Href}}" data-gtmc="search result" data-gtmv="{{$i}}">{{$v.Body}}</a>
{{end}}
</div>
{{end}}
</div> <!-- SearchSnippet -->
{{end}} {{/* range over Results */}}
{{end}} {{/* else */}}
</div>
<div class="SearchResults-footer">
{{template "pagination_nav" .Pagination}}

Просмотреть файл

@ -6,6 +6,7 @@ package internal
import (
"path"
"strconv"
"strings"
"time"
@ -105,6 +106,26 @@ func MajorVersionForModule(modulePath string) string {
return strings.TrimLeft(v, "/.")
}
// SeriesPathAndMajorVersion splits modulePath into a series path and a
// numeric major version.
// If the path doesn't have a "vN" suffix, it returns 1.
// If the module path is invalid, it returns ("", 0).
func SeriesPathAndMajorVersion(modulePath string) (string, int) {
seriesPath, v, ok := module.SplitPathVersion(modulePath)
if !ok {
return "", 0
}
if v == "" {
return seriesPath, 1
}
// First two characters are either ".v" or "/v".
n, err := strconv.Atoi(v[2:])
if err != nil {
return "", 0
}
return seriesPath, n
}
// Suffix returns the suffix of the fullPath. It assumes that basePath is a
// prefix of fullPath. If fullPath and basePath are the same, the empty string
// is returned.
@ -222,6 +243,14 @@ type SearchResult struct {
// NumImportedBy is the number of packages that import PackagePath.
NumImportedBy uint64
// SameModule is a list of SearchResults from the same module as this one,
// with lower scores.
SameModule []*SearchResult
// LowerMajor is a list of SearchResults with the same v1 path but at lower
// major versions of this module.
LowerMajor []*SearchResult
// NumResults is the total number of packages that were returned for this
// search.
NumResults uint64

Просмотреть файл

@ -56,6 +56,26 @@ func TestMajorVersionForModule(t *testing.T) {
}
}
func TestSeriesPathAndMajorVersion(t *testing.T) {
for _, test := range []struct {
in string
wantSeriesPath string
wantVersion int
}{
{"m.com", "m.com", 1},
{"m.com/v123", "m.com", 123},
{"gopkg.in/m.v1", "gopkg.in/m", 1},
{"gopkg.in/m.v35", "gopkg.in/m", 35},
{"m.com/v1.2", "", 0},
} {
gotSeriesPath, gotVersion := SeriesPathAndMajorVersion(test.in)
if gotSeriesPath != test.wantSeriesPath || gotVersion != test.wantVersion {
t.Errorf("SeriesPathAndMajorVersion(%q) = (%s, %d), want (%s, %d)",
test.in, gotSeriesPath, gotVersion, test.wantSeriesPath, test.wantVersion)
}
}
}
func TestV1Path(t *testing.T) {
for _, test := range []struct {
modulePath, suffix string

Просмотреть файл

@ -9,6 +9,7 @@ const (
ExperimentDeprecatedDoc = "deprecated-doc"
ExperimentInsertSymbolHistory = "insert-symbol-history"
ExperimentReadSymbolHistory = "read-symbol-history"
ExperimentSearchGrouping = "search-grouping"
ExperimentSymbolHistoryVersionsPage = "symbol-history-versions-page"
ExperimentSymbolHistoryMainPage = "symbol-history-main-page"
)
@ -17,6 +18,7 @@ const (
// a description of each experiment.
var Experiments = map[string]string{
ExperimentDeprecatedDoc: "Treat deprecated symbols specially in documentation.",
ExperimentSearchGrouping: "Group search results.",
ExperimentInsertSymbolHistory: "Insert data into the symbol_history table.",
ExperimentReadSymbolHistory: "Read data from the symbol_history table.",
ExperimentSymbolHistoryVersionsPage: "Show package API history on the versions page.",

Просмотреть файл

@ -11,6 +11,7 @@ import (
"math"
"net/http"
"path"
"sort"
"strings"
"time"
"unicode/utf8"
@ -43,6 +44,14 @@ type SearchResult struct {
CommitTime string
NumImportedBy int
Approximate bool
SameModule *subResult // package paths in the same module
LowerMajor *subResult // package paths in lower major versions
}
type subResult struct {
Heading string
Links []link
Suffix string
}
// fetchSearchPage fetches data matching the search query from the database and
@ -65,6 +74,8 @@ func fetchSearchPage(ctx context.Context, db *postgres.DB, query string, pagePar
Licenses: r.Licenses,
CommitTime: elapsedTime(r.CommitTime),
NumImportedBy: int(r.NumImportedBy),
SameModule: packagePaths("Other packages in module "+r.ModulePath+":", r.SameModule, 5),
LowerMajor: modulePaths("Lower module versions:", r.LowerMajor),
})
}
@ -82,7 +93,17 @@ func fetchSearchPage(ctx context.Context, db *postgres.DB, query string, pagePar
}
}
pgs := newPagination(pageParams, len(results), numResults)
numPageResults := 0
for _, r := range dbresults {
// Grouping will put some results inside others. Each result counts one
// for itself plus one for each sub-result in the SameModule list,
// because each of those is removed from the top-level slice. Results in
// the LowerMajor list are not removed from the top-level slice,
// so we don't add them up.
numPageResults += 1 + len(r.SameModule)
}
pgs := newPagination(pageParams, numPageResults, numResults)
pgs.Approximate = approximate
return &SearchPage{
Results: results,
@ -103,6 +124,55 @@ func approximateNumber(estimate int, sigma float64) int {
return int(unit * math.Round(float64(estimate)/unit))
}
func packagePaths(heading string, rs []*internal.SearchResult, max int) *subResult {
if len(rs) == 0 {
return nil
}
var links []link
for i, r := range rs {
if i >= max {
break
}
links = append(links, link{Href: r.PackagePath, Body: internal.Suffix(r.PackagePath, r.ModulePath)})
}
suffix := ""
if len(rs) > len(links) {
suffix = fmt.Sprintf("(and %d more)", len(rs)-len(links))
}
return &subResult{
Heading: heading,
Links: links,
Suffix: suffix,
}
}
func modulePaths(heading string, rs []*internal.SearchResult) *subResult {
if len(rs) == 0 {
return nil
}
mpm := map[string]bool{}
for _, r := range rs {
mpm[r.ModulePath] = true
}
var mps []string
for m := range mpm {
mps = append(mps, m)
}
sort.Slice(mps, func(i, j int) bool {
_, v1 := internal.SeriesPathAndMajorVersion(mps[i])
_, v2 := internal.SeriesPathAndMajorVersion(mps[j])
return v1 > v2
})
links := make([]link, len(mps))
for i, m := range mps {
links[i] = link{Href: m, Body: m}
}
return &subResult{
Heading: heading,
Links: links,
}
}
// Search constraints.
const (
// maxSearchQueryLength represents the max number of characters that a search

Просмотреть файл

@ -9,11 +9,9 @@ import (
"database/sql"
"errors"
"fmt"
"strconv"
"strings"
"github.com/lib/pq"
"golang.org/x/mod/module"
"golang.org/x/pkgsite/internal"
"golang.org/x/pkgsite/internal/database"
"golang.org/x/pkgsite/internal/derrors"
@ -58,9 +56,9 @@ func (db *DB) GetLatestMajorPathForV1Path(ctx context.Context, v1path string) (_
// Keep only the N following vN.
suffix := internal.Suffix(v1path, sp)
modPath := strings.TrimSuffix(p, "/"+suffix)
i, err := modulePathMajorVersion(modPath)
if err != nil {
return "", 0, err
_, i := internal.SeriesPathAndMajorVersion(modPath)
if i == 0 {
return "", 0, fmt.Errorf("bad module path %q", modPath)
}
if maj <= i {
maj = i
@ -74,23 +72,6 @@ func (db *DB) GetLatestMajorPathForV1Path(ctx context.Context, v1path string) (_
return majPath, maj, nil
}
// modulePathMajorVersion returns the numeric major version for the given module path.
// If the path has no "vN" suffix, it returns 1.
func modulePathMajorVersion(modulePath string) (int, error) {
_, m, ok := module.SplitPathVersion(modulePath)
if !ok {
return 0, fmt.Errorf("bad module path %q", modulePath)
}
if m == "" {
return 1, nil
}
// First two characters are either ".v" or "/v".
if len(m) < 3 {
return 0, fmt.Errorf("bad version %q from module.SplitPathVersion(%q)", m, modulePath)
}
return strconv.Atoi(m[2:])
}
// upsertPath adds path into the paths table if it does not exist, and returns
// its ID either way.
// It assumes it is running inside a transaction.

Просмотреть файл

@ -84,26 +84,6 @@ func TestGetLatestMajorPathForV1Path(t *testing.T) {
}
}
func TestModulePathMajorVersion(t *testing.T) {
for _, test := range []struct {
in string
want int
}{
{"m.com", 1},
{"m.com/v123", 123},
{"gopkg.in/m.v1", 1},
{"gopkg.in/m.v35", 35},
} {
got, err := modulePathMajorVersion(test.in)
if err != nil {
t.Fatalf("%s: %v", test.in, err)
}
if got != test.want {
t.Errorf("%s: got %d, want %d", test.in, got, test.want)
}
}
}
func TestUpsertPathConcurrently(t *testing.T) {
// Verify that we get no constraint violations or other errors when
// the same path is upserted multiple times concurrently.

Просмотреть файл

@ -23,6 +23,7 @@ import (
"golang.org/x/pkgsite/internal"
"golang.org/x/pkgsite/internal/database"
"golang.org/x/pkgsite/internal/derrors"
"golang.org/x/pkgsite/internal/experiment"
"golang.org/x/pkgsite/internal/log"
"golang.org/x/pkgsite/internal/stdlib"
)
@ -114,7 +115,13 @@ var searchers = map[string]searcher{
// the penalty of a deep search that scans nearly every package.
func (db *DB) Search(ctx context.Context, q string, limit, offset, maxResultCount int) (_ []*internal.SearchResult, err error) {
defer derrors.WrapStack(&err, "DB.Search(ctx, %q, %d, %d)", q, limit, offset)
resp, err := db.hedgedSearch(ctx, q, limit, offset, maxResultCount, searchers, nil)
queryLimit := limit
if experiment.IsActive(ctx, internal.ExperimentSearchGrouping) {
// Gather extra results for better grouping by module and series.
queryLimit *= 5
}
resp, err := db.hedgedSearch(ctx, q, queryLimit, offset, maxResultCount, searchers, nil)
if err != nil {
return nil, err
}
@ -129,6 +136,12 @@ func (db *DB) Search(ctx context.Context, q string, limit, offset, maxResultCoun
results = append(results, r)
}
}
if experiment.IsActive(ctx, internal.ExperimentSearchGrouping) {
results = groupSearchResults(results)
}
if len(results) > limit {
results = results[:limit]
}
return results, nil
}
@ -391,6 +404,56 @@ func sortAndDedup(s []string) []string {
return r
}
// groupSearchResults groups and re-orders the list of SearchResults by module
// and series path and returns a new list of SearchResults.
//
// The second and later packages from a module are grouped under the first package,
// and removed from the top-level list.
//
// Higher major versions of a module are put before lower ones.
//
// Packages from lower major versions of the module are grouped under the first
// package of the highest major version. But they are not removed from the
// top-level list.
func groupSearchResults(rs []*internal.SearchResult) []*internal.SearchResult {
// Put higher major versions first, otherwise observing score.
sort.Slice(rs, func(i, j int) bool {
sp1, v1 := internal.SeriesPathAndMajorVersion(rs[i].ModulePath)
sp2, v2 := internal.SeriesPathAndMajorVersion(rs[j].ModulePath)
if sp1 != sp2 {
return rs[i].Score > rs[j].Score
}
return v1 > v2
})
modules := map[string]*internal.SearchResult{} // from module path to first result
series := map[string]*internal.SearchResult{} // for series path to first result
var results []*internal.SearchResult
for _, r := range rs {
f := modules[r.ModulePath]
if f == nil {
// First result with this module path; remember it and keep it.
modules[r.ModulePath] = r
results = append(results, r)
} else {
// Record this result under the first result.
f.SameModule = append(f.SameModule, r)
}
seriesPath := internal.SeriesPathForModule(r.ModulePath)
f = series[seriesPath]
if f == nil {
// First time we've seen anything from this series: remember it.
series[seriesPath] = r
} else if r.ModulePath != f.ModulePath {
// Result is from a different (lower) major version. Record this result
// under the first.
f.LowerMajor = append(f.LowerMajor, r)
}
}
return results
}
var upsertSearchStatement = fmt.Sprintf(`
INSERT INTO search_documents (
package_path,

Просмотреть файл

@ -1191,3 +1191,39 @@ func TestDeleteOlderVersionFromSearch(t *testing.T) {
insert(mod)
check(mod)
}
func TestGroupSearchResults(t *testing.T) {
rs := []*internal.SearchResult{
{PackagePath: "m.com/p", ModulePath: "m.com", Score: 10},
{PackagePath: "m.com/p2", ModulePath: "m.com", Score: 8},
{PackagePath: "m.com/v2/p", ModulePath: "m.com/v2", Score: 6},
{PackagePath: "m.com/v2/p2", ModulePath: "m.com/v2", Score: 4},
}
got := groupSearchResults(rs)
sp2 := &internal.SearchResult{
PackagePath: "m.com/p2",
ModulePath: "m.com",
Score: 8,
}
sp := &internal.SearchResult{
PackagePath: "m.com/p",
ModulePath: "m.com",
Score: 10,
SameModule: []*internal.SearchResult{sp2},
}
want := []*internal.SearchResult{
{
PackagePath: "m.com/v2/p",
ModulePath: "m.com/v2",
Score: 6,
SameModule: []*internal.SearchResult{
{PackagePath: "m.com/v2/p2", ModulePath: "m.com/v2", Score: 4},
},
LowerMajor: []*internal.SearchResult{sp, sp2},
},
sp,
}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("mismatch (-want, +got)\n%s", diff)
}
}