internal/postgres: change logic for upsertSearchDocumentSymbols

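upsertSearchDocumentSymbols is replaced by upsertSymbolSearchDocuments,
which follows the new symbol_search_documents schema: it stores a unit_id
instead of build_contexts, and it is called once per module from saveModule
(one INSERT ... SELECT over the module's unit IDs) rather than once per
package from upsertSearchDocuments.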

For golang/go#44142

Change-Id: I1857b04cceeb9b702e4c54fc3a8d39efa811a3ce
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/329493
Trust: Julie Qiu <julie@golang.org>
Run-TryBot: Julie Qiu <julie@golang.org>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
This commit is contained in:
Julie Qiu 2021-06-18 08:35:06 -10:00
Родитель e011f40052
Коммит de3c212715
5 изменённых файлов: 57 добавлений и 103 удалений

Просмотреть файл

@ -139,7 +139,6 @@ func (db *DB) saveModule(ctx context.Context, m *internal.Module, lmv *internal.
if err := insertSymbols(ctx, tx, m.ModulePath, m.Version, isLatest, pathToID, pathToUnitID, pathToDocs); err != nil {
return err
}
if !isLatest {
return nil
}
@ -179,7 +178,14 @@ func (db *DB) saveModule(ctx context.Context, m *internal.Module, lmv *internal.
return nil
}
// Insert the module's packages into search_documents.
return upsertSearchDocuments(ctx, tx, m)
if err := upsertSearchDocuments(ctx, tx, m); err != nil {
return err
}
var unitIDs []int
for _, uid := range pathToUnitID {
unitIDs = append(unitIDs, uid)
}
return upsertSymbolSearchDocuments(ctx, tx, m.ModulePath, m.Version, unitIDs)
})
if err != nil {
return false, err

Просмотреть файл

@ -10,11 +10,9 @@ import (
"fmt"
"github.com/Masterminds/squirrel"
"github.com/lib/pq"
"golang.org/x/pkgsite/internal"
"golang.org/x/pkgsite/internal/database"
"golang.org/x/pkgsite/internal/derrors"
"golang.org/x/pkgsite/internal/experiment"
"golang.org/x/pkgsite/internal/middleware"
)
@ -99,98 +97,3 @@ func collectSymbolHistory(check func(sh *internal.SymbolHistory, sm internal.Sym
return nil
}
}
// upsertSearchDocumentSymbols reads the symbols for the package at
// packagePath in module modulePath at version v and upserts them into the
// symbol_search_documents table.
//
// It does nothing and returns nil unless
// internal.ExperimentInsertSymbolSearchDocuments is active for ctx.
//
// The work is done in two steps: a SELECT (built with squirrel on top of
// packageSymbolQueryJoin) whose rows are collected in memory, followed by a
// single BulkInsert that overwrites build_contexts and tsv_symbol_tokens for
// any (package_path_id, symbol_name_id) pair that already exists.
//
// The deferred derrors.Wrap call receives &err together with the function
// name and arguments.
func upsertSearchDocumentSymbols(ctx context.Context, ddb *database.DB,
packagePath, modulePath, v string) (err error) {
defer derrors.Wrap(&err, "upsertSearchDocumentSymbols(ctx, ddb, %q, %q, %q)", packagePath, modulePath, v)
defer middleware.ElapsedStat(ctx, "upsertSearchDocumentSymbols")()
if !experiment.IsActive(ctx, internal.ExperimentInsertSymbolSearchDocuments) {
return nil
}
// Build the SELECT that produces one row per group of
// (package path ID, symbol name ID, tokens).
//
// NOTE(review): the table aliases used below (p1, s1, d, u, m) are not
// defined in this function; presumably packageSymbolQueryJoin adds the
// joins that introduce them. Confirm against that helper.
query := packageSymbolQueryJoin(squirrel.Select(
"p1.id AS package_path_id",
"s1.id AS symbol_name_id",
// Group the build contexts as an array, with the format
// "<goos>/<goarch>". We only care about the build contexts when the
// default goos/goarch for the package page does not contain the
// matching symbol.
//
// TODO(https://golang.org/issue/44142): We could probably get away with
// storing just the GOOS value, since we don't really need the GOARCH
// to link to a symbol page. If we do that we should also change the
// column type to []goos.
//
// Store in order of the build context list at internal.BuildContexts.
`ARRAY_AGG(FORMAT('%s/%s', d.goos, d.goarch)
ORDER BY
CASE WHEN d.goos='linux' THEN 0
WHEN d.goos='windows' THEN 1
WHEN d.goos='darwin' THEN 2
WHEN d.goos='js' THEN 3 END)`,
// If a user is looking for the symbol "DB.Begin", from package
// database/sql, we want them to be able to find this by searching for
// "DB.Begin", "Begin", and "sql.DB.Begin". Searching for "sql.DB" or
// "DB" will not return "DB.Begin".
//
// Index "<identifier> <package>.<identifier>"
// (e.g. "DB.Begin sql.DB.Begin").
`SETWEIGHT(
TO_TSVECTOR('simple', concat(s1.name, ' ', concat(u.name, '.', s1.name))),
'A') ||`+
// Index <identifier>, including the parent name (e.g. "DB.Begin").
`SETWEIGHT(
TO_TSVECTOR('simple', s1.name),
'A') ||`+
// Index <identifier> without parent name (e.g. "Begin").
//
// This is weighted less, so that if other symbols are just named
// "Begin" they will rank higher in a search for "Begin".
//
// split_part returns the second '.'-separated field of the name, which
// is the empty string for a name that contains no '.'.
`SETWEIGHT(
TO_TSVECTOR('simple', split_part(s1.name, '.', 2)),
'B') AS tokens`,
), packagePath, modulePath).
Where(squirrel.Eq{"m.version": v}).
GroupBy("p1.id, s1.id", "tokens").
OrderBy("s1.name")
// Render the query with $1, $2, ... placeholders.
q, args, err := query.PlaceholderFormat(squirrel.Dollar).ToSql()
if err != nil {
return err
}
// values accumulates the scanned rows as one flat list, four entries per
// row, in the same order as the columns slice passed to BulkInsert below.
var values []interface{}
collect := func(rows *sql.Rows) (err error) {
var (
packagePathID int
symbolNameID int
tokens string
buildContexts []string
)
// The scan order must match the order of the SELECT expressions above.
if err := rows.Scan(
&packagePathID,
&symbolNameID,
pq.Array(&buildContexts),
&tokens,
); err != nil {
return fmt.Errorf("row.Scan(): %v", err)
}
values = append(values, packagePathID, symbolNameID, pq.Array(buildContexts), tokens)
return nil
}
if err := ddb.RunQuery(ctx, q, collect, args...); err != nil {
return err
}
// Insert every collected row; a row whose (package_path_id,
// symbol_name_id) already exists has its build contexts and tokens
// replaced by the new values.
columns := []string{"package_path_id", "symbol_name_id", "build_contexts", "tsv_symbol_tokens"}
return ddb.BulkInsert(ctx, "symbol_search_documents", columns, values,
`ON CONFLICT (package_path_id, symbol_name_id)
DO UPDATE
SET
build_contexts=excluded.build_contexts,
tsv_symbol_tokens=excluded.tsv_symbol_tokens`)
}

Просмотреть файл

@ -690,9 +690,6 @@ func upsertSearchDocuments(ctx context.Context, ddb *database.DB, mod *internal.
if err := UpsertSearchDocument(ctx, ddb, args); err != nil {
return err
}
if err := upsertSearchDocumentSymbols(ctx, ddb, pkg.Path, mod.ModulePath, mod.Version); err != nil {
return err
}
}
return nil
}

Просмотреть файл

@ -341,7 +341,6 @@ func testSearch(t *testing.T, ctx context.Context) {
}
func TestSymbolSearch(t *testing.T) {
t.Skip()
ctx := context.Background()
ctx = experiment.NewContext(ctx, internal.ExperimentInsertSymbolSearchDocuments)
testDB, release := acquire(t)

Просмотреть файл

@ -56,6 +56,55 @@ func insertSymbols(ctx context.Context, tx *database.DB, modulePath, v string,
return nil
}
// upsertSymbolSearchDocuments upserts rows into symbol_search_documents for
// every symbol documented in the units whose IDs are listed in unitIDs.
//
// It does nothing and returns nil unless
// internal.ExperimentInsertSymbolSearchDocuments is active for ctx.
//
// The whole operation is a single INSERT ... SELECT statement executed on tx.
// When a row with the same (package_path_id, symbol_name_id) already exists,
// its unit_id and tsv_symbol_tokens are overwritten.
//
// modulePath and v do not take part in the query; within this function they
// are only passed to the deferred derrors.Wrap call, alongside &err.
func upsertSymbolSearchDocuments(ctx context.Context, tx *database.DB, modulePath, v string, unitIDs []int) (err error) {
defer derrors.Wrap(&err, "upsertSymbolSearchDocuments(ctx, ddb, %q, %q)", modulePath, v)
if !experiment.IsActive(ctx, internal.ExperimentInsertSymbolSearchDocuments) {
return nil
}
// If a user is looking for the symbol "DB.Begin", from package
// database/sql, we want them to be able to find this by searching for
// "DB.Begin", "Begin", and "sql.DB.Begin". Searching for "sql.DB" or
// "DB" will not return "DB.Begin".
//
// The tokens column is therefore the concatenation of three weighted
// tsvectors, described next to each SETWEIGHT expression below.
q := `
INSERT INTO symbol_search_documents
(package_path_id, symbol_name_id, unit_id, tsv_symbol_tokens)
SELECT
u.path_id,
s.id,
u.id,` +
// Index "<identifier> <package>.<identifier>"
// (e.g. "DB.Begin sql.DB.Begin").
`SETWEIGHT( TO_TSVECTOR('simple', concat(s.name, ' ', concat(u.name, '.', s.name))), 'A') ||` +
// Index <identifier>, including the parent name (e.g. "DB.Begin").
`SETWEIGHT( TO_TSVECTOR('simple', s.name), 'A') ||` +
// Index <identifier> without parent name (e.g. "Begin").
//
// This is weighted less, so that if other symbols are just named
// "Begin" they will rank higher in a search for "Begin".
//
// split_part returns the second '.'-separated field of the name, which
// is the empty string for a name that contains no '.'.
`SETWEIGHT( TO_TSVECTOR('simple', split_part(s.name, '.', 2)), 'B') AS tokens` +
`
FROM symbol_names s
INNER JOIN package_symbols ps ON s.id = ps.symbol_name_id
INNER JOIN documentation_symbols ds ON ps.id = ds.package_symbol_id
INNER JOIN documentation d ON d.id = ds.documentation_id
INNER JOIN units u ON u.id = d.unit_id
WHERE u.id = ANY($1)
-- We will get a row for every unit/symbol/goos/goarch, but we only
-- care about the unit/symbol.
GROUP BY s.id, u.id, u.path_id
ON CONFLICT (package_path_id, symbol_name_id)
DO UPDATE SET
unit_id=excluded.unit_id,
tsv_symbol_tokens=excluded.tsv_symbol_tokens`
// pq.Array lets the Go slice be bound to $1 as a Postgres array, which
// the query matches against with ANY($1).
_, err = tx.Exec(ctx, q, pq.Array(unitIDs))
return err
}
type packageSymbol struct {
name string
synopsis string