internal/postgres: on insert, delete only old packages from search

Instead of deleting the entire module from search_documents on insert,
just delete packages that are not in the current version. That will
avoid resetting imported-by counts to zero when we reprocess the
latest version of a module.

For golang/go#47555

Change-Id: Ie2aedc920f0e840c20f4487ed10bf7801b43a3f8
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/341269
Trust: Jonathan Amsterdam <jba@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
Reviewed-by: Julie Qiu <julie@golang.org>
This commit is contained in:
Jonathan Amsterdam 2021-08-10 13:58:50 -04:00
Родитель 6589d1fa54
Коммит 2b0bc991dd
3 изменённых файлов: 56 добавлений и 12 удалений

Просмотреть файл

@ -149,10 +149,7 @@ func (db *DB) saveModule(ctx context.Context, m *internal.Module, lmv *internal.
return err
}
// Delete this module from search_documents completely. Below we'll
// insert the packages from this module. This will effectively remove
// packages from older module versions that are not in the latest one.
if err := deleteModuleFromSearchDocuments(ctx, tx, m.ModulePath, nil); err != nil {
if err := deleteOtherModulePackagesFromSearchDocuments(ctx, tx, m); err != nil {
return err
}

Просмотреть файл

@ -1092,6 +1092,28 @@ func (db *DB) DeleteOlderVersionFromSearchDocuments(ctx context.Context, moduleP
})
}
// deleteOtherModulePackagesFromSearchDocuments deletes all packages from search
// documents with the given module that are not in m.
func deleteOtherModulePackagesFromSearchDocuments(ctx context.Context, tx *database.DB, m *internal.Module) error {
dbPkgs, err := tx.CollectStrings(ctx, `
SELECT package_path FROM search_documents WHERE module_path = $1
`, m.ModulePath)
if err != nil {
return err
}
pkgInModule := map[string]bool{}
for _, u := range m.Packages() {
pkgInModule[u.Path] = true
}
var otherPkgs []string
for _, p := range dbPkgs {
if !pkgInModule[p] {
otherPkgs = append(otherPkgs, p)
}
}
return deleteModuleFromSearchDocuments(ctx, tx, m.ModulePath, otherPkgs)
}
// deleteModuleFromSearchDocuments deletes module_path from search_documents.
// If packages is non-empty, it only deletes those packages.
func deleteModuleFromSearchDocuments(ctx context.Context, tx *database.DB, modulePath string, packages []string) error {

Просмотреть файл

@ -1341,10 +1341,10 @@ func TestDeleteFromSearch(t *testing.T) {
const modulePath = "deleteme.com"
initial := []searchDocumentRow{
{modulePath + "/p1", modulePath, "v0.0.9"}, // oldest version of same module
{modulePath + "/p2", modulePath, "v1.1.0"}, // older version of same module
{modulePath + "/p4", modulePath, "v1.9.0"}, // newer version of same module
{"other.org/p2", "other.org", "v1.1.0"}, // older version of a different module
{modulePath + "/p1", modulePath, "v0.0.9", 0}, // oldest version of same module
{modulePath + "/p2", modulePath, "v1.1.0", 0}, // older version of same module
{modulePath + "/p4", modulePath, "v1.9.0", 0}, // newer version of same module
{"other.org/p2", "other.org", "v1.1.0", 0}, // older version of a different module
}
insertInitial := func(db *DB) {
@ -1367,8 +1367,8 @@ func TestDeleteFromSearch(t *testing.T) {
}
checkSearchDocuments(ctx, t, testDB, []searchDocumentRow{
{modulePath + "/p4", modulePath, "v1.9.0"}, // newer version not deleted
{"other.org/p2", "other.org", "v1.1.0"}, // other module not deleted
{modulePath + "/p4", modulePath, "v1.9.0", 0}, // newer version not deleted
{"other.org/p2", "other.org", "v1.1.0", 0}, // other module not deleted
})
})
t.Run("deleteModuleFromSearchDocuments", func(t *testing.T) {
@ -1381,18 +1381,43 @@ func TestDeleteFromSearch(t *testing.T) {
t.Fatal(err)
}
checkSearchDocuments(ctx, t, testDB, []searchDocumentRow{
{"other.org/p2", "other.org", "v1.1.0"}, // other module not deleted
{"other.org/p2", "other.org", "v1.1.0", 0}, // other module not deleted
})
})
t.Run("deleteOtherModulePackagesFromSearchDocuments", func(t *testing.T) {
testDB, release := acquire(t)
defer release()
// Insert a module with two packages.
m0 := sample.Module(modulePath, "v1.0.0", "p1", "p2")
MustInsertModule(ctx, t, testDB, m0)
// Set the imported-by count to a non-zero value so we can tell which
// rows were deleted.
if _, err := testDB.db.Exec(ctx, `UPDATE search_documents SET imported_by_count = 1`); err != nil {
t.Fatal(err)
}
// Later version of module does not have p1.
m1 := sample.Module(modulePath, "v1.1.0", "p2", "p3")
MustInsertModule(ctx, t, testDB, m1)
// p1 should be gone, p2 should be there with the same
// imported_by_count, and p3 should be there with a zero count.
want := []searchDocumentRow{
{modulePath + "/p2", modulePath, "v1.1.0", 1},
{modulePath + "/p3", modulePath, "v1.1.0", 0},
}
checkSearchDocuments(ctx, t, testDB, want)
})
}
type searchDocumentRow struct {
PackagePath, ModulePath, Version string
ImportedByCount int
}
func readSearchDocuments(ctx context.Context, db *DB) ([]searchDocumentRow, error) {
var rows []searchDocumentRow
err := db.db.CollectStructs(ctx, &rows, `SELECT package_path, module_path, version FROM search_documents`)
err := db.db.CollectStructs(ctx, &rows, `SELECT package_path, module_path, version, imported_by_count FROM search_documents`)
if err != nil {
return nil, err
}