internal/postgres: upsert module data

When we insert module information, make sure we overwrite existing
rows. Previously, we specified that new data should be ignored if the
row already existed, which meant that we were adding rows but never
changing them. That didn't matter at one point, when we deleted a
module before re-inserting it, but we no longer delete, so we must
upsert.

We also fix a number of places where our test modules had duplicate
directories. The upserts failed for these.

Change-Id: I97465b11e4ea6cbb7835e883f36f098445207eba
Reviewed-on: https://team-review.git.corp.google.com/c/golang/discovery/+/766365
Reviewed-by: Julie Qiu <julieqiu@google.com>
This commit is contained in:
Jonathan Amsterdam 2020-06-09 15:14:49 -04:00
Родитель 1321339c89
Коммит 45c8867497
6 изменённых файлов: 43 добавлений и 32 удалений

Просмотреть файл

@ -19,7 +19,6 @@ func moduleDirectories(modulePath string,
pkgs []*internal.LegacyPackage,
readmes []*internal.Readme,
d *licenses.Detector) []*internal.DirectoryNew {
pkgLookup := map[string]*internal.LegacyPackage{}
for _, pkg := range pkgs {
pkgLookup[pkg.Path] = pkg

Просмотреть файл

@ -396,6 +396,7 @@ func TestGetModuleLicenses(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
defer cancel()
testModule.Licenses = nil
for _, p := range testModule.LegacyPackages {
testModule.Licenses = append(testModule.Licenses, &licenses.License{
Metadata: p.Licenses[0],

Просмотреть файл

@ -294,18 +294,16 @@ func TestGetDirectoryNew(t *testing.T) {
// Add a module that has READMEs in a directory and a package.
m := sample.Module("a.com/m", "v1.2.3", "dir/p")
d := sample.DirectoryNewEmpty("a.com/m/dir")
d := findDirectory(m, "a.com/m/dir")
d.Readme = &internal.Readme{
Filepath: "DIR_README.md",
Contents: "dir readme",
}
m.Directories = append(m.Directories, d)
d = sample.DirectoryNewEmpty("a.com/m/dir/p")
d = findDirectory(m, "a.com/m/dir/p")
d.Readme = &internal.Readme{
Filepath: "PKG_README.md",
Contents: "pkg readme",
}
m.Directories = append(m.Directories, d)
if err := testDB.InsertModule(ctx, m); err != nil {
t.Fatal(err)
}
@ -443,6 +441,15 @@ func TestGetDirectoryNew(t *testing.T) {
}
}
func findDirectory(m *internal.Module, path string) *internal.DirectoryNew {
for _, d := range m.Directories {
if d.Path == path {
return d
}
}
return nil
}
func TestGetDirectoryFieldSet(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
defer cancel()

Просмотреть файл

@ -237,8 +237,8 @@ func insertLicenses(ctx context.Context, db *database.DB, m *internal.Module, mo
"coverage",
"module_id",
}
return db.BulkInsert(ctx, "licenses", licenseCols, licenseValues,
database.OnConflictDoNothing)
return db.BulkUpsert(ctx, "licenses", licenseCols, licenseValues,
[]string{"module_path", "version", "file_path"})
}
return nil
}
@ -304,6 +304,7 @@ func insertPackages(ctx context.Context, db *database.DB, m *internal.Module) (e
}
}
if len(pkgValues) > 0 {
uniqueCols := []string{"path", "module_path", "version"}
pkgCols := []string{
"path",
"synopsis",
@ -319,7 +320,7 @@ func insertPackages(ctx context.Context, db *database.DB, m *internal.Module) (e
"goarch",
"commit_time",
}
if err := db.BulkInsert(ctx, "packages", pkgCols, pkgValues, database.OnConflictDoNothing); err != nil {
if err := db.BulkUpsert(ctx, "packages", pkgCols, pkgValues, uniqueCols); err != nil {
return err
}
}
@ -331,7 +332,7 @@ func insertPackages(ctx context.Context, db *database.DB, m *internal.Module) (e
"from_version",
"to_path",
}
if err := db.BulkInsert(ctx, "imports", importCols, importValues, database.OnConflictDoNothing); err != nil {
if err := db.BulkUpsert(ctx, "imports", importCols, importValues, importCols); err != nil {
return err
}
}
@ -363,7 +364,7 @@ func insertImportsUnique(ctx context.Context, tx *database.DB, m *internal.Modul
return nil
}
cols := []string{"from_path", "from_module_path", "to_path"}
return tx.BulkInsert(ctx, "imports_unique", cols, values, database.OnConflictDoNothing)
return tx.BulkUpsert(ctx, "imports_unique", cols, values, cols)
}
func insertDirectories(ctx context.Context, db *database.DB, m *internal.Module, moduleID int) (err error) {
@ -451,7 +452,9 @@ func insertDirectories(ctx context.Context, db *database.DB, m *internal.Module,
}
log.Debugf(ctx, "memory before inserting into paths: %dM", allocMeg())
if err := db.BulkInsertReturning(ctx, "paths", pathCols, pathValues, database.OnConflictDoNothing, []string{"id", "path"}, func(rows *sql.Rows) error {
uniqueCols := []string{"path", "module_id"}
returningCols := []string{"id", "path"}
if err := db.BulkUpsertReturning(ctx, "paths", pathCols, pathValues, uniqueCols, returningCols, func(rows *sql.Rows) error {
var (
pathID int
path string
@ -484,7 +487,7 @@ func insertDirectories(ctx context.Context, db *database.DB, m *internal.Module,
readmeValues = append(readmeValues, id, readme.Filepath, makeValidUnicode(readme.Contents))
}
readmeCols := []string{"path_id", "file_path", "contents"}
if err := db.BulkInsert(ctx, "readmes", readmeCols, readmeValues, database.OnConflictDoNothing); err != nil {
if err := db.BulkUpsert(ctx, "readmes", readmeCols, readmeValues, []string{"path_id"}); err != nil {
return err
}
}
@ -500,14 +503,9 @@ func insertDirectories(ctx context.Context, db *database.DB, m *internal.Module,
id := pathToID[path]
docValues = append(docValues, id, doc.GOOS, doc.GOARCH, doc.Synopsis, makeValidUnicode(doc.HTML))
}
docCols := []string{
"path_id",
"goos",
"goarch",
"synopsis",
"html",
}
if err := db.BulkInsert(ctx, "documentation", docCols, docValues, database.OnConflictDoNothing); err != nil {
uniqueCols := []string{"path_id", "goos", "goarch"}
docCols := append(uniqueCols, "synopsis", "html")
if err := db.BulkUpsert(ctx, "documentation", docCols, docValues, uniqueCols); err != nil {
return err
}
}
@ -525,7 +523,7 @@ func insertDirectories(ctx context.Context, db *database.DB, m *internal.Module,
}
}
importCols := []string{"path_id", "to_path"}
return db.BulkInsert(ctx, "package_imports", importCols, importValues, database.OnConflictDoNothing)
return db.BulkUpsert(ctx, "package_imports", importCols, importValues, importCols)
}
// lock obtains an exclusive, transaction-scoped advisory lock on modulePath.

Просмотреть файл

@ -42,13 +42,8 @@ func TestInsertModule(t *testing.T) {
module: sample.DefaultModule(),
},
{
name: "valid test with internal package",
module: func() *internal.Module {
m := sample.Module(sample.ModulePath, sample.VersionString, "internal/ffoo")
m.Directories = append(m.Directories,
sample.DirectoryNewEmpty(sample.ModulePath+"/internal"))
return m
}(),
name: "valid test with internal package",
module: sample.Module(sample.ModulePath, sample.VersionString, "internal/ffoo"),
},
{
name: "valid test with go.mod missing",

Просмотреть файл

@ -146,12 +146,23 @@ func Module(modulePath, version string, suffixes ...string) *internal.Module {
LegacyModuleInfo: *mi,
LegacyPackages: nil,
Licenses: Licenses,
Directories: []*internal.DirectoryNew{
DirectoryNewForModuleRoot(mi, LicenseMetadata),
},
}
emptySuffix := false
for _, s := range suffixes {
if s == "" {
emptySuffix = true
break
}
}
if emptySuffix {
AddPackage(m, LegacyPackage(modulePath, ""))
} else {
m.Directories = []*internal.DirectoryNew{DirectoryNewForModuleRoot(mi, LicenseMetadata)}
}
for _, s := range suffixes {
AddPackage(m, LegacyPackage(modulePath, s))
if s != "" {
AddPackage(m, LegacyPackage(modulePath, s))
}
}
return m
}