// Copyright 2021 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package postgres import ( "context" "database/sql" "fmt" "sort" "github.com/Masterminds/squirrel" "github.com/lib/pq" "golang.org/x/pkgsite/internal" "golang.org/x/pkgsite/internal/database" "golang.org/x/pkgsite/internal/derrors" "golang.org/x/pkgsite/internal/log" "golang.org/x/pkgsite/internal/version" ) func insertSymbols(ctx context.Context, tx *database.DB, modulePath, v string, isLatest bool, pathToID map[string]int, pathToUnitID map[string]int, pathToDocs map[string][]*internal.Documentation) (err error) { defer derrors.WrapStack(&err, "insertSymbols(ctx, db, %q, %q, pathToID, pathToDocs)", modulePath, v) defer internal.RequestState(ctx, "inserting symbols")() // Only update symbol history if the version type is release. versionType, err := version.ParseType(v) if err != nil { return err } if versionType != version.TypeRelease && !isLatest { return nil } modulePathID := pathToID[modulePath] if modulePathID == 0 { return fmt.Errorf("modulePathID cannot be 0: %q", modulePath) } pathToDocIDToDoc, err := getDocIDsForPath(ctx, tx, pathToUnitID, pathToDocs) if err != nil { return err } nameToID, err := upsertSymbolNamesReturningIDs(ctx, tx, pathToDocIDToDoc) if err != nil { return err } pathToPkgsymToID, err := upsertPackageSymbolsReturningIDs(ctx, tx, modulePathID, pathToID, nameToID, pathToDocIDToDoc) if err != nil { return err } if err := upsertDocumentationSymbols(ctx, tx, pathToPkgsymToID, pathToDocIDToDoc); err != nil { return err } if versionType == version.TypeRelease { if err := upsertSymbolHistory(ctx, tx, modulePath, v, nameToID, pathToID, pathToPkgsymToID, pathToDocIDToDoc); err != nil { return err } } if isLatest { return deleteOldSymbolSearchDocuments(ctx, tx, modulePathID, pathToID, pathToDocIDToDoc, pathToPkgsymToID) } return nil } type packageSymbol struct { name string synopsis string // parentName is a unique key in packageSymbol because the section can change // with the name and synopsis remaining the same. For example: // https://pkg.go.dev/go/types@go1.8#Universe is in the Variables section, // with parentName Universe. // https://pkg.go.dev/go/types@go1.16#Universe is in the Types section, // with parentName Scope. // // https://pkg.go.dev/github.com/89z/page@v1.2.1#Help is in the Types // section under type InputMode. // https://pkg.go.dev/github.com/89z/page@v1.1.3#Help is in the Types // section under ScreenMode. parentName string } func upsertDocumentationSymbols(ctx context.Context, db *database.DB, pathToPkgsymID map[string]map[packageSymbol]int, pathToDocIDToDoc map[string]map[int]*internal.Documentation) (err error) { defer derrors.WrapStack(&err, "upsertDocumentationSymbols(ctx, db, pathToPkgsymID, pathToDocIDToDoc)") // Create a map of documentation_id TO package_symbol_id set. // This will be used to verify that all package_symbols for the unit have // been inserted. docIDToPkgsymIDs := map[int]map[int]bool{} for path, docIDToDoc := range pathToDocIDToDoc { for docID, doc := range docIDToDoc { err := updateSymbols(doc.API, func(sm *internal.SymbolMeta) error { pkgsymToID, ok := pathToPkgsymID[path] if !ok { return fmt.Errorf("path could not be found: %q", path) } ps := packageSymbol{synopsis: sm.Synopsis, name: sm.Name, parentName: sm.ParentName} pkgsymID, ok := pkgsymToID[ps] if !ok { return fmt.Errorf("package symbol could not be found: %v", ps) } _, ok = docIDToPkgsymIDs[docID] if !ok { docIDToPkgsymIDs[docID] = map[int]bool{} } docIDToPkgsymIDs[docID][pkgsymID] = true return nil }) if err != nil { return err } } } // Fetch all existing rows in documentation_symbols for this unit using the // documentation IDs. // Keep track of which rows already exist in documentation_symbols using // gotDocIDToPkgsymIDs. var documentationIDs []any for docID := range docIDToPkgsymIDs { documentationIDs = append(documentationIDs, docID) } gotDocIDToPkgsymIDs := map[int]map[int]bool{} collect := func(rows *sql.Rows) error { var id, docID, pkgsymID int if err := rows.Scan(&id, &docID, &pkgsymID); err != nil { return fmt.Errorf("row.Scan(): %v", err) } if !docIDToPkgsymIDs[docID][pkgsymID] { // The package_symbol_id in the documentation_symbols table does // not match the one we want to insert. This can happen if we // change the package_symbol_id. In that case, do not add this to // the map, so that we can upsert below. // // See https://go-review.googlesource.com/c/pkgsite/+/315309 // and https://go-review.googlesource.com/c/pkgsite/+/315310 // where the package_symbol_id was potentially changed. return nil } if _, ok := gotDocIDToPkgsymIDs[docID]; !ok { gotDocIDToPkgsymIDs[docID] = map[int]bool{} } gotDocIDToPkgsymIDs[docID][pkgsymID] = true return nil } if err := db.RunQuery(ctx, ` SELECT ds.id, ds.documentation_id, ds.package_symbol_id FROM documentation_symbols ds WHERE documentation_id = ANY($1);`, collect, pq.Array(documentationIDs)); err != nil { return err } // Get the difference between the documentation_symbols for this package, // and the ones that already exist in the documentation_symbols table. Only // insert rows that do not already exist. // // Sort first to prevent deadlocks. var docIDs []int for docID := range docIDToPkgsymIDs { docIDs = append(docIDs, docID) } sort.Ints(docIDs) var values []any for _, docID := range docIDs { gotSet := gotDocIDToPkgsymIDs[docID] for pkgsymID := range docIDToPkgsymIDs[docID] { if !gotSet[pkgsymID] { values = append(values, docID, pkgsymID) } } } // Upsert the rows. // Note that the order of pkgsymcols must match that of the SELECT query in // the collect function. docsymcols := []string{"documentation_id", "package_symbol_id"} if err := db.BulkInsert(ctx, "documentation_symbols", docsymcols, values, ` ON CONFLICT (documentation_id, package_symbol_id) DO UPDATE SET documentation_id=excluded.documentation_id, package_symbol_id=excluded.package_symbol_id`); err != nil { return err } return nil } func upsertPackageSymbolsReturningIDs(ctx context.Context, db *database.DB, modulePathID int, pathToID map[string]int, nameToID map[string]int, pathToDocIDToDoc map[string]map[int]*internal.Documentation) (_ map[string]map[packageSymbol]int, err error) { defer derrors.WrapStack(&err, "upsertPackageSymbolsReturningIDs(ctx, db, %d, pathToID, pathToDocIDToDoc)", modulePathID) idToPath := map[int]string{} for path, id := range pathToID { idToPath[id] = path } var names []string idToSymbolName := map[int]string{} for name, id := range nameToID { idToSymbolName[id] = name names = append(names, name) } pathTopkgsymToID := map[string]map[packageSymbol]int{} collect := func(rows *sql.Rows) error { var ( id, pathID, symbolID, parentSymbolID int synopsis string ) if err := rows.Scan(&id, &pathID, &symbolID, &parentSymbolID, &synopsis); err != nil { return fmt.Errorf("row.Scan(): %v", err) } path := idToPath[pathID] if _, ok := pathTopkgsymToID[path]; !ok { pathTopkgsymToID[path] = map[packageSymbol]int{} } sym := idToSymbolName[symbolID] if sym == "" { return fmt.Errorf("symbol name cannot be empty: %d", symbolID) } parentSym, ok := idToSymbolName[parentSymbolID] if !ok { // A different variable of this symbol was previously inserted. // Don't add this to pathTopkgsymToID, since it's not the package // symbol that we want. // For example: // https://dev-pkg.go.dev/github.com/fastly/kingpin@v1.2.6#TokenShort // and // https://pkg.go.dev/github.com/fastly/kingpin@v1.3.7#TokenShort // have the same synopsis, but different parents and sections. return nil } pathTopkgsymToID[path][packageSymbol{ synopsis: synopsis, name: sym, parentName: parentSym, }] = id return nil } // This query fetches more that just the package symbols that we want. // The relevant package symbols are filtered above. if err := db.RunQuery(ctx, ` SELECT ps.id, ps.package_path_id, ps.symbol_name_id, ps.parent_symbol_name_id, ps.synopsis FROM package_symbols ps INNER JOIN symbol_names s ON ps.symbol_name_id = s.id WHERE module_path_id = $1 AND s.name = ANY($2);`, collect, modulePathID, pq.Array(names)); err != nil { return nil, err } // Sort to prevent deadlocks. var paths []string for path := range pathToDocIDToDoc { paths = append(paths, path) } sort.Strings(paths) var packageSymbols []any for _, path := range paths { docs := pathToDocIDToDoc[path] pathID := pathToID[path] if pathID == 0 { return nil, fmt.Errorf("pathID cannot be 0: %q", path) } for _, doc := range docs { // Sort to prevent deadlocks. sort.Slice(doc.API, func(i, j int) bool { return doc.API[i].Name < doc.API[j].Name }) if err := updateSymbols(doc.API, func(sm *internal.SymbolMeta) error { ps := packageSymbol{synopsis: sm.Synopsis, name: sm.Name, parentName: sm.ParentName} symID := nameToID[sm.Name] if symID == 0 { return fmt.Errorf("symID cannot be 0: %q", sm.Name) } if sm.ParentName == "" { sm.ParentName = sm.Name } parentID := nameToID[sm.ParentName] if parentID == 0 { return fmt.Errorf("parentSymID cannot be 0: %q", sm.ParentName) } if _, ok := pathTopkgsymToID[path][ps]; !ok { packageSymbols = append(packageSymbols, pathID, modulePathID, symID, parentID, sm.Section, sm.Kind, sm.Synopsis) } return nil }); err != nil { return nil, err } } } // The order of pkgsymcols must match that of the SELECT query in the //collect function. pkgsymcols := []string{"id", "package_path_id", "symbol_name_id", "parent_symbol_name_id", "synopsis"} if err := db.BulkInsertReturning(ctx, "package_symbols", []string{ "package_path_id", "module_path_id", "symbol_name_id", "parent_symbol_name_id", "section", "type", "synopsis", }, packageSymbols, database.OnConflictDoNothing, pkgsymcols, collect); err != nil { return nil, err } return pathTopkgsymToID, nil } func upsertSymbolNamesReturningIDs(ctx context.Context, db *database.DB, pathToDocIDToDocs map[string]map[int]*internal.Documentation) (_ map[string]int, err error) { defer derrors.WrapStack(&err, "upsertSymbolNamesReturningIDs") var names []string for _, docIDToDocs := range pathToDocIDToDocs { for _, doc := range docIDToDocs { if err := updateSymbols(doc.API, func(sm *internal.SymbolMeta) error { names = append(names, sm.Name) return nil }); err != nil { return nil, err } } } sort.Strings(names) nameToID := map[string]int{} collect := func(rows *sql.Rows) error { var ( id int name string ) if err := rows.Scan(&id, &name); err != nil { return fmt.Errorf("row.Scan(): %v", err) } nameToID[name] = id if id == 0 { return fmt.Errorf("id can't be 0: %q", name) } return nil } query := ` SELECT id, name FROM symbol_names WHERE name = ANY($1);` if err := db.RunQuery(ctx, query, collect, pq.Array(names)); err != nil { return nil, err } var values []any for _, name := range names { if _, ok := nameToID[name]; !ok { values = append(values, name) } } if err := db.BulkInsertReturning(ctx, "symbol_names", []string{"name"}, values, database.OnConflictDoNothing, []string{"id", "name"}, collect); err != nil { return nil, err } return nameToID, nil } func updateSymbols(symbols []*internal.Symbol, updateFunc func(sm *internal.SymbolMeta) error) error { for _, s := range symbols { if err := updateFunc(&s.SymbolMeta); err != nil { return err } for _, c := range s.Children { if err := updateFunc(c); err != nil { return err } } } return nil } func deleteOldSymbolSearchDocuments(ctx context.Context, db *database.DB, modulePathID int, pathToID map[string]int, pathToDocIDToDoc map[string]map[int]*internal.Documentation, latestPathToPkgsymToID map[string]map[packageSymbol]int) (err error) { defer derrors.WrapStack(&err, "deleteOldSymbolSearchDocuments(ctx, db, %q, pathToID, pathToDocIDToDoc)", modulePathID) // Get all package_symbol_ids for the latest module (the current one we are // trying to insert). latestPkgsymIDs := map[int]bool{} for path := range pathToID { docs := pathToDocIDToDoc[path] pathID := pathToID[path] if pathID == 0 { return fmt.Errorf("pathID cannot be 0: %q", path) } for _, doc := range docs { err := updateSymbols(doc.API, func(sm *internal.SymbolMeta) error { pkgsymToID, ok := latestPathToPkgsymToID[path] if !ok { return fmt.Errorf("path could not be found: %q", path) } ps := packageSymbol{synopsis: sm.Synopsis, name: sm.Name, parentName: sm.ParentName} pkgsymID, ok := pkgsymToID[ps] if !ok { return fmt.Errorf("package symbol could not be found: %v", ps) } latestPkgsymIDs[pkgsymID] = true return nil }) if err != nil { return err } } } var pathIDs []int for _, id := range pathToID { pathIDs = append(pathIDs, id) } // Fetch package_symbol_id currently in symbol_search_documents. dbPkgSymIDs, err := database.Collect1[int](ctx, db, ` SELECT package_symbol_id FROM symbol_search_documents WHERE package_path_id = ANY($1);`, pq.Array(pathIDs)) if err != nil { return err } var toDelete []int for _, id := range dbPkgSymIDs { if _, ok := latestPkgsymIDs[id]; !ok { toDelete = append(toDelete, id) } } // Delete stale rows. q, args, err := squirrel.Delete("symbol_search_documents"). Where("package_symbol_id = ANY(?)", pq.Array(toDelete)). PlaceholderFormat(squirrel.Dollar).ToSql() if err != nil { return err } n, err := db.Exec(ctx, q, args...) if err != nil { return err } log.Infof(ctx, "deleted %d rows from symbol_search_documents", n) return nil }