cmd/guru: optimize global pkglevel referrer queries

Global, package-level queries can be done without typechecking.
This makes it significantly cheaper.

Instead of typechecking, consider all packages that directly
import the query package. In each of those packages, filter out:

* files that don't contain the query name
* files that don't import the query package

Fully parse the remaining files and look for selector expressions
matching the query package and name.

There are a few twists (dot imports, the query package, xtests).
They are described, along with how they are handled,
in the large comment at the beginning of globalReferrersPkgLevel.

On my machine and GOPATH, this reduces typical time
needed to find referrers to encoding/json.MarshalIndent
from (roughly)

real	0m39.946s
user	2m27.844s
sys	0m54.774s

to (roughly)

real	0m5.687s
user	0m15.793s
sys	0m16.001s

The processing of packages could be parallelized;
that is left for a future change, to ease reviewing.

Benefits from that will be limited;
building the reverse import graph accounts for 73%
of the runtime, and that is dominated by syscalls.

Optimization idea from Alan Donovan.

This work supported by Sourcegraph.

Change-Id: Ib19e25fcdcb27673fb03d7300dba2a53198901ad
Reviewed-on: https://go-review.googlesource.com/97800
Reviewed-by: Alan Donovan <adonovan@google.com>
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
Josh Bleecher Snyder 2018-02-28 17:01:09 -08:00 коммит произвёл Alan Donovan
Родитель c47212f6f0
Коммит 1e1ec013b9
1 изменённых файлов: 283 добавлений и 17 удалений

Просмотреть файл

@ -9,17 +9,21 @@ import (
"fmt"
"go/ast"
"go/build"
"go/parser"
"go/token"
"go/types"
"io"
"log"
"os"
"sort"
"strconv"
"strings"
"sync"
"golang.org/x/tools/cmd/guru/serial"
"golang.org/x/tools/go/buildutil"
"golang.org/x/tools/go/loader"
"golang.org/x/tools/imports"
"golang.org/x/tools/refactor/importgraph"
)
@ -79,13 +83,14 @@ func referrers(q *Query) error {
// For a globally accessible object defined in package P, we
// must load packages that depend on P. Specifically, for a
// package-level object, we need to load only direct importers
// of P, but for a field or interface method, we must load
// of P, but for a field or method, we must load
// any package that transitively imports P.
if global, pkglevel := classify(obj); global {
global, pkglevel := classify(obj)
if global && !pkglevel {
// We'll use the object's position to identify it in the larger program.
objposn := fset.Position(obj.Pos())
defpkg := obj.Pkg().Path() // defining package
return globalReferrers(q, qpos.info.Pkg.Path(), defpkg, objposn, pkglevel)
return globalReferrers(q, qpos.info.Pkg.Path(), defpkg, objposn)
}
q.Output(fset, &referrersInitialResult{
@ -93,6 +98,10 @@ func referrers(q *Query) error {
obj: obj,
})
if global {
return globalReferrersPkgLevel(q, obj, fset)
}
outputUses(q, fset, usesOf(obj, qpos.info), obj.Pkg())
return nil // success
@ -216,26 +225,16 @@ func outputUses(q *Query, fset *token.FileSet, refs []*ast.Ident, pkg *types.Pac
}
// globalReferrers reports references throughout the entire workspace to the
// object at the specified source position. Its defining package is defpkg,
// and the query package is qpkg. isPkgLevel indicates whether the object
// is defined at package-level.
func globalReferrers(q *Query, qpkg, defpkg string, objposn token.Position, isPkgLevel bool) error {
// object (a field or method) at the specified source position.
// Its defining package is defpkg, and the query package is qpkg.
func globalReferrers(q *Query, qpkg, defpkg string, objposn token.Position) error {
// Scan the workspace and build the import graph.
// Ignore broken packages.
_, rev, _ := importgraph.Build(q.Build)
// Find the set of packages that depend on defpkg.
// Only function bodies in those packages need type-checking.
var users map[string]bool
if isPkgLevel {
users = rev[defpkg] // direct importers
if users == nil {
users = make(map[string]bool)
}
users[defpkg] = true // plus the defining package itself
} else {
users = rev.Search(defpkg) // transitive importers
}
users := rev.Search(defpkg) // transitive importers
// Prepare to load the larger program.
fset := token.NewFileSet()
@ -322,6 +321,273 @@ func globalReferrers(q *Query, qpkg, defpkg string, objposn token.Position, isPk
return nil // success
}
// globalReferrersPkgLevel reports references throughout the entire workspace
// to the package-level object obj.
// It assumes that the query object itself has already been reported.
//
// Every file parsed here is added to fset, and each group of references found
// is emitted through q.Output as a referrersPackageResult.
//
// It returns an error if the working directory cannot be determined, or if
// the query object cannot be found in the re-parsed files of its defining
// package. Packages and files that cannot be resolved or read are skipped.
func globalReferrersPkgLevel(q *Query, obj types.Object, fset *token.FileSet) error {
	// globalReferrersPkgLevel uses go/ast and friends instead of go/types.
	// This affords a considerable performance benefit.
	// It comes at the cost of some code complexity.
	//
	// Here's a high level summary.
	//
	// The goal is to find references to the query object p.Q.
	// There are several possible scenarios, each handled differently.
	//
	// 1. We are looking in a package other than p, and p is not dot-imported.
	//    This is the simplest case. Q must be referred to as n.Q,
	//    where n is the name under which p is imported.
	//    We look at all imports of p to gather all names under which it is imported.
	//    (In the typical case, it is imported only once, under its default name.)
	//    Then we look at all selector expressions and report any matches.
	//
	// 2. We are looking in a package other than p, and p is dot-imported.
	//    In this case, Q will be referred to just as Q.
	//    Furthermore, go/ast's object resolution will not be able to resolve
	//    Q to any other object, unlike any local (file- or function- or block-scoped) object.
	//    So we look at all matching identifiers and report all unresolvable ones.
	//    The selected name of a selector expression (the Q in x.Q) is excluded:
	//    it is never resolved by go/ast either, but it names a member of x,
	//    not the dot-imported object.
	//
	// 3. We are looking in package p.
	//    (Care must be taken to separate p and p_test (an xtest package),
	//    and make sure that they are treated as separate packages.)
	//    In this case, we give go/ast the entire package for object resolution,
	//    instead of going file by file.
	//    We then iterate over all identifiers that resolve to the query object.
	//    (The query object itself has already been reported, so we don't re-report it.)
	//
	// We always skip all files that don't contain the string Q, as they cannot be
	// relevant to finding references to Q.
	//
	// We parse all files leniently. In the presence of parsing errors, results are best-effort.

	// Scan the workspace and build the import graph.
	// Ignore broken packages.
	_, rev, _ := importgraph.Build(q.Build)

	// Find the set of packages that directly import defpkg.
	defpkg := obj.Pkg().Path()
	defpkg = strings.TrimSuffix(defpkg, "_test") // package x_test actually has package name x
	defpkg = imports.VendorlessPath(defpkg)      // remove vendor goop

	users := rev[defpkg]
	if len(users) == 0 {
		users = make(map[string]bool)
	}
	// We also need to check defpkg itself, and its xtests.
	// For the reverse graph packages, we process xtests with the main package.
	// defpkg gets special handling; we must distinguish between in-package vs out-of-package.
	// To make the control flow below simpler, add defpkg and defpkg xtest placeholders.
	// Use "!test" instead of "_test" because "!" is not a valid character in an import path.
	// (More precisely, it is not guaranteed to be a valid character in an import path,
	// so it is unlikely that it will be in use. See https://golang.org/ref/spec#Import_declarations.)
	users[defpkg] = true
	users[defpkg+"!test"] = true

	cwd, err := os.Getwd()
	if err != nil {
		return err
	}

	defname := obj.Pkg().Name()                    // name of defining package, used for imports using import path only
	isxtest := strings.HasSuffix(defname, "_test") // indicates whether the query object is defined in an xtest package

	name := obj.Name()
	namebytes := []byte(name)          // byte slice version of query object name, for early filtering
	objpos := fset.Position(obj.Pos()) // position of query object, used to prevent re-emitting original decl

	var files []string    // reusable list of files
	var pkgnames []string // reusable list of names the package is imported under

	for u := range users {
		uIsXTest := strings.HasSuffix(u, "!test") // indicates whether this package is the special defpkg xtest package
		u = strings.TrimSuffix(u, "!test")

		// Resolve package. Unresolvable packages are skipped.
		pkg, err := q.Build.Import(u, cwd, build.IgnoreVendor)
		if err != nil {
			continue
		}

		files = files[:0]

		// If we're not in the query package,
		// the object is in another package regardless,
		// so we want to process all files.
		// If we are in the query package,
		// we want to only process the files that are
		// part of that query package;
		// that set depends on whether the query package itself is an xtest.
		inQueryPkg := u == defpkg && isxtest == uIsXTest
		if !inQueryPkg || !isxtest {
			files = append(files, pkg.GoFiles...)
			files = append(files, pkg.TestGoFiles...)
			files = append(files, pkg.CgoFiles...) // use raw cgo files, as we're only parsing
		}
		if !inQueryPkg || isxtest {
			files = append(files, pkg.XTestGoFiles...)
		}

		if len(files) == 0 {
			continue
		}

		var deffiles map[string]*ast.File // set of files that are part of this package, for inQueryPkg only
		if inQueryPkg {
			deffiles = make(map[string]*ast.File)
		}

		for _, file := range files {
			if !buildutil.IsAbsPath(q.Build, file) {
				file = buildutil.JoinPath(q.Build, pkg.Dir, file)
			}
			src, err := readFile(q.Build, file)
			if err != nil {
				continue
			}

			// Fast path: If the object's name isn't present anywhere in the source, ignore the file.
			if !bytes.Contains(src, namebytes) {
				continue
			}

			if inQueryPkg {
				// If we're in the query package, we defer final processing until we have
				// parsed all of the candidate files in the package.
				// Best effort; allow errors and use what we can from what remains.
				f, _ := parser.ParseFile(fset, file, src, parser.AllErrors)
				if f != nil {
					deffiles[file] = f
				}
				continue
			}

			// We aren't in the query package. Go file by file.

			// Parse out only the imports, to check whether the defining package
			// was imported, and if so, under what names.
			// Best effort; allow errors and use what we can from what remains.
			f, _ := parser.ParseFile(fset, file, src, parser.ImportsOnly|parser.AllErrors)
			if f == nil {
				continue
			}

			// pkgnames is the set of names by which defpkg is imported in this file.
			// (Multiple imports in the same file are legal but vanishingly rare.)
			pkgnames = pkgnames[:0]
			var isdotimport bool
			for _, imp := range f.Imports {
				path, err := strconv.Unquote(imp.Path.Value)
				if err != nil || path != defpkg {
					continue
				}
				switch {
				case imp.Name == nil:
					pkgnames = append(pkgnames, defname)
				case imp.Name.Name == ".":
					isdotimport = true
				default:
					pkgnames = append(pkgnames, imp.Name.Name)
				}
			}
			if len(pkgnames) == 0 && !isdotimport {
				// Defining package not imported, bail.
				continue
			}

			// Re-parse the entire file.
			// Parse errors are ok; we'll do the best we can with a partial AST, if we have one.
			f, _ = parser.ParseFile(fset, file, src, parser.AllErrors)
			if f == nil {
				continue
			}

			// Walk the AST looking for references.
			var refs []*ast.Ident
			var visit func(n ast.Node) bool
			visit = func(n ast.Node) bool {
				switch n := n.(type) {
				case *ast.SelectorExpr:
					if n.Sel.Name != name {
						return true
					}
					// If the selector matches the target name,
					// and the expression is one of the names
					// that the defining package was imported under,
					// then we have a match.
					if x, ok := n.X.(*ast.Ident); ok {
						for _, pkgname := range pkgnames {
							if pkgname == x.Name {
								refs = append(refs, n.Sel)
								// Don't recurse further, to avoid duplicate entries
								// from the dot import check below.
								return false
							}
						}
					}
					// Not a match: n.Sel names a member of n.X, so it cannot be
					// a reference to the query object, even under a dot import.
					// Only the operand can still contain references; visit it
					// and skip n.Sel.
					ast.Inspect(n.X, visit)
					return false

				case *ast.Ident:
					// Dot imports are special.
					// Objects imported from the defining package are placed in the package scope.
					// go/ast does not resolve them to an object.
					// At all other scopes (file, local), go/ast can do the resolution.
					// So we're looking for object-free idents with the right name.
					// The only other way to get something with the right name at the package scope
					// is to *be* the defining package. We handle that case separately (inQueryPkg).
					if isdotimport && n.Obj == nil && n.Name == name {
						refs = append(refs, n)
					}
					return false
				}
				return true
			}
			ast.Inspect(f, visit)

			// Emit any references we found.
			if len(refs) > 0 {
				q.Output(fset, &referrersPackageResult{
					pkg:   types.NewPackage(pkg.ImportPath, pkg.Name),
					build: q.Build,
					fset:  fset,
					refs:  refs,
				})
			}
		}

		// If we're in the query package, we've now collected all the files in the package.
		// (Or at least the ones that might contain references to the object.)
		// Find and emit refs.
		if inQueryPkg {
			// Bundle the files together into a package.
			// This does package-level object resolution.
			// Resolution errors are ignored; results are best-effort.
			qpkg, _ := ast.NewPackage(fset, deffiles, nil, nil)
			// Look up the query object; we expect it to be defined in the package scope.
			// It can be absent if the defining file could not be read or parsed
			// well enough above, so report that as an error rather than panicking.
			pkgobj := qpkg.Scope.Objects[name]
			if pkgobj == nil {
				return fmt.Errorf("missing defpkg object for %s.%s", defpkg, name)
			}
			// Find all references to the query object.
			var refs []*ast.Ident
			ast.Inspect(qpkg, func(n ast.Node) bool {
				if id, ok := n.(*ast.Ident); ok {
					// Check both that this is a reference to the query object
					// and that it is not the query object itself;
					// the query object itself was already emitted.
					if id.Obj == pkgobj && objpos != fset.Position(id.Pos()) {
						refs = append(refs, id)
						return false
					}
				}
				return true
			})
			if len(refs) > 0 {
				q.Output(fset, &referrersPackageResult{
					pkg:   types.NewPackage(pkg.ImportPath, pkg.Name),
					build: q.Build,
					fset:  fset,
					refs:  refs,
				})
			}
		}
	}

	return nil
}
// findObject returns the object defined at the specified position.
func findObject(fset *token.FileSet, info *types.Info, objposn token.Position) types.Object {
good := func(obj types.Object) bool {