From 17213ba81aef4b1d1c90e6f985890b8d155cca14 Mon Sep 17 00:00:00 2001 From: Rob Findley Date: Thu, 10 Oct 2024 20:57:30 +0000 Subject: [PATCH] gopls/internal/cache/parsego: support lazy object resolution for Files For the purposes of go/analysis, gopls could not skip syntactic object resolution, as it is part of the go/analysis contract. This prevents analysis from reusing existing type-checked packages, leading to increased CPU and memory during diagnostics. Fix this by making object resolution lazy, and ensuring that all analysed files are resolved prior to analysis. This could introduce a race if gopls were to read the fields set by object resolution, for example if it was printing the tree using ast.Fprint, so we include a test that these fields are only accessed from packages or declarations that are verified to be safe. Since the resolver is not separate from the parser, we fork the code and use go generate to keep it in sync. For golang/go#53275 Change-Id: I24ce94b5d8532c5e679789d2ec1f75376e9e9208 Reviewed-on: https://go-review.googlesource.com/c/tools/+/619516 Reviewed-by: Alan Donovan LUCI-TryBot-Result: Go LUCI --- go/ast/astutil/imports.go | 5 + gopls/internal/cache/analysis.go | 21 +- gopls/internal/cache/parsego/file.go | 37 +- gopls/internal/cache/parsego/parse.go | 8 + gopls/internal/cache/parsego/resolver.go | 614 ++++++++++++++++++ .../internal/cache/parsego/resolver_compat.go | 24 + gopls/internal/cache/parsego/resolver_gen.go | 32 + gopls/internal/cache/parsego/resolver_test.go | 158 +++++ .../internal/util/safetoken/safetoken_test.go | 4 +- 9 files changed, 892 insertions(+), 11 deletions(-) create mode 100644 gopls/internal/cache/parsego/resolver.go create mode 100644 gopls/internal/cache/parsego/resolver_compat.go create mode 100644 gopls/internal/cache/parsego/resolver_gen.go create mode 100644 gopls/internal/cache/parsego/resolver_test.go diff --git a/go/ast/astutil/imports.go b/go/ast/astutil/imports.go index 18d1adb05..a6b5ed0a8 100644 --- a/go/ast/astutil/imports.go +++ b/go/ast/astutil/imports.go @@ -344,7 +344,12 @@ func RewriteImport(fset *token.FileSet, f *ast.File, oldPath, newPath string) (r } // UsesImport reports whether a given import is used. +// The provided File must have been parsed with syntactic object resolution +// (not using go/parser.SkipObjectResolution). func UsesImport(f *ast.File, path string) (used bool) { + if f.Scope == nil { + panic("file f was not parsed with syntactic object resolution") + } spec := importSpec(f, path) if spec == nil { return diff --git a/gopls/internal/cache/analysis.go b/gopls/internal/cache/analysis.go index 7608f191f..a0043c04d 100644 --- a/gopls/internal/cache/analysis.go +++ b/gopls/internal/cache/analysis.go @@ -15,7 +15,6 @@ import ( "errors" "fmt" "go/ast" - "go/parser" "go/token" "go/types" "log" @@ -234,7 +233,10 @@ func (s *Snapshot) Analyze(ctx context.Context, pkgs map[PackageID]*metadata.Pac facty = requiredAnalyzers(facty) // File set for this batch (entire graph) of analysis. - fset := token.NewFileSet() + // + // Start at reservedForParsing so that cached parsed files can be inserted + // into the fileset retroactively. + fset := fileSetWithBase(reservedForParsing) // Get the metadata graph once for lock-free access during analysis. meta := s.MetadataGraph() @@ -261,6 +263,7 @@ func (s *Snapshot) Analyze(ctx context.Context, pkgs map[PackageID]*metadata.Pac an = &analysisNode{ allNodes: nodes, + parseCache: s.view.parseCache, fset: fset, fsource: s, // expose only ReadFile viewType: s.View().Type(), @@ -548,6 +551,7 @@ func (an *analysisNode) decrefPreds() { // type-checking and analyzing syntax (miss). type analysisNode struct { allNodes map[PackageID]*analysisNode // all nodes, for lazy lookup of transitive dependencies + parseCache *parseCache // shared parse cache fset *token.FileSet // file set shared by entire batch (DAG) fsource file.Source // Snapshot.ReadFile, for use by Pass.ReadFile viewType ViewType // type of view @@ -868,12 +872,13 @@ func (an *analysisNode) run(ctx context.Context) (*analyzeSummary, error) { for i, fh := range an.files { i, fh := i, fh group.Go(func() error { - // Call parseGoImpl directly, not the caching wrapper, - // as cached ASTs require the global FileSet. - // ast.Object resolution is unfortunately an implied part of the - // go/analysis contract. - pgf, err := parseGoImpl(ctx, an.fset, fh, parsego.Full&^parser.SkipObjectResolution, false) - parsed[i] = pgf + // Files fetched from the cache must also have their ast.Ident.Objects + // resolved, as it is part of the analysis contract. + pgfs, err := an.parseCache.parseFiles(ctx, an.fset, parsego.Full, false, fh) + if err == nil { + pgfs[0].Resolve() + } + parsed[i] = pgfs[0] return err }) } diff --git a/gopls/internal/cache/parsego/file.go b/gopls/internal/cache/parsego/file.go index b03929e6c..687c8e393 100644 --- a/gopls/internal/cache/parsego/file.go +++ b/gopls/internal/cache/parsego/file.go @@ -9,6 +9,7 @@ import ( "go/parser" "go/scanner" "go/token" + "sync" "golang.org/x/tools/gopls/internal/protocol" "golang.org/x/tools/gopls/internal/util/safetoken" @@ -18,6 +19,10 @@ import ( type File struct { URI protocol.DocumentURI Mode parser.Mode + + // File is the file resulting from parsing. Clients must not access the AST's + // legacy ast.Object-related fields without first ensuring that + // [File.Resolve] was already called. File *ast.File Tok *token.File // Source code used to build the AST. It may be different from the @@ -39,13 +44,16 @@ type File struct { fixedAST bool Mapper *protocol.Mapper // may map fixed Src, not file content ParseErr scanner.ErrorList + + // resolveOnce guards the lazy ast.Object resolution. See [File.Resolve]. + resolveOnce sync.Once } -func (pgf File) String() string { return string(pgf.URI) } +func (pgf *File) String() string { return string(pgf.URI) } // Fixed reports whether p was "Fixed", meaning that its source or positions // may not correlate with the original file. -func (pgf File) Fixed() bool { +func (pgf *File) Fixed() bool { return pgf.fixedSrc || pgf.fixedAST } @@ -100,3 +108,28 @@ func (pgf *File) RangePos(r protocol.Range) (token.Pos, token.Pos, error) { } return pgf.Tok.Pos(start), pgf.Tok.Pos(end), nil } + +// Resolve lazily resolves ast.Ident.Objects in the enclosed syntax tree. +// +// Resolve must be called before accessing any of: +// - pgf.File.Scope +// - pgf.File.Unresolved +// - Ident.Obj, for any Ident in pgf.File +func (pgf *File) Resolve() { + pgf.resolveOnce.Do(func() { + if pgf.File.Scope != nil { + return // already resolved by parsing without SkipObjectResolution. + } + defer func() { + // (panic handler duplicated from go/parser) + if e := recover(); e != nil { + // A bailout indicates the resolution stack has exceeded max depth. + if _, ok := e.(bailout); !ok { + panic(e) + } + } + }() + declErr := func(token.Pos, string) {} + resolveFile(pgf.File, pgf.Tok, declErr) + }) +} diff --git a/gopls/internal/cache/parsego/parse.go b/gopls/internal/cache/parsego/parse.go index 82f3eeebe..d940d0f8b 100644 --- a/gopls/internal/cache/parsego/parse.go +++ b/gopls/internal/cache/parsego/parse.go @@ -2,6 +2,14 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:generate go run resolver_gen.go + +// The parsego package defines the [File] type, a wrapper around a go/ast.File +// that is useful for answering LSP queries. Notably, it bundles the +// *token.File and *protocol.Mapper necessary for token.Pos locations to and +// from UTF-16 LSP positions. +// +// Run `go generate` to update resolver.go from GOROOT. package parsego import ( diff --git a/gopls/internal/cache/parsego/resolver.go b/gopls/internal/cache/parsego/resolver.go new file mode 100644 index 000000000..450fcc0a2 --- /dev/null +++ b/gopls/internal/cache/parsego/resolver.go @@ -0,0 +1,614 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Code generated by resolver_gen.go. DO NOT EDIT. + +package parsego + +import ( + "fmt" + "go/ast" + "go/token" + "strings" +) + +const debugResolve = false + +// resolveFile walks the given file to resolve identifiers within the file +// scope, updating ast.Ident.Obj fields with declaration information. +// +// If declErr is non-nil, it is used to report declaration errors during +// resolution. tok is used to format position in error messages. +func resolveFile(file *ast.File, handle *token.File, declErr func(token.Pos, string)) { + pkgScope := ast.NewScope(nil) + r := &resolver{ + handle: handle, + declErr: declErr, + topScope: pkgScope, + pkgScope: pkgScope, + depth: 1, + } + + for _, decl := range file.Decls { + ast.Walk(r, decl) + } + + r.closeScope() + assert(r.topScope == nil, "unbalanced scopes") + assert(r.labelScope == nil, "unbalanced label scopes") + + // resolve global identifiers within the same file + i := 0 + for _, ident := range r.unresolved { + // i <= index for current ident + assert(ident.Obj == unresolved, "object already resolved") + ident.Obj = r.pkgScope.Lookup(ident.Name) // also removes unresolved sentinel + if ident.Obj == nil { + r.unresolved[i] = ident + i++ + } else if debugResolve { + pos := ident.Obj.Decl.(interface{ Pos() token.Pos }).Pos() + r.trace("resolved %s@%v to package object %v", ident.Name, ident.Pos(), pos) + } + } + file.Scope = r.pkgScope + file.Unresolved = r.unresolved[0:i] +} + +const maxScopeDepth int = 1e3 + +type resolver struct { + handle *token.File + declErr func(token.Pos, string) + + // Ordinary identifier scopes + pkgScope *ast.Scope // pkgScope.Outer == nil + topScope *ast.Scope // top-most scope; may be pkgScope + unresolved []*ast.Ident // unresolved identifiers + depth int // scope depth + + // Label scopes + // (maintained by open/close LabelScope) + labelScope *ast.Scope // label scope for current function + targetStack [][]*ast.Ident // stack of unresolved labels +} + +func (r *resolver) trace(format string, args ...any) { + fmt.Println(strings.Repeat(". ", r.depth) + r.sprintf(format, args...)) +} + +func (r *resolver) sprintf(format string, args ...any) string { + for i, arg := range args { + switch arg := arg.(type) { + case token.Pos: + args[i] = r.handle.Position(arg) + } + } + return fmt.Sprintf(format, args...) +} + +func (r *resolver) openScope(pos token.Pos) { + r.depth++ + if r.depth > maxScopeDepth { + panic(bailout{pos: pos, msg: "exceeded max scope depth during object resolution"}) + } + if debugResolve { + r.trace("opening scope @%v", pos) + } + r.topScope = ast.NewScope(r.topScope) +} + +func (r *resolver) closeScope() { + r.depth-- + if debugResolve { + r.trace("closing scope") + } + r.topScope = r.topScope.Outer +} + +func (r *resolver) openLabelScope() { + r.labelScope = ast.NewScope(r.labelScope) + r.targetStack = append(r.targetStack, nil) +} + +func (r *resolver) closeLabelScope() { + // resolve labels + n := len(r.targetStack) - 1 + scope := r.labelScope + for _, ident := range r.targetStack[n] { + ident.Obj = scope.Lookup(ident.Name) + if ident.Obj == nil && r.declErr != nil { + r.declErr(ident.Pos(), fmt.Sprintf("label %s undefined", ident.Name)) + } + } + // pop label scope + r.targetStack = r.targetStack[0:n] + r.labelScope = r.labelScope.Outer +} + +func (r *resolver) declare(decl, data any, scope *ast.Scope, kind ast.ObjKind, idents ...*ast.Ident) { + for _, ident := range idents { + if ident.Obj != nil { + panic(fmt.Sprintf("%v: identifier %s already declared or resolved", ident.Pos(), ident.Name)) + } + obj := ast.NewObj(kind, ident.Name) + // remember the corresponding declaration for redeclaration + // errors and global variable resolution/typechecking phase + obj.Decl = decl + obj.Data = data + // Identifiers (for receiver type parameters) are written to the scope, but + // never set as the resolved object. See go.dev/issue/50956. + if _, ok := decl.(*ast.Ident); !ok { + ident.Obj = obj + } + if ident.Name != "_" { + if debugResolve { + r.trace("declaring %s@%v", ident.Name, ident.Pos()) + } + if alt := scope.Insert(obj); alt != nil && r.declErr != nil { + prevDecl := "" + if pos := alt.Pos(); pos.IsValid() { + prevDecl = r.sprintf("\n\tprevious declaration at %v", pos) + } + r.declErr(ident.Pos(), fmt.Sprintf("%s redeclared in this block%s", ident.Name, prevDecl)) + } + } + } +} + +func (r *resolver) shortVarDecl(decl *ast.AssignStmt) { + // Go spec: A short variable declaration may redeclare variables + // provided they were originally declared in the same block with + // the same type, and at least one of the non-blank variables is new. + n := 0 // number of new variables + for _, x := range decl.Lhs { + if ident, isIdent := x.(*ast.Ident); isIdent { + assert(ident.Obj == nil, "identifier already declared or resolved") + obj := ast.NewObj(ast.Var, ident.Name) + // remember corresponding assignment for other tools + obj.Decl = decl + ident.Obj = obj + if ident.Name != "_" { + if debugResolve { + r.trace("declaring %s@%v", ident.Name, ident.Pos()) + } + if alt := r.topScope.Insert(obj); alt != nil { + ident.Obj = alt // redeclaration + } else { + n++ // new declaration + } + } + } + } + if n == 0 && r.declErr != nil { + r.declErr(decl.Lhs[0].Pos(), "no new variables on left side of :=") + } +} + +// The unresolved object is a sentinel to mark identifiers that have been added +// to the list of unresolved identifiers. The sentinel is only used for verifying +// internal consistency. +var unresolved = new(ast.Object) + +// If x is an identifier, resolve attempts to resolve x by looking up +// the object it denotes. If no object is found and collectUnresolved is +// set, x is marked as unresolved and collected in the list of unresolved +// identifiers. +func (r *resolver) resolve(ident *ast.Ident, collectUnresolved bool) { + if ident.Obj != nil { + panic(r.sprintf("%v: identifier %s already declared or resolved", ident.Pos(), ident.Name)) + } + // '_' should never refer to existing declarations, because it has special + // handling in the spec. + if ident.Name == "_" { + return + } + for s := r.topScope; s != nil; s = s.Outer { + if obj := s.Lookup(ident.Name); obj != nil { + if debugResolve { + r.trace("resolved %v:%s to %v", ident.Pos(), ident.Name, obj) + } + assert(obj.Name != "", "obj with no name") + // Identifiers (for receiver type parameters) are written to the scope, + // but never set as the resolved object. See go.dev/issue/50956. + if _, ok := obj.Decl.(*ast.Ident); !ok { + ident.Obj = obj + } + return + } + } + // all local scopes are known, so any unresolved identifier + // must be found either in the file scope, package scope + // (perhaps in another file), or universe scope --- collect + // them so that they can be resolved later + if collectUnresolved { + ident.Obj = unresolved + r.unresolved = append(r.unresolved, ident) + } +} + +func (r *resolver) walkExprs(list []ast.Expr) { + for _, node := range list { + ast.Walk(r, node) + } +} + +func (r *resolver) walkLHS(list []ast.Expr) { + for _, expr := range list { + expr := ast.Unparen(expr) + if _, ok := expr.(*ast.Ident); !ok && expr != nil { + ast.Walk(r, expr) + } + } +} + +func (r *resolver) walkStmts(list []ast.Stmt) { + for _, stmt := range list { + ast.Walk(r, stmt) + } +} + +func (r *resolver) Visit(node ast.Node) ast.Visitor { + if debugResolve && node != nil { + r.trace("node %T@%v", node, node.Pos()) + } + + switch n := node.(type) { + + // Expressions. + case *ast.Ident: + r.resolve(n, true) + + case *ast.FuncLit: + r.openScope(n.Pos()) + defer r.closeScope() + r.walkFuncType(n.Type) + r.walkBody(n.Body) + + case *ast.SelectorExpr: + ast.Walk(r, n.X) + // Note: don't try to resolve n.Sel, as we don't support qualified + // resolution. + + case *ast.StructType: + r.openScope(n.Pos()) + defer r.closeScope() + r.walkFieldList(n.Fields, ast.Var) + + case *ast.FuncType: + r.openScope(n.Pos()) + defer r.closeScope() + r.walkFuncType(n) + + case *ast.CompositeLit: + if n.Type != nil { + ast.Walk(r, n.Type) + } + for _, e := range n.Elts { + if kv, _ := e.(*ast.KeyValueExpr); kv != nil { + // See go.dev/issue/45160: try to resolve composite lit keys, but don't + // collect them as unresolved if resolution failed. This replicates + // existing behavior when resolving during parsing. + if ident, _ := kv.Key.(*ast.Ident); ident != nil { + r.resolve(ident, false) + } else { + ast.Walk(r, kv.Key) + } + ast.Walk(r, kv.Value) + } else { + ast.Walk(r, e) + } + } + + case *ast.InterfaceType: + r.openScope(n.Pos()) + defer r.closeScope() + r.walkFieldList(n.Methods, ast.Fun) + + // Statements + case *ast.LabeledStmt: + r.declare(n, nil, r.labelScope, ast.Lbl, n.Label) + ast.Walk(r, n.Stmt) + + case *ast.AssignStmt: + r.walkExprs(n.Rhs) + if n.Tok == token.DEFINE { + r.shortVarDecl(n) + } else { + r.walkExprs(n.Lhs) + } + + case *ast.BranchStmt: + // add to list of unresolved targets + if n.Tok != token.FALLTHROUGH && n.Label != nil { + depth := len(r.targetStack) - 1 + r.targetStack[depth] = append(r.targetStack[depth], n.Label) + } + + case *ast.BlockStmt: + r.openScope(n.Pos()) + defer r.closeScope() + r.walkStmts(n.List) + + case *ast.IfStmt: + r.openScope(n.Pos()) + defer r.closeScope() + if n.Init != nil { + ast.Walk(r, n.Init) + } + ast.Walk(r, n.Cond) + ast.Walk(r, n.Body) + if n.Else != nil { + ast.Walk(r, n.Else) + } + + case *ast.CaseClause: + r.walkExprs(n.List) + r.openScope(n.Pos()) + defer r.closeScope() + r.walkStmts(n.Body) + + case *ast.SwitchStmt: + r.openScope(n.Pos()) + defer r.closeScope() + if n.Init != nil { + ast.Walk(r, n.Init) + } + if n.Tag != nil { + // The scope below reproduces some unnecessary behavior of the parser, + // opening an extra scope in case this is a type switch. It's not needed + // for expression switches. + // TODO: remove this once we've matched the parser resolution exactly. + if n.Init != nil { + r.openScope(n.Tag.Pos()) + defer r.closeScope() + } + ast.Walk(r, n.Tag) + } + if n.Body != nil { + r.walkStmts(n.Body.List) + } + + case *ast.TypeSwitchStmt: + if n.Init != nil { + r.openScope(n.Pos()) + defer r.closeScope() + ast.Walk(r, n.Init) + } + r.openScope(n.Assign.Pos()) + defer r.closeScope() + ast.Walk(r, n.Assign) + // s.Body consists only of case clauses, so does not get its own + // scope. + if n.Body != nil { + r.walkStmts(n.Body.List) + } + + case *ast.CommClause: + r.openScope(n.Pos()) + defer r.closeScope() + if n.Comm != nil { + ast.Walk(r, n.Comm) + } + r.walkStmts(n.Body) + + case *ast.SelectStmt: + // as for switch statements, select statement bodies don't get their own + // scope. + if n.Body != nil { + r.walkStmts(n.Body.List) + } + + case *ast.ForStmt: + r.openScope(n.Pos()) + defer r.closeScope() + if n.Init != nil { + ast.Walk(r, n.Init) + } + if n.Cond != nil { + ast.Walk(r, n.Cond) + } + if n.Post != nil { + ast.Walk(r, n.Post) + } + ast.Walk(r, n.Body) + + case *ast.RangeStmt: + r.openScope(n.Pos()) + defer r.closeScope() + ast.Walk(r, n.X) + var lhs []ast.Expr + if n.Key != nil { + lhs = append(lhs, n.Key) + } + if n.Value != nil { + lhs = append(lhs, n.Value) + } + if len(lhs) > 0 { + if n.Tok == token.DEFINE { + // Note: we can't exactly match the behavior of object resolution + // during the parsing pass here, as it uses the position of the RANGE + // token for the RHS OpPos. That information is not contained within + // the AST. + as := &ast.AssignStmt{ + Lhs: lhs, + Tok: token.DEFINE, + TokPos: n.TokPos, + Rhs: []ast.Expr{&ast.UnaryExpr{Op: token.RANGE, X: n.X}}, + } + // TODO(rFindley): this walkLHS reproduced the parser resolution, but + // is it necessary? By comparison, for a normal AssignStmt we don't + // walk the LHS in case there is an invalid identifier list. + r.walkLHS(lhs) + r.shortVarDecl(as) + } else { + r.walkExprs(lhs) + } + } + ast.Walk(r, n.Body) + + // Declarations + case *ast.GenDecl: + switch n.Tok { + case token.CONST, token.VAR: + for i, spec := range n.Specs { + spec := spec.(*ast.ValueSpec) + kind := ast.Con + if n.Tok == token.VAR { + kind = ast.Var + } + r.walkExprs(spec.Values) + if spec.Type != nil { + ast.Walk(r, spec.Type) + } + r.declare(spec, i, r.topScope, kind, spec.Names...) + } + case token.TYPE: + for _, spec := range n.Specs { + spec := spec.(*ast.TypeSpec) + // Go spec: The scope of a type identifier declared inside a function begins + // at the identifier in the TypeSpec and ends at the end of the innermost + // containing block. + r.declare(spec, nil, r.topScope, ast.Typ, spec.Name) + if spec.TypeParams != nil { + r.openScope(spec.Pos()) + defer r.closeScope() + r.walkTParams(spec.TypeParams) + } + ast.Walk(r, spec.Type) + } + } + + case *ast.FuncDecl: + // Open the function scope. + r.openScope(n.Pos()) + defer r.closeScope() + + r.walkRecv(n.Recv) + + // Type parameters are walked normally: they can reference each other, and + // can be referenced by normal parameters. + if n.Type.TypeParams != nil { + r.walkTParams(n.Type.TypeParams) + // TODO(rFindley): need to address receiver type parameters. + } + + // Resolve and declare parameters in a specific order to get duplicate + // declaration errors in the correct location. + r.resolveList(n.Type.Params) + r.resolveList(n.Type.Results) + r.declareList(n.Recv, ast.Var) + r.declareList(n.Type.Params, ast.Var) + r.declareList(n.Type.Results, ast.Var) + + r.walkBody(n.Body) + if n.Recv == nil && n.Name.Name != "init" { + r.declare(n, nil, r.pkgScope, ast.Fun, n.Name) + } + + default: + return r + } + + return nil +} + +func (r *resolver) walkFuncType(typ *ast.FuncType) { + // typ.TypeParams must be walked separately for FuncDecls. + r.resolveList(typ.Params) + r.resolveList(typ.Results) + r.declareList(typ.Params, ast.Var) + r.declareList(typ.Results, ast.Var) +} + +func (r *resolver) resolveList(list *ast.FieldList) { + if list == nil { + return + } + for _, f := range list.List { + if f.Type != nil { + ast.Walk(r, f.Type) + } + } +} + +func (r *resolver) declareList(list *ast.FieldList, kind ast.ObjKind) { + if list == nil { + return + } + for _, f := range list.List { + r.declare(f, nil, r.topScope, kind, f.Names...) + } +} + +func (r *resolver) walkRecv(recv *ast.FieldList) { + // If our receiver has receiver type parameters, we must declare them before + // trying to resolve the rest of the receiver, and avoid re-resolving the + // type parameter identifiers. + if recv == nil || len(recv.List) == 0 { + return // nothing to do + } + typ := recv.List[0].Type + if ptr, ok := typ.(*ast.StarExpr); ok { + typ = ptr.X + } + + var declareExprs []ast.Expr // exprs to declare + var resolveExprs []ast.Expr // exprs to resolve + switch typ := typ.(type) { + case *ast.IndexExpr: + declareExprs = []ast.Expr{typ.Index} + resolveExprs = append(resolveExprs, typ.X) + case *ast.IndexListExpr: + declareExprs = typ.Indices + resolveExprs = append(resolveExprs, typ.X) + default: + resolveExprs = append(resolveExprs, typ) + } + for _, expr := range declareExprs { + if id, _ := expr.(*ast.Ident); id != nil { + r.declare(expr, nil, r.topScope, ast.Typ, id) + } else { + // The receiver type parameter expression is invalid, but try to resolve + // it anyway for consistency. + resolveExprs = append(resolveExprs, expr) + } + } + for _, expr := range resolveExprs { + if expr != nil { + ast.Walk(r, expr) + } + } + // The receiver is invalid, but try to resolve it anyway for consistency. + for _, f := range recv.List[1:] { + if f.Type != nil { + ast.Walk(r, f.Type) + } + } +} + +func (r *resolver) walkFieldList(list *ast.FieldList, kind ast.ObjKind) { + if list == nil { + return + } + r.resolveList(list) + r.declareList(list, kind) +} + +// walkTParams is like walkFieldList, but declares type parameters eagerly so +// that they may be resolved in the constraint expressions held in the field +// Type. +func (r *resolver) walkTParams(list *ast.FieldList) { + r.declareList(list, ast.Typ) + r.resolveList(list) +} + +func (r *resolver) walkBody(body *ast.BlockStmt) { + if body == nil { + return + } + r.openLabelScope() + defer r.closeLabelScope() + r.walkStmts(body.List) +} diff --git a/gopls/internal/cache/parsego/resolver_compat.go b/gopls/internal/cache/parsego/resolver_compat.go new file mode 100644 index 000000000..0d9a3e19e --- /dev/null +++ b/gopls/internal/cache/parsego/resolver_compat.go @@ -0,0 +1,24 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file contains declarations needed for compatibility with resolver.go +// copied from GOROOT. + +package parsego + +import "go/token" + +// assert panics with the given msg if cond is not true. +func assert(cond bool, msg string) { + if !cond { + panic(msg) + } +} + +// A bailout panic is raised to indicate early termination. pos and msg are +// only populated when bailing out of object resolution. +type bailout struct { + pos token.Pos + msg string +} diff --git a/gopls/internal/cache/parsego/resolver_gen.go b/gopls/internal/cache/parsego/resolver_gen.go new file mode 100644 index 000000000..7eb9f5631 --- /dev/null +++ b/gopls/internal/cache/parsego/resolver_gen.go @@ -0,0 +1,32 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build ignore + +package main + +import ( + "bytes" + "log" + "os" + "os/exec" + "path/filepath" + "strings" +) + +func main() { + output, err := exec.Command("go", "env", "GOROOT").Output() + if err != nil { + log.Fatalf("resolving GOROOT: %v", err) + } + goroot := strings.TrimSpace(string(output)) + data, err := os.ReadFile(filepath.Join(goroot, "src/go/parser/resolver.go")) + if err != nil { + log.Fatalf("reading resolver.go: %v", err) + } + data = bytes.Replace(data, []byte("\npackage parser"), []byte("\n// Code generated by resolver_gen.go. DO NOT EDIT.\n\npackage parsego"), 1) + if err := os.WriteFile("resolver.go", data, 0666); err != nil { + log.Fatalf("writing resolver.go: %v", err) + } +} diff --git a/gopls/internal/cache/parsego/resolver_test.go b/gopls/internal/cache/parsego/resolver_test.go new file mode 100644 index 000000000..44908b7ec --- /dev/null +++ b/gopls/internal/cache/parsego/resolver_test.go @@ -0,0 +1,158 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package parsego + +import ( + "go/ast" + "go/types" + "os" + "strings" + "testing" + + "golang.org/x/tools/go/ast/astutil" + "golang.org/x/tools/go/packages" + "golang.org/x/tools/gopls/internal/util/safetoken" + "golang.org/x/tools/internal/testenv" +) + +// TestGoplsSourceDoesNotUseObjectResolution verifies that gopls does not +// read fields that are set during syntactic object resolution, except in +// locations where we can guarantee that object resolution has occurred. This +// is achieved via static analysis of gopls source code to find references to +// the legacy Object symbols, checking the results against an allowlist +// +// Reading these fields would introduce a data race, due to the lazy +// resolution implemented by File.Resolve. +func TestGoplsSourceDoesNotUseObjectResolution(t *testing.T) { + + testenv.NeedsGoPackages(t) + testenv.NeedsLocalXTools(t) + + cfg := &packages.Config{ + Mode: packages.NeedName | packages.NeedModule | packages.NeedCompiledGoFiles | packages.NeedTypes | packages.NeedTypesInfo | packages.NeedSyntax | packages.NeedImports | packages.NeedDeps, + } + cfg.Env = os.Environ() + cfg.Env = append(cfg.Env, + "GOPACKAGESDRIVER=off", + "GOWORK=off", // necessary for -mod=mod below + "GOFLAGS=-mod=mod", + ) + + pkgs, err := packages.Load(cfg, + "go/ast", + "golang.org/x/tools/go/ast/astutil", + "golang.org/x/tools/gopls/...") + + if err != nil { + t.Fatal(err) + } + var astPkg, astutilPkg *packages.Package + for _, pkg := range pkgs { + switch pkg.PkgPath { + case "go/ast": + astPkg = pkg + case "golang.org/x/tools/go/ast/astutil": + astutilPkg = pkg + } + } + if astPkg == nil { + t.Fatal("missing package go/ast") + } + if astutilPkg == nil { + t.Fatal("missing package golang.org/x/tools/go/ast/astutil") + } + + File := astPkg.Types.Scope().Lookup("File").Type() + Ident := astPkg.Types.Scope().Lookup("Ident").Type() + + Scope, _, _ := types.LookupFieldOrMethod(File, true, astPkg.Types, "Scope") + assert(Scope != nil, "nil Scope") + Unresolved, _, _ := types.LookupFieldOrMethod(File, true, astPkg.Types, "Unresolved") + assert(Unresolved != nil, "nil unresolved") + Obj, _, _ := types.LookupFieldOrMethod(Ident, true, astPkg.Types, "Obj") + assert(Obj != nil, "nil Obj") + UsesImport := astutilPkg.Types.Scope().Lookup("UsesImport") + assert(UsesImport != nil, "nil UsesImport") + + disallowed := map[types.Object]bool{ + Scope: true, + Unresolved: true, + Obj: true, + UsesImport: true, + } + + // exceptions catalogues packages or declarations that are allowed to use + // forbidden symbols, with a rationale. + // + // - If the exception ends with '/', it is a prefix. + // - If it ends with a qualified name, it is a declaration. + // - Otherwise, it is an exact package path. + // + // TODO(rfindley): some sort of callgraph analysis would make these + // exceptions much easier to maintain. + exceptions := []string{ + "golang.org/x/tools/go/analysis/passes/", // analyzers may rely on object resolution + "golang.org/x/tools/gopls/internal/analysis/simplifyslice", // restrict ourselves to one blessed analyzer + "golang.org/x/tools/gopls/internal/cache/parsego", // used by parsego.File.Resolve, of course + "golang.org/x/tools/gopls/internal/golang.builtinDecl", // the builtin file is resolved + "golang.org/x/tools/gopls/internal/golang.NewBuiltinSignature", // ditto + "golang.org/x/tools/gopls/internal/golang/completion.builtinArgKind", // ditto + "golang.org/x/tools/internal/imports", // goimports does its own parsing + "golang.org/x/tools/go/ast/astutil.UsesImport", // disallowed + "golang.org/x/tools/go/ast/astutil.isTopName", // only reached from astutil.UsesImport + "go/ast", + "go/parser", + "go/doc", // manually verified that our usage is safe + } + + packages.Visit(pkgs, nil, func(pkg *packages.Package) { + for _, exception := range exceptions { + if strings.HasSuffix(exception, "/") { + if strings.HasPrefix(pkg.PkgPath, exception) { + return + } + } else if pkg.PkgPath == exception { + return + } + } + + searchUses: + for ident, obj := range pkg.TypesInfo.Uses { + if disallowed[obj] { + decl := findEnclosingFuncDecl(ident, pkg) + if decl == "" { + posn := safetoken.Position(pkg.Fset.File(ident.Pos()), ident.Pos()) + t.Fatalf("%s: couldn't find enclosing decl for use of %s", posn, ident.Name) + } + qualified := pkg.PkgPath + "." + decl + for _, exception := range exceptions { + if exception == qualified { + continue searchUses + } + } + posn := safetoken.StartPosition(pkg.Fset, ident.Pos()) + t.Errorf("%s: forbidden use of %v in %s", posn, obj, qualified) + } + } + }) +} + +// findEnclosingFuncDecl finds the name of the func decl enclosing the usage, +// or "". +// +// (Usage could theoretically exist in e.g. var initializers, but that would be +// odd.) +func findEnclosingFuncDecl(ident *ast.Ident, pkg *packages.Package) string { + for _, file := range pkg.Syntax { + if file.FileStart <= ident.Pos() && ident.Pos() < file.FileEnd { + path, _ := astutil.PathEnclosingInterval(file, ident.Pos(), ident.End()) + decl, ok := path[len(path)-2].(*ast.FuncDecl) + if ok { + return decl.Name.Name + } + } + } + return "" +} diff --git a/gopls/internal/util/safetoken/safetoken_test.go b/gopls/internal/util/safetoken/safetoken_test.go index ac3b878c6..2606b449a 100644 --- a/gopls/internal/util/safetoken/safetoken_test.go +++ b/gopls/internal/util/safetoken/safetoken_test.go @@ -116,7 +116,9 @@ func TestGoplsSourceDoesNotCallTokenFileMethods(t *testing.T) { for _, pkg := range pkgs { switch pkg.PkgPath { - case "go/token", "golang.org/x/tools/gopls/internal/util/safetoken": + case "go/token", + "golang.org/x/tools/gopls/internal/util/safetoken", // this package + "golang.org/x/tools/gopls/internal/cache/parsego": // copies go/parser/resolver.go continue // allow calls within these packages }