Go: support extracting test code

This implements support for test extraction by two mechanisms:

* In autobuild mode, setting `CODEQL_EXTRACTOR_GO_EXTRACT_TESTS` to `true`.
* In manual build mode, tracing a `go test` command (`go test -c` is recommended for efficiency, since it compiles the test binary without running the tests).

Go deals with test compilation by creating several extra packages on top of those expected from inspection of the source code (see the docs of `packages.Load` for more detail): test variants of existing packages, whose IDs carry a bracketed suffix, like `mydomain.com/mypackage [mydomain.com/mypackage.test]`, and packages containing generated test driver code, like `mydomain.com/mypackage.test`. There are also additional packages like `mydomain.com/mypackage_test` which are explicitly present in source code, but not compiled by a normal `go build`.

So far as I can tell, the purpose of the two variants of a package is to resolve dependency cycles (because the tests variant of the package can have more dependencies than the non-tests variant, and non-test code can compile against the non-tests variant). Since the tests variant of a package seems to be a superset of the non-tests variant, I employ the simple heuristic of keeping only the variant of each package with the longest ID and ignoring the others. I haven't seen a case where there are three or more variants of a package, so I expect this to always identify the tests variant as the preferred one. If several variants were extracted, and we were to attempt to match Go's linkage strategy among the different variants, we would need to extend trap-file names and most top-level symbol trap IDs with the package variant they come from; I hope this won't prove necessary.

"Real" `_test` packages, and wholly synthetic driver code packages, are extracted as normal.
This commit is contained in:
Chris Smowton 2024-08-13 19:24:28 +01:00
Родитель 594045b634
Коммит 76e6942594
2 изменённых файлов: 43 добавлений и 11 удалений

Просмотреть файл

@ -21,7 +21,7 @@ func usage() {
fmt.Fprintf(os.Stderr, "--help Print this help.\n")
}
func parseFlags(args []string, mimic bool) ([]string, []string) {
func parseFlags(args []string, mimic bool, extractTests bool) ([]string, []string, bool) {
i := 0
buildFlags := []string{}
for ; i < len(args) && strings.HasPrefix(args[i], "-"); i++ {
@ -44,9 +44,9 @@ func parseFlags(args []string, mimic bool) ([]string, []string) {
if i+1 < len(args) {
i++
command := args[i]
if command == "build" || command == "install" || command == "run" {
log.Printf("Intercepting build")
return parseFlags(args[i+1:], true)
if command == "build" || command == "install" || command == "run" || command == "test" {
log.Printf("Intercepting build for %s command", command)
return parseFlags(args[i+1:], true, command == "test")
} else {
log.Printf("Non-build command '%s'; skipping", strings.Join(args[1:], " "))
os.Exit(0)
@ -63,12 +63,12 @@ func parseFlags(args []string, mimic bool) ([]string, []string) {
// parse go build flags
switch args[i] {
// skip `-o output` and `-i`, if applicable
// skip `-o output`, `-i` and `-c`, if applicable
case "-o":
if i+1 < len(args) {
i++
}
case "-i":
case "-i", "-c":
case "-p", "-asmflags", "-buildmode", "-compiler", "-gccgoflags", "-gcflags", "-installsuffix",
"-ldflags", "-mod", "-modfile", "-pkgdir", "-tags", "-toolexec", "-overlay":
if i+1 < len(args) {
@ -90,11 +90,12 @@ func parseFlags(args []string, mimic bool) ([]string, []string) {
cpuprofile = os.Getenv("CODEQL_EXTRACTOR_GO_CPU_PROFILE")
memprofile = os.Getenv("CODEQL_EXTRACTOR_GO_MEM_PROFILE")
return buildFlags, args[i:]
return buildFlags, args[i:], extractTests
}
func main() {
buildFlags, patterns := parseFlags(os.Args[1:], false)
extractTestsDefault := os.Getenv("CODEQL_EXTRACTOR_GO_EXTRACT_TESTS") == "true"
buildFlags, patterns, extractTests := parseFlags(os.Args[1:], false, extractTestsDefault)
if cpuprofile != "" {
f, err := os.Create(cpuprofile)
@ -114,7 +115,7 @@ func main() {
}
log.Printf("Build flags: '%s'; patterns: '%s'\n", strings.Join(buildFlags, " "), strings.Join(patterns, " "))
err := extractor.ExtractWithFlags(buildFlags, patterns)
err := extractor.ExtractWithFlags(buildFlags, patterns, extractTests)
if err != nil {
errString := err.Error()
if strings.Contains(errString, "unexpected directory layout:") {

Просмотреть файл

@ -59,11 +59,11 @@ func init() {
// Extract extracts the packages specified by the given patterns
func Extract(patterns []string) error {
return ExtractWithFlags(nil, patterns)
return ExtractWithFlags(nil, patterns, false)
}
// ExtractWithFlags extracts the packages specified by the given patterns and build flags
func ExtractWithFlags(buildFlags []string, patterns []string) error {
func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool) error {
startTime := time.Now()
extraction := NewExtraction(buildFlags, patterns)
@ -89,6 +89,7 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error {
packages.NeedTypes | packages.NeedTypesSizes |
packages.NeedTypesInfo | packages.NeedSyntax,
BuildFlags: buildFlags,
Tests: extractTests,
}
pkgs, err := packages.Load(cfg, patterns...)
if err != nil {
@ -132,10 +133,33 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error {
pkgsNotFound := make([]string, 0, len(pkgs))
// Build a map from package paths to their longest IDs--
// in the context of a `go test -c` compilation, we will see the same package more than
// once, with IDs like "abc.com/pkgname [abc.com/pkgname.test]" to distinguish the version
// that contains and is used by test code.
// For our purposes it is simplest to just ignore the non-test version, since the test
// version seems to be a superset of it.
longestPackageIds := make(map[string]string)
packages.Visit(pkgs, nil, func(pkg *packages.Package) {
if shortestID, present := longestPackageIds[pkg.PkgPath]; present {
if len(pkg.ID) > len(shortestID) {
longestPackageIds[pkg.PkgPath] = pkg.ID
}
} else {
longestPackageIds[pkg.PkgPath] = pkg.ID
}
})
// Do a post-order traversal and extract the package scope of each package
packages.Visit(pkgs, nil, func(pkg *packages.Package) {
log.Printf("Processing package %s.", pkg.PkgPath)
// If this is a variant of a package that also occurs with a longer ID, skip it.
if pkg.ID != longestPackageIds[pkg.PkgPath] {
log.Printf("Skipping variant of package %s with ID %s.", pkg.PkgPath, pkg.ID)
return
}
if _, ok := pkgInfos[pkg.PkgPath]; !ok {
pkgInfos[pkg.PkgPath] = toolchain.GetPkgInfo(pkg.PkgPath, modFlags...)
}
@ -210,6 +234,13 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error {
// extract AST information for all packages
packages.Visit(pkgs, nil, func(pkg *packages.Package) {
// If this is a variant of a package that also occurs with a longer ID, skip it.
if pkg.ID != longestPackageIds[pkg.PkgPath] {
// Don't log here; we already mentioned this above.
return
}
for root := range wantedRoots {
pkgInfo := pkgInfos[pkg.PkgPath]
relDir, err := filepath.Rel(root, pkgInfo.PkgDir)