internal/scan: add binary extract mode

The extract mode spits out a json blob representing the minimal representation of a Go binary needed for govulncheck vulnerability detection. binary mode accepts both a Go binary and this representation as an input. The contents of extract should be regarded as a blob. The users of this flag should not rely on its representation. It might change in the future. Change-Id: I81027062d34609fed7541ad2092d4cbe5df0d118 Reviewed-on: https://go-review.googlesource.com/c/vuln/+/542035 Run-TryBot: Zvonimir Pavlinovic <zpavlinovic@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Ian Cottrell <iancottrell@google.com> Reviewed-by: Maceo Thompson <maceothompson@google.com> TryBot-Result: Gopher Robot <gobot@golang.org>
2023-11-07 15:06:22 -08:00 · 2023-11-07 15:06:22 -08:00 · 8fb35e0f9d
--- a/cmd/govulncheck/doc.go
+++ b/cmd/govulncheck/doc.go
@ -52,6 +52,11 @@ with the -mode=binary flag:
 Govulncheck uses the binary's symbol information to find mentions of vulnerable
 functions. Its output omits call stacks, which require source code analysis.

+Govulncheck also supports -mode=extract on a Go binary to extract the minimal
+information needed to analyze the binary. This produces a blob, typically much
+smaller than the binary, that can also be passed to govulncheck as an argument with
+-mode=binary. Users should not rely on the contents or representation of the blob.
+
 Govulncheck exits successfully (exit code 0) if there are no vulnerabilities,
 and exits unsuccessfully if there are. It also exits successfully if the -json flag
 is provided, regardless of the number of detected vulnerabilities.
--- a/cmd/govulncheck/main_test.go
+++ b/cmd/govulncheck/main_test.go
@ -24,6 +24,7 @@ import (
 	"unsafe"

 	"github.com/google/go-cmdtest"
+	"github.com/google/go-cmp/cmp"
 	"golang.org/x/vuln/internal/govulncheck"
 	"golang.org/x/vuln/internal/test"
 	"golang.org/x/vuln/internal/web"
@ -153,7 +154,10 @@ func TestCommand(t *testing.T) {
 		varName := filepath.Base(md) + "_binary"
 		os.Setenv(varName, binary)
 	}
-	runTestSuite(t, filepath.Join(testDir, "testdata", "testfiles"), govulndbURI.String(), *update)
+	testFilesDir := filepath.Join(testDir, "testdata", "testfiles")
+	os.Setenv("testdir", testFilesDir)
+
+	runTestSuite(t, testFilesDir, govulndbURI.String(), *update)
 	if runtime.GOOS != "darwin" {
 		// Binaries are not stripped on darwin with go1.21 and earlier. See #61051.
 		runTestSuite(t, filepath.Join(testDir, "testdata", "strip"), govulndbURI.String(), *update)
@ -196,7 +200,7 @@ func runTestSuite(t *testing.T, dir string, govulndb string, update bool) {
 	}
 	ts.DisableLogging = true

-	ts.Commands["govulncheck"] = func(args []string, inputFile string) ([]byte, error) {
+	govulncheckCmd := func(args []string, inputFile string) ([]byte, error) {
 		parallelLimiter <- struct{}{}
 		defer func() { <-parallelLimiter }()

@ -250,6 +254,37 @@ func runTestSuite(t *testing.T, dir string, govulndb string, update bool) {
 		}
 		return out, err
 	}
+	ts.Commands["govulncheck"] = govulncheckCmd
+
+	// govulncheck-cmp runs govulncheck with every argument except the last one
+	// and compares the produced output with the contents of the file named by
+	// the last argument. The command itself never outputs anything; a mismatch
+	// or any failure along the way is reported as an exit code 1 error.
+	ts.Commands["govulncheck-cmp"] = func(args []string, inputFile string) ([]byte, error) {
+		if len(args) == 0 {
+			return nil, nil
+		}
+		last := len(args) - 1
+		wantPath, cmdArgs := args[last], args[:last]
+
+		// fail reports msg as a command failure with exit code 1.
+		fail := func(msg string) ([]byte, error) {
+			return nil, &cmdtest.ExitCodeErr{Msg: msg, Code: 1}
+		}
+
+		got, err := govulncheckCmd(cmdArgs, inputFile)
+		if err != nil {
+			return fail(err.Error())
+		}
+
+		want, err := os.ReadFile(wantPath)
+		if err != nil {
+			return fail(err.Error())
+		}
+
+		if diff := cmp.Diff(string(want), string(got)); diff != "" {
+			return fail("govulncheck output not matching the file contents:\n" + diff)
+		}
+		return nil, nil
+	}
+
 	if update {
 		ts.Run(t, true)
 		return
--- a/cmd/govulncheck/testdata/modules/vuln/vuln_dont_run_me
+++ b/cmd/govulncheck/testdata/modules/vuln/vuln_dont_run_me
--- a/cmd/govulncheck/testdata/testfiles/extract/binary_extract.ct
+++ b/cmd/govulncheck/testdata/testfiles/extract/binary_extract.ct
@ -0,0 +1,46 @@
+#####
+# Test binary mode using the extracted binary blob.
+$ govulncheck -mode=binary ${testdir}/extract/vuln.blob --> FAIL 3
+Scanning your binary for known vulnerabilities...
+
+Vulnerability #1: GO-2021-0265
+    A maliciously crafted path can cause Get and other query functions to
+    consume excessive amounts of CPU and time.
+  More info: https://pkg.go.dev/vuln/GO-2021-0265
+  Module: github.com/tidwall/gjson
+    Found in: github.com/tidwall/gjson@v1.6.5
+    Fixed in: github.com/tidwall/gjson@v1.9.3
+    Example traces found:
+      #1: gjson.Get
+      #2: gjson.Result.Get
+
+Vulnerability #2: GO-2021-0113
+    Due to improper index calculation, an incorrectly formatted language tag can
+    cause Parse to panic via an out of bounds read. If Parse is used to process
+    untrusted user inputs, this may be used as a vector for a denial of service
+    attack.
+  More info: https://pkg.go.dev/vuln/GO-2021-0113
+  Module: golang.org/x/text
+    Found in: golang.org/x/text@v0.3.0
+    Fixed in: golang.org/x/text@v0.3.7
+    Example traces found:
+      #1: language.Parse
+
+Vulnerability #3: GO-2021-0054
+    Due to improper bounds checking, maliciously crafted JSON objects can cause
+    an out-of-bounds panic. If parsing user input, this may be used as a denial
+    of service vector.
+  More info: https://pkg.go.dev/vuln/GO-2021-0054
+  Module: github.com/tidwall/gjson
+    Found in: github.com/tidwall/gjson@v1.6.5
+    Fixed in: github.com/tidwall/gjson@v1.6.6
+    Example traces found:
+      #1: gjson.Result.ForEach
+
+Your code is affected by 3 vulnerabilities from 2 modules.
+
+Share feedback at https://go.dev/s/govulncheck-feedback.
+
+# Test extract mode. Due to the size of the blob even for the smallest programs,
+# we directly compare its output to the target vuln.blob file.
+$ govulncheck-cmp -mode=extract ${moddir}/vuln/vuln_dont_run_me ${testdir}/extract/vuln.blob
--- a/cmd/govulncheck/testdata/testfiles/extract/vuln.blob
+++ b/cmd/govulncheck/testdata/testfiles/extract/vuln.blob
--- a/cmd/govulncheck/testdata/testfiles/failures/after_body.blob
+++ b/cmd/govulncheck/testdata/testfiles/failures/after_body.blob
@ -0,0 +1 @@
+{"name":"govulncheck-extract","version":"0.1.0"}{"modules":[]}{"name":"govulncheck-extract","version":"0.1.0"}
--- a/cmd/govulncheck/testdata/testfiles/failures/binary_fail.ct
+++ b/cmd/govulncheck/testdata/testfiles/failures/binary_fail.ct
@ -4,9 +4,54 @@ $ govulncheck -mode=binary notafile --> FAIL 2
 "notafile" is not a file

 #####
-# Test of passing a non-binary file to -mode=binary
+# Test of passing a non-binary and non-blob file to -mode=binary
 $ govulncheck -mode=binary ${moddir}/vuln/go.mod --> FAIL 1
-govulncheck: could not parse provided binary: unrecognized file format
+govulncheck: unrecognized binary format
+
+#####
+# Test of passing a blob with invalid header id
+$ govulncheck -mode=binary ${testdir}/failures/invalid_header_name.blob --> FAIL 1
+govulncheck: unrecognized binary format
+
+#####
+# Test of passing a blob with invalid header version
+$ govulncheck -mode=binary ${testdir}/failures/invalid_header_version.blob --> FAIL 1
+govulncheck: unrecognized binary format
+
+#####
+# Test of passing a blob with no header
+$ govulncheck -mode=binary ${testdir}/failures/no_header.blob --> FAIL 1
+govulncheck: unrecognized binary format
+
+#####
+# Test of passing a blob with an invalid header, i.e., one with unrecognized fields
+$ govulncheck -mode=binary ${testdir}/failures/invalid_header.blob --> FAIL 1
+govulncheck: unrecognized binary format
+
+#####
+# Test of passing a blob with no body
+$ govulncheck -mode=binary ${testdir}/failures/no_body.blob --> FAIL 1
+govulncheck: unrecognized binary format
+
+#####
+# Test of passing an empty blob/file
+$ govulncheck -mode=binary ${testdir}/failures/empty.blob --> FAIL 1
+govulncheck: unrecognized binary format
+
+#####
+# Test of passing an empty blob message
+$ govulncheck -mode=binary ${testdir}/failures/empty_message.blob --> FAIL 1
+govulncheck: unrecognized binary format
+
+#####
+# Test of passing blob message with multiple headers
+$ govulncheck -mode=binary ${testdir}/failures/multi_header.blob --> FAIL 1
+govulncheck: unrecognized binary format
+
+#####
+# Test of passing blob message with something after the body
+$ govulncheck -mode=binary ${testdir}/failures/after_body.blob --> FAIL 1
+govulncheck: unrecognized binary format

 #####
 # Test of trying to analyze multiple binaries
--- a/cmd/govulncheck/testdata/testfiles/failures/empty.blob
+++ b/cmd/govulncheck/testdata/testfiles/failures/empty.blob
--- a/cmd/govulncheck/testdata/testfiles/failures/empty_message.blob
+++ b/cmd/govulncheck/testdata/testfiles/failures/empty_message.blob
@ -0,0 +1 @@
+{}
--- a/cmd/govulncheck/testdata/testfiles/failures/extract_fail.ct
+++ b/cmd/govulncheck/testdata/testfiles/failures/extract_fail.ct
@ -0,0 +1,4 @@
+#####
+# Test extraction of an unsupported file format
+$ govulncheck -mode=extract ${moddir}/vuln/go.mod --> FAIL 1
+govulncheck: unrecognized binary format
--- a/cmd/govulncheck/testdata/testfiles/failures/invalid_header.blob
+++ b/cmd/govulncheck/testdata/testfiles/failures/invalid_header.blob
@ -0,0 +1 @@
+{"id":"invalid-name","protocol":"0.1.0"}{"modules":[{"Path":"github.com/tidwall/gjson","Version":"v1.6.5","Replace":null,"Time":null,"Main":false,"Indirect":false,"Dir":"","GoMod":"","GoVersion":"","Error":null}]}
--- a/cmd/govulncheck/testdata/testfiles/failures/invalid_header_name.blob
+++ b/cmd/govulncheck/testdata/testfiles/failures/invalid_header_name.blob
@ -0,0 +1 @@
+{"name":"invalid-name","version":"0.1.0"}{"modules":[{"Path":"github.com/tidwall/gjson","Version":"v1.6.5","Replace":null,"Time":null,"Main":false,"Indirect":false,"Dir":"","GoMod":"","GoVersion":"","Error":null}]}
--- a/cmd/govulncheck/testdata/testfiles/failures/invalid_header_version.blob
+++ b/cmd/govulncheck/testdata/testfiles/failures/invalid_header_version.blob
@ -0,0 +1 @@
+{"name":"govulncheck-extract","version":"8.8.8"}{"modules":[{"Path":"github.com/tidwall/gjson","Version":"v1.6.5","Replace":null,"Time":null,"Main":false,"Indirect":false,"Dir":"","GoMod":"","GoVersion":"","Error":null}]}
--- a/cmd/govulncheck/testdata/testfiles/failures/multi_header.blob
+++ b/cmd/govulncheck/testdata/testfiles/failures/multi_header.blob
@ -0,0 +1 @@
+{"name":"govulncheck-extract","version":"0.1.0"}{"name":"govulncheck-extract","version":"0.1.0"}{"modules":[]}
--- a/cmd/govulncheck/testdata/testfiles/failures/no_body.blob
+++ b/cmd/govulncheck/testdata/testfiles/failures/no_body.blob
@ -0,0 +1 @@
+{"name":"govulncheck-extract","version":"0.1.0"}
--- a/cmd/govulncheck/testdata/testfiles/failures/no_header.blob
+++ b/cmd/govulncheck/testdata/testfiles/failures/no_header.blob
@ -0,0 +1 @@
+{"modules":[{"Path":"github.com/tidwall/gjson","Version":"v1.6.5","Replace":null,"Time":null,"Main":false,"Indirect":false,"Dir":"","GoMod":"","GoVersion":"","Error":null}]}
--- a/internal/scan/binary.go
+++ b/internal/scan/binary.go
@ -9,7 +9,8 @@ package scan

 import (
 	"context"
-	"fmt"
+	"encoding/json"
+	"errors"
 	"io"
 	"os"
 	"runtime/debug"
@ -21,17 +22,11 @@ import (
 	"golang.org/x/vuln/internal/vulncheck"
 )

-// runBinary detects presence of vulnerable symbols in an executable.
+// runBinary detects presence of vulnerable symbols in an executable or its minimal blob representation.
 func runBinary(ctx context.Context, handler govulncheck.Handler, cfg *config, client *client.Client) (err error) {
 	defer derrors.Wrap(&err, "govulncheck")

-	exe, err := os.Open(cfg.patterns[0])
-	if err != nil {
-		return err
-	}
-	defer exe.Close()
-
-	bin, err := createBin(exe)
+	bin, err := createBin(cfg.patterns[0])
 	if err != nil {
 		return err
 	}
@ -43,18 +38,57 @@ func runBinary(ctx context.Context, handler govulncheck.Handler, cfg *config, cl
 	return vulncheck.Binary(ctx, handler, bin, &cfg.Config, client)
 }

-func createBin(exe io.ReaderAt) (*vulncheck.Bin, error) {
-	mods, packageSymbols, bi, err := buildinfo.ExtractPackagesAndSymbols(exe)
+// createBin builds a vulncheck.Bin from the file at path. The file is first
+// tried as a Go binary and, if that fails, as a blob (see parseBlob). If the
+// file cannot be opened, the os.Open error is returned as is; if the file is
+// neither a Go binary nor a valid blob, an "unrecognized binary format" error
+// is returned.
+func createBin(path string) (*vulncheck.Bin, error) {
+	f, err := os.Open(path)
 	if err != nil {
-		return nil, fmt.Errorf("could not parse provided binary: %v", err)
+		return nil, err
 	}
-	return &vulncheck.Bin{
-		Modules:    mods,
-		PkgSymbols: packageSymbols,
-		GoVersion:  bi.GoVersion,
-		GOOS:       findSetting("GOOS", bi),
-		GOARCH:     findSetting("GOARCH", bi),
-	}, nil
+	defer f.Close()
+
+	// First check if the path points to a Go binary. Otherwise, blob
+	// parsing might json decode a Go binary which takes time. The extraction
+	// error is not reported; on failure the file is tried as a blob instead.
+	//
+	// TODO(#64716): use fingerprinting to make this precise, clean, and fast.
+	mods, packageSymbols, bi, err := buildinfo.ExtractPackagesAndSymbols(f)
+	if err == nil {
+		return &vulncheck.Bin{
+			Modules:    mods,
+			PkgSymbols: packageSymbols,
+			GoVersion:  bi.GoVersion,
+			GOOS:       findSetting("GOOS", bi),
+			GOARCH:     findSetting("GOARCH", bi),
+		}, nil
+	}
+
+	// Otherwise, see if the path points to a valid blob.
+	// NOTE(review): parseBlob reads f from its current offset; this presumably
+	// relies on ExtractPackagesAndSymbols only using ReadAt on f. Confirm.
+	bin := parseBlob(f)
+	if bin != nil {
+		return bin, nil
+	}
+
+	return nil, errors.New("unrecognized binary format")
+}
+
+// parseBlob extracts a vulncheck.Bin from a valid blob read from from.
+//
+// A valid blob is a stream of exactly two JSON messages: a header whose name
+// and version equal extractModeID and extractModeVersion, followed by the
+// JSON encoding of a vulncheck.Bin. If from does not hold such a stream,
+// parseBlob returns nil.
+func parseBlob(from io.Reader) *vulncheck.Bin {
+	dec := json.NewDecoder(from)
+
+	var h header
+	if err := dec.Decode(&h); err != nil {
+		return nil // no header
+	}
+	if h.Name != extractModeID || h.Version != extractModeVersion {
+		return nil // invalid header
+	}
+
+	var b vulncheck.Bin
+	if err := dec.Decode(&b); err != nil {
+		return nil // no body
+	}
+	// We want just header and body, nothing else. Decoder.More cannot be used
+	// for this check since it reports false when the next byte is a stray ']'
+	// or '}'. Instead, require that reading the next token hits end of input.
+	if _, err := dec.Token(); !errors.Is(err, io.EOF) {
+		return nil
+	}
+	return &b
 }

 // findSetting returns value of setting from bi if present.
--- a/internal/scan/extract.go
+++ b/internal/scan/extract.go
@ -0,0 +1,63 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build go1.18
+// +build go1.18
+
+package scan
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"sort"
+
+	"golang.org/x/vuln/internal/derrors"
+	"golang.org/x/vuln/internal/vulncheck"
+)
+
+const (
+	// extractModeID is the unique name of the extract mode protocol. It is
+	// the header name that runExtract writes and that parseBlob requires.
+	extractModeID      = "govulncheck-extract"
+	extractModeVersion = "0.1.0" // header version written by runExtract; parseBlob rejects any other value
+)
+
+// header is the first JSON message of the blob output. runExtract writes it
+// before the body and parseBlob checks it to recognize a valid blob.
+type header struct {
+	Name    string `json:"name"`    // extractModeID in a valid blob
+	Version string `json:"version"` // extractModeVersion in a valid blob
+}
+
+// runExtract dumps the extracted abstraction of the binary at cfg.patterns[0]
+// to out.
+//
+// It prints out exactly two blob messages, one with the header and one with
+// the vulncheck.Bin as the body. The package symbols of the body are sorted
+// before encoding. Encoding failures are returned wrapped, so the underlying
+// error of out stays reachable through errors.Is and errors.As.
+func runExtract(cfg *config, out io.Writer) (err error) {
+	defer derrors.Wrap(&err, "govulncheck")
+
+	bin, err := createBin(cfg.patterns[0])
+	if err != nil {
+		return err
+	}
+	sortBin(bin) // sort for easier testing and validation
+
+	// Named hdr rather than header so that the header type is not shadowed.
+	hdr := header{
+		Name:    extractModeID,
+		Version: extractModeVersion,
+	}
+
+	enc := json.NewEncoder(out)
+	if err := enc.Encode(hdr); err != nil {
+		return fmt.Errorf("marshaling blob header: %w", err)
+	}
+	if err := enc.Encode(bin); err != nil {
+		return fmt.Errorf("marshaling blob body: %w", err)
+	}
+	return nil
+}
+
+// sortBin stably sorts the package symbols of bin in place, ordering them by
+// the string formed from the symbol's package, a dot, and the symbol's name.
+func sortBin(bin *vulncheck.Bin) {
+	syms := bin.PkgSymbols
+	key := func(i int) string {
+		return syms[i].Pkg + "." + syms[i].Name
+	}
+	sort.SliceStable(syms, func(i, j int) bool {
+		return key(i) < key(j)
+	})
+}
--- a/internal/scan/flags.go
+++ b/internal/scan/flags.go
@ -33,6 +33,7 @@ const (
 	modeSource  = "source"
 	modeConvert = "convert" // only intended for use by gopls
 	modeQuery   = "query"   // only intended for use by gopls
+	modeExtract = "extract" // currently, only binary extraction is supported
 )

 func parseFlags(cfg *config, stderr io.Writer, args []string) error {
@ -87,6 +88,7 @@ var supportedModes = map[string]bool{
 	modeBinary:  true,
 	modeConvert: true,
 	modeQuery:   true,
+	modeExtract: true,
 }

 var supportedLevels = map[string]bool{
@ -123,6 +125,22 @@ func validateConfig(cfg *config) error {
 		if !isFile(cfg.patterns[0]) {
 			return fmt.Errorf("%q is not a file", cfg.patterns[0])
 		}
+	case modeExtract:
+		if cfg.test {
+			return fmt.Errorf("the -test flag is not supported in extract mode")
+		}
+		if len(cfg.tags) > 0 {
+			return fmt.Errorf("the -tags flag is not supported in extract mode")
+		}
+		if len(cfg.patterns) != 1 {
+			return fmt.Errorf("only 1 binary can be extracted at a time")
+		}
+		if cfg.json {
+			return fmt.Errorf("the -json flag must be off in extract mode")
+		}
+		if !isFile(cfg.patterns[0]) {
+			return fmt.Errorf("%q is not a file (source extraction is not supported)", cfg.patterns[0])
+		}
 	case modeConvert:
 		if len(cfg.patterns) != 0 {
 			return fmt.Errorf("patterns are not accepted in convert mode")
--- a/internal/scan/run.go
+++ b/internal/scan/run.go
@ -55,6 +55,8 @@ func RunGovulncheck(ctx context.Context, env []string, r io.Reader, stdout io.Wr
 		err = runSource(ctx, handler, cfg, client, dir)
 	case modeBinary:
 		err = runBinary(ctx, handler, cfg, client)
+	case modeExtract:
+		return runExtract(cfg, stdout)
 	case modeQuery:
 		err = runQuery(ctx, handler, cfg, client)
 	case modeConvert:
				`@ -0,0 +1 @@`
				`{"name":"govulncheck-extract","version":"0.1.0"}{"modules":[]}{"name":"govulncheck-extract","version":"0.1.0"}`
				`@ -0,0 +1 @@`
				`{"id":"invalid-name","protocol":"0.1.0"}{"modules":[{"Path":"github.com/tidwall/gjson","Version":"v1.6.5","Replace":null,"Time":null,"Main":false,"Indirect":false,"Dir":"","GoMod":"","GoVersion":"","Error":null}]}`
				`@ -0,0 +1 @@`
				`{"name":"invalid-name","version":"0.1.0"}{"modules":[{"Path":"github.com/tidwall/gjson","Version":"v1.6.5","Replace":null,"Time":null,"Main":false,"Indirect":false,"Dir":"","GoMod":"","GoVersion":"","Error":null}]}`
				`@ -0,0 +1 @@`
				`{"modules":[{"Path":"github.com/tidwall/gjson","Version":"v1.6.5","Replace":null,"Time":null,"Main":false,"Indirect":false,"Dir":"","GoMod":"","GoVersion":"","Error":null}]}`