From 54110aa1994bf24eedb96ac20ac0c99fd270c0b6 Mon Sep 17 00:00:00 2001 From: Peter Weinberger Date: Wed, 11 Sep 2024 08:05:40 -0400 Subject: [PATCH] internal/modindex: package for indexing GOMODCACHE This CL contains the first part of a package for maintaining an on-disk index of the module cache. The index is stored as text. Eventually it will consist of a header, followed by groups of lines, one for each import path, and sorted by package name. The groups of lines start with a header containing the package name, import path, name of the directory, and semantic version, followed (but not in this first CL) by lines, each of which contains information about one exported symbol. This CL only contains the code for computing and updating the information about directories and import paths, and reading the index. It does not compute anything about exported symbols, which will be in the next CL, and hence it does not present an API for looking up information about completion of selectors. There is a test that among directories with the same import path it can find the one with the largest semantic version. 
Change-Id: I0883ea732cf34f6700f5495e6dfd594e8f286af9 Reviewed-on: https://go-review.googlesource.com/c/tools/+/612355 TryBot-Bypass: Peter Weinberger Reviewed-by: Robert Findley --- internal/modindex/dir_test.go | 127 +++++++++++++++ internal/modindex/directories.go | 137 +++++++++++++++++ internal/modindex/index.go | 256 +++++++++++++++++++++++++++++++ internal/modindex/modindex.go | 148 ++++++++++++++++++ internal/modindex/types.go | 25 +++ 5 files changed, 693 insertions(+) create mode 100644 internal/modindex/dir_test.go create mode 100644 internal/modindex/directories.go create mode 100644 internal/modindex/index.go create mode 100644 internal/modindex/modindex.go create mode 100644 internal/modindex/types.go diff --git a/internal/modindex/dir_test.go b/internal/modindex/dir_test.go new file mode 100644 index 000000000..862d111ea --- /dev/null +++ b/internal/modindex/dir_test.go @@ -0,0 +1,127 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package modindex + +import ( + "os" + "path/filepath" + "testing" +) + +type id struct { + importPath string + best int // which of the dirs is the one that should have been chosen + dirs []string +} + +var idtests = []id{ + { // get one right + importPath: "cloud.google.com/go/longrunning", + best: 2, + dirs: []string{ + "cloud.google.com/go/longrunning@v0.3.0", + "cloud.google.com/go/longrunning@v0.4.1", + "cloud.google.com/go@v0.104.0/longrunning", + "cloud.google.com/go@v0.94.0/longrunning", + }, + }, + { // make sure we can run more than one test + importPath: "cloud.google.com/go/compute/metadata", + best: 2, + dirs: []string{ + "cloud.google.com/go/compute/metadata@v0.2.1", + "cloud.google.com/go/compute/metadata@v0.2.3", + "cloud.google.com/go/compute@v1.7.0/metadata", + "cloud.google.com/go@v0.94.0/compute/metadata", + }, + }, + { //m test bizarre characters in directory name + importPath: "bad,guy.com/go", + best: 0, + dirs: []string{"bad,guy.com/go@v0.1.0"}, + }, +} + +func testModCache(t *testing.T) string { + t.Helper() + dir := t.TempDir() + IndexDir = func() (string, error) { return dir, nil } + return dir +} + +func TestDirsSinglePath(t *testing.T) { + for _, itest := range idtests { + t.Run(itest.importPath, func(t *testing.T) { + // create a new fake GOMODCACHE + dir := testModCache(t) + for _, d := range itest.dirs { + if err := os.MkdirAll(filepath.Join(dir, d), 0755); err != nil { + t.Fatal(err) + } + // gopathwalk wants to see .go files + err := os.WriteFile(filepath.Join(dir, d, "main.go"), []byte("package main\nfunc main() {}"), 0600) + if err != nil { + t.Fatal(err) + } + } + // build and check the index + if err := IndexModCache(dir, false); err != nil { + t.Fatal(err) + } + ix, err := ReadIndex(dir) + if err != nil { + t.Fatal(err) + } + if len(ix.Entries) != 1 { + t.Fatalf("got %d entries, wanted 1", len(ix.Entries)) + } + if ix.Entries[0].ImportPath != itest.importPath { + t.Fatalf("got %s import path, wanted %s", 
ix.Entries[0].ImportPath, itest.importPath) + } + if ix.Entries[0].Dir != Relpath(itest.dirs[itest.best]) { + t.Fatalf("got dir %s, wanted %s", ix.Entries[0].Dir, itest.dirs[itest.best]) + } + }) + } +} + +/* more data for tests + +directories.go:169: WEIRD cloud.google.com/go/iam/admin/apiv1 +map[cloud.google.com/go:1 cloud.google.com/go/iam:5]: +[cloud.google.com/go/iam@v0.12.0/admin/apiv1 +cloud.google.com/go/iam@v0.13.0/admin/apiv1 +cloud.google.com/go/iam@v0.3.0/admin/apiv1 +cloud.google.com/go/iam@v0.7.0/admin/apiv1 +cloud.google.com/go/iam@v1.0.1/admin/apiv1 +cloud.google.com/go@v0.94.0/iam/admin/apiv1] +directories.go:169: WEIRD cloud.google.com/go/iam +map[cloud.google.com/go:1 cloud.google.com/go/iam:5]: +[cloud.google.com/go/iam@v0.12.0 cloud.google.com/go/iam@v0.13.0 +cloud.google.com/go/iam@v0.3.0 cloud.google.com/go/iam@v0.7.0 +cloud.google.com/go/iam@v1.0.1 cloud.google.com/go@v0.94.0/iam] +directories.go:169: WEIRD cloud.google.com/go/compute/apiv1 +map[cloud.google.com/go:1 cloud.google.com/go/compute:4]: +[cloud.google.com/go/compute@v1.12.1/apiv1 +cloud.google.com/go/compute@v1.18.0/apiv1 +cloud.google.com/go/compute@v1.19.0/apiv1 +cloud.google.com/go/compute@v1.7.0/apiv1 +cloud.google.com/go@v0.94.0/compute/apiv1] +directories.go:169: WEIRD cloud.google.com/go/longrunning/autogen +map[cloud.google.com/go:2 cloud.google.com/go/longrunning:2]: +[cloud.google.com/go/longrunning@v0.3.0/autogen +cloud.google.com/go/longrunning@v0.4.1/autogen +cloud.google.com/go@v0.104.0/longrunning/autogen +cloud.google.com/go@v0.94.0/longrunning/autogen] +directories.go:169: WEIRD cloud.google.com/go/iam/credentials/apiv1 +map[cloud.google.com/go:1 cloud.google.com/go/iam:5]: +[cloud.google.com/go/iam@v0.12.0/credentials/apiv1 +cloud.google.com/go/iam@v0.13.0/credentials/apiv1 +cloud.google.com/go/iam@v0.3.0/credentials/apiv1 +cloud.google.com/go/iam@v0.7.0/credentials/apiv1 +cloud.google.com/go/iam@v1.0.1/credentials/apiv1 
+cloud.google.com/go@v0.94.0/iam/credentials/apiv1] + +*/ diff --git a/internal/modindex/directories.go b/internal/modindex/directories.go new file mode 100644 index 000000000..b8aab3b73 --- /dev/null +++ b/internal/modindex/directories.go @@ -0,0 +1,137 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package modindex + +import ( + "fmt" + "log" + "os" + "path/filepath" + "regexp" + "slices" + "strings" + "sync" + "time" + + "golang.org/x/mod/semver" + "golang.org/x/tools/internal/gopathwalk" +) + +type directory struct { + path Relpath + importPath string + version string // semantic version +} + +// filterDirs groups the directories by import path, +// sorting the ones with the same import path by semantic version, +// most recent first. +func byImportPath(dirs []Relpath) (map[string][]*directory, error) { + ans := make(map[string][]*directory) // key is import path + for _, d := range dirs { + ip, sv, err := DirToImportPathVersion(d) + if err != nil { + return nil, err + } + ans[ip] = append(ans[ip], &directory{ + path: d, + importPath: ip, + version: sv, + }) + } + for k, v := range ans { + semanticSort(v) + ans[k] = v + } + return ans, nil +} + +// sort the directories by semantic version, lates first +func semanticSort(v []*directory) { + slices.SortFunc(v, func(l, r *directory) int { + if n := semver.Compare(l.version, r.version); n != 0 { + return -n // latest first + } + return strings.Compare(string(l.path), string(r.path)) + }) +} + +// modCacheRegexp splits a relpathpath into module, module version, and package. +var modCacheRegexp = regexp.MustCompile(`(.*)@([^/\\]*)(.*)`) + +// DirToImportPathVersion computes import path and semantic version +func DirToImportPathVersion(dir Relpath) (string, string, error) { + m := modCacheRegexp.FindStringSubmatch(string(dir)) + // m[1] is the module path + // m[2] is the version major.minor.patch(-
 that contains the name
+// of the current index. We believe writing that short file is atomic.
+// ReadIndex reads that file to get the file name of the index.
+// WriteIndex writes an index with a unique name and then
+// writes that name into a new version of index-name-<ver>.
+// (<ver> stands for the CurrentVersion of the index format.)
+package modindex
+
+import (
+	"log"
+	"path/filepath"
+	"slices"
+	"strings"
+	"time"
+
+	"golang.org/x/mod/semver"
+)
+
+// IndexModCache writes an index of the module cache in cachedir,
+// current as of when it is called.
+// If clear is true the index is constructed from all of GOMODCACHE;
+// otherwise it is constructed from the last previous index
+// and the updates to the cache.
+// It fails if cachedir cannot be made absolute or if modindexTimed
+// reports an error.
+func IndexModCache(cachedir string, clear bool) error {
+	abs, err := filepath.Abs(cachedir)
+	if err != nil {
+		return err
+	}
+	// The upper time bound for directories is put a day ahead,
+	// which is safely in the future.
+	horizon := time.Now().Add(24 * time.Hour)
+	return modindexTimed(horizon, Abspath(abs), clear)
+}
+
+// modindexTimed writes an index current as of onlyBefore.
+// If clear is true the index is constructed from all of GOMODCACHE;
+// otherwise it is constructed from the last previous index and all the
+// updates to the cache before onlyBefore. A previous index that cannot
+// be read is logged and ignored, which has the same effect as clear.
+// (Passing the time in as a parameter is useful for testing.)
+func modindexTimed(onlyBefore time.Time, cachedir Abspath, clear bool) error {
+	var curIndex *Index
+	if !clear {
+		var err error
+		curIndex, err = ReadIndex(string(cachedir))
+		if err != nil {
+			// Best effort: rebuild from scratch, never from a partial index.
+			log.Printf("ignoring previous index: %v", err)
+			curIndex = nil
+		}
+		// TODO(pjw): check that most of those directories still exist
+	}
+	cfg := &work{
+		onlyBefore: onlyBefore,
+		oldIndex:   curIndex,
+		cacheDir:   cachedir,
+	}
+	if curIndex != nil {
+		cfg.onlyAfter = curIndex.Changed
+	}
+	if err := cfg.buildIndex(); err != nil {
+		return err
+	}
+	return cfg.writeIndex()
+}
+
+type work struct {
+	onlyBefore time.Time // do not use directories later than this
+	onlyAfter  time.Time // only interested in directories after this
+	// directories from before onlyAfter come from oldIndex
+	oldIndex *Index  // previous index obtained from ReadIndex; may be nil
+	newIndex *Index  // created by buildIndex, handed on by writeIndex
+	cacheDir Abspath // module cache directory, also stored in newIndex.Cachedir
+}
+
+// buildIndex computes w.newIndex from the directories found in the
+// module cache and from the entries of w.oldIndex, if there is one.
+// When an import path occurs both in the old index and among the newly
+// found directories, the semantically later version is used, and the
+// old entry is kept on a tie. The entries of the new index are sorted
+// by package name and then by import path.
+// It returns an error only if byImportPath fails.
+func (w *work) buildIndex() error {
+	// The effective date of the new index should be at least
+	// slightly earlier than when the directories are scanned
+	// so set it now.
+	w.newIndex = &Index{Changed: time.Now(), Cachedir: w.cacheDir}
+	dirs := findDirs(string(w.cacheDir), w.onlyAfter, w.onlyBefore)
+	newdirs, err := byImportPath(dirs)
+	if err != nil {
+		return err
+	}
+	log.Printf("%d dirs, %d ips", len(dirs), len(newdirs))
+	// Merge in the old index. An import path might occur only in newdirs,
+	// only in w.oldIndex, or in both; if in both, the later version wins.
+	if w.oldIndex != nil {
+		killed := 0
+		for _, e := range w.oldIndex.Entries {
+			if found, ok := newdirs[e.ImportPath]; ok {
+				if semver.Compare(found[0].version, e.Version) > 0 {
+					// The new one is better, so skip the old one.
+					// (Assigning to e would not disable it: e is a
+					// copy of the slice element.)
+					killed++
+					continue
+				}
+				// use the old one, forget the new one
+				delete(newdirs, e.ImportPath)
+			}
+			if e.ImportPath != "" {
+				w.newIndex.Entries = append(w.newIndex.Entries, e)
+			}
+		}
+		log.Printf("%d killed, %d ips", killed, len(newdirs))
+	}
+	// Build the skeleton of the rest of the new index from what is
+	// left in newdirs. Each slice is sorted latest version first.
+	for k, v := range newdirs {
+		d := v[0]
+		w.newIndex.Entries = append(w.newIndex.Entries, Entry{
+			Dir:        d.path,
+			ImportPath: k,
+			Version:    d.version,
+		})
+	}
+	// find symbols for the incomplete entries
+	log.Print("not finding any symbols yet")
+	// sort the entries in the new index
+	slices.SortFunc(w.newIndex.Entries, func(l, r Entry) int {
+		if n := strings.Compare(l.PkgName, r.PkgName); n != 0 {
+			return n
+		}
+		return strings.Compare(l.ImportPath, r.ImportPath)
+	})
+	return nil
+}
+
+// writeIndex passes w.cacheDir and w.newIndex to the package-level
+// writeIndex function and returns that function's result unchanged.
+func (w *work) writeIndex() error { return writeIndex(w.cacheDir, w.newIndex) }
diff --git a/internal/modindex/types.go b/internal/modindex/types.go
new file mode 100644
index 000000000..ece448863
--- /dev/null
+++ b/internal/modindex/types.go
@@ -0,0 +1,25 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package modindex
+
+import (
+	"strings"
+)
+
+type Abspath string // Abspath is an absolute path; the distinct types avoid mix-ups.
+type Relpath string // Relpath is a path with the GOMODCACHE prefix removed.
+
+// toRelpath strips cachedir and the separator after it from s ("" if s is cachedir).
+// Any other s, even one only sharing leading text with cachedir, is returned as is.
+func toRelpath(cachedir Abspath, s string) Relpath {
+	if s == string(cachedir) {
+		return ""
+	}
+	rest, ok := strings.CutPrefix(s, string(cachedir))
+	if ok && (rest[0] == '/' || rest[0] == '\\') { // rest is not empty: s != cachedir
+		return Relpath(rest[1:])
+	}
+	return Relpath(s)
+}