internal/modindex: package for indexing GOMODCACHE

This CL contains the first part of a package for maintaining an
on-disk index of the module cache. The index is stored as text.
Eventually it will consist of a header, followed by groups of lines,
one for each import path, and sorted by package name. The groups of lines
start with a header containing the package name, import path, name of the
directory, and semantic version,
followed (but not in this first CL) by lines, each of which contains
information about one exported symbol.

This CL only contains the code for computing and updating the information
about directories and import paths, and reading the index. It does not
compute anything about exported symbols, which will be in the next CL,
and hence it does not present an API for looking up information about
completion of selectors.

There is a test that among directories with the same import path it can
find the one with the largest semantic version.

Change-Id: I0883ea732cf34f6700f5495e6dfd594e8f286af9
Reviewed-on: https://go-review.googlesource.com/c/tools/+/612355
TryBot-Bypass: Peter Weinberger <pjw@google.com>
Reviewed-by: Robert Findley <rfindley@google.com>
This commit is contained in:
Peter Weinberger 2024-09-11 08:05:40 -04:00
Родитель b577f77ea7
Коммит 54110aa199
5 изменённых файлов: 693 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,127 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package modindex
import (
"os"
"path/filepath"
"testing"
)
// id is one test case: a set of directories that all provide the same
// import path, together with the index of the one the indexer should choose.
type id struct {
importPath string // import path expected in the single resulting index entry
best int // which of the dirs is the one that should have been chosen
dirs []string // directories to create, relative to the fake module cache
}
// idtests is the table driving TestDirsSinglePath. In each case the
// directory at index best carries the largest semantic version.
var idtests = []id{
{ // get one right
importPath: "cloud.google.com/go/longrunning",
best: 2,
dirs: []string{
"cloud.google.com/go/longrunning@v0.3.0",
"cloud.google.com/go/longrunning@v0.4.1",
"cloud.google.com/go@v0.104.0/longrunning",
"cloud.google.com/go@v0.94.0/longrunning",
},
},
{ // make sure we can run more than one test
importPath: "cloud.google.com/go/compute/metadata",
best: 2,
dirs: []string{
"cloud.google.com/go/compute/metadata@v0.2.1",
"cloud.google.com/go/compute/metadata@v0.2.3",
"cloud.google.com/go/compute@v1.7.0/metadata",
"cloud.google.com/go@v0.94.0/compute/metadata",
},
},
{ // test bizarre characters in directory name
importPath: "bad,guy.com/go",
best: 0,
dirs: []string{"bad,guy.com/go@v0.1.0"},
},
}
// testModCache creates a temporary directory for the test and points
// IndexDir at it, so that the index is written there rather than into the
// user's real cache directory. The same directory is returned for use as
// the fake GOMODCACHE. The previous IndexDir is restored when the test
// (or subtest) finishes, so the override does not leak into other tests.
func testModCache(t *testing.T) string {
	t.Helper()
	dir := t.TempDir()
	saved := IndexDir
	IndexDir = func() (string, error) { return dir, nil }
	t.Cleanup(func() { IndexDir = saved })
	return dir
}
// TestDirsSinglePath checks, for every case in idtests, that indexing a
// fake module cache holding several directories for one import path
// produces exactly one index entry, and that the entry refers to the
// expected directory.
func TestDirsSinglePath(t *testing.T) {
	for _, tc := range idtests {
		t.Run(tc.importPath, func(t *testing.T) {
			// Populate a fresh fake GOMODCACHE.
			root := testModCache(t)
			for _, rel := range tc.dirs {
				pkgDir := filepath.Join(root, rel)
				if err := os.MkdirAll(pkgDir, 0755); err != nil {
					t.Fatal(err)
				}
				// gopathwalk wants to see .go files
				src := []byte("package main\nfunc main() {}")
				if err := os.WriteFile(filepath.Join(pkgDir, "main.go"), src, 0600); err != nil {
					t.Fatal(err)
				}
			}

			// Build the index, then read it back.
			if err := IndexModCache(root, false); err != nil {
				t.Fatal(err)
			}
			index, err := ReadIndex(root)
			if err != nil {
				t.Fatal(err)
			}

			// There must be a single entry, for the best directory.
			if n := len(index.Entries); n != 1 {
				t.Fatalf("got %d entries, wanted 1", n)
			}
			got := index.Entries[0]
			if got.ImportPath != tc.importPath {
				t.Fatalf("got %s import path, wanted %s", got.ImportPath, tc.importPath)
			}
			want := tc.dirs[tc.best]
			if got.Dir != Relpath(want) {
				t.Fatalf("got dir %s, wanted %s", got.Dir, want)
			}
		})
	}
}
/* more data for tests
directories.go:169: WEIRD cloud.google.com/go/iam/admin/apiv1
map[cloud.google.com/go:1 cloud.google.com/go/iam:5]:
[cloud.google.com/go/iam@v0.12.0/admin/apiv1
cloud.google.com/go/iam@v0.13.0/admin/apiv1
cloud.google.com/go/iam@v0.3.0/admin/apiv1
cloud.google.com/go/iam@v0.7.0/admin/apiv1
cloud.google.com/go/iam@v1.0.1/admin/apiv1
cloud.google.com/go@v0.94.0/iam/admin/apiv1]
directories.go:169: WEIRD cloud.google.com/go/iam
map[cloud.google.com/go:1 cloud.google.com/go/iam:5]:
[cloud.google.com/go/iam@v0.12.0 cloud.google.com/go/iam@v0.13.0
cloud.google.com/go/iam@v0.3.0 cloud.google.com/go/iam@v0.7.0
cloud.google.com/go/iam@v1.0.1 cloud.google.com/go@v0.94.0/iam]
directories.go:169: WEIRD cloud.google.com/go/compute/apiv1
map[cloud.google.com/go:1 cloud.google.com/go/compute:4]:
[cloud.google.com/go/compute@v1.12.1/apiv1
cloud.google.com/go/compute@v1.18.0/apiv1
cloud.google.com/go/compute@v1.19.0/apiv1
cloud.google.com/go/compute@v1.7.0/apiv1
cloud.google.com/go@v0.94.0/compute/apiv1]
directories.go:169: WEIRD cloud.google.com/go/longrunning/autogen
map[cloud.google.com/go:2 cloud.google.com/go/longrunning:2]:
[cloud.google.com/go/longrunning@v0.3.0/autogen
cloud.google.com/go/longrunning@v0.4.1/autogen
cloud.google.com/go@v0.104.0/longrunning/autogen
cloud.google.com/go@v0.94.0/longrunning/autogen]
directories.go:169: WEIRD cloud.google.com/go/iam/credentials/apiv1
map[cloud.google.com/go:1 cloud.google.com/go/iam:5]:
[cloud.google.com/go/iam@v0.12.0/credentials/apiv1
cloud.google.com/go/iam@v0.13.0/credentials/apiv1
cloud.google.com/go/iam@v0.3.0/credentials/apiv1
cloud.google.com/go/iam@v0.7.0/credentials/apiv1
cloud.google.com/go/iam@v1.0.1/credentials/apiv1
cloud.google.com/go@v0.94.0/iam/credentials/apiv1]
*/

Просмотреть файл

@ -0,0 +1,137 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package modindex
import (
"fmt"
"log"
"os"
"path/filepath"
"regexp"
"slices"
"strings"
"sync"
"time"
"golang.org/x/mod/semver"
"golang.org/x/tools/internal/gopathwalk"
)
// directory describes one directory found in the module cache, split
// into the pieces computed by DirToImportPathVersion.
type directory struct {
path Relpath // directory name relative to the module cache
importPath string // import path derived from path
version string // semantic version
}
// byImportPath groups the directories by import path. Each group is
// ordered by semanticSort, so its first element is the directory with
// the most recent semantic version. An error is returned as soon as one
// directory cannot be split by DirToImportPathVersion.
func byImportPath(dirs []Relpath) (map[string][]*directory, error) {
	groups := make(map[string][]*directory) // key is import path
	for _, dir := range dirs {
		importPath, version, err := DirToImportPathVersion(dir)
		if err != nil {
			return nil, err
		}
		entry := &directory{path: dir, importPath: importPath, version: version}
		groups[importPath] = append(groups[importPath], entry)
	}
	// The sort is in place, so the map values need not be stored back.
	for _, group := range groups {
		semanticSort(group)
	}
	return groups, nil
}
// semanticSort sorts the directories in place by semantic version,
// latest first. Directories with equal versions are ordered by path,
// which makes the result deterministic.
func semanticSort(v []*directory) {
	latestFirst := func(a, b *directory) int {
		if c := semver.Compare(a.version, b.version); c != 0 {
			return -c // negate so that later versions sort earlier
		}
		return strings.Compare(string(a.path), string(b.path))
	}
	slices.SortFunc(v, latestFirst)
}
// modCacheRegexp splits a Relpath into three pieces: everything before
// the last '@', the text after that '@' up to the next path separator
// (slash or backslash), and the remainder of the path.
// DirToImportPathVersion treats these as module path, module version,
// and package path within the module.
var modCacheRegexp = regexp.MustCompile(`(.*)@([^/\\]*)(.*)`)
// DirToImportPathVersion computes the import path and semantic version
// for a directory of the module cache.
//
// The directory is split by modCacheRegexp into a module path, a version
// (major.minor.patch, optionally followed by a pre-release identifier)
// and the rest of the package path. The import path is the module path
// followed by the rest of the package path, using forward slashes.
// An error is returned if dir does not match modCacheRegexp or if the
// version is not a valid semantic version.
func DirToImportPathVersion(dir Relpath) (string, string, error) {
	parts := modCacheRegexp.FindStringSubmatch(string(dir))
	if len(parts) != 4 {
		return "", "", fmt.Errorf("bad dir %s", dir)
	}
	module, version, pkg := parts[1], parts[2], parts[3]
	if !semver.IsValid(version) {
		return "", "", fmt.Errorf("bad semantic version %s", version)
	}
	// ToSlash is required for Windows.
	importPath := filepath.ToSlash(module + pkg)
	return importPath, version, nil
}
// A region controls what directories to look at, for
// updating the index incrementally, and for testing that.
// (For testing one builds an index as of A, incrementally
// updates it to B, and compares the result to an index built
// as of B.)
type region struct {
onlyAfter, onlyBefore time.Time // skipDir skips "@" directories modified outside this window
sync.Mutex // held by addDir while it appends to ans
ans []Relpath // the directories collected by addDir
}
// findDirs walks the module cache rooted at root with gopathwalk and
// returns the directories it reports, as collected by region.addDir.
// Directories rejected by region.skipDir, which applies the
// onlyAfter/onlyBefore window, are not walked.
func findDirs(root string, onlyAfter, onlyBefore time.Time) []Relpath {
	// TODO(PJW): adjust concurrency
	opts := gopathwalk.Options{ModulesEnabled: true, Concurrency: 1 /* ,Logf: log.Printf*/}
	collector := &region{onlyAfter: onlyAfter, onlyBefore: onlyBefore}
	cacheRoot := gopathwalk.Root{Path: root, Type: gopathwalk.RootModuleCache}
	gopathwalk.WalkSkip([]gopathwalk.Root{cacheRoot}, collector.addDir, collector.skipDir, opts)
	return collector.ans
}
// addDir records dir, a directory found under the root rt, in r.ans.
// The directory is stored relative to the root and with forward slashes.
// The append is done under r's mutex.
func (r *region) addDir(rt gopathwalk.Root, dir string) {
	// TODO: do we need to check times here?
	// Strip the root once; the result is already relative, so it must not
	// be passed through toRelpath a second time.
	rel := Relpath(filepath.ToSlash(string(toRelpath(Abspath(rt.Path), dir))))
	r.Lock()
	defer r.Unlock()
	r.ans = append(r.ans, rel)
}
// skipDir reports whether the walk should stay out of dir. It skips
// "vendor" and "internal" directories, anything whose path contains
// "toolchain@", and any directory whose base name contains "@" and whose
// modification time is before r.onlyAfter or after r.onlyBefore. A
// directory with "@" in its base name that cannot be stat'ed is logged
// and skipped.
func (r *region) skipDir(_ gopathwalk.Root, dir string) bool {
	// The cache directory is already ignored in gopathwalk
	base := filepath.Base(dir)
	switch {
	case base == "vendor", base == "internal":
		return true
	case strings.Contains(dir, "toolchain@"):
		return true
	case !strings.Contains(base, "@"):
		// only @ directories are checked against the time window
		return false
	}
	// don't look inside @ directories that are too old (or too new)
	info, err := os.Stat(dir)
	if err != nil {
		log.Printf("can't stat dir %s %v", dir, err)
		return true
	}
	modTime := info.ModTime()
	return modTime.Before(r.onlyAfter) || modTime.After(r.onlyBefore)
}

256
internal/modindex/index.go Normal file
Просмотреть файл

@ -0,0 +1,256 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package modindex
import (
"bufio"
"encoding/csv"
"fmt"
"hash/crc64"
"io"
"log"
"os"
"path/filepath"
"strconv"
"strings"
"time"
)
/*
The on-disk index is a text file.
The first 3 lines are header information containing CurrentVersion,
the value of GOMODCACHE, and the validity date of the index.
(This is when the code started building the index.)
Following the header are sections of lines, one section for each
import path. These sections are sorted by package name.
The first line of each section, marked by a leading :, contains
the package name, the import path, the name of the directory relative
to GOMODCACHE, and its semantic version.
The rest of each section consists of one line per exported symbol.
The lines are sorted by the symbol's name and contain the name,
an indication of its lexical type (C, T, V, F), and if it is the
name of a function, information about the signature.
The fields in the section header lines are separated by commas, and
in the unlikely event this would be confusing, the csv package is used
to write (and read) them.
In the lines containing exported names, C=const, V=var, T=type, F=func.
If it is a func, the next field is the number of returned values,
followed by pairs consisting of formal parameter names and types.
All these fields are separated by spaces. Any spaces in a type
(e.g., chan struct{}) are replaced by $s on the disk. The $s are
turned back into spaces when read.
Here is an index header (the comments are not part of the index):
0 // version (of the index format)
/usr/local/google/home/pjw/go/pkg/mod // GOMODCACHE
2024-09-11 18:55:09 // validity date of the index
Here is an index section:
:yaml,gopkg.in/yaml.v1,gopkg.in/yaml.v1@v1.0.0-20140924161607-9f9df34309c0,v1.0.0-20140924161607-9f9df34309c0
Getter T
Marshal F 2 in interface{}
Setter T
Unmarshal F 1 in []byte out interface{}
The package name is yaml, the import path is gopkg.in/yaml.v1.
Getter and Setter are types, and Marshal and Unmarshal are functions.
The latter returns one value and has two arguments, 'in' and 'out'
whose types are []byte and interface{}.
*/
// CurrentVersion tells readers about the format of the index.
// readIndexFrom rejects an index whose first header line differs from it,
// and it is part of the names of the index file and of the index-name file.
const CurrentVersion int = 0
// Index is returned by ReadIndex().
// Version, Cachedir and Changed correspond to the three header lines of
// the on-disk index; Entries holds one element per section.
type Index struct {
Version int // format version read from, or written to, the first header line
Cachedir Abspath // The directory containing the module cache
Changed time.Time // The index is up to date as of Changed
Entries []Entry
}
// An Entry contains information for an import path.
// It corresponds to one section of the on-disk index: the first four
// fields come from the section's header line and Names from the lines
// that follow it.
type Entry struct {
Dir Relpath // directory in modcache
ImportPath string
PkgName string
Version string // semantic version of the directory
//ModTime STime // is this useful?
Names []string // exported names and information
}
// ReadIndex reads the latest version of the on-disk index
// for the module cache directory cachedir.
//
// It returns (nil, nil) if there is no index for cachedir, that is, if
// the index-name file does not exist. It returns (nil, err) if the
// index directory cannot be determined, or if the index-name file or the
// index file it names cannot be read or parsed.
func ReadIndex(cachedir string) (*Index, error) {
	cachedir, err := filepath.Abs(cachedir)
	if err != nil {
		return nil, err
	}
	cd := Abspath(cachedir)
	dir, err := IndexDir()
	if err != nil {
		return nil, err
	}
	// The index-name file contains the base name of the current index file.
	iname := filepath.Join(dir, indexNameBase(cd))
	buf, err := os.ReadFile(iname)
	if err != nil {
		// os.ReadFile reports a missing file as a *PathError wrapping
		// os.ErrNotExist, so comparing err with == would never match.
		if os.IsNotExist(err) {
			return nil, nil
		}
		return nil, fmt.Errorf("reading %s: %w", iname, err)
	}
	fname := filepath.Join(dir, string(buf))
	fd, err := os.Open(fname)
	if err != nil {
		return nil, err
	}
	defer fd.Close()
	ix, err := readIndexFrom(cd, bufio.NewReader(fd))
	if err != nil {
		return nil, err
	}
	return ix, nil
}
// readIndexFrom parses an index in the on-disk text format from bx.
// cd is the module cache directory; it is used to make the directory of
// each entry relative.
//
// The input starts with three header lines (format version, module cache
// directory, validity date). Every later line that starts with ':' is a
// section header of four comma-separated fields, read as csv: package
// name, import path, directory, version. Every other non-empty line is
// recorded verbatim in the Names of the current section. Empty lines
// are ignored.
//
// An error is returned for a truncated or malformed header, for a
// version other than CurrentVersion, for a malformed section header, for
// a symbol line that precedes the first section header, and for a
// scanner failure.
func readIndexFrom(cd Abspath, bx io.Reader) (*Index, error) {
	b := bufio.NewScanner(bx)
	var ans Index
	// header
	if !b.Scan() {
		return nil, fmt.Errorf("unexpected scan error")
	}
	var err error
	ans.Version, err = strconv.Atoi(b.Text())
	if err != nil {
		return nil, err
	}
	if ans.Version != CurrentVersion {
		return nil, fmt.Errorf("got version %d, expected %d", ans.Version, CurrentVersion)
	}
	if !b.Scan() {
		return nil, fmt.Errorf("scanner error reading cachedir")
	}
	ans.Cachedir = Abspath(b.Text())
	if !b.Scan() {
		return nil, fmt.Errorf("scanner error reading index creation time")
	}
	// TODO(pjw): need to check that this is the expected cachedir
	// so the tag should be passed in to this function
	// The writer formats the time without a zone, in local time, so it
	// has to be parsed in local time too; time.Parse would treat it as UTC
	// and shift it by the zone offset.
	ans.Changed, err = time.ParseInLocation(time.DateTime, b.Text(), time.Local)
	if err != nil {
		return nil, err
	}
	var curEntry *Entry
	for b.Scan() {
		v := b.Text()
		if v == "" {
			continue // nothing to record, and v[0] below would panic
		}
		if v[0] == ':' {
			if curEntry != nil {
				ans.Entries = append(ans.Entries, *curEntry)
			}
			// as directories may contain commas and quotes, they need to be read as csv.
			flds, err := csv.NewReader(strings.NewReader(v[1:])).Read()
			if err != nil {
				return nil, err
			}
			if len(flds) != 4 {
				return nil, fmt.Errorf("header contains %d fields, not 4: %q", len(flds), v)
			}
			curEntry = &Entry{PkgName: flds[0], ImportPath: flds[1], Dir: toRelpath(cd, flds[2]), Version: flds[3]}
			continue
		}
		if curEntry == nil {
			return nil, fmt.Errorf("symbol line %q precedes any section header", v)
		}
		curEntry.Names = append(curEntry.Names, v)
	}
	if curEntry != nil {
		ans.Entries = append(ans.Entries, *curEntry)
	}
	if err := b.Err(); err != nil {
		return nil, fmt.Errorf("scanner failed %v", err)
	}
	return &ans, nil
}
// writeIndex writes ix as a text file and makes it the current index for
// cachedir.
//
// The index is written to a new, uniquely named file in the directory
// returned by IndexDir. Only after that file has been completely written
// and closed is its base name stored in the index-name file for cachedir,
// which is what ReadIndex consults. A failure to write or close the index
// file is returned and leaves the index-name file untouched.
func writeIndex(cachedir Abspath, ix *Index) error {
	dir, err := IndexDir()
	if err != nil {
		return err
	}
	ipat := fmt.Sprintf("index-%d-*", CurrentVersion)
	fd, err := os.CreateTemp(dir, ipat)
	if err != nil {
		return err // can this happen?
	}
	if err := writeIndexToFile(ix, fd); err != nil {
		fd.Close() // best effort; the write error is the one to report
		return err
	}
	// Close before publishing the name, so that a reader never follows the
	// name to a file that is still open for writing, and so that an error
	// reported by Close is not lost.
	if err := fd.Close(); err != nil {
		return err
	}
	content := filepath.Base(fd.Name())
	nm := filepath.Join(dir, indexNameBase(cachedir))
	return os.WriteFile(nm, []byte(content), 0666)
}
// writeIndexToFile writes x to fd in the on-disk text format: three
// header lines (version, module cache directory, validity date), then
// for each entry a header line starting with ':' followed by one line
// per element of Names. Entries with an empty import path are omitted.
// The entry header is produced with the csv package when the directory
// contains a comma or a double quote, and with plain formatting
// otherwise. Output is buffered; the error returned is that of the
// final flush.
func writeIndexToFile(x *Index, fd *os.File) error {
	out := bufio.NewWriter(fd)
	fmt.Fprintf(out, "%d\n", x.Version)
	fmt.Fprintf(out, "%s\n", x.Cachedir)
	// TODO(pjw): round the time down
	fmt.Fprintf(out, "%s\n", x.Changed.Format(time.DateTime))
	for _, entry := range x.Entries {
		if entry.ImportPath == "" {
			continue // shouldn't happen
		}
		// PJW: maybe always write these headers as csv?
		needsCSV := strings.ContainsAny(string(entry.Dir), ",\"")
		if !needsCSV {
			fmt.Fprintf(out, ":%s,%s,%s,%s\n", entry.PkgName, entry.ImportPath, entry.Dir, entry.Version)
		} else {
			log.Printf("DIR: %s", entry.Dir)
			header := []string{":" + entry.PkgName, entry.ImportPath, string(entry.Dir), entry.Version}
			cw := csv.NewWriter(out)
			cw.Write(header)
			cw.Flush()
		}
		for _, name := range entry.Names {
			fmt.Fprintf(out, "%s\n", name)
		}
	}
	return out.Flush()
}
// IndexDir returns the directory containing the index.
// It is a variable, initially indexDir, so that tests can override it.
var IndexDir = indexDir
// indexDir is the default value of IndexDir. It returns the "go/imports"
// subdirectory of the directory reported by os.UserCacheDir, or an error
// wrapping the one from os.UserCacheDir.
func indexDir() (string, error) {
	cache, err := os.UserCacheDir()
	if err == nil {
		return filepath.Join(cache, "go", "imports"), nil
	}
	return "", fmt.Errorf("cannot open UserCacheDir, %w", err)
}
// indexNameBase returns the base name of the file that contains the name
// of the current index for cachedir. The name is built from
// CurrentVersion and the CRC-64 (ECMA) checksum of cachedir, written as
// 16 hex digits.
func indexNameBase(cachedir Abspath) string {
	// crc64 is a way to convert path names into 16 hex digits.
	table := crc64.MakeTable(crc64.ECMA)
	sum := crc64.Checksum([]byte(cachedir), table)
	return fmt.Sprintf("index-name-%d-%016x", CurrentVersion, sum)
}

Просмотреть файл

@ -0,0 +1,148 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package modindex contains code for building and searching an index to
// the Go module cache. The directory containing the index, returned by
// IndexDir(), contains a file index-name-<ver>-<hash> that contains the name
// of the current index. We believe writing that short file is atomic.
// ReadIndex reads that file to get the file name of the index.
// writeIndex writes an index with a unique name and then
// writes that name into a new version of index-name-<ver>-<hash>.
// (<ver> stands for the CurrentVersion of the index format, and <hash>
// for a checksum of the path of the module cache directory.)
package modindex
import (
"log"
"path/filepath"
"slices"
"strings"
"time"
"golang.org/x/mod/semver"
)
// Modindex writes an index current as of when it is called.
// If clear is true the index is constructed from all of GOMODCACHE
// otherwise the index is constructed from the last previous index
// and the updates to the cache.
func IndexModCache(cachedir string, clear bool) error {
cachedir, err := filepath.Abs(cachedir)
if err != nil {
return err
}
cd := Abspath(cachedir)
future := time.Now().Add(24 * time.Hour) // safely in the future
err = modindexTimed(future, cd, clear)
if err != nil {
return err
}
return nil
}
// modindexTimed writes an index current as of onlyBefore.
// If clear is true the index is constructed from all of GOMODCACHE
// otherwise the index is constructed from the last previous index
// and all the updates to the cache before onlyBefore.
// If the previous index is missing or cannot be read, the index is
// constructed from all of GOMODCACHE, as if clear were true.
// (this is useful for testing; perhaps it should not be exported)
func modindexTimed(onlyBefore time.Time, cachedir Abspath, clear bool) error {
	var curIndex *Index
	if !clear {
		// A failure to read the previous index is deliberately not fatal:
		// without it there is nothing to update incrementally, so fall
		// back to a full scan.
		if prev, err := ReadIndex(string(cachedir)); err == nil {
			curIndex = prev
		}
		// TODO(pjw): check that most of those directories still exist
	}
	cfg := &work{
		onlyBefore: onlyBefore,
		oldIndex:   curIndex,
		cacheDir:   cachedir,
	}
	if curIndex != nil {
		// Only directories changed since the previous index need scanning.
		cfg.onlyAfter = curIndex.Changed
	}
	if err := cfg.buildIndex(); err != nil {
		return err
	}
	return cfg.writeIndex()
}
// work holds the inputs and the result of building one new index.
type work struct {
onlyBefore time.Time // do not use directories later than this
onlyAfter time.Time // only interested in directories after this
// directories from before onlyAfter come from oldIndex
oldIndex *Index // the previous index, or nil if there is none to build on
newIndex *Index // the index under construction; set by buildIndex
cacheDir Abspath // the module cache being indexed
}
// buildIndex computes w.newIndex from the directories of w.cacheDir that
// fall in the window (w.onlyAfter, w.onlyBefore) and, if there is one,
// from w.oldIndex.
//
// An import path may occur only among the scanned directories, only in
// the old index, or in both. If it occurs in both, the one with the
// semantically later version is used. The resulting entries are sorted
// by package name and then by import path. w.oldIndex is not modified.
func (w *work) buildIndex() error {
	// The effective date of the new index should be at least
	// slightly earlier than when the directories are scanned
	// so set it now.
	w.newIndex = &Index{Version: CurrentVersion, Changed: time.Now(), Cachedir: w.cacheDir}
	dirs := findDirs(string(w.cacheDir), w.onlyAfter, w.onlyBefore)
	newdirs, err := byImportPath(dirs)
	if err != nil {
		return err
	}
	log.Printf("%d dirs, %d ips", len(dirs), len(newdirs))

	// Carry over the entries of the old index that are still the best
	// choice for their import path.
	if w.oldIndex != nil {
		killed := 0
		for _, e := range w.oldIndex.Entries {
			if e.ImportPath == "" {
				continue // shouldn't happen
			}
			if found, ok := newdirs[e.ImportPath]; ok {
				if semver.Compare(found[0].version, e.Version) > 0 {
					// the new one is better, drop the old one
					killed++
					continue
				}
				// use the old one, forget the new one
				delete(newdirs, e.ImportPath)
			}
			w.newIndex.Entries = append(w.newIndex.Entries, e)
		}
		log.Printf("%d killed, %d ips", killed, len(newdirs))
	}

	// Add a skeleton entry for each remaining new import path, using the
	// directory with the latest version (byImportPath puts it first).
	for k, v := range newdirs {
		d := v[0]
		w.newIndex.Entries = append(w.newIndex.Entries, Entry{
			Dir:        d.path,
			ImportPath: k,
			Version:    d.version,
		})
	}

	// find symbols for the incomplete entries
	log.Print("not finding any symbols yet")

	// sort the entries in the new index
	slices.SortFunc(w.newIndex.Entries, func(l, r Entry) int {
		if n := strings.Compare(l.PkgName, r.PkgName); n != 0 {
			return n
		}
		return strings.Compare(l.ImportPath, r.ImportPath)
	})
	return nil
}
// writeIndex writes w.newIndex to disk as the index for w.cacheDir,
// using the package-level writeIndex function.
func (w *work) writeIndex() error {
return writeIndex(w.cacheDir, w.newIndex)
}

Просмотреть файл

@ -0,0 +1,25 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package modindex
import (
"strings"
)
// Distinct string types for the various kinds of directory names.
// It's easy to get them confused.

// Abspath is an absolute path.
type Abspath string

// Relpath is a path with the GOMODCACHE prefix removed.
type Relpath string

// toRelpath returns s relative to cachedir.
//
// If s is cachedir itself the result is empty. If s lies below cachedir,
// that is, s consists of cachedir, a path separator ('/' or '\'), and
// more text, the result is the text after the separator. Otherwise s is
// returned unchanged; in particular a path that merely shares a textual
// prefix with cachedir (cachedir "/a/mod", s "/a/module/x") is not
// truncated, and an empty cachedir strips nothing.
func toRelpath(cachedir Abspath, s string) Relpath {
	root := string(cachedir)
	if s == root {
		return Relpath("")
	}
	rest, ok := strings.CutPrefix(s, root)
	if !ok || root == "" {
		return Relpath(s)
	}
	// rest is not empty here, because s has the prefix root but differs from it.
	isSep := func(c byte) bool { return c == '/' || c == '\\' }
	switch {
	case isSep(root[len(root)-1]):
		// root already ends in a separator (e.g. "/"), nothing more to drop
		return Relpath(rest)
	case isSep(rest[0]):
		return Relpath(rest[1:])
	}
	// root is a prefix of s but not at a path-component boundary
	return Relpath(s)
}