internal/gcimporter: use two-level file index

This change introduces a two-level index of files, as a
precursor to an optimization in which only the line number
information for the necessary positions is recorded.
The optimization naturally requires two passes over the
data, which means we can't emit the file information
in one gulp.

Change-Id: Ia8e015c8b19cbf6074661ec345c7360a325d1054
Reviewed-on: https://go-review.googlesource.com/c/tools/+/462095
Reviewed-by: Robert Findley <rfindley@google.com>
Run-TryBot: Alan Donovan <adonovan@google.com>
gopls-CI: kokoro <noreply+kokoro@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
Alan Donovan 2023-01-13 11:44:12 -05:00
Parent 8aba49bb5e
Commit d958e85480
2 changed files with 70 additions and 37 deletions

View file

@@ -102,7 +102,6 @@ func iexportCommon(out io.Writer, fset *token.FileSet, bundle, shallow bool, ver
shallow: shallow, shallow: shallow,
allPkgs: map[*types.Package]bool{}, allPkgs: map[*types.Package]bool{},
stringIndex: map[string]uint64{}, stringIndex: map[string]uint64{},
fileIndex: map[*token.File]uint64{},
declIndex: map[types.Object]uint64{}, declIndex: map[types.Object]uint64{},
tparamNames: map[types.Object]string{}, tparamNames: map[types.Object]string{},
typIndex: map[types.Type]uint64{}, typIndex: map[types.Type]uint64{},
@@ -141,6 +140,34 @@ func iexportCommon(out io.Writer, fset *token.FileSet, bundle, shallow bool, ver
p.doDecl(p.declTodo.popHead()) p.doDecl(p.declTodo.popHead())
} }
// Produce index of offset of each file record in files.
var files intWriter
var fileOffset []uint64 // fileOffset[i] is offset in files of file encoded as i
if p.shallow {
fileOffset = make([]uint64, len(p.fileInfos))
for i, info := range p.fileInfos {
fileOffset[i] = uint64(files.Len())
files.uint64(p.stringOff(info.file.Name()))
files.uint64(uint64(info.file.Size()))
// Delta-encode the line offsets, omitting the initial zero.
// (An empty file has an empty lines array.)
//
// TODO(adonovan): opt: use a two-pass approach that
// first gathers the set of Pos values and then
// encodes only the information necessary for them.
// This would allow us to discard the lines after the
// last object of interest and to run-length encode the
// trivial lines between lines with needed positions.
lines := getLines(info.file)
files.uint64(uint64(len(lines)))
for i := 1; i < len(lines); i++ {
files.uint64(uint64(lines[i] - lines[i-1]))
}
}
}
// Append indices to data0 section. // Append indices to data0 section.
dataLen := uint64(p.data0.Len()) dataLen := uint64(p.data0.Len())
w := p.newWriter() w := p.newWriter()
@@ -167,7 +194,11 @@ func iexportCommon(out io.Writer, fset *token.FileSet, bundle, shallow bool, ver
hdr.uint64(uint64(p.version)) hdr.uint64(uint64(p.version))
hdr.uint64(uint64(p.strings.Len())) hdr.uint64(uint64(p.strings.Len()))
if p.shallow { if p.shallow {
hdr.uint64(uint64(p.files.Len())) hdr.uint64(uint64(files.Len()))
hdr.uint64(uint64(len(fileOffset)))
for _, offset := range fileOffset {
hdr.uint64(offset)
}
} }
hdr.uint64(dataLen) hdr.uint64(dataLen)
@@ -175,7 +206,7 @@ func iexportCommon(out io.Writer, fset *token.FileSet, bundle, shallow bool, ver
io.Copy(out, &hdr) io.Copy(out, &hdr)
io.Copy(out, &p.strings) io.Copy(out, &p.strings)
if p.shallow { if p.shallow {
io.Copy(out, &p.files) io.Copy(out, &files)
} }
io.Copy(out, &p.data0) io.Copy(out, &p.data0)
@@ -266,8 +297,9 @@ type iexporter struct {
// In shallow mode, object positions are encoded as (file, offset). // In shallow mode, object positions are encoded as (file, offset).
// Each file is recorded as a line-number table. // Each file is recorded as a line-number table.
files intWriter // Only the lines of needed positions are saved faithfully.
fileIndex map[*token.File]uint64 fileInfo map[*token.File]uint64 // value is index in fileInfos
fileInfos []*filePositions
data0 intWriter data0 intWriter
declIndex map[types.Object]uint64 declIndex map[types.Object]uint64
@@ -277,6 +309,11 @@ type iexporter struct {
indent int // for tracing support indent int // for tracing support
} }
type filePositions struct {
file *token.File
needed []token.Pos // unordered list of needed positions
}
func (p *iexporter) trace(format string, args ...interface{}) { func (p *iexporter) trace(format string, args ...interface{}) {
if !trace { if !trace {
// Call sites should also be guarded, but having this check here allows // Call sites should also be guarded, but having this check here allows
@@ -300,33 +337,21 @@ func (p *iexporter) stringOff(s string) uint64 {
return off return off
} }
// fileOff returns the offset of the token.File encoding. // fileIndex returns the index of the token.File.
// If not already present, it's added to the end. func (p *iexporter) fileIndex(file *token.File, pos token.Pos) uint64 {
func (p *iexporter) fileOff(file *token.File) uint64 { index, ok := p.fileInfo[file]
off, ok := p.fileIndex[file]
if !ok { if !ok {
off = uint64(p.files.Len()) index = uint64(len(p.fileInfo))
p.fileIndex[file] = off p.fileInfos = append(p.fileInfos, &filePositions{file: file})
if p.fileInfo == nil {
p.files.uint64(p.stringOff(file.Name())) p.fileInfo = make(map[*token.File]uint64)
p.files.uint64(uint64(file.Size()))
// Delta-encode the line offsets, omitting the initial zero.
// (An empty file has an empty lines array.)
//
// TODO(adonovan): opt: use a two-pass approach that
// first gathers the set of Pos values and then
// encodes only the information necessary for them.
// This would allow us to discard the lines after the
// last object of interest and to run-length encode the
// trivial lines between lines with needed positions.
lines := getLines(file)
p.files.uint64(uint64(len(lines)))
for i := 1; i < len(lines); i++ {
p.files.uint64(uint64(lines[i] - lines[i-1]))
} }
p.fileInfo[file] = index
} }
return off // Record each needed position.
info := p.fileInfos[index]
info.needed = append(info.needed, pos)
return index
} }
// pushDecl adds n to the declaration work queue, if not already present. // pushDecl adds n to the declaration work queue, if not already present.
@@ -526,7 +551,7 @@ func (w *exportWriter) posV2(pos token.Pos) {
return return
} }
file := w.p.fset.File(pos) // fset must be non-nil file := w.p.fset.File(pos) // fset must be non-nil
w.uint64(1 + w.p.fileOff(file)) w.uint64(1 + w.p.fileIndex(file, pos))
w.uint64(uint64(file.Offset(pos))) w.uint64(uint64(file.Offset(pos)))
} }

View file

@@ -138,9 +138,14 @@ func iimportCommon(fset *token.FileSet, imports map[string]*types.Package, data
sLen := int64(r.uint64()) sLen := int64(r.uint64())
var fLen int64 var fLen int64
var fileOffset []uint64
if insert != nil { if insert != nil {
// shallow mode uses a different position encoding // Shallow mode uses a different position encoding.
fLen = int64(r.uint64()) fLen = int64(r.uint64())
fileOffset = make([]uint64, r.uint64())
for i := range fileOffset {
fileOffset[i] = r.uint64()
}
} }
dLen := int64(r.uint64()) dLen := int64(r.uint64())
@@ -157,8 +162,9 @@ func iimportCommon(fset *token.FileSet, imports map[string]*types.Package, data
stringData: stringData, stringData: stringData,
stringCache: make(map[uint64]string), stringCache: make(map[uint64]string),
fileOffset: fileOffset,
fileData: fileData, fileData: fileData,
fileCache: make(map[uint64]*token.File), fileCache: make([]*token.File, len(fileOffset)),
pkgCache: make(map[uint64]*types.Package), pkgCache: make(map[uint64]*types.Package),
declData: declData, declData: declData,
@@ -288,8 +294,9 @@ type iimporter struct {
stringData []byte stringData []byte
stringCache map[uint64]string stringCache map[uint64]string
fileOffset []uint64 // fileOffset[i] is offset in fileData for info about file encoded as i
fileData []byte fileData []byte
fileCache map[uint64]*token.File fileCache []*token.File // memoized decoding of file encoded as i
pkgCache map[uint64]*types.Package pkgCache map[uint64]*types.Package
declData []byte declData []byte
@@ -362,9 +369,10 @@ func (p *iimporter) stringAt(off uint64) string {
return s return s
} }
func (p *iimporter) fileAt(off uint64) *token.File { func (p *iimporter) fileAt(index uint64) *token.File {
file, ok := p.fileCache[off] file := p.fileCache[index]
if !ok { if file == nil {
off := p.fileOffset[index]
rd := intReader{bytes.NewReader(p.fileData[off:]), p.ipath} rd := intReader{bytes.NewReader(p.fileData[off:]), p.ipath}
filename := p.stringAt(rd.uint64()) filename := p.stringAt(rd.uint64())
size := int(rd.uint64()) size := int(rd.uint64())
@@ -380,7 +388,7 @@ func (p *iimporter) fileAt(off uint64) *token.File {
} }
} }
p.fileCache[off] = file p.fileCache[index] = file
} }
return file return file
} }