internal/gcimporter: use two-level file index

This change introduces a two-level index of files, as a
precursor to an optimization in which only the line number
information for the necessary positions is recorded.
The optimization naturally requires two passes over the
data, which means we can't emit the file information
in one gulp.

Change-Id: Ia8e015c8b19cbf6074661ec345c7360a325d1054
Reviewed-on: https://go-review.googlesource.com/c/tools/+/462095
Reviewed-by: Robert Findley <rfindley@google.com>
Run-TryBot: Alan Donovan <adonovan@google.com>
gopls-CI: kokoro <noreply+kokoro@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Alan Donovan 2023-01-13 11:44:12 -05:00
Parent 8aba49bb5e
Commit d958e85480
2 changed files with 70 additions and 37 deletions
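A minimal, self-contained sketch of the two-level scheme this change adopts for file records: records are appended to one flat buffer, while a separate table maps each small file index to the byte offset of its record, so a reader can locate and decode file i on demand. Every name below is invented for illustration and does not appear in gcimporter.

package main

import "fmt"

// twoLevel pairs a flat data buffer with a table of byte offsets,
// one entry per record index.
type twoLevel struct {
    offsets []int  // offsets[i] is where record i starts in data
    data    []byte // concatenated, variable-length records
}

// add appends a record and returns its small integer index.
func (t *twoLevel) add(record []byte) int {
    index := len(t.offsets)
    t.offsets = append(t.offsets, len(t.data))
    t.data = append(t.data, record...)
    return index
}

// lookup finds record i without scanning the records before it.
func (t *twoLevel) lookup(index int) []byte {
    start := t.offsets[index]
    end := len(t.data)
    if index+1 < len(t.offsets) {
        end = t.offsets[index+1]
    }
    return t.data[start:end]
}

func main() {
    var t twoLevel
    a := t.add([]byte("record for file A"))
    b := t.add([]byte("record for file B"))
    fmt.Println(a, string(t.lookup(a))) // 0 record for file A
    fmt.Println(b, string(t.lookup(b))) // 1 record for file B
}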

View file

@@ -102,7 +102,6 @@ func iexportCommon(out io.Writer, fset *token.FileSet, bundle, shallow bool, ver
shallow: shallow,
allPkgs: map[*types.Package]bool{},
stringIndex: map[string]uint64{},
fileIndex: map[*token.File]uint64{},
declIndex: map[types.Object]uint64{},
tparamNames: map[types.Object]string{},
typIndex: map[types.Type]uint64{},
@@ -141,6 +140,34 @@ func iexportCommon(out io.Writer, fset *token.FileSet, bundle, shallow bool, ver
p.doDecl(p.declTodo.popHead())
}
// Produce index of offset of each file record in files.
var files intWriter
var fileOffset []uint64 // fileOffset[i] is offset in files of file encoded as i
if p.shallow {
fileOffset = make([]uint64, len(p.fileInfos))
for i, info := range p.fileInfos {
fileOffset[i] = uint64(files.Len())
files.uint64(p.stringOff(info.file.Name()))
files.uint64(uint64(info.file.Size()))
// Delta-encode the line offsets, omitting the initial zero.
// (An empty file has an empty lines array.)
//
// TODO(adonovan): opt: use a two-pass approach that
// first gathers the set of Pos values and then
// encodes only the information necessary for them.
// This would allow us to discard the lines after the
// last object of interest and to run-length encode the
// trivial lines between lines with needed positions.
lines := getLines(info.file)
files.uint64(uint64(len(lines)))
for i := 1; i < len(lines); i++ {
files.uint64(uint64(lines[i] - lines[i-1]))
}
}
}
// Append indices to data0 section.
dataLen := uint64(p.data0.Len())
w := p.newWriter()
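The loop above delta-encodes the line-start offsets returned by getLines, dropping the leading zero. A hedged round-trip sketch of that encoding; encodeLineDeltas and decodeLineDeltas are hypothetical helpers, not part of the package.

// encodeLineDeltas mirrors the writer loop above: the first line always
// starts at offset 0, so only the gaps between successive line starts
// are written.
func encodeLineDeltas(lines []int) []uint64 {
    deltas := make([]uint64, 0, len(lines))
    for i := 1; i < len(lines); i++ {
        deltas = append(deltas, uint64(lines[i]-lines[i-1]))
    }
    return deltas
}

// decodeLineDeltas reverses the encoding by cumulative sum, restoring the
// implicit leading zero; an empty file round-trips to an empty table.
func decodeLineDeltas(nlines int, deltas []uint64) []int {
    lines := make([]int, 0, nlines)
    if nlines > 0 {
        lines = append(lines, 0)
        for _, d := range deltas {
            lines = append(lines, lines[len(lines)-1]+int(d))
        }
    }
    return lines
}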
@@ -167,7 +194,11 @@ func iexportCommon(out io.Writer, fset *token.FileSet, bundle, shallow bool, ver
hdr.uint64(uint64(p.version))
hdr.uint64(uint64(p.strings.Len()))
if p.shallow {
hdr.uint64(uint64(p.files.Len()))
hdr.uint64(uint64(files.Len()))
hdr.uint64(uint64(len(fileOffset)))
for _, offset := range fileOffset {
hdr.uint64(offset)
}
}
hdr.uint64(dataLen)
@@ -175,7 +206,7 @@ func iexportCommon(out io.Writer, fset *token.FileSet, bundle, shallow bool, ver
io.Copy(out, &hdr)
io.Copy(out, &p.strings)
if p.shallow {
io.Copy(out, &p.files)
io.Copy(out, &files)
}
io.Copy(out, &p.data0)
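Taken together with the previous hunk, the shallow header now carries: version, string-section length, files-section length, file count, one offset per file, then the declaration-data length, followed by the strings, files, and data0 sections. A sketch of consuming those fields in the order they are written; uint64Reader stands in for the package's intReader and is an assumption of the sketch.

// uint64Reader is a stand-in for the package's intReader.
type uint64Reader interface{ uint64() uint64 }

// readShallowHeader reads the header fields in the order the exporter
// writes them (see the hunks above).
func readShallowHeader(r uint64Reader, shallow bool) (sLen, fLen, dLen int64, fileOffset []uint64) {
    _ = r.uint64()           // version
    sLen = int64(r.uint64()) // length of the string section
    if shallow {
        fLen = int64(r.uint64())                // length of the files section
        fileOffset = make([]uint64, r.uint64()) // one byte offset per file index
        for i := range fileOffset {
            fileOffset[i] = r.uint64()
        }
    }
    dLen = int64(r.uint64()) // length of the declaration data
    return
}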
@@ -266,8 +297,9 @@ type iexporter struct {
// In shallow mode, object positions are encoded as (file, offset).
// Each file is recorded as a line-number table.
files intWriter
fileIndex map[*token.File]uint64
// Only the lines of needed positions are saved faithfully.
fileInfo map[*token.File]uint64 // value is index in fileInfos
fileInfos []*filePositions
data0 intWriter
declIndex map[types.Object]uint64
@@ -277,6 +309,11 @@ type iexporter struct {
indent int // for tracing support
}
type filePositions struct {
file *token.File
needed []token.Pos // unordered list of needed positions
}
func (p *iexporter) trace(format string, args ...interface{}) {
if !trace {
// Call sites should also be guarded, but having this check here allows
@@ -300,33 +337,21 @@ func (p *iexporter) stringOff(s string) uint64 {
return off
}
// fileOff returns the offset of the token.File encoding.
// If not already present, it's added to the end.
func (p *iexporter) fileOff(file *token.File) uint64 {
off, ok := p.fileIndex[file]
// fileIndex returns the index of the token.File.
func (p *iexporter) fileIndex(file *token.File, pos token.Pos) uint64 {
index, ok := p.fileInfo[file]
if !ok {
off = uint64(p.files.Len())
p.fileIndex[file] = off
p.files.uint64(p.stringOff(file.Name()))
p.files.uint64(uint64(file.Size()))
// Delta-encode the line offsets, omitting the initial zero.
// (An empty file has an empty lines array.)
//
// TODO(adonovan): opt: use a two-pass approach that
// first gathers the set of Pos values and then
// encodes only the information necessary for them.
// This would allow us to discard the lines after the
// last object of interest and to run-length encode the
// trivial lines between lines with needed positions.
lines := getLines(file)
p.files.uint64(uint64(len(lines)))
for i := 1; i < len(lines); i++ {
p.files.uint64(uint64(lines[i] - lines[i-1]))
index = uint64(len(p.fileInfo))
p.fileInfos = append(p.fileInfos, &filePositions{file: file})
if p.fileInfo == nil {
p.fileInfo = make(map[*token.File]uint64)
}
p.fileInfo[file] = index
}
return off
// Record each needed position.
info := p.fileInfos[index]
info.needed = append(info.needed, pos)
return index
}
// pushDecl adds n to the declaration work queue, if not already present.
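fileIndex now also records, per file, every position that is actually referenced (filePositions.needed), although this change still writes the complete line table. A rough sketch of the second pass that the TODO and the commit message describe, collapsing the recorded positions into the set of lines worth keeping; the helper is hypothetical and the eventual encoding may well differ.

import "go/token"

// neededLines reduces the positions recorded for one file to the set of
// line numbers that must survive, so trailing lines can be dropped and
// unreferenced runs can later be run-length encoded. Hypothetical; not
// part of this change.
func neededLines(file *token.File, needed []token.Pos) map[int]bool {
    keep := make(map[int]bool)
    for _, pos := range needed {
        keep[file.Line(pos)] = true
    }
    return keep
}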
@@ -526,7 +551,7 @@ func (w *exportWriter) posV2(pos token.Pos) {
return
}
file := w.p.fset.File(pos) // fset must be non-nil
w.uint64(1 + w.p.fileOff(file))
w.uint64(1 + w.p.fileIndex(file, pos))
w.uint64(uint64(file.Offset(pos)))
}
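A shallow position is therefore written as the pair (file index + 1, byte offset within the file); the +1 presumably leaves zero free to mean "no position" (the early return above). A hedged decoding sketch built on the importer's fileAt; the helper, its interface, and the zero convention are assumptions of the sketch.

import "go/token"

// fileLookup is a stand-in for the importer's fileAt method.
type fileLookup interface{ fileAt(index uint64) *token.File }

// decodePos reverses the pair written by posV2 above.
func decodePos(p fileLookup, fileIndexPlus1, offset uint64) token.Pos {
    if fileIndexPlus1 == 0 {
        return token.NoPos // no recorded position
    }
    file := p.fileAt(fileIndexPlus1 - 1) // look up the file by its index
    return file.Pos(int(offset))         // byte offset within that file
}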

View file

@@ -138,9 +138,14 @@ func iimportCommon(fset *token.FileSet, imports map[string]*types.Package, data
sLen := int64(r.uint64())
var fLen int64
var fileOffset []uint64
if insert != nil {
// shallow mode uses a different position encoding
// Shallow mode uses a different position encoding.
fLen = int64(r.uint64())
fileOffset = make([]uint64, r.uint64())
for i := range fileOffset {
fileOffset[i] = r.uint64()
}
}
dLen := int64(r.uint64())
@@ -157,8 +162,9 @@ func iimportCommon(fset *token.FileSet, imports map[string]*types.Package, data
stringData: stringData,
stringCache: make(map[uint64]string),
fileOffset: fileOffset,
fileData: fileData,
fileCache: make(map[uint64]*token.File),
fileCache: make([]*token.File, len(fileOffset)),
pkgCache: make(map[uint64]*types.Package),
declData: declData,
@@ -288,8 +294,9 @@ type iimporter struct {
stringData []byte
stringCache map[uint64]string
fileOffset []uint64 // fileOffset[i] is offset in fileData for info about file encoded as i
fileData []byte
fileCache map[uint64]*token.File
fileCache []*token.File // memoized decoding of file encoded as i
pkgCache map[uint64]*types.Package
declData []byte
@@ -362,9 +369,10 @@ func (p *iimporter) stringAt(off uint64) string {
return s
}
func (p *iimporter) fileAt(off uint64) *token.File {
file, ok := p.fileCache[off]
if !ok {
func (p *iimporter) fileAt(index uint64) *token.File {
file := p.fileCache[index]
if file == nil {
off := p.fileOffset[index]
rd := intReader{bytes.NewReader(p.fileData[off:]), p.ipath}
filename := p.stringAt(rd.uint64())
size := int(rd.uint64())
@@ -380,7 +388,7 @@ func (p *iimporter) fileAt(off uint64) *token.File {
}
}
p.fileCache[off] = file
p.fileCache[index] = file
}
return file
}
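For completeness, a sketch of how one file record might be turned back into a *token.File on the importer side: register the file with the FileSet, then reinstall the line-start table obtained by summing the deltas. The helper, its signature, and its use of (*token.File).SetLines are assumptions; the elided body of fileAt plays this role in the real code.

import "go/token"

// decodeFile rebuilds a *token.File from the fields of one file record.
func decodeFile(fset *token.FileSet, filename string, size, nlines int, deltas []uint64) *token.File {
    file := fset.AddFile(filename, -1, size) // -1: next available base
    // Rebuild the line-start offsets by cumulative sum; the leading zero
    // was omitted by the writer, and an empty file has no lines at all.
    lines := make([]int, 0, nlines)
    if nlines > 0 {
        lines = append(lines, 0)
        for _, d := range deltas {
            lines = append(lines, lines[len(lines)-1]+int(d))
        }
    }
    if !file.SetLines(lines) {
        panic("invalid line offsets in export data")
    }
    return file
}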