diff --git a/maintner/gerrit.go b/maintner/gerrit.go index 85c52739..493fb7ec 100644 --- a/maintner/gerrit.go +++ b/maintner/gerrit.go @@ -339,7 +339,7 @@ func (gp *GerritProject) getGerritStatus(currentMeta, oldMeta *GitCommit) string if len(commit.Parents) == 0 { return "new" } - parentHash := commit.Parents[0] // meta tree has no merge commits + parentHash := commit.Parents[0].Hash // meta tree has no merge commits commit = c.gitCommit[parentHash] if commit == nil { gp.logf("getGerritStatus: did not find parent commit %s", parentHash) @@ -402,7 +402,7 @@ func (gp *GerritProject) processMutation(gm *maintpb.GerritMutation) { delete(gp.need, gc.Hash) } for _, p := range gc.Parents { - gp.markNeededCommit(p) + gp.markNeededCommit(p.Hash) } } } diff --git a/maintner/git.go b/maintner/git.go index 6d95ee63..13a3e0c8 100644 --- a/maintner/git.go +++ b/maintner/git.go @@ -54,11 +54,17 @@ func (c *Corpus) gitHashFromHex(s []byte) GitHash { return GitHash(c.strb(buf[:20])) } +// placeholderCommitter is a sentinel value for GitCommit.Committer to +// mean that the GitCommit is a placeholder. It's used for commits we +// know should exist (because they're referenced as parents) but we +// haven't yet seen in the log. +var placeholderCommitter = new(GitPerson) + // GitCommit represents a single commit in a git repository. type GitCommit struct { Hash GitHash Tree GitHash - Parents []GitHash + Parents []*GitCommit Author *GitPerson AuthorTime time.Time Committer *GitPerson @@ -67,6 +73,32 @@ type GitCommit struct { Files []*maintpb.GitDiffTreeFile } +// HasAncestor reports whether gc contains the provided ancestor +// commit in gc's history. +func (gc *GitCommit) HasAncestor(ancestor *GitCommit) bool { + return gc.hasAncestor(ancestor, make(map[*GitCommit]bool)) +} + +func (gc *GitCommit) hasAncestor(ancestor *GitCommit, checked map[*GitCommit]bool) bool { + if v, ok := checked[ancestor]; ok { + return v + } + checked[gc] = false + for _, pc := range gc.Parents { + if pc == nil { + panic("nil parent") + } + if pc.Committer == placeholderCommitter { + panic("found placeholder") + } + if pc.Hash == ancestor.Hash || pc.hasAncestor(ancestor, checked) { + checked[gc] = true + return true + } + } + return false +} + // GitPerson is a person in a git commit. type GitPerson struct { Str string // "Foo Bar " @@ -266,6 +298,9 @@ func (c *Corpus) processGitMutation(m *maintpb.GitMutation) { // c.mu is held for writing. func (c *Corpus) processGitCommit(commit *maintpb.GitCommit) (*GitCommit, error) { + if c.gitCommit == nil { + c.gitCommit = map[GitHash]*GitCommit{} + } if len(commit.Sha1) != 40 { return nil, fmt.Errorf("bogus git sha1 %q", commit.Sha1) } @@ -279,7 +314,7 @@ func (c *Corpus) processGitCommit(commit *maintpb.GitCommit) (*GitCommit, error) hdr, msg := catFile[:i], catFile[i+2:] gc := &GitCommit{ Hash: hash, - Parents: make([]GitHash, 0, bytes.Count(hdr, parentSpace)), + Parents: make([]*GitCommit, 0, bytes.Count(hdr, parentSpace)), Msg: c.strb(msg), } if commit.DiffTree != nil { @@ -293,7 +328,16 @@ func (c *Corpus) processGitCommit(commit *maintpb.GitCommit) (*GitCommit, error) if bytes.HasPrefix(ln, parentSpace) { parents++ parentHash := c.gitHashFromHex(ln[len(parentSpace):]) - gc.Parents = append(gc.Parents, parentHash) + parent := c.gitCommit[parentHash] + if parent == nil { + // Install a placeholder to be filled in later. + parent = &GitCommit{ + Hash: parentHash, + Committer: placeholderCommitter, + } + c.gitCommit[parentHash] = parent + } + gc.Parents = append(gc.Parents, parent) c.enqueueCommitLocked(parentHash) return nil } @@ -344,10 +388,12 @@ func (c *Corpus) processGitCommit(commit *maintpb.GitCommit) (*GitCommit, error) log.Printf("Unparseable commit %q: %v", hash, err) return nil, fmt.Errorf("Unparseable commit %q: %v", hash, err) } - if c.gitCommit == nil { - c.gitCommit = map[GitHash]*GitCommit{} + if ph, ok := c.gitCommit[hash]; ok { + // Update placeholder. + *ph = *gc + } else { + c.gitCommit[hash] = gc } - c.gitCommit[hash] = gc if c.gitCommitTodo != nil { delete(c.gitCommitTodo, hash) } diff --git a/maintner/godata/godata_test.go b/maintner/godata/godata_test.go index a645feab..76f6a6d0 100644 --- a/maintner/godata/godata_test.go +++ b/maintner/godata/godata_test.go @@ -6,7 +6,10 @@ package godata import ( "context" + "sync" "testing" + + "golang.org/x/build/maintner" ) func BenchmarkGet(b *testing.B) { @@ -18,3 +21,76 @@ func BenchmarkGet(b *testing.B) { } } } + +var ( + corpusMu sync.Mutex + corpusCache *maintner.Corpus +) + +func getGoData(tb testing.TB) *maintner.Corpus { + corpusMu.Lock() + defer corpusMu.Unlock() + if corpusCache != nil { + return corpusCache + } + var err error + corpusCache, err = Get(context.Background()) + if err != nil { + tb.Fatalf("getting corpus: %v", err) + } + return corpusCache +} + +func TestCorpusCheck(t *testing.T) { + c := getGoData(t) + if err := c.Check(); err != nil { + t.Fatal(err) + } +} + +func TestGitAncestor(t *testing.T) { + c := getGoData(t) + tests := []struct { + subject, ancestor string + want bool + }{ + {"3b5637ff2bd5c03479780995e7a35c48222157c1", "0bb0b61d6a85b2a1a33dcbc418089656f2754d32", true}, + {"0bb0b61d6a85b2a1a33dcbc418089656f2754d32", "3b5637ff2bd5c03479780995e7a35c48222157c1", false}, + + // Same on both sides: + {"0bb0b61d6a85b2a1a33dcbc418089656f2754d32", "0bb0b61d6a85b2a1a33dcbc418089656f2754d32", false}, + {"3b5637ff2bd5c03479780995e7a35c48222157c1", "3b5637ff2bd5c03479780995e7a35c48222157c1", false}, + } + for i, tt := range tests { + subject := c.GitCommit(tt.subject) + if subject == nil { + t.Errorf("%d. missing subject commit %q", i, tt.subject) + continue + } + anc := c.GitCommit(tt.ancestor) + if anc == nil { + t.Errorf("%d. missing ancestor commit %q", i, tt.ancestor) + continue + } + got := subject.HasAncestor(anc) + if got != tt.want { + t.Errorf("HasAncestor(%q, %q) = %v; want %v", tt.subject, tt.ancestor, got, tt.want) + } + } +} + +func BenchmarkGitAncestor(b *testing.B) { + c := getGoData(b) + subject := c.GitCommit("3b5637ff2bd5c03479780995e7a35c48222157c1") + anc := c.GitCommit("0bb0b61d6a85b2a1a33dcbc418089656f2754d32") + if subject == nil || anc == nil { + b.Fatal("missing commit(s)") + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + if !subject.HasAncestor(anc) { + b.Fatal("wrong answer") + } + } +} diff --git a/maintner/maintner.go b/maintner/maintner.go index 1935a479..cfdd6886 100644 --- a/maintner/maintner.go +++ b/maintner/maintner.go @@ -60,6 +60,15 @@ type Corpus struct { zoneCache map[string]*time.Location // "+0530" => location } +// RLock grabs the corpus's read lock. Grabbing the read lock prevents +// any concurrent writes from mutation the corpus. This is only +// necessary if the application is querying the corpus and calling its +// Update method concurrently. +func (c *Corpus) RLock() { c.mu.RLock() } + +// RUnlock unlocks the corpus's read lock. +func (c *Corpus) RUnlock() { c.mu.RUnlock() } + type polledGitCommits struct { repo *maintpb.GitRepo dir string @@ -74,6 +83,7 @@ func (c *Corpus) EnableLeaderMode(logger MutationLogger, scratchDir string) { c.dataDir = scratchDir } +// SetVerbose enables or disables verbose logging. func (c *Corpus) SetVerbose(v bool) { c.verbose = v } func (c *Corpus) getDataDir() string { @@ -99,6 +109,20 @@ func (c *Corpus) Gerrit() *Gerrit { return new(Gerrit) } +// Check verifies the internal structure of the Corpus data structures. +// It is intended for tests and debugging. +func (c *Corpus) Check() error { + for hash, gc := range c.gitCommit { + if gc.Committer == placeholderCommitter { + return fmt.Errorf("git commit for key %q was placeholder", hash) + } + if gc.Hash != hash { + return fmt.Errorf("git commit for key %q had GitCommit.Hash %q", hash, gc.Hash) + } + } + return nil +} + // GitCommit returns the provided git commit, or nil if it's unknown. func (c *Corpus) GitCommit(hash string) *GitCommit { if len(hash) != 40 {