зеркало из https://github.com/golang/build.git
462 строки
12 KiB
Go
462 строки
12 KiB
Go
|
// Copyright 2017 The Go Authors. All rights reserved.
|
||
|
// Use of this source code is governed by a BSD-style
|
||
|
// license that can be found in the LICENSE file.
|
||
|
|
||
|
package maintner
|
||
|
|
||
|
import (
|
||
|
"context"
|
||
|
"fmt"
|
||
|
"io/ioutil"
|
||
|
"log"
|
||
|
"strings"
|
||
|
"time"
|
||
|
|
||
|
"github.com/golang/protobuf/ptypes"
|
||
|
"github.com/google/go-github/github"
|
||
|
|
||
|
"golang.org/x/build/maintner/maintpb"
|
||
|
"golang.org/x/oauth2"
|
||
|
)
|
||
|
|
||
|
// githubRepo names a GitHub repository in "<org>/<repo>" form, for example
// "golang/go". Values are expected to be lowercase.
type githubRepo string

// Org returns the portion of gr before its first '/' ("golang" for
// "golang/go"). It returns the empty string when gr contains no '/'.
func (gr githubRepo) Org() string {
	s := string(gr)
	if i := strings.IndexByte(s, '/'); i >= 0 {
		return s[:i]
	}
	return ""
}

// Repo returns the portion of gr after its first '/' ("go" for "golang/go").
// It returns the empty string when gr contains no '/' or when nothing
// follows the '/'.
func (gr githubRepo) Repo() string {
	s := string(gr)
	i := strings.IndexByte(s, '/')
	if i < 0 || i+1 == len(s) {
		return ""
	}
	return s[i+1:]
}
|
||
|
|
||
|
func (c *Corpus) repoKey(owner, repo string) githubRepo {
|
||
|
if owner == "" || repo == "" {
|
||
|
return ""
|
||
|
}
|
||
|
// TODO: avoid garbage, use interned strings? profile later
|
||
|
// once we have gigabytes of mutation logs to slurp at
|
||
|
// start-up. (The same thing mattered for Camlistore start-up
|
||
|
// time at least)
|
||
|
return githubRepo(owner + "/" + repo)
|
||
|
}
|
||
|
|
||
|
// githubUser is the corpus's in-memory record of a GitHub user.
// It holds a subset of the fields described at
// https://developer.github.com/v3/users/#get-a-single-user
type githubUser struct {
	ID    int64  // GitHub user ID; used as the key of Corpus.githubUsers
	Login string // GitHub login name
}
|
||
|
|
||
|
// githubIssue represents a github issue.
|
||
|
// See https://developer.github.com/v3/issues/#get-a-single-issue
|
||
|
type githubIssue struct {
|
||
|
ID int64
|
||
|
Number int32
|
||
|
Closed bool
|
||
|
User *githubUser
|
||
|
Assignees []*githubUser
|
||
|
Created time.Time
|
||
|
Updated time.Time
|
||
|
Title string
|
||
|
Body string
|
||
|
// TODO Comments ...
|
||
|
}
|
||
|
|
||
|
func (c *Corpus) AddGithub(owner, repo, tokenFile string) {
|
||
|
c.mu.Lock()
|
||
|
defer c.mu.Unlock()
|
||
|
c.pollGithubIssues = append(c.pollGithubIssues, polledGithubIssues{
|
||
|
name: githubRepo(owner + "/" + repo),
|
||
|
tokenFile: tokenFile,
|
||
|
})
|
||
|
}
|
||
|
|
||
|
type polledGithubIssues struct {
|
||
|
name githubRepo
|
||
|
tokenFile string
|
||
|
}
|
||
|
|
||
|
// c.mu must be held
|
||
|
func (c *Corpus) getGithubUser(pu *maintpb.GithubUser) *githubUser {
|
||
|
if pu == nil {
|
||
|
return nil
|
||
|
}
|
||
|
if u := c.githubUsers[pu.Id]; u != nil {
|
||
|
if pu.Login != "" && pu.Login != u.Login {
|
||
|
u.Login = pu.Login
|
||
|
}
|
||
|
return u
|
||
|
}
|
||
|
if c.githubUsers == nil {
|
||
|
c.githubUsers = make(map[int64]*githubUser)
|
||
|
}
|
||
|
u := &githubUser{
|
||
|
ID: pu.Id,
|
||
|
Login: pu.Login,
|
||
|
}
|
||
|
c.githubUsers[pu.Id] = u
|
||
|
return u
|
||
|
}
|
||
|
|
||
|
// newGithubUserProto creates a GithubUser with the minimum diff between
|
||
|
// existing and g. The return value is nil if there were no changes. existing
|
||
|
// may also be nil.
|
||
|
func newGithubUserProto(existing *maintpb.GithubUser, g *github.User) *maintpb.GithubUser {
|
||
|
if g == nil {
|
||
|
return nil
|
||
|
}
|
||
|
id := int64(g.GetID())
|
||
|
if existing == nil {
|
||
|
return &maintpb.GithubUser{
|
||
|
Id: id,
|
||
|
Login: g.GetLogin(),
|
||
|
}
|
||
|
}
|
||
|
hasChanges := false
|
||
|
u := &maintpb.GithubUser{Id: id}
|
||
|
if login := g.GetLogin(); existing.Login != login {
|
||
|
u.Login = login
|
||
|
hasChanges = true
|
||
|
}
|
||
|
// Add more fields here
|
||
|
if hasChanges {
|
||
|
return u
|
||
|
}
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
// deletedAssignees returns an array of user ID's that are present in existing
|
||
|
// but not present in new.
|
||
|
func deletedAssignees(existing []*githubUser, new []*github.User) []int64 {
|
||
|
mp := make(map[int64]bool, len(existing))
|
||
|
for _, u := range new {
|
||
|
id := int64(u.GetID())
|
||
|
mp[id] = true
|
||
|
}
|
||
|
toDelete := []int64{}
|
||
|
for _, u := range existing {
|
||
|
if _, ok := mp[u.ID]; !ok {
|
||
|
toDelete = append(toDelete, u.ID)
|
||
|
}
|
||
|
}
|
||
|
return toDelete
|
||
|
}
|
||
|
|
||
|
// newAssignees returns an array of diffs between existing and new. New users in
|
||
|
// new will be present in the returned array in their entirety. Modified users
|
||
|
// will appear containing only the ID field and changed fields. Unmodified users
|
||
|
// will not appear in the returned array.
|
||
|
func newAssignees(existing []*githubUser, new []*github.User) []*maintpb.GithubUser {
|
||
|
mp := make(map[int64]*githubUser, len(existing))
|
||
|
for _, u := range existing {
|
||
|
mp[u.ID] = u
|
||
|
}
|
||
|
changes := []*maintpb.GithubUser{}
|
||
|
for _, u := range new {
|
||
|
if existingUser, ok := mp[int64(u.GetID())]; ok {
|
||
|
diffUser := &maintpb.GithubUser{
|
||
|
Id: int64(u.GetID()),
|
||
|
}
|
||
|
hasDiff := false
|
||
|
if login := u.GetLogin(); existingUser.Login != login {
|
||
|
diffUser.Login = login
|
||
|
hasDiff = true
|
||
|
}
|
||
|
// check more User fields for diffs here, as we add them to the proto
|
||
|
|
||
|
if hasDiff {
|
||
|
changes = append(changes, diffUser)
|
||
|
}
|
||
|
} else {
|
||
|
changes = append(changes, &maintpb.GithubUser{
|
||
|
Id: int64(u.GetID()),
|
||
|
Login: u.GetLogin(),
|
||
|
})
|
||
|
}
|
||
|
}
|
||
|
return changes
|
||
|
}
|
||
|
|
||
|
// setAssigneesFromProto returns a new array of assignees according to the
|
||
|
// instructions in new (adds or modifies users in existing ), and toDelete
|
||
|
// (deletes them). c.mu must be held.
|
||
|
func (c *Corpus) setAssigneesFromProto(existing []*githubUser, new []*maintpb.GithubUser, toDelete []int64) ([]*githubUser, bool) {
|
||
|
mp := make(map[int64]*githubUser)
|
||
|
for _, u := range existing {
|
||
|
mp[u.ID] = u
|
||
|
}
|
||
|
for _, u := range new {
|
||
|
if existingUser, ok := mp[u.Id]; ok {
|
||
|
if u.Login != "" {
|
||
|
existingUser.Login = u.Login
|
||
|
}
|
||
|
// TODO: add other fields here when we add them for user.
|
||
|
} else {
|
||
|
c.debugf("adding assignee %q", u.Login)
|
||
|
existing = append(existing, c.getGithubUser(u))
|
||
|
}
|
||
|
}
|
||
|
// IDs to delete, in descending order
|
||
|
idxsToDelete := []int{}
|
||
|
// this is quadratic but the number of assignees is very unlikely to exceed,
|
||
|
// say, 5.
|
||
|
for _, id := range toDelete {
|
||
|
for i, u := range existing {
|
||
|
if u.ID == id {
|
||
|
idxsToDelete = append([]int{i}, idxsToDelete...)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
for _, idx := range idxsToDelete {
|
||
|
c.debugf("deleting assignee %q", existing[idx].Login)
|
||
|
existing = append(existing[:idx], existing[idx+1:]...)
|
||
|
}
|
||
|
return existing, len(toDelete) > 0 || len(new) > 0
|
||
|
}
|
||
|
|
||
|
// newMutationFromIssue generates a GithubIssueMutation using the smallest
|
||
|
// possible diff between ci (a corpus Issue) and gi (an external github issue).
|
||
|
//
|
||
|
// If newMutationFromIssue returns nil, the provided github.Issue is no newer
|
||
|
// than the data we have in the corpus. ci may be nil.
|
||
|
func newMutationFromIssue(ci *githubIssue, gi *github.Issue, rp githubRepo) *maintpb.Mutation {
|
||
|
if gi == nil || gi.Number == nil {
|
||
|
panic(fmt.Sprintf("github issue with nil number: %#v", gi))
|
||
|
}
|
||
|
owner, repo := rp.Org(), rp.Repo()
|
||
|
// always need these fields to figure out which key to write to
|
||
|
m := &maintpb.GithubIssueMutation{
|
||
|
Owner: owner,
|
||
|
Repo: repo,
|
||
|
Number: int32(gi.GetNumber()),
|
||
|
}
|
||
|
if ci == nil {
|
||
|
// We don't know about this github issue, so populate all fields in one
|
||
|
// mutation.
|
||
|
if gi.CreatedAt != nil {
|
||
|
tproto, err := ptypes.TimestampProto(gi.GetCreatedAt())
|
||
|
if err != nil {
|
||
|
panic(err)
|
||
|
}
|
||
|
m.Created = tproto
|
||
|
}
|
||
|
if gi.UpdatedAt != nil {
|
||
|
tproto, err := ptypes.TimestampProto(gi.GetUpdatedAt())
|
||
|
if err != nil {
|
||
|
panic(err)
|
||
|
}
|
||
|
m.Updated = tproto
|
||
|
}
|
||
|
m.Body = gi.GetBody()
|
||
|
m.Title = gi.GetTitle()
|
||
|
if gi.User != nil {
|
||
|
m.User = newGithubUserProto(nil, gi.User)
|
||
|
}
|
||
|
m.Assignees = newAssignees(nil, gi.Assignees)
|
||
|
// no deleted assignees on first run
|
||
|
return &maintpb.Mutation{GithubIssue: m}
|
||
|
}
|
||
|
if gi.UpdatedAt != nil {
|
||
|
if !gi.UpdatedAt.After(ci.Updated) {
|
||
|
// This data is stale, ignore it.
|
||
|
return nil
|
||
|
}
|
||
|
tproto, err := ptypes.TimestampProto(gi.GetUpdatedAt())
|
||
|
if err != nil {
|
||
|
panic(err)
|
||
|
}
|
||
|
m.Updated = tproto
|
||
|
}
|
||
|
if body := gi.GetBody(); body != ci.Body {
|
||
|
m.Body = body
|
||
|
}
|
||
|
if title := gi.GetTitle(); title != ci.Title {
|
||
|
m.Title = title
|
||
|
}
|
||
|
if gi.User != nil {
|
||
|
m.User = newGithubUserProto(m.User, gi.User)
|
||
|
}
|
||
|
m.Assignees = newAssignees(ci.Assignees, gi.Assignees)
|
||
|
m.DeletedAssignees = deletedAssignees(ci.Assignees, gi.Assignees)
|
||
|
return &maintpb.Mutation{GithubIssue: m}
|
||
|
}
|
||
|
|
||
|
// getIssue finds an issue in the Corpus or returns nil, false if it is not
|
||
|
// present.
|
||
|
func (c *Corpus) getIssue(rp githubRepo, number int32) (*githubIssue, bool) {
|
||
|
c.mu.RLock()
|
||
|
defer c.mu.RUnlock()
|
||
|
issueMap, ok := c.githubIssues[rp]
|
||
|
if !ok {
|
||
|
return nil, false
|
||
|
}
|
||
|
gi, ok := issueMap[number]
|
||
|
return gi, ok
|
||
|
}
|
||
|
|
||
|
// processGithubIssueMutation updates the corpus with the information in m, and
|
||
|
// returns true if the Corpus was modified.
|
||
|
func (c *Corpus) processGithubIssueMutation(m *maintpb.GithubIssueMutation) (changed bool) {
|
||
|
if c == nil {
|
||
|
panic("nil corpus")
|
||
|
}
|
||
|
k := c.repoKey(m.Owner, m.Repo)
|
||
|
if k == "" {
|
||
|
// TODO: errors? return false? skip for now.
|
||
|
return
|
||
|
}
|
||
|
if m.Number == 0 {
|
||
|
return
|
||
|
}
|
||
|
issueMap, ok := c.githubIssues[k]
|
||
|
if !ok {
|
||
|
if c.githubIssues == nil {
|
||
|
c.githubIssues = make(map[githubRepo]map[int32]*githubIssue)
|
||
|
}
|
||
|
issueMap = make(map[int32]*githubIssue)
|
||
|
c.githubIssues[k] = issueMap
|
||
|
}
|
||
|
gi, ok := issueMap[m.Number]
|
||
|
if !ok {
|
||
|
created, err := ptypes.Timestamp(m.Created)
|
||
|
if err != nil {
|
||
|
panic(err)
|
||
|
}
|
||
|
gi = &githubIssue{
|
||
|
// User added below
|
||
|
Number: m.Number,
|
||
|
ID: m.Id,
|
||
|
Created: created,
|
||
|
Assignees: []*githubUser{},
|
||
|
}
|
||
|
issueMap[m.Number] = gi
|
||
|
changed = true
|
||
|
}
|
||
|
// Check Updated before all other fields so they don't update if this
|
||
|
// Mutation is stale
|
||
|
if m.Updated != nil {
|
||
|
updated, err := ptypes.Timestamp(m.Updated)
|
||
|
if err != nil {
|
||
|
panic(err)
|
||
|
}
|
||
|
if !updated.IsZero() && updated.Before(gi.Updated) {
|
||
|
// this mutation represents data older than the data we have in
|
||
|
// the corpus; ignore it.
|
||
|
return false
|
||
|
}
|
||
|
changed = changed || updated.After(gi.Updated)
|
||
|
gi.Updated = updated
|
||
|
}
|
||
|
if m.User != nil {
|
||
|
gi.User = c.getGithubUser(m.User)
|
||
|
}
|
||
|
|
||
|
gi.Assignees, ok = c.setAssigneesFromProto(gi.Assignees, m.Assignees, m.DeletedAssignees)
|
||
|
changed = changed || ok
|
||
|
|
||
|
if m.Body != "" {
|
||
|
changed = changed || m.Body != gi.Body
|
||
|
gi.Body = m.Body
|
||
|
}
|
||
|
if m.Title != "" {
|
||
|
changed = changed || m.Title != gi.Title
|
||
|
gi.Title = m.Title
|
||
|
}
|
||
|
// ignoring Created since it *should* never update
|
||
|
return changed
|
||
|
}
|
||
|
|
||
|
// PollGithubLoop checks for new changes on a single Github repository and
|
||
|
// updates the Corpus with any changes.
|
||
|
func (c *Corpus) PollGithubLoop(ctx context.Context, rp githubRepo, tokenFile string) error {
|
||
|
slurp, err := ioutil.ReadFile(tokenFile)
|
||
|
if err != nil {
|
||
|
return err
|
||
|
}
|
||
|
f := strings.SplitN(strings.TrimSpace(string(slurp)), ":", 2)
|
||
|
if len(f) != 2 || f[0] == "" || f[1] == "" {
|
||
|
return fmt.Errorf("Expected token file %s to be of form <username>:<token>", tokenFile)
|
||
|
}
|
||
|
ts := oauth2.StaticTokenSource(&oauth2.Token{AccessToken: f[1]})
|
||
|
tc := oauth2.NewClient(ctx, ts)
|
||
|
ghc := github.NewClient(tc)
|
||
|
for {
|
||
|
err := c.pollGithub(ctx, rp, ghc)
|
||
|
if err == context.Canceled {
|
||
|
return err
|
||
|
}
|
||
|
log.Printf("Polled github for %s; err = %v. Sleeping.", rp, err)
|
||
|
// TODO: select and listen for context errors
|
||
|
select {
|
||
|
case <-time.After(30 * time.Second):
|
||
|
continue
|
||
|
case <-ctx.Done():
|
||
|
return ctx.Err()
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (c *Corpus) pollGithub(ctx context.Context, rp githubRepo, ghc *github.Client) error {
|
||
|
log.Printf("Polling github for %s ...", rp)
|
||
|
page := 1
|
||
|
seen := make(map[int64]bool)
|
||
|
keepGoing := true
|
||
|
owner, repo := rp.Org(), rp.Repo()
|
||
|
for keepGoing {
|
||
|
// TODO: use https://godoc.org/github.com/google/go-github/github#ActivityService.ListIssueEventsForRepository probably
|
||
|
issues, _, err := ghc.Issues.ListByRepo(ctx, owner, repo, &github.IssueListByRepoOptions{
|
||
|
State: "all",
|
||
|
Sort: "updated",
|
||
|
Direction: "desc",
|
||
|
// TODO: if an issue gets updated while we are paging, we might
|
||
|
// process the same issue twice - as item 100 on page 1 and then
|
||
|
// again as item 1 on page 2.
|
||
|
ListOptions: github.ListOptions{
|
||
|
Page: page,
|
||
|
PerPage: 100,
|
||
|
},
|
||
|
})
|
||
|
if err != nil {
|
||
|
return err
|
||
|
}
|
||
|
log.Printf("github %s/%s: page %d, num issues %d", owner, repo, page, len(issues))
|
||
|
if len(issues) == 0 {
|
||
|
break
|
||
|
}
|
||
|
for _, is := range issues {
|
||
|
id := int64(is.GetID())
|
||
|
if seen[id] {
|
||
|
// If an issue gets updated (and bumped to the top) while we
|
||
|
// are paging, it's possible the last issue from page N can
|
||
|
// appear as the first issue on page N+1. Don't process that
|
||
|
// issue twice.
|
||
|
// https://github.com/google/go-github/issues/566
|
||
|
continue
|
||
|
}
|
||
|
seen[id] = true
|
||
|
gi, _ := c.getIssue(rp, int32(*is.Number))
|
||
|
mp := newMutationFromIssue(gi, is, rp)
|
||
|
if mp == nil {
|
||
|
keepGoing = false
|
||
|
break
|
||
|
}
|
||
|
fmt.Printf("modifying %s, issue %d: %s\n", rp, is.GetNumber(), is.GetTitle())
|
||
|
c.processMutation(mp)
|
||
|
}
|
||
|
page++
|
||
|
}
|
||
|
return nil
|
||
|
}
|