Speed up getting a large number of commits

This commit is contained in:
Unknwon 2015-12-13 22:57:47 -05:00
Родитель 3835dd7748
Коммит ebd9fb2253
8 изменённых файлов: 155 добавлений и 42 удалений

Просмотреть файл

@ -95,7 +95,7 @@ func (c *Command) RunInDirTimeout(timeout time.Duration, dir string) ([]byte, er
}
if stdout.Len() > 0 {
log("stdout:\n%s", stdout)
log("stdout:\n%s", stdout.Bytes()[:1024])
}
return stdout.Bytes(), nil
}

Просмотреть файл

@ -23,8 +23,8 @@ type Commit struct {
Committer *Signature
CommitMessage string
parents []sha1 // SHA1 strings
submodules map[string]*SubModule
parents []sha1 // SHA1 strings
submoduleCache *objectCache
}
// Message returns the commit message. Same as retrieving CommitMessage directly.
@ -180,17 +180,9 @@ func (c *Commit) SearchCommits(keyword string) (*list.List, error) {
return c.repo.searchCommits(c.ID, keyword)
}
func (c *Commit) GetSubModule(entryname string) (*SubModule, error) {
modules, err := c.GetSubModules()
if err != nil {
return nil, err
}
return modules[entryname], nil
}
func (c *Commit) GetSubModules() (map[string]*SubModule, error) {
if c.submodules != nil {
return c.submodules, nil
func (c *Commit) GetSubModules() (*objectCache, error) {
if c.submoduleCache != nil {
return c.submoduleCache, nil
}
entry, err := c.GetTreeEntryByPath(".gitmodules")
@ -203,7 +195,6 @@ func (c *Commit) GetSubModules() (map[string]*SubModule, error) {
}
scanner := bufio.NewScanner(rd)
c.submodules = make(map[string]*SubModule)
var ismodule bool
var path string
for scanner.Scan() {
@ -217,11 +208,24 @@ func (c *Commit) GetSubModules() (map[string]*SubModule, error) {
if k == "path" {
path = strings.TrimSpace(fields[1])
} else if k == "url" {
c.submodules[path] = &SubModule{path, strings.TrimSpace(fields[1])}
c.submoduleCache.Set(path, &SubModule{path, strings.TrimSpace(fields[1])})
ismodule = false
}
}
}
return c.submodules, nil
return c.submoduleCache, nil
}
// GetSubModule returns the submodule stored under entryname in the cache
// produced by GetSubModules. It returns (nil, nil) when the cache holds no
// entry for that name, and forwards any error reported by GetSubModules.
func (c *Commit) GetSubModule(entryname string) (*SubModule, error) {
	cache, err := c.GetSubModules()
	if err != nil {
		return nil, err
	}

	obj, found := cache.Get(entryname)
	if !found {
		// An unknown entry is not treated as an error.
		return nil, nil
	}
	return obj.(*SubModule), nil
}

2
git.go
Просмотреть файл

@ -10,7 +10,7 @@ import (
"time"
)
const _VERSION = "0.1.1"
const _VERSION = "0.2.0"
func Version() string {
return _VERSION

10
repo.go
Просмотреть файл

@ -18,8 +18,8 @@ import (
type Repository struct {
Path string
commitCache map[sha1]*Commit
tagCache map[sha1]*Tag
commitCache *objectCache
tagCache *objectCache
}
const _PRETTY_LOG_FORMAT = `--pretty=format:%H`
@ -64,7 +64,11 @@ func OpenRepository(repoPath string) (*Repository, error) {
return nil, errors.New("no such file or directory")
}
return &Repository{Path: repoPath}, nil
return &Repository{
Path: repoPath,
commitCache: newObjectCache(),
tagCache: newObjectCache(),
}, nil
}
type CloneRepoOptions struct {

Просмотреть файл

@ -36,6 +36,7 @@ func (repo *Repository) GetTagCommitID(name string) (string, error) {
// \n\n separate headers from message
func parseCommitData(data []byte) (*Commit, error) {
commit := new(Commit)
commit.submoduleCache = newObjectCache()
commit.parents = make([]sha1, 0, 1)
// we now have the contents of the commit object. Let's investigate...
nextline := 0
@ -86,13 +87,10 @@ l:
}
func (repo *Repository) getCommit(id sha1) (*Commit, error) {
if repo.commitCache != nil {
c, ok := repo.commitCache.Get(id.String())
if ok {
log("Hit cache: %s", id)
if c, ok := repo.commitCache[id]; ok {
return c, nil
}
} else {
repo.commitCache = make(map[sha1]*Commit, 10)
return c.(*Commit), nil
}
data, err := NewCommand("cat-file", "-p", id.String()).RunInDirBytes(repo.Path)
@ -107,7 +105,7 @@ func (repo *Repository) getCommit(id sha1) (*Commit, error) {
commit.repo = repo
commit.ID = id
repo.commitCache[id] = commit
repo.commitCache.Set(id.String(), commit)
return commit, nil
}

Просмотреть файл

@ -27,12 +27,10 @@ func (repo *Repository) CreateTag(name, revision string) error {
}
func (repo *Repository) getTag(id sha1) (*Tag, error) {
if repo.tagCache != nil {
if t, ok := repo.tagCache[id]; ok {
return t, nil
}
} else {
repo.tagCache = make(map[sha1]*Tag, 10)
t, ok := repo.tagCache.Get(id.String())
if ok {
log("Hit cache: %s", id)
return t.(*Tag), nil
}
// Get tag type
@ -50,7 +48,8 @@ func (repo *Repository) getTag(id sha1) (*Tag, error) {
Type: string(OBJECT_COMMIT),
repo: repo,
}
repo.tagCache[id] = tag
repo.tagCache.Set(id.String(), tag)
return tag, nil
}
@ -68,7 +67,7 @@ func (repo *Repository) getTag(id sha1) (*Tag, error) {
tag.ID = id
tag.repo = repo
repo.tagCache[id] = tag
repo.tagCache.Set(id.String(), tag)
return tag, nil
}

Просмотреть файл

@ -5,6 +5,9 @@
package git
import (
"fmt"
"path"
"path/filepath"
"sort"
"strconv"
"strings"
@ -84,10 +87,10 @@ var sorter = []func(t1, t2 *TreeEntry) bool{
},
}
func (bs Entries) Len() int { return len(bs) }
func (bs Entries) Swap(i, j int) { bs[i], bs[j] = bs[j], bs[i] }
func (bs Entries) Less(i, j int) bool {
t1, t2 := bs[i], bs[j]
func (tes Entries) Len() int { return len(tes) }
func (tes Entries) Swap(i, j int) { tes[i], tes[j] = tes[j], tes[i] }
func (tes Entries) Less(i, j int) bool {
t1, t2 := tes[i], tes[j]
var k int
for k = 0; k < len(sorter)-1; k++ {
sort := sorter[k]
@ -101,6 +104,83 @@ func (bs Entries) Less(i, j int) bool {
return sorter[k](t1, t2)
}
func (bs Entries) Sort() {
sort.Sort(bs)
func (tes Entries) Sort() {
sort.Sort(tes)
}
// commitInfo carries the outcome of resolving a single tree entry.
type commitInfo struct {
	id    string        // object ID of the tree entry
	index int           // position of the entry within the Entries slice
	infos []interface{} // resolved pair for the entry; left nil when err is set
	err   error         // lookup failure, if any
}

// GetCommitsInfo takes advantage of concurrency to speed up getting information
// of all commits that correspond to these entries.
//
// The returned slice is parallel to tes: element i is {tes[i], commit} for a
// regular entry, or {tes[i], value returned by NewSubModuleFile} for an entry
// of type OBJECT_COMMIT (a submodule). An empty tes yields (nil, nil). The
// first lookup error received is returned and the partial results are dropped.
func (tes Entries) GetCommitsInfo(commit *Commit, treePath string) ([][]interface{}, error) {
	if len(tes) == 0 {
		return nil, nil
	}

	// Upper bound on the number of lookups running at the same time.
	const maxConcurrentLookups = 10

	// One buffer slot per entry: a worker can always deliver its result and
	// exit, even after this function has already returned on an error.
	revChan := make(chan commitInfo, len(tes))
	slots := make(chan struct{}, maxConcurrentLookups)

	for i := range tes {
		go func(i int) {
			slots <- struct{}{}
			defer func() { <-slots }()
			revChan <- tes.getCommitInfo(commit, treePath, i)
		}(i)
	}

	// Results are placed by index rather than keyed by object ID, so entries
	// that share the same ID each keep their own result.
	commitsInfo := make([][]interface{}, len(tes))
	for range tes {
		info := <-revChan
		if info.err != nil {
			return nil, info.err
		}
		commitsInfo[info.index] = info.infos
	}
	return commitsInfo, nil
}

// getCommitInfo resolves the entry at index i to its commit, and for submodule
// entries also to its submodule URL, reporting failures through the err field.
func (tes Entries) getCommitInfo(commit *Commit, treePath string, i int) commitInfo {
	entry := tes[i]
	cinfo := commitInfo{id: entry.ID.String(), index: i}
	isSubModule := entry.Type == OBJECT_COMMIT

	smURL := ""
	if isSubModule {
		sm, err := commit.GetSubModule(path.Join(treePath, entry.Name()))
		if err != nil {
			cinfo.err = fmt.Errorf("GetSubModule (%s/%s): %v", treePath, entry.Name(), err)
			return cinfo
		}
		// A missing submodule record is tolerated; the URL just stays empty.
		if sm != nil {
			smURL = sm.Url
		}
	}

	c, err := commit.GetCommitByPath(filepath.Join(treePath, entry.Name()))
	if err != nil {
		cinfo.err = fmt.Errorf("GetCommitByPath (%s/%s): %v", treePath, entry.Name(), err)
		return cinfo
	}

	if isSubModule {
		cinfo.infos = []interface{}{entry, NewSubModuleFile(c, smURL, entry.ID.String())}
	} else {
		cinfo.infos = []interface{}{entry, c}
	}
	return cinfo
}

Просмотреть файл

@ -9,8 +9,36 @@ import (
"os"
"path/filepath"
"strings"
"sync"
)
// objectCache provides thread-safe cache operations.
// It maps string IDs to arbitrary objects; every access to the map is
// guarded by lock.
type objectCache struct {
	lock  sync.RWMutex
	cache map[string]interface{}
}

// newObjectCache returns an empty cache with room pre-allocated for 10 entries.
func newObjectCache() *objectCache {
	return &objectCache{
		cache: make(map[string]interface{}, 10),
	}
}

// Set stores obj under id, replacing any value previously stored for that id.
func (oc *objectCache) Set(id string, obj interface{}) {
	oc.lock.Lock()
	defer oc.lock.Unlock()

	// Allocate lazily so a zero-value objectCache does not panic on write.
	if oc.cache == nil {
		oc.cache = make(map[string]interface{}, 10)
	}
	oc.cache[id] = obj
}

// Get returns the object stored under id and whether such an entry exists.
func (oc *objectCache) Get(id string) (interface{}, bool) {
	oc.lock.RLock()
	defer oc.lock.RUnlock()

	obj, has := oc.cache[id]
	return obj, has
}
// isDir returns true if given path is a directory,
// or returns false when it's a file or does not exist.
func isDir(dir string) bool {