
351 строка
11 KiB

package gps
import (
// A remoteRepo represents a potential remote repository resource.
//
// RemoteRepos are based purely on lexical analysis; successfully constructing
// one is not a guarantee that the resource it identifies actually exists or is
// accessible.
type remoteRepo struct {
// Base is the import path of the repository root: the `root` capture of the
// matching provider regex, or the prefix reported by go-import metadata.
Base string
// RelPkg is the portion of the import path below Base, with any leading
// slash trimmed. It is empty when the import path is the root itself.
RelPkg string
// CloneURL is the URL parsed from the input (or built from SCP-style
// syntax); deduceRemoteRepo rewrites its Host and Path for recognized
// providers, or replaces it with the repo root reported by metadata.
CloneURL *url.URL
// Schemes lists the URL schemes to try. It holds the single scheme given in
// the input if there was one, otherwise a default list chosen by
// deduceRemoteRepo.
Schemes []string
// VCS lists the version control systems the repository may use, e.g.
// "git", "hg" or "bzr".
VCS []string
// Default URL schemes, per VCS, that deduceRemoteRepo assigns to
// remoteRepo.Schemes when the input import path did not carry its own scheme.
// NOTE(review): the ordering looks like a preference order (https first) —
// confirm against the code that consumes Schemes.
var (
gitSchemes = []string{"https", "ssh", "git", "http"}
bzrSchemes = []string{"https", "bzr+ssh", "bzr", "http"}
hgSchemes = []string{"https", "ssh", "http"}
svnSchemes = []string{"https", "http", "svn", "svn+ssh"}
//type remoteResult struct {
//r remoteRepo
//err error
// TODO(sdboyer) sync access to this map
//var remoteCache = make(map[string]remoteResult)
// Regexes for the different known import path flavors.
//
// All of them are matched against a normalized "host/path" string. By
// convention capture 1 (named `root`) is the repository root import path and
// the final capture is the optional "/sub/package" remainder; deduceRemoteRepo
// indexes the submatches positionally, so adding or removing a capturing group
// changes which index it must read.
var (
// This regex allowed some usernames that github currently disallows. They
// may have allowed them in the past; keeping it in case we need to revert.
//ghRegex = regexp.MustCompile(`^(?P<root>github\.com/([A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`)
// ghRegex: 1 = root, 2 = "user/repo", 3 = sub-package path.
ghRegex = regexp.MustCompile(`^(?P<root>github\.com/([A-Za-z0-9][-A-Za-z0-9]*[A-Za-z0-9]/[A-Za-z0-9_.\-]+))((?:/[A-Za-z0-9_.\-]+)*)$`)
// gpinNewRegex: 1 = root, 2 = optional user, 3 = package name, 4 = version
// ("vN" with up to two further numeric components), 5 = optional
// "-unstable" suffix, 6 = sub-package path.
gpinNewRegex = regexp.MustCompile(`^(?P<root>gopkg\.in/(?:([a-zA-Z0-9][-a-zA-Z0-9]+)/)?([a-zA-Z][-.a-zA-Z0-9]*)\.((?:v0|v[1-9][0-9]*)(?:\.0|\.[1-9][0-9]*){0,2}(-unstable)?)(?:\.git)?)((?:/[a-zA-Z0-9][-.a-zA-Z0-9]*)*)$`)
//gpinOldRegex = regexp.MustCompile(`^(?P<root>gopkg\.in/(?:([a-z0-9][-a-z0-9]+)/)?((?:v0|v[1-9][0-9]*)(?:\.0|\.[1-9][0-9]*){0,2}(-unstable)?)/([a-zA-Z][-a-zA-Z0-9]*)(?:\.git)?)((?:/[a-zA-Z][-a-zA-Z0-9]*)*)$`)
// bbRegex: 1 = root, 2 (`bitname`) = "user/repo", 3 = sub-package path.
bbRegex = regexp.MustCompile(`^(?P<root>bitbucket\.org/(?P<bitname>[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))((?:/[A-Za-z0-9_.\-]+)*)$`)
//lpRegex = regexp.MustCompile(`^(?P<root>launchpad\.net/([A-Za-z0-9-._]+)(/[A-Za-z0-9-._]+)?)(/.+)?`)
// lpRegex: 1 = root, 2 = project, 3 = sub-package path.
// NOTE(review): unlike its siblings this pattern has no trailing `$` anchor,
// so it also matches inputs with unrecognized trailing text — confirm intent.
lpRegex = regexp.MustCompile(`^(?P<root>launchpad\.net/([A-Za-z0-9-._]+))((?:/[A-Za-z0-9_.\-]+)*)?`)
//glpRegex = regexp.MustCompile(`^(?P<root>git\.launchpad\.net/([A-Za-z0-9_.\-]+)|~[A-Za-z0-9_.\-]+/(\+git|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+)$`)
// glpRegex: 1 = root, 2 = project, 3 = sub-package path.
glpRegex = regexp.MustCompile(`^(?P<root>git\.launchpad\.net/([A-Za-z0-9_.\-]+))((?:/[A-Za-z0-9_.\-]+)*)$`)
//gcRegex = regexp.MustCompile(`^(?P<root>code\.google\.com/[pr]/(?P<project>[a-z0-9\-]+)(\.(?P<subrepo>[a-z0-9\-]+))?)(/[A-Za-z0-9_.\-]+)*$`)
// jazzRegex: 1 = root, 2 = "git/user/repo", 3 = sub-package path.
jazzRegex = regexp.MustCompile(`^(?P<root>hub\.jazz\.net/(git/[a-z0-9]+/[A-Za-z0-9_.\-]+))((?:/[A-Za-z0-9_.\-]+)*)$`)
// apacheRegex: 1 = root, 2 = "name.git", 3 = sub-package path.
apacheRegex = regexp.MustCompile(`^(?P<root>git\.apache\.org/([a-z0-9_.\-]+\.git))((?:/[A-Za-z0-9_.\-]+)*)$`)
// genericRegex matches any host whose path carries an explicit VCS
// extension: 1 = root (including the extension), 2 (`repo`) = root without
// the extension, 3 = last dotted host label group, 4 = optional ":port",
// 5 (`vcs`) = bzr|git|hg|svn, 6 = sub-package path.
genericRegex = regexp.MustCompile(`^(?P<root>(?P<repo>([a-z0-9.\-]+\.)+[a-z0-9.\-]+(:[0-9]+)?/[A-Za-z0-9_.\-/~]*?)\.(?P<vcs>bzr|git|hg|svn))((?:/[A-Za-z0-9_.\-]+)*)$`)
// Other helper regexes
var (
// scpSyntaxRe matches SCP-like remote syntax of the form "user@host:path".
// Captures: 1 = user, 2 = host, 3 = path.
scpSyntaxRe = regexp.MustCompile(`^([a-zA-Z0-9_]+)@([a-zA-Z0-9._-]+):(.*)$`)
// pathvld validates a normalized import path: a dotted host name with at
// least two labels, followed by zero or more "/"-separated path segments.
// It does not accept a ":port" suffix on the host.
pathvld = regexp.MustCompile(`^([A-Za-z0-9-]+)(\.[A-Za-z0-9-]+)+(/[A-Za-z0-9-_.~]+)*$`)
// deduceRemoteRepo takes a potential import path and returns a remoteRepo
// representing the remote location of the source of an import path. Remote
// repositories can be bare import paths, or urls including a checkout scheme.
//
// The input is first normalized to "host/path" form (SCP-style
// "user@host:path" input is converted to an ssh URL), validated against
// pathvld, and then tested against the known provider regexes in the order of
// the switch below. If none of them match, go-import metadata is fetched
// through parseMetadata to resolve a vanity import path.
//
// An error is returned when the input cannot be parsed as a URL, fails
// pathvld validation, carries a non-major version in the gopkg.in form, names
// an unknown VCS in the generic form, or cannot be resolved through metadata.
//
// NOTE(review): closing braces and any per-case return statements are not
// visible in this copy of the function — confirm against the canonical file
// before relying on the control flow as shown.
func deduceRemoteRepo(path string) (rr *remoteRepo, err error) {
rr = &remoteRepo{}
if m := scpSyntaxRe.FindStringSubmatch(path); m != nil {
// Match SCP-like syntax and convert it to a URL.
// E.g. "user@host:some/path" becomes
// "ssh://user@host/some/path".
rr.CloneURL = &url.URL{
Scheme: "ssh",
User: url.User(m[1]),
Host: m[2],
Path: "/" + m[3],
// TODO(sdboyer) This is what stdlib sets; grok why better
//RawPath: m[3],
} else {
rr.CloneURL, err = url.Parse(path)
if err != nil {
return nil, fmt.Errorf("%q is not a valid import path", path)
// Normalize to "host/path". When the parsed URL has no Host (as for an
// input without a scheme), the whole input is already in Path.
if rr.CloneURL.Host != "" {
path = rr.CloneURL.Host + "/" + strings.TrimPrefix(rr.CloneURL.Path, "/")
} else {
path = rr.CloneURL.Path
if !pathvld.MatchString(path) {
return nil, fmt.Errorf("%q is not a valid import path", path)
// An explicit scheme in the input pins Schemes to that single value; the
// per-provider defaults below are only applied when Schemes is still nil.
if rr.CloneURL.Scheme != "" {
rr.Schemes = []string{rr.CloneURL.Scheme}
// TODO(sdboyer) instead of a switch, encode base domain in radix tree and pick
// detector from there; if failure, then fall back on metadata work
switch {
case ghRegex.MatchString(path):
// v[1] = root, v[2] = "user/repo", v[3] = sub-package path.
v := ghRegex.FindStringSubmatch(path)
rr.CloneURL.Host = ""
rr.CloneURL.Path = v[2]
rr.Base = v[1]
rr.RelPkg = strings.TrimPrefix(v[3], "/")
rr.VCS = []string{"git"}
// If no scheme was already recorded, then add the possible schemes for github
if rr.Schemes == nil {
rr.Schemes = gitSchemes
case gpinNewRegex.MatchString(path):
// v[2] = optional user, v[3] = package, v[4] = version, v[6] = sub-package.
v := gpinNewRegex.FindStringSubmatch(path)
// Duplicate some logic from the server in order to validate
// the import path string without having to hit the server
if strings.Contains(v[4], ".") {
return nil, fmt.Errorf("%q is not a valid import path; only allows major versions (%q instead of %q)",
path, v[4][:strings.Index(v[4], ".")], v[4])
// gopkg.in is always backed by github
rr.CloneURL.Host = ""
// If the optional user capture (v[2]) is empty, it's the shortened form
// that expands to the go-pkg github user
if v[2] == "" {
rr.CloneURL.Path = "go-pkg/" + v[3]
} else {
rr.CloneURL.Path = v[2] + "/" + v[3]
rr.Base = v[1]
rr.RelPkg = strings.TrimPrefix(v[6], "/")
rr.VCS = []string{"git"}
// If no scheme was already recorded, then add the possible schemes for github
if rr.Schemes == nil {
rr.Schemes = gitSchemes
//case gpinOldRegex.MatchString(path):
case bbRegex.MatchString(path):
// v[1] = root, v[2] = "user/repo", v[3] = sub-package path.
v := bbRegex.FindStringSubmatch(path)
rr.CloneURL.Host = ""
rr.CloneURL.Path = v[2]
rr.Base = v[1]
rr.RelPkg = strings.TrimPrefix(v[3], "/")
// The VCS cannot be told apart lexically here, so both are recorded.
rr.VCS = []string{"git", "hg"}
// FIXME(sdboyer) this ambiguity of vcs kills us on schemes, as schemes
// are inherently vcs-specific. Fixing this requires a wider refactor.
// For now, we only allow the intersection, which is just the hg schemes
if rr.Schemes == nil {
rr.Schemes = hgSchemes
//case gcRegex.MatchString(path):
//v := gcRegex.FindStringSubmatch(path)
//rr.CloneURL.Host = ""
//rr.CloneURL.Path = "p/" + v[2]
//rr.Base = v[1]
//rr.RelPkg = strings.TrimPrefix(v[5], "/")
//rr.VCS = []string{"hg", "git"}
case lpRegex.MatchString(path):
// TODO(sdboyer) lp handling is nasty - there's ambiguities which can only really
// be resolved with a metadata request.
v := lpRegex.FindStringSubmatch(path)
rr.CloneURL.Host = ""
rr.CloneURL.Path = v[2]
rr.Base = v[1]
rr.RelPkg = strings.TrimPrefix(v[3], "/")
rr.VCS = []string{"bzr"}
if rr.Schemes == nil {
rr.Schemes = bzrSchemes
case glpRegex.MatchString(path):
// TODO(sdboyer) same ambiguity issues as with normal bzr lp
v := glpRegex.FindStringSubmatch(path)
rr.CloneURL.Host = ""
rr.CloneURL.Path = v[2]
rr.Base = v[1]
rr.RelPkg = strings.TrimPrefix(v[3], "/")
rr.VCS = []string{"git"}
if rr.Schemes == nil {
rr.Schemes = gitSchemes
case jazzRegex.MatchString(path):
v := jazzRegex.FindStringSubmatch(path)
rr.CloneURL.Host = ""
rr.CloneURL.Path = v[2]
rr.Base = v[1]
rr.RelPkg = strings.TrimPrefix(v[3], "/")
rr.VCS = []string{"git"}
if rr.Schemes == nil {
rr.Schemes = gitSchemes
case apacheRegex.MatchString(path):
v := apacheRegex.FindStringSubmatch(path)
rr.CloneURL.Host = ""
rr.CloneURL.Path = v[2]
rr.Base = v[1]
rr.RelPkg = strings.TrimPrefix(v[3], "/")
rr.VCS = []string{"git"}
if rr.Schemes == nil {
rr.Schemes = gitSchemes
// try the general syntax
case genericRegex.MatchString(path):
// v[1] = root including the VCS extension, v[5] = vcs, v[6] = sub-package.
v := genericRegex.FindStringSubmatch(path)
switch v[5] {
case "git", "hg", "bzr":
// Split the root into host and path at the first slash; unlike the
// provider-specific cases above, the Host is kept on the clone URL.
x := strings.SplitN(v[1], "/", 2)
// TODO(sdboyer) is this actually correct for bzr?
rr.CloneURL.Host = x[0]
rr.CloneURL.Path = x[1]
rr.VCS = []string{v[5]}
rr.Base = v[1]
rr.RelPkg = strings.TrimPrefix(v[6], "/")
if rr.Schemes == nil {
if v[5] == "git" {
rr.Schemes = gitSchemes
} else if v[5] == "bzr" {
rr.Schemes = bzrSchemes
} else if v[5] == "hg" {
rr.Schemes = hgSchemes
// genericRegex also accepts "svn", which is not handled above.
return nil, fmt.Errorf("unknown repository type: %q", v[5])
// No luck so far. maybe it's one of them vanity imports?
importroot, vcs, reporoot, err := parseMetadata(path)
if err != nil {
return nil, fmt.Errorf("unable to deduce repository and source type for: %q", path)
// If we got something back at all, then it supersedes the actual input for
// the real URL to hit
rr.CloneURL, err = url.Parse(reporoot)
if err != nil {
return nil, fmt.Errorf("server returned bad URL when searching for vanity import: %q", reporoot)
// We have a real URL. Set the other values and return.
rr.Base = importroot
rr.RelPkg = strings.TrimPrefix(path[len(importroot):], "/")
rr.VCS = []string{vcs}
if rr.CloneURL.Scheme != "" {
rr.Schemes = []string{rr.CloneURL.Scheme}
return rr, nil
// fetchMetadata fetchs the remote metadata for path.
func fetchMetadata(path string) (rc io.ReadCloser, err error) {
defer func() {
if err != nil {
err = fmt.Errorf("unable to determine remote metadata protocol: %s", err)
// try https first
rc, err = doFetchMetadata("https", path)
if err == nil {
rc, err = doFetchMetadata("http", path)
// doFetchMetadata issues a go-get metadata request ("<scheme>://<path>?go-get=1")
// and returns the response body, which the caller must close.
//
// Only the "http" and "https" schemes are supported; any other scheme yields
// an error without touching the network. A failed request is reported with
// the underlying cause attached so callers can tell why it failed.
//
// The HTTP status code is not inspected here.
// NOTE(review): go get accepts non-200 responses that carry meta tags, so
// this looks intentional — confirm before adding a status check.
func doFetchMetadata(scheme, path string) (io.ReadCloser, error) {
	switch scheme {
	case "https", "http":
		// Named target rather than url so the net/url package is not shadowed.
		target := fmt.Sprintf("%s://%s?go-get=1", scheme, path)
		resp, err := http.Get(target)
		if err != nil {
			return nil, fmt.Errorf("failed to access url %q: %s", target, err)
		}
		return resp.Body, nil
	default:
		return nil, fmt.Errorf("unknown remote protocol scheme: %q", scheme)
	}
}
// parseMetadata fetches and decodes remote metadata for path.
func parseMetadata(path string) (string, string, string, error) {
rc, err := fetchMetadata(path)
if err != nil {
return "", "", "", err
defer rc.Close()
imports, err := parseMetaGoImports(rc)
if err != nil {
return "", "", "", err
match := -1
for i, im := range imports {
if !strings.HasPrefix(path, im.Prefix) {
if match != -1 {
return "", "", "", fmt.Errorf("multiple meta tags match import path %q", path)
match = i
if match == -1 {
return "", "", "", fmt.Errorf("go-import metadata not found")
return imports[match].Prefix, imports[match].VCS, imports[match].RepoRoot, nil