diff --git a/discovery.go b/discovery.go new file mode 100644 index 00000000..5543bee7 --- /dev/null +++ b/discovery.go @@ -0,0 +1,83 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package vsolver + +// This code is taken from cmd/go/discovery.go; it is the logic go get itself +// uses to interpret meta imports information. + +import ( + "encoding/xml" + "fmt" + "io" + "strings" +) + +// charsetReader returns a reader for the given charset. Currently +// it only supports UTF-8 and ASCII. Otherwise, it returns a meaningful +// error which is printed by go get, so the user can find why the package +// wasn't downloaded if the encoding is not supported. Note that, in +// order to reduce potential errors, ASCII is treated as UTF-8 (i.e. characters +// greater than 0x7f are not rejected). +func charsetReader(charset string, input io.Reader) (io.Reader, error) { + switch strings.ToLower(charset) { + case "utf-8", "ascii": + return input, nil + default: + return nil, fmt.Errorf("can't decode XML document using charset %q", charset) + } +} + +type metaImport struct { + Prefix, VCS, RepoRoot string +} + +// parseMetaGoImports returns meta imports from the HTML in r. +// Parsing ends at the end of the <head> section or the beginning of the <body>.
+func parseMetaGoImports(r io.Reader) (imports []metaImport, err error) { + d := xml.NewDecoder(r) + d.CharsetReader = charsetReader + d.Strict = false + var t xml.Token + for { + t, err = d.RawToken() + if err != nil { + if err == io.EOF || len(imports) > 0 { + err = nil + } + return + } + if e, ok := t.(xml.StartElement); ok && strings.EqualFold(e.Name.Local, "body") { + return + } + if e, ok := t.(xml.EndElement); ok && strings.EqualFold(e.Name.Local, "head") { + return + } + e, ok := t.(xml.StartElement) + if !ok || !strings.EqualFold(e.Name.Local, "meta") { + continue + } + if attrValue(e.Attr, "name") != "go-import" { + continue + } + if f := strings.Fields(attrValue(e.Attr, "content")); len(f) == 3 { + imports = append(imports, metaImport{ + Prefix: f[0], + VCS: f[1], + RepoRoot: f[2], + }) + } + } +} + +// attrValue returns the value of the first attribute whose local name matches +// name case-insensitively, or the empty string if nothing is found. +func attrValue(attrs []xml.Attr, name string) string { + for _, a := range attrs { + if strings.EqualFold(a.Name.Local, name) { + return a.Value + } + } + return "" +} diff --git a/remote.go b/remote.go index 37d95e43..11647bf8 100644 --- a/remote.go +++ b/remote.go @@ -2,7 +2,10 @@ package vsolver import ( "fmt" + "io" + "net/http" "net/url" + "os" "regexp" "strings" ) @@ -218,6 +221,87 @@ func deduceRemoteRepo(path string) (rr *remoteRepo, err error) { } } - // TODO use HTTP metadata to resolve vanity imports - return nil, fmt.Errorf("unable to deduce repository and source type for: %q", path) + // No luck so far. Maybe it's a vanity import? Ask the host for its go-import metadata.
+ importroot, vcs, reporoot, err := parseMetadata(path) + if err != nil { + return nil, fmt.Errorf("unable to deduce repository and source type for: %q", path) + } + + // If we got something back at all, then it supersedes the actual input for + // the real URL to hit + rr.CloneURL, err = url.Parse(reporoot) + if err != nil { + return nil, fmt.Errorf("server returned bad URL when searching for vanity import: %q", reporoot) + } + + // We have a real URL. Set the other values and return. NOTE(review): import paths are slash-separated on every OS, so trimming os.PathSeparator may leave a leading "/" in RelPkg on Windows; confirm. + rr.Base = importroot + rr.RelPkg = strings.TrimPrefix(path[len(importroot):], string(os.PathSeparator)) + + rr.VCS = []string{vcs} + if rr.CloneURL.Scheme != "" { + rr.Schemes = []string{rr.CloneURL.Scheme} + } + + return rr, nil +} + +// fetchMetadata fetches the remote metadata for path, trying https before http. +func fetchMetadata(path string) (rc io.ReadCloser, err error) { + defer func() { + if err != nil { + err = fmt.Errorf("unable to determine remote metadata protocol: %s", err) + } + }() + + // try https first + rc, err = doFetchMetadata("https", path) + if err == nil { + return + } + + rc, err = doFetchMetadata("http", path) + return +} + +func doFetchMetadata(scheme, path string) (io.ReadCloser, error) { + url := fmt.Sprintf("%s://%s?go-get=1", scheme, path) + switch scheme { + case "https", "http": + resp, err := http.Get(url) + if err != nil { + return nil, fmt.Errorf("failed to access url %q: %s", url, err) + } + return resp.Body, nil + default: + return nil, fmt.Errorf("unknown remote protocol scheme: %q", scheme) + } +} + +// parseMetadata fetches and decodes remote metadata for path.
+func parseMetadata(path string) (string, string, string, error) { + rc, err := fetchMetadata(path) + if err != nil { + return "", "", "", err + } + defer rc.Close() + + imports, err := parseMetaGoImports(rc) + if err != nil { + return "", "", "", err + } + match := -1 + for i, im := range imports { + if !strings.HasPrefix(path, im.Prefix) || (len(path) > len(im.Prefix) && path[len(im.Prefix)] != '/' && !strings.HasSuffix(im.Prefix, "/")) { // match whole path segments only, so prefix "a/b" does not claim "a/bc" + continue + } + if match != -1 { + return "", "", "", fmt.Errorf("multiple meta tags match import path %q", path) + } + match = i + } + if match == -1 { + return "", "", "", fmt.Errorf("go-import metadata not found") + } + return imports[match].Prefix, imports[match].VCS, imports[match].RepoRoot, nil } diff --git a/remote_test.go b/remote_test.go index 10537ca2..3bac9ae9 100644 --- a/remote_test.go +++ b/remote_test.go @@ -8,6 +8,10 @@ import ( ) func TestDeduceRemotes(t *testing.T) { + if testing.Short() { + t.Skip("Skipping remote deduction test in short mode") + } + fixtures := []struct { path string want *remoteRepo @@ -365,7 +369,50 @@ func TestDeduceRemotes(t *testing.T) { VCS: []string{"git"}, }, }, - // Regression - gh does allow 2-letter usernames + // Vanity imports + { + "golang.org/x/exp", + &remoteRepo{ + Base: "golang.org/x/exp", + RelPkg: "", + CloneURL: &url.URL{ + Scheme: "https", + Host: "go.googlesource.com", + Path: "/exp", + }, + Schemes: []string{"https"}, + VCS: []string{"git"}, + }, + }, + { + "golang.org/x/exp/inotify", + &remoteRepo{ + Base: "golang.org/x/exp", + RelPkg: "inotify", + CloneURL: &url.URL{ + Scheme: "https", + Host: "go.googlesource.com", + Path: "/exp", + }, + Schemes: []string{"https"}, + VCS: []string{"git"}, + }, + }, + { + "rsc.io/pdf", + &remoteRepo{ + Base: "rsc.io/pdf", + RelPkg: "", + CloneURL: &url.URL{ + Scheme: "https", + Host: "github.com", + Path: "/rsc/pdf", + }, + Schemes: []string{"https"}, + VCS: []string{"git"}, + }, + }, + // Regression - gh does allow two-letter usernames { "github.com/kr/pretty", &remoteRepo{