// Copyright 2019 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package main // import "golang.org/x/tools/cmd/digraph" // TODO(adonovan): // - support input files other than stdin // - support alternative formats (AT&T GraphViz, CSV, etc), // a comment syntax, etc. // - allow queries to nest, like Blaze query language. import ( "bufio" "bytes" _ "embed" "errors" "flag" "fmt" "io" "os" "sort" "strconv" "strings" "unicode" "unicode/utf8" ) func usage() { // Extract the content of the /* ... */ comment in doc.go. _, after, _ := strings.Cut(doc, "/*") doc, _, _ := strings.Cut(after, "*/") io.WriteString(flag.CommandLine.Output(), doc) flag.PrintDefaults() os.Exit(2) } //go:embed doc.go var doc string func main() { flag.Usage = usage flag.Parse() args := flag.Args() if len(args) == 0 { usage() } if err := digraph(args[0], args[1:]); err != nil { fmt.Fprintf(os.Stderr, "digraph: %s\n", err) os.Exit(1) } } type nodelist []string func (l nodelist) println(sep string) { for i, node := range l { if i > 0 { fmt.Fprint(stdout, sep) } fmt.Fprint(stdout, node) } fmt.Fprintln(stdout) } type nodeset map[string]bool func (s nodeset) sort() nodelist { nodes := make(nodelist, len(s)) var i int for node := range s { nodes[i] = node i++ } sort.Strings(nodes) return nodes } func (s nodeset) addAll(x nodeset) { for node := range x { s[node] = true } } // A graph maps nodes to the non-nil set of their immediate successors. type graph map[string]nodeset func (g graph) addNode(node string) nodeset { edges := g[node] if edges == nil { edges = make(nodeset) g[node] = edges } return edges } func (g graph) addEdges(from string, to ...string) { edges := g.addNode(from) for _, to := range to { g.addNode(to) edges[to] = true } } func (g graph) nodelist() nodelist { nodes := make(nodeset) for node := range g { nodes[node] = true } return nodes.sort() } func (g graph) reachableFrom(roots nodeset) nodeset { seen := make(nodeset) var visit func(node string) visit = func(node string) { if !seen[node] { seen[node] = true for e := range g[node] { visit(e) } } } for root := range roots { visit(root) } return seen } func (g graph) transpose() graph { rev := make(graph) for node, edges := range g { rev.addNode(node) for succ := range edges { rev.addEdges(succ, node) } } return rev } func (g graph) sccs() []nodeset { // Kosaraju's algorithm---Tarjan is overkill here. // Forward pass. S := make(nodelist, 0, len(g)) // postorder stack seen := make(nodeset) var visit func(node string) visit = func(node string) { if !seen[node] { seen[node] = true for e := range g[node] { visit(e) } S = append(S, node) } } for node := range g { visit(node) } // Reverse pass. rev := g.transpose() var scc nodeset seen = make(nodeset) var rvisit func(node string) rvisit = func(node string) { if !seen[node] { seen[node] = true scc[node] = true for e := range rev[node] { rvisit(e) } } } var sccs []nodeset for len(S) > 0 { top := S[len(S)-1] S = S[:len(S)-1] // pop if !seen[top] { scc = make(nodeset) rvisit(top) if len(scc) == 1 && !g[top][top] { continue } sccs = append(sccs, scc) } } return sccs } func (g graph) allpaths(from, to string) error { // Mark all nodes to "to". seen := make(nodeset) // value of seen[x] indicates whether x is on some path to "to" var visit func(node string) bool visit = func(node string) bool { reachesTo, ok := seen[node] if !ok { reachesTo = node == to seen[node] = reachesTo for e := range g[node] { if visit(e) { reachesTo = true } } if reachesTo && node != to { seen[node] = true } } return reachesTo } visit(from) // For each marked node, collect its marked successors. var edges []string for n := range seen { for succ := range g[n] { if seen[succ] { edges = append(edges, n+" "+succ) } } } // Sort (so that this method is deterministic) and print edges. sort.Strings(edges) for _, e := range edges { fmt.Fprintln(stdout, e) } return nil } func (g graph) somepath(from, to string) error { // Search breadth-first so that we return a minimal path. // A path is a linked list whose head is a candidate "to" node // and whose tail is the path ending in the "from" node. type path struct { node string tail *path } seen := nodeset{from: true} var queue []*path queue = append(queue, &path{node: from, tail: nil}) for len(queue) > 0 { p := queue[0] queue = queue[1:] if p.node == to { // Found a path. Print, tail first. var print func(p *path) print = func(p *path) { if p.tail != nil { print(p.tail) fmt.Fprintln(stdout, p.tail.node+" "+p.node) } } print(p) return nil } for succ := range g[p.node] { if !seen[succ] { seen[succ] = true queue = append(queue, &path{node: succ, tail: p}) } } } return fmt.Errorf("no path from %q to %q", from, to) } func (g graph) toDot(w *bytes.Buffer) { fmt.Fprintln(w, "digraph {") for _, src := range g.nodelist() { for _, dst := range g[src].sort() { // Dot's quoting rules appear to align with Go's for escString, // which is the syntax of node IDs. Labels require significantly // more quoting, but that appears not to be necessary if the node ID // is implicitly used as the label. fmt.Fprintf(w, "\t%q -> %q;\n", src, dst) } } fmt.Fprintln(w, "}") } func parse(rd io.Reader) (graph, error) { g := make(graph) var linenum int // We avoid bufio.Scanner as it imposes a (configurable) limit // on line length, whereas Reader.ReadString does not. in := bufio.NewReader(rd) for { linenum++ line, err := in.ReadString('\n') eof := false if err == io.EOF { eof = true } else if err != nil { return nil, err } // Split into words, honoring double-quotes per Go spec. words, err := split(line) if err != nil { return nil, fmt.Errorf("at line %d: %v", linenum, err) } if len(words) > 0 { g.addEdges(words[0], words[1:]...) } if eof { break } } return g, nil } // Overridable for redirection. var stdin io.Reader = os.Stdin var stdout io.Writer = os.Stdout func digraph(cmd string, args []string) error { // Parse the input graph. g, err := parse(stdin) if err != nil { return err } // Parse the command line. switch cmd { case "nodes": if len(args) != 0 { return fmt.Errorf("usage: digraph nodes") } g.nodelist().println("\n") case "degree": if len(args) != 0 { return fmt.Errorf("usage: digraph degree") } nodes := make(nodeset) for node := range g { nodes[node] = true } rev := g.transpose() for _, node := range nodes.sort() { fmt.Fprintf(stdout, "%d\t%d\t%s\n", len(rev[node]), len(g[node]), node) } case "transpose": if len(args) != 0 { return fmt.Errorf("usage: digraph transpose") } var revEdges []string for node, succs := range g.transpose() { for succ := range succs { revEdges = append(revEdges, fmt.Sprintf("%s %s", node, succ)) } } sort.Strings(revEdges) // make output deterministic for _, e := range revEdges { fmt.Fprintln(stdout, e) } case "succs", "preds": if len(args) == 0 { return fmt.Errorf("usage: digraph %s ... ", cmd) } g := g if cmd == "preds" { g = g.transpose() } result := make(nodeset) for _, root := range args { edges := g[root] if edges == nil { return fmt.Errorf("no such node %q", root) } result.addAll(edges) } result.sort().println("\n") case "forward", "reverse": if len(args) == 0 { return fmt.Errorf("usage: digraph %s ... ", cmd) } roots := make(nodeset) for _, root := range args { if g[root] == nil { return fmt.Errorf("no such node %q", root) } roots[root] = true } g := g if cmd == "reverse" { g = g.transpose() } g.reachableFrom(roots).sort().println("\n") case "somepath": if len(args) != 2 { return fmt.Errorf("usage: digraph somepath ") } from, to := args[0], args[1] if g[from] == nil { return fmt.Errorf("no such 'from' node %q", from) } if g[to] == nil { return fmt.Errorf("no such 'to' node %q", to) } if err := g.somepath(from, to); err != nil { return err } case "allpaths": if len(args) != 2 { return fmt.Errorf("usage: digraph allpaths ") } from, to := args[0], args[1] if g[from] == nil { return fmt.Errorf("no such 'from' node %q", from) } if g[to] == nil { return fmt.Errorf("no such 'to' node %q", to) } if err := g.allpaths(from, to); err != nil { return err } case "sccs": if len(args) != 0 { return fmt.Errorf("usage: digraph sccs") } buf := new(bytes.Buffer) oldStdout := stdout stdout = buf for _, scc := range g.sccs() { scc.sort().println(" ") } lines := strings.SplitAfter(buf.String(), "\n") sort.Strings(lines) stdout = oldStdout io.WriteString(stdout, strings.Join(lines, "")) case "scc": if len(args) != 1 { return fmt.Errorf("usage: digraph scc ") } node := args[0] if g[node] == nil { return fmt.Errorf("no such node %q", node) } for _, scc := range g.sccs() { if scc[node] { scc.sort().println("\n") break } } case "focus": if len(args) != 1 { return fmt.Errorf("usage: digraph focus ") } node := args[0] if g[node] == nil { return fmt.Errorf("no such node %q", node) } edges := make(map[string]struct{}) for from := range g.reachableFrom(nodeset{node: true}) { for to := range g[from] { edges[fmt.Sprintf("%s %s", from, to)] = struct{}{} } } gtrans := g.transpose() for from := range gtrans.reachableFrom(nodeset{node: true}) { for to := range gtrans[from] { edges[fmt.Sprintf("%s %s", to, from)] = struct{}{} } } edgesSorted := make([]string, 0, len(edges)) for e := range edges { edgesSorted = append(edgesSorted, e) } sort.Strings(edgesSorted) fmt.Fprintln(stdout, strings.Join(edgesSorted, "\n")) case "to": if len(args) != 1 || args[0] != "dot" { return fmt.Errorf("usage: digraph to dot") } var b bytes.Buffer g.toDot(&b) stdout.Write(b.Bytes()) default: return fmt.Errorf("no such command %q", cmd) } return nil } // -- Utilities -------------------------------------------------------- // split splits a line into words, which are generally separated by // spaces, but Go-style double-quoted string literals are also supported. // (This approximates the behaviour of the Bourne shell.) // // `one "two three"` -> ["one" "two three"] // `a"\n"b` -> ["a\nb"] func split(line string) ([]string, error) { var ( words []string inWord bool current bytes.Buffer ) for len(line) > 0 { r, size := utf8.DecodeRuneInString(line) if unicode.IsSpace(r) { if inWord { words = append(words, current.String()) current.Reset() inWord = false } } else if r == '"' { var ok bool size, ok = quotedLength(line) if !ok { return nil, errors.New("invalid quotation") } s, err := strconv.Unquote(line[:size]) if err != nil { return nil, err } current.WriteString(s) inWord = true } else { current.WriteRune(r) inWord = true } line = line[size:] } if inWord { words = append(words, current.String()) } return words, nil } // quotedLength returns the length in bytes of the prefix of input that // contain a possibly-valid double-quoted Go string literal. // // On success, n is at least two (""); input[:n] may be passed to // strconv.Unquote to interpret its value, and input[n:] contains the // rest of the input. // // On failure, quotedLength returns false, and the entire input can be // passed to strconv.Unquote if an informative error message is desired. // // quotedLength does not and need not detect all errors, such as // invalid hex or octal escape sequences, since it assumes // strconv.Unquote will be applied to the prefix. It guarantees only // that if there is a prefix of input containing a valid string literal, // its length is returned. // // TODO(adonovan): move this into a strconv-like utility package. func quotedLength(input string) (n int, ok bool) { var offset int // next returns the rune at offset, or -1 on EOF. // offset advances to just after that rune. next := func() rune { if offset < len(input) { r, size := utf8.DecodeRuneInString(input[offset:]) offset += size return r } return -1 } if next() != '"' { return // error: not a quotation } for { r := next() if r == '\n' || r < 0 { return // error: string literal not terminated } if r == '"' { return offset, true // success } if r == '\\' { var skip int switch next() { case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"': skip = 0 case '0', '1', '2', '3', '4', '5', '6', '7': skip = 2 case 'x': skip = 2 case 'u': skip = 4 case 'U': skip = 8 default: return // error: invalid escape } for i := 0; i < skip; i++ { next() } } } }