diff --git a/internal/godoc/format.go b/internal/godoc/format.go deleted file mode 100644 index e005dcb0..00000000 --- a/internal/godoc/format.go +++ /dev/null @@ -1,374 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build go1.16 -// +build go1.16 - -// This file implements FormatSelections and FormatText. -// FormatText is used to HTML-format Go and non-Go source -// text with line numbers and highlighted sections. It is -// built on top of FormatSelections, a generic formatter -// for "selected" text. - -package godoc - -import ( - "fmt" - "go/scanner" - "go/token" - "io" - "regexp" - "strconv" - "text/template" -) - -// ---------------------------------------------------------------------------- -// Implementation of FormatSelections - -// A Segment describes a text segment [start, end). -// The zero value of a Segment is a ready-to-use empty segment. -// -type Segment struct { - start, end int -} - -func (seg *Segment) isEmpty() bool { return seg.start >= seg.end } - -// A Selection is an "iterator" function returning a text segment. -// Repeated calls to a selection return consecutive, non-overlapping, -// non-empty segments, followed by an infinite sequence of empty -// segments. The first empty segment marks the end of the selection. -// -type Selection func() Segment - -// A LinkWriter writes some start or end "tag" to w for the text offset offs. -// It is called by FormatSelections at the start or end of each link segment. -// -type LinkWriter func(w io.Writer, offs int, start bool) - -// A SegmentWriter formats a text according to selections and writes it to w. -// The selections parameter is a bit set indicating which selections provided -// to FormatSelections overlap with the text segment: If the n'th bit is set -// in selections, the n'th selection provided to FormatSelections is overlapping -// with the text. 
-// -type SegmentWriter func(w io.Writer, text []byte, selections int) - -// FormatSelections takes a text and writes it to w using link and segment -// writers lw and sw as follows: lw is invoked for consecutive segment starts -// and ends as specified through the links selection, and sw is invoked for -// consecutive segments of text overlapped by the same selections as specified -// by selections. The link writer lw may be nil, in which case the links -// Selection is ignored. -// -func FormatSelections(w io.Writer, text []byte, lw LinkWriter, links Selection, sw SegmentWriter, selections ...Selection) { - // If we have a link writer, make the links - // selection the last entry in selections - if lw != nil { - selections = append(selections, links) - } - - // compute the sequence of consecutive segment changes - changes := newMerger(selections) - - // The i'th bit in bitset indicates that the text - // at the current offset is covered by selections[i]. - bitset := 0 - lastOffs := 0 - - // Text segments are written in a delayed fashion - // such that consecutive segments belonging to the - // same selection can be combined (peephole optimization). - // last describes the last segment which has not yet been written. - var last struct { - begin, end int // valid if begin < end - bitset int - } - - // flush writes the last delayed text segment - flush := func() { - if last.begin < last.end { - sw(w, text[last.begin:last.end], last.bitset) - } - last.begin = last.end // invalidate last - } - - // segment runs the segment [lastOffs, end) with the selection - // indicated by bitset through the segment peephole optimizer. 
- segment := func(end int) { - if lastOffs < end { // ignore empty segments - if last.end != lastOffs || last.bitset != bitset { - // the last segment is not adjacent to or - // differs from the new one - flush() - // start a new segment - last.begin = lastOffs - } - last.end = end - last.bitset = bitset - } - } - - for { - // get the next segment change - index, offs, start := changes.next() - if index < 0 || offs > len(text) { - // no more segment changes or the next change - // is past the end of the text - we're done - break - } - // determine the kind of segment change - if lw != nil && index == len(selections)-1 { - // we have a link segment change (see start of this function): - // format the previous selection segment, write the - // link tag and start a new selection segment - segment(offs) - flush() - lastOffs = offs - lw(w, offs, start) - } else { - // we have a selection change: - // format the previous selection segment, determine - // the new selection bitset and start a new segment - segment(offs) - lastOffs = offs - mask := 1 << uint(index) - if start { - bitset |= mask - } else { - bitset &^= mask - } - } - } - segment(len(text)) - flush() -} - -// A merger merges a slice of Selections and produces a sequence of -// consecutive segment change events through repeated next() calls. -// -type merger struct { - selections []Selection - segments []Segment // segments[i] is the next segment of selections[i] -} - -const infinity int = 2e9 - -func newMerger(selections []Selection) *merger { - segments := make([]Segment, len(selections)) - for i, sel := range selections { - segments[i] = Segment{infinity, infinity} - if sel != nil { - if seg := sel(); !seg.isEmpty() { - segments[i] = seg - } - } - } - return &merger{selections, segments} -} - -// next returns the next segment change: index specifies the Selection -// to which the segment belongs, offs is the segment start or end offset -// as determined by the start value. 
If there are no more segment changes, -// next returns an index value < 0. -// -func (m *merger) next() (index, offs int, start bool) { - // find the next smallest offset where a segment starts or ends - offs = infinity - index = -1 - for i, seg := range m.segments { - switch { - case seg.start < offs: - offs = seg.start - index = i - start = true - case seg.end < offs: - offs = seg.end - index = i - start = false - } - } - if index < 0 { - // no offset found => all selections merged - return - } - // offset found - it's either the start or end offset but - // either way it is ok to consume the start offset: set it - // to infinity so it won't be considered in the following - // next call - m.segments[index].start = infinity - if start { - return - } - // end offset found - consume it - m.segments[index].end = infinity - // advance to the next segment for that selection - seg := m.selections[index]() - if !seg.isEmpty() { - m.segments[index] = seg - } - return -} - -// ---------------------------------------------------------------------------- -// Implementation of FormatText - -// lineSelection returns the line segments for text as a Selection. -func lineSelection(text []byte) Selection { - i, j := 0, 0 - return func() (seg Segment) { - // find next newline, if any - for j < len(text) { - j++ - if text[j-1] == '\n' { - break - } - } - if i < j { - // text[i:j] constitutes a line - seg = Segment{i, j} - i = j - } - return - } -} - -// tokenSelection returns, as a selection, the sequence of -// consecutive occurrences of token sel in the Go src text. 
-// -func tokenSelection(src []byte, sel token.Token) Selection { - var s scanner.Scanner - fset := token.NewFileSet() - file := fset.AddFile("", fset.Base(), len(src)) - s.Init(file, src, nil, scanner.ScanComments) - return func() (seg Segment) { - for { - pos, tok, lit := s.Scan() - if tok == token.EOF { - break - } - offs := file.Offset(pos) - if tok == sel { - seg = Segment{offs, offs + len(lit)} - break - } - } - return - } -} - -// makeSelection is a helper function to make a Selection from a slice of pairs. -// Pairs describing empty segments are ignored. -// -func makeSelection(matches [][]int) Selection { - i := 0 - return func() Segment { - for i < len(matches) { - m := matches[i] - i++ - if m[0] < m[1] { - // non-empty segment - return Segment{m[0], m[1]} - } - } - return Segment{} - } -} - -// regexpSelection computes the Selection for the regular expression expr in text. -func regexpSelection(text []byte, expr string) Selection { - var matches [][]int - if rx, err := regexp.Compile(expr); err == nil { - matches = rx.FindAllIndex(text, -1) - } - return makeSelection(matches) -} - -var selRx = regexp.MustCompile(`^([0-9]+):([0-9]+)`) - -// RangeSelection computes the Selection for a text range described -// by the argument str; the range description must match the selRx -// regular expression. -func RangeSelection(str string) Selection { - m := selRx.FindStringSubmatch(str) - if len(m) >= 2 { - from, _ := strconv.Atoi(m[1]) - to, _ := strconv.Atoi(m[2]) - if from < to { - return makeSelection([][]int{{from, to}}) - } - } - return nil -} - -// Span tags for all the possible selection combinations that may -// be generated by FormatText. Selections are indicated by a bitset, -// and the value of the bitset specifies the tag to be used. 
-// -// bit 0: comments -// bit 1: highlights -// bit 2: selections -// -var startTags = [][]byte{ - /* 000 */ []byte(``), - /* 001 */ []byte(``), - /* 010 */ []byte(``), - /* 011 */ []byte(``), - /* 100 */ []byte(``), - /* 101 */ []byte(``), - /* 110 */ []byte(``), - /* 111 */ []byte(``), -} - -var endTag = []byte(``) - -func selectionTag(w io.Writer, text []byte, selections int) { - if selections < len(startTags) { - if tag := startTags[selections]; len(tag) > 0 { - w.Write(tag) - template.HTMLEscape(w, text) - w.Write(endTag) - return - } - } - template.HTMLEscape(w, text) -} - -// FormatText HTML-escapes text and writes it to w. -// Consecutive text segments are wrapped in HTML spans (with tags as -// defined by startTags and endTag) as follows: -// -// - if line >= 0, line number (ln) spans are inserted before each line, -// starting with the value of line -// - if the text is Go source, comments get the "comment" span class -// - each occurrence of the regular expression pattern gets the "highlight" -// span class -// - text segments covered by selection get the "selection" span class -// -// Comments, highlights, and selections may overlap arbitrarily; the respective -// HTML span classes are specified in the startTags variable. 
-// -func FormatText(w io.Writer, text []byte, line int, goSource bool, pattern string, selection Selection) { - var comments, highlights Selection - if goSource { - comments = tokenSelection(text, token.COMMENT) - } - if pattern != "" { - highlights = regexpSelection(text, pattern) - } - if line >= 0 || comments != nil || highlights != nil || selection != nil { - var lineTag LinkWriter - if line >= 0 { - lineTag = func(w io.Writer, _ int, start bool) { - if start { - fmt.Fprintf(w, "%6d", line, line) - line++ - } - } - } - FormatSelections(w, text, lineTag, lineSelection(text), selectionTag, comments, highlights, selection) - } else { - template.HTMLEscape(w, text) - } -} diff --git a/internal/godoc/godoc.go b/internal/godoc/godoc.go index c9b18367..25b30632 100644 --- a/internal/godoc/godoc.go +++ b/internal/godoc/godoc.go @@ -31,6 +31,8 @@ import ( "time" "unicode" "unicode/utf8" + + "golang.org/x/website/internal/texthtml" ) // Fake relative package path for built-ins. Documentation for all globals @@ -127,150 +129,17 @@ func (p *Presentation) node_htmlFunc(info *PageInfo, node interface{}, linkify b p.writeNode(&buf1, info, info.FSet, node) var buf2 bytes.Buffer - if n, _ := node.(ast.Node); n != nil && linkify && p.DeclLinks { - LinkifyText(&buf2, buf1.Bytes(), n) - if st, name := isStructTypeDecl(n); st != nil { - addStructFieldIDAttributes(&buf2, name, st) - } - } else { - FormatText(&buf2, buf1.Bytes(), -1, true, "", nil) + var n ast.Node + if linkify && p.DeclLinks { + n, _ = node.(ast.Node) } - + buf2.Write(texthtml.Format(buf1.Bytes(), texthtml.Config{ + AST: n, + GoComments: true, + })) return buf2.String() } -// isStructTypeDecl checks whether n is a struct declaration. -// It either returns a non-nil StructType and its name, or zero values. -func isStructTypeDecl(n ast.Node) (st *ast.StructType, name string) { - gd, ok := n.(*ast.GenDecl) - if !ok || gd.Tok != token.TYPE { - return nil, "" - } - if gd.Lparen > 0 { - // Parenthesized type. 
Who does that, anyway? - // TODO: Reportedly gri does. Fix this to handle that too. - return nil, "" - } - if len(gd.Specs) != 1 { - return nil, "" - } - ts, ok := gd.Specs[0].(*ast.TypeSpec) - if !ok { - return nil, "" - } - st, ok = ts.Type.(*ast.StructType) - if !ok { - return nil, "" - } - return st, ts.Name.Name -} - -// addStructFieldIDAttributes modifies the contents of buf such that -// all struct fields of the named struct have -// in them, so people can link to /#Struct.Field. -func addStructFieldIDAttributes(buf *bytes.Buffer, name string, st *ast.StructType) { - if st.Fields == nil { - return - } - // needsLink is a set of identifiers that still need to be - // linked, where value == key, to avoid an allocation in func - // linkedField. - needsLink := make(map[string]string) - - for _, f := range st.Fields.List { - if len(f.Names) == 0 { - continue - } - fieldName := f.Names[0].Name - needsLink[fieldName] = fieldName - } - var newBuf bytes.Buffer - foreachLine(buf.Bytes(), func(line []byte) { - if fieldName := linkedField(line, needsLink); fieldName != "" { - fmt.Fprintf(&newBuf, ``, name, fieldName) - delete(needsLink, fieldName) - } - newBuf.Write(line) - }) - buf.Reset() - buf.Write(newBuf.Bytes()) -} - -// foreachLine calls fn for each line of in, where a line includes -// the trailing "\n", except on the last line, if it doesn't exist. -func foreachLine(in []byte, fn func(line []byte)) { - for len(in) > 0 { - nl := bytes.IndexByte(in, '\n') - if nl == -1 { - fn(in) - return - } - fn(in[:nl+1]) - in = in[nl+1:] - } -} - -// commentPrefix is the line prefix for comments after they've been HTMLified. -var commentPrefix = []byte(`// `) - -// linkedField determines whether the given line starts with an -// identifier in the provided ids map (mapping from identifier to the -// same identifier). The line can start with either an identifier or -// an identifier in a comment. If one matches, it returns the -// identifier that matched. 
Otherwise it returns the empty string. -func linkedField(line []byte, ids map[string]string) string { - line = bytes.TrimSpace(line) - - // For fields with a doc string of the - // conventional form, we put the new span into - // the comment instead of the field. - // The "conventional" form is a complete sentence - // per https://golang.org/s/style#comment-sentences like: - // - // // Foo is an optional Fooer to foo the foos. - // Foo Fooer - // - // In this case, we want the #StructName.Foo - // link to make the browser go to the comment - // line "Foo is an optional Fooer" instead of - // the "Foo Fooer" line, which could otherwise - // obscure the docs above the browser's "fold". - // - // TODO: do this better, so it works for all - // comments, including unconventional ones. - line = bytes.TrimPrefix(line, commentPrefix) - id := scanIdentifier(line) - if len(id) == 0 { - // No leading identifier. Avoid map lookup for - // somewhat common case. - return "" - } - return ids[string(id)] -} - -// scanIdentifier scans a valid Go identifier off the front of v and -// either returns a subslice of v if there's a valid identifier, or -// returns a zero-length slice. -func scanIdentifier(v []byte) []byte { - var n int // number of leading bytes of v belonging to an identifier - for { - r, width := utf8.DecodeRune(v[n:]) - if !(isLetter(r) || n > 0 && isDigit(r)) { - break - } - n += width - } - return v[:n] -} - -func isLetter(ch rune) bool { - return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= utf8.RuneSelf && unicode.IsLetter(ch) -} - -func isDigit(ch rune) bool { - return '0' <= ch && ch <= '9' || ch >= utf8.RuneSelf && unicode.IsDigit(ch) -} - func comment_htmlFunc(comment string) string { var buf bytes.Buffer // TODO(gri) Provide list of words (e.g. 
function parameters) diff --git a/internal/godoc/godoc_test.go b/internal/godoc/godoc_test.go index fd65c7ef..69914cbe 100644 --- a/internal/godoc/godoc_test.go +++ b/internal/godoc/godoc_test.go @@ -250,25 +250,6 @@ func linkifySource(t *testing.T, src []byte) string { return buf.String() } -func TestScanIdentifier(t *testing.T) { - tests := []struct { - in, want string - }{ - {"foo bar", "foo"}, - {"foo/bar", "foo"}, - {" foo", ""}, - {"фоо", "фоо"}, - {"f123", "f123"}, - {"123f", ""}, - } - for _, tt := range tests { - got := scanIdentifier([]byte(tt.in)) - if string(got) != tt.want { - t.Errorf("scanIdentifier(%q) = %q; want %q", tt.in, got, tt.want) - } - } -} - func TestReplaceLeadingIndentation(t *testing.T) { oldIndent := strings.Repeat(" ", 2) newIndent := strings.Repeat(" ", 4) diff --git a/internal/godoc/linkify.go b/internal/godoc/linkify.go deleted file mode 100644 index 3f44ee6b..00000000 --- a/internal/godoc/linkify.go +++ /dev/null @@ -1,198 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build go1.16 -// +build go1.16 - -// This file implements LinkifyText which introduces -// links for identifiers pointing to their declarations. -// The approach does not cover all cases because godoc -// doesn't have complete type information, but it's -// reasonably good for browsing. - -package godoc - -import ( - "fmt" - "go/ast" - "go/doc" - "go/token" - "io" - "strconv" -) - -// LinkifyText HTML-escapes source text and writes it to w. -// Identifiers that are in a "use" position (i.e., that are -// not being declared), are wrapped with HTML links pointing -// to the respective declaration, if possible. Comments are -// formatted the same way as with FormatText. 
-// -func LinkifyText(w io.Writer, text []byte, n ast.Node) { - links := linksFor(n) - - i := 0 // links index - prev := "" // prev HTML tag - linkWriter := func(w io.Writer, _ int, start bool) { - // end tag - if !start { - if prev != "" { - fmt.Fprintf(w, ``, prev) - prev = "" - } - return - } - - // start tag - prev = "" - if i < len(links) { - switch info := links[i]; { - case info.path != "" && info.name == "": - // package path - fmt.Fprintf(w, ``, info.path) - prev = "a" - case info.path != "" && info.name != "": - // qualified identifier - fmt.Fprintf(w, ``, info.path, info.name) - prev = "a" - case info.path == "" && info.name != "": - // local identifier - if info.isVal { - fmt.Fprintf(w, ``, info.name) - prev = "span" - } else if ast.IsExported(info.name) { - fmt.Fprintf(w, ``, info.name) - prev = "a" - } - } - i++ - } - } - - idents := tokenSelection(text, token.IDENT) - comments := tokenSelection(text, token.COMMENT) - FormatSelections(w, text, linkWriter, idents, selectionTag, comments) -} - -// A link describes the (HTML) link information for an identifier. -// The zero value of a link represents "no link". -// -type link struct { - path, name string // package path, identifier name - isVal bool // identifier is defined in a const or var declaration -} - -// linksFor returns the list of links for the identifiers used -// by node in the same order as they appear in the source. -// -func linksFor(node ast.Node) (links []link) { - // linkMap tracks link information for each ast.Ident node. Entries may - // be created out of source order (for example, when we visit a parent - // definition node). These links are appended to the returned slice when - // their ast.Ident nodes are visited. 
- linkMap := make(map[*ast.Ident]link) - - ast.Inspect(node, func(node ast.Node) bool { - switch n := node.(type) { - case *ast.Field: - for _, n := range n.Names { - linkMap[n] = link{} - } - case *ast.ImportSpec: - if name := n.Name; name != nil { - linkMap[name] = link{} - } - case *ast.ValueSpec: - for _, n := range n.Names { - linkMap[n] = link{name: n.Name, isVal: true} - } - case *ast.FuncDecl: - linkMap[n.Name] = link{} - case *ast.TypeSpec: - linkMap[n.Name] = link{} - case *ast.AssignStmt: - // Short variable declarations only show up if we apply - // this code to all source code (as opposed to exported - // declarations only). - if n.Tok == token.DEFINE { - // Some of the lhs variables may be re-declared, - // so technically they are not defs. We don't - // care for now. - for _, x := range n.Lhs { - // Each lhs expression should be an - // ident, but we are conservative and check. - if n, _ := x.(*ast.Ident); n != nil { - linkMap[n] = link{isVal: true} - } - } - } - case *ast.SelectorExpr: - // Detect qualified identifiers of the form pkg.ident. - // If anything fails we return true and collect individual - // identifiers instead. - if x, _ := n.X.(*ast.Ident); x != nil { - // Create links only if x is a qualified identifier. - if obj := x.Obj; obj != nil && obj.Kind == ast.Pkg { - if spec, _ := obj.Decl.(*ast.ImportSpec); spec != nil { - // spec.Path.Value is the import path - if path, err := strconv.Unquote(spec.Path.Value); err == nil { - // Register two links, one for the package - // and one for the qualified identifier. - linkMap[x] = link{path: path} - linkMap[n.Sel] = link{path: path, name: n.Sel.Name} - } - } - } - } - case *ast.CompositeLit: - // Detect field names within composite literals. These links should - // be prefixed by the type name. - fieldPath := "" - prefix := "" - switch typ := n.Type.(type) { - case *ast.Ident: - prefix = typ.Name + "." 
- case *ast.SelectorExpr: - if x, _ := typ.X.(*ast.Ident); x != nil { - // Create links only if x is a qualified identifier. - if obj := x.Obj; obj != nil && obj.Kind == ast.Pkg { - if spec, _ := obj.Decl.(*ast.ImportSpec); spec != nil { - // spec.Path.Value is the import path - if path, err := strconv.Unquote(spec.Path.Value); err == nil { - // Register two links, one for the package - // and one for the qualified identifier. - linkMap[x] = link{path: path} - linkMap[typ.Sel] = link{path: path, name: typ.Sel.Name} - fieldPath = path - prefix = typ.Sel.Name + "." - } - } - } - } - } - for _, e := range n.Elts { - if kv, ok := e.(*ast.KeyValueExpr); ok { - if k, ok := kv.Key.(*ast.Ident); ok { - // Note: there is some syntactic ambiguity here. We cannot determine - // if this is a struct literal or a map literal without type - // information. We assume struct literal. - name := prefix + k.Name - linkMap[k] = link{path: fieldPath, name: name} - } - } - } - case *ast.Ident: - if l, ok := linkMap[n]; ok { - links = append(links, l) - } else { - l := link{name: n.Name} - if n.Obj == nil && doc.IsPredeclared(n.Name) { - l.path = builtinPkgPath - } - links = append(links, l) - } - } - return true - }) - return -} diff --git a/internal/godoc/server.go b/internal/godoc/server.go index b16449c8..2cf200ba 100644 --- a/internal/godoc/server.go +++ b/internal/godoc/server.go @@ -25,10 +25,14 @@ import ( "os" pathpkg "path" "path/filepath" + "regexp" "sort" + "strconv" "strings" "text/template" "time" + + "golang.org/x/website/internal/texthtml" ) // handlerServer is a migration from an old godoc http Handler type. @@ -555,6 +559,23 @@ func redirectFile(w http.ResponseWriter, r *http.Request) (redirected bool) { return } +var selRx = regexp.MustCompile(`^([0-9]+):([0-9]+)`) + +// rangeSelection computes the Selection for a text range described +// by the argument str, of the form Start:End, where Start and End +// are decimal byte offsets. 
+func rangeSelection(str string) texthtml.Selection { + m := selRx.FindStringSubmatch(str) + if len(m) >= 2 { + from, _ := strconv.Atoi(m[1]) + to, _ := strconv.Atoi(m[2]) + if from < to { + return texthtml.Spans(texthtml.Span{Start: from, End: to}) + } + } + return nil +} + func (p *Presentation) serveTextFile(w http.ResponseWriter, r *http.Request, abspath, relpath, title string) { src, err := fs.ReadFile(p.Corpus.fs, toFS(abspath)) if err != nil { @@ -568,19 +589,18 @@ func (p *Presentation) serveTextFile(w http.ResponseWriter, r *http.Request, abs return } - h := r.FormValue("h") - s := RangeSelection(r.FormValue("s")) + cfg := texthtml.Config{ + GoComments: pathpkg.Ext(abspath) == ".go", + Highlight: r.FormValue("h"), + Selection: rangeSelection(r.FormValue("s")), + Line: 1, + } var buf bytes.Buffer - if pathpkg.Ext(abspath) == ".go" { - buf.WriteString("
")
-		formatGoSource(&buf, src, h, s)
-		buf.WriteString("
") - } else { - buf.WriteString("
")
-		FormatText(&buf, src, 1, false, h, s)
-		buf.WriteString("
") - } + buf.WriteString("
")
+	buf.Write(texthtml.Format(src, cfg))
+	buf.WriteString("
") + fmt.Fprintf(&buf, `

View as plain text

`, htmlpkg.EscapeString(relpath)) p.ServePage(w, Page{ @@ -592,49 +612,6 @@ func (p *Presentation) serveTextFile(w http.ResponseWriter, r *http.Request, abs }) } -// formatGoSource HTML-escapes Go source text and writes it to w. -func formatGoSource(buf *bytes.Buffer, text []byte, pattern string, selection Selection) { - // Emit to a temp buffer so that we can add line anchors at the end. - saved, buf := buf, new(bytes.Buffer) - - comments := tokenSelection(text, token.COMMENT) - var highlights Selection - if pattern != "" { - highlights = regexpSelection(text, pattern) - } - - FormatSelections(buf, text, nil, nil, selectionTag, comments, highlights, selection) - - // Now copy buf to saved, adding line anchors. - - // The lineSelection mechanism can't be composed with our - // linkWriter, so we have to add line spans as another pass. - n := 1 - for _, line := range bytes.Split(buf.Bytes(), []byte("\n")) { - // The line numbers are inserted into the document via a CSS ::before - // pseudo-element. This prevents them from being copied when users - // highlight and copy text. - // ::before is supported in 98% of browsers: https://caniuse.com/#feat=css-gencontent - // This is also the trick Github uses to hide line numbers. - // - // The first tab for the code snippet needs to start in column 9, so - // it indents a full 8 spaces, hence the two nbsp's. Otherwise the tab - // character only indents a short amount. - // - // Due to rounding and font width Firefox might not treat 8 rendered - // characters as 8 characters wide, and subsequently may treat the tab - // character in the 9th position as moving the width from (7.5 or so) up - // to 8. See - // https://github.com/webcompat/web-bugs/issues/17530#issuecomment-402675091 - // for a fuller explanation. The solution is to add a CSS class to - // explicitly declare the width to be 8 characters. 
- fmt.Fprintf(saved, `%6d  `, n, n) - n++ - saved.Write(line) - saved.WriteByte('\n') - } -} - func (p *Presentation) serveDirectory(w http.ResponseWriter, r *http.Request, abspath, relpath string) { if redirect(w, r) { return diff --git a/internal/godoc/template.go b/internal/godoc/template.go index 3e52453a..5baa1401 100644 --- a/internal/godoc/template.go +++ b/internal/godoc/template.go @@ -41,6 +41,8 @@ import ( "log" "regexp" "strings" + + "golang.org/x/website/internal/texthtml" ) // Functions in this file panic on error, but the panic is recovered @@ -100,7 +102,7 @@ func (p *Presentation) code(file string, arg ...interface{}) (s string, err erro text = strings.Replace(text, "\t", " ", -1) var buf bytes.Buffer // HTML-escape text and syntax-color comments like elsewhere. - FormatText(&buf, []byte(text), -1, true, "", nil) + buf.Write(texthtml.Format([]byte(text), texthtml.Config{GoComments: true})) // Include the command as a comment. text = fmt.Sprintf("
%s
", command, buf.Bytes()) return text, nil diff --git a/internal/texthtml/ast.go b/internal/texthtml/ast.go new file mode 100644 index 00000000..76bd4bb2 --- /dev/null +++ b/internal/texthtml/ast.go @@ -0,0 +1,298 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package texthtml + +import ( + "bytes" + "fmt" + "go/ast" + "go/doc" + "go/token" + "strconv" + "unicode" + "unicode/utf8" +) + +// A goLink describes the (HTML) link information for a Go identifier. +// The zero value of a link represents "no link". +type goLink struct { + path, name string // package path, identifier name + isVal bool // identifier is defined in a const or var declaration +} + +func (l *goLink) tags() (start, end string) { + switch { + case l.path != "" && l.name == "": + // package path + return ``, `` + case l.path != "" && l.name != "": + // qualified identifier + return ``, `` + case l.path == "" && l.name != "": + // local identifier + if l.isVal { + return ``, `` + } + if ast.IsExported(l.name) { + return ``, `` + } + } + return "", "" +} + +// goLinksFor returns the list of links for the identifiers used +// by node in the same order as they appear in the source. +func goLinksFor(node ast.Node) (links []goLink) { + // linkMap tracks link information for each ast.Ident node. Entries may + // be created out of source order (for example, when we visit a parent + // definition node). These links are appended to the returned slice when + // their ast.Ident nodes are visited. 
+ linkMap := make(map[*ast.Ident]goLink) + + ast.Inspect(node, func(node ast.Node) bool { + switch n := node.(type) { + case *ast.Field: + for _, n := range n.Names { + linkMap[n] = goLink{} + } + case *ast.ImportSpec: + if name := n.Name; name != nil { + linkMap[name] = goLink{} + } + case *ast.ValueSpec: + for _, n := range n.Names { + linkMap[n] = goLink{name: n.Name, isVal: true} + } + case *ast.FuncDecl: + linkMap[n.Name] = goLink{} + case *ast.TypeSpec: + linkMap[n.Name] = goLink{} + case *ast.AssignStmt: + // Short variable declarations only show up if we apply + // this code to all source code (as opposed to exported + // declarations only). + if n.Tok == token.DEFINE { + // Some of the lhs variables may be re-declared, + // so technically they are not defs. We don't + // care for now. + for _, x := range n.Lhs { + // Each lhs expression should be an + // ident, but we are conservative and check. + if n, _ := x.(*ast.Ident); n != nil { + linkMap[n] = goLink{isVal: true} + } + } + } + case *ast.SelectorExpr: + // Detect qualified identifiers of the form pkg.ident. + // If anything fails we return true and collect individual + // identifiers instead. + if x, _ := n.X.(*ast.Ident); x != nil { + // Create links only if x is a qualified identifier. + if obj := x.Obj; obj != nil && obj.Kind == ast.Pkg { + if spec, _ := obj.Decl.(*ast.ImportSpec); spec != nil { + // spec.Path.Value is the import path + if path, err := strconv.Unquote(spec.Path.Value); err == nil { + // Register two links, one for the package + // and one for the qualified identifier. + linkMap[x] = goLink{path: path} + linkMap[n.Sel] = goLink{path: path, name: n.Sel.Name} + } + } + } + } + case *ast.CompositeLit: + // Detect field names within composite literals. These links should + // be prefixed by the type name. + fieldPath := "" + prefix := "" + switch typ := n.Type.(type) { + case *ast.Ident: + prefix = typ.Name + "." 
+ case *ast.SelectorExpr: + if x, _ := typ.X.(*ast.Ident); x != nil { + // Create links only if x is a qualified identifier. + if obj := x.Obj; obj != nil && obj.Kind == ast.Pkg { + if spec, _ := obj.Decl.(*ast.ImportSpec); spec != nil { + // spec.Path.Value is the import path + if path, err := strconv.Unquote(spec.Path.Value); err == nil { + // Register two links, one for the package + // and one for the qualified identifier. + linkMap[x] = goLink{path: path} + linkMap[typ.Sel] = goLink{path: path, name: typ.Sel.Name} + fieldPath = path + prefix = typ.Sel.Name + "." + } + } + } + } + } + for _, e := range n.Elts { + if kv, ok := e.(*ast.KeyValueExpr); ok { + if k, ok := kv.Key.(*ast.Ident); ok { + // Note: there is some syntactic ambiguity here. We cannot determine + // if this is a struct literal or a map literal without type + // information. We assume struct literal. + name := prefix + k.Name + linkMap[k] = goLink{path: fieldPath, name: name} + } + } + } + case *ast.Ident: + if l, ok := linkMap[n]; ok { + links = append(links, l) + } else { + l := goLink{name: n.Name} + if n.Obj == nil && doc.IsPredeclared(n.Name) { + l.path = "builtin" + } + links = append(links, l) + } + } + return true + }) + return +} + +// postFormatAST makes any appropriate changes to the formatting of node in buf. +// Specifically, it adds span links to each struct field, so they can be linked properly. +// TODO(rsc): Why not do this as part of the linking above? +func postFormatAST(buf *bytes.Buffer, node ast.Node) { + if st, name := isStructTypeDecl(node); st != nil { + addStructFieldIDAttributes(buf, name, st) + } +} + +// isStructTypeDecl checks whether n is a struct declaration. +// It either returns a non-nil StructType and its name, or zero values. +func isStructTypeDecl(n ast.Node) (st *ast.StructType, name string) { + gd, ok := n.(*ast.GenDecl) + if !ok || gd.Tok != token.TYPE { + return nil, "" + } + if gd.Lparen > 0 { + // Parenthesized type. Who does that, anyway? 
+ // TODO: Reportedly gri does. Fix this to handle that too. + return nil, "" + } + if len(gd.Specs) != 1 { + return nil, "" + } + ts, ok := gd.Specs[0].(*ast.TypeSpec) + if !ok { + return nil, "" + } + st, ok = ts.Type.(*ast.StructType) + if !ok { + return nil, "" + } + return st, ts.Name.Name +} + +// addStructFieldIDAttributes modifies the contents of buf such that +// all struct fields of the named struct have +// in them, so people can link to /#Struct.Field. +func addStructFieldIDAttributes(buf *bytes.Buffer, name string, st *ast.StructType) { + if st.Fields == nil { + return + } + // needsLink is a set of identifiers that still need to be + // linked, where value == key, to avoid an allocation in func + // linkedField. + needsLink := make(map[string]string) + + for _, f := range st.Fields.List { + if len(f.Names) == 0 { + continue + } + fieldName := f.Names[0].Name + needsLink[fieldName] = fieldName + } + var newBuf bytes.Buffer + foreachLine(buf.Bytes(), func(line []byte) { + if fieldName := linkedField(line, needsLink); fieldName != "" { + fmt.Fprintf(&newBuf, ``, name, fieldName) + delete(needsLink, fieldName) + } + newBuf.Write(line) + }) + buf.Reset() + buf.Write(newBuf.Bytes()) +} + +// foreachLine calls fn for each line of in, where a line includes +// the trailing "\n", except on the last line, if it doesn't exist. +func foreachLine(in []byte, fn func(line []byte)) { + for len(in) > 0 { + nl := bytes.IndexByte(in, '\n') + if nl == -1 { + fn(in) + return + } + fn(in[:nl+1]) + in = in[nl+1:] + } +} + +// commentPrefix is the line prefix for comments after they've been HTMLified. +var commentPrefix = []byte(`// `) + +// linkedField determines whether the given line starts with an +// identifier in the provided ids map (mapping from identifier to the +// same identifier). The line can start with either an identifier or +// an identifier in a comment. If one matches, it returns the +// identifier that matched. Otherwise it returns the empty string. 
func linkedField(line []byte, ids map[string]string) string {
	// Leading and trailing whitespace (indentation, the trailing newline)
	// is irrelevant for the match.
	line = bytes.TrimSpace(line)

	// For fields with a doc string of the
	// conventional form, we put the new span into
	// the comment instead of the field.
	// The "conventional" form is a complete sentence
	// per https://golang.org/s/style#comment-sentences like:
	//
	//	// Foo is an optional Fooer to foo the foos.
	//	Foo Fooer
	//
	// In this case, we want the #StructName.Foo
	// link to make the browser go to the comment
	// line "Foo is an optional Fooer" instead of
	// the "Foo Fooer" line, which could otherwise
	// obscure the docs above the browser's "fold".
	//
	// TODO: do this better, so it works for all
	// comments, including unconventional ones.
	line = bytes.TrimPrefix(line, commentPrefix)
	id := scanIdentifier(line)
	if len(id) == 0 {
		// No leading identifier. Avoid map lookup for
		// somewhat common case.
		return ""
	}
	// A missing key yields the zero value "", which is also the
	// "no match" result of this function.
	return ids[string(id)]
}

// scanIdentifier scans a valid Go identifier off the front of v and
// either returns a subslice of v if there's a valid identifier, or
// returns a zero-length slice.
func scanIdentifier(v []byte) []byte {
	var n int // number of leading bytes of v belonging to an identifier
	for {
		// At the end of v (and on invalid UTF-8) DecodeRune reports
		// utf8.RuneError, which is neither a letter nor a digit, so
		// the loop always terminates.
		r, width := utf8.DecodeRune(v[n:])
		// The first rune must be a letter; later runes may also be digits.
		if !(isLetter(r) || n > 0 && isDigit(r)) {
			break
		}
		n += width
	}
	return v[:n]
}

// isLetter reports whether ch may appear in an identifier as a letter:
// an ASCII letter, an underscore, or a non-ASCII rune for which
// unicode.IsLetter is true.
func isLetter(ch rune) bool {
	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= utf8.RuneSelf && unicode.IsLetter(ch)
}

// isDigit reports whether ch is an ASCII decimal digit or a non-ASCII
// rune for which unicode.IsDigit is true.
func isDigit(ch rune) bool {
	return '0' <= ch && ch <= '9' || ch >= utf8.RuneSelf && unicode.IsDigit(ch)
}
diff --git a/internal/texthtml/texthtml.go b/internal/texthtml/texthtml.go
new file mode 100644
index 00000000..1175fe19
--- /dev/null
+++ b/internal/texthtml/texthtml.go
@@ -0,0 +1,355 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package texthtml formats text files to HTML.
package texthtml

import (
	"bytes"
	"fmt"
	"go/ast"
	"go/scanner"
	"go/token"
	"io"
	"regexp"
	"text/template"
)

// A Span describes a text span [start, end).
// The zero value of a Span is an empty span.
type Span struct {
	Start, End int
}

// isEmpty reports whether s covers no text, i.e. Start >= End.
func (s *Span) isEmpty() bool { return s.Start >= s.End }

// A Selection is an "iterator" function returning a text span.
// Repeated calls to a selection return consecutive, non-overlapping,
// non-empty spans, followed by an infinite sequence of empty
// spans. The first empty span marks the end of the selection.
type Selection func() Span

// A Config configures how to format text as HTML.
//
// NOTE(review): several field comments below end in a dangling "with";
// the markup they named appears to have been lost — confirm against the
// upstream source.
type Config struct {
	Line       int       // if >= 1, number lines beginning with number Line, with
	GoComments bool      // mark comments in Go text with
	Highlight  string    // highlight matches for this regexp with
	Selection  Selection // mark selected spans with
	AST        ast.Node  // link uses to declarations, assuming text is formatting of AST
}

// Format formats text to HTML according to the configuration cfg.
//
// The text is first written through formatSelections with four selections
// in a fixed order: Go comments, Highlight matches, cfg.Selection and Go
// identifiers. If cfg.AST is non-nil the output is then handed to
// postFormatAST, and if cfg.Line > 0 a final pass prefixes every line
// with its line number.
func Format(text []byte, cfg Config) (html []byte) {
	// comments and highlights stay nil unless enabled; newMerger treats a
	// nil Selection as one that never produces a span.
	var comments, highlights Selection
	if cfg.GoComments {
		comments = tokenSelection(text, token.COMMENT)
	}
	if cfg.Highlight != "" {
		highlights = regexpSelection(text, cfg.Highlight)
	}

	var buf bytes.Buffer
	// Without an AST there is nothing to link, so the identifier
	// selection is empty and no goLinks are supplied.
	var idents Selection = Spans()
	var goLinks []goLink
	if cfg.AST != nil {
		idents = tokenSelection(text, token.IDENT)
		goLinks = goLinksFor(cfg.AST)
	}

	// The argument order matters: formatSelections treats the selection
	// at index 3 (idents) as the link selection, and the first three as
	// bits 0-2 of the bitset passed to selectionTag.
	formatSelections(&buf, text, goLinks, comments, highlights, cfg.Selection, idents)

	if cfg.AST != nil {
		postFormatAST(&buf, cfg.AST)
	}

	if cfg.Line > 0 {
		// Add line numbers in a separate pass.
		old := buf.Bytes()
		buf = bytes.Buffer{}
		n := cfg.Line
		// NOTE(review): bytes.Split yields a final empty element when old
		// ends in "\n", so such input gets one extra numbered, empty line,
		// and every element has "\n" appended — confirm this is intended.
		for _, line := range bytes.Split(old, []byte("\n")) {
			// The line numbers are inserted into the document via a CSS ::before
			// pseudo-element. This prevents them from being copied when users
			// highlight and copy text.
			// ::before is supported in 98% of browsers: https://caniuse.com/#feat=css-gencontent
			// This is also the trick Github uses to hide line numbers.
			//
			// The first tab for the code snippet needs to start in column 9, so
			// it indents a full 8 spaces, hence the two nbsp's. Otherwise the tab
			// character only indents a short amount.
			//
			// Due to rounding and font width Firefox might not treat 8 rendered
			// characters as 8 characters wide, and subsequently may treat the tab
			// character in the 9th position as moving the width from (7.5 or so) up
			// to 8. See
			// https://github.com/webcompat/web-bugs/issues/17530#issuecomment-402675091
			// for a fuller explanation. The solution is to add a CSS class to
			// explicitly declare the width to be 8 characters.
			//
			// NOTE(review): the format string below has a single verb but
			// receives two arguments; the markup that consumed the first n
			// appears to be missing — confirm against the upstream source.
			fmt.Fprintf(&buf, `%6d  `, n, n)
			n++
			buf.Write(line)
			buf.WriteByte('\n')
		}
	}
	return buf.Bytes()
}

// formatSelections writes text to w as HTML.
//
// The selections are merged into a single sequence of span start/end
// events. Runs of text covered by the same set of selections are written
// through selectionTag, which receives a bitset whose i'th bit is set when
// selections[i] covers the run.
//
// The selection at index 3 is handled differently: it never contributes a
// bit. Instead, each of its span starts consumes the next entry of goLinks
// and writes that entry's start tag, and the matching span end writes its
// end tag. Format passes the identifier selection in that position.
func formatSelections(w io.Writer, text []byte, goLinks []goLink, selections ...Selection) {
	// compute the sequence of consecutive span changes
	changes := newMerger(selections)

	// The i'th bit in bitset indicates that the text
	// at the current offset is covered by selections[i].
	bitset := 0
	lastOffs := 0

	// Text spans are written in a delayed fashion
	// such that consecutive spans belonging to the
	// same selection can be combined (peephole optimization).
	// last describes the last span which has not yet been written.
	var last struct {
		begin, end int // valid if begin < end
		bitset     int
	}

	// flush writes the last delayed text span
	flush := func() {
		if last.begin < last.end {
			selectionTag(w, text[last.begin:last.end], last.bitset)
		}
		last.begin = last.end // invalidate last
	}

	// span runs the span [lastOffs, end) with the selection
	// indicated by bitset through the span peephole optimizer.
	span := func(end int) {
		if lastOffs < end { // ignore empty spans
			if last.end != lastOffs || last.bitset != bitset {
				// the last span is not adjacent to or
				// differs from the new one
				flush()
				// start a new span
				last.begin = lastOffs
			}
			last.end = end
			last.bitset = bitset
		}
	}

	// linkEnd holds the end tag of the currently open link, or "" when
	// no link is open.
	linkEnd := ""
	for {
		// get the next span change
		index, offs, start := changes.next()
		if index < 0 || offs > len(text) {
			// no more span changes or the next change
			// is past the end of the text - we're done
			break
		}

		// format the previous selection span, determine
		// the new selection bitset and start a new span
		span(offs)
		if index == 3 { // Go link
			// Pending text must be written before the link tag so the
			// tag lands exactly at offs.
			flush()
			if start {
				// If goLinks runs out, remaining link spans are
				// written without any tag.
				if len(goLinks) > 0 {
					start, end := goLinks[0].tags()
					io.WriteString(w, start)
					linkEnd = end
					goLinks = goLinks[1:]
				}
			} else {
				if linkEnd != "" {
					io.WriteString(w, linkEnd)
					linkEnd = ""
				}
			}
		} else {
			mask := 1 << uint(index)
			if start {
				bitset |= mask
			} else {
				bitset &^= mask
			}
		}
		lastOffs = offs
	}
	// Write whatever text follows the last span change.
	span(len(text))
	flush()
}

// A merger merges a slice of Selections and produces a sequence of
// consecutive span change events through repeated next() calls.
+type merger struct { + selections []Selection + spans []Span // spans[i] is the next span of selections[i] +} + +const infinity int = 2e9 + +func newMerger(selections []Selection) *merger { + spans := make([]Span, len(selections)) + for i, sel := range selections { + spans[i] = Span{infinity, infinity} + if sel != nil { + if seg := sel(); !seg.isEmpty() { + spans[i] = seg + } + } + } + return &merger{selections, spans} +} + +// next returns the next span change: index specifies the Selection +// to which the span belongs, offs is the span start or end offset +// as determined by the start value. If there are no more span changes, +// next returns an index value < 0. +func (m *merger) next() (index, offs int, start bool) { + // find the next smallest offset where a span starts or ends + offs = infinity + index = -1 + for i, seg := range m.spans { + switch { + case seg.Start < offs: + offs = seg.Start + index = i + start = true + case seg.End < offs: + offs = seg.End + index = i + start = false + } + } + if index < 0 { + // no offset found => all selections merged + return + } + // offset found - it's either the start or end offset but + // either way it is ok to consume the start offset: set it + // to infinity so it won't be considered in the following + // next call + m.spans[index].Start = infinity + if start { + return + } + // end offset found - consume it + m.spans[index].End = infinity + // advance to the next span for that selection + seg := m.selections[index]() + if !seg.isEmpty() { + m.spans[index] = seg + } + return +} + +// lineSelection returns the line spans for text as a Selection. 
+func lineSelection(text []byte) Selection { + i, j := 0, 0 + return func() (seg Span) { + // find next newline, if any + for j < len(text) { + j++ + if text[j-1] == '\n' { + break + } + } + if i < j { + // text[i:j] constitutes a line + seg = Span{i, j} + i = j + } + return + } +} + +// tokenSelection returns, as a selection, the sequence of +// consecutive occurrences of token sel in the Go src text. +func tokenSelection(src []byte, sel token.Token) Selection { + var s scanner.Scanner + fset := token.NewFileSet() + file := fset.AddFile("", fset.Base(), len(src)) + s.Init(file, src, nil, scanner.ScanComments) + return func() (seg Span) { + for { + pos, tok, lit := s.Scan() + if tok == token.EOF { + break + } + offs := file.Offset(pos) + if tok == sel { + seg = Span{offs, offs + len(lit)} + break + } + } + return + } +} + +// Spans is a helper function to make a Selection from a slice of spans. +// Empty spans are discarded. +func Spans(spans ...Span) Selection { + i := 0 + return func() Span { + for i < len(spans) { + s := spans[i] + i++ + if s.Start < s.End { + // non-empty + return s + } + } + return Span{} + } +} + +// regexpSelection computes the Selection for the regular expression expr in text. +func regexpSelection(text []byte, expr string) Selection { + var matches [][]int + if rx, err := regexp.Compile(expr); err == nil { + matches = rx.FindAllIndex(text, -1) + } + var spans []Span + for _, m := range matches { + spans = append(spans, Span{m[0], m[1]}) + } + return Spans(spans...) +} + +// Span tags for all the possible selection combinations that may +// be generated by FormatText. Selections are indicated by a bitset, +// and the value of the bitset specifies the tag to be used. 
//
//	bit 0: comments
//	bit 1: highlights
//	bit 2: selections
//
// NOTE(review): every start tag and the end tag below are empty, so
// selectionTag currently never wraps text in a tag; the markup looks like
// it was lost from these literals — confirm against the upstream source.
var startTags = [][]byte{
	/* 000 */ []byte(``),
	/* 001 */ []byte(``),
	/* 010 */ []byte(``),
	/* 011 */ []byte(``),
	/* 100 */ []byte(``),
	/* 101 */ []byte(``),
	/* 110 */ []byte(``),
	/* 111 */ []byte(``),
}

// endTag closes any tag opened from startTags.
var endTag = []byte(``)

// selectionTag writes text to w with HTML escaping applied. If startTags
// holds a non-empty tag for the bitset selections, the escaped text is
// enclosed between that tag and endTag. A bitset beyond the end of the
// table is written without tags.
func selectionTag(w io.Writer, text []byte, selections int) {
	var open []byte
	if selections < len(startTags) {
		open = startTags[selections]
	}
	if len(open) == 0 {
		// No tag for this combination: emit the escaped text only.
		template.HTMLEscape(w, text)
		return
	}
	w.Write(open)
	template.HTMLEscape(w, text)
	w.Write(endTag)
}