internal/godoc/dochtml/internal/render: ensure unique heading ids

The existing implementation takes a simple approach of using the heading
text and replacing non-safe characters.  This leads to potential duplicate
heading ids, so use a map to keep track of used heading ids and generate
unique ids with various strategies until a unique id is generated.

Fixes golang/go#64582

Change-Id: I95f823c3782411454cff7353b49d90fab5799a45
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/573595
TryBot-Result: Gopher Robot <gobot@golang.org>
TryBot-Bypass: Jonathan Amsterdam <jba@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
Reviewed-by: Robert Findley <rfindley@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
This commit is contained in:
Joseph Shin 2024-03-02 16:48:55 -05:00 коммит произвёл Jonathan Amsterdam
Родитель 71f98d56b5
Коммит 53b9917759
3 изменённых файлов: 325 добавлений и 24 удалений

Просмотреть файл

@ -23,6 +23,7 @@ import (
safe "github.com/google/safehtml"
"github.com/google/safehtml/legacyconversions"
"github.com/google/safehtml/template"
"golang.org/x/net/html"
"golang.org/x/pkgsite/internal/log"
)
@ -111,24 +112,62 @@ var (
`<li{{with .Number}} value="{{.}}"{{end}}>{{.Content}}</li>`))
)
func (r *Renderer) formatDocHTML(text string, extractLinks bool) safe.HTML {
func (r *Renderer) formatDocHTML(text string, decl ast.Decl, extractLinks bool) safe.HTML {
doc := r.commentParser.Parse(text)
if extractLinks {
r.removeLinks(doc)
}
var headings []heading
for _, b := range doc.Content {
if h, ok := b.(*comment.Heading); ok {
headings = append(headings, r.newHeading(h))
}
}
h := r.blocksToHTML(doc.Content, true, extractLinks)
if len(headings) > 0 {
h := r.blocksToHTML(doc.Content, decl, true, extractLinks)
if headings := extractHeadings(h); len(headings) > 0 {
h = safe.HTMLConcat(ExecuteToHTML(tocTemplate, headings), h)
}
return h
}
func extractHeadings(h safe.HTML) []heading {
var headings []heading
doc, err := html.Parse(strings.NewReader(h.String()))
if err != nil {
return nil
}
var f func(*html.Node)
f = func(n *html.Node) {
for _, a := range n.Attr {
if a.Key == "id" && strings.HasPrefix(a.Val, "hdr-") {
if tn := firstTextNode(n); tn != nil {
title := strings.TrimSpace(tn.Data)
hdrName := strings.TrimPrefix(a.Val, "hdr-")
id := safe.IdentifierFromConstantPrefix("hdr", hdrName)
headings = append(headings, heading{id, safe.HTMLEscaped(title)})
}
break
}
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
f(c)
}
}
f(doc)
return headings
}
func firstTextNode(n *html.Node) *html.Node {
if n.Type == html.TextNode {
return n
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
if r := firstTextNode(c); r != nil {
return r
}
}
return nil
}
func (r *Renderer) removeLinks(doc *comment.Doc) {
var bs []comment.Block
inLinks := false
@ -214,13 +253,13 @@ func parseLink(line string) *Link {
}
}
func (r *Renderer) blocksToHTML(bs []comment.Block, useParagraph, extractLinks bool) safe.HTML {
func (r *Renderer) blocksToHTML(bs []comment.Block, decl ast.Decl, useParagraph, extractLinks bool) safe.HTML {
return concatHTML(bs, func(b comment.Block) safe.HTML {
return r.blockToHTML(b, useParagraph, extractLinks)
return r.blockToHTML(b, decl, useParagraph, extractLinks)
})
}
func (r *Renderer) blockToHTML(b comment.Block, useParagraph, extractLinks bool) safe.HTML {
func (r *Renderer) blockToHTML(b comment.Block, decl ast.Decl, useParagraph, extractLinks bool) safe.HTML {
switch b := b.(type) {
case *comment.Paragraph:
th := r.textsToHTML(b.Text)
@ -233,7 +272,7 @@ func (r *Renderer) blockToHTML(b comment.Block, useParagraph, extractLinks bool)
return ExecuteToHTML(codeTemplate, b.Text)
case *comment.Heading:
return ExecuteToHTML(headingTemplate, r.newHeading(b))
return ExecuteToHTML(headingTemplate, r.newHeading(b, decl))
case *comment.List:
var items []safe.HTML
@ -242,7 +281,7 @@ func (r *Renderer) blockToHTML(b comment.Block, useParagraph, extractLinks bool)
items = append(items, ExecuteToHTML(listItemTemplate, struct {
Number string
Content safe.HTML
}{item.Number, r.blocksToHTML(item.Content, useParagraph, false)}))
}{item.Number, r.blocksToHTML(item.Content, decl, useParagraph, false)}))
}
t := oListTemplate
if b.Items[0].Number == "" {
@ -254,8 +293,53 @@ func (r *Renderer) blockToHTML(b comment.Block, useParagraph, extractLinks bool)
}
}
func (r *Renderer) newHeading(h *comment.Heading) heading {
return heading{headingID(h), r.textsToHTML(h.Text)}
// headingIDs keeps track of used heading ids to prevent duplicates.
type headingIDs map[safe.Identifier]bool
// headingID returns a unique identifier for a *comment.Heading & ast.Decl pair.
func (r *Renderer) headingID(h *comment.Heading, decl ast.Decl) safe.Identifier {
s := textsToString(h.Text)
hdrTitle := badAnchorRx.ReplaceAllString(s, "_")
id := safe.IdentifierFromConstantPrefix("hdr", hdrTitle)
if !r.headingIDs[id] {
r.headingIDs[id] = true
return id
}
// The id is not unique. Attempt to generate an identifier using the decl.
for _, v := range generateAnchorPoints(decl) {
if v.Kind == "field" {
continue
}
hdrTitle = badAnchorRx.ReplaceAllString(v.ID.String()+"_"+s, "_")
if v.Kind == "constant" || v.Kind == "variable" {
if specs := decl.(*ast.GenDecl).Specs; len(specs) > 1 {
// Grouped consts and vars cannot be deterministically identified,
// so treat them as a single section. e.g. "hdr-constant-Title"
hdrTitle = badAnchorRx.ReplaceAllString(v.Kind+"_"+s, "_")
}
}
if v.Kind != "method" {
// Continue iterating until we find a higher precedence kind.
break
}
}
for {
id = safe.IdentifierFromConstantPrefix("hdr", hdrTitle)
if !r.headingIDs[id] {
r.headingIDs[id] = true
break
}
// The id is still not unique. Append _ until unique.
hdrTitle = hdrTitle + "_"
}
return id
}
func (r *Renderer) newHeading(h *comment.Heading, decl ast.Decl) heading {
return heading{r.headingID(h, decl), r.textsToHTML(h.Text)}
}
func (r *Renderer) textsToHTML(ts []comment.Text) safe.HTML {
@ -310,12 +394,6 @@ func badType(x any) safe.HTML {
return safe.HTMLEscaped(fmt.Sprintf("bad type %T", x))
}
func headingID(h *comment.Heading) safe.Identifier {
s := textsToString(h.Text)
id := badAnchorRx.ReplaceAllString(s, "_")
return safe.IdentifierFromConstantPrefix("hdr", id)
}
func textsToString(ts []comment.Text) string {
var b strings.Builder
for _, t := range ts {
@ -375,7 +453,7 @@ func linkRFCs(s string) safe.HTML {
func (r *Renderer) declHTML(doc string, decl ast.Decl, extractLinks bool) (out struct{ Doc, Decl safe.HTML }) {
if doc != "" {
out.Doc = r.formatDocHTML(doc, extractLinks)
out.Doc = r.formatDocHTML(doc, decl, extractLinks)
}
if decl != nil {
idr := &identifierResolver{r.pids, newDeclIDs(decl), r.packageURL}

Просмотреть файл

@ -20,6 +20,17 @@ import (
)
func TestFormatDocHTML(t *testing.T) {
duplicateHeadersDoc := `Documentation.
Information
This is some information.
Information
This is some other information.
`
linksDoc := `Documentation.
The Go Project
@ -38,10 +49,32 @@ Header
More doc.
`
// typeWithFieldsDecl is declared as:
// type I2 interface {
// I1
// M2()
// }
typeWithFieldsDecl := &ast.GenDecl{
Tok: token.TYPE,
Specs: []ast.Spec{
&ast.TypeSpec{
Name: ast.NewIdent("I2"),
Type: &ast.InterfaceType{
Methods: &ast.FieldList{
List: []*ast.Field{
{Type: ast.NewIdent("I1")},
{Type: &ast.FuncType{}, Names: []*ast.Ident{ast.NewIdent("M2")}},
},
},
},
},
},
}
for _, test := range []struct {
name string
doc string
decl ast.Decl
extractLinks []bool // nil means both
want string
wantLinks []Link
@ -89,6 +122,194 @@ Go is an open source project.`,
</div>
<p>Documentation.
</p><h4 id="hdr-The_Go_Project">The Go Project <a class="Documentation-idLink" href="#hdr-The_Go_Project" aria-label="Go to The Go Project"></a></h4><p>Go is an open source project.
</p>`,
},
{
name: "unique header ids in overview section",
doc: duplicateHeadersDoc,
want: `<div role="navigation" aria-label="Table of Contents">
<ul class="Documentation-toc">
<li class="Documentation-tocItem">
<a href="#hdr-Information">Information</a>
</li>
<li class="Documentation-tocItem">
<a href="#hdr-Information_">Information</a>
</li>
</ul>
</div>
<p>Documentation.
</p><h4 id="hdr-Information">Information <a class="Documentation-idLink" href="#hdr-Information" aria-label="Go to Information"></a></h4><p>This is some information.
</p><h4 id="hdr-Information_">Information <a class="Documentation-idLink" href="#hdr-Information_" aria-label="Go to Information"></a></h4><p>This is some other information.
</p>`,
},
{
name: "unique header ids in constants section for grouped constants",
doc: duplicateHeadersDoc,
decl: &ast.GenDecl{
Tok: token.CONST,
Specs: []ast.Spec{&ast.ValueSpec{Names: []*ast.Ident{{}}}, &ast.ValueSpec{Names: []*ast.Ident{{}}}},
},
want: `<div role="navigation" aria-label="Table of Contents">
<ul class="Documentation-toc">
<li class="Documentation-tocItem">
<a href="#hdr-Information">Information</a>
</li>
<li class="Documentation-tocItem">
<a href="#hdr-constant_Information">Information</a>
</li>
</ul>
</div>
<p>Documentation.
</p><h4 id="hdr-Information">Information <a class="Documentation-idLink" href="#hdr-Information" aria-label="Go to Information"></a></h4><p>This is some information.
</p><h4 id="hdr-constant_Information">Information <a class="Documentation-idLink" href="#hdr-constant_Information" aria-label="Go to Information"></a></h4><p>This is some other information.
</p>`,
},
{
name: "unique header ids in variables section for grouped variables",
doc: duplicateHeadersDoc,
decl: &ast.GenDecl{
Tok: token.VAR,
Specs: []ast.Spec{&ast.ValueSpec{Names: []*ast.Ident{{}}}, &ast.ValueSpec{Names: []*ast.Ident{{}}}},
},
want: `<div role="navigation" aria-label="Table of Contents">
<ul class="Documentation-toc">
<li class="Documentation-tocItem">
<a href="#hdr-Information">Information</a>
</li>
<li class="Documentation-tocItem">
<a href="#hdr-variable_Information">Information</a>
</li>
</ul>
</div>
<p>Documentation.
</p><h4 id="hdr-Information">Information <a class="Documentation-idLink" href="#hdr-Information" aria-label="Go to Information"></a></h4><p>This is some information.
</p><h4 id="hdr-variable_Information">Information <a class="Documentation-idLink" href="#hdr-variable_Information" aria-label="Go to Information"></a></h4><p>This is some other information.
</p>`,
},
{
name: "unique header ids in functions section",
doc: duplicateHeadersDoc,
decl: &ast.FuncDecl{Name: ast.NewIdent("FooFunc")},
want: `<div role="navigation" aria-label="Table of Contents">
<ul class="Documentation-toc">
<li class="Documentation-tocItem">
<a href="#hdr-Information">Information</a>
</li>
<li class="Documentation-tocItem">
<a href="#hdr-FooFunc_Information">Information</a>
</li>
</ul>
</div>
<p>Documentation.
</p><h4 id="hdr-Information">Information <a class="Documentation-idLink" href="#hdr-Information" aria-label="Go to Information"></a></h4><p>This is some information.
</p><h4 id="hdr-FooFunc_Information">Information <a class="Documentation-idLink" href="#hdr-FooFunc_Information" aria-label="Go to Information"></a></h4><p>This is some other information.
</p>`,
},
{
name: "unique header ids in functions section for method",
doc: duplicateHeadersDoc,
decl: &ast.FuncDecl{
Recv: &ast.FieldList{List: []*ast.Field{{Type: ast.NewIdent("Bar")}}},
Name: ast.NewIdent("Func"),
},
want: `<div role="navigation" aria-label="Table of Contents">
<ul class="Documentation-toc">
<li class="Documentation-tocItem">
<a href="#hdr-Information">Information</a>
</li>
<li class="Documentation-tocItem">
<a href="#hdr-Bar_Func_Information">Information</a>
</li>
</ul>
</div>
<p>Documentation.
</p><h4 id="hdr-Information">Information <a class="Documentation-idLink" href="#hdr-Information" aria-label="Go to Information"></a></h4><p>This is some information.
</p><h4 id="hdr-Bar_Func_Information">Information <a class="Documentation-idLink" href="#hdr-Bar_Func_Information" aria-label="Go to Information"></a></h4><p>This is some other information.
</p>`,
},
{
name: "unique header ids in types section",
doc: duplicateHeadersDoc,
decl: &ast.GenDecl{
Tok: token.TYPE,
Specs: []ast.Spec{&ast.TypeSpec{Name: ast.NewIdent("Duration")}},
},
want: `<div role="navigation" aria-label="Table of Contents">
<ul class="Documentation-toc">
<li class="Documentation-tocItem">
<a href="#hdr-Information">Information</a>
</li>
<li class="Documentation-tocItem">
<a href="#hdr-Duration_Information">Information</a>
</li>
</ul>
</div>
<p>Documentation.
</p><h4 id="hdr-Information">Information <a class="Documentation-idLink" href="#hdr-Information" aria-label="Go to Information"></a></h4><p>This is some information.
</p><h4 id="hdr-Duration_Information">Information <a class="Documentation-idLink" href="#hdr-Duration_Information" aria-label="Go to Information"></a></h4><p>This is some other information.
</p>`,
},
{
name: "unique header ids in types section for types with fields",
doc: duplicateHeadersDoc,
decl: typeWithFieldsDecl,
want: `<div role="navigation" aria-label="Table of Contents">
<ul class="Documentation-toc">
<li class="Documentation-tocItem">
<a href="#hdr-Information">Information</a>
</li>
<li class="Documentation-tocItem">
<a href="#hdr-I2_Information">Information</a>
</li>
</ul>
</div>
<p>Documentation.
</p><h4 id="hdr-Information">Information <a class="Documentation-idLink" href="#hdr-Information" aria-label="Go to Information"></a></h4><p>This is some information.
</p><h4 id="hdr-I2_Information">Information <a class="Documentation-idLink" href="#hdr-I2_Information" aria-label="Go to Information"></a></h4><p>This is some other information.
</p>`,
},
{
name: "unique header ids in types section for typed variable",
doc: duplicateHeadersDoc,
decl: &ast.GenDecl{
Tok: token.VAR,
Specs: []ast.Spec{&ast.ValueSpec{Type: &ast.StarExpr{X: ast.NewIdent("Location")}, Names: []*ast.Ident{ast.NewIdent("UTC")}}},
},
want: `<div role="navigation" aria-label="Table of Contents">
<ul class="Documentation-toc">
<li class="Documentation-tocItem">
<a href="#hdr-Information">Information</a>
</li>
<li class="Documentation-tocItem">
<a href="#hdr-UTC_Information">Information</a>
</li>
</ul>
</div>
<p>Documentation.
</p><h4 id="hdr-Information">Information <a class="Documentation-idLink" href="#hdr-Information" aria-label="Go to Information"></a></h4><p>This is some information.
</p><h4 id="hdr-UTC_Information">Information <a class="Documentation-idLink" href="#hdr-UTC_Information" aria-label="Go to Information"></a></h4><p>This is some other information.
</p>`,
},
{
name: "unique header ids in types section for typed constant",
doc: duplicateHeadersDoc,
decl: &ast.GenDecl{
Tok: token.CONST,
Specs: []ast.Spec{&ast.ValueSpec{Type: ast.NewIdent("T"), Names: []*ast.Ident{ast.NewIdent("C")}}},
},
want: `<div role="navigation" aria-label="Table of Contents">
<ul class="Documentation-toc">
<li class="Documentation-tocItem">
<a href="#hdr-Information">Information</a>
</li>
<li class="Documentation-tocItem">
<a href="#hdr-C_Information">Information</a>
</li>
</ul>
</div>
<p>Documentation.
</p><h4 id="hdr-Information">Information <a class="Documentation-idLink" href="#hdr-Information" aria-label="Go to Information"></a></h4><p>This is some information.
</p><h4 id="hdr-C_Information">Information <a class="Documentation-idLink" href="#hdr-C_Information" aria-label="Go to Information"></a></h4><p>This is some other information.
</p>`,
},
{
@ -227,7 +448,7 @@ TLSUnique contains the tls-unique channel binding value (see RFC
for _, el := range extractLinks {
t.Run(fmt.Sprintf("extractLinks=%t", el), func(t *testing.T) {
r := New(context.Background(), nil, pkgTime, nil)
got := r.formatDocHTML(test.doc, el)
got := r.formatDocHTML(test.doc, test.decl, el)
want := testconversions.MakeHTMLForTest(test.want)
if diff := cmp.Diff(want, got, cmp.AllowUnexported(safehtml.HTML{})); diff != "" {
t.Errorf("doc mismatch (-want +got)\n%s", diff)

Просмотреть файл

@ -26,6 +26,7 @@ var (
type Renderer struct {
fset *token.FileSet
headingIDs headingIDs
pids *packageIDs
packageURL func(string) string
ctx context.Context
@ -97,6 +98,7 @@ func New(ctx context.Context, fset *token.FileSet, pkg *doc.Package, opts *Optio
return &Renderer{
fset: fset,
headingIDs: headingIDs{},
pids: pids,
packageURL: packageURL,
docTmpl: docDataTmpl,