build/relnote/links.go

318 строки
8.2 KiB
Go

// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package relnote
import (
"fmt"
"strings"
"unicode"
"unicode/utf8"
"golang.org/x/mod/module"
md "rsc.io/markdown"
)
// addSymbolLinks looks for text like [Buffer] and
// [math.Max] and replaces them with links to standard library
// symbols and packages.
// It uses the given default package for links without a package.
func addSymbolLinks(doc *md.Document, defaultPackage string) {
addSymbolLinksBlocks(doc.Blocks, defaultPackage)
}
func addSymbolLinksBlocks(bs []md.Block, defaultPackage string) {
for _, b := range bs {
addSymbolLinksBlock(b, defaultPackage)
}
}
func addSymbolLinksBlock(b md.Block, defaultPackage string) {
switch b := b.(type) {
case *md.Heading:
addSymbolLinksBlock(b.Text, defaultPackage)
case *md.Text:
b.Inline = addSymbolLinksInlines(b.Inline, defaultPackage)
case *md.List:
addSymbolLinksBlocks(b.Items, defaultPackage)
case *md.Item:
addSymbolLinksBlocks(b.Blocks, defaultPackage)
case *md.Paragraph:
addSymbolLinksBlock(b.Text, defaultPackage)
case *md.Quote:
addSymbolLinksBlocks(b.Blocks, defaultPackage)
// no links in these blocks
case *md.CodeBlock:
case *md.HTMLBlock:
case *md.Empty:
case *md.ThematicBreak:
default:
panic(fmt.Sprintf("unknown block type %T", b))
}
}
// addSymbolLinksInlines looks for symbol links in the slice of inline markdown
// elements. It returns a new slice of inline elements with links added.
func addSymbolLinksInlines(ins []md.Inline, defaultPackage string) []md.Inline {
ins = splitAtBrackets(ins)
var res []md.Inline
for i := 0; i < len(ins); i++ {
if txt := symbolLinkText(i, ins); txt != "" {
link, ok := symbolLink(txt, defaultPackage)
if ok {
res = append(res, link)
i += 2
continue
}
}
// Handle inline elements with nested content.
switch in := ins[i].(type) {
case *md.Strong:
res = append(res, &md.Strong{
Marker: in.Marker,
Inner: addSymbolLinksInlines(in.Inner, defaultPackage),
})
case *md.Emph:
res = append(res, &md.Emph{
Marker: in.Marker,
Inner: addSymbolLinksInlines(in.Inner, defaultPackage),
})
// Currently we don't support Del nodes because we don't enable the Strikethrough
// extension. But this can't hurt.
case *md.Del:
res = append(res, &md.Del{
Marker: in.Marker,
Inner: addSymbolLinksInlines(in.Inner, defaultPackage),
})
// Don't look for links in anything else.
default:
res = append(res, in)
}
}
return res
}
// splitAtBrackets rewrites ins so that every '[' and ']' is the only character
// of its Plain.
// For example, the element
//
// [Plain("the [Buffer] is")]
//
// is rewritten to
//
// [Plain("the "), Plain("["), Plain("Buffer"), Plain("]"), Plain(" is")]
//
// This transformation simplifies looking for symbol links.
func splitAtBrackets(ins []md.Inline) []md.Inline {
var res []md.Inline
for _, in := range ins {
if p, ok := in.(*md.Plain); ok {
text := p.Text
for len(text) > 0 {
i := strings.IndexAny(text, "[]")
// If there are no brackets, the remaining text is a single
// Plain and we are done.
if i < 0 {
res = append(res, &md.Plain{Text: text})
break
}
// There is a bracket; make Plains for it and the text before it (if any).
if i > 0 {
res = append(res, &md.Plain{Text: text[:i]})
}
res = append(res, &md.Plain{Text: text[i : i+1]})
text = text[i+1:]
}
} else {
res = append(res, in)
}
}
return res
}
// symbolLinkText returns the text of a possible symbol link.
// It is given a slice of Inline elements and an index into the slice.
// If the index refers to a sequence of elements
//
// [Plain("["), Plain_or_Code(text), Plain("]")]
//
// and the brackets are adjacent to the right kind of runes for a link, then
// symbolLinkText returns the text of the middle element.
// Otherwise it returns the empty string.
func symbolLinkText(i int, ins []md.Inline) string {
// plainText returns the text of ins[j] if it is a Plain element, or "" otherwise.
plainText := func(j int) string {
if j < 0 || j >= len(ins) {
return ""
}
if p, ok := ins[j].(*md.Plain); ok {
return p.Text
}
return ""
}
// ins[i] must be a "[".
if plainText(i) != "[" {
return ""
}
// The open bracket must be preceeded by a link-adjacent rune (or by nothing).
if t := plainText(i - 1); t != "" {
r, _ := utf8.DecodeLastRuneInString(t)
if !isLinkAdjacentRune(r) {
return ""
}
}
// The element after the next must be a ']'.
if plainText(i+2) != "]" {
return ""
}
// The ']' must be followed by a link-adjacent rune (or by nothing).
if t := plainText(i + 3); t != "" {
r, _ := utf8.DecodeRuneInString(t)
if !isLinkAdjacentRune(r) {
return ""
}
}
// ins[i+1] must be a Plain or a Code.
// Its text is the symbol to link to.
if i+1 >= len(ins) {
return ""
}
switch in := ins[i+1].(type) {
case *md.Plain:
return in.Text
case *md.Code:
return in.Text
default:
return ""
}
}
// symbolLink converts s into a Link and returns it and true, or nil and false if
// s is not a valid link or is surrounded by runes that disqualify it from being
// converted to a link.
//
// The argument s is the text between '[' and ']'.
func symbolLink(s, defaultPackage string) (md.Inline, bool) {
pkg, sym, ok := splitRef(s)
if !ok {
return nil, false
}
if pkg == "" {
if defaultPackage == "" {
return nil, false
}
pkg = defaultPackage
}
if sym != "" {
sym = "#" + sym
}
return &md.Link{
Inner: []md.Inline{&md.Code{Text: s}},
URL: fmt.Sprintf("/pkg/%s%s", pkg, sym),
}, true
}
// isLinkAdjacentRune reports whether r can be adjacent to a symbol link.
// The logic is the same as the go/doc/comment package.
func isLinkAdjacentRune(r rune) bool {
return unicode.IsPunct(r) || r == ' ' || r == '\t' || r == '\n'
}
// splitRef splits s into a package and possibly a symbol.
// Examples:
//
// splitRef("math.Max") => ("math", "Max", true)
// splitRef("bytes.Buffer.String") => ("bytes", "Buffer.String", true)
// splitRef("math") => ("math", "", true)
func splitRef(s string) (pkg, name string, ok bool) {
s = strings.TrimPrefix(s, "*")
pkg, name, ok = splitDocName(s)
var recv string
if ok {
pkg, recv, _ = splitDocName(pkg)
}
if pkg != "" {
if err := module.CheckImportPath(pkg); err != nil {
return "", "", false
}
}
if recv != "" {
name = recv + "." + name
}
return pkg, name, true
}
// The following functions were copied from go/doc/comment/parse.go.
// If text is of the form before.Name, where Name is a capitalized Go identifier,
// then splitDocName returns before, name, true.
// Otherwise it returns text, "", false.
func splitDocName(text string) (before, name string, foundDot bool) {
i := strings.LastIndex(text, ".")
name = text[i+1:]
if !isName(name) {
return text, "", false
}
if i >= 0 {
before = text[:i]
}
return before, name, true
}
// isName reports whether s is a capitalized Go identifier (like Name).
func isName(s string) bool {
t, ok := ident(s)
if !ok || t != s {
return false
}
r, _ := utf8.DecodeRuneInString(s)
return unicode.IsUpper(r)
}
// ident checks whether s begins with a Go identifier.
// If so, it returns the identifier, which is a prefix of s, and ok == true.
// Otherwise it returns "", false.
// The caller should skip over the first len(id) bytes of s
// before further processing.
func ident(s string) (id string, ok bool) {
// Scan [\pL_][\pL_0-9]*
n := 0
for n < len(s) {
if c := s[n]; c < utf8.RuneSelf {
if isIdentASCII(c) && (n > 0 || c < '0' || c > '9') {
n++
continue
}
break
}
r, nr := utf8.DecodeRuneInString(s[n:])
if unicode.IsLetter(r) {
n += nr
continue
}
break
}
return s[:n], n > 0
}
// isIdentASCII reports whether c is an ASCII identifier byte.
func isIdentASCII(c byte) bool {
// mask is a 128-bit bitmap with 1s for allowed bytes,
// so that the byte c can be tested with a shift and an and.
// If c > 128, then 1<<c and 1<<(c-64) will both be zero,
// and this function will return false.
const mask = 0 |
(1<<26-1)<<'A' |
(1<<26-1)<<'a' |
(1<<10-1)<<'0' |
1<<'_'
return ((uint64(1)<<c)&(mask&(1<<64-1)) |
(uint64(1)<<(c-64))&(mask>>64)) != 0
}