зеркало из https://github.com/github/docs.git
212 строки
7.8 KiB
JavaScript
Executable File
212 строки
7.8 KiB
JavaScript
Executable File
#!/usr/bin/env node
|
|
|
|
// [start-readme]
//
// Run this script to find internal links in all content and data Markdown files, check whether the title, the link,
// or both are outdated, and automatically update them if so.
//
// Exceptions:
// * Links with fragments (e.g., [Bar](/foo#bar)) will get their root links updated if necessary, but the fragment
//   and title will be unchanged (e.g., [Bar](/noo#bar)).
// * Links with hardcoded versions (e.g., [Foo](/enterprise-server/baz)) will get their root links updated if
//   necessary, but the hardcoded versions will be preserved (e.g., [Foo](/enterprise-server/qux)).
// * Links with Liquid in the titles will have their root links updated if necessary, but the titles will be preserved.
// * Titles are only updated for links that are immediately preceded by an opening quotation mark
//   (e.g., "[Foo](/foo)"); other links only get their root links updated.
//
// [end-readme]
|
|
|
|
import { fileURLToPath } from 'url'
|
|
import path from 'path'
|
|
import fs from 'fs'
|
|
import walk from 'walk-sync'
|
|
import { fromMarkdown } from 'mdast-util-from-markdown'
|
|
import visit from 'unist-util-visit'
|
|
import { loadPages, loadPageMap } from '../lib/page-data.js'
|
|
import loadSiteData from '../lib/site-data.js'
|
|
import loadRedirects from '../lib/redirects/precompile.js'
|
|
import { getPathWithoutLanguage, getPathWithoutVersion } from '../lib/path-utils.js'
|
|
import { allVersionKeys } from '../lib/all-versions.js'
|
|
import frontmatter from '../lib/read-frontmatter.js'
|
|
import renderContent from '../lib/render-content/index.js'
|
|
import patterns from '../lib/patterns.js'
|
|
import getRedirect from '../lib/get-redirect.js'
|
|
// ES modules do not define `__dirname`, so derive this script's directory from its own module URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url))
|
|
|
|
/**
 * List the Markdown files found under a directory located one level above this script.
 *
 * Files are returned with their base path included. Anything that does not end in
 * `.md`, whose name ends in `README.md`, or whose path contains `/early-access/`
 * is left out.
 *
 * @param {string} pathToWalk - Directory to walk, relative to the parent of this script's directory.
 * @returns {string[]} Paths of the Markdown files to process.
 */
const walkFiles = (pathToWalk) => {
  const root = path.posix.join(__dirname, '..', pathToWalk)
  const files = walk(root, {
    includeBasePath: true,
    directories: false,
  })

  return files.filter(
    (file) =>
      file.endsWith('.md') &&
      !file.endsWith('README.md') &&
      // ignore EA for now
      !file.includes('/early-access/')
  )
}
|
|
|
|
// Every Markdown file under content/ and data/ that the script will inspect.
const allFiles = [...walkFiles('content'), ...walkFiles('data')]

// Maps the node type of a link title's wrapper to the Markdown delimiter that produces it.
// The script logs an error and exits if it finds any markup not represented here.
// Hacky but it captures the current rare edge cases.
const linkInlineMarkup = {
  emphasis: '*',
  strong: '**',
}

// Used to collapse the spaces inside the Liquid tag so that the link parses as a link node.
const currentVersionWithSpacesRegex = /\/enterprise\/{{ currentVersion }}/g
const currentVersionWithoutSpaces = '/enterprise/{{currentVersion}}'

// Surface any unexpected rejection explicitly instead of leaving the promise floating,
// and make sure the script ends with a non-zero exit code when that happens.
main().catch((error) => {
  console.error(error)
  process.exit(1)
})
|
|
|
|
/**
 * Walk every Markdown file in `allFiles`, resolve each internal link against the loaded
 * pages and redirects, and write the file back with up-to-date link paths (and titles,
 * where the rules below allow it).
 *
 * Every file is rewritten through `frontmatter.stringify`, whether or not a link changed.
 *
 * Exits the process with status 1 when a link title is wrapped in inline markup that is
 * not listed in `linkInlineMarkup`, or when a link cannot be resolved to a page for any
 * version in `allVersionKeys`.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('Working...')
  const pageList = await loadPages()
  const pageMap = await loadPageMap(pageList)
  const redirects = await loadRedirects(pageList)
  const site = await loadSiteData()

  // Rendering context shared by all links; `currentVersion` is set per version below.
  const context = {
    pages: pageMap,
    redirects,
    site: site.en.site,
    currentLanguage: 'en',
  }

  for (const file of allFiles) {
    const { data, content } = frontmatter(fs.readFileSync(file, 'utf8'))

    // Do a blanket find-replace for /enterprise/{{ currentVersion }}/ to /enterprise/{{currentVersion}}/
    // so that the AST parser recognizes the link as a link node. The spaces prevent it from doing so.
    let newContent = content.replace(currentVersionWithSpacesRegex, currentVersionWithoutSpaces)

    const ast = fromMarkdown(newContent)

    // We can't do async functions within visit, so gather the nodes upfront
    const nodesPerFile = []

    visit(ast, (node) => {
      if (node.type !== 'link') return
      // Only root-relative links are candidates.
      if (!node.url.startsWith('/')) return
      if (node.url.startsWith('/assets')) return
      if (node.url.startsWith('/public')) return
      // Links containing these hardcoded version segments are left alone.
      if (node.url.includes('/11.10.340/')) return
      if (node.url.includes('/2.1/')) return
      if (node.url === '/') return

      nodesPerFile.push(node)
    })

    // For every Markdown link...
    for (const node of nodesPerFile) {
      const oldLink = node.url
      // The title is read from the link's first child only: either a node carrying
      // `value` directly, or a wrapper (e.g. emphasis) whose first child carries it.
      const [titleNode] = node.children

      // Find and preserve any inline markup in link titles, like [*Foo*](/foo)
      let inlineMarkup = ''
      if (titleNode.children) {
        inlineMarkup = linkInlineMarkup[titleNode.type]

        if (!inlineMarkup) {
          console.error(`Cannot find an inline markup entry for ${titleNode.type}!`)
          process.exit(1)
        }
      }

      const oldTitle = titleNode.value || titleNode.children[0].value
      const oldMarkdownLink = `[${inlineMarkup}${oldTitle}${inlineMarkup}](${oldLink})`

      // As a blanket rule, only update titles in links that begin with quotes. (Many links
      // have punctuation before the closing quotes, so we'll only check for opening quotes.)
      // Update: "[Foo](/foo)
      // Do not update: [Bar](/bar)
      const hasQuotesAroundLink = newContent.includes(`"${oldMarkdownLink}`)

      // We need to preserve fragments and hardcoded versions if any are found. Both depend
      // only on the original link, so they are computed once rather than once per version.
      const fragmentMatch = oldLink.match(/(#.*$)/)
      const versionMatch = oldLink.match(/(enterprise-server(?:@.[^/]*?)?)\//)

      let foundPage

      // Run through all supported versions...
      for (const version of allVersionKeys) {
        context.currentVersion = version
        // Render the link for each version using the renderContent pipeline, which includes the rewrite-local-links plugin.
        const $ = await renderContent(oldMarkdownLink, context, { cheerioObject: true })
        const renderedHref = $('a').attr('href')

        // No anchor came out of the render for this version; try the next one. If no
        // version yields a page, the "Can't find link" error below reports the link.
        if (typeof renderedHref !== 'string') continue

        // Remove the fragment for now.
        const linkToCheck = renderedHref.replace(/#.*$/, '').replace(patterns.trailingSlash, '$1')

        // Try to find the rendered link in the set of pages!
        foundPage = findPage(linkToCheck, pageMap, redirects)

        // Once a page is found for a particular version, exit immediately; we don't need to check the other versions
        // because all we care about is the page title and path.
        if (foundPage) break
      }

      if (!foundPage) {
        console.error(
          `Can't find link in pageMap! ${oldLink} in ${file.replace(process.cwd(), '')}`
        )
        process.exit(1)
      }

      // If the original link includes a fragment OR the original title includes Liquid, do not change;
      // otherwise, use the found page title. (We don't want to update the title if a fragment is found because
      // the title likely points to the fragment section header, not the page title.)
      const keepOldTitle = fragmentMatch || oldTitle.includes('{%') || !hasQuotesAroundLink
      const newTitle = keepOldTitle ? oldTitle : foundPage.title

      // If the original link includes a fragment, append it to the found page path.
      // Also remove the language code because Markdown links don't include language codes.
      let newLink = getPathWithoutLanguage(
        fragmentMatch ? foundPage.path + fragmentMatch[1] : foundPage.path
      )

      // If the original link includes a hardcoded version, preserve it; otherwise, remove versioning
      // because Markdown links don't include versioning.
      newLink = versionMatch
        ? `/${versionMatch[1]}${getPathWithoutVersion(newLink)}`
        : getPathWithoutVersion(newLink)

      let newMarkdownLink = `[${inlineMarkup}${newTitle}${inlineMarkup}](${newLink})`

      // Handle a few misplaced quotation marks.
      if (oldMarkdownLink.includes('["')) {
        newMarkdownLink = `"${newMarkdownLink}`
      }

      // Stream the results to console as we find them.
      if (oldMarkdownLink !== newMarkdownLink) {
        console.log('old link', oldMarkdownLink)
        console.log('new link', newMarkdownLink)
        console.log('-------')
      }

      // Use a replacer function so that `$` sequences in a title or path (e.g. `$&`, `$1`)
      // are inserted literally instead of being treated as replacement patterns.
      newContent = newContent.replace(oldMarkdownLink, () => newMarkdownLink)
    }

    fs.writeFileSync(file, frontmatter.stringify(newContent, data, { lineWidth: 10000 }))
  }

  console.log('Done!')
}
|
|
|
|
/**
 * Look up a page by path, falling back to whatever `getRedirect` returns for that path.
 *
 * @param {string} tryPath - Path to look up as a key of `pageMap`.
 * @param {Object} pageMap - Object keyed by path; only each entry's `title` is read here.
 * @param {Object} redirects - Redirect data, passed through to `getRedirect` unchanged.
 * @returns {{title: *, path: string}|undefined} The matching page's title together with the
 *   path it was found under (the redirect target when the fallback matched), or `undefined`
 *   when neither the path nor its redirect is a key of `pageMap`.
 */
function findPage(tryPath, pageMap, redirects) {
  const directHit = pageMap[tryPath]
  if (directHit) {
    return { title: directHit.title, path: tryPath }
  }

  const redirect = getRedirect(tryPath, { redirects, pages: pageMap })
  const redirectedPage = pageMap[redirect]
  if (redirectedPage) {
    return { title: redirectedPage.title, path: redirect }
  }

  return undefined
}
|