// Mirror of https://github.com/github/docs.git
// 123 lines · 4.6 KiB · JavaScript
const cheerio = require('cheerio')
|
|
const findPageInVersion = require('./find-page-in-version')
|
|
const renderContent = require('./render-content')
|
|
const rewriteLocalLinks = require('./rewrite-local-links')
|
|
const nonEnterpriseDefaultVersion = require('./non-enterprise-default-version')
|
|
const { getPathWithoutLanguage } = require('./path-utils')
|
|
const { getEnterpriseVersionNumber, adminProduct } = require('./patterns')
|
|
const { deprecated, latest } = require('./enterprise-server-releases')
|
|
|
|
// internal links will have a language code by the time we're testing them
|
|
// we also want to capture same-page anchors (#foo)
|
|
const languageCode = 'en'
|
|
const internalHrefs = ['/en', '#']
|
|
|
|
const renderedPageCache = {}
|
|
const checkedAnchorCache = {}
|
|
|
|
module.exports = async function checkLinks ($, page, context, version, checkedLinkCache = {}) {
|
|
// run rewriteLocalLinks to version links and add language codes
|
|
rewriteLocalLinks($, version, languageCode)
|
|
|
|
const brokenLinks = {
|
|
anchors: [],
|
|
links: []
|
|
}
|
|
|
|
// internal link check
|
|
for (const href of internalHrefs) {
|
|
const internalLinks = $(`a[href^="${href}"]`).get()
|
|
|
|
for (const internalLink of internalLinks) {
|
|
const href = $(internalLink).attr('href')
|
|
|
|
// enable caching so we don't check links more than once
|
|
// anchor links are cached locally (within this run) since they are specific to the page
|
|
if (checkedLinkCache[href] || checkedAnchorCache[href]) continue
|
|
|
|
const [link, anchor] = href.split('#')
|
|
|
|
// if anchor only (e.g., #foo), look for heading on same page
|
|
if (anchor && !link) {
|
|
// ignore anchors that are autogenerated from headings
|
|
if (anchor === $(internalLink).parent().attr('id')) continue
|
|
|
|
const matchingHeadings = getMatchingHeadings($, anchor)
|
|
|
|
if (matchingHeadings.length === 0) {
|
|
brokenLinks.anchors.push({ 'broken same-page anchor': `#${anchor}`, reason: 'heading not found on page' })
|
|
}
|
|
checkedAnchorCache[href] = true
|
|
continue
|
|
}
|
|
checkedLinkCache[href] = true
|
|
|
|
// skip rare hardcoded links to old GHE versions
|
|
// these paths will always be in the old versioned form
|
|
// example: /enterprise/11.10.340/admin/articles/upgrading-to-the-latest-release
|
|
const gheVersionInLink = link.match(getEnterpriseVersionNumber)
|
|
if (gheVersionInLink && deprecated.includes(gheVersionInLink[1])) continue
|
|
|
|
// look for linked page
|
|
const isDotcomOnly = $(internalLink).attr('class')
|
|
|
|
// special case for GHES Admin links on dotcom, which are not broken; they go to the latest GHES version
|
|
let versionToCheck = version
|
|
if (version === nonEnterpriseDefaultVersion && adminProduct.test(link)) {
|
|
versionToCheck = `enterprise-server@${latest}`
|
|
}
|
|
|
|
const linkedPage = findPageInVersion(link, context.pages, context.redirects, languageCode, versionToCheck, isDotcomOnly)
|
|
|
|
if (!linkedPage) {
|
|
brokenLinks.links.push({ 'broken link': link, reason: 'linked page not found' })
|
|
continue
|
|
}
|
|
|
|
// don't check anchors on developers content
|
|
if (linkedPage.relativePath.match(/^(rest|graphql|developers)/)) continue
|
|
|
|
// create a unique string for caching purposes
|
|
const pathToCache = version + linkedPage.relativePath
|
|
|
|
const anchorToCheck = anchor
|
|
|
|
// if link with anchor (e.g., /some/path#foo), look for heading on linked page
|
|
if (anchorToCheck) {
|
|
// either render page or fetch it from cache if we've already rendered it
|
|
let linkedPageObject
|
|
if (!renderedPageCache[pathToCache]) {
|
|
const linkedPageHtml = await renderContent(linkedPage.markdown, context)
|
|
linkedPageObject = cheerio.load(linkedPageHtml, { xmlMode: true })
|
|
renderedPageCache[pathToCache] = linkedPageObject
|
|
} else {
|
|
linkedPageObject = renderedPageCache[pathToCache]
|
|
}
|
|
|
|
const matchingHeadings = getMatchingHeadings(linkedPageObject, anchorToCheck)
|
|
|
|
if (matchingHeadings.length === 0) {
|
|
if (anchor) {
|
|
brokenLinks.anchors.push({ 'broken anchor': `#${anchor}`, 'full link': `${getPathWithoutLanguage(link)}#${anchor}`, reason: 'heading not found on linked page', 'linked page': linkedPage.fullPath })
|
|
}
|
|
continue
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return { brokenLinks, checkedLinkCache }
|
|
}
|
|
|
|
// article titles are h1s; headings can be any subsequent level
|
|
/**
 * Select the elements an anchor can point at: h2-h6 headings whose `id` equals
 * the anchor, plus `<a>` elements whose `name` equals the anchor.
 *
 * @param {Function} $ - cheerio object (or any function accepting a CSS selector) to query
 * @param {string} anchor - anchor text without the leading `#`
 * @returns {*} whatever `$` returns for the selector; callers inspect its `length`
 */
function getMatchingHeadings ($, anchor) {
  // escape quotes and backslashes so an unusual anchor cannot terminate the
  // quoted attribute value and produce a malformed selector
  const escapedAnchor = String(anchor).replace(/["\\]/g, '\\$&')

  const headingSelectors = ['h2', 'h3', 'h4', 'h5', 'h6']
    .map(tag => `${tag}[id="${escapedAnchor}"]`)

  return $([...headingSelectors, `a[name="${escapedAnchor}"]`].join(',\n'))
}
|