Mirror of https://github.com/github/docs.git
Migrate links check to JS pattern (#30175)
Co-authored-by: Sarah Schneider <sarahs@users.noreply.github.com> Co-authored-by: Peter Bengtsson <peterbe@github.com>
This commit is contained in:
Parent
6e01c06538
Commit
7b4429418b
|
@ -0,0 +1,15 @@
|
|||
import { readFileSync } from 'fs'
|
||||
|
||||
// Parses the action event payload and sets owner and repo on it, from the payload or the runner environment
|
||||
export function getActionContext() {
|
||||
const context = JSON.parse(readFileSync(process.env.GITHUB_EVENT_PATH, 'utf8'))
|
||||
if (context.repository) {
|
||||
context.owner = context.repository.owner.login
|
||||
context.repo = context.repository.name
|
||||
} else {
|
||||
const [owner, repo] = process.env.GITHUB_REPOSITORY.split('/')
|
||||
context.owner = owner
|
||||
context.repo = repo
|
||||
}
|
||||
return context
|
||||
}
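// Usage sketch (illustrative, not part of this commit): the checker below calls
// getActionContext() to build opts.actionContext, which commentOnPR() later reads.
//
//   const actionContext = getActionContext()
//   const pullNumber = actionContext?.pull_request?.number // set for pull_request events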
|
|
@ -0,0 +1,32 @@
|
|||
const timeInstances = new Map()
|
||||
|
||||
/* Meant to be called before debugTimeEnd with the same instanceName to behave like console.time() */
|
||||
export function debugTimeStart(core, instanceName) {
|
||||
if (timeInstances.has(instanceName)) {
|
||||
core.warning(`instanceName: ${instanceName} has already been used for a debug instance.`)
|
||||
return
|
||||
}
|
||||
|
||||
timeInstances.set(instanceName, new Date())
|
||||
}
|
||||
|
||||
/* Meant to be called after debugTimeStart with the same instanceName to behave like console.timeEnd() */
|
||||
export function debugTimeEnd(core, instanceName) {
|
||||
if (!timeInstances.has(instanceName)) {
|
||||
core.warning(
|
||||
`Invalid instanceName: ${instanceName} in debugTimeEnd. Did you call debugTimeStart first with the same instanceName?`
|
||||
)
|
||||
return
|
||||
}
|
||||
const startTime = timeInstances.get(instanceName)
|
||||
const ms = new Date().getTime() - startTime.getTime()
|
||||
const seconds = ms / 1000
|
||||
const minutes = seconds / 60
|
||||
let display = `${ms.toFixed(1)} ms`
|
||||
if (minutes > 1) {
|
||||
display = `${minutes.toFixed(1)} minutes`
|
||||
} else if (seconds > 1) {
|
||||
display = `${seconds.toFixed(1)} seconds`
|
||||
}
|
||||
core.debug(`Completed ${instanceName} in ${display}`)
|
||||
}
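// Usage sketch (illustrative): wrap a slow section so its duration shows up in debug logs.
//
//   debugTimeStart(core, 'getPages')
//   const pages = getPages(pageList, languages, filters, files, max)
//   debugTimeEnd(core, 'getPages') // e.g. "Completed getPages in 2.3 seconds"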
|
|
@ -0,0 +1,18 @@
|
|||
/*
|
||||
* Validates and returns an object of expected environment variables
|
||||
*
|
||||
* @param {Array<string>} options - Array of environment variables expected
|
||||
*
|
||||
* @returns {Object} - key value of expected env variables and their values
|
||||
*/
|
||||
export function getEnvInputs(options) {
|
||||
return Object.fromEntries(
|
||||
options.map((envVarName) => {
|
||||
const envVarValue = process.env[envVarName]
|
||||
if (!envVarValue) {
|
||||
throw new Error(`You must supply a ${envVarName} environment variable`)
|
||||
}
|
||||
return [envVarName, envVarValue]
|
||||
})
|
||||
)
|
||||
}
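// Usage sketch (illustrative): fail fast when a required variable isn't set by the workflow.
//
//   const { GITHUB_TOKEN } = getEnvInputs(['GITHUB_TOKEN']) // throws if GITHUB_TOKEN is missing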
|
|
@ -0,0 +1,16 @@
|
|||
/* eslint-disable import/no-extraneous-dependencies */
|
||||
import fs from 'fs'
|
||||
|
||||
/* Writes string to file to be uploaded as an action artifact.
|
||||
* Useful for debugging or for passing results to a downstream action
|
||||
*
|
||||
* @param {string} name - name of artifact
|
||||
* @param {string} contents - string contents of artifact
|
||||
*/
|
||||
export async function uploadArtifact(name, contents) {
|
||||
if (!fs.existsSync('./artifacts')) {
|
||||
fs.mkdirSync('./artifacts/')
|
||||
}
|
||||
const filePath = `./artifacts/${name}`
|
||||
fs.writeFileSync(filePath, contents)
|
||||
}
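// Usage sketch (illustrative): anything written here is picked up by the workflow's
// upload-artifact step, which uploads the ./artifacts directory.
//
//   await uploadArtifact('all-rendered-link-flaws.json', JSON.stringify(flaws, undefined, 2))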
|
|
@ -0,0 +1,987 @@
|
|||
/* See function main in this file for documentation */
|
||||
|
||||
import fs from 'fs'
|
||||
import path from 'path'
|
||||
import cheerio from 'cheerio'
|
||||
import coreLib from '@actions/core'
|
||||
import got, { RequestError } from 'got'
|
||||
import chalk from 'chalk'
|
||||
|
||||
import shortVersions from '../../middleware/contextualizers/short-versions.js'
|
||||
import contextualize from '../../middleware/context.js'
|
||||
import getRedirect from '../../lib/get-redirect.js'
|
||||
import warmServer from '../../lib/warm-server.js'
|
||||
import renderContent from '../../lib/render-content/index.js'
|
||||
import { deprecated } from '../../lib/enterprise-server-releases.js'
|
||||
import excludedLinks from '../../lib/excluded-links.js'
|
||||
import { getEnvInputs } from './lib/get-env-inputs.js'
|
||||
import { debugTimeEnd, debugTimeStart } from './lib/debug-time-taken.js'
|
||||
import { uploadArtifact as uploadArtifactLib } from './lib/upload-artifact.js'
|
||||
import github from '../../script/helpers/github.js'
|
||||
import { getActionContext } from './lib/action-context.js'
|
||||
|
||||
const STATIC_PREFIXES = {
|
||||
assets: path.resolve('assets'),
|
||||
public: path.resolve(path.join('data', 'graphql')),
|
||||
}
|
||||
// Sanity check that these are valid paths
|
||||
Object.entries(STATIC_PREFIXES).forEach(([key, value]) => {
|
||||
if (!fs.existsSync(value)) {
|
||||
throw new Error(`Can't find static prefix (${key}): ${value}`)
|
||||
}
|
||||
})
|
||||
|
||||
// Return a function that can as quickly as possible check if a certain
|
||||
// href input should be skipped.
|
||||
// Do this so we can use a `Set` and an `iterable.some()` for a speedier
|
||||
// check.
|
||||
function linksToSkipFactory() {
|
||||
const set = new Set(excludedLinks.filter((regexOrURL) => typeof regexOrURL === 'string'))
|
||||
const regexes = excludedLinks.filter((regexOrURL) => regexOrURL instanceof RegExp)
|
||||
return (href) => set.has(href) || regexes.some((regex) => regex.test(href))
|
||||
}
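// Example (illustrative): with excludedLinks = ['https://example.com/a', /^https:\/\/flaky\./],
// exact strings are matched via the Set lookup and patterns via regex.test().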
|
||||
|
||||
const linksToSkip = linksToSkipFactory(excludedLinks)
|
||||
|
||||
const CONTENT_ROOT = path.resolve('content')
|
||||
|
||||
const deprecatedVersionPrefixesRegex = new RegExp(
|
||||
`enterprise(-server@|/)(${deprecated.join('|')})(/|$)`
|
||||
)
|
||||
|
||||
// When this file is invoked directly from action as opposed to being imported
|
||||
if (import.meta.url.endsWith(process.argv[1])) {
|
||||
// Validate that required action inputs are present
|
||||
getEnvInputs(['GITHUB_TOKEN'])
|
||||
|
||||
// Optional env vars
|
||||
const {
|
||||
ACTION_RUN_URL,
|
||||
CREATE_REPORT,
|
||||
CHECK_EXTERNAL_LINKS,
|
||||
LEVEL,
|
||||
SHOULD_COMMENT,
|
||||
COMMENT_LIMIT_TO_EXTERNAL_LINKS,
|
||||
FAIL_ON_FLAW,
|
||||
FILES_CHANGED,
|
||||
REPORT_REPOSITORY,
|
||||
REPORT_AUTHOR,
|
||||
REPORT_LABEL,
|
||||
} = process.env
|
||||
|
||||
const octokit = github()
|
||||
|
||||
// Parse changed files JSON string
|
||||
let files
|
||||
if (FILES_CHANGED) {
|
||||
const fileList = JSON.parse(FILES_CHANGED)
|
||||
if (Array.isArray(fileList) && fileList.length > 0) {
|
||||
files = fileList
|
||||
} else {
|
||||
console.warn(`No changed files found in PR: ${FILES_CHANGED}. Exiting...`)
|
||||
process.exit(0)
|
||||
}
|
||||
}
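// For illustration only: FILES_CHANGED arrives as a JSON array string, e.g.
// '["content/education/quickstart.md", "data/reusables/example.md"]' (hypothetical paths).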
|
||||
|
||||
const opts = {
|
||||
level: LEVEL,
|
||||
files,
|
||||
verbose: true,
|
||||
linkReports: true,
|
||||
checkImages: true,
|
||||
patient: true,
|
||||
random: false,
|
||||
language: 'en',
|
||||
actionUrl: ACTION_RUN_URL,
|
||||
checkExternalLinks: CHECK_EXTERNAL_LINKS === 'true',
|
||||
shouldComment: SHOULD_COMMENT === 'true',
|
||||
commentLimitToExternalLinks: COMMENT_LIMIT_TO_EXTERNAL_LINKS === 'true',
|
||||
failOnFlaw: FAIL_ON_FLAW === 'true',
|
||||
createReport: CREATE_REPORT === 'true',
|
||||
reportRepository: REPORT_REPOSITORY,
|
||||
reportLabel: REPORT_LABEL,
|
||||
reportAuthor: REPORT_AUTHOR,
|
||||
actionContext: getActionContext(),
|
||||
}
|
||||
|
||||
main(coreLib, octokit, uploadArtifactLib, opts, {})
|
||||
}
|
||||
|
||||
/*
|
||||
* Renders all or specified pages to gather all links on them and verify them.
|
||||
* Checks internal links deterministically using filesystem and external links via external requests.
|
||||
* Links are considered broken for reporting and commenting if they are broken at the specified "level".
|
||||
* e.g. redirects are considered a "warning" while 404s are considered "critical"
|
||||
*
|
||||
* When there are broken links (flaws) this action can:
|
||||
* 1. Create a report issue in a specified reportRepository and link it to previous reportIssues
|
||||
* 2. Create a comment similar to a report on a PR that triggered this action
|
||||
* 3. Fail using core.setFailed when there are broken links
|
||||
*
|
||||
* opts:
|
||||
* level {"warning" | "critical"} Counts links as "flaws" based on this value and status criteria
|
||||
* files {Array<string>} - Limit link checking to specific files (usually changed in PR)
|
||||
* language {string | Array<string>} - Render pages to check from included language (or languages array)
|
||||
* checkExternalLinks {boolean} - Checks non docs.github.com urls (takes significantly longer)
|
||||
* checkImages {boolean} - Check image src urls
|
||||
* failOnFlaw {boolean} - When true will fail using core.setFailed when links are broken according to level (flaw)
|
||||
* shouldComment {boolean} - When true attempts to comment flaws on PR that triggered action
|
||||
* commentLimitToExternalLinks {boolean} - When true PR comment only includes external links
|
||||
* createReport {boolean} - Creates an issue comment in reportRepository with links considered broken (flaws)
|
||||
* linkReports {boolean} - When createReport is true, link the issue report to previous report(s) via comments
|
||||
* reportRepository {string} - Repository in form of "owner/repo-name" that report issue will be created in
|
||||
* reportLabel {string} - Label assigned to report issue,
|
||||
* reportAuthor {string} - Expected author of previous report issue for linking reports (a bot user like Docubot)
|
||||
* actionUrl {string} - Used to link report or comment to the action instance for debugging
|
||||
* actionContext {object} - Event payload context when run from action or injected. Should include { repo, owner }
|
||||
* verbose {boolean} - Set to true for more verbose logging
|
||||
* random {boolean} - Randomize page order for debugging when true
|
||||
* patient {boolean} - Wait longer and retry more times for rate-limited external URLS
|
||||
*
|
||||
*/
|
||||
async function main(core, octokit, uploadArtifact, opts = {}) {
|
||||
const {
|
||||
level = 'warning',
|
||||
files = [],
|
||||
random,
|
||||
language = 'en',
|
||||
filter,
|
||||
max,
|
||||
verbose,
|
||||
checkExternalLinks = false,
|
||||
createReport = false,
|
||||
failOnFlaw = false,
|
||||
shouldComment = false,
|
||||
} = opts
|
||||
|
||||
// Note! The reason we're using `warmServer()` in this script,
|
||||
// even though there's no server involved, is because
|
||||
// the `contextualize()` function calls it.
|
||||
// And because warmServer() is actually idempotent, meaning it's
|
||||
// cheap to call it more than once, it costs nothing extra to call it
|
||||
// here as well.
|
||||
// If we'd manually do the same operations that `warmServer()` does
|
||||
// here (e.g. `loadPageMap()`), we'd end up having to do it all over
|
||||
// again, the next time `contextualize()` is called.
|
||||
const { redirects, pages: pageMap, pageList } = await warmServer()
|
||||
|
||||
if (files.length) {
|
||||
core.debug(`Limiting to files list: ${files.join(', ')}`)
|
||||
}
|
||||
|
||||
let languages = language
|
||||
if (!Array.isArray(languages)) {
|
||||
languages = [languages]
|
||||
}
|
||||
|
||||
const filters = filter || []
|
||||
if (!Array.isArray(filters)) {
|
||||
core.warning(`filters, ${filters} is not an array`)
|
||||
}
|
||||
|
||||
if (random) {
|
||||
shuffle(pageList)
|
||||
}
|
||||
|
||||
debugTimeStart(core, 'getPages')
|
||||
const pages = getPages(pageList, languages, filters, files, max)
|
||||
debugTimeEnd(core, 'getPages')
|
||||
|
||||
if (checkExternalLinks && pages.length >= 100) {
|
||||
core.warning(
|
||||
`Warning! Checking external URLs can be time costly. You're testing ${pages.length} pages.`
|
||||
)
|
||||
}
|
||||
|
||||
debugTimeStart(core, 'processPages')
|
||||
const flawsGroups = await Promise.all(
|
||||
pages.map((page) => processPage(core, page, pageMap, redirects, opts))
|
||||
)
|
||||
debugTimeEnd(core, 'processPages')
|
||||
|
||||
const flaws = flawsGroups.flat()
|
||||
|
||||
printGlobalCacheHitRatio(core)
|
||||
|
||||
if (verbose) {
|
||||
summarizeCounts(core, pages)
|
||||
core.info(`Checked ${(globalCacheHitCount + globalCacheMissCount).toLocaleString()} links`)
|
||||
}
|
||||
|
||||
summarizeFlaws(core, flaws)
|
||||
|
||||
if (flaws.length > 0) {
|
||||
await uploadJsonFlawsArtifact(uploadArtifact, flaws, opts)
|
||||
core.info(`All flaws written to artifact log.`)
|
||||
if (createReport) {
|
||||
core.info(`Creating issue for flaws...`)
|
||||
const newReport = await createReportIssue(core, octokit, flaws, opts)
|
||||
if (opts.linkReports) {
|
||||
await linkReports(core, octokit, newReport, opts)
|
||||
}
|
||||
}
|
||||
if (shouldComment) {
|
||||
await commentOnPR(core, octokit, flaws, opts)
|
||||
}
|
||||
|
||||
const flawsInLevel = flaws.filter((flaw) => {
|
||||
if (level === 'critical') {
|
||||
return flaw?.flaw?.CRITICAL
|
||||
}
|
||||
// WARNING level and above
|
||||
return true
|
||||
})
|
||||
|
||||
if (flawsInLevel.length > 0) {
|
||||
core.setOutput('has_flaws_at_level', flawsInLevel.length > 0)
|
||||
if (failOnFlaw) {
|
||||
core.setFailed(
|
||||
`${flaws.length} broken links found. See action artifact uploads for details`
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async function createReportIssue(core, octokit, flaws, opts) {
|
||||
const { reportRepository = 'github/docs-content', reportLabel = 'broken link report' } = opts
|
||||
const [owner, repo] = reportRepository.split('/')
|
||||
|
||||
const brokenLinksDisplay = flawIssueDisplay(flaws, opts)
|
||||
|
||||
// Create issue with broken links
|
||||
let newReport
|
||||
try {
|
||||
const { data } = await octokit.request('POST /repos/{owner}/{repo}/issues', {
|
||||
owner,
|
||||
repo,
|
||||
title: `${flaws.length} broken links found`,
|
||||
body: brokenLinksDisplay,
|
||||
labels: [reportLabel],
|
||||
})
|
||||
newReport = data
|
||||
core.info(`Created broken links report at ${newReport.html_url}\n`)
|
||||
} catch (error) {
|
||||
core.error(error)
|
||||
core.setFailed('Error creating new issue')
|
||||
throw error
|
||||
}
|
||||
|
||||
return newReport
|
||||
}
|
||||
|
||||
async function linkReports(core, octokit, newReport, opts) {
|
||||
const {
|
||||
reportRepository = 'github/docs-content',
|
||||
reportAuthor = 'docubot',
|
||||
reportLabel = 'broken link report',
|
||||
} = opts
|
||||
|
||||
const [owner, repo] = reportRepository.split('/')
|
||||
|
||||
core.debug('Attempting to link reports...')
|
||||
// Find previous broken link report issue
|
||||
let previousReports
|
||||
try {
|
||||
previousReports = await octokit.rest.issues.listForRepo({
|
||||
owner,
|
||||
repo,
|
||||
creator: reportAuthor,
|
||||
labels: reportLabel,
|
||||
state: 'all', // We want to get the previous report, even if it is closed
|
||||
sort: 'created',
|
||||
direction: 'desc',
|
||||
per_page: 25,
|
||||
})
|
||||
previousReports = previousReports.data
|
||||
} catch (error) {
|
||||
core.setFailed('Error listing issues for repo')
|
||||
throw error
|
||||
}
|
||||
core.debug(`Found ${previousReports.length} previous reports`)
|
||||
|
||||
if (previousReports.length <= 1) {
|
||||
core.info('No previous reports to link to')
|
||||
return
|
||||
}
|
||||
|
||||
// 2nd report should be most recent previous report
|
||||
const previousReport = previousReports[1]
|
||||
|
||||
// Comment the old report link on the new report
|
||||
try {
|
||||
await octokit.rest.issues.createComment({
|
||||
owner,
|
||||
repo,
|
||||
issue_number: newReport.number,
|
||||
body: `⬅️ [Previous report](${previousReport.html_url})`,
|
||||
})
|
||||
core.info(`Linked old report to new report via comment on new report, #${newReport.number}`)
|
||||
} catch (error) {
|
||||
core.setFailed(`Error commenting on newReport, #${newReport.number}`)
|
||||
throw error
|
||||
}
|
||||
|
||||
// Comment on all previous reports that are still open
|
||||
for (const previousReport of previousReports) {
|
||||
if (previousReport.state === 'closed' || previousReport.html_url === newReport.html_url) {
|
||||
continue
|
||||
}
|
||||
|
||||
// If an old report is not assigned to someone we close it
|
||||
const shouldClose = !previousReport.assignees.length
|
||||
let body = `➡️ [Newer report](${newReport.html_url})`
|
||||
if (shouldClose) {
|
||||
body += '\n\nClosing in favor of newer report since there are no assignees on this issue'
|
||||
}
|
||||
try {
|
||||
await octokit.rest.issues.createComment({
|
||||
owner,
|
||||
repo,
|
||||
issue_number: previousReport.number,
|
||||
body,
|
||||
})
|
||||
core.info(
|
||||
`Linked old report to new report via comment on old report: #${previousReport.number}.`
|
||||
)
|
||||
} catch (error) {
|
||||
core.setFailed(`Error commenting on previousReport, #${previousReport.number}`)
|
||||
throw error
|
||||
}
|
||||
if (shouldClose) {
|
||||
try {
|
||||
await octokit.rest.issues.update({
|
||||
owner,
|
||||
repo,
|
||||
issue_number: previousReport.number,
|
||||
state: 'closed',
|
||||
})
|
||||
core.info(`Closing old report: #${previousReport.number} because it doesn't have assignees`)
|
||||
} catch (error) {
|
||||
core.setFailed(`Error closing previousReport, #${previousReport.number}`)
|
||||
throw error
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async function commentOnPR(core, octokit, flaws, opts) {
|
||||
const { actionContext = {} } = opts
|
||||
const { owner, repo } = actionContext
|
||||
const pullNumber = actionContext?.pull_request?.number
|
||||
if (!owner || !repo || !pullNumber) {
|
||||
core.warning(`commentOnPR called outside of PR action runner context. Not creating comment.`)
|
||||
return
|
||||
}
|
||||
|
||||
const body = flawIssueDisplay(flaws, opts, false)
|
||||
// Since failed external urls aren't included in PR comment, body may be empty
|
||||
if (!body) {
|
||||
core.info('No flaws qualify for comment')
|
||||
return
|
||||
}
|
||||
|
||||
try {
|
||||
await octokit.rest.issues.createComment({
|
||||
owner,
|
||||
repo,
|
||||
issue_number: pullNumber,
|
||||
body,
|
||||
})
|
||||
core.info(`Created comment on PR: ${pullNumber}`)
|
||||
} catch (error) {
|
||||
core.setFailed(`Error commenting on PR when there are flaws`)
|
||||
throw error
|
||||
}
|
||||
}
|
||||
|
||||
function flawIssueDisplay(flaws, opts, includeExternalLinkList = true) {
|
||||
let output = ''
|
||||
let flawsToDisplay = 0
|
||||
|
||||
// Group broken links for each page
|
||||
const hrefsOnPageGroup = {}
|
||||
for (const { page, permalink, href, text, src, flaw } of flaws) {
|
||||
// Skip internal links when PR comments are limited to external links
|
||||
if (opts.commentLimitToExternalLinks && !flaw.isExternal) {
|
||||
continue
|
||||
}
|
||||
|
||||
flawsToDisplay++
|
||||
|
||||
const pageKey = page.fullPath
|
||||
if (!hrefsOnPageGroup[pageKey]) {
|
||||
hrefsOnPageGroup[pageKey] = {}
|
||||
}
|
||||
|
||||
const linkKey = href || src
|
||||
if (!hrefsOnPageGroup[pageKey][linkKey]) {
|
||||
hrefsOnPageGroup[pageKey][linkKey] = { href, text, src, flaw, permalinkHrefs: [] }
|
||||
}
|
||||
|
||||
if (!hrefsOnPageGroup[pageKey][linkKey].permalinkHrefs.includes(permalink.href)) {
|
||||
hrefsOnPageGroup[pageKey][linkKey].permalinkHrefs.push(permalink.href)
|
||||
}
|
||||
}
|
||||
|
||||
// Don't comment if there are no qualifying flaws
|
||||
if (!flawsToDisplay) {
|
||||
return ''
|
||||
}
|
||||
|
||||
// Build flaw display text
|
||||
for (const [pagePath, pageHrefs] of Object.entries(hrefsOnPageGroup)) {
|
||||
const fullPath = prettyFullPath(pagePath)
|
||||
output += `\n\n### In \`${fullPath}\`\n`
|
||||
|
||||
for (const [, hrefObj] of Object.entries(pageHrefs)) {
|
||||
if (hrefObj.href) {
|
||||
output += `\n\n - Href: [${hrefObj.href}](${hrefObj.href})`
|
||||
output += `\n - Text: ${hrefObj.text}`
|
||||
} else if (hrefObj.src) {
|
||||
output += `\n\n - Image src: [${hrefObj.src}](${hrefObj.src})`
|
||||
} else {
|
||||
output += `\n\n - WORKFLOW ERROR: Flaw has neither 'href' nor 'src'`
|
||||
}
|
||||
output += `\n - Flaw: \`${
|
||||
hrefObj.flaw.CRITICAL ? hrefObj.flaw.CRITICAL : hrefObj.flaw.WARNING
|
||||
}\``
|
||||
output += `\n - On permalinks`
|
||||
for (const permalinkHref of hrefObj.permalinkHrefs) {
|
||||
output += `\n - \`${permalinkHref}\``
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (includeExternalLinkList) {
|
||||
output +=
|
||||
'\n\n## External URLs\n\nThe following external URLs must be verified manually. If an external URL gives a false positive, add it to the file `lib/excluded-links.js`\n\n'
|
||||
for (const link of excludedLinks) {
|
||||
if (typeof link === 'string') {
|
||||
output += `\n - [${link}](${link})`
|
||||
} else {
|
||||
output += `\n - Pattern: \`${link.toString()}\``
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return `${flawsToDisplay} broken${
|
||||
opts.commentLimitToExternalLinks ? ' **external** ' : ' '
|
||||
}links found in [this](${opts.actionUrl}) workflow.\n${output}`
|
||||
}
|
||||
|
||||
function printGlobalCacheHitRatio(core) {
|
||||
const hits = globalCacheHitCount
|
||||
const misses = globalCacheMissCount
|
||||
// It could be that the files that were tested didn't have a single
|
||||
// link in them. In that case, there are no cache misses or hits at all.
|
||||
// So avoid the division by zero.
|
||||
if (misses + hits) {
|
||||
core.debug(
|
||||
`Cache hit ratio: ${hits.toLocaleString()} of ${(misses + hits).toLocaleString()} (${(
|
||||
(100 * hits) /
|
||||
(misses + hits)
|
||||
).toFixed(1)}%)`
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
function getPages(pageList, languages, filters, files, max) {
|
||||
return pageList
|
||||
.filter((page) => {
|
||||
if (languages.length && !languages.includes(page.languageCode)) {
|
||||
return false
|
||||
}
|
||||
|
||||
if (filters.length && !filters.find((filter) => page.relativePath.includes(filter))) {
|
||||
return false
|
||||
}
|
||||
|
||||
if (
|
||||
files.length &&
|
||||
// The reason for checking each file against the `relativePath`
|
||||
// or the `fullPath` is to make it flexible for the user.
|
||||
!files.find((file) => {
|
||||
if (page.relativePath === file) return true
|
||||
if (page.fullPath === file) return true
|
||||
// The `page.relativePath` will always be *from* the containing
|
||||
// directory it came from and might not be relative to the repo
|
||||
// root. I.e.
|
||||
// `content/education/quickstart.md` is the path relative to
|
||||
// the repo root. But the `page.relativePath` will
|
||||
// in this case be `education/quickstart.md`.
|
||||
// So give it one last chance to relate to the repo root.
|
||||
// This is important because you might use `git diff --name-only`
|
||||
// to get the list of files to focus specifically on.
|
||||
if (path.join(CONTENT_ROOT, page.relativePath) === path.resolve(file)) return true
|
||||
return false
|
||||
})
|
||||
) {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
})
|
||||
.slice(0, max ? Math.min(max, pageList.length) : pageList.length)
|
||||
}
|
||||
|
||||
async function processPage(core, page, pageMap, redirects, opts) {
|
||||
const { verbose, verboseUrl } = opts
|
||||
|
||||
const allFlawsEach = await Promise.all(
|
||||
page.permalinks.map((permalink) => {
|
||||
return processPermalink(core, permalink, page, pageMap, redirects, opts)
|
||||
})
|
||||
)
|
||||
|
||||
const allFlaws = allFlawsEach.flat()
|
||||
|
||||
if (allFlaws.length > 0) {
|
||||
if (verbose) {
|
||||
printFlaws(core, allFlaws, { verboseUrl })
|
||||
}
|
||||
}
|
||||
|
||||
return allFlaws
|
||||
}
|
||||
|
||||
async function processPermalink(core, permalink, page, pageMap, redirects, opts) {
|
||||
const {
|
||||
level = 'critical',
|
||||
checkAnchors,
|
||||
checkImages,
|
||||
checkExternalLinks,
|
||||
verbose,
|
||||
patient,
|
||||
} = opts
|
||||
const html = await renderInnerHTML(page, permalink)
|
||||
const $ = cheerio.load(html)
|
||||
const flaws = []
|
||||
const links = []
|
||||
$('a[href]').each((i, link) => {
|
||||
links.push(link)
|
||||
})
|
||||
const newFlaws = await Promise.all(
|
||||
links.map(async (link) => {
|
||||
const { href } = link.attribs
|
||||
|
||||
// The global cache can't be used for anchor links because they
|
||||
// depend on the page they're rendered on
|
||||
if (!href.startsWith('#')) {
|
||||
if (globalHrefCheckCache.has(href)) {
|
||||
globalCacheHitCount++
|
||||
return globalHrefCheckCache.get(href)
|
||||
}
|
||||
globalCacheMissCount++
|
||||
}
|
||||
|
||||
const flaw = await checkHrefLink(
|
||||
core,
|
||||
href,
|
||||
$,
|
||||
redirects,
|
||||
pageMap,
|
||||
checkAnchors,
|
||||
checkExternalLinks,
|
||||
{ verbose, patient }
|
||||
)
|
||||
|
||||
if (flaw) {
|
||||
if (level === 'critical' && !flaw.CRITICAL) {
|
||||
return
|
||||
}
|
||||
const text = $(link).text()
|
||||
if (!href.startsWith('#')) {
|
||||
globalHrefCheckCache.set(href, { href, flaw, text })
|
||||
}
|
||||
return { href, flaw, text }
|
||||
} else {
|
||||
if (!href.startsWith('#')) {
|
||||
globalHrefCheckCache.set(href, flaw)
|
||||
}
|
||||
}
|
||||
})
|
||||
)
|
||||
|
||||
for (const flaw of newFlaws) {
|
||||
if (flaw) {
|
||||
flaws.push(Object.assign(flaw, { page, permalink }))
|
||||
}
|
||||
}
|
||||
|
||||
if (checkImages) {
|
||||
$('img[src]').each((i, img) => {
|
||||
let { src } = img.attribs
|
||||
|
||||
// Images get a cache-busting prefix injected into the src
|
||||
// E.g. <img src="/assets/cb-123456/foo/bar.png">
|
||||
// We need to remove that otherwise we can't look up the image
|
||||
// on disk.
|
||||
src = src.replace(/\/cb-\d+\//, '/')
|
||||
|
||||
if (globalImageSrcCheckCache.has(src)) {
|
||||
globalCacheHitCount++
|
||||
return globalImageSrcCheckCache.get(src)
|
||||
}
|
||||
|
||||
const flaw = checkImageSrc(src, $)
|
||||
|
||||
globalImageSrcCheckCache.set(src, flaw)
|
||||
|
||||
if (flaw) {
|
||||
if (level === 'critical' && !flaw.CRITICAL) {
|
||||
return
|
||||
}
|
||||
flaws.push({ permalink, page, src, flaw })
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
return flaws
|
||||
}
|
||||
|
||||
async function uploadJsonFlawsArtifact(
|
||||
uploadArtifact,
|
||||
flaws,
|
||||
{ verboseUrl = null } = {},
|
||||
artifactName = 'all-rendered-link-flaws.json'
|
||||
) {
|
||||
const printableFlaws = {}
|
||||
for (const { page, permalink, href, text, src, flaw } of flaws) {
|
||||
const fullPath = prettyFullPath(page.fullPath)
|
||||
|
||||
if (!(fullPath in printableFlaws)) {
|
||||
printableFlaws[fullPath] = []
|
||||
}
|
||||
if (href) {
|
||||
printableFlaws[fullPath].push({
|
||||
href,
|
||||
url: verboseUrl ? new URL(permalink.href, verboseUrl).toString() : permalink.href,
|
||||
text,
|
||||
flaw,
|
||||
})
|
||||
} else if (src) {
|
||||
printableFlaws[fullPath].push({
|
||||
src,
|
||||
})
|
||||
}
|
||||
}
|
||||
const message = JSON.stringify(printableFlaws, undefined, 2)
|
||||
return uploadArtifact(artifactName, message)
|
||||
}
|
||||
|
||||
function printFlaws(core, flaws, { verboseUrl = null } = {}) {
|
||||
let previousPage = null
|
||||
let previousPermalink = null
|
||||
|
||||
for (const { page, permalink, href, text, src, flaw } of flaws) {
|
||||
const fullPath = prettyFullPath(page.fullPath)
|
||||
if (page !== previousPage) {
|
||||
core.info(`PAGE: ${chalk.bold(fullPath)}`)
|
||||
}
|
||||
previousPage = page
|
||||
|
||||
if (href) {
|
||||
if (previousPermalink !== permalink.href) {
|
||||
if (verboseUrl) {
|
||||
core.info(` URL: ${new URL(permalink.href, verboseUrl).toString()}`)
|
||||
} else {
|
||||
core.info(` PERMALINK: ${permalink.href}`)
|
||||
}
|
||||
}
|
||||
previousPermalink = permalink.href
|
||||
|
||||
core.info(` HREF: ${chalk.bold(href)}`)
|
||||
core.info(` TEXT: ${text}`)
|
||||
} else if (src) {
|
||||
core.info(` IMG SRC: ${chalk.bold(src)}`)
|
||||
} else {
|
||||
throw new Error("Flaw has neither 'href' nor 'src'")
|
||||
}
|
||||
|
||||
core.info(` FLAW: ${flaw.CRITICAL ? chalk.red(flaw.CRITICAL) : chalk.yellow(flaw.WARNING)}`)
|
||||
}
|
||||
}
|
||||
|
||||
// Given a full path, change it so it's relative to the `cwd()` so that you
|
||||
// can take it from the output and paste it to something like `code ...here...`
|
||||
// The problem with displaying the full path is that it's quite noisy and
|
||||
// takes up a lot of space. Sure, you can copy and paste it in front of
|
||||
// `vi` or `ls` or `code` but if we display it relative to `cwd()` you
|
||||
// can still paste it to the next command but it's not taking up so much
|
||||
// space.
|
||||
function prettyFullPath(fullPath) {
|
||||
return path.relative(process.cwd(), fullPath)
|
||||
}
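// Example (illustrative, hypothetical path): with cwd() at the repo root,
// prettyFullPath('/home/runner/docs/content/education/quickstart.md')
// returns 'content/education/quickstart.md'.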
|
||||
|
||||
const globalHrefCheckCache = new Map()
|
||||
const globalImageSrcCheckCache = new Map()
|
||||
let globalCacheHitCount = 0
|
||||
let globalCacheMissCount = 0
|
||||
|
||||
async function checkHrefLink(
|
||||
core,
|
||||
href,
|
||||
$,
|
||||
redirects,
|
||||
pageMap,
|
||||
checkAnchors = false,
|
||||
checkExternalLinks = false,
|
||||
{ verbose = false, patient = false } = {}
|
||||
) {
|
||||
if (href === '#') {
|
||||
if (checkAnchors) {
|
||||
return { WARNING: 'Link is just an empty `#`' }
|
||||
}
|
||||
} else if (href.startsWith('#')) {
|
||||
if (checkAnchors) {
|
||||
const countDOMItems = $(href).length
|
||||
if (countDOMItems !== 1) {
|
||||
return { WARNING: `Anchor is an empty string` }
|
||||
}
|
||||
}
|
||||
} else if (href.startsWith('/')) {
|
||||
const pathname = new URL(href, 'http://example.com').pathname
|
||||
|
||||
// Remember, if the Markdown has something like
|
||||
//
|
||||
// See [my link][/some/page/]
|
||||
//
|
||||
// In the post-processing, that will actually become
|
||||
//
|
||||
// See <a href="/en/some/page">my link</a>
|
||||
//
|
||||
// But, if that link was a redirect, that would have been left
|
||||
// untouched.
|
||||
if (pathname.endsWith('/')) {
|
||||
return { WARNING: 'Links with a trailing / will always redirect' }
|
||||
} else {
|
||||
if (pathname.split('/')[1] in STATIC_PREFIXES) {
|
||||
const staticFilePath = path.join(
|
||||
STATIC_PREFIXES[pathname.split('/')[1]],
|
||||
pathname.split(path.sep).slice(2).join(path.sep)
|
||||
)
|
||||
if (!fs.existsSync(staticFilePath)) {
|
||||
return { CRITICAL: `Static file not found ${staticFilePath} (${pathname})` }
|
||||
}
|
||||
} else if (getRedirect(pathname, { redirects, pages: pageMap })) {
|
||||
return { WARNING: `Redirect to ${getRedirect(pathname, { redirects, pages: pageMap })}` }
|
||||
} else if (!pageMap[pathname]) {
|
||||
if (deprecatedVersionPrefixesRegex.test(pathname)) {
|
||||
return
|
||||
}
|
||||
|
||||
return { CRITICAL: 'Broken link' }
|
||||
}
|
||||
}
|
||||
} else if (checkExternalLinks) {
|
||||
if (!href.startsWith('https://')) {
|
||||
return { WARNING: `Will not check external URLs that are not HTTPS (${href})` }
|
||||
}
|
||||
if (linksToSkip(href)) {
|
||||
return
|
||||
}
|
||||
const { ok, ...info } = await checkExternalURL(core, href, { verbose, patient })
|
||||
if (!ok) {
|
||||
return { CRITICAL: `Broken external link (${JSON.stringify(info)})`, isExternal: true }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const _fetchCache = new Map()
|
||||
async function checkExternalURL(core, url, { verbose = false, patient = false } = {}) {
|
||||
if (!url.startsWith('https://')) throw new Error('Invalid URL')
|
||||
const cleanURL = url.split('#')[0]
|
||||
if (!_fetchCache.has(cleanURL)) {
|
||||
_fetchCache.set(cleanURL, innerFetch(core, cleanURL, { verbose, patient }))
|
||||
}
|
||||
return _fetchCache.get(cleanURL)
|
||||
}
|
||||
|
||||
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms))
|
||||
|
||||
// Global for recording which domains we get rate-limited on.
|
||||
// For example, if you got rate limited on `something.github.com/foo`
|
||||
// and now we're asked to fetch for `something.github.com/bar`
|
||||
// it's good to know not to bother yet.
|
||||
const _rateLimitedDomains = new Map()
|
||||
|
||||
async function innerFetch(core, url, config = {}) {
|
||||
const { verbose, useGET, patient } = config
|
||||
|
||||
const { hostname } = new URL(url)
|
||||
if (_rateLimitedDomains.has(hostname)) {
|
||||
await sleep(_rateLimitedDomains.get(hostname))
|
||||
}
|
||||
// The way `got` does retries:
|
||||
//
|
||||
// sleep = 1000 * Math.pow(2, retry - 1) + Math.random() * 100
|
||||
//
|
||||
// So, it means:
|
||||
//
|
||||
// 1. ~1000ms
|
||||
// 2. ~2000ms
|
||||
// 3. ~4000ms
|
||||
//
|
||||
// ...if the limit we set is 3.
|
||||
// Our own timeout, in ./middleware/timeout.js defaults to 10 seconds.
|
||||
// So there's no point in trying more attempts than 3 because it would
|
||||
// just timeout on the 10s. (i.e. 1000 + 2000 + 4000 + 8000 > 10,000)
|
||||
const retry = {
|
||||
limit: patient ? 5 : 2,
|
||||
}
|
||||
const timeout = { request: patient ? 10000 : 2000 }
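// For example (a sketch): with a retry limit of 3 the worst-case added delay is
// roughly 1s + 2s + 4s ≈ 7s, which still fits inside a 10s request timeout.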
|
||||
|
||||
const headers = {
|
||||
'User-Agent':
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36',
|
||||
}
|
||||
|
||||
const retries = config.retries || 0
|
||||
const httpFunction = useGET ? got.get : got.head
|
||||
|
||||
if (verbose) core.info(`External URL ${useGET ? 'GET' : 'HEAD'}: ${url} (retries: ${retries})`)
|
||||
try {
|
||||
const r = await httpFunction(url, {
|
||||
headers,
|
||||
throwHttpErrors: false,
|
||||
retry,
|
||||
timeout,
|
||||
})
|
||||
if (verbose) {
|
||||
core.info(
|
||||
`External URL ${useGET ? 'GET' : 'HEAD'} ${url}: ${r.statusCode} (retries: ${retries})`
|
||||
)
|
||||
}
|
||||
|
||||
// If we get rate limited, remember that this hostname is now all
|
||||
// rate limited. And sleep for the number of seconds that the
|
||||
// `retry-after` header indicated.
|
||||
if (r.statusCode === 429) {
|
||||
let sleepTime = Math.min(
|
||||
60_000,
|
||||
Math.max(10_000, getRetryAfterSleep(r.headers['retry-after']))
|
||||
)
|
||||
// Sprinkle a little jitter so it doesn't all start again all
|
||||
// at the same time
|
||||
sleepTime += Math.random() * 10 * 1000
|
||||
// Give it a bit extra when we can be really patient
|
||||
if (patient) sleepTime += 30 * 1000
|
||||
|
||||
_rateLimitedDomains.set(hostname, sleepTime + Math.random() * 10 * 1000)
|
||||
if (verbose)
|
||||
core.info(
|
||||
chalk.yellow(
|
||||
`Rate limited on ${hostname} (${url}). Sleeping for ${(sleepTime / 1000).toFixed(1)}s`
|
||||
)
|
||||
)
|
||||
await sleep(sleepTime)
|
||||
return innerFetch(core, url, Object.assign({}, config, { retries: retries + 1 }))
|
||||
} else {
|
||||
_rateLimitedDomains.delete(hostname)
|
||||
}
|
||||
|
||||
// Perhaps the server doesn't support HEAD requests.
|
||||
// If so, try again with a regular GET.
|
||||
if ((r.statusCode === 405 || r.statusCode === 404 || r.statusCode === 403) && !useGET) {
|
||||
return innerFetch(core, url, Object.assign({}, config, { useGET: true }))
|
||||
}
|
||||
if (verbose) {
|
||||
core.info((r.ok ? chalk.green : chalk.red)(`${r.statusCode} on ${url}`))
|
||||
}
|
||||
return { ok: r.ok, statusCode: r.statusCode }
|
||||
} catch (err) {
|
||||
if (err instanceof RequestError) {
|
||||
if (verbose) {
|
||||
core.info(chalk.yellow(`RequestError (${err.message}) on ${url}`))
|
||||
}
|
||||
return { ok: false, requestError: err.message }
|
||||
}
|
||||
throw err
|
||||
}
|
||||
}
|
||||
|
||||
// Return number of milliseconds from a `Retry-After` header value
|
||||
function getRetryAfterSleep(headerValue) {
|
||||
if (!headerValue) return 0
|
||||
let ms = Math.round(parseFloat(headerValue) * 1000)
|
||||
if (isNaN(ms)) {
|
||||
ms = Math.max(0, new Date(headerValue) - new Date())
|
||||
}
|
||||
return ms
|
||||
}
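// Examples (illustrative): getRetryAfterSleep('120') returns 120000 (ms); a date value
// like 'Wed, 01 Jan 2025 00:00:00 GMT' returns the ms until that time, or 0 if it's in the past.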
|
||||
|
||||
function checkImageSrc(src, $) {
|
||||
const pathname = new URL(src, 'http://example.com').pathname
|
||||
if (!pathname.startsWith('/')) {
|
||||
return { WARNING: "External images can't be checked" }
|
||||
}
|
||||
const prefix = pathname.split('/')[1]
|
||||
if (prefix in STATIC_PREFIXES) {
|
||||
const staticFilePath = path.join(
|
||||
STATIC_PREFIXES[prefix],
|
||||
pathname.split(path.sep).slice(2).join(path.sep)
|
||||
)
|
||||
if (!fs.existsSync(staticFilePath)) {
|
||||
return { CRITICAL: `Static file not found (${pathname})` }
|
||||
}
|
||||
} else {
|
||||
return { WARNING: `Unrecognized image src prefix (${prefix})` }
|
||||
}
|
||||
}
|
||||
|
||||
function summarizeFlaws(core, flaws) {
|
||||
if (flaws.length) {
|
||||
core.info(
|
||||
chalk.bold(
|
||||
`Found ${flaws.length.toLocaleString()} flaw${flaws.length === 1 ? '' : 's'} in total.`
|
||||
)
|
||||
)
|
||||
} else {
|
||||
core.info(chalk.green('No flaws found! 💖'))
|
||||
}
|
||||
}
|
||||
|
||||
function summarizeCounts(core, pages) {
|
||||
const count = pages.map((page) => page.permalinks.length).reduce((a, b) => a + b, 0)
|
||||
core.info(
|
||||
`Tested ${count.toLocaleString()} permalinks across ${pages.length.toLocaleString()} pages`
|
||||
)
|
||||
}
|
||||
|
||||
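// In-place Fisher-Yates shuffle; used when opts.random is true to randomize page order.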
function shuffle(array) {
|
||||
let currentIndex = array.length
|
||||
let randomIndex
|
||||
|
||||
// While there remain elements to shuffle...
|
||||
while (currentIndex !== 0) {
|
||||
// Pick a remaining element...
|
||||
randomIndex = Math.floor(Math.random() * currentIndex)
|
||||
currentIndex--
|
||||
|
||||
// And swap it with the current element.
|
||||
;[array[currentIndex], array[randomIndex]] = [array[randomIndex], array[currentIndex]]
|
||||
}
|
||||
|
||||
return array
|
||||
}
|
||||
|
||||
async function renderInnerHTML(page, permalink) {
|
||||
const next = () => {}
|
||||
const res = {}
|
||||
|
||||
const pagePath = permalink.href
|
||||
const req = {
|
||||
path: pagePath,
|
||||
language: permalink.languageCode,
|
||||
pagePath,
|
||||
cookies: {},
|
||||
}
|
||||
await contextualize(req, res, next)
|
||||
await shortVersions(req, res, next)
|
||||
const context = Object.assign({}, req.context, { page })
|
||||
context.relativePath = page.relativePath
|
||||
return await renderContent(page.markdown, context)
|
||||
}
|
||||
|
||||
export default main
|
|
@ -45,7 +45,7 @@ jobs:
|
|||
# Ensure this is actually a pull request and not a merge group
|
||||
# If it's a merge group, report success without doing anything
|
||||
# See https://bit.ly/3qB9nZW > If a job in a workflow is skipped due to a conditional, it will report its status as "Success".
|
||||
if: (github.event.pull_request.head.sha || github.event.inputs.COMMIT_REF) && (github.event.number || github.event.inputs.PR_NUMBER || github.run_id)
|
||||
if: ((github.event.pull_request.head.sha || github.event.inputs.COMMIT_REF) && (github.event.number || github.event.inputs.PR_NUMBER || github.run_id)) && (github.repository == 'github/docs-internal' || github.repository == 'github/docs')
|
||||
timeout-minutes: 15
|
||||
environment:
|
||||
name: preview-env-${{ github.event.number }}
|
||||
|
|
|
@ -28,6 +28,7 @@ jobs:
|
|||
destory-azure-preview-env:
|
||||
name: Destroy
|
||||
runs-on: ubuntu-latest
|
||||
if: github.repository == 'github/docs-internal' || github.repository == 'github/docs'
|
||||
timeout-minutes: 5
|
||||
env:
|
||||
PR_NUMBER: ${{ github.event.number || github.event.inputs.PR_NUMBER }}
|
||||
|
|
|
@ -32,6 +32,7 @@ env:
|
|||
|
||||
jobs:
|
||||
build:
|
||||
if: github.repository == 'github/docs-internal' || github.repository == 'github/docs'
|
||||
runs-on: ${{ fromJSON('["ubuntu-latest", "ubuntu-20.04-xl"]')[github.repository == 'github/docs-internal'] }}
|
||||
steps:
|
||||
- name: Install a local Elasticsearch for testing
|
||||
|
|
|
@ -1,162 +0,0 @@
|
|||
name: Check all English links
|
||||
|
||||
# **What it does**: This script once a day checks all English links and reports in issues.
|
||||
# **Why we have it**: We want to know if any links break.
|
||||
# **Who does it impact**: Docs content.
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: '40 19 * * *' # once a day at 19:40 UTC / 11:40 PST
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
|
||||
jobs:
|
||||
check_all_english_links:
|
||||
name: Check all links
|
||||
if: github.repository == 'github/docs-internal'
|
||||
runs-on: ubuntu-20.04-xl
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.DOCUBOT_READORG_REPO_WORKFLOW_SCOPES }}
|
||||
FIRST_RESPONDER_PROJECT: Docs content first responder
|
||||
REPORT_AUTHOR: docubot
|
||||
REPORT_LABEL: broken link report
|
||||
REPORT_REPOSITORY: github/docs-content
|
||||
steps:
|
||||
- name: Check that gh CLI is installed
|
||||
run: gh --version
|
||||
|
||||
- name: Check out repo's default branch
|
||||
uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748
|
||||
- name: Setup Node
|
||||
uses: actions/setup-node@17f8bd926464a1afa4c6a11669539e9c1ba77048
|
||||
with:
|
||||
node-version: '16.17.0'
|
||||
cache: npm
|
||||
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
|
||||
- name: Cache nextjs build
|
||||
uses: actions/cache@48af2dc4a9e8278b89d7fa154b955c30c6aaab09
|
||||
with:
|
||||
path: .next/cache
|
||||
key: ${{ runner.os }}-nextjs-${{ hashFiles('package*.json') }}
|
||||
|
||||
- name: Build server
|
||||
run: npm run build
|
||||
|
||||
- name: Start server in the background
|
||||
env:
|
||||
NODE_ENV: production
|
||||
PORT: 4000
|
||||
# We don't want or need the changelog entries in this context.
|
||||
CHANGELOG_DISABLED: true
|
||||
# The default is 10s. But because this runs overnight, we can
|
||||
# be a lot more patient.
|
||||
REQUEST_TIMEOUT: 20000
|
||||
# Don't care about CDN caching image URLs
|
||||
DISABLE_REWRITE_ASSET_URLS: true
|
||||
run: |
|
||||
node server.js > /tmp/stdout.log 2> /tmp/stderr.log &
|
||||
sleep 6
|
||||
curl --retry-connrefused --retry 5 -I http://localhost:4000/
|
||||
|
||||
- if: ${{ failure() }}
|
||||
name: Debug server outputs on errors
|
||||
run: |
|
||||
echo "____STDOUT____"
|
||||
cat /tmp/stdout.log
|
||||
echo "____STDERR____"
|
||||
cat /tmp/stderr.log
|
||||
|
||||
- name: Run script
|
||||
timeout-minutes: 120
|
||||
env:
|
||||
# The default is 300 which works OK on a fast macbook pro
|
||||
# but not so well in Actions.
|
||||
LINKINATOR_CONCURRENCY: 100
|
||||
LINKINATOR_LOG_FILE_PATH: linkinator.log
|
||||
run: |
|
||||
script/check-english-links.js > broken_links.md
|
||||
|
||||
# check-english-links.js returns 0 if no links are broken, and 1 if any links
|
||||
# are broken. When an Actions step's exit code is 1, the action run's job status
|
||||
# is failure and the run ends. The following steps create an issue for the
|
||||
# broken link report only if any links are broken, so `if: ${{ failure() }}`
|
||||
# ensures the steps run despite the previous step's failure of the job.
|
||||
#
|
||||
# https://docs.github.com/actions/reference/context-and-expression-syntax-for-github-actions#job-status-check-functions
|
||||
|
||||
- uses: actions/upload-artifact@6673cd052c4cd6fcf4b4e6e60ea986c889389535
|
||||
with:
|
||||
name: linkinator_log
|
||||
path: linkinator.log
|
||||
- uses: actions/upload-artifact@6673cd052c4cd6fcf4b4e6e60ea986c889389535
|
||||
if: ${{ failure() }}
|
||||
with:
|
||||
name: broken_links
|
||||
path: ./broken_links.md
|
||||
- if: ${{ failure() }}
|
||||
name: Get title for issue
|
||||
id: check
|
||||
run: echo "::set-output name=title::$(head -1 broken_links.md)"
|
||||
- if: ${{ failure() }}
|
||||
name: Create issue from file
|
||||
id: broken-link-report
|
||||
uses: peter-evans/create-issue-from-file@b4f9ee0a9d4abbfc6986601d9b1a4f8f8e74c77e
|
||||
with:
|
||||
token: ${{ env.GITHUB_TOKEN }}
|
||||
title: ${{ steps.check.outputs.title }}
|
||||
content-filepath: ./broken_links.md
|
||||
repository: ${{ env.REPORT_REPOSITORY }}
|
||||
labels: ${{ env.REPORT_LABEL }}
|
||||
- if: ${{ failure() }}
|
||||
name: Close and/or comment on old issues
|
||||
env:
|
||||
NEW_REPORT_URL: 'https://github.com/${{ env.REPORT_REPOSITORY }}/issues/${{ steps.broken-link-report.outputs.issue-number }}'
|
||||
run: |
|
||||
gh alias set list-reports "issue list \
|
||||
--repo ${{ env.REPORT_REPOSITORY }} \
|
||||
--author ${{ env.REPORT_AUTHOR }} \
|
||||
--label '${{ env.REPORT_LABEL }}'"
|
||||
|
||||
# Link to the previous report from the new report that triggered this
|
||||
# workflow run.
|
||||
|
||||
previous_report_url=$(gh list-reports \
|
||||
--state all \
|
||||
--limit 2 \
|
||||
--json url \
|
||||
--jq '.[].url' \
|
||||
| grep -v ${{ env.NEW_REPORT_URL }} | head -1)
|
||||
|
||||
gh issue comment ${{ env.NEW_REPORT_URL }} --body "⬅️ [Previous report]($previous_report_url)"
|
||||
|
||||
# If an old report is open and assigned to someone, link to the newer
|
||||
# report without closing the old report.
|
||||
|
||||
for issue_url in $(gh list-reports \
|
||||
--json assignees,url \
|
||||
--jq '.[] | select (.assignees != []) | .url'); do
|
||||
if [ "$issue_url" != "${{ env.NEW_REPORT_URL }}" ]; then
|
||||
gh issue comment $issue_url --body "➡️ [Newer report](${{ env.NEW_REPORT_URL }})"
|
||||
fi
|
||||
done
|
||||
|
||||
# Link to the newer report from any older report that is still open,
|
||||
# then close the older report and remove it from the first responder's
|
||||
# project board.
|
||||
|
||||
for issue_url in $(gh list-reports \
|
||||
--search 'no:assignee' \
|
||||
--json url \
|
||||
--jq '.[].url'); do
|
||||
if [ "$issue_url" != "${{ env.NEW_REPORT_URL }}" ]; then
|
||||
gh issue comment $issue_url --body "➡️ [Newer report](${{ env.NEW_REPORT_URL }})"
|
||||
gh issue close $issue_url
|
||||
gh issue edit $issue_url --remove-project "${{ env.FIRST_RESPONDER_PROJECT }}"
|
||||
fi
|
||||
done
|
|
@ -31,6 +31,7 @@ concurrency:
|
|||
|
||||
jobs:
|
||||
lint:
|
||||
if: github.repository == 'github/docs-internal' || github.repository == 'github/docs'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check out repo
|
||||
|
|
|
@ -19,7 +19,7 @@ concurrency:
|
|||
|
||||
jobs:
|
||||
PR-Preview-Links:
|
||||
if: github.event.pull_request.user.login != 'Octomerger'
|
||||
if: github.event.pull_request.user.login != 'Octomerger' && (github.repository == 'github/docs-internal' || github.repository == 'github/docs')
|
||||
name: Add staging/live links to PR
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
|
|
|
@ -1,106 +0,0 @@
|
|||
name: 'Link Checker: All English'
|
||||
|
||||
# **What it does**: Renders the content of every page and check all internal links.
|
||||
# **Why we have it**: To make sure all links connect correctly.
|
||||
# **Who does it impact**: Docs content.
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
merge_group:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
pull_request:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
# Needed for the 'trilom/file-changes-action' action
|
||||
pull-requests: read
|
||||
|
||||
# This allows a subsequently queued workflow run to interrupt previous runs
|
||||
concurrency:
|
||||
group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}'
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
check-links:
|
||||
runs-on: ${{ fromJSON('["ubuntu-latest", "ubuntu-20.04-xl"]')[github.repository == 'github/docs-internal'] }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748
|
||||
|
||||
- name: Setup node
|
||||
uses: actions/setup-node@17f8bd926464a1afa4c6a11669539e9c1ba77048
|
||||
with:
|
||||
node-version: '16.17.0'
|
||||
cache: npm
|
||||
|
||||
- name: Install
|
||||
run: npm ci
|
||||
|
||||
- name: Gather files changed
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
PR: ${{ github.event.pull_request.number }}
|
||||
HEAD: ${{ github.event.pull_request.head.ref || github.event.merge_group.head_ref }}
|
||||
run: |
|
||||
# Find the file diff in the pull request or merge group
|
||||
# If it's a pull request, use the faster call to the GitHub API
|
||||
# For push, workflow_dispatch, and merge_group, use git diff
|
||||
if [ -n "$PR" ]
|
||||
then
|
||||
echo __ running gh pr diff __
|
||||
DIFF=`gh pr diff $PR --name-only`
|
||||
elif [ -n "$HEAD" ]
|
||||
then
|
||||
echo __ running git fetch main __
|
||||
git fetch origin main --depth 1
|
||||
echo __ running git diff __
|
||||
DIFF=`git diff --name-only origin/main`
|
||||
else
|
||||
echo __ no head, empty diff __
|
||||
DIFF=''
|
||||
fi
|
||||
# So we can inspect the output
|
||||
echo __ DIFF found __
|
||||
echo $DIFF
|
||||
|
||||
# Formats into single line JSON array, removing any empty strings
|
||||
echo __ format, write to files.json __
|
||||
echo $DIFF | \
|
||||
tr ' ' '\n' | \
|
||||
jq --raw-input | \
|
||||
jq --slurp --compact-output 'map(select(length > 0))' \
|
||||
> $HOME/files.json
|
||||
|
||||
- name: Link check (warnings, changed files)
|
||||
env:
|
||||
# Don't care about CDN caching image URLs
|
||||
DISABLE_REWRITE_ASSET_URLS: true
|
||||
run: |
|
||||
# Note as of Aug 2022, we *don't* check external links
|
||||
# on the pages you touched in the PR. We could enable that
|
||||
# but it has the added risk of false positives blocking CI.
|
||||
# We are using this script for the daily/nightly checker that
|
||||
# checks external links too. Once we're confident it really works
|
||||
# well, we can consider enabling it here on every content PR too.
|
||||
|
||||
./script/rendered-content-link-checker.js \
|
||||
--language en \
|
||||
--max 100 \
|
||||
--check-anchors \
|
||||
--check-images \
|
||||
--verbose \
|
||||
--list $HOME/files.json
|
||||
|
||||
- name: Link check (critical, all files)
|
||||
env:
|
||||
# Don't care about CDN caching image URLs
|
||||
DISABLE_REWRITE_ASSET_URLS: true
|
||||
run: |
|
||||
./script/rendered-content-link-checker.js \
|
||||
--language en \
|
||||
--exit \
|
||||
--verbose \
|
||||
--check-images \
|
||||
--level critical
|
|
@ -0,0 +1,55 @@
|
|||
name: 'Link Checker: Daily'
|
||||
|
||||
# **What it does**: Once a day, this script checks all English links and reports in an issue if any are broken.
|
||||
# **Why we have it**: We want to know if any links break internally or externally.
|
||||
# **Who does it impact**: Docs content.
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: '40 19 * * *' # once a day at 19:40 UTC / 11:40 PST
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
|
||||
jobs:
|
||||
check_all_english_links:
|
||||
name: Check all links
|
||||
if: github.repository == 'github/docs-internal'
|
||||
runs-on: ubuntu-20.04-xl
|
||||
steps:
|
||||
- name: Check that gh CLI is installed
|
||||
run: gh --version
|
||||
|
||||
- name: Check out repo's default branch
|
||||
uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748
|
||||
- name: Setup Node
|
||||
uses: actions/setup-node@17f8bd926464a1afa4c6a11669539e9c1ba77048
|
||||
with:
|
||||
node-version: '16.15.0'
|
||||
cache: npm
|
||||
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
|
||||
- name: Run link checker
|
||||
env:
|
||||
LEVEL: 'critical'
|
||||
# Set this to true in repo scope to enable debug logs
|
||||
# ACTIONS_RUNNER_DEBUG = true
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
GITHUB_TOKEN: ${{ secrets.DOCS_BOT_FR }}
|
||||
REPORT_AUTHOR: docubot
|
||||
REPORT_LABEL: broken link report
|
||||
REPORT_REPOSITORY: github/docs-content
|
||||
CREATE_REPORT: true
|
||||
CHECK_EXTERNAL_LINKS: true
|
||||
timeout-minutes: 30
|
||||
run: node .github/actions/rendered-content-link-checker.js
|
||||
|
||||
- name: Upload artifact(s)
|
||||
uses: actions/upload-artifact@3cea5372237819ed00197afe530f5a7ea3e805c8
|
||||
with:
|
||||
name: artifacts
|
||||
path: ./artifacts
|
|
@ -0,0 +1,89 @@
|
|||
name: 'Link Checker: On PR'
|
||||
|
||||
# **What it does**: Renders the content of every page and checks all internal links on PRs.
|
||||
# **Why we have it**: To make sure all links connect correctly on changed files.
|
||||
# **Who does it impact**: Docs content.
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
merge_group:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
pull_request:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
# TODO: Uncomment if we uncomment below
|
||||
# Needed for the 'trilom/file-changes-action' action
|
||||
# pull-requests: read
|
||||
|
||||
# This allows a subsequently queued workflow run to interrupt previous runs
|
||||
concurrency:
|
||||
group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}'
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
check-links:
|
||||
runs-on: ${{ fromJSON('["ubuntu-latest", "ubuntu-20.04-xl"]')[github.repository == 'github/docs-internal'] }}
|
||||
if: github.repository == 'github/docs-internal' || github.repository == 'github/docs'
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748
|
||||
|
||||
- name: Setup node
|
||||
uses: actions/setup-node@17f8bd926464a1afa4c6a11669539e9c1ba77048
|
||||
with:
|
||||
node-version: '16.15.0'
|
||||
cache: npm
|
||||
|
||||
- name: Install
|
||||
run: npm ci
|
||||
|
||||
# TODO: When we want to fix redirects on changed files we can uncomment everything below
|
||||
# Creates file "${{ env.HOME }}/files.json", among others
|
||||
# - name: Gather files changed
|
||||
# if: github.event_name != 'merge_group'
|
||||
# id: file_changes
|
||||
# uses: trilom/file-changes-action@a6ca26c14274c33b15e6499323aac178af06ad4b
|
||||
# with:
|
||||
# fileOutput: 'json'
|
||||
|
||||
# For verification
|
||||
# - name: Show files changed (debug)
|
||||
# if: github.event_name != 'merge_group'
|
||||
# run: cat $HOME/files.json
|
||||
|
||||
# - name: Link check changed pages (external links only)
|
||||
# if: github.event_name != 'merge_group'
|
||||
# id: changed_links
|
||||
# env:
|
||||
# LEVEL: 'warning'
|
||||
# FILES_CHANGED: ${{ steps.file_changes.outputs.files }}
|
||||
# ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
# GITHUB_TOKEN: ${{ secrets.DOCS_BOT_FR }}
|
||||
# SHOULD_COMMENT: true
|
||||
# CREATE_REPORT: false
|
||||
# run: node .github/actions/rendered-content-link-checker.js
|
||||
|
||||
- name: Link check all pages (internal links only)
|
||||
id: all_links
|
||||
env:
|
||||
LEVEL: 'critical'
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
GITHUB_TOKEN: ${{ secrets.DOCS_BOT_FR }}
|
||||
SHOULD_COMMENT: true
|
||||
CHECK_EXTERNAL_LINKS: false
|
||||
CREATE_REPORT: false
|
||||
run: node .github/actions/rendered-content-link-checker.js
|
||||
|
||||
- name: Upload artifact(s)
|
||||
uses: actions/upload-artifact@3cea5372237819ed00197afe530f5a7ea3e805c8
|
||||
with:
|
||||
name: artifacts
|
||||
path: ./artifacts
|
||||
|
||||
- name: Fail if either check has broken links in its level
|
||||
if: ${{ steps.changed_links.outputs.has_flaws_at_level == 'true' || steps.all_links.outputs.has_flaws_at_level == 'true' }}
|
||||
run: |
|
||||
exit 1
|
|
@ -21,6 +21,7 @@ permissions:
|
|||
jobs:
|
||||
noResponse:
|
||||
runs-on: ubuntu-latest
|
||||
if: github.repository == 'github/docs-internal' || github.repository == 'github/docs'
|
||||
steps:
|
||||
- uses: lee-dohm/no-response@9bb0a4b5e6a45046f00353d5de7d90fb8bd773bb
|
||||
with:
|
||||
|
|
|
@ -22,6 +22,7 @@ concurrency:
|
|||
jobs:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
if: github.repository == 'github/docs-internal' || github.repository == 'github/docs'
|
||||
steps:
|
||||
- name: Check out repo
|
||||
uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748
|
||||
|
|
|
@ -31,6 +31,7 @@ env:
|
|||
jobs:
|
||||
lint:
|
||||
runs-on: ${{ fromJSON('["ubuntu-latest", "ubuntu-20.04-xl"]')[github.repository == 'github/docs-internal'] }}
|
||||
if: github.repository == 'github/docs-internal' || github.repository == 'github/docs'
|
||||
steps:
|
||||
- uses: getong/elasticsearch-action@95b501ab0c83dee0aac7c39b7cea3723bef14954
|
||||
with:
|
||||
|
|
|
@ -29,6 +29,7 @@ env:
|
|||
|
||||
jobs:
|
||||
test:
|
||||
if: github.repository == 'github/docs-internal' || github.repository == 'github/docs'
|
||||
# Run on ubuntu-20.04-xl if the private repo or ubuntu-latest if the public repo
|
||||
# See pull # 17442 in the private repo for context
|
||||
runs-on: ${{ fromJSON('["ubuntu-latest", "ubuntu-20.04-xl"]')[github.repository == 'github/docs-internal'] }}
|
||||
|
|
|
@ -26,3 +26,6 @@ lib/redirects/.redirects-cache*.json
|
|||
# During the preview deploy untrusted user code may be cloned into this directory
|
||||
# We ignore it from git to keep things deterministic
|
||||
user-code/
|
||||
|
||||
# Logs from scripts
|
||||
script/logs/
|
||||
|
|
|
@@ -34,4 +34,10 @@ export default [
|
|||
'https://www.ipaddressguide.com/cidr',
|
||||
'https://crates.io/',
|
||||
'https://opensource.org/about',
|
||||
'https://www.openstreetmap.org/user/new',
|
||||
'https://wiki.debian.org/chroot',
|
||||
'https://www.adobe.com/products/coldfusion-builder.html',
|
||||
'https://developer.android.com/studio',
|
||||
'https://lastpass.com/',
|
||||
'https://lastpass.com/auth/',
|
||||
]
|
||||
|
|
|
@@ -19905,7 +19905,8 @@
|
|||
},
|
||||
"node_modules/uuid": {
|
||||
"version": "8.3.2",
|
||||
"license": "MIT",
|
||||
"resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz",
|
||||
"integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==",
|
||||
"bin": {
|
||||
"uuid": "dist/bin/uuid"
|
||||
}
|
||||
|
@@ -34494,7 +34495,9 @@
|
|||
"version": "1.0.1"
|
||||
},
|
||||
"uuid": {
|
||||
"version": "8.3.2"
|
||||
"version": "8.3.2",
|
||||
"resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz",
|
||||
"integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg=="
|
||||
},
|
||||
"uvu": {
|
||||
"version": "0.5.2",
|
||||
|
|
|
@@ -1,217 +0,0 @@
|
|||
#!/usr/bin/env node
|
||||
|
||||
// [start-readme]
|
||||
//
|
||||
// This script runs once per day via a scheduled GitHub Action to check all links in
|
||||
// English content, not including deprecated Enterprise Server content. It opens an issue
|
||||
// if it finds broken links. To exclude a link path, add it to `lib/excluded-links.js`.
|
||||
// Note that linkinator sometimes returns 429 and 503 errors for links that are not actually
|
||||
// broken, so this script double-checks those using `got`.
|
||||
//
|
||||
// [end-readme]
|
||||
|
||||
import { fileURLToPath } from 'url'
|
||||
import path from 'path'
|
||||
import fs from 'fs'
|
||||
import { LinkChecker } from 'linkinator'
|
||||
import { program } from 'commander'
|
||||
import { pull, uniq } from 'lodash-es'
|
||||
import rimraf from 'rimraf'
|
||||
import mkdirp from 'mkdirp'
|
||||
import { deprecated } from '../lib/enterprise-server-releases.js'
|
||||
import got from 'got'
|
||||
import excludedLinks from '../lib/excluded-links.js'
|
||||
import libLanguages from '../lib/languages.js'
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url))
|
||||
|
||||
const checker = new LinkChecker()
|
||||
const root = 'http://localhost:4000'
|
||||
const englishRoot = `${root}/en`
|
||||
|
||||
const LINKINATOR_LOG_FILE_PATH =
|
||||
process.env.LINKINATOR_LOG_FILE_PATH || path.join(__dirname, '../.linkinator/full.log')
|
||||
// When using the peter-evans/create-issue-from-file Action to post an
|
||||
// issue comment you might get an error like this:
|
||||
//
|
||||
// "body is too long (maximum is 65536 characters)"
|
||||
//
|
||||
// So we cap our output to not exceed that length.
|
||||
// This number doesn't have to be strictly less than the maximum possible
|
||||
// but it just mustn't exceed the validation limit.
|
||||
// Note, a little bit of room must be left for adding
|
||||
// a note in the generated output about the excess.
|
||||
const DISPLAY_MAX_LENGTH = parseInt(process.env.DISPLAY_MAX_LENGTH || '30000', 10)
|
||||
|
||||
// Links with these codes may or may not really be broken.
|
||||
const retryStatusCodes = [429, 503, 'Invalid']
|
||||
|
||||
const LINKINATOR_CONCURRENCY = parseInt(process.env.LINKINATOR_CONCURRENCY || '300')
|
||||
|
||||
program
|
||||
.description('Check all links in the English docs.')
|
||||
.option(
|
||||
'-d, --dry-run',
|
||||
'Turn off recursion to get a fast minimal report (useful for previewing output).'
|
||||
)
|
||||
.option(
|
||||
'-r, --do-not-retry',
|
||||
`Do not retry broken links with status codes ${retryStatusCodes.join(', ')}.`
|
||||
)
|
||||
.option(
|
||||
'-p, --path <PATH>',
|
||||
`Provide an optional path to check. Best used with --dry-run. Default: ${englishRoot}`
|
||||
)
|
||||
.parse(process.argv)
|
||||
|
||||
// Skip excluded links defined in separate file.
|
||||
|
||||
// Skip non-English content.
|
||||
const languagesToSkip = Object.keys(libLanguages)
|
||||
.filter((code) => code !== 'en')
|
||||
.map((code) => new RegExp(`${root}/${code}`))
|
||||
|
||||
// Skip deprecated Enterprise content.
|
||||
// Capture the old format https://docs.github.com/enterprise/2.1/
|
||||
// and the new format https://docs.github.com/enterprise-server@2.19/.
|
||||
const enterpriseReleasesToSkip = new RegExp(`${root}.+?[/@](${deprecated.join('|')})(/|$)`)
|
||||
|
||||
const config = {
|
||||
path: program.opts().path || englishRoot,
|
||||
concurrency: LINKINATOR_CONCURRENCY,
|
||||
// If this is a dry run, turn off recursion.
|
||||
recurse: !program.opts().dryRun,
|
||||
silent: true,
|
||||
// The values in this array are treated as regexes.
|
||||
linksToSkip: linksToSkipFactory([
|
||||
enterpriseReleasesToSkip,
|
||||
...languagesToSkip,
|
||||
...excludedLinks,
|
||||
// Don't leak into the production site
|
||||
/https:\/\/docs\.github\.com/,
|
||||
]),
|
||||
}
|
||||
|
||||
// Return a function that can as quickly as possible check if a certain
|
||||
// href input should be skipped.
|
||||
// Do this so we can use a `Set` and an `iterable.some()` for a speedier
|
||||
// check. In Linkinator's default implementation, if you set the
|
||||
// `linksToSkip` config to an array, it will, for every URL it
|
||||
// checks, turn that into a new regex every single time.
|
||||
function linksToSkipFactory(regexAndURLs) {
|
||||
const set = new Set(regexAndURLs.filter((regexOrURL) => typeof regexOrURL === 'string'))
|
||||
const regexes = regexAndURLs.filter((regexOrURL) => regexOrURL instanceof RegExp)
|
||||
return (href) => set.has(href) || regexes.some((regex) => regex.test(href))
|
||||
}
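// Illustrative usage sketch (not part of the original script): the returned predicate does a
// Set lookup for exact string matches and loops the precompiled regexes once per href,
// instead of rebuilding a regex for every URL. The inputs below are made-up examples.
const shouldSkip = linksToSkipFactory(['https://example.com/exact', /^http:\/\/localhost:4000\/ja\//])
shouldSkip('https://example.com/exact') // true, via the Set
shouldSkip('http://localhost:4000/ja/some/page') // true, via the regex
shouldSkip('http://localhost:4000/en/some/page') // false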
|
||||
|
||||
main()
|
||||
|
||||
async function main() {
|
||||
// Clear and recreate a directory for logs.
|
||||
const logFile = LINKINATOR_LOG_FILE_PATH
|
||||
rimraf.sync(path.dirname(logFile))
|
||||
await mkdirp(path.dirname(logFile))
|
||||
|
||||
// Update CLI output and append to logfile after each checked link.
|
||||
checker.on('link', (result) => {
|
||||
// We don't need to dump all of the HTTP and HTML details
|
||||
delete result.failureDetails
|
||||
|
||||
fs.appendFileSync(logFile, JSON.stringify(result) + '\n')
|
||||
})
|
||||
|
||||
// Start the scan; events will be logged as they occur.
|
||||
const result = (await checker.check(config)).links
|
||||
|
||||
// Scan is complete! Filter the results for broken links.
|
||||
const brokenLinks = result
|
||||
.filter((link) => link.state === 'BROKEN')
|
||||
// Coerce undefined status codes into `Invalid` strings so we can display them.
|
||||
// Without this, undefined codes get JSON.stringified as `0`, which is not useful output.
|
||||
.map((link) => {
|
||||
link.status = link.status || 'Invalid'
|
||||
return link
|
||||
})
|
||||
|
||||
// It's OK to console.warn because that goes to stderr.
|
||||
console.warn(`${brokenLinks.length} broken links in total (before retry)`)
|
||||
|
||||
if (!program.opts().doNotRetry) {
|
||||
// Links to retry individually.
|
||||
const linksToRetry = brokenLinks.filter((link) => retryStatusCodes.includes(link.status))
|
||||
|
||||
// It's OK to console.warn because that goes to stderr.
|
||||
console.warn(`${linksToRetry.length} links to retry`)
|
||||
|
||||
await Promise.all(
|
||||
linksToRetry.map(async (link) => {
|
||||
try {
|
||||
// got throws an HTTPError if response code is not 2xx or 3xx.
|
||||
// If got succeeds, we can remove the link from the list.
|
||||
await got(link.url)
|
||||
pull(brokenLinks, link)
|
||||
// If got fails, do nothing. The link is already in the broken list.
|
||||
} catch (err) {
|
||||
// noop
|
||||
}
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
// Exit successfully if no broken links!
|
||||
if (!brokenLinks.length) {
|
||||
console.log('All links are good!')
|
||||
process.exit(0)
|
||||
}
|
||||
|
||||
// Format and display the results.
|
||||
console.log(`${brokenLinks.length} broken links found on ${root}\n`)
|
||||
console.log(getDisplayBrokenLinks(brokenLinks, DISPLAY_MAX_LENGTH))
|
||||
console.log(
|
||||
'\nIf links are "false positives" (e.g. can only be opened by a browser) ' +
|
||||
'consider making a pull request that edits `lib/excluded-links.js`.'
|
||||
)
|
||||
|
||||
// Exit unsuccessfully if broken links are found.
|
||||
process.exit(1)
|
||||
}
|
||||
|
||||
function getDisplayBrokenLinks(brokenLinks, maxLength) {
|
||||
let output = ''
|
||||
// Sort results by status code.
|
||||
const allStatusCodes = uniq(
|
||||
brokenLinks
|
||||
// Coerce undefined status codes into `Invalid` strings so we can display them.
|
||||
// Without this, undefined codes get JSON.stringified as `0`,
|
||||
// which is not useful output.
|
||||
.map((link) => link.status || 'Invalid')
|
||||
)
|
||||
|
||||
allStatusCodes.forEach((statusCode) => {
|
||||
const brokenLinksForStatus = brokenLinks.filter((x) => x.status === statusCode)
|
||||
|
||||
output += `## Status ${statusCode}: Found ${brokenLinksForStatus.length} broken links\n\n`
|
||||
output += '```\n'
|
||||
let exceededDisplayLimit = 0
|
||||
brokenLinksForStatus.forEach((brokenLinkObj) => {
|
||||
// We don't need to dump all of the HTTP and HTML details
|
||||
delete brokenLinkObj.failureDetails
|
||||
const line = JSON.stringify(brokenLinkObj, null, 2)
|
||||
if (output.length + line.length > maxLength) {
|
||||
exceededDisplayLimit++
|
||||
return
|
||||
}
|
||||
|
||||
output += `${line}\n`
|
||||
})
|
||||
output += '```\n'
|
||||
if (exceededDisplayLimit > 0) {
|
||||
output += `\n(Note! Because the comment is already big,
|
||||
we skipped ${exceededDisplayLimit} additional broken links.
|
||||
It is unlikely that these are real broken links. More likely
|
||||
they are false positives due to a server-related issue that
|
||||
needs investigating.)\n`
|
||||
}
|
||||
})
|
||||
|
||||
return output
|
||||
}
|
|
@@ -0,0 +1,43 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copies certain directories over to docs-internal-test and pushes. Useful for debugging actions
|
||||
# Doesn't copy over git lfs files (.json.br), content/, and data/ directories
|
||||
|
||||
echo "Make sure to run this script in the root path of docs-internal!"
|
||||
|
||||
read -p "Relative path to test repo [../docs-internal-test] (enter for default):" TEST_PATH
|
||||
|
||||
TEST_PATH=${TEST_PATH:-../docs-internal-test}
|
||||
|
||||
cd $TEST_PATH
|
||||
REPO_NAME=$(basename `git rev-parse --show-toplevel`)
|
||||
REPO_BRANCH=$(git rev-parse --symbolic-full-name --abbrev-ref HEAD)
|
||||
cd -
|
||||
|
||||
if [[ "$REPO_BRANCH" != "main" ]]; then
|
||||
echo "docs-internal-test isn't on main branch"
|
||||
exit 1
|
||||
fi;
|
||||
|
||||
if [[ "$REPO_NAME" == "docs-internal-test" ]]; then
|
||||
echo "Copying files to $TEST_PATH..."
|
||||
rsync -r --exclude='.git' --exclude='.gitattributes' --exclude='node_modules' --exclude='data' --exclude='content' --exclude="lib/search/indexes" --exclude=".github/CODEOWNERS" . $TEST_PATH
|
||||
cd $TEST_PATH
|
||||
if [[ `git status --porcelain` ]]; then
|
||||
echo "Committing and pushing test files"
|
||||
git add --all
|
||||
git commit -m "testing (committed from script)"
|
||||
git push -f
|
||||
else
|
||||
echo "No changes copied over. Are there relevant changes and are you pointing to the correct -test directory?"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "$TEST_PATH is not the docs-internal-test repo directory"
|
||||
exit 1
|
||||
fi;
|
||||
|
||||
exit
|
||||
|
||||
|
||||
|
|
@@ -0,0 +1,52 @@
|
|||
/*
|
||||
* Dependency injection for scripts that call .github/actions/ code
|
||||
* Replaces action platform specific functionality with local machine functionality
|
||||
*/
|
||||
|
||||
import fs from 'fs'
|
||||
import path from 'path'
|
||||
import chalk from 'chalk'
|
||||
|
||||
import github from './github.js'
|
||||
|
||||
// Directs core logging to console
|
||||
export function getCoreInject(debug) {
|
||||
return {
|
||||
info: console.log,
|
||||
debug: (message) => (debug ? console.warn(chalk.blue(message)) : {}),
|
||||
warning: (message) => console.warn(chalk.yellow(message)),
|
||||
error: console.error,
|
||||
setOutput: (name, value) => {
|
||||
if (debug) {
|
||||
console.log(`Output "${name}" set to: "${value}"`)
|
||||
}
|
||||
},
|
||||
setFailed: (message) => {
|
||||
if (debug) {
|
||||
console.log('setFailed called.')
|
||||
}
|
||||
throw new Error(message)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// Writes strings that would be uploaded as artifacts to a local logs/ directory
|
||||
const cwd = new URL('', import.meta.url).pathname
|
||||
const logsPath = path.join(cwd, '..', '..', 'logs')
|
||||
if (!fs.existsSync(logsPath)) {
|
||||
fs.mkdirSync(logsPath)
|
||||
}
|
||||
export function getUploadArtifactInject(debug) {
|
||||
return (name, contents) => {
|
||||
const logFilename = path.join(logsPath, `${new Date().toISOString().substr(0, 16)}-${name}`)
|
||||
if (debug) {
|
||||
fs.writeFileSync(logFilename, contents)
|
||||
console.log(`${name} artifact upload written to ${logFilename}`)
|
||||
} else {
|
||||
console.log(`Debug not enabled. ${name} artifact NOT written to ${logFilename}`)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Uses local process.env GITHUB_TOKEN to create an octokit instance
|
||||
export const octokitInject = github()
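// Illustrative sketch (not part of the diff): with these injections, the action code can be
// exercised from a local script. The option values are hypothetical; the call shape mirrors
// how script/rendered-content-link-checker.js invokes the action further down in this commit.
import renderedContentLinkChecker from '../../.github/actions/rendered-content-link-checker.js'
import { getCoreInject, getUploadArtifactInject, octokitInject } from './action-injections.js'

await renderedContentLinkChecker(
  getCoreInject(true), // route core.* calls to the console
  octokitInject, // octokit built from the local GITHUB_TOKEN
  getUploadArtifactInject(true), // write "artifacts" into the local logs/ directory
  { level: 'critical', files: [] } // hypothetical options
)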
|
|
@@ -9,19 +9,11 @@
|
|||
|
||||
import fs from 'fs'
|
||||
import path from 'path'
|
||||
import cheerio from 'cheerio'
|
||||
import { program, Option, InvalidArgumentError } from 'commander'
|
||||
import chalk from 'chalk'
|
||||
import got, { RequestError } from 'got'
|
||||
|
||||
import shortVersions from '../middleware/contextualizers/short-versions.js'
|
||||
import contextualize from '../middleware/context.js'
|
||||
import { languageKeys } from '../lib/languages.js'
|
||||
import getRedirect from '../lib/get-redirect.js'
|
||||
import warmServer from '../lib/warm-server.js'
|
||||
import renderContent from '../lib/render-content/index.js'
|
||||
import { deprecated } from '../lib/enterprise-server-releases.js'
|
||||
import excludedLinks from '../lib/excluded-links.js'
|
||||
import renderedContentLinkChecker from '../.github/actions/rendered-content-link-checker.js'
|
||||
import { getCoreInject, getUploadArtifactInject } from './helpers/action-injections.js'
|
||||
import github from './helpers/github.js'
|
||||
|
||||
const STATIC_PREFIXES = {
|
||||
assets: path.resolve('assets'),
|
||||
|
@@ -34,24 +26,6 @@ Object.entries(STATIC_PREFIXES).forEach(([key, value]) => {
|
|||
}
|
||||
})
|
||||
|
||||
// Return a function that can as quickly as possible check if a certain
|
||||
// href input should be skipped.
|
||||
// Do this so we can use a `Set` and an `iterable.some()` for a speedier
|
||||
// check.
|
||||
function linksToSkipFactory() {
|
||||
const set = new Set(excludedLinks.filter((regexOrURL) => typeof regexOrURL === 'string'))
|
||||
const regexes = excludedLinks.filter((regexOrURL) => regexOrURL instanceof RegExp)
|
||||
return (href) => set.has(href) || regexes.some((regex) => regex.test(href))
|
||||
}
|
||||
|
||||
const linksToSkip = linksToSkipFactory(excludedLinks)
|
||||
|
||||
const CONTENT_ROOT = path.resolve('content')
|
||||
|
||||
const deprecatedVersionPrefixesRegex = new RegExp(
|
||||
`enterprise(-server@|/)(${deprecated.join('|')})(/|$)`
|
||||
)
|
||||
|
||||
program
|
||||
.description('Analyze all checked content files, render them, and check for flaws.')
|
||||
.addOption(
|
||||
|
@@ -62,21 +36,47 @@ program
|
|||
])
|
||||
)
|
||||
.addOption(
|
||||
new Option('-l, --language <LANGUAGE...>', 'Which languages to focus on').choices(languageKeys)
|
||||
new Option(
|
||||
'-l, --language <LANGUAGE...>',
|
||||
'Which languages to focus on. (default: "en")'
|
||||
).choices(languageKeys)
|
||||
)
|
||||
.option('--verbose-url <BASE_URL>', 'Print the absolute URL if set')
|
||||
.option('-f, --filter <FILTER...>', 'Search filter(s) on the paths')
|
||||
.option('-e, --exit', 'Exit script by count of flaws (useful for CI)')
|
||||
.option('-b, --bail', 'Exit on the first flaw')
|
||||
.option('-l, --level', 'Level of broken link to be marked as a flaw. (default: "critical")')
|
||||
.option('-v, --verbose', 'Verbose outputs')
|
||||
.option(
|
||||
'--create-report',
|
||||
'Create a report issue in report-repository if there are flaws. (default: false)'
|
||||
)
|
||||
.option(
|
||||
'--report-repository <REPOSITORY>',
|
||||
'Repository to create issue in. (default: "github/docs-content")'
|
||||
)
|
||||
.option(
|
||||
'--link-reports',
|
||||
'If comments should be made on previous report and new report "linking" them. (default: false)'
|
||||
)
|
||||
.option(
|
||||
'--report-author <AUTHOR>',
|
||||
'Previous author of report PR for linking. (default: "docubot")'
|
||||
)
|
||||
.option(
|
||||
'--report-label <LABEL>',
|
||||
'Label to assign to report issue. (default: "broken link report")'
|
||||
)
|
||||
.option(
|
||||
'--comment-on-pr <URI>',
|
||||
'For debugging. Comment on a PR in form "owner/repo-name:pr_number"'
|
||||
)
|
||||
.option('--should-comment', 'Comments failed links on PR')
|
||||
.option('--check-anchors', "Validate links that start with a '#' too")
|
||||
.option('--check-images', 'Validate local images too')
|
||||
.option('--check-external-links', 'Check external URLs too')
|
||||
.option('-v, --verbose', 'Verbose outputs')
|
||||
.option('--debug', "Loud about everything it's doing")
|
||||
.option('--random', 'Load pages in a random order (useful for debugging)')
|
||||
.option('--patient', 'Give external link checking longer timeouts and more retries')
|
||||
.option('-o, --out <file>', 'Put warnings and errors into a file instead of stdout')
|
||||
.option('--json-output', 'Print JSON to stdout or file instead')
|
||||
.option('--random', 'Load pages in a random order (useful for debugging)')
|
||||
.option('--verbose-url <BASE_URL>', 'Print the absolute URL if set')
|
||||
.option('--fail-on-flaw', 'Throw error on link flaws (default: false)')
|
||||
.option('--max <number>', 'integer argument (default: none)', (value) => {
|
||||
const parsed = parseInt(value, 10)
|
||||
if (isNaN(parsed)) {
|
||||
|
@@ -107,655 +107,34 @@ program
|
|||
.arguments('[files...]', 'Specific files to check')
|
||||
.parse(process.argv)
|
||||
|
||||
main(program.opts(), program.args)
|
||||
const opts = program.opts()
|
||||
const files = program.args || opts.list
|
||||
const octokit = github()
|
||||
|
||||
async function main(opts, files) {
|
||||
const {
|
||||
random,
|
||||
language,
|
||||
filter,
|
||||
exit,
|
||||
debug,
|
||||
max,
|
||||
verbose,
|
||||
list,
|
||||
checkExternalLinks,
|
||||
jsonOutput,
|
||||
out,
|
||||
} = opts
|
||||
if (opts.list && Array.isArray(files) && files.length > 0) {
|
||||
throw new InvalidArgumentError('Cannot specify both --list and a file list.')
|
||||
}
|
||||
|
||||
// Note! The reason we're using `warmServer()` in this script,
|
||||
// even though there's no server involved, is because
|
||||
// the `contextualize()` function calls it.
|
||||
// And because warmServer() is actually idempotent, meaning it's
|
||||
// cheap to call it more than once, it wouldn't be expensive to call it
|
||||
// twice unnecessarily.
|
||||
// If we'd manually do the same operations that `warmServer()` does
|
||||
// here (e.g. `loadPageMap()`), we'd end up having to do it all over
|
||||
// again, the next time `contextualize()` is called.
|
||||
const { redirects, pages: pageMap, pageList } = await warmServer()
|
||||
|
||||
const languages = language || []
|
||||
console.assert(Array.isArray(languages), `${languages} is not an array`)
|
||||
const filters = filter || []
|
||||
console.assert(Array.isArray(filters), `${filters} is not an array`)
|
||||
|
||||
if (list && Array.isArray(files) && files.length > 0) {
|
||||
throw new InvalidArgumentError('Cannot specify both --list and a file list.')
|
||||
}
|
||||
|
||||
if (list) {
|
||||
const fileList = JSON.parse(await fs.promises.readFile(list))
|
||||
if (Array.isArray(fileList) && fileList.length > 0) {
|
||||
files = fileList
|
||||
} else {
|
||||
// This must be allowed for empty PRs that accompany docs-early-access repo PRs
|
||||
console.warn('No files found in --list. Exiting...')
|
||||
process.exit(0)
|
||||
}
|
||||
}
|
||||
|
||||
if (random) {
|
||||
shuffle(pageList)
|
||||
}
|
||||
|
||||
debug && console.time('getPages')
|
||||
const pages = getPages(pageList, languages, filters, files, max)
|
||||
debug && console.timeEnd('getPages')
|
||||
|
||||
if (checkExternalLinks && pages.length >= 100) {
|
||||
console.warn(
|
||||
chalk.yellow(
|
||||
`Warning! Checking external URLs can be time costly. You're testing ${pages.length} pages.`
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
const processPagesStart = new Date()
|
||||
const flawsGroups = await Promise.all(
|
||||
pages.map((page) => processPage(page, pageMap, redirects, opts))
|
||||
)
|
||||
const processPagesEnd = new Date()
|
||||
const flaws = flawsGroups.flat()
|
||||
if (jsonOutput) {
|
||||
jsonPrintFlaws(flaws, opts)
|
||||
}
|
||||
|
||||
debug && printGlobalCacheHitRatio()
|
||||
|
||||
if (verbose) {
|
||||
summarizeCounts(pages)
|
||||
|
||||
console.log(`Checked ${(globalCacheHitCount + globalCacheMissCount).toLocaleString()} links`)
|
||||
console.log(`Took ${getDurationString(processPagesStart, processPagesEnd)}`)
|
||||
|
||||
summarizeFlaws(flaws)
|
||||
if (out && flaws.length > 0) {
|
||||
console.log(`All flaws written to ${chalk.bold(out)}`)
|
||||
}
|
||||
}
|
||||
|
||||
if (exit) {
|
||||
process.exit(flaws.length)
|
||||
// For debugging PR comment. e.g. "github/howie-testing-ebonsignori:140"
|
||||
if (opts.commentOnPr) {
|
||||
const [owner, repoPRNumber] = opts.commentOnPr.split('/')
|
||||
const [repo, number] = repoPRNumber.split(':')
|
||||
opts.shouldComment = true
|
||||
opts.actionContext = {
|
||||
owner,
|
||||
repo,
|
||||
pull_request: {
|
||||
number,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
function printGlobalCacheHitRatio() {
|
||||
const hits = globalCacheHitCount
|
||||
const misses = globalCacheMissCount
|
||||
// It could be that the files that were tested didn't have a single
|
||||
// link in them. In that case, there are no cache misses or hits at all.
|
||||
// So avoid the division by zero.
|
||||
if (misses + hits) {
|
||||
console.log(
|
||||
`Cache hit ratio: ${hits.toLocaleString()} of ${(misses + hits).toLocaleString()} (${(
|
||||
(100 * hits) /
|
||||
(misses + hits)
|
||||
).toFixed(1)}%)`
|
||||
)
|
||||
renderedContentLinkChecker(
|
||||
getCoreInject(opts.debug),
|
||||
octokit,
|
||||
getUploadArtifactInject(opts.debug),
|
||||
{
|
||||
...opts,
|
||||
files,
|
||||
}
|
||||
}
|
||||
|
||||
function getDurationString(date1, date2) {
|
||||
const seconds = (date2.getTime() - date1.getTime()) / 1000
|
||||
const minutes = seconds / 60
|
||||
if (minutes > 1) {
|
||||
return `${minutes.toFixed(1)} minutes`
|
||||
}
|
||||
return `${seconds.toFixed(1)} seconds`
|
||||
}
|
||||
|
||||
function getPages(pageList, languages, filters, files, max) {
|
||||
return pageList
|
||||
.filter((page) => {
|
||||
if (languages.length && !languages.includes(page.languageCode)) {
|
||||
return false
|
||||
}
|
||||
|
||||
if (filters.length && !filters.find((filter) => page.relativePath.includes(filter))) {
|
||||
return false
|
||||
}
|
||||
|
||||
if (
|
||||
files.length &&
|
||||
// The reason for checking each file against the `relativePath`
|
||||
// or the `fullPath` is to make it flexible for the user.
|
||||
!files.find((file) => {
|
||||
if (page.relativePath === file) return true
|
||||
if (page.fullPath === file) return true
|
||||
// The `page.relativePath` will always be *from* the containing
|
||||
// directory it came from and might not be relative to the repo
|
||||
// root. I.e.
|
||||
// `content/education/quickstart.md` is the path relative to
|
||||
// the repo root. But the `page.relativePath` will
|
||||
// in this case be `education/quickstart.md`.
|
||||
// So give it one last chance to relate to the repo root.
|
||||
// This is important because you might use `git diff --name-only`
|
||||
// to get the list of files to focus specifically on.
|
||||
if (path.join(CONTENT_ROOT, page.relativePath) === path.resolve(file)) return true
|
||||
return false
|
||||
})
|
||||
) {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
})
|
||||
.slice(0, max ? Math.min(max, pageList.length) : pageList.length)
|
||||
}
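// Illustrative sketch (not part of the diff): thanks to the matching above, any of these
// spellings select the same page, which matters when the file list comes straight from
// `git diff --name-only` (repo-root relative paths). The paths are made-up examples.
getPages(pageList, ['en'], [], ['content/education/quickstart.md'], null)
getPages(pageList, ['en'], [], ['education/quickstart.md'], null)
getPages(pageList, ['en'], [], [path.resolve('content/education/quickstart.md')], null)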
|
||||
|
||||
async function processPage(page, pageMap, redirects, opts) {
|
||||
const { bail, verboseUrl, jsonOutput, out } = opts
|
||||
|
||||
const allFlawsEach = await Promise.all(
|
||||
page.permalinks.map((permalink) => processPermalink(permalink, page, pageMap, redirects, opts))
|
||||
)
|
||||
|
||||
const allFlaws = allFlawsEach.flat()
|
||||
|
||||
if (bail && allFlaws.length > 0) {
|
||||
if (jsonOutput) {
|
||||
jsonPrintFlaws(allFlaws, opts)
|
||||
} else {
|
||||
printFlaws(allFlaws, { verboseUrl, out })
|
||||
}
|
||||
process.exit(1)
|
||||
}
|
||||
|
||||
if (!jsonOutput) {
|
||||
printFlaws(allFlaws, { verboseUrl, out })
|
||||
}
|
||||
|
||||
return allFlaws
|
||||
}
|
||||
|
||||
async function processPermalink(permalink, page, pageMap, redirects, opts) {
|
||||
const { level, checkAnchors, checkImages, checkExternalLinks, verbose, patient } = opts
|
||||
const html = await renderInnerHTML(page, permalink)
|
||||
const $ = cheerio.load(html)
|
||||
const flaws = []
|
||||
const links = []
|
||||
$('a[href]').each((i, link) => {
|
||||
links.push(link)
|
||||
})
|
||||
const newFlaws = await Promise.all(
|
||||
links.map(async (link) => {
|
||||
const { href } = link.attribs
|
||||
|
||||
// The global cache can't be used for anchor links because they
|
||||
// depend on the page they are rendered on
|
||||
if (!href.startsWith('#')) {
|
||||
if (globalHrefCheckCache.has(href)) {
|
||||
globalCacheHitCount++
|
||||
return globalHrefCheckCache.get(href)
|
||||
}
|
||||
globalCacheMissCount++
|
||||
}
|
||||
|
||||
const flaw = await checkHrefLink(
|
||||
href,
|
||||
$,
|
||||
redirects,
|
||||
pageMap,
|
||||
checkAnchors,
|
||||
checkExternalLinks,
|
||||
{ verbose, patient }
|
||||
)
|
||||
|
||||
if (flaw) {
|
||||
if (level === 'critical' && !flaw.CRITICAL) {
|
||||
return
|
||||
}
|
||||
const text = $(link).text()
|
||||
if (!href.startsWith('#')) {
|
||||
globalHrefCheckCache.set(href, { href, flaw, text })
|
||||
}
|
||||
return { href, flaw, text }
|
||||
} else {
|
||||
if (!href.startsWith('#')) {
|
||||
globalHrefCheckCache.set(href, flaw)
|
||||
}
|
||||
}
|
||||
})
|
||||
)
|
||||
for (const flaw of newFlaws) {
|
||||
if (flaw) {
|
||||
flaws.push(Object.assign(flaw, { page, permalink }))
|
||||
}
|
||||
}
|
||||
|
||||
if (checkImages) {
|
||||
$('img[src]').each((i, img) => {
|
||||
let { src } = img.attribs
|
||||
|
||||
// Images get a cache-busting prefix injected in the image
|
||||
// E.g. <img src="/assets/cb-123456/foo/bar.png">
|
||||
// We need to remove that otherwise we can't look up the image
|
||||
// on disk.
|
||||
src = src.replace(/\/cb-\d+\//, '/')
|
||||
|
||||
if (globalImageSrcCheckCache.has(src)) {
|
||||
globalCacheHitCount++
|
||||
return globalImageSrcCheckCache.get(src)
|
||||
}
|
||||
|
||||
const flaw = checkImageSrc(src, $)
|
||||
|
||||
globalImageSrcCheckCache.set(src, flaw)
|
||||
|
||||
if (flaw) {
|
||||
if (level === 'critical' && !flaw.CRITICAL) {
|
||||
return
|
||||
}
|
||||
flaws.push({ permalink, page, src, flaw })
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
return flaws
|
||||
}
|
||||
|
||||
function jsonPrintFlaws(flaws, { verboseUrl = null, out = null } = {}) {
|
||||
const printableFlaws = {}
|
||||
for (const { page, permalink, href, text, src, flaw } of flaws) {
|
||||
const fullPath = prettyFullPath(page.fullPath)
|
||||
|
||||
if (!(fullPath in printableFlaws)) {
|
||||
printableFlaws[fullPath] = []
|
||||
}
|
||||
if (href) {
|
||||
printableFlaws[fullPath].push({
|
||||
href,
|
||||
url: verboseUrl ? new URL(permalink.href, verboseUrl).toString() : permalink.href,
|
||||
text,
|
||||
flaw,
|
||||
})
|
||||
} else if (src) {
|
||||
printableFlaws[fullPath].push({
|
||||
src,
|
||||
})
|
||||
}
|
||||
}
|
||||
const message = JSON.stringify(printableFlaws, undefined, 2)
|
||||
if (out) {
|
||||
fs.writeFileSync(out, message + '\n', 'utf-8')
|
||||
} else {
|
||||
console.log(message)
|
||||
}
|
||||
}
|
||||
|
||||
function printFlaws(flaws, { verboseUrl = null, out = null } = {}) {
|
||||
let previousPage = null
|
||||
let previousPermalink = null
|
||||
|
||||
function fout(msg) {
|
||||
if (out) {
|
||||
fs.appendFileSync(out, `${msg}\n`, 'utf-8')
|
||||
} else {
|
||||
console.log(msg)
|
||||
}
|
||||
}
|
||||
|
||||
for (const { page, permalink, href, text, src, flaw } of flaws) {
|
||||
const fullPath = prettyFullPath(page.fullPath)
|
||||
if (page !== previousPage) {
|
||||
if (out) {
|
||||
fout(`PAGE: ${fullPath}`)
|
||||
} else {
|
||||
console.log(`PAGE: ${chalk.bold(fullPath)}`)
|
||||
}
|
||||
}
|
||||
previousPage = page
|
||||
|
||||
if (href) {
|
||||
if (previousPermalink !== permalink.href) {
|
||||
if (verboseUrl) {
|
||||
fout(` URL: ${new URL(permalink.href, verboseUrl).toString()}`)
|
||||
} else {
|
||||
fout(` PERMALINK: ${permalink.href}`)
|
||||
}
|
||||
}
|
||||
previousPermalink = permalink.href
|
||||
|
||||
if (out) {
|
||||
fout(` HREF: ${href}`)
|
||||
} else {
|
||||
console.log(` HREF: ${chalk.bold(href)}`)
|
||||
}
|
||||
fout(` TEXT: ${text}`)
|
||||
} else if (src) {
|
||||
if (out) {
|
||||
fout(` IMG SRC: ${src}`)
|
||||
} else {
|
||||
console.log(` IMG SRC: ${chalk.bold(src)}`)
|
||||
}
|
||||
} else {
|
||||
throw new Error("Flaw has neither 'href' nor 'src'")
|
||||
}
|
||||
|
||||
if (out) {
|
||||
fout(` FLAW: ${flaw.CRITICAL ? flaw.CRITICAL : flaw.WARNING}`)
|
||||
} else {
|
||||
console.log(
|
||||
` FLAW: ${flaw.CRITICAL ? chalk.red(flaw.CRITICAL) : chalk.yellow(flaw.WARNING)}`
|
||||
)
|
||||
}
|
||||
fout('')
|
||||
}
|
||||
}
|
||||
|
||||
// Given a full path, change it so it's relative to the `cwd()` so that you
|
||||
// can take it from the output and paste it to something like `code ...here...`
|
||||
// The problem with displaying the full path is that it's quite noisy and
|
||||
// takes up a lot of space. Sure, you can copy and paste it in front of
|
||||
// `vi` or `ls` or `code` but if we display it relative to `cwd()` you
|
||||
// can still paste it to the next command but it's not taking up so much
|
||||
// space.
|
||||
function prettyFullPath(fullPath) {
|
||||
return path.relative(process.cwd(), fullPath)
|
||||
}
|
||||
|
||||
const globalHrefCheckCache = new Map()
|
||||
const globalImageSrcCheckCache = new Map()
|
||||
let globalCacheHitCount = 0
|
||||
let globalCacheMissCount = 0
|
||||
|
||||
async function checkHrefLink(
|
||||
href,
|
||||
$,
|
||||
redirects,
|
||||
pageMap,
|
||||
checkAnchors = false,
|
||||
checkExternalLinks = false,
|
||||
{ verbose = false, patient = false } = {}
|
||||
) {
|
||||
if (href === '#') {
|
||||
if (checkAnchors) {
|
||||
return { WARNING: 'Link is just an empty `#`' }
|
||||
}
|
||||
} else if (href.startsWith('#')) {
|
||||
if (checkAnchors) {
|
||||
const countDOMItems = $(href).length
|
||||
if (countDOMItems !== 1) {
|
||||
return { WARNING: `Anchor does not match exactly one element on the page` }
|
||||
}
|
||||
}
|
||||
} else if (href.startsWith('/')) {
|
||||
const pathname = new URL(href, 'http://example.com').pathname
|
||||
|
||||
// Remember, if the Markdown has something like
|
||||
//
|
||||
// See [my link](/some/page/)
|
||||
//
|
||||
// In the post-processing, that will actually become
|
||||
//
|
||||
// See <a href="/en/some/page">my link</a>
|
||||
//
|
||||
// But, if that link was a redirect, that would have been left
|
||||
// untouched.
|
||||
if (pathname.endsWith('/')) {
|
||||
return { WARNING: 'Links with a trailing / will always redirect' }
|
||||
} else {
|
||||
if (pathname.split('/')[1] in STATIC_PREFIXES) {
|
||||
const staticFilePath = path.join(
|
||||
STATIC_PREFIXES[pathname.split('/')[1]],
|
||||
pathname.split(path.sep).slice(2).join(path.sep)
|
||||
)
|
||||
if (!fs.existsSync(staticFilePath)) {
|
||||
return { CRITICAL: `Static file not found ${staticFilePath} (${pathname})` }
|
||||
}
|
||||
} else if (getRedirect(pathname, { redirects, pages: pageMap })) {
|
||||
return { WARNING: `Redirect to ${getRedirect(pathname, { redirects, pages: pageMap })}` }
|
||||
} else if (!pageMap[pathname]) {
|
||||
if (deprecatedVersionPrefixesRegex.test(pathname)) {
|
||||
return
|
||||
}
|
||||
|
||||
return { CRITICAL: 'Broken link' }
|
||||
}
|
||||
}
|
||||
} else if (checkExternalLinks) {
|
||||
if (!href.startsWith('https://')) {
|
||||
return { WARNING: `Will not check external URLs that are not HTTPS (${href})` }
|
||||
}
|
||||
if (linksToSkip(href)) {
|
||||
return
|
||||
}
|
||||
const { ok, ...info } = await checkExternalURL(href, { verbose, patient })
|
||||
if (!ok) {
|
||||
return { CRITICAL: `Broken external link (${JSON.stringify(info)})` }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const _fetchCache = new Map()
|
||||
async function checkExternalURL(url, { verbose = false, patient = false } = {}) {
|
||||
if (!url.startsWith('https://')) throw new Error('Invalid URL')
|
||||
const cleanURL = url.split('#')[0]
|
||||
if (!_fetchCache.has(cleanURL)) {
|
||||
_fetchCache.set(cleanURL, innerFetch(cleanURL, { verbose, patient }))
|
||||
}
|
||||
return _fetchCache.get(cleanURL)
|
||||
}
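// Illustrative sketch (not part of the diff): the cache key drops the URL fragment, so
// anchored variants of the same page share a single in-flight request.
await checkExternalURL('https://example.com/page#section-1') // performs the HEAD request
await checkExternalURL('https://example.com/page#section-2') // resolved from _fetchCache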
|
||||
|
||||
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms))
|
||||
|
||||
// Global for recording which domains we get rate-limited on.
|
||||
// For example, if you got rate limited on `something.github.com/foo`
|
||||
// and now we're asked to fetch for `something.github.com/bar`
|
||||
// it's good to know not to bother yet.
|
||||
const _rateLimitedDomains = new Map()
|
||||
|
||||
async function innerFetch(url, config = {}) {
|
||||
const { verbose, useGET, patient } = config
|
||||
|
||||
const { hostname } = new URL(url)
|
||||
if (_rateLimitedDomains.has(hostname)) {
|
||||
await sleep(_rateLimitedDomains.get(hostname))
|
||||
}
|
||||
// The way `got` does retries:
|
||||
//
|
||||
// sleep = 1000 * Math.pow(2, retry - 1) + Math.random() * 100
|
||||
//
|
||||
// So, it means:
|
||||
//
|
||||
// 1. ~1000ms
|
||||
// 2. ~2000ms
|
||||
// 3. ~4000ms
|
||||
//
|
||||
// ...if the limit we set is 3.
|
||||
// Our own timeout, in ./middleware/timeout.js defaults to 10 seconds.
|
||||
// So there's no point in trying more attempts than 3 because it would
|
||||
// just timeout on the 10s. (i.e. 1000 + 2000 + 4000 + 8000 > 10,000)
|
||||
const retry = {
|
||||
limit: patient ? 5 : 2,
|
||||
}
|
||||
const timeout = { request: patient ? 10000 : 2000 }
|
||||
|
||||
const headers = {
|
||||
'User-Agent':
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36',
|
||||
}
|
||||
|
||||
const retries = config.retries || 0
|
||||
const httpFunction = useGET ? got.get : got.head
|
||||
|
||||
if (verbose) console.log(`External URL ${useGET ? 'GET' : 'HEAD'}: ${url} (retries: ${retries})`)
|
||||
try {
|
||||
const r = await httpFunction(url, {
|
||||
headers,
|
||||
throwHttpErrors: false,
|
||||
retry,
|
||||
timeout,
|
||||
})
|
||||
if (verbose) {
|
||||
console.log(
|
||||
`External URL ${useGET ? 'GET' : 'HEAD'} ${url}: ${r.statusCode} (retries: ${retries})`
|
||||
)
|
||||
}
|
||||
|
||||
// If we get rate limited, remember that this hostname is now all
|
||||
// rate limited. And sleep for the number of seconds that the
|
||||
// `retry-after` header indicated.
|
||||
if (r.statusCode === 429) {
|
||||
let sleepTime = Math.min(
|
||||
60_000,
|
||||
Math.max(10_000, getRetryAfterSleep(r.headers['retry-after']))
|
||||
)
|
||||
// Sprinkle a little jitter so it doesn't all start again all
|
||||
// at the same time
|
||||
sleepTime += Math.random() * 10 * 1000
|
||||
// Give it a bit extra when we can be really patient
|
||||
if (patient) sleepTime += 30 * 1000
|
||||
|
||||
_rateLimitedDomains.set(hostname, sleepTime + Math.random() * 10 * 1000)
|
||||
if (verbose)
|
||||
console.log(
|
||||
chalk.yellow(
|
||||
`Rate limited on ${hostname} (${url}). Sleeping for ${(sleepTime / 1000).toFixed(1)}s`
|
||||
)
|
||||
)
|
||||
await sleep(sleepTime)
|
||||
return innerFetch(url, Object.assign({}, config, { retries: retries + 1 }))
|
||||
} else {
|
||||
_rateLimitedDomains.delete(hostname)
|
||||
}
|
||||
|
||||
// Perhaps the server doesn't support HEAD requests.
|
||||
// If so, try again with a regular GET.
|
||||
if ((r.statusCode === 405 || r.statusCode === 404) && !useGET) {
|
||||
return innerFetch(url, Object.assign({}, config, { useGET: true }))
|
||||
}
|
||||
if (verbose) {
|
||||
console.log((r.ok ? chalk.green : chalk.red)(`${r.statusCode} on ${url}`))
|
||||
}
|
||||
return { ok: r.ok, statusCode: r.statusCode }
|
||||
} catch (err) {
|
||||
if (err instanceof RequestError) {
|
||||
if (verbose) {
|
||||
console.log(chalk.yellow(`RequestError (${err.message}) on ${url}`))
|
||||
}
|
||||
return { ok: false, requestError: err.message }
|
||||
}
|
||||
throw err
|
||||
}
|
||||
}
|
||||
|
||||
// Return number of milliseconds from a `Retry-After` header value
|
||||
function getRetryAfterSleep(headerValue) {
|
||||
if (!headerValue) return 0
|
||||
let ms = Math.round(parseFloat(headerValue) * 1000)
|
||||
if (isNaN(ms)) {
|
||||
ms = Math.max(0, new Date(headerValue) - new Date())
|
||||
}
|
||||
return ms
|
||||
}
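// Illustrative examples (not part of the diff) of the two Retry-After formats handled above:
getRetryAfterSleep('120') // 120000 ms (delay-seconds form)
getRetryAfterSleep(new Date(Date.now() + 5000).toUTCString()) // roughly 5000 ms (HTTP-date form)
getRetryAfterSleep(undefined) // 0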
|
||||
|
||||
function checkImageSrc(src) {
|
||||
const pathname = new URL(src, 'http://example.com').pathname
|
||||
if (!pathname.startsWith('/')) {
|
||||
return { WARNING: "External images can't be checked" }
|
||||
}
|
||||
const prefix = pathname.split('/')[1]
|
||||
if (prefix in STATIC_PREFIXES) {
|
||||
const staticFilePath = path.join(
|
||||
STATIC_PREFIXES[prefix],
|
||||
pathname.split(path.sep).slice(2).join(path.sep)
|
||||
)
|
||||
if (!fs.existsSync(staticFilePath)) {
|
||||
return { CRITICAL: `Static file not found (${pathname})` }
|
||||
}
|
||||
} else {
|
||||
return { WARNING: `Unrecognized image src prefix (${prefix})` }
|
||||
}
|
||||
}
|
||||
|
||||
function summarizeFlaws(flaws) {
|
||||
if (flaws.length) {
|
||||
console.log(
|
||||
chalk.bold(
|
||||
`Found ${flaws.length.toLocaleString()} flaw${flaws.length === 1 ? '' : 's'} in total.`
|
||||
)
|
||||
)
|
||||
} else {
|
||||
console.log(chalk.green('No flaws found! 💖'))
|
||||
}
|
||||
}
|
||||
|
||||
function summarizeCounts(pages) {
|
||||
const count = pages.map((page) => page.permalinks.length).reduce((a, b) => a + b, 0)
|
||||
console.log(
|
||||
`Tested ${count.toLocaleString()} permalinks across ${pages.length.toLocaleString()} pages`
|
||||
)
|
||||
}
|
||||
|
||||
function shuffle(array) {
|
||||
let currentIndex = array.length
|
||||
let randomIndex
|
||||
|
||||
// While there remain elements to shuffle...
|
||||
while (currentIndex !== 0) {
|
||||
// Pick a remaining element...
|
||||
randomIndex = Math.floor(Math.random() * currentIndex)
|
||||
currentIndex--
|
||||
|
||||
// And swap it with the current element.
|
||||
;[array[currentIndex], array[randomIndex]] = [array[randomIndex], array[currentIndex]]
|
||||
}
|
||||
|
||||
return array
|
||||
}
|
||||
|
||||
async function renderInnerHTML(page, permalink) {
|
||||
const next = () => {}
|
||||
const res = {}
|
||||
|
||||
const pagePath = permalink.href
|
||||
const req = {
|
||||
path: pagePath,
|
||||
language: permalink.languageCode,
|
||||
pagePath,
|
||||
cookies: {},
|
||||
}
|
||||
await contextualize(req, res, next)
|
||||
await shortVersions(req, res, next)
|
||||
const context = Object.assign({}, req.context, { page })
|
||||
context.relativePath = page.relativePath
|
||||
return await renderContent(page.markdown, context)
|
||||
}
|
||||
|
||||
// Deliberately commented out. Kept temporarily in case it's better.
|
||||
// async function renderPage(page, permalink) {
|
||||
// const next = () => {}
|
||||
// const res = {}
|
||||
// const pagePath = permalink.href
|
||||
// const req = {
|
||||
// path: pagePath,
|
||||
// language: permalink.languageCode,
|
||||
// pagePath,
|
||||
// cookies: {},
|
||||
// }
|
||||
// await contextualize(req, res, next)
|
||||
// const context = Object.assign({}, req.context, { page })
|
||||
// return await page._render(context)
|
||||
// }
|
||||
)
|
||||
|
|
|
@@ -0,0 +1,50 @@
|
|||
import { jest } from '@jest/globals'
|
||||
|
||||
export function coreMock() {
|
||||
return {
|
||||
info: jest.fn(),
|
||||
warn: jest.fn(),
|
||||
error: jest.fn(console.error),
|
||||
setOutput: jest.fn(),
|
||||
}
|
||||
}
|
||||
|
||||
export function octokitMock({ requestMock, listForRepoMock } = {}) {
|
||||
return {
|
||||
request: jest.fn(requestMock),
|
||||
rest: {
|
||||
issues: {
|
||||
listForRepo: jest.fn(listForRepoMock),
|
||||
createComment: jest.fn(),
|
||||
update: jest.fn(),
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
export function cheerioMock(argToValueMap) {
|
||||
return {
|
||||
load: jest.fn(async () => {
|
||||
return (arg) => {
|
||||
return argToValueMap[arg]
|
||||
}
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
export function gotMock({ status } = {}) {
|
||||
return jest.fn(async () => {
|
||||
if (status < 200 || status >= 400) {
|
||||
throw new Error({
|
||||
status,
|
||||
})
|
||||
}
|
||||
return new Error({
|
||||
status,
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
export function uploadArtifactMock() {
|
||||
return jest.fn()
|
||||
}
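// Illustrative sketch (not part of the diff): one way these mocks could be combined in a
// Jest test of the action entry point. The import paths, fixture values, and the expected
// behavior for an empty file list are hypothetical.
import { coreMock, octokitMock, uploadArtifactMock } from '../helpers/mocks.js'
import renderedContentLinkChecker from '../../.github/actions/rendered-content-link-checker.js'

describe('rendered-content-link-checker', () => {
  it('does not log errors for an empty file list', async () => {
    const core = coreMock()
    const octokit = octokitMock({ listForRepoMock: async () => ({ data: [] }) })
    await renderedContentLinkChecker(core, octokit, uploadArtifactMock(), { files: [] })
    expect(core.error).not.toHaveBeenCalled() // hypothetical expectation
  })
})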
|