
// File viewer metadata (not part of the program): 685 lines, 28 KiB

const path = require('path')
const slash = require('slash')
const walk = require('walk-sync')
const { zip, groupBy } = require('lodash')
const yaml = require('js-yaml')
const revalidator = require('revalidator')
const generateMarkdownAST = require('mdast-util-from-markdown')
const visit = require('unist-util-visit')
const readFileAsync = require('../../lib/readfile-async')
const frontmatter = require('../../lib/frontmatter')
const languages = require('../../lib/languages')
const { tags } = require('../../lib/liquid-tags/extended-markdown')
const ghesReleaseNotesSchema = require('../../lib/release-notes-schema')
const renderContent = require('../../lib/render-content')
const { execSync } = require('child_process')
// Absolute paths of the repository root and of every directory that gets linted.
const rootDir = path.join(__dirname, '..', '..')
const contentDir = path.join(rootDir, 'content')
const reusablesDir = path.join(rootDir, 'data', 'reusables')
const variablesDir = path.join(rootDir, 'data', 'variables')
const glossariesDir = path.join(rootDir, 'data', 'glossaries')
const ghesReleaseNotesDir = path.join(rootDir, 'data', 'release-notes')
// Every language code known to the site (the keys of lib/languages).
const languageCodes = Object.keys(languages)
// WARNING: Complicated RegExp below!
//
// Finds Markdown links and link definitions whose target is a relative path,
// i.e. does not start with "/", "#", a scheme, or a Liquid tag.
//
// Things matched by this RegExp:
//  - [link text](link-url)
//  - [link text] (link-url)
//  - [link-definition-ref]: link-url
//  - etc.
//
// Things intentionally NOT matched by this RegExp:
//  - [link text](#link-url)
//  - [link text] (#link-url)
//  - [link-definition-ref]: #link-url
//  - [link text](/link-url)
//  - [link-definition-ref]: /link-url
//  - [link text](https://link-url)
//  - [link-definition-ref]: https://link-url
//  - [link text](mailto:mail-url)
//  - [link-definition-ref]: mailto:mail-url
//  - [link text](tel:phone-url)
//  - [link-definition-ref]: tel:phone-url
//  - [link text]({{ }})
//  - [link-definition-ref]: {{ }}
//  - [link text][link-definition-ref]: other text
//  - [link text][link-definition-ref] (other text)
//  - etc.
//
// NOTE(review): the leading group `(?=^|[^\]]\s*)` is a lookahead, not a
// lookbehind. The character it inspects is the `[` that must follow, which is
// never `]`, so the group cannot reject anything. The two
// `[link text][link-definition-ref]` exclusions above therefore may not hold —
// confirm whether a lookbehind was intended.
const relativeArticleLinkRegex = /(?=^|[^\]]\s*)\[[^\]]+\](?::\n?[ \t]+|\s*\()(?!\/|#|https?:\/\/|tel:|mailto:|\{[%{]\s*)[^)\s]+(?:(?:\s*[%}]\})?\)|\s+|$)/gm
// Finds links whose path starts with one of the known language codes, either
// as a root-relative path or on a help/docs/developer.github.com domain.
//
// Things matched by this RegExp:
//  - [link text](/en/github/blah)
//  - [link text] (https://docs.github.com/en/github/blah)
//  - [link-definition-ref]: http://help.github.com/en/github/blah
//  - etc.
//
// Things intentionally NOT matched by this RegExp:
//  - [Node.js](https://nodejs.org/en/)
//  - etc.
const languageLinkRegex = new RegExp(`(?=^|[^\\]]\\s*)\\[[^\\]]+\\](?::\\n?[ \\t]+|\\s*\\()(?:(?:https?://(?:help|docs|developer)\\.github\\.com)?/(?:${languageCodes.join('|')})(?:/[^)\\s]*)?)(?:\\)|\\s+|$)`, 'gm')
// Finds links whose path starts with "/enterprise/<dotted version number>",
// either root-relative or on a help/docs/developer.github.com domain.
//
// Things matched by this RegExp:
//  - [link text](/enterprise/2.19/admin/blah)
//  - [link text] (https://docs.github.com/enterprise/11.10.340/admin/blah)
//  - [link-definition-ref]: http://help.github.com/enterprise/2.8/admin/blah
//
// Things intentionally NOT matched by this RegExp:
//  - [link text](https://someservice.com/enterprise/1.0/blah)
//  - [link text](/github/site-policy/enterprise/2.2/admin/blah)
const versionLinkRegEx = /(?=^|[^\]]\s*)\[[^\]]+\](?::\n?[ \t]+|\s*\()(?:(?:https?:\/\/(?:help|docs|developer)\.github\.com)?\/enterprise\/\d+(\.\d+)+(?:\/[^)\s]*)?)(?:\)|\s+|$)/gm
// Finds links whose path starts with "/early-access", either root-relative or
// on a help/docs/developer.github.com domain.
//
// Things matched by this RegExp:
//  - [link text](/early-access/github/blah)
//  - [link text] (https://docs.github.com/early-access/github/blah)
//  - [link-definition-ref]: http://help.github.com/early-access/github/blah
//  - etc.
//
// Things intentionally NOT matched by this RegExp:
//  - [Node.js](https://nodejs.org/early-access/)
//  - etc.
const earlyAccessLinkRegex = /(?=^|[^\]]\s*)\[[^\]]+\](?::\n?[ \t]+|\s*\()(?:(?:https?:\/\/(?:help|docs|developer)\.github\.com)?\/early-access(?:\/[^)\s]*)?)(?:\)|\s+|$)/gm
// Finds links that hard-code a help/docs/developer.github.com domain (with an
// http(s) scheme or protocol-relative), except those under "/changes/".
//
// Things matched by this RegExp:
//  - [link text](https://docs.github.com/github/blah)
//  - [link text] (https://help.github.com/github/blah)
//  - [link-definition-ref]: http://developer.github.com/v3/
//  - [link text](//docs.github.com)
//  - etc.
//
// Things intentionally NOT matched by this RegExp:
//  - [link text](/github/blah)
//  - [link text](https://developer.github.com/changes/2018-02-22-example/)
//  - etc.
const domainLinkRegex = /(?=^|[^\]]\s*)\[[^\]]+\](?::\n?[ \t]+|\s*\()(?:https?:)?\/\/(?:help|docs|developer)\.github\.com(?!\/changes\/)[^)\s]*(?:\)|\s+|$)/gm
// Finds images/links whose path starts with "/assets/images/early-access",
// either root-relative or on a help/docs/developer.github.com domain.
//
// Things matched by this RegExp:
//  - ![image text](/assets/images/early-access/github/blah.gif)
//  - ![image text] (https://docs.github.com/assets/images/early-access/github/blah.gif)
//  - [image-definition-ref]: http://help.github.com/assets/images/early-access/github/blah.gif
//  - [link text](/assets/images/early-access/github/blah.gif)
//  - etc.
//
// Things intentionally NOT matched by this RegExp:
//  - [Node.js](https://nodejs.org/assets/images/early-access/blah.gif)
//  - etc.
const earlyAccessImageRegex = /(?=^|[^\]]\s*)\[[^\]]+\](?::\n?[ \t]+|\s*\()(?:(?:https?:\/\/(?:help|docs|developer)\.github\.com)?\/assets\/images\/early-access(?:\/[^)\s]*)?)(?:\)|\s+|$)/gm
// Finds images/links whose path starts with a mis-ordered Early Access asset
// prefix: "/assets/early-access", "/images/early-access",
// "/early-access/assets" or "/early-access/images".
//
// Things matched by this RegExp:
//  - ![image text](/assets/early-access/images/github/blah.gif)
//  - ![image text] (https://docs.github.com/images/early-access/github/blah.gif)
//  - [image-definition-ref]: http://help.github.com/assets/early-access/images/github/blah.gif
//  - [link text](/early-access/assets/images/github/blah.gif)
//  - [link text](/early-access/images/github/blah.gif)
//  - etc.
//
// Things intentionally NOT matched by this RegExp:
//  - [Node.js](https://nodejs.org/early-access/images/blah.gif)
//  - etc.
const badEarlyAccessImageRegex = /(?=^|[^\]]\s*)\[[^\]]+\](?::\n?[ \t]+|\s*\()(?:(?:https?:\/\/(?:help|docs|developer)\.github\.com)?\/(?:(?:assets|images)\/early-access|early-access\/(?:assets|images))(?:\/[^)\s]*)?)(?:\)|\s+|$)/gm
// Patterns for legacy `{{ ... }}` syntaxes that content must no longer use.

// Things matched by this RegExp:
//  - {{ site.data.foo.bar }}
const oldVariableRegex = /{{\s*?site\.data\..*?}}/g

// Things matched by this RegExp:
//  - {{ octicon-plus }}
//  - {{ octicon-plus An example label }}
const oldOcticonRegex = /{{\s*?octicon-([a-z-]+)(\s[\w\s\d-]+)?\s*?}}/g

// Things matched by this RegExp:
//  - {{#note}}
//  - {{/note}}
//  - {{ #warning }}
//  - {{ /pizza }}
const oldExtendedMarkdownRegex = /{{\s*?[#/][a-z-]+\s*?}}/g
// Heading lines for the failure messages; each is passed to formatLinkError
// together with the offending matches.
const relativeArticleLinkErrorText = 'Found unexpected relative article links:'
const languageLinkErrorText = 'Found article links with hard-coded language codes:'
const versionLinkErrorText = 'Found article links with hard-coded version numbers:'
const domainLinkErrorText = 'Found article links with hard-coded domain names:'
const earlyAccessLinkErrorText = 'Found article links leaking Early Access docs:'
const earlyAccessImageErrorText = 'Found article images/links leaking Early Access images:'
const badEarlyAccessImageErrorText = 'Found article images/links leaking incorrect Early Access images:'
const oldVariableErrorText = 'Found article uses old {{ }} syntax. Use {% data %} instead!'
const oldOcticonErrorText = 'Found octicon variables with the old {{ octicon-name }} syntax. Use {% octicon "name" %} instead!'
// The markdown lint rewrites the `{{#note}}`, `{% note %}` and `{% endnote %}`
// placeholders in this text per tag before reporting.
const oldExtendedMarkdownErrorText = 'Found extended markdown tags with the old {{#note}} syntax. Use {% note %}/{% endnote %} instead!'
// Options passed to `walk` when collecting Markdown files: only `.md` files,
// no directory entries, and results prefixed with the walked base path.
// NOTE(review): the `ignore` glob looks truncated (a file name appears to be
// missing after `**/`) — confirm the intended pattern.
const mdWalkOptions = {
  globs: ['**/*.md'],
  ignore: ['**/'],
  directories: false,
  includeBasePath: true
}
// Also test the "data/variables/" YAML files
// Options passed to `walk` when collecting YAML files: only `.yml` files,
// no directory entries, and results prefixed with the walked base path.
const yamlWalkOptions = {
  globs: ['**/*.yml'],
  directories: false,
  includeBasePath: true
}
// different lint rules apply to different content types
// Each list holds `[relPath, absPath]` tuples: the path relative to the
// repository root (used as the test title) and the absolute path to read.
let mdToLint, ymlToLint, releaseNotesToLint

if (!process.env.TEST_TRANSLATION) {
  // Walks `dir` and pairs every absolute path found with its root-relative,
  // forward-slashed form.
  const collectTuples = (dir, walkOptions) => {
    const absPaths = walk(dir, walkOptions).sort()
    const relPaths = absPaths.map(p => slash(path.relative(rootDir, p)))
    return zip(relPaths, absPaths)
  }

  // compile lists of all the files we want to lint
  mdToLint = [
    ...collectTuples(contentDir, mdWalkOptions),
    ...collectTuples(reusablesDir, mdWalkOptions)
  ]

  // data/variables and data/glossaries
  ymlToLint = [
    ...collectTuples(variablesDir, yamlWalkOptions),
    ...collectTuples(glossariesDir, yamlWalkOptions)
  ]

  // GHES release notes
  releaseNotesToLint = collectTuples(ghesReleaseNotesDir, yamlWalkOptions)
} else {
  // get all translated markdown or yaml files by comparing files changed to main branch
  // The filtering is done here rather than by piping through `egrep`, because
  // a pipeline whose grep matches nothing exits non-zero and makes execSync throw.
  const translatedFileRegex = /^translations\/.*\/.+\.(yml|md)$/
  const changedFilesRelPaths = execSync('git diff --name-only origin/main', { maxBuffer: 1024 * 1024 * 100 })
    .toString()
    .split('\n')
    .filter(relPath => translatedFileRegex.test(relPath))

  // Nothing to lint: `split` always returns an array, so test its length.
  if (changedFilesRelPaths.length === 0) process.exit(0)

  console.log('testing translations.')
  console.log(`Found ${changedFilesRelPaths.length} translated files.`)

  const { mdRelPaths = [], ymlRelPaths = [], releaseNotesRelPaths = [] } = groupBy(changedFilesRelPaths, (relPath) => {
    // separate the changed files to different groups
    // NOTE(review): the suffix literal was empty in the source this was
    // restored from (which discards every file); 'README.md' is assumed — confirm.
    if (relPath.endsWith('README.md')) {
      return 'throwAway'
    } else if (relPath.endsWith('.md')) {
      return 'mdRelPaths'
    } else if (relPath.match(/\/data\/(variables|glossaries)\//i)) {
      return 'ymlRelPaths'
    } else if (relPath.match(/\/data\/release-notes\//i)) {
      return 'releaseNotesRelPaths'
    } else {
      // we aren't linting the rest
      return 'throwAway'
    }
  })

  const [mdTuples, ymlTuples, releaseNotesTuples] = [mdRelPaths, ymlRelPaths, releaseNotesRelPaths].map(relPaths => {
    const absPaths = relPaths.map(p => path.join(rootDir, p))
    return zip(relPaths, absPaths)
  })

  mdToLint = mdTuples
  ymlToLint = ymlTuples
  releaseNotesToLint = releaseNotesTuples
}
/**
 * Builds a multi-line failure message: the heading followed by one bullet
 * line per offending match.
 *
 * @param {string} message - Heading line describing the rule that failed.
 * @param {string[]} links - The offending matches.
 * @returns {string} The heading and bullets joined with newlines.
 */
function formatLinkError (message, links) {
  return `${message}\n - ${links.join('\n - ')}`
}
/**
 * Returns `content` if it's a string, or `content.description` if that is a string.
 * Used for getting the nested `description` key in glossary files.
 *
 * @param {*} content - A value taken from a parsed YAML dictionary.
 * @returns {string|null} The lintable text, or null when there is none
 *   (including when `content` is null or undefined, e.g. an empty YAML key).
 */
function getContent (content) {
  if (typeof content === 'string') return content
  // `!= null` rejects both null and undefined before the property access.
  if (content != null && typeof content.description === 'string') return content.description
  return null
}
// Lints every Markdown file in `mdToLint`; one nested suite per file.
// NOTE(review): this suite was restored from a damaged copy in which closing
// brackets and several call expressions were missing. The `describe.each`
// title, the extent of the two `if` blocks, and the assertion in the
// "on the hour" test are reconstructions — confirm against version control.
describe('lint markdown content', () => {
  if (mdToLint.length < 1) return

  describe.each(mdToLint)(
    '%s',
    (markdownRelPath, markdownAbsPath) => {
      let content, ast, links, yamlScheduledWorkflows, isHidden, isEarlyAccess, isSitePolicy, frontmatterErrors, frontmatterData

      // Reads and parses the file once, then derives everything the tests inspect.
      beforeAll(async () => {
        const fileContents = await readFileAsync(markdownAbsPath, 'utf8')
        const { data, content: bodyContent, errors } = frontmatter(fileContents)

        content = bodyContent
        frontmatterErrors = errors
        frontmatterData = data
        ast = generateMarkdownAST(content)
        isHidden = data.hidden === true
        isEarlyAccess = markdownRelPath.split('/').includes('early-access')
        isSitePolicy = markdownRelPath.split('/').includes('site-policy-deprecated')

        // Collect the target of every link and link definition.
        links = []
        visit(ast, ['link', 'definition'], node => {
          links.push(node.url)
        })

        // Collect YAML code snippets that mention both "schedule" and "cron".
        const scheduledSnippets = []
        visit(ast, 'code', node => {
          if (/ya?ml/.test(node.lang) && node.value.includes('schedule') && node.value.includes('cron')) {
            scheduledSnippets.push(node.value)
          }
        })

        // visit is not async-friendly so we need to do an async map to parse the YML snippets
        const schedulesPerSnippet = await Promise.all(scheduledSnippets.map(async (snippet) => {
          // If we don't parse the Liquid first, yaml loading chokes on {% raw %} tags
          const rendered = await renderContent.liquid.parseAndRender(snippet)
          const parsed = yaml.safeLoad(rendered)
          return parsed.on.schedule
        }))
        yamlScheduledWorkflows = schedulesPerSnippet
          .flat()
          .map(schedule => schedule.cron)
      })

      // We need to support some non-Early Access hidden docs in Site Policy
      test('hidden docs must be Early Access or Site Policy', async () => {
        if (isHidden) {
          expect(isEarlyAccess || isSitePolicy).toBe(true)
        }
      })

      test('relative URLs must start with "/"', async () => {
        const matches = links.filter(link => {
          if (
            link.startsWith('http://') ||
            link.startsWith('https://') ||
            link.startsWith('tel:') ||
            link.startsWith('mailto:') ||
            link.startsWith('#') ||
            link.startsWith('/')
          ) return false

          return true
        })

        const errorMessage = formatLinkError(relativeArticleLinkErrorText, matches)
        expect(matches.length, errorMessage).toBe(0)
      })

      test('yaml snippets that include scheduled workflows must not run on the hour', async () => {
        const hourlySchedules = yamlScheduledWorkflows.filter(schedule => {
          // The first cron field is the minute.
          const minute = schedule.split(' ')[0]
          // return any minute cron segments that equal 0, 00, 000, etc.
          return !/[^0]/.test(minute)
        })
        expect(hourlySchedules).toEqual([])
      })

      // Note this only ensures that scheduled workflow snippets are unique _per Markdown file_
      test('yaml snippets that include scheduled workflows run at unique times', () => {
        expect(yamlScheduledWorkflows.length).toEqual(new Set(yamlScheduledWorkflows).size)
      })

      test('must not leak Early Access doc URLs', async () => {
        // Only execute for docs that are NOT Early Access
        if (!isEarlyAccess) {
          const matches = (content.match(earlyAccessLinkRegex) || [])
          const errorMessage = formatLinkError(earlyAccessLinkErrorText, matches)
          expect(matches.length, errorMessage).toBe(0)
        }
      })

      test('must not leak Early Access image URLs', async () => {
        // Only execute for docs that are NOT Early Access
        if (!isEarlyAccess) {
          const matches = (content.match(earlyAccessImageRegex) || [])
          const errorMessage = formatLinkError(earlyAccessImageErrorText, matches)
          expect(matches.length, errorMessage).toBe(0)
        }
      })

      test('must have correctly formatted Early Access image URLs', async () => {
        // Execute for ALL docs (not just Early Access) to ensure non-EA docs
        // are not leaking incorrectly formatted EA image URLs
        const matches = (content.match(badEarlyAccessImageRegex) || [])
        const errorMessage = formatLinkError(badEarlyAccessImageErrorText, matches)
        expect(matches.length, errorMessage).toBe(0)
      })

      if (!process.env.TEST_TRANSLATION) {
        test('does not use old variable syntax', async () => {
          const matches = (content.match(oldVariableRegex) || [])
          // Show each offending tag next to its suggested replacement.
          const matchesWithExample = matches.map(match => {
            const example = match
              .replace(/{{\s*?site\.data\.([a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]+)+)\s*?}}/g, '{% data $1 %}')
            return `${match} => ${example}`
          })
          const errorMessage = formatLinkError(oldVariableErrorText, matchesWithExample)
          expect(matches.length, errorMessage).toBe(0)
        })

        test('does not use old octicon variable syntax', async () => {
          const matches = (content.match(oldOcticonRegex) || [])
          const errorMessage = formatLinkError(oldOcticonErrorText, matches)
          expect(matches.length, errorMessage).toBe(0)
        })

        test('does not use old extended markdown syntax', async () => {
          Object.keys(tags).forEach(tag => {
            const reg = new RegExp(`{{\\s*?[#|/]${tag}`, 'g')
            if (reg.test(content)) {
              const matches = (content.match(oldExtendedMarkdownRegex)) || []
              // Tailor the generic message to the tag that was found.
              const tagMessage = oldExtendedMarkdownErrorText
                .replace('{{#note}}', `{{#${tag}}}`)
                .replace('{% note %}', `{% ${tag} %}`)
                .replace('{% endnote %}', `{% end${tag} %}`)
              const errorMessage = formatLinkError(tagMessage, matches)
              expect(matches.length, errorMessage).toBe(0)
            }
          })
        })

        test('URLs must not contain a hard-coded language code', async () => {
          // Built with `new RegExp`: a regex literal does not interpolate
          // `${...}`, so the previous literal could never match. The URL shape
          // mirrors languageLinkRegex (root-relative or a GitHub docs domain).
          const hardCodedLanguageRegex = new RegExp(`^(?:https?://(?:help|docs|developer)\\.github\\.com)?/(?:${languageCodes.join('|')})(?:/|$)`)
          const matches = links.filter(link => hardCodedLanguageRegex.test(link))
          const errorMessage = formatLinkError(languageLinkErrorText, matches)
          expect(matches.length, errorMessage).toBe(0)
        })

        test('URLs must not contain a hard-coded version number', async () => {
          const initialMatches = (content.match(versionLinkRegEx) || [])

          // Filter out some very specific false positive matches
          // NOTE(review): this path literal looks truncated (it ends in "/",
          // so it cannot equal a Markdown file path) — confirm the file name.
          const matches = initialMatches.filter(match => {
            if (markdownRelPath === 'content/admin/enterprise-management/') {
              return false
            }
            return true
          })

          const errorMessage = formatLinkError(versionLinkErrorText, matches)
          expect(matches.length, errorMessage).toBe(0)
        })

        test('URLs must not contain a hard-coded domain name', async () => {
          const matches = (content.match(domainLinkRegex) || [])
          const errorMessage = formatLinkError(domainLinkErrorText, matches)
          expect(matches.length, errorMessage).toBe(0)
        })
      }

      test('contains valid Liquid', async () => {
        // If Liquid can't parse the file, it'll throw an error.
        // For example, the following is invalid and will fail this test:
        // {% if currentVersion ! "github-ae@latest" %}
        expect(() => renderContent.liquid.parse(content))
          .not
          .toThrow()
      })

      if (!markdownRelPath.includes('data/reusables')) {
        test('contains valid frontmatter', () => {
          const errorMessage = frontmatterErrors
            .map(error => `- [${error.property}]: ${error.actual}, ${error.message}`)
            .join('\n')
          expect(frontmatterErrors.length, errorMessage).toBe(0)
        })

        test('frontmatter contains valid liquid', async () => {
          const fmKeysWithLiquid = ['title', 'shortTitle', 'intro', 'product', 'permission']
            .filter(key => Boolean(frontmatterData[key]))

          for (const key of fmKeysWithLiquid) {
            expect(() => renderContent.liquid.parse(frontmatterData[key]))
              .not
              .toThrow()
          }
        })
      }
    }
  )
})
// Lints every YAML data file in `ymlToLint`; one nested suite per file.
// NOTE(review): this suite was restored from a damaged copy in which closing
// brackets and several call expressions were missing. The `describe.each`
// title and the extent of the `TEST_TRANSLATION` block are reconstructions —
// confirm against version control.
describe('lint yaml content', () => {
  if (ymlToLint.length < 1) return

  describe.each(ymlToLint)(
    '%s',
    (yamlRelPath, yamlAbsPath) => {
      let dictionary, isEarlyAccess

      beforeAll(async () => {
        const fileContents = await readFileAsync(yamlAbsPath, 'utf8')
        dictionary = yaml.safeLoad(fileContents, { filename: yamlRelPath })

        isEarlyAccess = yamlRelPath.split('/').includes('early-access')
      })

      // Default bullet text for a match found under `key`.
      const describeMatch = (key, match) => `Key "${key}": ${match}`

      // Runs `regex` against the lintable text of every top-level entry and
      // returns one formatted line per match. An empty file yields no matches.
      const collectMatches = (regex, formatMatch = describeMatch) => {
        const matches = []
        for (const [key, content] of Object.entries(dictionary || {})) {
          const contentStr = getContent(content)
          if (!contentStr) continue
          const valMatches = (contentStr.match(regex) || [])
          matches.push(...valMatches.map(match => formatMatch(key, match)))
        }
        return matches
      }

      // Fails with a bulleted list of `matches` under `errorText` unless empty.
      const expectNoMatches = (errorText, matches) => {
        const errorMessage = formatLinkError(errorText, matches)
        expect(matches.length, errorMessage).toBe(0)
      }

      test('relative URLs must start with "/"', async () => {
        expectNoMatches(relativeArticleLinkErrorText, collectMatches(relativeArticleLinkRegex))
      })

      test('must not leak Early Access doc URLs', async () => {
        // Only execute for docs that are NOT Early Access
        if (!isEarlyAccess) {
          expectNoMatches(earlyAccessLinkErrorText, collectMatches(earlyAccessLinkRegex))
        }
      })

      test('must not leak Early Access image URLs', async () => {
        // Only execute for docs that are NOT Early Access
        if (!isEarlyAccess) {
          expectNoMatches(earlyAccessImageErrorText, collectMatches(earlyAccessImageRegex))
        }
      })

      test('must have correctly formatted Early Access image URLs', async () => {
        // Execute for ALL docs (not just Early Access) to ensure non-EA docs
        // are not leaking incorrectly formatted EA image URLs
        expectNoMatches(badEarlyAccessImageErrorText, collectMatches(badEarlyAccessImageRegex))
      })

      if (!process.env.TEST_TRANSLATION) {
        test('URLs must not contain a hard-coded language code', async () => {
          expectNoMatches(languageLinkErrorText, collectMatches(languageLinkRegex))
        })

        test('URLs must not contain a hard-coded version number', async () => {
          expectNoMatches(versionLinkErrorText, collectMatches(versionLinkRegEx))
        })

        test('URLs must not contain a hard-coded domain name', async () => {
          expectNoMatches(domainLinkErrorText, collectMatches(domainLinkRegex))
        })

        test('does not use old variable syntax', async () => {
          // Show each offending tag next to its suggested replacement.
          const matches = collectMatches(oldVariableRegex, (key, match) => {
            const example = match
              .replace(/{{\s*?site\.data\.([a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]+)+)\s*?}}/g, '{% data $1 %}')
            return `Key "${key}": ${match} => ${example}`
          })
          expectNoMatches(oldVariableErrorText, matches)
        })

        test('does not use old octicon variable syntax', async () => {
          expectNoMatches(oldOcticonErrorText, collectMatches(oldOcticonRegex))
        })

        test('does not use old extended markdown syntax', async () => {
          expectNoMatches(oldExtendedMarkdownErrorText, collectMatches(oldExtendedMarkdownRegex))
        })
      }
    }
  )
})
// Lints every release-notes YAML file in `releaseNotesToLint`; one nested
// suite per file.
// NOTE(review): this suite was restored from a damaged copy in which closing
// brackets and several call expressions were missing. The `describe.each`
// title is a reconstruction — confirm against version control.
describe('lint release notes', () => {
  if (releaseNotesToLint.length < 1) return

  describe.each(releaseNotesToLint)(
    '%s',
    (yamlRelPath, yamlAbsPath) => {
      let dictionary

      beforeAll(async () => {
        const fileContents = await readFileAsync(yamlAbsPath, 'utf8')
        dictionary = yaml.safeLoad(fileContents, { filename: yamlRelPath })
      })

      it('matches the schema', () => {
        const { errors } = revalidator.validate(dictionary, ghesReleaseNotesSchema)
        const errorMessage = errors
          .map(error => `- [${error.property}]: ${error.actual}, ${error.message}`)
          .join('\n')
        expect(errors.length, errorMessage).toBe(0)
      })

      it('contains valid liquid', () => {
        const { intro, sections } = dictionary
        // Maps a dotted label to the text that must parse as Liquid.
        let toLint = { intro }
        for (const key in sections) {
          const section = sections[key]
          const label = `sections.${key}`
          // NOTE(review): the branch tests `part` but both arms read from
          // `section` — behavior kept as found; confirm this is intended.
          section.forEach((part) => {
            if (Array.isArray(part)) {
              toLint = { ...toLint, ...{ [label]: section.join('\n') } }
            } else {
              for (const prop in section) {
                toLint = { ...toLint, ...{ [`${label}.${prop}`]: section[prop] } }
              }
            }
          })
        }

        for (const key in toLint) {
          if (!toLint[key]) continue
          // Without a matcher the expectation asserts nothing.
          expect(() => renderContent.liquid.parse(toLint[key]), `${key} contains invalid liquid`)
            .not
            .toThrow()
        }
      })
    }
  )
})