use script instead of jest, much faster

This commit is contained in:
Sarah Schneider 2021-01-22 09:28:06 -05:00
Родитель 8555d3dce4
Коммит b0677b2442
5 изменённых файлов: 42 добавлений и 310 удалений

Просмотреть файл

@ -1,7 +1,6 @@
// https://jestjs.io/docs/en/configuration.html
const isBrowser = process.env.BROWSER
const isLinkCheck = process.env.LINKCHECK
const isActions = Boolean(process.env.GITHUB_ACTIONS)
module.exports = {
@ -28,8 +27,7 @@ module.exports = {
'node_modules/',
'vendor/',
'tests/helpers/',
...isBrowser ? [] : ['tests/browser/browser.js'],
...isLinkCheck ? [] : ['tests/links-and-images/links-and-images.js']
...isBrowser ? [] : ['tests/browser/browser.js']
],
testMatch: [
'**/tests/**/*.js'

Просмотреть файл

@ -173,7 +173,7 @@
"pa11y-test": "start-server-and-test browser-test-server 4001 pa11y-ci",
"link-check": "start-server-and-test link-check-server 4002 link-check-test",
"link-check-server": "cross-env NODE_ENV=development ENABLED_LANGUAGES='en' PORT=4002 node server.js",
"link-check-test": "cross-env LINKCHECK=1 jest tests/links-and-images/links-and-images.js",
"link-check-test": "cross-env LINKCHECK=1 node script/check-internal-links.js",
"heroku-postbuild": "node script/early-access/clone-for-build.js && npm run build"
},
"engines": {

40
script/check-internal-links.js Executable file
Просмотреть файл

@ -0,0 +1,40 @@
#!/usr/bin/env node
const linkinator = require('linkinator')
const checker = new linkinator.LinkChecker()
const { deprecated } = require('../lib/enterprise-server-releases')
// Options handed to `checker.check()` in `main` below. The crawl targets the
// English docs served on port 4002.
const config = {
// Page the crawl starts from.
path: 'http://localhost:4002/en',
// NOTE(review): presumably the number of simultaneous requests — confirm against the linkinator docs.
concurrency: 400,
// NOTE(review): presumably makes the checker follow links found on crawled pages — confirm.
recurse: true,
// The entries below are written as regular-expression source strings;
// a link matching any of them is not checked.
linksToSkip: [
// Skip any link that is not an internal link
// (the negative lookahead matches only strings that never contain the local /en URL)
'^((?!http://localhost:4002/en).)*$',
// Skip dist files
'/dist/index.*',
// Skip deprecated Enterprise content
// NOTE(review): the release strings are interpolated unescaped, so any `.` in them
// acts as a regex wildcard, and the trailing `/?` does not anchor the end of the
// version — verify that one release number cannot match as a prefix of another.
`enterprise(-server@|/)(${deprecated.join('|')})/?`
]
}
main()
/**
 * Crawls the site described by `config` and reports broken links.
 *
 * Always terminates the process: exit code 0 when no link is in the
 * `BROKEN` state, exit code 1 when broken links are found or when the
 * crawl itself fails.
 *
 * @returns {Promise<void>}
 */
async function main () {
  let exitCode
  try {
    const { links } = await checker.check(config)
    const brokenLinks = links
      .filter(link => link.state === 'BROKEN')
      // Drop `failureDetails` from the report without mutating the
      // objects returned by the checker.
      .map(({ failureDetails, ...link }) => link)

    if (brokenLinks.length === 0) {
      // Exit successfully if no broken links!
      console.log('All links are good!')
      exitCode = 0
    } else {
      // Exit unsuccessfully if broken links are found.
      console.log(`Found ${brokenLinks.length} total broken links: ${JSON.stringify(brokenLinks, null, 2)}`)
      exitCode = 1
    }
  } catch (err) {
    // Without this, a rejected crawl would surface as an unhandled promise
    // rejection instead of a clear failure of the link check.
    console.error('Link check could not be completed:', err)
    exitCode = 1
  }
  process.exit(exitCode)
}

Просмотреть файл

@ -1,269 +0,0 @@
const cheerio = require('cheerio')
const { union, uniq } = require('lodash')
const fs = require('fs')
const path = require('path')
const { getProductStringFromPath } = require('../../lib/path-utils')
const patterns = require('../../lib/patterns')
const { deprecated } = require('../../lib/enterprise-server-releases')
const rest = require('../../middleware/contextualizers/rest')
const graphql = require('../../middleware/contextualizers/graphql')
const contextualize = require('../../middleware/context')
const releaseNotes = require('../../middleware/contextualizers/enterprise-release-notes')
const versionSatisfiesRange = require('../../lib/version-satisfies-range')
/**
 * Collects broken internal links, broken anchors and missing images from
 * rendered pages.
 *
 * Usage: call `checkPage` once per page context, then read the results with
 * `getBrokenLinks`, `getBrokenAnchors` and `getBrokenImages`. Anchors and
 * images are only queued by `checkPage`; they are verified when the
 * corresponding getter is called.
 */
class LinksChecker {
  /**
   * @param {object} [opts] - Copied onto the instance. Any option that is
   *   omitted falls back to its default, so a partial object is safe.
   * @param {string} [opts.languageCode='en']
   * @param {string[]} [opts.internalHrefPrefixes=['/', '#']] - `href` prefixes
   *   that mark an `<a>` element as an internal link.
   */
  constructor (opts = {}) {
    // Merge over the defaults instead of replacing them wholesale: previously
    // passing e.g. only `languageCode` left `internalHrefPrefixes` undefined,
    // which made `checkPage` throw.
    Object.assign(this, { languageCode: 'en', internalHrefPrefixes: ['/', '#'] }, opts)

    // Some caching mechanism so we do not load pages unnecessarily,
    // nor check links that have been checked
    this.pageCache = new Map()
    this.checkedLinksCache = new Set()

    // stores images to check all at once in a Map:
    // imageSrc => {
    //   "usedBy": [version:path, ...]
    // }
    this.imagesToCheck = new Map()

    // Stores broken images in a Map, formatted the same way as imagesToCheck
    this.brokenImages = new Map()

    // Stores broken links in a Map in the format of:
    // link => {
    //   linkedFrom: [ version:filePath, ... ]
    // }, ...
    this.brokenLinks = new Map()

    // stores anchor links to check all at once in a Map:
    // version:filePath => {
    //   '#anchor-link' : {
    //     linkedFrom: ['url1', 'url2']
    //   },
    //   '#anchor-link2': {...}
    // }
    this.anchorLinksToCheck = new Map()

    // Stores broken anchors in a Map, formatted the same way as anchorLinksToCheck
    this.brokenAnchors = new Map()
  }

  /**
   * Renders the page in `context` and caches the parsed result.
   *
   * @param {string} pathCacheKey - Cache key, `version:relativePath`.
   * @param {object} context - Must provide `page.render` and `page.relativePath`.
   * @param {boolean} [reRender=false] - Render again even if already cached.
   * @returns {Promise<void>}
   */
  async setRenderedPageObj (pathCacheKey, context, reRender = false) {
    if (this.pageCache.has(pathCacheKey) && !reRender) return

    let pageHTML = await context.page.render(context)

    // handle special pre-rendered snowflake
    if (context.page.relativePath.endsWith('graphql/reference/objects.md')) {
      pageHTML += context.graphql.prerenderedObjectsForCurrentVersion.html
    }

    const pageObj = cheerio.load(pageHTML, { xmlMode: true })
    this.pageCache.set(pathCacheKey, pageObj)
  }

  /**
   * Returns the cached parsed page, rendering it first when `context` is given.
   *
   * @param {string} pathCacheKey - Cache key, `version:relativePath`.
   * @param {object} [context] - Needed only when the page is not cached yet.
   * @returns {Promise<Function|undefined>} The cheerio object, or `undefined`
   *   when the page is neither cached nor renderable (an error is logged).
   */
  async getRenderedPageObj (pathCacheKey, context) {
    if (!this.pageCache.has(pathCacheKey)) {
      if (context) {
        await this.setRenderedPageObj(pathCacheKey, context)
      } else {
        console.error('cannot find pre-rendered page, and does not have enough context to render one.')
      }
    }

    return this.pageCache.get(pathCacheKey)
  }

  /**
   * Queues an anchor for verification by `checkAnchors`.
   *
   * @param {string} pagePath - Cache key of the page that should contain the anchor.
   * @param {string} anchor - Anchor text without the leading `#`.
   * @param {string} linkedFrom - Where the link was found.
   */
  addAnchorForLater (pagePath, anchor, linkedFrom) {
    const anchorsInPath = this.anchorLinksToCheck.get(pagePath) || {}
    const anchorLink = anchorsInPath[anchor] || { linkedFrom: [] }
    anchorLink.linkedFrom = union(anchorLink.linkedFrom, [linkedFrom])
    anchorsInPath[anchor] = anchorLink
    this.anchorLinksToCheck.set(pagePath, anchorsInPath)
  }

  /**
   * Queues image sources for verification by `getBrokenImages`.
   *
   * @param {string[]} images - Image `src` values found on the page.
   * @param {string} pagePath - Cache key of the page using the images.
   */
  addImagesForLater (images, pagePath) {
    uniq(images).forEach(imageSrc => {
      const imageUsage = this.imagesToCheck.get(imageSrc) || { usedBy: [] }
      imageUsage.usedBy = union(imageUsage.usedBy, [pagePath])
      this.imagesToCheck.set(imageSrc, imageUsage)
    })
  }

  /**
   * Scans one rendered page: records links that resolve to no known page and
   * queues its images and anchors for later verification.
   *
   * @param {object} context - Page context; reads `relativePath`,
   *   `currentVersion`, `pages` and `redirects`.
   * @param {boolean} checkExternalAnchors - When truthy, anchors that point
   *   into other pages are queued as well (those pages get rendered).
   * @returns {Promise<void>}
   */
  async checkPage (context, checkExternalAnchors) {
    const relativePath = context.relativePath
    const version = context.currentVersion
    const pathCacheKey = `${version}:${relativePath}`
    const $ = await this.getRenderedPageObj(pathCacheKey, context)

    const imageSrcs = $('img[src^="/assets"]').map((i, el) => $(el).attr('src')).toArray()
    this.addImagesForLater(imageSrcs, pathCacheKey)

    for (const hrefPrefix of this.internalHrefPrefixes) {
      const internalLinks = $(`a[href^="${hrefPrefix}"]`).get()

      for (const internalLink of internalLinks) {
        const href = $(internalLink).attr('href')
        let [link, anchor] = href.split('#')

        // remove trailing slash
        link = link.replace(patterns.trailingSlash, '$1')

        // if the link points at a page that has been checked before, skip the lookup
        if (link && this.checkedLinksCache.has(link)) {
          // if it's been determined this link is broken, add to the linkedFrom field
          if (this.brokenLinks.has(link)) {
            const brokenLink = this.brokenLinks.get(link)
            brokenLink.linkedFrom = union(brokenLink.linkedFrom, [pathCacheKey])
            this.brokenLinks.set(link, brokenLink)
          }
          // a link with an anchor still falls through so the anchor gets queued
          if (!anchor) continue
        }

        // if it's an internal anchor (e.g., #foo), save for later
        if (anchor && !link) {
          // ignore anchors that are autogenerated from headings
          if (anchor === $(internalLink).parent().attr('id')) continue
          this.addAnchorForLater(pathCacheKey, anchor, 'same page')
          continue
        }

        // ------ BEGIN ONEOFF EXCLUSIONS -------///
        // skip GraphQL public schema paths (these are checked by separate tests)
        if (link.startsWith('/public/') && link.endsWith('.graphql')) continue

        // skip links that start with /assets/images, as these are not in the pages collection
        // and /assets/images paths should be checked during the image check
        if (link.startsWith('/assets/images')) continue

        // skip rare hardcoded links to old GHE versions
        // these paths will always be in the old versioned format
        // example: /enterprise/11.10.340/admin/articles/upgrading-to-the-latest-release
        const gheVersionInLink = link.match(patterns.getEnterpriseVersionNumber)
        if (gheVersionInLink && deprecated.includes(gheVersionInLink[1])) continue
        // ------ END ONEOFF EXCLUSIONS -------///

        // look for linked page
        const linkedPage = context.pages[link] || context.pages[context.redirects[link]]
        this.checkedLinksCache.add(link)

        if (!linkedPage) {
          this.brokenLinks.set(link, { linkedFrom: [pathCacheKey] })
          continue
        }

        // if we're not checking external anchors, we're done
        if (!checkExternalAnchors) {
          continue
        }

        // find the permalink for the current version
        const linkedPagePermalink = linkedPage.permalinks.find(permalink => permalink.pageVersion === version)

        if (linkedPagePermalink) {
          // NOTE(review): the context is built even when there is no anchor to
          // verify; it could move inside the `if (anchor)` below if the
          // middlewares it runs have no side effects that are relied upon.
          const linkedPageContext = await buildPathContext(context, linkedPage, linkedPagePermalink)

          if (anchor) {
            await this.setRenderedPageObj(`${version}:${linkedPage.relativePath}`, linkedPageContext)
            this.addAnchorForLater(`${version}:${linkedPage.relativePath}`, anchor, pathCacheKey)
          }
        }
      }
    }
  }

  /**
   * Verifies every queued anchor against its cached page and records the
   * ones with no element whose `id` or `name` equals the anchor text.
   *
   * @returns {Promise<void>}
   */
  async checkAnchors () {
    // The Map is a synchronous iterable, so a plain `for...of` is sufficient.
    for (const [pathCacheKey, anchors] of this.anchorLinksToCheck) {
      const $ = await this.getRenderedPageObj(pathCacheKey)

      for (const [anchorText, anchorLink] of Object.entries(anchors)) {
        const matchingHeadings = $(`[id="${anchorText}"], [name="${anchorText}"]`)

        if (matchingHeadings.length === 0) {
          const brokenAnchorPath = this.brokenAnchors.get(pathCacheKey) || {}
          brokenAnchorPath[anchorText] = anchorLink
          this.brokenAnchors.set(pathCacheKey, brokenAnchorPath)
        }
      }
    }
  }

  /**
   * @returns {Map<string, {linkedFrom: string[]}>} Broken links found so far.
   */
  getBrokenLinks () {
    return this.brokenLinks
  }

  /**
   * Verifies the queued anchors, then returns the broken ones.
   *
   * @returns {Promise<Map<string, object>>}
   */
  async getBrokenAnchors () {
    await this.checkAnchors()
    return this.brokenAnchors
  }

  /**
   * Checks that every queued image exists on disk, resolved relative to the
   * current working directory, and returns the ones that do not.
   *
   * @returns {Promise<Map<string, {usedBy: string[]}>>}
   */
  async getBrokenImages () {
    for (const [imageSrc, imageUsage] of this.imagesToCheck) {
      try {
        await fs.promises.access(path.join(process.cwd(), imageSrc))
      } catch (e) {
        // an inaccessible file is the signal that the image is broken
        this.brokenImages.set(imageSrc, imageUsage)
      }
    }
    return this.brokenImages
  }
}
// Runs one middleware outside of a server: it receives `req`, a null
// response and a do-nothing `next`. Declared async because the middleware
// functions are likely async; whatever the middleware returns is passed back.
async function applyMiddleware (middleware, req) {
  const res = null
  const next = () => {}
  const outcome = await middleware(req, res, next)
  return outcome
}
// Builds the base context by running the `contextualize` middleware against
// a minimal request for the English root path, then returns whatever the
// middleware left on `req.context`.
async function buildInitialContext () {
  const rootRequest = { path: '/en', language: 'en', query: {} }
  await applyMiddleware(contextualize, rootRequest)
  const { context } = rootRequest
  return context
}
// Derives the context for one specific page/permalink from an already built
// initial context.
//
// Only the path-specific properties needed by the middleware below are
// refreshed; see middleware/context.js for the properties that are NOT
// refreshed per page. If that ever causes problems for link checking,
// `contextualize` can be called for every page instead. Cherry-picking is
// intended to improve performance, so the expensive `pages`, `redirects`,
// etc. data does not have to be rebuilt for every page that is checked.
async function buildPathContext (initialContext, page, permalink) {
  const href = permalink.href

  const pathSpecific = {
    page,
    currentVersion: permalink.pageVersion,
    currentProduct: getProductStringFromPath(href),
    relativePath: permalink.relativePath,
    currentPath: permalink.href
  }

  // New request whose context is the initial context (pages, redirects, etc.)
  // overlaid with the path-specific values; the initial context is not modified.
  const req = {
    path: href,
    context: { ...initialContext, ...pathSpecific },
    language: 'en',
    query: {}
  }

  // Let the contextualizing middlewares attach REST and GraphQL data.
  for (const middleware of [rest, graphql]) {
    await applyMiddleware(middleware, req)
  }

  // Release notes are available on docs site starting with GHES 3.0
  const hasReleaseNotes = versionSatisfiesRange(permalink.pageVersion, '>=3.0')
  if (hasReleaseNotes) {
    await applyMiddleware(releaseNotes, req)
  }

  // The context now carries REST, GraphQL, and release notes data
  return req.context
}
module.exports = {
LinksChecker,
buildPathContext,
buildInitialContext
}

Просмотреть файл

@ -1,37 +0,0 @@
const linkinator = require('linkinator')
const checker = new linkinator.LinkChecker()
const { deprecated } = require('../../lib/enterprise-server-releases')
// Options handed to `checker.check()` in the `beforeAll` hook below.
const config = {
// Page the crawl starts from.
path: 'http://localhost:4002/en',
// NOTE(review): presumably the number of simultaneous requests — confirm against the linkinator docs.
concurrency: 400,
// NOTE(review): presumably makes the checker follow links found on crawled pages — confirm.
recurse: true,
// The entries below are written as regular-expression source strings;
// a link matching any of them is not checked.
linksToSkip: [
// Skip any link that is not an internal link
// (the negative lookahead matches only strings that never contain the local /en URL)
'^((?!http://localhost:4002/en).)*$',
// Skip dist files
'/dist/index.*',
// Skip deprecated Enterprise content
// NOTE(review): the release strings are interpolated unescaped, so any `.` in them
// acts as a regex wildcard, and the trailing `/?` does not anchor the end of the
// version — verify that one release number cannot match as a prefix of another.
`enterprise(-server@|/)(${deprecated.join('|')})/?`
]
}
// Crawls the running docs server once, then asserts that the crawl found no
// broken internal links.
describe('page rendering', () => {
  // The crawl covers the whole site, so allow a very long timeout.
  jest.setTimeout(1000 * 1000)

  let result

  // Return the promise instead of also taking the `done` callback: with both,
  // a rejected crawl never reached `done()` and showed up as a timeout rather
  // than as the real error.
  beforeAll(async () => {
    result = (await checker.check(config)).links
  })

  test('every page has internal links that can be resolved', () => {
    const brokenLinks = result
      .filter(link => link.state === 'BROKEN')
      // Drop `failureDetails` from the report without mutating the crawl results.
      .map(({ failureDetails, ...link }) => link)

    // NOTE(review): the second argument to `expect` is a custom failure message;
    // presumably this relies on a helper such as jest-expect-message — confirm.
    expect(brokenLinks.length, `Found ${brokenLinks.length} total broken links: ${JSON.stringify(brokenLinks, null, 2)}`).toBe(0)
  })
})