Speed up warmServer by loading pages (and files) asynchronously (#16752)

* Async `new Page`

* Update pages.js

* Update pages.js

* Update pages.js

* Update pages.js

* Update pages.js

* Using mapLimit

* Update pages.js

* Test updates

* Update pages.js

* Move exists to page class

* Test fixes

* Slightly faster localized paths process
This commit is contained in:
Kevin Heis 2020-12-09 08:57:18 -08:00 коммит произвёл GitHub
Родитель ee7c1bce3d
Коммит 1d5e216404
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
10 изменённых файлов: 142 добавлений и 103 удалений

Просмотреть файл

@ -1,5 +1,5 @@
const assert = require('assert')
const fs = require('fs')
const fs = require('fs').promises
const path = require('path')
const cheerio = require('cheerio')
const patterns = require('./patterns')
@ -23,15 +23,30 @@ const slash = require('slash')
const statsd = require('./statsd')
class Page {
constructor (opts) {
static async init (opts) {
assert(opts.relativePath, 'relativePath is required')
assert(opts.basePath, 'basePath is required')
const relativePath = slash(opts.relativePath)
const fullPath = slash(path.join(opts.basePath, relativePath))
const raw = await fs.readFile(fullPath, 'utf8')
return new Page({ ...opts, relativePath, fullPath, raw })
}
static async exists (path) {
try {
return await fs.stat(path)
} catch (err) {
if (err.code === 'ENOENT') return false
console.error(err)
}
}
constructor (opts) {
assert(opts.languageCode, 'languageCode is required')
Object.assign(this, { ...opts })
this.relativePath = slash(this.relativePath)
this.fullPath = slash(path.join(this.basePath, this.relativePath))
this.raw = fs.readFileSync(this.fullPath, 'utf8')
// TODO remove this when crowdin-support issue 66 has been resolved
if (this.languageCode !== 'en' && this.raw.includes(': verdadero')) {

Просмотреть файл

@ -2,42 +2,50 @@ const path = require('path')
const walk = require('walk-sync').entries
const Page = require('./page')
const languages = require('./languages')
const fs = require('fs')
const { mapLimit, filterLimit } = require('async')
const FILE_READ_LIMIT = 500
async function loadPageList () {
const pageList = []
// load english pages
const englishPath = path.join(__dirname, '..', languages.en.dir, 'content')
const englishPages = walk(englishPath)
.filter(({ relativePath }) => {
return relativePath.endsWith('.md') &&
!relativePath.includes('README')
})
.map(fileData => new Page({ ...fileData, languageCode: languages.en.code }))
const englishPaths = walk(englishPath)
.filter(({ relativePath }) =>
relativePath.endsWith('.md') && !relativePath.includes('README')
)
const englishPages = await mapLimit(
englishPaths,
FILE_READ_LIMIT,
async fileData => await Page.init({ ...fileData, languageCode: languages.en.code })
)
pageList.push(...englishPages)
// load matching pages in other languages
for (const page of englishPages) {
for (const language of Object.values(languages)) {
if (language.code === 'en') continue
let localizedPaths = Object.values(languages)
.filter(({ code }) => code !== 'en')
.map(language => {
const basePath = path.join(__dirname, '..', language.dir, 'content')
const localizedPath = path.join(basePath, page.relativePath)
try {
fs.statSync(localizedPath)
} catch (_) {
continue
}
pageList.push(new Page({
relativePath: page.relativePath,
return englishPages.map(page => ({
basePath,
relativePath: page.relativePath,
localizedPath: path.join(basePath, page.relativePath),
languageCode: language.code
}))
}
}
})
.flat()
localizedPaths = await filterLimit(
localizedPaths,
FILE_READ_LIMIT,
async ({ localizedPath }) => Page.exists(localizedPath)
)
const localizedPages = await mapLimit(
localizedPaths,
FILE_READ_LIMIT,
async ({ basePath, relativePath, languageCode }) =>
await Page.init({ basePath, relativePath, languageCode })
)
pageList.push(...localizedPages)
return pageList
}

Просмотреть файл

@ -3,6 +3,8 @@ const sleep = require('await-sleep')
const querystring = require('querystring')
describe('homepage', () => {
jest.setTimeout(60 * 1000)
test('should be titled "GitHub Documentation"', async () => {
await page.goto('http://localhost:4001')
await expect(page.title()).resolves.toMatch('GitHub Documentation')
@ -10,6 +12,8 @@ describe('homepage', () => {
})
describe('algolia browser search', () => {
jest.setTimeout(60 * 1000)
it('works on the homepage', async () => {
await page.goto('http://localhost:4001/en')
await page.click('#search-input-container input[type="search"]')

Просмотреть файл

@ -4,6 +4,8 @@ const ignoredPagePaths = config.files[0].ignore
const ignoredDataPaths = config.files[2].ignore
describe('crowdin.yml config file', () => {
jest.setTimeout(60 * 1000)
let pages
beforeAll(async (done) => {
pages = await loadPages()

Просмотреть файл

@ -7,6 +7,8 @@ const fs = require('fs').promises
const path = require('path')
describe('data references', () => {
jest.setTimeout(60 * 1000)
let data, pages
beforeAll(async (done) => {

Просмотреть файл

@ -17,8 +17,8 @@ describe('redirects', () => {
done()
})
test('page.redirects is an array', () => {
const page = new Page({
test('page.redirects is an array', async () => {
const page = await Page.init({
relativePath: 'github/collaborating-with-issues-and-pull-requests/about-branches.md',
basePath: path.join(__dirname, '../../content'),
languageCode: 'en'
@ -26,8 +26,8 @@ describe('redirects', () => {
expect(isPlainObject(page.redirects)).toBe(true)
})
test('dotcom homepage page.redirects', () => {
const page = new Page({
test('dotcom homepage page.redirects', async () => {
const page = await Page.init({
relativePath: 'github/index.md',
basePath: path.join(__dirname, '../../content'),
languageCode: 'en'
@ -41,7 +41,7 @@ describe('redirects', () => {
})
test('converts single `redirect_from` strings values into arrays', async () => {
const page = new Page({
const page = await Page.init({
relativePath: 'github/collaborating-with-issues-and-pull-requests/about-conversations-on-github.md',
basePath: path.join(__dirname, '../../content'),
languageCode: 'en'

Просмотреть файл

@ -8,7 +8,7 @@ describe('find page', () => {
jest.setTimeout(1000 * 1000)
test('falls back to the English page if it can\'t find a localized page', async () => {
const page = new Page({
const page = await Page.init({
relativePath: 'page-that-does-not-exist-in-translations-dir.md',
basePath: path.join(__dirname, '../fixtures'),
languageCode: 'en'
@ -24,7 +24,7 @@ describe('find page', () => {
})
test('follows redirects', async () => {
const page = new Page({
const page = await Page.init({
relativePath: 'page-with-redirects.md',
basePath: path.join(__dirname, '../fixtures'),
languageCode: 'en'

Просмотреть файл

@ -5,6 +5,8 @@ const { set } = require('lodash')
const nonEnterpriseDefaultVersion = require('../../lib/non-enterprise-default-version')
describe('liquid helper tags', () => {
jest.setTimeout(60 * 1000)
const context = {}
let pageMap
beforeAll(async (done) => {

Просмотреть файл

@ -15,14 +15,14 @@ const opts = {
}
describe('Page class', () => {
test('preserves file path info', () => {
const page = new Page(opts)
test('preserves file path info', async () => {
const page = await Page.init(opts)
expect(page.relativePath).toBe('github/collaborating-with-issues-and-pull-requests/about-branches.md')
expect(page.fullPath.includes(page.relativePath)).toBe(true)
})
test('does not error out on translated TOC with no links', () => {
const page = new Page({
test('does not error out on translated TOC with no links', async () => {
const page = await Page.init({
relativePath: 'translated-toc-with-no-links-index.md',
basePath: path.join(__dirname, '../fixtures'),
languageCode: 'ja'
@ -31,30 +31,34 @@ describe('Page class', () => {
})
describe('showMiniToc page property', () => {
const article = new Page({
relativePath: 'sample-article.md',
basePath: path.join(__dirname, '../fixtures'),
languageCode: 'en'
})
let article, articleWithFM, tocPage, mapTopic
const articleWithFM = new Page({
showMiniToc: false,
relativePath: article.relativePath,
basePath: article.basePath,
languageCode: article.languageCode
})
beforeAll(async () => {
article = await Page.init({
relativePath: 'sample-article.md',
basePath: path.join(__dirname, '../fixtures'),
languageCode: 'en'
})
const tocPage = new Page({
relativePath: 'sample-toc-index.md',
basePath: path.join(__dirname, '../fixtures'),
languageCode: 'en'
})
articleWithFM = await Page.init({
showMiniToc: false,
relativePath: article.relativePath,
basePath: article.basePath,
languageCode: article.languageCode
})
const mapTopic = new Page({
mapTopic: true,
relativePath: article.relativePath,
basePath: article.basePath,
languageCode: article.languageCode
tocPage = await Page.init({
relativePath: 'sample-toc-index.md',
basePath: path.join(__dirname, '../fixtures'),
languageCode: 'en'
})
mapTopic = await Page.init({
mapTopic: true,
relativePath: article.relativePath,
basePath: article.basePath,
languageCode: article.languageCode
})
})
test('is true by default on articles', () => {
@ -76,7 +80,7 @@ describe('Page class', () => {
describe('page.render(context)', () => {
test('rewrites links to include the current language prefix and version', async () => {
const page = new Page(opts)
const page = await Page.init(opts)
const context = {
page: { version: nonEnterpriseDefaultVersion },
currentVersion: nonEnterpriseDefaultVersion,
@ -99,7 +103,7 @@ describe('Page class', () => {
})
test('rewrites links in the intro to include the current language prefix and version', async () => {
const page = new Page(opts)
const page = await Page.init(opts)
page.rawIntro = '[Pull requests](/articles/about-pull-requests)'
const context = {
page: { version: nonEnterpriseDefaultVersion },
@ -114,7 +118,7 @@ describe('Page class', () => {
})
test('does not rewrite links that include deprecated enterprise release numbers', async () => {
const page = new Page({
const page = await Page.init({
relativePath: 'admin/enterprise-management/migrating-from-github-enterprise-1110x-to-2123.md',
basePath: path.join(__dirname, '../../content'),
languageCode: 'en'
@ -133,7 +137,7 @@ describe('Page class', () => {
})
test('does not rewrite links to external redirects', async () => {
const page = new Page(opts)
const page = await Page.init(opts)
page.markdown = `${page.markdown}\n\nSee [Capistrano](/capistrano).`
const context = {
page: { version: nonEnterpriseDefaultVersion },
@ -150,7 +154,7 @@ describe('Page class', () => {
// But they don't have access to our currently supported versions, which we're testing here.
// This test ensures that this works as expected: {% if enterpriseServerVersions contains currentVersion %}
test('renders the expected Enterprise Server versioned content', async () => {
const page = new Page({
const page = await Page.init({
relativePath: 'page-versioned-for-all-enterprise-releases.md',
basePath: path.join(__dirname, '../fixtures'),
languageCode: 'en'
@ -184,27 +188,27 @@ describe('Page class', () => {
})
})
test('preserves `languageCode`', () => {
const page = new Page(opts)
test('preserves `languageCode`', async () => {
const page = await Page.init(opts)
expect(page.languageCode).toBe('en')
})
test('parentProductId getter', () => {
let page = new Page({
test('parentProductId getter', async () => {
let page = await Page.init({
relativePath: 'github/some-category/some-article.md',
basePath: path.join(__dirname, '../fixtures/products'),
languageCode: 'en'
})
expect(page.parentProductId).toBe('github')
page = new Page({
page = await Page.init({
relativePath: 'actions/some-category/some-article.md',
basePath: path.join(__dirname, '../fixtures/products'),
languageCode: 'en'
})
expect(page.parentProductId).toBe('actions')
page = new Page({
page = await Page.init({
relativePath: 'admin/some-category/some-article.md',
basePath: path.join(__dirname, '../fixtures/products'),
languageCode: 'en'
@ -213,26 +217,26 @@ describe('Page class', () => {
})
describe('permalinks', () => {
test('is an array', () => {
const page = new Page(opts)
test('is an array', async () => {
const page = await Page.init(opts)
expect(Array.isArray(page.permalinks)).toBe(true)
})
test('has a key for every supported enterprise version (and no deprecated versions)', () => {
const page = new Page(opts)
test('has a key for every supported enterprise version (and no deprecated versions)', async () => {
const page = await Page.init(opts)
const pageVersions = page.permalinks.map(permalink => permalink.pageVersion)
expect(enterpriseServerReleases.supported.every(version => pageVersions.includes(`enterprise-server@${version}`))).toBe(true)
expect(enterpriseServerReleases.deprecated.every(version => !pageVersions.includes(`enterprise-server@${version}`))).toBe(true)
})
test('sets versioned values', () => {
const page = new Page(opts)
test('sets versioned values', async () => {
const page = await Page.init(opts)
expect(page.permalinks.find(permalink => permalink.pageVersion === nonEnterpriseDefaultVersion).href).toBe(`/en/${nonEnterpriseDefaultVersion}/github/collaborating-with-issues-and-pull-requests/about-branches`)
expect(page.permalinks.find(permalink => permalink.pageVersion === `enterprise-server@${enterpriseServerReleases.oldestSupported}`).href).toBe(`/en/enterprise-server@${enterpriseServerReleases.oldestSupported}/github/collaborating-with-issues-and-pull-requests/about-branches`)
})
test('homepage permalinks', () => {
const page = new Page({
test('homepage permalinks', async () => {
const page = await Page.init({
relativePath: 'index.md',
basePath: path.join(__dirname, '../../content'),
languageCode: 'en'
@ -242,8 +246,8 @@ describe('Page class', () => {
expect(page.permalinks.find(permalink => permalink.pageVersion === 'homepage').href).toBe('/en')
})
test('permalinks for dotcom-only pages', () => {
const page = new Page({
test('permalinks for dotcom-only pages', async () => {
const page = await Page.init({
relativePath: 'github/getting-started-with-github/signing-up-for-a-new-github-account.md',
basePath: path.join(__dirname, '../../content'),
languageCode: 'en'
@ -252,8 +256,8 @@ describe('Page class', () => {
expect(page.permalinks.length).toBe(1)
})
test('permalinks for enterprise-only pages', () => {
const page = new Page({
test('permalinks for enterprise-only pages', async () => {
const page = await Page.init({
relativePath: 'products/admin/some-category/some-article.md',
basePath: path.join(__dirname, '../fixtures'),
languageCode: 'en'
@ -264,8 +268,8 @@ describe('Page class', () => {
expect(pageVersions.includes(nonEnterpriseDefaultVersion)).toBe(false)
})
test('permalinks for non-GitHub.com products without Enterprise versions', () => {
const page = new Page({
test('permalinks for non-GitHub.com products without Enterprise versions', async () => {
const page = await Page.init({
relativePath: 'products/actions/some-category/some-article.md',
basePath: path.join(__dirname, '../fixtures'),
languageCode: 'en'
@ -274,8 +278,8 @@ describe('Page class', () => {
expect(page.permalinks.length).toBe(1)
})
test('permalinks for non-GitHub.com products with Enterprise versions', () => {
const page = new Page({
test('permalinks for non-GitHub.com products with Enterprise versions', async () => {
const page = await Page.init({
relativePath: '/insights/installing-and-configuring-github-insights/about-github-insights.md',
basePath: path.join(__dirname, '../../content'),
languageCode: 'en'
@ -318,7 +322,7 @@ describe('Page class', () => {
})
test('fixes translated frontmatter that includes verdadero', async () => {
const page = new Page({
const page = await Page.init({
relativePath: 'article-with-mislocalized-frontmatter.md',
basePath: path.join(__dirname, '../fixtures'),
languageCode: 'ja'
@ -333,7 +337,7 @@ describe('Page class', () => {
// Note this test will go out of date when we deprecate 2.20
test('pages that apply to newer enterprise versions', async () => {
const page = new Page({
const page = await Page.init({
relativePath: 'github/administering-a-repository/comparing-releases.md',
basePath: path.join(__dirname, '../../content'),
languageCode: 'en'
@ -343,7 +347,7 @@ describe('Page class', () => {
})
test('index page', async () => {
const page = new Page({
const page = await Page.init({
relativePath: 'index.md',
basePath: path.join(__dirname, '../../content'),
languageCode: 'en'
@ -352,7 +356,7 @@ describe('Page class', () => {
})
test('enterprise admin index page', async () => {
const page = new Page({
const page = await Page.init({
relativePath: 'admin/index.md',
basePath: path.join(__dirname, '../../content'),
languageCode: 'en'
@ -366,50 +370,50 @@ describe('Page class', () => {
describe('catches errors thrown in Page class', () => {
test('frontmatter parsing error', () => {
function getPage () {
return new Page({
async function getPage () {
return await Page.init({
relativePath: 'page-with-frontmatter-error.md',
basePath: path.join(__dirname, '../fixtures'),
languageCode: 'en'
})
}
expect(getPage).toThrowError('invalid frontmatter entry')
expect(getPage).rejects.toThrowError('invalid frontmatter entry')
})
test('missing versions frontmatter', () => {
function getPage () {
return new Page({
async function getPage () {
return await Page.init({
relativePath: 'page-with-missing-product-versions.md',
basePath: path.join(__dirname, '../fixtures'),
languageCode: 'en'
})
}
expect(getPage).toThrowError('versions')
expect(getPage).rejects.toThrowError('versions')
})
test('English page with a version in frontmatter that its parent product is not available in', () => {
function getPage () {
return new Page({
async function getPage () {
return await Page.init({
relativePath: 'admin/some-category/some-article-with-mismatched-versions-frontmatter.md',
basePath: path.join(__dirname, '../fixtures/products'),
languageCode: 'en'
})
}
expect(getPage).toThrowError(/`versions` frontmatter.*? product is not available in/)
expect(getPage).rejects.toThrowError(/`versions` frontmatter.*? product is not available in/)
})
test('non-English page with a version in frontmatter that its parent product is not available in', () => {
function getPage () {
return new Page({
async function getPage () {
return await Page.init({
relativePath: 'admin/some-category/some-article-with-mismatched-versions-frontmatter.md',
basePath: path.join(__dirname, '../fixtures/products'),
languageCode: 'es'
})
}
expect(getPage).toThrowError(/`versions` frontmatter.*? product is not available in/)
expect(getPage).rejects.toThrowError(/`versions` frontmatter.*? product is not available in/)
})
})

Просмотреть файл

@ -10,6 +10,8 @@ const entities = new Entities()
const { chain, difference } = require('lodash')
describe('pages module', () => {
jest.setTimeout(60 * 1000)
let pages
beforeAll(async (done) => {