Mirror of https://github.com/github/docs.git
Pre-computed pageinfos (#40414)
Co-authored-by: Robert Sese <734194+rsese@users.noreply.github.com>
Parent
deb114a168
Commit
173abd1a0c
@@ -0,0 +1,46 @@
name: Warmup pageinfo cache

description: Run this to create a .pageinfo-cache.json.br file

inputs:
  restore-only:
    description: Only attempt to restore, don't warm up
    required: false

runs:
  using: 'composite'
  steps:
    # The caching technique here is to "unboundedly" add to the cache.
    # By "unboundedly" we mean the cached item will grow and grow.
    # The general idea is that we A) restore from cache, B) replace the
    # file by running the script, and C) save the file back to cache.
    # Optionally, you can have it just do A (and not B and C).

    - name: Cache .pageinfo-cache.json.br (restore)
      # You can't use a SHA on these. Only possible with `actions/cache@SHA...`
      uses: actions/cache/restore@v3
      with:
        path: .pageinfo-cache.json.br
        key: pageinfo-cache-
        restore-keys: pageinfo-cache-

    # When we use this composite action from workflows like
    # Azure Preview Deploy and Azure Production Deploy, we don't have
    # Node installed or any of its packages, i.e. we never run
    # `npm ci` in those actions, for security's sake.
    # So we can't do things that require Node code.
    # Tests and other workflows will omit the `restore-only` input, but
    # the jobs that prep for Docker build and push will set it to a
    # non-empty string, which basically means "If you can restore it,
    # great. If not, that's fine, don't bother."
    - name: Run script
      if: ${{ inputs.restore-only == '' }}
      shell: bash
      run: npm run precompute-pageinfo

    - name: Cache .pageinfo-cache.json.br (save)
      if: ${{ inputs.restore-only == '' }}
      uses: actions/cache/save@v3
      with:
        path: .pageinfo-cache.json.br
        key: pageinfo-cache-${{ github.sha }}
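For reference, the restore → recompute → save pattern above can also be sketched in plain Node with the @actions/cache toolkit. This is a minimal sketch, not part of the commit, assuming `npm run precompute-pageinfo` writes .pageinfo-cache.json.br to the working directory:

import { restoreCache, saveCache } from '@actions/cache'
import { exec } from '@actions/exec'

const PATHS = ['.pageinfo-cache.json.br']

async function warmupPageinfoCache({ restoreOnly = false } = {}) {
  // A) Restore the newest entry whose key starts with the shared prefix.
  const hitKey = await restoreCache(PATHS.slice(), 'pageinfo-cache-', ['pageinfo-cache-'])
  if (restoreOnly) return hitKey // "If you can restore it, great. If not, don't bother."

  // B) Replace the file by running the script.
  await exec('npm', ['run', 'precompute-pageinfo'])

  // C) Save under a unique, SHA-suffixed key; actions/cache entries are
  // immutable, so each run on main adds a fresh entry under the prefix.
  await saveCache(PATHS.slice(), `pageinfo-cache-${process.env.GITHUB_SHA}`)
}

await warmupPageinfoCache({ restoreOnly: Boolean(process.env.RESTORE_ONLY) })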
@@ -198,6 +198,10 @@ jobs:
        with:
          restore-only: true

      - uses: ./.github/actions/precompute-pageinfo
        with:
          restore-only: true

      # In addition to making the final image smaller, we also save time by not sending unnecessary files to the docker build context
      - name: 'Prune for preview env'
        run: src/workflows/prune-for-preview-env.sh
@@ -78,6 +78,10 @@ jobs:
        with:
          restore-only: true

      - uses: ./.github/actions/precompute-pageinfo
        with:
          restore-only: true

      - uses: ./.github/actions/clone-translations
        with:
          token: ${{ secrets.DOCS_BOT_PAT_READPUBLICKEY }}
@@ -1,9 +1,17 @@
 name: Keep caches warm

-# **What it does**: Makes sure the caching of ./node_modules and ./.next
-# is kept warm for making pull requests more rapid.
-# **Why we have it**: A PR workflow that depends on caching can't reuse a
-# cached artifact across PRs unless it also runs on `main`.
+# **What it does**:
+# Makes sure the caching of ./node_modules and ./.next is kept warm
+# for making other pull requests faster.
+# We also use this workflow to precompute other things so that the
+# actions cache is warmed up with data available during deployment
+# actions. When you use actions/cache within a run on `main`,
+# what gets saved can be used by other pull requests. But it's
+# also so that when we make preview or production deployments,
+# we can just rely on the cache to already be warmed up.
+# **Why we have it**:
+# A PR workflow that depends on caching can't reuse a
+# cached artifact across PRs unless it also runs on `main`.
 # **Who does it impact**: Docs engineering, open-source engineering contributors.

 on:
@@ -31,6 +39,10 @@ jobs:
        run: npm run build

      - uses: ./.github/actions/warmup-remotejson-cache
        if: github.repository == 'github/docs-internal'

      - uses: ./.github/actions/precompute-pageinfo
        if: github.repository == 'github/docs-internal'

      - uses: ./.github/actions/slack-alert
        if: ${{ failure() && github.event_name != 'workflow_dispatch' }}
@@ -170,6 +170,12 @@ jobs:
        # archived enterprise server URLs.
        if: ${{ matrix.name == 'redirects' }}

      - uses: ./.github/actions/precompute-pageinfo
        # Only the 'pageinfo' tests include end-to-end tests about this.
        if: ${{ matrix.name == 'pageinfo' }}
        env:
          ROOT: src/fixtures/fixtures

      - name: Index fixtures into the local Elasticsearch
        # For the sake of saving time, only run this step if the group
        # is one that will run tests against an Elasticsearch on localhost.
@@ -48,3 +48,6 @@ assets/images/help/writing/unordered-list-rendered (1).png

# Used by getRemoteJSON()
.remotejson-cache/

# Used by precompute-pageinfo
.pageinfo-cache.json.br
@@ -47,6 +47,8 @@ FROM all_deps as builder
COPY src ./src
# The star is because it's an optional directory
COPY .remotejson-cache* ./.remotejson-cache
# The star is because it's an optional file
COPY .pageinfo-cache.json.br* ./.pageinfo-cache.json.br
# Certain content is necessary for being able to build
COPY content/index.md ./content/index.md
COPY content/rest ./content/rest

@@ -88,6 +90,7 @@ COPY --chown=node:node assets ./assets
COPY --chown=node:node content ./content
COPY --chown=node:node src ./src
COPY --chown=node:node .remotejson-cache* ./.remotejson-cache
COPY --chown=node:node .pageinfo-cache.json.br* ./.pageinfo-cache.json.br
COPY --chown=node:node data ./data
COPY --chown=node:node next.config.js ./
@@ -39,6 +39,7 @@
    "playwright-test": "playwright test --config src/fixtures/playwright.config.ts --project=\"Google Chrome\"",
    "post-lints": "node src/content-linter/scripts/post-lints.js",
    "postinstall": "cp package-lock.json .installed.package-lock.json && echo \"Updated .installed.package-lock.json\" # see husky/post-checkout and husky/post-merge",
    "precompute-pageinfo": "node src/pageinfo/scripts/precompute-pageinfo.js",
    "prepare": "husky install src/workflows/husky",
    "prettier": "prettier -w \"**/*.{ts,tsx,js,mjs,scss,yml,yaml}\"",
    "prettier-check": "prettier -c \"**/*.{ts,tsx,js,mjs,scss,yml,yaml}\"",
@@ -13,9 +13,18 @@ import contextualize from '#src/frame/middleware/context/context.js'
import features from '#src/versions/middleware/features.js'
import getRedirect from '#src/redirects/lib/get-redirect.js'
import { isArchivedVersionByPath } from '#src/archives/lib/is-archived-version.js'
import { readCompressedJsonFile } from '#src/frame/lib/read-json-file.js'

const router = express.Router()

// If you have pre-computed page info into a JSON file on disk, this is
// where it would be expected to be found.
// Note that if the file does not exist, it will be ignored and
// every pageinfo is computed every time.
// Note! The only reason this variable is exported is so that
// it can be imported by the script scripts/precompute-pageinfo.js
export const CACHE_FILE_PATH = '.pageinfo-cache.json.br'

const validationMiddleware = (req, res, next) => {
  const { pathname } = req.query
  if (!pathname) {
@@ -83,6 +92,90 @@ const pageinfoMiddleware = (req, res, next) => {
  return next()
}

export async function getPageInfo(page, pathname) {
  const renderingReq = {
    path: pathname,
    language: page.languageCode,
    pagePath: pathname,
    cookies: {},
  }
  const next = () => {}
  const res = {}
  await contextualize(renderingReq, res, next)
  await shortVersions(renderingReq, res, next)
  renderingReq.context.page = page
  await features(renderingReq, res, next)
  const context = renderingReq.context

  const title = await page.renderProp('title', context, { textOnly: true })
  const intro = await page.renderProp('intro', context, { textOnly: true })

  let productPage = null
  for (const permalink of page.permalinks) {
    const rootHref = permalink.href
      .split('/')
      .slice(0, permalink.pageVersion === 'free-pro-team@latest' ? 3 : 4)
      .join('/')
    const rootPage = context.pages[rootHref]
    if (rootPage) {
      productPage = rootPage
      break
    }
  }
  const product = productPage ? await getProductPageInfo(productPage, context) : ''

  return { title, intro, product }
}
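The rootHref slicing in getPageInfo() is easiest to see with concrete paths. A small illustration (the example hrefs are invented, but follow the docs site's URL shape, where free-pro-team URLs carry no version segment):

// '/en/get-started/quickstart'.split('/') -> ['', 'en', 'get-started', 'quickstart'];
// free-pro-team@latest keeps 3 parts (leading '' + language + product),
// every other plan keeps 4 (leading '' + language + version + product).
function rootHrefOf(href, pageVersion) {
  return href
    .split('/')
    .slice(0, pageVersion === 'free-pro-team@latest' ? 3 : 4)
    .join('/')
}

rootHrefOf('/en/get-started/quickstart', 'free-pro-team@latest')
// -> '/en/get-started'
rootHrefOf('/en/enterprise-server@3.10/admin/overview', 'enterprise-server@3.10')
// -> '/en/enterprise-server@3.10/admin'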
const _productPageCache = {}
// The title of the product is much easier to cache because it's often
// repeated. What determines the title of the product is the language
// and the version. A lot of pages have the same title for the product.
async function getProductPageInfo(page, context) {
  const cacheKey = `${page.relativePath}:${context.currentVersion}:${context.currentLanguage}`
  if (!(cacheKey in _productPageCache)) {
    const title =
      (await page.renderProp('shortTitle', context, {
        textOnly: true,
      })) ||
      (await page.renderProp('title', context, {
        textOnly: true,
      }))
    _productPageCache[cacheKey] = title
  }
  return _productPageCache[cacheKey]
}
let _cache = null
async function getPageInfoFromCache(page, pathname) {
  if (_cache === null) {
    try {
      _cache = readCompressedJsonFile(CACHE_FILE_PATH)
    } catch (error) {
      if (error.code !== 'ENOENT') {
        throw error
      }
      _cache = {}
    }
  }

  let info = _cache[pathname]
  if (!info) {
    info = await getPageInfo(page, pathname)
    // You might wonder: why do we not store this computed information
    // in the `_cache` from here?
    // The short answer is: it won't be used again.
    // In production, which is the only place where performance matters,
    // an HTTP GET request will only happen once per deployment. That's
    // because the CDN will cache it until the next deployment (which is
    // followed by a CDN purge).
    // In development (local preview), the performance doesn't really matter.
    // In CI, we use the caching because the CI runs
    // `npm run precompute-pageinfo` right before it runs jest tests.
  }
  return info
}
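getPageInfoFromCache() leans on readCompressedJsonFile() from #src/frame/lib/read-json-file.js, whose implementation is outside this diff. Given that the precompute script below writes the file with brotliCompressSync, a plausible minimal sketch of such a reader (the real helper may differ):

import fs from 'fs'
import { brotliDecompressSync } from 'zlib'

// Hypothetical sketch: synchronously read a Brotli-compressed JSON file
// and parse it. If the file is missing, fs.readFileSync throws ENOENT,
// which the caller above turns into an empty cache.
function readCompressedJsonFile(filePath) {
  const compressed = fs.readFileSync(filePath)
  return JSON.parse(brotliDecompressSync(compressed).toString('utf-8'))
}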
router.get(
  '/v1',
  validationMiddleware,
@@ -113,51 +206,7 @@ router.get(
       throw new Error(`pathname '${pathname}' not one of the page's permalinks`)
     }

-    const renderingReq = {
-      path: pathname,
-      language: page.languageCode,
-      pagePath: pathname,
-      cookies: {},
-    }
-    const next = () => {}
-    await contextualize(renderingReq, res, next)
-    await shortVersions(renderingReq, res, next)
-    renderingReq.context.page = page
-    await features(renderingReq, res, next)
-    const context = renderingReq.context
-
-    const title = await page.renderProp('title', context, { textOnly: true })
-    const intro = await page.renderProp('intro', context, { textOnly: true })
-
-    let productPage = null
-    for (const permalink of page.permalinks) {
-      const rootHref = permalink.href
-        .split('/')
-        .slice(0, permalink.pageVersion === 'free-pro-team@latest' ? 3 : 4)
-        .join('/')
-      const rootPage = context.pages[rootHref]
-      if (rootPage) {
-        productPage = rootPage
-        break
-      }
-    }
-    let product = ''
-    if (productPage) {
-      product = await productPage.renderProp('shortTitle', context, {
-        textOnly: true,
-      })
-      if (!product) {
-        product = await productPage.renderProp('title', context, {
-          textOnly: true,
-        })
-      }
-    }
-
-    const info = {
-      product,
-      title,
-      intro,
-    }
+    const info = await getPageInfoFromCache(page, pathname)

     const tags = [
       // According to https://docs.datadoghq.com/getting_started/tagging/#define-tags
@@ -0,0 +1,70 @@
#!/usr/bin/env node

/**
 * This script gathers all English pages and computes each page's
 * 'title', 'intro', and 'product' properties. These are then stored
 * in a JSON file (Brotli-compressed) on disk, and the pageinfo
 * middleware can load that JSON file as a cache of pageinfo for all
 * English pages.
 * Now, when someone requests `/api/pageinfo?pathname=/en/foo/bar`, the
 * backend just needs to read from a precomputed cache file instead
 * of having to do this computation on every request. Time saved, up front.
 *
 * Why cache?: Despite being a fast computation (3 Liquid + Markdown renders),
 * it still adds up. And it's safe and cheap to precompute in advance.
 *
 * Why only English?: To keep the file from getting too large.
 * Given how well these things compress, we might consider doing
 * all languages in the future.
 *
 * Why Brotli?: Because the file gets included in the Docker container and
 * there every byte counts.
 *
 * When is this script run?: On every push to main, the cache gets computed
 * and stored via actions/cache. Meaning, it's not run during deployment.
 * (During the deploy it only *downloads* from actions/cache.)
 */
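The "every byte counts" claim is easy to check empirically. A throwaway comparison on a synthetic, pageinfo-shaped payload (numbers will vary with the real data):

import { brotliCompressSync, gzipSync } from 'zlib'

// Build a repetitive JSON payload roughly shaped like the pageinfo cache.
const entries = Array.from({ length: 5000 }, (_, i) => [
  `/en/page-${i}`,
  { title: `Page ${i}`, intro: 'Some intro text that repeats a lot.', product: 'Get started' },
])
const payload = Buffer.from(JSON.stringify(Object.fromEntries(entries)), 'utf-8')

console.log('raw   :', payload.length)
console.log('gzip  :', gzipSync(payload).length)
console.log('brotli:', brotliCompressSync(payload).length)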
import fs from 'fs'
import { brotliCompressSync } from 'zlib'

import { loadPages, loadUnversionedTree } from '#src/frame/lib/page-data.js'
import { CACHE_FILE_PATH, getPageInfo } from '../middleware.js'

const CI = Boolean(JSON.parse(process.env.CI || 'false'))

main()

async function main() {
  const unversionedTree = await loadUnversionedTree(['en'])
  const pageList = await loadPages(unversionedTree, ['en'])

  let label = `Compute pageinfos for ${pageList.length.toLocaleString()} pages`
  console.time(label)
  const pageinfos = {}
  for (const page of pageList) {
    const pathname = page.permalinks[0].href
    try {
      const computed = await getPageInfo(page, pathname)
      if (computed) {
        pageinfos[pathname] = computed
      }
    } catch (error) {
      console.error(`Error computing pageinfo for ${page.fullPath} (${pathname})`)
      throw error
    }
  }
  console.timeEnd(label)

  label = `Serialize, compress, and write to ${CACHE_FILE_PATH}`
  console.time(label)
  const payload = CI ? JSON.stringify(pageinfos) : JSON.stringify(pageinfos, null, 2)
  const payloadBuffer = Buffer.from(payload, 'utf-8')
  const payloadCompressed = brotliCompressSync(payloadBuffer)
  fs.writeFileSync(CACHE_FILE_PATH, payloadCompressed)
  console.timeEnd(label)
  console.log(
    `Wrote ${Object.keys(pageinfos).length.toLocaleString()} pageinfos to ${CACHE_FILE_PATH}`,
  )
}
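A quick way to sanity-check the script's output is to read the file back the way the middleware would and spot-check one entry (the pathname here is illustrative; pick any key the script logged):

import fs from 'fs'
import { brotliDecompressSync } from 'zlib'

const raw = brotliDecompressSync(fs.readFileSync('.pageinfo-cache.json.br'))
const pageinfos = JSON.parse(raw.toString('utf-8'))
console.log(Object.keys(pageinfos).length.toLocaleString(), 'entries')
console.log(pageinfos['/en/get-started/quickstart']) // e.g. { title, intro, product }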