зеркало из https://github.com/github/docs.git
Merge branch 'main' into repo-sync
This commit is contained in:
Коммит
cb5e9c19d0
|
@ -153,6 +153,15 @@ jobs:
|
||||||
- name: Run build script
|
- name: Run build script
|
||||||
run: npm run build
|
run: npm run build
|
||||||
|
|
||||||
|
- name: Disk cache used by getRemoteJSON function in middleware
|
||||||
|
uses: actions/cache@9b0c1fce7a93df8e3bb8926b0d6e9d89e92f20a7
|
||||||
|
with:
|
||||||
|
path: .remotejson-cache
|
||||||
|
# Very liberal cache key. Note, for this to become populated
|
||||||
|
# for other branches, you have to manually run this workflow
|
||||||
|
# at least once using the "Run workflow" button.
|
||||||
|
key: ${{ runner.os }}-remotejson
|
||||||
|
|
||||||
- name: Index fixtures into the local Elasticsearch
|
- name: Index fixtures into the local Elasticsearch
|
||||||
# For the sake of saving time, only run this step if the group
|
# For the sake of saving time, only run this step if the group
|
||||||
# is one that will run tests against an Elasticsearch on localhost.
|
# is one that will run tests against an Elasticsearch on localhost.
|
||||||
|
|
|
@ -44,3 +44,6 @@ semmle-code
|
||||||
|
|
||||||
.installed.package-lock.json
|
.installed.package-lock.json
|
||||||
assets/images/help/writing/unordered-list-rendered (1).png
|
assets/images/help/writing/unordered-list-rendered (1).png
|
||||||
|
|
||||||
|
# Used by getRemoteJSON()
|
||||||
|
.remotejson-cache/
|
||||||
|
|
|
@ -16,6 +16,7 @@ import { readCompressedJsonFileFallbackLazily } from '../lib/read-json-file.js'
|
||||||
import { archivedCacheControl, languageCacheControl } from './cache-control.js'
|
import { archivedCacheControl, languageCacheControl } from './cache-control.js'
|
||||||
import { pathLanguagePrefixed, languagePrefixPathRegex } from '../lib/languages.js'
|
import { pathLanguagePrefixed, languagePrefixPathRegex } from '../lib/languages.js'
|
||||||
import getRedirect, { splitPathByLanguage } from '../lib/get-redirect.js'
|
import getRedirect, { splitPathByLanguage } from '../lib/get-redirect.js'
|
||||||
|
import getRemoteJSON from './get-remote-json.js'
|
||||||
|
|
||||||
const REMOTE_ENTERPRISE_STORAGE_URL = 'https://githubdocs.azureedge.net/enterprise'
|
const REMOTE_ENTERPRISE_STORAGE_URL = 'https://githubdocs.azureedge.net/enterprise'
|
||||||
|
|
||||||
|
@ -75,29 +76,6 @@ const retryConfiguration = { limit: 3 }
|
||||||
// unnecessary error reporting.
|
// unnecessary error reporting.
|
||||||
const timeoutConfiguration = { response: 1500 }
|
const timeoutConfiguration = { response: 1500 }
|
||||||
|
|
||||||
/**
 * Download `url` with `got()` and return the parsed JSON body, memoized
 * per URL in the module-level `_getRemoteJSONCache` Map.
 *
 * @param {string} url - The URL to fetch; also used as the cache key.
 * @param {object} config - Options passed straight through to `got()`.
 *   Only used on a cache miss.
 * @returns {Promise<any>} The deserialized JSON payload.
 * @throws {Error} If the response's Content-Type is missing or is not
 *   `application/json`.
 */
async function getRemoteJSON(url, config) {
  let fromCache = true
  if (!_getRemoteJSONCache.has(url)) {
    fromCache = false
    // got will, by default, follow redirects and it will throw if the ultimate
    // response is not a 2xx.
    // But it's possible that the page is a 200 OK but it's just not a JSON
    // page at all. Then we can't assume we can deserialize it.
    const res = await got(url, config)
    // The header can be absent entirely. Fall back to an empty string so
    // that case is reported as a non-JSON response rather than a TypeError.
    const contentType = res.headers['content-type'] || ''
    if (!contentType.startsWith('application/json')) {
      throw new Error(
        `Fetching '${url}' resulted in a non-JSON response (${res.headers['content-type']})`
      )
    }

    _getRemoteJSONCache.set(url, JSON.parse(res.body))
  }
  const tags = [`url:${url}`, `from_cache:${fromCache}`]
  statsd.increment('middleware.archived_get_remote_json', 1, tags)
  return _getRemoteJSONCache.get(url)
}
|
|
||||||
const _getRemoteJSONCache = new Map()
|
|
||||||
|
|
||||||
// This module handles requests for deprecated GitHub Enterprise versions
|
// This module handles requests for deprecated GitHub Enterprise versions
|
||||||
// by routing them to static content in help-docs-archived-enterprise-versions
|
// by routing them to static content in help-docs-archived-enterprise-versions
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,87 @@
|
||||||
|
import path from 'path'
|
||||||
|
import fs from 'fs'
|
||||||
|
import crypto from 'crypto'
|
||||||
|
|
||||||
|
import got from 'got'
|
||||||
|
import statsd from '../lib/statsd.js'
|
||||||
|
|
||||||
|
// The only reason this is exported is for the sake of the unit tests'
|
||||||
|
// ability to test in-memory miss after purging this with a mutation
|
||||||
|
export const cache = new Map()
|
||||||
|
|
||||||
|
const inProd = process.env.NODE_ENV === 'production'
|
||||||
|
|
||||||
|
// Wrapper on `got()` that is able to both cache in memory and on disk.
|
||||||
|
// The on-disk caching is in `.remotejson-cache/`.
|
||||||
|
// We use this for downloading `redirects.json` files from the
|
||||||
|
// help-docs-archived-enterprise-versions repo as a proxy. A lot of those
|
||||||
|
// .json files are large and they're also static which makes them
|
||||||
|
// ideal for caching.
|
||||||
|
// Note that there's 2 layers of caching here:
|
||||||
|
// 1. Is it in memory cache?
|
||||||
|
// 2. No, is it on disk?
|
||||||
|
// 3. No, download from the internet then store responses in memory and disk
|
||||||
|
/**
 * Fetch `url` with `got()` and return the parsed JSON body, using an
 * in-memory cache and, outside production, a disk cache.
 *
 * Lookup order: the in-memory `cache` Map, then (when `NODE_ENV` is not
 * 'production') a file on disk, and finally the network. A network hit is
 * stored in memory and, outside production, written to disk.
 *
 * The disk cache directory is read from `GET_REMOTE_JSON_DISK_CACHE_ROOT`
 * on every cache miss and defaults to `.remotejson-cache`.
 *
 * @param {string} url - The URL to fetch; also used as the cache key.
 * @param {object} config - Options passed straight through to `got()`.
 *   Only used when neither cache has the URL.
 * @returns {Promise<any>} The deserialized JSON payload.
 * @throws {Error} If the response's Content-Type is missing or is not
 *   `application/json`.
 */
export default async function getRemoteJSON(url, config) {
  // We could get fancy and make the cache key depend on the `config` too,
  // but we don't, given that this is A) only used for archived enterprise
  // stuff, and B) the config is only applicable on cache miss when doing
  // the `got()`.
  const cacheKey = url

  // Assume it's in the in-memory cache first.
  // Later we'll update this if we find we need to.
  let fromCache = 'memory'

  if (!cache.has(cacheKey)) {
    fromCache = 'not'

    let foundOnDisk = false
    // Hash the URL to get a fixed-length name that is safe as a filename.
    const tempFilename = crypto.createHash('md5').update(url).digest('hex')

    // Do this here instead of at the top of the file so that it becomes
    // possible to override this in unit tests.
    const ROOT = process.env.GET_REMOTE_JSON_DISK_CACHE_ROOT || '.remotejson-cache'

    const onDisk = path.join(ROOT, `${tempFilename}.json`)
    // Never even try reading from disk in production.
    if (!inProd && fs.existsSync(onDisk)) {
      const body = fs.readFileSync(onDisk, 'utf-8')
      // It might exist on disk, but it could be empty
      if (body) {
        try {
          // It might be corrupted JSON.
          cache.set(cacheKey, JSON.parse(body))
          fromCache = 'disk'
          foundOnDisk = true
        } catch (error) {
          // A SyntaxError means the file is corrupt: ignore it and fall
          // through to the network. Anything else is unexpected.
          if (!(error instanceof SyntaxError)) {
            throw error
          }
        }
      }
    }

    if (!foundOnDisk) {
      // got will, by default, follow redirects and it will throw if the ultimate
      // response is not a 2xx.
      // But it's possible that the page is a 200 OK but it's just not a JSON
      // page at all. Then we can't assume we can deserialize it.
      const res = await got(url, config)
      // The header can be absent entirely. Fall back to an empty string so
      // that case is reported as a non-JSON response rather than a TypeError.
      const contentType = res.headers['content-type'] || ''
      if (!contentType.startsWith('application/json')) {
        throw new Error(
          `Fetching '${url}' resulted in a non-JSON response (${res.headers['content-type']})`
        )
      }
      cache.set(cacheKey, JSON.parse(res.body))

      // Only write to disk for testing and local preview.
      // In production, we never write to disk. Only in-memory.
      if (!inProd) {
        fs.mkdirSync(path.dirname(onDisk), { recursive: true })
        fs.writeFileSync(onDisk, res.body, 'utf-8')
      }
    }
  }
  const tags = [`url:${url}`, `from_cache:${fromCache}`]
  statsd.increment('middleware.get_remote_json', 1, tags)
  return cache.get(cacheKey)
}
|
|
@ -0,0 +1,115 @@
|
||||||
|
import fs from 'fs'
|
||||||
|
import path from 'path'
|
||||||
|
import os from 'os'
|
||||||
|
|
||||||
|
import rimraf from 'rimraf'
|
||||||
|
import { expect, test, describe, beforeAll, afterAll, afterEach } from '@jest/globals'
|
||||||
|
import nock from 'nock'
|
||||||
|
import getRemoteJSON, { cache } from '../../middleware/get-remote-json.js'
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* These unit tests test that the in-memory cache works and when it's
|
||||||
|
* not a cache hit, it can benefit from using the disk cache.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Exercises getRemoteJSON()'s two cache layers (memory and disk) against a
// nock-mocked network, with the disk cache redirected to a temp directory.
describe('getRemoteJSON', () => {
  const envVarValueBefore = process.env.GET_REMOTE_JSON_DISK_CACHE_ROOT
  const tempDir = path.join(os.tmpdir(), 'remotejson-test')

  beforeAll(() => {
    process.env.GET_REMOTE_JSON_DISK_CACHE_ROOT = tempDir
  })

  afterAll(() => {
    // Assigning `undefined` to a `process.env` key stores the string
    // 'undefined', so delete the key if it wasn't set to begin with.
    if (envVarValueBefore === undefined) {
      delete process.env.GET_REMOTE_JSON_DISK_CACHE_ROOT
    } else {
      process.env.GET_REMOTE_JSON_DISK_CACHE_ROOT = envVarValueBefore
    }
    rimraf.sync(tempDir)
  })

  afterEach(() => {
    nock.cleanAll()
    // Some tests point the disk cache at their own sub-directory.
    // Reset it so that choice doesn't leak into whichever test runs next.
    process.env.GET_REMOTE_JSON_DISK_CACHE_ROOT = tempDir
  })

  test('simple in-memory caching', async () => {
    const url = 'http://example.com/redirects.json'
    const { origin, pathname } = new URL(url)
    nock(origin).get(pathname).reply(200, { foo: 'bar' })
    const data = await getRemoteJSON(url, {})
    expect(data.foo).toBe('bar')
    expect(cache.get(url)).toBeTruthy()
    // Second time, despite not setting up a second nock(), will work
    // because it can use memory now.
    const data2 = await getRemoteJSON(url, {})
    expect(data2.foo).toBe('bar')
    expect(cache.get(url)).toBeTruthy()
  })

  test('benefit from disk-based caching', async () => {
    const url = 'http://example.com/cool.json'
    const { origin, pathname } = new URL(url)
    nock(origin).get(pathname).reply(200, { cool: true })
    const data = await getRemoteJSON(url, {})
    expect(data.cool).toBe(true)
    expect(cache.get(url)).toBeTruthy()
    cache.delete(url)

    // This time, the nock won't fail despite not using `.persist()`.
    // That means it didn't need the network because it was able to
    // use the disk cache.
    const data2 = await getRemoteJSON(url, {})
    expect(data2.cool).toBe(true)
  })

  test('recover from disk corruption (empty)', async () => {
    const tempTempDir = path.join(tempDir, 'empty-files')
    process.env.GET_REMOTE_JSON_DISK_CACHE_ROOT = tempTempDir
    const url = 'http://example.com/empty.json'
    const { origin, pathname } = new URL(url)
    nock(origin).get(pathname).reply(200, { cool: true })
    await getRemoteJSON(url, {})

    // Make every file in the cache directory an empty file
    for (const file of fs.readdirSync(tempTempDir)) {
      fs.writeFileSync(path.join(tempTempDir, file), '')
    }

    cache.delete(url)
    // If we don't do this, nock will fail because a second network
    // request became necessary.
    nock(origin).get(pathname).reply(200, { cool: true })

    const data = await getRemoteJSON(url, {})
    expect(data.cool).toBe(true)
  })

  test('recover from disk corruption (bad JSON)', async () => {
    const tempTempDir = path.join(tempDir, 'corrupt-files')
    process.env.GET_REMOTE_JSON_DISK_CACHE_ROOT = tempTempDir
    const url = 'http://example.com/corrupt.json'
    const { origin, pathname } = new URL(url)
    nock(origin).get(pathname).reply(200, { cool: true })
    await getRemoteJSON(url, {})

    // Overwrite every file in the cache directory with invalid JSON
    for (const file of fs.readdirSync(tempTempDir)) {
      fs.writeFileSync(path.join(tempTempDir, file), '{"not:JSON{')
    }

    cache.delete(url)
    // If we don't do this, nock will fail because a second network
    // request became necessary.
    nock(origin).get(pathname).reply(200, { cool: true })

    const data = await getRemoteJSON(url, {})
    expect(data.cool).toBe(true)
  })

  test('not-actually JSON despite URL', async () => {
    const url = 'http://example.com/might-look-like.json'
    const { origin, pathname } = new URL(url)
    nock(origin).get(pathname).reply(200, '<html>here</html>', {
      'Content-Type': 'text/html',
    })
    await expect(getRemoteJSON(url, {})).rejects.toThrowError(/resulted in a non-JSON response/)
  })
})
|
Загрузка…
Ссылка в новой задаче