зеркало из https://github.com/github/docs.git
356 строки
13 KiB
JavaScript
356 строки
13 KiB
JavaScript
import fs from 'fs'
|
|
import path from 'path'
|
|
|
|
import yaml from 'js-yaml'
|
|
import matter from 'gray-matter'
|
|
import { merge, get } from 'lodash-es'
|
|
|
|
import languages from './languages.js'
|
|
import { correctTranslatedContentStrings } from './correct-translation-content.js'
|
|
|
|
// Opt-in debug switch. After running `export DEBUG_JIT_DATA_READS=true`
// in your terminal, every file read from disk gets mentioned.
// The environment value is parsed as JSON and coerced to a boolean;
// when the variable is unset or empty it is treated as 'false'.
const rawDebugJitDataReads = process.env.DEBUG_JIT_DATA_READS || 'false'
const DEBUG_JIT_DATA_READS = Boolean(JSON.parse(rawDebugJitDataReads))
|
|
|
|
// Yaml files for which we skip the translation entirely and go straight
// to English. Keeping this list is safer than trying to wrangle the
// translations into NOT translating these files.
const ALWAYS_ENGLISH_YAML_FILES = new Set()
  .add('data/variables/product.yml')
  .add('data/variables/release_candidate.yml')
|
|
|
|
// Returns everything found inside the `data/` directory addressed by
// `dottedPath`, read from the directory that belongs to `langCode`.
// Throws if `langCode` is not a key of `languages`.
// Wrapped in memoize(), which keys on the arguments.
export const getDeepDataByLanguage = memoize((dottedPath, langCode) => {
  const isKnownLanguage = langCode in languages
  if (!isKnownLanguage) {
    throw new Error(`langCode '${langCode}' not a recognized language code`)
  }
  return getDeepDataByDir(dottedPath, languages[langCode].dir)
})
|
|
|
|
// Reads the whole directory tree at `data/<dottedPath>` (dots become path
// separators) below `dir` and returns it as one nested object.
//
// Doesn't need to be memoized because it's used by getDeepDataByLanguage
// which is already memoized.
function getDeepDataByDir(dottedPath, dir) {
  const relPath = ['data', ...dottedPath.split('.')].join(path.sep)
  return readDataTree(dir, relPath)
}

// Recursive worker for getDeepDataByDir().
// It walks real relative paths instead of rebuilding a dotted path for
// every level. Rebuilding and re-splitting on '.' would break apart any
// subdirectory whose own name contains a dot.
//
// Keys of the returned object:
//   - directory      -> the directory name, value is the nested object
//   - `*.yml` file   -> the file name without `.yml`, value is the parsed Yaml
//   - `*.md` file    -> the file name (extension kept), value is the Markdown
// `README.md` entries are skipped. Any other file type throws.
function readDataTree(dir, relPath) {
  const things = {}
  for (const dirent of getDirents(dir, relPath)) {
    if (dirent.name === 'README.md') continue

    const entryPath = path.join(relPath, dirent.name)
    if (dirent.isDirectory()) {
      things[dirent.name] = readDataTree(dir, entryPath)
    } else if (dirent.name.endsWith('.yml')) {
      // e.g. '3-5' or '0-rc2'
      const key = dirent.name.replace(/\.yml$/, '')
      things[key] = getYamlContent(dir, entryPath)
    } else if (dirent.name.endsWith('.md')) {
      things[dirent.name] = getMarkdownContent(dir, entryPath)
    } else {
      throw new Error(`don't know how to read '${dirent.name}'`)
    }
  }
  return things
}
|
|
|
|
// Lists the entries (as fs.Dirent objects) of the directory `relPath`.
// When `root` is truthy the directory is resolved below `root`,
// otherwise `relPath` is used exactly as given.
function getDirents(root, relPath) {
  let directory = relPath
  if (root) {
    directory = path.join(root, relPath)
  }
  const readOptions = { withFileTypes: true }
  return fs.readdirSync(directory, readOptions)
}
|
|
|
|
// Returns the UI strings for `langCode` with English as the base layer.
// For 'en' the English data is returned as-is. For any other language
// the translation is deep-merged on top of the English data, so keys
// missing from the translation keep their English value. E.g.
//
//   english  = {food: "Food", drink: "Drink"}
//   swedish  = {food: "Mat"}
//   =>
//   combined = {food: "Mat", drink: "Drink"}
export const getUIDataMerged = memoize((langCode) => {
  const englishStrings = getUIData('en')
  if (langCode === 'en') {
    return englishStrings
  }
  // Later sources win, so the translation overrides the English values.
  return merge({}, englishStrings, getUIData(langCode))
})
|
|
|
|
// Loads the parsed `data/ui.yml` from the directory of `langCode`.
// Not memoized on its own: its only caller in this file is memoized, and
// so is getYamlContent().
const getUIData = (langCode) => {
  const relPath = ['data', 'ui.yml'].join(path.sep)
  return getYamlContent(languages[langCode].dir, relPath)
}
|
|
|
|
// Looks up one piece of data (e.g. 'variables.product.prodname_dotcom' or
// 'reusables.foo.bar') for a language, falling back to English when the
// translation is missing the key or contains unparsable Yaml.
//
// - Throws if `langCode` is not a key of `languages`.
// - Returns `undefined` if reading fails with ENOENT.
// - Re-throws Yaml parse errors for English, and any other error.
//
// Wrapped in memoize(), which keys on the arguments.
export const getDataByLanguage = memoize((dottedPath, langCode) => {
  if (!(langCode in languages)) {
    throw new Error(`langCode '${langCode}' not a recognized language code`)
  }
  const { dir } = languages[langCode]
  const englishRoot = languages.en.dir

  // Used for every "start over in English" fallback below. The English
  // root is passed as both the directory to read and as `englishRoot`.
  // Without the latter, getDataByDir() would see `dir !== englishRoot`
  // and treat the English content as if it was a translation.
  const readEnglish = () => getDataByDir(dottedPath, englishRoot, englishRoot)

  try {
    const value = getDataByDir(dottedPath, dir, englishRoot)

    // What could happen is that a new key has only been added to
    // the English data/ui.yml but hasn't been added to Japanese, but
    // there nevertheless exists a Japanese `data/ui.yml`.
    // Since getDataByDir() uses `get(dataObject, 'dott.ed.path')` it
    // will return `undefined` if it's not present.
    // If this happens, we can't rely on `err.code === 'ENOENT'` to
    // fall back to the English one. So we just start over using the
    // English data.
    if (value === undefined && langCode !== 'en') {
      return readEnglish()
    }
    return value
  } catch (error) {
    if (error instanceof Error && error.mark && error.message) {
      // It's a yaml.load() generated error!
      // Remember, the file that we read might have been a .yml or a .md
      // file. If it was a .md file, with corrupt front-matter that too
      // would have caused a YAMLException
      if (langCode !== 'en') {
        if (DEBUG_JIT_DATA_READS) {
          console.warn(`Unable to parse Yaml in (${langCode}) '${dottedPath}': ${error.message}`)
        }
        // Give it one more chance, but use English this time
        return readEnglish()
      }
      // Always throw English Yaml reading errors. Staff writers
      // need to know early and explicitly that they are corrupt.
      throw error
    }

    if (error.code === 'ENOENT') return undefined
    throw error
  }
})
|
|
|
|
// Resolves a dotted data path to a file below `<dir>/data/` and returns
// the addressed value.
//
//   dottedPath   e.g. 'variables.product.prodname_dotcom',
//                'reusables.foo.bar', 'ui.pages.foo.bar',
//                optionally prefixed with 'early-access.'
//   dir          root directory to read from
//   englishRoot  root directory of the English content; handed to the
//                file readers and compared against `dir` to decide
//                whether the content being read is a translation
//
// The first segment (after an optional 'early-access') selects the kind:
//   variables         -> one key of data/variables/**/<basename>.yml,
//                        or `undefined` if the file is empty or the
//                        value is falsy
//   reusables         -> content of data/reusables/**/<name>.md
//   ui                -> lodash `get()` into data/ui.yml
//   product-examples,
//   glossaries,
//   release-notes     -> the whole parsed Yaml file
//   learning-tracks   -> one key of data/learning-tracks/**/<basename>.yml,
//                        or `undefined` if the file is empty
// Anything else throws.
function getDataByDir(dottedPath, dir, englishRoot) {
  const fullPath = ['data']

  // Using English here because it doesn't matter. We just want to
  // figure out how to turn `foo.version-3.4.deeper.key' into
  // `['foo', 'version-3.4', 'deeper', 'key']` here and we'll need
  // any directory to do that and English is always the most up-to-date.
  // We need the getSmartSplit() as long as there's a chance that a
  // directory or file inside data/ might contain a dot in the name,
  // however the exception is the file names in data/release-notes/**/*.yml
  // because it contains files that are just numbers like 3-7/0.yml and
  // that can cause problems inside getSmartSplit().
  const split = dottedPath.startsWith('release-notes')
    ? dottedPath.split('.')
    : getSmartSplit(dottedPath)

  // For early-access data stuff, they're referred to as...
  //
  //    {% data early-access.reusables.foo.bar %}
  //
  // When we "merge" in the early-access data, we put the whole directory
  // within the root `data/` so it exists, on disk, as
  //
  //    data/early-access/reusables/foo/bar.md
  //
  if (split[0] === 'early-access') {
    fullPath.push(split.shift())
  }
  const first = split[0]

  if (first === 'variables') {
    // The last segment is the key inside the Yaml file, the one before
    // it is the file's basename, and what remains are directories.
    const key = split.pop()
    const basename = split.pop()
    fullPath.push(...split)
    fullPath.push(`${basename}.yml`)
    const allData = getYamlContent(dir, fullPath.join(path.sep), englishRoot)
    if (allData) {
      const value = allData[key]
      if (value) {
        return matter(value).content
      }
    } else {
      console.warn(`Unable to find variables Yaml file ${fullPath.join(path.sep)}`)
    }
    return
  }

  if (first === 'reusables') {
    const nakedname = split.pop()
    fullPath.push(...split)
    fullPath.push(`${nakedname}.md`)
    const markdown = getMarkdownContent(dir, fullPath.join(path.sep), englishRoot)
    let { content } = matter(markdown)
    if (dir !== englishRoot) {
      // If we're reading a translation, we need to replace the possible
      // corruptions. For example `[AUTOTITLE"을](/foo/bar)`.
      // To do this we'll need the English equivalent
      let englishContent = content
      try {
        englishContent = getMarkdownContent(englishRoot, fullPath.join(path.sep), englishRoot)
      } catch (error) {
        // In some real but rare cases a reusable doesn't exist in English.
        // At all.
        // This can happen when the translation is really out of date.
        // You might have an old `docs-internal.locale/content/**/*.md`
        // file that mentions `{% data reusables.foo.bar %}`. And it's
        // working fine, except none of that exists in English.
        // If this is the case, we still want to execute the
        // correctTranslatedContentStrings() function, but we can't
        // genuinely give it the English equivalent content, which it
        // sometimes uses to correct some Liquid tags. At least other
        // good corrections might happen.
        if (error.code !== 'ENOENT') {
          throw error
        }
      }
      content = correctTranslatedContentStrings(content, englishContent)
    }
    return content
  }

  // E.g. {% data ui.pages.foo.bar %}
  if (first === 'ui') {
    const basename = split.shift() // i.e. 'ui'
    fullPath.push(`${basename}.yml`)
    const allData = getYamlContent(dir, fullPath.join(path.sep), englishRoot)
    return get(allData, split.join('.'))
  }

  if (first === 'product-examples' || first === 'glossaries' || first === 'release-notes') {
    const basename = split.pop()
    fullPath.push(...split)
    fullPath.push(`${basename}.yml`)
    return getYamlContent(dir, fullPath.join(path.sep), englishRoot)
  }

  if (first === 'learning-tracks') {
    const key = split.pop()
    const basename = split.pop()
    fullPath.push(...split)
    fullPath.push(`${basename}.yml`)
    const allData = getYamlContent(dir, fullPath.join(path.sep), englishRoot)
    // An empty Yaml file parses to `undefined`. Treat that the same as
    // a missing key instead of crashing on the property access.
    return allData ? allData[key] : undefined
  }

  throw new Error(`Can't find the key '${dottedPath}' in the scope.`)
}
|
|
|
|
// Splits a dotted path on '.', except that a segment ending in a digit
// followed by a segment starting with a digit is glued back together
// with the dot in between. E.g.
//
//   'foo.version-3.4.deeper.key' -> ['foo', 'version-3.4', 'deeper', 'key']
//
// Only pairs are glued: after a pair has been joined, scanning resumes
// with the segment that follows it.
function getSmartSplit(dottedPath) {
  const pieces = dottedPath.split('.')
  const result = []
  let index = 0
  while (index < pieces.length) {
    const current = pieces[index]
    const hasFollowing = index < pieces.length - 1
    if (hasFollowing && /\d$/.test(current) && /^\d/.test(pieces[index + 1])) {
      result.push(`${current}.${pieces[index + 1]}`)
      index += 2 // the following segment has been consumed too
    } else {
      result.push(current)
      index += 1
    }
  }
  return result
}
|
|
|
|
// Reads `relPath` (below `root`) and returns the parsed Yaml.
// `englishRoot` is handed to getFileContent().
//
// The reason this is memoized, even though the parent caller function
// (`getDataByLanguage`) is also memoized is because we might read
// the same file for two different keys. E.g.
//
//    getDataByLanguage('variables.product.prodname_ghe_server', 'en')
//    getDataByLanguage('variables.product.company_short', 'en')
//
// ...will actually depend on reading `data/variables/product.yml`. Twice.
// Well, actually not twice because we cache the disk reading. So the outcome
// becomes this:
//
//    1. getDataByLanguage('variables.product.prodname_ghe_server', 'en')
//      -> cache MISS
//      1.1. read and parse data/variables/product.yml
//        -> cache MISS
//    2. getDataByLanguage('variables.product.company_short', 'en')
//      -> cache MISS
//      2.1. read and parse data/variables/product.yml
//        -> cache HIT (Yay!)
//
const getYamlContent = memoize((root, relPath, englishRoot) => {
  // Certain Yaml files we know we always want the English one
  // no matter what the specified language is.
  // For example, we never want `data/variables/product.yml` translated
  // so we know to immediately fall back to the English one.
  //
  // ALWAYS_ENGLISH_YAML_FILES is written with forward slashes, but callers
  // build `relPath` with the platform separator. Normalize before the
  // lookup so the check also matches where path.sep is a backslash.
  const normalizedRelPath = relPath.split(path.sep).join('/')
  const alwaysEnglish = ALWAYS_ENGLISH_YAML_FILES.has(normalizedRelPath)

  // An empty root makes getFileContent() read `relPath` as given.
  // This forces it to read from English.
  const effectiveRoot = alwaysEnglish ? '' : root

  const fileContent = getFileContent(effectiveRoot, relPath, englishRoot)
  return yaml.load(fileContent, { filename: relPath })
})
|
|
|
|
// Reads a Markdown file and returns its body: whatever `matter()` reports
// as content, with trailing whitespace trimmed.
// Memoized because different lookups can end up reading the same file.
const getMarkdownContent = memoize((root, relPath, englishRoot) => {
  const raw = getFileContent(root, relPath, englishRoot)
  const { content } = matter(raw)
  return content.trimEnd()
})
|
|
|
|
// Reads `relPath` as UTF-8 text. When `root` is truthy the file is looked
// up below `root`, otherwise `relPath` is used as given.
// If the file does not exist (ENOENT) and `root` differs from
// `englishRoot`, the read is retried once below `englishRoot`.
// Every other error, and an ENOENT for the English root, is re-thrown.
const getFileContent = (root, relPath, englishRoot) => {
  let filePath = relPath
  if (root) {
    filePath = path.join(root, relPath)
  }
  if (DEBUG_JIT_DATA_READS) console.log('READ', filePath)

  try {
    return fs.readFileSync(filePath, 'utf-8')
  } catch (err) {
    // It might fail because that particular data entry doesn't yet
    // exist in a translation. In that case we can try again, but this
    // time using the English files.
    const fileIsMissing = err.code === 'ENOENT'
    if (fileIsMissing && root !== englishRoot) {
      return getFileContent(englishRoot, relPath, englishRoot)
    }
    throw err
  }
}
|
|
|
|
// Wraps `func` so that its result is cached per argument list.
//
//   func     the function to wrap; it is called with the same arguments
//            the wrapper receives
//   returns  a function with the same call signature as `func`
//
// The arguments are serialized with JSON.stringify() to form the cache
// key, so they are expected to be JSON-serializable (in this file they
// are strings or undefined).
//
// When NODE_ENV is 'development' nothing is cached and `func` is called
// every time.
function memoize(func) {
  const cache = new Map()
  return (...args) => {
    if (process.env.NODE_ENV === 'development') {
      // It is very possible that certain files, when caching is disabled,
      // are read multiple times in short succession. E.g. `product.yml`.
      // So how expensive is it to read these files excessively?
      // To answer that, we benchmarked it by sampling 10 files from the
      // most common files that are used from `data/`. In fact, we ran 100
      // runs of 10 *different* files. About 80% of them were `.yml` files.
      // As a median, it takes **0.5ms to read 10 files from disk**
      // all in a sync manner.
      // Since most files coming through here are `.yml` files (e.g.
      // product.yml and ui.yml) if you also do the `yaml.load()` of the
      // read content, that number becomes **2.1ms to read and parse 10 files**.
      // So in conclusion, not a lot of time.
      return func(...args)
    }

    // Serialize instead of joining with a separator character. With a
    // plain `args.join(':')` the calls ('a:b', 'c') and ('a', 'b:c')
    // would share one cache entry.
    const key = JSON.stringify(args)
    if (!cache.has(key)) {
      cache.set(key, func(...args))
    }
    const value = cache.get(key)

    // If what was stored in the cache is a mutable, this time, return
    // a shallow copy.
    // Otherwise, what *might* happen is this:
    //
    //    > const getNames = memoize(() => ["peter", "tucker"])
    //    > var names = getNames()
    //    > names.push("ashley")
    //    > var names2 = getNames()
    //    > names2.push("charlotte")
    //    > console.log(names2)
    //
    //    ["peter", "tucker", "ashley", "charlotte"]
    //
    // Note that these are shallow copies only.
    if (Array.isArray(value)) return [...value]
    // `typeof null` is 'object' too, and spreading null would turn a
    // cached `null` into `{}`. Hand null back untouched.
    if (value !== null && typeof value === 'object') return { ...value }
    return value
  }
}
|