2021-07-14 23:49:18 +03:00
|
|
|
import path from 'path'
|
2022-11-17 16:08:49 +03:00
|
|
|
|
2021-07-14 23:49:18 +03:00
|
|
|
import languages from './languages.js'
|
2021-07-22 22:29:00 +03:00
|
|
|
import { allVersions } from './all-versions.js'
|
2022-02-04 17:31:56 +03:00
|
|
|
import createTree, { getBasePath } from './create-tree.js'
|
2021-07-14 23:49:18 +03:00
|
|
|
import nonEnterpriseDefaultVersion from './non-enterprise-default-version.js'
|
2022-02-04 17:31:56 +03:00
|
|
|
import Page from './page.js'
|
2022-07-22 21:32:30 +03:00
|
|
|
|
2021-07-22 22:29:00 +03:00
|
|
|
// Every known version identifier (the keys of `allVersions`). The site tree
// is built once per entry in this list, for every language.
const versions = Object.keys(allVersions)
|
2021-03-29 23:46:06 +03:00
|
|
|
|
2022-02-04 17:31:56 +03:00
|
|
|
// These are the exceptions to the rule.
// If an English relative path starts with one of these prefixes, it basically
// means we don't bother to "backfill" a translation in its spot.
// For example, `/en/github/site-policy-deprecated/foo` works
// only in English and we don't bother making `/ja/github/site-policy-deprecated/foo`
// work too.
const TRANSLATION_DRIFT_EXCEPTIONS = [
  'github/site-policy-deprecated',
  // Early access stuff never has translations.
  'early-access',
]
|
|
|
|
|
2021-03-30 03:14:01 +03:00
|
|
|
/**
 * We only need to initialize pages _once per language_ since pages don't change per version. So we do that
 * first since it's the most expensive work. This gets us a nested object with pages attached that we can use
 * as the basis for the siteTree after we do some versioning. We can also use it to derive the pageList.
 *
 * @param {string[] | null} [languagesOnly] - When given, only these language codes are loaded;
 *   otherwise every entry of `languages` is loaded.
 * @returns {Promise<Object>} An object keyed by language code whose values are whatever
 *   `createTree` produced for that language's `content` directory.
 * @throws {Error} If `languagesOnly` is truthy but not an array.
 */
export async function loadUnversionedTree(languagesOnly = null) {
  if (languagesOnly && !Array.isArray(languagesOnly)) {
    throw new Error("'languagesOnly' has to be an array")
  }

  // Keep only the language definitions that were asked for (or all of them).
  const selectedLanguages = Object.entries(languages)
    .filter(([code]) => !languagesOnly || languagesOnly.includes(code))
    .map(([, langObj]) => langObj)

  const unversionedTree = {}

  // The trees for all languages are built concurrently.
  await Promise.all(
    selectedLanguages.map(async (langObj) => {
      const localizedContentPath = path.posix.join(langObj.dir, 'content')
      unversionedTree[langObj.code] = await createTree(localizedContentPath, langObj)
    })
  )

  return unversionedTree
}
|
|
|
|
|
|
|
|
/**
 * The siteTree is a nested object with pages for every language and version, useful for nav because it
 * contains parent, child, and sibling relationships:
 *
 * siteTree[languageCode][version].childPages[<array of pages>].childPages[<array of pages>] (etc...)
 *
 * Given an unversioned tree of all pages per language, we can walk it for each version and do a couple operations:
 * 1. Add a versioned href to every item, where the href is the relevant permalink for the current version.
 * 2. Drop any child pages that are not available in the current version.
 *
 * Order of languages and versions doesn't matter, but order of child page arrays DOES matter (for navigation).
 *
 * @param {Object} [unversionedTree] - Tree keyed by language code. When omitted, it is loaded
 *   with `loadUnversionedTree()`.
 * @returns {Promise<Object>} `siteTree[languageCode][version]` for every language present in
 *   the unversioned tree and every known version.
 */
export async function loadSiteTree(unversionedTree) {
  const rawTree = unversionedTree || (await loadUnversionedTree())

  // Only languages that are actually present in the tree can be versioned.
  // A tree that was loaded for a subset of languages simply yields a site
  // tree for that subset instead of crashing on the missing entries.
  const langCodes = Object.keys(languages).filter((langCode) => rawTree[langCode])

  // For every language...
  const perLanguage = await Promise.all(
    langCodes.map(async (langCode) => {
      // in every version...
      const perVersion = await Promise.all(
        versions.map(async (version) => {
          // "version" the pages. `versionPages` writes onto the object it is
          // given, so hand it a shallow copy of the language's root node.
          const versioned = await versionPages(
            Object.assign({}, rawTree[langCode]),
            version,
            langCode
          )
          return [version, versioned]
        })
      )
      return [langCode, Object.fromEntries(perVersion)]
    })
  )

  return Object.fromEntries(perLanguage)
}
|
2021-03-29 23:46:06 +03:00
|
|
|
|
2022-11-17 16:08:49 +03:00
|
|
|
/**
 * Recursively "versions" one node of the page tree.
 *
 * The node passed in is written to (`href` and `childPages` are assigned) and
 * returned, so callers hand in a copy when the original must stay untouched.
 * Child nodes are shallow-copied before being versioned.
 *
 * @param {Object} obj - Tree node with a `page` and, optionally, `childPages`.
 * @param {string} version - The version to build the node for.
 * @param {string} langCode - Language of the tree; only used in error messages.
 * @returns {Promise<Object>} The same `obj`, with `href` set and `childPages`
 *   reduced to the versioned children that apply to `version`.
 * @throws {Error} If the node's page has no permalink for `version`.
 */
export async function versionPages(obj, version, langCode) {
  // Add a versioned href as a convenience for use in layouts.
  // A 'homepage' permalink stands in for the non-enterprise default version.
  const permalink = obj.page.permalinks.find(
    (pl) =>
      pl.pageVersion === version ||
      (pl.pageVersion === 'homepage' && version === nonEnterpriseDefaultVersion)
  )
  if (!permalink) {
    throw new Error(
      `No permalink for version '${version}' on page '${obj.page.relativePath}' (language '${langCode}')`
    )
  }
  obj.href = permalink.href

  if (!obj.childPages) return obj

  obj.childPages = await Promise.all(
    obj.childPages
      // Drop child pages that do not apply to the current version
      .filter((childPage) => childPage.page.applicableVersions.includes(version))
      // Version the child pages recursively.
      .map((childPage) => versionPages(Object.assign({}, childPage), version, langCode))
  )

  return obj
}
|
|
|
|
|
|
|
|
/**
 * Derive a flat array of Page objects in all languages.
 *
 * Pages are collected language by language, and within a language in
 * pre-order (a node's page first, then its children in array order).
 *
 * @param {Object} [unversionedTree] - Tree keyed by language code. When omitted,
 *   it is loaded with `loadUnversionedTree(languagesOnly)`.
 * @param {string[] | null} [languagesOnly] - When given, only these language
 *   codes are walked; otherwise every key of `languages` is.
 * @returns {Promise<Array>} The `page` of every visited tree node.
 * @throws {Error} If `languagesOnly` is truthy but not an array.
 */
export async function loadPageList(unversionedTree, languagesOnly = null) {
  if (languagesOnly && !Array.isArray(languagesOnly)) {
    throw new Error("'languagesOnly' has to be an array")
  }
  const rawTree = unversionedTree || (await loadUnversionedTree(languagesOnly))
  const pageList = []

  // Nothing in the walk is asynchronous, so a plain recursive function
  // collects the pages in the same order without creating promises.
  const collectPages = (item) => {
    // A language that is missing from the tree is skipped rather than
    // crashing; a node without a page ends that branch.
    if (!item || !item.page) return
    pageList.push(item.page)

    if (!item.childPages) return
    for (const childPage of item.childPages) {
      collectPages(childPage)
    }
  }

  for (const langCode of languagesOnly || Object.keys(languages)) {
    collectPages(rawTree[langCode])
  }

  return pageList
}
|
|
|
|
|
2021-07-14 23:49:18 +03:00
|
|
|
// Alias: `loadPages` is the same function as `loadPageList`.
export const loadPages = loadPageList
|
|
|
|
|
2021-04-01 19:34:46 +03:00
|
|
|
/**
 * Create an object from the list of all pages with permalinks as keys for fast lookup.
 *
 * @param {Array} pageList - Pages, each with a `permalinks` array of `{ href }` entries.
 * @returns {Object} Map of permalink `href` to the page that owns it. If two pages
 *   share an href, the page later in `pageList` wins.
 */
export function createMapFromArray(pageList) {
  const pageMap = {}

  for (const page of pageList) {
    for (const permalink of page.permalinks) {
      pageMap[permalink.href] = page
    }
  }

  return pageMap
}
|
|
|
|
|
2021-07-15 00:35:01 +03:00
|
|
|
/**
 * Build the permalink-to-page lookup object.
 *
 * The page list (loaded with `loadPageList()` when not supplied) is first run
 * through `correctTranslationOrphans`, and the result is indexed with
 * `createMapFromArray`.
 *
 * @param {Array} [pageList] - Pages to index.
 * @returns {Promise<Object>} Object keyed by permalink href.
 */
export async function loadPageMap(pageList) {
  const pages = await correctTranslationOrphans(pageList || (await loadPageList()))
  return createMapFromArray(pages)
}
|
|
|
|
|
|
|
|
/**
 * If a translation page exists that doesn't have an English equivalent,
 * remove it.
 * If an English page exists that doesn't have a translation equivalent,
 * add it.
 * Note, this function is exported purely for the benefit of the unit tests.
 *
 * @param {Array} pageList - Pages with at least `languageCode` and `relativePath`.
 * @param {string | null} [basePath] - Base path handed to `Page.init` for
 *   backfilled pages. Defaults to `getBasePath(languages.en.dir)`.
 * @returns {Promise<Array>} A new list: the kept pages in their original order,
 *   followed by the backfilled pages.
 */
export async function correctTranslationOrphans(pageList, basePath = null) {
  const englishRelativePaths = new Set()
  for (const page of pageList) {
    if (page.languageCode === 'en') {
      englishRelativePaths.add(page.relativePath)
    }
  }

  // Prime the Map with an empty set for each language prefix.
  // It's important that we do this for *every* language rather than
  // just populating `nonEnglish` based on those pages that *are* present.
  // Otherwise, we won't have an index of all the languages
  // that *might* be missing.
  const nonEnglish = new Map()
  for (const languageCode of Object.keys(languages)) {
    if (languageCode !== 'en') {
      nonEnglish.set(languageCode, new Set())
    }
  }

  // By default, when backfilling, we set the `basePath` to be that of
  // English. But for the benefit of being able to do unit tests,
  // we make this an optional argument. Then, unit tests can use
  // its "tests/fixtures" directory.
  const englishBasePath = basePath || getBasePath(languages.en.dir)

  // Filter out all non-English pages that appear to be excess.
  // E.g. if an English doc was renamed from `content/foo.md` to
  // `content/bar.md` what will happen is that `TRANSLATIONS_ROOT/*/content/foo.md`
  // will still linger around and we want to remove that even if it was
  // scooped up from disk.
  const newPageList = []
  for (const page of pageList) {
    if (page.languageCode === 'en') {
      // English pages are never considered "excess"
      newPageList.push(page)
    } else if (englishRelativePaths.has(page.relativePath)) {
      // If this translation page exists in English, keep it but also
      // add it to the set of relative paths that is known.
      nonEnglish.get(page.languageCode).add(page.relativePath)
      newPageList.push(page)
    }
    // Anything else is a translation without an English counterpart: drop it.
  }

  const pageLoadPromises = []
  for (const relativePath of englishRelativePaths) {
    for (const [languageCode, relativePaths] of nonEnglish) {
      if (relativePaths.has(relativePath)) continue

      // At this point, we've found an English `relativePath` that is
      // not used by this language.
      // But before we decide to "backfill" it from the English equivalent
      // we first need to figure out if it should be excluded.
      // The reason for doing this check this late is for the benefit
      // of optimization. In general, when the translation pipeline has
      // done its magic, this should be very rare, so it's unnecessary
      // to do this exception check on every single English relativePath.
      if (TRANSLATION_DRIFT_EXCEPTIONS.some((exception) => relativePath.startsWith(exception))) {
        continue
      }

      // The magic right here!
      // The trick is that we can't clone instances of class Page. We need
      // to create them for this language. But the trick is that we
      // use the English relative path so it can have something to read.
      // For example, if we have figured out that
      // `TRANSLATIONS_ROOT/ja-JP/content/foo.md` doesn't exist, we pretend
      // that we can use `foo.md` and the base path of `content/`.
      pageLoadPromises.push(
        Page.init({
          basePath: englishBasePath,
          relativePath,
          languageCode,
        })
      )
    }
  }

  // Append one by one: spreading a very large array into `push(...)` can
  // exceed the engine's argument-count limit.
  const additionalPages = await Promise.all(pageLoadPromises)
  for (const page of additionalPages) {
    newPageList.push(page)
  }

  return newPageList
}
|
2021-03-29 23:46:06 +03:00
|
|
|
|
2021-07-14 23:49:18 +03:00
|
|
|
// Default export bundling the loaders; `loadPages` is exposed under that name
// but is the `loadPageList` function.
export default {
  loadUnversionedTree,
  loadSiteTree,
  loadPages: loadPageList,
  loadPageMap,
}
|