зеркало из https://github.com/github/docs.git
Create translation-health-report.yml (#32486)
This commit is contained in:
Родитель
198459522d
Коммит
bd23217796
|
@ -0,0 +1,132 @@
|
|||
name: Translation health report

# **What it does**: Provides errors and summary statistics on rendering translated content.
# **Why we have it**: To improve our translations by having clearer visibility.
# **Who does it impact**: Docs engineering, Microsoft translators.

on:
  workflow_dispatch:
  schedule:
    - cron: '20 16 * * *' # Run every day at 16:20 UTC / 8:20 PST

permissions:
  contents: read

jobs:
  create-translation-health-report:
    name: Create translation health report
    if: github.repository == 'github/docs-internal'
    runs-on: ubuntu-latest
    # This sets a maximum execution time of 300 minutes (5 hours)
    # to prevent the workflow from running longer than necessary.
    timeout-minutes: 300
    strategy:
      # One language failing should not cancel the reports for the others.
      fail-fast: false
      max-parallel: 1
      matrix:
        include:
          - language: es
            language_dir: translations/es-ES
            language_repo: github/docs-internal.es-es

          - language: ja
            language_dir: translations/ja-JP
            language_repo: github/docs-internal.ja-jp

          - language: pt
            language_dir: translations/pt-BR
            language_repo: github/docs-internal.pt-br

          - language: cn
            language_dir: translations/zh-CN
            language_repo: github/docs-internal.zh-cn

          # We'll be ready to add the following languages in a future effort.

          # - language: ru
          #   language_dir: translations/ru-RU
          #   language_repo: github/docs-internal.ru-ru

          # - language: ko
          #   language_dir: translations/ko-KR
          #   language_repo: github/docs-internal.ko-kr

          # - language: fr
          #   language_dir: translations/fr-FR
          #   language_repo: github/docs-internal.fr-fr

          # - language: de
          #   language_dir: translations/de-DE
          #   language_repo: github/docs-internal.de-de

    steps:
      - name: Checkout the docs-internal repo
        uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748

      # Clear the in-repo translation so the language-specific repo
      # can be checked out into the same directory below.
      - name: Remove all language translations
        run: |
          git rm -rf --quiet ${{ matrix.language_dir }}/content
          git rm -rf --quiet ${{ matrix.language_dir }}/data

      - name: Checkout the language-specific repo
        uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748
        with:
          repository: ${{ matrix.language_repo }}
          token: ${{ secrets.DOCUBOT_READORG_REPO_WORKFLOW_SCOPES }}
          path: ${{ matrix.language_dir }}

      # The short SHA of the language repo is recorded in the report.
      - name: Get language SHA
        run: |
          gitref=$(cd ${{ matrix.language_dir }} && git rev-parse --short HEAD)
          echo "gitref=$gitref" >> "$GITHUB_ENV"

      - name: 'Setup node'
        uses: actions/setup-node@17f8bd926464a1afa4c6a11669539e9c1ba77048
        with:
          node-version: '16.17.0'

      - name: npm ci
        run: npm ci

      - name: Create translation health report
        run: |
          # Without pipefail the pipeline's status is jq's, so a crash in the
          # Node script would be hidden and an empty report would be uploaded.
          set -o pipefail
          # `jq -Rsa .` encodes the whole output as one single-line, ASCII-only
          # JSON string so it fits the one-line NAME=value format of GITHUB_ENV.
          translation_health_report=$( \
            node script/i18n/create-translation-health-report.js \
              --language ${{ matrix.language }} \
              --gitref ${{ env.gitref }} \
            | jq -Rsa .
          )
          echo "translation_health_report=$translation_health_report" >> "$GITHUB_ENV"

      - name: Log in to Azure
        uses: azure/login@1f63701bf3e6892515f1b7ce2d2bf1708b46beaf
        with:
          creds: ${{ secrets.PROD_AZURE_CREDENTIALS }}

      - name: Upload to Azure blob storage
        uses: azure/CLI@61bb69d64d613b52663984bf12d6bac8fd7b3cc8
        with:
          # The report must be quoted: it contains spaces, and an unquoted
          # expansion would be word-split into stray `az` arguments.
          # `--overwrite true` lets the fixed-name "latest" blob be replaced on
          # every run, and lets a same-day re-run replace the dated blob.
          inlineScript: |
            az storage blob upload \
              --name "${{ matrix.language }}-latest.json" \
              --data "$translation_health_report" \
              --container-name translation-health-reports \
              --overwrite true
            az storage blob upload \
              --name "${{ matrix.language }}-$(date +%Y-%m-%d).json" \
              --data "$translation_health_report" \
              --container-name translation-health-reports \
              --overwrite true

      - name: Log out from Azure
        if: always()
        run: |
          az logout

      # Emit a notification for the first responder to triage if the workflow failed.
      - name: Send Slack notification if workflow failed
        uses: someimportantcompany/github-actions-slack-message@f8d28715e7b8a4717047d23f48c39827cacad340
        if: failure()
        with:
          channel: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }}
          bot-token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }}
          color: failure
          text: 'The health report for ${{ matrix.language }} failed.'
|
|
@ -0,0 +1,163 @@
|
|||
#!/usr/bin/env node
|
||||
|
||||
// [start-readme]
|
||||
//
|
||||
// Create a list of errors and summary statistics for errors in a particular language.
|
||||
//
|
||||
// [end-readme]
|
||||
|
||||
/* Nota bene:
|
||||
If you are suddenly getting more errors, try running this:
|
||||
$ script/i18n/create-translation-health-report.js -l en -r 000
|
||||
If there are any errors, const context = { ... } probably needs more data.
|
||||
*/
|
||||
|
||||
import { program } from 'commander'
|
||||
import fs from 'fs/promises'
|
||||
import { pick } from 'lodash-es'
|
||||
|
||||
import { loadPages, loadPageMap } from '../../lib/page-data.js'
|
||||
import loadSiteData from '../../lib/site-data.js'
|
||||
import loadRedirects from '../../lib/redirects/precompile.js'
|
||||
import { allVersions, allVersionKeys } from '../../lib/all-versions.js'
|
||||
import { languageKeys } from '../../lib/languages.js'
|
||||
import { getProductStringFromPath } from '../../lib/path-utils.js'
|
||||
|
||||
// Command-line interface: both the language and the git ref are mandatory.
program.description('Create a translation health report for one language.')
program.requiredOption('-l, --language <language>', 'The language to health check')
program.requiredOption('-r, --gitref <sha>', 'Language repo latest git commit short SHA')
program.parse(process.argv)
|
||||
|
||||
/**
 * Gather popularity data the search uses to prioritize errors.
 *
 * Reads `lib/search/popular-pages.json` (relative to the current working
 * directory) as one JSON document per line and maps each row's
 * `path_article` to its `path_count`.
 *
 * Parsing is best-effort: blank lines, lines that are not valid JSON, and
 * rows without a string `path_article` are skipped rather than reported.
 *
 * @returns {Promise<Object<string, *>>} `path_count` values keyed by `path_article`.
 * @throws If the popularity file cannot be read.
 */
async function fetchPopularityData() {
  const output = {}
  const popularPagesRaw = await fs.readFile('lib/search/popular-pages.json', 'utf8')
  for (const rawLine of popularPagesRaw.split('\n')) {
    // Trimming also drops the "\r" left behind by CRLF line endings.
    const line = rawLine.trim()
    if (!line) continue

    let row
    try {
      row = JSON.parse(line)
    } catch {
      // Deliberately tolerant: a malformed line must not abort the whole report.
      continue
    }

    // A row without a usable path would otherwise be stored under the key "undefined".
    if (typeof row?.path_article !== 'string') continue
    output[row.path_article] = row.path_count
  }
  return output
}
|
||||
|
||||
/**
 * Render one page in every version it applies to and collect the errors.
 *
 * @param {object} page - Page exposing `applicableVersions` and `render(context, errors)`.
 * @param {object} options
 * @param {string} options.language - Language code used in the path and the context.
 * @param {object} options.data - Site data spread into the rendering context.
 * @param {object} options.redirects - Redirects placed in the rendering context.
 * @param {string} options.plainPath - Page path without the `.md` / `/index.md` suffix.
 * @param {object} options.pageMap - Exposed to the rendering context as `pages`.
 * @returns {Promise<Object<string, object[]>|undefined>} Trimmed-down errors keyed by
 *   version, or `undefined` when every applicable version rendered without errors.
 */
async function collectPageErrors(page, { language, data, redirects, plainPath, pageMap }) {
  // Renders a single version; resolves to a [version, errors] entry or undefined.
  const renderVersion = async (version) => {
    // Handed to page.render as its second argument; presumably render
    // appends non-fatal errors to it (confirm against the Page class).
    const collected = []
    try {
      const path = `/${language}/${version}/${plainPath}`
      // Reference middleware/context.js for data shape
      await page.render(
        {
          ...data, // needed for all pages
          currentVersion: version, // needed for all pages
          currentLanguage: language, // needed for all pages
          currentPath: path, // needed for all pages
          currentVersionObj: allVersions[version], // needed for ifversion tag
          currentProduct: getProductStringFromPath(path), // needed for learning-track on guides pages
          pages: pageMap, // needed for learning-track on guides pages
          redirects, // needed for learning-track on guides pages
        },
        collected
      )
    } catch (err) {
      // A thrown error is recorded alongside any that were collected before it.
      collected.push(err)
    }

    if (collected.length === 0) {
      return undefined
    }
    // Keep only the fields that tell translators what went wrong.
    // Other fields: Object.getOwnPropertyNames(err)
    const trimmed = collected.map((err) => pick(err, ['name', 'message', 'token.content']))
    return [version, trimmed]
  }

  // Go through each version the page applies to, rendering them concurrently.
  const applicable = allVersionKeys.filter((version) => page.applicableVersions.includes(version))
  const outcomes = await Promise.all(applicable.map((version) => renderVersion(version)))
  const entries = outcomes.filter(Boolean)

  return entries.length > 0 ? Object.fromEntries(entries) : undefined
}
|
||||
|
||||
/**
 * Count how often each error message occurs across all pages and versions.
 *
 * @param {Array<{versions: Object<string, Array<{message: string}>>}>} errors
 *   Page error entries, each with its errors grouped by version.
 * @returns {Object<string, number>} Occurrence counts keyed by error message,
 *   in order of first appearance.
 */
function groupErrors(errors) {
  // Counting in a Map instead of a plain object keeps messages that happen to
  // match Object.prototype members (e.g. "toString", "constructor") from
  // picking up the inherited value as their starting count.
  const counts = new Map()
  const everyError = errors.flatMap((page) => Object.values(page.versions).flat())
  for (const { message } of everyError) {
    // Object keys are strings, so normalise up front to keep identical grouping.
    const key = String(message)
    counts.set(key, (counts.get(key) ?? 0) + 1)
  }
  // Object.fromEntries defines own properties, so even "__proto__" is kept.
  return Object.fromEntries(counts)
}
|
||||
|
||||
/**
 * Build the translation health report for the language given on the command line.
 *
 * Every page of that language (early-access pages excluded) is rendered in
 * each of its versions; pages with errors are listed most popular first.
 *
 * @returns {Promise<object>} Report with `language`, `gitref`, `datetime`,
 *   `totalPages`, `totalErrorPages`, `pageErrors`, `groupedPageErrors` and `dataErrors`.
 * @throws {Error} If the requested language is not one of `languageKeys`.
 */
async function createReport() {
  const { language, gitref } = program.opts()

  // Refuse to continue for a language that is not configured
  const isKnownLanguage = languageKeys.includes(language)
  if (!isKnownLanguage) {
    throw new Error(`Language ${language} is not in ${languageKeys.join()}.`)
  }

  // Popularity is what the error list gets sorted by
  const popularity = await fetchPopularityData()

  const allPages = await loadPages()
  // Passed to loadSiteData, which presumably appends data-loading errors (confirm)
  const dataErrors = []
  const data = loadSiteData(dataErrors)[language]

  // Early access pages log to the console, which would show in the report
  const pages = allPages.filter(
    (page) => page.languageCode === language && !page.relativePath.includes('early-access')
  )
  const pageMap = await loadPageMap(pages)
  const redirects = await loadRedirects(pages)

  // Renders one page; resolves to its error entry, or undefined when it is clean
  const checkPage = async (page) => {
    const plainPath = page.relativePath.replace('/index.md', '').replace('.md', '')
    const versions = await collectPageErrors(page, {
      language,
      data,
      redirects,
      plainPath,
      pageMap,
    })
    if (!versions) {
      return undefined
    }
    return {
      path: plainPath,
      popularity: popularity[plainPath] || 0,
      versions,
    }
  }

  // Try to render each page, keeping only the ones that produced errors
  const outcomes = await Promise.all(pages.map((page) => checkPage(page)))
  const pageErrors = outcomes.filter(Boolean)
  // Most popular first so the translators know what to focus on
  pageErrors.sort((a, b) => b.popularity - a.popularity)

  // Filter down properties to make it easier for
  // translators to get the clearest information on the error
  const trimError = (err) => pick(err, ['name', 'message', 'token.content'])

  return {
    language,
    gitref,
    datetime: new Date().toJSON(),
    totalPages: pages.length,
    // totalErrorPages should be around en: 0, es: 1043, ja: 1004, pt: 995, cn: 1063
    totalErrorPages: pageErrors.length,
    pageErrors,
    // The same errors, counted per message instead of per page
    groupedPageErrors: groupErrors(pageErrors),
    dataErrors: dataErrors.map(trimError),
  }
}
|
||||
|
||||
// Print the finished report to stdout as pretty-printed JSON.
const healthReport = await createReport()
console.log(JSON.stringify(healthReport, null, 2))
|
Загрузка…
Ссылка в новой задаче