Create translation-health-report.yml (#32486)

This commit is contained in:
Kevin Heis 2022-11-16 10:35:42 -08:00 коммит произвёл GitHub
Родитель 198459522d
Коммит bd23217796
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
2 изменённых файлов: 295 добавлений и 0 удалений

132
.github/workflows/translation-health-report.yml поставляемый Normal file
Просмотреть файл

@ -0,0 +1,132 @@
name: Translation health report
# **What it does**: Provides errors and summary statistics on rendering translated content.
# **Why we have it**: To improve our translations by having clearer visibility.
# **Who does it impact**: Docs engineering, Microsoft translators.
on:
workflow_dispatch:
schedule:
- cron: '20 16 * * *' # Run every day at 16:20 UTC / 8:20 PST
permissions:
contents: read
jobs:
create-translation-health-report:
name: Create translation health report
if: github.repository == 'github/docs-internal'
runs-on: ubuntu-latest
# This sets a maximum execution time of 300 minutes (5 hours)
# to prevent the workflow from running longer than necessary.
timeout-minutes: 300
strategy:
fail-fast: false
max-parallel: 1
matrix:
include:
- language: es
language_dir: translations/es-ES
language_repo: github/docs-internal.es-es
- language: ja
language_dir: translations/ja-JP
language_repo: github/docs-internal.ja-jp
- language: pt
language_dir: translations/pt-BR
language_repo: github/docs-internal.pt-br
- language: cn
language_dir: translations/zh-CN
language_repo: github/docs-internal.zh-cn
# We'll be ready to add the following languages in a future effort.
# - language: ru
# language_dir: translations/ru-RU
# language_repo: github/docs-internal.ru-ru
# - language: ko
# language_dir: translations/ko-KR
# language_repo: github/docs-internal.ko-kr
# - language: fr
# language_dir: translations/fr-FR
# language_repo: github/docs-internal.fr-fr
# - language: de
# language_dir: translations/de-DE
# language_repo: github/docs-internal.de-de
steps:
- name: Checkout the docs-internal repo
uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748
- name: Remove all language translations
run: |
git rm -rf --quiet ${{ matrix.language_dir }}/content
git rm -rf --quiet ${{ matrix.language_dir }}/data
- name: Checkout the language-specific repo
uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748
with:
repository: ${{ matrix.language_repo }}
token: ${{ secrets.DOCUBOT_READORG_REPO_WORKFLOW_SCOPES }}
path: ${{ matrix.language_dir }}
- name: Get language SHA
run: |
gitref=$(cd ${{ matrix.language_dir }} && git rev-parse --short HEAD)
echo "gitref=$gitref" >> $GITHUB_ENV
- name: 'Setup node'
uses: actions/setup-node@17f8bd926464a1afa4c6a11669539e9c1ba77048
with:
node-version: '16.17.0'
- name: npm ci
run: npm ci
- name: Create translation health report
run: |
translation_health_report=$( \
node script/i18n/create-translation-health-report.js \
--language ${{ matrix.language }} \
--gitref ${{ env.gitref }} \
| jq -Rsa .
)
echo "translation_health_report=$translation_health_report" >> $GITHUB_ENV
- name: Log in to Azure
uses: azure/login@1f63701bf3e6892515f1b7ce2d2bf1708b46beaf
with:
creds: ${{ secrets.PROD_AZURE_CREDENTIALS }}
# Uploads the report twice: once as "<language>-latest.json" and once under a
# dated name. The report is read from the translation_health_report
# environment variable exported by the "Create translation health report" step.
- name: Upload to Azure blob storage
  uses: azure/CLI@61bb69d64d613b52663984bf12d6bac8fd7b3cc8
  with:
    inlineScript: |
      # The report text contains spaces, so the variable must be quoted;
      # unquoted, the shell splits it into many separate arguments.
      # "<language>-latest.json" is replaced on every run, and a manual re-run
      # on the same day replaces the dated blob, so both uploads pass
      # --overwrite (recent az CLI versions refuse to replace an existing
      # blob without it).
      az storage blob upload \
        --name "${{ matrix.language }}-latest.json" \
        --data "$translation_health_report" \
        --container-name translation-health-reports \
        --overwrite true
      az storage blob upload \
        --name "${{ matrix.language }}-$(date +%Y-%m-%d).json" \
        --data "$translation_health_report" \
        --container-name translation-health-reports \
        --overwrite true
- name: Log out from Azure
if: always()
run: |
az logout
# Emit a notification for the first responder to triage if the workflow failed.
- name: Send Slack notification if workflow failed
uses: someimportantcompany/github-actions-slack-message@f8d28715e7b8a4717047d23f48c39827cacad340
if: failure()
with:
channel: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }}
bot-token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }}
color: failure
text: 'The health report for ${{ matrix.language }} failed.'

Просмотреть файл

@ -0,0 +1,163 @@
#!/usr/bin/env node
// [start-readme]
//
// Create a list of errors and summary statistics for errors in a particular language.
//
// [end-readme]
/* Nota bene:
If you are getting more errors all of a sudden, try running this:
$ script/i18n/create-translation-health-report.js -l en -r 000
If there are any errors, const context = { ... } probably needs more data.
*/
import { program } from 'commander'
import fs from 'fs/promises'
import { pick } from 'lodash-es'
import { loadPages, loadPageMap } from '../../lib/page-data.js'
import loadSiteData from '../../lib/site-data.js'
import loadRedirects from '../../lib/redirects/precompile.js'
import { allVersions, allVersionKeys } from '../../lib/all-versions.js'
import { languageKeys } from '../../lib/languages.js'
import { getProductStringFromPath } from '../../lib/path-utils.js'
// Command-line interface: both options are required.
//   -l, --language <language>  the language to health check
//   -r, --gitref <sha>         latest git commit short SHA of the language repo
program.description('Create a translation health report for one language.')
program.requiredOption('-l, --language <language>', 'The language to health check')
program.requiredOption('-r, --gitref <sha>', 'Language repo latest git commit short SHA')
program.parse(process.argv)
// Read lib/search/popular-pages.json (one JSON object per line) and build a
// lookup of `path_article` -> `path_count`. The report uses these counts to
// sort pages with errors. The path is relative to the current working directory.
async function fetchPopularityData() {
  const rawLines = await fs.readFile('lib/search/popular-pages.json', 'utf8')
  const countsByPath = {}
  rawLines.split('\n').forEach((line) => {
    try {
      const { path_article: articlePath, path_count: count } = JSON.parse(line)
      countsByPath[articlePath] = count
    } catch {
      // Best effort: lines that are not valid JSON (for example blank lines)
      // are skipped on purpose.
    }
  })
  return countsByPath
}
// Render `page` once for every version it applies to and gather the errors.
//
// Resolves to an object mapping version -> list of trimmed-down errors, or to
// undefined when no version produced an error. Both errors that page.render
// appends to the array it is given and exceptions it throws are collected.
async function collectPageErrors(page, { language, data, redirects, plainPath, pageMap }) {
  // Renders a single version; resolves to [version, errors] or undefined.
  const renderVersion = async (version) => {
    const collected = []
    try {
      const currentPath = `/${language}/${version}/${plainPath}`
      // Reference middleware/context.js for data shape
      await page.render(
        {
          ...data, // needed for all pages
          currentVersion: version, // needed for all pages
          currentLanguage: language, // needed for all pages
          currentPath, // needed for all pages
          currentVersionObj: allVersions[version], // needed for ifversion tag
          currentProduct: getProductStringFromPath(currentPath), // needed for learning-track on guides pages
          pages: pageMap, // needed for learning-track on guides pages
          redirects, // needed for learning-track on guides pages
        },
        collected
      )
    } catch (err) {
      collected.push(err)
    }
    if (collected.length === 0) {
      return undefined
    }
    // Filter down properties to make it easier for
    // translators to get the clearest information on the error.
    // Other fields: Object.getOwnPropertyNames(err)
    const trimmed = collected.map((err) => pick(err, ['name', 'message', 'token.content']))
    return [version, trimmed]
  }

  // All applicable versions are started together, as before.
  const applicableVersions = allVersionKeys.filter((version) =>
    page.applicableVersions.includes(version)
  )
  const outcomes = await Promise.all(applicableVersions.map((version) => renderVersion(version)))
  const entries = outcomes.filter(Boolean)
  return entries.length > 0 ? Object.fromEntries(entries) : undefined
}
// Count how many times each error message occurs across all pages and versions.
//
// `errors` is a list of page entries, each with a `versions` object that maps
// a version to a list of error objects. Returns a plain object mapping
// message -> number of occurrences. Errors without a `message` are counted
// under the key "undefined".
function groupErrors(errors) {
  // Count in a Map, not a plain object: on `{}` a message such as "toString"
  // or "constructor" would find the inherited property and the count would
  // become NaN.
  const counts = new Map()
  for (const page of errors) {
    for (const err of Object.values(page.versions).flat()) {
      counts.set(err.message, (counts.get(err.message) ?? 0) + 1)
    }
  }
  // Convert back to a plain object so the result serializes to JSON as before.
  return Object.fromEntries(counts)
}
// Build the translation health report for the language given on the command line.
//
// Loads the pages for that language, tries to render each one in every
// applicable version, and returns a report object with the errors found
// (sorted by page popularity), the same errors tallied by message, and the
// errors gathered while loading site data.
// Throws if the requested language is not one of `languageKeys`.
async function createReport() {
  const { language, gitref } = program.opts()

  // Check that the language is valid
  const isKnownLanguage = languageKeys.includes(language)
  if (!isKnownLanguage) {
    throw new Error(`Language ${language} is not in ${languageKeys.join()}.`)
  }

  // Load popularity data to sort errors
  const popularity = await fetchPopularityData()

  // Load all pages
  const everyPage = await loadPages()

  // Passed to loadSiteData and reported below as dataErrors
  // (presumably loadSiteData appends its loading errors here; confirm in lib/site-data.js)
  const dataErrors = []
  const data = loadSiteData(dataErrors)[language]

  // Keep only pages in the requested language.
  // Early access pages log to the console, which would show in the report
  const pages = everyPage.filter(
    (page) => page.languageCode === language && !page.relativePath.includes('early-access')
  )
  const pageMap = await loadPageMap(pages)
  const redirects = await loadRedirects(pages)

  // Try to render one page; resolves to a report entry, or undefined if it has no errors
  const checkPage = async (page) => {
    const plainPath = page.relativePath.replace('/index.md', '').replace('.md', '')
    const versions = await collectPageErrors(page, {
      language,
      data,
      redirects,
      plainPath,
      pageMap,
    })
    if (!versions) {
      return undefined
    }
    return {
      path: plainPath,
      popularity: popularity[plainPath] || 0,
      versions,
    }
  }

  // Try to render each page
  const checkedPages = await Promise.all(pages.map((page) => checkPage(page)))
  const pageErrors = checkedPages.filter(Boolean)
  // Sort by popularity desc so the translators know what to focus on first
  pageErrors.sort((a, b) => b.popularity - a.popularity)

  // Assemble the output report
  const datetime = new Date().toJSON()
  // To group errors by message instead
  const groupedPageErrors = groupErrors(pageErrors)
  // Filter down properties to make it easier for
  // translators to get the clearest information on the error
  const trimmedDataErrors = dataErrors.map((err) => pick(err, ['name', 'message', 'token.content']))

  return {
    language,
    gitref,
    datetime,
    totalPages: pages.length,
    // totalErrorPages should be around en: 0, es: 1043, ja: 1004, pt: 995, cn: 1063
    totalErrorPages: pageErrors.length,
    pageErrors,
    groupedPageErrors,
    dataErrors: trimmedDataErrors,
  }
}
// Entry point: build the report and print it to stdout as pretty-printed JSON.
const healthReport = await createReport()
console.log(JSON.stringify(healthReport, null, 2))