Delete orphan translation files from translation repos (#47494)

Co-authored-by: Robert Sese <734194+rsese@users.noreply.github.com>
This commit is contained in:
Peter Bengtsson 2023-12-11 13:55:16 -05:00 коммит произвёл GitHub
Родитель d1c102437f
Коммит 65ab895d0e
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файлов: 201 добавлений и 0 удалений

116
.github/workflows/delete-orphan-translation-files.yml поставляемый Normal file
Просмотреть файл

@ -0,0 +1,116 @@
name: Delete orphan translation files

# **What it does**:
#   Finds content & data files left in each translation repo that are no
#   longer in docs-internal, then creates a PR to delete those files.
# **Why we have it**:
#   When Juno dumps to each translation repo it cannot account for the
#   fact that files in docs-internal get moved or deleted. So the
#   sum total of files constantly grows.
#   This leads to excess files in each translation repo that are never
#   used but have to be put into every production build.
# **Who does it impact**: Docs engineering

on:
  workflow_dispatch:
  schedule:
    - cron: '20 16 * * 1' # Run every Monday at 16:20 UTC / 8:20 PST

permissions:
  contents: write

jobs:
  delete-orphan-translation-files:
    if: github.repository == 'github/docs-internal'
    runs-on: ubuntu-latest
    strategy:
      # One language failing should not cancel the cleanup of the others.
      fail-fast: false
      matrix:
        include:
          - language: zh
            language_dir: translations/zh-cn
            language_repo: github/docs-internal.zh-cn
          - language: es
            language_dir: translations/es-es
            language_repo: github/docs-internal.es-es
          - language: pt
            language_dir: translations/pt-br
            language_repo: github/docs-internal.pt-br
          - language: ru
            language_dir: translations/ru-ru
            language_repo: github/docs-internal.ru-ru
          - language: ja
            language_dir: translations/ja-jp
            language_repo: github/docs-internal.ja-jp
          - language: fr
            language_dir: translations/fr-fr
            language_repo: github/docs-internal.fr-fr
          - language: de
            language_dir: translations/de-de
            language_repo: github/docs-internal.de-de
          - language: ko
            language_dir: translations/ko-kr
            language_repo: github/docs-internal.ko-kr
    steps:
      - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0

      # The translation repo is checked out *inside* the docs-internal checkout,
      # at the path the delete script is later pointed at.
      - name: Checkout the language-specific repo
        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
        with:
          repository: ${{ matrix.language_repo }}
          token: ${{ secrets.DOCS_BOT_PAT_READPUBLICKEY }}
          path: ${{ matrix.language_dir }}

      - uses: ./.github/actions/node-npm-setup

      - name: Delete orphan files
        run: |
          npm run delete-orphan-translation-files -- ${{ matrix.language_dir }}

      - name: Debug deleted files
        working-directory: ${{ matrix.language_dir }}
        run: git status

      - name: Git config
        working-directory: ${{ matrix.language_dir }}
        run: |
          git config --global user.name "docs-bot"
          git config --global user.email "77750099+docs-bot@users.noreply.github.com"

      - name: Git commit and push, create and merge PR
        working-directory: ${{ matrix.language_dir }}
        env:
          # Needed for gh
          GH_TOKEN: ${{ secrets.DOCS_BOT_PAT_READPUBLICKEY }}
        run: |
          git status

          # If the script found no orphans there is nothing to commit.
          # `git commit` would exit non-zero in that case, failing the job
          # (and triggering the Slack alert) even though nothing is wrong.
          if git diff --quiet HEAD; then
            echo "No orphan files were deleted. Nothing to commit. Exiting..."
            exit 0
          fi

          current_timestamp=$(date '+%Y-%m-%d-%H%M%S')
          branch_name="delete-orphan-files-$current_timestamp"
          git checkout -b "$branch_name"
          current_daystamp=$(date '+%Y-%m-%d')
          git commit -a -m "Delete orphan files ($current_daystamp)"
          git push origin "$branch_name"

          # Create PR
          echo "Creating pull request..."
          gh pr create \
            --title "Delete orphan files ($current_daystamp)" \
            --body '👋 humans. This PR was generated from docs-internal/.github/workflows/delete-orphan-translation-files.yml.
            ' \
            --repo "${{ matrix.language_repo }}"

          echo "Merge created PR..."
          gh pr merge --merge --auto --delete-branch "$branch_name"

      # Manual runs are watched by whoever triggered them; only alert on scheduled failures.
      - uses: ./.github/actions/slack-alert
        if: ${{ failure() && github.event_name != 'workflow_dispatch' }}
        with:
          slack_channel_id: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }}
          slack_token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }}

Просмотреть файл

@ -23,6 +23,7 @@
"copy-fixture-data": "node src/tests/scripts/copy-fixture-data.js",
"create-translation-health-report": "node src/languages/scripts/create-translation-health-report.js",
"debug": "cross-env NODE_ENV=development ENABLED_LANGUAGES=en nodemon --inspect src/frame/server.js",
"delete-orphan-translation-files": "tsx src/workflows/delete-orphan-translation-files.ts",
"dev": "cross-env npm start",
"find-orphaned-assets": "node src/assets/scripts/find-orphaned-assets.js",
"fixture-dev": "cross-env ROOT=src/fixtures/fixtures npm start",

Просмотреть файл

@ -0,0 +1,84 @@
#!/usr/bin/env node
/**
* This script deletes files from a translation repo that exist only
* there and not "here", "here" being the docs repo.
* It will only look at *.md files in `content/` and
* only look at *.md and *.yml files in `data/`.
*
* If executed with `--dry-run` it will only print what it would delete.
*
* To avoid deleting too many files at once, which can make PRs too big,
* there's a `--max <number>` option, which defaults to 100.
*
* To run this locally, check out a translation repo and then run it like this:
*
* git clone git@github.com:github/docs-internal.ja-jp.git /tmp/docs-internal.ja-jp
* npm run delete-orphan-translation-files -- /tmp/docs-internal.ja-jp
*
* Note that it doesn't execute `git rm ...` for you. Just regular
* file deletion. It's up to you now to commit and push.
*/
import fs from 'fs'
import path from 'path'
import { program } from 'commander'
import walkFiles from 'src/workflows/walk-files.js'
import { ROOT } from 'src/frame/lib/constants.js'
// Command-line interface: one positional <repo-root> plus --dry-run and --max.
program
  .description('Delete orphan translation files')
  .option('--dry-run', 'Just print what it would delete')
  .option('--max <number>', 'Max. number of files to delete', '100')
  .argument('<repo-root>', 'path to repo root')
  .parse(process.argv)

type Options = {
  dryRun: boolean
  max: number
}

const opts = program.opts()

// parseInt yields NaN for non-numeric input, and `deleted < NaN` is always
// false, so a typo in --max would silently turn the whole run into a no-op.
// Fail loudly instead. Negative limits are equally meaningless.
const maxDeletions = parseInt(opts.max, 10)
if (Number.isNaN(maxDeletions) || maxDeletions < 0) {
  throw new Error(`--max must be a non-negative integer (got '${opts.max}')`)
}

main(program.args[0], {
  dryRun: Boolean(opts.dryRun),
  max: maxDeletions,
})
/**
 * Scan the translation checkout at `root` and remove every content/data file
 * that has no file at the same relative path under ROOT.
 *
 * At most `options.max` files are removed. With `options.dryRun` nothing is
 * removed; the candidates are only printed. The scan keeps going after the
 * limit is reached so that the final in-sync/orphan totals cover every file.
 *
 * @param root - Path to the root of the translation repo checkout.
 * @param options - Dry-run flag and the cap on how many files to delete.
 */
function main(root: string, options: Options): void {
  let deleted = 0
  let countInSync = 0
  let countOrphan = 0

  for (const filePath of getContentAndDataFiles(root)) {
    const relPath = path.relative(root, filePath)
    if (fs.existsSync(path.join(ROOT, relPath))) {
      countInSync++
      continue
    }

    countOrphan++
    // Limit already reached: keep counting orphans but leave the file alone.
    if (deleted >= options.max) continue

    if (options.dryRun) {
      console.log('DELETE', filePath)
    } else {
      fs.rmSync(filePath)
      console.log('DELETED', filePath)
    }
    deleted++

    if (deleted >= options.max) {
      // In a dry run nothing was removed, so don't claim that it was.
      console.log(
        options.dryRun
          ? `Max. number (${options.max}) of files reached (dry run)`
          : `Max. number (${options.max}) of files deleted`,
      )
    }
  }

  const outcome = options.dryRun ? 'would have deleted' : 'deleted'
  console.log(`In conclusion, ${outcome} ${deleted.toLocaleString()} files.`)
  console.log(
    `There are ${countInSync.toLocaleString()} files in sync and ${countOrphan.toLocaleString()} orphan files in ${root}`,
  )
}
/**
 * Collect the candidate files under `root`: `.md` files beneath `content/`,
 * followed by `.md` and `.yml` files beneath `data/`.
 */
function getContentAndDataFiles(root: string) {
  const contentDir = path.join(root, 'content')
  const contentFiles = walkFiles(contentDir, ['.md'])

  const dataDir = path.join(root, 'data')
  const dataFiles = walkFiles(dataDir, ['.md', '.yml'])

  return [...contentFiles, ...dataFiles]
}