Remove unused CrowdIn workflows and scripts (#30234)

Hector Alfaro 2022-09-12 13:59:28 -04:00 committed by GitHub
Parent 6e493149ce
Commit 25b455c35f
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
27 changed files with 22 additions and 1772 deletions

6
.github/CODEOWNERS vendored
View file

@@ -16,10 +16,8 @@ package-lock.json @github/docs-engineering
package.json @github/docs-engineering
# Localization
/.github/actions-scripts/create-translation-batch-pr.js @github/docs-engineering
/.github/workflows/create-translation-batch-pr.yml @github/docs-engineering
/.github/workflows/crowdin.yml @github/docs-engineering
/crowdin*.yml @github/docs-engineering
/.github/actions-scripts/msft-create-translation-batch-pr.js @github/docs-engineering
/.github/workflows/msft-create-translation-batch-pr.yml @github/docs-engineering
/translations/ @Octomerger
# Site Policy

View file

@@ -1,142 +0,0 @@
#!/usr/bin/env node
import fs from 'fs'
import github from '@actions/github'
const OPTIONS = Object.fromEntries(
['BASE', 'BODY_FILE', 'GITHUB_TOKEN', 'HEAD', 'LANGUAGE', 'TITLE', 'GITHUB_REPOSITORY'].map(
(envVarName) => {
const envVarValue = process.env[envVarName]
if (!envVarValue) {
throw new Error(`You must supply a ${envVarName} environment variable`)
}
return [envVarName, envVarValue]
}
)
)
if (!process.env.GITHUB_REPOSITORY) {
throw new Error('GITHUB_REPOSITORY environment variable not set')
}
const RETRY_STATUSES = [
422, // Retry the operation if the PR already exists
502, // Retry the operation if the API responds with a `502 Bad Gateway` error.
]
const RETRY_ATTEMPTS = 3
const {
// One of the default environment variables provided by Actions.
GITHUB_REPOSITORY,
// These are passed in from the step in the workflow file.
TITLE,
BASE,
HEAD,
LANGUAGE,
BODY_FILE,
GITHUB_TOKEN,
} = OPTIONS
const [OWNER, REPO] = GITHUB_REPOSITORY.split('/')
const octokit = github.getOctokit(GITHUB_TOKEN)
/**
* @param {object} config Configuration options for finding the PR.
* @returns {Promise<number | undefined>} The PR number.
*/
async function findPullRequestNumber(config) {
// Get a list of PRs and see if one already exists.
const { data: listOfPullRequests } = await octokit.rest.pulls.list({
owner: config.owner,
repo: config.repo,
head: `${config.owner}:${config.head}`,
})
return listOfPullRequests[0]?.number
}
/**
* When this file was first created, we only introduced support for creating a pull request for some translation batch.
* However, some of our first workflow runs failed during the pull request creation due to a timeout error.
* There have been cases where, despite the timeout error, the pull request gets created _anyway_.
* To accommodate this reality, we created this function to look for an existing pull request before a new one is created.
* Although the "find" check is redundant in the first "cycle", it's designed this way to recursively call the function again via its retry mechanism should that be necessary.
*
* @param {object} config Configuration options for creating the pull request.
* @returns {Promise<number>} The PR number.
*/
async function findOrCreatePullRequest(config) {
const found = await findPullRequestNumber(config)
if (found) {
return found
}
try {
const { data: pullRequest } = await octokit.rest.pulls.create({
owner: config.owner,
repo: config.repo,
base: config.base,
head: config.head,
title: config.title,
body: config.body,
draft: false,
})
return pullRequest.number
} catch (error) {
if (!error.response || !config.retryCount) {
throw error
}
if (!config.retryStatuses.includes(error.response.status)) {
throw error
}
console.error(`Error creating pull request: ${error.message}`)
console.warn(`Retrying in 5 seconds...`)
await new Promise((resolve) => setTimeout(resolve, 5000))
config.retryCount -= 1
return findOrCreatePullRequest(config)
}
}
/**
* @param {object} config Configuration options for labeling the PR
* @returns {Promise<undefined>}
*/
// async function labelPullRequest(config) {
// await octokit.rest.issues.update({
// owner: config.owner,
// repo: config.repo,
// issue_number: config.issue_number,
// labels: config.labels,
// })
// }
async function main() {
const options = {
title: TITLE,
base: BASE,
head: HEAD,
body: fs.readFileSync(BODY_FILE, 'utf8'),
labels: ['translation-batch', `translation-batch-${LANGUAGE}`],
owner: OWNER,
repo: REPO,
retryStatuses: RETRY_STATUSES,
retryCount: RETRY_ATTEMPTS,
}
options.issue_number = await findOrCreatePullRequest(options)
const pr = `${GITHUB_REPOSITORY}#${options.issue_number}`
console.log(`Created PR ${pr}`)
// metadata parameters aren't currently available in `github.rest.pulls.create`,
// but they are in `github.rest.issues.update`.
// await labelPullRequest(options)
// console.log(`Updated ${pr} with these labels: ${options.labels.join(', ')}`)
}
main()
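For reference, the batch workflow further down exports these variables and runs this script directly; an illustrative invocation (all values are placeholders, and GITHUB_REPOSITORY is normally provided by Actions) might look like:

GITHUB_REPOSITORY=github/docs-internal GITHUB_TOKEN=<token> BASE=main HEAD=translation-batch-es-2022-09-12__17-02 LANGUAGE=es TITLE='New translation batch for es' BODY_FILE=/tmp/pr-body.txt .github/actions-scripts/create-translation-batch-pr.js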

View file

@@ -1,222 +0,0 @@
name: Create translation Batch Pull Request
# **What it does**:
# - Creates one pull request per language after running a series of automated checks,
# removing translations that are broken in any known way
# **Why we have it**:
# - To deploy translations
# **Who does it impact**: It automates what would otherwise be manual work,
# helping docs engineering focus on higher value work
on:
workflow_dispatch:
# schedule:
# - cron: '02 17 * * *' # Once a day at 17:02 UTC / 9:02 PST
permissions:
contents: write
jobs:
create-translation-batch:
name: Create translation batch
if: github.repository == 'github/docs-internal'
runs-on: ubuntu-latest
# A sync's average run time is ~3.2 hours.
# This sets a maximum execution time of 300 minutes (5 hours) to prevent the workflow from running longer than necessary.
timeout-minutes: 300
strategy:
fail-fast: false
max-parallel: 1
matrix:
include:
- language: pt
crowdin_language: pt-BR
language_dir: translations/pt-BR
- language: es
crowdin_language: es-ES
language_dir: translations/es-ES
- language: cn
crowdin_language: zh-CN
language_dir: translations/zh-CN
- language_dir: translations/ja-JP
crowdin_language: ja
language: ja
steps:
- name: Set branch name
id: set-branch
run: |
echo "::set-output name=BRANCH_NAME::translation-batch-${{ matrix.language }}-$(date +%Y-%m-%d__%H-%M)"
- run: git config --global user.name "docubot"
- run: git config --global user.email "67483024+docubot@users.noreply.github.com"
- name: Checkout
uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748
with:
fetch-depth: 0
lfs: true
- run: git checkout -b ${{ steps.set-branch.outputs.BRANCH_NAME }}
- name: Remove unwanted git hooks
run: rm .git/hooks/post-checkout
# https://support.crowdin.com/cli-tool/#debian
- name: Download and install the public signing key
run: wget -qO - https://artifacts.crowdin.com/repo/GPG-KEY-crowdin | sudo apt-key add -
- name: Create the crowdin.list file in the /etc/apt/sources.list.d directory
run: |
sudo touch /etc/apt/sources.list.d/crowdin.list
echo "deb https://artifacts.crowdin.com/repo/deb/ /" | sudo tee -a /etc/apt/sources.list.d/crowdin.list
- name: Install the Crowdin CLI Debian Package
run: sudo apt-get update && sudo apt-get install crowdin3
# Delete empty source files that would be rejected by Crowdin breaking the workflow
- name: Remove empty source files
run: |
find content -type f -empty -delete
find data -type f -empty -delete
- name: Upload files to crowdin
run: crowdin upload sources --delete-obsolete --no-progress --no-colors --verbose --debug '--branch=main' '--config=crowdin.yml'
env:
# This is a numeric id, not to be confused with Crowdin API v1 "project identifier" string
# See "API v2" on https://crowdin.com/project/<your-project>/settings#api
CROWDIN_PROJECT_ID: ${{ secrets.CROWDIN_PROJECT_ID }}
# A personal access token, not to be confused with Crowdin API v1 "API key"
# See https://crowdin.com/settings#api-key to generate a token
# This token was created by logging into Crowdin with the octoglot user
CROWDIN_PERSONAL_TOKEN: ${{ secrets.CROWDIN_PERSONAL_TOKEN }}
- name: Remove all language translations
run: |
git rm -rf --quiet ${{ matrix.language_dir }}/content
git rm -rf --quiet ${{ matrix.language_dir }}/data
- name: Download crowdin translations
run: crowdin download --no-progress --no-colors --verbose --debug '--branch=main' '--config=crowdin.yml' --language=${{ matrix.crowdin_language }}
env:
# This is a numeric id, not to be confused with Crowdin API v1 "project identifier" string
# See "API v2" on https://crowdin.com/project/<your-project>/settings#api
CROWDIN_PROJECT_ID: ${{ secrets.CROWDIN_PROJECT_ID }}
# A personal access token, not to be confused with Crowdin API v1 "API key"
# See https://crowdin.com/settings#api-key to generate a token
# This token was created by logging into Crowdin with the octoglot user
CROWDIN_PERSONAL_TOKEN: ${{ secrets.CROWDIN_PERSONAL_TOKEN }}
- name: Commit crowdin sync
run: |
git add ${{ matrix.language_dir }}
git commit -m "Add crowdin translations" || echo "Nothing to commit"
- name: 'Setup node'
uses: actions/setup-node@17f8bd926464a1afa4c6a11669539e9c1ba77048
with:
node-version: '16.15.0'
- run: npm ci
# step 6 in docs-engineering/crowdin.md
- name: Homogenize frontmatter
run: |
node script/i18n/homogenize-frontmatter.js
git add ${{ matrix.language_dir }} && git commit -m "Run script/i18n/homogenize-frontmatter.js" || echo "Nothing to commit"
# step 7 in docs-engineering/crowdin.md
- name: Fix translation errors
run: |
node script/i18n/fix-translation-errors.js
git add ${{ matrix.language_dir }} && git commit -m "Run script/i18n/fix-translation-errors.js" || echo "Nothing to commit"
# step 8a in docs-engineering/crowdin.md
- name: Check parsing
run: |
node script/i18n/lint-translation-files.js --check parsing | tee -a /tmp/batch.log | cat
git add ${{ matrix.language_dir }} && git commit -m "Run script/i18n/lint-translation-files.js --check parsing" || echo "Nothing to commit"
# step 8b in docs-engineering/crowdin.md
- name: Check rendering
run: |
node script/i18n/lint-translation-files.js --check rendering | tee -a /tmp/batch.log | cat
git add ${{ matrix.language_dir }} && git commit -m "Run script/i18n/lint-translation-files.js --check rendering" || echo "Nothing to commit"
- name: Reset files with broken liquid tags
run: |
node script/i18n/reset-files-with-broken-liquid-tags.js --language=${{ matrix.language }} | tee -a /tmp/batch.log | cat
git add ${{ matrix.language_dir }} && git commit -m "run script/i18n/reset-files-with-broken-liquid-tags.js --language=${{ matrix.language }}" || echo "Nothing to commit"
# step 5 in docs-engineering/crowdin.md using script from docs-internal#22709
- name: Reset known broken files
run: |
node script/i18n/reset-known-broken-translation-files.js | tee -a /tmp/batch.log | cat
git add ${{ matrix.language_dir }} && git commit -m "run script/i18n/reset-known-broken-translation-files.js" || echo "Nothing to commit"
env:
GITHUB_TOKEN: ${{ secrets.DOCUBOT_REPO_PAT }}
- name: Check in CSV report
run: |
mkdir -p translations/log
csvFile=translations/log/${{ matrix.language }}-resets.csv
script/i18n/report-reset-files.js --report-type=csv --language=${{ matrix.language }} --log-file=/tmp/batch.log > $csvFile
git add -f $csvFile && git commit -m "Check in ${{ matrix.language }} CSV report" || echo "Nothing to commit"
- name: Write the reported files that were reset to /tmp/pr-body.txt
run: script/i18n/report-reset-files.js --report-type=pull-request-body --language=${{ matrix.language }} --log-file=/tmp/batch.log > /tmp/pr-body.txt
- name: Push filtered translations
run: git push origin ${{ steps.set-branch.outputs.BRANCH_NAME }}
# - name: Close existing stale batches
# uses: lee-dohm/close-matching-issues@e9e43aad2fa6f06a058cedfd8fb975fd93b56d8f
# with:
# token: ${{ secrets.OCTOMERGER_PAT_WITH_REPO_AND_WORKFLOW_SCOPE }}
# query: 'type:pr label:translation-batch-${{ matrix.language }}'
- name: Create translation batch pull request
env:
GITHUB_TOKEN: ${{ secrets.DOCUBOT_REPO_PAT }}
TITLE: 'New translation batch for ${{ matrix.language }}'
BASE: 'main'
HEAD: ${{ steps.set-branch.outputs.BRANCH_NAME }}
LANGUAGE: ${{ matrix.language }}
BODY_FILE: '/tmp/pr-body.txt'
run: .github/actions-scripts/create-translation-batch-pr.js
# - name: Approve PR
# if: github.ref_name == 'main'
# env:
# GITHUB_TOKEN: ${{ secrets.OCTOMERGER_PAT_WITH_REPO_AND_WORKFLOW_SCOPE }}
# run: gh pr review --approve || echo "Nothing to approve"
# - name: Set auto-merge
# if: github.ref_name == 'main'
# env:
# GITHUB_TOKEN: ${{ secrets.OCTOMERGER_PAT_WITH_REPO_AND_WORKFLOW_SCOPE }}
# run: gh pr merge ${{ steps.set-branch.outputs.BRANCH_NAME }} --auto --squash || echo "Nothing to merge"
# # When the maximum execution time is reached for this job, Actions cancels the workflow run.
# # This emits a notification for the first responder to triage.
# - name: Send Slack notification if workflow is cancelled
# uses: someimportantcompany/github-actions-slack-message@f8d28715e7b8a4717047d23f48c39827cacad340
# if: cancelled()
# with:
# channel: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }}
# bot-token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }}
# color: failure
# text: 'The new translation batch for ${{ matrix.language }} was cancelled.'
# # Emit a notification for the first responder to triage if the workflow failed.
# - name: Send Slack notification if workflow failed
# uses: someimportantcompany/github-actions-slack-message@f8d28715e7b8a4717047d23f48c39827cacad340
# if: failure()
# with:
# channel: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }}
# bot-token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }}
# color: failure
# text: 'The new translation batch for ${{ matrix.language }} failed.'

54
.github/workflows/crowdin-cleanup.yml vendored
View file

@@ -1,54 +0,0 @@
name: Crowdin Cleanup
# **What it does**: Homogenizes localized non-translatable frontmatter after every push by the octoglot bot to the translations branch.
# **Why we have it**: So Crowdin doesn't break frontmatter in production.
# **Who does it impact**: Docs engineering and international expansion.
on:
workflow_dispatch:
push:
branches:
- translations
permissions:
contents: write
# This allows a subsequently queued workflow run to interrupt previous runs
concurrency:
group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}'
cancel-in-progress: true
jobs:
homogenize_frontmatter:
name: Homogenize frontmatter
# Only run this after octoglot commits or when a Hubber is running this using the workflow dispatch button.
if: github.repository == 'github/docs-internal' && (github.event.pusher.name == 'octoglot' || github.event_name == 'workflow_dispatch')
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748
- name: Setup Node
uses: actions/setup-node@17f8bd926464a1afa4c6a11669539e9c1ba77048
with:
node-version: '16.15.0'
cache: npm
- name: Install dependencies
run: npm ci
- name: Homogenize frontmatter
run: script/i18n/homogenize-frontmatter.js
- name: Check in homogenized files
uses: EndBug/add-and-commit@050a66787244b10a4874a2a5f682130263edc192
with:
# The arguments for the `git add` command
add: 'translations'
# The message for the commit
message: 'Run script/i18n/homogenize-frontmatter.js'
env:
# Disable pre-commit hooks; they don't play nicely with add-and-commit
HUSKY: '0'

View file

@@ -1,72 +0,0 @@
name: Copy REST API issue to docs-content
# **What it does**: Copies an issue in the open source repo to the docs-content repo, comments on and closes the original issue
# **Why we have it**: REST API updates cannot be made in the open source repo. Instead, we copy the issue to an internal issue (we do not transfer so that the issue does not disappear for the contributor) and close the original issue.
# **Who does it impact**: Open source and docs-content maintainers
on:
issues:
types:
- labeled
permissions:
contents: none
jobs:
transfer-issue:
name: Transfer issue
runs-on: ubuntu-latest
if: (github.event.label.name == 'localization ' && github.repository == 'github/docs')
steps:
- name: Check if this run was triggered by a member of the docs team
uses: actions/github-script@2b34a689ec86a68d8ab9478298f91d5401337b7d
id: triggered-by-member
with:
github-token: ${{secrets.DOCUBOT_READORG_REPO_WORKFLOW_SCOPES}}
result-encoding: string
script: |
const triggerer_login = context.payload.sender.login
const teamMembers = await github.request(
`/orgs/github/teams/docs/members?per_page=100`
)
const logins = teamMembers.data.map(member => member.login)
if (logins.includes(triggerer_login)) {
console.log(`This workflow was triggered by ${triggerer_login} (on the docs team).`)
return 'true'
}
console.log(`This workflow was triggered by ${triggerer_login} (not on the docs team), so no action will be taken.`)
return 'false'
- name: Exit if not triggered by a docs team member
if: steps.triggered-by-member.outputs.result == 'false'
run: |
echo Aborting. This workflow must be triggered by a member of the docs team.
exit 1
- name: Create an issue in the localization repo
run: |
new_issue_url="$(gh issue create --title "$ISSUE_TITLE" --body "$ISSUE_BODY" --repo github/localization)"
echo 'NEW_ISSUE='$new_issue_url >> $GITHUB_ENV
env:
GITHUB_TOKEN: ${{secrets.DOCUBOT_READORG_REPO_WORKFLOW_SCOPES}}
ISSUE_TITLE: ${{ github.event.issue.title }}
ISSUE_BODY: ${{ github.event.issue.body }}
- name: Comment on the new issue
run: gh issue comment $NEW_ISSUE --body "This issue was originally opened in the open source repo as $OLD_ISSUE"
env:
GITHUB_TOKEN: ${{secrets.DOCUBOT_READORG_REPO_WORKFLOW_SCOPES}}
NEW_ISSUE: ${{ env.NEW_ISSUE }}
OLD_ISSUE: ${{ github.event.issue.html_url }}
- name: Comment on the old issue
run: gh issue comment $OLD_ISSUE --body "Thanks for opening this issue! Unfortunately, we are not able to accept issues for translated content. Our translation process involves an integration with an external service at crowdin.com, where all translation activity happens. We hope to eventually open up the translation process to the open-source community, but we're not there yet. See https://github.com/github/docs/blob/main/contributing/types-of-contributions.md#earth_asia-translations for more information."
env:
GITHUB_TOKEN: ${{secrets.DOCUBOT_READORG_REPO_WORKFLOW_SCOPES}}
OLD_ISSUE: ${{ github.event.issue.html_url }}
- name: Close the old issue
run: gh issue close $OLD_ISSUE
env:
GITHUB_TOKEN: ${{secrets.DOCUBOT_READORG_REPO_WORKFLOW_SCOPES}}
OLD_ISSUE: ${{ github.event.issue.html_url }}

View file

@@ -1,33 +0,0 @@
files:
- source: /content/**/*.md
translation: /translations/%locale%/%original_path%/%original_file_name%
# See lib/page-data.js for a matching list of prefix exceptions
# Try to keep these in sync when editing in either location.
ignore:
- '/content/README.md'
- '/content/early-access'
- '/content/site-policy/site-policy-deprecated'
- '/content/github/index'
- '/content/search'
excluded_target_languages: ['de', 'ko', 'ru']
- source: /data/**/*.yml
translation: /translations/%locale%/%original_path%/%original_file_name%
excluded_target_languages: ['de', 'ko', 'ru']
- source: /data/**/*.md
translation: /translations/%locale%/%original_path%/%original_file_name%
ignore:
- '/data/README.md'
- '/data/reusables/README.md'
- '/data/variables/product.yml'
- '/data/variables/README.md'
- '/data/early-access'
- '/data/graphql'
- '/data/products.yml'
excluded_target_languages: ['de', 'ko', 'ru']
# These end up as env vars used by the GitHub Actions workflow
project_id_env: CROWDIN_PROJECT_ID
api_token_env: CROWDIN_PERSONAL_TOKEN
# https://support.crowdin.com/configuration-file-v3/#saving-directory-structure-on-server
preserve_hierarchy: true
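For illustration, with preserve_hierarchy enabled and locale es-ES, the first files mapping above expands roughly as follows (the content path is a hypothetical example):

# source:      /content/actions/index.md
# translation: /translations/es-ES/content/actions/index.md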

View file

@@ -2,7 +2,7 @@
This directory contains data files that are parsed and made available to pages in the `site.data` object.
All YML and Markdown files in this directory are configured to be translated on Crowdin by default.
All YML and Markdown files in this directory are configured to be translated by default.
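As a rough illustration of that mapping (the file and key below are hypothetical examples), a value defined in a data file is addressed by its path under `site.data`:

data/variables/product.yml  ->  site.data.variables.product.<key>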
## Features
@@ -10,7 +10,7 @@ Feature files are used for feature-based versioning. See [features/README.md](fe
## Glossaries
We provide a customer-facing glossary on the site. Other glossary files are used by our Crowdin integration. See [glossaries/README.md](glossaries/README.md).
We provide a customer-facing glossary on the site. Other glossary files are used by our translation pipeline. See [glossaries/README.md](glossaries/README.md).
## GraphQL

View file

@@ -1,8 +1,5 @@
# Glossaries
The following files comprise our [Crowdin Glossary](https://support.crowdin.com/glossary/):
* `external.yml` contains customer-facing glossary entries.
* Strings within `external.yml` support Liquid conditionals. See [contributing/liquid-helpers.md](/contributing/liquid-helpers.md).
* `internal.yml` contains entries used by translators only. These terms are displayed in the Crowdin UI to give translators additional context about what they're translating, plus a suggested localized string for that term.
* `candidates.yml` contains terms that should potentially be in either the internal or external glossary but haven't been defined yet.
* `candidates.yml` contains terms that should potentially be in the external glossary but haven't been defined yet.

View file

@@ -1,703 +0,0 @@
- term: 2-up
description: The default mode of viewing images on GitHub.
- term: alternate object database
description: >-
Via the alternates mechanism, a repository can inherit part of its object
database from another object database, which is called an "alternate".
- term: AMI
description: >-
Amazon Machine Image. A virtual appliance for use with the Amazon Elastic
Compute Cloud.
- term: anonymized image URL
description: >-
An anonymous URL proxy for each image that hides your browser details and
related information from other users.
- term: apex domain
description: A root domain that does not contain a subdomain part.
- term: API
description: >-
Application programming interface. A set of clearly defined methods of
communication between various software components.
- term: API token
description: >-
A token that is used in place of a password in scripts and on the command
line.
- term: app
description: >-
Third-party service that integrates with GitHub. This generally refers to
OAuth applications or GitHub Apps. This is also referred to as an app.
- term: application
description: >-
Third-party service that integrates with GitHub. This generally refers to
OAuth applications or GitHub Apps. This is also referred to as an app.
- term: argument
description: 'In GraphQL, a set of key-value pairs attached to a specific field.'
- term: AsciiDoc
description: >-
A text document format for writing notes, documentation, articles, books,
ebooks, slideshows, web pages, man pages and blogs.
- term: assets
description: 'Individual files such as images, photos, videos, and text files.'
- term: Atom feed
description: A lightweight XML format allowing for easy syndication of web content.
- term: audit log
description: >-
In an organization, the audit log includes details about activities
performed in the organization, such as who performed the action, what the
action was, and when it was performed.
- term: avatar
description: >-
A custom image users upload to GitHub to identify their activity, usually
along with their username. This is also referred to as a profile photo.
- term: AWS
description: Amazon Web Services. A secure cloud services platform.
- term: Azure
description: A Microsoft cloud-computing platform.
- term: Azure DevOps
description: >-
A Microsoft product offering source code hosting, issues, CI/CD
pipelines, and other developer services. The on-premises version
was formerly known as Team Foundation Server. The cloud-hosted
version was formerly known as Visual Studio Team Services.
- term: bare repository
description: >-
A bare repository is normally an appropriately named directory with a .git
suffix that does not have a locally checked-out copy of any of the files
under revision control. That is, all of the Git administrative and control
files that would normally be present in the hidden .git sub-directory are
directly present in the repository.git directory instead, and no other files
are present and checked out. Usually publishers of public repositories make
bare repositories available.
- term: BFG repo cleaner
description: BFG. A third-party tool that cleanses data from your Git repository history.
- term: blob object
description: 'Untyped object, e.g. the contents of a file.'
- term: bot
description: A software application that runs automated tasks.
- term: Bundler
description: A way to manage Ruby gems that an application depends on.
- term: camo
description: >-
An SSL image proxy to prevent mixed content warnings on secure pages served
from GitHub.
- term: chain
description: >-
A list of objects, where each object in the list contains a reference to its
successor (for example, the successor of a commit could be one of its
parents).
- term: CIDR notation
description: A compact representation of an IP address and its associated routing prefix.
- term: CLI
description: Command line interface.
- term: CNAME record
description: >-
Canonical Name record. A type of resource record in the Domain Name System
(DNS) used to specify that a domain name is an alias for another domain (the
'canonical' domain).
- term: conditional request
description: >-
In the REST API, an HTTP method that is only responded to in certain
circumstances.
- term: connection
description: 'In GraphQL, a way to query related objects as part of the same call.'
- term: core Git
description: >-
Fundamental data structures and utilities of Git. Exposes only limited
source code management tools.
- term: CPU
description: Central processing unit.
- term: credential helper
description: A program that stores and fetches Git credentials.
- term: creole
description: >-
A lightweight markup language, aimed at being a common markup language for
wikis, enabling and simplifying the transfer of content between different
wiki engines.
- term: CSV
description: Comma-separated files.
- term: DAG
description: >-
Directed acyclic graph. The commit objects form a directed acyclic graph,
because they have parents (directed), and the graph of commit objects is
acyclic (there is no chain which begins and ends with the same object).
- term: dangling object
description: >-
An unreachable object which is not reachable even from other unreachable
objects; a dangling object has no references to it from any reference or
object in the repository.
- term: data pack
description: >-
Storage and bandwidth package that users can purchase. Each data pack
provides 50 GB of bandwidth and 50 GB for storage.
- term: DELETE
description: A type of method in the REST API
- term: DHCP
description: >-
Dynamic Host Configuration Protocol (DHCP). A client/server protocol that
automatically provides an Internet Protocol (IP) host with its IP address
and other related configuration information such as the subnet mask and
default gateway.
- term: directive
description: >-
In GraphQL, a way to affect the execution of a query in any way the server
desires.
- term: directory
description: The list you get with the command "ls".
- term: disaster recovery
description: >-
Also known as DR. Tools and processes that recover technology infrastructure
and systems following a human or natural disaster.
- term: DNS provider
description: >-
A company that allows users to buy and register a unique domain name and
connect that name to an IP (Internet Protocol) address by pointing your
domain name to an IP address or a different domain name.
- term: DSA
description: Digital Signature Algorithm. A processing standard for digital signatures.
- term: DSA key
description: Public and private keys used in DSA.
- term: Early Access Program
description: >-
A GitHub program that individuals and organizations enter into to receive
pre-released features.
- term: EBS
description: >-
Amazon Elastic Block Store. Provides persistent block storage volumes for
use with Amazon EC2 instances in the AWS Cloud.
- term: EC2
description: >-
Amazon Elastic Compute Cloud. A web service that provides secure, resizable
compute capacity in the cloud.
- term: edge
description: 'In GraphQL, connections between nodes.'
- term: EIP
description: Elastic IP. A static IPv4 address designed for dynamic cloud computing.
- term: ElasticSearch
description: A search engine based on Lucene.
- term: evil merge
description: >-
An evil merge is a merge that introduces changes that do not appear in any
parent.
- term: exclude
description: >-
After a path matches any non-exclude pathspec, it will be run through all
exclude pathspec (magic signature: ! or its synonym ^). If it matches, the
path is ignored. When there is no non-exclude pathspec, the exclusion is
applied to the result set as if invoked without any pathspec.
- term: FIDO U2F
description: >-
An open authentication standard that strengthens and simplifies two-factor
authentication using specialized USB or NFC devices based on similar
security technology found in smart cards.
- term: field
description: 'In GraphQL, a unit of data you can retrieve from an object.'
- term: file system
description: >-
Linus Torvalds originally designed Git to be a user space file system, i.e.
the infrastructure to hold files and directories. That ensured the
efficiency and speed of Git.
- term: flame war
description: A heated and abusive discussion online between users.
- term: fragment
description: 'In GraphQL, reusable units that let you construct sets of fields.'
- term: GCE
description: Google Compute Engine.
- term: gem
description: A command line tool that can install libraries and manage RubyGems.
- term: Gemfile
description: A format for describing gem dependencies for Ruby programs.
- term: GET
description: A type of method in the REST API
- term: GeoJSON
description: A format for encoding a variety of geographic data structures.
- term: GitHub Marketplace Developer Agreement
description: An agreement users sign when using GitHub Marketplace.
- term: GPG
description: >-
GNU Privacy Guard. Encryption software that you can use to encrypt (and
decrypt) files that contain sensitive data
- term: GPG key
description: An encryption key used with GPG.
- term: hash
description: synonym for object name
- term: head
description: >-
A named reference to the commit at the tip of a branch. Heads are stored in
a file in $GIT_DIR/refs/heads/ directory, except when using packed refs.
- term: HEAD
description: A type of method in the REST API
- term: headers
description: >-
In the REST API, a required component of the message that defines the
metadata of the transaction.
- term: health check
description: >-
A way to allow a load balancer to stop sending traffic to a node that is not
responding if a pre-configured check fails on that node.
- term: HTTP verb
description: An HTTP method.
- term: Hyper-V
description: >-
A Microsoft product that creates virtual machines on x86-64 systems running
Windows.
- term: hypermedia
description: 'In the REST API, links from one resource state to another.'
- term: icase
description: Case insensitive match.
- term: implementation
description: 'In GraphQL, how an object inherits from an interface.'
- term: index
description: >-
A collection of files with stat information, whose contents are stored as
objects. The index is a stored version of your working tree. Truth be told,
it can also contain a second, and even a third version of a working tree,
which are used when merging.
- term: index entry
description: >-
The information regarding a particular file, stored in the index. An index
entry can be unmerged, if a merge was started, but not yet finished (i.e. if
the index contains multiple versions of that file).
- term: introspection
description: >-
Also referred to as "introspective." A way to ask a GraphQL schema for
information about what queries it supports.
- term: iPython notebook
description: >-
A web-based application that captures the whole computation process:
developing, documenting, and executing code, as well as communicating the
results.
- term: JIRA
description: An Atlassian product that tracks issues.
- term: Jupyter notebook
description: Notebook that contains both code and rich text elements.
- term: kernel
description: A computer program that is the core of a computer's operating system.
- term: kramdown
description: Jekyll's 3.0.0 default Markdown processor.
- term: LDAP
description: Lightweight Directory Access Protocol.
- term: linter
description: A program that verifies code quality.
- term: Liquid
description: A templating language that's used to load dynamic content.
- term: load balancer
description: >-
A device that acts as a reverse proxy and distributes network or application
traffic across a number of servers.
- term: media type
description: A two-part identifier for file formats and format contents.
- term: MediaWiki
description: >-
A free and open-source wiki software written in the PHP programming language
that stores the contents into a database.
- term: Mercurial
description: 'A free, distributed source control management tool.'
- term: metadata
description: A set of data that describes and gives information about other data.
- term: MIME-type
description: >-
Multipurpose Internet Mail Extensions. A way of identifying files according
to their nature and format.
- term: mutations
description: >-
In GraphQL, a way to define GraphQL operations that change data on the
server.
- term: nameserver
description: >-
A server on the internet specialized in handling queries regarding the
location of a domain name's various services.
- term: NFC
description: >-
Near Field Communication. A set of communication protocols that enable two
electronic devices, one of which is usually a portable device such as a
smartphone, to establish communication by bringing them within a certain
range of each other.
- term: node
description: >-
An active electronic device that is attached to a network, and is capable of
creating, receiving, or transmitting information over a communications
channel.
- term: node
description: 'In GraphQL, a generic term for an object.'
- term: NTP
description: Network Time Protocol.
- term: object
description: >-
The unit of storage in Git. It is uniquely identified by the SHA-1 of its
contents. Consequently, an object can not be changed.
- term: object database
description: >-
Stores a set of "objects", and an individual object is identified by its
object name. The objects usually live in $GIT_DIR/objects/.
- term: object identifier
description: synonym for object name
- term: object name
description: >-
The unique identifier of an object. The object name is usually represented
by a 40 character hexadecimal string. Also colloquially called SHA-1.
- term: object type
description: >-
One of the identifiers "commit", "tree", "tag" or "blob" describing the type
of an object.
- term: octopus
description: To merge more than two branches.
- term: onion skin
description: >-
A way to view images on GitHub by decreasing the opacity of an overlaid
replacement image.
- term: OOM
description: Out of memory.
- term: Open Stack
description: A software platform for cloud computing.
- term: OpenSSH
description: >-
A suite of security-related network-level utilities based on the Secure
Shell (SSH) protocol.
- term: ordered list
description: A numbered list.
- term: Org
description: >-
A plain-text system for keeping notes, maintaining TODO lists, planning
projects, and authoring documents.
- term: pack
description: >-
A set of objects which have been compressed into one file (to save space or
to transmit them efficiently).
- term: pack index
description: >-
The list of identifiers, and other information, of the objects in a pack, to
assist in efficiently accessing the contents of a pack. Pathspecs are used
on the command line of "git ls-files", "git ls-tree", "git add", "git grep",
"git diff", "git checkout", and many other commands to limit the scope of
operations to some subset of the tree or worktree.
- term: parameter
description: >-
In the REST API, data that is either sent in the request or received in the
response as part of an input or output parameter.
- term: parent
description: >-
A commit object contains a (possibly empty) list of the logical
predecessor(s) in the line of development, i.e. its parents.
- term: passphrase
description: >-
A sequence of words or other text used to control access to a computer
system, program, or data.
- term: PATCH
description: A type of method in the REST API
- term: pathspec
description: Pattern used to limit paths in Git commands.
- term: PEM
description: Privacy Enhanced Mail
- term: persistent identifier
description: >-
Also known as Digital Object Identifiers (DOIs). Globally unique numeric
and/or character strings that reference a digital object. Persistent
identifiers can be actionable in that they enable a user to access the
digital resource via a persistent link.
- term: pickaxe
description: >-
An option to the diffcore routines that help select changes that add or
delete a given text string. With the --pickaxe-all option, it can be used to
view the full changeset that introduced or removed, say, a particular line
of text.
- term: plugin
description: A software component that adds a specific feature to an existing program.
- term: Pod
description: Plain Old Documentation. A mark-up language used by perl developers.
- term: pointer file
description: A reference that points to an actual file.
- term: port
description: An endpoint of communication in an operating system.
- term: priority question
description: >-
Questions for GitHub support from organizations on the Business plan.
Questions must meet the criteria set forth by GitHub to qualify as a
priority question.
- term: priority response
description: >-
Answers from GitHub support for priority questions from organizations on the
Business plan.
- term: polling
description: >-
Regular automatic checks of other programs or devices by one program or
device to see what state they are in.
- term: POST
description: A type of method in the REST API
- term: Pre-release Program
description: >-
GitHub program that allows users to apply new features and functionality
before they're officially launched.
- term: PUT
description: A type of method in the REST API
- term: QCOW2
description: An image format.
- term: QR code
description: >-
Quick Response code. A barcode is a machine-readable optical label that
contains information about the item to which it is attached.
- term: queries
description: 'In GraphQL, a way to ask for specific fields on objects.'
- term: rate limit
description: The process by which an API rejects requests.
- term: RDoc
description: An embedded documentation generator for the Ruby programming language.
- term: reachable
description: >-
All of the ancestors of a given commit are said to be "reachable" from that
commit. More generally, one object is reachable from another if we can reach
the one from the other by a chain that follows tags to whatever they tag,
commits to their parents or trees, and trees to the trees or blobs that they
contain.
- term: ref
description: >-
A name that begins with refs/ (e.g. refs/heads/master) that points to an
object name or another ref (the latter is called a symbolic ref).
- term: reflog
description: A reflog shows the local "history" of a ref.
- term: refspec
description: >-
A "refspec" is used by fetch and push to describe the mapping between remote
ref and local ref.
- term: relative link
description: A link that is relative to the current file.
- term: remote-tracking branch
description: A ref that is used to follow changes from another repository.
- term: REST API
description: >-
An API that defines a set of functions so developers can perform requests
and receive responses via HTTP.
- term: reStructured text
description: A what-you-see-is-what-you-get plaintext markup syntax and parser system.
- term: revision
description: Synonym for commit.
- term: rewind
description: >-
To throw away part of the development, i.e. to assign the head to an earlier
revision.
- term: root endpoint
description: 'In the REST API, the directory that all endpoints are under.'
- term: RSA
description: Algorithm used to encrypt user data using a public key and a private key.
- term: RSA key
description: A private key based on the RSA algorithm.
- term: SAML
description: >-
Security Assertion Markup Language. An XML-based, open-standard data format
for exchanging authentication and authorization data between parties, in
particular, between an identity provider and a service provider.
- term: SAN
description: Subject Alternative Name
- term: Sass
description: A CSS extension language.
- term: schema
description: Metadata that tells us how our data is structured.
- term: SCIM
description: >-
System for Cross-domain Identity Management. An open standard for automating
the exchange of user identity information between identity domains.
- term: SCM
description: Source code management (tool).
- term: SCSS
description: A CSS extension language.
- term: service account
description: >-
A special user account that an application or service uses to interact with
the operating system.
- term: SHA-1
description: >-
"Secure Hash Algorithm 1"; a cryptographic hash function. In the context of
Git used as a synonym for object name.
- term: shell
description: A user interface for access to an operating system's services.
- term: shallow repository
description: >-
A shallow repository has an incomplete history some of whose commits have
parents cauterized away (in other words, Git is told to pretend that these
commits do not have the parents, even though they are recorded in the commit
object). This is sometimes useful when you are interested only in the recent
history of a project even though the real history recorded in the upstream
is much larger. A shallow repository is created by giving the --depth option
to git-clone(1), and its history can be later deepened with git-fetch(1).
- term: SMS
description: A text message.
- term: SMTP
description: Simple Mail Transfer Protocol. A standard for email transmission.
- term: SNMP
description: Simple Network Management Protocol. A protocol for network management.
- term: spam
description: Unsolicited communications from another user.
- term: SSD
description: Solid-state drive.
- term: SSH
description: >-
Secure Shell (SSH) is a cryptographic network protocol for operating network
services securely over an unsecured network.
- term: ssh-agent
description: A program to hold private keys used for public key authentication.
- term: SSH Key
description: >-
SSH keys are a way to identify yourself to an online server, using an
encrypted message. It's as if your computer has its own unique password to
another service. GitHub uses SSH keys to securely transfer information from
GitHub.com to your computer.
- term: SSH key fingerprint
description: >-
Identifies and verifies the host you're connecting to and is based on the
host's Public key.
- term: SSL
description: Secure Sockets Layer.
- term: static site generator
description: >-
A program that generates an HTML-only website using raw data (such as
Markdown files) and templates.
- term: String
description: An object type that denotes plain text
- term: STL file
description: >-
STL (STereoLithography) is a file format native to the stereolithography CAD
software created by 3D Systems.
- term: subdomain
description: >-
A customizable and optional part of a domain name located before the root or
apex domain that looks like a domain prefix.
- term: submodule
description: >-
A repository that holds the history of a separate project inside another
repository (the latter of which is called superproject).
- term: subproject
description: >-
A project that's developed and managed somewhere outside of your main
project.
- term: Subversion
description: An open source version control system.
- term: sudo mode
description: >-
A way for users to run programs with the security privileges of another
user. Users still provide their own password and are authenticated.
- term: superproject
description: >-
A repository that references repositories of other projects in its working
tree as submodules. The superproject knows about the names of (but does not
hold copies of) commit objects of the contained submodules.
- term: support bundle
description: >-
A gzip-compressed tar archive that contains important logs from your GitHub
Enterprise instance.
- term: swipe
description: A way to view portions of your GitHub image side by side.
- term: symlink
description: >-
A file that contains a reference to another file or directory in the form of
an absolute or relative path and that affects pathname resolution.
- term: symref
description: >-
Symbolic reference: instead of containing the SHA-1 id itself, it is of the
format ref: refs/some/thing and when referenced, it recursively dereferences
to this reference.
- term: tag
description: >-
A ref under refs/tags/ namespace that points to an object of an arbitrary
type (typically a tag points to either a tag or a commit object). A tag is
most typically used to mark a particular point in the commit ancestry chain.
- term: tag object
description: >-
An object containing a ref pointing to another object, which can contain a
message just like a commit object. It can also contain a (PGP) signature, in
which case it is called a "signed tag object".
- term: Team Foundation Server
description: >-
The former name of a Microsoft product that provides source code management and other team
services. Now known as Azure DevOps Server.
- term: Textile
description: >-
A lightweight markup language that uses a text formatting syntax to convert
plain text into structured HTML markup.
- term: TLS
description: Transport Layer Security.
- term: token
description: >-
Can be used in place of a password. Tokens can be personal access tokens,
OAuth tokens, or API tokens.
- term: TopoJSON
description: An extension of GeoJSON that encodes topology.
- term: TOTP application
description: >-
Time-based One-Time Password. This type of application automatically
generates an authentication code that changes after a certain period of
time.
- term: tree
description: >-
Either a working tree, or a tree object together with the dependent blob and
tree objects (i.e. a stored representation of a working tree).
- term: tree object
description: >-
An object containing a list of file names and modes along with refs to the
associated blob and/or tree objects. A tree is equivalent to a directory.
- term: TSV
description: Tab-separated files.
- term: two-factor authentication
description: >-
Also called 2FA. An extra layer of security that not only requires a
standard log in procedure with a username and password but also an
authentication code that's generated by an application on the user's
smartphone or sent as a text message (SMS).
- term: UFW
description: Ubuntu's default firewall configuration tool.
- term: unmerged index
description: An index which contains unmerged index entries.
- term: unordered list
description: A bulleted list.
- term: unreachable object
description: 'An object which is not reachable from a branch, tag, or any other reference.'
- term: URI
description: >-
Uniform Resource Identifier. A string of characters used to identify a
resource.
- term: UTF-8
description: A character encoding capable of encoding all possible Unicode code points.
- term: variable
description: 'In GraphQL, a way to make queries more dynamic and powerful.'
- term: VAT ID
description: A value added tax identification number used for tax purposes in the EU.
- term: verified email address
description: >-
An email address tied to a personal account that the user has verified is
valid with a security confirmation process.
- term: virtual machine
description: >-
An application environment that is installed on software and imitates
dedicated hardware. Also called a VM.
- term: VPC
description: Virtual private cloud. A virtual network dedicated to your AWS account.
- term: VPN
description: Virtual private network.
- term: VMware
description: >-
A Dell product that provides cloud computing and platform virtualization
software and services.
- term: allowlisted
description: >-
A list or register of entities that are being provided a particular
privilege, service, mobility, access or recognition. Entities on the list
will be accepted, approved and/or recognized.
- term: working directory
description: The directory of files you're currently working in.
- term: working tree
description: >-
The tree of actual checked out files. The working tree normally contains the
contents of the HEAD commit's tree, plus any local changes that you have
made but not yet committed.
- term: WYSIWYG
description: >-
What You See Is What You Get. An acronym meaning the text on the screen
looks exactly as it will when it's rendered.
- term: XenServer
description: A virtualization platform.
- term: YAML
description: >-
A human-readable data serialization language that is commonly used for
configuration files.
- term: Continuous Integration
description: Also abbreviated as CI
- term: Continuous Delivery
description: Also abbreviated as CD

View file

@@ -28,7 +28,7 @@ export default function getEnglishHeadings(page, context) {
)
if (!englishPage) return
// FIX there may be bugs if English headings are updated before Crowdin syncs up :/
// FIX there may be bugs if English headings are updated before translations sync up :/
const englishHeadings = getHeadings(englishPage.markdown)
if (!englishHeadings.length) return

View file

@@ -1,87 +0,0 @@
import walk from 'walk-sync'
import { Tokenizer } from 'liquidjs'
import { readFileSync } from 'fs'
import gitDiff from 'git-diff'
import _ from 'lodash'
function getGitDiff(a, b) {
return gitDiff(a, b, { flags: '--ignore-all-space' })
}
function getMissingLines(diff) {
return diff
.split('\n')
.filter((line) => line.startsWith('-'))
.map((line) => line.replace('-', ''))
}
function getExceedingLines(diff) {
return diff
.split('\n')
.filter((line) => line.startsWith('+'))
.map((line) => line.replace('+', ''))
}
export function languageFiles(language, folder = 'content') {
const englishFiles = walk(folder, { directories: false })
const languageFiles = walk(`${language.dir}/${folder}`, { directories: false })
return _.intersection(englishFiles, languageFiles).map((file) => `${folder}/${file}`)
}
export function compareLiquidTags(file, language) {
const translation = `${language.dir}/${file}`
const sourceTokens = getTokensFromFile(file).rejectType('html')
const otherFileTokens = getTokensFromFile(translation).rejectType('html')
const diff = sourceTokens.diff(otherFileTokens)
return {
file,
translation,
diff,
}
}
function getTokens(contents) {
const tokenizer = new Tokenizer(contents)
return new Tokens(...tokenizer.readTopLevelTokens())
}
export function getTokensFromFile(filePath) {
const contents = readFileSync(filePath, 'utf8')
try {
return new Tokens(...getTokens(contents))
} catch (e) {
const error = new Error(`Error parsing ${filePath}: ${e.message}`)
error.filePath = filePath
throw error
}
}
export class Tokens extends Array {
rejectType(tagType) {
return this.filter(
(token) => token.constructor.name.toUpperCase() !== `${tagType}Token`.toUpperCase()
)
}
onlyText() {
return this.map((token) => token.getText())
}
diff(otherTokens) {
const a = this.onlyText()
const b = otherTokens.onlyText()
const diff = getGitDiff(a.join('\n'), b.join('\n'))
if (!diff) {
return { count: 0, missing: [], exceeding: [], output: '' }
}
const missing = getMissingLines(diff)
const exceeding = getExceedingLines(diff)
const count = exceeding.length + missing.length
return { count, missing, exceeding, output: diff }
}
}

View file

@@ -15,7 +15,6 @@ const enterpriseServerVersions = versions.filter((v) => v.startsWith('enterprise
const renderOpts = { textOnly: true, encodeEntities: true }
// These are the exceptions to the rule.
// These URI prefixes should match what you'll find in crowdin.yml.
// If a URI starts with one of these prefixes, it basically means we don't
// bother to "backfill" a translation in its spot.
// For example, `/en/github/site-policy-deprecated/foo` works
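A minimal sketch of the prefix rule these comments describe (the prefix list and helper below are hypothetical, not this file's actual code):

// Hypothetical illustration only: skip backfilling when a URI's path, minus the
// leading language segment, starts with one of the excluded prefixes.
const noBackfillPrefixes = ['/github/site-policy-deprecated', '/early-access']
function shouldBackfill(uri) {
  const withoutLanguage = uri.replace(/^\/[a-z-]+/, '') // e.g. '/en/foo' -> '/foo'
  return !noBackfillPrefixes.some((prefix) => withoutLanguage.startsWith(prefix))
}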

View file

@@ -39,13 +39,6 @@ Usage: script/anonymize-branch.js <new-commit-message> [base-branch] Example: sc
---
### [`backfill-missing-localizations.js`](backfill-missing-localizations.js)
This script copies any English files that are missing from the translations directory into the translations directory. We only need to run this if problems occur with Crowdin's automatic sync.
---
### [`check-english-links.js`](check-english-links.js)
This script runs once per day via a scheduled GitHub Action to check all links in English content, not including deprecated Enterprise Server content. It opens an issue if it finds broken links. To exclude a link path, add it to `lib/excluded-links.js`.
@@ -370,8 +363,6 @@ Run this script to remove reusables and image files that exist in the repo but a
This is a convenience script for replacing the contents of translated files with the English content from their corresponding source file.
It's intended to be a workaround to temporarily bypass Crowdin parser bugs while we wait for translators to fix them.
Usage: script/i18n/reset-translated-file.js <filename>
Examples:

View file

@@ -1,40 +0,0 @@
#!/usr/bin/env node
// [start-readme]
//
// This script copies any English files that are missing from the translations directory into the translations directory.
// We only need to run this if problems occur with Crowdin's automatic sync.
//
// [end-readme]
import { fileURLToPath } from 'url'
import path from 'path'
import fs from 'fs'
import walk from 'walk-sync'
import mkdirp from 'mkdirp'
import languages from '../lib/languages.js'
const __dirname = path.dirname(fileURLToPath(import.meta.url))
const dirs = ['content', 'data']
for (const dir of dirs) {
const englishPath = path.join(__dirname, `../${dir}`)
const filenames = walk(englishPath).filter((filename) => {
return (
(filename.endsWith('.yml') || filename.endsWith('.md')) && !filename.endsWith('README.md')
)
})
for (const filename of filenames) {
for (const language of Object.values(languages)) {
if (language.code === 'en') continue
const fullPath = path.join(__dirname, '..', language.dir, dir, filename)
if (!fs.existsSync(fullPath)) {
console.log('missing', fullPath)
const englishFullPath = path.join(__dirname, '..', dir, filename)
await mkdirp(path.dirname(fullPath))
fs.writeFileSync(fullPath, fs.readFileSync(englishFullPath))
}
}
}
}

View file

@@ -4,4 +4,4 @@ This directory stores scripts that modify content and/or data files. Because
writers are updating content all the time, scripts in here require more
cross-team coordination and planning before they are run. Make sure to consider
whether a script added here also needs to be run on translation files or if we
can wait for the changes to come in organically via Crowdin.
can wait for the changes to come in through our translation automation.

View file

@@ -2,7 +2,7 @@
// [start-readme]
//
// Use this script as part of the Crowdin merge process to output a list of either parsing
// Use this script as part of the translation merge process to output a list of either parsing
// or rendering errors in translated files and run script/i18n/reset-translated-file.js on them.
//
// [end-readme]

View file

@@ -1,35 +0,0 @@
#!/usr/bin/env node
import { program } from 'commander'
import { compareLiquidTags } from '../../lib/liquid-tags/tokens.js'
import languages from '../../lib/languages.js'
program
.argument('<files...>', 'The file name(s) without the language dir. \nI.E. content/foo.md')
.description('Shows the differences of liquid tags between two files')
.requiredOption(
'-l, --language <language>',
`Choose one of these languages to compare: ${Object.keys(languages).filter((l) => l !== 'en')}`
)
.parse(process.argv)
function reportFileDifference(diff) {
console.log(`File: ${diff.file}`)
console.log(`Translation: ${diff.translation}`)
console.log(`Differences:`)
console.log(diff.diff.output)
}
function main() {
const files = program.args
const options = program.opts()
files.forEach((file) => {
const language = languages[options.language]
if (!language) throw new Error(`${options.language} is not a recognized language`)
const diff = compareLiquidTags(file, language)
reportFileDifference(diff)
})
}
main()

View file

@@ -1,86 +0,0 @@
#!/usr/bin/env node
import { program } from 'commander'
import fs from 'fs'
import languages from '../../lib/languages.js'
const defaultWorkflowUrl = [
process.env.GITHUB_SERVER_URL,
process.env.GITHUB_REPOSITORY,
'actions/runs',
process.env.GITHUB_RUN_ID,
].join('/')
const reportTypes = {
'pull-request-body': pullRequestBodyReport,
csv: csvReport,
}
program
.description('Reads a translation batch log and generates a report')
.requiredOption('--language <language>', 'The language to compare')
.requiredOption('--log-file <log-file>', 'The batch log file')
.requiredOption(
'--report-type <report-type>',
    'The report type, one of: ' + Object.keys(reportTypes).join(', ')
)
.option('--workflow-url <workflow-url>', 'The workflow url', defaultWorkflowUrl)
.parse(process.argv)
const options = program.opts()
const language = languages[options.language]
const { logFile, workflowUrl, reportType } = options
if (!Object.keys(reportTypes).includes(reportType)) {
throw new Error(`Invalid report type: ${reportType}`)
}
const logFileContents = fs.readFileSync(logFile, 'utf8')
const revertLines = logFileContents
.split('\n')
.filter((line) => line.match(/^-> reverted to English/))
.filter((line) => line.match(language.dir))
const reportEntries = revertLines.sort().map((line) => {
const [, file, reason] = line.match(/^-> reverted to English: (.*) Reason: (.*)$/)
return { file, reason }
})
function pullRequestBodyReport() {
const body = [
`New translation batch for ${language.name}. Product of [this workflow](${workflowUrl}).`,
'\n',
`## ${reportEntries.length} files reverted.`,
]
const filesByReason = {}
reportEntries.forEach(({ file, reason }) => {
filesByReason[reason] = filesByReason[reason] || []
filesByReason[reason].push(file)
})
Object.keys(filesByReason)
.sort()
.forEach((reason) => {
const files = filesByReason[reason]
body.push(`\n### ${reason}`)
body.push(`\n<details><summary>${files.length} files:</summary>\n`)
const checkBoxes = files.map((file) => `- [ ] ${file}`)
body.push(checkBoxes)
body.push('\n</details>')
})
return body.flat().join('\n')
}
function csvReport() {
const lines = reportEntries.map(({ file, reason }) => {
return [file, reason].join(',')
})
return ['file,reason', lines].flat().join('\n')
}
console.log(reportTypes[reportType]())
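Working backwards from the regular expression above, a batch-log line presumably has the shape '-> reverted to English: <path> Reason: <reason>'. A minimal sketch of the parse and the CSV row it would produce (sample data, not real log output):

// Hypothetical log line matching /^-> reverted to English: (.*) Reason: (.*)$/
const sampleLine = '-> reverted to English: translations/ja-JP/content/example.md Reason: broken liquid tags'
const [, file, reason] = sampleLine.match(/^-> reverted to English: (.*) Reason: (.*)$/)
// file   -> 'translations/ja-JP/content/example.md'
// reason -> 'broken liquid tags'
// The csv report type would then emit:
// file,reason
// translations/ja-JP/content/example.md,broken liquid tags

The pull-request-body report type groups the same entries by reason under collapsible <details> sections instead.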

View file

@ -1,57 +0,0 @@
#!/usr/bin/env node
import { program } from 'commander'
import { execSync } from 'child_process'
import { languageFiles, compareLiquidTags } from '../../lib/liquid-tags/tokens.js'
import languages from '../../lib/languages.js'
program
  .description('Reset translated files that contain broken Liquid tags')
.requiredOption('-l, --language <language>', 'The language to compare')
.option('-d, --dry-run', 'Just pretend to reset files')
.parse(process.argv)
function resetFiles(files) {
  console.log(`Resetting ${files.length} files:`)
const dryRun = program.opts().dryRun ? '--dry-run' : ''
files.forEach((file) => {
execSync(
`script/i18n/reset-translated-file.js ${file} --reason="broken liquid tags" ${dryRun}`,
{ stdio: 'inherit' }
)
})
}
async function main() {
const options = program.opts()
const language = languages[options.language]
if (!language) {
throw new Error(`Language ${options.language} not found`)
}
const files = [languageFiles(language, 'content'), languageFiles(language, 'data')].flat()
const brokenFiles = []
files.forEach((file) => {
try {
      // it throws an error if the syntax is invalid
const comparison = compareLiquidTags(file, language)
if (comparison.diff.count === 0) {
return
}
brokenFiles.push(comparison.translation)
} catch (e) {
brokenFiles.push(e.filePath)
}
})
await resetFiles(brokenFiles)
}
main()

View file

@ -1,67 +0,0 @@
#!/usr/bin/env node
// [start-readme]
//
// Use this script as part of the Crowdin merge process to get the list of known broken
// files and run script/i18n/reset-translated-file.js on them.
//
// [end-readme]
import dotenv from 'dotenv'
import Github from '../helpers/github.js'
import { execSync } from 'child_process'
import uniq from 'lodash/uniq.js'
import { existsSync } from 'fs'
dotenv.config()
const github = Github()
// Check for required PAT
if (!process.env.GITHUB_TOKEN) {
console.error('Error! You must have a GITHUB_TOKEN set in an .env file to run this script.')
process.exit(1)
}
main()
async function main() {
// Get body text of OP from https://github.com/github/localization-support/issues/489.
const {
data: { body },
} = await github.issues.get({
owner: 'github',
repo: 'localization-support',
issue_number: '489',
})
// Get the list of broken files from the body text.
const brokenFiles = body.replace(/^[\s\S]*?## List of Broken Translations/m, '').trim()
// De-duplicate the list of broken files and filter out any that don't exist in the repo.
const brokenFilesArray = uniq(
brokenFiles
.split('\n')
.filter((line) => !line.toLowerCase().startsWith('- [x]'))
.map((line) => line.replace('- [ ] ', '').trim())
.filter((line) => existsSync(line))
)
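For context, a minimal sketch of the issue-body shape this parsing expects (the text is invented; the real OP lives in localization-support#489). uniq is replaced by a Set and the existsSync filter is omitted so the sketch stays self-contained:

const body = `Some preamble text.
## List of Broken Translations
- [x] translations/ja-JP/content/already-fixed.md
- [ ] translations/ja-JP/content/still-broken.md
- [ ] translations/ja-JP/content/still-broken.md`
const remaining = body.replace(/^[\s\S]*?## List of Broken Translations/m, '').trim()
const result = [
  ...new Set(
    remaining
      .split('\n')
      .filter((line) => !line.toLowerCase().startsWith('- [x]'))
      .map((line) => line.replace('- [ ] ', '').trim())
  ),
]
// -> ['translations/ja-JP/content/still-broken.md']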
// Revert each of the broken files.
// This is done sequentially to ensure only one Git operation is running at any given time.
brokenFilesArray.forEach((file) => {
console.log(`Resetting ${file}`)
execSync(
`script/i18n/reset-translated-file.js ${file} --reason="Listed in localization-support#489"`,
{ stdio: 'inherit' }
)
})
// Print a message with next steps.
console.log(`
Success!
Verify changes with git status and then run:
git commit --no-verify -m "Reset broken translated files to English"
`)
}

View file

@ -5,9 +5,6 @@
// This is a convenience script for replacing the contents of translated
// files with the English content from their corresponding source file.
//
// It's intended to be a workaround to temporarily bypass Crowdin parser bugs
// while we wait for translators to fix them.
//
// Usage:
// script/i18n/reset-translated-file.js <filename>
//

View file

@ -28,9 +28,9 @@ async function main() {
const contextByLanguage = {}
for (const lang in languages) {
const langObj = languages[lang]
const [crowdinLangCode] = langObj.dir === '' ? 'en' : langObj.dir.split('/').slice(1)
if (!crowdinLangCode) continue
contextByLanguage[crowdinLangCode] = {
    const [langCode] = langObj.dir === '' ? ['en'] : langObj.dir.split('/').slice(1)
if (!langCode) continue
contextByLanguage[langCode] = {
site: siteData[langObj.code].site,
currentLanguage: langObj.code,
currentVersion: 'free-pro-team@latest',

View file

@ -1,121 +0,0 @@
#!/usr/bin/env node
// [start-readme]
//
// Use this script as the last step of the Crowdin merge process to:
// 1. Add newly found broken translated files to the localization-support issue OP.
// 2. Add a comment on the issue with more details.
//
// [end-readme]
import dotenv from 'dotenv'
import Github from '../helpers/github.js'
import { execSync } from 'child_process'
dotenv.config()
const github = Github()
// Check for required PAT
if (!process.env.GITHUB_TOKEN) {
console.error('Error! You must have a GITHUB_TOKEN set in an .env file to run this script.')
process.exit(1)
}
const fixableErrorsLog = '~/docs-translation-errors-fixable.txt'
const parsingErrorsLog = '~/docs-translation-parsing-error.txt'
const renderingErrorsLog = '~/docs-translation-rendering-error.txt'
// Get just the fixable files:
const fixable = execSync(
`cat ${fixableErrorsLog} | egrep "^translations/.*/(.+.md|.+.yml)$" | sed -e 's/^/- [ ] /' | uniq`
).toString()
// Get a list of files to be added to the body of the issue
const filesToAdd = execSync(
`cat ${parsingErrorsLog} ${renderingErrorsLog} | egrep "^translations/.*/(.+.md|.+.yml)$" | sed -e 's/^/- [ ] /' | uniq`
).toString()
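As a rough Node illustration of what that egrep/sed pipeline produces (sample paths invented; the real input comes from the temporary log files above, and the shell uniq only collapses adjacent duplicates, whereas the Set here removes them all):

const rawLog = [
  'some unrelated log output',
  'translations/ja-JP/content/example.md',
  'translations/ja-JP/content/example.md',
  'translations/ja-JP/data/reusables/example.yml',
].join('\n')
const checklist = [
  ...new Set(
    rawLog
      .split('\n')
      .filter((line) => /^translations\/.*\/(.+\.md|.+\.yml)$/.test(line))
      .map((line) => `- [ ] ${line}`)
  ),
].join('\n')
// -> '- [ ] translations/ja-JP/content/example.md\n- [ ] translations/ja-JP/data/reusables/example.yml'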
// Cat the three error logs together
const allErrors = execSync('cat ~/docs-*').toString()
const comment = `
Did a fresh merge today!
<details>
<summary>In addition to the files in the PR body, these files also have errors, but can be fixed programmatically:</summary>
${fixable}
</details>
<details>
<summary>Here are the <b>new</b> errors:</summary>
\`\`\`
${allErrors}
\`\`\`
</details>
`
const owner = 'github'
const repo = 'localization-support'
const issueNumber = '489'
main()
async function main() {
await updateIssueBody()
await addNewComment()
console.log('Success! You can safely delete the temporary logfiles under ~/docs-*.')
}
async function updateIssueBody() {
// Get current body text of OP from https://github.com/github/localization-support/issues/489.
const {
data: { body },
} = await github.issues.get({
owner,
repo,
issue_number: issueNumber,
})
// Update the body with the list of newly broken files
const newBody = body + '\n' + filesToAdd
// Update the issue
try {
await github.issues.update({
owner,
repo,
issue_number: issueNumber,
body: newBody,
})
console.log(
'Added newly found broken files to OP of https://github.com/github/localization-support/issues/489!\n'
)
} catch (err) {
console.error(err)
}
}
async function addNewComment() {
try {
await github.issues.createComment({
owner,
repo,
issue_number: issueNumber,
body: comment,
})
console.log(
'Added comment to the end of https://github.com/github/localization-support/issues/489!\n'
)
} catch (err) {
console.error(err)
}
}

View file

@ -28,7 +28,7 @@ if (localizedFilenames.length) {
)
console.table(localizedFilenames.join('\n'))
console.error(
'The content in this directory is managed by our Crowdin integration and should not be edited directly in the repo.'
'The content in this directory is managed by our translation pipeline and should not be edited directly in the repo.'
)
console.error(
'For more information on how the localization process works, see translations/README.md'

View file

@ -5,10 +5,9 @@ describe('glossaries', () => {
test('are broken into external, internal, and candidates', async () => {
const keys = Object.keys(glossaries)
expect(keys).toHaveLength(3)
expect(keys).toHaveLength(2)
expect(keys).toContain('candidates')
expect(keys).toContain('external')
expect(keys).toContain('internal')
})
test('every entry has a valid term', async () => {
@ -17,7 +16,6 @@ describe('glossaries', () => {
}
expect(glossaries.external.every(hasValidTerm)).toBe(true)
expect(glossaries.internal.every(hasValidTerm)).toBe(true)
expect(glossaries.candidates.every(hasValidTerm)).toBe(true)
})
@ -29,14 +27,6 @@ describe('glossaries', () => {
})
})
test('internal glossary has entries, and they all have descriptions', async () => {
expect(glossaries.internal.length).toBeGreaterThan(20)
glossaries.internal.forEach((entry) => {
const message = `entry '${entry.term}' is missing a description`
expect(entry.description && entry.description.length > 0, message).toBe(true)
})
})
test('candidates all have a term, but no description', async () => {
expect(glossaries.candidates.length).toBeGreaterThan(20)
glossaries.candidates.forEach((entry) => {

View file

@ -1,5 +1,4 @@
import languages from '../../lib/languages.js'
import { execSync } from 'child_process'
import { jest } from '@jest/globals'
describe('files', () => {
@ -13,14 +12,4 @@ describe('files', () => {
expect(lang.hreflang).toMatch(/\w{2}/)
if (lang.redirectPatterns) expect(lang.redirectPatterns).toBeInstanceOf(Array)
})
// crowdin upload sources command fails if there are empty source files
// please refer to crowdin-support #117 for more details
it('should not contain empty files', () => {
const command = 'find content data -type f -empty'
const emptyFiles = execSync(command).toString().split('\n')
const disallowedEmptyFiles = emptyFiles.filter((file) => file.match(/\.(yml|md)$/))
expect(disallowedEmptyFiles).toEqual([])
})
})

View file

@ -2,7 +2,7 @@ import { expect } from '@jest/globals'
import path from 'path'
import { fileURLToPath } from 'url'
import { getTokensFromFile, Tokens } from '../../../lib/liquid-tags/tokens'
import { getTokensFromFile, Tokens } from '../../../script/i18n/msft-tokens'
const __dirname = path.dirname(fileURLToPath(import.meta.url))
function getFixturePath(name) {
@ -36,6 +36,7 @@ describe('getTokensFromFile', () => {
describe('.diff', () => {
let tokens
let otherTokens
let reverseTokens
const addTokens = (collection, elements) => {
elements.forEach((element) => {
@ -46,8 +47,10 @@ describe('getTokensFromFile', () => {
beforeEach(() => {
tokens = new Tokens()
otherTokens = new Tokens()
reverseTokens = new Tokens()
addTokens(tokens, ['apples', 'bananas', 'oranges'])
addTokens(otherTokens, ['apples', 'oranges'])
addTokens(reverseTokens, ['oranges', 'bananas', 'apples'])
})
it('shows elements that are missing', () => {
@ -69,6 +72,11 @@ describe('getTokensFromFile', () => {
expect(diff.count).toEqual(0)
})
it('shows no difference when tokens are in different order', () => {
const diff = tokens.diff(reverseTokens)
expect(diff.count).toEqual(0)
})
})
})
})
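The reverse-order test added above implies the msft-tokens diff treats token collections as unordered. A rough sketch of that idea (not the actual Tokens implementation) using plain Sets:

// Count elements present in one collection but not the other, ignoring order.
function unorderedDiffCount(a, b) {
  const setA = new Set(a)
  const setB = new Set(b)
  const missing = [...setA].filter((x) => !setB.has(x))
  const extra = [...setB].filter((x) => !setA.has(x))
  return missing.length + extra.length
}
unorderedDiffCount(['apples', 'bananas', 'oranges'], ['oranges', 'bananas', 'apples']) // -> 0
unorderedDiffCount(['apples', 'bananas', 'oranges'], ['apples', 'oranges'])            // -> 1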