# firefox-translations-training/Taskfile.yml
# (309 lines, 9.6 KiB, YAML)
# Task runner configuration (https://taskfile.dev). Schema version 3.
version: '3'

tasks:
poetry-install-*:
internal: true
desc: Install only the group need for the dependencies of a task.
vars:
GROUP: '{{index .MATCH 0}}'
cmds:
- poetry install --only {{.GROUP}} --no-root
clean-venvs:
desc: Remove the virtual envs created by the test runner.
cmds:
- rm -rf data/task-venvs/*
download-logs:
desc: Download the logs for taskcluster. Requires --task-group-id
summary: |
The logs will be saved to: ./data/taskcluster-logs
Example:
task download-logs -- --task-group-id GU9ZyWFhRDe_nxlAHcen8g
deps: [poetry-install-utils]
cmds:
- >-
poetry run python -W ignore utils/taskcluster_downloader.py
--mode=logs {{.CLI_ARGS}}
download-evals:
desc: Downloads evaluation results from Taskcluster
summary: |
The evals will be saved to: ./data/taskcluster-evals
Example: `task download-evals -- --task-group-id GU9ZyWFhRDe_nxlAHcen8g`
deps: [poetry-install-utils]
cmds:
- >-
poetry run python -W ignore utils/taskcluster_downloader.py
--mode=evals {{.CLI_ARGS}}
download-models:
desc: Downloads models from Taskcluster
summary: |
The models will be saved to: ./data/taskcluster-model
Example: `task download-models -- --task-group-id GU9ZyWFhRDe_nxlAHcen8g`
deps: [poetry-install-utils]
cmds:
- >-
poetry run python -W ignore utils/taskcluster_downloader.py
--mode=model {{.CLI_ARGS}}
config-generator:
desc: Create a training config for a language pair
summary: |
The models will be saved to: ./data/taskcluster-model
Example: `task config-generator -- en fi`
deps: [poetry-install-utils]
cmds:
- >-
PYTHONPATH=$(pwd) poetry run python -W ignore utils/config_generator.py {{.CLI_ARGS}}
build-mono-nllb:
desc: Build a monolingual NLLB datasets.
summary: |
The dataset will be saved to: ./data/nllb/nllb-mono-{lang}.txt.gz
Example: `task build-mono-nllb -- sl`
deps: [poetry-install-utils]
cmds:
- >-
PYTHONPATH=$(pwd) poetry run python -W ignore utils/build-mono-nllb.py {{.CLI_ARGS}}
opuscleaner:
desc: Run the opuscleaner tool.
deps: [poetry-install-opuscleaner]
cmds:
- poetry run opuscleaner-server serve --host=0.0.0.0 --port=8000
inference-clean:
desc: Clean build artifacts from the inference directory.
cmds:
- >-
task docker-run -- ./inference/scripts/clean.sh
inference-build:
desc: Build inference engine.
cmds:
- >-
task docker-run -- ./inference/scripts/build-local.sh
inference-test:
desc: Run inference tests.
cmds:
- >-
task docker-run -- ./inference/scripts/unit-tests.sh
inference-build-wasm:
desc: Build inference engine WASM.
cmds:
- >-
task docker-run -- ./inference/scripts/build-wasm.sh
lint-black:
desc: Checks the styling of the Python code with Black.
deps: [poetry-install-black]
cmds:
- ./utils/tasks/black-check.sh
lint-black-fix:
desc: Fixes the styling of the Python code with Black.
deps: [poetry-install-black]
cmds:
- poetry run black . {{.CLI_ARGS}}
lint-ruff:
desc: Lints the Python code with the ruff linter.
deps: [poetry-install-lint]
cmds:
- poetry run ruff --version
- poetry run ruff check . {{.CLI_ARGS}}
lint-ruff-fix:
desc: Fixes Python code lint errors with the ruff linter.
deps: [poetry-install-lint]
cmds:
- poetry run ruff --version
- poetry run ruff check . --fix {{.CLI_ARGS}}
lint-fix:
desc: Fix all automatically fixable errors. This is useful to run before pushing.
cmds:
- task: lint-black-fix
- task: lint-ruff-fix
lint:
desc: Run all available linting tools.
cmds:
- task: lint-black
- task: lint-ruff
test:
desc: Run python pytests in the current host.
summary: |
Some tests only pass in Docker. You can run this command outside of docker for
some of the tests, or after running `task docker` to run them in the docker image.
Without any arguments, it runs all of the tests searching the paths specifiied in
testpaths in pyproject.toml.
You can also specificy a specific test to run:
task test -- tests/test_alignments.py
cmds:
- poetry install --only tests --only utils --no-root
- PYTHONPATH="$(pwd):$(pwd)/taskcluster/scripts/pipeline" poetry run pytest -vv {{.CLI_ARGS}}
test-fast:
desc: Re-run tests in a faster configuration.
summary: |
This command skips taskgraph generation and skips the poetry install in order to
re-run tests quickly. If the taskgraph or dependencies are out of date, then tests
may incorrectly fail. It also outputs the captured stdout.
task test-fast -- tests/test_alignments.py
cmds:
- >-
SKIP_TASKGRAPH=1 PYTHONPATH="$(pwd):$(pwd)/taskcluster/scripts/pipeline"
poetry run pytest -vv -s {{.CLI_ARGS}}
test-docker:
desc: Run the unit tests in the docker image. Some tests require the pre-built Linux executables.
cmds:
- task docker-run -- task test
train:
desc: Start a training run
summary: Open up the train task from the CLI based on your current branch.
deps: [poetry-install-utils, poetry-install-taskcluster]
cmds:
- >-
poetry run python -W ignore utils/train.py {{.CLI_ARGS}}
docker:
desc: Interactively run the local docker test image.
deps: [docker-build]
summary: |
The local docker image includes the Linux x86 image, and pre-built binaries
that are used in training.
cmds:
- utils/tasks/docker-run.sh bash
docker-run:
desc: Run a command in the local docker instance. e.g. `docker-run -- echo "hello"`
deps: [docker-build]
summary: |
The local docker image includes the Linux x86 image, and pre-built binaries
that are used in training.
cmds:
- utils/tasks/docker-run.sh {{.CLI_ARGS}}
docker-build:
desc: Build the local docker image that includes the proper Linux binaries for training
cmds:
- ./utils/tasks/docker-build.sh
taskgraph-requirements:
desc: Installs the taskgraph requirements.
internal: true
cmds:
- poetry run --directory ./taskgraph -- pip3 install -r taskcluster/requirements.txt
taskgraph-validate:
desc: Validates Taskcluster task graph locally
deps: [taskgraph-requirements]
cmds:
- >-
TASKCLUSTER_ROOT_URL=""
poetry run --directory ./taskgraph --
taskgraph full
taskgraph-diff:
desc: Validates Taskcluster task graph locally
summary: |
Generates diffs of the full taskgraph against BASE_REV. Any parameters that were
different between the current code and BASE_REV will have their diffs logged
to OUTPUT_DIR.
deps: [taskgraph-requirements]
vars:
OUTPUT_FILE: '{{.OUTPUT_FILE | default "./data/taskgraph.diff"}}'
BASE_REV: '{{.BASE_REV | default "main"}}'
cmds:
- >-
TASKCLUSTER_ROOT_URL=""
poetry run --directory ./taskgraph --
taskgraph full --json
--parameters "taskcluster/test/params"
--output-file "{{.OUTPUT_FILE}}"
--diff "{{.BASE_REV}}"
taskgraph-test:
desc: Run tests and validations against task generation
cmds:
- >-
poetry run --directory taskgraph --
pytest taskcluster/test
docs:
desc: Run the GitHub pages Jekyll theme locally.
cmds:
- ./utils/tasks/serve-docs.sh
preflight-check:
desc: Perform pre-flight checks for a training run.
deps: [poetry-install-utils]
cmds:
- poetry run python -W ignore utils/preflight_check.py {{.CLI_ARGS}}
tensorboard:
desc: Visualize training logs from task `download-logs` at http://localhost:6006
summary: |
Runs Tensorboard for Marian training logs in the ./data/taskcluster-logs directory.
The logs are converted to tensorboard in the ./data/tensorboard-logs directory.
deps: [poetry-install-tensorboard]
cmds:
- mkdir -p data/tensorboard-logs
- >-
poetry run marian-tensorboard
--offline
--log-file data/taskcluster-logs/**/*.log
--work-dir data/tensorboard-logs
find-corpus:
desc: Finds all datasets for a language pair
deps: [poetry-install-utils]
cmds:
- poetry run python -W ignore utils/find_corpus.py {{.CLI_ARGS}}
run-model:
desc: Run a Marian server that loads a model from data/models/$MODEL_TASK
deps: [poetry-install-utils]
cmds:
- >-
PYTHONPATH=$(pwd) poetry run python -W ignore utils/run_model.py {{.CLI_ARGS}}
update-requirements:
desc: Update the requirements.txt file for a pipeline script.
summary: |
Example usage:
task update-requirements -- pipeline/eval/requirements/eval.in
cmds:
# Make sure a file was given to update.
- >-
if [[ -z "{{.CLI_ARGS}}" ]]; then
echo "Provide a path to the .in file";
echo "For example:"
echo "task update-requirements -- pipeline/eval/requirements/eval.in";
exit 1
fi
# Make sure the command is being run for docker
- >-
if [[ -z "$IS_DOCKER" ]]; then
task docker-run -- task update-requirements -- {{.CLI_ARGS}} && exit
fi
# Make sure pip-tools are available in docker.
- >-
if ! command -v pip-compile &> /dev/null; then
pip install pip-tools
fi
# Finally generate the hashes.
- pip-compile --generate-hashes {{.CLI_ARGS}} --allow-unsafe