version: '3'

tasks:
  poetry-install-*:
    internal: true
    desc: Install only the dependency group needed for a task.
    vars:
      GROUP: '{{index .MATCH 0}}'
    cmds:
      - poetry install --only {{.GROUP}} --no-root
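  # Note on `poetry-install-*` (above): this uses Task's wildcard matching, and the text
  # captured by `*` is exposed as `{{index .MATCH 0}}`. A dependency such as
  # `deps: [poetry-install-utils]` therefore runs `poetry install --only utils --no-root`.
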
  clean-venvs:
    desc: Remove the virtual envs created by the test runner.
    cmds:
      - rm -rf data/task-venvs/*

  download-logs:
    desc: Download the logs from Taskcluster. Requires --task-group-id
    summary: |
      The logs will be saved to: ./data/taskcluster-logs

      Example:
        task download-logs -- --task-group-id GU9ZyWFhRDe_nxlAHcen8g
    deps: [poetry-install-utils]
    cmds:
      - >-
        poetry run python -W ignore utils/taskcluster_downloader.py
        --mode=logs {{.CLI_ARGS}}

  download-evals:
    desc: Downloads evaluation results from Taskcluster
    summary: |
      The evals will be saved to: ./data/taskcluster-evals
      Example: `task download-evals -- --task-group-id GU9ZyWFhRDe_nxlAHcen8g`
    deps: [poetry-install-utils]
    cmds:
      - >-
        poetry run python -W ignore utils/taskcluster_downloader.py
        --mode=evals {{.CLI_ARGS}}

  download-models:
    desc: Downloads models from Taskcluster
    summary: |
      The models will be saved to: ./data/taskcluster-model
      Example: `task download-models -- --task-group-id GU9ZyWFhRDe_nxlAHcen8g`
    deps: [poetry-install-utils]
    cmds:
      - >-
        poetry run python -W ignore utils/taskcluster_downloader.py
        --mode=model {{.CLI_ARGS}}

  config-generator:
    desc: Create a training config for a language pair
    summary: |
      Generates a training config for the given language pair via utils/config_generator.py.
      Example: `task config-generator -- en fi`
    deps: [poetry-install-utils]
    cmds:
      - >-
        PYTHONPATH=$(pwd) poetry run python -W ignore utils/config_generator.py {{.CLI_ARGS}}

  build-mono-nllb:
    desc: Build a monolingual NLLB dataset.
    summary: |
      The dataset will be saved to: ./data/nllb/nllb-mono-{lang}.txt.gz
      Example: `task build-mono-nllb -- sl`
    deps: [poetry-install-utils]
    cmds:
      - >-
        PYTHONPATH=$(pwd) poetry run python -W ignore utils/build-mono-nllb.py {{.CLI_ARGS}}

  opuscleaner:
    desc: Run the opuscleaner tool.
    deps: [poetry-install-opuscleaner]
    cmds:
      - poetry run opuscleaner-server serve --host=0.0.0.0 --port=8000
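  # Note on `opuscleaner` (above): given the --host/--port flags, the OpusCleaner UI
  # should be reachable at http://localhost:8000 once the server is running (assumed
  # from the flags above; adjust if you change the port).
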
  inference-clean:
    desc: Clean build artifacts from the inference directory.
    cmds:
      - >-
        task docker-run -- ./inference/scripts/clean.sh

  inference-build:
    desc: Build the inference engine.
    cmds:
      - >-
        task docker-run -- ./inference/scripts/build-local.sh

  inference-test:
    desc: Run the inference tests.
    cmds:
      - >-
        task docker-run -- ./inference/scripts/unit-tests.sh

  inference-build-wasm:
    desc: Build the inference engine for WASM.
    cmds:
      - >-
        task docker-run -- ./inference/scripts/build-wasm.sh

  lint-black:
    desc: Checks the styling of the Python code with Black.
    deps: [poetry-install-black]
    cmds:
      - ./utils/tasks/black-check.sh

  lint-black-fix:
    desc: Fixes the styling of the Python code with Black.
    deps: [poetry-install-black]
    cmds:
      - poetry run black . {{.CLI_ARGS}}

  lint-ruff:
    desc: Lints the Python code with the ruff linter.
    deps: [poetry-install-lint]
    cmds:
      - poetry run ruff --version
      - poetry run ruff check . {{.CLI_ARGS}}

  lint-ruff-fix:
    desc: Fixes Python code lint errors with the ruff linter.
    deps: [poetry-install-lint]
    cmds:
      - poetry run ruff --version
      - poetry run ruff check . --fix {{.CLI_ARGS}}

  lint-fix:
    desc: Fix all automatically fixable errors. This is useful to run before pushing.
    cmds:
      - task: lint-black-fix
      - task: lint-ruff-fix

  lint:
    desc: Run all available linting tools.
    cmds:
      - task: lint-black
      - task: lint-ruff

  test:
    desc: Run the Python pytests on the current host.
    summary: |
      Some tests only pass in Docker. You can run this command outside of Docker for
      some of the tests, or after running `task docker` to run them in the Docker image.
      Without any arguments, it runs all of the tests, searching the paths specified in
      testpaths in pyproject.toml.

      You can also specify a specific test to run:

        task test -- tests/test_alignments.py
    cmds:
      - poetry install --only tests --only utils --no-root
      - PYTHONPATH="$(pwd):$(pwd)/taskcluster/scripts/pipeline" poetry run pytest -vv {{.CLI_ARGS}}

  test-fast:
    desc: Re-run tests in a faster configuration.
    summary: |
      This command skips taskgraph generation and skips the poetry install in order to
      re-run tests quickly. If the taskgraph or dependencies are out of date, then tests
      may incorrectly fail. It also outputs the captured stdout.

        task test-fast -- tests/test_alignments.py
    cmds:
      - >-
        SKIP_TASKGRAPH=1 PYTHONPATH="$(pwd):$(pwd)/taskcluster/scripts/pipeline"
        poetry run pytest -vv -s {{.CLI_ARGS}}
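  # Note on `test-fast` (above): SKIP_TASKGRAPH=1 is presumably how the test suite is told
  # to skip taskgraph generation (the summary describes the effect); fall back to
  # `task test` whenever the taskgraph or the dependencies may have changed.
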
  test-docker:
    desc: Run the unit tests in the docker image. Some tests require the pre-built Linux executables.
    cmds:
      - task docker-run -- task test

  train:
    desc: Start a training run
    summary: Open up the train task from the CLI based on your current branch.
    deps: [poetry-install-utils, poetry-install-taskcluster]
    cmds:
      - >-
        poetry run python -W ignore utils/train.py {{.CLI_ARGS}}

  docker:
    desc: Interactively run the local docker test image.
    deps: [docker-build]
    summary: |
      The local docker image includes the Linux x86 image, and pre-built binaries
      that are used in training.
    cmds:
      - utils/tasks/docker-run.sh bash

  docker-run:
    desc: Run a command in the local docker instance, e.g. `task docker-run -- echo "hello"`
    deps: [docker-build]
    summary: |
      The local docker image includes the Linux x86 image, and pre-built binaries
      that are used in training.
    cmds:
      - utils/tasks/docker-run.sh {{.CLI_ARGS}}
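  # Note on `docker-run` (above): everything after `--` is passed through {{.CLI_ARGS}} and
  # runs inside the container, e.g. `task docker-run -- task test` (as used by `test-docker`)
  # or `task docker-run -- ./inference/scripts/build-local.sh` (as used by `inference-build`).
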
  docker-build:
    desc: Build the local docker image that includes the proper Linux binaries for training
    cmds:
      - ./utils/tasks/docker-build.sh

  taskgraph-requirements:
    desc: Installs the taskgraph requirements.
    internal: true
    cmds:
      - poetry run --directory ./taskgraph -- pip3 install -r taskcluster/requirements.txt

  taskgraph-validate:
    desc: Validates Taskcluster task graph locally
    deps: [taskgraph-requirements]
    cmds:
      - >-
        TASKCLUSTER_ROOT_URL=""
        poetry run --directory ./taskgraph --
        taskgraph full
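  # Note on `taskgraph-validate` (above): TASKCLUSTER_ROOT_URL is deliberately cleared so
  # that `taskgraph full` generates the graph locally rather than against a live Taskcluster
  # deployment (assumed intent, inferred from the empty override).
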
  taskgraph-diff:
    desc: Generates a diff of the full taskgraph against BASE_REV
    summary: |
      Generates diffs of the full taskgraph against BASE_REV. Any parameters that were
      different between the current code and BASE_REV will have their diffs logged
      to OUTPUT_FILE.
    deps: [taskgraph-requirements]
    vars:
      OUTPUT_FILE: '{{.OUTPUT_FILE | default "./data/taskgraph.diff"}}'
      BASE_REV: '{{.BASE_REV | default "main"}}'
    cmds:
      - >-
        TASKCLUSTER_ROOT_URL=""
        poetry run --directory ./taskgraph --
        taskgraph full --json
        --parameters "taskcluster/test/params"
        --output-file "{{.OUTPUT_FILE}}"
        --diff "{{.BASE_REV}}"
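  # Note on `taskgraph-diff` (above): OUTPUT_FILE and BASE_REV are Task variables with
  # defaults, so they can be overridden from the command line, e.g. (hypothetical values)
  # `task taskgraph-diff BASE_REV=upstream/main OUTPUT_FILE=./data/my-branch.diff`.
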
  taskgraph-test:
    desc: Run tests and validations against task generation
    cmds:
      - >-
        poetry run --directory taskgraph --
        pytest taskcluster/test

  docs:
    desc: Run the GitHub pages Jekyll theme locally.
    cmds:
      - ./utils/tasks/serve-docs.sh

  preflight-check:
    desc: Perform pre-flight checks for a training run.
    deps: [poetry-install-utils]
    cmds:
      - poetry run python -W ignore utils/preflight_check.py {{.CLI_ARGS}}

  tensorboard:
    desc: Visualize training logs from task `download-logs` at http://localhost:6006
    summary: |
      Runs TensorBoard for the Marian training logs in the ./data/taskcluster-logs directory.
      The logs are converted to the TensorBoard format in the ./data/tensorboard-logs directory.
    deps: [poetry-install-tensorboard]
    cmds:
      - mkdir -p data/tensorboard-logs
      - >-
        poetry run marian-tensorboard
        --offline
        --log-file data/taskcluster-logs/**/*.log
        --work-dir data/tensorboard-logs
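  # Note on `tensorboard` (above): download the Marian logs first, e.g.
  # `task download-logs -- --task-group-id <task group id>`, so that
  # ./data/taskcluster-logs is populated before TensorBoard starts.
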
  find-corpus:
    desc: Finds all datasets for a language pair
    deps: [poetry-install-utils]
    cmds:
      - poetry run python -W ignore utils/find_corpus.py {{.CLI_ARGS}}
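  # Note on `find-corpus` (above): arguments after `--` are passed straight to
  # utils/find_corpus.py. A hypothetical invocation for an English-Finnish pair would be
  # `task find-corpus -- en fi` (see utils/find_corpus.py for the exact arguments).
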
  run-model:
    desc: Run a Marian server that loads a model from data/models/$MODEL_TASK
    deps: [poetry-install-utils]
    cmds:
      - >-
        PYTHONPATH=$(pwd) poetry run python -W ignore utils/run_model.py {{.CLI_ARGS}}
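  # Note on `run-model` (above): per the description, the model is read from
  # data/models/$MODEL_TASK, which suggests MODEL_TASK is expected in the environment,
  # e.g. (hypothetical) `MODEL_TASK=<taskcluster task id> task run-model`.
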
  update-requirements:
    desc: Update the requirements.txt file for a pipeline script.
    summary: |
      Example usage:
        task update-requirements -- pipeline/eval/requirements/eval.in
    cmds:
      # Make sure a file was given to update.
      - >-
        if [[ -z "{{.CLI_ARGS}}" ]]; then
          echo "Provide a path to the .in file";
          echo "For example:"
          echo "task update-requirements -- pipeline/eval/requirements/eval.in";
          exit 1
        fi
      # Make sure the command is being run inside Docker; if not, re-run it there.
      - >-
        if [[ -z "$IS_DOCKER" ]]; then
          task docker-run -- task update-requirements -- {{.CLI_ARGS}} && exit
        fi
      # Make sure pip-tools is available in Docker.
      - >-
        if ! command -v pip-compile &> /dev/null; then
          pip install pip-tools
        fi
      # Finally, generate the hashes.
      - pip-compile --generate-hashes {{.CLI_ARGS}} --allow-unsafe