hi-ml/hi-ml-azure/run_pytest.py

import logging
import sys
from pathlib import Path

import pytest
import param
from _pytest.main import ExitCode
from azureml._restclient.constants import RunStatus
from azureml.core import Run

# Add hi-ml packages to sys.path so that AML can find them if we are using the runner directly from the git repo
himl_root = Path(__file__).resolve().parent.parent


def add_to_sys_path(folder: Path) -> None:
    folder_str = str(folder)
    if folder.is_dir() and folder_str not in sys.path:
        sys.path.insert(0, folder_str)


folders_to_add = [himl_root / "hi-ml" / "src", himl_root / "hi-ml-azure" / "src"]
for folder in folders_to_add:
    add_to_sys_path(folder)
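
# The health_azure and health_ml imports below must run after the sys.path manipulation above; the
# "noqa: E402" markers silence the "module level import not at top of file" warning that this causes.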
from health_azure import submit_to_azure_if_needed # noqa: E402
from health_azure.himl import OUTPUT_FOLDER # noqa: E402
from health_azure.logging import logging_to_stdout # noqa: E402
from health_azure.paths import git_repo_root_folder # noqa: E402
from health_azure.utils import (  # noqa: E402
    WORKSPACE_CONFIG_JSON,
    check_config_json,
    create_argparser,
    is_running_in_azure_ml,
    parse_arguments,
)
from health_ml.utils.common_utils import DEFAULT_AML_UPLOAD_DIR # noqa: E402
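
# Names of the files that the pytest run writes to the AzureML output folder, so that a later build step or a
# local caller can download them after the job has finished (see download_pytest_coverage_result below).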
PYTEST_RESULTS_FILE = "pytest_results.xml"
PYTEST_GPU_COVERAGE_FILE = "pytest_gpu_coverage.xml"


class RunPytestConfig(param.Parameterized):
    mark: str = param.String(default="", doc="The value to pass to pytest for the -m (mark) argument.")
    folder: str = param.String(
        default="",
        doc="The file or folder of tests that should be run. This value is used as the first argument to start "
        "pytest, so it can also be a specific test like 'my_test.py::any_test'.",
    )
    coverage_module: str = param.String(
        default="",
        doc="This value is used as an argument to --cov of pytest, to collect code coverage for the specified Python "
        "module. For example, in the subfolder hi-ml-cpath, one can collect code coverage for the histopathology "
        "module by setting `coverage_module=histopathology`. If set to '' (default), no coverage is collected.",
    )
    cluster: str = param.String(default="", doc="The name of the AzureML compute cluster where the script should run.")
    conda_env: str = param.String(
        default="", doc="The path to the Conda environment file that should be used when starting pytest in AzureML."
    )
    experiment: str = param.String(
        default="run_pytest", doc="The name of the AzureML experiment where the run should start."
    )
    max_run_duration: str = param.String(
        default="30m",
        doc="The maximum runtime that is allowed for this job in AzureML. This is given as a floating-point "
        "number with a string suffix s, m, h, d for seconds, minutes, hours, days. Examples: '3.5h', '2d'.",
    )
    add_to_sys_path: str = param.String(
        default="",
        doc="A folder name that should be added to sys.path. The folder name should be relative to the repository "
        "root.",
    )
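
# A minimal usage sketch. All names below (test folder, mark, cluster, Conda file) are illustrative
# placeholders, not values defined in this file:
#   Run tests locally, restricted to a given mark:
#       python hi-ml-azure/run_pytest.py --folder hi-ml-azure/testazure --mark fast
#   Submit the same tests to an AzureML compute cluster:
#       python hi-ml-azure/run_pytest.py --folder hi-ml-azure/testazure --mark fast --cluster my-cluster \
#           --conda_env hi-ml-azure/environment.yml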


def run_pytest(folder_to_test: str, pytest_mark: str, coverage_module: str) -> None:
    """
    Runs pytest on a given folder, restricting to the tests that carry the given pytest mark.
    If pytest finds no tests, or if any of the tests fail, this function raises a ValueError. When run inside
    AzureML, this will make the job fail.

    :param folder_to_test: The folder with tests that should be run.
    :param pytest_mark: The pytest mark to use for selecting the tests to run.
    :param coverage_module: The module for which test code coverage should be collected. When set to the empty
        string '', no code coverage is collected.
    """
    output_dir = Path(OUTPUT_FOLDER)
    output_dir.mkdir(exist_ok=True)
    results_file = output_dir / PYTEST_RESULTS_FILE
    pytest_args = [folder_to_test, f"--junitxml={str(results_file)}"]
    if coverage_module:
        pytest_args += [
            f"--cov={coverage_module}",
            "--cov-branch",
            "--cov-report=html",
            f"--cov-report=xml:{OUTPUT_FOLDER}/{PYTEST_GPU_COVERAGE_FILE}",
            "--cov-report=term-missing",
            "--cov-config=.coveragerc",
        ]
    if pytest_mark:
        pytest_args += ["-m", pytest_mark]
    logging.info(f"Starting pytest with these args: {pytest_args}")
    status_code = pytest.main(pytest_args)
    if status_code == ExitCode.NO_TESTS_COLLECTED:
        raise ValueError(f"PyTest did not find any tests to run, when restricting with this mark: {pytest_mark}")
    if status_code != ExitCode.OK:
        raise ValueError(f"PyTest failed with exit code: {status_code}")


def download_run_output_file(blob_path: Path, destination: Path, run: Run) -> Path:
    """
    Downloads a single file from the run's default output directory ("outputs").

    :param blob_path: The relative path of the file to download. For example, if blob_path = "foo/bar.csv", then the
        run result file "outputs/foo/bar.csv" will be downloaded to <destination>/bar.csv (the directory will be
        stripped off).
    :param destination: The local folder to save the downloaded blob to.
    :param run: The AzureML run to download the file from.
    :return: The destination path of the downloaded file.
    """
    blobs_prefix = str((Path(DEFAULT_AML_UPLOAD_DIR) / blob_path).as_posix())
    destination = destination / blob_path.name
    logging.info(f"Downloading single file from run {run.id}: {blobs_prefix} -> {str(destination)}")
    try:
        run.download_file(blobs_prefix, str(destination), _validate_checksum=True)
    except Exception as ex:
        raise ValueError(f"Unable to download file '{blobs_prefix}' from run {run.id}") from ex
    return destination


def download_pytest_coverage_result(run: Run, destination_folder: Path = Path.cwd()) -> Path:
    """
    Downloads the pytest coverage file that is stored in the output folder of the given AzureML run.
    If there is no such file, raises a ValueError.

    :param run: The run from which the file should be downloaded.
    :param destination_folder: The folder into which the coverage file is downloaded.
    :return: The path (folder and filename) of the downloaded file.
    """
    logging.info(f"Downloading pytest GPU coverage file: {PYTEST_GPU_COVERAGE_FILE}")
    try:
        return download_run_output_file(Path(PYTEST_GPU_COVERAGE_FILE), destination=destination_folder, run=run)
    except Exception as ex:
        raise ValueError(f"No coverage file {PYTEST_GPU_COVERAGE_FILE} was found for run {run.id}") from ex


def pytest_after_submission_hook(azure_run: Run) -> None:
    """A hook that will be called right after the pytest job has been submitted to AzureML."""
    # We want the job output to be visible on the console. Do not exit yet if the job fails, because we
    # may need to download the pytest coverage file.
    azure_run.wait_for_completion(show_output=True, raise_on_error=False)
    # The AzureML job writes a coverage file to its output folder. Attempt to download it to the current
    # directory, so that a build step can pick up that file and publish it to Azure DevOps.
    logging.info("Downloading pytest coverage file.")
    download_pytest_coverage_result(azure_run)
    if azure_run.status == RunStatus.FAILED:
        raise ValueError(f"The AzureML run failed. Please check this URL for details: {azure_run.get_portal_url()}")
if __name__ == "__main__":
config = RunPytestConfig()
parser = create_argparser(
config,
description="Invoke pytest either locally or inside of an AzureML run. The value of the '--folder' option is "
"becoming the first argument to pytest.To run on AzureML, provide the '--cluster' option.",
)
parser_results = parse_arguments(parser, fail_on_unknown_args=True)
config = RunPytestConfig(**parser_results.args)
if config.add_to_sys_path:
add_to_sys_path(himl_root / config.add_to_sys_path)
logging_to_stdout()
submit_to_azureml = config.cluster != ""
if submit_to_azureml and not is_running_in_azure_ml():
# For runs on the github agents: Create a workspace config file from environment variables.
# For local runs, this will fall back to a config.json file in the current folder or at repository root
root_config_json = himl_root / WORKSPACE_CONFIG_JSON
with check_config_json(script_folder=Path.cwd(), shared_config_json=root_config_json):
submit_to_azure_if_needed(
compute_cluster_name=config.cluster,
submit_to_azureml=submit_to_azureml,
wait_for_completion=True,
snapshot_root_directory=git_repo_root_folder(),
conda_environment_file=config.conda_env,
experiment_name=config.experiment,
max_run_duration=config.max_run_duration,
after_submission=pytest_after_submission_hook,
)
run_pytest(folder_to_test=config.folder, pytest_mark=config.mark, coverage_module=config.coverage_module)