Mirror of https://github.com/microsoft/hi-ml.git
ENH: Add regression test framework to runner (#289)
This commit is contained in:
Parent: a5437dd46e
Commit: 8735521c08
@@ -103,7 +103,10 @@ class Foo:

    def method(self, arg: int) -> None:
        """
        Method description, followed by an empty line.
        Method description, followed by an empty line. Argument names like ``arg`` are rendered nicely
        if enclosed in double backtick.

        This method can raise a :exc:`ValueError`.

        :param arg: This is a description for the method argument.
            Long descriptions should be indented.

@@ -4,6 +4,7 @@
env:
	conda env create --file environment.yml
	pip install -r ../test_requirements.txt
	pip install -r ../build_requirements.txt

# call make for parent
define call_parent

@@ -146,6 +146,18 @@ class WorkflowParams(param.Parameterized):
    hyperdrive: bool = param.Boolean(False, doc="If True, use the Hyperdrive configuration specified in the "
                                                "LightningContainer to run hyperparameter tuning. If False, just "
                                                "run a plain single training job.")
    regression_test_folder: Optional[Path] = \
        param.ClassSelector(class_=Path, default=None, allow_None=True,
                            doc="A path to a folder that contains a set of files. At the end of training and "
                                "model evaluation, all files given in that folder must be present in the job's output "
                                "folder, and their contents must match exactly. When running in AzureML, you need to "
                                "ensure that this folder is part of the snapshot that gets uploaded. The path should "
                                "be relative to the repository root directory.")
    regression_test_csv_tolerance: float = \
        param.Number(default=0.0, allow_None=False,
                     doc="When comparing CSV files during regression tests, use this value as the maximum allowed "
                         "relative difference of actual and expected results. Default: 0.0 (must match exactly)")

    CROSSVAL_INDEX_ARG_NAME = "crossval_index"
    CROSSVAL_COUNT_ARG_NAME = "crossval_count"

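The two new options are plain ``param`` fields on ``WorkflowParams``, so any container that derives from it (such as the ``HelloWorld`` config used in the tests further down) picks them up. A minimal sketch of setting them programmatically, with a hypothetical folder path:

from pathlib import Path

from health_ml.configs.hello_world import HelloWorld

container = HelloWorld()
# Folder with the expected output files; this path is hypothetical and should be relative to the repository root.
container.regression_test_folder = Path("regression_test_results/hello_world")
# Allow up to 0.1% relative difference when comparing CSV files; 0.0 (the default) requires an exact match.
container.regression_test_csv_tolerance = 1e-3
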
@@ -23,6 +23,7 @@ from health_ml.utils.common_utils import (
    EFFECTIVE_RANDOM_SEED_KEY_NAME, change_working_directory, logging_section,
    RUN_RECOVERY_ID_KEY, RUN_RECOVERY_FROM_ID_KEY_NAME)
from health_ml.utils.lightning_loggers import StoringLogger
from health_ml.utils.regression_test_utils import compare_folders_and_run_outputs
from health_ml.utils.type_annotations import PathOrString

@@ -154,6 +155,26 @@ class MLRunner:
        with logging_section("Model inference"):
            self.run_inference(checkpoint_paths_for_testing)

        if self.container.regression_test_folder:
            # Comparison with stored results for cross-validation runs only operates on child run 0. This run
            # has usually already downloaded the results for the other runs, and uploaded files to the parent
            # run context.
            logging.info("Comparing the current results against stored results")
            if self.is_crossval_disabled_or_child_0():
                compare_folders_and_run_outputs(expected=self.container.regression_test_folder,
                                                actual=self.container.outputs_folder,
                                                csv_relative_tolerance=self.container.regression_test_csv_tolerance)
            else:
                logging.info("Skipping as this is not cross-validation child run 0")

    def is_crossval_disabled_or_child_0(self) -> bool:
        """
        Returns True if the present run is a non-cross-validation run, or child run 0 of a cross-validation run.
        """
        if self.container.is_crossvalidation_enabled:
            return self.container.crossval_index == 0
        return True

    def run_inference(self, checkpoint_paths: List[Path]) -> None:
        """
        Run inference on the test set for all models.

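For reference, a minimal sketch of how the new check is exercised end to end, mirroring the test_regression_test test further down; the expected-files path is hypothetical:

from pathlib import Path

from health_ml.configs.hello_world import HelloWorld
from health_ml.experiment_config import ExperimentConfig
from health_ml.run_ml import MLRunner

container = HelloWorld()
container.regression_test_folder = Path("regression_test_results/hello_world")  # hypothetical path

runner = MLRunner(container=container, experiment_config=ExperimentConfig())
runner.setup()
# run() trains and runs inference; afterwards the outputs folder is compared against the expected
# files, and a ValueError is raised if the folder is missing or any file does not match.
runner.run()
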
@@ -0,0 +1,180 @@
# -------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# -------------------------------------------------------------------------------------------
import logging
import os
from pathlib import Path
import shutil
import tempfile
from typing import List, Optional

from azureml.core import Run
import pandas as pd

from health_azure.utils import PARENT_RUN_CONTEXT, RUN_CONTEXT, is_running_in_azure_ml


REGRESSION_TEST_OUTPUT_FOLDER = "OUTPUT"
REGRESSION_TEST_AZUREML_FOLDER = "AZUREML_OUTPUT"
REGRESSION_TEST_AZUREML_PARENT_FOLDER = "AZUREML_PARENT_OUTPUT"
CONTENTS_MISMATCH = "Contents mismatch"
FILE_FORMAT_ERROR = "File format error"
MISSING_FILE = "Missing"
CSV_SUFFIX = ".csv"
TEXT_FILE_SUFFIXES = [".txt", ".json", ".html", ".md"]

def compare_files(expected: Path, actual: Path, csv_relative_tolerance: float = 0.0) -> str:
    """
    Compares two individual files for regression testing. It returns an empty string if the two files appear identical.
    If the files are not identical, a brief error message is returned. Details about the mismatch are logged via
    logging.warning. This method handles CSV files (which are treated and compared as dataframes) and text files (TXT,
    JSON, HTML, MD), which are all compared while ignoring linebreaks. All other extensions are treated as binary,
    and compared on a byte-by-byte basis.

    :param expected: A file that contains the expected contents. The type of comparison (text or binary) is chosen
        based on the extension of this file.
    :param actual: A file to be checked against the expected file.
    :param csv_relative_tolerance: When comparing CSV files, maximum allowed relative discrepancy.
        If 0.0, do not allow any discrepancy.
    :return: An empty string if the files appear identical, or otherwise a brief error message. If there is a
        mismatch, details about the mismatch are printed via logging.warning.
    """
    def print_lines(prefix: str, lines: List[str]) -> None:
        num_lines = len(lines)
        count = min(5, num_lines)
        logging.warning(f"{prefix} {num_lines} lines, first {count} of those:")
        logging.warning(os.linesep.join(lines[:count]))

    def try_read_csv(prefix: str, file: Path) -> Optional[pd.DataFrame]:
        try:
            return pd.read_csv(file)
        except Exception as ex:
            logging.warning(f"{prefix} file can't be read as CSV: {str(ex)}")
            return None

    if expected.suffix == CSV_SUFFIX:
        expected_df = try_read_csv("Expected", expected)
        actual_df = try_read_csv("Actual", actual)
        if expected_df is None or actual_df is None:
            return FILE_FORMAT_ERROR
        try:
            pd.testing.assert_frame_equal(actual_df, expected_df, rtol=csv_relative_tolerance)
        except Exception as ex:
            logging.warning(str(ex))
            return CONTENTS_MISMATCH
    elif expected.suffix in TEXT_FILE_SUFFIXES:
        # Compare line-by-line to avoid issues with line separators
        expected_lines = expected.read_text().splitlines()
        actual_lines = actual.read_text().splitlines()
        if expected_lines != actual_lines:
            print_lines("Expected", expected_lines)
            print_lines("Actual", actual_lines)
            return CONTENTS_MISMATCH
    else:
        expected_binary = expected.read_bytes()
        actual_binary = actual.read_bytes()
        if expected_binary != actual_binary:
            logging.warning(f"Expected {len(expected_binary)} bytes, actual {len(actual_binary)} bytes")
            return CONTENTS_MISMATCH
    return ""

def compare_folder_contents(
    expected_folder: Path,
    actual_folder: Optional[Path] = None,
    run: Optional[Run] = None,
    csv_relative_tolerance: float = 0.0,
) -> List[str]:
    """
    Compares a set of files in a folder, against files in either the other folder or files stored in the given
    AzureML run. Each file that is present in the ``expected`` folder must be also present in the ``actual`` folder
    (or the AzureML run), with exactly the same contents, in the same folder structure.
    For example, if there is a file ``<expected>/foo/bar/contents.txt``, then there must also be a file
    ``<actual>/foo/bar/contents.txt``. If ``actual_folder`` is provided, then this is used to compare files against the
    set of files in ``expected_folder``, irrespective of the value in ``run``. If ``run`` is provided, the files
    uploaded to the AzureML run are compared against files in ``expected_folder``. If neither ``run`` nor
    ``actual_folder`` are provided, a :exc:`ValueError` is raised.

    :param expected_folder: A folder with files that are expected to be present.
    :param actual_folder: The output folder with the actually produced files.
    :param run: An AzureML run
    :param csv_relative_tolerance: When comparing CSV files, use this as the maximum allowed relative discrepancy.
        If 0.0, do not allow any discrepancy.
    :return: A list of human readable error messages, with message and file path. If no errors are found, the list is
        empty.
    """
    messages = []
    if run and not is_running_in_azure_ml(run):
        logging.warning("Skipping file comparison because the given run context is an AzureML offline run")
        return []
    files_in_run: List[str] = run.get_file_names() if run else []
    temp_folder = Path(tempfile.mkdtemp()) if run else None
    for file in expected_folder.rglob("*"):
        # rglob also returns folders, skip those
        if file.is_dir():
            continue
        # All files stored in AzureML runs use Linux-style path
        file_relative = file.relative_to(expected_folder).as_posix()
        if actual_folder:
            actual_file = actual_folder / file_relative
        elif temp_folder is not None and run is not None:
            actual_file = temp_folder / file_relative
            if file_relative in files_in_run:
                run.download_file(name=str(file_relative), output_file_path=str(actual_file))
        else:
            raise ValueError("Either of the two arguments 'run' or 'actual_folder' must be provided")
        message = compare_files(expected=file, actual=actual_file,
                                csv_relative_tolerance=csv_relative_tolerance) if actual_file.exists() else MISSING_FILE
        if message:
            messages.append(f"{message}: {file_relative}")
            logging.warning(f"File {file_relative}: {message}")
        else:
            logging.info(f"File {file_relative}: OK")
    if temp_folder:
        shutil.rmtree(temp_folder)
    return messages

def compare_folders_and_run_outputs(expected: Path, actual: Path, csv_relative_tolerance: float = 0.0) -> None:
    """
    Compares the actual set of run outputs in the ``actual`` folder against an expected set of files in the ``expected``
    folder. The ``expected`` folder can have two special subfolders AZUREML_OUTPUT and AZUREML_PARENT_OUTPUT, that
    contain files that are expected to be present in the AzureML run context of the present run (AZUREML_OUTPUT)
    or the run context of the parent run (AZUREML_PARENT_OUTPUT).
    If a file is missing, or does not have the expected contents, an exception is raised.

    :param expected: A folder with files that are expected to be present.
    :param actual: The output folder with the actually produced files.
    :param csv_relative_tolerance: When comparing CSV files, use this as the maximum allowed relative discrepancy.
        If 0.0, do not allow any discrepancy.
    """
    if not expected.is_dir():
        raise ValueError(f"Folder with expected files does not exist: {expected}")
    logging.debug(f"Current working directory: {Path.cwd()}")
    messages = []
    folders_to_check = [
        (REGRESSION_TEST_OUTPUT_FOLDER, "run output files", actual, None),
        (REGRESSION_TEST_AZUREML_FOLDER, "AzureML outputs in present run", None, RUN_CONTEXT),
        (REGRESSION_TEST_AZUREML_PARENT_FOLDER, "AzureML outputs in parent run", None, PARENT_RUN_CONTEXT)
    ]
    for (subfolder, message_prefix, actual_folder, run_to_compare) in folders_to_check:
        folder = expected / subfolder
        if folder.is_dir():
            logging.info(f"Comparing results in {folder} against {message_prefix}:")
            if actual_folder is None and run_to_compare is None:
                logging.info("No AzureML run to compare against. Skipping")
                continue
            new_messages = compare_folder_contents(folder,
                                                   actual_folder=actual_folder,
                                                   run=run_to_compare,
                                                   csv_relative_tolerance=csv_relative_tolerance)
            if new_messages:
                messages.append(f"Issues in {message_prefix}:")
                messages.extend(new_messages)
        else:
            logging.info(f"Folder {subfolder} not found, skipping comparison against {message_prefix}")
    if messages:
        raise ValueError(f"Some expected files were missing or did not have the expected contents:{os.linesep}"
                         f"{os.linesep.join(messages)}")

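The expected-files folder is organised by comparison target, and the two helpers above can also be called directly. A minimal sketch with hypothetical paths:

from pathlib import Path

from health_ml.utils.regression_test_utils import compare_files, compare_folder_contents

# Layout of a regression test folder (the subfolder names are the constants defined above):
#   <regression_test_folder>/
#       OUTPUT/                 compared against the job's output folder
#       AZUREML_OUTPUT/         compared against files uploaded to the current AzureML run
#       AZUREML_PARENT_OUTPUT/  compared against files uploaded to the parent run

# Compare a single file: returns "" on a match, or a short error code such as "Contents mismatch".
result = compare_files(expected=Path("expected/OUTPUT/metrics.csv"),  # hypothetical paths
                       actual=Path("outputs/metrics.csv"),
                       csv_relative_tolerance=1e-3)

# Compare a whole folder: returns a list of human readable error messages, empty if all files match.
messages = compare_folder_contents(expected_folder=Path("expected/OUTPUT"),
                                   actual_folder=Path("outputs"),
                                   csv_relative_tolerance=1e-3)
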
@@ -4,8 +4,13 @@ from pathlib import Path


root = Path(__file__).parent.parent.parent
for folder in ["hi-ml-azure", "hi-ml"]:
    full_folder = str(root / folder / "src")
paths_to_add = [
    Path("hi-ml-azure") / "src",
    Path("hi-ml-azure") / "testazure",
    Path("hi-ml") / "src",
]
for folder in paths_to_add:
    full_folder = str(root / folder)
    if full_folder not in sys.path:
        print(f"Adding to sys.path for running hi-ml: {full_folder}")
        sys.path.insert(0, full_folder)

@@ -0,0 +1,241 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import uuid
from pathlib import Path
from unittest import mock

import pytest
from health_azure.utils import create_aml_run_object
from health_ml.experiment_config import ExperimentConfig

from health_ml.run_ml import MLRunner
from health_ml.configs.hello_world import HelloWorld
from health_ml.utils.regression_test_utils import (
    CONTENTS_MISMATCH,
    FILE_FORMAT_ERROR,
    MISSING_FILE,
    REGRESSION_TEST_OUTPUT_FOLDER,
    REGRESSION_TEST_AZUREML_FOLDER,
    REGRESSION_TEST_AZUREML_PARENT_FOLDER,
    TEXT_FILE_SUFFIXES,
    compare_files,
    compare_folder_contents,
    compare_folders_and_run_outputs,
)
from testazure.utils_testazure import DEFAULT_WORKSPACE


def create_folder_and_write_text(file: Path, text: str) -> None:
    """
    Writes the given text to a file. The folders in which the file lives are created too, unless they exist already.
    Writing the text keeps the line separators as-is (no translation).
    """
    file.parent.mkdir(exist_ok=True, parents=True)
    with file.open(mode="w", newline="") as f:
        f.write(text)

def test_regression_test() -> None:
    """
    Test that the file comparison for regression tests is actually called in the workflow.
    """
    container = HelloWorld()
    container.regression_test_folder = Path(str(uuid.uuid4().hex))
    runner = MLRunner(container=container, experiment_config=ExperimentConfig())
    runner.setup()
    with pytest.raises(ValueError) as ex:
        runner.run()
    assert "Folder with expected files does not exist" in str(ex)


@pytest.mark.parametrize("file_extension", TEXT_FILE_SUFFIXES)
def test_compare_files_text(tmp_path: Path, file_extension: str) -> None:
    """
    Checks the basic code to compare the contents of two text files.
    :param tmp_path: A folder for temporary files
    :param file_extension: The extension of the file to create.
    """
    expected = tmp_path / f"expected{file_extension}"
    actual = tmp_path / "actual.does_not_matter"
    # Make sure that we test different line endings - the files should still match
    create_folder_and_write_text(expected, "Line1\r\nLine2")
    create_folder_and_write_text(actual, "Line1\nLine2")
    assert compare_files(expected=expected, actual=actual) == ""
    actual.write_text("does_not_match")
    assert compare_files(expected=expected, actual=actual) == CONTENTS_MISMATCH


def test_compare_files_csv(tmp_path: Path) -> None:
    expected = tmp_path / "expected.csv"
    actual = tmp_path / "actual.does_not_matter"
    expected.write_text(
        """foo,bar
1.0,10.0"""
    )
    actual.write_text(
        """foo,bar
1.0001,10.001"""
    )
    assert compare_files(expected=expected, actual=actual, csv_relative_tolerance=1e-2) == ""
    assert compare_files(expected=expected, actual=actual, csv_relative_tolerance=1e-3) == ""
    assert compare_files(expected=expected, actual=actual, csv_relative_tolerance=2e-4) == ""
    assert compare_files(expected=expected, actual=actual, csv_relative_tolerance=9e-5) == CONTENTS_MISMATCH


def test_compare_files_empty_csv(tmp_path: Path) -> None:
    """
    If either of the two CSV files is empty, it should not raise an error, but exit gracefully.
    """
    expected = tmp_path / "expected.csv"
    actual = tmp_path / "actual.csv"
    valid_csv = """foo,bar
1.0,10.0"""
    empty_csv = ""
    for expected_contents, actual_contents in [(empty_csv, empty_csv), (valid_csv, empty_csv), (empty_csv, valid_csv)]:
        expected.write_text(expected_contents)
        actual.write_text(actual_contents)
        assert compare_files(expected=expected, actual=actual) == FILE_FORMAT_ERROR
    expected.write_text(valid_csv)
    actual.write_text(valid_csv)
    assert compare_files(expected=expected, actual=actual) == ""


@pytest.mark.parametrize("file_extension", [".png", ".whatever"])
def test_compare_files_binary(tmp_path: Path, file_extension: str) -> None:
    """
    Checks the comparison of files that are not recognized as text files, for example images.
    :param tmp_path: A folder for temporary files
    :param file_extension: The extension of the file to create.
    """
    expected = tmp_path / f"expected{file_extension}"
    actual = tmp_path / "actual.does_not_matter"
    data1 = bytes([1, 2, 3])
    data2 = bytes([4, 5, 6])
    expected.write_bytes(data1)
    actual.write_bytes(data1)
    assert compare_files(expected=expected, actual=actual) == ""
    actual.write_bytes(data2)
    assert compare_files(expected=expected, actual=actual) == CONTENTS_MISMATCH

def test_compare_folder(tmp_path: Path) -> None:
    """
    Test the comparison of folders that we use for regression tests.
    """
    # Create a set of expected and actual files on the fly.
    expected = tmp_path / "expected"
    actual = tmp_path / "actual"
    matching = "matching.txt"
    missing = "missing.txt"
    ignored = "ignored.txt"
    # Comparison should cover at least .csv and .txt files
    mismatch = "mismatch.csv"
    extra = "extra.txt"
    subfolder = Path("folder")
    # This file exists in both expected and actual, and should not raise any alerts because its contents match
    # apart from linebreaks
    create_folder_and_write_text(expected / subfolder / matching, "Line1\r\nLine2")
    create_folder_and_write_text(actual / subfolder / matching, "Line1\nLine2")
    # This file only exists in the expected results, and should create an error saying that it is missing
    # from the actual results
    (expected / subfolder / missing).write_text("missing")
    # This file exists only in the actual results, and not the expected results, and so should not create an error.
    (actual / extra).write_text("extra")
    # This file exists in both actual and expected, but has different contents, hence should create an error
    (expected / subfolder / mismatch).write_text("contents1")
    (actual / subfolder / mismatch).write_text("contents2")

    messages = compare_folder_contents(expected_folder=expected, actual_folder=actual)
    all_messages = " ".join(messages)
    # No issues expected
    assert matching not in all_messages
    assert extra not in all_messages
    assert ignored not in all_messages
    # Folders should be skipped in the comparison
    assert f"{MISSING_FILE}: {subfolder}" not in messages
    assert f"{MISSING_FILE}: {subfolder}/{missing}" in messages
    assert f"{CONTENTS_MISMATCH}: {subfolder}/{mismatch}" in messages

def test_compare_plain_outputs(tmp_path: Path) -> None:
    """
    Test if we can compare a set of files against the job outputs.
    """
    expected_root = tmp_path / "expected"
    expected = expected_root / REGRESSION_TEST_OUTPUT_FOLDER
    actual = tmp_path / "my_output"
    for folder in [expected, actual]:
        file1 = folder / "output.txt"
        create_folder_and_write_text(file1, "Something")
    # First comparison should pass
    compare_folders_and_run_outputs(expected=expected_root, actual=actual)
    # Now add a file to the set of expected files that does not exist in the run: comparison should now fail
    no_such_file = "no_such_file.txt"
    file2 = expected / no_such_file
    create_folder_and_write_text(file2, "foo")
    with pytest.raises(ValueError) as ex:
        compare_folders_and_run_outputs(expected=expected_root, actual=actual)
    message = ex.value.args[0].splitlines()
    assert f"{MISSING_FILE}: {no_such_file}" in message

def test_compare_folder_against_run(tmp_path: Path) -> None:
    """
    Test if we can check that a set of files exists in an AML run.
    """
    upload_to_run_and_compare(
        regression_test_subfolder=REGRESSION_TEST_AZUREML_FOLDER,
        run_to_mock="RUN_CONTEXT",
        tmp_path=tmp_path,
    )


def test_compare_folder_against_run_parent(tmp_path: Path) -> None:
    """
    Test if we can check that a set of files exists in the parent of an AML run.
    """
    upload_to_run_and_compare(
        regression_test_subfolder=REGRESSION_TEST_AZUREML_PARENT_FOLDER,
        run_to_mock="PARENT_RUN_CONTEXT",
        tmp_path=tmp_path,
    )

def upload_to_run_and_compare(regression_test_subfolder: str, run_to_mock: str, tmp_path: Path) -> None:
    """Creates a set of files in an AzureML run, and checks if the comparison tools accept/fail
    in the right way.

    :param regression_test_subfolder: The subfolder of the regression test results where the files
        should be created (either REGRESSION_TEST_AZUREML_FOLDER or REGRESSION_TEST_AZUREML_PARENT_FOLDER)
    :param run_to_mock: either RUN_CONTEXT or PARENT_RUN_CONTEXT
    :param tmp_path: A temporary folder to use
    """
    file_contents = "some file contents"
    file_name = "contents.txt"
    regression_test_folder = tmp_path / "expected"
    run = create_aml_run_object(workspace=DEFAULT_WORKSPACE.workspace, experiment_name="test_regression_tests")
    # Upload a single file to the newly created run. When comparing the run output files,
    # and seeing this in the set of files that are expected to exist on the run, this should pass.
    file1 = tmp_path / file_name
    create_folder_and_write_text(file1, file_contents)
    run.upload_file(file_name, str(file1))
    run.flush()
    file1_expected = regression_test_folder / regression_test_subfolder / file_name
    create_folder_and_write_text(file1_expected, file_contents)

    with mock.patch("health_ml.utils.regression_test_utils." + run_to_mock, run):
        # First comparison only on the single file should pass. Value passed for the 'actual' argument is irrelevant.
        compare_folders_and_run_outputs(expected=regression_test_folder, actual=Path.cwd())
        # Now add a file to the set of expected files that does not exist in the run: comparison should now fail
        no_such_file = "no_such_file.txt"
        file2_expected = regression_test_folder / regression_test_subfolder / no_such_file
        create_folder_and_write_text(file2_expected, "foo")
        with pytest.raises(ValueError) as ex:
            compare_folders_and_run_outputs(expected=regression_test_folder, actual=Path.cwd())
        message = ex.value.args[0].splitlines()
        assert f"{MISSING_FILE}: {no_such_file}" in message
    # Now run the same comparison that failed previously, without mocking. This should now
    # realize that the present run is an offline run, and skip the comparison
    compare_folders_and_run_outputs(expected=regression_test_folder, actual=Path.cwd())

@@ -23,6 +23,7 @@
        "root": "hi-ml/testhiml",
        "extraPaths": [
            "hi-ml-azure/src",
            "hi-ml-azure/testazure",
            "hi-ml/src",
        ]
    },