ENH: Add regression test framework to runner (#289)

Anton Schwaighofer 2022-04-05 17:21:51 +02:00, committed by GitHub
Parent: a5437dd46e
Commit: 8735521c08
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
8 changed files: 467 additions and 3 deletions

View file

@@ -103,7 +103,10 @@ class Foo:
    def method(self, arg: int) -> None:
        """
        Method description, followed by an empty line.
        Method description, followed by an empty line. Argument names like ``arg`` are rendered nicely
        if enclosed in double backticks.
        This method can raise a :exc:`ValueError`.

        :param arg: This is a description for the method argument.
            Long descriptions should be indented.

View file

@@ -4,6 +4,7 @@
env:
	conda env create --file environment.yml
	pip install -r ../test_requirements.txt
	pip install -r ../build_requirements.txt

# call make for parent
define call_parent

View file

@@ -146,6 +146,18 @@ class WorkflowParams(param.Parameterized):
    hyperdrive: bool = param.Boolean(False, doc="If True, use the Hyperdrive configuration specified in the "
                                                "LightningContainer to run hyperparameter tuning. If False, just "
                                                "run a plain single training job.")
    regression_test_folder: Optional[Path] = \
        param.ClassSelector(class_=Path, default=None, allow_None=True,
                            doc="A path to a folder that contains a set of files. At the end of training and "
                                "model evaluation, all files given in that folder must be present in the job's output "
                                "folder, and their contents must match exactly. When running in AzureML, you need to "
                                "ensure that this folder is part of the snapshot that gets uploaded. The path should "
                                "be relative to the repository root directory.")
    regression_test_csv_tolerance: float = \
        param.Number(default=0.0, allow_None=False,
                     doc="When comparing CSV files during regression tests, use this value as the maximum allowed "
                         "relative difference of actual and expected results. Default: 0.0 (must match exactly)")

    CROSSVAL_INDEX_ARG_NAME = "crossval_index"
    CROSSVAL_COUNT_ARG_NAME = "crossval_count"
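
For illustration only (not part of this diff): the two new parameters can be set directly on a container before it is handed to MLRunner, as the tests further down do with the HelloWorld container. The folder path and tolerance value below are hypothetical examples.

from pathlib import Path

from health_ml.configs.hello_world import HelloWorld

container = HelloWorld()
# Hypothetical folder with the expected outputs, relative to the repository root
container.regression_test_folder = Path("hi-ml/testhiml/expected_outputs")
# Allow up to 0.1% relative difference when comparing CSV files (0.0 = exact match)
container.regression_test_csv_tolerance = 1e-3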

View file

@@ -23,6 +23,7 @@ from health_ml.utils.common_utils import (
    EFFECTIVE_RANDOM_SEED_KEY_NAME, change_working_directory, logging_section,
    RUN_RECOVERY_ID_KEY, RUN_RECOVERY_FROM_ID_KEY_NAME)
from health_ml.utils.lightning_loggers import StoringLogger
from health_ml.utils.regression_test_utils import compare_folders_and_run_outputs
from health_ml.utils.type_annotations import PathOrString

@@ -154,6 +155,26 @@ class MLRunner:
        with logging_section("Model inference"):
            self.run_inference(checkpoint_paths_for_testing)

        if self.container.regression_test_folder:
            # Comparison with stored results for cross-validation runs only operates on child run 0. This run
            # has usually already downloaded the results for the other runs, and uploaded files to the parent
            # run context.
            logging.info("Comparing the current results against stored results")
            if self.is_crossval_disabled_or_child_0():
                compare_folders_and_run_outputs(expected=self.container.regression_test_folder,
                                                actual=self.container.outputs_folder,
                                                csv_relative_tolerance=self.container.regression_test_csv_tolerance)
            else:
                logging.info("Skipping as this is not cross-validation child run 0")

    def is_crossval_disabled_or_child_0(self) -> bool:
        """
        Returns True if the present run is a non-cross-validation run, or child run 0 of a cross-validation run.
        """
        if self.container.is_crossvalidation_enabled:
            return self.container.crossval_index == 0
        return True

    def run_inference(self, checkpoint_paths: List[Path]) -> None:
        """
        Run inference on the test set for all models.

View file

@@ -0,0 +1,180 @@
# -------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# -------------------------------------------------------------------------------------------
import logging
import os
from pathlib import Path
import shutil
import tempfile
from typing import List, Optional

from azureml.core import Run
import pandas as pd

from health_azure.utils import PARENT_RUN_CONTEXT, RUN_CONTEXT, is_running_in_azure_ml

REGRESSION_TEST_OUTPUT_FOLDER = "OUTPUT"
REGRESSION_TEST_AZUREML_FOLDER = "AZUREML_OUTPUT"
REGRESSION_TEST_AZUREML_PARENT_FOLDER = "AZUREML_PARENT_OUTPUT"

CONTENTS_MISMATCH = "Contents mismatch"
FILE_FORMAT_ERROR = "File format error"
MISSING_FILE = "Missing"

CSV_SUFFIX = ".csv"
TEXT_FILE_SUFFIXES = [".txt", ".json", ".html", ".md"]


def compare_files(expected: Path, actual: Path, csv_relative_tolerance: float = 0.0) -> str:
    """
    Compares two individual files for regression testing. It returns an empty string if the two files appear
    identical. If the files are not identical, a brief error message is returned. Details about the mismatch are
    logged via logging.warning. This method handles CSV files (which are treated and compared as dataframes) and
    text files (TXT, JSON, HTML, MD, which are all compared while ignoring linebreaks). All other extensions are
    treated as binary, and compared on a byte-by-byte basis.

    :param expected: A file that contains the expected contents. The type of comparison (text or binary) is chosen
        based on the extension of this file.
    :param actual: A file to be checked against the expected file.
    :param csv_relative_tolerance: When comparing CSV files, maximum allowed relative discrepancy.
        If 0.0, do not allow any discrepancy.
    :return: An empty string if the files appear identical, or otherwise a brief error message. If there is a
        mismatch, details about the mismatch are printed via logging.warning.
    """

    def print_lines(prefix: str, lines: List[str]) -> None:
        num_lines = len(lines)
        count = min(5, num_lines)
        logging.warning(f"{prefix} {num_lines} lines, first {count} of those:")
        logging.warning(os.linesep.join(lines[:count]))

    def try_read_csv(prefix: str, file: Path) -> Optional[pd.DataFrame]:
        try:
            return pd.read_csv(file)
        except Exception as ex:
            logging.warning(f"{prefix} file can't be read as CSV: {str(ex)}")
            return None

    if expected.suffix == CSV_SUFFIX:
        expected_df = try_read_csv("Expected", expected)
        actual_df = try_read_csv("Actual", actual)
        if expected_df is None or actual_df is None:
            return FILE_FORMAT_ERROR
        try:
            pd.testing.assert_frame_equal(actual_df, expected_df, rtol=csv_relative_tolerance)
        except Exception as ex:
            logging.warning(str(ex))
            return CONTENTS_MISMATCH
    elif expected.suffix in TEXT_FILE_SUFFIXES:
        # Compare line-by-line to avoid issues with line separators
        expected_lines = expected.read_text().splitlines()
        actual_lines = actual.read_text().splitlines()
        if expected_lines != actual_lines:
            print_lines("Expected", expected_lines)
            print_lines("Actual", actual_lines)
            return CONTENTS_MISMATCH
    else:
        expected_binary = expected.read_bytes()
        actual_binary = actual.read_bytes()
        if expected_binary != actual_binary:
            logging.warning(f"Expected {len(expected_binary)} bytes, actual {len(actual_binary)} bytes")
            return CONTENTS_MISMATCH
    return ""


def compare_folder_contents(
        expected_folder: Path,
        actual_folder: Optional[Path] = None,
        run: Optional[Run] = None,
        csv_relative_tolerance: float = 0.0,
) -> List[str]:
    """
    Compares a set of files in a folder, against files in either the other folder or files stored in the given
    AzureML run. Each file that is present in the ``expected`` folder must be also present in the ``actual`` folder
    (or the AzureML run), with exactly the same contents, in the same folder structure.
    For example, if there is a file ``<expected>/foo/bar/contents.txt``, then there must also be a file
    ``<actual>/foo/bar/contents.txt``. If ``actual_folder`` is provided, then this is used to compare files against
    the set of files in ``expected_folder``, irrespective of the value in ``run``. If ``run`` is provided, the files
    uploaded to the AzureML run are compared against files in ``expected_folder``. If neither ``run`` nor
    ``actual_folder`` are provided, a :exc:`ValueError` is raised.

    :param expected_folder: A folder with files that are expected to be present.
    :param actual_folder: The output folder with the actually produced files.
    :param run: An AzureML run whose uploaded files are compared against the files in ``expected_folder``.
    :param csv_relative_tolerance: When comparing CSV files, use this as the maximum allowed relative discrepancy.
        If 0.0, do not allow any discrepancy.
    :return: A list of human-readable error messages, with message and file path. If no errors are found, the list
        is empty.
    """
    messages = []
    if run and not is_running_in_azure_ml(run):
        logging.warning("Skipping file comparison because the given run context is an AzureML offline run")
        return []
    files_in_run: List[str] = run.get_file_names() if run else []
    temp_folder = Path(tempfile.mkdtemp()) if run else None
    for file in expected_folder.rglob("*"):
        # rglob also returns folders, skip those
        if file.is_dir():
            continue
        # All files stored in AzureML runs use Linux-style paths
        file_relative = file.relative_to(expected_folder).as_posix()
        if actual_folder:
            actual_file = actual_folder / file_relative
        elif temp_folder is not None and run is not None:
            actual_file = temp_folder / file_relative
            if file_relative in files_in_run:
                run.download_file(name=str(file_relative), output_file_path=str(actual_file))
        else:
            raise ValueError("Either of the two arguments 'run' or 'actual_folder' must be provided")
        message = compare_files(expected=file, actual=actual_file,
                                csv_relative_tolerance=csv_relative_tolerance) if actual_file.exists() else MISSING_FILE
        if message:
            messages.append(f"{message}: {file_relative}")
            logging.warning(f"File {file_relative}: {message}")
        else:
            logging.info(f"File {file_relative}: OK")
    if temp_folder:
        shutil.rmtree(temp_folder)
    return messages


def compare_folders_and_run_outputs(expected: Path, actual: Path, csv_relative_tolerance: float = 0.0) -> None:
    """
    Compares the actual set of run outputs in the ``actual`` folder against an expected set of files in the
    ``expected`` folder. The ``expected`` folder can have two special subfolders AZUREML_OUTPUT and
    AZUREML_PARENT_OUTPUT, that contain files that are expected to be present in the AzureML run context of the
    present run (AZUREML_OUTPUT) or the run context of the parent run (AZUREML_PARENT_OUTPUT).
    If a file is missing, or does not have the expected contents, an exception is raised.

    :param expected: A folder with files that are expected to be present.
    :param actual: The output folder with the actually produced files.
    :param csv_relative_tolerance: When comparing CSV files, use this as the maximum allowed relative discrepancy.
        If 0.0, do not allow any discrepancy.
    """
    if not expected.is_dir():
        raise ValueError(f"Folder with expected files does not exist: {expected}")
    logging.debug(f"Current working directory: {Path.cwd()}")
    messages = []
    folders_to_check = [
        (REGRESSION_TEST_OUTPUT_FOLDER, "run output files", actual, None),
        (REGRESSION_TEST_AZUREML_FOLDER, "AzureML outputs in present run", None, RUN_CONTEXT),
        (REGRESSION_TEST_AZUREML_PARENT_FOLDER, "AzureML outputs in parent run", None, PARENT_RUN_CONTEXT)
    ]
    for (subfolder, message_prefix, actual_folder, run_to_compare) in folders_to_check:
        folder = expected / subfolder
        if folder.is_dir():
            logging.info(f"Comparing results in {folder} against {message_prefix}:")
            if actual_folder is None and run_to_compare is None:
                logging.info("No AzureML run to compare against. Skipping")
                continue
            new_messages = compare_folder_contents(folder,
                                                   actual_folder=actual_folder,
                                                   run=run_to_compare,
                                                   csv_relative_tolerance=csv_relative_tolerance)
            if new_messages:
                messages.append(f"Issues in {message_prefix}:")
                messages.extend(new_messages)
        else:
            logging.info(f"Folder {subfolder} not found, skipping comparison against {message_prefix}")
    if messages:
        raise ValueError(f"Some expected files were missing or did not have the expected contents:{os.linesep}"
                         f"{os.linesep.join(messages)}")
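
A minimal usage sketch of the new module's two main entry points (illustration only, not part of the commit; the folder and file names are made up):

from pathlib import Path

from health_ml.utils.regression_test_utils import compare_files, compare_folders_and_run_outputs

# Compare two individual files: an empty string means they match, otherwise a short
# error code such as "Contents mismatch" or "File format error" is returned and details are logged.
result = compare_files(expected=Path("expected/metrics.csv"),
                       actual=Path("outputs/metrics.csv"),
                       csv_relative_tolerance=1e-3)
print(result or "Files match")

# Compare a whole folder of expected files against the job's output folder. The expected
# folder may contain the special subfolders OUTPUT, AZUREML_OUTPUT and AZUREML_PARENT_OUTPUT.
# Raises a ValueError that lists all missing or mismatching files.
compare_folders_and_run_outputs(expected=Path("regression_test_folder"),
                                actual=Path("outputs"),
                                csv_relative_tolerance=0.0)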

View file

@@ -4,8 +4,13 @@ from pathlib import Path
root = Path(__file__).parent.parent.parent
for folder in ["hi-ml-azure", "hi-ml"]:
    full_folder = str(root / folder / "src")
paths_to_add = [
    Path("hi-ml-azure") / "src",
    Path("hi-ml-azure") / "testazure",
    Path("hi-ml") / "src",
]
for folder in paths_to_add:
    full_folder = str(root / folder)
    if full_folder not in sys.path:
        print(f"Adding to sys.path for running hi-ml: {full_folder}")
        sys.path.insert(0, full_folder)

View file

@@ -0,0 +1,241 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import uuid
from pathlib import Path
from unittest import mock

import pytest

from health_azure.utils import create_aml_run_object
from health_ml.experiment_config import ExperimentConfig
from health_ml.run_ml import MLRunner
from health_ml.configs.hello_world import HelloWorld
from health_ml.utils.regression_test_utils import (
    CONTENTS_MISMATCH,
    FILE_FORMAT_ERROR,
    MISSING_FILE,
    REGRESSION_TEST_OUTPUT_FOLDER,
    REGRESSION_TEST_AZUREML_FOLDER,
    REGRESSION_TEST_AZUREML_PARENT_FOLDER,
    TEXT_FILE_SUFFIXES,
    compare_files,
    compare_folder_contents,
    compare_folders_and_run_outputs,
)
from testazure.utils_testazure import DEFAULT_WORKSPACE


def create_folder_and_write_text(file: Path, text: str) -> None:
    """
    Writes the given text to a file. The folders in which the file lives are created too, unless they exist already.
    Writing the text keeps the line separators as-is (no translation).
    """
    file.parent.mkdir(exist_ok=True, parents=True)
    with file.open(mode="w", newline="") as f:
        f.write(text)


def test_regression_test() -> None:
    """
    Test that the file comparison for regression tests is actually called in the workflow.
    """
    container = HelloWorld()
    container.regression_test_folder = Path(str(uuid.uuid4().hex))
    runner = MLRunner(container=container, experiment_config=ExperimentConfig())
    runner.setup()
    with pytest.raises(ValueError) as ex:
        runner.run()
    assert "Folder with expected files does not exist" in str(ex)


@pytest.mark.parametrize("file_extension", TEXT_FILE_SUFFIXES)
def test_compare_files_text(tmp_path: Path, file_extension: str) -> None:
    """
    Checks the basic code to compare the contents of two text files.

    :param tmp_path: A folder for temporary files.
    :param file_extension: The extension of the file to create.
    """
    expected = tmp_path / f"expected{file_extension}"
    actual = tmp_path / "actual.does_not_matter"
    # Make sure that we test different line endings - the files should still match
    create_folder_and_write_text(expected, "Line1\r\nLine2")
    create_folder_and_write_text(actual, "Line1\nLine2")
    assert compare_files(expected=expected, actual=actual) == ""
    actual.write_text("does_not_match")
    assert compare_files(expected=expected, actual=actual) == CONTENTS_MISMATCH


def test_compare_files_csv(tmp_path: Path) -> None:
    expected = tmp_path / "expected.csv"
    actual = tmp_path / "actual.does_not_matter"
    expected.write_text(
        """foo,bar
1.0,10.0"""
    )
    actual.write_text(
        """foo,bar
1.0001,10.001"""
    )
    assert compare_files(expected=expected, actual=actual, csv_relative_tolerance=1e-2) == ""
    assert compare_files(expected=expected, actual=actual, csv_relative_tolerance=1e-3) == ""
    assert compare_files(expected=expected, actual=actual, csv_relative_tolerance=2e-4) == ""
    assert compare_files(expected=expected, actual=actual, csv_relative_tolerance=9e-5) == CONTENTS_MISMATCH


def test_compare_files_empty_csv(tmp_path: Path) -> None:
    """
    If either of the two CSV files is empty, it should not raise an error, but exit gracefully.
    """
    expected = tmp_path / "expected.csv"
    actual = tmp_path / "actual.csv"
    valid_csv = """foo,bar
1.0,10.0"""
    empty_csv = ""
    for expected_contents, actual_contents in [(empty_csv, empty_csv), (valid_csv, empty_csv), (empty_csv, valid_csv)]:
        expected.write_text(expected_contents)
        actual.write_text(actual_contents)
        assert compare_files(expected=expected, actual=actual) == FILE_FORMAT_ERROR
    expected.write_text(valid_csv)
    actual.write_text(valid_csv)
    assert compare_files(expected=expected, actual=actual) == ""


@pytest.mark.parametrize("file_extension", [".png", ".whatever"])
def test_compare_files_binary(tmp_path: Path, file_extension: str) -> None:
    """
    Checks the comparison of files that are not recognized as text files, for example images.

    :param tmp_path: A folder for temporary files
    :param file_extension: The extension of the file to create.
    """
    expected = tmp_path / f"expected{file_extension}"
    actual = tmp_path / "actual.does_not_matter"
    data1 = bytes([1, 2, 3])
    data2 = bytes([4, 5, 6])
    expected.write_bytes(data1)
    actual.write_bytes(data1)
    assert compare_files(expected=expected, actual=actual) == ""
    actual.write_bytes(data2)
    assert compare_files(expected=expected, actual=actual) == CONTENTS_MISMATCH


def test_compare_folder(tmp_path: Path) -> None:
    """
    Test the comparison of folders that we use for regression tests.
    """
    # Create a set of expected and actual files on the fly.
    expected = tmp_path / "expected"
    actual = tmp_path / "actual"
    matching = "matching.txt"
    missing = "missing.txt"
    ignored = "ignored.txt"
    # Comparison should cover at least .csv and .txt files
    mismatch = "mismatch.csv"
    extra = "extra.txt"
    subfolder = Path("folder")
    # This file exists in both expected and actual, and should not raise any alerts because its contents match
    # apart from linebreaks
    create_folder_and_write_text(expected / subfolder / matching, "Line1\r\nLine2")
    create_folder_and_write_text(actual / subfolder / matching, "Line1\nLine2")
    # This file only exists in the expected results, and should create an error saying that it is missing
    # from the actual results
    (expected / subfolder / missing).write_text("missing")
    # This file exists only in the actual results, and not the expected results, and so should not create an error.
    (actual / extra).write_text("extra")
    # This file exists in both actual and expected, but has different contents, hence should create an error
    (expected / subfolder / mismatch).write_text("contents1")
    (actual / subfolder / mismatch).write_text("contents2")
    messages = compare_folder_contents(expected_folder=expected, actual_folder=actual)
    all_messages = " ".join(messages)
    # No issues expected
    assert matching not in all_messages
    assert extra not in all_messages
    assert ignored not in all_messages
    # Folders should be skipped in the comparison
    assert f"{MISSING_FILE}: {subfolder}" not in messages
    assert f"{MISSING_FILE}: {subfolder}/{missing}" in messages
    assert f"{CONTENTS_MISMATCH}: {subfolder}/{mismatch}" in messages


def test_compare_plain_outputs(tmp_path: Path) -> None:
    """
    Test the comparison of a set of expected files against the plain job output folder.
    """
    expected_root = tmp_path / "expected"
    expected = expected_root / REGRESSION_TEST_OUTPUT_FOLDER
    actual = tmp_path / "my_output"
    for folder in [expected, actual]:
        file1 = folder / "output.txt"
        create_folder_and_write_text(file1, "Something")
    # First comparison should pass
    compare_folders_and_run_outputs(expected=expected_root, actual=actual)
    # Now add a file to the set of expected files that does not exist in the run: comparison should now fail
    no_such_file = "no_such_file.txt"
    file2 = expected / no_such_file
    create_folder_and_write_text(file2, "foo")
    with pytest.raises(ValueError) as ex:
        compare_folders_and_run_outputs(expected=expected_root, actual=actual)
    message = ex.value.args[0].splitlines()
    assert f"{MISSING_FILE}: {no_such_file}" in message


def test_compare_folder_against_run(tmp_path: Path) -> None:
    """
    Test that we can check that a set of files exists in an AML run.
    """
    upload_to_run_and_compare(
        regression_test_subfolder=REGRESSION_TEST_AZUREML_FOLDER,
        run_to_mock="RUN_CONTEXT",
        tmp_path=tmp_path,
    )


def test_compare_folder_against_run_parent(tmp_path: Path) -> None:
    """
    Test that we can check that a set of files exists in the parent of an AML run.
    """
    upload_to_run_and_compare(
        regression_test_subfolder=REGRESSION_TEST_AZUREML_PARENT_FOLDER,
        run_to_mock="PARENT_RUN_CONTEXT",
        tmp_path=tmp_path,
    )


def upload_to_run_and_compare(regression_test_subfolder: str, run_to_mock: str, tmp_path: Path) -> None:
    """Creates a set of files in an AzureML run, and checks if the comparison tools accept/fail
    in the right way.

    :param regression_test_subfolder: The subfolder of the regression test results where the files
        should be created (either REGRESSION_TEST_AZUREML_FOLDER or REGRESSION_TEST_AZUREML_PARENT_FOLDER)
    :param run_to_mock: either RUN_CONTEXT or PARENT_RUN_CONTEXT
    :param tmp_path: A temporary folder to use
    """
    file_contents = "some file contents"
    file_name = "contents.txt"
    regression_test_folder = tmp_path / "expected"
    run = create_aml_run_object(workspace=DEFAULT_WORKSPACE.workspace, experiment_name="test_regression_tests")
    # Upload a single file to the newly created run. When comparing the run output files,
    # and seeing this in the set of files that are expected to exist on the run, this should pass.
    file1 = tmp_path / file_name
    create_folder_and_write_text(file1, file_contents)
    run.upload_file(file_name, str(file1))
    run.flush()
    file1_expected = regression_test_folder / regression_test_subfolder / file_name
    create_folder_and_write_text(file1_expected, file_contents)
    with mock.patch("health_ml.utils.regression_test_utils." + run_to_mock, run):
        # First comparison only on the single file should pass. Value passed for the 'actual' argument is irrelevant.
        compare_folders_and_run_outputs(expected=regression_test_folder, actual=Path.cwd())
        # Now add a file to the set of expected files that does not exist in the run: comparison should now fail
        no_such_file = "no_such_file.txt"
        file2_expected = regression_test_folder / regression_test_subfolder / no_such_file
        create_folder_and_write_text(file2_expected, "foo")
        with pytest.raises(ValueError) as ex:
            compare_folders_and_run_outputs(expected=regression_test_folder, actual=Path.cwd())
        message = ex.value.args[0].splitlines()
        assert f"{MISSING_FILE}: {no_such_file}" in message
    # Now run the same comparison that failed previously, without mocking. This should now
    # realize that the present run is an offline run, and skip the comparison
    compare_folders_and_run_outputs(expected=regression_test_folder, actual=Path.cwd())

View file

@@ -23,6 +23,7 @@
        "root": "hi-ml/testhiml",
        "extraPaths": [
            "hi-ml-azure/src",
            "hi-ml-azure/testazure",
            "hi-ml/src",
        ]
    },