Switch more code to using Path (#305)

- Rename the `TestOutputDirectories` class, because pytest picks up any class whose name starts with `Test` and expects it to contain tests
- Switch fields to using `Path` rather than `str` (a condensed sketch of the resulting helper class follows)
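
A condensed sketch, for orientation only, of the renamed helper after this change, pieced together from the InnerEye/Common/output_directories.py diff further down; the pytest fixture that supplies it to tests is assumed to live in a conftest.py not touched by this commit.

from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class OutputFolderForTests:
    """Output directory for a single test, held as a Path rather than a str."""
    root_dir: Path

    def create_file_or_folder_path(self, file_or_folder_name: str) -> Path:
        # Build a full path under root_dir; nothing is created on disk here.
        return self.root_dir / file_or_folder_name

    def make_sub_dir(self, dir_name: str) -> Path:
        # Create a subdirectory of root_dir and return it as a Path.
        sub_dir_path = self.create_file_or_folder_path(dir_name)
        sub_dir_path.mkdir()
        return sub_dir_path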
This commit is contained in:
Anton Schwaighofer 2020-11-02 19:49:13 +00:00 committed by GitHub
Parent d4b9720c81
Commit e7a88877c5
No key found that matches this signature
GPG key ID: 4AEE18F83AFDEB23
55 changed files: 394 additions and 437 deletions


@ -252,8 +252,8 @@ class SourceConfig:
Contains all information that is required to submit a script to AzureML: Entry script, arguments,
and information to set up the Python environment inside of the AzureML virtual machine.
"""
root_folder: str
entry_script: str
root_folder: Path
entry_script: Path
conda_dependencies_files: List[Path]
script_params: Optional[Dict[str, str]] = None
hyperdrive_config_func: Optional[Callable[[MMLBaseEstimator], HyperDriveConfig]] = None


@ -265,7 +265,7 @@ def create_estimator_from_configs(azure_config: AzureConfig,
:return:
"""
# AzureML seems to sometimes expect the entry script path in Linux format, hence convert to posix path
entry_script_relative_path = Path(source_config.entry_script).relative_to(source_config.root_folder).as_posix()
entry_script_relative_path = source_config.entry_script.relative_to(source_config.root_folder).as_posix()
logging.info(f"Entry script {entry_script_relative_path} ({source_config.entry_script} relative to "
f"source directory {source_config.root_folder})")
environment_variables = {
@ -293,7 +293,7 @@ def create_estimator_from_configs(azure_config: AzureConfig,
max_run_duration = run_duration_string_to_seconds(azure_config.max_run_duration)
workspace = azure_config.get_workspace()
estimator = PyTorch(
source_directory=source_config.root_folder,
source_directory=str(source_config.root_folder),
entry_script=entry_script_relative_path,
script_params=source_config.script_params,
compute_target=azure_config.cluster,
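
For orientation, a tiny standalone illustration of the pathlib calls used above (the paths are invented): relative_to strips the root prefix, and as_posix forces forward slashes, the Linux-style format the comment above says AzureML sometimes expects for the entry script.

from pathlib import Path

root = Path("C:/repo")                          # hypothetical project root
script = Path("C:/repo/InnerEye/ML/runner.py")  # hypothetical entry script
print(script.relative_to(root).as_posix())      # prints: InnerEye/ML/runner.py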


@ -29,7 +29,7 @@ def run_pytest(pytest_mark: str, outputs_folder: Path) -> Tuple[bool, Path]:
# Only run on tests in Tests/, to avoid the Tests/ directory if this repo is consumed as a submodule
pytest_args = ["Tests/", f"--junitxml={str(_outputs_file)}"]
if pytest_mark is not None and len(pytest_mark) != 0:
if pytest_mark:
pytest_args += ["-m", pytest_mark]
logging.info(f"Starting pytest, with args: {pytest_args}")
status_code = pytest.main(pytest_args)


@ -15,8 +15,8 @@ def repository_root_directory(path: Optional[PathOrString] = None) -> Path:
:param path: if provided, a relative path to append to the absolute path to the repository root.
:return: The full path to the repository's root directory, with symlinks resolved if any.
"""
current = os.path.dirname(os.path.realpath(__file__))
root = Path(os.path.realpath(os.path.join(current, "..", "..")))
current = Path(__file__)
root = current.parent.parent.parent
if path:
return root / path
else:


@ -2,49 +2,50 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import os
import shutil
from dataclasses import dataclass
from pathlib import Path
from InnerEye.Common import common_util
from InnerEye.Common.type_annotations import PathOrString
def make_test_output_dir(folder: PathOrString) -> None:
def remove_and_create_folder(folder: PathOrString) -> None:
"""
Delete the folder if it exists, and remakes it. This method ignores errors that can come from
an explorer window still being open inside of the test result folder.
"""
folder = str(folder)
folder = Path(folder)
if os.path.exists(folder):
if folder.is_dir():
shutil.rmtree(folder, ignore_errors=True)
os.makedirs(folder, exist_ok=True)
folder.mkdir(exist_ok=True)
@dataclass(frozen=True)
class TestOutputDirectories:
class OutputFolderForTests:
"""
Data class for the output directories for a given test
"""
root_dir: str
root_dir: Path
def __post_init__(self) -> None:
common_util.check_properties_are_not_none(self)
def create_file_or_folder_path(self, file_or_folder_name: str) -> str:
def create_file_or_folder_path(self, file_or_folder_name: str) -> Path:
"""
Prepends root dir to the given file or folder name
Creates a full path for the given file or folder name relative to the root directory stored in the present
object.
:param file_or_folder_name: Name of file or folder to be created under root_dir
"""
return os.path.join(self.root_dir, file_or_folder_name)
return self.root_dir / file_or_folder_name
def make_sub_dir(self, dir_name: str) -> str:
def make_sub_dir(self, dir_name: str) -> Path:
"""
Makes a sub directory under root_dir
:param dir_name: Name of subdirectory to be created.
"""
sub_dir_path = os.path.join(self.root_dir, dir_name)
os.makedirs(sub_dir_path)
return str(sub_dir_path)
sub_dir_path = self.create_file_or_folder_path(dir_name)
sub_dir_path.mkdir()
return sub_dir_path
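
A short usage sketch, assuming (as the test modules below suggest) that a pytest fixture named test_output_dirs yields an OutputFolderForTests; the file and folder names here are invented.

from InnerEye.Common.output_directories import OutputFolderForTests

def test_writes_into_fresh_folder(test_output_dirs: OutputFolderForTests) -> None:
    # root_dir is a Path now, so the / operator and Path methods work directly.
    yaml_file = test_output_dirs.root_dir / "custom.yml"
    yaml_file.write_text("variables:\n")
    plots_dir = test_output_dirs.make_sub_dir("plots")
    assert plots_dir.is_dir()
    assert yaml_file.exists()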


@ -166,7 +166,7 @@ def get_comparison_baselines(outputs_folder: Path, azure_config: AzureConfig,
# have to look higher up.
comparison_dataset_path: Optional[Path] = None
comparison_metrics_path: Optional[Path] = None
destination_folder = Path(outputs_folder) / run_rec_id / blob_path
destination_folder = outputs_folder / run_rec_id / blob_path
# Look for dataset.csv inside epoch_NNN/Test, epoch_NNN/ and at top level
for blob_path_parent in step_up_directories(blob_path):
try:


@ -7,6 +7,7 @@ from __future__ import annotations
from dataclasses import dataclass
from enum import Enum, unique
from math import isclose
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
import numpy as np
@ -591,11 +592,11 @@ class SegmentationModelBase(ModelConfigBase):
output_size=self.get_output_size(ModelExecutionMode.TEST))
@property
def example_images_folder(self) -> str:
def example_images_folder(self) -> Path:
"""
Gets the full path in which the example images should be stored during training.
"""
return str(self.outputs_folder / EXAMPLE_IMAGES_FOLDER)
return self.outputs_folder / EXAMPLE_IMAGES_FOLDER
@property
def largest_connected_component_foreground_classes(self) -> LARGEST_CC_TYPE:


@ -458,9 +458,9 @@ class DeepLearningConfig(GenericConfig, CudaAwareConfig):
return self.file_system_config.logs_folder
@property
def checkpoint_folder(self) -> str:
def checkpoint_folder(self) -> Path:
"""Gets the full path in which the model checkpoints should be stored during training."""
return str(self.outputs_folder / CHECKPOINT_FOLDER)
return self.outputs_folder / CHECKPOINT_FOLDER
@property
def visualization_folder(self) -> Path:
@ -599,12 +599,9 @@ class DeepLearningConfig(GenericConfig, CudaAwareConfig):
"""
Returns full path to a checkpoint given an epoch
:param epoch: the epoch number
:param for_mean_teacher_model: if True looking returns path to the mean teacher checkpoint. Else returns the
path to the (main / student) model checkpoint.
:return: path to a checkpoint given an epoch
"""
return create_checkpoint_path(path=fixed_paths.repository_root_directory() / self.checkpoint_folder,
epoch=epoch)
return create_checkpoint_path(self.checkpoint_folder, epoch=epoch)
def get_effective_random_seed(self) -> int:
"""


@ -137,7 +137,6 @@ def segmentation_model_test_epoch(config: SegmentationModelBase,
:return A list with the mean dice score (across all structures apart from background) for each image.
"""
ml_util.set_random_seed(config.get_effective_random_seed(), "Model testing")
results_folder = Path(results_folder)
results_folder.mkdir(exist_ok=True)
test_dataframe = config.get_dataset_splits()[data_split]
@ -260,13 +259,13 @@ def get_patient_results_folder(results_folder: Path, patient_id: int) -> Path:
:param patient_id: The numeric ID of the patient.
:return: A path like "root/017"
"""
return results_folder / Path("{0:03d}".format(int(patient_id)))
return results_folder / f"{int(patient_id):03d}"
def store_inference_results(inference_result: InferencePipeline.Result,
config: SegmentationModelBase,
results_folder: Path,
image_header: ImageHeader) -> List[str]:
image_header: ImageHeader) -> List[Path]:
"""
Store the segmentation, posteriors, and binary predictions into Nifti files.
:param inference_result: The inference result for a given patient_id and epoch. Posteriors must be in
@ -284,7 +283,7 @@ def store_inference_results(inference_result: InferencePipeline.Result,
:return: A full path to the results folder for the file
"""
file_path = _file_name + MedicalImageFileType.NIFTI_COMPRESSED_GZ.value
return _results_folder / Path(file_path)
return _results_folder / file_path
# create the directory for the given patient inside the results dir
patient_results_folder = get_patient_results_folder(results_folder, inference_result.patient_id)
@ -294,7 +293,7 @@ def store_inference_results(inference_result: InferencePipeline.Result,
image_paths = [io_util.store_as_ubyte_nifti(
image=inference_result.segmentation,
header=image_header,
file_name=str(create_file_path(patient_results_folder, "segmentation")))]
file_name=create_file_path(patient_results_folder, "segmentation"))]
class_names_and_indices = config.class_and_index_with_background().items()
binaries = binaries_from_multi_label_array(inference_result.segmentation, config.number_of_classes)


@ -4,7 +4,6 @@
# ------------------------------------------------------------------------------------------
import argparse
import logging
import os
from time import time
from typing import Optional, Tuple, TypeVar
@ -101,9 +100,9 @@ def model_train(config: ModelConfigBase, run_recovery: Optional[RunRecovery] = N
.format(config.start_epoch))
# Create checkpoint directory for this run if it doesn't already exist
logging.info("Models are saved at {}".format(config.checkpoint_folder))
if not os.path.isdir(config.checkpoint_folder):
os.makedirs(config.checkpoint_folder)
logging.info(f"Models are saved at {config.checkpoint_folder}")
if not config.checkpoint_folder.is_dir():
config.checkpoint_folder.mkdir()
# Create the SummaryWriters for Tensorboard
writers = create_summary_writers(config)


@ -192,7 +192,7 @@ class Runner:
# perform aggregation as cross val splits are now ready
plot_crossval_config = crossval_config_from_model_config(self.model_config)
plot_crossval_config.run_recovery_id = PARENT_RUN_CONTEXT.tags[RUN_RECOVERY_ID_KEY_NAME]
plot_crossval_config.outputs_directory = str(self.model_config.outputs_folder)
plot_crossval_config.outputs_directory = self.model_config.outputs_folder
plot_crossval_config.settings_yaml_file = self.yaml_config_file
cross_val_results_root = plot_cross_validation(plot_crossval_config)
if self.post_cross_validation_hook:
@ -328,9 +328,8 @@ class Runner:
raise ValueError("When running on AzureML, the 'azure_dataset_id' property must be set.")
model_config_overrides = str(self.model_config.overrides)
source_config = SourceConfig(
root_folder=str(self.project_root),
entry_script=os.path.abspath(sys.argv[0]),
root_folder=self.project_root,
entry_script=Path(sys.argv[0]).resolve(),
conda_dependencies_files=[get_environment_yaml_file(),
self.project_root / fixed_paths.ENVIRONMENT_YAML_FILE_NAME],
hyperdrive_config_func=lambda estimator: self.model_config.get_hyperdrive_config(estimator),


@ -140,7 +140,7 @@ class DatasetExample:
def store_and_upload_example(dataset_example: DatasetExample,
args: Optional[SegmentationModelBase],
images_folder: Optional[str] = None) -> None:
images_folder: Optional[Path] = None) -> None:
"""
Stores an example input and output of the network to Nifti files.
@ -151,7 +151,7 @@ def store_and_upload_example(dataset_example: DatasetExample,
the args.example_images_folder is used instead.
"""
folder = "" if images_folder is None else images_folder
folder = Path("") if images_folder is None else images_folder
if args is not None:
folder = args.example_images_folder
if folder != "" and not os.path.exists(folder):


@ -466,8 +466,8 @@ def load_images_from_dataset_source(dataset_source: PatientDatasetSource) -> Sam
def store_image_as_short_nifti(image: np.ndarray,
header: ImageHeader,
file_name: str,
args: Optional[SegmentationModelBase]) -> str:
file_name: PathOrString,
args: Optional[SegmentationModelBase]) -> Path:
"""
Saves an image in nifti format as ubyte, and performs the following operations:
1) transpose the image back into X,Y,Z from Z,Y,X
@ -490,7 +490,7 @@ def store_image_as_short_nifti(image: np.ndarray,
return store_as_nifti(image=image * 1000, header=header, file_name=file_name, image_type=np.short)
def store_posteriors_as_nifti(image: np.ndarray, header: ImageHeader, file_name: str) -> str:
def store_posteriors_as_nifti(image: np.ndarray, header: ImageHeader, file_name: PathOrString) -> Path:
"""
Saves an array of posteriors in nifti format as ubyte, and performs the following operations:
1) transpose the image back into X,Y,Z from Z,Y,X
@ -511,8 +511,8 @@ def store_posteriors_as_nifti(image: np.ndarray, header: ImageHeader, file_name:
def store_as_scaled_ubyte_nifti(image: np.ndarray,
header: ImageHeader,
file_name: str,
input_range: Union[Iterable[int], Iterable[float]]) -> str:
file_name: PathOrString,
input_range: Union[Iterable[int], Iterable[float]]) -> Path:
"""
Saves an image in nifti format as ubyte, and performs the following operations:
1) transpose the image back into X,Y,Z from Z,Y,X
@ -535,7 +535,7 @@ def store_as_scaled_ubyte_nifti(image: np.ndarray,
def store_as_ubyte_nifti(image: np.ndarray,
header: ImageHeader,
file_name: str) -> str:
file_name: PathOrString) -> Path:
"""
Saves an image in nifti format as ubyte, and performs the following operations:
1) transpose the image back into X,Y,Z from Z,Y,X
@ -549,7 +549,7 @@ def store_as_ubyte_nifti(image: np.ndarray,
return store_as_nifti(image, header, file_name, np.ubyte)
def store_binary_mask_as_nifti(image: np.ndarray, header: ImageHeader, file_name: str) -> str:
def store_binary_mask_as_nifti(image: np.ndarray, header: ImageHeader, file_name: PathOrString) -> Path:
"""
Saves a binary mask to nifti format, and performs the following operations:
1) Check that the image really only contains binary values (0 and 1)
@ -570,11 +570,11 @@ def store_binary_mask_as_nifti(image: np.ndarray, header: ImageHeader, file_name
def store_as_nifti(image: np.ndarray,
header: ImageHeader,
file_name: str,
file_name: PathOrString,
image_type: Union[str, type, np.dtype],
scale: bool = False,
input_range: Optional[Iterable[Union[int, float]]] = None,
output_range: Optional[Iterable[Union[int, float]]] = None) -> str:
output_range: Optional[Iterable[Union[int, float]]] = None) -> Path:
"""
Saves an image in nifti format (uploading to Azure also if an online Run), and performs the following operations:
1) transpose the image back into X,Y,Z from Z,Y,X
@ -617,8 +617,8 @@ def store_as_nifti(image: np.ndarray,
image.SetSpacing(sitk.VectorDouble(reverse_tuple_float3(header.spacing))) # Spacing needs to be X Y Z
image.SetOrigin(header.origin)
image.SetDirection(header.direction)
sitk.WriteImage(image, file_name)
return file_name
sitk.WriteImage(image, str(file_name))
return Path(file_name)
def save_lines_to_file(file: Path, values: List[str]) -> None:


@ -78,12 +78,12 @@ class RunRecovery:
if output_subdir_name:
# From e.g. parent_dir/checkpoints we want parent_dir/output_subdir_name, to which we will
# append split_index / checkpoints below to create child_dst.
checkpoint_path = Path(config.checkpoint_folder)
checkpoint_path = config.checkpoint_folder
parent_path = checkpoint_path.parent
checkpoint_subdir_name = checkpoint_path.name
root_output_dir = parent_path / output_subdir_name
else:
root_output_dir = Path(config.checkpoint_folder) / run.id
root_output_dir = config.checkpoint_folder / run.id
checkpoint_subdir_name = None
# download checkpoints for the run
download_outputs_from_run(
@ -99,7 +99,7 @@ class RunRecovery:
for child in child_runs:
if child.id == RUN_CONTEXT.id:
# We expect to find the file(s) we need in config.checkpoint_folder
child_dst = Path(config.checkpoint_folder)
child_dst = config.checkpoint_folder
else:
subdir = str(child.tags[tag_to_use] if can_use_split_indices else child.number)
if checkpoint_subdir_name:


@ -108,8 +108,9 @@ class PlotCrossValidationConfig(GenericConfig):
compare_all_against_all: bool = param.Boolean(default=False,
doc="If set, include comparisons of comparison runs against "
"each other")
outputs_directory: str = param.String(default=".", doc="The path to store results and get results "
"of plotting results for the current run")
outputs_directory: Path = param.ClassSelector(class_=Path, default=Path("."),
doc="The path to store results and get results "
"of plotting results for the current run")
outlier_range: float = param.Number(3.0, doc="Number of standard deviations away from the mean to "
"use for outlier range")
wilcoxon_test_p_value: float = param.Number(0.05, doc="Threshold for statistical tests")
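
As a side note, a minimal sketch of how a Path-valued attribute is declared with param's ClassSelector, mirroring the outputs_directory change above; the MyConfig class is made up for illustration.

import param
from pathlib import Path

class MyConfig(param.Parameterized):
    # ClassSelector checks that assigned values are Path instances, so the rest of
    # the code can rely on Path semantics instead of raw strings.
    outputs_directory = param.ClassSelector(class_=Path, default=Path("."),
                                            doc="The path to store results")

config = MyConfig()
config.outputs_directory = Path("/tmp/results")   # accepted
# config.outputs_directory = "/tmp/results"       # rejected: a str is not a Path instance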
@ -235,7 +236,7 @@ class PlotCrossValidationConfig(GenericConfig):
if self.local_run_result_split_suffix:
local_src = local_src / self.local_run_result_split_suffix
else:
local_src = Path(self.outputs_directory)
local_src = self.outputs_directory
if local_src_subdir is not None:
local_src = local_src / local_src_subdir
local_src = local_src / blob_path
@ -382,7 +383,7 @@ def download_crossval_result_files(config: PlotCrossValidationConfig,
runs_to_evaluate = []
# create the root path to store the outputs
if not download_to_folder:
download_to_folder = Path(config.outputs_directory) / CROSSVAL_RESULTS_FOLDER
download_to_folder = config.outputs_directory / CROSSVAL_RESULTS_FOLDER
# Make the folder if it doesn't exist, but preserve any existing contents.
download_to_folder.mkdir(parents=True, exist_ok=True)
start_time = time.time()
@ -417,7 +418,7 @@ def download_crossval_result_files(config: PlotCrossValidationConfig,
folder_for_run.mkdir(parents=True, exist_ok=True)
dataset_file = folder_for_run / DATASET_CSV_FILE_NAME
# Copy the run-0 dataset.csv, which should be the same, as the parent run won't have one.
shutil.copy(str(Path(config.outputs_directory) / DATASET_CSV_FILE_NAME), str(dataset_file))
shutil.copy(str(config.outputs_directory / DATASET_CSV_FILE_NAME), str(dataset_file))
else:
dataset_file = config.download_or_get_local_file(run, DATASET_CSV_FILE_NAME, folder_for_run)
if config.model_category == ModelCategory.Segmentation and not dataset_file:
@ -465,7 +466,7 @@ def get_config_and_results_for_offline_runs(train_config: DeepLearningConfig) ->
"""
plot_crossval_config = crossval_config_from_model_config(train_config)
download_to_folder = train_config.outputs_folder / CROSSVAL_RESULTS_FOLDER
plot_crossval_config.outputs_directory = str(download_to_folder)
plot_crossval_config.outputs_directory = download_to_folder
plot_crossval_config.local_run_results = str(train_config.outputs_folder)
splits = [str(i) for i in range(plot_crossval_config.number_of_cross_validation_splits)] \


@ -130,17 +130,16 @@ def submit_for_inference(args: SubmitForInferenceConfig, azure_config: AzureConf
model_id = model.id
logging.info(f"Identified model {model_id}")
source_directory = tempfile.TemporaryDirectory()
source_directory_name = source_directory.name
logging.info(f"Building inference run submission in {source_directory_name}")
source_directory_path = Path(source_directory_name)
source_directory_path = Path(source_directory.name)
logging.info(f"Building inference run submission in {source_directory_path}")
copy_image_file(args.image_file, source_directory_path / DEFAULT_DATA_FOLDER)
# We copy over run_scoring.py, and score.py as well in case the model we're using
# does not have sufficiently recent versions of those files.
for base in ["run_scoring.py", "score.py"]:
shutil.copyfile(base, str(source_directory_path / base))
source_config = SourceConfig(
root_folder=source_directory_name,
entry_script=str(source_directory_path / "run_scoring.py"),
root_folder=source_directory_path,
entry_script=source_directory_path / "run_scoring.py",
script_params={"--data-folder": ".", "--spawnprocess": "python",
"--model-id": model_id, "score.py": ""},
conda_dependencies_files=download_conda_dependency_files(model, source_directory_path)
@ -152,7 +151,7 @@ def submit_for_inference(args: SubmitForInferenceConfig, azure_config: AzureConf
logging.info(f"Run URL: {run.get_portal_url()}")
if not args.keep_upload_folder:
source_directory.cleanup()
logging.info(f"Deleted submission directory {source_directory_name}")
logging.info(f"Deleted submission directory {source_directory_path}")
if args.download_folder is None:
return None
logging.info("Awaiting run completion")


@ -7,8 +7,8 @@ import sys
from pathlib import Path
# This file here mimics how the InnerEye code would be used as a git submoTestdule. The test script will
# copy the InnerEye code to a folder Submodule. The test will then invoke the present file as a runner,
# This file here mimics how the InnerEye code would be used as a git submodule. The test script will
# copy the InnerEye code to a folder called Submodule. The test will then invoke the present file as a runner,
# and train a model in AzureML.
repository_root = Path(__file__).absolute().parent.parent


@ -15,7 +15,7 @@ from InnerEye.Azure.azure_util import DEFAULT_CROSS_VALIDATION_SPLIT_INDEX, fetc
to_azure_friendly_container_path
from InnerEye.Common import fixed_paths
from InnerEye.Common.fixed_paths import ENVIRONMENT_YAML_FILE_NAME
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from Tests.Common.test_util import DEFAULT_ENSEMBLE_RUN_RECOVERY_ID, DEFAULT_ENSEMBLE_RUN_RECOVERY_ID_NUMERIC, \
DEFAULT_RUN_RECOVERY_ID, DEFAULT_RUN_RECOVERY_ID_NUMERIC
from Tests.ML.util import get_default_workspace
@ -71,7 +71,7 @@ def test_is_cross_validation_child_run(is_ensemble: bool, is_numeric: bool) -> N
assert all([is_cross_validation_child_run(x) for x in fetch_child_runs(run)])
def test_merge_conda(test_output_dirs: TestOutputDirectories) -> None:
def test_merge_conda(test_output_dirs: OutputFolderForTests) -> None:
"""
Tests the logic for merging Conda environment files.
"""
@ -97,9 +97,9 @@ dependencies:
- azureml-sdk==1.6.0
- bar==2.0
"""
file1 = Path(test_output_dirs.root_dir) / "env1.yml"
file1 = test_output_dirs.root_dir / "env1.yml"
file1.write_text(env1)
file2 = Path(test_output_dirs.root_dir) / "env2.yml"
file2 = test_output_dirs.root_dir / "env2.yml"
file2.write_text(env2)
conda_dep = merge_conda_dependencies([file1, file2])
# We expect to see the union of channels.
@ -119,11 +119,12 @@ def test_experiment_name() -> None:
assert create_experiment_name(c) == "foo"
def test_framework_version(test_output_dirs: TestOutputDirectories) -> None:
def test_framework_version(test_output_dirs: OutputFolderForTests) -> None:
"""
Test if the Pytorch framework version can be read correctly from the current environment file.
"""
environment_file = fixed_paths.repository_root_directory(ENVIRONMENT_YAML_FILE_NAME)
assert environment_file.is_file(), "Environment file must be present"
conda_dep = CondaDependencies(conda_dependencies_file_path=environment_file)
framework = pytorch_version_from_conda_dependencies(conda_dep)
# If this fails, it is quite likely that the AzureML SDK is behind pytorch, and does not yet know about a


@ -10,12 +10,12 @@ from azureml.core import Experiment
from InnerEye.Azure.azure_util import to_azure_friendly_string
from InnerEye.Azure.run_pytest import download_pytest_result
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from Tests.ML.util import get_default_azure_config
def test_download_pytest_file(test_output_dirs: TestOutputDirectories) -> None:
output_dir = Path(test_output_dirs.root_dir)
def test_download_pytest_file(test_output_dirs: OutputFolderForTests) -> None:
output_dir = test_output_dirs.root_dir
azure_config = get_default_azure_config()
workspace = azure_config.get_workspace()


@ -3,6 +3,7 @@
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
from enum import Enum
from pathlib import Path
from typing import Any, Optional
from unittest import mock
@ -153,7 +154,7 @@ def test_source_config_set_params() -> None:
Check that commandline arguments are set correctly when submitting the script to AzureML.
In particular, the azureml flag should be omitted, irrespective of how the argument is written.
"""
s = SourceConfig(root_folder="", entry_script="something.py", conda_dependencies_files=[])
s = SourceConfig(root_folder=Path(""), entry_script=Path("something.py"), conda_dependencies_files=[])
def assert_has_params(expected_args: str) -> None:
assert s.script_params is not None


@ -3,7 +3,6 @@
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import os
from pathlib import Path
from typing import Dict
import pytest
@ -11,7 +10,7 @@ import pytest
from InnerEye.Azure import secrets_handling
from InnerEye.Azure.secrets_handling import SecretsHandling
from InnerEye.Common import fixed_paths
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
# A list of all secrets that are stored in environment variables or local secrets files.
SECRETS_IN_ENVIRONMENT = [fixed_paths.SERVICE_PRINCIPAL_KEY]
@ -74,11 +73,11 @@ def test_all_secrets_is_upper() -> None:
assert name == name.upper(), "Secret '{}' should have a only uppercase value".format(name)
def test_read_variables_from_yaml(test_output_dirs: TestOutputDirectories) -> None:
def test_read_variables_from_yaml(test_output_dirs: OutputFolderForTests) -> None:
"""
Test that variables are read from a yaml file correctly.
"""
root = Path(test_output_dirs.root_dir)
root = test_output_dirs.root_dir
# this will return a dictionary of all variables in the yaml file
yaml_path = root / "foo.yml"
yaml_path.write_text("""variables:


@ -16,7 +16,7 @@ from InnerEye.Azure.azure_util import CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY, \
CROSS_VALIDATION_SUB_FOLD_SPLIT_INDEX_TAG_KEY
from InnerEye.Common.build_config import BUILDINFORMATION_JSON, ExperimentResultLocation, \
build_information_to_dot_net_json, build_information_to_dot_net_json_file
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.common import TrackedMetrics
from InnerEye.ML.config import SegmentationModelBase
from InnerEye.ML.model_config_base import ModelConfigBase
@ -41,7 +41,7 @@ class HyperDriveTestModelScalar(ScalarModelBase):
return _create_dummy_hyperdrive_param_search_config(estimator)
def test_build_config(test_output_dirs: TestOutputDirectories) -> None:
def test_build_config(test_output_dirs: OutputFolderForTests) -> None:
"""
Test that json with build information is created correctly.
"""
@ -60,7 +60,7 @@ def test_build_config(test_output_dirs: TestOutputDirectories) -> None:
'"ResultsContainerName": null, "ResultsUri": null, "DatasetFolder": null, "DatasetFolderUri": null, ' \
'"AzureBatchJobName": "job"}'
assert expected == net_json
result_folder = Path(test_output_dirs.root_dir) / "buildinfo"
result_folder = test_output_dirs.root_dir / "buildinfo"
build_information_to_dot_net_json_file(config, result_location, folder=result_folder)
result_file = result_folder / BUILDINFORMATION_JSON
assert result_file.exists()
@ -136,7 +136,7 @@ def test_get_total_number_of_cross_validation_runs(number_of_cross_validation_sp
@pytest.mark.parametrize("number_of_cross_validation_splits_per_fold", [0, 2])
def test_get_hyperdrive_config(number_of_cross_validation_splits: int,
number_of_cross_validation_splits_per_fold: int,
test_output_dirs: TestOutputDirectories) -> None:
test_output_dirs: OutputFolderForTests) -> None:
"""
Test to make sure the number of dataset reader workers are set correctly
"""
@ -150,17 +150,17 @@ def test_get_hyperdrive_config(number_of_cross_validation_splits: int,
config.number_of_cross_validation_splits = number_of_cross_validation_splits
# create HyperDrive config with dummy estimator for testing
source_config = SourceConfig(root_folder=test_output_dirs.root_dir,
entry_script="something.py", conda_dependencies_files=[])
entry_script=Path("something.py"), conda_dependencies_files=[])
estimator = Estimator(
source_directory=source_config.root_folder,
entry_script=source_config.entry_script,
source_directory=str(source_config.root_folder),
entry_script=str(source_config.entry_script),
compute_target="Local"
)
hd_config = config.get_hyperdrive_config(estimator=estimator)
assert hd_config.estimator.source_directory == source_config.root_folder
assert hd_config.estimator.run_config.script == source_config.entry_script
assert hd_config.estimator.source_directory == str(source_config.root_folder)
assert hd_config.estimator.run_config.script == str(source_config.entry_script)
assert hd_config.estimator._script_params == source_config.script_params
if number_of_cross_validation_splits > 0 and number_of_cross_validation_splits_per_fold > 0:


@ -2,7 +2,6 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
from pathlib import Path
from unittest import mock
import pytest
@ -10,7 +9,7 @@ import pytest
from InnerEye.Common import fixed_paths
from InnerEye.Common.common_util import logging_to_stdout
from InnerEye.Common.fixed_paths import DEFAULT_AML_UPLOAD_DIR, DEFAULT_LOGS_DIR_NAME
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.config import PhotometricNormalizationMethod, SegmentationModelBase
from InnerEye.ML.runner import Runner
@ -18,14 +17,14 @@ from InnerEye.ML.runner import Runner
@pytest.mark.parametrize("is_default_namespace", [True, False])
@pytest.mark.parametrize("is_offline_run", [True, False])
def test_create_ml_runner_args(is_default_namespace: bool,
test_output_dirs: TestOutputDirectories,
test_output_dirs: OutputFolderForTests,
is_offline_run: bool) -> None:
"""Test round trip parsing of commandline arguments:
From arguments to the Azure runner to the arguments of the ML runner, checking that
whatever is passed on can be correctly parsed."""
logging_to_stdout()
model_name = "Lung"
outputs_folder = Path(test_output_dirs.root_dir)
outputs_folder = test_output_dirs.root_dir
project_root = fixed_paths.repository_root_directory()
if is_default_namespace:
model_configs_namespace = None
@ -97,15 +96,15 @@ def test_non_overridable_properties() -> None:
assert all([x in unknown for x in non_overridable])
def test_read_yaml_file_into_args(test_output_dirs: TestOutputDirectories) -> None:
def test_read_yaml_file_into_args(test_output_dirs: OutputFolderForTests) -> None:
"""
Test if the arguments for specifying the YAML config file with storage account, etc
are correctly wired up.
"""
empty_yaml = Path(test_output_dirs.root_dir) / "nothing.yaml"
empty_yaml = test_output_dirs.root_dir / "nothing.yaml"
empty_yaml.write_text("variables:\n")
with mock.patch("sys.argv", ["", "--model=Lung"]):
# Default behaviour: Application ID (service principal) should be picked up from YAML
# Default behaviour: tenant_id should be picked up from YAML
runner1 = Runner(project_root=fixed_paths.repository_root_directory(),
yaml_config_file=fixed_paths.SETTINGS_YAML_FILE)
runner1.parse_and_load_model()
@ -120,11 +119,11 @@ def test_read_yaml_file_into_args(test_output_dirs: TestOutputDirectories) -> No
assert runner2.azure_config.resource_group == ""
def test_parsing_with_custom_yaml(test_output_dirs: TestOutputDirectories) -> None:
def test_parsing_with_custom_yaml(test_output_dirs: OutputFolderForTests) -> None:
"""
Test if additional model or Azure config settings can be read correctly from YAML files.
"""
yaml_file = Path(test_output_dirs.root_dir) / "custom.yml"
yaml_file = test_output_dirs.root_dir / "custom.yml"
yaml_file.write_text("""variables:
tenant_id: 'foo'
datasets_storage_account: 'account'
@ -139,7 +138,7 @@ def test_parsing_with_custom_yaml(test_output_dirs: TestOutputDirectories) -> No
"--random_seed", "2"]
with mock.patch("sys.argv", args):
runner = Runner(project_root=fixed_paths.repository_root_directory(),
yaml_config_file=yaml_file)
yaml_config_file=yaml_file)
loader_result = runner.parse_and_load_model()
assert loader_result is not None
assert runner.azure_config is not None


@ -4,7 +4,6 @@
# ------------------------------------------------------------------------------------------
import math
from io import StringIO
from pathlib import Path
from statistics import mean
from typing import List, Optional
@ -16,7 +15,7 @@ from sklearn.metrics import roc_auc_score, roc_curve
from InnerEye.Common.common_util import DataframeLogger
from InnerEye.Common.metrics_dict import Hue, MetricType, MetricsDict, PredictionEntry, ScalarMetricsDict, \
SequenceMetricsDict, average_metric_values
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML import metrics
from InnerEye.ML.common import ModelExecutionMode
from InnerEye.ML.config import BACKGROUND_CLASS_NAME
@ -408,7 +407,7 @@ def test_hue_entries() -> None:
]
def test_load_metrics_from_df_with_hues(test_output_dirs: TestOutputDirectories) -> None:
def test_load_metrics_from_df_with_hues(test_output_dirs: OutputFolderForTests) -> None:
"""
Test if we can re-create a MetricsDict object with model predictions and labels, when the data file contains
a prediction target value.
@ -439,7 +438,7 @@ def test_load_metrics_from_df_with_hues(test_output_dirs: TestOutputDirectories)
for hue in expected_hues:
assert len(metrics_dict._get_hue(hue).get_predictions()) == 4
logger_output_file = test_output_dirs.create_file_or_folder_path("output.csv")
logger = DataframeLogger(csv_path=Path(logger_output_file))
logger = DataframeLogger(csv_path=logger_output_file)
ScalarMetricsDict.aggregate_and_save_execution_mode_metrics(metrics, logger)
output = pd.read_csv(logger_output_file, dtype=str)
assert LoggingColumns.Hue.value in output


@ -2,14 +2,13 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
from pathlib import Path
import pytest
from InnerEye.Common import common_util
from InnerEye.Common.common_util import check_is_any_of, get_namespace_root, is_private_field_name, namespace_to_path, \
path_to_namespace, print_exception
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from Tests.fixed_paths_for_tests import full_ml_test_data_path, tests_root_directory
DEFAULT_ENSEMBLE_RUN_RECOVERY_ID = "dacart_local_branch_202008:HD_99938f3c-b25f-4604-bfcc-bb7c9ed3516f"
@ -82,13 +81,13 @@ def test_print_exception() -> None:
@pytest.mark.parametrize("is_external", [True, False])
def test_namespace_to_path(is_external: bool, test_output_dirs: TestOutputDirectories) -> None:
def test_namespace_to_path(is_external: bool, test_output_dirs: OutputFolderForTests) -> None:
"""
A test to check conversion between path to namespace for InnerEye and external namespaces
"""
if is_external:
folder_name = "logs"
full_folder = Path(test_output_dirs.root_dir) / folder_name
full_folder = test_output_dirs.root_dir / folder_name
assert namespace_to_path(folder_name, root=test_output_dirs.root_dir) == full_folder
else:
from Tests.ML import test_data
@ -96,13 +95,13 @@ def test_namespace_to_path(is_external: bool, test_output_dirs: TestOutputDirect
@pytest.mark.parametrize("is_external", [True, False])
def test_path_to_namespace(is_external: bool, test_output_dirs: TestOutputDirectories) -> None:
def test_path_to_namespace(is_external: bool, test_output_dirs: OutputFolderForTests) -> None:
"""
A test to check conversion between namespace to path for InnerEye and external namespaces
"""
if is_external:
folder_name = "logs"
full_folder = Path(test_output_dirs.root_dir) / folder_name
full_folder = test_output_dirs.root_dir / folder_name
assert path_to_namespace(
path=full_folder,
root=test_output_dirs.root_dir


@ -9,7 +9,7 @@ import pandas as pd
import pytest
from InnerEye.Common.common_util import is_windows
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.dataset.scalar_dataset import ScalarDataset
from InnerEye.ML.dataset.scalar_sample import ScalarItem
from InnerEye.ML.scalar_config import ScalarModelBase
@ -20,7 +20,7 @@ from InnerEye.ML.utils import ml_util
@pytest.mark.parametrize("shuffle", [False, True])
@pytest.mark.skipif(is_windows(),
reason="This test runs fine on local Windows boxes, but leads to odd timeouts in Azure")
def test_dataloader_speed(test_output_dirs: TestOutputDirectories,
def test_dataloader_speed(test_output_dirs: OutputFolderForTests,
num_dataload_workers: int,
shuffle: bool) -> None:
"""


@ -16,7 +16,7 @@ import torch
from pandas.util.testing import assert_frame_equal
from InnerEye.Common import common_util
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.Common.type_annotations import TupleInt3
from InnerEye.ML.dataset.sample import GeneralSampleMetadata
from InnerEye.ML.dataset.scalar_dataset import DataSourceReader, ScalarDataSource, ScalarDataset, \
@ -75,7 +75,7 @@ S1,single,foo
assert "2 rows" in str(ex)
def test_load_items(test_output_dirs: TestOutputDirectories) -> None:
def test_load_items(test_output_dirs: OutputFolderForTests) -> None:
"""
Test loading file paths and labels from a datafrome.
"""
@ -107,7 +107,7 @@ S2,label,,False,2.1,2.2,B1,A2
assert items[1].numerical_non_image_features.tolist() == pytest.approx([0.7071068286895752, 0.7071067690849304])
def test_load_items_classification_versus_regression(test_output_dirs: TestOutputDirectories) -> None:
def test_load_items_classification_versus_regression(test_output_dirs: OutputFolderForTests) -> None:
"""
Test loading file paths and labels from a datafrome with diferrent configuration
"""
@ -551,11 +551,11 @@ def test_string_to_float(text: str, expected: float) -> None:
assert actual == expected
def test_files_by_stem(test_output_dirs: TestOutputDirectories) -> None:
def test_files_by_stem(test_output_dirs: OutputFolderForTests) -> None:
"""
Test enumeration of files recursively.
"""
root = Path(test_output_dirs.root_dir) / "foo"
root = test_output_dirs.root_dir / "foo"
folder1 = root / "bar"
folder1.mkdir(parents=True)
f1 = root / "1.txt"
@ -575,7 +575,7 @@ def test_files_by_stem(test_output_dirs: TestOutputDirectories) -> None:
@pytest.mark.parametrize("center_crop_size", [(2, 2, 2), None])
def test_dataset_traverse_dirs(test_output_dirs: TestOutputDirectories, center_crop_size: Optional[TupleInt3]) -> None:
def test_dataset_traverse_dirs(test_output_dirs: OutputFolderForTests, center_crop_size: Optional[TupleInt3]) -> None:
"""
Test dataset loading when the dataset file only contains file name stems, not full paths.
"""
@ -604,7 +604,7 @@ S4,label,,False,3.0
numerical_columns=[],
traverse_dirs_when_loading=True,
center_crop_size=center_crop_size,
local_dataset=Path(test_output_dirs.root_dir))
local_dataset=test_output_dirs.root_dir)
dataset = ScalarDataset(args, data_frame=df)
assert len(dataset) == 4
for i in range(4):
@ -617,7 +617,7 @@ S4,label,,False,3.0
assert images.shape == (1,) + expected_image_size
def test_dataset_normalize_image(test_output_dirs: TestOutputDirectories) -> None:
def test_dataset_normalize_image(test_output_dirs: OutputFolderForTests) -> None:
"""
Test dataset loading with window normalization image processing.
"""
@ -642,7 +642,7 @@ S4,label,,False,3.0
non_image_feature_channels={},
numerical_columns=[],
traverse_dirs_when_loading=True,
local_dataset=Path(test_output_dirs.root_dir))
local_dataset=test_output_dirs.root_dir)
raw_dataset = ScalarDataset(args, data_frame=df)
normalized = ScalarDataset(args, data_frame=df, sample_transforms=WindowNormalizationForScalarItem())
assert len(raw_dataset) == 4
@ -751,7 +751,7 @@ def test_is_index_valid() -> None:
assert not is_valid_item_index(_create(1), max_sequence_position_value=0)
def test_categorical_and_numerical_columns_are_mutually_exclusive(test_output_dirs: TestOutputDirectories) -> None:
def test_categorical_and_numerical_columns_are_mutually_exclusive(test_output_dirs: OutputFolderForTests) -> None:
csv_string = """USUBJID,week,path,value,scalar1,categorical1
S1,image,foo.nii
S1,label,,True,1.1,False
@ -791,7 +791,7 @@ def test_imbalanced_sampler() -> None:
assert count_negative_subjects / float(len(drawn_subjects)) > 0.3
def test_get_class_weights_dataset(test_output_dirs: TestOutputDirectories) -> None:
def test_get_class_weights_dataset(test_output_dirs: OutputFolderForTests) -> None:
"""
Test training and testing of sequence models that predicts at multiple time points,
when it is started via run_ml.


@ -2,19 +2,19 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import math
from io import StringIO
from pathlib import Path
from typing import List, Optional, Union
from unittest import mock
import math
import numpy as np
import pandas as pd
import pytest
import torch
from InnerEye.Common.common_util import is_windows
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.common import ModelExecutionMode
from InnerEye.ML.dataset.full_image_dataset import collate_with_metadata
from InnerEye.ML.dataset.sample import GeneralSampleMetadata
@ -530,7 +530,7 @@ def test_seq_to_tensor() -> None:
assert stacked.shape == (len(items), item1.numel())
def test_sequence_dataset_all(test_output_dirs: TestOutputDirectories) -> None:
def test_sequence_dataset_all(test_output_dirs: OutputFolderForTests) -> None:
"""
Check that the sequence dataset works end-to-end, including applying the right standardization.
"""
@ -599,7 +599,7 @@ S4,0,True,4,40,M2,B1
assert_tensors_equal(test_items[0].items[0].get_all_non_imaging_features(), [3., 3., 0., 1., 1., 0.])
def test_get_class_weights_dataset(test_output_dirs: TestOutputDirectories) -> None:
def test_get_class_weights_dataset(test_output_dirs: OutputFolderForTests) -> None:
"""
Test training and testing of sequence models that predicts at multiple time points,
when it is started via run_ml.


@ -3,7 +3,6 @@
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
from io import StringIO
from pathlib import Path
from typing import Any, List, Optional, Tuple
from unittest import mock
@ -13,9 +12,9 @@ import pytest
import torch
from InnerEye.Common import common_util
from InnerEye.Common.common_util import METRICS_FILE_NAME, logging_to_stdout, ModelExecutionMode
from InnerEye.Common.common_util import METRICS_FILE_NAME, ModelExecutionMode, logging_to_stdout
from InnerEye.Common.metrics_dict import MetricType, SequenceMetricsDict
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.dataset.sequence_dataset import SequenceDataset
from InnerEye.ML.deep_learning_config import TemperatureScalingConfig
from InnerEye.ML.model_config_base import ModelTransformsPerExecutionMode
@ -195,7 +194,7 @@ def test_rnn_classifier_via_config_1(use_combined_model: bool,
combine_hidden_state: bool,
use_encoder_layer_norm: bool,
use_mean_teacher_model: bool,
test_output_dirs: TestOutputDirectories) -> None:
test_output_dirs: OutputFolderForTests) -> None:
"""
Test if we can build a simple RNN model that only feeds off non-image features.
This just tests the mechanics of training, but not if the model learned.
@ -226,7 +225,7 @@ def test_rnn_classifier_via_config_1(use_combined_model: bool,
(True, ImagingFeatureType.ImageAndSegmentation)])
def test_run_ml_with_sequence_model(use_combined_model: bool,
imaging_feature_type: ImagingFeatureType,
test_output_dirs: TestOutputDirectories) -> None:
test_output_dirs: OutputFolderForTests) -> None:
"""
Test training and testing of sequence models, when it is started together via run_ml.
"""
@ -259,7 +258,7 @@ def test_run_ml_with_sequence_model(use_combined_model: bool,
(True, ImagingFeatureType.ImageAndSegmentation)])
def test_visualization_with_sequence_model(use_combined_model: bool,
imaging_feature_type: ImagingFeatureType,
test_output_dirs: TestOutputDirectories) -> None:
test_output_dirs: OutputFolderForTests) -> None:
config = ToySequenceModel(use_combined_model, imaging_feature_type, should_validate=False)
config.set_output_to(test_output_dirs.root_dir)
config.dataset_data_frame = _get_mock_sequence_dataset()
@ -356,7 +355,7 @@ class ToySequenceModel2(SequenceModelBase):
# Only test the non-combined model because otherwise the build takes too much time.
@pytest.mark.skipif(common_util.is_windows(), reason="Has issues on windows build")
@pytest.mark.gpu
def test_rnn_classifier_via_config_2(test_output_dirs: TestOutputDirectories) -> None:
def test_rnn_classifier_via_config_2(test_output_dirs: OutputFolderForTests) -> None:
"""
Test if we can build an RNN classifier that learns sequences, of the same kind as in
test_rnn_classifier_toy_problem, but built via the config.
@ -430,7 +429,7 @@ class ToyMultiLabelSequenceModel(SequenceModelBase):
@pytest.mark.skipif(common_util.is_windows(), reason="Has issues on windows build")
def test_run_ml_with_multi_label_sequence_model(test_output_dirs: TestOutputDirectories) -> None:
def test_run_ml_with_multi_label_sequence_model(test_output_dirs: OutputFolderForTests) -> None:
"""
Test training and testing of sequence models that predicts at multiple time points,
when it is started via run_ml.
@ -447,7 +446,7 @@ def test_run_ml_with_multi_label_sequence_model(test_output_dirs: TestOutputDire
assert metrics_dict.get_hue_names(include_default=False) == expected_prediction_targets
config.set_output_to(test_output_dirs.root_dir)
# Create a fake dataset directory to make config validation pass
config.local_dataset = Path(test_output_dirs.root_dir)
config.local_dataset = test_output_dirs.root_dir
config.dataset_data_frame = _get_multi_label_sequence_dataframe()
config.pre_process_dataset_dataframe()
config.num_epochs = 1
@ -498,7 +497,7 @@ def test_pad_gru_output(combine_hidden_states: bool) -> None:
assert torch.allclose(expected, padded)
def test_visualization_for_different_target_weeks(test_output_dirs: TestOutputDirectories) -> None:
def test_visualization_for_different_target_weeks(test_output_dirs: OutputFolderForTests) -> None:
"""
Tests that the visualizations are differentiated depending on the target week
for which we visualize it.
@ -564,7 +563,7 @@ def _get_multi_label_sequence_dataframe() -> pd.DataFrame:
return pd.read_csv(StringIO(dataset_contents), dtype=str)
def test_sequence_dataset_stats_hook(test_output_dirs: TestOutputDirectories) -> None:
def test_sequence_dataset_stats_hook(test_output_dirs: OutputFolderForTests) -> None:
model = ToySequenceModel()
model.set_output_to(test_output_dirs.root_dir)
model.dataset_data_frame = _get_mock_sequence_dataset()


@ -15,7 +15,7 @@ import torch
from InnerEye.Common import common_util
from InnerEye.Common.common_util import logging_to_stdout
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.Common.type_annotations import TupleInt3
from InnerEye.ML.dataset.scalar_dataset import ScalarDataset
from InnerEye.ML.model_config_base import ModelTransformsPerExecutionMode
@ -120,7 +120,7 @@ class ImageEncoder(ScalarModelBase):
@pytest.mark.parametrize("aggregation_type", [AggregationType.Average,
AggregationType.ZAdaptive3dAvg,
AggregationType.GatedPooling])
def test_image_encoder(test_output_dirs: TestOutputDirectories, encode_channels_jointly: bool,
def test_image_encoder(test_output_dirs: OutputFolderForTests, encode_channels_jointly: bool,
use_non_imaging_features: bool,
kernel_size_per_encoding_block: Optional[Union[TupleInt3, List[TupleInt3]]],
stride_size_per_encoding_block: Optional[Union[TupleInt3, List[TupleInt3]]],
@ -227,7 +227,7 @@ S3,week1,scan3.npy,True,6,60,Male,Val2
AggregationType.ZAdaptive3dAvg])
@pytest.mark.parametrize("imaging_feature_type", [ImagingFeatureType.Segmentation,
ImagingFeatureType.ImageAndSegmentation])
def test_image_encoder_with_segmentation(test_output_dirs: TestOutputDirectories,
def test_image_encoder_with_segmentation(test_output_dirs: OutputFolderForTests,
encode_channels_jointly: bool,
aggregation_type: AggregationType,
imaging_feature_type: ImagingFeatureType) -> None:
@ -314,7 +314,7 @@ def test_segmentation_to_one_hot(use_gpu: bool, input_on_gpu: bool) -> None:
def test_visualization_with_scalar_model(use_non_imaging_features: bool,
imaging_feature_type: ImagingFeatureType,
encode_channels_jointly: bool,
test_output_dirs: TestOutputDirectories) -> None:
test_output_dirs: OutputFolderForTests) -> None:
dataset_contents = """subject,channel,path,label,numerical1,numerical2,categorical1,categorical2
S1,week0,scan1.npy,,1,10,Male,Val1
S1,week1,scan2.npy,True,2,20,Female,Val2


@ -9,7 +9,7 @@ import pandas as pd
import pytest
from InnerEye.Common import common_util
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.common import DATASET_CSV_FILE_NAME
from InnerEye.ML.model_training import model_train
from InnerEye.ML.models.architectures.classification.image_encoder_with_mlp import create_mlp
@ -56,7 +56,7 @@ class NonImageEncoder(ScalarModelBase):
@pytest.mark.skipif(common_util.is_windows(), reason="Has issue on Windows build")
@pytest.mark.parametrize("hidden_layer_num_feature_channels", [None, 2])
def test_non_image_encoder(test_output_dirs: TestOutputDirectories,
def test_non_image_encoder(test_output_dirs: OutputFolderForTests,
hidden_layer_num_feature_channels: Optional[int]) -> None:
"""
Test if we can build a simple MLP model that only feeds off non-image features.


@ -11,7 +11,7 @@ from more_itertools import flatten
from InnerEye.Common.common_util import logging_to_stdout
from InnerEye.Common.metrics_dict import MetricType, MetricsDict
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML import model_testing, model_training
from InnerEye.ML.common import ModelExecutionMode
from InnerEye.ML.metrics import InferenceMetricsForClassification
@ -19,7 +19,7 @@ from Tests.ML.configs.ClassificationModelForTesting2D import ClassificationModel
@pytest.mark.parametrize("use_mixed_precision", [False])
def test_train_2d_classification_model(test_output_dirs: TestOutputDirectories,
def test_train_2d_classification_model(test_output_dirs: OutputFolderForTests,
use_mixed_precision: bool) -> None:
"""
Test training and testing of 2d classification models.


@ -18,7 +18,7 @@ from InnerEye.Common import common_util, fixed_paths
from InnerEye.Common.common_util import CROSSVAL_RESULTS_FOLDER, EPOCH_METRICS_FILE_NAME, METRICS_AGGREGATES_FILE, \
METRICS_FILE_NAME, logging_to_stdout, epoch_folder_name
from InnerEye.Common.metrics_dict import MetricType, MetricsDict, ScalarMetricsDict
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML import model_testing, model_training, runner
from InnerEye.ML.common import ModelExecutionMode
from InnerEye.ML.dataset.scalar_dataset import ScalarDataset
@ -38,7 +38,7 @@ from Tests.fixed_paths_for_tests import full_ml_test_data_path
@pytest.mark.cpu_and_gpu
@pytest.mark.parametrize("use_mixed_precision", [False, True])
def test_train_classification_model(test_output_dirs: TestOutputDirectories,
def test_train_classification_model(test_output_dirs: OutputFolderForTests,
use_mixed_precision: bool) -> None:
"""
Test training and testing of classification models, asserting on the individual results from training and testing.
@ -155,7 +155,7 @@ def check_log_file(path: Path, expected_csv: str, ignore_columns: List[str]) ->
@pytest.mark.parametrize("model_name", ["DummyClassification", "DummyRegression"])
@pytest.mark.parametrize("number_of_offline_cross_validation_splits", [2])
@pytest.mark.parametrize("number_of_cross_validation_splits_per_fold", [2])
def test_run_ml_with_classification_model(test_output_dirs: TestOutputDirectories,
def test_run_ml_with_classification_model(test_output_dirs: OutputFolderForTests,
number_of_offline_cross_validation_splits: int,
number_of_cross_validation_splits_per_fold: int,
model_name: str) -> None:
@ -195,7 +195,7 @@ def test_run_ml_with_classification_model(test_output_dirs: TestOutputDirectorie
@pytest.mark.skipif(common_util.is_windows(), reason="Too slow on windows")
def test_run_ml_with_segmentation_model(test_output_dirs: TestOutputDirectories) -> None:
def test_run_ml_with_segmentation_model(test_output_dirs: OutputFolderForTests) -> None:
"""
Test training and testing of segmentation models, when it is started together via run_ml.
"""
@ -214,7 +214,7 @@ def test_run_ml_with_segmentation_model(test_output_dirs: TestOutputDirectories)
MLRunner(train_config, azure_config).run()
def test_runner1(test_output_dirs: TestOutputDirectories) -> None:
def test_runner1(test_output_dirs: OutputFolderForTests) -> None:
"""
Test starting a classification model via the commandline runner. Test if we can provide overrides
for parameters that live inside the DeepLearningConfig, and ones that are specific to classification models.
@ -244,7 +244,7 @@ def test_runner1(test_output_dirs: TestOutputDirectories) -> None:
assert (config.logs_folder / runner.LOG_FILE_NAME).exists()
def test_runner2(test_output_dirs: TestOutputDirectories) -> None:
def test_runner2(test_output_dirs: OutputFolderForTests) -> None:
"""
Test starting a classification model via the commandline runner, and provide the same arguments
that would be passed in via the YAML files.
@ -444,12 +444,12 @@ def test_unroll_aggregates() -> None:
assert unrolled[-1] == EpochMetricValues(4, LoggingColumns.SubjectCount.value, 3)
def test_dataset_stats_hook(test_output_dirs: TestOutputDirectories) -> None:
def test_dataset_stats_hook(test_output_dirs: OutputFolderForTests) -> None:
"""
Test if the flexible hook for computing dataset statistics is called correctly in create_and_set_torch_datasets
"""
model = ClassificationModelForTesting()
root_dir = Path(test_output_dirs.root_dir)
root_dir = test_output_dirs.root_dir
out_file = root_dir / "stats.txt"
def hook(datasets: Dict[ModelExecutionMode, ScalarDataset]) -> None:
@ -469,7 +469,7 @@ def test_dataset_stats_hook(test_output_dirs: TestOutputDirectories) -> None:
assert out_file.read_text() == "\n".join(["Train: 2", "Test: 1", "Val: 1"])
def test_dataset_stats_hook_failing(test_output_dirs: TestOutputDirectories) -> None:
def test_dataset_stats_hook_failing(test_output_dirs: OutputFolderForTests) -> None:
"""
Test if the hook for computing dataset statistics can safely fail.
"""


@ -2,7 +2,6 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
from pathlib import Path
from typing import Any, List, Optional
import numpy as np
@ -13,7 +12,7 @@ from torch.nn import Identity
from InnerEye.Common import common_util
from InnerEye.Common.common_util import MetricsDataframeLoggers
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.common import ModelExecutionMode
from InnerEye.ML.config import SegmentationModelBase
from InnerEye.ML.configs.classification.DummyClassification import DummyClassification
@ -25,10 +24,10 @@ from InnerEye.ML.models.architectures.base_model import BaseModel, CropSizeConst
from InnerEye.ML.models.parallel.data_parallel import DataParallelModel
from InnerEye.ML.pipelines.forward_pass import SegmentationForwardPass
from InnerEye.ML.utils import ml_util
from InnerEye.ML.utils.device_aware_module import DeviceAwareModule
from InnerEye.ML.utils.io_util import ImageDataType
from InnerEye.ML.utils.metrics_util import SummaryWriters
from InnerEye.ML.utils.model_util import ModelAndInfo
from InnerEye.ML.utils.device_aware_module import DeviceAwareModule
from Tests.ML.configs.ClassificationModelForTesting import ClassificationModelForTesting
from Tests.ML.models.architectures.DummyScalarModel import DummyScalarModel
from Tests.ML.util import machine_has_gpu, no_gpu_available
@ -218,7 +217,7 @@ def test_use_gpu_flag(use_gpu_override: bool) -> None:
@pytest.mark.azureml
def test_mean_teacher_model(test_output_dirs: TestOutputDirectories) -> None:
def test_mean_teacher_model(test_output_dirs: OutputFolderForTests) -> None:
"""
Test training and weight updates of the mean teacher model computation.
"""
@ -291,12 +290,13 @@ def test_mean_teacher_model(test_output_dirs: TestOutputDirectories) -> None:
@pytest.mark.skipif(no_gpu_available, reason="Testing AMP requires a GPU")
@pytest.mark.parametrize("use_mixed_precision", [False, True])
@pytest.mark.parametrize("execution_mode", [ModelExecutionMode.TRAIN, ModelExecutionMode.VAL])
def test_amp_and_parallel_for_scalar_models(test_output_dirs: TestOutputDirectories,
def test_amp_and_parallel_for_scalar_models(test_output_dirs: OutputFolderForTests,
execution_mode: ModelExecutionMode,
use_mixed_precision: bool) -> None:
"""
Tests the mixed precision flag and data parallelism for scalar models.
"""
class ClassificationModelWithIdentity(ClassificationModelForTesting):
def create_model(self) -> Any:
return DummyScalarModel(expected_image_size_zyx=config.expected_image_size_zyx,
@ -325,7 +325,7 @@ def test_amp_and_parallel_for_scalar_models(test_output_dirs: TestOutputDirector
data_loader=data_loaders[execution_mode],
in_training_mode=execution_mode == ModelExecutionMode.TRAIN,
gradient_scaler=gradient_scaler,
dataframe_loggers=MetricsDataframeLoggers(Path(test_output_dirs.root_dir)),
dataframe_loggers=MetricsDataframeLoggers(test_output_dirs.root_dir),
summary_writers=SummaryWriters(train=None, val=None) # type: ignore
)
training_steps = ModelTrainingStepsForScalarModel(config, train_val_parameters)

View file

@ -2,23 +2,23 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import shutil
import pytest
import math
import pandas as pd
import numpy as np
import shutil
from pathlib import Path
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.ML.reports.notebook_report import generate_classification_notebook
from InnerEye.ML.reports.classification_report import ReportedMetrics, get_results, get_metric, \
get_k_best_and_worst_performing, get_correct_and_misclassified_examples, get_image_filepath_from_subject_id, \
import numpy as np
import pandas as pd
import pytest
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.reports.classification_report import ReportedMetrics, get_correct_and_misclassified_examples, \
get_image_filepath_from_subject_id, get_k_best_and_worst_performing, get_metric, get_results, \
plot_image_from_filepath
from InnerEye.ML.reports.notebook_report import generate_classification_notebook
from InnerEye.ML.utils.metrics_constants import LoggingColumns
def test_generate_classification_report(test_output_dirs: TestOutputDirectories) -> None:
def test_generate_classification_report(test_output_dirs: OutputFolderForTests) -> None:
reports_folder = Path(__file__).parent
test_metrics_file = reports_folder / "test_metrics_classification.csv"
val_metrics_file = reports_folder / "val_metrics_classification.csv"
@ -26,7 +26,7 @@ def test_generate_classification_report(test_output_dirs: TestOutputDirectories)
dataset_subject_column = "subject"
dataset_file_column = "filePath"
current_dir = Path(test_output_dirs.make_sub_dir("test_classification_report"))
current_dir = test_output_dirs.make_sub_dir("test_classification_report")
result_file = current_dir / "report.ipynb"
result_html = generate_classification_notebook(result_notebook=result_file,
val_metrics=val_metrics_file,
@ -45,11 +45,11 @@ def test_get_results() -> None:
results = get_results(test_metrics_file)
assert all([results.subject_ids[i] == i for i in range(12)])
assert all([results.labels[i] == label for i, label in enumerate([1]*6 + [0]*6)])
assert all([results.labels[i] == label for i, label in enumerate([1] * 6 + [0] * 6)])
assert all([results.model_outputs[i] == op for i, op in enumerate([0.0, 0.2, 0.4, 0.6, 0.8, 1.0] * 2)])
def test_functions_with_invalid_csv(test_output_dirs: TestOutputDirectories) -> None:
def test_functions_with_invalid_csv(test_output_dirs: OutputFolderForTests) -> None:
reports_folder = Path(__file__).parent
test_metrics_file = reports_folder / "test_metrics_classification.csv"
val_metrics_file = reports_folder / "val_metrics_classification.csv"
@ -89,7 +89,7 @@ def test_get_metric() -> None:
val_metrics_csv=val_metrics_file,
metric=ReportedMetrics.AUC_PR)
assert math.isclose(auc_pr, 13/24, abs_tol=1e-15)
assert math.isclose(auc_pr, 13 / 24, abs_tol=1e-15)
accuracy = get_metric(test_metrics_csv=test_metrics_file,
val_metrics_csv=val_metrics_file,
@ -183,7 +183,7 @@ def test_get_image_filepath_from_subject_id_invalid_id() -> None:
assert not filepath
def test_plot_image_from_filepath(test_output_dirs: TestOutputDirectories) -> None:
def test_plot_image_from_filepath(test_output_dirs: OutputFolderForTests) -> None:
im_size = (200, 300)
array = np.ones([10, 10])

View file

@ -7,17 +7,17 @@ from pathlib import Path
import pandas as pd
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.reports.notebook_report import generate_segmentation_notebook
from InnerEye.ML.reports.segmentation_report import describe_score, worst_patients_and_outliers
from InnerEye.ML.utils.csv_util import COL_IS_OUTLIER
from InnerEye.ML.utils.metrics_constants import MetricsFileColumns
def test_generate_segmentation_report(test_output_dirs: TestOutputDirectories) -> None:
def test_generate_segmentation_report(test_output_dirs: OutputFolderForTests) -> None:
reports_folder = Path(__file__).parent
metrics_file = reports_folder / "metrics_hn.csv"
current_dir = Path(test_output_dirs.make_sub_dir("test_segmentation_report"))
current_dir = test_output_dirs.make_sub_dir("test_segmentation_report")
result_file = current_dir / "report.ipynb"
result_html = generate_segmentation_notebook(result_notebook=result_file,
test_metrics=metrics_file)

View file

@ -2,13 +2,12 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
from pathlib import Path
import pandas as pd
import pytest
from InnerEye.Common import common_util
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.baselines_util import ComparisonBaseline, get_comparison_baselines, perform_score_comparisons
from Tests.Common.test_util import DEFAULT_RUN_RECOVERY_ID
from Tests.ML.util import get_default_azure_config
@ -40,11 +39,11 @@ def test_perform_score_comparisons() -> None:
assert list(result.plots.keys()) == [f"{comparison_name}_vs_CURRENT"]
def test_get_comparison_data(test_output_dirs: TestOutputDirectories) -> None:
def test_get_comparison_data(test_output_dirs: OutputFolderForTests) -> None:
azure_config = get_default_azure_config()
comparison_name = "DefaultName"
comparison_path = DEFAULT_RUN_RECOVERY_ID + "/outputs/epoch_002/Test"
baselines = get_comparison_baselines(Path(test_output_dirs.root_dir),
baselines = get_comparison_baselines(test_output_dirs.root_dir,
azure_config, [(comparison_name, comparison_path)])
assert len(baselines) == 1
assert baselines[0].name == comparison_name

View file

@ -5,12 +5,11 @@
import logging
import shutil
import time
from pathlib import Path
import pytest
from InnerEye.Common import common_util
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.common import ModelExecutionMode
from InnerEye.ML.metrics import InferenceMetricsForSegmentation
from InnerEye.ML.run_ml import MLRunner
@ -21,7 +20,7 @@ from Tests.fixed_paths_for_tests import full_ml_test_data_path
@pytest.mark.skipif(common_util.is_windows(), reason="Too slow on windows")
@pytest.mark.parametrize("perform_cross_validation", [True, False])
@pytest.mark.parametrize("perform_training_set_inference", [True, False])
def test_model_inference_train_and_test(test_output_dirs: TestOutputDirectories,
def test_model_inference_train_and_test(test_output_dirs: OutputFolderForTests,
perform_cross_validation: bool,
perform_training_set_inference: bool) -> None:
config = DummyModel()
@ -52,9 +51,9 @@ def test_model_inference_train_and_test(test_output_dirs: TestOutputDirectories,
assert folder_exists
def test_logging_to_file(test_output_dirs: TestOutputDirectories) -> None:
def test_logging_to_file(test_output_dirs: OutputFolderForTests) -> None:
# Log file should go to a new, non-existent folder, 2 levels deep
file_path = Path(test_output_dirs.root_dir) / "subdir1" / "subdir2" / "logfile.txt"
file_path = test_output_dirs.root_dir / "subdir1" / "subdir2" / "logfile.txt"
assert common_util.logging_to_file_handler is None
common_util.logging_to_file(file_path)
assert common_util.logging_to_file_handler is not None

View file

@ -11,7 +11,7 @@ from InnerEye.Azure.azure_config import AzureConfig
from InnerEye.Azure.azure_util import fetch_child_runs, fetch_run, get_results_blob_path
from InnerEye.Common import common_util, fixed_paths
from InnerEye.Common.common_util import logging_section, logging_to_stdout
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML import run_ml
from InnerEye.ML.common import CHECKPOINT_FILE_SUFFIX, DATASET_CSV_FILE_NAME
from InnerEye.ML.config import SegmentationModelBase
@ -38,9 +38,9 @@ def runner_config() -> AzureConfig:
@pytest.mark.parametrize("is_ensemble", [True, False])
def test_download_checkpoints(test_output_dirs: TestOutputDirectories, is_ensemble: bool,
def test_download_checkpoints(test_output_dirs: OutputFolderForTests, is_ensemble: bool,
runner_config: AzureConfig) -> None:
output_dir = Path(test_output_dirs.root_dir)
output_dir = test_output_dirs.root_dir
assert get_results_blob_path("some_run_id") == "azureml/ExperimentRun/dcid.some_run_id"
# Any recent run ID from a PR build will do. Use a PR build because the checkpoint files are small there.
config = SegmentationModelBase(should_validate=False)
@ -52,11 +52,11 @@ def test_download_checkpoints(test_output_dirs: TestOutputDirectories, is_ensemb
expected_checkpoint_file = "1" + CHECKPOINT_FILE_SUFFIX
if is_ensemble:
child_runs = fetch_child_runs(run_to_recover)
expected_files = [Path(config.checkpoint_folder) / run_to_recover.id
expected_files = [config.checkpoint_folder / run_to_recover.id
/ str(x.get_tags()['cross_validation_split_index']) / expected_checkpoint_file
for x in child_runs]
else:
expected_files = [Path(config.checkpoint_folder) / run_to_recover.id / expected_checkpoint_file]
expected_files = [config.checkpoint_folder / run_to_recover.id / expected_checkpoint_file]
checkpoint_paths = run_recovery.get_checkpoint_paths(1)
if is_ensemble:
@ -72,9 +72,9 @@ def test_download_checkpoints(test_output_dirs: TestOutputDirectories, is_ensemb
@pytest.mark.skipif(common_util.is_windows(), reason="Has issues on the windows build")
def test_download_checkpoints_hyperdrive_run(test_output_dirs: TestOutputDirectories,
def test_download_checkpoints_hyperdrive_run(test_output_dirs: OutputFolderForTests,
runner_config: AzureConfig) -> None:
output_dir = Path(test_output_dirs.root_dir)
output_dir = test_output_dirs.root_dir
config = SegmentationModelBase(should_validate=False)
config.set_output_to(output_dir)
runner_config.run_recovery_id = DEFAULT_ENSEMBLE_RUN_RECOVERY_ID
@ -82,18 +82,18 @@ def test_download_checkpoints_hyperdrive_run(test_output_dirs: TestOutputDirecto
# Recover child runs separately as well, to test the hyperdrive child run recovery functionality
expected_checkpoint_file = "1" + CHECKPOINT_FILE_SUFFIX
for child in child_runs:
expected_files = [Path(config.checkpoint_folder) / child.id / expected_checkpoint_file]
expected_files = [config.checkpoint_folder / child.id / expected_checkpoint_file]
run_recovery = RunRecovery.download_checkpoints_from_recovery_run(runner_config, config, child)
assert all([x in expected_files for x in run_recovery.get_checkpoint_paths(epoch=1)])
assert all([expected_file.exists() for expected_file in expected_files])
def test_download_azureml_dataset(test_output_dirs: TestOutputDirectories) -> None:
def test_download_azureml_dataset(test_output_dirs: OutputFolderForTests) -> None:
dataset_name = "test-dataset"
config = SegmentationModelBase(should_validate=False)
azure_config = get_default_azure_config()
runner = MLRunner(config, azure_config)
runner.project_root = Path(test_output_dirs.root_dir)
runner.project_root = test_output_dirs.root_dir
# If the model has neither local_dataset nor azure_dataset_id, mount_or_download_dataset should fail.
with pytest.raises(ValueError):
@ -130,11 +130,11 @@ def test_download_azureml_dataset(test_output_dirs: TestOutputDirectories) -> No
assert f.is_file()
def test_download_dataset_via_blobxfer(test_output_dirs: TestOutputDirectories) -> None:
def test_download_dataset_via_blobxfer(test_output_dirs: OutputFolderForTests) -> None:
azure_config = get_default_azure_config()
result_path = run_ml.download_dataset_via_blobxfer(dataset_id="test-dataset",
azure_config=azure_config,
target_folder=Path(test_output_dirs.root_dir))
target_folder=test_output_dirs.root_dir)
assert result_path
assert result_path.is_dir()
dataset_csv = Path(result_path) / DATASET_CSV_FILE_NAME
@ -142,12 +142,12 @@ def test_download_dataset_via_blobxfer(test_output_dirs: TestOutputDirectories)
@pytest.mark.parametrize("is_file", [True, False])
def test_download_blobxfer(test_output_dirs: TestOutputDirectories, is_file: bool, runner_config: AzureConfig) -> None:
def test_download_blobxfer(test_output_dirs: OutputFolderForTests, is_file: bool, runner_config: AzureConfig) -> None:
"""
Test for a bug in early versions of download_blobs: downloads were performed via prefixes, and because the
leading directory names were stripped, blobs overwrote each other.
"""
root = Path(test_output_dirs.root_dir)
root = test_output_dirs.root_dir
account_key = runner_config.get_dataset_storage_account_key()
assert account_key is not None
# Expected test data in Azure blobs:
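As an aside on the bug this test guards against, a minimal illustration of the failure mode follows; the blob names and the stripping logic are assumed for the example and are not taken from the repository.

# Hypothetical sketch: downloading by prefix while keeping only the trailing file name
# makes distinct blobs collide on disk, so the second download overwrites the first.
blobs = ["run_id/outputs/epoch_001/Test/metrics.csv", "run_id/outputs/epoch_002/Test/metrics.csv"]
local_names = [blob.split("/")[-1] for blob in blobs]
assert local_names[0] == local_names[1]  # both reduce to "metrics.csv"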

View file

@ -3,7 +3,6 @@
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import shutil
from pathlib import Path
from typing import Optional
import numpy as np
@ -12,7 +11,7 @@ import pytest
from InnerEye.Common import common_util
from InnerEye.Common.common_util import get_epoch_results_path
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML import model_testing
from InnerEye.ML.common import DATASET_CSV_FILE_NAME, ModelExecutionMode
from InnerEye.ML.config import DATASET_ID_FILE, GROUND_TRUTH_IDS_FILE
@ -27,13 +26,12 @@ from InnerEye.ML.utils.run_recovery import RunRecovery
from InnerEye.ML.visualizers.plot_cross_validation import get_config_and_results_for_offline_runs
from Tests.ML.configs.ClassificationModelForTesting import ClassificationModelForTesting
from Tests.ML.configs.DummyModel import DummyModel
from Tests.ML.util import assert_file_contains_string, assert_text_files_match, assert_nifti_content, \
get_image_shape
from Tests.ML.util import assert_file_contains_string, assert_nifti_content, assert_text_files_match, get_image_shape
from Tests.fixed_paths_for_tests import full_ml_test_data_path
@pytest.mark.skipif(common_util.is_windows(), reason="Too slow on windows")
def test_model_test(test_output_dirs: TestOutputDirectories) -> None:
def test_model_test(test_output_dirs: OutputFolderForTests) -> None:
train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")
config = DummyModel()
@ -65,9 +63,9 @@ def test_model_test(test_output_dirs: TestOutputDirectories) -> None:
assert_file_contains_string(epoch_dir / DATASET_ID_FILE, placeholder_dataset_id)
assert_file_contains_string(epoch_dir / GROUND_TRUTH_IDS_FILE, "region")
assert_text_files_match(epoch_dir / model_testing.METRICS_FILE_NAME,
Path(train_and_test_data_dir) / model_testing.METRICS_FILE_NAME)
train_and_test_data_dir / model_testing.METRICS_FILE_NAME)
assert_text_files_match(epoch_dir / model_testing.METRICS_AGGREGATES_FILE,
Path(train_and_test_data_dir) / model_testing.METRICS_AGGREGATES_FILE)
train_and_test_data_dir / model_testing.METRICS_AGGREGATES_FILE)
# Plotting results vary between platforms. We can only check that the file is generated, not its contents.
assert (epoch_dir / model_testing.BOXPLOT_FILE).exists()
@ -116,7 +114,7 @@ def test_model_test(test_output_dirs: TestOutputDirectories) -> None:
(ClassificationModelForTesting(), "classification_data_generated_random/checkpoints")])
def test_create_inference_pipeline_invalid_epoch(config: ModelConfigBase,
checkpoint_folder: str,
test_output_dirs: TestOutputDirectories) -> None:
test_output_dirs: OutputFolderForTests) -> None:
config.set_output_to(test_output_dirs.root_dir)
# Mimic the behaviour that checkpoints are downloaded from blob storage into the checkpoints folder.
stored_checkpoints = full_ml_test_data_path(checkpoint_folder)
@ -140,7 +138,7 @@ def test_create_inference_pipeline(with_run_recovery: bool,
checkpoint_folder: str,
inference_type: type,
ensemble_type: type,
test_output_dirs: TestOutputDirectories) -> None:
test_output_dirs: OutputFolderForTests) -> None:
config.set_output_to(test_output_dirs.root_dir)
# Mimic the behaviour that checkpoints are downloaded from blob storage into the checkpoints folder.
stored_checkpoints = full_ml_test_data_path(checkpoint_folder)

View file

@ -2,7 +2,6 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import os
from pathlib import Path
from typing import Any, List
@ -13,7 +12,7 @@ import pytest
from torch.utils.data import DataLoader
from InnerEye.Common.metrics_dict import MetricType, MetricsDict
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML import metrics, model_training
from InnerEye.ML.common import CHECKPOINT_FILE_SUFFIX, DATASET_CSV_FILE_NAME, ModelExecutionMode, STORED_CSV_FILE_NAMES
from InnerEye.ML.config import MixtureLossComponent, SegmentationLoss
@ -112,18 +111,18 @@ def test_get_total_number_of_training_epochs() -> None:
@pytest.mark.parametrize("image_channels", [["region"], ["random_123"]])
@pytest.mark.parametrize("ground_truth_ids", [["region", "region"], ["region", "other_region"]])
def test_invalid_model_train(test_output_dirs: TestOutputDirectories, image_channels: Any,
def test_invalid_model_train(test_output_dirs: OutputFolderForTests, image_channels: Any,
ground_truth_ids: Any) -> None:
with pytest.raises(ValueError):
_test_model_train(test_output_dirs, image_channels, ground_truth_ids)
@pytest.mark.parametrize("no_mask_channel", [True, False])
def test_valid_model_train(test_output_dirs: TestOutputDirectories, no_mask_channel: bool) -> None:
def test_valid_model_train(test_output_dirs: OutputFolderForTests, no_mask_channel: bool) -> None:
_test_model_train(test_output_dirs, ["channel1", "channel2"], ["region", "region_1"], no_mask_channel)
def _test_model_train(output_dirs: TestOutputDirectories,
def _test_model_train(output_dirs: OutputFolderForTests,
image_channels: Any,
ground_truth_ids: Any,
no_mask_channel: bool = False) -> None:
@ -187,16 +186,16 @@ def _test_model_train(output_dirs: TestOutputDirectories,
assert train_config.save_start_epoch == 1
assert train_config.save_step_epochs == 100
assert train_config.num_epochs == 2
assert os.path.isdir(train_config.checkpoint_folder)
assert os.path.isfile(os.path.join(train_config.checkpoint_folder, "2" + CHECKPOINT_FILE_SUFFIX))
assert train_config.checkpoint_folder.is_dir()
assert (train_config.checkpoint_folder / ("2" + CHECKPOINT_FILE_SUFFIX)).is_file()
assert (train_config.outputs_folder / DATASET_CSV_FILE_NAME).is_file()
assert (train_config.outputs_folder / STORED_CSV_FILE_NAMES[ModelExecutionMode.TRAIN]).is_file()
assert (train_config.outputs_folder / STORED_CSV_FILE_NAMES[ModelExecutionMode.VAL]).is_file()
assert_file_contains_string(train_config.outputs_folder / TRAIN_STATS_FILE, expected_stats)
# Test for saving of example images
assert os.path.isdir(train_config.example_images_folder)
example_files = os.listdir(train_config.example_images_folder)
assert train_config.example_images_folder.is_dir()
example_files = list(train_config.example_images_folder.rglob("*.*"))
assert len(example_files) == 3 * 2
# Patch sampling visualization: there should be 3 slices for each of the 2 subjects
sampling_folder = train_config.outputs_folder / PATCH_SAMPLING_FOLDER
@ -243,8 +242,8 @@ def create_data_loaders(train_config: DummyModel) -> None:
check_patient_id_in_dataset(loader, split)
def test_create_data_loaders_hdf5(test_output_dirs: TestOutputDirectories) -> None:
dataset_dir = convert_nifti_data_to_hdf5(Path(test_output_dirs.root_dir))
def test_create_data_loaders_hdf5(test_output_dirs: OutputFolderForTests) -> None:
dataset_dir = convert_nifti_data_to_hdf5(test_output_dirs.root_dir)
train_config = DummyModel()
train_config.local_dataset = dataset_dir
create_data_loaders(train_config)
@ -252,35 +251,33 @@ def test_create_data_loaders_hdf5(test_output_dirs: TestOutputDirectories) -> No
def convert_nifti_data_to_hdf5(output_hdf5_dir: Path) -> Path:
# create dataset in hdf5
with open(base_path / "dataset.csv", "r") as f:
csv_str = f.read()
csv_str = csv_str.replace("train_and_test_data/id1_channel1.nii.gz,channel1",
"p1.h5|volume|0,channel1")
csv_str = csv_str.replace("train_and_test_data/id1_channel1.nii.gz,channel2",
"p1.h5|volume|1,channel2")
csv_str = csv_str.replace("train_and_test_data/id2_channel1.nii.gz,channel1",
"p2.h5|volume|0,channel1")
csv_str = csv_str.replace("train_and_test_data/id2_channel1.nii.gz,channel2",
"p2.h5|volume|1,channel2")
# segmentation
csv_str = csv_str.replace("train_and_test_data/id1_region.nii.gz,region",
"p1.h5|region|0,region")
csv_str = csv_str.replace("train_and_test_data/id1_region.nii.gz,region_1",
"p2.h5|region|0,region_1")
csv_str = csv_str.replace("train_and_test_data/id2_region.nii.gz,region",
"p2.h5|region|0,region")
csv_str = csv_str.replace("train_and_test_data/id2_region.nii.gz,region_1",
"p2.h5|region_1|1,region_1")
# mask
csv_str = csv_str.replace("train_and_test_data/id1_mask.nii.gz,mask",
"p1.h5|mask|0,mask")
csv_str = csv_str.replace("train_and_test_data/id2_mask.nii.gz,mask",
"p2.h5|mask|0,mask")
csv_str = (base_path / "dataset.csv").read_text()
csv_str = csv_str.replace("train_and_test_data/id1_channel1.nii.gz,channel1",
"p1.h5|volume|0,channel1")
csv_str = csv_str.replace("train_and_test_data/id1_channel1.nii.gz,channel2",
"p1.h5|volume|1,channel2")
csv_str = csv_str.replace("train_and_test_data/id2_channel1.nii.gz,channel1",
"p2.h5|volume|0,channel1")
csv_str = csv_str.replace("train_and_test_data/id2_channel1.nii.gz,channel2",
"p2.h5|volume|1,channel2")
# segmentation
csv_str = csv_str.replace("train_and_test_data/id1_region.nii.gz,region",
"p1.h5|region|0,region")
csv_str = csv_str.replace("train_and_test_data/id1_region.nii.gz,region_1",
"p2.h5|region|0,region_1")
csv_str = csv_str.replace("train_and_test_data/id2_region.nii.gz,region",
"p2.h5|region|0,region")
csv_str = csv_str.replace("train_and_test_data/id2_region.nii.gz,region_1",
"p2.h5|region_1|1,region_1")
# mask
csv_str = csv_str.replace("train_and_test_data/id1_mask.nii.gz,mask",
"p1.h5|mask|0,mask")
csv_str = csv_str.replace("train_and_test_data/id2_mask.nii.gz,mask",
"p2.h5|mask|0,mask")
dataset_dir = output_hdf5_dir / "hdf5_dataset"
dataset_dir.mkdir(parents=True, exist_ok=True)
with open(dataset_dir / "dataset.csv", "w") as f:
f.write(csv_str)
(dataset_dir / "dataset.csv").write_text(csv_str)
train_data = base_path / "train_and_test_data"
create_hdf5_from_nifti(train_data / "id1_channel1.nii.gz", train_data / "id1_region.nii.gz",
train_data / "id1_mask.nii.gz",
@ -332,10 +329,10 @@ def test_recover_training_mean_teacher_model() -> None:
# First round of training
config.num_epochs = 2
model_train(config)
assert len(os.listdir(config.checkpoint_folder)) == 1
assert len(list(config.checkpoint_folder.rglob("*.*"))) == 1
# Restart training from previous run
config.start_epoch = 2
config.num_epochs = 3
model_train(config)
assert len(os.listdir(config.checkpoint_folder)) == 2
assert len(list(config.checkpoint_folder.rglob("*.*"))) == 2
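A minimal, self-contained sketch of the Path.rglob pattern used in the assertions above; the temporary directory and file names are made up for the example.

import tempfile
from pathlib import Path

tmp = Path(tempfile.mkdtemp())
(tmp / "sub").mkdir()
(tmp / "1.ckpt").write_text("a")
(tmp / "sub" / "2.ckpt").write_text("b")
# rglob searches recursively, so checkpoint files in subfolders are counted as well.
assert len(list(tmp.rglob("*.*"))) == 2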

View file

@ -2,7 +2,6 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import os
from pathlib import Path
from typing import Any, Tuple
@ -11,7 +10,7 @@ import numpy as np
import pytest
import torch
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.config import DATASET_ID_FILE, GROUND_TRUTH_IDS_FILE, IMAGE_CHANNEL_IDS_FILE, \
PhotometricNormalizationMethod, SegmentationModelBase
from InnerEye.ML.model_testing import DEFAULT_RESULT_IMAGE_NAME, METRICS_AGGREGATES_FILE, store_inference_results, \
@ -26,7 +25,7 @@ from InnerEye.ML.utils.metrics_constants import MetricsFileColumns
from InnerEye.ML.utils.metrics_util import MetricsPerPatientWriter
from InnerEye.ML.utils.transforms import LinearTransform, get_range_for_window_level
from Tests.ML.configs.DummyModel import DummyModel
from Tests.ML.util import assert_file_contains_string, assert_text_files_match, assert_nifti_content
from Tests.ML.util import assert_file_contains_string, assert_nifti_content, assert_text_files_match
from Tests.fixed_paths_for_tests import full_ml_test_data_path
model_name = "Basic"
@ -41,7 +40,7 @@ dim_y = 2
dim_z = 3
def _create_config_with_folders(test_dirs: TestOutputDirectories) -> SegmentationModelBase:
def _create_config_with_folders(test_dirs: OutputFolderForTests) -> SegmentationModelBase:
config = DummyModel()
config.set_output_to(test_dirs.root_dir)
return config
@ -54,7 +53,7 @@ def to_unique_bytes(a: np.ndarray, input_range: Tuple[float, float]) -> Any:
return np.unique(a.astype(np.ubyte))
def test_store_inference_results(test_output_dirs: TestOutputDirectories) -> None:
def test_store_inference_results(test_output_dirs: OutputFolderForTests) -> None:
np.random.seed(0)
num_classes = 2
posterior = torch.nn.functional.softmax(
@ -81,33 +80,33 @@ def test_store_inference_results(test_output_dirs: TestOutputDirectories) -> Non
results_folder = test_output_dirs.root_dir
store_inference_results(inference_result, test_config, Path(results_folder), header)
assert_nifti_content(os.path.join(results_folder, "012", "posterior_background.nii.gz"),
assert_nifti_content(results_folder / "012" / "posterior_background.nii.gz",
segmentation.shape, header, list(posterior0), np.ubyte)
assert_nifti_content(os.path.join(results_folder, "012", "posterior_region.nii.gz"),
assert_nifti_content(results_folder / "012" / "posterior_region.nii.gz",
segmentation.shape, header, list(posterior1), np.ubyte)
assert_nifti_content(os.path.join(results_folder, "012", "background.nii.gz"),
assert_nifti_content(results_folder / "012" / "background.nii.gz",
segmentation.shape, header, list([0, 1]), np.ubyte)
assert_nifti_content(os.path.join(results_folder, "012", "region.nii.gz"),
assert_nifti_content(results_folder / "012" / "region.nii.gz",
segmentation.shape, header, list([0, 1]), np.ubyte)
assert_nifti_content(os.path.join(results_folder, "012", DEFAULT_RESULT_IMAGE_NAME),
assert_nifti_content(results_folder / "012" / DEFAULT_RESULT_IMAGE_NAME,
segmentation.shape, header, list(np.unique(segmentation)), np.ubyte)
assert_nifti_content(os.path.join(results_folder, "012", "uncertainty.nii.gz"),
assert_nifti_content(results_folder / "012" / "uncertainty.nii.gz",
inference_result.uncertainty.shape, header, list([248, 249, 253, 254]), np.ubyte)
def test_metrics_file(test_output_dirs: TestOutputDirectories) -> None:
def test_metrics_file(test_output_dirs: OutputFolderForTests) -> None:
"""Test if metrics files with Dice scores are written as expected."""
folder = test_output_dirs.make_sub_dir("test_metrics_file")
def new_file(suffix: str) -> str:
file = os.path.join(folder, suffix)
if os.path.exists(file):
os.remove(file)
def new_file(suffix: str) -> Path:
file = folder / suffix
if file.is_file():
file.unlink()
return file
d = MetricsPerPatientWriter()
@ -127,11 +126,11 @@ def test_metrics_file(test_output_dirs: TestOutputDirectories) -> None:
d.to_csv(Path(metrics_file))
# Sorting should be first by structure name alphabetically, then Dice with lowest scores first.
assert_file_contains_string(metrics_file, "Patient,Structure,Dice,HausdorffDistance_mm,MeanDistance_mm\n"
"Patient3,kidney,0.400,1.000,0.100\n"
"Patient2,kidney,0.700,1.000,0.200\n"
"Patient1,liver,0.400,1.000,0.400\n"
"Patient2,liver,0.800,1.000,0.300\n"
"Patient1,liver,1.000,1.000,0.500\n")
"Patient3,kidney,0.400,1.000,0.100\n"
"Patient2,kidney,0.700,1.000,0.200\n"
"Patient1,liver,0.400,1.000,0.400\n"
"Patient2,liver,0.800,1.000,0.300\n"
"Patient1,liver,1.000,1.000,0.500\n")
aggregates_file = new_file(METRICS_AGGREGATES_FILE)
d.save_aggregates_to_csv(Path(aggregates_file))
# Sorting should be first by structure name alphabetically, then Dice with lowest scores first.
@ -154,11 +153,11 @@ def test_metrics_file(test_output_dirs: TestOutputDirectories) -> None:
resize_and_save(5, 4, boxplot2)
def test_store_run_information(test_output_dirs: TestOutputDirectories) -> None:
def test_store_run_information(test_output_dirs: OutputFolderForTests) -> None:
dataset_id = "placeholder_dataset_id"
ground_truth_ids = ["id1", "id2"]
channel_ids = ["channel1", "channel2"]
results_folder = Path(test_output_dirs.root_dir)
results_folder = test_output_dirs.root_dir
files = [results_folder / DATASET_ID_FILE,
results_folder / GROUND_TRUTH_IDS_FILE,
@ -176,7 +175,7 @@ def test_store_run_information(test_output_dirs: TestOutputDirectories) -> None:
(np.short, False, (0, 1), (0, 1)),
(np.ubyte, False, None, None),
(np.short, False, (0, 1), None)])
def test_store_as_nifti(test_output_dirs: TestOutputDirectories, image_type: Any, scale: Any, input_range: Any,
def test_store_as_nifti(test_output_dirs: OutputFolderForTests, image_type: Any, scale: Any, input_range: Any,
output_range: Any) \
-> None:
image = np.random.random_sample((dim_z, dim_y, dim_x))
@ -199,7 +198,7 @@ def test_store_as_nifti(test_output_dirs: TestOutputDirectories, image_type: Any
[(None, None, None, None),
(np.ubyte, True, [0, 1], None),
(np.short, True, None, [0, 1])])
def test_store_as_nifti_fail(test_output_dirs: TestOutputDirectories, image_type: Any, scale: Any, input_range: Any,
def test_store_as_nifti_fail(test_output_dirs: OutputFolderForTests, image_type: Any, scale: Any, input_range: Any,
output_range: Any) \
-> None:
header = ImageHeader(origin=(1, 1, 1), direction=(1, 0, 0, 1, 0, 0, 1, 0, 0), spacing=(1, 2, 4))
@ -210,7 +209,7 @@ def test_store_as_nifti_fail(test_output_dirs: TestOutputDirectories, image_type
@pytest.mark.parametrize("input_range", [(0, 1), (-1, 1), (0, 255)])
def test_store_as_scaled_ubyte_nifti(test_output_dirs: TestOutputDirectories, input_range: Any) -> None:
def test_store_as_scaled_ubyte_nifti(test_output_dirs: OutputFolderForTests, input_range: Any) -> None:
image = np.random.random_sample((dim_z, dim_y, dim_x))
header = ImageHeader(origin=(1, 1, 1), direction=(1, 0, 0, 0, 1, 0, 0, 0, 1), spacing=(1, 2, 4))
io_util.store_as_scaled_ubyte_nifti(image, header,
@ -223,7 +222,7 @@ def test_store_as_scaled_ubyte_nifti(test_output_dirs: TestOutputDirectories, in
@pytest.mark.parametrize("input_range", [None])
def test_store_as_scaled_ubyte_nifti_fail(test_output_dirs: TestOutputDirectories, input_range: Any) -> None:
def test_store_as_scaled_ubyte_nifti_fail(test_output_dirs: OutputFolderForTests, input_range: Any) -> None:
image = np.random.random_sample((dim_z, dim_y, dim_x))
header = ImageHeader(origin=(1, 1, 1), direction=(1, 0, 0, 0, 1, 0, 0, 0, 1), spacing=(1, 2, 4))
with pytest.raises(Exception):
@ -232,7 +231,7 @@ def test_store_as_scaled_ubyte_nifti_fail(test_output_dirs: TestOutputDirectorie
input_range)
def test_store_as_ubyte_nifti(test_output_dirs: TestOutputDirectories) -> None:
def test_store_as_ubyte_nifti(test_output_dirs: OutputFolderForTests) -> None:
image = np.random.random_sample((dim_z, dim_y, dim_x))
# get values in [0, 255] range
image = np.array((image + 1) * 255).astype(int)
@ -247,7 +246,7 @@ def test_store_as_ubyte_nifti(test_output_dirs: TestOutputDirectories) -> None:
[([[[1]], [[1]], [[1]]]),
([[[0]], [[0]], [[0]]]),
([[[0]], [[1]], [[1]]])])
def test_store_as_binary_nifti(test_output_dirs: TestOutputDirectories, image: Any) -> None:
def test_store_as_binary_nifti(test_output_dirs: OutputFolderForTests, image: Any) -> None:
image = np.array(image)
header = ImageHeader(origin=(1, 1, 1), direction=(1, 0, 0, 0, 1, 0, 0, 0, 1), spacing=(1, 2, 4))
io_util.store_binary_mask_as_nifti(image, header,
@ -258,7 +257,7 @@ def test_store_as_binary_nifti(test_output_dirs: TestOutputDirectories, image: A
@pytest.mark.parametrize("image", [([[[0]], [[1]], [[2]]])])
def test_store_as_binary_nifti_fail(test_output_dirs: TestOutputDirectories, image: Any) -> None:
def test_store_as_binary_nifti_fail(test_output_dirs: OutputFolderForTests, image: Any) -> None:
image = np.array(image)
header = ImageHeader(origin=(1, 1, 1), direction=(1, 0, 0, 1, 0, 0, 1, 0, 0), spacing=(1, 2, 4))
with pytest.raises(Exception):
@ -270,7 +269,7 @@ def test_store_as_binary_nifti_fail(test_output_dirs: TestOutputDirectories, ima
[([[[1]], [[1]], [[1]]], [255]),
([[[0]], [[0]], [[0]]], [0]),
([[[0.8]], [[0.1]], [[0.4]]], [25, 102, 204])])
def test_store_posteriors_nifti(test_output_dirs: TestOutputDirectories, image: Any, expected: Any) -> None:
def test_store_posteriors_nifti(test_output_dirs: OutputFolderForTests, image: Any, expected: Any) -> None:
image = np.array(image)
header = ImageHeader(origin=(1, 1, 1), direction=(1, 0, 0, 0, 1, 0, 0, 0, 1), spacing=(1, 1, 1))
io_util.store_posteriors_as_nifti(image, header, test_output_dirs.create_file_or_folder_path(default_image_name))
@ -279,7 +278,7 @@ def test_store_posteriors_nifti(test_output_dirs: TestOutputDirectories, image:
@pytest.mark.parametrize("image", [([[[0]], [[1]], [[2]]])])
def test_store_posteriors_nifti_fail(test_output_dirs: TestOutputDirectories, image: Any) -> None:
def test_store_posteriors_nifti_fail(test_output_dirs: OutputFolderForTests, image: Any) -> None:
image = np.array(image)
header = ImageHeader(origin=(1, 1, 1), direction=(1, 0, 0, 1, 0, 0, 1, 0, 0), spacing=(1, 1, 1))
with pytest.raises(Exception):
@ -287,7 +286,7 @@ def test_store_posteriors_nifti_fail(test_output_dirs: TestOutputDirectories, im
test_output_dirs.create_file_or_folder_path(default_image_name))
def test_store_posteriors_nifti_invalid_entries(test_output_dirs: TestOutputDirectories) -> None:
def test_store_posteriors_nifti_invalid_entries(test_output_dirs: OutputFolderForTests) -> None:
image = np.array([0, 1, 2.71, np.nan])
header = ImageHeader(origin=(1, 1, 1), direction=(1, 0, 0, 1, 0, 0, 1, 0, 0), spacing=(1, 1, 1))
with pytest.raises(ValueError) as ex:
@ -303,7 +302,7 @@ def test_store_posteriors_nifti_invalid_entries(test_output_dirs: TestOutputDire
(PhotometricNormalizationMethod.CtWindow, [0, 255], (40, 50)),
(PhotometricNormalizationMethod.Unchanged, [-1, 1], None),
(PhotometricNormalizationMethod.Unchanged, [-40, 40], None)])
def test_store_image_as_short_nifti(test_output_dirs: TestOutputDirectories,
def test_store_image_as_short_nifti(test_output_dirs: OutputFolderForTests,
norm_method: PhotometricNormalizationMethod,
image_range: Any,
window_level: Any) -> None:
@ -332,7 +331,7 @@ def test_store_image_as_short_nifti(test_output_dirs: TestOutputDirectories,
assert_nifti_content(nifti_name, image_shape, header, list(t), np.short)
def test_scale_and_unscale_image(test_output_dirs: TestOutputDirectories) -> None:
def test_scale_and_unscale_image(test_output_dirs: OutputFolderForTests) -> None:
"""
Test if an image in the CT value range can be recovered when we save dataset examples
(undoing the effects of CT Windowing)
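The CT windowing that this test undoes can be sketched as a simple linear map; the formula below is an illustrative assumption and may differ in detail from the project's LinearTransform (clipping to the window is ignored here so that the inverse stays exact).

# Hypothetical sketch: CT window/level scaling and its inverse.
def apply_window_level(value, window, level, out_min=0.0, out_max=255.0):
    low = level - window / 2
    scale = (out_max - out_min) / window
    return (value - low) * scale + out_min

def undo_window_level(scaled, window, level, out_min=0.0, out_max=255.0):
    low = level - window / 2
    scale = (out_max - out_min) / window
    return (scaled - out_min) / scale + low

# Round trip recovers the original CT value.
assert abs(undo_window_level(apply_window_level(40.0, 4, 13), 4, 13) - 40.0) < 1e-9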

View file

@ -3,7 +3,6 @@
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import itertools
import os
from pathlib import Path
from typing import Any, Dict, List, Union
@ -11,7 +10,7 @@ import numpy as np
import pytest
from InnerEye.Common import common_util
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML import plotting
from InnerEye.ML.config import SegmentationModelBase
from InnerEye.ML.dataset.full_image_dataset import Sample
@ -27,7 +26,7 @@ def file_as_bytes(name: Union[str, Path]) -> bytes:
@pytest.mark.parametrize("num_classes", [3, 15])
def test_plot_dice_per_epoch(test_output_dirs: TestOutputDirectories, num_classes: int) -> None:
def test_plot_dice_per_epoch(test_output_dirs: OutputFolderForTests, num_classes: int) -> None:
metrics: Dict[str, Any] = {}
epoch = [1, 2, 3]
for i in range(num_classes):
@ -36,10 +35,10 @@ def test_plot_dice_per_epoch(test_output_dirs: TestOutputDirectories, num_classe
metrics[metric_name] = {"epoch": epoch, "loss": loss}
metrics["baz"] = [17]
series_count = plotting.plot_val_dice_per_epoch(metrics)
file_name = Path(test_output_dirs.root_dir) / f"dice_per_epoch_{num_classes}classes.png"
file_name = test_output_dirs.root_dir / f"dice_per_epoch_{num_classes}classes.png"
plotting.add_legend(series_count)
plotting.resize_and_save(5, 4, file_name)
assert os.path.exists(file_name)
assert file_name.is_file()
# Try writing the same figure again, to see what the file overwrite behaviour is.
# In actual training runs, the file will be overwritten repeatedly.
plotting.resize_and_save(5, 4, file_name)
@ -49,7 +48,7 @@ def test_plot_dice_per_epoch(test_output_dirs: TestOutputDirectories, num_classe
# assert file_as_bytes(file_name) == file_as_bytes(expected)
def test_plot_image_and_contour(test_output_dirs: TestOutputDirectories) -> None:
def test_plot_image_and_contour(test_output_dirs: OutputFolderForTests) -> None:
"""
Test plotting of an image with an overlaid contour.
"""
@ -60,7 +59,7 @@ def test_plot_image_and_contour(test_output_dirs: TestOutputDirectories) -> None
labels = np.zeros(size)
labels[1, 1] = 1
file_name = "image_and_contour.png"
plot_file = Path(test_output_dirs.root_dir) / file_name
plot_file = test_output_dirs.root_dir / file_name
plotting.plot_image_and_label_contour(image, labels, contour_arguments={'colors': 'r'}, plot_file_name=plot_file)
assert plot_file.exists()
expected = full_ml_test_data_path(file_name)
@ -69,7 +68,7 @@ def test_plot_image_and_contour(test_output_dirs: TestOutputDirectories) -> None
assert file_as_bytes(plot_file) == file_as_bytes(expected)
def test_plot_image_and_contour_scaled(test_output_dirs: TestOutputDirectories) -> None:
def test_plot_image_and_contour_scaled(test_output_dirs: OutputFolderForTests) -> None:
"""
When providing an additional scaling that is a lot larger than the image range,
the output should be mostly grey.
@ -81,7 +80,7 @@ def test_plot_image_and_contour_scaled(test_output_dirs: TestOutputDirectories)
labels = np.zeros(size)
labels[1, 1] = 1
file_name = "image_scaled_and_contour.png"
plot_file = Path(test_output_dirs.root_dir) / file_name
plot_file = test_output_dirs.root_dir / file_name
plotting.plot_image_and_label_contour(image, labels, contour_arguments={'colors': 'b'},
image_range=(-5, 5), plot_file_name=plot_file)
assert plot_file.exists()
@ -91,7 +90,7 @@ def test_plot_image_and_contour_scaled(test_output_dirs: TestOutputDirectories)
assert file_as_bytes(plot_file) == file_as_bytes(expected)
def test_plot_image_and_multiple_contours(test_output_dirs: TestOutputDirectories) -> None:
def test_plot_image_and_multiple_contours(test_output_dirs: OutputFolderForTests) -> None:
"""
Test plotting of an image with two overlaid contours.
"""
@ -104,7 +103,7 @@ def test_plot_image_and_multiple_contours(test_output_dirs: TestOutputDirectorie
labels2 = np.zeros(size)
labels2[0, 0] = 1
file_name = "image_and_multiple_contours.png"
plot_file = Path(test_output_dirs.root_dir) / file_name
plot_file = test_output_dirs.root_dir / file_name
args1 = {'colors': 'r', 'linestyles': 'dashed'}
args2 = {'colors': 'b'}
plotting.plot_image_and_label_contour(image, [labels1, labels2],
@ -143,7 +142,7 @@ def compare_files(actual: List[Path], expected: List[str]) -> None:
@pytest.mark.skipif(common_util.is_windows(), reason="Rendering of the graph is slightly different on Linux")
def test_plot_normalization_result(test_output_dirs: TestOutputDirectories) -> None:
def test_plot_normalization_result(test_output_dirs: OutputFolderForTests) -> None:
"""
Tests plotting of before/after histograms in photometric normalization.
:return:
@ -163,13 +162,13 @@ def test_plot_normalization_result(test_output_dirs: TestOutputDirectories) -> N
config = SegmentationModelBase(norm_method=PhotometricNormalizationMethod.CtWindow, window=4, level=13,
should_validate=False)
normalizer = PhotometricNormalization(config)
folder = Path(test_output_dirs.root_dir)
folder = test_output_dirs.root_dir
files = plotting.plot_normalization_result(sample, normalizer, folder)
expected = ["042_slice_001.png", "042_slice_001_contour.png"]
compare_files(files, expected)
def test_plot_contours_for_all_classes(test_output_dirs: TestOutputDirectories) -> None:
def test_plot_contours_for_all_classes(test_output_dirs: OutputFolderForTests) -> None:
size = (3, 3, 3)
image = np.zeros((1,) + size)
for i, (z, y, x) in enumerate(itertools.product(range(size[0]), range(size[1]), range(size[2]))):
@ -196,7 +195,7 @@ def test_plot_contours_for_all_classes(test_output_dirs: TestOutputDirectories)
plots = plotting.plot_contours_for_all_classes(sample,
segmentation,
foreground_class_names=["class1", "class2"],
result_folder=Path(test_output_dirs.root_dir),
result_folder=test_output_dirs.root_dir,
result_prefix="prefix")
expected = ["prefix042_class1_slice_001.png",
"prefix042_class2_slice_002.png"]
@ -205,7 +204,7 @@ def test_plot_contours_for_all_classes(test_output_dirs: TestOutputDirectories)
plotting.plot_contours_for_all_classes(sample,
segmentation,
foreground_class_names=["background", "class1", "class2"],
result_folder=Path(test_output_dirs.root_dir),
result_folder=test_output_dirs.root_dir,
result_prefix="prefix")
assert "3 classes" in str(err)
assert "background" in str(err)

View file

@ -3,12 +3,11 @@
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import os
from pathlib import Path
from unittest import mock
from GPUtil import GPU
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.Common.resource_monitor import GpuUtilization, ResourceMonitor
@ -122,11 +121,11 @@ def test_utilization_max() -> None:
)
def test_resource_monitor(test_output_dirs: TestOutputDirectories) -> None:
def test_resource_monitor(test_output_dirs: OutputFolderForTests) -> None:
"""
Test if metrics are correctly updated in the ResourceMonitor class.
"""
tensorboard_folder = Path(test_output_dirs.root_dir)
tensorboard_folder = test_output_dirs.root_dir
r = ResourceMonitor(interval_seconds=5, tensorboard_folder=tensorboard_folder)
def create_gpu(id: int, load: float, mem_total: float, mem_used: float) -> GPU:
@ -157,7 +156,7 @@ def test_resource_monitor(test_output_dirs: TestOutputDirectories) -> None:
}
r.writer.flush()
r.store_to_file()
tb_file = list(Path(tensorboard_folder).rglob("*tfevents*"))[0]
tb_file = list(tensorboard_folder.rglob("*tfevents*"))[0]
assert os.path.getsize(str(tb_file)) > 100
assert r.aggregate_metrics_file.is_file()
assert len(r.aggregate_metrics_file.read_text().splitlines()) == 17
@ -165,11 +164,11 @@ def test_resource_monitor(test_output_dirs: TestOutputDirectories) -> None:
assert len(parsed_metrics) == 16
def test_resource_monitor_store_to_file(test_output_dirs: TestOutputDirectories) -> None:
def test_resource_monitor_store_to_file(test_output_dirs: OutputFolderForTests) -> None:
"""
Test if storing metrics to a file works correctly.
"""
tensorboard_folder = Path(test_output_dirs.root_dir)
tensorboard_folder = test_output_dirs.root_dir
r = ResourceMonitor(interval_seconds=5, tensorboard_folder=tensorboard_folder)
r.gpu_aggregates = {
1: GpuUtilization(id=1, mem_util=1, load=2, mem_reserved_gb=30.0, mem_allocated_gb=40.0, count=10),

View file

@ -34,9 +34,9 @@ machine_has_gpu = is_gpu_available()
no_gpu_available = not machine_has_gpu
def create_dataset_csv_file(csv_string: str, dst: str) -> Path:
def create_dataset_csv_file(csv_string: str, dst: Path) -> Path:
"""Creates a dataset.csv in the destination path from the csv_string provided"""
(Path(dst) / "dataset.csv").write_text(csv_string)
(dst / "dataset.csv").write_text(csv_string)
return Path(dst)

View file

@ -3,23 +3,23 @@
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import os
from pathlib import Path
from typing import Any, Callable, Optional, Tuple
from unittest import mock
import SimpleITK as sitk
import torch
import numpy as np
import pytest
from pathlib import Path
from typing import Any, Optional, Tuple, Callable
from unittest import mock
import torch
from skimage.transform import resize
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.dataset.sample import PatientDatasetSource, PatientMetadata
from InnerEye.ML.utils import io_util
from InnerEye.ML.utils.dataset_util import DatasetExample, store_and_upload_example
from InnerEye.ML.utils.io_util import ImageHeader, is_nifti_file_path, is_numpy_file_path, \
load_image_in_known_formats, load_numpy_image, is_dicom_file_path, load_dicom_image, \
ImageAndSegmentations, load_images_and_stack, DicomTags, PhotometricInterpretation, reverse_tuple_float3
from InnerEye.ML.utils.io_util import DicomTags, ImageAndSegmentations, ImageHeader, PhotometricInterpretation, \
is_dicom_file_path, is_nifti_file_path, is_numpy_file_path, load_dicom_image, load_image_in_known_formats, \
load_images_and_stack, load_numpy_image, reverse_tuple_float3
from Tests.ML.util import assert_file_contains_string
from Tests.fixed_paths_for_tests import full_ml_test_data_path
@ -47,7 +47,7 @@ def test_nii_load_image() -> None:
assert np.array_equal(image_with_header.image, known_array)
def test_nii_load_zyx(test_output_dirs: TestOutputDirectories) -> None:
def test_nii_load_zyx(test_output_dirs: OutputFolderForTests) -> None:
expected_shape = (44, 167, 167)
file_path = full_ml_test_data_path("patch_sampling/scan_small.nii.gz")
image: sitk.Image = sitk.ReadImage(str(file_path))
@ -146,7 +146,7 @@ def test_hdf5_loading_multimap_class_do_not_exists() -> None:
assert np.all(seg_header.image == 0)
def test_save_dataset_example(test_output_dirs: TestOutputDirectories) -> None:
def test_save_dataset_example(test_output_dirs: OutputFolderForTests) -> None:
"""
Test if the example dataset can be saved as expected.
"""
@ -210,11 +210,11 @@ def test_is_numpy_file(input: Tuple[str, bool]) -> None:
assert is_numpy_file_path(Path(file)) == expected
def test_load_numpy_image(test_output_dirs: TestOutputDirectories) -> None:
def test_load_numpy_image(test_output_dirs: OutputFolderForTests) -> None:
array_size = (20, 30, 40)
array = np.ones(array_size)
assert array.shape == array_size
npy_file = Path(test_output_dirs.root_dir) / "file.npy"
npy_file = test_output_dirs.root_dir / "file.npy"
assert is_numpy_file_path(npy_file)
np.save(npy_file, array)
image = load_numpy_image(npy_file)
@ -268,7 +268,7 @@ def get_mock_function(is_monochrome2: bool, bits_stored: Optional[int] = None) -
@pytest.mark.parametrize("is_signed", [True, False])
@pytest.mark.parametrize("is_monochrome2", [True, False])
def test_load_dicom_image_ones(test_output_dirs: TestOutputDirectories,
def test_load_dicom_image_ones(test_output_dirs: OutputFolderForTests,
is_signed: bool, is_monochrome2: bool) -> None:
"""
Test loading of 2D Dicom images filled with binary array of type (uint16) and (int16).
@ -293,7 +293,7 @@ def test_load_dicom_image_ones(test_output_dirs: TestOutputDirectories,
to_write = np.zeros(array_size, dtype='int16')
to_write[::2] = -1
dcm_file = Path(test_output_dirs.root_dir) / "file.dcm"
dcm_file = test_output_dirs.root_dir / "file.dcm"
assert is_dicom_file_path(dcm_file)
write_test_dicom(array=to_write, path=dcm_file)
@ -311,7 +311,7 @@ def test_load_dicom_image_ones(test_output_dirs: TestOutputDirectories,
@pytest.mark.parametrize("is_signed", [True, False])
@pytest.mark.parametrize("is_monochrome2", [True, False])
@pytest.mark.parametrize("bits_stored", [14, 16])
def test_load_dicom_image_random(test_output_dirs: TestOutputDirectories,
def test_load_dicom_image_random(test_output_dirs: OutputFolderForTests,
is_signed: bool, is_monochrome2: bool, bits_stored: int) -> None:
"""
Test loading of 2D Dicom images of type (uint16) and (int16).
@ -331,7 +331,7 @@ def test_load_dicom_image_random(test_output_dirs: TestOutputDirectories,
else:
to_write = -1 * array - 1
dcm_file = Path(test_output_dirs.root_dir) / "file.dcm"
dcm_file = test_output_dirs.root_dir / "file.dcm"
assert is_dicom_file_path(dcm_file)
write_test_dicom(array=to_write, path=dcm_file)
@ -420,20 +420,20 @@ def test_load_images_when_empty() -> None:
assert stacked.segmentations.shape == (0,)
def test_load_images_and_stack_2d_ones(test_output_dirs: TestOutputDirectories) -> None:
def test_load_images_and_stack_2d_ones(test_output_dirs: OutputFolderForTests) -> None:
"""
Test load of 2D images filled with (int) ones.
"""
image_size = (20, 30)
array = np.ones(image_size, dtype='uint16')
write_test_dicom(array, Path(test_output_dirs.root_dir) / "file1.dcm")
write_test_dicom(array, Path(test_output_dirs.root_dir) / "file2.dcm")
write_test_dicom(array, Path(test_output_dirs.root_dir) / "file3.dcm")
write_test_dicom(array, test_output_dirs.root_dir / "file1.dcm")
write_test_dicom(array, test_output_dirs.root_dir / "file2.dcm")
write_test_dicom(array, test_output_dirs.root_dir / "file3.dcm")
expected_tensor = torch.from_numpy(np.ones((3, 1) + image_size))
file_list = [Path(test_output_dirs.root_dir) / f"file{i}.dcm" for i in range(1, 4)]
file_list = [test_output_dirs.root_dir / f"file{i}.dcm" for i in range(1, 4)]
imaging_data = load_images_and_stack(file_list,
load_segmentation=False,
image_size=(1,) + image_size)
@ -445,7 +445,7 @@ def test_load_images_and_stack_2d_ones(test_output_dirs: TestOutputDirectories)
assert torch.allclose(imaging_data.images, expected_tensor)
def test_load_images_and_stack_2d_random(test_output_dirs: TestOutputDirectories) -> None:
def test_load_images_and_stack_2d_random(test_output_dirs: OutputFolderForTests) -> None:
"""
Test load of 2D images
"""
@ -454,15 +454,15 @@ def test_load_images_and_stack_2d_random(test_output_dirs: TestOutputDirectories
high = 200
array1 = np.random.randint(low=low, high=high, size=image_size, dtype='uint16')
write_test_dicom(array1, Path(test_output_dirs.root_dir) / "file1.dcm")
write_test_dicom(array1, test_output_dirs.root_dir / "file1.dcm")
array2 = np.random.randint(low=low, high=high, size=image_size, dtype='uint16')
write_test_dicom(array2, Path(test_output_dirs.root_dir) / "file2.dcm")
write_test_dicom(array2, test_output_dirs.root_dir / "file2.dcm")
array3 = np.random.randint(low=low, high=high, size=image_size, dtype='uint16')
write_test_dicom(array3, Path(test_output_dirs.root_dir) / "file3.dcm")
write_test_dicom(array3, test_output_dirs.root_dir / "file3.dcm")
expected_tensor = torch.from_numpy(np.expand_dims(np.stack([array1, array2, array3]).astype(float), axis=1))
file_list = [Path(test_output_dirs.root_dir) / f"file{i}.dcm" for i in range(1, 4)]
file_list = [test_output_dirs.root_dir / f"file{i}.dcm" for i in range(1, 4)]
imaging_data = load_images_and_stack(file_list,
load_segmentation=False,
image_size=(1,) + image_size)
@ -474,22 +474,22 @@ def test_load_images_and_stack_2d_random(test_output_dirs: TestOutputDirectories
assert torch.allclose(imaging_data.images, expected_tensor)
def test_load_images_and_stack_2d_with_resize_ones(test_output_dirs: TestOutputDirectories) -> None:
def test_load_images_and_stack_2d_with_resize_ones(test_output_dirs: OutputFolderForTests) -> None:
"""
Test load and resize of 2D images filled with (int) ones.
"""
image_size = (20, 30)
array = np.ones((10, 20), dtype='uint16')
write_test_dicom(array, Path(test_output_dirs.root_dir) / "file1.dcm")
write_test_dicom(array, test_output_dirs.root_dir / "file1.dcm")
array = np.ones((20, 30), dtype='uint16')
write_test_dicom(array, Path(test_output_dirs.root_dir) / "file2.dcm")
write_test_dicom(array, test_output_dirs.root_dir / "file2.dcm")
array = np.ones((30, 10), dtype='uint16')
write_test_dicom(array, Path(test_output_dirs.root_dir) / "file3.dcm")
write_test_dicom(array, test_output_dirs.root_dir / "file3.dcm")
expected_tensor = torch.from_numpy(np.ones((3, 1) + image_size))
file_list = [Path(test_output_dirs.root_dir) / f"file{i}.dcm" for i in range(1, 4)]
file_list = [test_output_dirs.root_dir / f"file{i}.dcm" for i in range(1, 4)]
imaging_data = load_images_and_stack(file_list,
load_segmentation=False,
image_size=(1,) + image_size)
@ -501,7 +501,7 @@ def test_load_images_and_stack_2d_with_resize_ones(test_output_dirs: TestOutputD
assert torch.allclose(imaging_data.images, expected_tensor)
def test_load_images_and_stack_2d_with_resize_random(test_output_dirs: TestOutputDirectories) -> None:
def test_load_images_and_stack_2d_with_resize_random(test_output_dirs: OutputFolderForTests) -> None:
"""
Test load and resize of 2D images
"""
@ -510,18 +510,18 @@ def test_load_images_and_stack_2d_with_resize_random(test_output_dirs: TestOutpu
high = 200
array1 = np.random.randint(low=low, high=high, size=(10, 20), dtype='uint16')
write_test_dicom(array1, Path(test_output_dirs.root_dir) / "file1.dcm")
write_test_dicom(array1, test_output_dirs.root_dir / "file1.dcm")
array2 = np.random.randint(low=low, high=high, size=(20, 30), dtype='uint16')
write_test_dicom(array2, Path(test_output_dirs.root_dir) / "file2.dcm")
write_test_dicom(array2, test_output_dirs.root_dir / "file2.dcm")
array3 = np.random.randint(low=low, high=high, size=(30, 20), dtype='uint16')
write_test_dicom(array3, Path(test_output_dirs.root_dir) / "file3.dcm")
write_test_dicom(array3, test_output_dirs.root_dir / "file3.dcm")
array1 = resize(array1.astype(np.float), image_size, anti_aliasing=True)
array3 = resize(array3.astype(np.float), image_size, anti_aliasing=True)
expected_tensor = torch.from_numpy(np.expand_dims(np.stack([array1, array2, array3]).astype(float), axis=1))
file_list = [Path(test_output_dirs.root_dir) / f"file{i}.dcm" for i in range(1, 4)]
file_list = [test_output_dirs.root_dir / f"file{i}.dcm" for i in range(1, 4)]
imaging_data = load_images_and_stack(file_list,
load_segmentation=False,
image_size=(1,) + image_size)
@ -533,22 +533,22 @@ def test_load_images_and_stack_2d_with_resize_random(test_output_dirs: TestOutpu
assert torch.allclose(imaging_data.images, expected_tensor)
def test_load_images_and_stack_3d_with_resize_ones(test_output_dirs: TestOutputDirectories) -> None:
def test_load_images_and_stack_3d_with_resize_ones(test_output_dirs: OutputFolderForTests) -> None:
"""
Test load and resize of 3D images filled with (float) ones.
"""
image_size = (20, 30, 20)
array = np.ones((10, 20, 10))
np.save(Path(test_output_dirs.root_dir) / "file1.npy", array)
np.save(test_output_dirs.root_dir / "file1.npy", array)
array = np.ones((20, 30, 20))
np.save(Path(test_output_dirs.root_dir) / "file2.npy", array)
np.save(test_output_dirs.root_dir / "file2.npy", array)
array = np.ones((30, 10, 30))
np.save(Path(test_output_dirs.root_dir) / "file3.npy", array)
np.save(test_output_dirs.root_dir / "file3.npy", array)
expected_tensor = torch.from_numpy(np.ones((3,) + image_size))
file_list = [Path(test_output_dirs.root_dir) / f"file{i}.npy" for i in range(1, 4)]
file_list = [test_output_dirs.root_dir / f"file{i}.npy" for i in range(1, 4)]
imaging_data = load_images_and_stack(file_list,
load_segmentation=False,
image_size=image_size)
@ -559,7 +559,7 @@ def test_load_images_and_stack_3d_with_resize_ones(test_output_dirs: TestOutputD
assert torch.allclose(imaging_data.images, expected_tensor)
def test_load_images_and_stack_3d_with_resize_random(test_output_dirs: TestOutputDirectories) -> None:
def test_load_images_and_stack_3d_with_resize_random(test_output_dirs: OutputFolderForTests) -> None:
"""
Test load and resize of 3D images
"""
@ -568,18 +568,18 @@ def test_load_images_and_stack_3d_with_resize_random(test_output_dirs: TestOutpu
high = 200
array1 = np.random.randint(low=low, high=high, size=(10, 20, 10)).astype(np.float)
np.save(Path(test_output_dirs.root_dir) / "file1.npy", array1)
np.save(test_output_dirs.root_dir / "file1.npy", array1)
array2 = np.random.randint(low=low, high=high, size=(20, 30, 20)).astype(np.float)
np.save(Path(test_output_dirs.root_dir) / "file2.npy", array2)
np.save(test_output_dirs.root_dir / "file2.npy", array2)
array3 = np.random.randint(low=low, high=high, size=(30, 10, 30)).astype(np.float)
np.save(Path(test_output_dirs.root_dir) / "file3.npy", array3)
np.save(test_output_dirs.root_dir / "file3.npy", array3)
array1 = resize(array1.astype(np.float), image_size, anti_aliasing=True)
array3 = resize(array3.astype(np.float), image_size, anti_aliasing=True)
expected_tensor = torch.from_numpy(np.stack([array1, array2, array3]).astype(float))
file_list = [Path(test_output_dirs.root_dir) / f"file{i}.npy" for i in range(1, 4)]
file_list = [test_output_dirs.root_dir / f"file{i}.npy" for i in range(1, 4)]
imaging_data = load_images_and_stack(file_list,
load_segmentation=False,
image_size=image_size)
@ -590,7 +590,7 @@ def test_load_images_and_stack_3d_with_resize_random(test_output_dirs: TestOutpu
assert torch.allclose(imaging_data.images, expected_tensor)
def test_load_images_and_stack_with_resize_only_float(test_output_dirs: TestOutputDirectories) -> None:
def test_load_images_and_stack_with_resize_only_float(test_output_dirs: OutputFolderForTests) -> None:
"""
Don't allow int type images to be loaded if image_size is set:
skimage.transform.resize will not resize these correctly
@ -598,8 +598,8 @@ def test_load_images_and_stack_with_resize_only_float(test_output_dirs: TestOutp
image_size = (20, 30, 20)
array = np.ones((10, 20, 20), dtype='uint16')
np.save(Path(test_output_dirs.root_dir) / "file.npy", array)
file_list = [Path(test_output_dirs.root_dir) / "file.npy"]
np.save(test_output_dirs.root_dir / "file.npy", array)
file_list = [test_output_dirs.root_dir / "file.npy"]
with pytest.raises(ValueError):
load_images_and_stack(file_list,
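A minimal sketch (not part of this change) of the constraint the docstring above describes: integer pixel data has to be cast to float before resizing, because skimage.transform.resize is only relied on for float input in these tests. Array and target sizes are illustrative only.

import numpy as np
from skimage.transform import resize

# uint16 input, like the arrays written by write_test_dicom in the tests above
array = np.random.randint(low=0, high=200, size=(10, 20), dtype='uint16')
# Cast to float before resizing; passing raw integer data with image_size set is exactly what load_images_and_stack rejects
resized = resize(array.astype(float), (20, 30), anti_aliasing=True)
assert resized.shape == (20, 30)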

@ -14,7 +14,7 @@ from InnerEye.Azure.azure_util import CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY, fetc
from InnerEye.Common.common_util import CROSSVAL_RESULTS_FOLDER, FULL_METRICS_DATAFRAME_FILE, METRICS_AGGREGATES_FILE, \
METRICS_FILE_NAME, logging_to_stdout
from InnerEye.Common.fixed_paths import DEFAULT_AML_UPLOAD_DIR
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.common import DATASET_CSV_FILE_NAME, ModelExecutionMode
from InnerEye.ML.deep_learning_config import ModelCategory
from InnerEye.ML.run_ml import MLRunner
@ -173,7 +173,7 @@ def _test_result_aggregation_for_classification(files: List[RunResultFiles],
Test how metrics are aggregated for cross validation runs on classification models.
"""
print(f"Writing aggregated metrics to {plotting_config.outputs_directory}")
root_folder = Path(plotting_config.outputs_directory)
root_folder = plotting_config.outputs_directory
plot_cross_validation_from_files(OfflineCrossvalConfigAndFiles(config=plotting_config, files=files),
root_folder=root_folder)
aggregates_file = root_folder / METRICS_AGGREGATES_FILE
@ -199,7 +199,7 @@ def _test_result_aggregation_for_classification(files: List[RunResultFiles],
assert per_subject_metrics[LoggingColumns.DataSplit.value].unique() == ["Val"]
def test_result_aggregation_for_classification(test_output_dirs: TestOutputDirectories) -> None:
def test_result_aggregation_for_classification(test_output_dirs: OutputFolderForTests) -> None:
"""
Test how metrics are aggregated for cross validation runs on classification models.
"""
@ -210,7 +210,7 @@ def test_result_aggregation_for_classification(test_output_dirs: TestOutputDirec
_test_result_aggregation_for_classification(files, plotting_config,
expected_aggregate_metrics=expected_aggregates,
expected_epochs={plotting_config.epoch})
dataset_csv = Path(plotting_config.outputs_directory) / DATASET_CSV_FILE_NAME
dataset_csv = plotting_config.outputs_directory / DATASET_CSV_FILE_NAME
assert dataset_csv.exists()
@ -222,10 +222,9 @@ def test_invalid_number_of_cv_files() -> None:
files, plotting_config = load_result_files_for_classification()
plotting_config.number_of_cross_validation_splits = 4
print(f"Writing aggregated metrics to {plotting_config.outputs_directory}")
root_folder = Path(plotting_config.outputs_directory)
with pytest.raises(ValueError):
plot_cross_validation_from_files(OfflineCrossvalConfigAndFiles(config=plotting_config, files=files),
root_folder=root_folder)
root_folder=plotting_config.outputs_directory)
def test_check_result_file_counts() -> None:
@ -254,7 +253,7 @@ def test_check_result_file_counts() -> None:
check_result_file_counts(config_and_files3)
def test_result_aggregation_for_classification_all_epochs(test_output_dirs: TestOutputDirectories) -> None:
def test_result_aggregation_for_classification_all_epochs(test_output_dirs: OutputFolderForTests) -> None:
"""
Test how metrics are aggregated for classification models, when no epoch is specified.
"""
@ -283,17 +282,15 @@ def test_add_comparison_data(test_config_comparison: PlotCrossValidationConfig)
def test_save_outliers(test_config_ensemble: PlotCrossValidationConfig,
test_output_dirs: TestOutputDirectories) -> None:
test_output_dirs: OutputFolderForTests) -> None:
"""Test to make sure the outlier file for a split is as expected"""
test_config_ensemble.outputs_directory = test_output_dirs.root_dir
test_config_ensemble.outlier_range = 0
dataset_split_metrics = {x: _get_metrics_df(x) for x in [ModelExecutionMode.VAL]}
save_outliers(test_config_ensemble, dataset_split_metrics, Path(test_config_ensemble.outputs_directory))
assert_text_files_match(full_file=Path(test_config_ensemble.outputs_directory)
/ f"{ModelExecutionMode.VAL.value}_outliers.txt",
expected_file=Path(
full_ml_test_data_path(
f"{ModelExecutionMode.VAL.value}_outliers.txt")))
save_outliers(test_config_ensemble, dataset_split_metrics, test_config_ensemble.outputs_directory)
f = f"{ModelExecutionMode.VAL.value}_outliers.txt"
assert_text_files_match(full_file=test_config_ensemble.outputs_directory / f,
expected_file=full_ml_test_data_path(f))
def test_create_portal_query_for_outliers() -> None:
@ -309,11 +306,11 @@ def test_create_portal_query_for_outliers() -> None:
assert expected == create_portal_query_for_outliers(test_df)
def test_create_summary(test_output_dirs: TestOutputDirectories) -> None:
def test_create_summary(test_output_dirs: OutputFolderForTests) -> None:
"""
Test that summaries of CV performance per mode, and per mode per structure, look like they should.
"""
root = Path(test_output_dirs.root_dir)
root = test_output_dirs.root_dir
test_file = full_ml_test_data_path("MetricsAcrossAllRuns.csv")
df = pd.read_csv(test_file)
file1, file2 = create_results_breakdown(df, root)
@ -352,30 +349,30 @@ def test_get_split_index() -> None:
@pytest.mark.parametrize("is_current_run", [True, False])
def test_download_or_get_local_blobs(is_current_run: bool,
test_config: PlotCrossValidationConfig,
test_output_dirs: TestOutputDirectories) -> None:
test_output_dirs: OutputFolderForTests) -> None:
azure_config = get_default_azure_config()
azure_config.get_workspace()
assert test_config.run_recovery_id is not None
run = Run.get_context() if is_current_run else fetch_run(azure_config.get_workspace(),
test_config.run_recovery_id)
run_outputs_dir = str(full_ml_test_data_path()) if is_current_run else DEFAULT_AML_UPLOAD_DIR
run_outputs_dir = full_ml_test_data_path() if is_current_run else Path(DEFAULT_AML_UPLOAD_DIR)
test_config.outputs_directory = run_outputs_dir
dst = test_config.download_or_get_local_file(
blob_to_download="dataset.csv",
destination=Path(test_output_dirs.root_dir),
destination=test_output_dirs.root_dir,
run=run
)
assert dst is not None
assert dst.exists()
def test_download_or_get_local_file_2(test_output_dirs: TestOutputDirectories) -> None:
def test_download_or_get_local_file_2(test_output_dirs: OutputFolderForTests) -> None:
config = PlotCrossValidationConfig(run_recovery_id=None,
model_category=ModelCategory.Classification,
epoch=None,
should_validate=False)
download_to_folder = Path(test_output_dirs.root_dir) / CROSSVAL_RESULTS_FOLDER
config.outputs_directory = str(download_to_folder)
download_to_folder = test_output_dirs.root_dir / CROSSVAL_RESULTS_FOLDER
config.outputs_directory = download_to_folder
local_results = full_ml_test_data_path("plot_cross_validation") / "HD_cfff5ceb-a227-41d6-a23c-0ebbc33b6301"
config.local_run_results = str(local_results)
# A file that sits in the root folder of the local_results should be downloaded into the
@ -398,7 +395,7 @@ def test_download_or_get_local_file_2(test_output_dirs: TestOutputDirectories) -
@pytest.mark.skip(reason="This test is only used to create input for test_load_files_with_prediction_target")
def test_run_ml_with_multi_label_sequence_in_crossval(test_output_dirs: TestOutputDirectories) -> None:
def test_run_ml_with_multi_label_sequence_in_crossval(test_output_dirs: OutputFolderForTests) -> None:
"""
Test training and testing of sequence models that predict at multiple time points,
including aggregation of cross validation results.
@ -448,7 +445,7 @@ def test_load_files_with_prediction_target() -> None:
assert len(metrics[LoggingColumns.Patient.value].unique()) == 4
def test_aggregate_files_with_prediction_target(test_output_dirs: TestOutputDirectories) -> None:
def test_aggregate_files_with_prediction_target(test_output_dirs: OutputFolderForTests) -> None:
"""
For multi-week RNNs that predict at multiple sequence points: Test that the dataframes
including the prediction_target column can be aggregated.
@ -460,7 +457,7 @@ def test_aggregate_files_with_prediction_target(test_output_dirs: TestOutputDire
)
files = create_run_result_file_list(plotting_config, "multi_label_sequence_in_crossval")
root_folder = Path(test_output_dirs.root_dir)
root_folder = test_output_dirs.root_dir
print(f"Writing result files to {root_folder}")
plot_cross_validation_from_files(OfflineCrossvalConfigAndFiles(config=plotting_config, files=files),
root_folder=root_folder)

@ -8,12 +8,12 @@ import numpy as np
import pytest
from InnerEye.Common import common_util
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.visualizers.regression_visualization import plot_variation_error_prediction
@pytest.mark.skipif(common_util.is_windows(), reason="Test execution time is longer on Windows")
def test_plot_variation_errors_for_regression(test_output_dirs: TestOutputDirectories) -> None:
def test_plot_variation_errors_for_regression(test_output_dirs: OutputFolderForTests) -> None:
plot_variation_error_prediction(
labels=np.array([10, 20, 20, 40, 10, 60, 90]),
predictions=np.array([12, 25, 10, 36, 11, 69, 90]),

@ -9,7 +9,7 @@ import numpy as np
import pytest
from InnerEye.Common.common_util import is_windows
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.config import SegmentationModelBase, equally_weighted_classes
from InnerEye.ML.dataset.sample import PatientMetadata, Sample
from InnerEye.ML.plotting import resize_and_save, scan_with_transparent_overlay
@ -24,7 +24,7 @@ from Tests.fixed_paths_for_tests import full_ml_test_data_path
@pytest.mark.skipif(is_windows(), reason="Plotting output is not consistent across platforms.")
@pytest.mark.parametrize("labels_to_boundary", [True, False])
def test_visualize_patch_sampling(test_output_dirs: TestOutputDirectories,
def test_visualize_patch_sampling(test_output_dirs: OutputFolderForTests,
labels_to_boundary: bool) -> None:
"""
Tests if patch sampling and producing diagnostic images works as expected.
@ -92,7 +92,7 @@ def test_visualize_patch_sampling(test_output_dirs: TestOutputDirectories,
@pytest.mark.skipif(is_windows(), reason="Plotting output is not consistent across platforms.")
def test_visualize_patch_sampling_2d(test_output_dirs: TestOutputDirectories) -> None:
def test_visualize_patch_sampling_2d(test_output_dirs: OutputFolderForTests) -> None:
"""
Tests if patch sampling works for 2D images.
:param test_output_dirs:
@ -139,7 +139,7 @@ def test_visualize_patch_sampling_2d(test_output_dirs: TestOutputDirectories) ->
@pytest.mark.skipif(is_windows(), reason="Plotting output is not consistent across platforms.")
@pytest.mark.parametrize("dimension", [0, 1, 2])
def test_plot_overlay(test_output_dirs: TestOutputDirectories,
def test_plot_overlay(test_output_dirs: OutputFolderForTests,
dimension: int) -> None:
set_random_seed(0)
shape = (10, 30, 30)
@ -160,7 +160,7 @@ def test_plot_overlay(test_output_dirs: TestOutputDirectories,
@pytest.mark.skipif(is_windows(), reason="Plotting output is not consistent across platforms.")
def test_show_non_square_images(test_output_dirs: TestOutputDirectories) -> None:
def test_show_non_square_images(test_output_dirs: OutputFolderForTests) -> None:
input_file = full_ml_test_data_path("patch_sampling") / "scan_small.nii.gz"
input = load_nifti_image(input_file)
image = input.image

@ -17,7 +17,7 @@ from InnerEye.Azure.azure_config import AzureConfig
from InnerEye.Common import common_util, fixed_paths
from InnerEye.Common.common_util import ModelProcessing
from InnerEye.Common.generic_parsing import GenericConfig
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.config import SegmentationModelBase
from InnerEye.ML.model_config_base import ModelConfigBase
from InnerEye.ML.model_inference_config import ModelInferenceConfig
@ -55,7 +55,7 @@ class SubprocessConfig(GenericConfig):
def test_register_and_score_model(is_ensemble: bool,
dataset_expected_spacing_xyz: Any,
model_outside_package: bool,
test_output_dirs: TestOutputDirectories) -> None:
test_output_dirs: OutputFolderForTests) -> None:
"""
End-to-end test which ensures the scoring pipeline is functioning as expected by performing the following:
1) Registering a pre-trained model to AML
@ -72,8 +72,8 @@ def test_register_and_score_model(is_ensemble: bool,
config.set_output_to(test_output_dirs.root_dir)
# copy checkpoints into the outputs (simulating a run)
stored_checkpoints = full_ml_test_data_path(os.path.join("train_and_test_data", "checkpoints"))
shutil.copytree(str(stored_checkpoints), config.checkpoint_folder)
paths = [Path(config.checkpoint_folder) / "1_checkpoint.pth.tar"]
shutil.copytree(str(stored_checkpoints), str(config.checkpoint_folder))
paths = [config.checkpoint_folder / "1_checkpoint.pth.tar"]
checkpoints = paths * 2 if is_ensemble else paths
model = None
model_path = None
@ -115,7 +115,7 @@ def test_register_and_score_model(is_ensemble: bool,
model_root = Path(model.download(str(test_output_dirs.root_dir)))
# create a dummy datastore to store model checkpoints and image data
# this simulates the code snapshot being executed in a real run
test_datastore = Path(test_output_dirs.root_dir) / "test_datastore"
test_datastore = test_output_dirs.root_dir / "test_datastore"
shutil.move(
str(model_root / "test_outputs"),
str(test_datastore / RELATIVE_TEST_OUTPUTS_PATH)
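Where a library call still expects plain strings, the Path fields introduced by this commit are converted explicitly, as the shutil calls above do. A small sketch with hypothetical paths, using only the standard library:

import shutil
from pathlib import Path

stored_checkpoints = Path("train_and_test_data") / "checkpoints"  # hypothetical source folder
checkpoint_folder = Path("outputs") / "checkpoints"               # hypothetical destination folder
# Explicit str() avoids relying on shutil accepting Path objects directly
shutil.copytree(str(stored_checkpoints), str(checkpoint_folder))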

@ -1,3 +1,8 @@
pr:
branches:
include:
- '*'
name: PR-$(Date:yyyyMMdd)$(Rev:-r)
variables:
model: 'BasicModel2Epochs'

@ -92,7 +92,7 @@ steps:
testResultsFiles: '**/test-*.xml'
testRunTitle: 'tests_on_$(Agent.OS)'
condition: succeededOrFailed()
displayName: Publish test results
displayName: Publish test results tests_on_$(Agent.OS)
- task: PublishCodeCoverageResults@1
inputs:

@ -19,4 +19,4 @@ steps:
env:
PYTHONPATH: $(Agent.TempDirectory)/InnerEye
APPLICATION_KEY: $(InnerEyeDeepLearningServicePrincipalKey)
displayName: 'Queue AzureML Job'
displayName: 'Training and Recovery in AzureML'

@ -13,14 +13,14 @@ from typing import Generator
import pytest
from InnerEye.Common.output_directories import TestOutputDirectories, make_test_output_dir
from InnerEye.Common.output_directories import OutputFolderForTests, remove_and_create_folder
from Tests.fixed_paths_for_tests import TEST_OUTPUTS_PATH
@pytest.fixture(autouse=True, scope='session')
def test_suite_setup() -> Generator:
# create a default outputs root for all tests
make_test_output_dir(TEST_OUTPUTS_PATH)
remove_and_create_folder(TEST_OUTPUTS_PATH)
# run the entire test suite
yield
@ -32,17 +32,8 @@ def test_output_dirs() -> Generator:
removing this directory after the test has been executed.
"""
# create dirs before executing the test
root_dir = make_output_dirs_for_test()
root_dir = TEST_OUTPUTS_PATH / str(uuid.uuid4().hex)
remove_and_create_folder(root_dir)
print(f"Created temporary folder for test: {root_dir}")
# let the test function run
yield TestOutputDirectories(root_dir=root_dir)
def make_output_dirs_for_test() -> str:
"""
Create a random output directory for a test inside the global test outputs root.
"""
test_output_dir = TEST_OUTPUTS_PATH / str(uuid.uuid4().hex)
make_test_output_dir(test_output_dir)
return str(test_output_dir)
yield OutputFolderForTests(root_dir=root_dir)
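A minimal usage sketch (not part of this change) of the fixture above, assuming OutputFolderForTests exposes the per-test folder as root_dir of type Path, as the rest of this commit suggests; the test name is illustrative.

from pathlib import Path

from InnerEye.Common.output_directories import OutputFolderForTests


def test_writes_into_fresh_folder(test_output_dirs: OutputFolderForTests) -> None:
    # root_dir is a Path, so the "/" operator joins paths directly
    out_file: Path = test_output_dirs.root_dir / "result.txt"
    out_file.write_text("done")
    assert out_file.exists()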

tqdm.py
@ -1,19 +0,0 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
"""
Alternative versions of tqdm.tqdm and tqdm.tqdm_notebook, so that we don't have to rely on the real tqdm.
This file must be located at top level so that batchflow's "import tqdm" works.
"""
from typing import Any
def tqdm(arg: Any, *_rest: Any) -> Any:
return arg
def tqdm_notebook(arg: Any, *rest: Any) -> Any:
return tqdm(arg, *rest)
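For reference, the deleted stub was a pure pass-through: tqdm(x) simply returned its argument, so loops ran unchanged, just without a progress bar. A tiny illustration of that behaviour:

from typing import Any


def tqdm(arg: Any, *_rest: Any) -> Any:
    return arg


# Iteration is unaffected; the stub only drops the progress bar
assert list(tqdm(range(3))) == [0, 1, 2]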