Adding patch sampling diagnostics by default (#290)

Always create thumbnails that show patch sampling behaviour
This commit is contained in:
Anton Schwaighofer 2020-10-23 11:42:52 +01:00 коммит произвёл GitHub
Родитель 88d48a6204
Коммит 52f5c77f81
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
56 изменённых файлов: 559 добавлений и 127 удалений

2
.gitattributes поставляемый
Просмотреть файл

@ -4,9 +4,9 @@
*.pth.tar filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.png filter=lfs diff=lfs merge=lfs -text
* text=auto
*.tar binary
*.png binary
*.nii binary
*.nii.gz binary
*.pth.tar binary

Просмотреть файл

@ -68,7 +68,7 @@ class AzureConfig(GenericConfig):
"This is only used for downloading datasets outside of AzureML.")
datasets_container: str = param.String(doc="Optional: The blob storage container with the datasets.")
workspace_name: str = param.String(doc="The name of the AzureML workspace that should be used.")
resource_group: str = param.String(None, doc="The Azure resource group that contains the AzureML workspace.")
resource_group: str = param.String(doc="The Azure resource group that contains the AzureML workspace.")
docker_shm_size: str = param.String("440g", doc="The shared memory in the docker image for the AzureML VMs.")
hyperdrive: bool = param.Boolean(False, doc="If True, use AzureML HyperDrive for run execution.")
cluster: str = param.String(doc="The name of the GPU cluster inside the AzureML workspace, that should "

Просмотреть файл

@ -146,6 +146,9 @@ def read_settings_yaml_file(yaml_file: Path) -> Dict[str, Any]:
yaml_contents = yaml.load(yaml_file.open('r'), Loader=yaml.Loader)
v = "variables"
if v in yaml_contents:
return cast(Dict[str, Any], yaml_contents[v])
if yaml_contents[v]:
return cast(Dict[str, Any], yaml_contents[v])
# If the file only contains the "variables:" key, but nothing below it, then yaml_contents[v] is None.
return dict()
else:
raise KeyError(f"The Yaml file must contain a section '{v}', but that was not found in {yaml_file}")

Просмотреть файл

@ -165,8 +165,7 @@ def any_pairwise_larger(items1: Any, items2: Any) -> bool:
The two lists must have the same length.
"""
if len(items1) != len(items2):
raise ValueError("Arguments must have the same length. len(items1): {}, len(items2): {}"
.format(len(items1), len(items2)))
raise ValueError(f"Arguments must have the same length. len(items1): {len(items1)}, len(items2): {len(items2)}")
for i in range(len(items1)):
if items1[i] > items2[i]:
return True

Просмотреть файл

@ -6,6 +6,7 @@ from __future__ import annotations
from dataclasses import dataclass
from enum import Enum, unique
from math import isclose
from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
import numpy as np
@ -13,7 +14,6 @@ import pandas as pd
import param
from azureml.train.estimator import Estimator
from azureml.train.hyperdrive import HyperDriveConfig
from math import isclose
from pandas import DataFrame
from InnerEye.Common.common_util import any_pairwise_larger, any_smaller_or_equal_than, check_is_any_of
@ -445,6 +445,11 @@ class SegmentationModelBase(ModelConfigBase):
is_plotting_enabled: bool = param.Boolean(True, doc="If true, various overview plots with results are generated "
"during model evaluation. Set to False if you see "
"non-deterministic pull request build failures.")
# Upper limit for the number of training set subjects for which patch sampling diagnostics are written.
# Adjacent string literals are concatenated verbatim, hence every fragment of the doc text must end with a space.
show_patch_sampling: int = param.Integer(10, bounds=(0, None),
                                         doc="Number of patients from the training set for which the effect of "
                                             "patch sampling will be shown. Nifti images and thumbnails for each "
                                             "of the first N subjects in the training set will be "
                                             "written to the outputs folder.")
def __init__(self, center_size: Optional[TupleInt3] = None,
inference_stride_size: Optional[TupleInt3] = None,

Просмотреть файл

@ -96,7 +96,7 @@ class CroppingDataset(FullImageDataset):
:return: CroppedSample
"""
# crop the original raw sample
sample, center_point = augmentation.random_crop(
sample, center_point, _ = augmentation.random_crop(
sample=sample,
crop_size=crop_size,
class_weights=class_weights

Просмотреть файл

@ -32,6 +32,7 @@ from InnerEye.ML.utils.ml_util import RandomStateSnapshot
from InnerEye.ML.utils.model_util import ModelAndInfo, generate_and_print_model_summary
from InnerEye.ML.utils.run_recovery import RunRecovery, get_recovery_path_train
from InnerEye.ML.utils.training_util import ModelOutputsAndMetricsForEpoch, ModelTrainingResults
from InnerEye.ML.visualizers.patch_sampling import visualize_random_crops_for_dataset
MAX_ITEM_LOAD_TIME_SEC = 0.5
MAX_LOAD_TIME_WARNINGS = 3
@ -53,6 +54,10 @@ def model_train(config: ModelConfigBase, run_recovery: Optional[RunRecovery] = N
config.write_dataset_files()
# set the random seed for all libraries
ml_util.set_random_seed(config.get_effective_random_seed(), "Patch visualization")
# Visualize how patches are sampled for segmentation models. This changes the random generator, but we don't
# want training to depend on how many patients we visualized, and hence set the random seed again right after.
visualize_random_crops_for_dataset(config)
ml_util.set_random_seed(config.get_effective_random_seed(), "Model Training")
logging.debug("Creating the PyTorch model.")
@ -85,8 +90,9 @@ def model_train(config: ModelConfigBase, run_recovery: Optional[RunRecovery] = N
if config.compute_mean_teacher_model:
mean_teacher_model_loaded = models_and_optimizer.try_create_mean_teacher_model_load_from_checkpoint_and_adjust()
if not mean_teacher_model_loaded:
raise ValueError("There was no checkpoint file available for the mean teacher model for given start_epoch {}"
.format(config.start_epoch))
raise ValueError(
"There was no checkpoint file available for the mean teacher model for given start_epoch {}"
.format(config.start_epoch))
# Create optimizer
optimizer_loaded = models_and_optimizer.try_create_optimizer_and_load_from_checkpoint()

Просмотреть файл

@ -10,7 +10,7 @@ import numpy as np
from matplotlib import colors
from matplotlib.pyplot import Axes
from InnerEye.Common.type_annotations import PathOrString, TupleFloat2
from InnerEye.Common.type_annotations import PathOrString, TupleFloat2, TupleFloat3
from InnerEye.ML.dataset.full_image_dataset import Sample
from InnerEye.ML.photometric_normalization import PhotometricNormalization
from InnerEye.ML.utils import plotting_util
@ -88,17 +88,17 @@ def add_legend(series_count: int) -> None:
plt.legend(ncol=num_columns, loc="upper left", fontsize="x-small")
def resize_and_save(width_inch: int, height_inch: int, filename: PathOrString, dpi: Optional[int] = 150) -> None:
def resize_and_save(width_inch: int, height_inch: int, filename: PathOrString, dpi: int = 150) -> None:
"""
Resizes the present figure to the given (width, height) in inches, and saves it to the given filename.
:param width_inch: The width of the figure in inches.
:param height_inch: The height of the figure in inches.
:param filename: The filename to save to.
:param dpi: Image resolution dots per inch
:param dpi: Image resolution in dots per inch
"""
fig = plt.gcf()
fig.set_size_inches(width_inch, height_inch)
plt.savefig(filename, dpi=dpi)
plt.savefig(filename, dpi=dpi, bbox_inches='tight', pad_inches=0.1)
def plot_image_and_label_contour(image: np.ndarray,
@ -375,9 +375,10 @@ def segmentation_and_groundtruth_plot(prediction: np.ndarray, ground_truth: np.n
fig.show()
def sds_ground_truth_plot(ct: np.ndarray, ground_truth: np.ndarray, sds_full: np.ndarray, subject_id: int,
structure: str, plane: Plane, output_img_dir: Path, dice: float = None, save_fig: bool = True,
annotator: str = None) -> None:
def surface_distance_ground_truth_plot(ct: np.ndarray, ground_truth: np.ndarray, sds_full: np.ndarray, subject_id: int,
structure: str, plane: Plane, output_img_dir: Path, dice: float = None,
save_fig: bool = True,
annotator: str = None) -> None:
"""
Plot surface distances where prediction > 0, with ground truth contour
:param ct: CT scan
@ -450,3 +451,51 @@ def sds_ground_truth_plot(ct: np.ndarray, ground_truth: np.ndarray, sds_full: np
resize_and_save(5, 5, figpath)
else:
fig.show()
def scan_with_transparent_overlay(scan: np.ndarray,
                                  overlay: np.ndarray,
                                  dimension: int,
                                  position: int,
                                  spacing: TupleFloat3) -> None:
    """
    Draws one slice of a 3-dimensional scan into the current matplotlib axis, and paints a red layer on top of it.
    The opacity of the red layer is taken from `overlay`: values close to 1 are drawn as opaque red, values close
    to 0 are drawn as (almost) transparent red.
    :param scan: A 3-dimensional image in (Z, Y, X) ordering.
    :param overlay: A 3-dimensional image in (Z, Y, X) ordering, with values between 0 and 1.
    :param dimension: The array dimension along which the slice is taken. dimension=0 will generate
    an axial slice.
    :param position: The index of the slice in the chosen dimension.
    :param spacing: The tuple of voxel spacings, in (Z, Y, X) order.
    :raises ValueError: If `dimension` is not 0, 1, or 2.
    :raises IndexError: If `position` is not a valid index of `scan` along the chosen dimension.
    """
    if not 0 <= dimension <= 2:
        raise ValueError(f"Dimension must be in the range [0, 2], but got: {dimension}")
    if not 0 <= position < scan.shape[dimension]:
        raise IndexError(f"Position is outside valid range: {position}")
    # Select a slab of thickness 1 along the chosen dimension, and the full extent of the scan in the other two.
    # The same selection is applied to scan and overlay.
    selection = tuple(slice(position, position + 1) if axis == dimension else slice(0, scan.shape[axis])
                      for axis in range(3))
    scan_slice = scan[selection].squeeze(axis=dimension)
    overlay_slice = overlay[selection].squeeze(axis=dimension)
    # Voxels need not be cubes: the pixel aspect ratio is the ratio of the spacings of the two remaining axes.
    # Spacing usually comes from Nifti headers.
    row_axis, column_axis = (axis for axis in range(3) if axis != dimension)
    aspect = spacing[row_axis] / spacing[column_axis]
    # Coronal and sagittal slices are flipped so that the head points up. Axial slices (dimension == 0) look
    # correct with the imshow default, which is origin 'upper'.
    origin = 'lower' if dimension != 0 else 'upper'
    axis_handle = plt.gca()
    axis_handle.set_xticks([])
    axis_handle.set_yticks([])
    axis_handle.imshow(scan_slice, vmin=np.min(scan), vmax=np.max(scan), cmap='Greys_r', aspect=aspect,
                       origin=origin)
    # A constant image that maps to the top of the 'Reds' colormap. All the information of the overlay goes into
    # the per-pixel alpha channel.
    solid_red = np.ones_like(overlay_slice)
    axis_handle.imshow(solid_red, vmin=0, vmax=1, cmap='Reds', alpha=overlay_slice, aspect=aspect, origin=origin)

Просмотреть файл

@ -193,7 +193,7 @@ class Runner:
plot_crossval_config = crossval_config_from_model_config(self.model_config)
plot_crossval_config.run_recovery_id = PARENT_RUN_CONTEXT.tags[RUN_RECOVERY_ID_KEY_NAME]
plot_crossval_config.outputs_directory = str(self.model_config.outputs_folder)
plot_crossval_config.settings_yaml_file = str(self.yaml_config_file)
plot_crossval_config.settings_yaml_file = self.yaml_config_file
cross_val_results_root = plot_cross_validation(plot_crossval_config)
if self.post_cross_validation_hook:
self.post_cross_validation_hook(self.model_config, cross_val_results_root)

Просмотреть файл

@ -14,7 +14,7 @@ from InnerEye.Azure.azure_runner import create_runner_parser, parse_args_and_add
from InnerEye.Azure.azure_util import download_outputs_from_run
from InnerEye.ML.common import ModelExecutionMode
from InnerEye.ML.config import SegmentationModelBase
from InnerEye.ML.plotting import sds_ground_truth_plot, segmentation_and_groundtruth_plot
from InnerEye.ML.plotting import surface_distance_ground_truth_plot, segmentation_and_groundtruth_plot
from InnerEye.ML.utils import surface_distance_utils as sd_util
from InnerEye.ML.utils.config_util import ModelConfigLoader
from InnerEye.ML.utils.csv_util import get_worst_performing_outliers, load_csv
@ -162,8 +162,8 @@ def main() -> None:
# Calculate and plot surface distance
sds_full = sd_util.calculate_surface_distances(ground_truth, binary_prediction_mask, list(voxel_spacing))
sds_ground_truth_plot(ct, ground_truth, sds_full, subject_id, structure_name, plane, output_img_dir,
annotator=annotator)
surface_distance_ground_truth_plot(ct, ground_truth, sds_full, subject_id, structure_name, plane, output_img_dir,
annotator=annotator)
if annotator is not None:
sds_for_annotator[annotator] += sds_full
@ -173,8 +173,8 @@ def main() -> None:
for annotator, sds in sds_for_annotator.items():
num_classes = int(np.amax(np.unique(overall_gold_standard)))
binarised_gold_standard = multi_label_array_to_binary(overall_gold_standard, num_classes)[1:].sum(axis=0)
sds_ground_truth_plot(ct, binarised_gold_standard, sds, subject_id, 'All', plane, output_img_dir,
annotator=annotator)
surface_distance_ground_truth_plot(ct, binarised_gold_standard, sds, subject_id, 'All', plane, output_img_dir,
annotator=annotator)
if __name__ == "__main__":

Просмотреть файл

@ -65,7 +65,7 @@ def random_select_patch_center(sample: Sample, class_weights: List[float] = None
def random_crop(sample: Sample,
crop_size: TupleInt3,
class_weights: List[float] = None) -> Tuple[Sample, np.ndarray]:
class_weights: List[float] = None) -> Tuple[Sample, np.ndarray, List[slice]]:
"""
Randomly crops images, mask, and labels arrays according to the crop_size argument.
The selection of the center is dependant on background probability.
@ -76,7 +76,8 @@ def random_crop(sample: Sample,
:param class_weights: A weighting vector with values [0, 1] to influence the class the center crop
voxel belongs to (must sum to 1), uniform distribution assumed if none provided.
:return: Tuple item 1: The cropped images, labels, and mask. Tuple item 2: The center that was chosen for the crop,
before shifting to be inside of the image.
before shifting to be inside of the image. Tuple item 3: The slicers that convert the input image to the chosen
crop.
:raises TypeError: If any of the arguments are of the wrong type.
:raises ValueError: If there are shape mismatches among the arguments or if the crop size is larger than the image.
"""
@ -119,7 +120,7 @@ def random_crop(sample: Sample,
mask=mask_cropped,
metadata=sample.metadata
)
return sample, center
return sample, center, slicers
class ImageTransformationBase(Transform3D):

Просмотреть файл

@ -44,6 +44,17 @@ class ImageHeader:
common_util.check_properties_are_not_none(self)
def get_unit_image_header(spacing: Optional[TupleFloat3] = None) -> ImageHeader:
    """
    Builds an ImageHeader that has its origin at (0, 0, 0) and a unit direction.
    :param spacing: The image spacing, as a (Z, Y, X) tuple. If no (or an empty) spacing is given,
    a spacing of (1, 1, 1) is used.
    :return: An ImageHeader with the given or the default spacing.
    """
    unit_direction = (1, 0, 0,
                      0, 1, 0,
                      0, 0, 1)
    return ImageHeader(origin=(0, 0, 0), direction=unit_direction, spacing=spacing or (1, 1, 1))
class ImageDataType(Enum):
"""
Data type for medical image data (e.g. masks and labels)

Просмотреть файл

@ -2,18 +2,28 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import logging
from pathlib import Path
from typing import Optional
import matplotlib.pyplot as plt
import numpy as np
import param
from InnerEye.Common.generic_parsing import GenericConfig
from InnerEye.Common.type_annotations import TupleInt3
from InnerEye.ML.config import SegmentationModelBase
from InnerEye.ML.dataset.cropping_dataset import CroppingDataset
from InnerEye.ML.dataset.full_image_dataset import FullImageDataset
from InnerEye.ML.dataset.sample import Sample
from InnerEye.ML.deep_learning_config import DeepLearningConfig
from InnerEye.ML.plotting import resize_and_save, scan_with_transparent_overlay
from InnerEye.ML.utils import augmentation, io_util, ml_util
from InnerEye.ML.utils.config_util import ModelConfigLoader
# The name of the folder inside the default outputs folder that will hold the plots that show the effect of
# sampling random patches.
from InnerEye.ML.utils.image_util import get_unit_image_header
PATCH_SAMPLING_FOLDER = "patch_sampling"
class CheckPatchSamplingConfig(GenericConfig):
@ -22,27 +32,102 @@ class CheckPatchSamplingConfig(GenericConfig):
"""
model_name: str = param.String("Lung", doc="InnerEye model name e.g. Lung")
local_dataset: str = param.String(None, doc="Path to the local dataset (e.g. dataset folder name)")
output_folder: str = param.String("patch_sampling_visualisations",
doc="Output folder where heatmaps and sampled images are saved")
output_folder: Path = param.ClassSelector(class_=Path, default=Path("patch_sampling_visualisations"),
doc="Output folder where heatmaps and sampled images are saved")
number_samples: int = param.Number(10, bounds=(1, None), doc="Number of images sampled")
number_crop_iterations: int = param.Number(100, bounds=(1, None), doc="Number of images sampled")
def create_mask_for_patch(output_shape: np.ndarray.shape,
                          output_dtype: np.ndarray.dtype,
                          center: np.ndarray,
                          crop_size: TupleInt3) -> np.ndarray:
    """
    Creates an array that is 1 inside of a patch of the given size around the given center, and 0 everywhere else.
    The patch boundaries are computed directly from `center` and `crop_size`, no clamping to the array bounds is
    applied.
    :param output_shape: The shape of the array to create.
    :param output_dtype: The datatype of the array to create.
    :param center: The center of the patch, indexable in the first 3 dimensions.
    :param crop_size: The size of the patch in each of the 3 dimensions.
    :return: An array of shape `output_shape` and datatype `output_dtype`.
    """
    # Create an empty array with zeros
    mask = np.zeros(output_shape, dtype=output_dtype)
    # Define the slicers for the images and labels
    slicers = [slice(center[i] - int(crop_size[i] / 2),
                     center[i] - int(crop_size[i] / 2) + int(crop_size[i])) for i in range(0, 3)]
    # Mark all voxels inside of the patch
    mask[slicers[0], slicers[1], slicers[2]] = 1
    return mask


def visualize_random_crops(sample: Sample,
                           config: SegmentationModelBase,
                           output_folder: Path) -> np.ndarray:
    """
    Simulate the effect of sampling random crops (as is done for training segmentation models), and store the results
    as a Nifti heatmap and as 3 axial/sagittal/coronal slices. The heatmap and the slices are stored in the given
    output folder, with filenames that contain the patient ID as the prefix.
    :param sample: The patient information from the dataset, with scans and ground truth labels.
    :param config: The model configuration.
    :param output_folder: The folder into which the heatmap and thumbnails should be written.
    :return: A numpy array that has the same size as the (possibly padded) image, containing how often each voxel
    was contained in one of the randomly sampled crops.
    """
    output_folder.mkdir(exist_ok=True, parents=True)
    sample = CroppingDataset.create_possibly_padded_sample_for_cropping(
        sample=sample,
        crop_size=config.crop_size,
        padding_mode=config.padding_mode)
    print(f"Processing sample: {sample.patient_id}")
    # Exhaustively sample with random crop function
    image_channel0 = sample.image[0]
    heatmap = np.zeros(image_channel0.shape, dtype=np.uint16)
    # Number of repeats should fit into the range of UInt16, because we will later save the heatmap as an integer
    # Nifti file of that datatype.
    repeats = 1000
    for _ in range(repeats):
        # Only the slicers of the crop are of interest here: they say which voxels ended up inside of the crop.
        _, _, slicers = augmentation.random_crop(sample=sample,
                                                 crop_size=config.crop_size,
                                                 class_weights=config.class_weights)
        heatmap[slicers[0], slicers[1], slicers[2]] += 1
    is_3dim = heatmap.shape[0] > 1
    header = sample.metadata.image_header
    if not header:
        logging.warning(f"No image header found for patient {sample.patient_id}. Using default header.")
        header = get_unit_image_header()
    if is_3dim:
        ct_output_name = str(output_folder / f"{sample.patient_id}_ct.nii.gz")
        heatmap_output_name = str(output_folder / f"{sample.patient_id}_sampled_patches.nii.gz")
        io_util.store_as_nifti(image=heatmap,
                               header=header,
                               file_name=heatmap_output_name,
                               image_type=heatmap.dtype,
                               scale=False)
        io_util.store_as_nifti(image=image_channel0,
                               header=header,
                               file_name=ct_output_name,
                               image_type=sample.image.dtype,
                               scale=False)
    # The alias np.float has been removed from recent numpy versions, the builtin float is the equivalent dtype.
    heatmap_scaled = heatmap.astype(dtype=float) / heatmap.max()
    # If the incoming image is effectively a 2D image with degenerate Z dimension, then only plot a single
    # axial thumbnail. Otherwise, plot thumbnails for all 3 dimensions.
    dimensions = list(range(3)) if is_3dim else [0]
    # Center the 3 thumbnails at one of the points where the heatmap attains a maximum. This should ensure that
    # the thumbnails are in an area where many of the organs of interest are located.
    max_heatmap_index = np.unravel_index(heatmap.argmax(), heatmap.shape) if is_3dim else (0, 0, 0)
    for dimension in dimensions:
        plt.clf()
        scan_with_transparent_overlay(scan=image_channel0,
                                      overlay=heatmap_scaled,
                                      dimension=dimension,
                                      position=max_heatmap_index[dimension] if is_3dim else 0,
                                      spacing=header.spacing)
        # Construct a filename that has a dimension suffix if we are generating 3 of them. For 2dim images, skip
        # the suffix.
        thumbnail = f"{sample.patient_id}_sampled_patches"
        if is_3dim:
            thumbnail += f"_dim{dimension}"
        thumbnail += ".png"
        resize_and_save(width_inch=5, height_inch=5, filename=output_folder / thumbnail)
    return heatmap
def visualize_random_crops_for_dataset(config: DeepLearningConfig,
                                       output_folder: Optional[Path] = None) -> None:
    """
    Writes patch sampling diagnostics for the first subjects of the training set, by calling
    `visualize_random_crops` for each of them. This is only done for segmentation models, for all other model
    configurations the function returns without doing anything. The number of subjects is limited by the
    `show_patch_sampling` field of the model configuration.
    :param config: The model configuration.
    :param output_folder: The folder in which the visualizations should be written. If not provided, the subfolder
    "patch_sampling" of the model's default outputs folder is used.
    """
    if not isinstance(config, SegmentationModelBase):
        return
    # Read the training subjects via the full image dataset
    training_data = FullImageDataset(config, config.get_dataset_splits().train)
    target_folder = output_folder if output_folder else config.outputs_folder / PATCH_SAMPLING_FOLDER
    # Never ask for more subjects than the training set contains
    num_subjects = min(config.show_patch_sampling, len(training_data))
    for index in range(num_subjects):
        subject = training_data.get_samples_at_index(index=index)[0]
        visualize_random_crops(subject, config, output_folder=target_folder)
def main(args: CheckPatchSamplingConfig) -> None:
@ -57,49 +142,9 @@ def main(args: CheckPatchSamplingConfig) -> None:
# Create a config file
config = ModelConfigLoader[SegmentationModelBase]().create_model_config_from_name(
args.model_name, overrides=commandline_args)
# Set a random seed
config.show_patch_sampling = args.number_samples
ml_util.set_random_seed(config.random_seed)
# Get a dataloader object that checks csv
dataset_splits = config.get_dataset_splits()
# Load a sample using the full image data loader
full_image_dataset = FullImageDataset(config, dataset_splits.train)
for sample_index in range(args.number_samples):
sample = CroppingDataset.create_possibly_padded_sample_for_cropping(
sample=full_image_dataset.get_samples_at_index(index=sample_index)[0],
crop_size=config.crop_size,
padding_mode=config.padding_mode)
print("Processing sample: ", sample.patient_id)
# Exhaustively sample with random crop function
heatmap = np.zeros(sample.mask.shape, dtype=np.uint16)
for _ in range(args.number_crop_iterations):
cropped_sample, center_point = augmentation.random_crop(sample=sample,
crop_size=config.crop_size,
class_weights=config.class_weights)
patch_mask = create_mask_for_patch(output_shape=heatmap.shape,
output_dtype=heatmap.dtype,
center=center_point,
crop_size=config.crop_size)
heatmap += patch_mask
ct_output_name = str(output_folder / "{}_ct.nii.gz".format(int(sample.patient_id)))
heatmap_output_name = str(output_folder / "{}_sampled_patches.nii.gz".format(int(sample.patient_id)))
if not sample.metadata.image_header:
raise ValueError("None header expected some header")
io_util.store_as_nifti(image=heatmap,
header=sample.metadata.image_header,
file_name=heatmap_output_name,
image_type=heatmap.dtype,
scale=False)
io_util.store_as_nifti(image=sample.image[0],
header=sample.metadata.image_header,
file_name=ct_output_name,
image_type=sample.image.dtype,
scale=False)
visualize_random_crops_for_dataset(config, output_folder=output_folder)
if __name__ == "__main__":

Просмотреть файл

@ -117,9 +117,12 @@ class PlotCrossValidationConfig(GenericConfig):
ignore_subjects: List[int] = param.List(None, class_=int, bounds=(1, None), allow_None=True, instantiate=False,
doc="List of the subject ids to ignore from the results")
is_zero_index: bool = param.Boolean(True, doc="If True, start cross validation split indices from 0 otherwise 1")
settings_yaml_file: str = param.String(default=str(fixed_paths.SETTINGS_YAML_FILE),
doc="Path to settings.yml file containing the Azure configuration "
"for the workspace")
settings_yaml_file: Path = param.ClassSelector(class_=Path, default=fixed_paths.SETTINGS_YAML_FILE,
doc="Path to settings.yml file containing the Azure configuration "
"for the workspace")
project_root: Path = param.ClassSelector(class_=Path, default=fixed_paths.repository_root_directory(),
doc="The root folder of the repository that starts the run. Used to "
"read a private settings file.")
_azure_config: Optional[AzureConfig] = \
param.ClassSelector(class_=AzureConfig, allow_None=True,
doc="Azure-related options created from YAML file.")
@ -190,7 +193,7 @@ class PlotCrossValidationConfig(GenericConfig):
:return:
"""
if self._azure_config is None:
self._azure_config = AzureConfig.from_yaml(Path(self.settings_yaml_file), project_root=None)
self._azure_config = AzureConfig.from_yaml(self.settings_yaml_file, project_root=self.project_root)
return self._azure_config
def download_or_get_local_file(self,

Просмотреть файл

@ -75,6 +75,8 @@ Assuming that your current directory is the repository root folder, on Linux `ba
export PYTHONPATH=`pwd`
python InnerEye/ML/runner.py --model=HelloWorld
```
(Note the backticks around the `pwd` command: these are not standard single quotes!)
On Windows:
```shell script
set PYTHONPATH=%cd%
@ -91,6 +93,7 @@ Detailed instructions, including setup in Azure, are here:
1. [Building models in Azure ML](docs/building_models.md)
1. [Sample Segmentation and Classification tasks](docs/sample_tasks.md)
1. [Debugging and monitoring models](docs/debugging_and_monitoring.md)
1. [Model diagnostics](docs/model_diagnostics.md)
## More information

Просмотреть файл

@ -110,12 +110,14 @@ def test_read_yaml_file_into_args(test_output_dirs: TestOutputDirectories) -> No
yaml_config_file=fixed_paths.SETTINGS_YAML_FILE)
runner1.parse_and_load_model()
assert len(runner1.azure_config.application_id) > 0
# When specifying a dummy YAML file that does not contain the application ID, it should not
# be set.
assert len(runner1.azure_config.resource_group) > 0
# When specifying a dummy YAML file that does not contain any settings, no information in AzureConfig should
# be set. Some settings are read from a private settings file, most notably application ID, which should
# be present on people's local dev boxes. Hence, only assert on `resource_group` here.
runner2 = Runner(project_root=fixed_paths.repository_root_directory(),
yaml_config_file=empty_yaml)
runner2.parse_and_load_model()
assert runner2.azure_config.application_id == ""
assert runner2.azure_config.resource_group == ""
def test_parsing_with_custom_yaml(test_output_dirs: TestOutputDirectories) -> None:

Двоичные данные
Tests/ML/test_data/042_slice_001.png

Двоичный файл не отображается.

До

Ширина:  |  Высота:  |  Размер: 20 KiB

После

Ширина:  |  Высота:  |  Размер: 130 B

Двоичные данные
Tests/ML/test_data/042_slice_001_contour.png

Двоичный файл не отображается.

До

Ширина:  |  Высота:  |  Размер: 5.6 KiB

После

Ширина:  |  Высота:  |  Размер: 129 B

Двоичные данные
Tests/ML/test_data/dice_per_epoch_15classes.png

Двоичный файл не отображается.

До

Ширина:  |  Высота:  |  Размер: 91 KiB

После

Ширина:  |  Высота:  |  Размер: 130 B

Двоичные данные
Tests/ML/test_data/dice_per_epoch_3classes.png

Двоичный файл не отображается.

До

Ширина:  |  Высота:  |  Размер: 30 KiB

После

Ширина:  |  Высота:  |  Размер: 130 B

Двоичные данные
Tests/ML/test_data/image_and_contour.png

Двоичный файл не отображается.

До

Ширина:  |  Высота:  |  Размер: 5.6 KiB

После

Ширина:  |  Высота:  |  Размер: 129 B

Двоичный файл не отображается.

До

Ширина:  |  Высота:  |  Размер: 8.9 KiB

После

Ширина:  |  Высота:  |  Размер: 129 B

Двоичные данные
Tests/ML/test_data/image_scaled_and_contour.png

Двоичный файл не отображается.

До

Ширина:  |  Высота:  |  Размер: 5.6 KiB

После

Ширина:  |  Высота:  |  Размер: 129 B

Просмотреть файл

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d548c8df54244f090e6b15f110b466306b203bc125f9be6a03aac2ee36d50936
size 9484

Просмотреть файл

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:97e2c80ffb6c0777d9e06262280687bdb18a079c3c489c65aca4be2b3f14fa68
size 3820

Просмотреть файл

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6bb75449e6e0ac97ce70ec89318b8327cbfc4754f49580b49a48a87e321686ab
size 3756

Просмотреть файл

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f91a769bbce02bd6d636b163df9f623e2ef2a09a04b69e4a88557419e6cc50b7
size 9252

Просмотреть файл

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3c982ca7892b08e73c231d9c2c8137f312355ae8d3e99ba096f705260a3b8b0c
size 3752

Просмотреть файл

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1fa353210ce0756a0cf6b8a99244ab676335acd28b5ce5f6c55e8b5bfeb60b5d
size 3887

Просмотреть файл

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3eac7b6a239f91243b00aa2b90ae3c0396cadd8128104c47eda95ec58eb257a9
size 27756

Просмотреть файл

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3855d6aacb25a8d3c35922cd8543344d59586d8b64db26ae250571656ba4cd43
size 15490

Просмотреть файл

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4051ab32986bb78bb3239f0273b5fd034310206d10f652accf75419741dd9055
size 13173

Просмотреть файл

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:18754b38e55c09a7cd10bb9bd46c31a90797a8466a2e4aac72705e26b62becc6
size 4992

Просмотреть файл

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:97f514d7005e4483ed7d7f06cbeab1954e4f0045aa6d88173ac62465fb4ccd0a
size 18128

Просмотреть файл

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:672fd010b79c359e5e60248424d08cde7622576f6b79163816d77e46c0d6cefe
size 6440

Просмотреть файл

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9c34791cc48c387272a7545d0f4cfbf23d9cc6daa68c53e5143661ea42c88a9d
size 18128

Просмотреть файл

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:75c88cac934f3b6c27fa6aeab3ac0c3a062b74ad84585f03881a366067bf4c53
size 1328

Просмотреть файл

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0adbde8f48bcd373f09ef73f606e2e479be73e4128374ecaf84d28b8cd75dc2f
size 6715

Просмотреть файл

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4ae3c1d9e6d2b3c371b1edc4608795b0df15171be094480dccf78ae93b34a306
size 1146920

Двоичный файл не отображается.

До

Ширина:  |  Высота:  |  Размер: 10 KiB

После

Ширина:  |  Высота:  |  Размер: 129 B

Двоичный файл не отображается.

До

Ширина:  |  Высота:  |  Размер: 10 KiB

После

Ширина:  |  Высота:  |  Размер: 129 B

Просмотреть файл

@ -27,7 +27,7 @@ from InnerEye.ML.utils.run_recovery import RunRecovery
from InnerEye.ML.visualizers.plot_cross_validation import get_config_and_results_for_offline_runs
from Tests.ML.configs.ClassificationModelForTesting import ClassificationModelForTesting
from Tests.ML.configs.DummyModel import DummyModel
from Tests.ML.util import assert_file_contents, assert_file_contents_match_exactly, assert_nifti_content, \
from Tests.ML.util import assert_file_contains_string, assert_text_files_match, assert_nifti_content, \
get_image_shape
from Tests.fixed_paths_for_tests import full_ml_test_data_path
@ -62,12 +62,12 @@ def test_model_test(test_output_dirs: TestOutputDirectories) -> None:
patient1 = io_util.load_nifti_image(train_and_test_data_dir / "id1_channel1.nii.gz")
patient2 = io_util.load_nifti_image(train_and_test_data_dir / "id2_channel1.nii.gz")
assert_file_contents(epoch_dir / DATASET_ID_FILE, placeholder_dataset_id)
assert_file_contents(epoch_dir / GROUND_TRUTH_IDS_FILE, "region")
assert_file_contents_match_exactly(epoch_dir / model_testing.METRICS_FILE_NAME,
Path(train_and_test_data_dir) / model_testing.METRICS_FILE_NAME)
assert_file_contents_match_exactly(epoch_dir / model_testing.METRICS_AGGREGATES_FILE,
Path(train_and_test_data_dir) / model_testing.METRICS_AGGREGATES_FILE)
assert_file_contains_string(epoch_dir / DATASET_ID_FILE, placeholder_dataset_id)
assert_file_contains_string(epoch_dir / GROUND_TRUTH_IDS_FILE, "region")
assert_text_files_match(epoch_dir / model_testing.METRICS_FILE_NAME,
Path(train_and_test_data_dir) / model_testing.METRICS_FILE_NAME)
assert_text_files_match(epoch_dir / model_testing.METRICS_AGGREGATES_FILE,
Path(train_and_test_data_dir) / model_testing.METRICS_AGGREGATES_FILE)
# Plotting results vary between platforms. Can only check if the file is generated, but not its contents.
assert (epoch_dir / model_testing.BOXPLOT_FILE).exists()

Просмотреть файл

@ -24,8 +24,9 @@ from InnerEye.ML.model_training_steps import ModelTrainingStepsForSegmentation
from InnerEye.ML.models.losses.mixture import MixtureLoss
from InnerEye.ML.sequence_config import SequenceModelBase
from InnerEye.ML.utils.training_util import ModelTrainingResults
from InnerEye.ML.visualizers.patch_sampling import PATCH_SAMPLING_FOLDER
from Tests.ML.configs.DummyModel import DummyModel
from Tests.ML.util import assert_file_contents
from Tests.ML.util import assert_file_contains_string
from Tests.fixed_paths_for_tests import full_ml_test_data_path
config_path = full_ml_test_data_path()
@ -188,12 +189,16 @@ def _test_model_train(output_dirs: TestOutputDirectories,
assert (train_config.outputs_folder / DATASET_CSV_FILE_NAME).is_file()
assert (train_config.outputs_folder / STORED_CSV_FILE_NAMES[ModelExecutionMode.TRAIN]).is_file()
assert (train_config.outputs_folder / STORED_CSV_FILE_NAMES[ModelExecutionMode.VAL]).is_file()
assert_file_contents(train_config.outputs_folder / TRAIN_STATS_FILE, expected_stats)
assert_file_contains_string(train_config.outputs_folder / TRAIN_STATS_FILE, expected_stats)
# Test for saving of example images
assert os.path.isdir(train_config.example_images_folder)
example_files = os.listdir(train_config.example_images_folder)
assert len(example_files) == 3 * 2
# Patch visualization: There should be 3 slices for each of the 2 subjects
sampling_folder = train_config.outputs_folder / PATCH_SAMPLING_FOLDER
assert sampling_folder.is_dir()
assert len(list(sampling_folder.rglob("*.png"))) == 3 * 2
@pytest.mark.parametrize(["rates", "expected"],

Просмотреть файл

@ -20,12 +20,13 @@ from InnerEye.ML.pipelines.inference import InferencePipeline
from InnerEye.ML.plotting import resize_and_save
from InnerEye.ML.reports.segmentation_report import boxplot_per_structure
from InnerEye.ML.utils import io_util
from InnerEye.ML.utils.image_util import get_unit_image_header
from InnerEye.ML.utils.io_util import ImageHeader
from InnerEye.ML.utils.metrics_constants import MetricsFileColumns
from InnerEye.ML.utils.metrics_util import MetricsPerPatientWriter
from InnerEye.ML.utils.transforms import LinearTransform, get_range_for_window_level
from Tests.ML.configs.DummyModel import DummyModel
from Tests.ML.util import assert_file_contents, assert_file_contents_match_exactly, assert_nifti_content
from Tests.ML.util import assert_file_contains_string, assert_text_files_match, assert_nifti_content
from Tests.fixed_paths_for_tests import full_ml_test_data_path
model_name = "Basic"
@ -64,7 +65,7 @@ def test_store_inference_results(test_output_dirs: TestOutputDirectories) -> Non
posterior0 = to_unique_bytes(posterior[0], (0, 1))
posterior1 = to_unique_bytes(posterior[1], (0, 1))
spacing = (2.0, 2.0, 2.0)
header = ImageHeader(origin=(0, 0, 0), direction=(1, 0, 0, 0, 1, 0, 0, 0, 1), spacing=spacing)
header = get_unit_image_header(spacing=spacing)
inference_result = InferencePipeline.Result(
epoch=1,
patient_id=12,
@ -125,7 +126,7 @@ def test_metrics_file(test_output_dirs: TestOutputDirectories) -> None:
metrics_file = new_file("metrics_file.csv")
d.to_csv(Path(metrics_file))
# Sorting should be first by structure name alphabetically, then Dice with lowest scores first.
assert_file_contents(metrics_file, "Patient,Structure,Dice,HausdorffDistance_mm,MeanDistance_mm\n"
assert_file_contains_string(metrics_file, "Patient,Structure,Dice,HausdorffDistance_mm,MeanDistance_mm\n"
"Patient3,kidney,0.400,1.000,0.100\n"
"Patient2,kidney,0.700,1.000,0.200\n"
"Patient1,liver,0.400,1.000,0.400\n"
@ -134,8 +135,8 @@ def test_metrics_file(test_output_dirs: TestOutputDirectories) -> None:
aggregates_file = new_file(METRICS_AGGREGATES_FILE)
d.save_aggregates_to_csv(Path(aggregates_file))
# Sorting should be first by structure name alphabetically, then Dice with lowest scores first.
assert_file_contents_match_exactly(Path(aggregates_file),
full_ml_test_data_path() / METRICS_AGGREGATES_FILE)
assert_text_files_match(Path(aggregates_file),
full_ml_test_data_path() / METRICS_AGGREGATES_FILE)
boxplot_per_structure(d.to_data_frame(),
column_name=MetricsFileColumns.DiceNumeric.value,
title="Dice score")

Просмотреть файл

@ -64,6 +64,8 @@ def test_plot_image_and_contour(test_output_dirs: TestOutputDirectories) -> None
plotting.plot_image_and_label_contour(image, labels, contour_arguments={'colors': 'r'}, plot_file_name=plot_file)
assert plot_file.exists()
expected = full_ml_test_data_path(file_name)
# To update the stored results, uncomment this line:
# expected.write_bytes(plot_file.read_bytes())
assert file_as_bytes(plot_file) == file_as_bytes(expected)
@ -84,6 +86,8 @@ def test_plot_image_and_contour_scaled(test_output_dirs: TestOutputDirectories)
image_range=(-5, 5), plot_file_name=plot_file)
assert plot_file.exists()
expected = full_ml_test_data_path(file_name)
# To update the stored results, uncomment this line:
# expected.write_bytes(plot_file.read_bytes())
assert file_as_bytes(plot_file) == file_as_bytes(expected)
@ -108,6 +112,8 @@ def test_plot_image_and_multiple_contours(test_output_dirs: TestOutputDirectorie
plot_file_name=plot_file)
assert plot_file.exists()
expected = full_ml_test_data_path(file_name)
# To update the stored results, uncomment this line:
# expected.write_bytes(plot_file.read_bytes())
assert file_as_bytes(plot_file) == file_as_bytes(expected)
@ -131,10 +137,12 @@ def compare_files(actual: List[Path], expected: List[str]) -> None:
full_expected = full_ml_test_data_path(e)
assert full_expected.exists()
assert str(f).endswith(e)
# To update the stored results, uncomment this line:
# full_expected.write_bytes(f.read_bytes())
assert file_as_bytes(f) == file_as_bytes(full_expected)
@pytest.mark.skipif(common_util.is_linux(), reason="Rendering of the graph is slightly different on Linux")
@pytest.mark.skipif(common_util.is_windows(), reason="Rendering of the graph is slightly different on Linux")
def test_plot_normalization_result(test_output_dirs: TestOutputDirectories) -> None:
"""
Tests plotting of before/after histograms in photometric normalization.

Просмотреть файл

@ -3,12 +3,14 @@
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import logging
import os
from pathlib import Path
from typing import Any, List, Optional, Union
import numpy as np
import pytest
import torch
from PIL import Image
from azureml.core import Workspace
from InnerEye.Azure.azure_config import AzureConfig
@ -79,7 +81,7 @@ def load_train_and_test_data_channels(patient_ids: List[int],
return samples
def assert_file_contents(full_file: Union[str, Path], expected: Any = None) -> None:
def assert_file_contains_string(full_file: Union[str, Path], expected: Any = None) -> None:
"""
Checks if the given file contains an expected string
:param full_file: The path to the file.
@ -92,7 +94,7 @@ def assert_file_contents(full_file: Union[str, Path], expected: Any = None) -> N
_assert_line(file_path.read_text(), expected)
def assert_file_contents_match_exactly(full_file: Path, expected_file: Path) -> None:
def assert_text_files_match(full_file: Path, expected_file: Path) -> None:
"""
Checks line by line (ignoring leading and trailing spaces) if the given two files contain the exact same strings
:param full_file: The path to the file.
@ -157,6 +159,28 @@ def assert_tensors_equal(t1: torch.Tensor, t2: Union[torch.Tensor, List], abs: f
assert v1 == pytest.approx(v2, abs=abs), f"Tensor elements don't match with tolerance {abs}: {v1} != {v2}"
def assert_binary_files_match(actual_file: Path, expected_file: Path) -> None:
    """
    Checks if two files contain exactly the same bytes. If PNG files mismatch, additional diagnostics are printed:
    the image sizes and the pixel data are compared first, so that the assertion message points at the kind of
    difference.
    :param actual_file: The path to the first file to compare (the "actual" result).
    :param expected_file: The path to the second file to compare (the "expected" result).
    :raises AssertionError: If either file does not exist, or if the two files are not byte-for-byte identical.
    """
    # Uncomment this line to batch-update all result files that use this assert function
    # expected_file.write_bytes(actual_file.read_bytes())
    assert_file_exists(actual_file)
    assert_file_exists(expected_file)
    actual = actual_file.read_bytes()
    expected = expected_file.read_bytes()
    if actual == expected:
        return
    if actual_file.suffix == ".png" and expected_file.suffix == ".png":
        # Open both images in context managers, so that the underlying file handles are released even when
        # one of the diagnostic assertions below fails.
        with Image.open(actual_file) as actual_image, Image.open(expected_file) as expected_image:
            actual_size = actual_image.size
            expected_size = expected_image.size
            assert actual_size == expected_size, \
                f"Image sizes don't match: actual {actual_size}, expected {expected_size}"
            assert np.allclose(np.array(actual_image), np.array(expected_image)), "Image pixel data does not match."
    # The bytes differ, hence always fail at this point, even if the PNG pixel data turned out to be identical.
    raise AssertionError(f"File contents does not match: len(actual)={len(actual)}, len(expected)={len(expected)}")
DummyPatientMetadata = PatientMetadata(patient_id=42)
@ -182,3 +206,11 @@ def get_default_workspace() -> Workspace:
:return:
"""
return get_default_azure_config().get_workspace()
def is_running_on_azure() -> bool:
    """
    Guesses whether the code is executing on an Azure build agent.
    :return: True if the AGENT_OS environment variable is set to a non-empty value, False otherwise.
    """
    # All Azure hosted agents define the AGENT_OS variable, hence use its presence as the indicator.
    agent_os = os.environ.get("AGENT_OS", None)
    if agent_os:
        return True
    return False

Просмотреть файл

@ -34,12 +34,12 @@ crop_size_requires_padding = (9, 8, 12)
# Random Crop Tests
def test_valid_full_crop() -> None:
metadata = DummyPatientMetadata
sample, _ = augmentation.random_crop(sample=Sample(image=valid_image_4d,
labels=valid_labels,
mask=valid_mask,
metadata=metadata),
crop_size=valid_full_crop_size,
class_weights=valid_class_weights)
sample, _, _ = augmentation.random_crop(sample=Sample(image=valid_image_4d,
labels=valid_labels,
mask=valid_mask,
metadata=metadata),
crop_size=valid_full_crop_size,
class_weights=valid_class_weights)
assert np.array_equal(sample.image, valid_image_4d)
assert np.array_equal(sample.labels, valid_labels)
@ -99,7 +99,7 @@ def test_random_crop(crop_size: Any) -> None:
# this should be handled gracefully (class being ignored from sampling)
labels[0] = 1
labels[1] = 0
sample, _ = augmentation.random_crop(Sample(
sample, _, _ = augmentation.random_crop(Sample(
image=valid_image_4d,
labels=valid_labels,
mask=valid_mask,
@ -147,7 +147,7 @@ def test_valid_class_weights(class_weights: List[float]) -> None:
return
for _ in range(0, total_crops):
crop_sample, center = augmentation.random_crop(sample, crop_size, class_weights)
crop_sample, center, _ = augmentation.random_crop(sample, crop_size, class_weights)
sampled_class = list(labels[:, center[0], center[1], center[2]]).index(1)
sampled_label_center_distribution[sampled_class] += 1

Просмотреть файл

@ -20,7 +20,7 @@ from InnerEye.ML.utils.dataset_util import DatasetExample, store_and_upload_exam
from InnerEye.ML.utils.io_util import ImageHeader, is_nifti_file_path, is_numpy_file_path, \
load_image_in_known_formats, load_numpy_image, is_dicom_file_path, load_dicom_image, \
ImageAndSegmentations, load_images_and_stack, DicomTags, PhotometricInterpretation
from Tests.ML.util import assert_file_contents
from Tests.ML.util import assert_file_contains_string
from Tests.fixed_paths_for_tests import full_ml_test_data_path
known_nii_path = full_ml_test_data_path("test_good.nii.gz")
@ -99,7 +99,7 @@ def test_save_file(value: Any, expected: Any) -> None:
file = full_ml_test_data_path("test.txt")
io_util.save_lines_to_file(Path(file), value)
assert_file_contents(file, expected)
assert_file_contains_string(file, expected)
os.remove(str(file))

Просмотреть файл

@ -29,7 +29,7 @@ from InnerEye.ML.visualizers.plot_cross_validation import COL_MODE, \
from Tests.Common.test_util import DEFAULT_ENSEMBLE_RUN_RECOVERY_ID, DEFAULT_RUN_RECOVERY_ID
from Tests.ML.models.architectures.sequential.test_rnn_classifier import ToyMultiLabelSequenceModel, \
_get_multi_label_sequence_dataframe
from Tests.ML.util import assert_file_contents_match_exactly, get_default_azure_config
from Tests.ML.util import assert_text_files_match, get_default_azure_config
from Tests.fixed_paths_for_tests import full_ml_test_data_path
@ -108,7 +108,8 @@ def create_run_result_file_list(config: PlotCrossValidationConfig, folder: str,
return files
def create_file_list_for_segmentation_recovery_run(test_config_ensemble: PlotCrossValidationConfig) -> List[RunResultFiles]:
def create_file_list_for_segmentation_recovery_run(test_config_ensemble: PlotCrossValidationConfig) -> \
List[RunResultFiles]:
return create_run_result_file_list(config=test_config_ensemble,
folder="master_1570466706163110")
@ -288,9 +289,9 @@ def test_save_outliers(test_config_ensemble: PlotCrossValidationConfig,
test_config_ensemble.outlier_range = 0
dataset_split_metrics = {x: _get_metrics_df(x) for x in [ModelExecutionMode.VAL]}
save_outliers(test_config_ensemble, dataset_split_metrics, Path(test_config_ensemble.outputs_directory))
assert_file_contents_match_exactly(full_file=Path(test_config_ensemble.outputs_directory)
/ f"{ModelExecutionMode.VAL.value}_outliers.txt",
expected_file=Path(
assert_text_files_match(full_file=Path(test_config_ensemble.outputs_directory)
/ f"{ModelExecutionMode.VAL.value}_outliers.txt",
expected_file=Path(
full_ml_test_data_path(
f"{ModelExecutionMode.VAL.value}_outliers.txt")))

Просмотреть файл

@ -0,0 +1,177 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import pytest
from InnerEye.Common.common_util import is_windows
from InnerEye.Common.output_directories import TestOutputDirectories
from InnerEye.ML.config import SegmentationModelBase, equally_weighted_classes
from InnerEye.ML.dataset.sample import PatientMetadata, Sample
from InnerEye.ML.plotting import resize_and_save, scan_with_transparent_overlay
from InnerEye.ML.utils import io_util
from InnerEye.ML.utils.image_util import get_unit_image_header
from InnerEye.ML.utils.io_util import load_nifti_image
from InnerEye.ML.utils.ml_util import set_random_seed
from InnerEye.ML.visualizers.patch_sampling import visualize_random_crops
from Tests.ML.util import assert_binary_files_match, assert_file_exists, is_running_on_azure
from Tests.fixed_paths_for_tests import full_ml_test_data_path
@pytest.mark.skipif(is_windows(), reason="Plotting output is not consistent across platforms.")
@pytest.mark.parametrize("labels_to_boundary", [True, False])
def test_visualize_patch_sampling(test_output_dirs: TestOutputDirectories,
                                  labels_to_boundary: bool) -> None:
    """
    Tests if patch sampling and producing diagnostic images works as expected.
    :param test_output_dirs: The fixture that provides the folder into which all results are written.
    :param labels_to_boundary: If true, the ground truth labels are placed close to the image boundary, so that
    crops have to be adjusted inwards. If false, ground truth labels are all far from the image boundaries.
    """
    set_random_seed(0)
    shape = (10, 30, 30)
    foreground_classes = ["fg"]
    class_weights = equally_weighted_classes(foreground_classes)
    config = SegmentationModelBase(should_validate=False,
                                   crop_size=(2, 10, 10),
                                   class_weights=class_weights)
    image = np.random.rand(1, *shape).astype(np.float32) * 1000
    mask = np.ones(shape)
    labels = np.zeros((len(class_weights),) + shape)
    if labels_to_boundary:
        # Generate foreground labels in such a way that a patch centered around a foreground pixel would
        # reach outside of the image.
        labels[1, 4:8, 3:27, 3:27] = 1
    else:
        labels[1, 4:8, 15:18, 15:18] = 1
    # The background class is the complement of the single foreground class.
    labels[0] = 1 - labels[1]
    output_folder = Path(test_output_dirs.root_dir)
    image_header = get_unit_image_header()
    sample = Sample(image=image,
                    mask=mask,
                    labels=labels,
                    metadata=PatientMetadata(patient_id=123,
                                             image_header=image_header))
    expected_folder = full_ml_test_data_path("patch_sampling")
    heatmap = visualize_random_crops(sample, config, output_folder=output_folder)
    expected_heatmap = expected_folder / ("sampled_to_boundary.npy" if labels_to_boundary else "sampled_center.npy")
    # To update the stored results, uncomment this line:
    # np.save(str(expected_heatmap), heatmap)
    assert np.allclose(heatmap, np.load(str(expected_heatmap))), "Patch sampling created a different heatmap."
    f1 = output_folder / "123_ct.nii.gz"
    assert_file_exists(f1)
    f2 = output_folder / "123_sampled_patches.nii.gz"
    assert_file_exists(f2)
    thumbnails = [
        "123_sampled_patches_dim0.png",
        "123_sampled_patches_dim1.png",
        "123_sampled_patches_dim2.png",
    ]
    for f in thumbnails:
        assert_file_exists(output_folder / f)

    expected = expected_folder / ("sampled_to_boundary.nii.gz" if labels_to_boundary else "sampled_center.nii.gz")
    # To update test results:
    # shutil.copy(str(f2), str(expected))
    expected_image = io_util.load_nifti_image(expected)
    actual_image = io_util.load_nifti_image(f2)
    # The comparison result must be asserted, otherwise a mismatch in the stored Nifti heatmap goes unnoticed.
    assert np.allclose(expected_image.image, actual_image.image), \
        "Patch sampling created a different heatmap Nifti file."
    if labels_to_boundary:
        for f in thumbnails:
            # Uncomment this line to update test results
            # (expected_folder / f).write_bytes((output_folder / f).read_bytes())
            if not is_running_on_azure():
                # When running on the Azure build agents, it appears that the bounding box of the images
                # is slightly different than on local runs, even with equal dpi settings.
                # Not able to figure out how to make the run results consistent, hence disable in cloud runs.
                assert_binary_files_match(output_folder / f, expected_folder / f)
@pytest.mark.skipif(is_windows(), reason="Plotting output is not consistent across platforms.")
def test_visualize_patch_sampling_2d(test_output_dirs: TestOutputDirectories) -> None:
    """
    Checks patch sampling diagnostics for an image that has only a single slice in the first dimension:
    The heatmap must match the stored result, no Nifti files may be written, and exactly one thumbnail
    must be created.
    :param test_output_dirs: The fixture that provides the folder into which all results are written.
    """
    set_random_seed(0)
    shape = (1, 20, 30)
    class_weights = equally_weighted_classes(["fg"])
    config = SegmentationModelBase(should_validate=False,
                                   crop_size=(1, 5, 10),
                                   class_weights=class_weights)
    image = np.random.rand(1, *shape).astype(np.float32) * 1000
    # One-hot labels: channel 1 is the foreground rectangle, channel 0 is its complement.
    labels = np.zeros((len(class_weights),) + shape)
    labels[1, 0, 8:12, 5:25] = 1
    labels[0] = 1 - labels[1]
    output_folder = Path(test_output_dirs.root_dir)
    # The sample deliberately carries no image header.
    sample = Sample(image=image,
                    mask=np.ones(shape),
                    labels=labels,
                    metadata=PatientMetadata(patient_id=123,
                                             image_header=None))
    heatmap = visualize_random_crops(sample, config, output_folder=output_folder)
    expected_folder = full_ml_test_data_path("patch_sampling")
    expected_heatmap = expected_folder / "sampling_2d.npy"
    # To update the stored results, uncomment this line:
    # np.save(str(expected_heatmap), heatmap)
    stored_heatmap = np.load(str(expected_heatmap))
    assert np.allclose(heatmap, stored_heatmap), "Patch sampling created a different heatmap."
    nifti_files = list(output_folder.rglob("*.nii.gz"))
    assert len(nifti_files) == 0
    png_files = list(output_folder.rglob("*.png"))
    assert len(png_files) == 1
    actual_file = output_folder / "123_sampled_patches.png"
    assert_file_exists(actual_file)
    expected = expected_folder / "sampling_2d.png"
    # To update the stored results, uncomment this line:
    # expected.write_bytes(actual_file.read_bytes())
    if is_running_on_azure():
        # When running on the Azure build agents, it appears that the bounding box of the images
        # is slightly different than on local runs, even with equal dpi settings.
        # It says: Image sizes don't match: actual (685, 469), expected (618, 424)
        # Not able to figure out how to make the run results consistent, hence disable in cloud runs.
        return
    assert_binary_files_match(actual_file, expected)
@pytest.mark.skipif(is_windows(), reason="Plotting output is not consistent across platforms.")
@pytest.mark.parametrize("dimension", [0, 1, 2])
def test_plot_overlay(test_output_dirs: TestOutputDirectories,
                      dimension: int) -> None:
    """
    Plots the central slice of a random image along the given dimension, with a mask overlaid on top,
    and compares the saved plot against the stored result.
    :param test_output_dirs: The fixture that provides the folder into which the plot is written.
    :param dimension: The index of the dimension along which the slice is taken.
    """
    set_random_seed(0)
    shape = (10, 30, 30)
    image = np.random.rand(*shape).astype(np.float32) * 1000
    # The mask holds the flat index of each voxel, giving a steadily increasing overlay value.
    mask = np.arange(int(np.prod(shape)), dtype=np.float64).reshape(shape)
    plt.figure()
    scan_with_transparent_overlay(image, mask, dimension, shape[dimension] // 2, spacing=(1.0, 1.0, 1.0))
    file = Path(test_output_dirs.root_dir) / "plot.png"
    resize_and_save(5, 5, file)
    assert file.exists()
    expected = full_ml_test_data_path("patch_sampling") / f"overlay_{dimension}.png"
    # To update the stored results, uncomment this line:
    # expected.write_bytes(file.read_bytes())
    assert_binary_files_match(file, expected)
@pytest.mark.skipif(is_windows(), reason="Plotting output is not consistent across platforms.")
def test_show_non_square_images(test_output_dirs: TestOutputDirectories) -> None:
    """
    Plots the central slice of a stored scan along each of its 3 dimensions, using the voxel spacing from the
    scan's header, and compares each plot against the stored result. The overlaid mask has only the voxel at the
    image center set.
    :param test_output_dirs: The fixture that provides the folder into which the plots are written.
    """
    expected_folder = full_ml_test_data_path("patch_sampling")
    # Named "scan" rather than "input" to avoid shadowing the builtin function.
    scan = load_nifti_image(expected_folder / "scan_small.nii.gz")
    image = scan.image
    shape = image.shape
    mask = np.zeros_like(image)
    mask[shape[0] // 2, shape[1] // 2, shape[2] // 2] = 1
    for dim in range(3):
        scan_with_transparent_overlay(image, mask, dim, shape[dim] // 2, spacing=scan.header.spacing)
        actual_file = Path(test_output_dirs.root_dir) / f"dim_{dim}.png"
        resize_and_save(5, 5, actual_file)
        expected = expected_folder / f"overlay_with_aspect_dim{dim}.png"
        # To update the stored results, uncomment this line:
        # expected.write_bytes(actual_file.read_bytes())
        assert_binary_files_match(actual_file, expected)

Просмотреть файл

@ -23,7 +23,7 @@ from InnerEye.ML.model_config_base import ModelConfigBase
from InnerEye.ML.model_inference_config import ModelInferenceConfig
from InnerEye.ML.model_testing import DEFAULT_RESULT_IMAGE_NAME
from InnerEye.ML.run_ml import MLRunner
from InnerEye.ML.utils.io_util import ImageHeader
from InnerEye.ML.utils.image_util import get_unit_image_header
from Tests.ML.util import assert_nifti_content, get_default_azure_config, get_default_workspace, get_model_loader, \
get_nifti_shape
from Tests.fixed_paths_for_tests import RELATIVE_TEST_OUTPUTS_PATH, full_ml_test_data_path, tests_root_directory
@ -142,7 +142,7 @@ def test_register_and_score_model(is_ensemble: bool,
# sanity check the resulting segmentation
expected_shape = get_nifti_shape(img_channel_1_path)
image_header = ImageHeader(origin=(0, 0, 0), direction=(1, 0, 0, 0, 1, 0, 0, 0, 1), spacing=(1, 1, 1))
image_header = get_unit_image_header()
assert_nifti_content(str(expected_segmentation_path), expected_shape, image_header, [0], np.ubyte)
finally:

Просмотреть файл

@ -17,7 +17,8 @@ steps:
- bash: |
conda env create --file environment.yml --name InnerEye --quiet
source activate InnerEye
pip freeze > requirements.txt
echo "Environment has been created with these packages:"
pip freeze
failOnStderr: false # Conda env create does not have an option to suppress warnings generated in wheel.py
condition: succeeded()
displayName: Create conda environment

Просмотреть файл

@ -17,8 +17,9 @@ session, monitoring the newly queued job.
### Resource Monitor
GPU and CPU usage can be monitored throughout the execution of a run (local and AML) by setting the monitoring interval
for the resource monitor eg: `--monitoring_interval_seconds=1`. This will spawn a separate process at the start of the
run which will log the diagnostics to AML metrics and TensorBoard logs under `Diagnostics`.
for the resource monitor, e.g. `--monitoring_interval_seconds=5`. This will spawn a separate process at the start of the
run which will log both GPU and CPU utilization and memory consumption. These metrics will be written to AzureML as
well as to a separate TensorBoard log file under `Diagnostics`.
### Debugging setup on local machine

25
docs/model_diagnostics.md Normal file
Просмотреть файл

@ -0,0 +1,25 @@
# Model Diagnostics
The InnerEye toolbox has extensive reporting about the model building process, as well as the performance
of the final model. Our goal is to provide as much insight as possible about the critical steps (and
pitfalls) of building a model.
## Patch sampling for segmentation models
When building a segmentation model, one of the crucial steps is how equally-shaped crops are taken from
the raw medical image, that are later fed into the model training. An outline of that process is
given [here](https://github.com/microsoft/InnerEye-DeepLearning/wiki/Adjusting-and-tuning-a-segmentation-model).
At the start of training, the toolbox inspects the first 10 images of the training set. For each of them,
1000 crops are drawn at random, similar to how they would be drawn during training. From that, a
heatmap is constructed, where each voxel value contains how often that specific voxel was actually contained
in a random crop (a value between 0 and 1000). The heatmap is stored as a Nifti file, alongside the
original scan, in folder `outputs/patch_sampling/`. When running inside AzureML, navigate to the
"Outputs" tab, and go to the folder (see screenshot below).
In addition, for each patient, 3 thumbnail images are generated, which overlay the heatmap on top of the
scan. Dark red indicates voxels that are sampled very often, transparent red indicates voxels that are used
infrequently.
Example thumbnail when viewed in the AzureML UI:
![](screenshot_azureml_patch_sampling.png)

Просмотреть файл

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ed3bb4d800d269eb1b4fe8f82bd8be32f0db225d502904ef8af25a53c9f65ae8
size 161947

Просмотреть файл

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3d435820b637b59e1a2b8db8567749f4bf69b69cd075fece87065992ae0a364f
size 288927