Adding patch sampling diagnostics by default (#290)

Always create thumbnails that show patch sampling behaviour
2020-10-23 11:42:52 +01:00 · 2020-10-23 11:42:52 +01:00 · 52f5c77f81
--- a/.gitattributes
+++ b/.gitattributes
@ -4,9 +4,9 @@
 *.pth.tar filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.npy filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
 * text=auto
 *.tar binary
-*.png binary
 *.nii binary
 *.nii.gz binary
 *.pth.tar binary
--- a/InnerEye/Azure/azure_config.py
+++ b/InnerEye/Azure/azure_config.py
@ -68,7 +68,7 @@ class AzureConfig(GenericConfig):
                         "This is only used for downloading datasets outside of AzureML.")
    datasets_container: str = param.String(doc="Optional: The blob storage container with the datasets.")
    workspace_name: str = param.String(doc="The name of the AzureML workspace that should be used.")
-    resource_group: str = param.String(None, doc="The Azure resource group that contains the AzureML workspace.")
+    resource_group: str = param.String(doc="The Azure resource group that contains the AzureML workspace.")
    docker_shm_size: str = param.String("440g", doc="The shared memory in the docker image for the AzureML VMs.")
    hyperdrive: bool = param.Boolean(False, doc="If True, use AzureML HyperDrive for run execution.")
    cluster: str = param.String(doc="The name of the GPU cluster inside the AzureML workspace, that should "
--- a/InnerEye/Azure/secrets_handling.py
+++ b/InnerEye/Azure/secrets_handling.py
@ -146,6 +146,9 @@ def read_settings_yaml_file(yaml_file: Path) -> Dict[str, Any]:
    yaml_contents = yaml.load(yaml_file.open('r'), Loader=yaml.Loader)
    v = "variables"
    if v in yaml_contents:
-        return cast(Dict[str, Any], yaml_contents[v])
+        if yaml_contents[v]:
+            return cast(Dict[str, Any], yaml_contents[v])
+        # If the file only contains the "variable:" prefix, but nothing below, then yaml_contents becomes None
+        return dict()
    else:
        raise KeyError(f"The Yaml file must contain a section '{v}', but that was not found in {yaml_file}")
--- a/InnerEye/Common/common_util.py
+++ b/InnerEye/Common/common_util.py
@ -165,8 +165,7 @@ def any_pairwise_larger(items1: Any, items2: Any) -> bool:
    The two lists must have the same length.
    """
    if len(items1) != len(items2):
-        raise ValueError("Arguments must have the same length. len(items1): {}, len(items2): {}"
-                         .format(len(items1), len(items2)))
+        raise ValueError(f"Arguments must have the same length. len(items1): {len(items1)}, len(items2): {len(items2)}")
    for i in range(len(items1)):
        if items1[i] > items2[i]:
            return True
--- a/InnerEye/ML/config.py
+++ b/InnerEye/ML/config.py
@ -6,6 +6,7 @@ from __future__ import annotations

 from dataclasses import dataclass
 from enum import Enum, unique
+from math import isclose
 from typing import Any, Dict, List, Optional, Sequence, Tuple, Union

 import numpy as np
@ -13,7 +14,6 @@ import pandas as pd
 import param
 from azureml.train.estimator import Estimator
 from azureml.train.hyperdrive import HyperDriveConfig
-from math import isclose
 from pandas import DataFrame

 from InnerEye.Common.common_util import any_pairwise_larger, any_smaller_or_equal_than, check_is_any_of
@ -445,6 +445,11 @@ class SegmentationModelBase(ModelConfigBase):
    is_plotting_enabled: bool = param.Boolean(True, doc="If true, various overview plots with results are generated "
                                                        "during model evaluation. Set to False if you see "
                                                        "non-deterministic pull request build failures.")
+    show_patch_sampling: int = param.Integer(10, bounds=(0, None),
+                                             doc="Number of patients from the training set for which the effect of"
+                                                 "patch sampling will be shown. Nifti images and thumbnails for each"
+                                                 "of the first N subjects in the training set will be "
+                                                 "written to the outputs folder.")

    def __init__(self, center_size: Optional[TupleInt3] = None,
                 inference_stride_size: Optional[TupleInt3] = None,
--- a/InnerEye/ML/dataset/cropping_dataset.py
+++ b/InnerEye/ML/dataset/cropping_dataset.py
@ -96,7 +96,7 @@ class CroppingDataset(FullImageDataset):
        :return: CroppedSample
        """
        # crop the original raw sample
-        sample, center_point = augmentation.random_crop(
+        sample, center_point, _ = augmentation.random_crop(
            sample=sample,
            crop_size=crop_size,
            class_weights=class_weights
--- a/InnerEye/ML/model_training.py
+++ b/InnerEye/ML/model_training.py
@ -32,6 +32,7 @@ from InnerEye.ML.utils.ml_util import RandomStateSnapshot
 from InnerEye.ML.utils.model_util import ModelAndInfo, generate_and_print_model_summary
 from InnerEye.ML.utils.run_recovery import RunRecovery, get_recovery_path_train
 from InnerEye.ML.utils.training_util import ModelOutputsAndMetricsForEpoch, ModelTrainingResults
+from InnerEye.ML.visualizers.patch_sampling import visualize_random_crops_for_dataset

 MAX_ITEM_LOAD_TIME_SEC = 0.5
 MAX_LOAD_TIME_WARNINGS = 3
@ -53,6 +54,10 @@ def model_train(config: ModelConfigBase, run_recovery: Optional[RunRecovery] = N
    config.write_dataset_files()

    # set the random seed for all libraries
+    ml_util.set_random_seed(config.get_effective_random_seed(), "Patch visualization")
+    # Visualize how patches are sampled for segmentation models. This changes the random generator, but we don't
+    # want training to depend on how many patients we visualized, and hence set the random seed again right after.
+    visualize_random_crops_for_dataset(config)
    ml_util.set_random_seed(config.get_effective_random_seed(), "Model Training")

    logging.debug("Creating the PyTorch model.")
@ -85,8 +90,9 @@ def model_train(config: ModelConfigBase, run_recovery: Optional[RunRecovery] = N
    if config.compute_mean_teacher_model:
        mean_teacher_model_loaded = models_and_optimizer.try_create_mean_teacher_model_load_from_checkpoint_and_adjust()
        if not mean_teacher_model_loaded:
-            raise ValueError("There was no checkpoint file available for the mean teacher model for given start_epoch {}"
-                             .format(config.start_epoch))
+            raise ValueError(
+                "There was no checkpoint file available for the mean teacher model for given start_epoch {}"
+                .format(config.start_epoch))

    # Create optimizer
    optimizer_loaded = models_and_optimizer.try_create_optimizer_and_load_from_checkpoint()
--- a/InnerEye/ML/plotting.py
+++ b/InnerEye/ML/plotting.py
@ -10,7 +10,7 @@ import numpy as np
 from matplotlib import colors
 from matplotlib.pyplot import Axes

-from InnerEye.Common.type_annotations import PathOrString, TupleFloat2
+from InnerEye.Common.type_annotations import PathOrString, TupleFloat2, TupleFloat3
 from InnerEye.ML.dataset.full_image_dataset import Sample
 from InnerEye.ML.photometric_normalization import PhotometricNormalization
 from InnerEye.ML.utils import plotting_util
@ -88,17 +88,17 @@ def add_legend(series_count: int) -> None:
    plt.legend(ncol=num_columns, loc="upper left", fontsize="x-small")


-def resize_and_save(width_inch: int, height_inch: int, filename: PathOrString, dpi: Optional[int] = 150) -> None:
+def resize_and_save(width_inch: int, height_inch: int, filename: PathOrString, dpi: int = 150) -> None:
    """
    Resizes the present figure to the given (width, height) in inches, and saves it to the given filename.
    :param width_inch: The width of the figure in inches.
    :param height_inch: The height of the figure in inches.
    :param filename: The filename to save to.
-    :param dpi: Image resolution dots per inch
+    :param dpi: Image resolution in dots per inch
    """
    fig = plt.gcf()
    fig.set_size_inches(width_inch, height_inch)
-    plt.savefig(filename, dpi=dpi)
+    plt.savefig(filename, dpi=dpi, bbox_inches='tight', pad_inches=0.1)


 def plot_image_and_label_contour(image: np.ndarray,
@ -375,9 +375,10 @@ def segmentation_and_groundtruth_plot(prediction: np.ndarray, ground_truth: np.n
        fig.show()


-def sds_ground_truth_plot(ct: np.ndarray, ground_truth: np.ndarray, sds_full: np.ndarray, subject_id: int,
-                          structure: str, plane: Plane, output_img_dir: Path, dice: float = None, save_fig: bool = True,
-                          annotator: str = None) -> None:
+def surface_distance_ground_truth_plot(ct: np.ndarray, ground_truth: np.ndarray, sds_full: np.ndarray, subject_id: int,
+                                       structure: str, plane: Plane, output_img_dir: Path, dice: float = None,
+                                       save_fig: bool = True,
+                                       annotator: str = None) -> None:
    """
    Plot surface distances where prediction > 0, with ground truth contour
    :param ct: CT scan
@ -450,3 +451,51 @@ def sds_ground_truth_plot(ct: np.ndarray, ground_truth: np.ndarray, sds_full: np
        resize_and_save(5, 5, figpath)
    else:
        fig.show()
+
+
+def scan_with_transparent_overlay(scan: np.ndarray,
+                                  overlay: np.ndarray,
+                                  dimension: int,
+                                  position: int,
+                                  spacing: TupleFloat3) -> None:
+    """
+    Creates a plot with one slice of a (CT) scan, with a transparent overlay that contains a second piece of
+    information in the range [0, 1]. High values of the `overlay` are shown as opaque red, low values as transparent
+    red.
+    Plots are created in the current axis.
+    :param scan: A 3-dimensional image in (Z, Y, X) ordering
+    :param overlay: A 3-dimensional image in (Z, Y, X) ordering, with values between 0 and 1.
+    :param dimension: The array dimension along with the plot should be created. dimension=0 will generate
+    an axial slice.
+    :param position: The index in the chosen dimension where the plot should be created.
+    :param spacing: The tuple of voxel spacings, in (Z, Y, X) order.
+    """
+    if dimension < 0 or dimension > 2:
+        raise ValueError(f"Dimension must be in the range [0, 2], but got: {dimension}")
+    if position < 0 or position >= scan.shape[dimension]:
+        raise IndexError(f"Position is outside valid range: {position}")
+    slicers = []
+    for i in range(0, 3):
+        if i == dimension:
+            slicers.append(slice(position, position + 1))
+        else:
+            slicers.append(slice(0, scan.shape[i]))
+    # Slice both the scan and the overlay
+    scan_sliced = scan[slicers[0], slicers[1], slicers[2]].squeeze(axis=dimension)
+    overlay_sliced = overlay[slicers[0], slicers[1], slicers[2]].squeeze(axis=dimension)
+    ax = plt.gca()
+    # Account for non-square pixel sizes. Spacing usually comes from Nifti headers.
+    if dimension == 0:
+        aspect = spacing[1] / spacing[2]
+    elif dimension == 1:
+        aspect = spacing[0] / spacing[2]
+    else:
+        aspect = spacing[0] / spacing[1]
+    # This ensures that the coronal and sagittal plot are showing with the head up. For the axial plot (dimension == 0)
+    # the default setting of imshow with origin 'upper' is OK.
+    origin = 'upper' if dimension == 0 else 'lower'
+    ax.imshow(scan_sliced, vmin=np.min(scan), vmax=np.max(scan), cmap='Greys_r', aspect=aspect, origin=origin)
+    red = np.ones_like(overlay_sliced)
+    ax.set_xticks([])
+    ax.set_yticks([])
+    ax.imshow(red, vmin=0, vmax=1, cmap='Reds', alpha=overlay_sliced, aspect=aspect, origin=origin)
--- a/InnerEye/ML/runner.py
+++ b/InnerEye/ML/runner.py
@ -193,7 +193,7 @@ class Runner:
        plot_crossval_config = crossval_config_from_model_config(self.model_config)
        plot_crossval_config.run_recovery_id = PARENT_RUN_CONTEXT.tags[RUN_RECOVERY_ID_KEY_NAME]
        plot_crossval_config.outputs_directory = str(self.model_config.outputs_folder)
-        plot_crossval_config.settings_yaml_file = str(self.yaml_config_file)
+        plot_crossval_config.settings_yaml_file = self.yaml_config_file
        cross_val_results_root = plot_cross_validation(plot_crossval_config)
        if self.post_cross_validation_hook:
            self.post_cross_validation_hook(self.model_config, cross_val_results_root)
--- a/InnerEye/ML/surface_distance_heatmaps.py
+++ b/InnerEye/ML/surface_distance_heatmaps.py
@ -14,7 +14,7 @@ from InnerEye.Azure.azure_runner import create_runner_parser, parse_args_and_add
 from InnerEye.Azure.azure_util import download_outputs_from_run
 from InnerEye.ML.common import ModelExecutionMode
 from InnerEye.ML.config import SegmentationModelBase
-from InnerEye.ML.plotting import sds_ground_truth_plot, segmentation_and_groundtruth_plot
+from InnerEye.ML.plotting import surface_distance_ground_truth_plot, segmentation_and_groundtruth_plot
 from InnerEye.ML.utils import surface_distance_utils as sd_util
 from InnerEye.ML.utils.config_util import ModelConfigLoader
 from InnerEye.ML.utils.csv_util import get_worst_performing_outliers, load_csv
@ -162,8 +162,8 @@ def main() -> None:

        # Calculate and plot surface distance
        sds_full = sd_util.calculate_surface_distances(ground_truth, binary_prediction_mask, list(voxel_spacing))
-        sds_ground_truth_plot(ct, ground_truth, sds_full, subject_id, structure_name, plane, output_img_dir,
-                              annotator=annotator)
+        surface_distance_ground_truth_plot(ct, ground_truth, sds_full, subject_id, structure_name, plane, output_img_dir,
+                                           annotator=annotator)

        if annotator is not None:
            sds_for_annotator[annotator] += sds_full
@ -173,8 +173,8 @@ def main() -> None:
        for annotator, sds in sds_for_annotator.items():
            num_classes = int(np.amax(np.unique(overall_gold_standard)))
            binarised_gold_standard = multi_label_array_to_binary(overall_gold_standard, num_classes)[1:].sum(axis=0)
-            sds_ground_truth_plot(ct, binarised_gold_standard, sds, subject_id, 'All', plane, output_img_dir,
-                                  annotator=annotator)
+            surface_distance_ground_truth_plot(ct, binarised_gold_standard, sds, subject_id, 'All', plane, output_img_dir,
+                                               annotator=annotator)


 if __name__ == "__main__":
--- a/InnerEye/ML/utils/augmentation.py
+++ b/InnerEye/ML/utils/augmentation.py
@ -65,7 +65,7 @@ def random_select_patch_center(sample: Sample, class_weights: List[float] = None

 def random_crop(sample: Sample,
                crop_size: TupleInt3,
-                class_weights: List[float] = None) -> Tuple[Sample, np.ndarray]:
+                class_weights: List[float] = None) -> Tuple[Sample, np.ndarray, List[slice]]:
    """
    Randomly crops images, mask, and labels arrays according to the crop_size argument.
    The selection of the center is dependant on background probability.
@ -76,7 +76,8 @@ def random_crop(sample: Sample,
    :param class_weights: A weighting vector with values [0, 1] to influence the class the center crop
                          voxel belongs to (must sum to 1), uniform distribution assumed if none provided.
    :return: Tuple item 1: The cropped images, labels, and mask. Tuple item 2: The center that was chosen for the crop,
-    before shifting to be inside of the image.
+    before shifting to be inside of the image. Tuple item 3: The slicers that convert the input image to the chosen
+    crop.
    :raises TypeError: If any of the arguments are of the wrong type.
    :raises ValueError: If there are shape mismatches among the arguments or if the crop size is larger than the image.
    """
@ -119,7 +120,7 @@ def random_crop(sample: Sample,
        mask=mask_cropped,
        metadata=sample.metadata
    )
-    return sample, center
+    return sample, center, slicers


 class ImageTransformationBase(Transform3D):
--- a/InnerEye/ML/utils/image_util.py
+++ b/InnerEye/ML/utils/image_util.py
@ -44,6 +44,17 @@ class ImageHeader:
        common_util.check_properties_are_not_none(self)


+def get_unit_image_header(spacing: Optional[TupleFloat3] = None) -> ImageHeader:
+    """
+    Creates an ImageHeader object with the origin at 0, and unit direction. The spacing is set to the argument,
+    defaulting to (1, 1, 1) if not provided.
+    :param spacing: The image spacing, as a (Z, Y, X) tuple.
+    """
+    if not spacing:
+        spacing = (1, 1, 1)
+    return ImageHeader(origin=(0, 0, 0), direction=(1, 0, 0, 0, 1, 0, 0, 0, 1), spacing=spacing)
+
+
 class ImageDataType(Enum):
    """
    Data type for medical image data (e.g. masks and labels)
--- a/InnerEye/ML/visualizers/patch_sampling.py
+++ b/InnerEye/ML/visualizers/patch_sampling.py
@ -2,18 +2,28 @@
 #  Copyright (c) Microsoft Corporation. All rights reserved.
 #  Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
 #  ------------------------------------------------------------------------------------------
+import logging
 from pathlib import Path
+from typing import Optional

+import matplotlib.pyplot as plt
 import numpy as np
 import param

 from InnerEye.Common.generic_parsing import GenericConfig
-from InnerEye.Common.type_annotations import TupleInt3
 from InnerEye.ML.config import SegmentationModelBase
 from InnerEye.ML.dataset.cropping_dataset import CroppingDataset
 from InnerEye.ML.dataset.full_image_dataset import FullImageDataset
+from InnerEye.ML.dataset.sample import Sample
+from InnerEye.ML.deep_learning_config import DeepLearningConfig
+from InnerEye.ML.plotting import resize_and_save, scan_with_transparent_overlay
 from InnerEye.ML.utils import augmentation, io_util, ml_util
 from InnerEye.ML.utils.config_util import ModelConfigLoader
+# The name of the folder inside the default outputs folder that will holds plots that show the effect of
+# sampling random patches
+from InnerEye.ML.utils.image_util import get_unit_image_header
+
+PATCH_SAMPLING_FOLDER = "patch_sampling"


 class CheckPatchSamplingConfig(GenericConfig):
@ -22,27 +32,102 @@ class CheckPatchSamplingConfig(GenericConfig):
    """
    model_name: str = param.String("Lung", doc="InnerEye model name e.g. Lung")
    local_dataset: str = param.String(None, doc="Path to the local dataset (e.g. dataset folder name)")
-    output_folder: str = param.String("patch_sampling_visualisations",
-                                      doc="Output folder where heatmaps and sampled images are saved")
+    output_folder: Path = param.ClassSelector(class_=Path, default=Path("patch_sampling_visualisations"),
+                                              doc="Output folder where heatmaps and sampled images are saved")
    number_samples: int = param.Number(10, bounds=(1, None), doc="Number of images sampled")
-    number_crop_iterations: int = param.Number(100, bounds=(1, None), doc="Number of images sampled")


-def create_mask_for_patch(output_shape: np.ndarray.shape,
-                          output_dtype: np.ndarray.dtype,
-                          center: np.ndarray,
-                          crop_size: TupleInt3) -> np.ndarray:
-    # Create an empty array with zeros
-    mask = np.zeros(output_shape, dtype=output_dtype)
+def visualize_random_crops(sample: Sample,
+                           config: SegmentationModelBase,
+                           output_folder: Path) -> np.ndarray:
+    """
+    Simulate the effect of sampling random crops (as is done for trainig segmentation models), and store the results
+    as a Nifti heatmap and as 3 axial/sagittal/coronal slices. The heatmap and the slices are stored in the given
+    output folder, with filenames that contain the patient ID as the prefix.
+    :param sample: The patient information from the dataset, with scans and ground truth labels.
+    :param config: The model configuration.
+    :param output_folder: The folder into which the heatmap and thumbnails should be written.
+    :return: A numpy array that has the same size as the image, containing how often each voxel was contained in
+    """
+    output_folder.mkdir(exist_ok=True, parents=True)
+    sample = CroppingDataset.create_possibly_padded_sample_for_cropping(
+        sample=sample,
+        crop_size=config.crop_size,
+        padding_mode=config.padding_mode)
+    print(f"Processing sample: {sample.patient_id}")

-    # Define the slicers for the images and labels
-    slicers = [slice(center[i] - int(crop_size[i] / 2),
-                     center[i] - int(crop_size[i] / 2) + int(crop_size[i])) for i in range(0, 3)]
+    # Exhaustively sample with random crop function
+    image_channel0 = sample.image[0]
+    heatmap = np.zeros(image_channel0.shape, dtype=np.uint16)
+    # Number of repeats should fit into the range of UInt16, because we will later save the heatmap as an integer
+    # Nifti file of that datatype.
+    repeats = 1000
+    for _ in range(repeats):
+        _, _, slicers = augmentation.random_crop(sample=sample,
+                                                 crop_size=config.crop_size,
+                                                 class_weights=config.class_weights)
+        heatmap[slicers[0], slicers[1], slicers[2]] += 1
+    is_3dim = heatmap.shape[0] > 1
+    header = sample.metadata.image_header
+    if not header:
+        logging.warning(f"No image header found for patient {sample.patient_id}. Using default header.")
+        header = get_unit_image_header()
+    if is_3dim:
+        ct_output_name = str(output_folder / f"{sample.patient_id}_ct.nii.gz")
+        heatmap_output_name = str(output_folder / f"{sample.patient_id}_sampled_patches.nii.gz")
+        io_util.store_as_nifti(image=heatmap,
+                               header=header,
+                               file_name=heatmap_output_name,
+                               image_type=heatmap.dtype,
+                               scale=False)
+        io_util.store_as_nifti(image=image_channel0,
+                               header=header,
+                               file_name=ct_output_name,
+                               image_type=sample.image.dtype,
+                               scale=False)
+    heatmap_scaled = heatmap.astype(dtype=np.float) / heatmap.max()
+    # If the incoming image is effectively a 2D image with degenerate Z dimension, then only plot a single
+    # axial thumbnail. Otherwise, plot thumbnails for all 3 dimensions.
+    dimensions = list(range(3)) if is_3dim else [0]
+    # Center the 3 thumbnails at one of the points where the heatmap attains a maximum. This should ensure that
+    # the thumbnails are in an area where many of the organs of interest are located.
+    max_heatmap_index = np.unravel_index(heatmap.argmax(), heatmap.shape) if is_3dim else (0, 0, 0)
+    for dimension in dimensions:
+        plt.clf()
+        scan_with_transparent_overlay(scan=image_channel0,
+                                      overlay=heatmap_scaled,
+                                      dimension=dimension,
+                                      position=max_heatmap_index[dimension] if is_3dim else 0,
+                                      spacing=header.spacing)
+        # Construct a filename that has a dimension suffix if we are generating 3 of them. For 2dim images, skip
+        # the suffix.
+        thumbnail = f"{sample.patient_id}_sampled_patches"
+        if is_3dim:
+            thumbnail += f"_dim{dimension}"
+        thumbnail += ".png"
+        resize_and_save(width_inch=5, height_inch=5, filename=output_folder / thumbnail)
+    return heatmap

-    # Crop the tensors
-    mask[slicers[0], slicers[1], slicers[2]] = 1

-    return mask
+def visualize_random_crops_for_dataset(config: DeepLearningConfig,
+                                       output_folder: Optional[Path] = None) -> None:
+    """
+    For segmentation models only: This function generates visualizations of the effect of sampling random patches
+    for training. Visualizations are stored in both Nifti format, and as 3 PNG thumbnail files, in the output folder.
+    :param config: The model configuration.
+    :param output_folder: The folder in which the visualizations should be written. If not provided, use a subfolder
+    "patch_sampling" in the models's default output folder
+    """
+    if not isinstance(config, SegmentationModelBase):
+        return
+    dataset_splits = config.get_dataset_splits()
+    # Load a sample using the full image data loader
+    full_image_dataset = FullImageDataset(config, dataset_splits.train)
+    output_folder = output_folder or config.outputs_folder / PATCH_SAMPLING_FOLDER
+    count = min(config.show_patch_sampling, len(full_image_dataset))
+    for sample_index in range(count):
+        sample = full_image_dataset.get_samples_at_index(index=sample_index)[0]
+        visualize_random_crops(sample, config, output_folder=output_folder)


 def main(args: CheckPatchSamplingConfig) -> None:
@ -57,49 +142,9 @@ def main(args: CheckPatchSamplingConfig) -> None:
    # Create a config file
    config = ModelConfigLoader[SegmentationModelBase]().create_model_config_from_name(
        args.model_name, overrides=commandline_args)
-
-    # Set a random seed
+    config.show_patch_sampling = args.number_samples
    ml_util.set_random_seed(config.random_seed)
-
-    # Get a dataloader object that checks csv
-    dataset_splits = config.get_dataset_splits()
-
-    # Load a sample using the full image data loader
-    full_image_dataset = FullImageDataset(config, dataset_splits.train)
-
-    for sample_index in range(args.number_samples):
-        sample = CroppingDataset.create_possibly_padded_sample_for_cropping(
-            sample=full_image_dataset.get_samples_at_index(index=sample_index)[0],
-            crop_size=config.crop_size,
-            padding_mode=config.padding_mode)
-        print("Processing sample: ", sample.patient_id)
-
-        # Exhaustively sample with random crop function
-        heatmap = np.zeros(sample.mask.shape, dtype=np.uint16)
-        for _ in range(args.number_crop_iterations):
-            cropped_sample, center_point = augmentation.random_crop(sample=sample,
-                                                                    crop_size=config.crop_size,
-                                                                    class_weights=config.class_weights)
-            patch_mask = create_mask_for_patch(output_shape=heatmap.shape,
-                                               output_dtype=heatmap.dtype,
-                                               center=center_point,
-                                               crop_size=config.crop_size)
-            heatmap += patch_mask
-
-        ct_output_name = str(output_folder / "{}_ct.nii.gz".format(int(sample.patient_id)))
-        heatmap_output_name = str(output_folder / "{}_sampled_patches.nii.gz".format(int(sample.patient_id)))
-        if not sample.metadata.image_header:
-            raise ValueError("None header expected some header")
-        io_util.store_as_nifti(image=heatmap,
-                               header=sample.metadata.image_header,
-                               file_name=heatmap_output_name,
-                               image_type=heatmap.dtype,
-                               scale=False)
-        io_util.store_as_nifti(image=sample.image[0],
-                               header=sample.metadata.image_header,
-                               file_name=ct_output_name,
-                               image_type=sample.image.dtype,
-                               scale=False)
+    visualize_random_crops_for_dataset(config, output_folder=output_folder)


 if __name__ == "__main__":
--- a/InnerEye/ML/visualizers/plot_cross_validation.py
+++ b/InnerEye/ML/visualizers/plot_cross_validation.py
@ -117,9 +117,12 @@ class PlotCrossValidationConfig(GenericConfig):
    ignore_subjects: List[int] = param.List(None, class_=int, bounds=(1, None), allow_None=True, instantiate=False,
                                            doc="List of the subject ids to ignore from the results")
    is_zero_index: bool = param.Boolean(True, doc="If True, start cross validation split indices from 0 otherwise 1")
-    settings_yaml_file: str = param.String(default=str(fixed_paths.SETTINGS_YAML_FILE),
-                                           doc="Path to settings.yml file containing the Azure configuration "
-                                               "for the workspace")
+    settings_yaml_file: Path = param.ClassSelector(class_=Path, default=fixed_paths.SETTINGS_YAML_FILE,
+                                                   doc="Path to settings.yml file containing the Azure configuration "
+                                                       "for the workspace")
+    project_root: Path = param.ClassSelector(class_=Path, default=fixed_paths.repository_root_directory(),
+                                             doc="The root folder of the repository that starts the run. Used to "
+                                                 "read a private settings file.")
    _azure_config: Optional[AzureConfig] = \
        param.ClassSelector(class_=AzureConfig, allow_None=True,
                            doc="Azure-related options created from YAML file.")
@ -190,7 +193,7 @@ class PlotCrossValidationConfig(GenericConfig):
        :return:
        """
        if self._azure_config is None:
-            self._azure_config = AzureConfig.from_yaml(Path(self.settings_yaml_file), project_root=None)
+            self._azure_config = AzureConfig.from_yaml(self.settings_yaml_file, project_root=self.project_root)
        return self._azure_config

    def download_or_get_local_file(self,
--- a/README.md
+++ b/README.md
@ -75,6 +75,8 @@ Assuming that your current directory is the repository root folder, on Linux `ba
 export PYTHONPATH=`pwd`
 python InnerEye/ML/runner.py --model=HelloWorld
 ```
+(Note the "backtick" around the `pwd` command, this is not a standard single quote!)
+
 On Windows:
 ```shell script
 set PYTHONPATH=%cd%
@ -91,6 +93,7 @@ Detailed instructions, including setup in Azure, are here:
 1. [Building models in Azure ML](docs/building_models.md)
 1. [Sample Segmentation and Classification tasks](docs/sample_tasks.md)
 1. [Debugging and monitoring models](docs/debugging_and_monitoring.md)
+1. [Model diagnostics](docs/model_diagnostics.md)

 ## More information

--- a/Tests/Common/test_commandline_parsing.py
+++ b/Tests/Common/test_commandline_parsing.py
@ -110,12 +110,14 @@ def test_read_yaml_file_into_args(test_output_dirs: TestOutputDirectories) -> No
                         yaml_config_file=fixed_paths.SETTINGS_YAML_FILE)
        runner1.parse_and_load_model()
        assert len(runner1.azure_config.application_id) > 0
-        # When specifying a dummy YAML file that does not contain the application ID, it should not
-        # be set.
+        assert len(runner1.azure_config.resource_group) > 0
+        # When specifying a dummy YAML file that does not contain any settings, no information in AzureConfig should
+        # be set. Some settings are read from a private settings file, most notably application ID, which should
+        # be present on people's local dev boxes. Hence, only assert on `resource_group` here.
        runner2 = Runner(project_root=fixed_paths.repository_root_directory(),
                         yaml_config_file=empty_yaml)
        runner2.parse_and_load_model()
-        assert runner2.azure_config.application_id == ""
+        assert runner2.azure_config.resource_group == ""


 def test_parsing_with_custom_yaml(test_output_dirs: TestOutputDirectories) -> None:
--- a/Tests/ML/test_data/042_slice_001.png
+++ b/Tests/ML/test_data/042_slice_001.png
--- a/Tests/ML/test_data/042_slice_001_contour.png
+++ b/Tests/ML/test_data/042_slice_001_contour.png
--- a/Tests/ML/test_data/dice_per_epoch_15classes.png
+++ b/Tests/ML/test_data/dice_per_epoch_15classes.png
--- a/Tests/ML/test_data/dice_per_epoch_3classes.png
+++ b/Tests/ML/test_data/dice_per_epoch_3classes.png
--- a/Tests/ML/test_data/image_and_contour.png
+++ b/Tests/ML/test_data/image_and_contour.png
--- a/Tests/ML/test_data/image_and_multiple_contours.png
+++ b/Tests/ML/test_data/image_and_multiple_contours.png
--- a/Tests/ML/test_data/image_scaled_and_contour.png
+++ b/Tests/ML/test_data/image_scaled_and_contour.png
--- a/Tests/ML/test_data/patch_sampling/123_sampled_patches_dim0.png
+++ b/Tests/ML/test_data/patch_sampling/123_sampled_patches_dim0.png
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d548c8df54244f090e6b15f110b466306b203bc125f9be6a03aac2ee36d50936
+size 9484
--- a/Tests/ML/test_data/patch_sampling/123_sampled_patches_dim1.png
+++ b/Tests/ML/test_data/patch_sampling/123_sampled_patches_dim1.png
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97e2c80ffb6c0777d9e06262280687bdb18a079c3c489c65aca4be2b3f14fa68
+size 3820
--- a/Tests/ML/test_data/patch_sampling/123_sampled_patches_dim2.png
+++ b/Tests/ML/test_data/patch_sampling/123_sampled_patches_dim2.png
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6bb75449e6e0ac97ce70ec89318b8327cbfc4754f49580b49a48a87e321686ab
+size 3756
--- a/Tests/ML/test_data/patch_sampling/overlay_0.png
+++ b/Tests/ML/test_data/patch_sampling/overlay_0.png
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f91a769bbce02bd6d636b163df9f623e2ef2a09a04b69e4a88557419e6cc50b7
+size 9252
--- a/Tests/ML/test_data/patch_sampling/overlay_1.png
+++ b/Tests/ML/test_data/patch_sampling/overlay_1.png
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c982ca7892b08e73c231d9c2c8137f312355ae8d3e99ba096f705260a3b8b0c
+size 3752
--- a/Tests/ML/test_data/patch_sampling/overlay_2.png
+++ b/Tests/ML/test_data/patch_sampling/overlay_2.png
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fa353210ce0756a0cf6b8a99244ab676335acd28b5ce5f6c55e8b5bfeb60b5d
+size 3887
--- a/Tests/ML/test_data/patch_sampling/overlay_with_aspect_dim0.png
+++ b/Tests/ML/test_data/patch_sampling/overlay_with_aspect_dim0.png
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3eac7b6a239f91243b00aa2b90ae3c0396cadd8128104c47eda95ec58eb257a9
+size 27756
--- a/Tests/ML/test_data/patch_sampling/overlay_with_aspect_dim1.png
+++ b/Tests/ML/test_data/patch_sampling/overlay_with_aspect_dim1.png
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3855d6aacb25a8d3c35922cd8543344d59586d8b64db26ae250571656ba4cd43
+size 15490
--- a/Tests/ML/test_data/patch_sampling/overlay_with_aspect_dim2.png
+++ b/Tests/ML/test_data/patch_sampling/overlay_with_aspect_dim2.png
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4051ab32986bb78bb3239f0273b5fd034310206d10f652accf75419741dd9055
+size 13173
--- a/Tests/ML/test_data/patch_sampling/sampled_center.nii.gz
+++ b/Tests/ML/test_data/patch_sampling/sampled_center.nii.gz
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18754b38e55c09a7cd10bb9bd46c31a90797a8466a2e4aac72705e26b62becc6
+size 4992
--- a/Tests/ML/test_data/patch_sampling/sampled_center.npy
+++ b/Tests/ML/test_data/patch_sampling/sampled_center.npy
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97f514d7005e4483ed7d7f06cbeab1954e4f0045aa6d88173ac62465fb4ccd0a
+size 18128
--- a/Tests/ML/test_data/patch_sampling/sampled_to_boundary.nii.gz
+++ b/Tests/ML/test_data/patch_sampling/sampled_to_boundary.nii.gz
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:672fd010b79c359e5e60248424d08cde7622576f6b79163816d77e46c0d6cefe
+size 6440
--- a/Tests/ML/test_data/patch_sampling/sampled_to_boundary.npy
+++ b/Tests/ML/test_data/patch_sampling/sampled_to_boundary.npy
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c34791cc48c387272a7545d0f4cfbf23d9cc6daa68c53e5143661ea42c88a9d
+size 18128
--- a/Tests/ML/test_data/patch_sampling/sampling_2d.npy
+++ b/Tests/ML/test_data/patch_sampling/sampling_2d.npy
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75c88cac934f3b6c27fa6aeab3ac0c3a062b74ad84585f03881a366067bf4c53
+size 1328
--- a/Tests/ML/test_data/patch_sampling/sampling_2d.png
+++ b/Tests/ML/test_data/patch_sampling/sampling_2d.png
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0adbde8f48bcd373f09ef73f606e2e479be73e4128374ecaf84d28b8cd75dc2f
+size 6715
--- a/Tests/ML/test_data/patch_sampling/scan_small.nii.gz
+++ b/Tests/ML/test_data/patch_sampling/scan_small.nii.gz
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ae3c1d9e6d2b3c371b1edc4608795b0df15171be094480dccf78ae93b34a306
+size 1146920
--- a/Tests/ML/test_data/prefix042_class1_slice_001.png
+++ b/Tests/ML/test_data/prefix042_class1_slice_001.png
--- a/Tests/ML/test_data/prefix042_class2_slice_002.png
+++ b/Tests/ML/test_data/prefix042_class2_slice_002.png
--- a/Tests/ML/test_model_testing.py
+++ b/Tests/ML/test_model_testing.py
@ -27,7 +27,7 @@ from InnerEye.ML.utils.run_recovery import RunRecovery
 from InnerEye.ML.visualizers.plot_cross_validation import get_config_and_results_for_offline_runs
 from Tests.ML.configs.ClassificationModelForTesting import ClassificationModelForTesting
 from Tests.ML.configs.DummyModel import DummyModel
-from Tests.ML.util import assert_file_contents, assert_file_contents_match_exactly, assert_nifti_content, \
+from Tests.ML.util import assert_file_contains_string, assert_text_files_match, assert_nifti_content, \
    get_image_shape
 from Tests.fixed_paths_for_tests import full_ml_test_data_path

@ -62,12 +62,12 @@ def test_model_test(test_output_dirs: TestOutputDirectories) -> None:
    patient1 = io_util.load_nifti_image(train_and_test_data_dir / "id1_channel1.nii.gz")
    patient2 = io_util.load_nifti_image(train_and_test_data_dir / "id2_channel1.nii.gz")

-    assert_file_contents(epoch_dir / DATASET_ID_FILE, placeholder_dataset_id)
-    assert_file_contents(epoch_dir / GROUND_TRUTH_IDS_FILE, "region")
-    assert_file_contents_match_exactly(epoch_dir / model_testing.METRICS_FILE_NAME,
-                                       Path(train_and_test_data_dir) / model_testing.METRICS_FILE_NAME)
-    assert_file_contents_match_exactly(epoch_dir / model_testing.METRICS_AGGREGATES_FILE,
-                                       Path(train_and_test_data_dir) / model_testing.METRICS_AGGREGATES_FILE)
+    assert_file_contains_string(epoch_dir / DATASET_ID_FILE, placeholder_dataset_id)
+    assert_file_contains_string(epoch_dir / GROUND_TRUTH_IDS_FILE, "region")
+    assert_text_files_match(epoch_dir / model_testing.METRICS_FILE_NAME,
+                            Path(train_and_test_data_dir) / model_testing.METRICS_FILE_NAME)
+    assert_text_files_match(epoch_dir / model_testing.METRICS_AGGREGATES_FILE,
+                            Path(train_and_test_data_dir) / model_testing.METRICS_AGGREGATES_FILE)
    # Plotting results vary between platforms. Can only check if the file is generated, but not its contents.
    assert (epoch_dir / model_testing.BOXPLOT_FILE).exists()

--- a/Tests/ML/test_model_training.py
+++ b/Tests/ML/test_model_training.py
@ -24,8 +24,9 @@ from InnerEye.ML.model_training_steps import ModelTrainingStepsForSegmentation
 from InnerEye.ML.models.losses.mixture import MixtureLoss
 from InnerEye.ML.sequence_config import SequenceModelBase
 from InnerEye.ML.utils.training_util import ModelTrainingResults
+from InnerEye.ML.visualizers.patch_sampling import PATCH_SAMPLING_FOLDER
 from Tests.ML.configs.DummyModel import DummyModel
-from Tests.ML.util import assert_file_contents
+from Tests.ML.util import assert_file_contains_string
 from Tests.fixed_paths_for_tests import full_ml_test_data_path

 config_path = full_ml_test_data_path()
@ -188,12 +189,16 @@ def _test_model_train(output_dirs: TestOutputDirectories,
    assert (train_config.outputs_folder / DATASET_CSV_FILE_NAME).is_file()
    assert (train_config.outputs_folder / STORED_CSV_FILE_NAMES[ModelExecutionMode.TRAIN]).is_file()
    assert (train_config.outputs_folder / STORED_CSV_FILE_NAMES[ModelExecutionMode.VAL]).is_file()
-    assert_file_contents(train_config.outputs_folder / TRAIN_STATS_FILE, expected_stats)
+    assert_file_contains_string(train_config.outputs_folder / TRAIN_STATS_FILE, expected_stats)

    # Test for saving of example images
    assert os.path.isdir(train_config.example_images_folder)
    example_files = os.listdir(train_config.example_images_folder)
    assert len(example_files) == 3 * 2
+    # Path visualization: There should be 3 slices for each of the 2 subjects
+    sampling_folder = train_config.outputs_folder / PATCH_SAMPLING_FOLDER
+    assert sampling_folder.is_dir()
+    assert len(list(sampling_folder.rglob("*.png"))) == 3 * 2


@pytest.mark.parametrize(["rates", "expected"],
--- a/Tests/ML/test_outputs.py
+++ b/Tests/ML/test_outputs.py
@ -20,12 +20,13 @@ from InnerEye.ML.pipelines.inference import InferencePipeline
 from InnerEye.ML.plotting import resize_and_save
 from InnerEye.ML.reports.segmentation_report import boxplot_per_structure
 from InnerEye.ML.utils import io_util
+from InnerEye.ML.utils.image_util import get_unit_image_header
 from InnerEye.ML.utils.io_util import ImageHeader
 from InnerEye.ML.utils.metrics_constants import MetricsFileColumns
 from InnerEye.ML.utils.metrics_util import MetricsPerPatientWriter
 from InnerEye.ML.utils.transforms import LinearTransform, get_range_for_window_level
 from Tests.ML.configs.DummyModel import DummyModel
-from Tests.ML.util import assert_file_contents, assert_file_contents_match_exactly, assert_nifti_content
+from Tests.ML.util import assert_file_contains_string, assert_text_files_match, assert_nifti_content
 from Tests.fixed_paths_for_tests import full_ml_test_data_path

 model_name = "Basic"
@ -64,7 +65,7 @@ def test_store_inference_results(test_output_dirs: TestOutputDirectories) -> Non
    posterior0 = to_unique_bytes(posterior[0], (0, 1))
    posterior1 = to_unique_bytes(posterior[1], (0, 1))
    spacing = (2.0, 2.0, 2.0)
-    header = ImageHeader(origin=(0, 0, 0), direction=(1, 0, 0, 0, 1, 0, 0, 0, 1), spacing=spacing)
+    header = get_unit_image_header(spacing=spacing)
    inference_result = InferencePipeline.Result(
        epoch=1,
        patient_id=12,
@ -125,7 +126,7 @@ def test_metrics_file(test_output_dirs: TestOutputDirectories) -> None:
    metrics_file = new_file("metrics_file.csv")
    d.to_csv(Path(metrics_file))
    # Sorting should be first by structure name alphabetically, then Dice with lowest scores first.
-    assert_file_contents(metrics_file, "Patient,Structure,Dice,HausdorffDistance_mm,MeanDistance_mm\n"
+    assert_file_contains_string(metrics_file, "Patient,Structure,Dice,HausdorffDistance_mm,MeanDistance_mm\n"
                                       "Patient3,kidney,0.400,1.000,0.100\n"
                                       "Patient2,kidney,0.700,1.000,0.200\n"
                                       "Patient1,liver,0.400,1.000,0.400\n"
@ -134,8 +135,8 @@ def test_metrics_file(test_output_dirs: TestOutputDirectories) -> None:
    aggregates_file = new_file(METRICS_AGGREGATES_FILE)
    d.save_aggregates_to_csv(Path(aggregates_file))
    # Sorting should be first by structure name alphabetically, then Dice with lowest scores first.
-    assert_file_contents_match_exactly(Path(aggregates_file),
-                                       full_ml_test_data_path() / METRICS_AGGREGATES_FILE)
+    assert_text_files_match(Path(aggregates_file),
+                            full_ml_test_data_path() / METRICS_AGGREGATES_FILE)
    boxplot_per_structure(d.to_data_frame(),
                          column_name=MetricsFileColumns.DiceNumeric.value,
                          title="Dice score")
--- a/Tests/ML/test_plotting.py
+++ b/Tests/ML/test_plotting.py
@ -64,6 +64,8 @@ def test_plot_image_and_contour(test_output_dirs: TestOutputDirectories) -> None
    plotting.plot_image_and_label_contour(image, labels, contour_arguments={'colors': 'r'}, plot_file_name=plot_file)
    assert plot_file.exists()
    expected = full_ml_test_data_path(file_name)
+    # To update the stored results, uncomment this line:
+    # expected.write_bytes(plot_file.read_bytes())
    assert file_as_bytes(plot_file) == file_as_bytes(expected)


@ -84,6 +86,8 @@ def test_plot_image_and_contour_scaled(test_output_dirs: TestOutputDirectories)
                                          image_range=(-5, 5), plot_file_name=plot_file)
    assert plot_file.exists()
    expected = full_ml_test_data_path(file_name)
+    # To update the stored results, uncomment this line:
+    # expected.write_bytes(plot_file.read_bytes())
    assert file_as_bytes(plot_file) == file_as_bytes(expected)


@ -108,6 +112,8 @@ def test_plot_image_and_multiple_contours(test_output_dirs: TestOutputDirectorie
                                          plot_file_name=plot_file)
    assert plot_file.exists()
    expected = full_ml_test_data_path(file_name)
+    # To update the stored results, uncomment this line:
+    # expected.write_bytes(plot_file.read_bytes())
    assert file_as_bytes(plot_file) == file_as_bytes(expected)


@ -131,10 +137,12 @@ def compare_files(actual: List[Path], expected: List[str]) -> None:
        full_expected = full_ml_test_data_path(e)
        assert full_expected.exists()
        assert str(f).endswith(e)
+        # To update the stored results, uncomment this line:
+        # full_expected.write_bytes(f.read_bytes())
        assert file_as_bytes(f) == file_as_bytes(full_expected)


-@pytest.mark.skipif(common_util.is_linux(), reason="Rendering of the graph is slightly different on Linux")
+@pytest.mark.skipif(common_util.is_windows(), reason="Rendering of the graph is slightly different on Linux")
 def test_plot_normalization_result(test_output_dirs: TestOutputDirectories) -> None:
    """
    Tests plotting of before/after histograms in photometric normalization.
--- a/Tests/ML/util.py
+++ b/Tests/ML/util.py
@ -3,12 +3,14 @@
 #  Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
 #  ------------------------------------------------------------------------------------------
 import logging
+import os
 from pathlib import Path
 from typing import Any, List, Optional, Union

 import numpy as np
 import pytest
 import torch
+from PIL import Image
 from azureml.core import Workspace

 from InnerEye.Azure.azure_config import AzureConfig
@ -79,7 +81,7 @@ def load_train_and_test_data_channels(patient_ids: List[int],
    return samples


-def assert_file_contents(full_file: Union[str, Path], expected: Any = None) -> None:
+def assert_file_contains_string(full_file: Union[str, Path], expected: Any = None) -> None:
    """
    Checks if the given file contains an expected string
    :param full_file: The path to the file.
@ -92,7 +94,7 @@ def assert_file_contents(full_file: Union[str, Path], expected: Any = None) -> N
        _assert_line(file_path.read_text(), expected)


-def assert_file_contents_match_exactly(full_file: Path, expected_file: Path) -> None:
+def assert_text_files_match(full_file: Path, expected_file: Path) -> None:
    """
    Checks line by line (ignoring leading and trailing spaces) if the given two files contains the exact same strings
    :param full_file: The path to the file.
@ -157,6 +159,28 @@ def assert_tensors_equal(t1: torch.Tensor, t2: Union[torch.Tensor, List], abs: f
    assert v1 == pytest.approx(v2, abs=abs), f"Tensor elements don't match with tolerance {abs}: {v1} != {v2}"


+def assert_binary_files_match(actual_file: Path, expected_file: Path) -> None:
+    """
+    Checks if two files contain exactly the same bytes. If PNG files mismatch, additional diagnostics is printed.
+    """
+    # Uncomment this line to batch-update all result files that use this assert function
+    # expected_file.write_bytes(actual_file.read_bytes())
+    assert_file_exists(actual_file)
+    assert_file_exists(expected_file)
+    actual = actual_file.read_bytes()
+    expected = expected_file.read_bytes()
+    if actual == expected:
+        return
+    if actual_file.suffix == ".png" and expected_file.suffix == ".png":
+        actual_image = Image.open(actual_file)
+        expected_image = Image.open(expected_file)
+        actual_size = actual_image.size
+        expected_size = expected_image.size
+        assert actual_size == expected_size, f"Image sizes don't match: actual {actual_size}, expected {expected_size}"
+        assert np.allclose(np.array(actual_image), np.array(expected_image)), "Image pixel data does not match."
+    assert False, f"File contents does not match: len(actual)={len(actual)}, len(expected)={len(expected)}"
+
+
 DummyPatientMetadata = PatientMetadata(patient_id=42)


@ -182,3 +206,11 @@ def get_default_workspace() -> Workspace:
    :return:
    """
    return get_default_azure_config().get_workspace()
+
+
+def is_running_on_azure() -> bool:
+    """
+    Returns True if the code appears to be running on an Azure build agent, and False otherwise.
+    """
+    # Guess by looking at the AGENT_OS variable, that all Azure hosted agents define.
+    return bool(os.environ.get("AGENT_OS", None))
--- a/Tests/ML/utils/test_augmentation.py
+++ b/Tests/ML/utils/test_augmentation.py
@ -34,12 +34,12 @@ crop_size_requires_padding = (9, 8, 12)
 # Random Crop Tests
 def test_valid_full_crop() -> None:
    metadata = DummyPatientMetadata
-    sample, _ = augmentation.random_crop(sample=Sample(image=valid_image_4d,
-                                                       labels=valid_labels,
-                                                       mask=valid_mask,
-                                                       metadata=metadata),
-                                         crop_size=valid_full_crop_size,
-                                         class_weights=valid_class_weights)
+    sample, _, _ = augmentation.random_crop(sample=Sample(image=valid_image_4d,
+                                                          labels=valid_labels,
+                                                          mask=valid_mask,
+                                                          metadata=metadata),
+                                            crop_size=valid_full_crop_size,
+                                            class_weights=valid_class_weights)

    assert np.array_equal(sample.image, valid_image_4d)
    assert np.array_equal(sample.labels, valid_labels)
@ -99,7 +99,7 @@ def test_random_crop(crop_size: Any) -> None:
    # this should ne handled gracefully (class being ignored from sampling)
    labels[0] = 1
    labels[1] = 0
-    sample, _ = augmentation.random_crop(Sample(
+    sample, _, _ = augmentation.random_crop(Sample(
        image=valid_image_4d,
        labels=valid_labels,
        mask=valid_mask,
@ -147,7 +147,7 @@ def test_valid_class_weights(class_weights: List[float]) -> None:
        return

    for _ in range(0, total_crops):
-        crop_sample, center = augmentation.random_crop(sample, crop_size, class_weights)
+        crop_sample, center, _ = augmentation.random_crop(sample, crop_size, class_weights)
        sampled_class = list(labels[:, center[0], center[1], center[2]]).index(1)
        sampled_label_center_distribution[sampled_class] += 1

--- a/Tests/ML/utils/test_io_util.py
+++ b/Tests/ML/utils/test_io_util.py
@ -20,7 +20,7 @@ from InnerEye.ML.utils.dataset_util import DatasetExample, store_and_upload_exam
 from InnerEye.ML.utils.io_util import ImageHeader, is_nifti_file_path, is_numpy_file_path, \
    load_image_in_known_formats, load_numpy_image, is_dicom_file_path, load_dicom_image, \
    ImageAndSegmentations, load_images_and_stack, DicomTags, PhotometricInterpretation
-from Tests.ML.util import assert_file_contents
+from Tests.ML.util import assert_file_contains_string
 from Tests.fixed_paths_for_tests import full_ml_test_data_path

 known_nii_path = full_ml_test_data_path("test_good.nii.gz")
@ -99,7 +99,7 @@ def test_save_file(value: Any, expected: Any) -> None:
    file = full_ml_test_data_path("test.txt")
    io_util.save_lines_to_file(Path(file), value)

-    assert_file_contents(file, expected)
+    assert_file_contains_string(file, expected)

    os.remove(str(file))

--- a/Tests/ML/visualizers/test_plot_cross_validation.py
+++ b/Tests/ML/visualizers/test_plot_cross_validation.py
@ -29,7 +29,7 @@ from InnerEye.ML.visualizers.plot_cross_validation import COL_MODE, \
 from Tests.Common.test_util import DEFAULT_ENSEMBLE_RUN_RECOVERY_ID, DEFAULT_RUN_RECOVERY_ID
 from Tests.ML.models.architectures.sequential.test_rnn_classifier import ToyMultiLabelSequenceModel, \
    _get_multi_label_sequence_dataframe
-from Tests.ML.util import assert_file_contents_match_exactly, get_default_azure_config
+from Tests.ML.util import assert_text_files_match, get_default_azure_config
 from Tests.fixed_paths_for_tests import full_ml_test_data_path


@ -108,7 +108,8 @@ def create_run_result_file_list(config: PlotCrossValidationConfig, folder: str,
    return files


-def create_file_list_for_segmentation_recovery_run(test_config_ensemble: PlotCrossValidationConfig) -> List[RunResultFiles]:
+def create_file_list_for_segmentation_recovery_run(test_config_ensemble: PlotCrossValidationConfig) -> \
+        List[RunResultFiles]:
    return create_run_result_file_list(config=test_config_ensemble,
                                       folder="master_1570466706163110")

@ -288,9 +289,9 @@ def test_save_outliers(test_config_ensemble: PlotCrossValidationConfig,
    test_config_ensemble.outlier_range = 0
    dataset_split_metrics = {x: _get_metrics_df(x) for x in [ModelExecutionMode.VAL]}
    save_outliers(test_config_ensemble, dataset_split_metrics, Path(test_config_ensemble.outputs_directory))
-    assert_file_contents_match_exactly(full_file=Path(test_config_ensemble.outputs_directory)
-                                                 / f"{ModelExecutionMode.VAL.value}_outliers.txt",
-                                       expected_file=Path(
+    assert_text_files_match(full_file=Path(test_config_ensemble.outputs_directory)
+                                      / f"{ModelExecutionMode.VAL.value}_outliers.txt",
+                            expected_file=Path(
                                           full_ml_test_data_path(
                                               f"{ModelExecutionMode.VAL.value}_outliers.txt")))

--- a/Tests/ML/visualizers/test_visualize_patches.py
+++ b/Tests/ML/visualizers/test_visualize_patches.py
@ -0,0 +1,177 @@
+#  ------------------------------------------------------------------------------------------
+#  Copyright (c) Microsoft Corporation. All rights reserved.
+#  Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
+#  ------------------------------------------------------------------------------------------
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pytest
+
+from InnerEye.Common.common_util import is_windows
+from InnerEye.Common.output_directories import TestOutputDirectories
+from InnerEye.ML.config import SegmentationModelBase, equally_weighted_classes
+from InnerEye.ML.dataset.sample import PatientMetadata, Sample
+from InnerEye.ML.plotting import resize_and_save, scan_with_transparent_overlay
+from InnerEye.ML.utils import io_util
+from InnerEye.ML.utils.image_util import get_unit_image_header
+from InnerEye.ML.utils.io_util import load_nifti_image
+from InnerEye.ML.utils.ml_util import set_random_seed
+from InnerEye.ML.visualizers.patch_sampling import visualize_random_crops
+from Tests.ML.util import assert_binary_files_match, assert_file_exists, is_running_on_azure
+from Tests.fixed_paths_for_tests import full_ml_test_data_path
+
+
+@pytest.mark.skipif(is_windows(), reason="Plotting output is not consistent across platforms.")
+@pytest.mark.parametrize("labels_to_boundary", [True, False])
+def test_visualize_patch_sampling(test_output_dirs: TestOutputDirectories,
+                                  labels_to_boundary: bool) -> None:
+    """
+    Tests if patch sampling and producing diagnostic images works as expected.
+    :param test_output_dirs:
+    :param labels_to_boundary: If true, the ground truth labels are placed close to the image boundary, so that
+    crops have to be adjusted inwards. If false, ground truth labels are all far from the image boundaries.
+    """
+    set_random_seed(0)
+    shape = (10, 30, 30)
+    foreground_classes = ["fg"]
+    class_weights = equally_weighted_classes(foreground_classes)
+    config = SegmentationModelBase(should_validate=False,
+                                   crop_size=(2, 10, 10),
+                                   class_weights=class_weights)
+    image = np.random.rand(1, *shape).astype(np.float32) * 1000
+    mask = np.ones(shape)
+    labels = np.zeros((len(class_weights),) + shape)
+    if labels_to_boundary:
+        # Generate foreground labels in such a way that a patch centered around a foreground pixel would
+        # reach outside of the image.
+        labels[1, 4:8, 3:27, 3:27] = 1
+    else:
+        labels[1, 4:8, 15:18, 15:18] = 1
+    labels[0] = 1 - labels[1]
+    output_folder = Path(test_output_dirs.root_dir)
+    image_header = get_unit_image_header()
+    sample = Sample(image=image,
+                    mask=mask,
+                    labels=labels,
+                    metadata=PatientMetadata(patient_id=123,
+                                             image_header=image_header))
+    expected_folder = full_ml_test_data_path("patch_sampling")
+    heatmap = visualize_random_crops(sample, config, output_folder=output_folder)
+    expected_heatmap = expected_folder / ("sampled_to_boundary.npy" if labels_to_boundary else "sampled_center.npy")
+    # To update the stored results, uncomment this line:
+    # np.save(str(expected_heatmap), heatmap)
+    assert np.allclose(heatmap, np.load(str(expected_heatmap))), "Patch sampling created a different heatmap."
+    f1 = output_folder / "123_ct.nii.gz"
+    assert_file_exists(f1)
+    f2 = output_folder / "123_sampled_patches.nii.gz"
+    assert_file_exists(f2)
+    thumbnails = [
+        "123_sampled_patches_dim0.png",
+        "123_sampled_patches_dim1.png",
+        "123_sampled_patches_dim2.png",
+    ]
+    for f in thumbnails:
+        assert_file_exists(output_folder / f)
+
+    expected = expected_folder / ("sampled_to_boundary.nii.gz" if labels_to_boundary else "sampled_center.nii.gz")
+    # To update test results:
+    # shutil.copy(str(f2), str(expected))
+    expected_image = io_util.load_nifti_image(expected)
+    actual_image = io_util.load_nifti_image(f2)
+    np.allclose(expected_image.image, actual_image.image)
+    if labels_to_boundary:
+        for f in thumbnails:
+            # Uncomment this line to update test results
+            # (expected_folder / f).write_bytes((output_folder / f).read_bytes())
+            if not is_running_on_azure():
+                # When running on the Azure build agents, it appears that the bounding box of the images
+                # is slightly different than on local runs, even with equal dpi settings.
+                # Not able to figure out how to make the run results consistent, hence disable in cloud runs.
+                assert_binary_files_match(output_folder / f, expected_folder / f)
+
+
+@pytest.mark.skipif(is_windows(), reason="Plotting output is not consistent across platforms.")
+def test_visualize_patch_sampling_2d(test_output_dirs: TestOutputDirectories) -> None:
+    """
+    Tests if patch sampling works for 2D images.
+    :param test_output_dirs:
+    """
+    set_random_seed(0)
+    shape = (1, 20, 30)
+    foreground_classes = ["fg"]
+    class_weights = equally_weighted_classes(foreground_classes)
+    config = SegmentationModelBase(should_validate=False,
+                                   crop_size=(1, 5, 10),
+                                   class_weights=class_weights)
+    image = np.random.rand(1, *shape).astype(np.float32) * 1000
+    mask = np.ones(shape)
+    labels = np.zeros((len(class_weights),) + shape)
+    labels[1, 0, 8:12, 5:25] = 1
+    labels[0] = 1 - labels[1]
+    output_folder = Path(test_output_dirs.root_dir)
+    image_header = None
+    sample = Sample(image=image,
+                    mask=mask,
+                    labels=labels,
+                    metadata=PatientMetadata(patient_id=123,
+                                             image_header=image_header))
+    heatmap = visualize_random_crops(sample, config, output_folder=output_folder)
+    expected_folder = full_ml_test_data_path("patch_sampling")
+    expected_heatmap = expected_folder / "sampling_2d.npy"
+    # To update the stored results, uncomment this line:
+    # np.save(str(expected_heatmap), heatmap)
+    assert np.allclose(heatmap, np.load(str(expected_heatmap))), "Patch sampling created a different heatmap."
+    assert len(list(output_folder.rglob("*.nii.gz"))) == 0
+    assert len(list(output_folder.rglob("*.png"))) == 1
+    actual_file = output_folder / "123_sampled_patches.png"
+    assert_file_exists(actual_file)
+    expected = expected_folder / "sampling_2d.png"
+    # To update the stored results, uncomment this line:
+    # expected.write_bytes(actual_file.read_bytes())
+    if not is_running_on_azure():
+        # When running on the Azure build agents, it appears that the bounding box of the images
+        # is slightly different than on local runs, even with equal dpi settings.
+        # It says: Image sizes don't match: actual (685, 469), expected (618, 424)
+        # Not able to figure out how to make the run results consistent, hence disable in cloud runs.
+        assert_binary_files_match(actual_file, expected)
+
+
+@pytest.mark.skipif(is_windows(), reason="Plotting output is not consistent across platforms.")
+@pytest.mark.parametrize("dimension", [0, 1, 2])
+def test_plot_overlay(test_output_dirs: TestOutputDirectories,
+                      dimension: int) -> None:
+    set_random_seed(0)
+    shape = (10, 30, 30)
+    image = np.random.rand(*shape).astype(np.float32) * 1000
+    mask = np.zeros(shape).flatten()
+    for i in range(len(mask)):
+        mask[i] = i
+    mask = mask.reshape(shape)
+    plt.figure()
+    scan_with_transparent_overlay(image, mask, dimension, shape[dimension] // 2, spacing=(1.0, 1.0, 1.0))
+    file = Path(test_output_dirs.root_dir) / "plot.png"
+    resize_and_save(5, 5, file)
+    assert file.exists()
+    expected = full_ml_test_data_path("patch_sampling") / f"overlay_{dimension}.png"
+    # To update the stored results, uncomment this line:
+    # expected.write_bytes(file.read_bytes())
+    assert_binary_files_match(file, expected)
+
+
+@pytest.mark.skipif(is_windows(), reason="Plotting output is not consistent across platforms.")
+def test_show_non_square_images(test_output_dirs: TestOutputDirectories) -> None:
+    input_file = full_ml_test_data_path("patch_sampling") / "scan_small.nii.gz"
+    input = load_nifti_image(input_file)
+    image = input.image
+    shape = image.shape
+    mask = np.zeros_like(image)
+    mask[shape[0] // 2, shape[1] // 2, shape[2] // 2] = 1
+    for dim in range(3):
+        scan_with_transparent_overlay(image, mask, dim, shape[dim] // 2, spacing=input.header.spacing)
+        actual_file = Path(test_output_dirs.root_dir) / f"dim_{dim}.png"
+        resize_and_save(5, 5, actual_file)
+        expected = full_ml_test_data_path("patch_sampling") / f"overlay_with_aspect_dim{dim}.png"
+        # To update the stored results, uncomment this line:
+        # expected.write_bytes(actual_file.read_bytes())
+        assert_binary_files_match(actual_file, expected)
--- a/TestsOutsidePackage/test_register_model.py
+++ b/TestsOutsidePackage/test_register_model.py
@ -23,7 +23,7 @@ from InnerEye.ML.model_config_base import ModelConfigBase
 from InnerEye.ML.model_inference_config import ModelInferenceConfig
 from InnerEye.ML.model_testing import DEFAULT_RESULT_IMAGE_NAME
 from InnerEye.ML.run_ml import MLRunner
-from InnerEye.ML.utils.io_util import ImageHeader
+from InnerEye.ML.utils.image_util import get_unit_image_header
 from Tests.ML.util import assert_nifti_content, get_default_azure_config, get_default_workspace, get_model_loader, \
    get_nifti_shape
 from Tests.fixed_paths_for_tests import RELATIVE_TEST_OUTPUTS_PATH, full_ml_test_data_path, tests_root_directory
@ -142,7 +142,7 @@ def test_register_and_score_model(is_ensemble: bool,

            # sanity check the resulting segmentation
            expected_shape = get_nifti_shape(img_channel_1_path)
-            image_header = ImageHeader(origin=(0, 0, 0), direction=(1, 0, 0, 0, 1, 0, 0, 0, 1), spacing=(1, 1, 1))
+            image_header = get_unit_image_header()
            assert_nifti_content(str(expected_segmentation_path), expected_shape, image_header, [0], np.ubyte)

        finally:
--- a/azure-pipelines/build.yaml
+++ b/azure-pipelines/build.yaml
@ -17,7 +17,8 @@ steps:
  - bash: |
      conda env create --file environment.yml --name InnerEye --quiet
      source activate InnerEye
-      pip freeze > requirements.txt
+      echo "Environment has been created with these packages:"
+      pip freeze
    failOnStderr: false # Conda env create does not have an option to suppress warnings generated in wheel.py
    condition: succeeded()
    displayName: Create conda environment
--- a/docs/debugging_and_monitoring.md
+++ b/docs/debugging_and_monitoring.md
@ -17,8 +17,9 @@ session, monitoring the newly queued job.

 ### Resource Monitor
 GPU and CPU usage can be monitored throughout the execution of a run (local and AML) by setting the monitoring interval 
-for the resource monitor eg: `--monitoring_interval_seconds=1`. This will spawn a separate process at the start of the
-run which will log the diagnostics to AML metrics and TensorBoard logs under `Diagnostics`.
+for the resource monitor eg: `--monitoring_interval_seconds=5`. This will spawn a separate process at the start of the
+run which will log both GPU and CPU utilization and memory consumption. These metrics will be written to AzureML as
+well as a separate TensorBoard logs file under `Diagnostics`.

 ### Debugging setup on local machine

--- a/docs/model_diagnostics.md
+++ b/docs/model_diagnostics.md
@ -0,0 +1,25 @@
+# Model Diagnostics
+
+The InnerEye toolbox has extensive reporting about the model building process, as well as the performance
+of the final model. Our goal is to provide as much insight as possible about the critical steps (and 
+pitfalls) of building a model.
+
+## Patch sampling for segmentation models
+
+When building a segmentation model, one of the crucial steps is how equally-shaped crops are taken from
+the raw medical image, that are later fed into the model training. An outline of that process is
+given [here](https://github.com/microsoft/InnerEye-DeepLearning/wiki/Adjusting-and-tuning-a-segmentation-model).
+
+At the start of training, the toolbox inspects the first 10 images of the training set. For each of them,
+1000 random crops are drawn at random, similar to how they would be drawn during training. From that, a
+heatmap is constructed, where each voxel value contains how often that specific voxels was actually contained
+in the random crop (a value between 0 and 1000). The heatmap is stored as a Nifti file, alongside the 
+original scan, in folder `outputs/patch_sampling/`. When running inside AzureML, navigate to the 
+"Outputs" tab, and go to the folder (see screenshot below).
+
+In addition, for each patient, 3 thumbnail images are generated, that overlay the heatmap on top of the
+scan. Dark red indicates voxels that are sampled very often, transparent red indicates voxels that are used
+infrequently.
+
+Example thumbnail when viewed in the AzureML UI:
+![](screenshot_azureml_patch_sampling.png)
--- a/docs/patch_sampling_example.png
+++ b/docs/patch_sampling_example.png
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed3bb4d800d269eb1b4fe8f82bd8be32f0db225d502904ef8af25a53c9f65ae8
+size 161947
--- a/docs/screenshot_azureml_patch_sampling.png
+++ b/docs/screenshot_azureml_patch_sampling.png
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d435820b637b59e1a2b8db8567749f4bf69b69cd075fece87065992ae0a364f
+size 288927