Replace data quality folder with a link to the commit (#692)
* Move to commit link
* Remove build
* Update paper link
* Update README
* Add changelog
This commit is contained in:
Parent
1606729c7a
Commit
3c919dca14
|
@ -160,12 +160,6 @@ most_recent_run.txt
|
|||
# The default folder that contains downloaded Tensorboard files
|
||||
tensorboard_runs
|
||||
|
||||
# InnerEye-DataQuality
|
||||
InnerEye-DataQuality/cifar-10-python.tar.gz
|
||||
InnerEye-DataQuality/name_stats_scoring.png
|
||||
InnerEye-DataQuality/cifar-10-batches-py
|
||||
InnerEye-DataQuality/logs
|
||||
InnerEye-DataQuality/data
|
||||
None
|
||||
cifar-10-batches-py
|
||||
cifar-10-python.tar.gz
|
||||
|
|
|
@ -136,7 +136,7 @@ in inference-only runs when using lightning containers.
|
|||
- ([#674](https://github.com/microsoft/InnerEye-DeepLearning/pull/674)) Fix DeepMIL metrics bug whereby hard labels were used instead of probabilities.
|
||||
|
||||
### Removed
|
||||
|
||||
- ([#692](https://github.com/microsoft/InnerEye-DeepLearning/pull/692)) Replace InnerEye-DataQuality with a link to the commit.
|
||||
- ([#577](https://github.com/microsoft/InnerEye-DeepLearning/pull/577)) Removing the monitoring of batch loading time,
|
||||
use the `BatchTimeCallback` from `hi-ml` instead
|
||||
- ([#542](https://github.com/microsoft/InnerEye-DeepLearning/pull/542)) Removed Windows test leg from build pipeline.
|
||||
|
|
|
@ -1,5 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
|
@ -1,115 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Optional
|
||||
|
||||
import numpy as np
|
||||
import scipy
|
||||
from scipy.special import softmax
|
||||
from sklearn.neighbors import kneighbors_graph
|
||||
|
||||
|
||||
@dataclass(init=True, frozen=True)
|
||||
class GraphParameters:
|
||||
"""Class for setting graph connectivity and diffusion parameters."""
|
||||
n_neighbors: int
|
||||
diffusion_alpha: float
|
||||
cg_solver_max_iter: int
|
||||
diffusion_batch_size: Optional[int]
|
||||
distance_kernel: str # {'euclidean' or 'cosine'}
|
||||
|
||||
|
||||
def _get_affinity_matrix(embeddings: np.ndarray,
|
||||
n_neighbors: int,
|
||||
distance_kernel: str = 'cosine') -> scipy.sparse.csr.csr_matrix:
|
||||
"""
|
||||
:param embeddings: Input sample embeddings (n_samples x n_embedding_dim)
|
||||
:param n_neighbors: Number of neighbors in the KNN Graph
|
||||
:param distance_kernel: Distance kernel to compute sample similarities {'euclidean' or 'cosine'}
|
||||
"""
|
||||
|
||||
# Build a k-NN graph using the embeddings
|
||||
if distance_kernel == 'euclidean':
|
||||
sigma = embeddings.shape[1]
|
||||
knn_dist_graph = kneighbors_graph(embeddings, n_neighbors, mode='distance', metric='euclidean', n_jobs=-1)
|
||||
knn_dist_graph.data = np.exp(-1.0 * np.asarray(knn_dist_graph.data) ** 2 / (2.0 * sigma ** 2))
|
||||
elif distance_kernel == 'cosine':
|
||||
knn_dist_graph = kneighbors_graph(embeddings, n_neighbors, mode='distance', metric='cosine', n_jobs=-1)
|
||||
knn_dist_graph.data = 1.0 - np.asarray(knn_dist_graph.data) / 2.0
|
||||
else:
|
||||
raise ValueError(f"Unknown sample distance kernel {distance_kernel}")
|
||||
|
||||
return knn_dist_graph
|
||||
|
||||
|
||||
def build_connectivity_graph(normalised: bool = True, **affinity_kwargs: Any) -> np.ndarray:
|
||||
"""
|
||||
Builds the connectivity graph and returns either the symmetrically normalised adjacency matrix or the unnormalised graph Laplacian
|
||||
:param normalised: If set to True, the graph adjacency is symmetrically normalised with the degree matrix (D^-1/2 W D^-1/2); otherwise the unnormalised Laplacian D - W is returned
|
||||
:param affinity_kwargs: Arguments required to construct an affinity matrix
|
||||
(weights representing similarity between points)
|
||||
"""
|
||||
|
||||
# Build a symmetric adjacency matrix
|
||||
A = _get_affinity_matrix(**affinity_kwargs)
|
||||
W = 0.5 * (A + A.T)
|
||||
if normalised:
|
||||
# Normalize the similarity graph
|
||||
W = W - scipy.sparse.diags(W.diagonal())
|
||||
D = W.sum(axis=1)
|
||||
D[D == 0] = 1
|
||||
D_sqrt_inv = np.array(1. / np.sqrt(D))
|
||||
D_sqrt_inv = scipy.sparse.diags(D_sqrt_inv.reshape(-1))
|
||||
L_norm = D_sqrt_inv * W * D_sqrt_inv
|
||||
return L_norm
|
||||
else:
|
||||
num_samples = W.shape[0]
|
||||
D = W.sum(axis=1)
|
||||
D = np.diag(np.asarray(D).reshape(num_samples, ))
|
||||
L = D - W
|
||||
return L
|
||||
|
||||
|
||||
def label_diffusion(inv_laplacian: np.ndarray,
|
||||
labels: np.ndarray,
|
||||
query_batch_ids: np.ndarray,
|
||||
class_priors: Optional[np.ndarray] = None,
|
||||
diffusion_normalizing_factor: float = 0.01) -> np.ndarray:
|
||||
"""
|
||||
:param inv_laplacian: inverse laplacian of the graph
|
||||
:param labels: per-sample label distribution of shape (n_samples x n_classes); each row is expected to sum to one
|
||||
:param query_batch_ids: the ids of the query (unlabelled) samples for which labels are diffused
|
||||
:param class_priors: prior distribution of each class [n_classes,]
|
||||
:param diffusion_normalizing_factor: factor to normalize the diffused labels
|
||||
"""
|
||||
diffusion_start = time.time()
|
||||
|
||||
# Input number of nodes and classes
|
||||
n_samples = labels.shape[0]
|
||||
n_classes = labels.shape[1]
|
||||
|
||||
# Find the labelled set of nodes in the graph
|
||||
all_idx = np.array(range(n_samples))
|
||||
labeled_idx = np.setdiff1d(all_idx, query_batch_ids.flatten())
|
||||
assert (np.all(np.isin(query_batch_ids, all_idx)))
|
||||
assert (np.allclose(np.sum(labels, axis=1), np.ones(shape=(labels.shape[0])), rtol=1e-3))
|
||||
|
||||
# Initialize the y vector for each class (eq 5 from the paper, normalized with the class size)
|
||||
# and apply label propagation
|
||||
y = np.zeros((n_samples, n_classes))
|
||||
y[labeled_idx] = labels[labeled_idx] / np.sum(labels[labeled_idx], axis=0, keepdims=True)
|
||||
if class_priors is not None:
|
||||
y = y * class_priors
|
||||
Z = np.matmul(inv_laplacian[query_batch_ids, :], y)
|
||||
|
||||
# Normalise the diffused logits
|
||||
output = softmax(Z / diffusion_normalizing_factor, axis=1)
|
||||
# output = Z / Z.sum(axis=1)
|
||||
logging.debug(f"Graph diffusion time: {0: .2f} secs".format(time.time() - diffusion_start))
|
||||
|
||||
return output
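# --- Illustrative usage sketch (not part of the original module) ---
# Assumes the functions above are importable and uses hypothetical data. The closed-form
# operator inv(I - alpha * L_norm) is only one common choice of diffusion operator; the
# removed code may have used a conjugate-gradient solver (see cg_solver_max_iter) instead.
params = GraphParameters(n_neighbors=10,
                         diffusion_alpha=0.95,
                         cg_solver_max_iter=10,
                         diffusion_batch_size=None,
                         distance_kernel='cosine')
embeddings = np.random.RandomState(0).randn(200, 32)                 # 200 samples, 32-dim embeddings
labels = np.eye(10)[np.random.RandomState(1).randint(0, 10, 200)]    # one-hot labels, 10 classes
query_batch_ids = np.arange(180, 200)                                # samples whose labels are diffused

# Symmetrically normalised adjacency of the k-NN graph built from the embeddings
l_norm = build_connectivity_graph(normalised=True,
                                  embeddings=embeddings,
                                  n_neighbors=params.n_neighbors,
                                  distance_kernel=params.distance_kernel)
# Dense closed-form diffusion operator (fine for small graphs only)
inv_laplacian = np.linalg.inv(np.eye(200) - params.diffusion_alpha * l_norm.toarray())
diffused = label_diffusion(inv_laplacian, labels, query_batch_ids)   # shape (20, 10): class posteriors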
|
|
@ -1,60 +0,0 @@
|
|||
## Config arguments for model training:
|
||||
All possible config arguments are defined in [model_config.py](InnerEyeDataQuality/configs/models/model_config.py). Here you will find a summary of the most important config arguments:
|
||||
* If you want to train a model with co_teaching, you will need to set `train.use_co_teaching: True` in your config.
|
||||
* If you want to finetune from a pretrained SSL checkpoint:
|
||||
* You will need to set `train.use_self_supervision: True` to tell the code to load a pretrained checkpoint.
|
||||
* You will need to update the `train.self_supervision.checkpoints: [PATH_TO_SSL]` field with the checkpoints to use for initialization of your model. Note that if you want to train a co-teaching model in combination with SSL pretrained initialization, your list of checkpoints needs to have length 2.
|
||||
* You can also choose whether or not to freeze the encoder during finetuning with the `train.self_supervision.freeze_encoder` field.
|
||||
* If you want to train a model using ELR, you can set `train.use_elr: True`. A minimal config combining these options is sketched below.
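Putting these options together, a minimal (hypothetical) `train` section for a co-teaching model finetuned from two SSL checkpoints could look like the sketch below; the field names follow the options listed above, while the checkpoint paths are placeholders.

```yaml
train:
  use_co_teaching: True
  use_elr: False
  use_self_supervision: True
  self_supervision:
    checkpoints: [PATH_TO_SSL_CHECKPOINT_1, PATH_TO_SSL_CHECKPOINT_2]  # two checkpoints for co-teaching
    freeze_encoder: False
```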
|
||||
|
||||
### CIFAR10H
|
||||
We provide configurations to run experiments on CIFAR10H with 15% and 30% noise rates, respectively.
|
||||
* Configs for 15% noise rate experiments can be found in [configs/models/cifar10h_noise_15](InnerEyeDataQuality/configs/models/cifar10h_noise_15). In detail, this folder contains configs for:
|
||||
* vanilla resnet training: [InnerEyeDataQuality/configs/models/cifar10h_noise_15/resnet.yaml](InnerEyeDataQuality/configs/models/cifar10h_noise_15/resnet.yaml)
|
||||
* co-teaching resnet training: [InnerEyeDataQuality/configs/models/cifar10h_noise_15/resnet_co_teaching.yaml](InnerEyeDataQuality/configs/models/cifar10h_noise_15/resnet_co_teaching.yaml)
|
||||
* SSL + linear head training: [InnerEyeDataQuality/configs/models/cifar10h_noise_15/resnet_self_supervision_v3.yaml](InnerEyeDataQuality/configs/models/cifar10h_noise_15/resnet_self_supervision_v3.yaml)
|
||||
* Configs for 30% noise rate experiments can be found in [configs/models/cifar10h_noise_30](InnerEyeDataQuality/configs/models/cifar10h_noise_30). In detail this folder contains configs for:
|
||||
* vanilla resnet training: [InnerEyeDataQuality/configs/models/cifar10h_noise_30/resnet.yaml](InnerEyeDataQuality/configs/models/cifar10h_noise_30/resnet.yaml)
|
||||
* co-teaching resnet training: [InnerEyeDataQuality/configs/models/cifar10h_noise_30/resnet_co_teaching.yaml](InnerEyeDataQuality/configs/models/cifar10h_noise_30/resnet_co_teaching.yaml)
|
||||
* SSL + linear head training: [InnerEyeDataQuality/configs/models/cifar10h_noise_30/resnet_self_supervision_v3.yaml](InnerEyeDataQuality/configs/models/cifar10h_noise_30/resnet_self_supervision_v3.yaml)
|
||||
* ELR training: [InnerEyeDataQuality/configs/models/cifar10h_noise_30/resnet_elr.yaml](InnerEyeDataQuality/configs/models/cifar10h_noise_30/resnet_elr.yaml)
|
||||
* Examples of configs for models used in the model selection benchmark experiment can be found in the [configs/models/benchmark_3_idn](InnerEyeDataQuality/configs/models/benchmark_3_idn) folder.
|
||||
|
||||
### Noisy Chest-Xray
|
||||
*Note:* To run any model on this dataset, you will first need to make sure you have the dataset ready on your machine (see Benchmark datasets > Noisy Chest-Xray > Pre-requisite section).
|
||||
|
||||
* We provide configurations corresponding to the experiments on the NoisyChestXray dataset with 13% noise rate in the [configs/models/cxr](InnerEyeDataQuality/configs/models/cxr) folder:
|
||||
* Vanilla resnet training: [InnerEyeDataQuality/configs/models/cxr/resnet.yaml](InnerEyeDataQuality/configs/models/cxr/resnet.yaml)
|
||||
* Co-teaching resnet training: [InnerEyeDataQuality/configs/models/cxr/resnet_coteaching.yaml](InnerEyeDataQuality/configs/models/cxr/resnet_coteaching.yaml)
|
||||
* Co-teaching from a pretrained SSL checkpoint: [InnerEyeDataQuality/configs/models/cxr/resnet_ssl_coteaching.yaml](InnerEyeDataQuality/configs/models/cxr/resnet_ssl_coteaching.yaml)
|
||||
<br/><br/>
|
||||
|
||||
## Config arguments for label cleaning simulation:
|
||||
|
||||
#### More details about the selector config
|
||||
Here is an example of a selector config, with details about each field:
|
||||
|
||||
* `selector:`
|
||||
* `type`: Which selector to use. There are several options available:
|
||||
* `PosteriorBasedSelectorJoint`: Using the ranking function proposed in the manuscript CE(posteriors, labels) - H(posteriors)
|
||||
* `PosteriorBasedSelector`: Using CE(posteriors, labels) as the ranking function
|
||||
* `GraphBasedSelector`: Graph based selection of the next samples based on the embeddings of each sample.
|
||||
* `BaldSelector`: Selection of the next sample with the BALD objective.
|
||||
* `model_name`: The name that will be used for the legend of the simulation plot
|
||||
* `model_config_path`: Path to the config file used to train the selection model.
|
||||
* `use_active_relabelling`: Whether to turn on the active component of the active learning framework. If set to True, the model will be retrained regularly during the selection process.
|
||||
* `output_directory`: Optional field where you can specify the output directory in which to store the results.
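For reference, a complete selector config assembled from these fields might look like the following sketch (the model name, config path and output directory are placeholders):

```yaml
selector:
  type: ['PosteriorBasedSelector']
  model_name: 'My selection model'
  model_config_path: 'InnerEyeDataQuality/configs/models/cxr/resnet.yaml'
  use_active_relabelling: False
  output_directory: 'outputs/my_simulation'
tensorboard:
  save_events: False
```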
|
||||
|
||||
|
||||
#### Off-the-shelf simulation configs
|
||||
* We provide the configs for various selectors for the CIFAR10H experiments in the [configs/selection/cifar10h_noise_15](InnerEyeDataQuality/configs/selection/cifar10h_noise_15) and in the [configs/selection/cifar10h_noise_30](InnerEyeDataQuality/configs/selection/cifar10h_noise_30) folders.
|
||||
* And likewise for the NoisyChestXray dataset, you can find a set of selector configs in the [configs/selection/cxr](InnerEyeDataQuality/configs/selection/cxr) folder.
|
||||
<br/><br/>
|
||||
|
||||
## Configs for self-supervised model training:
|
||||
|
||||
CIFAR10H: To pretrain embeddings with contrastive learning on CIFAR10H you can use the
|
||||
[cifar10h_byol.yaml](InnerEyeDataQuality/deep_learning/self_supervised/configs/cifar10h_byol.yaml) or the [cifar10h_simclr.yaml](InnerEyeDataQuality/deep_learning/self_supervised/configs/cifar10h_simclr.yaml) config files.
|
||||
|
||||
Chest X-ray: Provided that you have downloaded the dataset as explained in the Benchmark Datasets > Other Chest Xray Datasets > NIH Datasets > Pre-requisites section, you can easily launch an unsupervised pretraining run on the full NIH dataset using the [nih_byol.yaml](InnerEyeDataQuality/deep_learning/self_supervised/configs/nih_byol.yaml) or the [nih_simclr.yaml](InnerEyeDataQuality/deep_learning/self_supervised/configs/nih_simclr.yaml)
|
||||
configs. Don't forget to update the `dataset_dir` field of your config to reflect your local path.
|
|
@ -1,48 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
from typing import Dict, List, Optional, Union
|
||||
|
||||
import yacs.config
|
||||
|
||||
|
||||
class ConfigNode(yacs.config.CfgNode):
|
||||
def __init__(self, init_dict: Optional[Dict] = None, key_list: Optional[List] = None, new_allowed: bool = False):
|
||||
super().__init__(init_dict, key_list, new_allowed)
|
||||
|
||||
def __str__(self) -> str:
|
||||
def _indent(s_: str, num_spaces: int) -> Union[str, List[str]]:
|
||||
s = s_.split('\n')
|
||||
if len(s) == 1:
|
||||
return s_
|
||||
first = s.pop(0)
|
||||
s = [(num_spaces * ' ') + line for line in s]
|
||||
s = '\n'.join(s) # type: ignore
|
||||
s = first + '\n' + s # type: ignore
|
||||
return s
|
||||
|
||||
r = ''
|
||||
s = []
|
||||
for k, v in self.items():
|
||||
separator = '\n' if isinstance(v, ConfigNode) else ' '
|
||||
if isinstance(v, str) and not v:
|
||||
v = '\'\''
|
||||
attr_str = f'{str(k)}:{separator}{str(v)}'
|
||||
attr_str = _indent(attr_str, 2) # type: ignore
|
||||
s.append(attr_str)
|
||||
r += '\n'.join(s)
|
||||
return r
|
||||
|
||||
def as_dict(self) -> Dict:
|
||||
def convert_to_dict(node: ConfigNode) -> Dict:
|
||||
if not isinstance(node, ConfigNode):
|
||||
return node
|
||||
else:
|
||||
dic = dict()
|
||||
for k, v in node.items():
|
||||
dic[k] = convert_to_dict(v)
|
||||
return dic
|
||||
|
||||
return convert_to_dict(self)
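# Illustrative example (not part of the original file): nested dictionaries are converted to
# ConfigNode instances, printed with indentation by the custom __str__ above, and converted
# back to plain dictionaries by as_dict().
#   node = ConfigNode({'train': {'batch_size': 128, 'optimizer': 'sgd'}})
#   print(node)      # -> train:
#                    #      batch_size: 128
#                    #      optimizer: sgd
#   node.as_dict()   # -> {'train': {'batch_size': 128, 'optimizer': 'sgd'}}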
|
|
@ -1,205 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
from .config_node import ConfigNode
|
||||
|
||||
config = ConfigNode()
|
||||
|
||||
config.pretty_name = ""
|
||||
config.device = 'cuda'
|
||||
|
||||
# cuDNN
|
||||
config.cudnn = ConfigNode()
|
||||
config.cudnn.benchmark = True
|
||||
config.cudnn.deterministic = False
|
||||
|
||||
config.dataset = ConfigNode()
|
||||
config.dataset.name = 'CIFAR10'
|
||||
config.dataset.dataset_dir = ''
|
||||
config.dataset.image_size = 32
|
||||
config.dataset.n_channels = 3
|
||||
config.dataset.n_classes = 10
|
||||
config.dataset.num_samples = None
|
||||
config.dataset.noise_temperature = 1.0
|
||||
config.dataset.noise_offset = 0.0
|
||||
config.dataset.noise_rate = None
|
||||
config.dataset.csv_to_ignore = None
|
||||
config.dataset.cxr_consolidation_noise_rate = 0.10
|
||||
|
||||
config.model = ConfigNode()
|
||||
# options: 'cifar', 'imagenet'
|
||||
# Use 'cifar' for small input images
|
||||
config.model.type = 'cifar'
|
||||
config.model.name = 'resnet_preact'
|
||||
config.model.init_mode = 'kaiming_fan_out'
|
||||
config.model.use_dropout = False
|
||||
|
||||
config.model.resnet = ConfigNode()
|
||||
config.model.resnet.depth = 110 # for cifar type model
|
||||
config.model.resnet.n_blocks = [2, 2, 2, 2] # for imagenet type model
|
||||
config.model.resnet.block_type = 'basic'
|
||||
config.model.resnet.initial_channels = 16
|
||||
config.model.resnet.apply_l2_norm = False # if set to True, last activations are l2-normalised.
|
||||
|
||||
config.model.densenet = ConfigNode()
|
||||
config.model.densenet.depth = 100 # for cifar type model
|
||||
config.model.densenet.n_blocks = [6, 12, 24, 16] # for imagenet type model
|
||||
config.model.densenet.block_type = 'bottleneck'
|
||||
config.model.densenet.growth_rate = 12
|
||||
config.model.densenet.drop_rate = 0.0
|
||||
config.model.densenet.compression_rate = 0.5
|
||||
|
||||
config.train = ConfigNode()
|
||||
config.train.root_dir = ''
|
||||
config.train.checkpoint = ''
|
||||
config.train.resume_epoch = 0
|
||||
config.train.restore_scheduler = True
|
||||
config.train.batch_size = 128
|
||||
# optimizer (options: sgd, adam)
|
||||
config.train.optimizer = 'sgd'
|
||||
config.train.base_lr = 0.1
|
||||
config.train.momentum = 0.9
|
||||
config.train.nesterov = True
|
||||
config.train.weight_decay = 1e-4
|
||||
config.train.start_epoch = 0
|
||||
config.train.seed = 0
|
||||
config.train.pretrained = False
|
||||
config.train.no_weight_decay_on_bn = False
|
||||
config.train.use_balanced_sampler = False
|
||||
|
||||
config.train.output_dir = 'experiments/exp00'
|
||||
config.train.log_period = 100
|
||||
config.train.checkpoint_period = 10
|
||||
|
||||
config.train.use_elr = False
|
||||
|
||||
# co_teaching defaults
|
||||
config.train.use_co_teaching = False
|
||||
config.train.use_teacher_model = False
|
||||
config.train.co_teaching_consistency_loss = False
|
||||
config.train.co_teaching_forget_rate = 0.2
|
||||
config.train.co_teaching_num_gradual = 10
|
||||
config.train.co_teaching_use_graph = False
|
||||
config.train.co_teaching_num_warmup = 25
|
||||
|
||||
# self-supervision defaults
|
||||
config.train.use_self_supervision = False
|
||||
config.train.self_supervision = ConfigNode()
|
||||
config.train.self_supervision.checkpoints = ['', '']
|
||||
config.train.self_supervision.freeze_encoder = True
|
||||
config.train.tanh_regularisation = 0.0
|
||||
|
||||
# optimizer
|
||||
config.optim = ConfigNode()
|
||||
# Adam
|
||||
config.optim.adam = ConfigNode()
|
||||
config.optim.adam.betas = (0.9, 0.999)
|
||||
|
||||
# scheduler
|
||||
config.scheduler = ConfigNode()
|
||||
config.scheduler.epochs = 160
|
||||
|
||||
# warm up (options: none, linear, exponential)
|
||||
config.scheduler.warmup = ConfigNode()
|
||||
config.scheduler.warmup.type = 'none'
|
||||
config.scheduler.warmup.epochs = 0
|
||||
config.scheduler.warmup.start_factor = 1e-3
|
||||
config.scheduler.warmup.exponent = 4
|
||||
|
||||
# main scheduler (options: constant, linear, multistep, cosine)
|
||||
config.scheduler.type = 'multistep'
|
||||
config.scheduler.milestones = [80, 120]
|
||||
config.scheduler.lr_decay = 0.1
|
||||
config.scheduler.lr_min_factor = 0.001
|
||||
|
||||
# tensorboard
|
||||
config.tensorboard = ConfigNode()
|
||||
config.tensorboard.save_events = True
|
||||
|
||||
# train data loader
|
||||
config.train.dataloader = ConfigNode()
|
||||
config.train.dataloader.num_workers = 2
|
||||
config.train.dataloader.drop_last = True
|
||||
config.train.dataloader.pin_memory = False
|
||||
config.train.dataloader.non_blocking = False
|
||||
|
||||
# validation data loader
|
||||
config.validation = ConfigNode()
|
||||
config.validation.batch_size = 256
|
||||
config.validation.dataloader = ConfigNode()
|
||||
config.validation.dataloader.num_workers = 2
|
||||
config.validation.dataloader.drop_last = False
|
||||
config.validation.dataloader.pin_memory = False
|
||||
config.validation.dataloader.non_blocking = False
|
||||
|
||||
config.augmentation = ConfigNode()
|
||||
config.augmentation.use_random_crop = True
|
||||
config.augmentation.use_random_horizontal_flip = True
|
||||
config.augmentation.use_random_affine = False
|
||||
config.augmentation.use_label_smoothing = False
|
||||
config.augmentation.use_random_color = False
|
||||
config.augmentation.add_gaussian_noise = False
|
||||
config.augmentation.use_gamma_transform = False
|
||||
config.augmentation.use_random_erasing = False
|
||||
config.augmentation.use_elastic_transform = False
|
||||
|
||||
config.augmentation.elastic_transform = ConfigNode()
|
||||
config.augmentation.elastic_transform.sigma = 4
|
||||
config.augmentation.elastic_transform.alpha = 35
|
||||
config.augmentation.elastic_transform.p_apply = 0.5
|
||||
|
||||
config.augmentation.random_crop = ConfigNode()
|
||||
config.augmentation.random_crop.scale = (0.9, 1.0)
|
||||
|
||||
config.augmentation.gaussian_noise = ConfigNode()
|
||||
config.augmentation.gaussian_noise.std = 0.01
|
||||
config.augmentation.gaussian_noise.p_apply = 0.5
|
||||
|
||||
config.augmentation.random_horizontal_flip = ConfigNode()
|
||||
config.augmentation.random_horizontal_flip.prob = 0.5
|
||||
|
||||
config.augmentation.random_affine = ConfigNode()
|
||||
config.augmentation.random_affine.max_angle = 0
|
||||
config.augmentation.random_affine.max_horizontal_shift = 0.0
|
||||
config.augmentation.random_affine.max_vertical_shift = 0.0
|
||||
config.augmentation.random_affine.max_shear = 5
|
||||
|
||||
config.augmentation.random_color = ConfigNode()
|
||||
config.augmentation.random_color.brightness = 0.0
|
||||
config.augmentation.random_color.contrast = 0.1
|
||||
config.augmentation.random_color.saturation = 0.1
|
||||
|
||||
config.augmentation.gamma = ConfigNode()
|
||||
config.augmentation.gamma.scale = (0.5, 1.5)
|
||||
|
||||
config.augmentation.label_smoothing = ConfigNode()
|
||||
config.augmentation.label_smoothing.epsilon = 0.1
|
||||
|
||||
config.augmentation.random_erasing = ConfigNode()
|
||||
config.augmentation.random_erasing.scale = (0.01, 0.1)
|
||||
config.augmentation.random_erasing.ratio = (0.3, 3.3)
|
||||
|
||||
config.preprocess = ConfigNode()
|
||||
config.preprocess.use_resize = False
|
||||
config.preprocess.use_center_crop = False
|
||||
config.preprocess.center_crop_size = None
|
||||
config.preprocess.histogram_normalization = ConfigNode()
|
||||
config.preprocess.histogram_normalization.disk_size = 30
|
||||
config.preprocess.resize = 32
|
||||
|
||||
# test config
|
||||
config.test = ConfigNode()
|
||||
config.test.checkpoint = None
|
||||
config.test.output_dir = None
|
||||
config.test.batch_size = 256
|
||||
|
||||
# test data loader
|
||||
config.test.dataloader = ConfigNode()
|
||||
config.test.dataloader.num_workers = 2
|
||||
config.test.dataloader.pin_memory = False
|
||||
|
||||
|
||||
def get_default_model_config() -> ConfigNode:
|
||||
return config.clone()
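# Illustrative usage (not part of the original file): a typical yacs-style workflow overlays one
# of the experiment YAML files on top of these defaults. merge_from_file() and freeze() are
# standard yacs CfgNode methods; the path below is hypothetical.
#   config = get_default_model_config()
#   config.merge_from_file('path/to/experiment_config.yaml')
#   config.freeze()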
|
|
@ -1,75 +0,0 @@
|
|||
device: cuda
|
||||
cudnn:
|
||||
benchmark: True
|
||||
deterministic: True
|
||||
dataset:
|
||||
name: CIFAR10IDN
|
||||
num_samples: 10000
|
||||
noise_rate: 0.4
|
||||
model:
|
||||
type: cifar
|
||||
name: resnet
|
||||
init_mode: kaiming_fan_out
|
||||
resnet:
|
||||
depth: 50
|
||||
initial_channels: 16
|
||||
block_type: basic
|
||||
apply_l2_norm: True
|
||||
train:
|
||||
resume_epoch: 0
|
||||
seed: 1
|
||||
batch_size: 256
|
||||
optimizer: sgd
|
||||
base_lr: 0.05
|
||||
momentum: 0.9
|
||||
nesterov: True
|
||||
weight_decay: 1e-4
|
||||
output_dir: experiments/benchmark_3_idn/co_teaching_v5_res50
|
||||
log_period: 100
|
||||
checkpoint_period: 100
|
||||
use_co_teaching: True
|
||||
co_teaching_forget_rate: 0.38
|
||||
co_teaching_num_gradual: 10
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
validation:
|
||||
batch_size: 512
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
test:
|
||||
batch_size: 512
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
pin_memory: False
|
||||
scheduler:
|
||||
epochs: 120
|
||||
type: multistep
|
||||
milestones: [70, 100]
|
||||
lr_decay: 0.1
|
||||
augmentation:
|
||||
use_random_horizontal_flip: True
|
||||
use_label_smoothing: False
|
||||
use_random_affine: False
|
||||
use_random_color: True
|
||||
random_horizontal_flip:
|
||||
prob: 0.5
|
||||
random_affine:
|
||||
max_angle: 15
|
||||
max_horizontal_shift: 0.05
|
||||
max_vertical_shift: 0.05
|
||||
max_shear: 5
|
||||
random_color:
|
||||
brightness: 0.5
|
||||
contrast: 0.5
|
||||
saturation: 0.5
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
selector:
|
||||
type: ['PosteriorBasedSelector']
|
||||
model_name: 'Self-Supervision'
|
||||
model_config_path: 'InnerEyeDataQuality/configs/models/benchmark_3_idn/b3_ssl_cleaning_v2.yaml'
|
||||
output_directory: 'b3_ssl_cleaning_v2'
|
||||
tensorboard:
|
||||
save_events: False
|
|
@ -1,8 +0,0 @@
|
|||
selector:
|
||||
type: ['PosteriorBasedSelector']
|
||||
model_name: 'Self-Supervision-Active'
|
||||
model_config_path: 'InnerEyeDataQuality/configs/models/benchmark_3_idn/b3_ssl_cleaning_v2.yaml'
|
||||
output_directory: 'b3_ssl_cleaning_v2_active'
|
||||
use_active_relabelling: True
|
||||
tensorboard:
|
||||
save_events: False
|
|
@ -1,73 +0,0 @@
|
|||
device: cuda
|
||||
cudnn:
|
||||
benchmark: True
|
||||
deterministic: True
|
||||
dataset:
|
||||
name: CIFAR10IDN
|
||||
num_samples: 10000
|
||||
noise_rate: 0.4
|
||||
model:
|
||||
type: cifar
|
||||
name: resnet
|
||||
init_mode: kaiming_fan_out
|
||||
resnet:
|
||||
depth: 110
|
||||
initial_channels: 16
|
||||
block_type: basic
|
||||
apply_l2_norm: True
|
||||
train:
|
||||
resume_epoch: 0
|
||||
seed: 1
|
||||
batch_size: 256
|
||||
optimizer: sgd
|
||||
base_lr: 0.005
|
||||
momentum: 0.9
|
||||
nesterov: True
|
||||
weight_decay: 1e-3
|
||||
output_dir: experiments/benchmark_3_idn/ssl_cleaning_v2
|
||||
log_period: 100
|
||||
checkpoint_period: 100
|
||||
tanh_regularisation: 0.30
|
||||
use_self_supervision: True
|
||||
self_supervision:
|
||||
checkpoints: ["cifar10h/self_supervised/lightning_logs/BYOL_seed_1/checkpoints/last.ckpt"]
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
validation:
|
||||
batch_size: 512
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
test:
|
||||
batch_size: 128
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
pin_memory: False
|
||||
scheduler:
|
||||
epochs: 120
|
||||
type: multistep
|
||||
milestones: [90]
|
||||
lr_decay: 0.1
|
||||
augmentation:
|
||||
use_random_horizontal_flip: True
|
||||
use_label_smoothing: True
|
||||
use_random_affine: False
|
||||
use_random_color: True
|
||||
random_crop:
|
||||
scale: (0.9, 1.0)
|
||||
random_horizontal_flip:
|
||||
prob: 0.5
|
||||
random_color:
|
||||
brightness: 0.5
|
||||
contrast: 0.5
|
||||
saturation: 0.5
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,73 +0,0 @@
|
|||
device: cuda
|
||||
cudnn:
|
||||
benchmark: True
|
||||
deterministic: True
|
||||
dataset:
|
||||
name: CIFAR10IDN
|
||||
num_samples: 10000
|
||||
noise_rate: 0.4
|
||||
model:
|
||||
type: cifar
|
||||
name: resnet
|
||||
init_mode: kaiming_fan_out
|
||||
resnet:
|
||||
depth: 110
|
||||
initial_channels: 16
|
||||
block_type: basic
|
||||
apply_l2_norm: True
|
||||
train:
|
||||
resume_epoch: 0
|
||||
seed: 1
|
||||
batch_size: 256
|
||||
optimizer: sgd
|
||||
base_lr: 0.005
|
||||
momentum: 0.9
|
||||
nesterov: True
|
||||
weight_decay: 1e-4
|
||||
output_dir: experiments/benchmark_3_idn/ssl_v1
|
||||
log_period: 100
|
||||
checkpoint_period: 100
|
||||
tanh_regularisation: 0.00
|
||||
use_self_supervision: True
|
||||
self_supervision:
|
||||
checkpoints: ["cifar10h/self_supervised/lightning_logs/BYOL_seed_1/checkpoints/last.ckpt"]
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
validation:
|
||||
batch_size: 512
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
test:
|
||||
batch_size: 128
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
pin_memory: False
|
||||
scheduler:
|
||||
epochs: 120
|
||||
type: multistep
|
||||
milestones: [90]
|
||||
lr_decay: 0.1
|
||||
augmentation:
|
||||
use_random_horizontal_flip: True
|
||||
use_label_smoothing: False
|
||||
use_random_affine: False
|
||||
use_random_color: True
|
||||
random_crop:
|
||||
scale: (0.9, 1.0)
|
||||
random_horizontal_flip:
|
||||
prob: 0.5
|
||||
random_color:
|
||||
brightness: 0.5
|
||||
contrast: 0.5
|
||||
saturation: 0.5
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,72 +0,0 @@
|
|||
device: cuda
|
||||
cudnn:
|
||||
benchmark: True
|
||||
deterministic: True
|
||||
dataset:
|
||||
name: CIFAR10IDN
|
||||
num_samples: 10000
|
||||
noise_rate: 0.4
|
||||
model:
|
||||
type: cifar
|
||||
name: resnet
|
||||
init_mode: kaiming_fan_out
|
||||
resnet:
|
||||
depth: 50
|
||||
initial_channels: 16
|
||||
block_type: basic
|
||||
apply_l2_norm: True
|
||||
train:
|
||||
resume_epoch: 0
|
||||
seed: 1
|
||||
batch_size: 256
|
||||
optimizer: sgd
|
||||
base_lr: 0.05
|
||||
momentum: 0.9
|
||||
nesterov: True
|
||||
weight_decay: 1e-4
|
||||
output_dir: experiments/benchmark_3_idn/vanilla_model_res50
|
||||
log_period: 100
|
||||
checkpoint_period: 100
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
validation:
|
||||
batch_size: 512
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
test:
|
||||
batch_size: 512
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
pin_memory: False
|
||||
scheduler:
|
||||
epochs: 120
|
||||
type: multistep
|
||||
milestones: [70, 100]
|
||||
lr_decay: 0.1
|
||||
augmentation:
|
||||
use_random_horizontal_flip: True
|
||||
use_label_smoothing: False
|
||||
use_random_affine: False
|
||||
use_random_color: True
|
||||
random_horizontal_flip:
|
||||
prob: 0.5
|
||||
random_affine:
|
||||
max_angle: 15
|
||||
max_horizontal_shift: 0.05
|
||||
max_vertical_shift: 0.05
|
||||
max_shear: 5
|
||||
random_color:
|
||||
brightness: 0.5
|
||||
contrast: 0.5
|
||||
saturation: 0.5
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,72 +0,0 @@
|
|||
device: cuda
|
||||
cudnn:
|
||||
benchmark: True
|
||||
deterministic: True
|
||||
dataset:
|
||||
name: CIFAR10H
|
||||
noise_temperature: 2.0
|
||||
num_samples: 5000
|
||||
model:
|
||||
type: cifar
|
||||
name: resnet
|
||||
init_mode: kaiming_fan_out
|
||||
resnet:
|
||||
depth: 110
|
||||
initial_channels: 16
|
||||
block_type: basic
|
||||
apply_l2_norm: True
|
||||
train:
|
||||
resume_epoch: 0
|
||||
seed: 1
|
||||
batch_size: 256
|
||||
optimizer: sgd
|
||||
base_lr: 0.1
|
||||
momentum: 0.9
|
||||
nesterov: True
|
||||
weight_decay: 1e-4
|
||||
output_dir: cifar10h_noise_15/vanilla_model
|
||||
log_period: 100
|
||||
checkpoint_period: 100
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
validation:
|
||||
batch_size: 512
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
test:
|
||||
batch_size: 512
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
pin_memory: False
|
||||
scheduler:
|
||||
epochs: 160
|
||||
type: multistep
|
||||
milestones: [80, 120]
|
||||
lr_decay: 0.1
|
||||
augmentation:
|
||||
use_random_horizontal_flip: True
|
||||
use_label_smoothing: False
|
||||
use_random_affine: True
|
||||
use_random_color: True
|
||||
random_horizontal_flip:
|
||||
prob: 0.5
|
||||
random_affine:
|
||||
max_angle: 15
|
||||
max_horizontal_shift: 0.05
|
||||
max_vertical_shift: 0.05
|
||||
max_shear: 5
|
||||
random_color:
|
||||
brightness: 0.5
|
||||
contrast: 0.5
|
||||
saturation: 0.5
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,75 +0,0 @@
|
|||
device: cuda
|
||||
cudnn:
|
||||
benchmark: True
|
||||
deterministic: True
|
||||
dataset:
|
||||
name: CIFAR10H
|
||||
noise_temperature: 2.0
|
||||
num_samples: 5000
|
||||
model:
|
||||
type: cifar
|
||||
name: resnet
|
||||
init_mode: kaiming_fan_out
|
||||
resnet:
|
||||
depth: 110
|
||||
initial_channels: 16
|
||||
block_type: basic
|
||||
apply_l2_norm: True
|
||||
train:
|
||||
resume_epoch: 0
|
||||
seed: 1
|
||||
batch_size: 256
|
||||
optimizer: sgd
|
||||
base_lr: 0.1
|
||||
momentum: 0.9
|
||||
nesterov: True
|
||||
weight_decay: 1e-4
|
||||
output_dir: cifar10h_noise_15/co_teaching
|
||||
log_period: 100
|
||||
checkpoint_period: 100
|
||||
use_co_teaching: True
|
||||
co_teaching_forget_rate: 0.15
|
||||
co_teaching_num_gradual: 10
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
validation:
|
||||
batch_size: 2048
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
test:
|
||||
batch_size: 128
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
pin_memory: False
|
||||
scheduler:
|
||||
epochs: 160
|
||||
type: multistep
|
||||
milestones: [80, 120]
|
||||
lr_decay: 0.1
|
||||
augmentation:
|
||||
use_random_horizontal_flip: True
|
||||
use_label_smoothing: False
|
||||
use_random_affine: True
|
||||
use_random_color: True
|
||||
random_horizontal_flip:
|
||||
prob: 0.5
|
||||
random_affine:
|
||||
max_angle: 15
|
||||
max_horizontal_shift: 0.05
|
||||
max_vertical_shift: 0.05
|
||||
max_shear: 5
|
||||
random_color:
|
||||
brightness: 0.5
|
||||
contrast: 0.5
|
||||
saturation: 0.5
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,64 +0,0 @@
|
|||
device: cuda
|
||||
cudnn:
|
||||
benchmark: True
|
||||
deterministic: True
|
||||
dataset:
|
||||
name: CIFAR10H
|
||||
noise_temperature: 2.0
|
||||
num_samples: 5000
|
||||
train:
|
||||
resume_epoch: 0
|
||||
seed: 2
|
||||
batch_size: 256
|
||||
optimizer: sgd
|
||||
base_lr: 0.01
|
||||
momentum: 0.9
|
||||
nesterov: True
|
||||
weight_decay: 1e-3
|
||||
output_dir: experiments/cifar10h_noise_15/vanilla_self_supervision_v3
|
||||
log_period: 100
|
||||
checkpoint_period: 100
|
||||
tanh_regularisation: 0.30
|
||||
use_self_supervision: True
|
||||
self_supervision:
|
||||
checkpoints: ["cifar10h/self_supervised/lightning_logs/BYOL_seed_1/checkpoints/last.ckpt"]
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
validation:
|
||||
batch_size: 512
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
test:
|
||||
batch_size: 128
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
pin_memory: False
|
||||
scheduler:
|
||||
epochs: 120
|
||||
type: multistep
|
||||
milestones: [90]
|
||||
lr_decay: 0.1
|
||||
augmentation:
|
||||
use_random_horizontal_flip: True
|
||||
use_label_smoothing: True
|
||||
use_random_affine: False
|
||||
use_random_color: True
|
||||
random_crop:
|
||||
scale: (0.9, 1.0)
|
||||
random_horizontal_flip:
|
||||
prob: 0.5
|
||||
random_color:
|
||||
brightness: 0.5
|
||||
contrast: 0.5
|
||||
saturation: 0.5
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,72 +0,0 @@
|
|||
device: cuda
|
||||
cudnn:
|
||||
benchmark: True
|
||||
deterministic: True
|
||||
dataset:
|
||||
name: CIFAR10H
|
||||
noise_temperature: 10.0
|
||||
num_samples: 5000
|
||||
model:
|
||||
type: cifar
|
||||
name: resnet
|
||||
init_mode: kaiming_fan_out
|
||||
resnet:
|
||||
depth: 110
|
||||
initial_channels: 16
|
||||
block_type: basic
|
||||
apply_l2_norm: True
|
||||
train:
|
||||
resume_epoch: 0
|
||||
seed: 1
|
||||
batch_size: 256
|
||||
optimizer: sgd
|
||||
base_lr: 0.1
|
||||
momentum: 0.9
|
||||
nesterov: True
|
||||
weight_decay: 1e-4
|
||||
output_dir: experiments/cifar10h_noise_30/vanilla_model
|
||||
log_period: 100
|
||||
checkpoint_period: 100
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
validation:
|
||||
batch_size: 512
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
test:
|
||||
batch_size: 512
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
pin_memory: False
|
||||
scheduler:
|
||||
epochs: 160
|
||||
type: multistep
|
||||
milestones: [80, 120]
|
||||
lr_decay: 0.1
|
||||
augmentation:
|
||||
use_random_horizontal_flip: True
|
||||
use_label_smoothing: False
|
||||
use_random_affine: True
|
||||
use_random_color: True
|
||||
random_horizontal_flip:
|
||||
prob: 0.5
|
||||
random_affine:
|
||||
max_angle: 15
|
||||
max_horizontal_shift: 0.05
|
||||
max_vertical_shift: 0.05
|
||||
max_shear: 5
|
||||
random_color:
|
||||
brightness: 0.5
|
||||
contrast: 0.5
|
||||
saturation: 0.5
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,75 +0,0 @@
|
|||
device: cuda
|
||||
cudnn:
|
||||
benchmark: True
|
||||
deterministic: True
|
||||
dataset:
|
||||
name: CIFAR10H
|
||||
noise_temperature: 10.0
|
||||
num_samples: 5000
|
||||
model:
|
||||
type: cifar
|
||||
name: resnet
|
||||
init_mode: kaiming_fan_out
|
||||
resnet:
|
||||
depth: 110
|
||||
initial_channels: 16
|
||||
block_type: basic
|
||||
apply_l2_norm: True
|
||||
train:
|
||||
resume_epoch: 0
|
||||
seed: 1
|
||||
batch_size: 256
|
||||
optimizer: sgd
|
||||
base_lr: 0.1
|
||||
momentum: 0.9
|
||||
nesterov: True
|
||||
weight_decay: 1e-4
|
||||
output_dir: experiments/cifar10h_noise_30/co_teaching
|
||||
log_period: 100
|
||||
checkpoint_period: 100
|
||||
use_co_teaching: True
|
||||
co_teaching_forget_rate: 0.28
|
||||
co_teaching_num_gradual: 10
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
validation:
|
||||
batch_size: 2048
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
test:
|
||||
batch_size: 128
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
pin_memory: False
|
||||
scheduler:
|
||||
epochs: 160
|
||||
type: multistep
|
||||
milestones: [80, 120]
|
||||
lr_decay: 0.1
|
||||
augmentation:
|
||||
use_random_horizontal_flip: True
|
||||
use_label_smoothing: False
|
||||
use_random_affine: True
|
||||
use_random_color: True
|
||||
random_horizontal_flip:
|
||||
prob: 0.5
|
||||
random_affine:
|
||||
max_angle: 15
|
||||
max_horizontal_shift: 0.05
|
||||
max_vertical_shift: 0.05
|
||||
max_shear: 5
|
||||
random_color:
|
||||
brightness: 0.5
|
||||
contrast: 0.5
|
||||
saturation: 0.5
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,67 +0,0 @@
|
|||
device: cuda:2
|
||||
cudnn:
|
||||
benchmark: True
|
||||
deterministic: True
|
||||
dataset:
|
||||
name: CIFAR10H
|
||||
noise_temperature: 10.0
|
||||
num_samples: 5000
|
||||
model:
|
||||
type: cifar
|
||||
name: resnet
|
||||
init_mode: kaiming_fan_out
|
||||
use_dropout: False
|
||||
resnet:
|
||||
depth: 110
|
||||
initial_channels: 16
|
||||
block_type: basic
|
||||
apply_l2_norm: True
|
||||
train:
|
||||
use_elr: True
|
||||
resume_epoch: 0
|
||||
seed: 0
|
||||
batch_size: 256
|
||||
optimizer: sgd
|
||||
base_lr: 0.1
|
||||
momentum: 0.9
|
||||
nesterov: True
|
||||
weight_decay: 1e-4
|
||||
output_dir: experiments/cifar10h_noise_30/elr
|
||||
log_period: 100
|
||||
checkpoint_period: 100
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
validation:
|
||||
batch_size: 512
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
test:
|
||||
batch_size: 512
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
pin_memory: False
|
||||
scheduler:
|
||||
epochs: 160
|
||||
type: multistep
|
||||
milestones: [80, 120]
|
||||
lr_decay: 0.1
|
||||
augmentation:
|
||||
use_random_horizontal_flip: True
|
||||
use_label_smoothing: False
|
||||
use_random_affine: True
|
||||
use_random_color: True
|
||||
random_horizontal_flip:
|
||||
prob: 0.5
|
||||
random_affine:
|
||||
max_angle: 15
|
||||
max_horizontal_shift: 0.05
|
||||
max_vertical_shift: 0.05
|
||||
max_shear: 5
|
||||
random_color:
|
||||
brightness: 0.5
|
||||
contrast: 0.5
|
||||
saturation: 0.5
|
|
@ -1,64 +0,0 @@
|
|||
device: cuda
|
||||
cudnn:
|
||||
benchmark: True
|
||||
deterministic: True
|
||||
dataset:
|
||||
name: CIFAR10H
|
||||
noise_temperature: 10.0
|
||||
num_samples: 5000
|
||||
train:
|
||||
resume_epoch: 0
|
||||
seed: 2
|
||||
batch_size: 256
|
||||
optimizer: sgd
|
||||
base_lr: 0.01
|
||||
momentum: 0.9
|
||||
nesterov: True
|
||||
weight_decay: 1e-3
|
||||
output_dir: experiments/cifar10h_noise_30/vanilla_self_supervision_v3
|
||||
log_period: 100
|
||||
checkpoint_period: 100
|
||||
tanh_regularisation: 0.30
|
||||
use_self_supervision: True
|
||||
self_supervision:
|
||||
checkpoints: ["logs/cifar10h/self_supervised/byol_seed_1/checkpoints/last.ckpt"]
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
validation:
|
||||
batch_size: 512
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
test:
|
||||
batch_size: 128
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
pin_memory: False
|
||||
scheduler:
|
||||
epochs: 120
|
||||
type: multistep
|
||||
milestones: [90]
|
||||
lr_decay: 0.1
|
||||
augmentation:
|
||||
use_random_horizontal_flip: True
|
||||
use_label_smoothing: True
|
||||
use_random_affine: False
|
||||
use_random_color: True
|
||||
random_crop:
|
||||
scale: (0.9, 1.0)
|
||||
random_horizontal_flip:
|
||||
prob: 0.5
|
||||
random_color:
|
||||
brightness: 0.5
|
||||
contrast: 0.5
|
||||
saturation: 0.5
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,66 +0,0 @@
|
|||
device: cuda
|
||||
cudnn:
|
||||
benchmark: True
|
||||
deterministic: True
|
||||
dataset:
|
||||
name: CIFAR10H
|
||||
noise_temperature: 10.0
|
||||
num_samples: 5000
|
||||
model:
|
||||
type: cifar
|
||||
name: resnet
|
||||
init_mode: kaiming_fan_out
|
||||
use_dropout: True
|
||||
resnet:
|
||||
depth: 110
|
||||
initial_channels: 16
|
||||
block_type: basic
|
||||
apply_l2_norm: True
|
||||
train:
|
||||
resume_epoch: 0
|
||||
seed: 1
|
||||
batch_size: 256
|
||||
optimizer: sgd
|
||||
base_lr: 0.1
|
||||
momentum: 0.9
|
||||
nesterov: True
|
||||
weight_decay: 1e-4
|
||||
output_dir: experiments/cifar10h_noise_30/vanilla_with_dropout
|
||||
log_period: 100
|
||||
checkpoint_period: 100
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
validation:
|
||||
batch_size: 512
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
test:
|
||||
batch_size: 512
|
||||
dataloader:
|
||||
num_workers: 2
|
||||
pin_memory: False
|
||||
scheduler:
|
||||
epochs: 160
|
||||
type: multistep
|
||||
milestones: [80, 120]
|
||||
lr_decay: 0.1
|
||||
augmentation:
|
||||
use_random_horizontal_flip: True
|
||||
use_label_smoothing: False
|
||||
use_random_affine: True
|
||||
use_random_color: True
|
||||
random_horizontal_flip:
|
||||
prob: 0.5
|
||||
random_affine:
|
||||
max_angle: 15
|
||||
max_horizontal_shift: 0.05
|
||||
max_vertical_shift: 0.05
|
||||
max_shear: 5
|
||||
random_color:
|
||||
brightness: 0.5
|
||||
contrast: 0.5
|
||||
saturation: 0.5
|
|
@ -1,62 +0,0 @@
|
|||
device: cuda:0
|
||||
cudnn:
|
||||
benchmark: True
|
||||
deterministic: True
|
||||
dataset:
|
||||
name: NoisyChestXray
|
||||
dataset_dir: /datadrive/rsna_train
|
||||
n_channels: 3
|
||||
n_classes: 2
|
||||
cxr_consolidation_noise_rate: 0.10
|
||||
model:
|
||||
type: cxr
|
||||
name: resnet50
|
||||
train:
|
||||
use_balanced_sampler: True
|
||||
pretrained: False
|
||||
seed: 5
|
||||
batch_size: 32
|
||||
optimizer: adam
|
||||
base_lr: 1e-5
|
||||
output_dir: cxr/nih10/vanilla
|
||||
log_period: 5
|
||||
checkpoint_period: 20
|
||||
use_co_teaching: False
|
||||
use_teacher_model: False
|
||||
dataloader:
|
||||
num_workers: 6
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
validation:
|
||||
batch_size: 32
|
||||
dataloader:
|
||||
num_workers: 6
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
preprocess:
|
||||
center_crop_size: 224
|
||||
resize: 256
|
||||
scheduler:
|
||||
epochs: 150
|
||||
type: constant
|
||||
augmentation:
|
||||
use_random_horizontal_flip: True
|
||||
use_random_affine: True
|
||||
use_random_color: True
|
||||
use_random_crop: True
|
||||
use_gamma_transform: False
|
||||
use_random_erasing: False
|
||||
add_gaussian_noise: False
|
||||
random_horizontal_flip:
|
||||
prob: 0.5
|
||||
random_affine:
|
||||
max_angle: 30
|
||||
max_horizontal_shift: 0.00
|
||||
max_vertical_shift: 0.00
|
||||
max_shear: 15
|
||||
random_color:
|
||||
brightness: 0.2
|
||||
contrast: 0.2
|
||||
saturation: 0.0
|
||||
random_crop:
|
||||
scale: (0.8, 1.0)
|
|
@ -1,67 +0,0 @@
|
|||
device: cuda:3
|
||||
cudnn:
|
||||
benchmark: True
|
||||
deterministic: True
|
||||
dataset:
|
||||
name: NoisyChestXray
|
||||
dataset_dir: /datadrive/rsna_train
|
||||
n_channels: 3
|
||||
n_classes: 2
|
||||
cxr_consolidation_noise_rate: 0.10
|
||||
model:
|
||||
type: cxr
|
||||
name: resnet50
|
||||
train:
|
||||
use_balanced_sampler: True
|
||||
pretrained: False
|
||||
seed: 5
|
||||
batch_size: 32
|
||||
optimizer: adam
|
||||
base_lr: 1e-5
|
||||
output_dir: cxr/nih10/co_teaching
|
||||
log_period: 5
|
||||
checkpoint_period: 20
|
||||
use_co_teaching: True
|
||||
use_teacher_model: False
|
||||
co_teaching_consistency_loss: False
|
||||
co_teaching_use_graph: False
|
||||
co_teaching_forget_rate: 0.15
|
||||
co_teaching_num_gradual: 0
|
||||
co_teaching_num_warmup: 20
|
||||
dataloader:
|
||||
num_workers: 6
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
validation:
|
||||
batch_size: 16
|
||||
dataloader:
|
||||
num_workers: 6
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
preprocess:
|
||||
center_crop_size: 224
|
||||
resize: 256
|
||||
scheduler:
|
||||
epochs: 150
|
||||
type: constant
|
||||
augmentation:
|
||||
use_random_horizontal_flip: True
|
||||
use_random_affine: True
|
||||
use_random_color: True
|
||||
use_random_crop: True
|
||||
use_gamma_transform: False
|
||||
use_random_erasing: False
|
||||
add_gaussian_noise: False
|
||||
random_horizontal_flip:
|
||||
prob: 0.5
|
||||
random_affine:
|
||||
max_angle: 30
|
||||
max_horizontal_shift: 0.00
|
||||
max_vertical_shift: 0.00
|
||||
max_shear: 15
|
||||
random_color:
|
||||
brightness: 0.2
|
||||
contrast: 0.2
|
||||
saturation: 0.0
|
||||
random_crop:
|
||||
scale: (0.8, 1.0)
|
|
@ -1,72 +0,0 @@
|
|||
device: cuda:3
|
||||
cudnn:
|
||||
benchmark: True
|
||||
deterministic: True
|
||||
dataset:
|
||||
name: NoisyChestXray
|
||||
dataset_dir: /datadrive/rsna_train
|
||||
n_channels: 3
|
||||
n_classes: 2
|
||||
cxr_consolidation_noise_rate: 0.10
|
||||
model:
|
||||
type: cxr
|
||||
name: resnet50
|
||||
train:
|
||||
use_balanced_sampler: True
|
||||
pretrained: False
|
||||
seed: 5
|
||||
batch_size: 32
|
||||
optimizer: adam
|
||||
base_lr: 1e-6
|
||||
output_dir: cxr/nih10/co_ssl
|
||||
log_period: 5
|
||||
checkpoint_period: 20
|
||||
use_co_teaching: True
|
||||
co_teaching_consistency_loss: False
|
||||
co_teaching_use_graph: False
|
||||
co_teaching_forget_rate: 0.15
|
||||
co_teaching_num_gradual: 0
|
||||
co_teaching_num_warmup: 10
|
||||
use_teacher_model: False
|
||||
use_self_supervision: True
|
||||
self_supervision:
|
||||
freeze_encoder: False
|
||||
checkpoints: ["/datadrive/InnerEye-DataQuality/logs/nih/ssup/byol_seed_3/checkpoints/epoch=999.ckpt",
|
||||
"/datadrive/InnerEye-DataQuality/logs/nih/ssup/byol_seed_3/checkpoints/epoch=999.ckpt"]
|
||||
dataloader:
|
||||
num_workers: 6
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
validation:
|
||||
batch_size: 32
|
||||
dataloader:
|
||||
num_workers: 6
|
||||
drop_last: False
|
||||
pin_memory: False
|
||||
scheduler:
|
||||
epochs: 100
|
||||
type: constant
|
||||
preprocess:
|
||||
center_crop_size: 224
|
||||
resize: 256
|
||||
augmentation:
|
||||
use_random_horizontal_flip: True
|
||||
use_random_affine: True
|
||||
use_random_color: True
|
||||
use_random_crop: True
|
||||
use_gamma_transform: False
|
||||
use_random_erasing: False
|
||||
add_gaussian_noise: False
|
||||
random_horizontal_flip:
|
||||
prob: 0.5
|
||||
random_affine:
|
||||
max_angle: 30
|
||||
max_horizontal_shift: 0.00
|
||||
max_vertical_shift: 0.00
|
||||
max_shear: 15
|
||||
random_color:
|
||||
brightness: 0.2
|
||||
contrast: 0.2
|
||||
saturation: 0.0
|
||||
random_crop:
|
||||
scale: (0.8, 1.0)
|
|
@ -1,5 +0,0 @@
|
|||
selector:
|
||||
type: ['PosteriorBasedSelectorJoint']
|
||||
model_name: 'Coteaching'
|
||||
model_config_path: 'InnerEyeDataQuality/configs/models/cifar10h_noise_15/resnet_co_teaching.yaml'
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
selector:
|
||||
type: ['PosteriorBasedSelectorJoint']
|
||||
model_name: 'Vanilla'
|
||||
model_config_path: 'InnerEyeDataQuality/configs/models/cifar10h_noise_15/resnet.yaml'
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
selector:
|
||||
type: ['PosteriorBasedSelectorJoint']
|
||||
model_name: 'Self-Supervision-Joint'
|
||||
model_config_path: 'InnerEyeDataQuality/configs/models/cifar10h_noise_15/resnet_self_supervision_v3.yaml'
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
selector:
|
||||
type: ['PosteriorBasedSelectorJoint']
|
||||
model_name: 'Coteaching'
|
||||
model_config_path: 'InnerEyeDataQuality/configs/models/cifar10h_noise_30/resnet_co_teaching.yaml'
|
||||
output_directory: 'outputs/cifar10h_30/coteaching'
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
selector:
|
||||
type: ['PosteriorBasedSelectorJoint']
|
||||
model_name: 'Coteaching-Active'
|
||||
model_config_path: 'InnerEyeDataQuality/configs/models/cifar10h_noise_30/resnet_co_teaching.yaml'
|
||||
use_active_relabelling: True
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
selector:
|
||||
type: ['PosteriorBasedSelectorJoint']
|
||||
model_name: 'Vanilla'
|
||||
model_config_path: 'InnerEyeDataQuality/configs/models/cifar10h_noise_30/resnet.yaml'
|
||||
output_directory: 'outputs/cifar10h_30/vanilla'
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
selector:
|
||||
type: ['PosteriorBasedSelectorJoint']
|
||||
model_name: 'Vanilla-Active'
|
||||
model_config_path: 'InnerEyeDataQuality/configs/models/cifar10h_noise_30/resnet.yaml'
|
||||
use_active_relabelling: True
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
selector:
|
||||
type: ['BaldSelector']
|
||||
model_name: 'BALD-MC-Active'
|
||||
model_config_path: 'InnerEyeDataQuality/configs/models/cifar10h_noise_30/resnet_with_dropout.yaml'
|
||||
use_active_relabelling: True
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
selector:
|
||||
type: ['PosteriorBasedSelectorJoint']
|
||||
model_name: 'Self-Supervision'
|
||||
model_config_path: 'InnerEyeDataQuality/configs/models/cifar10h_noise_30/resnet_self_supervision_v3.yaml'
|
||||
output_directory: 'outputs/cifar10h_30/ssup_v3'
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
selector:
|
||||
type: ['PosteriorBasedSelectorJoint']
|
||||
model_name: 'Self-Supervision-Active'
|
||||
model_config_path: 'InnerEyeDataQuality/configs/models/cifar10h_noise_30/resnet_self_supervision_v3.yaml'
|
||||
use_active_relabelling: True
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
selector:
|
||||
type: ['PosteriorBasedSelector']
|
||||
model_name: 'Coteaching'
|
||||
model_config_path: 'InnerEyeDataQuality/configs/models/cxr/resnet_coteaching.yaml'
|
|
@ -1,5 +0,0 @@
|
|||
selector:
|
||||
type: ['PosteriorBasedSelector']
|
||||
model_name: 'SSL+Coteaching'
|
||||
model_config_path: 'InnerEyeDataQuality/configs/models/cxr/resnet_ssl_coteaching.yaml'
|
||||
use_active_relabelling: False
|
|
@ -1,5 +0,0 @@
|
|||
selector:
|
||||
type: ['PosteriorBasedSelector']
|
||||
model_name: 'SSL+Coteaching+Active'
|
||||
model_config_path: 'InnerEyeDataQuality/configs/models/cxr/resnet_ssl_coteaching.yaml'
|
||||
use_active_relabelling: True
|
|
@ -1,4 +0,0 @@
|
|||
selector:
|
||||
type: ['PosteriorBasedSelector']
|
||||
model_name: 'Vanilla'
|
||||
model_config_path: 'InnerEyeDataQuality/configs/models/cxr/resnet.yaml'
|
|
@ -1,5 +0,0 @@
|
|||
selector:
|
||||
type: ['PosteriorBasedSelector']
|
||||
model_name: 'Vanilla (heavy aug) Active'
|
||||
model_config_path: 'InnerEyeDataQuality/configs/models/cxr/resnet.yaml'
|
||||
use_active_relabelling: True
|
|
@ -1,30 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
from InnerEyeDataQuality.configs.config_node import ConfigNode
|
||||
|
||||
config = ConfigNode()
|
||||
|
||||
# data selector
|
||||
config.selector = ConfigNode()
|
||||
config.selector.type = None
|
||||
config.selector.model_name = None
|
||||
config.selector.model_config_path = None
|
||||
config.selector.use_active_relabelling = False
|
||||
|
||||
# Other selector parameters (unused)
|
||||
config.selector.training_dynamics_data_path = None
|
||||
config.selector.burnout_period = 0
|
||||
config.selector.number_samples_to_relabel = 10
|
||||
|
||||
# output files
|
||||
config.selector.output_directory = None
|
||||
|
||||
# tensorboard
|
||||
config.tensorboard = ConfigNode()
|
||||
config.tensorboard.save_events = False
|
||||
|
||||
def get_default_selector_config() -> ConfigNode:
|
||||
return config.clone()
|
|
@ -1,140 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
import logging
|
||||
from typing import Callable, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
import torchvision
|
||||
import PIL.Image
|
||||
|
||||
from default_paths import FIGURE_DIR
|
||||
from InnerEyeDataQuality.datasets.label_distribution import LabelDistribution
|
||||
from InnerEyeDataQuality.datasets.label_noise_model import get_cifar10h_confusion_matrix, \
|
||||
get_cifar10_asym_noise_model, get_cifar10_sym_noise_model
|
||||
from InnerEyeDataQuality.utils.plot import plot_confusion_matrix
|
||||
from InnerEyeDataQuality.datasets.cifar10_utils import get_cifar10_label_names
|
||||
|
||||
|
||||
class CIFAR10AsymNoise(torchvision.datasets.CIFAR10):
|
||||
"""
|
||||
Dataset class for the CIFAR10 dataset where target labels are sampled from a confusion matrix.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
root: str,
|
||||
train: bool,
|
||||
transform: Optional[Callable] = None,
|
||||
download: bool = True,
|
||||
use_fixed_labels: bool = True,
|
||||
seed: int = 1234,
|
||||
) -> None:
|
||||
"""
|
||||
:param root: The directory in which the CIFAR10 images will be stored.
|
||||
:param train: If True, creates dataset from training set, otherwise creates from test set.
|
||||
:param transform: Transform to apply to the images.
|
||||
:param download: Whether to download the dataset if it is not already in the local disk.
|
||||
:param use_fixed_labels: If True, labels are sampled only once and kept fixed. If False, labels are re-sampled at
|
||||
each __getitem__() call from the label distribution.
|
||||
:param seed: The random seed that defines which samples are train/test and which labels are sampled.
|
||||
"""
|
||||
super().__init__(root, train=train, transform=transform, target_transform=None, download=download)
|
||||
|
||||
self.seed = seed
|
||||
self.targets = np.array(self.targets, dtype=np.int64) # type: ignore
|
||||
self.num_classes = np.unique(self.targets, return_counts=False).size
|
||||
self.num_samples = len(self.data)
|
||||
self.label_counts = np.eye(self.num_classes, dtype=np.int64)[self.targets]
|
||||
self.np_random_state = np.random.RandomState(seed)
|
||||
self.use_fixed_labels = use_fixed_labels
|
||||
self.clean_targets = np.argmax(self.label_counts, axis=1)
|
||||
logging.info(f"Preparing dataset: CIFAR10-Asym-Noise (N={self.num_samples})")
|
||||
|
||||
# Create label distribution for simulation of label adjudication
|
||||
self.label_distribution = LabelDistribution(seed, self.label_counts, temperature=1.0)
|
||||
|
||||
# Create the label noise transition model (here: symmetric CIFAR-10 noise)
|
||||
self.noise_model = self.create_noise_transition_model(self.targets, self.num_classes, "cifar10_sym")
|
||||
|
||||
# Sample fixed labels from the distribution
|
||||
if use_fixed_labels:
|
||||
self.targets = self.sample_labels_from_model()
|
||||
# Identify label noise cases
|
||||
noise_rate = np.mean(self.clean_targets != self.targets) * 100.0
|
||||
|
||||
# Check the class distribution after sampling
|
||||
class_distribution = self.get_class_frequencies(targets=self.targets, num_classes=self.num_classes)
|
||||
|
||||
# Log dataset details
|
||||
logging.info(f"Class distribution (%) (true labels): {class_distribution * 100.0}")
|
||||
logging.info(f"Label noise rate: {noise_rate}")
|
||||
|
||||
@staticmethod
|
||||
def create_noise_transition_model(labels: np.ndarray, num_classes: int, noise_model: str) -> np.ndarray:
|
||||
logging.info(f"Using {noise_model} label noise model")
|
||||
if noise_model == "cifar10h":
|
||||
transition_matrix = get_cifar10h_confusion_matrix(temperature=2.0)
|
||||
elif noise_model == "cifar10_asym":
|
||||
transition_matrix = get_cifar10_asym_noise_model(eta=0.4)
|
||||
elif noise_model == "cifar10_sym":
|
||||
transition_matrix = get_cifar10_sym_noise_model(eta=0.4)
|
||||
else:
|
||||
raise ValueError("Unknown noise transition model")
|
||||
assert np.all(np.abs(np.sum(transition_matrix, axis=1) - 1.0) < 1e-6)  # Check that each row sums to one.
|
||||
|
||||
# Visualise the noise model
|
||||
plot_confusion_matrix(list(), list(), get_cifar10_label_names(), cm=transition_matrix, save_path=FIGURE_DIR)
|
||||
|
||||
# Compute the expected noise rate
|
||||
assert labels.ndim == 1
|
||||
exp_noise_rate = 1.0 - np.sum(
|
||||
np.diag(transition_matrix) * CIFAR10AsymNoise.get_class_frequencies(labels, num_classes))
|
||||
logging.info(f"Expected noise rate (transition model): {exp_noise_rate}")
|
||||
|
||||
return transition_matrix
|
||||
|
||||
def sample_labels_from_model(self, sample_index: Optional[int] = None) -> np.ndarray:
|
||||
# Sample based on the transition matrix and original labels (labels, transition mat, seed)
|
||||
if sample_index is not None:
|
||||
cur_label = self.targets[sample_index]
|
||||
label = self.np_random_state.choice(self.num_classes, 1, p=self.noise_model[cur_label, :])[0]
|
||||
return label
|
||||
|
||||
noisy_targets = np.zeros_like(self.targets)
|
||||
for ii in range(self.num_samples):
|
||||
cur_label = self.targets[ii]
|
||||
noisy_targets[ii] = self.np_random_state.choice(self.num_classes, 1, p=self.noise_model[cur_label, :])[0]
|
||||
|
||||
return noisy_targets
|
||||
|
||||
@staticmethod
|
||||
def get_class_frequencies(targets: np.ndarray, num_classes: int) -> np.ndarray:
|
||||
"""
|
||||
Returns normalised frequency of each semantic class
|
||||
"""
|
||||
assert targets.ndim == 1
|
||||
class_ids, class_counts = np.unique(targets, return_counts=True)
|
||||
class_distribution = np.zeros(num_classes, dtype=np.float64)
|
||||
for ii in range(num_classes):
|
||||
if np.any(class_ids == ii):
|
||||
class_distribution[ii] = class_counts[class_ids == ii]/targets.size
|
||||
|
||||
return class_distribution
|
||||
|
||||
def __getitem__(self, index: int) -> Tuple[PIL.Image.Image, int]:
|
||||
"""
|
||||
:param index: The index of the sample to be fetched
|
||||
:return: The image and label tensors
|
||||
"""
|
||||
img = PIL.Image.fromarray(self.data[index])
|
||||
if self.transform is not None:
|
||||
img = self.transform(img)
|
||||
|
||||
if self.use_fixed_labels:
|
||||
target = self.targets[index]
|
||||
else:
|
||||
target = self.sample_labels_from_model(sample_index=index)
|
||||
|
||||
return img, int(target)
|
|
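A minimal usage sketch for the CIFAR10AsymNoise class removed above, assuming the InnerEyeDataQuality package at the linked commit is importable (the module path below is an assumption) and that its default FIGURE_DIR exists; torchvision downloads CIFAR-10 into the given root.

import torchvision.transforms as T
from InnerEyeDataQuality.datasets.cifar10_asym_noise import CIFAR10AsymNoise  # module path is an assumption

dataset = CIFAR10AsymNoise(root="data/cifar10",     # CIFAR-10 is downloaded here if missing
                           train=True,
                           transform=T.ToTensor(),
                           use_fixed_labels=True,   # sample noisy labels once and keep them fixed
                           seed=1234)
img, noisy_label = dataset[0]
noise_rate = (dataset.targets != dataset.clean_targets).mean()
print(f"Observed label noise rate: {noise_rate:.2%}")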
@ -1,153 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
import logging
|
||||
from typing import Callable, Optional, Tuple, List, Generator
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import torch
|
||||
import torchvision
|
||||
from InnerEyeDataQuality.datasets.label_distribution import LabelDistribution
|
||||
from InnerEyeDataQuality.datasets.tools import get_instance_noise_model
|
||||
from InnerEyeDataQuality.utils.generic import convert_labels_to_one_hot
|
||||
from pl_bolts.models.self_supervised.resnets import resnet50_bn
|
||||
from InnerEyeDataQuality.datasets.cifar10h import CIFAR10H
|
||||
|
||||
def chunks(lst: List, n: int) -> Generator:
|
||||
"""Yield successive n-sized chunks from lst."""
|
||||
for i in range(0, len(lst), n):
|
||||
yield lst[i:i + n]
|
||||
|
||||
class CIFAR10IDN(torchvision.datasets.CIFAR10):
|
||||
"""
|
||||
Dataset class for the CIFAR10 dataset where target labels are sampled from an instance-dependent noise model.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
root: str,
|
||||
train: bool,
|
||||
noise_rate: float,
|
||||
transform: Optional[Callable] = None,
|
||||
download: bool = True,
|
||||
use_fixed_labels: bool = True,
|
||||
seed: int = 1
|
||||
) -> None:
|
||||
"""
|
||||
:param root: The directory in which the CIFAR10 images will be stored.
|
||||
:param train: If True, creates dataset from training set, otherwise creates from test set.
|
||||
:param transform: Transform to apply to the images.
|
||||
:param download: Whether to download the dataset if it is not already in the local disk.
|
||||
:param noise_rate: Expected noise rate in the sampled labels.
|
||||
:param use_fixed_labels: If true labels are sampled only once and are kept fixed. If false labels are sampled at
|
||||
each __getitem__() call, drawn from the label distribution.
|
||||
:param seed: The random seed that defines which samples are train/test and which labels are sampled.
|
||||
"""
|
||||
super().__init__(root, train=train, transform=transform, target_transform=None, download=download)
|
||||
self.seed = seed
|
||||
self.targets = np.array(self.targets, dtype=np.int64) # type: ignore
|
||||
self.num_classes = np.unique(self.targets, return_counts=False).size
|
||||
self.num_samples = len(self.data)
|
||||
self.clean_targets = np.copy(self.targets)
|
||||
self.np_random_state = np.random.RandomState(seed)
|
||||
self.use_fixed_labels = use_fixed_labels
|
||||
self.indices = np.array(range(self.num_samples))
|
||||
logging.info(f"Preparing dataset: CIFAR10-IDN (N={self.num_samples})")
|
||||
|
||||
# Set seed for torch operations
|
||||
initial_state = torch.get_rng_state()
|
||||
torch.manual_seed(self.seed)
|
||||
torch.cuda.manual_seed_all(self.seed)
|
||||
|
||||
# Collect image embeddings
|
||||
embeddings = self.get_cnn_image_embeddings(self.data)
|
||||
targets = torch.from_numpy(self.targets)
|
||||
|
||||
# Create label distribution for simulation of label adjudication
|
||||
label_counts = convert_labels_to_one_hot(self.clean_targets, n_classes=self.num_classes) if train else \
|
||||
CIFAR10H.download_cifar10h_labels(self.root)
|
||||
self.label_distribution = LabelDistribution(seed, label_counts, temperature=1.0)
|
||||
|
||||
# Build the instance-dependent label noise model
|
||||
self.noise_models = get_instance_noise_model(n=noise_rate,
|
||||
dataset=zip(embeddings, targets),
|
||||
labels=targets,
|
||||
num_classes=self.num_classes,
|
||||
feature_size=embeddings.shape[1],
|
||||
norm_std=0.01,
|
||||
seed=self.seed)
|
||||
|
||||
if self.use_fixed_labels:
|
||||
# Sample target labels
|
||||
self.targets = self.sample_labels_from_model()
|
||||
|
||||
# Check the class distribution after sampling
|
||||
class_distribution = self.get_class_frequencies(targets=self.targets, num_classes=self.num_classes)
|
||||
noise_rate = np.mean(self.clean_targets != self.targets) * 100.0
|
||||
|
||||
# Log dataset details
|
||||
logging.info(f"Class distribution (%) (true labels): {class_distribution * 100.0}")
|
||||
logging.info(f"Label noise rate: {noise_rate}")
|
||||
else:
|
||||
self.targets = None
|
||||
|
||||
# Restore initial state
|
||||
torch.set_rng_state(initial_state)
|
||||
|
||||
def sample_labels_from_model(self, sample_index: Optional[int] = None) -> np.ndarray:
|
||||
"""
|
||||
Samples class labels for each data point based on true label and instance dependent noise model
|
||||
"""
|
||||
classes = [i for i in range(self.num_classes)]
|
||||
if sample_index is not None:
|
||||
_t = self.np_random_state.choice(classes, p=self.noise_models[sample_index])
|
||||
else:
|
||||
_t = [self.np_random_state.choice(classes, p=self.noise_models[i]) for i in range(self.num_samples)]
|
||||
return np.array(_t)
|
||||
|
||||
@staticmethod
|
||||
def get_class_frequencies(targets: np.ndarray, num_classes: int) -> np.ndarray:
|
||||
"""
|
||||
Returns normalised frequency of each semantic class
|
||||
"""
|
||||
assert targets.ndim == 1
|
||||
class_ids, class_counts = np.unique(targets, return_counts=True)
|
||||
class_distribution = np.zeros(num_classes, dtype=np.float64)
|
||||
for ii in range(num_classes):
|
||||
if np.any(class_ids == ii):
|
||||
class_distribution[ii] = class_counts[class_ids == ii]/targets.size
|
||||
|
||||
return class_distribution
|
||||
|
||||
@staticmethod
|
||||
def get_cnn_image_embeddings(data: np.ndarray) -> torch.Tensor:
|
||||
"""
|
||||
Extracts image embeddings using a pre-trained model
|
||||
"""
|
||||
num_samples = data.shape[0]
|
||||
embeddings = list()
|
||||
data = torch.from_numpy(data).float().cuda()
|
||||
encoder = resnet50_bn(return_all_feature_maps=False, pretrained=True).cuda().eval()
|
||||
with torch.no_grad():
|
||||
for i in chunks(list(range(num_samples)), n=100):
|
||||
input = data[i].permute(0, 3, 1, 2)
|
||||
embeddings.append(encoder(input)[-1].cpu())
|
||||
return torch.cat(embeddings, dim=0).view(num_samples, -1)
|
||||
|
||||
def __getitem__(self, index: int) -> Tuple[PIL.Image.Image, int]:
|
||||
"""
|
||||
:param index: The index of the sample to be fetched
|
||||
:return: The image and label tensors
|
||||
"""
|
||||
img = PIL.Image.fromarray(self.data[index])
|
||||
if self.transform is not None:
|
||||
img = self.transform(img)
|
||||
|
||||
if self.use_fixed_labels:
|
||||
target = self.targets[index]
|
||||
else:
|
||||
target = self.sample_labels_from_model(sample_index=index)
|
||||
|
||||
return img, int(target)
|
|
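A usage sketch for the CIFAR10IDN class removed above; it assumes a CUDA device (the embedding extraction calls .cuda()), the pl_bolts dependency, and the InnerEyeDataQuality package from the linked commit, with the module path guessed.

import torchvision.transforms as T
from InnerEyeDataQuality.datasets.cifar10_idn import CIFAR10IDN  # module path is an assumption

# Instance-dependent noise: each image's flip probabilities depend on its CNN embedding.
dataset = CIFAR10IDN(root="data/cifar10",
                     train=True,
                     noise_rate=0.4,            # expected fraction of flipped labels
                     transform=T.ToTensor(),
                     use_fixed_labels=True,
                     seed=1)
img, noisy_label = dataset[10]
print("Noisy label:", noisy_label, "- clean label:", int(dataset.clean_targets[10]))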
@ -1,165 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
import pickle
|
||||
import tarfile
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import requests
|
||||
|
||||
logging.getLogger('matplotlib').setLevel(logging.WARNING)
|
||||
|
||||
|
||||
def split_dataset(labels: np.ndarray,
|
||||
reference_split: float,
|
||||
shuffle: bool = True,
|
||||
seed: int = 1234) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
|
||||
"""
|
||||
:param labels: Complete label array that is split into two subsets, reference and noisy test set.
|
||||
:param reference_split: Ratio of total samples that will be put in the reference set.
|
||||
:param shuffle: If set to true, rows of the label matrix are shuffled prior to split.
|
||||
:param seed: Random seed used in sample shuffle
|
||||
"""
|
||||
# Create two testing sets from CIFAR10H Test Samples (10k - 10 classes)
|
||||
num_samples = labels.shape[0]
|
||||
num_samples_set1 = int(num_samples * reference_split)
|
||||
perm = np.random.RandomState(seed=seed).permutation(num_samples) if shuffle else np.array(range(num_samples))
|
||||
d_set1 = labels[perm[:num_samples_set1], :]
|
||||
d_set2 = labels[perm[num_samples_set1:], :]
|
||||
|
||||
return d_set1, d_set2, perm
|
||||
|
||||
|
||||
def get_cifar10h_labels(reference_split: float = 0.5,
|
||||
shuffle: bool = True,
|
||||
immutable: bool = True) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
:param reference_split: The sample ratio between gold standard test set and full test set
|
||||
:param shuffle: Shuffle samples prior to split.
|
||||
:param immutable: If set to True, the returned arrays are only read only.
|
||||
"""
|
||||
cifar10h_counts = download_cifar10h_data() # Num_samples x n_classes
|
||||
d_split1_standard, d_split2_complete, permutation = split_dataset(cifar10h_counts, reference_split, shuffle)
|
||||
d_split1_permutation = permutation[:d_split1_standard.shape[0]]
|
||||
|
||||
if immutable:
|
||||
d_split1_standard.setflags(write=False)
|
||||
d_split2_complete.setflags(write=False)
|
||||
|
||||
return d_split1_standard, d_split1_permutation
|
||||
|
||||
|
||||
def download_cifar10h_data() -> np.ndarray:
|
||||
"""
|
||||
Pulls cifar10h label data stream and returns it in numpy array.
|
||||
"""
|
||||
|
||||
url = 'https://raw.githubusercontent.com/jcpeterson/cifar-10h/master/data/cifar10h-counts.npy'
|
||||
response = requests.get(url)
|
||||
response.raise_for_status()
|
||||
if response.status_code == requests.codes.ok:
|
||||
cifar10h_data = np.load(io.BytesIO(response.content))
|
||||
else:
|
||||
raise ValueError('CIFAR10H content was not found.')
|
||||
|
||||
return cifar10h_data
|
||||
|
||||
|
||||
def download_cifar10_data() -> Path:
|
||||
"""
|
||||
Download CIFAR10 dataset and returns path to the test set
|
||||
"""
|
||||
import wget
|
||||
local_path = Path.cwd() / 'InnerEyeDataQuality' / 'downloaded_data'
|
||||
local_path_to_test_batch = local_path / 'cifar-10-batches-py/test_batch'
|
||||
|
||||
if not local_path_to_test_batch.exists():
|
||||
local_path.mkdir(parents=True, exist_ok=True)
|
||||
url = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
|
||||
path_to_tar = local_path / 'cifar10.tar.gz'
|
||||
wget.download(url, str(path_to_tar))
|
||||
tf = tarfile.open(str(path_to_tar))
|
||||
tf.extractall(local_path)
|
||||
os.remove(path_to_tar)
|
||||
|
||||
return local_path_to_test_batch
|
||||
|
||||
|
||||
def get_cifar10_label_names(file: Optional[Path] = None) -> List[str]:
|
||||
"""
|
||||
Returns the CIFAR-10 class names, read from a dataset batch file if one is provided.
|
||||
"""
|
||||
if file:
|
||||
dict = load_cifar10_file(file)
|
||||
label_names = [_s.decode("utf-8") for _s in dict[b"label_names"]]
|
||||
else:
|
||||
label_names = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
|
||||
|
||||
return label_names
|
||||
|
||||
|
||||
def load_cifar10_file(file: Path) -> Dict:
|
||||
"""
|
||||
Loads a pickled CIFAR-10 batch file and returns its contents as a dictionary.
|
||||
"""
|
||||
with open(file, 'rb') as fo:
|
||||
dict = pickle.load(fo, encoding='bytes')
|
||||
|
||||
return dict
|
||||
|
||||
|
||||
def plot_cifar10_images(sample_ids: List[int], save_directory: Path) -> None:
|
||||
"""
|
||||
Displays a set of CIFAR-10 Images based on the input sample ids. In the title of the figure,
|
||||
label distribution is displayed as well to understand the sample difficulty.
|
||||
"""
|
||||
|
||||
path_cifar10_test_batch = download_cifar10_data()
|
||||
test_batch = load_cifar10_file(path_cifar10_test_batch)
|
||||
plot_images(test_batch[b'data'], sample_ids, save_directory=save_directory)
|
||||
|
||||
|
||||
def plot_images(images: np.ndarray,
|
||||
selected_sample_ids: List[int],
|
||||
cifar10h_labels: Optional[np.ndarray] = None,
|
||||
label_names: Optional[List[str]] = None,
|
||||
save_directory: Optional[Path] = None) -> None:
|
||||
"""
|
||||
Displays a set of CIFAR-10 Images based on the input sample ids. In the title of the figure,
|
||||
label distribution is displayed as well to understand the sample difficulty.
|
||||
"""
|
||||
|
||||
f, ax = plt.subplots(figsize=(2, 2))
|
||||
for sample_id in selected_sample_ids:
|
||||
img = np.reshape(images[sample_id, :], (3, 32, 32)).transpose(1, 2, 0)
|
||||
ax.imshow(img)
|
||||
|
||||
if (cifar10h_labels is not None) and (label_names is not None):
|
||||
num_k_classes = 3
|
||||
label_distribution = cifar10h_labels[sample_id, :]
|
||||
k_min_val = np.sort(label_distribution)[-num_k_classes]
|
||||
available_classes = np.where(label_distribution >= k_min_val)[0]
|
||||
class_counts = label_distribution[available_classes]
|
||||
class_names = [label_names[_c] for _c in available_classes]
|
||||
ax_title = ''.join([a + '_' + str(b) + ' ' for a, b in zip(class_names, class_counts)])
|
||||
ax.set_title(ax_title)
|
||||
else:
|
||||
ax_title = f'CIFAR10H - Sample ID {sample_id}'
|
||||
ax.set_title(ax_title)
|
||||
|
||||
if save_directory:
|
||||
save_directory.mkdir(parents=True, exist_ok=True)
|
||||
f.savefig(save_directory / f"{sample_id}.png", bbox_inches='tight')
|
||||
ax.clear()
|
||||
plt.close(f)
|
||||
else:
|
||||
plt.show()
|
||||
f, ax = plt.subplots(figsize=(2, 2))
|
|
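A short sketch of two of the helpers above (download_cifar10h_data and split_dataset), assuming the module path used by the other removed files and network access to the CIFAR-10H counts.

from InnerEyeDataQuality.datasets.cifar10_utils import download_cifar10h_data, split_dataset

counts = download_cifar10h_data()        # (10000, 10) annotator label counts for the CIFAR-10 test set
reference_set, noisy_test_set, permutation = split_dataset(counts, reference_split=0.5, shuffle=True, seed=1234)
print(reference_set.shape, noisy_test_set.shape)   # two disjoint halves of the CIFAR-10H labels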
@ -1,184 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
import io
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Callable, List, Optional, Tuple
|
||||
|
||||
import PIL.Image
|
||||
import numpy as np
|
||||
import requests
|
||||
import torchvision
|
||||
|
||||
from InnerEyeDataQuality.datasets.cifar10_utils import get_cifar10_label_names
|
||||
from InnerEyeDataQuality.datasets.label_distribution import LabelDistribution
|
||||
from InnerEyeDataQuality.evaluation.metrics import compute_label_entropy
|
||||
from InnerEyeDataQuality.selection.simulation_statistics import SimulationStats, get_ambiguous_sample_ids
|
||||
from InnerEyeDataQuality.utils.generic import convert_labels_to_one_hot
|
||||
|
||||
|
||||
TOTAL_CIFAR10H_DATASET_SIZE = 10000
|
||||
|
||||
class CIFAR10H(torchvision.datasets.CIFAR10):
|
||||
"""
|
||||
Dataset class for the CIFAR10H dataset. The CIFAR10H dataset is the CIFAR10 test set but all the samples have
|
||||
been labelled by multiple annotators.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
root: str,
|
||||
transform: Optional[Callable] = None,
|
||||
num_samples: Optional[int] = None,
|
||||
preset_indices: Optional[np.ndarray] = None,
|
||||
seed: int = 1234,
|
||||
noise_temperature: float = 1.0,
|
||||
noise_offset: float = 0.0) -> None:
|
||||
"""
|
||||
:param root: The directory in which the CIFAR10 images will be stored
|
||||
:param transform: Transform to apply to the images
|
||||
:param num_samples: The number of samples to use out of a maximum of TOTAL_CIFAR10H_DATASET_SIZE
|
||||
:param seed: The random seed that defines which samples are train/test and which labels are sampled
|
||||
:param preset_indices: Image indices that will be used to create a subset of CIFAR10H dataset.
|
||||
If not specified and num_samples < 10000 then random sub-selection is performed.
|
||||
|
||||
:param noise_temperature: A temperature value used to temperature-scale the label distribution.
|
||||
:param noise_offset: Offset parameter to control the noise rate in sampling initial labels.
|
||||
"""
|
||||
super().__init__(root, train=False, transform=transform, target_transform=None, download=True)
|
||||
num_samples = TOTAL_CIFAR10H_DATASET_SIZE if num_samples is None else num_samples
|
||||
|
||||
self.seed = seed
|
||||
cifar10h_labels = self.download_cifar10h_labels(self.root)
|
||||
self.num_classes = cifar10h_labels.shape[1]
|
||||
assert cifar10h_labels.shape[0] == TOTAL_CIFAR10H_DATASET_SIZE
|
||||
assert self.num_classes == 10
|
||||
assert 0 < num_samples <= TOTAL_CIFAR10H_DATASET_SIZE
|
||||
self.num_samples = num_samples
|
||||
|
||||
# Create the set of sample indices (preset if provided, otherwise randomly selected)
|
||||
self.indices = self.get_dataset_indices(num_samples, cifar10h_labels, keep_hard_samples=True, seed=seed) \
|
||||
if preset_indices is None else preset_indices
|
||||
self.verify_data_indices()
|
||||
self.label_counts = cifar10h_labels[self.indices]
|
||||
self.label_counts.flags.writeable = False
|
||||
self.true_label_entropy = compute_label_entropy(label_counts=self.label_counts)
|
||||
|
||||
self.label_distribution = LabelDistribution(seed, self.label_counts, noise_temperature, noise_offset)
|
||||
self.targets = self.label_distribution.sample_initial_labels_for_all()
|
||||
|
||||
# Check the class distribution
|
||||
_, class_counts = np.unique(self.targets, return_counts=True)
|
||||
class_distribution = np.array([_c/self.num_samples for _c in class_counts])
|
||||
logging.info(f"Preparing dataset: CIFAR10H (N={self.num_samples})")
|
||||
logging.info(f"Class distribution (%) (true labels): {class_distribution * 100.0}")
|
||||
self.clean_targets = np.argmax(self.label_counts, axis=1)
|
||||
# Identify true ambiguous and clear label noise cases
|
||||
self._identify_sample_types()
|
||||
|
||||
def _identify_sample_types(self) -> None:
|
||||
"""
|
||||
Stores and logs clear label noise and ambiguous case types.
|
||||
"""
|
||||
label_stats = SimulationStats(name="cifar10h", true_label_counts=self.label_counts,
|
||||
initial_labels=convert_labels_to_one_hot(self.targets, self.num_classes))
|
||||
self.ambiguous_mislabelled_cases = label_stats.mislabelled_ambiguous_sample_ids[0]
|
||||
self.clear_mislabeled_cases = label_stats.mislabelled_not_ambiguous_sample_ids[0]
|
||||
self.ambiguity_metric_args = {"ambiguous_mislabelled_ids": self.ambiguous_mislabelled_cases,
|
||||
"clear_mislabelled_ids": self.clear_mislabeled_cases,
|
||||
"true_label_entropy": self.true_label_entropy}
|
||||
|
||||
# Log dataset details
|
||||
logging.info(f"Ambiguous mislabeled cases: {100 * len(self.ambiguous_mislabelled_cases) / self.num_samples}%")
|
||||
logging.info(f"Clear mislabeled cases: {100 * len(self.clear_mislabeled_cases) / self.num_samples}%\n")
|
||||
|
||||
@classmethod
|
||||
def download_cifar10h_labels(cls, root: str = ".") -> np.ndarray:
|
||||
"""
|
||||
Pulls cifar10h label data stream and returns it in numpy array.
|
||||
"""
|
||||
try:
|
||||
cifar10h_labels = np.load(Path(root) / "cifar10h-counts.npy")
|
||||
except FileNotFoundError:
|
||||
url = 'https://raw.githubusercontent.com/jcpeterson/cifar-10h/master/data/cifar10h-counts.npy'
|
||||
response = requests.get(url)
|
||||
response.raise_for_status()
|
||||
if response.status_code == requests.codes.ok:
|
||||
cifar10h_labels = np.load(io.BytesIO(response.content))
|
||||
else:
|
||||
raise ValueError('Failed to download CIFAR10H labels!')
|
||||
|
||||
return cifar10h_labels
|
||||
|
||||
def __getitem__(self, index: int) -> Tuple[PIL.Image.Image, int]:
|
||||
"""
|
||||
:param index: The index of the sample to be fetched
|
||||
:return: The image and label tensors
|
||||
"""
|
||||
img = PIL.Image.fromarray(self.data[self.indices[index]])
|
||||
target = self.targets[index]
|
||||
if self.transform is not None:
|
||||
img = self.transform(img)
|
||||
return img, int(target)
|
||||
|
||||
def __len__(self) -> int:
|
||||
"""
|
||||
|
||||
:return: The size of the dataset
|
||||
"""
|
||||
return len(self.indices)
|
||||
|
||||
def get_label_names(self) -> List[str]:
|
||||
return get_cifar10_label_names()
|
||||
|
||||
def verify_data_indices(self) -> None:
|
||||
assert isinstance(self.indices, np.ndarray)
|
||||
assert self.indices.size == self.num_samples
|
||||
assert np.all(self.indices < TOTAL_CIFAR10H_DATASET_SIZE)
|
||||
assert np.all(0 <= self.indices)
|
||||
_, c = np.unique(self.indices, return_counts=True)
|
||||
assert np.all(c == 1)
|
||||
|
||||
def get_dataset_indices(self,
|
||||
num_samples: int,
|
||||
true_label_counts: np.ndarray,
|
||||
keep_hard_samples: bool,
|
||||
seed: int = 1234) -> np.ndarray:
|
||||
"""
|
||||
Function to choose a subset of the CIFAR10H dataset. Returns selected subset of sample
|
||||
indices in a shuffled order.
|
||||
|
||||
:param num_samples: Number of samples in the selected subset.
|
||||
If the full dataset size is specified then indices are just shuffled and returned.
|
||||
:param true_label_counts: True label counts of CIFAR10H images (num_samples x num_classes)
|
||||
:param keep_hard_samples: If set to True, all hard examples are kept in the selected subset of points.
|
||||
:param seed: Random seed used in shuffling data indices.
|
||||
"""
|
||||
random_state = np.random.RandomState(seed=seed)
|
||||
|
||||
assert num_samples <= TOTAL_CIFAR10H_DATASET_SIZE
|
||||
if (not keep_hard_samples) or (num_samples == TOTAL_CIFAR10H_DATASET_SIZE):
|
||||
indices = random_state.permutation(true_label_counts.shape[0])
|
||||
return indices[:num_samples]
|
||||
|
||||
# Identify difficult samples and keep them in the dataset
|
||||
hard_sample_indices = get_ambiguous_sample_ids(true_label_counts)
|
||||
if hard_sample_indices.shape[0] > num_samples:
|
||||
logging.info(f"Total number of hard samples: {hard_sample_indices.shape[0]} and requested: {num_samples}")
|
||||
hard_sample_indices = hard_sample_indices[:num_samples]
|
||||
num_hard_samples = hard_sample_indices.shape[0]
|
||||
|
||||
# Sample the remaining indices randomly and aggregate
|
||||
remaining_indices = np.setdiff1d(range(TOTAL_CIFAR10H_DATASET_SIZE), hard_sample_indices)
|
||||
easy_sample_indices = random_state.choice(remaining_indices, num_samples - num_hard_samples, replace=False)
|
||||
indices = np.concatenate([hard_sample_indices, easy_sample_indices], axis=0)
|
||||
random_state.shuffle(indices)
|
||||
|
||||
# Assert that there are no repeated indices
|
||||
_, _counts = np.unique(indices, return_counts=True)
|
||||
assert not np.any(_counts > 1)
|
||||
|
||||
return indices
|
||||
|
|
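A usage sketch for the CIFAR10H class removed above (its import path appears in the other removed files), assuming the package from the linked commit is importable; the argument values are illustrative.

import torchvision.transforms as T
from InnerEyeDataQuality.datasets.cifar10h import CIFAR10H

dataset = CIFAR10H(root="data/cifar10",
                   transform=T.ToTensor(),
                   num_samples=5000,          # ambiguous samples are kept, the remainder drawn at random
                   noise_temperature=2.0,     # flatten the annotator distribution to increase label noise
                   seed=1234)
print(len(dataset), "samples,",
      len(dataset.clear_mislabeled_cases), "clearly mislabelled,",
      len(dataset.ambiguous_mislabelled_cases), "ambiguous mislabelled")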
@ -1,116 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Callable, List, Optional, Tuple, Union
|
||||
|
||||
import PIL
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pydicom as dicom
|
||||
from PIL import Image
|
||||
from torch.utils.data import Dataset
|
||||
|
||||
KAGGLE_TOTAL_SIZE = 26684
|
||||
|
||||
|
||||
class KaggleCXR(Dataset):
|
||||
def __init__(self,
|
||||
data_directory: str,
|
||||
use_training_split: bool,
|
||||
train_fraction: float = 0.8,
|
||||
seed: int = 1234,
|
||||
shuffle: bool = True,
|
||||
transform: Optional[Callable] = None,
|
||||
num_samples: Optional[int] = None,
|
||||
return_index: bool = True) -> None:
|
||||
"""
|
||||
Class for the full Kaggle RSNA Pneumonia Detection Dataset.
|
||||
|
||||
:param data_directory: the directory containing all training images from the Challenge (stage 1) as well as the
|
||||
dataset.csv containing the kaggle and the original labels.
|
||||
:param use_training_split: whether to return the training or the validation split of the dataset.
|
||||
:param train_fraction: the proportion of samples to use for training
|
||||
:param seed: random seed to use for dataset creation
|
||||
:param shuffle: whether to shuffle the dataset prior to splitting between validation and training
|
||||
:param transform: a preprocessing function that takes a PIL image as input and returns a tensor
|
||||
:param num_samples: number of samples to return (has to be smaller than the dataset split)
|
||||
"""
|
||||
|
||||
self.data_directory = Path(data_directory)
|
||||
if not self.data_directory.exists():
|
||||
logging.error(
|
||||
f"The data directory {self.data_directory} does not exist. Make sure to download to Kaggle data "
|
||||
f"first.The kaggle dataset can "
|
||||
"be acceded via the Kaggle CLI kaggle competitions download -c rsna-pneumonia-detection-challenge or "
|
||||
"on the main page of the challenge "
|
||||
"https://www.kaggle.com/c/rsna-pneumonia-detection-challenge/data?select=stage_2_train_images")
|
||||
|
||||
self.train = use_training_split
|
||||
self.train_fraction = train_fraction
|
||||
self.seed = seed
|
||||
self.random_state = np.random.RandomState(seed)
|
||||
full_dataset = pd.read_csv(self.data_directory / "dataset.csv")
|
||||
self.dataset_dataframe = full_dataset
|
||||
self.transforms = transform
|
||||
|
||||
targets = self.dataset_dataframe.label.values.astype(np.int64)
|
||||
subjects_ids = self.dataset_dataframe.subject.values
|
||||
|
||||
self.num_classes = 2
|
||||
self.num_datapoints = len(self.dataset_dataframe)
|
||||
all_indices = np.arange(len(self.dataset_dataframe))
|
||||
|
||||
# ------------- Split the data into training and validation sets ------------- #
|
||||
num_samples_set1 = int(self.num_datapoints * self.train_fraction)
|
||||
sampled_indices = self.random_state.permutation(all_indices) \
|
||||
if shuffle else all_indices
|
||||
train_indices = sampled_indices[:num_samples_set1]
|
||||
val_indices = sampled_indices[num_samples_set1:]
|
||||
self.indices = train_indices if use_training_split else val_indices
|
||||
|
||||
# ------------- Select subset of current split ------------- #
|
||||
if num_samples is not None:
|
||||
assert 0 < num_samples <= len(self.indices)
|
||||
self.indices = self.indices[:num_samples]
|
||||
|
||||
self.subject_ids = subjects_ids[self.indices]
|
||||
|
||||
self.targets = targets[self.indices].reshape(-1)
|
||||
|
||||
dataset_type = "TRAIN" if use_training_split else "VAL"
|
||||
logging.info(f"Proportion of positive labels - {dataset_type}: {np.mean(self.targets)}")
|
||||
logging.info(f"Number samples - {dataset_type}: {self.targets.shape[0]}")
|
||||
self.return_index = return_index
|
||||
self.weight = np.mean(self.targets)
|
||||
logging.info(f"Weight negative {self.weight:.2f} - weight positive {(1 - self.weight):.2f}")
|
||||
|
||||
def __getitem__(self, index: int) -> Union[Tuple[int, PIL.Image.Image, int], Tuple[PIL.Image.Image, int]]:
|
||||
"""
|
||||
|
||||
:param index: The index of the sample to be fetched
|
||||
:return: The image and label tensors
|
||||
"""
|
||||
subject_id = self.subject_ids[index]
|
||||
filename = self.data_directory / f"{subject_id}.dcm"
|
||||
target = self.targets[index]
|
||||
scan_image = dicom.dcmread(filename).pixel_array
|
||||
scan_image = Image.fromarray(scan_image)
|
||||
if self.transforms is not None:
|
||||
scan_image = self.transforms(scan_image)
|
||||
if self.return_index:
|
||||
return index, scan_image, int(target)
|
||||
return scan_image, int(target)
|
||||
|
||||
def __len__(self) -> int:
|
||||
"""
|
||||
|
||||
:return: The size of the dataset
|
||||
"""
|
||||
return len(self.indices)
|
||||
|
||||
def get_label_names(self) -> List[str]:
|
||||
return ["Normal", "Opacity"]
|
||||
|
|
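A usage sketch for the KaggleCXR class removed above, wrapped in a PyTorch DataLoader; the module path and the data directory layout (stage-1 DICOMs plus dataset.csv, as described in the docstring) are assumptions.

import torchvision.transforms as T
from torch.utils.data import DataLoader
from InnerEyeDataQuality.datasets.kaggle_cxr import KaggleCXR  # module path is an assumption

transform = T.Compose([T.Resize(256), T.CenterCrop(224), T.ToTensor()])
train_set = KaggleCXR(data_directory="data/rsna_pneumonia",   # stage-1 DICOM files and dataset.csv
                      use_training_split=True,
                      transform=transform)
loader = DataLoader(train_set, batch_size=16, shuffle=True, num_workers=4)
indices, images, labels = next(iter(loader))     # return_index=True yields (index, image, label) triples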
@ -1,90 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
import warnings

import numpy as np
|
||||
|
||||
class LabelDistribution(object):
|
||||
"""
|
||||
LabelDistribution class handles sampling from a label distribution with reproducible behavior given a seed
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
seed: int,
|
||||
label_counts: np.ndarray,
|
||||
temperature: float = 1.0,
|
||||
offset: float = 0.0) -> None:
|
||||
"""
|
||||
:param seed: The random seed used to ensure reproducible behaviour
|
||||
:param label_counts: An array of shape (num_samples, num_classes) where each entry represents the number of
|
||||
labels available for each sample and class
|
||||
:param temperature: A temperature value that will be used to temperature-scale the distribution, default is
|
||||
1.0 which is equivalent to no scaling; temperature must be greater than 0.0, values between 0 and 1 will result
|
||||
in a sharper distribution and values greater than 1 in a more uniform distribution over classes.
|
||||
:param offset: Offset parameter to control the noise rate in sampling initial labels.
|
||||
All classes are assigned a uniform fixed offset amount.
|
||||
"""
|
||||
assert label_counts.dtype == np.int64
|
||||
assert label_counts.ndim == 2
|
||||
self.num_classes = label_counts.shape[1]
|
||||
self.num_samples = label_counts.shape[0]
|
||||
self.seed = seed
|
||||
self.random_state = np.random.RandomState(seed)
|
||||
self.label_counts = label_counts
|
||||
self.temperature = temperature
|
||||
|
||||
# make the distribution
|
||||
self.distribution = label_counts / np.sum(label_counts, axis=1, keepdims=True)
|
||||
assert np.isfinite(self.distribution).all()
|
||||
assert self.temperature > 0
|
||||
|
||||
# scale distribution based on temperature and offset
|
||||
_d = np.power(self.distribution, 1. / temperature)
|
||||
_d = _d / np.sum(_d, axis=1, keepdims=True)
|
||||
self.distribution_temp_scaled = self.add_noise_to_distribution(offset, _d, 'asym') if offset > 0.0 else _d
|
||||
|
||||
# check if there are multiple labels per data point
|
||||
self.is_multi_label_per_sample = np.all(np.sum(label_counts, axis=1) > 1.0)
|
||||
|
||||
def sample_initial_labels_for_all(self) -> np.ndarray:
|
||||
"""
|
||||
Sample one label for each sample in the dataset according to its label distribution
|
||||
:return: An array with one sampled label per data point.
|
||||
"""
|
||||
if not self.is_multi_label_per_sample:
|
||||
RuntimeWarning("Sampling labels from one-hot encoded distribution - Multi labels are not available")
|
||||
|
||||
sampling_fn = lambda p: self.random_state.choice(self.num_classes, 1, p=p)
|
||||
|
||||
return np.squeeze(np.apply_along_axis(sampling_fn, arr=self.distribution_temp_scaled, axis=1))
|
||||
|
||||
def sample(self, sample_idx: int) -> int:
|
||||
"""
|
||||
Sample one label for a given sample index
|
||||
:param sample_idx: The sample index for which the label will be sampled
|
||||
:return: The sampled class label for the given sample index.
|
||||
"""
|
||||
return self.random_state.choice(self.num_classes, 1, p=self.distribution[sample_idx])[0]
|
||||
|
||||
def add_noise_to_distribution(self, offset: float, distribution: np.ndarray, noise_model_type: str) -> np.ndarray:
|
||||
from InnerEyeDataQuality.datasets.label_noise_model import get_cifar10_asym_noise_model
|
||||
from InnerEyeDataQuality.datasets.label_noise_model import get_cifar10_sym_noise_model
|
||||
|
||||
# Create noise model
|
||||
if noise_model_type == 'sym':
|
||||
noise_model = get_cifar10_sym_noise_model(eta=1.0)
|
||||
elif noise_model_type == 'asym':
|
||||
noise_model = get_cifar10_asym_noise_model(eta=1.0)
|
||||
else:
|
||||
raise ValueError("Unknown noise model type")
|
||||
np.fill_diagonal(noise_model, 0.0)
|
||||
noise_model *= offset
|
||||
|
||||
# Add this noise on top of every sample in the dataset
|
||||
for _ii in range(self.num_samples):
|
||||
true_label = np.argmax(self.label_counts[_ii])
|
||||
distribution[_ii] += noise_model[true_label]
|
||||
|
||||
# Normalise the distribution
|
||||
return distribution / np.sum(distribution, axis=1, keepdims=True)
|
|
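A small, self-contained sketch of the LabelDistribution class above (its import path appears in the other removed files); the toy label counts are illustrative.

import numpy as np
from InnerEyeDataQuality.datasets.label_distribution import LabelDistribution

# Three samples, four classes, several annotator votes per sample.
label_counts = np.array([[3, 1, 0, 0],
                         [0, 2, 2, 0],
                         [0, 0, 0, 4]], dtype=np.int64)
dist = LabelDistribution(seed=42, label_counts=label_counts, temperature=2.0)  # temperature > 1 flattens the distribution
initial_labels = dist.sample_initial_labels_for_all()   # one reproducibly sampled label per sample
print(initial_labels, dist.sample(sample_idx=0))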
@ -1,86 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
import numpy as np
|
||||
|
||||
from default_paths import CIFAR10_ROOT_DIR
|
||||
from InnerEyeDataQuality.datasets.cifar10h import CIFAR10H
|
||||
from InnerEyeDataQuality.selection.simulation_statistics import get_ambiguous_sample_ids
|
||||
|
||||
from sklearn.metrics import confusion_matrix
|
||||
|
||||
|
||||
def get_cifar10h_confusion_matrix(temperature: float = 1.0, only_difficult_cases: bool = False) -> np.ndarray:
|
||||
"""
|
||||
Generates a class confusion matrix based on the label distribution in CIFAR10H.
|
||||
"""
|
||||
cifar10h_labels = CIFAR10H.download_cifar10h_labels(str(CIFAR10_ROOT_DIR))
|
||||
|
||||
if only_difficult_cases:
|
||||
ambiguous_sample_ids = get_ambiguous_sample_ids(cifar10h_labels)
|
||||
cifar10h_labels = cifar10h_labels[ambiguous_sample_ids, :]
|
||||
|
||||
# Temperature scale the original distribution
|
||||
if temperature > 1.0:
|
||||
orig_distribution = cifar10h_labels / np.sum(cifar10h_labels, axis=1, keepdims=True)
|
||||
_d = np.power(orig_distribution, 1. / temperature)
|
||||
scaled_distribution = _d / np.sum(_d, axis=1, keepdims=True)
|
||||
sample_counts = (scaled_distribution * np.sum(cifar10h_labels, axis=1, keepdims=True)).astype(np.int64)
|
||||
else:
|
||||
sample_counts = cifar10h_labels
|
||||
|
||||
y_pred, y_true = list(), list()
|
||||
for image_index in range(sample_counts.shape[0]):
|
||||
image_label_counts = sample_counts[image_index]
|
||||
for _iter, _el in enumerate(image_label_counts.tolist()):
|
||||
y_pred.extend([_iter] * _el)
|
||||
y_true.extend([np.argmax(image_label_counts)] * np.sum(image_label_counts))
|
||||
cm = confusion_matrix(y_true, y_pred, normalize="true")
|
||||
|
||||
return cm
|
||||
|
||||
|
||||
def get_cifar10_asym_noise_model(eta: float = 0.3) -> np.ndarray:
|
||||
"""
|
||||
CLASS-DEPENDENT ASYMMETRIC LABEL NOISE
|
||||
https://proceedings.neurips.cc/paper/2018/file/f2925f97bc13ad2852a7a551802feea0-Paper.pdf
|
||||
TRUCK -> AUTOMOBILE, BIRD -> AIRPLANE, DEER -> HORSE, CAT -> DOG, and DOG -> CAT
|
||||
|
||||
:param eta: The likelihood of true label switching from one of the specified classes to nearest class.
|
||||
In other words, likelihood of introducing a class-dependent label noise
|
||||
"""
|
||||
|
||||
# Generate a noise transition matrix.
|
||||
assert (0.0 <= eta) and (eta <= 1.0)
|
||||
|
||||
eps = 1e-12
|
||||
num_classes = 10
|
||||
conf_mat = np.eye(N=num_classes)
|
||||
indices = [[2, 0], [9, 1], [5, 3], [3, 5], [4, 7]]
|
||||
for ind in indices:
|
||||
conf_mat[ind[0], ind[1]] = eta / (1.0 - eta + eps)
|
||||
return conf_mat / np.sum(conf_mat, axis=1, keepdims=True)
|
||||
|
||||
|
||||
def get_cifar10_sym_noise_model(eta: float = 0.3) -> np.ndarray:
|
||||
"""
|
||||
SYMMETRIC LABEL NOISE
|
||||
:param eta: The likelihood of true label switching from true class to rest of the classes.
|
||||
"""
|
||||
# Generate a noise transition matrix.
|
||||
assert (0.0 <= eta) and (eta <= 1.0)
|
||||
assert isinstance(eta, float)
|
||||
|
||||
num_classes = 10
|
||||
conf_mat = np.eye(N=num_classes)
|
||||
for ind in range(num_classes):
|
||||
conf_mat[ind, ind] -= eta
|
||||
other_classes = np.setdiff1d(range(num_classes), ind)
|
||||
for o_c in other_classes:
|
||||
conf_mat[ind, o_c] += eta / other_classes.size
|
||||
|
||||
assert np.all(np.abs(np.sum(conf_mat, axis=1) - 1.0) < 1e-9)
|
||||
|
||||
return conf_mat
|
|
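A quick sketch exercising the two noise-model helpers above (import path as used in label_distribution.py); it checks that each row is a proper distribution and reports the expected symmetric noise rate.

import numpy as np
from InnerEyeDataQuality.datasets.label_noise_model import get_cifar10_asym_noise_model, get_cifar10_sym_noise_model

sym = get_cifar10_sym_noise_model(eta=0.3)    # keeps 0.7 on the diagonal, spreads 0.3 uniformly over the other classes
asym = get_cifar10_asym_noise_model(eta=0.3)  # noise flows only between the hand-picked class pairs
assert np.allclose(sym.sum(axis=1), 1.0) and np.allclose(asym.sum(axis=1), 1.0)
print("Expected symmetric noise rate:", 1.0 - np.mean(np.diag(sym)))   # ~0.3 for class-balanced data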
@ -1,106 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Callable, List, Optional, Tuple, Dict, Union
|
||||
|
||||
import PIL
|
||||
from PIL import Image
|
||||
from torch.utils.data import Dataset
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
NIH_TOTAL_SIZE = 112120
|
||||
|
||||
|
||||
class NIHCXR(Dataset):
|
||||
def __init__(self,
|
||||
data_directory: str,
|
||||
use_training_split: bool,
|
||||
seed: int = 1234,
|
||||
shuffle: bool = True,
|
||||
transform: Optional[Callable] = None,
|
||||
num_samples: Optional[int] = None,
|
||||
return_index: bool = True) -> None:
|
||||
"""
|
||||
Class for the full NIH ChestXray Dataset (112k images)
|
||||
|
||||
:param data_directory: the directory containing all training images from the dataset as well as the
|
||||
Data_Entry_2017.csv file containing the dataset labels.
|
||||
:param use_training_split: whether to return the training or the test split of the dataset.
|
||||
:param seed: random seed to use for dataset creation
|
||||
:param shuffle: whether to shuffle the dataset prior to splitting between validation and training
|
||||
:param transform: a preprocessing function that takes a PIL image as input and returns a tensor
|
||||
:param num_samples: number of samples to return (has to be smaller than the dataset split)
|
||||
"""
|
||||
self.data_directory = Path(data_directory)
|
||||
if not self.data_directory.exists():
|
||||
logging.error(
|
||||
f"The data directory {self.data_directory} does not exist. Make sure to download the NIH data "
|
||||
f"first.The dataset can on the main page"
|
||||
"https://www.kaggle.com/nih-chest-xrays/data. Make sure all images are placed directly under the "
|
||||
"data_directory folder. Make sure you downloaded the Data_Entry_2017.csv file to this directory as"
|
||||
"well.")
|
||||
|
||||
self.train = use_training_split
|
||||
self.seed = seed
|
||||
self.random_state = np.random.RandomState(seed)
|
||||
self.dataset_dataframe = pd.read_csv(self.data_directory / "Data_Entry_2017.csv")
|
||||
self.dataset_dataframe["pneumonia_like"] = self.dataset_dataframe["Finding Labels"].apply(
|
||||
lambda x: x.split("|")).apply(lambda x: "pneumonia" in x.lower()
|
||||
or "infiltration" in x.lower()
|
||||
or "consolidation" in x.lower())
|
||||
self.transforms = transform
|
||||
|
||||
orig_labels = self.dataset_dataframe.pneumonia_like.values.astype(np.int64)
|
||||
subjects_ids = self.dataset_dataframe["Image Index"].values
|
||||
is_train_ids = self.dataset_dataframe["train"].values
|
||||
self.num_classes = 2
|
||||
self.indices = np.where(is_train_ids)[0] if use_training_split else np.where(~is_train_ids)[0]
|
||||
self.indices = self.random_state.permutation(self.indices) \
|
||||
if shuffle else self.indices
|
||||
# ------------- Select subset of current split ------------- #
|
||||
if num_samples is not None:
|
||||
assert 0 < num_samples <= len(self.indices)
|
||||
self.indices = self.indices[:num_samples]
|
||||
|
||||
self.subject_ids = subjects_ids[self.indices]
|
||||
self.orig_labels = orig_labels[self.indices].reshape(-1)
|
||||
self.targets = self.orig_labels
|
||||
|
||||
# Identify case ids for ambiguous and clear label noise cases
|
||||
self.ambiguity_metric_args: Dict = dict()
|
||||
|
||||
dataset_type = "TRAIN" if use_training_split else "VAL"
|
||||
logging.info(f"Proportion of positive labels - {dataset_type}: {np.mean(self.targets)}")
|
||||
logging.info(f"Number samples - {dataset_type}: {self.targets.shape[0]}")
|
||||
self.return_index = return_index
|
||||
|
||||
def __getitem__(self, index: int) -> Union[Tuple[int, PIL.Image.Image, int], Tuple[PIL.Image.Image, int]]:
|
||||
"""
|
||||
|
||||
:param index: The index of the sample to be fetched
|
||||
:return: The image and label tensors
|
||||
"""
|
||||
subject_id = self.subject_ids[index]
|
||||
filename = self.data_directory / f"{subject_id}"
|
||||
target = self.targets[index]
|
||||
scan_image = Image.open(filename).convert("L")
|
||||
if self.transforms is not None:
|
||||
scan_image = self.transforms(scan_image)
|
||||
if self.return_index:
|
||||
return index, scan_image, int(target)
|
||||
return scan_image, int(target)
|
||||
|
||||
def __len__(self) -> int:
|
||||
"""
|
||||
|
||||
:return: The size of the dataset
|
||||
"""
|
||||
return len(self.indices)
|
||||
|
||||
def get_label_names(self) -> List[str]:
|
||||
return ["NotPneunomiaLike", "PneunomiaLike"]
|
|
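A usage sketch for the NIHCXR class removed above; the module path is an assumption, and the data directory must contain the NIH images plus a Data_Entry_2017.csv prepared with the train/test column the class reads.

import torchvision.transforms as T
from InnerEyeDataQuality.datasets.nih_cxr import NIHCXR  # module path is an assumption

val_set = NIHCXR(data_directory="data/nih_cxr",      # NIH images and the prepared Data_Entry_2017.csv
                 use_training_split=False,
                 transform=T.Compose([T.Resize((224, 224)), T.ToTensor()]),
                 num_samples=1000)
index, image, label = val_set[0]                     # binary "pneumonia-like" target derived from the NIH findings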
@ -1,112 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
def create_nih_dataframe(mapping_file_path: Path) -> pd.DataFrame:
|
||||
"""
|
||||
This function loads the json file mapping NIH ids to Kaggle images.
|
||||
Loads the original NIH label (multiple labels for each image).
|
||||
Then it creates the grouping by NIH categories (pneumonia, pneumonia like,
|
||||
other disease, no finding).
|
||||
:param mapping_file_path: path to the json mapping from NIH to Kaggle dataset (on the
|
||||
RSNA webpage)
|
||||
:return: dataframe with original NIH labels for each patient in the Kaggle dataset.
|
||||
"""
|
||||
with open(mapping_file_path) as f:
|
||||
list_subjects = json.load(f)
|
||||
orig_dataset = pd.DataFrame(columns=["subject", "orig_label"])
|
||||
orig_dataset["subject"] = [l["subset_img_id"] for l in list_subjects] # noqa: E741
|
||||
orig_labels = [str(l['orig_labels']).lower() for l in list_subjects] # noqa: E741
|
||||
orig_dataset["nih_pneumonia"] = ["pneumonia" in l for l in orig_labels] # noqa: E741
|
||||
orig_dataset["nih_pneumonia_like"] = [(("infiltration" in l or "consolidation" in l) and ("pneumonia" not in l)) for
|
||||
l in orig_labels] # noqa: E741
|
||||
orig_dataset["no_finding"] = ["no finding" in str(l).lower() for l in orig_labels] # noqa: E741
|
||||
orig_dataset["orig_label"] = orig_labels
|
||||
orig_dataset["orig_label"].apply(lambda x: sorted(x))
|
||||
orig_dataset[
|
||||
"nih_other_disease"] = ~orig_dataset.nih_pneumonia_like & ~orig_dataset.nih_pneumonia & ~orig_dataset.no_finding
|
||||
orig_dataset[
|
||||
"nih_category"] = 1 * orig_dataset.nih_pneumonia + 2 * orig_dataset.no_finding + 3 * \
|
||||
orig_dataset.nih_other_disease
|
||||
orig_dataset["StudyInstanceUID"] = [l["StudyInstanceUID"] for l in list_subjects] # noqa: E741
|
||||
orig_dataset.nih_category = orig_dataset.nih_category.apply(lambda x: ["Consolidation/Infiltration", "Pneumonia",
|
||||
"No finding", "Other disease"][x])
|
||||
return orig_dataset
|
||||
|
||||
|
||||
def process_detailed_probs_dataset(detailed_probs_path: Path) -> pd.DataFrame:
|
||||
"""
|
||||
This function loads the csv file with the detailed information for each bounding boxes as annotated
|
||||
by the readers during the adjudication for the preparation of the challenge. It maps low, medium and high
|
||||
probability labels to a numerical scale from 1 to 3. Computes the minimum, maximum and average confidence for each
|
||||
patient for which at least one bounding box was present.
|
||||
:param detailed_probs_path: path to detailed_probs csv file released in Kaggle challenge.
|
||||
:return: dataframe with metrics for confidence in bounding boxes by patient.
|
||||
"""
|
||||
conversion_map = {"Lung Opacity (Low Prob)": 1, "Lung Opacity (Med Prob)": 2, "Lung Opacity (High Prob)": 3}
|
||||
detailed_probs_dataset = pd.read_csv(detailed_probs_path)
|
||||
detailed_probs_dataset["ClassProb"] = detailed_probs_dataset["labelName"].apply(lambda x: conversion_map[x])
|
||||
process_details = detailed_probs_dataset.groupby("StudyInstanceUID")["ClassProb"].agg(
|
||||
[np.mean, np.min, np.max, np.count_nonzero, list])
|
||||
process_details.rename(columns={"mean": "avg_conf_score", "amin": "min_conf_score", "amax": "max_conf_score"},
|
||||
inplace=True)
|
||||
return process_details
|
||||
|
||||
|
||||
def create_mapping_dataset_nih(mapping_file_path: Path,
|
||||
kaggle_dataset_path: Path,
|
||||
detailed_class_info_path: Path,
|
||||
detailed_probs_path: Path) -> pd.DataFrame:
|
||||
"""
|
||||
Creates the final chest x-ray dataset combining labels from NIH, kaggle and detailed information about kaggle
|
||||
labels from the detailed_class_info and detailed_probs csv file released during the challenge.
|
||||
:param mapping_file_path:
|
||||
:param kaggle_dataset_path:
|
||||
:param detailed_class_info_path:
|
||||
:param detailed_probs_path:
|
||||
:return: detailed dataset
|
||||
"""
|
||||
orig_dataset = create_nih_dataframe(mapping_file_path)
|
||||
kaggle_dataset = pd.read_csv(kaggle_dataset_path)
|
||||
difficulty = pd.read_csv(detailed_class_info_path).drop_duplicates()
|
||||
detailed_probs_dataset = process_detailed_probs_dataset(detailed_probs_path)
|
||||
# Merge NIH info with Kaggle dataset
|
||||
merged = pd.merge(orig_dataset, kaggle_dataset)
|
||||
merged.rename(columns={"label": "label_kaggle"}, inplace=True)
|
||||
# Define binary label from original NIH label, for consolidation/infiltration
|
||||
# mapping is not clear, use kaggle label.
|
||||
merged.loc[merged.nih_pneumonia, "binary_nih_initial_label"] = True
|
||||
merged.loc[merged.no_finding | merged.nih_other_disease, "binary_nih_initial_label"] = False
|
||||
merged.loc[merged.nih_pneumonia_like, "binary_nih_initial_label"] = merged.loc[
|
||||
merged.nih_pneumonia_like, "label_kaggle"]
|
||||
# Add subclass information from Kaggle challenge to define ambiguous cases
|
||||
merged = pd.merge(merged, difficulty, left_on="subject", right_on="patientId")
|
||||
merged["not_normal"] = (merged["class"] == "No Lung Opacity / Not Normal")
|
||||
# Add difficulty information from Kaggle based on presence of bounding boxes
|
||||
merged = pd.merge(merged, detailed_probs_dataset, on="StudyInstanceUID", how="left")
|
||||
merged.drop(columns=["patientId"], inplace=True)
|
||||
merged.fillna(-1, inplace=True)
|
||||
# Ambiguous if there was only low probability boxes and the adjudicated label is true
|
||||
merged.loc[merged.label_kaggle, ["ambiguous"]] = (merged.loc[merged.label_kaggle].min_conf_score == 1) & (
|
||||
merged.loc[merged.label_kaggle].max_conf_score == 1)
|
||||
# Ambiguous if there was some bounding boxes but adjudicated label is false
|
||||
merged.loc[~merged.label_kaggle, ["ambiguous"]] = merged.loc[~merged.label_kaggle].min_conf_score > -1
|
||||
return merged
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from default_paths import INNEREYE_DQ_DIR
|
||||
current_dir = Path(__file__).parent
|
||||
mapping_file = Path(__file__).parent / "pneumonia-challenge-dataset-mappings_2018.json"
|
||||
kaggle_dataset_path = current_dir / "stage_2_train_labels.csv"
|
||||
detailed_class_info = current_dir / "stage_2_detailed_class_info.csv"
|
||||
detailed_probs = current_dir / "RSNA_pneumonia_all_probs.csv"
|
||||
dataset = create_mapping_dataset_nih(mapping_file, kaggle_dataset_path, detailed_class_info, detailed_probs)
|
||||
dataset.to_csv(INNEREYE_DQ_DIR / "datasets" / "noisy_chestxray_dataset.csv", index=False)
|
|
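A sketch of how the dataset-creation entry point above can be called directly, using the file names from its __main__ block; the module path and the location of the challenge CSVs are assumptions.

from pathlib import Path
from InnerEyeDataQuality.datasets.create_noisy_chestxray_dataset import create_mapping_dataset_nih  # path assumed

challenge_dir = Path("data/rsna_pneumonia")   # folder holding the challenge CSVs and the NIH mapping json
noisy_dataset = create_mapping_dataset_nih(
    mapping_file_path=challenge_dir / "pneumonia-challenge-dataset-mappings_2018.json",
    kaggle_dataset_path=challenge_dir / "stage_2_train_labels.csv",
    detailed_class_info_path=challenge_dir / "stage_2_detailed_class_info.csv",
    detailed_probs_path=challenge_dir / "RSNA_pneumonia_all_probs.csv")
noisy_dataset.to_csv("noisy_chestxray_dataset.csv", index=False)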
@ -1,183 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Callable, List, Optional, Tuple
|
||||
|
||||
import PIL
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pydicom as dicom
|
||||
from PIL import Image
|
||||
from torch.utils.data import Dataset
|
||||
|
||||
from InnerEyeDataQuality.datasets.label_distribution import LabelDistribution
|
||||
from InnerEyeDataQuality.selection.simulation_statistics import SimulationStats
|
||||
from InnerEyeDataQuality.utils.generic import convert_labels_to_one_hot
|
||||
from InnerEyeDataQuality.evaluation.metrics import compute_label_entropy
|
||||
|
||||
|
||||
class NoisyKaggleSubsetCXR(Dataset):
|
||||
def __init__(self, data_directory: str,
|
||||
use_training_split: bool,
|
||||
consolidation_noise_rate: float,
|
||||
train_fraction: float = 0.5,
|
||||
seed: int = 1234,
|
||||
shuffle: bool = True,
|
||||
transform: Optional[Callable] = None,
|
||||
num_samples: Optional[int] = None,
|
||||
use_noisy_fixed_labels: bool = True) -> None:
|
||||
"""
|
||||
Class for the noisy Kaggle RSNA Pneumonia Detection Dataset. This dataset uses the original RSNA/NIH
|
||||
labels as the noisy initial labels,
|
||||
while the adjudicated Kaggle labels serve as the clean labels.
|
||||
|
||||
:param data_directory: the directory containing all training images from the Challenge (stage 1) as well as the
|
||||
dataset.csv containing the kaggle and the original labels.
|
||||
:param use_training_split: whether to return the training or the validation split of the dataset.
|
||||
:param train_fraction: the proportion of samples to use for training
|
||||
:param seed: random seed to use for dataset creation
|
||||
:param shuffle: whether to shuffle the dataset prior to splitting between validation and training
|
||||
:param transform: a preprocessing function that takes a PIL image as input and returns a tensor
|
||||
:param num_samples: number of samples to return (has to be smaller than the dataset split)
|
||||
:param use_noisy_fixed_labels: if True use the original labels as the initial labels else use the clean labels.
|
||||
:param consolidation_noise_rate: proportion of noisy samples among consolidation/infiltration NIH category.
|
||||
"""
|
||||
dataset_type = "TRAIN" if use_training_split else "VAL"
|
||||
self.data_directory = Path(data_directory)
|
||||
if not self.data_directory.exists():
|
||||
raise RuntimeError(
|
||||
f"The data directory {self.data_directory} does not exist. Make sure to download to Kaggle data "
|
||||
f"first.The kaggle dataset can "
|
||||
"be acceded via the Kaggle CLI kaggle competitions download -c rsna-pneumonia-detection-challenge or "
|
||||
"on the main page of the challenge "
|
||||
"https://www.kaggle.com/c/rsna-pneumonia-detection-challenge/data?select=stage_2_train_images")
|
||||
|
||||
path_to_noisy_csv = Path(__file__).parent / "noisy_chestxray_dataset.csv"
|
||||
if not path_to_noisy_csv.exists():
|
||||
raise RuntimeError(f"The noisy dataset csv can not be found in {path_to_noisy_csv}, make sure to run "
|
||||
"create_noisy_chestxray_dataset.py first. See readme for more detailed instructions on the pre-requisite"
|
||||
" for running the noisy Chest Xray benchmark.")
|
||||
|
||||
self.train = use_training_split
|
||||
self.train_fraction = train_fraction
|
||||
self.seed = seed
|
||||
self.random_state = np.random.RandomState(seed)
|
||||
self.dataset_dataframe = pd.read_csv(str(Path(__file__).parent / "noisy_chestxray_dataset.csv"))
|
||||
self.transforms = transform
|
||||
|
||||
self.dataset_dataframe["initial_label"] = self.dataset_dataframe.binary_nih_initial_label
|
||||
|
||||
# Random uniform noise among consolidation
|
||||
pneumonia_like_subj = self.dataset_dataframe.loc[self.dataset_dataframe.nih_pneumonia_like, "subject"].values
|
||||
selected = self.random_state.choice(pneumonia_like_subj,
|
||||
replace=False,
|
||||
size=int(len(pneumonia_like_subj) * consolidation_noise_rate))
|
||||
self.dataset_dataframe.loc[self.dataset_dataframe.subject.isin(selected), "initial_label"] = \
|
||||
~self.dataset_dataframe.loc[self.dataset_dataframe.subject.isin(selected), "initial_label"]
|
||||
|
||||
self.dataset_dataframe["is_noisy"] = self.dataset_dataframe.label_kaggle != self.dataset_dataframe.initial_label
|
||||
|
||||
initial_labels = self.dataset_dataframe.initial_label.values.astype(np.int64).reshape(-1, 1)
|
||||
kaggle_labels = self.dataset_dataframe.label_kaggle.values.astype(np.int64).reshape(-1, 1)
|
||||
subjects_ids = self.dataset_dataframe.subject.values
|
||||
is_ambiguous = self.dataset_dataframe.ambiguous.values
|
||||
orig_label = self.dataset_dataframe.orig_label.values
|
||||
nih_category = self.dataset_dataframe.nih_category.values
|
||||
|
||||
# Convert clean labels to one-hot to populate label counts
|
||||
# i.e for easy cases assume the true distribution is 100% ground truth
|
||||
kaggle_label_counts = convert_labels_to_one_hot(kaggle_labels, n_classes=2)
|
||||
# For ambiguous cases: [0, 1] -> [1, 2] and [1, 0] -> [2, 1]
|
||||
kaggle_label_counts[is_ambiguous, :] = kaggle_label_counts[is_ambiguous, :] * 2 + 1
|
||||
_, self.num_classes = kaggle_label_counts.shape
|
||||
assert self.num_classes == 2
|
||||
|
||||
# ------------- Split the data into training and validation sets ------------- #
|
||||
self.num_datapoints = len(self.dataset_dataframe)
|
||||
all_indices = np.arange(self.num_datapoints)
|
||||
num_samples_set1 = int(self.num_datapoints * self.train_fraction)
|
||||
all_indices = self.random_state.permutation(all_indices) \
|
||||
if shuffle else all_indices
|
||||
train_indices = all_indices[:num_samples_set1]
|
||||
val_indices = all_indices[num_samples_set1:]
|
||||
self.indices = train_indices if use_training_split else val_indices
|
||||
|
||||
# ------------- Select subset of current split ------------- #
|
||||
# If n_samples is set to restrict dataset i.e. for data_curation
|
||||
num_samples = self.num_datapoints if num_samples is None else num_samples
|
||||
if num_samples < self.num_datapoints:
|
||||
assert 0 < num_samples <= len(self.indices)
|
||||
self.indices = self.indices[:num_samples]
|
||||
|
||||
# ------------ Finalize dataset --------------- #
|
||||
self.subject_ids = subjects_ids[self.indices]
|
||||
|
||||
# Label distribution is constructed from the true labels
|
||||
self.label_counts = kaggle_label_counts[self.indices]
|
||||
self.label_distribution = LabelDistribution(seed, self.label_counts)
|
||||
|
||||
self.initial_labels = initial_labels[self.indices].reshape(-1)
|
||||
self.kaggle_labels = kaggle_labels[self.indices].reshape(-1)
|
||||
self.targets = self.initial_labels if use_noisy_fixed_labels else self.kaggle_labels
|
||||
self.orig_labels = orig_label[self.indices]
|
||||
self.is_ambiguous = is_ambiguous[self.indices]
|
||||
self.nih_category = nih_category[self.indices]
|
||||
|
||||
# Identify case ids for ambiguous and clear label noise cases
|
||||
label_stats = SimulationStats(name="NoisyChestXray", true_label_counts=self.label_counts,
|
||||
initial_labels=convert_labels_to_one_hot(self.targets, self.num_classes))
|
||||
self.clear_mislabeled_cases = label_stats.mislabelled_not_ambiguous_sample_ids[0]
|
||||
self.ambiguous_mislabelled_cases = label_stats.mislabelled_ambiguous_sample_ids[0]
|
||||
self.true_label_entropy = compute_label_entropy(label_counts=self.label_counts)
|
||||
self.ambiguity_metric_args = {"ambiguous_mislabelled_ids": self.ambiguous_mislabelled_cases,
|
||||
"clear_mislabelled_ids": self.clear_mislabeled_cases,
|
||||
"true_label_entropy": self.true_label_entropy}
|
||||
self.num_samples = self.targets.shape[0]
|
||||
logging.info(f"Number of samples: {self.num_samples}")
|
||||
logging.info(f"Number of targets: {len(self.targets)}")
|
||||
logging.info(f"Number of indices: {len(self.indices)}")
|
||||
logging.info(f"Proportion of positive clean labels - {dataset_type}: {np.mean(self.kaggle_labels)}")
|
||||
logging.info(f"Proportion of positive noisy labels - {dataset_type}: {np.mean(self.targets)}")
|
||||
logging.info(
|
||||
f"Total noise rate on the {dataset_type} dataset: {np.mean(self.kaggle_labels != self.targets)} \n")
|
||||
selected_df = self.dataset_dataframe.loc[self.dataset_dataframe.subject.isin(self.subject_ids)]
|
||||
noisy_df = selected_df.loc[selected_df.is_noisy].copy()  # copy to avoid pandas SettingWithCopyWarning below
|
||||
noisy_df["nih_noise"] = ~noisy_df.nih_pneumonia_like
|
||||
logging.info(f"\n{pd.crosstab(noisy_df.nih_noise, noisy_df.ambiguous).to_string()}")
|
||||
# self.weight = np.mean(self.kaggle_labels)
|
||||
# logging.info(f"Weight negative {self.weight:.2f} - weight positive {(1 - self.weight):.2f}")
|
||||
self.png_files = (self.data_directory / f"{self.subject_ids[0]}.png").exists()
|
||||
|
||||
def __getitem__(self, index: int) -> Tuple[int, PIL.Image.Image, int]:
|
||||
"""
|
||||
|
||||
:param index: The index of the sample to be fetched
|
||||
:return: The sample index, the transformed image, and the label
|
||||
"""
|
||||
subject_id = self.subject_ids[index]
|
||||
target = self.targets[index]
|
||||
if self.png_files:
|
||||
filename = self.data_directory / f"{subject_id}.png"
|
||||
scan_image = Image.open(filename)
|
||||
else:
|
||||
filename = self.data_directory / f"{subject_id}.dcm"
|
||||
scan_image = dicom.dcmread(filename).pixel_array
|
||||
scan_image = Image.fromarray(scan_image)
|
||||
if self.transforms is not None:
|
||||
scan_image = self.transforms(scan_image)
|
||||
if len(scan_image.shape) == 2:  # add a channel dimension to 2D (H, W) tensors
|
||||
scan_image = scan_image.unsqueeze(dim=0)
|
||||
return index, scan_image, int(target)
|
||||
|
||||
def __len__(self) -> int:
|
||||
"""
|
||||
|
||||
:return: The size of the dataset
|
||||
"""
|
||||
return len(self.subject_ids)
|
||||
|
||||
def get_label_names(self) -> List[str]:
|
||||
return ["Normal", "Opacity"]
|
|
@ -1,45 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
from typing import Any, List, Union
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from math import inf
|
||||
from scipy import stats
|
||||
import torch.nn.functional as F
|
||||
|
||||
|
||||
def get_instance_noise_model(n: float, dataset: Any, labels: Union[List, torch.Tensor], num_classes: int,
|
||||
feature_size: int, norm_std: float, seed: int) -> np.ndarray:
|
||||
"""
|
||||
:param n: noise_rate
|
||||
:param dataset: the dataset object itself (e.g. CIFAR-10), not a DataLoader
|
||||
:param labels: labels (targets)
|
||||
:param num_classes: number of classes
|
||||
:param feature_size: the size of input images (e.g. 28*28)
|
||||
:param norm_std: standard deviation of the truncated-normal flip-rate distribution (default 0.1)
|
||||
:param seed: random seed
|
||||
"""
|
||||
if isinstance(labels, list):
|
||||
labels = torch.FloatTensor(labels)
|
||||
P = []
|
||||
random_state = np.random.RandomState(seed)
|
||||
flip_distribution = stats.truncnorm((0 - n) / norm_std, (1 - n) / norm_std, loc=n, scale=norm_std)
|
||||
flip_rate = flip_distribution.rvs(labels.shape[0], random_state=seed)
|
||||
|
||||
W = random_state.randn(num_classes, feature_size, num_classes)
|
||||
W = torch.FloatTensor(W)
|
||||
|
||||
for i, (x, y) in enumerate(dataset):
|
||||
# (1 x M) * (M x 10) = (1 x 10)
|
||||
A = x.view(1, -1).mm(W[y]).squeeze(0)
|
||||
A[y] = -inf
|
||||
A = flip_rate[i] * F.softmax(A, dim=0)
|
||||
A[y] += 1 - flip_rate[i]
|
||||
P.append(A)
|
||||
P = torch.stack(P, 0).numpy()
|
||||
|
||||
return P
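# A hedged usage sketch (not from the original file): the returned array P has one
# row of flip probabilities per sample (each row sums to 1), so noisy labels can be
# drawn by treating each row as a categorical distribution. The helper name below
# is illustrative only.
def sample_noisy_labels(P: np.ndarray, seed: int = 0) -> np.ndarray:
    """Draw one noisy label per sample from its row of flip probabilities."""
    rng = np.random.RandomState(seed)
    num_classes = P.shape[1]
    P = P.astype(np.float64)  # normalise in double precision before sampling
    return np.array([rng.choice(num_classes, p=row / row.sum()) for row in P])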
|
|
@ -1,5 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
import torch
|
||||
import torchvision
|
||||
from torch import nn
|
||||
|
||||
from InnerEyeDataQuality.configs.config_node import ConfigNode
|
||||
|
||||
|
||||
class Network(nn.Module):
|
||||
"""
|
||||
DenseNet121 as implemented in torchvision
|
||||
"""
|
||||
|
||||
def __init__(self, config: ConfigNode) -> None:
|
||||
super().__init__()
|
||||
self.densenet121 = torchvision.models.densenet121(pretrained=config.train.pretrained, progress=False)
|
||||
num_ftrs = self.densenet121.classifier.in_features
|
||||
self.densenet121.classifier = nn.Linear(num_ftrs, config.dataset.n_classes)
|
||||
self.projection = self.densenet121.classifier
|
||||
|
||||
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
||||
x = self.densenet121(x)
|
||||
return x
|
|
@ -1,26 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
import torch
|
||||
import torchvision
|
||||
from torch import nn
|
||||
|
||||
from InnerEyeDataQuality.configs.config_node import ConfigNode
|
||||
|
||||
|
||||
class Network(nn.Module):
|
||||
"""
|
||||
ResNet50 as implemented in torchvision
|
||||
"""
|
||||
|
||||
def __init__(self, config: ConfigNode) -> None:
|
||||
super().__init__()
|
||||
self.resnet = torchvision.models.resnet50(pretrained=config.train.pretrained, progress=False)
|
||||
num_ftrs = self.resnet.fc.in_features
|
||||
self.resnet.fc = nn.Linear(num_ftrs, config.dataset.n_classes)
|
||||
self.projection = self.resnet.fc
|
||||
|
||||
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
||||
x = self.resnet(x)
|
||||
return x
|
|
@ -1,117 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
import logging
|
||||
from typing import Dict
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
|
||||
class EMA:
|
||||
"""Exponential moving average of model parameters.
|
||||
Args:
|
||||
model (torch.nn.Module): Model with parameters whose EMA will be kept.
|
||||
decay (float): Decay rate for exponential moving average.
|
||||
step_max (int): Maximum required number of steps to reach specified decay rate from zero.
|
||||
In the initial epochs, decay rate is kept small to keep the teacher model up-to-date.
|
||||
"""
|
||||
|
||||
def __init__(self, model: torch.nn.Module, decay: float = 0.99, step_max: int = 150) -> None:
|
||||
self.decay_max = decay
|
||||
self.step_max = step_max
|
||||
self.step_count = int(0)
|
||||
self.shadow = {}
|
||||
self.original: Dict[str, torch.Tensor] = {}
|
||||
self.model = model # reference to the student model
|
||||
|
||||
# Register model parameters
|
||||
for name, param in self.model.named_parameters():
|
||||
if param.requires_grad:
|
||||
self.shadow[name] = param.data.clone()
|
||||
|
||||
logging.info("Creating a teacher model (EMA)")
|
||||
|
||||
def update(self) -> None:
|
||||
"""
|
||||
Receives a new set of parameter values and merges them with the previously stored ones.
|
||||
"""
|
||||
self.step_count += int(1)
|
||||
decay = self._get_decay_rate(self.step_count)
|
||||
for name, param in self.model.named_parameters():
|
||||
if param.requires_grad:
|
||||
assert name in self.shadow
|
||||
new_average = (1.0 - decay) * param.data + decay * self.shadow[name]
|
||||
self.shadow[name] = new_average.clone()
|
||||
|
||||
def _get_decay_rate(self, step: int) -> float:
|
||||
"""
|
||||
Return decay rate for current stored parameters
|
||||
"""
|
||||
if step <= self.step_max:
|
||||
ratio = step / self.step_max
|
||||
return 0.5 * self.decay_max * (1 - np.cos(ratio * np.pi))
|
||||
else:
|
||||
return self.decay_max
|
||||
|
||||
@torch.no_grad()
|
||||
def inference(self, inputs: torch.Tensor) -> torch.Tensor:
|
||||
"""
|
||||
Runs inference with the student model's architecture using the EMA (teacher) parameters
|
||||
"""
|
||||
is_train = self.model.training
|
||||
self._assign()
|
||||
self.model.eval()
|
||||
self._switch_hooks(False)
|
||||
outputs = self.model.forward(inputs).detach()
|
||||
self._switch_hooks(True)
|
||||
self._restore()
|
||||
self.model.train() if is_train else self.model.eval()
|
||||
|
||||
return outputs
|
||||
|
||||
def _switch_hooks(self, bool_value: bool) -> None:
|
||||
for layer in self.model.children():
|
||||
if hasattr(layer, "use_hook"):
|
||||
layer.use_hook = bool_value # type: ignore
|
||||
|
||||
def _assign(self) -> None:
|
||||
"""Assign exponential moving average of parameter values to the
|
||||
respective parameters.
|
||||
|
||||
"""
|
||||
for name, param in self.model.named_parameters():
|
||||
if param.requires_grad:
|
||||
assert name in self.shadow
|
||||
self.original[name] = param.data.clone()
|
||||
param.data = self.shadow[name]
|
||||
|
||||
def _restore(self) -> None:
|
||||
"""Restore original parameters to a model. That is, put back
|
||||
the values that were in each parameter at the last call to `_assign`.
|
||||
"""
|
||||
for name, param in self.model.named_parameters():
|
||||
if param.requires_grad:
|
||||
assert name in self.shadow
|
||||
param.data = self.original[name]
|
||||
|
||||
def save_model(self, save_path: str) -> None:
|
||||
"""
|
||||
Saves EMA model parameters to a checkpoint file.
|
||||
"""
|
||||
self._assign()
|
||||
state = {'ema_model': self.model.state_dict(),
|
||||
'ema_params': self.shadow,
|
||||
'step_count': self.step_count}
|
||||
torch.save(state, save_path)
|
||||
self._restore()
|
||||
|
||||
def restore_from_checkpoint(self, path: str) -> None:
|
||||
state = torch.load(path)
|
||||
self.shadow = state['ema_params']
|
||||
self.step_count = state['step_count']
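# A minimal, self-contained usage sketch (not part of the original file): a toy
# linear "student" is trained for a few steps while the EMA teacher tracks it.
# The model, optimiser and data below are illustrative placeholders only.
if __name__ == "__main__":
    from torch import nn

    student = nn.Linear(4, 2)
    teacher = EMA(student, decay=0.99, step_max=150)
    optimizer = torch.optim.SGD(student.parameters(), lr=0.1)
    criterion = nn.CrossEntropyLoss()
    for _ in range(5):
        inputs, targets = torch.randn(8, 4), torch.randint(0, 2, (8,))
        loss = criterion(student(inputs), targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        teacher.update()  # blend the new student weights into the EMA shadow
    ema_logits = teacher.inference(torch.randn(8, 4))  # forward pass using the EMA weights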
|
|
@ -1,22 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
from typing import Callable
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
|
||||
class Lambda(nn.Module):
|
||||
"""
|
||||
Lambda torch nn module that can be used to modularise nn.functional methods.
|
||||
"""
|
||||
|
||||
def __init__(self, fn: Callable) -> None:
|
||||
super(Lambda, self).__init__()
|
||||
self.lambda_func = fn
|
||||
|
||||
def forward(self, input: torch.Tensor) -> torch.Tensor:
|
||||
return self.lambda_func(input)
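# Example use of the Lambda wrapper (standalone sketch, not from the original file):
# it lets a functional call sit inside nn.Sequential without a dedicated module.
if __name__ == "__main__":
    import torch.nn.functional as F

    head = nn.Sequential(
        nn.Conv2d(3, 8, kernel_size=3, padding=1),
        Lambda(lambda t: F.adaptive_avg_pool2d(t, 1).flatten(1)),  # global pooling + flatten
        nn.Linear(8, 2),
    )
    print(head(torch.randn(2, 3, 32, 32)).shape)  # -> torch.Size([2, 2])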
|
|
@ -1,84 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from InnerEyeDataQuality.deep_learning.self_supervised.ssl_classifier_module import PretrainedClassifier, SSLClassifier
|
||||
|
||||
|
||||
class CollectCNNEmbeddings:
|
||||
"""
|
||||
This class takes care of registering a forward hook to get the embeddings for a given model.
|
||||
"""
|
||||
|
||||
def __init__(self, use_only_in_train: bool, store_input_tensor: bool) -> None:
|
||||
"""
|
||||
:param use_only_in_train: If set to True, hooks are registered only for model forward passes in training mode.
|
||||
:param store_input_tensor: If True, store the input tensor of the hooked layer; otherwise store its output.
|
||||
"""
|
||||
self.inputs: list = list()
|
||||
self.layer: Optional[torch.nn.Module] = None
|
||||
self.use_only_in_train = use_only_in_train
|
||||
self.store_input_tensor = store_input_tensor
|
||||
|
||||
def __call__(self,
|
||||
module: torch.nn.Module,
|
||||
module_in: List[torch.Tensor],
|
||||
module_out: torch.Tensor) -> None:
|
||||
|
||||
# If model is in validation state and only training time collection is allowed, then exit.
|
||||
if self.use_only_in_train and not module.training:
|
||||
return
|
||||
if module.use_hook:
|
||||
_tensor = module_in[0] if self.store_input_tensor else module_out
|
||||
self.inputs.append(_tensor.detach().cpu())
|
||||
|
||||
def return_embeddings(self, return_numpy: bool = True) -> Union[np.ndarray, torch.Tensor, None]:
|
||||
if len(self.inputs) == 0:
|
||||
return None
|
||||
embeddings = torch.cat(self.inputs, dim=0)
|
||||
return embeddings.cpu().numpy() if return_numpy else embeddings
|
||||
|
||||
def reset(self) -> None:
|
||||
self.inputs = list()
|
||||
|
||||
|
||||
def register_embeddings_collector(models: List[torch.nn.Module],
|
||||
use_only_in_train: bool = False) -> List[CollectCNNEmbeddings]:
|
||||
"""
|
||||
Takes a list of models and registers a forward hook for each model
|
||||
:param models: Torch module
|
||||
:param use_only_in_train: If set to True, hooks are registered only for model forward passes in training mode.
|
||||
"""
|
||||
assert(isinstance(use_only_in_train, bool))
|
||||
all_model_cnn_embeddings = []
|
||||
for model in models:
|
||||
store_input_tensor = not isinstance(model, SSLClassifier)
|
||||
cnn_embeddings = CollectCNNEmbeddings(use_only_in_train, store_input_tensor)
|
||||
if hasattr(model, "projection"):
|
||||
cnn_embeddings.layer = model.projection # type: ignore
|
||||
elif hasattr(model, "resnet"):
|
||||
cnn_embeddings.layer = model.resnet.fc # type: ignore
|
||||
elif isinstance(model, PretrainedClassifier):
|
||||
cnn_embeddings.layer = model.classifier_head
|
||||
else:
|
||||
cnn_embeddings.layer = model.fc # type: ignore
|
||||
cnn_embeddings.layer.use_hook = True # type: ignore
|
||||
cnn_embeddings.layer.register_forward_hook(cnn_embeddings) # type: ignore
|
||||
all_model_cnn_embeddings.append(cnn_embeddings)
|
||||
return all_model_cnn_embeddings
|
||||
|
||||
|
||||
def get_all_embeddings(embeddings_collectors: List[CollectCNNEmbeddings]) -> List[np.ndarray]:
|
||||
"""
|
||||
Returns all embeddings from a list of embeddings collectors and resets the list
|
||||
"""
|
||||
output = list()
|
||||
for cnn_embeddings in embeddings_collectors:
|
||||
output.append(cnn_embeddings.return_embeddings(return_numpy=True))
|
||||
cnn_embeddings.reset()
|
||||
return output
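# A hedged usage sketch (standalone, not part of the original file): register hooks
# on a plain torchvision ResNet (which falls into the final `model.fc` branch above),
# run a forward pass, then harvest the collected penultimate-layer features.
if __name__ == "__main__":
    import torchvision

    model = torchvision.models.resnet18(pretrained=False)
    collectors = register_embeddings_collector([model], use_only_in_train=False)
    with torch.no_grad():
        model(torch.randn(2, 3, 224, 224))
    features = get_all_embeddings(collectors)[0]  # numpy array of shape (2, 512)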
|
|
@ -1,83 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
from typing import Any, Callable, List, Tuple
|
||||
|
||||
import numpy as np
|
||||
import torchvision
|
||||
from torchvision.transforms import ToTensor
|
||||
|
||||
from InnerEyeDataQuality.configs.config_node import ConfigNode
|
||||
from InnerEyeDataQuality.deep_learning.transforms import AddGaussianNoise, CenterCrop, ElasticTransform, \
|
||||
ExpandChannels, RandomAffine, RandomColorJitter, RandomErasing, RandomGamma, \
|
||||
RandomHorizontalFlip, RandomResizeCrop, Resize
|
||||
|
||||
|
||||
def _get_dataset_stats(
|
||||
config: ConfigNode) -> Tuple[np.ndarray, np.ndarray]:
|
||||
name = config.dataset.name
|
||||
if name == 'CIFAR10':
|
||||
mean = np.array([0.4914, 0.4822, 0.4465])
|
||||
std = np.array([0.2470, 0.2435, 0.2616])
|
||||
else:
|
||||
raise ValueError(f"Dataset {name} is not supported for normalisation statistics")
|
||||
return mean, std
|
||||
|
||||
|
||||
def create_transform(config: ConfigNode, is_train: bool) -> Callable:
|
||||
if config.dataset.name in ["NoisyChestXray", "Kaggle"]:
|
||||
return create_chest_xray_transform(config, is_train)
|
||||
elif config.dataset.name in ["CIFAR10", "CIFAR10H", "CIFAR10IDN", "CIFAR10H_TRAIN_VAL", "CIFAR10SYM"]:
|
||||
return create_cifar_transform(config, is_train)
|
||||
else:
|
||||
raise ValueError(f"Dataset {config.dataset.name} is not supported by create_transform")
|
||||
|
||||
|
||||
def create_cifar_transform(config: ConfigNode,
|
||||
is_train: bool) -> Callable:
|
||||
transforms: List[Any] = list()
|
||||
if is_train:
|
||||
if config.augmentation.use_random_affine:
|
||||
transforms.append(RandomAffine(config))
|
||||
if config.augmentation.use_random_crop:
|
||||
transforms.append(RandomResizeCrop(config))
|
||||
if config.augmentation.use_random_horizontal_flip:
|
||||
transforms.append(RandomHorizontalFlip(config))
|
||||
if config.augmentation.use_random_color:
|
||||
transforms.append(RandomColorJitter(config))
|
||||
transforms += [ToTensor()]
|
||||
return torchvision.transforms.Compose(transforms)
|
||||
|
||||
|
||||
def create_chest_xray_transform(config: ConfigNode,
|
||||
is_train: bool) -> Callable:
|
||||
"""
|
||||
Defines the image transformations pipeline for Chest-Xray datasets.
|
||||
"""
|
||||
transforms: List[Any] = []
|
||||
if is_train:
|
||||
if config.augmentation.use_random_affine:
|
||||
transforms.append(RandomAffine(config))
|
||||
if config.augmentation.use_random_crop:
|
||||
transforms.append(RandomResizeCrop(config))
|
||||
else:
|
||||
transforms.append(Resize(config))
|
||||
if config.augmentation.use_random_horizontal_flip:
|
||||
transforms.append(RandomHorizontalFlip(config))
|
||||
if config.augmentation.use_gamma_transform:
|
||||
transforms.append(RandomGamma(config))
|
||||
if config.augmentation.use_random_color:
|
||||
transforms.append(RandomColorJitter(config))
|
||||
if config.augmentation.use_elastic_transform:
|
||||
transforms.append(ElasticTransform(config))
|
||||
transforms += [CenterCrop(config), ToTensor()]
|
||||
if config.augmentation.use_random_erasing:
|
||||
transforms.append(RandomErasing(config))
|
||||
if config.augmentation.add_gaussian_noise:
|
||||
transforms.append(AddGaussianNoise(config))
|
||||
else:
|
||||
transforms += [Resize(config), CenterCrop(config), ToTensor()]
|
||||
transforms.append(ExpandChannels())
|
||||
return torchvision.transforms.Compose(transforms)
|
|
@ -1,61 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import yacs.config
|
||||
from torch.utils.data import DataLoader
|
||||
from torchvision.datasets.vision import VisionDataset
|
||||
|
||||
|
||||
def get_number_of_samples_per_epoch(dataloader: torch.utils.data.DataLoader) -> int:
|
||||
"""
|
||||
Returns the expected number of samples for a single epoch
|
||||
"""
|
||||
total_num_samples = len(dataloader.dataset) # type: ignore
|
||||
batch_size = dataloader.batch_size
|
||||
drop_last = dataloader.drop_last
|
||||
num_samples = int(total_num_samples / batch_size) * batch_size if drop_last else total_num_samples # type:ignore
|
||||
return num_samples
|
||||
|
||||
|
||||
def get_train_dataloader(train_dataset: VisionDataset,
|
||||
config: yacs.config.CfgNode,
|
||||
seed: int,
|
||||
**kwargs: Any) -> DataLoader:
|
||||
if config.train.use_balanced_sampler:
|
||||
counts = np.bincount(train_dataset.targets)
|
||||
class_weights = counts.sum() / counts
|
||||
sample_weights = class_weights[train_dataset.targets]
|
||||
sample = torch.utils.data.WeightedRandomSampler(weights=sample_weights, num_samples=len(train_dataset))
|
||||
kwargs.pop("shuffle", None)
|
||||
kwargs.update({"sampler": sample})
|
||||
return torch.utils.data.DataLoader(
|
||||
train_dataset,
|
||||
batch_size=config.train.batch_size,
|
||||
num_workers=config.train.dataloader.num_workers,
|
||||
pin_memory=config.train.dataloader.pin_memory,
|
||||
worker_init_fn=WorkerInitFunc(seed),
|
||||
**kwargs)
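# A small numeric illustration of the class-balancing weights computed above
# (standalone sketch, not part of the original file).
if __name__ == "__main__":
    example_targets = np.array([0, 0, 0, 1])         # imbalanced binary labels
    counts = np.bincount(example_targets)             # -> [3, 1]
    class_weights = counts.sum() / counts              # -> [1.33, 4.]
    sample_weights = class_weights[example_targets]    # -> [1.33, 1.33, 1.33, 4.]
    # Each minority-class sample is drawn three times as often as each majority-class sample.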
|
||||
|
||||
|
||||
class WorkerInitFunc:
|
||||
def __init__(self, seed: int) -> None:
|
||||
self.seed = seed
|
||||
|
||||
def __call__(self, worker_id: int) -> None:
|
||||
return np.random.seed(self.seed + worker_id)
|
||||
|
||||
|
||||
def get_val_dataloader(val_dataset: VisionDataset, config: yacs.config.CfgNode, seed: int) -> DataLoader:
|
||||
return torch.utils.data.DataLoader(
|
||||
val_dataset,
|
||||
batch_size=config.validation.batch_size,
|
||||
shuffle=False,
|
||||
num_workers=config.validation.dataloader.num_workers,
|
||||
pin_memory=config.validation.dataloader.pin_memory,
|
||||
worker_init_fn=WorkerInitFunc(seed))
|
|
@ -1,111 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
import logging
|
||||
from typing import Tuple
|
||||
|
||||
import numpy as np
|
||||
import scipy
|
||||
import torch
|
||||
from sklearn.neighbors import kneighbors_graph
|
||||
|
||||
from InnerEyeDataQuality.algorithms.graph import GraphParameters, build_connectivity_graph, label_diffusion
|
||||
from InnerEyeDataQuality.utils.generic import convert_labels_to_one_hot, find_set_difference_torch
|
||||
|
||||
|
||||
class GraphClassifier:
|
||||
"""
|
||||
Graph based classifier. Builds a graph and runs label diffusion to classify new points.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
num_samples: int,
|
||||
num_classes: int,
|
||||
labels: np.ndarray,
|
||||
device: torch.device) -> None:
|
||||
self.graph = None
|
||||
self.device = device
|
||||
self.num_samples = num_samples
|
||||
self.num_classes = num_classes
|
||||
self.graph_params = GraphParameters(n_neighbors=12,
|
||||
diffusion_alpha=0.95,
|
||||
cg_solver_max_iter=10,
|
||||
diffusion_batch_size=None,
|
||||
distance_kernel="cosine")
|
||||
|
||||
# Convert to one-hot label distribution
|
||||
self.labels = np.array(labels) if isinstance(labels, list) else labels
|
||||
self.one_hot_labels = convert_labels_to_one_hot(self.labels, n_classes=num_classes)
|
||||
assert np.all(self.one_hot_labels.sum(axis=1) == 1.0)
|
||||
assert self.labels.shape[0] == num_samples
|
||||
|
||||
def build_graph(self, embeddings: np.ndarray) -> None:
|
||||
logging.info("Building a new connectivity graph")
|
||||
assert embeddings.shape[0] == self.num_samples
|
||||
|
||||
# Build a connectivity graph and k-nearest neighbours.
|
||||
n_neighbors = self.graph_params.n_neighbors
|
||||
self.knn = kneighbors_graph(embeddings, n_neighbors, metric=self.graph_params.distance_kernel, n_jobs=-1)
|
||||
self.graph = build_connectivity_graph(normalised=True,
|
||||
embeddings=embeddings,
|
||||
n_neighbors=n_neighbors,
|
||||
distance_kernel=self.graph_params.distance_kernel)
|
||||
laplacian = scipy.sparse.eye(self.num_samples) - self.graph_params.diffusion_alpha * self.graph # type: ignore
|
||||
self.laplacian_inv = scipy.sparse.linalg.inv(laplacian.tocsc()).todense()
|
||||
|
||||
def fit(self, query_batch_ids: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Run label diffusion and identify potential labels for query samples
|
||||
"""
|
||||
diffused_labels = label_diffusion(inv_laplacian=self.laplacian_inv,
|
||||
labels=self.one_hot_labels,
|
||||
query_batch_ids=query_batch_ids,
|
||||
diffusion_normalizing_factor=0.01)
|
||||
assert np.all(diffused_labels.shape == (query_batch_ids.size, self.num_classes))
|
||||
assert not np.isnan(diffused_labels).any()
|
||||
return diffused_labels
|
||||
|
||||
def filter_cases(self,
|
||||
local_ind_keep: torch.Tensor,
|
||||
local_ind_exclude: torch.Tensor,
|
||||
global_ind: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
|
||||
"""
|
||||
Filter list of cases to drop based on diffused labels. If labels and diffused labels agree, do not
|
||||
exclude the sample.
|
||||
:param local_ind_keep: original list of samples to keep
|
||||
:param local_ind_exclude: original list of samples to drop
|
||||
:param global_ind: list of global indices
|
||||
:return: updated list of indices to keep and drop
|
||||
"""
|
||||
# If input indices are empty return an empty tensor
|
||||
if torch.numel(local_ind_exclude) == 0:
|
||||
return local_ind_keep, local_ind_exclude
|
||||
|
||||
# Check input variable consistency
|
||||
num_samples = torch.numel(global_ind)
|
||||
assert num_samples == torch.numel(local_ind_exclude) + torch.numel(local_ind_keep)
|
||||
global_ind = global_ind.to(local_ind_keep.device)
|
||||
all_local_ind = torch.tensor(range(num_samples), device=local_ind_keep.device, dtype=local_ind_keep.dtype)
|
||||
|
||||
# Run graph diffusion to filter out incorrectly picked indices.
|
||||
global_ind_exclude = global_ind[local_ind_exclude].cpu().numpy()
|
||||
diffused_probs = self.fit(global_ind_exclude)
|
||||
graph_pred = np.argmax(diffused_probs, axis=1)
|
||||
initial_labels = self.labels[global_ind_exclude]
|
||||
|
||||
# Update the local indices for exclude
|
||||
local_ind_exclude_updated = local_ind_exclude[graph_pred != initial_labels]
|
||||
local_ind_keep_updated = find_set_difference_torch(all_local_ind, local_ind_exclude_updated)
|
||||
|
||||
return local_ind_keep_updated, local_ind_exclude_updated
|
||||
|
||||
def compute_mingling_index(self, indices: np.ndarray) -> None:
|
||||
"""
|
||||
Computes mingling index of each graph node based on label distribution in local graphs.
|
||||
"""
|
||||
mingling = np.zeros(indices.shape, dtype=float)
|
||||
for loop_id, _ind in enumerate(indices):
|
||||
disagreement = self.labels[self.knn[_ind].indices] != self.labels[_ind]
|
||||
mingling[loop_id] = np.sum(disagreement) / float(disagreement.size)
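# A hedged end-to-end sketch (standalone, not part of the original file): the
# embeddings and labels below are synthetic stand-ins used only to show the
# intended call order of build_graph and fit.
if __name__ == "__main__":
    rng = np.random.RandomState(0)
    num_samples, num_classes = 200, 2
    embeddings = rng.randn(num_samples, 32).astype(np.float32)
    labels = rng.randint(0, num_classes, size=num_samples)
    classifier = GraphClassifier(num_samples=num_samples, num_classes=num_classes,
                                 labels=labels, device=torch.device("cpu"))
    classifier.build_graph(embeddings)                        # k-NN graph + inverse graph Laplacian
    diffused = classifier.fit(query_batch_ids=np.arange(10))  # (10, num_classes) diffused posteriors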
|
|
@ -1,75 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
from typing import Union
|
||||
|
||||
import yacs
|
||||
import torch
|
||||
from torch import Tensor as T
|
||||
import torch.nn.functional as F
|
||||
|
||||
|
||||
def tanh_loss(logits: T) -> T:
|
||||
"""
|
||||
Penalises sparse logits with large values
|
||||
"Bootstrap Your Own Latent A New Approach to Self-Supervised Learning" Supplementary C.1
|
||||
"""
|
||||
alpha = 10
|
||||
tclip = alpha * torch.tanh(logits / alpha)
|
||||
return torch.mean(torch.pow(tclip, 2.0))
|
||||
|
||||
# Computes consistency loss between unlabelled or excluded points
|
||||
def consistency_loss(logits_source: T, logits_target: T) -> Union[T, float]:
|
||||
"""
|
||||
Class probability consistency loss based on total variation.
|
||||
"""
|
||||
if (logits_source.numel() > 0) & (logits_source.shape == logits_target.shape):
|
||||
_prob_source = torch.softmax(logits_source, dim=-1)
|
||||
_prob_target = torch.softmax(logits_target, dim=-1)
|
||||
loss = torch.mean(torch.norm(_prob_source - _prob_target.detach(), p=2, dim=-1))
|
||||
return loss
|
||||
else:
|
||||
return 0.0
|
||||
|
||||
def early_regularisation_loss(student_logits: torch.Tensor, ema_logits: torch.Tensor) -> torch.Tensor:
|
||||
"""
|
||||
Implements early regularisation loss term proposed in:
|
||||
https://arxiv.org/abs/2007.00151
|
||||
"""
|
||||
posteriors_teacher = torch.softmax(ema_logits, dim=-1).detach()
|
||||
posteriors_student = torch.softmax(student_logits, dim=-1)
|
||||
inner_prod = torch.sum(posteriors_teacher * posteriors_student, dim=-1)
|
||||
early_regularisation = torch.mean(torch.log(1 - inner_prod))
|
||||
|
||||
return early_regularisation
|
||||
|
||||
def onehot_encoding(label: torch.Tensor, n_classes: int) -> torch.Tensor:
|
||||
return torch.zeros(label.size(0), n_classes).to(label.device).scatter_(1, label.view(-1, 1), 1)
|
||||
|
||||
class CrossEntropyLoss:
|
||||
"""
|
||||
Cross entropy loss - implements label smoothing
|
||||
"""
|
||||
|
||||
def __init__(self, config: yacs.config.CfgNode):
|
||||
self.n_classes = config.dataset.n_classes
|
||||
self.use_label_smoothing = config.augmentation.use_label_smoothing
|
||||
self.epsilon = config.augmentation.label_smoothing.epsilon
|
||||
|
||||
def __call__(self, predictions: torch.Tensor, targets: torch.Tensor, reduction: str = 'mean') -> torch.Tensor:
|
||||
if self.use_label_smoothing:
|
||||
device = predictions.device
|
||||
onehot = onehot_encoding(targets, self.n_classes).type_as(predictions).to(device)
|
||||
targets = onehot * (1 - self.epsilon) + torch.ones_like(onehot).to(device) * self.epsilon / self.n_classes
|
||||
logp = F.log_softmax(predictions, dim=1)
|
||||
loss_per_sample = torch.sum(-logp * targets, dim=1)
|
||||
if reduction == 'none':
|
||||
return loss_per_sample
|
||||
elif reduction == 'mean':
|
||||
return loss_per_sample.mean()
|
||||
else:
|
||||
raise NotImplementedError
|
||||
else:
|
||||
return F.cross_entropy(predictions, targets, reduction=reduction)
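# A small numeric illustration of the label smoothing applied above (standalone
# sketch, not part of the original file): with n_classes = 2 and epsilon = 0.1,
# a hard label of 1 becomes the soft target [0.05, 0.95].
if __name__ == "__main__":
    onehot = torch.tensor([[0.0, 1.0]])
    epsilon, n_classes = 0.1, 2
    smoothed = onehot * (1 - epsilon) + torch.ones_like(onehot) * epsilon / n_classes
    print(smoothed)  # tensor([[0.0500, 0.9500]])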
|
|
@ -1,175 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Optional
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from InnerEyeDataQuality.datasets.cifar10_utils import get_cifar10_label_names
|
||||
from InnerEyeDataQuality.deep_learning.metrics.sample_metrics import SampleMetrics
|
||||
from InnerEyeDataQuality.deep_learning.metrics.plots_tensorboard import (get_scatter_plot, plot_disagreement_per_sample,
|
||||
plot_excluded_cases_coteaching)
|
||||
from InnerEyeDataQuality.deep_learning.transforms import ToNumpy
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
from torchvision.datasets import CIFAR10
|
||||
|
||||
|
||||
@dataclass()
|
||||
class JointMetrics():
|
||||
"""
|
||||
Stores metrics for co-teaching models.
|
||||
"""
|
||||
num_samples: int
|
||||
num_epochs: int
|
||||
dataset: Optional[Any] = None
|
||||
ambiguous_mislabelled_ids: np.ndarray = None
|
||||
clear_mislabelled_ids: np.ndarray = None
|
||||
true_label_entropy: np.ndarray = None
|
||||
plot_dropped_images: bool = False
|
||||
|
||||
def reset(self) -> None:
|
||||
self.kl_divergence_symmetric = np.full([self.num_samples], np.nan)
|
||||
self.active = False
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
self.reset()
|
||||
self.prediction_disagreement = np.zeros([self.num_samples, self.num_epochs], dtype=bool)
|
||||
self._initialise_dataset_properties()
|
||||
self.case_drop_histogram = np.zeros([self.num_samples, self.num_epochs + 1], dtype=bool)
|
||||
if not isinstance(self.clear_mislabelled_ids, np.ndarray) or not isinstance(self.ambiguous_mislabelled_ids,
|
||||
np.ndarray):
|
||||
return
|
||||
|
||||
self.true_mislabelled_ids = np.concatenate([self.ambiguous_mislabelled_ids, self.clear_mislabelled_ids], axis=0)
|
||||
self.case_drop_histogram[self.clear_mislabelled_ids, -1] = True
|
||||
self.case_drop_histogram[self.ambiguous_mislabelled_ids, -1] = True
|
||||
|
||||
def _initialise_dataset_properties(self) -> None:
|
||||
if self.dataset is not None:
|
||||
self.label_names = get_cifar10_label_names() if isinstance(self.dataset, CIFAR10) \
|
||||
else self.dataset.get_label_names()
|
||||
self.dataset.transform = ToNumpy() # type: ignore
|
||||
|
||||
def log_results(self, writer: SummaryWriter, epoch: int, sample_metrics: SampleMetrics) -> None:
|
||||
if (not self.active) or (self.ambiguous_mislabelled_ids is None) or (self.clear_mislabelled_ids is None):
|
||||
return
|
||||
|
||||
# KL Divergence between the two posteriors
|
||||
writer.add_scalars(main_tag='symmetric-kl-divergence', tag_scalar_dict={
|
||||
'all': np.nanmean(self.kl_divergence_symmetric),
|
||||
'ambiguous': np.nanmean(self.kl_divergence_symmetric[self.ambiguous_mislabelled_ids]),
|
||||
'clear_noise': np.nanmean(self.kl_divergence_symmetric[self.clear_mislabelled_ids])},
|
||||
global_step=epoch)
|
||||
|
||||
# Disagreement rate between the models
|
||||
writer.add_scalars(main_tag='disagreement_rate', tag_scalar_dict={
|
||||
'all': np.nanmean(self.prediction_disagreement[:, epoch]),
|
||||
'ambiguous': np.nanmean(self.prediction_disagreement[self.ambiguous_mislabelled_ids, epoch]),
|
||||
'clear_noise': np.nanmean(self.prediction_disagreement[self.clear_mislabelled_ids, epoch])},
|
||||
global_step=epoch)
|
||||
|
||||
# Add histogram for the loss values
|
||||
self.log_loss_values(writer, sample_metrics.loss_per_sample[:, epoch], epoch)
|
||||
|
||||
# Add disagreement metrics
|
||||
fig = get_scatter_plot(self.true_label_entropy, self.kl_divergence_symmetric,
|
||||
x_label="Label entropy", y_label="Symmetric-KL", y_lim=[0.0, 2.0])
|
||||
writer.add_figure('Sym-KL vs Label Entropy', figure=fig, global_step=epoch, close=True)
|
||||
|
||||
fig = plot_disagreement_per_sample(self.prediction_disagreement, self.true_label_entropy)
|
||||
writer.add_figure('Disagreement of prediction', figure=fig, global_step=epoch, close=True)
|
||||
|
||||
# Excluded cases diagnostics
|
||||
self.log_dropped_cases_metrics(writer=writer, epoch=epoch)
|
||||
|
||||
# Every 10 epochs, display the dropped cases in the co-teaching algorithm
|
||||
if epoch % 10 == 0 and self.plot_dropped_images:
|
||||
self.log_dropped_images(writer=writer, predictions=sample_metrics.predictions, epoch=epoch)
|
||||
|
||||
# Close all figures
|
||||
plt.close('all')
|
||||
|
||||
def log_dropped_cases_metrics(self, writer: SummaryWriter, epoch: int) -> None:
|
||||
"""
|
||||
Creates all diagnostics for dropped cases analysis.
|
||||
"""
|
||||
entropy_sorted_indices = np.argsort(self.true_label_entropy)
|
||||
drop_cur_epoch_mask = self.case_drop_histogram[:, epoch]
|
||||
drop_cur_epoch_ids = np.where(drop_cur_epoch_mask)[0]
|
||||
is_sample_dropped = np.any(drop_cur_epoch_mask)
|
||||
title = None
|
||||
if is_sample_dropped:
|
||||
n_dropped = float(drop_cur_epoch_ids.size)
|
||||
average_label_entropy_dropped_cases = np.mean(self.true_label_entropy[drop_cur_epoch_mask])
|
||||
n_detected_mislabelled = np.intersect1d(drop_cur_epoch_ids, self.true_mislabelled_ids).size
|
||||
n_clean_dropped = int(n_dropped - n_detected_mislabelled)
|
||||
n_detected_mislabelled_ambiguous = np.intersect1d(drop_cur_epoch_ids, self.ambiguous_mislabelled_ids).size
|
||||
n_detected_mislabelled_clear = np.intersect1d(drop_cur_epoch_ids, self.clear_mislabelled_ids).size
|
||||
perc_detected_mislabelled = n_detected_mislabelled / n_dropped * 100
|
||||
perc_detected_clear_mislabelled = n_detected_mislabelled_clear / n_dropped * 100
|
||||
perc_detected_ambiguous_mislabelled = n_detected_mislabelled_ambiguous / n_dropped * 100
|
||||
title = f"Dropped Cases: Avg label entropy {average_label_entropy_dropped_cases:.3f}\n " \
|
||||
f"Dropped cases: {n_detected_mislabelled} mislabelled ({perc_detected_mislabelled:.1f}%) - " \
|
||||
f"{n_clean_dropped} clean ({(100 - perc_detected_mislabelled):.1f}%)\n" \
|
||||
f"Num ambiguous mislabelled among detected cases: {n_detected_mislabelled_ambiguous}" \
|
||||
f" ({perc_detected_ambiguous_mislabelled:.1f}%)\n" \
|
||||
f"Num clear mislabelled among detected cases: {n_detected_mislabelled_clear}" \
|
||||
f" ({perc_detected_clear_mislabelled:.1f}%)"
|
||||
writer.add_scalars(main_tag='Number of dropped cases', tag_scalar_dict={
|
||||
'clean_cases': n_clean_dropped,
|
||||
'all_mislabelled_cases': n_detected_mislabelled,
|
||||
'mislabelled_clear_cases': n_detected_mislabelled_clear,
|
||||
'mislabelled_ambiguous_cases': n_detected_mislabelled_ambiguous}, global_step=epoch)
|
||||
writer.add_scalar(tag="Percentage of mislabelled among dropped cases",
|
||||
scalar_value=perc_detected_mislabelled, global_step=epoch)
|
||||
fig = plot_excluded_cases_coteaching(case_drop_mask=self.case_drop_histogram,
|
||||
entropy_sorted_indices=entropy_sorted_indices, title=title,
|
||||
num_epochs=self.num_epochs, num_samples=self.num_samples)
|
||||
writer.add_figure('Histogram of excluded cases', figure=fig, global_step=epoch, close=True)
|
||||
|
||||
def log_loss_values(self, writer: SummaryWriter, loss_values: np.ndarray, epoch: int) -> None:
|
||||
"""
|
||||
Logs histogram of loss values of one of the co-teaching models.
|
||||
"""
|
||||
writer.add_histogram('loss/all', loss_values, epoch)
|
||||
writer.add_histogram('loss/ambiguous_noise', loss_values[self.ambiguous_mislabelled_ids], epoch)
|
||||
writer.add_histogram('loss/clear_noise', loss_values[self.clear_mislabelled_ids], epoch)
|
||||
|
||||
def log_dropped_images(self, writer: SummaryWriter, predictions: np.ndarray, epoch: int) -> None:
|
||||
"""
|
||||
Logs images dropped during co-teaching training
|
||||
"""
|
||||
dropped_cases = np.where(self.case_drop_histogram[:, epoch])[0]
|
||||
if dropped_cases.size > 0 and self.dataset is not None:
|
||||
dropped_cases = dropped_cases[np.argsort(self.true_label_entropy[dropped_cases])]
|
||||
fig = self.plot_batch_images_and_labels(predictions, list_indices=dropped_cases[:64])
|
||||
writer.add_figure("Dropped images with lowest entropy", figure=fig, global_step=epoch, close=True)
|
||||
fig = self.plot_batch_images_and_labels(predictions, list_indices=dropped_cases[-64:])
|
||||
writer.add_figure("Dropped images with highest entropy", figure=fig, global_step=epoch, close=True)
|
||||
|
||||
kept_cases = np.where(~self.case_drop_histogram[:, epoch])[0]
|
||||
kept_cases = kept_cases[np.argsort(self.true_label_entropy[kept_cases])]
|
||||
fig = self.plot_batch_images_and_labels(predictions, kept_cases[-64:])
|
||||
writer.add_figure("Kept images with highest entropy", figure=fig, global_step=epoch, close=True)
|
||||
|
||||
def plot_batch_images_and_labels(self, predictions: np.ndarray, list_indices: np.ndarray) -> plt.Figure:
|
||||
"""
|
||||
Plots a batch of images along with their labels and predictions. Noisy cases are colored in red, clean cases
|
||||
in green. Images are assumed to be numpy images (use ToNumpy() transform).
|
||||
"""
|
||||
assert self.dataset is not None
|
||||
fig, ax = plt.subplots(8, 8, figsize=(8, 10))
|
||||
ax = ax.ravel()
|
||||
for i, index in enumerate(list_indices):
|
||||
predicted = int(predictions[index])
|
||||
color = "red" if index in self.true_mislabelled_ids else "green"
|
||||
_, img, training_label = self.dataset.__getitem__(index)
|
||||
ax[i].imshow(img)
|
||||
ax[i].set_axis_off()
|
||||
ax[i].set_title(f"Label: {self.label_names[training_label]}\nPred: {self.label_names[predicted]}",
|
||||
color=color, fontsize="x-small")
|
||||
return fig
|
||||
|
|
@ -1,87 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
from typing import Optional, List, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib as mpl
|
||||
|
||||
mpl.use('Agg') # No display
|
||||
|
||||
|
||||
def get_scatter_plot(x_data: np.ndarray, y_data: np.ndarray, scale: np.ndarray = None,
|
||||
title: str = '', x_label: str = '', y_label: str = '',
|
||||
y_lim: Optional[List[float]] = None) -> plt.Figure:
|
||||
fig, ax = plt.subplots()
|
||||
ax.scatter(x_data, y_data, alpha=0.3, s=scale)
|
||||
ax.set_xlabel(x_label)
|
||||
ax.set_ylabel(y_label)
|
||||
ax.set_title(title)
|
||||
ax.grid()
|
||||
if y_lim:
|
||||
ax.set_ylim(y_lim)
|
||||
|
||||
return fig
|
||||
|
||||
|
||||
def get_histogram_plot(data: Union[List[np.ndarray], np.ndarray], num_bins: int, title: str = '',
|
||||
x_label: str = '', x_lim: Tuple[float, float] = None) -> plt.Figure:
|
||||
"""
|
||||
Creates a histogram plot for a given set of numpy arrays specified in the `data` argument.
|
||||
Returns the generated figure object.
|
||||
"""
|
||||
|
||||
fig, ax = plt.subplots()
|
||||
cm = plt.cm.get_cmap('Pastel1')
|
||||
if isinstance(data, list):
|
||||
for _d_id, _d in enumerate(data):
|
||||
ax.hist(_d, density=True, bins=num_bins, color=cm(_d_id), alpha=0.6)
|
||||
else:
|
||||
ax.hist(data, density=True, bins=num_bins)
|
||||
ax.set_ylabel('Sample density')
|
||||
ax.set_xlabel(x_label)
|
||||
ax.set_title(title)
|
||||
ax.grid()
|
||||
|
||||
if x_lim:
|
||||
ax.set_xlim(x_lim)
|
||||
|
||||
return fig
|
||||
|
||||
|
||||
def plot_excluded_cases_coteaching(case_drop_mask: np.ndarray,
|
||||
entropy_sorted_indices: np.ndarray,
|
||||
num_epochs: int,
|
||||
num_samples: int,
|
||||
title: Optional[str] = None) -> plt.Figure:
|
||||
"""
|
||||
Plots the excluded cases in co-teaching training - training epochs vs sample_ids
|
||||
Samples are sorted based on their true ambiguity score.
|
||||
"""
|
||||
fig, ax = plt.subplots(figsize=(15, 10))
|
||||
ax.imshow(case_drop_mask[entropy_sorted_indices, :].astype(np.uint8) * 255, cmap="gray",
|
||||
extent=[0, num_epochs, 0, num_samples], vmin=0, vmax=10, aspect='auto')
|
||||
ax.set_xlabel("Number of epochs")
|
||||
ax.set_ylabel("Training sample ids (ordered by true entropy)")
|
||||
if title:
|
||||
ax.set_title(title)
|
||||
return fig
|
||||
|
||||
|
||||
def plot_disagreement_per_sample(prediction_disagreement: np.ndarray, true_label_entropy: np.ndarray) -> plt.Figure:
|
||||
"""
|
||||
Plots predicted class disagreement between two models - training epochs vs sample_ids
|
||||
Samples are sorted based on their true ambiguity score.
|
||||
"""
|
||||
entropy_sorted_indices = np.argsort(true_label_entropy)
|
||||
fig, ax = plt.subplots()
|
||||
num_epochs = prediction_disagreement.shape[1]
|
||||
num_samples = prediction_disagreement.shape[0]
|
||||
ax.imshow(prediction_disagreement[entropy_sorted_indices, :].astype(np.uint8) * 2, cmap="gray",
|
||||
extent=[0, num_epochs, 0, num_samples], vmin=0, vmax=1, aspect='auto')
|
||||
ax.set_xlabel("Number of epochs")
|
||||
ax.set_ylabel("Training sample ids (ordered by true entropy)")
|
||||
return fig
|
|
@ -1,170 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional, List
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from sklearn.metrics import auc, f1_score, precision_recall_curve, roc_auc_score
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
|
||||
@dataclass
|
||||
class SampleMetrics():
|
||||
"""
|
||||
Stores data required for training monitoring of individual model and
|
||||
post-training analysis in sample selection
|
||||
"""
|
||||
name: str
|
||||
num_epochs: int
|
||||
num_samples: int
|
||||
num_classes: int
|
||||
clear_labels: np.ndarray = None
|
||||
ambiguous_mislabelled_ids: np.ndarray = None
|
||||
clear_mislabelled_ids: np.ndarray = None
|
||||
embeddings_size: Optional[int] = None
|
||||
true_label_entropy: Optional[np.ndarray] = None
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
self.reset()
|
||||
self.loss_per_sample = np.full([self.num_samples, self.num_epochs], np.nan)
|
||||
self.logits_per_sample = np.full([self.num_samples, self.num_classes, self.num_epochs], np.nan)
|
||||
|
||||
if self.clear_mislabelled_ids is not None:
|
||||
full_set = set(range(self.num_samples))
|
||||
if self.ambiguous_mislabelled_ids is not None:
|
||||
self.all_mislabelled_ids = np.concatenate(
|
||||
[self.ambiguous_mislabelled_ids, self.clear_mislabelled_ids], 0)
|
||||
else:
|
||||
self.all_mislabelled_ids = self.clear_mislabelled_ids
|
||||
self.all_clean_label_ids = np.array(list(full_set.difference(self.all_mislabelled_ids)))
|
||||
|
||||
def reset(self) -> None:
|
||||
self.loss_optimised: List[float] = list()
|
||||
self.predictions = np.full([self.num_samples], np.nan)
|
||||
self.labels = np.full([self.num_samples], np.nan)
|
||||
self.probabilities = np.full([self.num_samples, self.num_classes], np.nan)
|
||||
self.correct_predictions_teacher = np.full(self.num_samples, np.nan)
|
||||
self.correct_predictions = np.full([self.num_samples], np.nan)
|
||||
self.embeddings_per_sample = np.full([self.num_samples, self.embeddings_size],
|
||||
np.nan) if self.embeddings_size is not None else None
|
||||
|
||||
def get_average_accuracy(self) -> np.ndarray:
|
||||
return np.nanmean(self.correct_predictions)
|
||||
|
||||
def get_average_loss(self) -> np.ndarray:
|
||||
return np.mean(self.loss_optimised)
|
||||
|
||||
def log_results(self, epoch: int, name: str, writer: Optional[SummaryWriter] = None) -> None:
|
||||
mean_loss = self.get_average_loss()
|
||||
accuracy = self.get_average_accuracy()
|
||||
accuracy_on_clean = np.nanmean(self.predictions == self.clear_labels)
|
||||
logging.info(f'{self.name} \t accuracy: {accuracy:.2f} \t loss: {mean_loss:.2e}')
|
||||
if writer is None:
|
||||
return
|
||||
# Store accuracy and loss metrics
|
||||
writer.add_scalar(tag='loss', scalar_value=mean_loss, global_step=epoch)
|
||||
writer.add_scalar(tag='accuracy/acc_on_sampled_labels', scalar_value=accuracy, global_step=epoch)
|
||||
writer.add_scalar(tag='accuracy/acc_on_clean_labels', scalar_value=accuracy_on_clean, global_step=epoch)
|
||||
|
||||
# Binary classification case
|
||||
if self.num_classes == 2:
|
||||
logits = self.logits_per_sample[:, :, epoch]
|
||||
labels = self.labels
|
||||
predictions = np.argmax(logits, axis=-1)
|
||||
available_indices = np.where(~np.isnan(self.labels))[0]
|
||||
roc_auc = roc_auc_score(labels[available_indices], logits[available_indices, 1].reshape(-1))
|
||||
f1 = f1_score(labels[available_indices], predictions[available_indices])
|
||||
precision, recall, _ = precision_recall_curve(labels[available_indices], logits[available_indices, 1].reshape(-1))
|
||||
pr_auc = auc(recall, precision)
|
||||
writer.add_scalar(tag='roc_auc', scalar_value=roc_auc, global_step=epoch)
|
||||
writer.add_scalar(tag='f1_score', scalar_value=f1, global_step=epoch)
|
||||
writer.add_scalar(tag='pr_auc', scalar_value=pr_auc, global_step=epoch)
|
||||
logging.info(f'{name} \t roc_auc: {roc_auc: .2f} \t pr_auc: {pr_auc: .2f} \t f1_score: {f1: .2f}')
|
||||
if self.clear_mislabelled_ids is not None:
|
||||
get_sub_f1 = lambda ind: f1_score(labels[ind], predictions[ind]) if len(ind) > 0 else 0
|
||||
clean_available = np.intersect1d(self.all_clean_label_ids, available_indices)
|
||||
mislabelled_available = np.intersect1d(self.all_mislabelled_ids, available_indices)
|
||||
scalar_dict = {
|
||||
'clean_cases': get_sub_f1(clean_available),
|
||||
'all_mislabelled_cases': get_sub_f1(mislabelled_available)}
|
||||
writer.add_scalars(main_tag='f1_breakdown', tag_scalar_dict=scalar_dict, global_step=epoch)
|
||||
get_sub_auc = lambda ind: roc_auc_score(labels[ind], predictions[ind]) if len(ind) > 0 else 0
|
||||
scalar_dict = {
|
||||
'clean_cases': get_sub_auc(clean_available),
|
||||
'all_mislabelled_cases': get_sub_auc(mislabelled_available)}
|
||||
writer.add_scalars(main_tag='auc_breakdown', tag_scalar_dict=scalar_dict, global_step=epoch)
|
||||
|
||||
# Add histogram for the loss values
|
||||
self.log_loss_values(writer, self.loss_per_sample[:, epoch], epoch)
|
||||
|
||||
# Breakdown of the accuracy on different sample types
|
||||
if self.clear_mislabelled_ids is not None:
|
||||
get_sub_acc = lambda ind: np.nanmean(self.correct_predictions[ind])
|
||||
scalar_dict = {
|
||||
'clean_cases': get_sub_acc(self.all_clean_label_ids),
|
||||
'all_mislabelled_cases': get_sub_acc(self.all_mislabelled_ids),
|
||||
'mislabelled_clear_cases': get_sub_acc(self.clear_mislabelled_ids)}
|
||||
if self.ambiguous_mislabelled_ids is not None:
|
||||
scalar_dict.update({'mislabelled_ambiguous_cases': get_sub_acc(self.ambiguous_mislabelled_ids)})
|
||||
writer.add_scalars(main_tag='accuracy_breakdown', tag_scalar_dict=scalar_dict, global_step=epoch)
|
||||
|
||||
# Log mean teacher's accuracy
|
||||
if not np.isnan(self.correct_predictions_teacher).any():
|
||||
writer.add_scalar("teacher_accuracy", np.nanmean(self.correct_predictions_teacher), epoch)
|
||||
|
||||
def get_margin(self, epoch: int) -> np.ndarray:
|
||||
"""
|
||||
Get the margin for each sample defined as logits(y) - max_{y != t}[logits(t)]
|
||||
"""
|
||||
margin = np.full(self.num_samples, np.nan)
|
||||
logits = self.logits_per_sample[:, :, epoch]
|
||||
for i in range(self.num_samples):
|
||||
label = int(self.labels[i])
|
||||
assigned_logit = logits[i, label]
|
||||
order = np.argsort(logits[i])
|
||||
order = order[order != label]
|
||||
other_max_logits = logits[i, order[-1]]
|
||||
margin[i] = assigned_logit - other_max_logits
|
||||
return margin
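# Worked example of the margin definition above (values are illustrative only):
# for logits [2.0, 0.5, 1.0] with assigned label 0, the runner-up logit is 1.0,
# so the margin is 2.0 - 1.0 = 1.0; a negative margin means the model currently
# prefers a class other than the assigned label.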
|
||||
|
||||
def log_loss_values(self, writer: SummaryWriter, loss_values: np.ndarray, epoch: int) -> None:
|
||||
"""
|
||||
Logs histogram of loss values of one of the co-teaching models.
|
||||
"""
|
||||
writer.add_histogram('loss/all', loss_values, epoch)
|
||||
|
||||
def append_batch(
|
||||
self,
|
||||
epoch: int,
|
||||
logits: torch.Tensor,
|
||||
labels: torch.Tensor,
|
||||
loss: float,
|
||||
indices: list,
|
||||
per_sample_loss: torch.Tensor,
|
||||
embeddings: Optional[np.ndarray] = None,
|
||||
teacher_logits: Optional[torch.Tensor] = None) -> None:
|
||||
"""
|
||||
Append stats collected from batch of samples to metrics
|
||||
"""
|
||||
if teacher_logits is not None:
|
||||
self.correct_predictions_teacher[indices] = torch.eq(torch.argmax(teacher_logits, dim=-1),
|
||||
labels).type(torch.float32).cpu()
|
||||
self.correct_predictions[indices] = torch.eq(torch.argmax(logits, dim=-1), labels).type(torch.float32).cpu()
|
||||
self.predictions[indices] = torch.argmax(logits, dim=-1).cpu()
|
||||
self.loss_per_sample[indices, epoch] = per_sample_loss.cpu()
|
||||
self.logits_per_sample[indices, :, epoch] = logits.cpu()
|
||||
self.probabilities[indices, :] = torch.softmax(logits, dim=-1).cpu()
|
||||
self.labels[indices] = labels.cpu()
|
||||
self.loss_optimised.append(loss)
|
||||
|
||||
if embeddings is not None:
|
||||
# We don't know the size of the features in advance.
|
||||
if self.embeddings_size is None:
|
||||
self.embeddings_size = embeddings.shape[1]
|
||||
self.embeddings_per_sample = np.full([self.num_samples, self.embeddings_size], np.nan)
|
||||
assert self.embeddings_per_sample is not None
|
||||
self.embeddings_per_sample[indices, :] = embeddings
|
|
@ -1,92 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from InnerEyeDataQuality.deep_learning.metrics.joint_metrics import JointMetrics
|
||||
from InnerEyeDataQuality.deep_learning.metrics.sample_metrics import SampleMetrics
|
||||
from torch.utils.data import Dataset
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
|
||||
|
||||
class MetricTracker(object):
|
||||
"""
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
output_dir: str,
|
||||
num_epochs: int,
|
||||
num_samples_total: int,
|
||||
num_samples_per_epoch: int,
|
||||
num_classes: int,
|
||||
save_tf_events: bool,
|
||||
dataset: Optional[Dataset] = None,
|
||||
name: str = "default_metric",
|
||||
**sample_info_kwargs: Any):
|
||||
"""
|
||||
Class to track model training metrics.
|
||||
If a co-teaching model is trained, joint model metrics such as the disagreement rate and the symmetric KL divergence are stored.
|
||||
It also stores per-sample loss and logit values for each epoch for post-training analysis.
|
||||
This stored data can be utilised in data selection simulation.
|
||||
"""
|
||||
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
self.output_dir = output_dir
|
||||
self.name = name
|
||||
self.num_classes = num_classes
|
||||
self.num_samples_total = num_samples_total
|
||||
self.num_samples_per_epoch = num_samples_per_epoch
|
||||
clean_targets = dataset.clean_targets if hasattr(dataset, "clean_targets") else None # type: ignore
|
||||
|
||||
self.joint_model_metrics = JointMetrics(num_samples_total, num_epochs, dataset, **sample_info_kwargs)
|
||||
self.sample_metrics = SampleMetrics(name, num_epochs, num_samples_total, num_classes,
|
||||
clear_labels=clean_targets,
|
||||
embeddings_size=None, **sample_info_kwargs)
|
||||
self.writer = SummaryWriter(log_dir=output_dir) if save_tf_events else None
|
||||
|
||||
def reset(self) -> None:
|
||||
self.sample_metrics.reset()
|
||||
self.joint_model_metrics.reset()
|
||||
|
||||
def log_epoch_and_reset(self, epoch: int) -> None:
|
||||
# assert np.count_nonzero(~np.isnan(self.sample_metrics.loss_per_sample[:, epoch])) == self.num_samples_per_epoch
|
||||
self.sample_metrics.log_results(epoch=epoch, name=self.name, writer=self.writer)
|
||||
if self.writer:
|
||||
self.joint_model_metrics.log_results(self.writer, epoch, self.sample_metrics)
|
||||
# Reset epoch metrics
|
||||
self.reset()
|
||||
|
||||
def append_batch_aggregate(self, epoch: int, logits_x: torch.Tensor, logits_y: torch.Tensor,
|
||||
dropped_cases: torch.Tensor, indices: torch.Tensor) -> None:
|
||||
"""
|
||||
Stores the disagreement stats for co-teaching models
|
||||
"""
|
||||
post_x = torch.softmax(logits_x, dim=-1)
|
||||
post_y = torch.softmax(logits_y, dim=-1)
|
||||
sym_kl_per_sample = torch.sum(post_x * torch.log(post_x / post_y) + post_y * torch.log(post_y / post_x), dim=-1)
|
||||
|
||||
pred_x = torch.argmax(logits_x, dim=-1)
|
||||
pred_y = torch.argmax(logits_y, dim=-1)
|
||||
class_pred_disagreement = pred_x != pred_y
|
||||
self.joint_model_metrics.kl_divergence_symmetric[indices] = sym_kl_per_sample.cpu().numpy()
|
||||
self.joint_model_metrics.prediction_disagreement[indices, epoch] = class_pred_disagreement.cpu().numpy()
|
||||
self.joint_model_metrics.case_drop_histogram[dropped_cases.cpu().numpy(), epoch] = True
|
||||
self.joint_model_metrics.active = True
|
||||
|
||||
def save_loss(self) -> None:
|
||||
output_path = os.path.join(self.output_dir, f'{self.name}_training_stats.npz')
|
||||
if hasattr(self.joint_model_metrics, "case_drop_histogram"):
|
||||
np.savez(output_path,
|
||||
loss_per_sample=self.sample_metrics.loss_per_sample,
|
||||
logits_per_sample=self.sample_metrics.logits_per_sample,
|
||||
dropped_cases=self.joint_model_metrics.case_drop_histogram[:, :-1])
|
||||
else:
|
||||
np.savez(output_path,
|
||||
loss_per_sample=self.sample_metrics.loss_per_sample,
|
||||
logits_per_sample=self.sample_metrics.logits_per_sample)
|
|
@ -1,69 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
from typing import Any, List, Tuple
|
||||
|
||||
import numpy as np
|
||||
from InnerEyeDataQuality.configs.config_node import ConfigNode
|
||||
from InnerEyeDataQuality.deep_learning.collect_embeddings import get_all_embeddings, register_embeddings_collector
|
||||
from InnerEyeDataQuality.deep_learning.utils import get_run_config
|
||||
from InnerEyeDataQuality.deep_learning.trainers.vanilla_trainer import VanillaTrainer
|
||||
from InnerEyeDataQuality.deep_learning.trainers.model_trainer_base import ModelTrainer
|
||||
from InnerEyeDataQuality.deep_learning.trainers.co_teaching_trainer import CoTeachingTrainer
|
||||
from InnerEyeDataQuality.deep_learning.dataloader import get_val_dataloader
|
||||
from InnerEyeDataQuality.utils.custom_types import SelectorTypes as ST
|
||||
|
||||
NUM_MULTIPLE_RUNS = 10
|
||||
|
||||
def inference_model(dataloader: Any, model_trainer: ModelTrainer, use_mc_sampling: bool = False) -> Tuple[List, List]:
|
||||
"""
|
||||
Performs an inference pass on a single model
|
||||
:param config:
|
||||
:return:
|
||||
"""
|
||||
# Inference on given dataloader
|
||||
all_model_cnn_embeddings = register_embeddings_collector(model_trainer.models, use_only_in_train=False)
|
||||
trackers = model_trainer.run_inference(dataloader, use_mc_sampling)
|
||||
embs = get_all_embeddings(all_model_cnn_embeddings)
|
||||
probs = [metric_tracker.sample_metrics.probabilities for metric_tracker in trackers]
|
||||
return embs, probs
|
||||
|
||||
|
||||
def inference_ensemble(dataset: Any, config: ConfigNode) -> Tuple[np.ndarray, np.ndarray, np.ndarray, ModelTrainer]:
|
||||
"""
|
||||
Returns:
|
||||
embeddings: 2D numpy array - containing sample embeddings obtained from a CNN.
|
||||
[num_samples, embedding_size]
|
||||
posteriors: 2D numpy array - containing class posteriors obtained from a CNN.
|
||||
[num_samples, num_classes]
|
||||
trainer: Model trainer object built using config file
|
||||
"""
|
||||
|
||||
# Reload the model from config
|
||||
model_trainer_class = CoTeachingTrainer if config.train.use_co_teaching else VanillaTrainer
|
||||
config = get_run_config(config, config.train.seed)
|
||||
model_trainer = model_trainer_class(config=config)
|
||||
model_trainer.load_checkpoints(restore_scheduler=False)
|
||||
|
||||
# Prepare output data structures
|
||||
all_embeddings = []
|
||||
all_posteriors = []
|
||||
|
||||
# Run inference on the given dataset
|
||||
use_mc_sampling = config.model.use_dropout and ST(config.selector.type[0]) == ST.BaldSelector
|
||||
multi_inference = config.train.use_self_supervision or config.model.use_dropout
|
||||
num_runs = NUM_MULTIPLE_RUNS if multi_inference else 1
|
||||
for run_ind in range(num_runs):
|
||||
dataloader = get_val_dataloader(dataset, config, seed=config.train.seed + run_ind)
|
||||
embeddings, posteriors = inference_model(dataloader, model_trainer, use_mc_sampling)
|
||||
all_embeddings.append(embeddings)
|
||||
all_posteriors.append(posteriors)
|
||||
|
||||
# Aggregate results and return
|
||||
embeddings = np.mean(all_embeddings, axis=(0, 1))
|
||||
all_posteriors = np.stack(all_posteriors, axis=0)
|
||||
avg_posteriors = np.mean(all_posteriors, axis=(0, 1))
|
||||
|
||||
return embeddings, avg_posteriors, all_posteriors, model_trainer
|
|
@ -1,51 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
import logging
|
||||
|
||||
import numpy as np
|
||||
|
||||
class ForgetRateScheduler(object):
|
||||
"""
|
||||
Forget rate scheduler for the co-teaching model
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
num_epochs: int,
|
||||
forget_rate: float = 0.0,
|
||||
num_gradual: int = 10,
|
||||
num_warmup_epochs: int = 25,
|
||||
start_epoch: int = 0):
|
||||
"""
|
||||
|
||||
:param num_epochs: The total number of training epochs
|
||||
:param forget_rate: The base forget rate
|
||||
:param num_gradual: The number of epochs to gradual increase the forget_rate to its base value
|
||||
:param start_epoch: Allows manually set the start epoch if training is resumed.
|
||||
"""
|
||||
logging.info(f"No samples will be excluded in co-teaching for the first {num_warmup_epochs} epochs.")
|
||||
if num_gradual <= num_warmup_epochs:
|
||||
logging.warning(f"Num gradual {num_gradual} <= num warm up epochs. This argument will be ignored.")
|
||||
assert 0 <= forget_rate < 1.
|
||||
self.forget_rate_schedule = np.ones(num_epochs) * forget_rate
|
||||
self.forget_rate_schedule[:num_gradual] = np.linspace(0, forget_rate, num_gradual)
|
||||
self.forget_rate_schedule[:num_warmup_epochs] = np.zeros(num_warmup_epochs)
|
||||
self.current_epoch = start_epoch
|
||||
|
||||
def step(self) -> None:
|
||||
"""
|
||||
Step the current epoch by one
|
||||
:return:
|
||||
"""
|
||||
self.current_epoch += 1
|
||||
|
||||
@property
|
||||
def get_forget_rate(self) -> float:
|
||||
"""
|
||||
|
||||
:return: The current forget rate
|
||||
"""
|
||||
current_epoch = min(self.current_epoch, len(self.forget_rate_schedule) - 1)
|
||||
return float(self.forget_rate_schedule[current_epoch])
|
|
@ -1,5 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
from typing import Tuple, Any
|
||||
|
||||
from InnerEyeDataQuality.deep_learning.architectures.lambda_layer import Lambda
|
||||
from InnerEyeDataQuality.deep_learning.self_supervised.ssl_classifier_module import get_encoder_output_dim
|
||||
from InnerEyeDataQuality.deep_learning.self_supervised.utils import create_ssl_encoder
|
||||
from torch import Tensor as T
|
||||
from torch import nn
|
||||
|
||||
|
||||
class _MLP(nn.Module):
|
||||
def __init__(self, input_dim: int, hidden_size: int, output_dim: int) -> None:
|
||||
super().__init__()
|
||||
self.output_dim = output_dim
|
||||
self.input_dim = input_dim
|
||||
self.model = nn.Sequential(
|
||||
nn.Linear(input_dim, hidden_size, bias=False),
|
||||
nn.BatchNorm1d(hidden_size),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Linear(hidden_size, output_dim, bias=True))
|
||||
|
||||
def forward(self, x: T) -> T:
|
||||
x = self.model(x)
|
||||
return x
|
||||
|
||||
class SSLEncoder(nn.Module):
|
||||
def __init__(self, encoder_name: str, dataset_name: str, use_output_pooling: bool = True):
|
||||
super().__init__()
|
||||
self.cnn_model = create_ssl_encoder(encoder_name=encoder_name, dataset_name=dataset_name)
|
||||
self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
|
||||
self.use_output_pooling = use_output_pooling
|
||||
|
||||
def forward(self, x: T) -> T:
|
||||
x = self.cnn_model(x)
|
||||
x = x[-1] if isinstance(x, list) else x
|
||||
x = self.avgpool(x).view(x.size(0), -1) if self.use_output_pooling else x
|
||||
return x
|
||||
|
||||
def get_output_feature_dim(self) -> int:
|
||||
return get_encoder_output_dim(self)
|
||||
|
||||
class SiameseArm(nn.Module):
|
||||
def __init__(self, *encoder_kwargs: Any) -> None:
|
||||
super().__init__()
|
||||
|
||||
self.encoder = SSLEncoder(*encoder_kwargs) # Encoder
|
||||
self.projector = _MLP(input_dim=self.encoder.get_output_feature_dim(), hidden_size=2048, output_dim=128)
|
||||
self.predictor = _MLP(input_dim=self.projector.output_dim, hidden_size=128, output_dim=128)
|
||||
self.projector_normalised = nn.Sequential(self.projector,
|
||||
Lambda(lambda x: nn.functional.normalize(x, dim=-1)))
|
||||
|
||||
def forward(self, x: T) -> Tuple[T, T, T]:
|
||||
y = self.encoder(x)
|
||||
z = self.projector(y)
|
||||
h = self.predictor(z)
|
||||
return y, z, h
|
|
@ -1,142 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
from copy import deepcopy
|
||||
from typing import Any, Dict, Iterator, List, Tuple, Optional
|
||||
|
||||
import pytorch_lightning as pl
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from InnerEyeDataQuality.deep_learning.self_supervised.byol.byol_models import SiameseArm
|
||||
from InnerEyeDataQuality.deep_learning.self_supervised.byol.byol_moving_average import BYOLMAWeightUpdate
|
||||
from pl_bolts.optimizers.lars_scheduling import LARSWrapper
|
||||
from pl_bolts.optimizers.lr_scheduler import LinearWarmupCosineAnnealingLR
|
||||
from torch import Tensor as T
|
||||
from torch.optim import Adam
|
||||
|
||||
BatchType = Tuple[List, T]
|
||||
|
||||
class BYOLInnerEye(pl.LightningModule):
|
||||
"""
|
||||
Implementation of `Bootstrap Your Own Latent (BYOL) <https://arxiv.org/pdf/2006.07733.pdf>`
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
num_samples: int,
|
||||
learning_rate: float,
|
||||
batch_size: int,
|
||||
encoder_name: str,
|
||||
warmup_epochs: int,
|
||||
dataset_name: Optional[str] = None,
|
||||
weight_decay: float = 1e-6,
|
||||
**kwargs: Any) -> None:
|
||||
"""
|
||||
Args:
|
||||
num_samples: Number of samples present in training dataset / dataloader.
|
||||
learning_rate: Optimizer learning rate.
|
||||
batch_size: Sample batch size used in gradient updates.
|
||||
encoder_name: Type of CNN encoder used to extract image embeddings. The options are:
|
||||
{'resnet18', 'resnet50', 'resnet101'}.
|
||||
warmup_epochs: Number of epochs for scheduler warm up (linear increase from 0 to base_lr).
|
||||
dataset_name: Name of training dataset - If set to "CIFAR10" then the encoder is adjusted to image size.
|
||||
weight_decay: L2-norm weight decay.
|
||||
"""
|
||||
super().__init__()
|
||||
self.save_hyperparameters()
|
||||
|
||||
self.min_learning_rate = 1e-4
|
||||
self.online_network = SiameseArm(encoder_name, dataset_name)
|
||||
self.target_network = deepcopy(self.online_network)
|
||||
self.weight_callback = BYOLMAWeightUpdate()
|
||||
|
||||
def on_train_batch_end(self, *args: Any, **kwargs: Any) -> None:
|
||||
# Add callback for user automatically since it's key to BYOL weight update
|
||||
self.weight_callback.on_before_zero_grad(self.trainer, self)
|
||||
|
||||
def forward(self, x: T) -> T: # type: ignore
|
||||
return self.target_network.encoder(x)
|
||||
|
||||
def cosine_loss(self, a: T, b: T) -> T:
|
||||
a = F.normalize(a, dim=-1)
|
||||
b = F.normalize(b, dim=-1)
|
||||
neg_cos_sim = -(a * b).sum(dim=-1).mean()
|
||||
return neg_cos_sim
|
||||
|
||||
def shared_step(self, batch: BatchType, batch_idx: int) -> T:
|
||||
(img_1, img_2), y = batch
|
||||
|
||||
# Image 1 to image 2 loss
|
||||
_, _, h_img1 = self.online_network(img_1)
|
||||
_, _, h_img2 = self.online_network(img_2)
|
||||
with torch.no_grad():
|
||||
_, z_img1, _ = self.target_network(img_1)
|
||||
_, z_img2, _ = self.target_network(img_2)
|
||||
loss = 0.5 * (self.cosine_loss(h_img1, z_img2.detach())
|
||||
+ self.cosine_loss(h_img2, z_img1.detach()))
|
||||
|
||||
return loss
|
||||
|
||||
def training_step(self, batch: BatchType, batch_idx: int) -> T: # type: ignore
|
||||
loss = self.shared_step(batch, batch_idx)
|
||||
self.log_dict({'byol/train_loss': loss, 'byol/tau': self.weight_callback.current_tau})
|
||||
|
||||
return loss
|
||||
|
||||
def validation_step(self, batch: BatchType, batch_idx: int) -> T: # type: ignore
|
||||
loss = self.shared_step(batch, batch_idx)
|
||||
self.log_dict({'byol/validation_loss': loss})
|
||||
|
||||
return loss
|
||||
|
||||
def setup(self, *args: Any, **kwargs: Any) -> None:
|
||||
global_batch_size = self.trainer.world_size * self.hparams.batch_size # type: ignore
|
||||
self.train_iters_per_epoch = self.hparams.num_samples // global_batch_size # type: ignore
|
||||
|
||||
def configure_optimizers(self) -> Any:
|
||||
# TRICK 1 (Use lars + filter weights)
|
||||
# exclude certain parameters
|
||||
parameters = self.exclude_from_wt_decay(self.online_network.named_parameters(),
|
||||
weight_decay=self.hparams.weight_decay) # type: ignore
|
||||
optimizer = LARSWrapper(Adam(parameters, lr=self.hparams.learning_rate)) # type: ignore
|
||||
|
||||
# Trick 2 (after each step)
|
||||
self.hparams.warmup_epochs = self.hparams.warmup_epochs * self.train_iters_per_epoch # type: ignore
|
||||
max_epochs = self.trainer.max_epochs * self.train_iters_per_epoch
|
||||
|
||||
linear_warmup_cosine_decay = LinearWarmupCosineAnnealingLR(
|
||||
optimizer,
|
||||
warmup_epochs=self.hparams.warmup_epochs, # type: ignore
|
||||
max_epochs=max_epochs,
|
||||
warmup_start_lr=0,
|
||||
eta_min=self.min_learning_rate,
|
||||
)
|
||||
|
||||
scheduler = {'scheduler': linear_warmup_cosine_decay, 'interval': 'step', 'frequency': 1}
|
||||
|
||||
return [optimizer], [scheduler]
|
||||
|
||||
def exclude_from_wt_decay(self,
|
||||
named_params: Iterator[Tuple[str, T]],
|
||||
weight_decay: float,
|
||||
skip_list: List[str] = ['bias', 'bn']) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Convolution-Linear bias-terms and batch-norm parameters are excluded from l2-norm weight decay regularisation.
|
||||
https://arxiv.org/pdf/2006.07733.pdf Section 3.3 Optimisation and Section F.5.
|
||||
"""
|
||||
params = []
|
||||
excluded_params = []
|
||||
|
||||
for name, param in named_params:
|
||||
if not param.requires_grad:
|
||||
continue
|
||||
elif any(layer_name in name for layer_name in skip_list):
|
||||
excluded_params.append(param)
|
||||
else:
|
||||
params.append(param)
|
||||
|
||||
return [
|
||||
{'params': params, 'weight_decay': weight_decay},
|
||||
{'params': excluded_params, 'weight_decay': 0.}
|
||||
]
|
|
@ -1,58 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
import math
|
||||
import torch
|
||||
|
||||
import pytorch_lightning as pl
|
||||
from pytorch_lightning import Callback
|
||||
|
||||
|
||||
class BYOLMAWeightUpdate(Callback):
|
||||
"""
|
||||
Weight updates for BYOL moving average encoder (e.g. teacher). Pl_module is expected to contain three attributes:
|
||||
- ``pl_module.online_network``
|
||||
- ``pl_module.target_network``
|
||||
- ``pl_module.global_step``
|
||||
|
||||
Updates the target_network params using an exponential moving average update rule weighted by tau.
|
||||
Tau parameter is increased from its base value to 1.0 with every training step scheduled with a cosine function.
|
||||
global_step correspond to the total number of sgd updates expected to happen throughout the BYOL training.
|
||||
|
||||
Target network is updated at the end of each SGD update on training batch.
|
||||
"""
|
||||
|
||||
def __init__(self, initial_tau: float = 0.99):
|
||||
"""
|
||||
Args:
|
||||
initial_tau: starting tau. Auto-updates with every training step
|
||||
"""
|
||||
super().__init__()
|
||||
self.initial_tau = initial_tau
|
||||
self.current_tau = initial_tau
|
||||
|
||||
def on_before_zero_grad(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> None: # type: ignore
|
||||
# get networks
|
||||
online_net = pl_module.online_network
|
||||
target_net = pl_module.target_network
|
||||
assert(isinstance(online_net, torch.nn.Module))
|
||||
assert(isinstance(target_net, torch.nn.Module))
|
||||
|
||||
# update weights
|
||||
self.update_weights(online_net, target_net)
|
||||
|
||||
# update tau after
|
||||
self.current_tau = self.update_tau(pl_module, trainer)
|
||||
|
||||
def update_tau(self, pl_module: pl.LightningModule, trainer: pl.Trainer) -> float:
|
||||
max_steps = len(trainer.train_dataloader) * trainer.max_epochs # type: ignore
|
||||
tau = 1 - (1 - self.initial_tau) * (math.cos(math.pi * pl_module.global_step / max_steps) + 1) / 2
|
||||
return tau
|
||||
|
||||
def update_weights(self, online_net: torch.nn.Module, target_net: torch.nn.Module) -> None:
|
||||
# apply MA weight update
|
||||
for current_params, ma_params in zip(online_net.parameters(), target_net.parameters()):
|
||||
up_weight, old_weight = current_params.data, ma_params.data
|
||||
ma_params.data = old_weight * self.current_tau + (1 - self.current_tau) * up_weight
|
|
@ -1,5 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
dataset:
|
||||
name: CIFAR10H
|
||||
num_workers: 6
|
||||
train:
|
||||
seed: 1
|
||||
batch_size: 512
|
||||
base_lr: 1e-3
|
||||
output_dir: cifar10h/self_supervised
|
||||
resume_from_last_checkpoint: False
|
||||
self_supervision:
|
||||
type: byol
|
||||
encoder_name: resnet50
|
||||
scheduler:
|
||||
epochs: 2500
|
||||
preprocess:
|
||||
resize: 32
|
|
@ -1,16 +0,0 @@
|
|||
dataset:
|
||||
name: CIFAR10H
|
||||
num_workers: 6
|
||||
train:
|
||||
seed: 1
|
||||
batch_size: 512
|
||||
base_lr: 1e-3
|
||||
output_dir: cifar10h/self_supervised
|
||||
resume_from_last_checkpoint: False
|
||||
self_supervision:
|
||||
type: simclr
|
||||
encoder_name: resnet101
|
||||
scheduler:
|
||||
epochs: 2000
|
||||
preprocess:
|
||||
resize: 32
|
|
@ -1,49 +0,0 @@
|
|||
dataset:
|
||||
name: NIH
|
||||
dataset_dir: datadrive/NIH
|
||||
train:
|
||||
seed: 3
|
||||
batch_size: 2400
|
||||
base_lr: 1e-3
|
||||
output_dir: nih/ssup
|
||||
resume_from_last_checkpoint: False
|
||||
self_supervision:
|
||||
type: byol
|
||||
encoder_name: resnet50
|
||||
use_balanced_binary_loss_for_linear_head: True
|
||||
scheduler:
|
||||
epochs: 1500
|
||||
preprocess:
|
||||
center_crop_size: 224
|
||||
resize: 256
|
||||
augmentation:
|
||||
use_random_horizontal_flip: True
|
||||
use_random_affine: True
|
||||
use_random_color: True
|
||||
use_random_crop: True
|
||||
use_gamma_transform: True
|
||||
use_random_erasing: True
|
||||
add_gaussian_noise: True
|
||||
use_elastic_transform: True
|
||||
random_horizontal_flip:
|
||||
prob: 0.5
|
||||
random_erasing:
|
||||
scale: (0.15, 0.4)
|
||||
ratio: (0.33, 3)
|
||||
random_affine:
|
||||
max_angle: 180
|
||||
max_horizontal_shift: 0.00
|
||||
max_vertical_shift: 0.00
|
||||
max_shear: 40
|
||||
elastic_transform:
|
||||
sigma: 4
|
||||
alpha: 34
|
||||
p_apply: 0.4
|
||||
random_color:
|
||||
brightness: 0.2
|
||||
contrast: 0.2
|
||||
saturation: 0.0
|
||||
random_crop:
|
||||
scale: (0.4, 1.0)
|
||||
gaussian_noise:
|
||||
std: 0.05
|
|
@ -1,46 +0,0 @@
|
|||
dataset:
|
||||
name: NIH
|
||||
dataset_dir: datadrive/NIH
|
||||
train:
|
||||
seed: 2
|
||||
batch_size: 1600
|
||||
base_lr: 1e-3
|
||||
output_dir: nih/nohist
|
||||
resume_from_last_checkpoint: False
|
||||
self_supervision:
|
||||
type: simclr
|
||||
encoder_name: resnet50
|
||||
scheduler:
|
||||
epochs: 1500
|
||||
preprocess:
|
||||
center_crop_size: 224
|
||||
resize: 256
|
||||
augmentation:
|
||||
use_random_horizontal_flip: True
|
||||
use_random_affine: True
|
||||
use_random_color: True
|
||||
use_random_crop: True
|
||||
use_gamma_transform: False
|
||||
use_random_erasing: True
|
||||
add_gaussian_noise: True
|
||||
use_elastic_transform: True
|
||||
random_horizontal_flip:
|
||||
prob: 0.5
|
||||
random_erasing:
|
||||
scale: (0.005, 0.08)
|
||||
ratio: (0.33, 3)
|
||||
random_affine:
|
||||
max_angle: 180
|
||||
max_horizontal_shift: 0.00
|
||||
max_vertical_shift: 0.00
|
||||
max_shear: 40
|
||||
elastic_transform:
|
||||
sigma: 4
|
||||
alpha: 34
|
||||
p_apply: 0.4
|
||||
random_color:
|
||||
brightness: 0.3
|
||||
contrast: 0.3
|
||||
saturation: 0.0
|
||||
random_crop:
|
||||
scale: (0.5, 1.0)
|
|
@ -1,85 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
from InnerEyeDataQuality.configs.config_node import ConfigNode
|
||||
|
||||
config = ConfigNode()
|
||||
|
||||
config.dataset = ConfigNode()
|
||||
config.dataset.name = 'CIFAR10'
|
||||
config.dataset.dataset_dir = ''
|
||||
config.dataset.num_workers = None
|
||||
|
||||
config.train = ConfigNode()
|
||||
config.train.seed = None
|
||||
config.train.batch_size = None
|
||||
config.train.output_dir = None
|
||||
config.train.resume_from_last_checkpoint = False
|
||||
config.train.base_lr = None
|
||||
config.train.self_supervision = ConfigNode()
|
||||
config.train.self_supervision.type = None
|
||||
config.train.self_supervision.encoder_name = None
|
||||
config.train.self_supervision.use_balanced_binary_loss_for_linear_head = False
|
||||
config.train.checkpoint_period = 200
|
||||
|
||||
config.scheduler = ConfigNode()
|
||||
config.scheduler.epochs = None
|
||||
|
||||
config.augmentation = ConfigNode()
|
||||
config.augmentation.use_random_crop = False
|
||||
config.augmentation.use_random_horizontal_flip = False
|
||||
config.augmentation.use_random_affine = False
|
||||
config.augmentation.use_label_smoothing = False
|
||||
config.augmentation.use_random_color = False
|
||||
config.augmentation.add_gaussian_noise = False
|
||||
config.augmentation.use_gamma_transform = False
|
||||
config.augmentation.use_random_erasing = False
|
||||
config.augmentation.use_elastic_transform = False
|
||||
config.augmentation.random_crop = ConfigNode()
|
||||
config.augmentation.random_crop.scale = (0.9, 1.0)
|
||||
|
||||
config.augmentation.elastic_transform = ConfigNode()
|
||||
config.augmentation.elastic_transform.sigma = 4
|
||||
config.augmentation.elastic_transform.alpha = 35
|
||||
config.augmentation.elastic_transform.p_apply = 0.5
|
||||
|
||||
config.augmentation.gaussian_noise = ConfigNode()
|
||||
config.augmentation.gaussian_noise.std = 0.01
|
||||
config.augmentation.gaussian_noise.p_apply = 0.5
|
||||
|
||||
config.augmentation.random_horizontal_flip = ConfigNode()
|
||||
config.augmentation.random_horizontal_flip.prob = 0.5
|
||||
|
||||
config.augmentation.random_affine = ConfigNode()
|
||||
config.augmentation.random_affine.max_angle = 0
|
||||
config.augmentation.random_affine.max_horizontal_shift = 0.0
|
||||
config.augmentation.random_affine.max_vertical_shift = 0.0
|
||||
config.augmentation.random_affine.max_shear = 5
|
||||
|
||||
config.augmentation.random_color = ConfigNode()
|
||||
config.augmentation.random_color.brightness = 0.0
|
||||
config.augmentation.random_color.contrast = 0.1
|
||||
config.augmentation.random_color.saturation = 0.1
|
||||
|
||||
config.augmentation.gamma = ConfigNode()
|
||||
config.augmentation.gamma.scale = (0.5, 1.5)
|
||||
|
||||
config.augmentation.label_smoothing = ConfigNode()
|
||||
config.augmentation.label_smoothing.epsilon = 0.1
|
||||
|
||||
config.augmentation.random_erasing = ConfigNode()
|
||||
config.augmentation.random_erasing.scale = (0.01, 0.1)
|
||||
config.augmentation.random_erasing.ratio = (0.3, 3.3)
|
||||
|
||||
config.preprocess = ConfigNode()
|
||||
config.preprocess.use_resize = False
|
||||
config.preprocess.use_center_crop = False
|
||||
config.preprocess.center_crop_size = 224
|
||||
config.preprocess.histogram_normalization = ConfigNode()
|
||||
config.preprocess.histogram_normalization.disk_size = 30
|
||||
config.preprocess.resize = 32
|
||||
|
||||
def get_default_model_config() -> ConfigNode:
|
||||
return config.clone()
|
|
@ -1,156 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
import torch
|
||||
from pytorch_lightning import LightningDataModule
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
from InnerEyeDataQuality.configs.config_node import ConfigNode
|
||||
from InnerEyeDataQuality.datasets.kaggle_cxr import KAGGLE_TOTAL_SIZE, KaggleCXR
|
||||
from InnerEyeDataQuality.datasets.nih_cxr import NIHCXR, NIH_TOTAL_SIZE
|
||||
from InnerEyeDataQuality.deep_learning.create_dataset_transforms import create_chest_xray_transform
|
||||
from InnerEyeDataQuality.deep_learning.dataloader import WorkerInitFunc
|
||||
from InnerEyeDataQuality.deep_learning.transforms import DualViewTransformWrapper
|
||||
import numpy as np
|
||||
|
||||
class KaggleDataModule(LightningDataModule):
|
||||
|
||||
def __init__(self, config: ConfigNode,
|
||||
num_devices: int,
|
||||
num_workers: int,
|
||||
*args: Any, **kwargs: Any) -> None:
|
||||
"""
|
||||
This is the data module to load and prepare the Kaggle Pneumonia detection challenge dataset.
|
||||
:param config:
|
||||
:param num_devices: The number of GPUs to use. The total batch size specified in the config will be divided
|
||||
by the number of GPUs.
|
||||
:param num_workers: The number of dataloader workers.
|
||||
"""
|
||||
super().__init__(*args, **kwargs)
|
||||
self.config = config
|
||||
self.num_samples = KAGGLE_TOTAL_SIZE
|
||||
self.batch_size = config.train.batch_size // num_devices
|
||||
self.num_workers = num_workers
|
||||
self.train_transforms = DualViewTransformWrapper(create_chest_xray_transform(self.config, is_train=True))
|
||||
self.val_transforms = DualViewTransformWrapper(create_chest_xray_transform(self.config, is_train=False))
|
||||
self.train_dataset = KaggleCXR(self.config.dataset.dataset_dir, use_training_split=True,
|
||||
transform=self.train_transforms, return_index=False)
|
||||
self.class_weights: Optional[torch.Tensor] = None
|
||||
if config.train.self_supervision.use_balanced_binary_loss_for_linear_head:
|
||||
# Weight = inverse class proportion.
|
||||
class_weights = len(self.train_dataset.targets) / np.bincount(self.train_dataset.targets)
|
||||
# Normalized class weights
|
||||
class_weights /= class_weights.sum()
|
||||
self.class_weights = torch.tensor(class_weights)
|
||||
|
||||
@property
|
||||
def num_classes(self) -> int:
|
||||
return 2
|
||||
|
||||
def train_dataloader(self) -> DataLoader: # type: ignore
|
||||
"""
|
||||
Returns Kaggle training set (80% of total dataset)
|
||||
"""
|
||||
return torch.utils.data.DataLoader(
|
||||
self.train_dataset,
|
||||
batch_size=self.batch_size,
|
||||
num_workers=self.num_workers,
|
||||
pin_memory=False,
|
||||
worker_init_fn=WorkerInitFunc(self.config.train.seed),
|
||||
drop_last=True)
|
||||
|
||||
def val_dataloader(self) -> DataLoader: # type: ignore
|
||||
"""
|
||||
Returns Kaggle validation set (20% of total dataset)
|
||||
"""
|
||||
val_dataset = KaggleCXR(self.config.dataset.dataset_dir, use_training_split=False,
|
||||
transform=self.val_transforms, return_index=False)
|
||||
loader = torch.utils.data.DataLoader(
|
||||
val_dataset,
|
||||
batch_size=self.batch_size,
|
||||
shuffle=False,
|
||||
num_workers=self.num_workers,
|
||||
pin_memory=False,
|
||||
worker_init_fn=WorkerInitFunc(self.config.train.seed),
|
||||
drop_last=True)
|
||||
return loader
|
||||
|
||||
def test_dataloader(self) -> DataLoader: # type: ignore
|
||||
"""
|
||||
No Kaggle test split implemented
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def default_transforms(self) -> Callable:
|
||||
transform = create_chest_xray_transform(self.config, is_train=False)
|
||||
return transform
|
||||
|
||||
|
||||
class NIHDataModule(LightningDataModule):
|
||||
|
||||
def __init__(self, config: ConfigNode, num_devices: int, num_workers: int, *args: Any, **kwargs: Any) -> None:
|
||||
"""
|
||||
This is the data module to load and prepare the NIH dataset (112k scans).
|
||||
"""
|
||||
super().__init__(*args, **kwargs)
|
||||
self.config = config
|
||||
self.batch_size = config.train.batch_size // num_devices
|
||||
self.num_workers = num_workers
|
||||
self.train_transforms = DualViewTransformWrapper(create_chest_xray_transform(self.config, is_train=True))
|
||||
self.val_transforms = DualViewTransformWrapper(create_chest_xray_transform(self.config, is_train=False))
|
||||
self.num_samples = NIH_TOTAL_SIZE
|
||||
self.train_dataset = NIHCXR(self.config.dataset.dataset_dir, use_training_split=True,
|
||||
transform=self.train_transforms, return_index=False)
|
||||
self.class_weights: Optional[torch.Tensor] = None
|
||||
if config.train.self_supervision.use_balanced_binary_loss_for_linear_head:
|
||||
# Weight = inverse class proportion.
|
||||
class_weights = len(self.train_dataset.targets) / np.bincount(self.train_dataset.targets)
|
||||
# Normalized class weights
|
||||
class_weights /= class_weights.sum()
|
||||
self.class_weights = torch.tensor(class_weights)
|
||||
|
||||
@property
|
||||
def num_classes(self) -> int:
|
||||
return 2
|
||||
|
||||
def train_dataloader(self) -> DataLoader: # type: ignore
|
||||
"""
|
||||
Returns NIH training set (80% of total dataset)
|
||||
"""
|
||||
return torch.utils.data.DataLoader(
|
||||
self.train_dataset,
|
||||
batch_size=self.batch_size,
|
||||
num_workers=self.num_workers,
|
||||
pin_memory=False,
|
||||
worker_init_fn=WorkerInitFunc(self.config.train.seed),
|
||||
drop_last=True)
|
||||
|
||||
def val_dataloader(self) -> DataLoader: # type: ignore
|
||||
"""
|
||||
Returns NIH validation set (20% of total dataset)
|
||||
"""
|
||||
val_dataset = NIHCXR(self.config.dataset.dataset_dir,
|
||||
use_training_split=False, transform=self.val_transforms, return_index=False)
|
||||
loader = torch.utils.data.DataLoader(
|
||||
val_dataset,
|
||||
batch_size=self.batch_size,
|
||||
shuffle=False,
|
||||
num_workers=self.num_workers,
|
||||
pin_memory=False,
|
||||
worker_init_fn=WorkerInitFunc(self.config.train.seed),
|
||||
drop_last=True)
|
||||
return loader
|
||||
|
||||
def test_dataloader(self) -> DataLoader: # type: ignore
|
||||
"""
|
||||
No NIH test split implemented
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def default_transforms(self) -> Callable:
|
||||
transform = create_chest_xray_transform(self.config, is_train=False)
|
||||
return transform
|
|
@ -1,82 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
from typing import Any, List
|
||||
|
||||
import torch
|
||||
from InnerEyeDataQuality.datasets.cifar10h import TOTAL_CIFAR10H_DATASET_SIZE
|
||||
from pl_bolts.datamodules import CIFAR10DataModule
|
||||
from pl_bolts.transforms.dataset_normalizations import cifar10_normalization
|
||||
from torch.utils.data import DataLoader, random_split
|
||||
from torchvision import transforms as transform_lib
|
||||
|
||||
|
||||
class CIFAR10HDataModule(CIFAR10DataModule):
|
||||
|
||||
def __init__(self, *args: Any, **kwargs: Any) -> None:
|
||||
"""
|
||||
"""
|
||||
super().__init__(*args, **kwargs)
|
||||
self.num_samples = TOTAL_CIFAR10H_DATASET_SIZE
|
||||
self.class_weights = None
|
||||
|
||||
def train_dataloader(self) -> DataLoader:
|
||||
"""
|
||||
CIFAR train set removes a subset to use for validation
|
||||
"""
|
||||
transforms = self.default_transforms() if self.train_transforms is None else self.train_transforms
|
||||
dataset = self.DATASET(self.data_dir, train=False, download=True, transform=transforms, **self.extra_args)
|
||||
loader = DataLoader(dataset,
|
||||
batch_size=self.batch_size,
|
||||
shuffle=True,
|
||||
num_workers=self.num_workers,
|
||||
drop_last=True,
|
||||
pin_memory=True)
|
||||
assert len(dataset) == TOTAL_CIFAR10H_DATASET_SIZE
|
||||
|
||||
return loader
|
||||
|
||||
def val_dataloader(self) -> DataLoader:
|
||||
"""
|
||||
CIFAR10 val set uses a subset of the training set for validation
|
||||
"""
|
||||
transforms = self.default_transforms() if self.val_transforms is None else self.val_transforms
|
||||
|
||||
dataset = self.DATASET(self.data_dir, train=True, download=False, transform=transforms, **self.extra_args)
|
||||
num_samples = len(dataset)
|
||||
_, dataset_val = random_split(dataset,
|
||||
[num_samples - self.val_split, self.val_split],
|
||||
generator=torch.Generator().manual_seed(self.seed))
|
||||
assert len(dataset_val) == self.val_split
|
||||
loader = DataLoader(dataset_val,
|
||||
batch_size=self.batch_size,
|
||||
shuffle=False,
|
||||
num_workers=self.num_workers,
|
||||
pin_memory=True,
|
||||
drop_last=True)
|
||||
return loader
|
||||
|
||||
def test_dataloader(self) -> DataLoader:
|
||||
"""
|
||||
CIFAR10 test set uses the test split
|
||||
"""
|
||||
transforms = self.default_transforms() if self.test_transforms is None else self.test_transforms
|
||||
|
||||
dataset = self.DATASET(self.data_dir, train=True, download=False, transform=transforms, **self.extra_args)
|
||||
num_samples = len(dataset)
|
||||
dataset_test, _ = random_split(dataset,
|
||||
[num_samples - self.val_split, self.val_split],
|
||||
generator=torch.Generator().manual_seed(self.seed))
|
||||
loader = DataLoader(dataset_test,
|
||||
batch_size=self.batch_size,
|
||||
shuffle=False,
|
||||
num_workers=self.num_workers,
|
||||
drop_last=True,
|
||||
pin_memory=True)
|
||||
return loader
|
||||
|
||||
def default_transforms(self) -> List[object]:
|
||||
cf10_transforms = transform_lib.Compose([transform_lib.ToTensor(), cifar10_normalization()])
|
||||
return cf10_transforms
|
|
@ -1,37 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
import multiprocessing
|
||||
import torch
|
||||
import pytorch_lightning as pl
|
||||
from InnerEyeDataQuality.configs.config_node import ConfigNode
|
||||
from pl_bolts.models.self_supervised.simclr.transforms import SimCLREvalDataTransform, SimCLRTrainDataTransform
|
||||
|
||||
from .chestxray_datamodule import KaggleDataModule, NIHDataModule
|
||||
from .cifar10h_datamodule import CIFAR10HDataModule
|
||||
|
||||
|
||||
num_gpus = torch.cuda.device_count()
|
||||
num_devices = num_gpus if num_gpus > 0 else 1
|
||||
|
||||
def create_ssl_data_modules(config: ConfigNode) -> pl.LightningDataModule:
|
||||
"""
|
||||
Returns torch lightining data module.
|
||||
"""
|
||||
num_workers = config.dataset.num_workers if config.dataset.num_workers else multiprocessing.cpu_count()
|
||||
|
||||
if config.dataset.name == "Kaggle":
|
||||
dm = KaggleDataModule(config, num_devices=num_devices, num_workers=num_workers) # type: ignore
|
||||
elif config.dataset.name == "NIH":
|
||||
dm = NIHDataModule(config, num_devices=num_devices, num_workers=num_workers) # type: ignore
|
||||
elif config.dataset.name == "CIFAR10H":
|
||||
dm = CIFAR10HDataModule(num_workers=num_workers,
|
||||
batch_size=config.train.batch_size // num_devices,
|
||||
seed=1234)
|
||||
dm.train_transforms = SimCLRTrainDataTransform(32)
|
||||
dm.val_transforms = SimCLREvalDataTransform(32)
|
||||
else:
|
||||
raise NotImplementedError(f"No pytorch data module implemented for dataset type: {config.dataset.name}")
|
||||
return dm
|
|
@ -1,22 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
import torch
|
||||
from torchvision.models import densenet121
|
||||
|
||||
|
||||
class DenseNet121Encoder(torch.nn.Module):
|
||||
"""
|
||||
This module creates a Densenet121 encoder i.e. Densenet121 model without
|
||||
its classification head.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.densenet121 = densenet121()
|
||||
self.cnn_model = self.densenet121.features
|
||||
|
||||
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
||||
return self.cnn_model(x)
|
|
@ -1,118 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, Union
|
||||
|
||||
import pytorch_lightning as pl
|
||||
import torch
|
||||
|
||||
from default_paths import EXPERIMENT_DIR
|
||||
from InnerEyeDataQuality.configs.config_node import ConfigNode
|
||||
from InnerEyeDataQuality.deep_learning.self_supervised.byol.byol_module import BYOLInnerEye
|
||||
from InnerEyeDataQuality.deep_learning.self_supervised.datamodules.utils import create_ssl_data_modules
|
||||
from InnerEyeDataQuality.deep_learning.self_supervised.simclr_module import SimCLRInnerEye
|
||||
from InnerEyeDataQuality.deep_learning.self_supervised.ssl_classifier_module import (SSLOnlineEvaluatorInnerEye,
|
||||
get_encoder_output_dim)
|
||||
from InnerEyeDataQuality.deep_learning.utils import load_ssl_model_config
|
||||
from pytorch_lightning import seed_everything
|
||||
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
|
||||
|
||||
num_gpus = torch.cuda.device_count()
|
||||
num_devices = num_gpus if num_gpus > 0 else 1
|
||||
trained_kwargs: Dict[str, Union[str, int, float]] = {"precision": 16} if num_gpus > 0 else {}
|
||||
if num_gpus > 1: # If multi-gpu training update the parameters for DDP
|
||||
trained_kwargs.update({"distributed_backend": "ddp", "sync_batchnorm": True})
|
||||
|
||||
def get_last_checkpoint_path(default_root_dir: str, model_version: str) -> str:
|
||||
return str(Path(default_root_dir) / model_version / "checkpoints" / "last.ckpt")
|
||||
|
||||
|
||||
def cli_main(config: ConfigNode, debug: bool = False) -> None:
|
||||
"""
|
||||
Runs self-supervised training on imaging data using contrastive loss.
|
||||
Currently it supports only ``BYOL`` and ``SimCLR``.
|
||||
|
||||
:param config: A ssl_model config specifying training configurations (and augmentations).
|
||||
Beware the augmentations parameters are ignored at the moment when using CIFAR10 as we
|
||||
always use the default augmentations from PyTorch Lightning.
|
||||
:param debug: If set to True, only runs training and validation on 1% of the data.
|
||||
"""
|
||||
|
||||
# set seed
|
||||
seed_everything(config.train.seed)
|
||||
|
||||
# self-supervision type
|
||||
ssl_type = config.train.self_supervision.type
|
||||
default_root_dir = EXPERIMENT_DIR / config.train.output_dir
|
||||
model_version = f'{ssl_type}_seed_{config.train.seed}'
|
||||
checkpoint_dir = str(default_root_dir / model_version / "checkpoints")
|
||||
|
||||
# Model checkpointing callback
|
||||
checkpoint_callback = ModelCheckpoint(period=config.train.checkpoint_period, save_top_k=-1, dirpath=checkpoint_dir)
|
||||
checkpoint_callback_last = ModelCheckpoint(save_last=True, dirpath=checkpoint_dir)
|
||||
|
||||
lr_logger = LearningRateMonitor()
|
||||
tb_logger = pl.loggers.TensorBoardLogger(save_dir=str(default_root_dir),
|
||||
version='logs',
|
||||
name=model_version)
|
||||
# Create SimCLR data modules and model
|
||||
dm = create_ssl_data_modules(config)
|
||||
if ssl_type == "simclr":
|
||||
model = SimCLRInnerEye(num_samples=dm.num_samples, # type: ignore
|
||||
batch_size=dm.batch_size, # type: ignore
|
||||
lr=config.train.base_lr,
|
||||
dataset_name=config.dataset.name,
|
||||
encoder_name=config.train.self_supervision.encoder_name)
|
||||
# Create BYOL model
|
||||
else:
|
||||
model = BYOLInnerEye(num_samples=dm.num_samples, # type: ignore
|
||||
learning_rate=config.train.base_lr,
|
||||
dataset_name=config.dataset.name,
|
||||
encoder_name=config.train.self_supervision.encoder_name,
|
||||
batch_size=dm.batch_size, # type: ignore
|
||||
warmup_epochs=10)
|
||||
model.hparams.update({'ssl_type': ssl_type})
|
||||
|
||||
# Online fine-tunning using an MLP
|
||||
online_eval = SSLOnlineEvaluatorInnerEye(class_weights=dm.class_weights, # type: ignore
|
||||
z_dim=get_encoder_output_dim(model, dm),
|
||||
num_classes=dm.num_classes, # type: ignore
|
||||
dataset=config.dataset.name,
|
||||
drop_p=0.2) # type: ignore
|
||||
|
||||
# Load latest checkpoint
|
||||
resume_from_last_checkpoint = get_last_checkpoint_path(default_root_dir, model_version) if \
|
||||
config.train.resume_from_last_checkpoint else None
|
||||
if debug:
|
||||
overfit_batches = num_devices / (min(len(dm.val_dataloader()), len(dm.train_dataloader())) * 2.0)
|
||||
trained_kwargs.update({"overfit_batches": overfit_batches})
|
||||
|
||||
# Create trainer and run training
|
||||
trainer = pl.Trainer(gpus=num_gpus,
|
||||
logger=tb_logger,
|
||||
default_root_dir=str(default_root_dir),
|
||||
benchmark=True,
|
||||
max_epochs=config.scheduler.epochs,
|
||||
callbacks=[lr_logger, online_eval, checkpoint_callback, checkpoint_callback_last],
|
||||
resume_from_checkpoint=resume_from_last_checkpoint,
|
||||
**trained_kwargs)
|
||||
trainer.fit(model, dm) # type: ignore
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
|
||||
parser = argparse.ArgumentParser(description='Train a self-supervised model')
|
||||
parser.add_argument('--config', dest='config', type=str, required=True,
|
||||
help='Path to config file characterising trained CNN model/s')
|
||||
args, unknown_args = parser.parse_known_args()
|
||||
config_path = args.config
|
||||
config = load_ssl_model_config(config_path)
|
||||
# Launch the script
|
||||
cli_main(config)
|
||||
|
|
@ -1,38 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from pytorch_lightning.metrics import Metric
|
||||
from pytorch_lightning.metrics.functional import auroc
|
||||
|
||||
|
||||
class AreaUnderRocCurve(Metric):
|
||||
"""
|
||||
Computes the area under the receiver operating curve (ROC).
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__(dist_sync_on_step=False)
|
||||
self.add_state("preds", default=[], dist_reduce_fx=None)
|
||||
self.add_state("targets", default=[], dist_reduce_fx=None)
|
||||
self.name = "auc"
|
||||
|
||||
def update(self, preds: torch.Tensor, targets: torch.Tensor) -> None: # type: ignore
|
||||
assert preds.dim() == 2 and targets.dim() == 1 and \
|
||||
preds.shape[1] == 2 and preds.shape[0] == targets.shape[
|
||||
0], f"Expected 2-dim preds, 1-dim targets, but got: preds = {preds.shape}, targets = {targets.shape}"
|
||||
self.preds.append(preds) # type: ignore
|
||||
self.targets.append(targets) # type: ignore
|
||||
|
||||
def compute(self) -> torch.Tensor:
|
||||
"""
|
||||
Computes a metric from the stored predictions and targets.
|
||||
"""
|
||||
preds = torch.cat(self.preds) # type: ignore
|
||||
targets = torch.cat(self.targets) # type: ignore
|
||||
if torch.unique(targets).numel() == 1:
|
||||
return torch.tensor(np.nan)
|
||||
return auroc(preds[:, 1], targets)
|
|
@ -1,43 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
from typing import Any
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from InnerEyeDataQuality.deep_learning.self_supervised.byol.byol_models import SSLEncoder
|
||||
from pl_bolts.models.self_supervised.simclr.simclr_module import SimCLR
|
||||
|
||||
|
||||
class _Projection(nn.Module):
|
||||
def __init__(self, input_dim: int, hidden_dim: int, output_dim: int) -> None:
|
||||
super().__init__()
|
||||
self.output_dim = output_dim
|
||||
self.input_dim = input_dim
|
||||
self.hidden_dim = hidden_dim
|
||||
|
||||
self.model = nn.Sequential(
|
||||
nn.Linear(self.input_dim, self.hidden_dim, bias=True),
|
||||
nn.BatchNorm1d(self.hidden_dim),
|
||||
nn.ReLU(),
|
||||
nn.Linear(self.hidden_dim, self.output_dim, bias=False))
|
||||
|
||||
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
||||
x = self.model(x)
|
||||
return F.normalize(x, dim=1)
|
||||
|
||||
|
||||
class SimCLRInnerEye(SimCLR):
|
||||
def __init__(self, encoder_name: str, dataset_name: str, **kwargs: Any) -> None:
|
||||
"""
|
||||
Args:
|
||||
encoder_name [str]: Image encoder name (predefined models)
|
||||
dataset_name [str]: Image dataset name (e.g. cifar10, kaggle, etc.)
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
self.save_hyperparameters()
|
||||
self.encoder = SSLEncoder(encoder_name, dataset_name, use_output_pooling=True)
|
||||
self.projection = _Projection(input_dim=self.encoder.get_output_feature_dim(), hidden_dim=2048, output_dim=128)
|
|
@ -1,194 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
from typing import Any, List, Optional, Tuple, Union
|
||||
|
||||
import pytorch_lightning as pl
|
||||
import torch
|
||||
from pl_bolts.callbacks.ssl_online import SSLOnlineEvaluator
|
||||
from pl_bolts.models.self_supervised.evaluator import SSLEvaluator
|
||||
from torch import Tensor as T
|
||||
from torch.nn import ModuleList, functional as F
|
||||
|
||||
from InnerEyeDataQuality.deep_learning.self_supervised.metrics import AreaUnderRocCurve
|
||||
from pytorch_lightning.metrics import Accuracy
|
||||
|
||||
|
||||
BatchType = Tuple[List, T]
|
||||
|
||||
class SSLOnlineEvaluatorInnerEye(SSLOnlineEvaluator):
|
||||
def __init__(self, class_weights: Optional[torch.Tensor] = None, **kwargs: Any) -> None:
|
||||
"""
|
||||
Creates a hook to evaluate a linear model on top of an SSL embedding.
|
||||
|
||||
:param class_weights: The class weights to use when computing the cross entropy loss. If set to None,
|
||||
no weighting will be done.
|
||||
"""
|
||||
|
||||
super().__init__(**kwargs)
|
||||
self.training_step = int(0)
|
||||
self.weight_decay = 1e-4
|
||||
self.learning_rate = 1e-4
|
||||
|
||||
self.train_metrics = ModuleList([AreaUnderRocCurve(), Accuracy()]) if self.num_classes == 2 else ModuleList([Accuracy()])
|
||||
self.val_metrics = ModuleList([AreaUnderRocCurve(), Accuracy()]) if self.num_classes == 2 else ModuleList([Accuracy()])
|
||||
self.class_weights = class_weights
|
||||
|
||||
def on_pretrain_routine_start(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> None:
|
||||
# Move metrics and class weights to module device
|
||||
for metric in [*self.train_metrics, *self.val_metrics]:
|
||||
metric.to(device=pl_module.device) # type: ignore
|
||||
if self.class_weights is not None:
|
||||
self.class_weights = self.class_weights.float().to(device=pl_module.device)
|
||||
|
||||
pl_module.non_linear_evaluator = SSLEvaluator(n_input=self.z_dim,
|
||||
n_classes=self.num_classes,
|
||||
p=self.drop_p,
|
||||
n_hidden=self.hidden_dim).to(pl_module.device)
|
||||
assert isinstance(pl_module.non_linear_evaluator, torch.nn.Module)
|
||||
self.optimizer = torch.optim.Adam(pl_module.non_linear_evaluator.parameters(),
|
||||
lr=self.learning_rate,
|
||||
weight_decay=self.weight_decay)
|
||||
|
||||
# Use only one of the transformed images
|
||||
@staticmethod
|
||||
def to_device(batch: BatchType, device: Union[str, torch.device]) -> Tuple[T, T]:
|
||||
(x1, x2), y = batch
|
||||
x1 = x1.to(device)
|
||||
y = y.to(device)
|
||||
return x1, y
|
||||
|
||||
def shared_step(self, batch: BatchType, pl_module: pl.LightningModule, is_training: bool) -> T:
|
||||
x, y = self.to_device(batch, pl_module.device)
|
||||
with torch.no_grad():
|
||||
representations = self.get_representations(pl_module, x)
|
||||
representations = representations.detach()
|
||||
        assert isinstance(pl_module.non_linear_evaluator, torch.nn.Module)

        # Run the linear-head with SSL embeddings.
        mlp_preds = pl_module.non_linear_evaluator(representations)
        mlp_loss = F.cross_entropy(mlp_preds, y, weight=self.class_weights)

        with torch.no_grad():
            posteriors = F.softmax(mlp_preds, dim=-1)
            for metric in (self.train_metrics if is_training else self.val_metrics):
                metric(posteriors, y)

        return mlp_loss

    def on_validation_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx):  # type: ignore
        loss = self.shared_step(batch, pl_module, is_training=False)
        # Log classification metrics
        pl_module.log('ssl/online_val_loss', loss, on_step=False, on_epoch=True, sync_dist=False)

    def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx) -> None:  # type: ignore
        logger = trainer.logger.experiment  # type: ignore
        loss = self.shared_step(batch, pl_module, is_training=True)

        # update finetune weights
        loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()
        self.training_step += 1

        # log metrics
        logger.add_scalar('ssl/online_train_loss', loss, global_step=self.training_step)


class SSLClassifier(torch.nn.Module):
    """
    SSL Image classifier that combines pre-trained SSL encoder with a trainable linear-head.
    """

    def __init__(self, num_classes: int, encoder: torch.nn.Module, projection: torch.nn.Module):
        super().__init__()
        self.encoder = encoder
        self.projection = projection
        self.encoder.eval(), self.projection.eval()
        self.classifier_head = SSLEvaluator(n_input=get_encoder_output_dim(self.encoder),
                                            n_hidden=None,
                                            n_classes=num_classes,
                                            p=0.20)

    def train(self, mode: bool = True) -> Any:
        self.training = mode
        self.encoder.train(False)
        self.projection.train(False)
        self.classifier_head.train(mode)
        return self

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        assert isinstance(self.encoder.avgpool, torch.nn.Module)
        with torch.no_grad():
            # Generate representations
            repr = self.encoder(x)

            # Generate image embeddings
            self.projection(repr)
            # Generate class logits
            agg_repr = self.encoder.avgpool(repr) if repr.ndim > 2 else repr
            agg_repr = agg_repr.reshape(agg_repr.size(0), -1).detach()

        return self.classifier_head(agg_repr)


class PretrainedClassifier(torch.nn.Module):
    def __init__(self, num_classes: int, encoder: torch.nn.Module):
        super().__init__()
        self.encoder = encoder
        self.classifier_head = torch.nn.Linear(get_encoder_output_dim(self.encoder), num_classes)

    def train(self, mode: bool = True) -> Any:
        self.classifier_head.train(mode)
        self.encoder.train(mode)
        return self

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Generate representations
        repr = self.encoder(x)
        # Generate class logits
        agg_repr = self.encoder.avgpool(repr) if repr.ndim > 2 else repr  # type: ignore
        agg_repr = agg_repr.reshape(agg_repr.size(0), -1)
        return self.classifier_head(agg_repr)


def get_encoder_output_dim(pl_module: Union[pl.LightningModule, torch.nn.Module],
                           dm: Optional[pl.LightningDataModule] = None) -> int:
    """
    Calculates the output dimension of ssl encoder by making a single forward pass.
    :param pl_module: pl encoder module
    :param dm: pl datamodule
    """
    # Target device
    device = pl_module.device if isinstance(pl_module, pl.LightningDataModule) else \
        next(pl_module.parameters()).device  # type: ignore
    assert (isinstance(device, torch.device))

    # Create a dummy input image
    if dm is not None:
        batch = iter(dm.train_dataloader()).next()  # type: ignore
        x, _ = SSLOnlineEvaluatorInnerEye.to_device(batch, device)
    else:
        x = torch.rand((1, 3, 256, 256)).to(device)

    # Extract the number of output feature dimensions
    with torch.no_grad():
        representations = pl_module(x)

    return representations.shape[1]


def WrapSSL(ssl_class: Any, num_classes: int) -> Any:
    """
    Wraps a given SSL encoder and adds a non-linear evaluator to it. This is done to load pre-trained SSL checkpoints.
    PL requires non_linear_evaluator to be included in pl_module at SSL training time.
    :param num_classes: Number of target classes for the linear head.
    :param ssl_class: SSL object either BYOL or SimCLR.
    """
    class _wrap(ssl_class):  # type: ignore
        def __init__(self, **kwargs: Any) -> None:
            super().__init__(**kwargs)
            self.non_linear_evaluator = SSLEvaluator(n_input=get_encoder_output_dim(self),
                                                     n_classes=num_classes,
                                                     n_hidden=None)

    return _wrap
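For context, get_encoder_output_dim above infers the feature dimension of an arbitrary encoder by running one dummy forward pass. A minimal sketch of that behaviour with a toy stand-in encoder (the real call sites pass a BYOL/SimCLR backbone; the toy module here is only illustrative):

    import torch

    # Toy stand-in encoder; any module that maps (N, 3, 256, 256) to (N, D) works.
    toy_encoder = torch.nn.Sequential(
        torch.nn.Conv2d(3, 16, kernel_size=3, padding=1),
        torch.nn.AdaptiveAvgPool2d(1),
        torch.nn.Flatten())

    feature_dim = get_encoder_output_dim(toy_encoder)  # one dummy (1, 3, 256, 256) forward pass
    print(feature_dim)  # -> 16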
@ -1,63 +0,0 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

from typing import Optional
import logging

import torch
from InnerEyeDataQuality.deep_learning.self_supervised.ssl_classifier_module import PretrainedClassifier, SSLClassifier, \
    WrapSSL
from pl_bolts.models.self_supervised.resnets import resnet18, resnet50_bn, resnet101


def create_ssl_encoder(encoder_name: str, dataset_name: Optional[str] = None) -> torch.nn.Module:
    """
    Creates an SSL encoder backbone (ResNet-18/50/101). For CIFAR10-style datasets the initial
    convolution is replaced so that the small input images are not shrunk.
    """
    if encoder_name == 'resnet18':
        encoder = resnet18(return_all_feature_maps=False)
    elif encoder_name == 'resnet50':
        encoder = resnet50_bn(return_all_feature_maps=False)
    elif encoder_name == 'resnet101':
        encoder = resnet101(return_all_feature_maps=False)
    else:
        raise ValueError("Unknown model type")

    if dataset_name is not None:
        if dataset_name in ["CIFAR10", "CIFAR10H"]:
            logging.info("Updating the initial convolution in order not to shrink CIFAR10 images")
            encoder.conv1 = torch.nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)

    return encoder


def create_ssl_image_classifier(num_classes: int, pl_checkpoint_path: str, freeze_encoder: bool = True) -> torch.nn.Module:
    """
    Creates an image classifier from a pre-trained BYOL or SimCLR checkpoint, either with a frozen
    encoder (SSLClassifier) or with a fully trainable encoder (PretrainedClassifier).
    """
    from InnerEyeDataQuality.deep_learning.self_supervised.byol.byol_module import BYOLInnerEye
    from InnerEyeDataQuality.deep_learning.self_supervised.simclr_module import SimCLRInnerEye
    ssl_type = torch.load(pl_checkpoint_path, map_location=lambda storage, loc: storage)["hyper_parameters"]["ssl_type"]
    logging.info(f"Creating a {ssl_type} based image classifier")
    logging.info(f"Loading pretrained {ssl_type} weights from:\n {pl_checkpoint_path}")

    if ssl_type == "byol":
        byol_module = WrapSSL(BYOLInnerEye, num_classes).load_from_checkpoint(pl_checkpoint_path, strict=False)
        if freeze_encoder:
            model = SSLClassifier(num_classes=num_classes, encoder=byol_module.target_network.encoder,
                                  projection=byol_module.target_network.projector_normalised)
        else:
            model = PretrainedClassifier(num_classes=num_classes,  # type: ignore
                                         encoder=byol_module.target_network.encoder)
    elif ssl_type == "simclr":
        simclr_module = WrapSSL(SimCLRInnerEye, num_classes).load_from_checkpoint(pl_checkpoint_path, strict=False)
        if freeze_encoder:
            model = SSLClassifier(num_classes=num_classes, encoder=simclr_module.encoder,
                                  projection=simclr_module.projection)
        else:
            model = PretrainedClassifier(num_classes=num_classes,  # type: ignore
                                         encoder=simclr_module.encoder)
    else:
        raise NotImplementedError(f"Unknown unsupervised model: {ssl_type}")

    return model
@ -1,51 +0,0 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

import argparse
import logging
import os

from InnerEyeDataQuality.configs.config_node import ConfigNode
from InnerEyeDataQuality.deep_learning.utils import create_logger, get_run_config, load_model_config
from InnerEyeDataQuality.deep_learning.trainers.co_teaching_trainer import CoTeachingTrainer
from InnerEyeDataQuality.deep_learning.trainers.elr_trainer import ELRTrainer
from InnerEyeDataQuality.deep_learning.trainers.vanilla_trainer import VanillaTrainer
from InnerEyeDataQuality.utils.generic import set_seed


def train(config: ConfigNode) -> None:
    create_logger(config.train.output_dir)
    logging.info('Starting training...')
    if config.train.use_co_teaching and config.train.use_elr:
        raise ValueError("You asked for co-teaching and ELR at the same time. Please double check your configuration.")
    if config.train.use_co_teaching:
        model_trainer_class = CoTeachingTrainer
    elif config.train.use_elr:
        model_trainer_class = ELRTrainer  # type: ignore
    else:
        model_trainer_class = VanillaTrainer  # type: ignore
    model_trainer_class(config).run_training()


def train_ensemble(config: ConfigNode, num_runs: int) -> None:
    for i, _ in enumerate(range(num_runs)):
        config_run = get_run_config(config, config.train.seed + i)
        set_seed(config_run.train.seed)
        os.makedirs(config_run.train.output_dir, exist_ok=True)
        train(config_run)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Parser for model training.')
    parser.add_argument('--config', type=str, required=True,
                        help='Path to config file characterising trained CNN model/s')
    parser.add_argument('--num_runs', type=int, default=1, help='Number of runs (ensemble)')
    args, unknown_args = parser.parse_known_args()

    # Load config
    config = load_model_config(args.config)

    # Launch training
    train_ensemble(config, args.num_runs)
@ -1,5 +0,0 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

@ -1,203 +0,0 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

from typing import Any, List, Optional, Tuple

import torch
from torch.utils.data.dataloader import DataLoader

from InnerEyeDataQuality.configs.config_node import ConfigNode
from InnerEyeDataQuality.deep_learning.architectures.ema import EMA
from InnerEyeDataQuality.deep_learning.collect_embeddings import get_all_embeddings
from InnerEyeDataQuality.deep_learning.graph.classifier import GraphClassifier
from InnerEyeDataQuality.deep_learning.loss import CrossEntropyLoss, consistency_loss
from InnerEyeDataQuality.deep_learning.trainers.model_trainer_base import IndexContainer, Loss, ModelTrainer
from InnerEyeDataQuality.deep_learning.scheduler import ForgetRateScheduler
from InnerEyeDataQuality.deep_learning.utils import create_model
from InnerEyeDataQuality.utils.generic import find_union_set_torch, map_to_device


class CoTeachingTrainer(ModelTrainer):
    """
    Implements co-teaching training using two models
    """

    def __init__(self, config: ConfigNode):
        self.use_teacher_model = config.train.use_teacher_model
        super().__init__(config)

        self.forget_rate_scheduler = ForgetRateScheduler(
            config.scheduler.epochs,
            forget_rate=config.train.co_teaching_forget_rate,
            num_gradual=config.train.co_teaching_num_gradual,
            start_epoch=config.train.resume_epoch if config.train.resume_epoch > 0 else 0,
            num_warmup_epochs=config.train.co_teaching_num_warmup)

        self.joint_metric_tracker = self.train_trackers[0]
        self.use_consistency_loss = config.train.co_teaching_consistency_loss
        self.use_graph = config.train.co_teaching_use_graph
        self.consistency_loss_weight = 0.10
        self.num_models = len(self.models)
        self.loss_fn = CrossEntropyLoss(config)
        self.ema_models = [EMA(self.models[0]), EMA(self.models[1])] if config.train.use_teacher_model else None
        self.graph_classifiers = [GraphClassifier(num_samples=len(self.train_loader.dataset),  # type: ignore
                                                  num_classes=config.dataset.n_classes,
                                                  labels=self.train_loader.dataset.targets,  # type: ignore
                                                  device=config.device)
                                  for _ in range(2)]

    # Create two models for co-teaching
    def get_models(self, config: ConfigNode) -> List[torch.nn.Module]:
        """
        :param config: The job config
        :return: A list of two models to be trained
        """
        return [create_model(config, model_id=0), create_model(config, model_id=1)]

    def update_teacher_models(self) -> None:
        if self.ema_models:
            for i in range(len(self.models)):
                self.ema_models[i].update()

    def deploy_teacher_models(self, inputs: torch.Tensor) -> Optional[List[torch.Tensor]]:
        if not self.ema_models:
            return None
        elif isinstance(inputs, list):
            return [_m.inference(_i) for _m, _i in zip(self.ema_models, inputs)]
        else:
            return [_m.inference(inputs) for _m in self.ema_models]

    @torch.no_grad()
    def _get_samples_for_update(self,
                                outputs: List[torch.Tensor],
                                labels: torch.Tensor,
                                global_indices: torch.Tensor,
                                teacher_logits: Optional[List[torch.Tensor]]) -> Tuple[IndexContainer, IndexContainer]:
        """
        Return a list of indices that should be kept for gradient updates.
        """

        def _get_small_loss_sample_ids(logits: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
            num_samples = labels.shape[0]
            num_remember = int((1. - self.forget_rate_scheduler.get_forget_rate) * num_samples)
            per_sample_loss = self.loss_fn(logits, labels, reduction='none')
            ind_sorted = torch.argsort(per_sample_loss)
            return ind_sorted[:num_remember], ind_sorted[num_remember:]

        judge = lambda i: teacher_logits[i] if self.use_teacher_model and teacher_logits else outputs[i]
        ind_1_keep, ind_1_exc = _get_small_loss_sample_ids(judge(0))
        ind_0_keep, ind_0_exc = _get_small_loss_sample_ids(judge(1))

        # Use graph based classifier
        if self.graph_classifiers[0].graph is not None:
            ind_0_keep, ind_0_exc = self.graph_classifiers[0].filter_cases(ind_0_keep, ind_0_exc, global_indices)
            ind_1_keep, ind_1_exc = self.graph_classifiers[1].filter_cases(ind_1_keep, ind_1_exc, global_indices)

        return IndexContainer(ind_0_keep, ind_0_exc), IndexContainer(ind_1_keep, ind_1_exc)

    # Co-teaching loss
    def compute_loss(self,
                     outputs: List[torch.Tensor],
                     labels: torch.Tensor,
                     indices: Optional[Tuple[IndexContainer, IndexContainer]] = None,
                     **kwargs: Any) -> List[Loss]:
        """
        Implements the co-teaching loss using the outputs of two different models
        :param outputs: A list of logits output by each model
        :param labels: The target labels
        :param indices: Sample indices that should be kept and excluded in loss computation (for both models).
        :return: A list of Loss objects, each element contains the loss that is fed to the optimizer and a
                 tensor of per sample losses
        """
        options = {'ema_logits': None}
        options.update(kwargs)
        ema_logits: Optional[List[torch.Tensor]] = options['ema_logits']

        loss_obj = list()
        indices: Tuple[IndexContainer, IndexContainer] = [  # type: ignore
            IndexContainer(keep=torch.arange(labels.shape[0]), exclude=torch.tensor([], dtype=torch.long)) for _ in
            range(len(outputs))] if indices is None else indices
        assert indices is not None  # for mypy
        assert len(outputs) == len(indices) == 2
        for _output, _index in zip(outputs, indices):
            # The indices to keep for each model are determined by the loss of the other.
            per_sample_loss = self.loss_fn(_output, labels, reduction='none')
            if self.weight is not None:
                per_sample_loss *= self.weight[labels]
            loss_update = torch.mean(per_sample_loss[_index.keep])
            loss_obj.append(Loss(per_sample_loss, loss_update))

        # Consistency loss between predictions on noisy samples
        if self.use_consistency_loss and ema_logits:
            joint_excluded = find_union_set_torch(indices[0].exclude, indices[1].exclude)
            c_loss0 = consistency_loss(outputs[0][joint_excluded], ema_logits[0][joint_excluded])
            c_loss1 = consistency_loss(outputs[1][joint_excluded], ema_logits[1][joint_excluded])
            loss_obj[0].loss += self.consistency_loss_weight * c_loss0
            loss_obj[1].loss += self.consistency_loss_weight * c_loss1

        return loss_obj

    def run_epoch(self, dataloader: DataLoader, epoch: int, is_train: bool = False) -> None:
        """
        Run a training or validation epoch of the base model trainer but also step the forget rate scheduler
        :param dataloader: A dataloader object.
        :param epoch: Current epoch id.
        :param is_train: Whether this is a training epoch or not.
        :param run_inference_on_training_set: If True, record all metrics using the train_trackers
                                              (even if is_train = False)
        :return:
        """
        for model in self.models:
            model.train() if is_train else model.eval()
        trackers = self.train_trackers if is_train else self.val_trackers
        # Consume input dataloader and update model
        for indices, images, labels in dataloader:
            images, labels = map_to_device(images, self.device), labels.to(self.device)
            outputs = self.forward(images, requires_grad=is_train)
            ema_logits = self.deploy_teacher_models(images)
            selected_ind = self._get_samples_for_update(outputs, labels, indices, ema_logits) if is_train else None
            losses = self.compute_loss(outputs, labels, selected_ind, ema_logits=ema_logits)
            assert (len(outputs) == len(losses)) & (len(outputs) == 2)

            if is_train:
                assert selected_ind is not None
                self.step_optimizers(losses)
                self.update_teacher_models()
                self.joint_metric_tracker.append_batch_aggregate(epoch=epoch,
                                                                 logits_x=outputs[0].detach(),
                                                                 logits_y=outputs[1].detach(),
                                                                 dropped_cases=indices[selected_ind[0].exclude],
                                                                 indices=indices)

            # Collect model embeddings
            embeddings = get_all_embeddings(self.all_model_cnn_embeddings)
            # Log training and validation stats in metric tracker
            for i, (logits, loss) in enumerate(zip(outputs, losses)):
                teacher_logits = ema_logits[i].detach() if self.ema_models else None  # type: ignore
                trackers[i].sample_metrics.append_batch(epoch=epoch,
                                                        logits=logits.detach(),
                                                        labels=labels.detach(),
                                                        loss=loss.loss.item(),
                                                        indices=indices.tolist(),
                                                        per_sample_loss=loss.per_sample_loss.detach(),
                                                        embeddings=embeddings[i],
                                                        teacher_logits=teacher_logits)

        # Adjust forget rate for co-teaching
        if is_train:
            self.forget_rate_scheduler.step()
            if self.use_graph:
                self.graph_classifiers[0].build_graph(embeddings=trackers[1].sample_metrics.embeddings_per_sample)
                self.graph_classifiers[1].build_graph(embeddings=trackers[0].sample_metrics.embeddings_per_sample)

    def save_checkpoint(self, epoch: int) -> bool:
        is_save = super().save_checkpoint(epoch=epoch)
        if is_save and self.ema_models:
            is_last_epoch = epoch == self.config.scheduler.epochs - 1
            suffix = '_last_epoch.pt' if is_last_epoch else f'_epoch_{epoch:d}.pt'
            for i, ema_model in enumerate(self.ema_models):
                save_path = str(self.checkpoint_dir / f'checkpoint_ema_model_{i:d}') + suffix
                ema_model.save_model(save_path)
        return is_save
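For context, the core of the removed co-teaching trainer is the small-loss selection rule: each model keeps only the fraction of lowest-loss samples allowed by the current forget rate, and those indices drive the peer model's update. A minimal standalone sketch of that rule on toy tensors (no scheduler, plain cross-entropy; names here are illustrative only):

    import torch
    import torch.nn.functional as F

    logits = torch.randn(8, 5)            # toy batch: 8 samples, 5 classes
    labels = torch.randint(0, 5, (8,))
    forget_rate = 0.25                    # fraction of highest-loss samples to drop

    per_sample_loss = F.cross_entropy(logits, labels, reduction='none')
    num_remember = int((1.0 - forget_rate) * labels.shape[0])
    ind_sorted = torch.argsort(per_sample_loss)
    keep, exclude = ind_sorted[:num_remember], ind_sorted[num_remember:]
    # In co-teaching, `keep` computed from model A's losses drives model B's update, and vice versa.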
@ -1,96 +0,0 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

from typing import List

import torch
from torch import nn
import torch.nn.functional as F
from torch.types import Device
from torch.utils.data import DataLoader

from InnerEyeDataQuality.configs.config_node import ConfigNode
from InnerEyeDataQuality.deep_learning.collect_embeddings import get_all_embeddings
from InnerEyeDataQuality.deep_learning.loss import CrossEntropyLoss
from InnerEyeDataQuality.deep_learning.trainers.model_trainer_base import Loss
from InnerEyeDataQuality.deep_learning.trainers.vanilla_trainer import VanillaTrainer


class ELRTrainer(VanillaTrainer):
    def __init__(self, config: ConfigNode):
        super().__init__(config)
        self.num_classes = config.dataset.n_classes
        self.loss_fn = {"TRAIN": ELRLoss(num_examples=self.train_trackers[0].num_samples_total,
                                         num_classes=config.dataset.n_classes,
                                         device=self.device),
                        "VAL": CrossEntropyLoss(config)}

    def compute_loss(self, is_train: bool, outputs: List[torch.Tensor], labels: torch.Tensor,
                     indices: torch.Tensor = None) -> Loss:
        """
        Implements the ELR loss at training time and the standard cross-entropy loss at validation time, using one model
        :param outputs: A list of logits output by each model
        :param labels: The target labels
        :return: A Loss object that contains the loss that is fed to the optimizer and a
                 tensor of per sample losses
        """
        logits = outputs[0]
        if is_train:
            per_sample_loss = self.loss_fn["TRAIN"](predictions=logits, targets=labels, indices=indices)  # type: ignore
        else:
            per_sample_loss = self.loss_fn["VAL"](predictions=logits, targets=labels, reduction='none')  # type: ignore
        loss = torch.mean(per_sample_loss)
        return Loss(per_sample_loss, loss)

    def run_epoch(self, dataloader: DataLoader, epoch: int, is_train: bool = False) -> None:
        """
        Run a training or validation epoch of the base model trainer but also step the forget rate scheduler
        :param dataloader: A dataloader object
        :param epoch: Current epoch id.
        :param is_train: Whether this is a training epoch or not
        :param run_inference_on_training_set: If True, record all metrics using the train_trackers
                                              (even if is_train = False)
        :return:
        """

        for model in self.models:
            model.train() if is_train else model.eval()

        for indices, images, labels in dataloader:
            images, labels = images.to(self.device), labels.to(self.device)
            outputs = self.forward(images, requires_grad=is_train)
            embeddings = get_all_embeddings(self.all_model_cnn_embeddings)[0]
            losses = self.compute_loss(is_train, outputs, labels, indices)
            if is_train:
                self.step_optimizers([losses])

            # Log training and validation stats in metric tracker
            tracker = self.train_trackers[0] if is_train else self.val_trackers[0]
            tracker.sample_metrics.append_batch(epoch, outputs[0].detach(), labels.detach(), losses.loss.item(),
                                                indices.cpu().tolist(), losses.per_sample_loss.detach(), embeddings)


class ELRLoss(nn.Module):
    """
    Adapted from https://github.com/shengliu66/ELR.
    """

    def __init__(self, num_examples: int, num_classes: int, device: Device, beta: float = 0.9, _lambda: float = 3):
        super().__init__()
        self.num_classes = num_classes
        self.targets = torch.zeros(num_examples, self.num_classes, device=device)
        self.beta = beta
        self._lambda = _lambda

    def forward(self, predictions: torch.Tensor, targets: torch.Tensor, indices: torch.Tensor) -> torch.Tensor:
        y_pred = F.softmax(predictions, dim=1)
        y_pred = torch.clamp(y_pred, 1e-4, 1.0 - 1e-4)
        y_pred_ = y_pred.data.detach()
        self.targets[indices] = self.beta * self.targets[indices] + (1 - self.beta) * (
            (y_pred_) / (y_pred_).sum(dim=1, keepdim=True))
        ce_loss = F.cross_entropy(predictions, targets, reduction="none")
        elr_reg = (1 - (self.targets[indices] * y_pred).sum(dim=1)).log()
        per_sample_loss = ce_loss + self._lambda * elr_reg
        return per_sample_loss
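For context, the removed ELRLoss keeps an exponential moving average of each sample's past softmax predictions and penalises the inner product between that running target and the current prediction, on top of cross-entropy. A minimal sketch of the same update on toy tensors (beta and lambda match the defaults above; shapes and index values are illustrative):

    import torch
    import torch.nn.functional as F

    num_examples, num_classes = 6, 4
    ema_targets = torch.zeros(num_examples, num_classes)   # running per-sample targets
    beta, lam = 0.9, 3.0

    logits = torch.randn(3, num_classes)                   # toy mini-batch of 3 samples
    labels = torch.randint(0, num_classes, (3,))
    indices = torch.tensor([0, 2, 5])                      # dataset positions of these samples

    p = torch.clamp(F.softmax(logits, dim=1), 1e-4, 1 - 1e-4)
    p_detached = p.detach()
    ema_targets[indices] = beta * ema_targets[indices] + \
        (1 - beta) * (p_detached / p_detached.sum(dim=1, keepdim=True))
    elr_reg = (1 - (ema_targets[indices] * p).sum(dim=1)).log()
    per_sample_loss = F.cross_entropy(logits, labels, reduction='none') + lam * elr_reg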
@ -1,284 +0,0 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

import logging
import os
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Any, List, Optional, Tuple, Union

import torch
from torch.utils.data.dataloader import DataLoader

from InnerEyeDataQuality.configs.config_node import ConfigNode
from InnerEyeDataQuality.deep_learning.architectures.ema import EMA
from InnerEyeDataQuality.deep_learning.collect_embeddings import register_embeddings_collector
from InnerEyeDataQuality.deep_learning.dataloader import (get_number_of_samples_per_epoch, get_train_dataloader,
                                                          get_val_dataloader)
from InnerEyeDataQuality.deep_learning.metrics.tracker import MetricTracker
from InnerEyeDataQuality.utils.dataset_utils import get_datasets
from PyTorchImageClassification.optim import create_optimizer
from PyTorchImageClassification.scheduler import create_scheduler


@dataclass(frozen=True)
class IndexContainer:
    keep: torch.Tensor
    exclude: torch.Tensor


@dataclass
class Loss:
    per_sample_loss: torch.Tensor  # the loss value computed for each sample without any modifications
    loss: torch.Tensor  # the loss that is fed to the optimizer; can be derived from per_sample loss given some rule


class ModelTrainer(object):
    """
    ModelTrainer handles the training of models, logging, saving checkpoints and validation. This class trains one
    or more similar models at a time, each with its own optimizer, although the models can interact in the loss
    function. This is an abstract class; child classes must implement the methods that are left unimplemented.
    """

    def __init__(self, config: ConfigNode) -> None:
        self.config = config
        self.checkpoint_dir = Path(self.config.train.output_dir) / 'checkpoints'
        self.log_dir = Path(self.config.train.output_dir) / 'logs'
        self.seed = config.train.seed
        self.device = torch.device(config.device)
        train_dataset, val_dataset = get_datasets(config)
        self.weight = torch.tensor([train_dataset.weight, (1-train_dataset.weight)],  # type: ignore
                                   device=self.device, dtype=torch.float) if hasattr(train_dataset, "weight") else None
        self.train_loader = get_train_dataloader(train_dataset, config, seed=self.seed,
                                                 drop_last=config.train.dataloader.drop_last, shuffle=True)
        self.val_loader = get_val_dataloader(val_dataset, config, seed=self.seed)
        self.models = self.get_models(config)
        self.ema_models: Optional[List[EMA]] = None
        self.schedulers = [create_scheduler(config, create_optimizer(config, model), len(self.train_loader))
                           for model in self.models]
        self.train_trackers, self.val_trackers = self._create_metric_trackers(config)
        self.all_trackers = self.train_trackers + self.val_trackers
        self.all_model_cnn_embeddings = register_embeddings_collector(self.models, use_only_in_train=True)

    def _create_metric_trackers(self, config: ConfigNode) -> Tuple[List[MetricTracker], List[MetricTracker]]:
        """
        Creates metric trackers used at model training and validation.
        """
        train_loader = self.train_loader
        val_loader = self.val_loader
        num_models = len(self.models)

        if hasattr(train_loader.dataset, "ambiguity_metric_args"):
            ambiguity_metric_args = train_loader.dataset.ambiguity_metric_args  # type: ignore
        else:
            ambiguity_metric_args = dict()

        save_tf_events = config.tensorboard.save_events if hasattr(config.tensorboard, 'save_events') else True
        metric_kwargs = {"num_epochs": config.scheduler.epochs,
                         "num_classes": config.dataset.n_classes,
                         "save_tf_events": save_tf_events}
        # A dataset without augmentations and not normalized
        dataset_train, dataset_val = get_datasets(config)
        train_trackers = [MetricTracker(dataset=dataset_train,
                                        output_dir=str(self.log_dir / f'model_{i:d}_train'),
                                        num_samples_total=len(train_loader.dataset),  # type: ignore
                                        num_samples_per_epoch=get_number_of_samples_per_epoch(train_loader),
                                        name=f"model_{i}_train",
                                        **{**metric_kwargs, **ambiguity_metric_args})
                          for i in range(num_models)]
        val_trackers = [MetricTracker(dataset=dataset_val,
                                      output_dir=str(self.log_dir / f'model_{i:d}_val'),
                                      num_samples_total=len(val_loader.dataset),  # type: ignore
                                      num_samples_per_epoch=get_number_of_samples_per_epoch(val_loader),
                                      name=f"model_{i}_valid",
                                      **metric_kwargs) for i in range(num_models)]

        return train_trackers, val_trackers

    def get_models(self, config: ConfigNode) -> List[torch.nn.Module]:
        """
        :param config: The job config
        :return: A list of models to be trained
        """
        raise NotImplementedError

    def forward(self, images: torch.Tensor, requires_grad: bool = True) -> List[torch.Tensor]:
        """
        Performs the forward pass for all models in the ModelTrainer class
        :param images: The input images
        :param requires_grad: Flag to indicate if the forward pass requires .grad attributes
        :return: A list of the logits for each model
        """
        def _forward(inputs: Union[List[torch.Tensor], torch.Tensor]) -> List[torch.Tensor]:
            if isinstance(inputs, list):
                return [_model(_input) for _model, _input in zip(self.models, inputs)]
            else:
                return [_model(inputs) for _model in self.models]

        @torch.no_grad()
        def _forward_inference_only(inputs: Union[List[torch.Tensor], torch.Tensor]) -> List[torch.Tensor]:
            return _forward(inputs)

        if requires_grad:
            return _forward(images)
        else:
            return _forward_inference_only(images)

    def compute_loss(self, outputs: List[torch.Tensor], labels: torch.Tensor,
                     indices: Optional[Tuple[IndexContainer, IndexContainer]] = None) -> Union[List[Loss], Loss]:
        """
        Compute the losses that will be optimized
        :param outputs: A list of logits output by each model
        :param labels: The target labels
        :return: A list of Loss objects, each element contains the loss that is fed to the optimizer and a
                 tensor of per sample losses
        """
        raise NotImplementedError

    def step_optimizers(self, losses: List[Loss]) -> None:
        """
        Take an optimizer step for every model's optimizer
        :param losses: A list of Loss objects
        :return:
        """
        for loss, scheduler in zip(losses, self.schedulers):
            scheduler.optimizer.zero_grad()
            loss.loss.backward()
            scheduler.optimizer.step()
            scheduler.step()

    def run_epoch(self, dataloader: DataLoader, epoch: int, is_train: bool = False) -> None:
        """
        Run a training or validation epoch
        :param dataloader: A dataloader object
        :param epoch: Current training epoch id
        :param is_train: Whether this is a training epoch or not
        :return:
        """
        raise NotImplementedError

    def save_checkpoint(self, epoch: int) -> bool:
        """
        Save checkpoints for the models for the current epoch
        :param epoch: The current epoch
        :return: Save success
        """
        is_last_epoch = epoch == self.config.scheduler.epochs - 1
        is_save = is_last_epoch or (epoch > 0 and epoch % self.config.train.checkpoint_period == 0)

        if is_save:
            logging.info(f"Saving model checkpoints, epoch {epoch}")
            self.checkpoint_dir.mkdir(parents=True, exist_ok=True)
            for ii in range(len(self.models)):
                path = str(self.checkpoint_dir / f'checkpoint_model_{ii:d}')
                full_save_name = path + '_last_epoch.pt' if is_last_epoch else path + f'_epoch_{epoch:d}.pt'
                state = {'epoch': epoch,
                         'model': self.models[ii].state_dict(),
                         'scheduler': self.schedulers[ii].state_dict(),
                         'optimizer': self.schedulers[ii].optimizer.state_dict()}
                torch.save(state, full_save_name)

        return is_save

    def load_checkpoints(self, restore_scheduler: bool, epoch: Optional[int] = None) -> None:
        """
        If epoch is not specified, latest checkpoint files are loaded to restore the state
        of model weights and optimisers
        :param restore_scheduler (bool): Restores the state of optimiser and scheduler from checkpoint
        :param epoch (int): Training epoch id.
        """
        suffix = f'_epoch_{epoch:d}.pt' if epoch else '_last_epoch.pt'

        for ii in range(len(self.models)):
            path = str(self.checkpoint_dir / f'checkpoint_model_{ii:d}') + suffix
            logging.info(f"Loading model-{ii} from checkpoint:\n {path}")
            state = torch.load(str(path))

            self.models[ii].load_state_dict(state['model'])
            if restore_scheduler:
                self.schedulers[ii].load_state_dict(state['scheduler'])
                self.schedulers[ii].optimizer.load_state_dict(state['optimizer'])
            if epoch is not None:
                logging.info(f"Model is loaded from epoch: {epoch}")
                assert state['epoch'] == epoch

        if self.ema_models:
            for ii in range(len(self.ema_models)):
                path = str(self.checkpoint_dir / f'checkpoint_ema_model_{ii:d}') + suffix
                logging.info(f"Loading ema teacher model-{ii} from checkpoint:\n {path}")
                self.ema_models[ii].restore_from_checkpoint(path)

    def run_training(self) -> None:
        """
        Perform model training.
        Model/s specified in config are trained for `num_epoch` epochs and results are stored in tf events.
        """
        num_epochs = self.config.scheduler.epochs
        epoch_range = range(num_epochs)
        if self.config.train.resume_epoch > 0:
            resume_epoch = self.config.train.resume_epoch
            epoch_range = range(resume_epoch + 1, num_epochs)
            self.load_checkpoints(restore_scheduler=self.config.train.restore_scheduler, epoch=resume_epoch)

        # Model evaluation - startup
        logging.info("Running evaluation on the validation set before training ...")
        self.run_epoch(self.val_loader, is_train=False, epoch=0)
        self.val_trackers[0].log_epoch_and_reset(epoch=0)

        # Model training loop
        for epoch in epoch_range:
            epoch_start = time.time()
            logging.info('\n' + f'Epoch {epoch:d}')
            self.run_epoch(self.train_loader, is_train=True, epoch=epoch)
            self.run_epoch(self.val_loader, is_train=False, epoch=epoch)
            self.save_checkpoint(epoch)

            for _t in self.all_trackers:
                _t.log_epoch_and_reset(epoch)
            logging.info('Epoch time: {0:.2f} secs'.format(time.time() - epoch_start))

        # Store loss values for post-training analysis
        for _t in self.all_trackers:
            _t.save_loss()

    def run_inference(self, dataloader: Any, use_mc_sampling: bool = False) -> List[MetricTracker]:
        """
        Deployment of pre-trained model.
        """
        dataset = dataloader.dataset
        trackers = [MetricTracker(os.path.join(self.config.train.output_dir, f'model_{i:d}_inference'),
                                  num_samples_total=len(dataset),
                                  num_samples_per_epoch=len(dataset),
                                  name=f"model_{i}_inference",
                                  num_epochs=1,
                                  num_classes=self.config.dataset.n_classes,
                                  save_tf_events=False) for i in range(len(self.models))]

        for model in self.models:
            model.eval()
            if use_mc_sampling:
                logging.info("Applying MC sampling at inference time.")
                dropout_layers = [layer for layer in model.modules() if isinstance(layer, torch.nn.Dropout)]
                for _layer in dropout_layers:
                    _layer.training = True

        with torch.no_grad():
            for indices, images, labels in dataloader:
                images, labels = images.to(self.device), labels.to(self.device)
                outputs = self.forward(images, requires_grad=False)
                losses = self.compute_loss(outputs, labels, indices=None)
                if not isinstance(losses, List):
                    losses = [losses]

                # Log training and validation stats in metric tracker
                for i, (logits, loss) in enumerate(zip(outputs, losses)):
                    trackers[i].sample_metrics.append_batch(epoch=0,
                                                            logits=logits.detach(),
                                                            labels=labels.detach(),
                                                            loss=loss.loss.item(),
                                                            indices=indices.cpu().tolist(),
                                                            per_sample_loss=loss.per_sample_loss.detach())
        return trackers
@ -1,80 +0,0 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

from typing import List, Optional, Tuple

import torch

from InnerEyeDataQuality.configs.config_node import ConfigNode
from InnerEyeDataQuality.deep_learning.collect_embeddings import get_all_embeddings
from InnerEyeDataQuality.deep_learning.trainers.model_trainer_base import IndexContainer, Loss, ModelTrainer
from InnerEyeDataQuality.deep_learning.utils import create_model
from InnerEyeDataQuality.deep_learning.loss import tanh_loss
from torch.utils.data.dataloader import DataLoader
from InnerEyeDataQuality.deep_learning.loss import CrossEntropyLoss


class VanillaTrainer(ModelTrainer):
    """
    Implements vanilla cross entropy training with one model
    """

    def __init__(self, config: ConfigNode):
        super().__init__(config)
        self.tanh_regularisation = config.train.tanh_regularisation
        self.loss_fn = CrossEntropyLoss(config)

    def get_models(self, config: ConfigNode) -> List[torch.nn.Module]:
        """
        :param config: The job config
        :return: A list with one model to be trained
        """
        return [create_model(config, model_id=0)]

    def compute_loss(self, outputs: List[torch.Tensor], labels: torch.Tensor,
                     indices: Optional[Tuple[IndexContainer, IndexContainer]] = None) -> Loss:
        """
        Implements the standard cross-entropy loss using one model
        :param outputs: A list of logits output by each model
        :param labels: The target labels
        :return: A Loss object that contains the loss that is fed to the optimizer and a
                 tensor of per sample losses
        """
        logits = outputs[0]
        per_sample_loss = self.loss_fn(predictions=logits, targets=labels, reduction='none')
        if self.weight is not None:
            per_sample_loss *= self.weight[labels]
        loss = torch.mean(per_sample_loss)

        if self.tanh_regularisation != 0.0:
            loss += self.tanh_regularisation * tanh_loss(logits)

        return Loss(per_sample_loss, loss)

    def run_epoch(self, dataloader: DataLoader, epoch: int, is_train: bool = False) -> None:
        """
        Run a training or validation epoch of the base model trainer but also step the forget rate scheduler
        :param dataloader: A dataloader object
        :param epoch: Current epoch id.
        :param is_train: Whether this is a training epoch or not
        :param run_inference_on_training_set: If True, record all metrics using the train_trackers
                                              (even if is_train = False)
        :return:
        """
        for model in self.models:
            model.train() if is_train else model.eval()

        for indices, images, labels in dataloader:
            images, labels = images.to(self.device), labels.to(self.device)
            outputs = self.forward(images, requires_grad=is_train)
            embeddings = get_all_embeddings(self.all_model_cnn_embeddings)[0]
            losses = self.compute_loss(outputs, labels)
            if is_train:
                self.step_optimizers([losses])

            # Log training and validation stats in metric tracker
            tracker = self.train_trackers[0] if is_train else self.val_trackers[0]
            tracker.sample_metrics.append_batch(epoch, outputs[0].detach(), labels.detach(), losses.loss.item(),
                                                indices.cpu().tolist(), losses.per_sample_loss.detach(), embeddings)
@ -1,185 +0,0 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

import random
from typing import Any, Callable, Tuple

import PIL
import PIL.Image
import numpy as np
import torch
import torchvision
from scipy.ndimage import gaussian_filter, map_coordinates
from skimage.filters import rank
from skimage.morphology import disk

from InnerEyeDataQuality.configs.config_node import ConfigNode


class BaseTransform:
    def __init__(self, config: ConfigNode):
        self.transform = lambda x: x

    def __call__(self, data: PIL.Image.Image) -> PIL.Image.Image:
        return self.transform(data)


class Standardize:
    def __init__(self, mean: np.ndarray, std: np.ndarray):
        self.mean = np.array(mean)
        self.std = np.array(std)

    def __call__(self, image: PIL.Image.Image) -> np.ndarray:
        image = np.asarray(image).astype(np.float32) / 255.
        image = (image - self.mean) / self.std
        return image


class CenterCrop(BaseTransform):
    def __init__(self, config: ConfigNode):
        self.transform = torchvision.transforms.CenterCrop(config.preprocess.center_crop_size)


class RandomCrop(BaseTransform):
    def __init__(self, config: ConfigNode):
        self.transform = torchvision.transforms.RandomCrop(
            config.dataset.image_size,
            padding=config.augmentation.random_crop.padding,
            fill=config.augmentation.random_crop.fill,
            padding_mode=config.augmentation.random_crop.padding_mode)


class RandomResizeCrop(BaseTransform):
    def __init__(self, config: ConfigNode):
        self.transform = torchvision.transforms.RandomResizedCrop(
            size=config.preprocess.resize,
            scale=config.augmentation.random_crop.scale)


class RandomHorizontalFlip(BaseTransform):
    def __init__(self, config: ConfigNode):
        self.transform = torchvision.transforms.RandomHorizontalFlip(
            config.augmentation.random_horizontal_flip.prob)


class RandomAffine(BaseTransform):
    def __init__(self, config: ConfigNode):
        self.transform = torchvision.transforms.RandomAffine(degrees=config.augmentation.random_affine.max_angle,  # 15
                                                             translate=(
                                                                 config.augmentation.random_affine.max_horizontal_shift,
                                                                 config.augmentation.random_affine.max_vertical_shift),
                                                             shear=config.augmentation.random_affine.max_shear)


class Resize(BaseTransform):
    def __init__(self, config: ConfigNode):
        self.transform = torchvision.transforms.Resize(config.preprocess.resize)


class RandomColorJitter(BaseTransform):
    def __init__(self, config: ConfigNode) -> None:
        self.transform = torchvision.transforms.ColorJitter(brightness=config.augmentation.random_color.brightness,
                                                            contrast=config.augmentation.random_color.contrast,
                                                            saturation=config.augmentation.random_color.saturation)


class RandomErasing(BaseTransform):
    def __init__(self, config: ConfigNode) -> None:
        self.transform = torchvision.transforms.RandomErasing(p=0.5,
                                                              scale=config.augmentation.random_erasing.scale,
                                                              ratio=config.augmentation.random_erasing.ratio)


class RandomGamma(BaseTransform):
    def __init__(self, config: ConfigNode) -> None:
        self.min, self.max = config.augmentation.gamma.scale

    def __call__(self, image: PIL.Image.Image) -> PIL.Image.Image:
        gamma = random.uniform(self.min, self.max)
        return torchvision.transforms.functional.adjust_gamma(image, gamma=gamma)


class HistogramNormalization:
    def __init__(self, config: ConfigNode) -> None:
        self.disk_size = config.preprocess.histogram_normalization.disk_size

    def __call__(self, image: PIL.Image.Image) -> np.ndarray:
        # Apply local histogram equalization
        image = np.array(image)
        return PIL.Image.fromarray(rank.equalize(image, selem=disk(self.disk_size)))


class ExpandChannels:
    def __call__(self, data: torch.Tensor) -> torch.Tensor:
        return torch.repeat_interleave(data, 3, dim=0)


class ToNumpy:
    def __call__(self, image: PIL.Image.Image) -> np.ndarray:
        return np.array(image)


class AddGaussianNoise:
    def __init__(self, config: ConfigNode) -> None:
        """
        Transformation to add Gaussian noise N(0, std) to an image, where std is set with the
        config.augmentation.gaussian_noise.std argument. The transformation will be applied with probability
        config.augmentation.gaussian_noise.p_apply
        """
        self.std = config.augmentation.gaussian_noise.std
        self.p_apply = config.augmentation.gaussian_noise.p_apply

    def __call__(self, data: torch.Tensor) -> torch.Tensor:
        if np.random.random(1) > self.p_apply:
            return data
        noise = torch.randn(size=data.shape) * self.std
        data = torch.clamp(data + noise, 0, 1)
        return data


class ElasticTransform:
    """Elastic deformation of images as described in [Simard2003]_.
    .. [Simard2003] Simard, Steinkraus and Platt, "Best Practices for
       Convolutional Neural Networks applied to Visual Document Analysis", in
       Proc. of the International Conference on Document Analysis and
       Recognition, 2003.

       https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.160.8494&rep=rep1&type=pdf

    :param sigma: elasticity coefficient
    :param alpha: intensity of the deformation
    :param p_apply: probability of applying the transformation
    """

    def __init__(self, config: ConfigNode) -> None:
        self.alpha = config.augmentation.elastic_transform.alpha
        self.sigma = config.augmentation.elastic_transform.sigma
        self.p_apply = config.augmentation.elastic_transform.p_apply

    def __call__(self, image: PIL.Image) -> PIL.Image:
        if np.random.random(1) > self.p_apply:
            return image
        image = np.asarray(image).squeeze()
        assert len(image.shape) == 2
        shape = image.shape

        dx = gaussian_filter((np.random.random(shape) * 2 - 1), self.sigma, mode="constant", cval=0) * self.alpha
        dy = gaussian_filter((np.random.random(shape) * 2 - 1), self.sigma, mode="constant", cval=0) * self.alpha

        x, y = np.meshgrid(np.arange(shape[0]), np.arange(shape[1]), indexing='ij')
        indices = np.reshape(x + dx, (-1, 1)), np.reshape(y + dy, (-1, 1))
        return PIL.Image.fromarray(map_coordinates(image, indices, order=1).reshape(shape))


class DualViewTransformWrapper:
    def __init__(self, transforms: Callable):
        self.transforms = transforms

    def __call__(self, sample: PIL.Image.Image) -> Tuple[Any, Any]:
        transform = self.transforms
        xi = transform(sample)
        xj = transform(sample)
        return xi, xj
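For context, DualViewTransformWrapper above is what produces the two independently augmented views consumed by the SSL objectives. A minimal usage sketch with a hypothetical torchvision pipeline (the crop size, scale and flip probability here are illustrative, not taken from the deleted configs):

    import PIL.Image
    import numpy as np
    import torchvision

    base_pipeline = torchvision.transforms.Compose([
        torchvision.transforms.RandomResizedCrop(size=32, scale=(0.8, 1.0)),
        torchvision.transforms.RandomHorizontalFlip(p=0.5),
        torchvision.transforms.ToTensor()])

    dual_view = DualViewTransformWrapper(base_pipeline)
    image = PIL.Image.fromarray(np.uint8(np.random.rand(64, 64, 3) * 255))
    view_i, view_j = dual_view(image)  # two independently augmented tensors of shape (3, 32, 32)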
@ -1,154 +0,0 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

import collections
import logging
from importlib import import_module
from pathlib import Path
from typing import Optional, Union

import torch

from yacs.config import CfgNode

from default_paths import PROJECT_ROOT_DIR
from InnerEyeDataQuality.configs.config_node import ConfigNode
from InnerEyeDataQuality.configs import model_config
from InnerEyeDataQuality.configs.selector_config import get_default_selector_config
from InnerEyeDataQuality.deep_learning.self_supervised.configs import ssl_model_config
from InnerEyeDataQuality.deep_learning.self_supervised.utils import create_ssl_image_classifier
from InnerEyeDataQuality.utils.generic import setup_cudnn, get_train_output_dir


def get_run_config(config: ConfigNode, run_seed: int) -> ConfigNode:
    config_run = config.clone()
    config_run.defrost()
    config_run.train.seed = run_seed
    config_run.train.output_dir = get_train_output_dir(config)
    config_run.freeze()
    return config_run


def load_ssl_model_config(config_path: Path) -> ConfigNode:
    '''
    Loads configs required for self supervised learning. Does not set up cuDNN, as this is
    taken care of by Lightning.
    '''
    config = ssl_model_config.get_default_model_config()
    config.merge_from_file(config_path)
    update_model_config(config)

    # Freeze config entries
    config.freeze()

    return config


def load_model_config(config_path: Path) -> ConfigNode:
    '''
    Loads configs required for model training and inference.
    '''
    config = model_config.get_default_model_config()
    config.merge_from_file(config_path)
    update_model_config(config)
    setup_cudnn(config)

    # Freeze config entries
    config.freeze()

    return config


def update_model_config(config: ConfigNode) -> ConfigNode:
    '''
    Adds dataset specific parameters in model config
    '''
    if config.dataset.name in ['CIFAR10', 'CIFAR100']:
        dataset_dir = f'~/.torch/datasets/{config.dataset.name}'
        config.dataset.dataset_dir = dataset_dir
        config.dataset.image_size = 32
        config.dataset.n_channels = 3
        config.dataset.n_classes = int(config.dataset.name[5:])
    elif config.dataset.name in ['MNIST']:
        dataset_dir = '~/.torch/datasets'
        config.dataset.dataset_dir = dataset_dir
        config.dataset.image_size = 28
        config.dataset.n_channels = 1
        config.dataset.n_classes = 10

    if not torch.cuda.is_available():
        config.device = 'cpu'

    return config


def override_config(source: ConfigNode, overrides: ConfigNode) -> ConfigNode:
    '''
    Overrides the keys and values present in node `overrides` into source object recursively.
    '''
    for key, value in overrides.items():
        if isinstance(value, collections.Mapping) and value:
            returned = override_config(source.get(key, {}), value)  # type: ignore
            returned = CfgNode(returned)
            source[key] = returned
        else:
            source[key] = overrides[key]
    return source


def load_selector_config(config_path: str) -> ConfigNode:
    """
    Loads a selector config and merges with its model config
    """
    selector_config = get_default_selector_config()
    selector_config.merge_from_file(config_path)
    model_config = load_model_config(PROJECT_ROOT_DIR / selector_config.selector.model_config_path)
    merged_config = override_config(source=model_config, overrides=selector_config)
    merged_config.freeze()

    return merged_config


def create_logger(output_dir: Union[str, Path]) -> None:
    if isinstance(output_dir, str):
        output_dir = Path(output_dir)
    log_path = output_dir.absolute() / 'training.log'
    logging.basicConfig(filename=log_path,
                        filemode='w',
                        format='%(asctime)s %(name)-4s %(levelname)-6s %(message)s',
                        datefmt='%m-%d %H:%M',
                        level=logging.DEBUG)

    # define a Handler which writes INFO messages or higher to the sys.stderr
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)

    # set a format which is simpler for console use
    formatter = logging.Formatter('%(name)-4s: %(levelname)-6s %(message)s')

    # tell the handler to use this format
    console.setFormatter(formatter)

    # add the handler to the root logger
    logging.getLogger().addHandler(console)


def create_model(config: ConfigNode, model_id: Optional[int]) -> torch.nn.Module:
    device = torch.device(config.device)
    if config.train.use_self_supervision:
        assert isinstance(model_id, int) and model_id < 2
        model_checkpoint_name = config.train.self_supervision.checkpoints[model_id]
        model_checkpoint_path = PROJECT_ROOT_DIR / model_checkpoint_name
        model = create_ssl_image_classifier(num_classes=config.dataset.n_classes,
                                            pl_checkpoint_path=str(model_checkpoint_path),
                                            freeze_encoder=config.train.self_supervision.freeze_encoder)
    else:
        try:
            module = import_module('PyTorchImageClassification.models'f'.{config.model.type}.{config.model.name}')
        except ModuleNotFoundError:
            module = import_module(
                f'InnerEyeDataQuality.deep_learning.architectures.{config.model.type}.{config.model.name}')
        model = getattr(module, 'Network')(config)
    return model.to(device)
@ -1,77 +0,0 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def compute_label_entropy(label_counts: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
:param label_counts: Input label histogram (n_samples x n_classes)
|
||||
"""
|
||||
label_distribution = label_counts / np.sum(label_counts, axis=-1, keepdims=True)
|
||||
return cross_entropy(label_distribution, label_distribution)
|
||||
|
||||
|
||||
def compute_accuracy(source: np.ndarray, target: np.ndarray) -> float:
|
||||
"""
|
||||
Computes the agreement rate between two tensors
|
||||
:param source: source array (n_samples, n_classes)
|
||||
:param target: target array (n_samples, n_classes)
|
||||
"""
|
||||
# Compare single label case against all available labels
|
||||
source = np.argmax(source, axis=1)
|
||||
target = np.argmax(target, axis=1)
|
||||
|
||||
# Accuracy
|
||||
acc = 100.0 * np.sum(source == target) / source.size
|
||||
|
||||
return acc
|
||||
|
||||
def compute_model_disagreement_score(posteriors: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Measure model disagreement score (Ref BALD)
|
||||
:param posteriors: numpy array (shape: (model_candidates, batch_size, num_classes))
|
||||
:return: Disagreement score (BALD) for each sample (shape: (batch))
|
||||
"""
|
||||
def _entropy(x: np.ndarray, log_base: int, epsilon: float = 1e-12) -> np.ndarray:
|
||||
return -np.sum(x * (np.log(x + epsilon) / np.log(log_base)), axis=-1)
|
||||
num_classes = int(posteriors.shape[-1])
|
||||
avg_posteriors = np.mean(posteriors, axis=0)
|
||||
avg_entropy = _entropy(avg_posteriors, num_classes)
|
||||
exp_conditional_entropy = np.mean(_entropy(posteriors, num_classes), axis=0)
|
||||
bald_score = avg_entropy - exp_conditional_entropy
|
||||
return bald_score


def cross_entropy(predicted_distribution: np.ndarray, target_distribution: np.ndarray) -> np.ndarray:
    """
    Compute the normalised cross-entropy between the predicted and target distributions
    :param predicted_distribution: Predicted distribution shape = (num_samples, num_classes)
    :param target_distribution: Target distribution shape = (num_samples, num_classes)
    :return: The cross-entropy for each sample
    """
    num_classes = predicted_distribution.shape[1]
    return -np.sum(target_distribution * np.log(predicted_distribution + 1e-12) / np.log(num_classes), axis=-1)


def max_prediction_error(predicted_distribution: np.ndarray, target_distribution: np.ndarray) -> np.ndarray:
    """
    Compute the max (class-wise) prediction error between the predicted and target distributions
    :param predicted_distribution: Predicted distribution shape = (num_samples, num_classes)
    :param target_distribution: Target distribution shape = (num_samples, num_classes)
    :return: The max (class-wise) prediction error for each sample
    """
    current_target_class = np.argmax(target_distribution, axis=1)
    current_target_pred_prob = predicted_distribution[range(len(current_target_class)), current_target_class]
    prediction_errors = 1.0 - current_target_pred_prob
    return prediction_errors


def total_variation(predicted_distribution: np.ndarray, target_distribution: np.ndarray) -> np.ndarray:
    """
    Compute the total variation error between the predicted and target distributions
    :param predicted_distribution: Predicted distribution shape = (num_samples, num_classes)
    :param target_distribution: Target distribution shape = (num_samples, num_classes)
    :return: The total variation for each sample
    """
    return np.sum(np.abs(predicted_distribution - target_distribution), axis=-1) / 2.
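
# Illustrative usage sketch (added for illustration, not part of the original module). The three
# metrics above compare a predicted class distribution against a target one, sample by sample;
# a quick sanity check on a single two-class sample:
def _example_distribution_metrics() -> None:
    predicted = np.array([[0.8, 0.2]])
    target = np.array([[1.0, 0.0]])
    print(cross_entropy(predicted, target))         # approx. [0.32] (log base = num_classes)
    print(max_prediction_error(predicted, target))  # [0.2], i.e. 1 - p(target class)
    print(total_variation(predicted, target))       # [0.2]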
@@ -1,408 +0,0 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

import logging
import re

import numpy as np
import pandas as pd
import seaborn as sns

from itertools import cycle
from matplotlib import pyplot as plt
from pathlib import Path
from sklearn.metrics import auc, confusion_matrix, precision_recall_curve, roc_auc_score, roc_curve
from typing import Any, Dict, List, Optional, Tuple, Union

from default_paths import MAIN_SIMULATION_DIR
from InnerEyeDataQuality.selection.simulation_statistics import SimulationStatsDistribution
from InnerEyeDataQuality.utils.generic import create_folder, save_obj


def _plot_mean_and_confidence_intervals(ax: plt.Axes, y: np.ndarray, color: str, label: str, linestyle: str = '-',
                                        clip: Tuple[float, float] = (-np.inf, np.inf),
                                        use_derivative: bool = False,
                                        include_auc: bool = True,
                                        linewidth: float = 3.0) -> None:
    if use_derivative:
        y = (y - y[:, 0][:, np.newaxis]) / (np.arange(y.shape[1]) + 1)

    x = np.arange(0, y.shape[1])
    mean = np.mean(y, axis=0)
    total_auc = auc([0, np.max(x)], [100, 100])
    auc_mean = auc(x, mean) / total_auc
    label += f" - AUC: {auc_mean:.4f}" if include_auc else ""
    std_error = np.std(y, axis=0) / np.sqrt(y.shape[0]) * 1.96
    ax.plot(x, mean, color=color, label=label, linestyle=linestyle, linewidth=linewidth)
    ax.fill_between(x,
                    np.clip((mean - std_error), a_min=clip[0], a_max=clip[1]),
                    np.clip((mean + std_error), a_min=clip[0], a_max=clip[1]),
                    color=color, alpha=.2)
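
# Note added for illustration (not part of the original module): the shaded band above is a
# normal-approximation 95% confidence interval of the mean across simulation runs, with
# half-width 1.96 * std / sqrt(n_runs). For example, with 4 runs whose values at one step are
# [80, 82, 84, 86] (mean 83, std approx. 2.24), the band at that step spans roughly 83 +/- 2.2.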


def plot_stats_for_all_selectors(stats: Dict[str, SimulationStatsDistribution],
                                 y_attr_names: List[str],
                                 y_attr_labels: List[str],
                                 title: str = '',
                                 x_label: str = '',
                                 y_label: str = '',
                                 legend_loc: Union[int, str] = 2,
                                 fontsize: int = 12,
                                 figsize: Tuple[int, int] = (14, 10),
                                 plot_n_not_ambiguous_noise_cases: bool = False,
                                 **plot_kwargs: Any) -> Tuple[plt.Figure, plt.Axes]:
    """
    Given the dictionary of SimulationStatsDistribution objects, plot a curve for each attribute in the
    y_attr_names list for each selector. The total number of curves will be num_selectors * num_y_attr_names.
    :param stats: A dictionary where each entry corresponds to the SimulationStatsDistribution of one selector.
    :param y_attr_names: The names of the attributes to plot on the y axis.
    :param y_attr_labels: The labels for the legend of the attributes to plot on the y axis.
    :param title: The title of the figure.
    :param x_label: The title of the x-axis.
    :param y_label: The title of the y-axis.
    :param legend_loc: The location of the legend.
    :param plot_n_not_ambiguous_noise_cases: If True, indicate the number of easy (non-ambiguous) noise cases left
        at the end of the simulation for each selector. If all cases are selected, indicate the corresponding
        iteration.
    :return: The figure and axis with all the curves plotted.
    """
    colors = ['blue', 'red', 'orange', 'brown', 'black', 'purple', 'green', 'gray', 'olive', 'cyan', 'yellow', 'pink']
    linestyles = ['-', '--']
    fig, ax = plt.subplots(figsize=figsize)

    # Sort alphabetically
    ordered_keys = sorted(stats.keys())
    # Always put Oracle and Random first (this crashes if Oracle / Random are not present)
    ordered_keys.remove("Oracle")
    ordered_keys.remove("Random")
    ordered_keys.insert(0, "Oracle")
    ordered_keys.insert(1, "Random")

    for _stat_key, color in zip(ordered_keys, cycle(colors)):
        _stat = stats[_stat_key]
        if '(Posterior)' in _stat.name:
            _stat.name = _stat.name.split('(Posterior)')[0]
        for y_attr_name, y_attr_label, linestyle in zip(y_attr_names, y_attr_labels, cycle(linestyles)):
            color = assign_preset_color(name=_stat.name, color=color)
            _plot_mean_and_confidence_intervals(ax=ax, y=_stat.__getattribute__(y_attr_name), color=color,
                                                label=y_attr_label + _stat.name, linestyle=linestyle, **plot_kwargs)
            if plot_n_not_ambiguous_noise_cases:
                average_value_attribute = np.mean(_stat.__getattribute__(y_attr_name), axis=0)
                std_value_attribute = np.std(_stat.__getattribute__(y_attr_name), axis=0)
                logging.debug(f"Method {_stat.name} - {y_attr_name} std: {std_value_attribute[-1]}")
                n_average_mislabelled_not_ambiguous = np.mean(_stat.num_remaining_mislabelled_not_ambiguous, axis=0)
                ax.text(average_value_attribute.size + 1, average_value_attribute[-1],
                        f"{average_value_attribute[-1]:.2f}", fontsize=fontsize - 4)
                no_mislabelled_not_ambiguous_remaining = np.where(n_average_mislabelled_not_ambiguous == 0)[0]
                if no_mislabelled_not_ambiguous_remaining.size > 0:
                    idx = np.min(no_mislabelled_not_ambiguous_remaining)
                    ax.scatter(idx + 1, average_value_attribute[idx], color=color, marker="s")
    ax.grid()
    ax.set_title(title, fontsize=fontsize)
    ax.set_xlabel(x_label, fontsize=fontsize)
    ax.set_ylabel(y_label, fontsize=fontsize)
    ax.legend(loc=legend_loc, fontsize=fontsize - 2)
    ax.xaxis.set_tick_params(labelsize=fontsize - 2)
    ax.yaxis.set_tick_params(labelsize=fontsize - 2)

    return fig, ax


def assign_preset_color(name: str, color: str) -> str:
    if re.search('oracle', name, re.IGNORECASE):
        return 'blue'
    elif re.search('random', name, re.IGNORECASE):
        return 'red'
    elif re.search('bald', name, re.IGNORECASE):
        if re.search('active', name, re.IGNORECASE):
            return 'green'
        return 'purple'
    elif re.search('clean', name, re.IGNORECASE):
        return 'pink'
    elif re.search('self-supervision', name, re.IGNORECASE) \
            or re.search('SSL', name, re.IGNORECASE) or re.search('pretrained', name, re.IGNORECASE):
        return 'orange'
    elif re.search('imagenet', name, re.IGNORECASE):
        return 'green'
    elif re.search('self-supervision', name, re.IGNORECASE):
        if re.search('graph', name, re.IGNORECASE):
            return 'green'
        elif re.search('With entropy', name, re.IGNORECASE):
            return 'olive'
        elif re.search('active', name, re.IGNORECASE):
            return 'cyan'
        else:
            return 'orange'
    elif re.search('coteaching', name, re.IGNORECASE):
        return 'brown'
    elif re.search('less', name, re.IGNORECASE):
        return 'grey'
    elif re.search('vanilla', name, re.IGNORECASE):
        if re.search('active', name, re.IGNORECASE):
            return 'grey'
        return 'black'
    else:
        return color


def plot_minimal_sampler(stats: Dict[str, SimulationStatsDistribution],
                         n_samples: int, ax: plt.Axes, fontsize: int = 12, legend_loc: int = 2) -> None:
    """
    Starting with one initial label, a noisy sample needs to be relabeled at least twice (in the best-case
    scenario the sampled class during relabeling is equal to the correct label both times, so the correct
    label wins the majority vote).
    """

    n_mislabelled_ambiguous = list(stats.values())[0].num_initial_mislabelled_ambiguous
    n_mislabelled_not_ambiguous = list(stats.values())[0].num_initial_mislabelled_not_ambiguous

    accuracy_beginning = 100 * float(n_samples - n_mislabelled_ambiguous - n_mislabelled_not_ambiguous) / n_samples
    ax.plot([0, 2 * (n_mislabelled_not_ambiguous + n_mislabelled_ambiguous)], [accuracy_beginning, 100], linestyle="--",
            label="Minimal sampler")

    # accuracy_easy_cases = 100 * float(n_samples - n_mislabelled_ambiguous) / n_samples
    # max_accuracy = max([np.max(_stat.accuracy) for _stat in stats.values()])
    # plt.scatter(2 * n_mislabelled_not_ambiguous, accuracy_easy_cases,
    #             marker='s', label="No non-ambiguous noise cases left")
    # ax.set_ylim(accuracy_beginning - 0.25, max_accuracy + 0.25)

    ax.legend(loc=legend_loc, fontsize=fontsize - 2)
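
# Worked example added for illustration (not part of the original module): with n_samples = 1000,
# 100 clearly mislabelled and 50 ambiguous mislabelled samples, the starting label accuracy is
# 100 * (1000 - 150) / 1000 = 85%, and the minimal sampler reaches 100% after
# 2 * (100 + 50) = 300 relabels, which is the dashed reference line drawn above.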


def plot_stats(stats: Dict[str, SimulationStatsDistribution],
               dataset_name: str,
               n_samples: int,
               filename_suffix: str,
               save_path: Optional[Path] = None,
               sample_indices: Optional[List[int]] = None) -> None:
    """
    :param stats: Dictionary mapping selector names to their SimulationStatsDistribution.
    :param dataset_name: Name of the dataset, used in figure titles and the output sub-folder.
    :param n_samples: Total number of samples in the dataset.
    :param filename_suffix: Suffix appended to the saved figure filenames.
    :param save_path: If provided, directory where the figures and the plot inputs are saved.
    :param sample_indices: Image indices used in the dataset to visualise the selected cases
    """

    if save_path:
        input_args = locals()
        save_path = save_path / dataset_name
        save_path.mkdir(exist_ok=True)
        save_obj(input_args, save_path / "inputs_to_plot_stats.pkl")

    noise_rate = 100 * float(
        list(stats.values())[0].num_initial_mislabelled_ambiguous + list(stats.values())[
            0].num_initial_mislabelled_not_ambiguous) / n_samples
    # Label accuracy vs num relabels
    fontsize, legend_loc = 20, 2
    fig, ax = plot_stats_for_all_selectors(stats, ['accuracy'], [''],
                                           title=f'Dataset Curation - {dataset_name} (N={n_samples}, '
                                                 f'{noise_rate:.1f}% noise)',
                                           x_label='Number of collected relabels on the dataset',
                                           y_label='Percentage of correct labels',
                                           legend_loc=legend_loc,
                                           fontsize=fontsize,
                                           figsize=(11, 10),
                                           plot_n_not_ambiguous_noise_cases=True)
    if dataset_name != "NoisyChestXray":
        plot_minimal_sampler(stats, n_samples, ax, fontsize=fontsize, legend_loc=legend_loc)
    if save_path:
        fig.savefig(save_path / f"simulation_label_accuracy_{filename_suffix}.pdf", bbox_inches='tight')
        fig.savefig(save_path / f"simulation_label_accuracy_{filename_suffix}.png", bbox_inches='tight')

    # Label accuracy vs num relabels - To Origin
    fig, ax = plot_stats_for_all_selectors(stats, ['accuracy'], [''],
                                           title=f'Dataset Curation - {dataset_name} (N={n_samples})',
                                           x_label='Number of collected relabels on the dataset',
                                           y_label='Percentage of correct labels',
                                           legend_loc=1,
                                           plot_n_not_ambiguous_noise_cases=False,
                                           use_derivative=True)
    if save_path:
        fig.savefig(save_path / f"simulation_label_accuracy_to_origin_{filename_suffix}.png", bbox_inches='tight')

    # Average total variation vs num relabels
    fig, ax = plot_stats_for_all_selectors(stats, ['avg_total_variation'], [''],
                                           title=f'Dataset Curation - {dataset_name} (N={n_samples})',
                                           x_label='Number of collected relabels on the dataset',
                                           y_label='Total Variation (Full vs Sampled Distributions)',
                                           fontsize=fontsize,
                                           include_auc=False,
                                           figsize=(10, 11),
                                           legend_loc=1)
    if save_path:
        fig.savefig(save_path / f"simulation_avg_total_variation_{filename_suffix}.png", bbox_inches='tight')
        fig.savefig(save_path / f"simulation_avg_total_variation_{filename_suffix}.pdf", bbox_inches='tight')

    # Remaining number of noisy cases vs num relabels
    fig, ax = plot_stats_for_all_selectors(stats,
                                           ['num_remaining_mislabelled_not_ambiguous'], [''],
                                           x_label='Number of collected relabels on the dataset',
                                           y_label='# of remaining clear noisy samples',
                                           fontsize=fontsize + 1,
                                           figsize=(10, 10),
                                           include_auc=False,
                                           linewidth=5.0,
                                           legend_loc="lower left")
    if save_path:
        fig.savefig(save_path / f"simulation_remaining_clear_mislabelled_{filename_suffix}.png", bbox_inches='tight')
        fig.savefig(save_path / f"simulation_remaining_clear_mislabelled_{filename_suffix}.pdf", bbox_inches='tight')

    # Remaining number of ambiguous cases vs num relabels
    fig, ax = plot_stats_for_all_selectors(stats,
                                           ['num_remaining_mislabelled_ambiguous'], [''],
                                           x_label='Number of collected relabels on the dataset',
                                           y_label='# of remaining difficult noisy samples',
                                           fontsize=fontsize + 1,
                                           include_auc=False,
                                           figsize=(10, 10),
                                           linewidth=5.0,
                                           legend_loc="lower left")
    if save_path:
        fig.savefig(save_path / f"simulation_remaining_ambiguous_mislabelled_{filename_suffix}.png",
                    bbox_inches='tight')
        fig.savefig(save_path / f"simulation_remaining_ambiguous_mislabelled_{filename_suffix}.pdf",
                    bbox_inches='tight')


def plot_roc_curve(scores: np.ndarray, labels: np.ndarray,
                   type_of_cases: str = "mislabelled",
                   ax: Optional[plt.Axes] = None,
                   color: str = "b",
                   legend: str = "AUC",
                   linestyle: str = "-") -> None:
    fpr, tpr, threshold = roc_curve(labels, scores)
    roc_auc = roc_auc_score(labels, scores)
    if ax is None:
        fig, ax = plt.subplots()
    ax.plot(fpr, tpr, color=color, label=f"{legend}: {roc_auc:.2f}", linestyle=linestyle)
    ax.set_ylabel("True positive rate")
    ax.set_xlabel("False positive rate")
    ax.set_title(f"ROC curve - {type_of_cases} detection")
    ax.legend(loc="lower right")
    ax.grid(b=True)


def plot_pr_curve(scores: np.ndarray, labels: np.ndarray,
                  type_of_cases: str = "mislabelled",
                  ax: Optional[plt.Axes] = None,
                  color: str = "b",
                  legend: str = "AUC",
                  linestyle: str = "-") -> None:
    precision, recall, _ = precision_recall_curve(y_true=labels, probas_pred=scores)
    pr_auc = auc(recall, precision)
    if ax is None:
        fig, ax = plt.subplots()
    ax.plot(recall, precision, color=color, label=f"{legend}: {pr_auc:.2f}", linestyle=linestyle)
    ax.set_xlim([0.0, 1.05])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel("Recall")
    ax.set_ylabel("Precision")
    ax.set_title(f"Precision-Recall curve - {type_of_cases} detection")
    ax.legend(loc="lower right")
    ax.grid(b=True)


def plot_binary_confusion(scores: np.ndarray, labels: np.ndarray, type_of_cases: str = "mislabelled",
                          ax: Optional[plt.Axes] = None) -> None:
    if ax is None:
        fig, ax = plt.subplots()
    fpr, tpr, threshold = roc_curve(labels, scores)
    # Youden's J statistic: pick the threshold that maximises TPR - FPR
    optimal_threshold = threshold[np.argmax(tpr - fpr)]
    prediction = scores > optimal_threshold
    tn, fp, fn, tp = confusion_matrix(labels, prediction).ravel()
    # Cell rates shown in the heatmap: TNR, FPR, FNR and TPR
    rates = tn / (tn + fp), fp / (tn + fp), fn / (tp + fn), tp / (tp + fn)
    cf_matrix = confusion_matrix(labels, prediction)
    group_names = ["True Neg", "False Pos", "False Neg", "True Pos"]
    group_counts = [f"{value:.0f}" for value in cf_matrix.flatten()]
    group_percentages = [f"{value:.3f}" for value in rates]
    annotations = [f"{v1}\n{v2}\n{v3}" for v1, v2, v3 in zip(group_names, group_counts, group_percentages)]
    annotations = np.asarray(annotations).reshape(2, 2)
    sns.heatmap(cf_matrix, annot=annotations, fmt="", cmap=sns.light_palette("navy", reverse=True), ax=ax,
                cbar=False)
    ax.set_xlabel("Prediction")
    ax.set_ylabel("Actual")
    ax.set_title(f"Confusion matrix - {type_of_cases} cases\nThreshold = {optimal_threshold:.2f}")


def plot_stats_scores(selector_name: str, scores_mislabelled: np.ndarray, labels_mislabelled: np.ndarray,
                      scores_ambiguous: Optional[np.ndarray] = None, labels_ambiguous: Optional[np.ndarray] = None,
                      save_path: Optional[Path] = None) -> None:
    if scores_ambiguous is None:
        fig, ax = plt.subplots(3, 1, figsize=(5, 10))
    else:
        fig, ax = plt.subplots(3, 2, figsize=(10, 15))

    fig.suptitle(selector_name)

    ax = ax.ravel(order="F")
    plot_roc_curve(scores_mislabelled, labels_mislabelled, type_of_cases="mislabelled", ax=ax[0])
    plot_pr_curve(scores_mislabelled, labels_mislabelled, type_of_cases="mislabelled", ax=ax[1])
    plot_binary_confusion(scores_mislabelled, labels_mislabelled, type_of_cases="mislabelled", ax=ax[2])

    if scores_ambiguous is not None and labels_ambiguous is not None and np.sum(labels_ambiguous) != 0:
        plot_roc_curve(scores_ambiguous, labels_ambiguous, type_of_cases="ambiguous", ax=ax[3])
        plot_pr_curve(scores_ambiguous, labels_ambiguous, type_of_cases="ambiguous", ax=ax[4])
        plot_binary_confusion(scores_ambiguous, labels_ambiguous, "ambiguous", ax[5])
        # Ambiguity detection restricted to the samples that are mislabelled
        scores_ambiguous_given_mislabelled = scores_ambiguous[labels_mislabelled == 1]
        labels_ambiguous_given_mislabelled = labels_ambiguous[labels_mislabelled == 1]
        if roc_auc_score(labels_ambiguous_given_mislabelled, scores_ambiguous_given_mislabelled) < .5:
            scores_ambiguous_given_mislabelled *= -1

        plot_roc_curve(scores_ambiguous_given_mislabelled,
                       labels_ambiguous_given_mislabelled,
                       type_of_cases="ambiguous",
                       ax=ax[3],
                       color="red", legend="AUC given mislabelled=True", linestyle="--")
        plot_pr_curve(scores_ambiguous_given_mislabelled,
                      labels_ambiguous_given_mislabelled,
                      type_of_cases="ambiguous",
                      ax=ax[4],
                      color="red", legend="AUC given mislabelled=True", linestyle="--")

    if save_path:
        plt.savefig(save_path / f"{selector_name}_stats_scoring.png", bbox_inches="tight")
    plt.show()


def plot_relabeling_score(true_majority: np.ndarray,
                          starting_scores: np.ndarray,
                          current_majority: np.ndarray,
                          selector_name: str) -> None:
    """
    Plots for ranking of samples score-wise
    """
    create_folder(MAIN_SIMULATION_DIR / "scores_histogram")
    create_folder(MAIN_SIMULATION_DIR / "noise_detection_heatmaps")

    is_noisy = true_majority != current_majority
    total_noise_rate = np.mean(is_noisy)

    # Compute how many noisy samples were present among the highest-scoring n_noisy samples.
    target_perc = int((1 - total_noise_rate) * 100)
    q = np.percentile(starting_scores, q=target_perc)
    noisy_cases_detected = is_noisy[starting_scores > q]
    percentage_noisy_detected = 100 * float(noisy_cases_detected.sum()) / is_noisy.sum()

    # Plot histogram of scores differentiated by noisy or not.
    df = pd.DataFrame({"scores": starting_scores,
                       "is_noisy": is_noisy})
    plt.close()
    sns.histplot(data=df, x="scores", hue="is_noisy", multiple="dodge", bins=10)
    plt.title(f"Histogram of relabeling scores {selector_name}\n"
              f"{percentage_noisy_detected:.1f}% noise cases > {target_perc}th percentile of scores")
    plt.savefig(MAIN_SIMULATION_DIR / "scores_histogram" / selector_name)
    plt.close()

    # Plot heatmap showing where the noisy cases are located in the score ranking.
    idx = np.argsort(starting_scores)
    sorted_is_noisy = is_noisy[idx]
    plt.title(f"{selector_name}\n"
              f"Location of noisy cases by increasing scores (from left to right)\n"
              f"{percentage_noisy_detected:.1f}% noise cases > {target_perc}th percentile of scores")
    sns.heatmap(sorted_is_noisy.reshape(1, -1), yticklabels=False, vmax=1.3, cbar=False)
    plt.savefig(MAIN_SIMULATION_DIR / "noise_detection_heatmaps" / selector_name)
    plt.close()
@@ -1,115 +0,0 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

import argparse
import copy
import datetime
import logging
import multiprocessing
import numpy as np

from itertools import product
from typing import Any, Dict, Tuple
from joblib import Parallel, delayed

from default_paths import MAIN_SIMULATION_DIR
from InnerEyeDataQuality.evaluation.plot_stats import plot_stats
from InnerEyeDataQuality.selection.data_curation_utils import get_user_specified_selectors, \
    update_trainer_for_simulation
from InnerEyeDataQuality.selection.selectors.base import SampleSelector
from InnerEyeDataQuality.selection.selectors.label_based import (LabelBasedDecisionRule, LabelDistributionBasedSampler,
                                                                 PosteriorBasedSelector)
from InnerEyeDataQuality.selection.selectors.bald import BaldSelector
from InnerEyeDataQuality.selection.selectors.random_selector import RandomSelector
from InnerEyeDataQuality.selection.simulation import DataCurationSimulator
from InnerEyeDataQuality.selection.simulation_statistics import SimulationStats, SimulationStatsDistribution
from InnerEyeDataQuality.utils.dataset_utils import load_dataset_and_initial_labels_for_simulation
from InnerEyeDataQuality.utils.generic import create_folder, get_data_selection_parser, get_logger, set_seed


EXP_OUTPUT_DIR = MAIN_SIMULATION_DIR / datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')


def main(args: argparse.Namespace) -> None:
    dataset, initial_labels = load_dataset_and_initial_labels_for_simulation(args.config[0], args.on_val_set)
    n_classes = dataset.num_classes
    n_samples = dataset.num_samples
    true_distribution = dataset.label_distribution

    user_specified_selectors = get_user_specified_selectors(list_configs=args.config,
                                                            dataset=dataset,
                                                            output_path=MAIN_SIMULATION_DIR,
                                                            plot_embeddings=args.plot_embeddings)

    # Data selection simulations for annotation
    default_selector = {
        'Random': RandomSelector(n_samples, n_classes, name='Random'),
        'Oracle': PosteriorBasedSelector(true_distribution.distribution, n_samples,
                                         num_classes=n_classes,
                                         name='Oracle',
                                         allow_repeat_samples=True,
                                         decision_rule=LabelBasedDecisionRule.INV)}
    sample_selectors = {**user_specified_selectors, **default_selector}

    # Benchmark 2
    # Determine the number of simulation iterations based on the noise rate
    expected_noise_rate = np.mean(np.argmax(true_distribution.distribution, -1) != dataset.targets[:n_samples])
    relabel_budget = int(min(n_samples * expected_noise_rate, n_samples) * 0.35)
    if dataset.name == "NoisyChestXray":
        relabel_budget = min(int(n_samples * expected_noise_rate * 2.5), n_samples)
    else:
        relabel_budget = min(int(n_samples * expected_noise_rate * 3.0), n_samples)

    logging.info(f"Expected noise rate {expected_noise_rate} - Allocated relabelling budget {relabel_budget}")

    # Setup the simulation function.
    def _run_simulation_for_selector(name: str,
                                     seed: int,
                                     sample_selector: SampleSelector) -> Tuple[str, SimulationStats]:
        if isinstance(sample_selector, (LabelDistributionBasedSampler, BaldSelector)):
            update_trainer_for_simulation(sample_selector, seed=seed)
        simulator = DataCurationSimulator(initial_labels=copy.deepcopy(initial_labels),
                                          label_distribution=copy.deepcopy(true_distribution),
                                          relabel_budget=relabel_budget,
                                          name=name,
                                          seed=seed,
                                          sample_selector=copy.deepcopy(sample_selector))
        simulator.run_simulation()
        if sample_selector.output_directory is not None:
            simulator.save_simulator_results(MAIN_SIMULATION_DIR / sample_selector.output_directory / f"seed_{seed}")
        return name, simulator.global_stats

    # Run the simulation over multiple seeds and selectors
    simulation_iter = product(sample_selectors.items(), args.seeds)
    if args.debug:
        parallel_output = [_run_simulation_for_selector(_name, _seed, _sel) for (_name, _sel), _seed in simulation_iter]
    else:
        num_jobs = min(len(sample_selectors) * len(args.seeds), multiprocessing.cpu_count())
        parallel = Parallel(n_jobs=num_jobs)
        parallel_output = parallel(delayed(_run_simulation_for_selector)(_name, _seed, _selector)
                                   for (_name, _selector), _seed in simulation_iter)

    # Aggregate parallel output arrays
    global_stats: Dict[str, Any] = {name: list() for name in set([name for name, _ in parallel_output])}
    [global_stats[name].append(stats) for name, stats in parallel_output]

    # Analyse simulation stats
    stats_dist = {name: SimulationStatsDistribution(stats) for name, stats in global_stats.items()}
    plot_filename_suffix = "val" if args.on_val_set else "train"
    plot_stats(stats_dist,
               dataset_name=dataset.name,
               n_samples=n_samples,
               save_path=EXP_OUTPUT_DIR,
               filename_suffix=plot_filename_suffix)


if __name__ == '__main__':
    create_folder(EXP_OUTPUT_DIR)
    get_logger(EXP_OUTPUT_DIR / 'dataread.log')
    parser = get_data_selection_parser()
    args, unknown_args = parser.parse_known_args()
    set_seed(seed=12345)
    main(args)
@@ -1,146 +0,0 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

import argparse
import logging
from pathlib import Path
from typing import List, Union

import numpy as np
import pandas as pd

from default_paths import MAIN_SIMULATION_DIR, MODEL_SELECTION_BENCHMARK_DIR, FIGURE_DIR
from InnerEyeDataQuality.datasets.cifar10_utils import get_cifar10_label_names
from InnerEyeDataQuality.deep_learning.model_inference import inference_ensemble
from InnerEyeDataQuality.deep_learning.utils import create_logger, load_model_config, load_selector_config
from InnerEyeDataQuality.selection.selectors.label_based import cross_entropy
from InnerEyeDataQuality.selection.simulation import DataCurationSimulator
from InnerEyeDataQuality.selection.simulation_statistics import get_ambiguous_sample_ids
from InnerEyeDataQuality.utils.dataset_utils import get_datasets
from InnerEyeDataQuality.utils.generic import create_folder
from InnerEyeDataQuality.utils.plot import plot_confusion_matrix


def get_rank(array: Union[np.ndarray, List]) -> np.ndarray:
    """
    Returns the ranking of an array where the highest value has a rank of 1 and
    the lowest value has the highest rank.
    """
    return len(array) - np.argsort(array).argsort()
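
# Worked example added for illustration (not part of the original script):
# get_rank([0.1, 0.5, 0.3]) returns array([3, 1, 2]) - the largest score (0.5) gets rank 1
# and the smallest score (0.1) gets the highest rank.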


def main(args: argparse.Namespace) -> None:

    # Parameters
    number_of_runs = 1
    evaluate_on_ambiguous_samples = False

    # Create the evaluation dataset - Make sure that it's the same dataset for all configs
    assert isinstance(args.config, list)
    _, dataset = get_datasets(load_model_config(args.config[0]),
                              use_augmentation=False,
                              use_noisy_labels_for_validation=True,
                              use_fixed_labels=False if number_of_runs > 1 else True)

    # Choose a subset of the dataset
    if evaluate_on_ambiguous_samples:
        ind = get_ambiguous_sample_ids(dataset.label_counts, threshold=0.10)  # type: ignore
    else:
        ind = range(len(dataset))  # type: ignore

    # If specified, load curated dataset labels
    curated_target_labels = dict()
    for cfg_path in args.curated_label_config if args.curated_label_config else list():
        cfg = load_selector_config(cfg_path)
        search_dir = MAIN_SIMULATION_DIR / cfg.selector.output_directory
        targets_list = list()
        for _f in search_dir.glob('**/*.hdf'):
            _label_counts = DataCurationSimulator.load_simulator_results(_f)
            _targets = np.argmax(_label_counts, axis=1)
            targets_list.append(_targets)
        curated_target_labels[str(Path(cfg_path).stem)] = targets_list

    # Define class labels for noisy, clean and curated datasets
    df_rows_list = []
    metric_names = ["accuracy", "top_n_accuracy", "cross_entropy", "accuracy_per_class"]

    # Run the same experiment multiple times
    for _run_id in range(number_of_runs):
        target_labels = {"clean": dataset.clean_targets[ind],  # type: ignore
                         "noisy": np.array([dataset.__getitem__(_i)[2] for _i in ind]),
                         **{_n: _l[_run_id] for _n, _l in curated_target_labels.items()}}

        # Loop over the different models
        for config_id, config in enumerate([load_model_config(cfg) for cfg in args.config]):
            posteriors = inference_ensemble(dataset, config)[1][ind]

            # Collect metrics
            for _label_name, _label in target_labels.items():
                df_row = {"model": Path(args.config[config_id]).stem, "run_id": _run_id,
                          "dataset": _label_name, "count": _label.size}
                for _metric_name in metric_names:
                    _val = benchmark_metrics(posteriors, observed_labels=_label, metric_name=_metric_name,
                                             true_labels=target_labels["clean"])
                    df_row.update({_metric_name: _val})  # type: ignore
                df_rows_list.append(df_row)

    df = pd.DataFrame(df_rows_list)
    df = df.sort_values(by=["dataset", "model"], axis=0)
    logging.info(f"\n{df.to_string()}")

    # Aggregate multiple runs
    group_cols = ['model', 'dataset']
    df_grouped = df.groupby(group_cols, as_index=False)[['accuracy', 'count', 'cross_entropy']].agg([np.mean, np.std])
    logging.info(f"\n{df_grouped.to_string()}")

    # Plot the observed confusion matrix
    plot_confusion_matrix(target_labels["clean"], target_labels["noisy"],
                          get_cifar10_label_names(), save_path=FIGURE_DIR)


def benchmark_metrics(posteriors: np.ndarray,
                      observed_labels: np.ndarray,
                      metric_name: str,
                      true_labels: np.ndarray) -> Union[float, List[float]]:
    """
    Defines metrics to be used in model comparison.
    """
    predictions = np.argmax(posteriors, axis=1)

    # Accuracy averaged across all classes
    if metric_name == "accuracy":
        return np.mean(predictions == observed_labels) * 100.0
    # Top-N accuracy: a sample counts as correct if the observed label is among the N most likely classes
    elif metric_name == "top_n_accuracy":
        N = 2
        sorted_class_predictions = np.argsort(posteriors, axis=1)[:, ::-1]
        correct = int(0)
        for _i in range(observed_labels.size):
            correct += np.any(sorted_class_predictions[_i, :N] == observed_labels[_i])
        return correct * 100.0 / observed_labels.size
    # Cross-entropy loss across all samples
    elif metric_name == "cross_entropy":
        return np.mean(cross_entropy(posteriors, np.eye(10)[observed_labels]))
    # Average accuracy per class - samples are grouped based on their true class label
    elif metric_name == "accuracy_per_class":
        vals = list()
        for _class in np.unique(true_labels, return_counts=False):
            mask = true_labels == _class
            val = np.mean(predictions[mask] == observed_labels[mask]) * 100.0
            vals.append(np.around(val, decimals=3))
        return vals
    else:
        raise ValueError("Unknown metric")
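
# Illustrative usage sketch (added for illustration, not part of the original script); assumes the
# imports at the top of this file. On a toy 3-sample, 10-class posterior, top-2 accuracy counts a
# sample as correct when the observed label is among the two most probable classes:
def _example_benchmark_metrics() -> None:
    posteriors = np.full((3, 10), 0.02)
    posteriors[0, 3], posteriors[1, 7], posteriors[2, 1] = 0.5, 0.5, 0.5
    posteriors[0, 4], posteriors[1, 2], posteriors[2, 0] = 0.32, 0.32, 0.32
    observed = np.array([3, 2, 5])  # sample 0: top-1 hit, sample 1: top-2 hit, sample 2: miss
    print(benchmark_metrics(posteriors, observed, "accuracy", true_labels=observed))        # approx. 33.3
    print(benchmark_metrics(posteriors, observed, "top_n_accuracy", true_labels=observed))  # approx. 66.7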


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Execute benchmark 3')
    parser.add_argument('--config', dest='config', type=str, required=True, nargs='+',
                        help='Path to config file(s) characterising trained CNN model/s')
    parser.add_argument('--curated-label-config', dest='curated_label_config', type=str, required=False, nargs='+',
                        help='Path to config file(s) corresponding to curated labels in adjudication simulation')
    args, unknown_args = parser.parse_known_args()
    create_folder(MODEL_SELECTION_BENCHMARK_DIR)
    create_logger(MODEL_SELECTION_BENCHMARK_DIR)
    main(args)
@@ -1,5 +0,0 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
Some files were not shown because too many files have changed in this diff.