Enable Bring-your-own-Lightning-model (#417)

- Enable bringing arbitrary PyTorch Lightning models to the InnerEye toolbox (a minimal sketch of the new workflow follows below)
- Upgrade mypy and simplify the way we invoke it
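
As a rough sketch of what "bring your own model" looks like after this change (the container, model and data module names below are hypothetical; the real entry point is the LightningContainer class added in this PR and demonstrated by the HelloContainer example further down):

from pathlib import Path
from pytorch_lightning import LightningDataModule, LightningModule
from InnerEye.ML.lightning_container import LightningContainer

class MyContainer(LightningContainer):
    # Hypothetical container: wraps an arbitrary LightningModule so the InnerEye runner can train it.
    def __init__(self) -> None:
        super().__init__()
        self.local_dataset = Path("datasets/my_dataset")  # hypothetical dataset folder for local runs
        self.num_epochs = 10

    def create_model(self) -> LightningModule:
        return MyLightningModel()  # any plain LightningModule; hypothetical class

    def get_data_module(self) -> LightningDataModule:
        return MyDataModule(root_folder=self.local_dataset)  # hypothetical LightningDataModule

A container like this would then be trained via `python InnerEye/ML/runner.py --model=MyContainer`, or with `--azureml=True` appended to submit the run to AzureML, mirroring the HelloContainer example below.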
This commit is contained in:
Anton Schwaighofer 2021-04-19 16:28:41 +01:00 committed by GitHub
Parent 780e420973
Commit 0d479ba3d8
No key matching this signature was found
GPG key ID: 4AEE18F83AFDEB23
72 changed files: 3176 additions and 1301 deletions

View file

@ -2,3 +2,4 @@
ignore = E226,E302,E41,W391, E701, W291, E722, W503, E128, E126, E127, E731, E401
max-line-length = 160
max-complexity = 25
exclude = fastMRI/

View file

@ -45,6 +45,13 @@ jobs:
PYTHONPATH: ${{ github.workspace }}
if: always()
- name: Run HelloContainer model
run: |
$CONDA/envs/InnerEye/bin/python ./InnerEye/ML/runner.py --model=HelloContainer
env:
PYTHONPATH: ${{ github.workspace }}
if: always()
windows:
runs-on: windows-latest
steps:

3
.gitmodules vendored Normal file
View file

@ -0,0 +1,3 @@
[submodule "fastMRI"]
path = fastMRI
url = https://github.com/facebookresearch/fastMRI

View file

@ -13,6 +13,9 @@ created.
### Added
- ([#417](https://github.com/microsoft/InnerEye-DeepLearning/pull/417)) Added a generic way of adding PyTorch Lightning
models to the toolbox. It is now possible to train almost any Lightning model with the InnerEye toolbox in AzureML,
with only minimum code changes required. See [the MD documentation](docs/bring_your_own_model.md) for details.
- ([#430](https://github.com/microsoft/InnerEye-DeepLearning/pull/430)) Update conversion to 1.0.1 InnerEye-DICOM-RT to
add: manufacturer, SoftwareVersions, Interpreter and ROIInterpretedTypes.
- ([#385](https://github.com/microsoft/InnerEye-DeepLearning/pull/385)) Add the ability to train a model on multiple
@ -70,6 +73,7 @@ created.
- ([#437](https://github.com/microsoft/InnerEye-DeepLearning/pull/437)) Fixed multi-node DDP bug in PL v1.2.8. Re-add
end-to-end test for multi-node.
### Removed
- ([#417](https://github.com/microsoft/InnerEye-DeepLearning/pull/417)) Removed an output file that only contains metadata for a legacy consumer
### Deprecated

View file

@ -275,19 +275,6 @@ class SourceConfig:
self.script_params = retained_args
@dataclass
class ExperimentResultLocation:
"""
Information that is needed to recover where the results of an experiment reside.
"""
results_container_name: Optional[str] = None
results_uri: Optional[str] = None
dataset_folder: Optional[str] = None
dataset_uri: Optional[str] = None
azure_job_name: Optional[str] = None
commandline_overrides: Optional[str] = None
@dataclass
class ParserResult:
"""

View file

@ -19,17 +19,18 @@ from azureml.core.datastore import Datastore
from azureml.core.runconfig import MpiConfiguration, RunConfiguration
from azureml.core.workspace import WORKSPACE_DEFAULT_BLOB_STORE_NAME
from azureml.data import FileDataset
from azureml.data.dataset_consumption_config import DatasetConsumptionConfig
from InnerEye.Azure import azure_util
from InnerEye.Azure.azure_config import AzureConfig, ParserResult, SourceConfig
from InnerEye.Azure.azure_util import CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY, RUN_RECOVERY_FROM_ID_KEY_NAME, \
RUN_RECOVERY_ID_KEY_NAME, \
merge_conda_dependencies
is_offline_run_context, merge_conda_dependencies
from InnerEye.Azure.secrets_handling import read_all_settings
from InnerEye.Azure.tensorboard_monitor import AMLTensorBoardMonitorConfig, monitor
from InnerEye.Common.generic_parsing import GenericConfig
from InnerEye.ML.common import ModelExecutionMode
from InnerEye.ML.utils.config_util import ModelConfigLoader
from InnerEye.ML.utils.config_loader import ModelConfigLoader
SLEEP_TIME_SECONDS = 30
INPUT_DATA_KEY = "input_data"
@ -42,15 +43,12 @@ ENVIRONMENT_VERSION = "1"
def submit_to_azureml(azure_config: AzureConfig,
source_config: SourceConfig,
model_config_overrides: str,
azure_dataset_id: str) -> Run:
"""
The main entry point. It creates an AzureML workspace if needed, submits an experiment using the code
as specified in source_config, and waits for completion if needed.
:param azure_config: azure related configurations to setup valid workspace
:param source_config: The information about which code should be submitted, and which arguments should be used.
:param model_config_overrides: A string that describes which model parameters were overwritten by commandline
arguments in the present run. This is only used for diagnostic purposes (it is set as a Tag on the run).
:param azure_dataset_id: The name of the dataset on blob storage to be used for this run.
"""
azure_run: Optional[Run] = None
@ -68,8 +66,7 @@ def submit_to_azureml(azure_config: AzureConfig,
for s in [signal.SIGINT, signal.SIGTERM]:
signal.signal(s, interrupt_handler)
# create train/test experiment
azure_run = create_and_submit_experiment(azure_config, source_config, model_config_overrides,
azure_dataset_id)
azure_run = create_and_submit_experiment(azure_config, source_config, azure_dataset_id)
if azure_config.wait_for_completion:
# We want the job output to be visible on the console, but the program should not exit if the
@ -79,13 +76,12 @@ def submit_to_azureml(azure_config: AzureConfig,
return azure_run
def set_run_tags(run: Run, azure_config: AzureConfig, model_config_overrides: str) -> None:
def set_run_tags(run: Run, azure_config: AzureConfig, commandline_args: str) -> None:
"""
Set metadata for the run
:param run: Run to set metadata for.
:param azure_config: The configurations for the present AzureML job
:param model_config_overrides: A string that describes which model parameters were overwritten by commandline
arguments in the present run.
:param commandline_args: A string that holds all commandline arguments that were used for the present run.
"""
git_information = azure_config.get_git_information()
run.set_tags({
@ -103,7 +99,7 @@ def set_run_tags(run: Run, azure_config: AzureConfig, model_config_overrides: st
"source_message": git_information.commit_message,
"source_author": git_information.commit_author,
"source_dirty": str(git_information.is_dirty),
"overrides": model_config_overrides,
"commandline_args": commandline_args,
CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY: -1,
})
@ -125,14 +121,11 @@ def create_experiment_name(azure_config: AzureConfig) -> str:
def create_and_submit_experiment(
azure_config: AzureConfig,
source_config: SourceConfig,
model_config_overrides: str,
azure_dataset_id: str) -> Run:
"""
Creates an AzureML experiment in the workspace and submits it for execution.
:param azure_config: azure related configurations to setup valid workspace
:param source_config: The information about which code should be submitted, and which arguments should be used.
:param model_config_overrides: A string that describes which model parameters were overwritten by commandline
arguments in the present run. This is only used for diagnostic purposes (it is set as a Tag on the run).
:param azure_dataset_id: The name of the dataset in blob storage to be used for this run.
:returns: Run object for the submitted AzureML run
"""
@ -144,8 +137,12 @@ def create_and_submit_experiment(
# submit a training/testing run associated with the experiment
run: Run = exp.submit(script_run_config)
# set metadata for the run
set_run_tags(run, azure_config, model_config_overrides)
if is_offline_run_context(run):
# This codepath will only be executed in unit tests, when exp.submit is mocked.
return run
# Set metadata for the run.
set_run_tags(run, azure_config, commandline_args=(" ".join(source_config.script_params)))
print("\n==============================================================================")
print(f"Successfully queued new run {run.id} in experiment: {exp.name}")
@ -276,6 +273,21 @@ def get_or_create_python_environment(azure_config: AzureConfig,
return env
def get_dataset_consumption(azure_config: AzureConfig, azure_dataset_id: str) -> DatasetConsumptionConfig:
"""
Creates a configuration for using an AzureML dataset inside of an AzureML run. This will make the AzureML
dataset with given name available as a named input, using INPUT_DATA_KEY as the key.
:param azure_config: azure related configurations to use for model scale-out behaviour
:param azure_dataset_id: The name of the dataset in blob storage to be used for this run. This can be an empty
string to not use any datasets.
"""
azureml_dataset = get_or_create_dataset(azure_config, azure_dataset_id=azure_dataset_id)
if not azureml_dataset:
raise ValueError(f"AzureML dataset {azure_dataset_id} could not be found or created.")
named_input = azureml_dataset.as_named_input(INPUT_DATA_KEY)
return named_input.as_mount() if azure_config.use_dataset_mount else named_input.as_download()
def create_run_config(azure_config: AzureConfig,
source_config: SourceConfig,
azure_dataset_id: str = "",
@ -292,11 +304,7 @@ def create_run_config(azure_config: AzureConfig,
:return: The configured script run.
"""
if azure_dataset_id:
azureml_dataset = get_or_create_dataset(azure_config, azure_dataset_id=azure_dataset_id)
if not azureml_dataset:
raise ValueError(f"AzureML dataset {azure_dataset_id} could not be found or created.")
named_input = azureml_dataset.as_named_input(INPUT_DATA_KEY)
dataset_consumption = named_input.as_mount() if azure_config.use_dataset_mount else named_input.as_download()
dataset_consumption = get_dataset_consumption(azure_config, azure_dataset_id)
else:
dataset_consumption = None
# AzureML seems to sometimes expect the entry script path in Linux format, hence convert to posix path
@ -354,8 +362,7 @@ def create_runner_parser(model_config_class: type = None) -> argparse.ArgumentPa
def parse_args_and_add_yaml_variables(parser: ArgumentParser,
yaml_config_file: Optional[Path] = None,
project_root: Optional[Path] = None,
fail_on_unknown_args: bool = False,
args: List[str] = None) -> ParserResult:
fail_on_unknown_args: bool = False) -> ParserResult:
"""
Reads arguments from sys.argv, modifies them with secrets from local YAML files,
and parses them using the given argument parser.
@ -364,14 +371,12 @@ def parse_args_and_add_yaml_variables(parser: ArgumentParser,
:param yaml_config_file: The path to the YAML file that contains values to supply into sys.argv.
:param fail_on_unknown_args: If True, raise an exception if the parser encounters an argument that it does not
recognize. If False, unrecognized arguments will be ignored, and added to the "unknown" field of the parser result.
:param args: arguments to parse
:return: The parsed arguments, and overrides
"""
settings_from_yaml = read_all_settings(yaml_config_file, project_root=project_root)
return parse_arguments(parser,
settings_from_yaml=settings_from_yaml,
fail_on_unknown_args=fail_on_unknown_args,
args=args)
fail_on_unknown_args=fail_on_unknown_args)
def _create_default_namespace(parser: ArgumentParser) -> Namespace:
@ -471,7 +476,7 @@ def run_duration_string_to_seconds(s: str) -> Optional[int]:
elif suffix == "d":
multiplier = 24 * 60 * 60
else:
raise ArgumentError("s", f"Invalid suffix: Must be one of 's', 'm', 'h', 'd', but got: {s}")
raise ArgumentError("s", f"Invalid suffix: Must be one of 's', 'm', 'h', 'd', but got: {s}") # type: ignore
return int(float(s[:-1]) * multiplier)

View file

@ -45,15 +45,6 @@ INNEREYE_SDK_NAME = "innereye"
INNEREYE_SDK_VERSION = "1.0"
def get_results_blob_path(run_id: str) -> str:
"""
Creates the name of the top level folder that contains the results for a given AzureML run.
:param run_id: The AzureML run ID for which the folder should be created.
:return: A full Azure blob storage path, starting with the container name.
"""
return AZUREML_RUN_FOLDER + run_id
def create_run_recovery_id(run: Run) -> str:
"""
Creates a recovery id for a run so that its checkpoints can be recovered for training/testing
@ -293,6 +284,21 @@ def merge_conda_files(files: List[Path], result_file: Path) -> None:
ruamel.yaml.dump(unified_definition, f, indent=2, default_flow_style=False)
def get_all_environment_files(project_root: Path) -> List[Path]:
"""
Returns a list of all Conda environment files that should be used. This is firstly the InnerEye conda file,
and possibly a second environment.yml file that lives at the project root folder.
:param project_root: The root folder of the code that starts the present training run.
:return: A list with 1 or 2 entries that are conda environment files.
"""
innereye_yaml = fixed_paths.get_environment_yaml_file()
project_yaml = project_root / fixed_paths.ENVIRONMENT_YAML_FILE_NAME
files = [innereye_yaml]
if innereye_yaml != project_yaml:
files.append(project_yaml)
return files
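
For orientation, here is a plausible way the new helper combines with merge_conda_dependencies below (an illustrative sketch, not code taken from the repository; using the current directory as project root is an assumption):

from pathlib import Path
from InnerEye.Azure.azure_util import get_all_environment_files, merge_conda_dependencies

# Hypothetical usage: collect the InnerEye Conda file plus an optional project-level environment.yml,
# then merge them into a single CondaDependencies object for the AzureML run.
project_root = Path.cwd()  # stand-in for the repository root that the toolbox normally uses
env_files = get_all_environment_files(project_root)
conda_dependencies, merged_yaml = merge_conda_dependencies(env_files)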
def merge_conda_dependencies(files: List[Path]) -> Tuple[CondaDependencies, str]:
"""
Creates a CondaDependencies object from the Conda environments specified in one or more files.

View file

@ -1,52 +0,0 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import json
from pathlib import Path
from typing import Optional
from InnerEye.Azure.azure_config import AzureConfig, ExperimentResultLocation
BUILDINFORMATION_JSON = "buildinformation.json"
def build_information_to_dot_net_json(azure_config: AzureConfig, result_location: ExperimentResultLocation) -> str:
"""
Converts the build metadata to a JSON string.
:param azure_config: Azure configuration file with build information.
:param result_location: ExperimentResultLocation object with result locations.
"""
git_information = azure_config.get_git_information()
return json.dumps({
"BuildNumber": azure_config.build_number,
"BuildRequestedFor": azure_config.build_user,
"BuildSourceBranchName": git_information.branch,
"BuildSourceVersion": git_information.commit_id,
"BuildSourceAuthor": git_information.commit_author,
"ModelName": azure_config.model,
"ResultsContainerName": result_location.results_container_name,
"ResultsUri": result_location.results_uri,
"DatasetFolder": result_location.dataset_folder,
"DatasetFolderUri": result_location.dataset_uri,
"AzureBatchJobName": result_location.azure_job_name})
def build_information_to_dot_net_json_file(azure_config: AzureConfig,
result_location: ExperimentResultLocation,
folder: Optional[Path] = None) -> None:
"""
Writes the build metadata to a file called buildinformation.json in the given folder.
:param azure_config: Azure configuration file
:param result_location: ExperimentResultLocation object with result locations.
:param folder: Results are written to this folder, if not None. Else, results are written in the root folder.
"""
filename = Path(BUILDINFORMATION_JSON)
if folder is not None:
if not folder.exists():
folder.mkdir(parents=True)
full_file = filename if folder is None else folder / filename
with full_file.open("w") as f:
f.write(build_information_to_dot_net_json(azure_config, result_location))

View file

@ -389,3 +389,29 @@ def remove_file_or_directory(pth: Path) -> None:
pth.rmdir()
elif pth.exists():
pth.unlink()
def add_folder_to_sys_path_if_needed(folder_under_repo_root: str) -> None:
"""
Checks if the Python paths in sys.path already contain the given folder, which is expected to be relative
to the repository root. If that folder is not yet in sys.path, add it.
"""
full_folder = repository_root_directory() / folder_under_repo_root
for path_str in sys.path:
path = Path(path_str)
if path == full_folder:
return
print(f"Adding {full_folder} to sys.path")
sys.path.append(str(full_folder))
@contextmanager
def change_working_directory(path_or_str: PathOrString) -> Generator:
"""
Context manager for changing the current working directory
"""
new_path = Path(path_or_str).expanduser()
old_path = Path.cwd()
os.chdir(new_path)
yield
os.chdir(old_path)
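
A small usage sketch for the context manager above (the folder name is made up):

from pathlib import Path
from InnerEye.Common.common_util import change_working_directory

# Hypothetical usage: write a file relative to a different working directory, then return
# to the previous working directory when the 'with' block exits.
out_dir = Path("outputs")
out_dir.mkdir(exist_ok=True)
with change_working_directory(out_dir):
    Path("report.txt").write_text("created inside the outputs folder")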

View file

@ -34,6 +34,8 @@ DEFAULT_RESULT_ZIP_DICOM_NAME = "segmentation.dcm.zip"
DEFAULT_AML_LOGS_DIR = "azureml-logs"
DEFAULT_LOGS_DIR_NAME = "logs"
LOG_FILE_NAME = "stdout.txt"
DEFAULT_MODEL_SUMMARIES_DIR_PATH = Path(DEFAULT_LOGS_DIR_NAME) / "model_summaries"
# The folder at the project root directory that holds datasets for local execution.
DATASETS_DIR_NAME = "datasets"

View file

@ -32,17 +32,6 @@ def full_ml_test_data_path(path: str = "") -> Path:
return _full_test_data_path("ML", path)
def full_azure_test_data_path(path: str = "") -> Path:
"""
Takes a relative path inside of the Azure/tests/test_data folder, and returns its
full absolute path.
:param path: A path relative to the Tests/Azure/test_data
:return: The full absolute path of the argument.
"""
return _full_test_data_path("Azure", path)
def _full_test_data_path(prefix: str, suffix: str) -> Path:
root = tests_root_directory()
return root / prefix / "test_data" / suffix

View file

@ -279,3 +279,21 @@ class GenericConfig(param.Parameterized):
reason = f"parameter is {reason}"
# We could raise an error here instead - to be discussed.
logging.warning(f"Override {key}={desired} failed: {reason} in class {self.__class__.name}")
def create_from_matching_params(from_object: param.Parameterized, cls_: Type[T]) -> T:
"""
Creates an object of the given target class, and then copies all attributes from the `from_object` to
the newly created object, if there is a matching attribute. The target class must be a subclass of
param.Parameterized.
:param from_object: The object to read attributes from.
:param cls_: The name of the class for the newly created object.
:return: An instance of cls_
"""
c = cls_()
if not isinstance(c, param.Parameterized):
raise ValueError(f"The created object must be a subclass of param.Parameterized, but got {type(c)}")
for param_name, p in c.params().items():
if not p.constant and not p.readonly:
setattr(c, param_name, getattr(from_object, param_name))
return c
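
To make the copy semantics concrete, here is an illustrative sketch with two made-up Parameterized classes (not part of the codebase):

import param
from InnerEye.Common.generic_parsing import create_from_matching_params

class SourceParams(param.Parameterized):
    # Hypothetical source object with two parameters.
    learning_rate = param.Number(1e-3)
    batch_size = param.Integer(8)

class TargetParams(param.Parameterized):
    # Hypothetical target class that shares only one parameter name with SourceParams.
    batch_size = param.Integer(1)

source = SourceParams(learning_rate=1e-2, batch_size=16)
target = create_from_matching_params(source, TargetParams)
assert target.batch_size == 16  # copied because the attribute name matches; learning_rate is ignored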

View file

@ -9,16 +9,17 @@ from dataclasses import dataclass
from enum import Enum, unique
from math import isclose
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
import numpy as np
import pandas as pd
import param
from azureml.core import ScriptRunConfig
from azureml.core import Model, ScriptRunConfig
from azureml.train.hyperdrive import HyperDriveConfig
from pandas import DataFrame
from InnerEye.Common.common_util import any_pairwise_larger, any_smaller_or_equal_than, check_is_any_of
from InnerEye.Azure.azure_config import AzureConfig
from InnerEye.Common.common_util import ModelProcessing, any_pairwise_larger, any_smaller_or_equal_than, check_is_any_of
from InnerEye.Common.generic_parsing import IntTuple
from InnerEye.Common.type_annotations import TupleFloat2, TupleFloat3, TupleInt3, TupleStringOptionalFloat
from InnerEye.ML.common import ModelExecutionMode
@ -264,7 +265,9 @@ class SegmentationModelBase(ModelConfigBase):
#: The number of image levels used in Unet (in encoding and decoding paths).
num_downsampling_paths: int = param.Integer(4, bounds=(1, None),
instantiate=False, doc="The number of levels used in a UNet architecture in encoding and decoding paths.")
instantiate=False,
doc="The number of levels used in a UNet architecture in encoding and "
"decoding paths.")
#: The size of the random crops that will be drawn from the input images during training. This is also the
#: input size of the model.
@ -666,7 +669,7 @@ class SegmentationModelBase(ModelConfigBase):
"""
Loads a dataset from the dataset_csv file, and stores it in the present object.
"""
assert self.local_dataset is not None # for mypy
assert self.local_dataset is not None, "The dataset must be provided in self.local_dataset"
self.dataset_data_frame = pd.read_csv(self.local_dataset / self.dataset_csv,
dtype=str,
converters=self.col_type_converters,
@ -793,3 +796,7 @@ class SegmentationModelBase(ModelConfigBase):
By default no transformation is performed.
"""
return ModelTransformsPerExecutionMode()
PostCrossValidationHookSignature = Callable[[ModelConfigBase, Path], None]
ModelDeploymentHookSignature = Callable[[SegmentationModelBase, AzureConfig, Model, ModelProcessing], Any]
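
The two aliases above describe optional hook callables. A hypothetical hook matching PostCrossValidationHookSignature could look like this (interpreting the Path argument as the folder holding the cross validation outputs is an assumption, not stated in this diff):

from pathlib import Path
from InnerEye.ML.model_config_base import ModelConfigBase

def example_post_cross_validation_hook(config: ModelConfigBase, results_folder: Path) -> None:
    # Hypothetical hook: report where the cross validation outputs ended up.
    print(f"Cross validation finished for {config.model_name}; results are in {results_folder}")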

View file

@ -0,0 +1,140 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
from pathlib import Path
from typing import Any, Dict, List, Tuple
import numpy as np
import torch
from pytorch_lightning import LightningDataModule, LightningModule
from torch.optim import Adam, Optimizer
from torch.optim.lr_scheduler import StepLR, _LRScheduler
from torch.utils.data import DataLoader, Dataset
from InnerEye.Common import fixed_paths_for_tests
from InnerEye.ML.lightning_container import LightningContainer
class HelloDataset(Dataset):
"""
A simple 1dim regression task, read from a data file stored in the test data folder.
"""
# Creating the data file:
# import numpy as np
# import torch
#
# N = 100
# x = torch.rand((N, 1)) * 10
# y = 0.2 * x + 0.1 * torch.randn(x.size())
# xy = torch.cat((x, y), dim=1)
# np.savetxt("Tests/ML/test_data/hellocontainer.csv", xy.numpy(), delimiter=",")
def __init__(self, root_folder: Path, start_index: int, end_index: int) -> None:
"""
Creates the 1-dim regression dataset.
:param root_folder: The folder in which the data file lives ("hellocontainer.csv")
:param start_index: The first row to read.
:param end_index: The last row to read (exclusive)
"""
super().__init__()
raw_data = np.loadtxt(root_folder / "hellocontainer.csv", delimiter=",")[start_index:end_index]
self.data = torch.tensor(raw_data, dtype=torch.float)
def __len__(self) -> int:
return self.data.shape[0]
def __getitem__(self, item: int) -> Dict[str, torch.Tensor]:
return {'x': self.data[item][0:1], 'y': self.data[item][1:2]}
class HelloDataModule(LightningDataModule):
"""
A data module that gives the training, validation and test data for a simple 1-dim regression task.
"""
def __init__(self, root_folder: Path) -> None:
super().__init__()
self.train = HelloDataset(root_folder, start_index=0, end_index=50)
self.val = HelloDataset(root_folder, start_index=50, end_index=70)
self.test = HelloDataset(root_folder, start_index=70, end_index=100)
def train_dataloader(self, *args: Any, **kwargs: Any) -> DataLoader:
return DataLoader(self.train, batch_size=5)
def val_dataloader(self, *args: Any, **kwargs: Any) -> DataLoader:
return DataLoader(self.val, batch_size=5)
def test_dataloader(self, *args: Any, **kwargs: Any) -> DataLoader:
return DataLoader(self.test, batch_size=5)
class HelloRegression(LightningModule):
"""
A simple 1-dim regression model.
"""
def __init__(self) -> None:
super().__init__()
self.model = torch.nn.Linear(in_features=1, out_features=1, bias=True)
self.test_mse: List[torch.Tensor] = []
def forward(self, x: torch.Tensor) -> torch.Tensor: # type: ignore
return self.model(x)
def training_step(self, batch: Dict[str, torch.Tensor], *args: Any, **kwargs: Any) -> torch.Tensor: # type: ignore
input = batch["x"]
target = batch["y"]
prediction = self.forward(input)
loss = torch.nn.functional.mse_loss(prediction, target)
self.log("loss", loss, on_epoch=True, on_step=False)
return loss
def configure_optimizers(self) -> Tuple[List[Optimizer], List[_LRScheduler]]:
optimizer = Adam(self.parameters(), lr=1e-1)
scheduler = StepLR(optimizer, step_size=20, gamma=0.5)
return [optimizer], [scheduler]
def on_test_epoch_start(self) -> None:
self.test_mse = []
def test_step(self, batch: Dict[str, torch.Tensor], batch_idx: int) -> torch.Tensor: # type: ignore
input = batch["x"]
target = batch["y"]
prediction = self.forward(input)
loss = torch.nn.functional.mse_loss(prediction, target)
self.test_mse.append(loss)
return loss
def on_test_epoch_end(self) -> None:
average_mse = torch.mean(torch.stack(self.test_mse))
Path("test_mse.txt").write_text(str(average_mse.item()))
class HelloContainer(LightningContainer):
"""
An example for using the InnerEye functionality to "bring your own lightning model". This container has methods
to generate the actual Lightning model, and read out the datamodule that will be used for training.
The number of training epochs is controlled at container level.
You can train this model by running `python InnerEye/ML/runner.py --model=HelloContainer` on the local box,
or via `python InnerEye/ML/runner.py --model=HelloContainer --azureml=True` in AzureML
"""
def __init__(self) -> None:
super().__init__()
self.local_dataset = fixed_paths_for_tests.full_ml_test_data_path()
self.num_epochs = 20
# This method must be overridden by any subclass of LightningContainer
def create_model(self) -> LightningModule:
return HelloRegression()
# This method must be overridden by any subclass of LightningContainer
def get_data_module(self) -> LightningDataModule:
assert self.local_dataset is not None
return HelloDataModule(root_folder=self.local_dataset) # type: ignore
# This is an optional override: This report creation method can read out any files that were written during
# training, and cook them into a nice looking report. Here, the report is a simple text file.
def create_report(self) -> None:
# This just prints out the test MSE, but you could also generate a Jupyter notebook here, for example.
test_mse = float(Path("test_mse.txt").read_text())
report = f"Performance on test set: MSE = {test_mse}"
print(report)
Path("report.txt").write_text(report)

View file

@ -0,0 +1,68 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
# Suppress all errors here because the imports after code cause loads of warnings. We can't specifically suppress
# individual warnings only.
# flake8: noqa
from typing import Optional
import param
import torch
from pytorch_lightning import LightningDataModule, LightningModule
from torch.utils.tensorboard import SummaryWriter
from InnerEye.Common.common_util import add_folder_to_sys_path_if_needed
from InnerEye.ML.lightning_container import LightningContainer
add_folder_to_sys_path_if_needed("fastMRI")
from fastmri.data.subsample import create_mask_for_mask_type
from fastmri.data.transforms import VarNetDataTransform
from fastmri.pl_modules import FastMriDataModule, VarNetModule
class VarNetWithImageLogging(VarNetModule):
"""
A clone of the VarNet model that logs images to only the Tensorboard loggers. The original VarNet hardcodes
a single logger that must be Tensorboard.
"""
def log_image(self, name: str, image: torch.Tensor) -> None:
experiments = self.logger.experiment if isinstance(self.logger.experiment, list) \
else [self.logger.experiment]
for experiment in experiments:
if isinstance(experiment, SummaryWriter):
experiment.add_image(name, image, global_step=self.global_step)
class FastMri(LightningContainer):
# All fields that are declared here will be automatically available as commandline arguments.
challenge: str = param.String(default="multicoil", doc="Chooses between the singlecoil or multicoil"
"acquisition setup.")
sample_rate: Optional[float] = param.Number(default=None, doc="Fraction of slices of the training data split to "
"use. Default: 1.0")
def __init__(self) -> None:
super().__init__()
self.azure_dataset_id = "fastmrimini_brain"
def create_model(self) -> LightningModule:
return VarNetWithImageLogging()
def get_data_module(self) -> LightningDataModule:
mask = create_mask_for_mask_type(mask_type_str="equispaced",
center_fractions=[0.08],
accelerations=[4])
# use random masks for train transform, fixed masks for val transform
train_transform = VarNetDataTransform(mask_func=mask, use_seed=False)
val_transform = VarNetDataTransform(mask_func=mask)
test_transform = VarNetDataTransform()
return FastMriDataModule(data_path=self.local_dataset,
challenge=self.challenge,
sample_rate=self.sample_rate,
train_transform=train_transform,
val_transform=val_transform,
test_transform=test_transform)

View file

@ -7,7 +7,7 @@ from typing import Any
from azureml.core import ScriptRunConfig
from azureml.train.hyperdrive import BanditPolicy, HyperDriveConfig, PrimaryMetricGoal, RandomParameterSampling, uniform
from networkx.tests.test_convert_pandas import pd
import pandas as pd
from InnerEye.Common.fixed_paths_for_tests import full_ml_test_data_path
from InnerEye.Common.metrics_constants import TrackedMetrics

View file

@ -183,39 +183,186 @@ class DeepLearningFileSystemConfig(Parameterized):
logs_folder=logs_folder,
project_root=self.project_root
)
raise ValueError("This method should only be called for offline runs, when the logs folder is inside the "
"outputs folder.")
raise ValueError("This method should only be called for runs outside AzureML, when the logs folder is "
"inside the outputs folder.")
class DeepLearningConfig(GenericConfig, CudaAwareConfig):
class WorkflowParams(param.Parameterized):
"""
A class that holds all settings that are shared across segmentation models and regression/classification models.
This class contains all parameters that affect how the whole training and testing workflow is executed.
"""
_model_category: ModelCategory = param.ClassSelector(class_=ModelCategory,
doc="The high-level model category described by this config.")
_model_name: str = param.String(None, doc="The human readable name of the model (for example, Liver). This is "
"usually set from the class name.")
random_seed: int = param.Integer(42, doc="The seed to use for all random number generators.")
azure_dataset_id: str = param.String(doc="If provided, the ID of the dataset to use. This dataset must exist as a "
"folder of the same name in the 'datasets' "
"container in the datasets storage account.")
local_dataset: Optional[Path] = param.ClassSelector(class_=Path,
default=None,
allow_None=True,
doc="The path of the dataset to use, when training is running "
"outside Azure.")
num_dataload_workers: int = param.Integer(8, bounds=(0, None),
doc="The number of data loading workers (processes). When set to 0,"
"data loading is running in the same process (no process startup "
"cost, hence good for use in unit testing. However, it "
"does not give the same result as running with 1 worker process)")
shuffle: bool = param.Boolean(True, doc="If true, the dataset will be shuffled randomly during training.")
num_epochs: int = param.Integer(100, bounds=(1, None), doc="Number of epochs to train.")
start_epoch: int = param.Integer(0, bounds=(0, None), doc="The first epoch to train. Set to 0 to start a new "
"training. Set to a value larger than zero for starting"
" from a checkpoint.")
number_of_cross_validation_splits: int = param.Integer(0, bounds=(0, None),
doc="Number of cross validation splits for k-fold cross "
"validation")
cross_validation_split_index: int = param.Integer(DEFAULT_CROSS_VALIDATION_SPLIT_INDEX, bounds=(-1, None),
doc="The index of the cross validation fold this model is "
"associated with when performing k-fold cross validation")
perform_training_set_inference: bool = \
param.Boolean(False,
doc="If True, run full image inference on the training set at the end of training. If False and "
"perform_validation_and_test_set_inference is True (default), only run inference on "
"validation and test set. If both flags are False do not run inference.")
perform_validation_and_test_set_inference: bool = \
param.Boolean(True,
doc="If True (default), run full image inference on validation and test set after training.")
weights_url: str = param.String(doc="If provided, a url from which weights will be downloaded and used for model "
"initialization.")
local_weights_path: Optional[Path] = param.ClassSelector(class_=Path,
default=None,
allow_None=True,
doc="The path to the weights to use for model "
"initialization, when training outside AzureML.")
generate_report: bool = param.Boolean(default=True,
doc="If True (default), write a modelling report in HTML format. If False,"
"do not write that report.")
# The default multiprocessing start_method in both PyTorch and the Python standard library is "fork" for Linux and
# "spawn" (the only available method) for Windows. There is some evidence that using "forkserver" on Linux
# can reduce the chance of stuck jobs.
multiprocessing_start_method: MultiprocessingStartMethod = \
param.ClassSelector(class_=MultiprocessingStartMethod,
default=(MultiprocessingStartMethod.spawn if is_windows()
else MultiprocessingStartMethod.fork),
doc="Method to be used to start child processes in pytorch. Should be one of forkserver, "
"fork or spawn. If not specified, fork is used on Linux and spawn on Windows. "
"Set to forkserver as a possible remedy for stuck jobs.")
monitoring_interval_seconds: int = param.Integer(0, doc="Seconds delay between logging GPU/CPU resource "
"statistics. If 0 or less, do not log any resource "
"statistics.")
def validate(self) -> None:
if self.weights_url and self.local_weights_path:
raise ValueError("Cannot specify both local_weights_path and weights_url.")
if self.number_of_cross_validation_splits == 1:
raise ValueError("At least two splits required to perform cross validation, but got "
f"{self.number_of_cross_validation_splits}. To train without cross validation, set "
"number_of_cross_validation_splits=0.")
if 0 < self.number_of_cross_validation_splits <= self.cross_validation_split_index:
raise ValueError(f"Cross validation split index is out of bounds: {self.cross_validation_split_index}, "
f"which is invalid for CV with {self.number_of_cross_validation_splits} splits.")
elif self.number_of_cross_validation_splits == 0 and self.cross_validation_split_index != -1:
raise ValueError(f"Cross validation split index must be -1 for a non cross validation run, "
f"found number_of_cross_validation_splits = {self.number_of_cross_validation_splits} "
f"and cross_validation_split_index={self.cross_validation_split_index}")
@property
def is_offline_run(self) -> bool:
"""
Returns True if the run is executing outside AzureML, or False if inside AzureML.
"""
return is_offline_run_context(RUN_CONTEXT)
@property
def perform_cross_validation(self) -> bool:
"""
True if cross validation will be performed as part of the training procedure.
:return:
"""
return self.number_of_cross_validation_splits > 1
def get_effective_random_seed(self) -> int:
"""
Returns the random seed set as part of this configuration. If the configuration corresponds
to a cross validation split, then the cross validation fold index will be added to the
set random seed in order to return the effective random seed.
:return:
"""
seed = self.random_seed
if self.perform_cross_validation:
# offset the random seed based on the cross validation split index so each
# fold has a different initial random state.
seed += self.cross_validation_split_index
return seed
class DatasetParams(param.Parameterized):
azure_dataset_id: str = param.String(doc="If provided, the ID of the dataset to use when running in AzureML. "
"This dataset must exist as a folder of the same name in the 'datasets' "
"container in the datasets storage account. This dataset will be mounted "
"and made available at the 'local_dataset' path when running in AzureML.")
local_dataset: Optional[Path] = \
param.ClassSelector(class_=Path, default=None, allow_None=True,
doc="The path of the dataset to use, when training is running outside Azure.")
class OutputParams(param.Parameterized):
output_to: str = param.String(default="",
doc="If provided, the run outputs will be written to the given folder. If not "
"provided, outputs will go into a subfolder of the project root folder.")
file_system_config: DeepLearningFileSystemConfig = param.ClassSelector(default=DeepLearningFileSystemConfig(),
class_=DeepLearningFileSystemConfig,
instantiate=False,
doc="File system related configs")
_model_name: str = param.String("", doc="The human readable name of the model (for example, Liver). This is "
"usually set from the class name.")
@property
def model_name(self) -> str:
"""
Gets the human readable name of the model (e.g., Liver). This is usually set from the class name.
:return: A model name as a string.
"""
return self._model_name
def set_output_to(self, output_to: PathOrString) -> None:
"""
Adjusts the file system settings in the present object such that all outputs are written to the given folder.
:param output_to: The absolute path to a folder that should contain the outputs.
"""
if isinstance(output_to, Path):
output_to = str(output_to)
self.output_to = output_to
self.create_filesystem()
def create_filesystem(self, project_root: Path = fixed_paths.repository_root_directory()) -> None:
"""
Creates new file system settings (outputs folder, logs folder) based on the information stored in the
present object. If any of the folders do not yet exist, they are created.
:param project_root: The root folder for the codebase that triggers the training run.
"""
self.file_system_config = DeepLearningFileSystemConfig.create(
project_root=project_root,
model_name=self.model_name,
is_offline_run=is_offline_run_context(RUN_CONTEXT),
output_to=self.output_to
)
@property
def outputs_folder(self) -> Path:
"""Gets the full path in which the model outputs should be stored."""
return self.file_system_config.outputs_folder
@property
def logs_folder(self) -> Path:
"""Gets the full path in which the model logs should be stored."""
return self.file_system_config.logs_folder
@property
def checkpoint_folder(self) -> Path:
"""Gets the full path in which the model checkpoints should be stored during training."""
return self.outputs_folder / CHECKPOINT_FOLDER
@property
def visualization_folder(self) -> Path:
"""Gets the full path in which the visualizations notebooks should be saved during training."""
return self.outputs_folder / VISUALIZATION_FOLDER
def get_path_to_checkpoint(self) -> Path:
"""
Returns the full path to a recovery checkpoint.
"""
return create_recovery_checkpoint_path(self.checkpoint_folder)
def get_path_to_best_checkpoint(self) -> Path:
"""
Returns the full path to a checkpoint file that was found to be best during training, whatever criterion
was applied there.
"""
return get_best_checkpoint_path(self.checkpoint_folder)
class OptimizerParams(param.Parameterized):
l_rate: float = param.Number(1e-4, doc="The initial learning rate", bounds=(0, None))
_min_l_rate: float = param.Number(0.0, doc="The minimum learning rate for the Polynomial and Cosine schedulers.",
bounds=(0.0, None))
@ -256,35 +403,87 @@ class DeepLearningConfig(GenericConfig, CudaAwareConfig):
doc="The betas parameter of Adam, default is (0.9, 0.999)")
momentum: float = param.Number(0.6, doc="The momentum parameter of the optimizers")
weight_decay: float = param.Number(1e-4, doc="The weight decay used to control L2 regularization")
def validate(self) -> None:
if len(self.adam_betas) < 2:
raise ValueError(
"The adam_betas parameter should be the coefficients used for computing running averages of "
"gradient and its square")
if self.l_rate_scheduler == LRSchedulerType.MultiStep:
if not self.l_rate_multi_step_milestones:
raise ValueError("Must specify l_rate_multi_step_milestones to use LR scheduler MultiStep")
if sorted(set(self.l_rate_multi_step_milestones)) != self.l_rate_multi_step_milestones:
raise ValueError("l_rate_multi_step_milestones must be a strictly increasing list")
if self.l_rate_multi_step_milestones[0] <= 0:
raise ValueError("l_rate_multi_step_milestones cannot be negative or 0.")
@property
def min_l_rate(self) -> float:
return self._min_l_rate
@min_l_rate.setter
def min_l_rate(self, value: float) -> None:
if value > self.l_rate:
raise ValueError("l_rate must be >= min_l_rate, found: {}, {}".format(self.l_rate, value))
self._min_l_rate = value
class TrainerParams(CudaAwareConfig):
num_epochs: int = param.Integer(100, bounds=(1, None), doc="Number of epochs to train.")
recovery_checkpoint_save_interval: int = param.Integer(10, bounds=(0, None),
doc="Save epoch checkpoints when epoch number is a multiple "
"of recovery_checkpoint_save_interval. The intended use "
"is to allow restore training from failed runs.")
train_batch_size: int = param.Integer(4, bounds=(0, None),
doc="The number of crops that make up one minibatch during training.")
detect_anomaly: bool = param.Boolean(False, doc="If true, test gradients for anomalies (NaN or Inf) during "
"training.")
use_mixed_precision: bool = param.Boolean(False, doc="If true, mixed precision training is activated during "
"training.")
max_num_gpus: int = param.Integer(default=-1, doc="The maximum number of GPUS to use. If set to a value < 0, use"
"all available GPUs.")
pl_progress_bar_refresh_rate: Optional[int] = \
param.Integer(default=None,
doc="PyTorch Lightning trainer flag 'progress_bar_refresh_rate': How often to refresh progress "
"bar (in steps). Value 0 disables progress bar. Value None chooses automatically.")
pl_num_sanity_val_steps: int = \
param.Integer(default=0,
doc="PyTorch Lightning trainer flag 'num_sanity_val_steps': Number of validation "
"steps to run before training, to identify possible problems")
pl_deterministic: bool = \
param.Integer(default=True,
doc="Controls the PyTorch Lightning trainer flags 'deterministic' and 'benchmark'. If "
"'pl_deterministic' is True, results are perfectly reproducible. If False, they are not, but "
"you may see training speed increases.")
start_epoch: int = param.Integer(0, bounds=(0, None), doc="The first epoch to train. Set to 0 to start a new "
"training. Set to a value larger than zero for starting"
" from a checkpoint.")
class DeepLearningConfig(WorkflowParams,
DatasetParams,
OutputParams,
OptimizerParams,
TrainerParams,
CudaAwareConfig,
GenericConfig):
"""
A class that holds all settings that are shared across segmentation models and regression/classification models.
"""
_model_category: ModelCategory = param.ClassSelector(class_=ModelCategory,
doc="The high-level model category described by this config.")
num_dataload_workers: int = param.Integer(8, bounds=(0, None),
doc="The number of data loading workers (processes). When set to 0,"
"data loading is running in the same process (no process startup "
"cost, hence good for use in unit testing. However, it "
"does not give the same result as running with 1 worker process)")
shuffle: bool = param.Boolean(True, doc="If true, the dataset will be shuffled randomly during training.")
train_batch_size: int = param.Integer(4, bounds=(0, None),
doc="The number of crops that make up one minibatch during training.")
use_model_parallel: bool = param.Boolean(False, doc="If true, neural network model is partitioned across all "
"available GPUs to fit in a large model. It shall not be used "
"together with data parallel.")
monitoring_interval_seconds: int = param.Integer(0, doc="Seconds delay between logging GPU/CPU resource "
"statistics. If 0 or less, do not log any resource "
"statistics.")
number_of_cross_validation_splits: int = param.Integer(0, bounds=(0, None),
doc="Number of cross validation splits for k-fold cross "
"validation")
cross_validation_split_index: int = param.Integer(DEFAULT_CROSS_VALIDATION_SPLIT_INDEX, bounds=(-1, None),
doc="The index of the cross validation fold this model is "
"associated with when performing k-fold cross validation")
file_system_config: DeepLearningFileSystemConfig = param.ClassSelector(default=DeepLearningFileSystemConfig(),
class_=DeepLearningFileSystemConfig,
instantiate=False,
doc="File system related configs")
pin_memory: bool = param.Boolean(True, doc="Value of pin_memory argument to DataLoader")
_overrides: Dict[str, Any] = param.Dict(instantiate=True,
doc="Model config properties that were overridden from the commandline")
restrict_subjects: Optional[str] = \
param.String(doc="Use at most this number of subjects for train, val, or test set (must be > 0 or None). "
"If None, do not modify the train, val, or test sets. If a string of the form 'i,j,k' where "
@ -294,14 +493,6 @@ class DeepLearningConfig(GenericConfig, CudaAwareConfig):
"limit test set to 5. If any of i,j,k is '+', discarded members of the other sets are added "
"to that set.",
allow_None=True)
perform_training_set_inference: bool = \
param.Boolean(False,
doc="If True, run full image inference on the training set at the end of training. If False and "
"perform_validation_and_test_set_inference is True (default), only run inference on "
"validation and test set. If both flags are False do not run inference.")
perform_validation_and_test_set_inference: bool = \
param.Boolean(True,
doc="If True (default), run full image inference on validation and test set after training.")
_dataset_data_frame: Optional[DataFrame] = \
param.DataFrame(default=None,
doc="The dataframe that contains the dataset for the model. This is usually read from disk "
@ -315,19 +506,6 @@ class DeepLearningConfig(GenericConfig, CudaAwareConfig):
"on Linux, inference is currently disabled as the data loaders hang. "
"If False, use the default data loader logic that starts new processes for "
"each epoch.")
# The default multiprocessing start_method in both PyTorch and the Python standard library is "fork" for Linux and
# "spawn" (the only available method) for Windows. There is some evidence that using "forkserver" on Linux
# can reduce the chance of stuck jobs.
multiprocessing_start_method: MultiprocessingStartMethod = \
param.ClassSelector(class_=MultiprocessingStartMethod,
default=(MultiprocessingStartMethod.spawn if is_windows()
else MultiprocessingStartMethod.fork),
doc="Method to be used to start child processes in pytorch. Should be one of forkserver, "
"fork or spawn. If not specified, fork is used on Linux and spawn on Windows. "
"Set to forkserver as a possible remedy for stuck jobs.")
output_to: str = param.String(default="",
doc="If provided, the run outputs will be written to the given folder. If not "
"provided, outputs will go into a subfolder of the project root folder.")
max_batch_grad_cam: int = param.Integer(default=0, doc="Max number of validation batches for which "
"to save gradCam images. By default "
"visualizations are saved for all images "
@ -336,7 +514,6 @@ class DeepLearningConfig(GenericConfig, CudaAwareConfig):
doc="Target smoothing value for label smoothing")
log_to_parent_run: bool = param.Boolean(default=False, doc="If true, hyperdrive child runs will log their metrics"
"to their parent run.")
use_imbalanced_sampler_for_training: bool = param.Boolean(default=False,
doc="If True, use an imbalanced sampler during training.")
drop_last_batch_in_training: bool = param.Boolean(default=False,
@ -358,28 +535,6 @@ class DeepLearningConfig(GenericConfig, CudaAwareConfig):
"weights are updated using mean_teacher_"
"weight = alpha * (mean_teacher_weight) "
" + (1-alpha) * (current_student_weights). ")
weights_url: str = param.String(doc="If provided, a url from which weights will be downloaded and used for model "
"initialization.")
local_weights_path: Optional[Path] = param.ClassSelector(class_=Path,
default=None,
allow_None=True,
doc="The path to the weights to use for model "
"initialization, "
"when training is running outside Azure.")
max_num_gpus: int = param.Integer(default=-1, doc="The maximum number of GPUS to use. If set to a value < 0, use"
"all available GPUs.")
generate_report: bool = param.Boolean(default=True,
doc="If True (default), write a modelling report in HTML format. If False,"
"do not write that report.")
pl_num_sanity_val_steps: int = \
param.Integer(default=0, doc="PyTorch Lightning trainer flag 'num_sanity_val_steps': Number of validation "
"steps to run before training, to identify possible problems")
pl_deterministic: bool = \
param.Integer(default=True,
doc="Controls the PyTorch Lightning trainer flags 'deterministic' and 'benchmark'. If "
"'pl_deterministic' is True, results are perfectly reproducible. If False, they are not, but "
"you may see training speed increases.")
#: Name of the csv file providing information on the dataset to be used.
dataset_csv: str = param.String(
DATASET_CSV_FILE_NAME,
@ -394,49 +549,19 @@ class DeepLearningConfig(GenericConfig, CudaAwareConfig):
super().__init__(throw_if_unknown_param=True, **params)
logging.info("Creating the default output folder structure.")
self.create_filesystem(fixed_paths.repository_root_directory())
# Disable the PL progress bar because all InnerEye models have their own console output
self.pl_progress_bar_refresh_rate = 0
def validate(self) -> None:
"""
Validates the parameters stored in the present object.
"""
if len(self.adam_betas) < 2:
raise ValueError(
"The adam_betas parameter should be the coefficients used for computing running averages of "
"gradient and its square")
WorkflowParams.validate(self)
OptimizerParams.validate(self)
if self.azure_dataset_id is None and self.local_dataset is None:
raise ValueError("Either of local_dataset or azure_dataset_id must be set.")
if self.weights_url and self.local_weights_path:
raise ValueError("Cannot specify both local_weights_path and weights_url.")
if self.number_of_cross_validation_splits == 1:
raise ValueError(f"At least two splits required to perform cross validation found "
f"number_of_cross_validation_splits={self.number_of_cross_validation_splits}")
if 0 < self.number_of_cross_validation_splits <= self.cross_validation_split_index:
raise ValueError(f"Cross validation split index is out of bounds: {self.cross_validation_split_index}, "
f"which is invalid for CV with {self.number_of_cross_validation_splits} splits.")
elif self.number_of_cross_validation_splits == 0 and self.cross_validation_split_index != -1:
raise ValueError(f"Cross validation split index must be -1 for a non cross validation run, "
f"found number_of_cross_validation_splits = {self.number_of_cross_validation_splits} "
f"and cross_validation_split_index={self.cross_validation_split_index}")
if self.l_rate_scheduler == LRSchedulerType.MultiStep:
if not self.l_rate_multi_step_milestones:
raise ValueError("Must specify l_rate_multi_step_milestones to use LR scheduler MultiStep")
if sorted(set(self.l_rate_multi_step_milestones)) != self.l_rate_multi_step_milestones:
raise ValueError("l_rate_multi_step_milestones must be a strictly increasing list")
if self.l_rate_multi_step_milestones[0] <= 0:
raise ValueError("l_rate_multi_step_milestones cannot be negative or 0.")
@property
def model_name(self) -> str:
"""
Gets the human readable name of the model (e.g., Liver). This is usually set from the class name.
:return: A model name as a string.
"""
return self._model_name
@property
def model_category(self) -> ModelCategory:
"""
@ -463,48 +588,6 @@ class DeepLearningConfig(GenericConfig, CudaAwareConfig):
def compute_grad_cam(self) -> bool:
return self.max_batch_grad_cam > 0
@property
def min_l_rate(self) -> float:
return self._min_l_rate
@min_l_rate.setter
def min_l_rate(self, value: float) -> None:
if value > self.l_rate:
raise ValueError("l_rate must be >= min_l_rate, found: {}, {}".format(self.l_rate, value))
self._min_l_rate = value
@property
def outputs_folder(self) -> Path:
"""Gets the full path in which the model outputs should be stored."""
return self.file_system_config.outputs_folder
@property
def logs_folder(self) -> Path:
"""Gets the full path in which the model logs should be stored."""
return self.file_system_config.logs_folder
@property
def checkpoint_folder(self) -> Path:
"""Gets the full path in which the model checkpoints should be stored during training."""
return self.outputs_folder / CHECKPOINT_FOLDER
@property
def visualization_folder(self) -> Path:
"""Gets the full path in which the visualizations notebooks should be saved during training."""
return self.outputs_folder / VISUALIZATION_FOLDER
@property
def perform_cross_validation(self) -> bool:
"""
True if cross validation will be performed as part of the training procedure.
:return:
"""
return self.number_of_cross_validation_splits > 1
@property
def overrides(self) -> Optional[Dict[str, Any]]:
return self._overrides
@property
def dataset_data_frame(self) -> Optional[DataFrame]:
"""
@ -521,29 +604,6 @@ class DeepLearningConfig(GenericConfig, CudaAwareConfig):
"""
self._dataset_data_frame = data_frame
def set_output_to(self, output_to: PathOrString) -> None:
"""
Adjusts the file system settings in the present object such that all outputs are written to the given folder.
:param output_to: The absolute path to a folder that should contain the outputs.
"""
if isinstance(output_to, Path):
output_to = str(output_to)
self.output_to = output_to
self.create_filesystem()
def create_filesystem(self, project_root: Path = fixed_paths.repository_root_directory()) -> None:
"""
Creates new file system settings (outputs folder, logs folder) based on the information stored in the
present object. If any of the folders do not yet exist, they are created.
:param project_root: The root folder for the codebase that triggers the training run.
"""
self.file_system_config = DeepLearningFileSystemConfig.create(
project_root=project_root,
model_name=self.model_name,
is_offline_run=self.is_offline_run,
output_to=self.output_to
)
def get_train_epochs(self) -> List[int]:
"""
Returns the epochs for which training will be performed.
@ -565,34 +625,6 @@ class DeepLearningConfig(GenericConfig, CudaAwareConfig):
"""
return self.get_total_number_of_training_epochs()
def get_path_to_checkpoint(self) -> Path:
"""
Returns full path to a recovery checkpoint.
:return: path to a checkpoint given an epoch
"""
return create_recovery_checkpoint_path(self.checkpoint_folder)
def get_path_to_best_checkpoint(self) -> Path:
"""
Returns full path to a checkpoint given an epoch
:return: path to a checkpoint given an epoch
"""
return get_best_checkpoint_path(self.checkpoint_folder)
def get_effective_random_seed(self) -> int:
"""
Returns the random seed set as part of this configuration. If the configuration corresponds
to a cross validation split, then the cross validation fold index will be added to the
set random seed in order to return the effective random seed.
:return:
"""
seed = self.random_seed
if self.perform_cross_validation:
# offset the random seed based on the cross validation split index so each
# fold has a different initial random state.
seed += self.cross_validation_split_index
return seed
@property # type: ignore
def use_gpu(self) -> bool: # type: ignore
"""
@ -617,30 +649,6 @@ class DeepLearningConfig(GenericConfig, CudaAwareConfig):
raise ValueError("Can't set use_gpu to True if there is not CUDA capable GPU present.")
self._use_gpu = value
def write_args_file(self) -> None:
"""
Writes the current config to disk in the default output folder.
"""
self.outputs_folder.mkdir(exist_ok=True, parents=True)
dst = self.outputs_folder / ARGS_TXT
dst.write_text(data=str(self))
def should_wait_for_other_cross_val_child_runs(self) -> bool:
"""
Returns True if the current run is an online run and is the 0th cross validation split.
In this case, this will be the run that will wait for all other child runs to finish in order
to aggregate their results.
:return:
"""
return (not self.is_offline_run) and self.cross_validation_split_index == 0
@property
def is_offline_run(self) -> bool:
"""
Returns True if the run is executing outside AzureML, or False if inside AzureML.
"""
return is_offline_run_context(RUN_CONTEXT)
@property
def compute_mean_teacher_model(self) -> bool:
"""
@ -654,7 +662,7 @@ class DeepLearningConfig(GenericConfig, CudaAwareConfig):
# Avoid callable params, the bindings that are printed out can be humongous.
# Avoid dataframes
skip_params = {name for name, value in self.param.params().items()
if isinstance(value, (param.Callable, DataFrame))}
if isinstance(value, (param.Callable, param.DataFrame))}
for key, value in self.param.get_param_values():
if key not in skip_params:
arguments_str += f"\t{key:40}: {value}\n"
@ -677,7 +685,6 @@ class DeepLearningConfig(GenericConfig, CudaAwareConfig):
See https://pytorch.org/tutorials/beginner/saving_loading_models.html#warmstarting-model-using-parameters
-from-a-different-model
for an explanation on why strict=False is useful when loading parameters from other models.
:param path_to_checkpoint: Path to the checkpoint file.
:return: Dictionary with model and optimizer state dicts. The dict should have at least the following keys:
1. Key ModelAndInfo.MODEL_STATE_DICT_KEY and value set to the model state dict.
@ -685,7 +692,15 @@ class DeepLearningConfig(GenericConfig, CudaAwareConfig):
Other (optional) entries corresponding to keys ModelAndInfo.OPTIMIZER_STATE_DICT_KEY and
ModelAndInfo.MEAN_TEACHER_STATE_DICT_KEY are also supported.
"""
import torch
map_location = None if self.use_gpu else 'cpu'
checkpoint = torch.load(str(path_to_checkpoint), map_location=map_location)
return checkpoint
return load_checkpoint(path_to_checkpoint=path_to_checkpoint, use_gpu=self.use_gpu)
def load_checkpoint(path_to_checkpoint: Path, use_gpu: bool = True) -> Dict[str, Any]:
"""
Loads a Torch checkpoint from the given file. If use_gpu==False, map all parameters to the CPU, otherwise
leave the device of all parameters unchanged.
"""
import torch
map_location = None if use_gpu else 'cpu'
checkpoint = torch.load(str(path_to_checkpoint), map_location=map_location)
return checkpoint
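# Illustrative usage sketch, not part of this change: load a recovery checkpoint onto the CPU.
# The checkpoint path below is hypothetical; for checkpoints written by PyTorch Lightning, the model
# weights are usually stored under the "state_dict" key, hence the hedged lookup.
def _example_load_recovery_checkpoint_on_cpu() -> Dict[str, Any]:
    checkpoint = load_checkpoint(Path("outputs/checkpoints/recovery.ckpt"), use_gpu=False)
    return checkpoint.get("state_dict", checkpoint)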

Просмотреть файл

@ -4,20 +4,25 @@
# ------------------------------------------------------------------------------------------
import logging
import numbers
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import param
import torch
from pytorch_lightning import LightningDataModule, LightningModule
from pytorch_lightning.utilities import rank_zero_only
from torch.optim import Optimizer
from torch.optim.lr_scheduler import _LRScheduler
from torch.utils.data import DataLoader
from torch.utils.data import DataLoader, Dataset
from InnerEye.Common.common_util import EPOCH_METRICS_FILE_NAME
from InnerEye.Common.common_util import EPOCH_METRICS_FILE_NAME, logging_section
from InnerEye.Common.metrics_constants import LoggingColumns, MetricType, TRAIN_PREFIX, VALIDATION_PREFIX
from InnerEye.Common.type_annotations import DictStrFloat
from InnerEye.ML.common import ModelExecutionMode
from InnerEye.ML.deep_learning_config import DeepLearningConfig
from InnerEye.ML.config import SegmentationModelBase
from InnerEye.ML.deep_learning_config import DatasetParams, DeepLearningConfig, WorkflowParams, OutputParams, \
TrainerParams
from InnerEye.ML.lightning_container import LightningContainer
from InnerEye.ML.lightning_loggers import StoringLogger
from InnerEye.ML.metrics import EpochTimers, MAX_ITEM_LOAD_TIME_SEC, store_epoch_metrics
from InnerEye.ML.metrics_dict import DataframeLogger
@ -25,20 +30,43 @@ from InnerEye.ML.model_config_base import ModelConfigBase
from InnerEye.ML.utils import model_util
from InnerEye.ML.utils.device_aware_module import DeviceAwareModule
from InnerEye.ML.utils.lr_scheduler import SchedulerWithWarmUp
from InnerEye.ML.utils.ml_util import RandomStateSnapshot, set_random_seed
from InnerEye.ML.utils.ml_util import RandomStateSnapshot, set_random_seed, validate_dataset_paths
from InnerEye.ML.utils.model_util import generate_and_print_model_summary
from InnerEye.ML.visualizers.patch_sampling import visualize_random_crops_for_dataset
class TrainingAndValidationDataLightning(LightningDataModule):
class TrainAndValDataLightning(LightningDataModule):
"""
A class that wraps training and validation data from an InnerEye model configuration to a Lightning data module.
When doing inference on the trained models, we use InferenceDataLightning. This is particularly important for
segmentation models, where training and validation happens on equal sized patches, but inference is running on
images of arbitrary size.
"""
def _init__(self, config: ModelConfigBase) -> None:
def __init__(self, config: ModelConfigBase) -> None:
super().__init__()
self.config = config
self.data_loaders: Dict[ModelExecutionMode, DataLoader] = {}
def prepare_data(self, *args: Any, **kwargs: Any) -> None:
"""
Writes the dataset files for later use in cross validation analysis. This is only executed once per
distributed training run.
"""
# Save the dataset files for later use in cross validation analysis
self.config.write_dataset_files()
def setup(self, stage: Optional[str] = None) -> None:
"""
Checks if the dataset folder is present, and the dataset file exists. This is executed on each node in
distributed training.
"""
# Check for existing dataset.csv file in the correct locations. Skip that if a dataset has already been
# loaded (typically only during tests)
if self.config.dataset_data_frame is None:
assert self.config.local_dataset is not None
validate_dataset_paths(self.config.local_dataset, self.config.dataset_csv)
self.config.read_dataset_if_needed()
self.data_loaders = self.config.create_data_loaders()
def train_dataloader(self) -> DataLoader: # type: ignore
@ -48,7 +76,91 @@ class TrainingAndValidationDataLightning(LightningDataModule):
return self.data_loaders[ModelExecutionMode.VAL]
def test_dataloader(self) -> DataLoader: # type: ignore
raise NotImplementedError("For segmentation models, the test dataset should not be evaluated patch-wise.")
raise NotImplementedError("There is no test dataset stored here, because this object is only meant to be "
"used for training and validation.")
class InferenceDataLightning(LightningDataModule):
"""
A class that wraps data for running model inference on InnerEye models, as a Lightning data module.
Note that training and validation data is handled by TrainAndValDataLightning.
"""
def __init__(self, config: ModelConfigBase) -> None:
super().__init__()
self.config = config
self.train_data: Dataset = Dataset()
self.val_data: Dataset = Dataset()
self.test_data: Dataset = Dataset()
def setup(self, stage: Optional[str] = None) -> None:
"""
Initializes the datasets stored in the present object, by calling the config object to
prepare the torch Dataset objects for train/val/test.
"""
self.train_data = self.config.get_torch_dataset_for_inference(ModelExecutionMode.TRAIN)
self.val_data = self.config.get_torch_dataset_for_inference(ModelExecutionMode.VAL)
self.test_data = self.config.get_torch_dataset_for_inference(ModelExecutionMode.TEST)
def train_dataloader(self, *args: Any, **kwargs: Any) -> DataLoader:
return DataLoader(self.train_data)
def val_dataloader(self, *args: Any, **kwargs: Any) -> DataLoader:
return DataLoader(self.val_data)
def test_dataloader(self, *args: Any, **kwargs: Any) -> DataLoader:
return DataLoader(self.test_data)
def prepare_data(self, *args: Any, **kwargs: Any) -> None:
pass
class InnerEyeContainer(LightningContainer):
"""
A container that wraps the creation of Lightning datasets for the built-in InnerEye models.
"""
def __init__(self, config: ModelConfigBase):
super().__init__()
self.config = config
self._model_name = config.model_name
# Fields like cross validation index are defined at container level, but the InnerEye models define them
# at model level. Copy everything over.
for type_to_copy in [WorkflowParams, DatasetParams, TrainerParams, OutputParams]:
assert issubclass(type_to_copy, param.Parameterized)
self.apply_overrides({p: getattr(config, p) for p in type_to_copy.params()}, # type: ignore
should_validate=False)
def setup(self) -> None:
"""
This hook reads the dataset file, and possibly sets required pre-processing objects, like one-hot encoder
for categorical features, that need to be available before creating the model.
"""
self.config.read_dataset_if_needed()
def create_model(self) -> LightningModule: # type: ignore
from InnerEye.ML.lightning_models import create_lightning_model
return create_lightning_model(self.config)
def get_data_module(self) -> LightningDataModule:
return TrainAndValDataLightning(self.config) # type: ignore
def get_inference_data_module(self) -> LightningDataModule:
return InferenceDataLightning(self.config) # type: ignore
def before_training_on_rank_zero(self) -> None:
# Save the dataset files for later use in cross validation analysis
self.config.write_dataset_files()
if isinstance(self.config, SegmentationModelBase):
with logging_section("Visualizing the effect of sampling random crops for training"):
visualize_random_crops_for_dataset(self.config)
# Print out a detailed breakdown of layers, memory consumption and time.
assert isinstance(self.model, InnerEyeLightning)
generate_and_print_model_summary(self.config, self.model.model)
def load_checkpoint_and_modify(self, path_to_checkpoint: Path) -> Dict[str, Any]:
return self.config.load_checkpoint_and_modify(path_to_checkpoint=path_to_checkpoint)
class InnerEyeLightning(LightningModule):
@ -61,6 +173,7 @@ class InnerEyeLightning(LightningModule):
def __init__(self, config: DeepLearningConfig, *args: Any, **kwargs: Any) -> None:
super().__init__(*args, **kwargs)
self.outputs_folder = config.outputs_folder
self.checkpoint_folder = config.checkpoint_folder
self.model: DeviceAwareModule = DeviceAwareModule()
# These two will be set later in set_optimizer_and_scheduler.
# The ddp_spawn accelerator only works if the model configuration object is
@ -85,20 +198,17 @@ class InnerEyeLightning(LightningModule):
fixed_columns=fixed_logger_columns)
self.val_epoch_metrics_logger = DataframeLogger(self.val_metrics_folder / EPOCH_METRICS_FILE_NAME,
fixed_columns=fixed_logger_columns)
# Fields to store diagnostics for unit testing
self.train_diagnostics: List[Any] = []
self.val_diagnostics: List[Any] = []
# Stores information about the checkpoint that created this model, if any.
self.checkpoint_loading_message = ""
def set_optimizer_and_scheduler(self, config: DeepLearningConfig) -> None:
self.optimizer = model_util.create_optimizer(config, self.model.parameters())
self.l_rate_scheduler = SchedulerWithWarmUp(config, self.optimizer)
self.l_rate_scheduler = SchedulerWithWarmUp(config, self.optimizer, num_epochs=config.num_epochs)
def configure_optimizers(self) -> Tuple[List[Optimizer], List[_LRScheduler]]:
return [self.optimizer], [self.l_rate_scheduler] # type: ignore
def close_all_loggers(self) -> None:
def on_fit_end(self) -> None:
"""
Flushes all logger objects that the present object holds.
"""

Просмотреть файл

@ -0,0 +1,292 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import abc
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional, Tuple
import param
import torch
from pytorch_lightning import LightningDataModule, LightningModule
from torch.optim import Optimizer
from torch.optim.lr_scheduler import _LRScheduler
from InnerEye.Common.generic_parsing import GenericConfig, create_from_matching_params
from InnerEye.ML.common import ModelExecutionMode
from InnerEye.ML.deep_learning_config import DatasetParams, OptimizerParams, OutputParams, TrainerParams, \
WorkflowParams, load_checkpoint
from InnerEye.ML.utils import model_util
from InnerEye.ML.utils.lr_scheduler import SchedulerWithWarmUp
class InnerEyeInference(abc.ABC):
"""
A base class that defines the methods that need to be present for doing inference on a trained model. This
form of inference is slightly different from what PyTorch Lightning does in its `Trainer.test` method. In
particular, this inference can be executed on any of the training, validation, or test set.
The inference code calls the methods in this order:
model.on_inference_start()
for dataset_split in [Train, Val, Test]
model.on_inference_epoch_start(dataset_split, is_ensemble_model=False)
for batch_idx, item in enumerate(dataloader[dataset_split])):
model_outputs = model.forward(item)
model.inference_step(item, batch_idx, model_outputs)
model.on_inference_epoch_end()
model.on_inference_end()
"""
def on_inference_start(self) -> None:
"""
Runs initialization for everything that inference might require. This can initialize
output files, set up metric computation, etc. This is run only once.
"""
pass
def on_inference_epoch_start(self, dataset_split: ModelExecutionMode, is_ensemble_model: bool) -> None:
"""
Runs initialization for inference, when starting inference on a new dataset split (train/val/test).
Depending on the settings, this can be called anywhere between 0 (no inference at all) and 3 times (inference
on all of the train/val/test splits).
:param dataset_split: Indicates whether the item comes from the training, validation or test set.
:param is_ensemble_model: If False, the model_outputs come from an individual model. If True, the model
outputs come from multiple models.
"""
pass
def inference_step(self, batch: Any, batch_idx: int, model_output: torch.Tensor) -> None:
"""
This hook is called when the model has finished making a prediction. It can write the results to a file,
or compute metrics and store them.
:param batch: The batch of data for which the model made a prediction.
:param batch_idx: The index of the current batch.
:param model_output: The model outputs. This would usually be a torch.Tensor, but can be any datatype.
"""
# Deliberately not an abstract method: keeping it concrete makes it possible to instantiate the class in
# unit tests, and the method should remain optional (it should be possible to also use Lightning's native
# test_step method).
raise NotImplementedError("Method inference_step must be overwritten in a derived class.")
def on_inference_epoch_end(self) -> None:
"""
Called when the inference on one of the dataset splits (train/val/test) has finished.
Depending on the settings, this can be called anywhere between 0 (no inference at all) and 3 times (inference
on all of the train/val/test splits).
"""
pass
def on_inference_end(self) -> None:
"""
Called when all inference epochs have finished. This can write all metrics to disk, for example. This method
is called exactly once.
"""
pass
def aggregate_ensemble_model_outputs(self, model_outputs: Iterator[torch.Tensor]) -> torch.Tensor:
"""
Aggregates the outputs of multiple models when using an ensemble model. In the default implementation,
this averages the tensors coming from all the models.
:param model_outputs: An iterator over the model outputs for all ensemble members.
:return: The aggregate model outputs.
"""
aggregate_output: Optional[torch.Tensor] = None
count = 0
for m in model_outputs:
count += 1
if aggregate_output is None:
aggregate_output = m
else:
aggregate_output += m
if count == 0 or aggregate_output is None:
raise ValueError("There were no results to aggregate.")
aggregate_output /= count
return aggregate_output
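# Illustrative sketch, not part of this change: a Lightning model that also implements the
# InnerEyeInference hooks above. All concrete names (MeanAbsoluteErrorModel, its single linear layer,
# and the (input, target) batch layout) are hypothetical assumptions.
class MeanAbsoluteErrorModel(LightningModule, InnerEyeInference):
    def __init__(self) -> None:
        super().__init__()
        self.model = torch.nn.Linear(1, 1)
        self.absolute_errors: List[float] = []

    def forward(self, item: torch.Tensor) -> torch.Tensor:  # type: ignore
        return self.model(item)

    def on_inference_start(self) -> None:
        # Reset the metric accumulator; a real model could also open per-subject output files here.
        self.absolute_errors = []

    def inference_step(self, batch: Any, batch_idx: int, model_output: torch.Tensor) -> None:
        # Assumes that each batch is an (input, target) tuple.
        _, target = batch
        self.absolute_errors.append(torch.abs(model_output - target).mean().item())

    def on_inference_end(self) -> None:
        # Aggregate and report; a real model would typically write this to a file in its outputs folder.
        print(f"Mean absolute error: {sum(self.absolute_errors) / len(self.absolute_errors):.4f}")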
class LightningModuleWithOptimizer(LightningModule):
"""
A base class that supplies a method to configure optimizers and LR schedulers. To use this in your model,
inherit from this class instead of from LightningModule.
If this class is used, all configuration options for the optimizers and LR schedulers will also be available as
commandline arguments (for example, you can supply the InnerEye runner with "--l_rate=1e-2" to change the learning
rate).
"""
# These fields will be set by the LightningContainer when the model is created.
_optimizer_params = OptimizerParams()
_trainer_params = TrainerParams()
def configure_optimizers(self) -> Tuple[List[Optimizer], List[_LRScheduler]]:
"""
This is the default implementation of the method that provides the optimizer and LR scheduler for
PyTorch Lightning. It reads out the optimizer and scheduler settings from the model fields,
and creates the two objects.
Override this method for full flexibility to define any optimizer and scheduler.
:return: A tuple of (list of optimizers, list of LR schedulers)
"""
optimizer = model_util.create_optimizer(self._optimizer_params, self.parameters())
l_rate_scheduler = SchedulerWithWarmUp(self._optimizer_params, optimizer,
num_epochs=self._trainer_params.num_epochs)
return [optimizer], [l_rate_scheduler]
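# Illustrative sketch, not part of this change: a user-defined model only needs to inherit from
# LightningModuleWithOptimizer to pick up the configure_optimizers implementation above; passing
# "--l_rate=1e-2" to the InnerEye runner then changes the learning rate used here. The model itself
# (a tiny linear regression) is hypothetical.
class TinyRegressionModel(LightningModuleWithOptimizer):
    def __init__(self) -> None:
        super().__init__()
        self.layer = torch.nn.Linear(1, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:  # type: ignore
        return self.layer(x)

    def training_step(self, batch: Any, batch_idx: int) -> torch.Tensor:  # type: ignore
        x, y = batch
        loss = torch.nn.functional.mse_loss(self.forward(x), y)
        self.log("train/loss", loss)
        return loss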
class LightningContainer(GenericConfig,
WorkflowParams,
DatasetParams,
OutputParams,
TrainerParams,
OptimizerParams):
"""
A LightningContainer contains all information to train a user-specified PyTorch Lightning model. The model that
should be trained is returned by the `create_model` method. The training data must be returned in the form of
a LightningDataModule, by the `get_data_module` method.
"""
def __init__(self) -> None:
super().__init__()
self._model: Optional[LightningModule] = None
self._model_name = type(self).__name__
def validate(self) -> None:
WorkflowParams.validate(self)
OptimizerParams.validate(self)
def setup(self) -> None:
"""
This method is called as one of the first operations of the training/testing workflow, before any other
operations on the present object. At the point when called, the dataset is already available in
the location given by self.local_dataset. Use this method to prepare datasets or data loaders, for example.
"""
pass
def create_model(self) -> LightningModule:
"""
This method must create the actual Lightning model that will be trained. It can read out parameters from the
container and pass them into the model, for example.
"""
pass
def get_data_module(self) -> LightningDataModule:
"""
Gets the data that is used for the training, validation, and test steps.
This should read a dataset from the self.local_dataset folder or download from a web location.
The format of the data is not specified any further.
The method must take cross validation into account, and ensure that the logic for creating training and validation
sets for a given number of cross validation splits is handled correctly.
:return: A LightningDataModule
"""
return None # type: ignore
def get_inference_data_module(self) -> LightningDataModule:
"""
Gets the data that is used to evaluate the trained model. By default, this returns the value
of get_data_module(), but you can override this to get for example full image datasets for
segmentation models.
This should read a dataset from the self.local_dataset folder or download from a web location.
The format of the data is not specified any further.
The method must take cross validation into account, and ensure that the logic for creating training and validation
sets for a given number of cross validation splits is handled correctly.
:return: A LightningDataModule
"""
# You can override this if inference uses different data, for example segmentation models use
# full images rather than equal sized crops.
return self.get_data_module()
def get_trainer_arguments(self) -> Dict[str, Any]:
"""
Gets additional parameters that will be passed on to the PyTorch Lightning trainer.
"""
return dict()
def create_report(self) -> None:
"""
This method is called after training and testing has been completed. It can aggregate all files that were
written during training and testing, and compile them into some helpful overarching output.
The report should be written to the output folder (self.outputs_folder).
"""
pass
def before_training_on_rank_zero(self) -> None:
"""
A hook that will be called before starting model training, before creating the Lightning Trainer object.
In distributed training, this is only run on rank zero. It is executed after the before_training_on_all_ranks
hook.
"""
pass
def before_training_on_all_ranks(self) -> None:
"""
A hook that will be called before starting model training.
In distributed training, this hook will be called on all ranks. It is executed before the
before_training_on_rank_zero hook.
"""
pass
def load_checkpoint_and_modify(self, path_to_checkpoint: Path) -> Dict[str, Any]:
"""
This method is called when a file with weights for network initialization is supplied at container level,
in the self.weights_url or self.local_weights_path fields. It can load that file as a Torch checkpoint,
and rename parameters.
By default, uses torch.load to read and return the state dict from the checkpoint file, and does no modification
of the checkpoint file.
Overloading this function:
When weights_url or local_weights_path is set, the file downloaded may not be in the exact
format expected by the model's load_state_dict() - for example, pretrained Imagenet weights for networks
may have mismatched layer names in different implementations.
In such cases, you can overload this function to extract the state dict from the checkpoint.
NOTE: The model checkpoint will be loaded using the torch function load_state_dict() with argument strict=False,
so extra care needs to be taken to check that the state dict is valid.
Check the logs for warnings related to missing and unexpected keys.
See https://pytorch.org/tutorials/beginner/saving_loading_models.html#warmstarting-model-using-parameters
-from-a-different-model
for an explanation on why strict=False is useful when loading parameters from other models.
:param path_to_checkpoint: Path to the checkpoint file.
:return: Dictionary with model and optimizer state dicts. The dict should have at least the following keys:
1. Key ModelAndInfo.MODEL_STATE_DICT_KEY and value set to the model state dict.
2. Key ModelAndInfo.EPOCH_KEY and value set to the checkpoint epoch.
Other (optional) entries corresponding to keys ModelAndInfo.OPTIMIZER_STATE_DICT_KEY and
ModelAndInfo.MEAN_TEACHER_STATE_DICT_KEY are also supported.
"""
return load_checkpoint(path_to_checkpoint=path_to_checkpoint, use_gpu=self.use_gpu)
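# Illustrative sketch, not part of this change: an override that renames parameters before they are
# loaded with strict=False. It assumes a Lightning-style checkpoint that keeps the model weights under
# the "state_dict" key; the "encoder." prefix rename is purely hypothetical.
#
#    def load_checkpoint_and_modify(self, path_to_checkpoint: Path) -> Dict[str, Any]:
#        checkpoint = load_checkpoint(path_to_checkpoint=path_to_checkpoint, use_gpu=self.use_gpu)
#        checkpoint["state_dict"] = {key.replace("encoder.", "model.encoder."): value
#                                    for key, value in checkpoint["state_dict"].items()}
#        return checkpoint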
# The code from here on does not need to be modified.
@property
def model(self) -> LightningModule:
"""
Returns the PyTorch Lightning module that the present container object manages.
:return: A PyTorch Lightning module
"""
if self._model is None:
raise ValueError("No Lightning module has been set yet.")
return self._model
def create_lightning_module_and_store(self) -> None:
"""
Creates the Lightning model by calling `create_model` and stores it in the `model`
property.
"""
self._model = self.create_model()
if isinstance(self._model, LightningModuleWithOptimizer):
self._model._optimizer_params = create_from_matching_params(self, OptimizerParams)
self._model._trainer_params = create_from_matching_params(self, TrainerParams)
def __str__(self) -> str:
"""Returns a string describing the present object, as a list of key: value strings."""
arguments_str = "\nContainer:\n"
# Avoid callable params, the bindings that are printed out can be humongous.
# Avoid dataframes
skip_params = {name for name, value in self.param.params().items()
if isinstance(value, (param.Callable, param.DataFrame))}
for key, value in self.param.get_param_values():
if key not in skip_params:
arguments_str += f"\t{key:40}: {value}\n"
# Print out all other separate vars that are not under the guidance of the params library,
# skipping the two that are introduced by params
skip_vars = {"param", "initialized"}
for key, value in vars(self).items():
if key not in skip_vars and key[0] != "_":
arguments_str += f"\t{key:40}: {value}\n"
return arguments_str
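# ------------------------------------------------------------------------------------------
# Illustrative sketch, not part of this change: a complete bring-your-own-model container that
# reuses the TinyRegressionModel sketched above. The synthetic in-memory dataset, the batch size
# and the container name are hypothetical; a real container would usually read its data from
# self.local_dataset inside get_data_module.
# ------------------------------------------------------------------------------------------
class TinyRegressionContainer(LightningContainer):
    def create_model(self) -> LightningModule:
        return TinyRegressionModel()

    def get_data_module(self) -> LightningDataModule:
        from torch.utils.data import DataLoader, TensorDataset
        x = torch.rand((100, 1)) * 10.0
        y = 2.0 * x + torch.randn_like(x)
        train = TensorDataset(x[:80], y[:80])
        val = TensorDataset(x[80:], y[80:])

        class FixedDataModule(LightningDataModule):
            def train_dataloader(self, *args: Any, **kwargs: Any) -> DataLoader:
                return DataLoader(train, batch_size=10)

            def val_dataloader(self, *args: Any, **kwargs: Any) -> DataLoader:
                return DataLoader(val, batch_size=10)

            def test_dataloader(self, *args: Any, **kwargs: Any) -> DataLoader:
                return DataLoader(val, batch_size=10)

        return FixedDataModule()

    def get_trainer_arguments(self) -> Dict[str, Any]:
        # Any genuine PyTorch Lightning Trainer argument can be passed through here.
        return {"gradient_clip_val": 1.0}

# A container like this is then selected via the runner's "--model=TinyRegressionContainer" argument,
# assuming the class lives in a place where the runner's model config loader can find it.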

Просмотреть файл

@ -8,31 +8,11 @@ from pathlib import Path
import torch
from InnerEye.ML.lightning_base import InnerEyeLightning
from InnerEye.ML.lightning_models import ScalarLightning, SegmentationLightning
from InnerEye.ML.lightning_models import create_lightning_model
from InnerEye.ML.model_config_base import ModelConfigBase
from InnerEye.ML.models.architectures.base_model import BaseSegmentationModel
def create_lightning_model(config: ModelConfigBase, set_optimizer_and_scheduler: bool = True) -> InnerEyeLightning:
"""
Creates a PyTorch Lightning model that matches the provided InnerEye model configuration object.
The `optimizer` and `l_rate_scheduler` object of the Lightning model will also be populated.
:param set_optimizer_and_scheduler: If True (default), initialize the optimizer and LR scheduler of the model.
If False, skip that step (this is only meant to be used for unit tests.)
:param config: An InnerEye model configuration object
:return: A PyTorch Lightning model object.
"""
if config.is_segmentation_model:
model: InnerEyeLightning = SegmentationLightning(config)
elif config.is_scalar_model:
model = ScalarLightning(config)
else:
raise NotImplementedError(f"Don't know how to handle config of type {type(config)}")
if set_optimizer_and_scheduler:
model.set_optimizer_and_scheduler(config)
return model
def load_from_lightning_checkpoint(config: ModelConfigBase, checkpoint_path: Path) -> InnerEyeLightning:
"""
Reads a PyTorch model from a checkpoint. First, a PyTorch Lightning model is created matching the InnerEye

Просмотреть файл

@ -2,12 +2,13 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
from typing import Any, Dict, Iterable, Optional
from typing import Any, Dict, Iterable, List, Optional
from pytorch_lightning.loggers import LightningLoggerBase
from pytorch_lightning.utilities import rank_zero_only
from InnerEye.Azure.azure_util import RUN_CONTEXT, is_offline_run_context
from InnerEye.Common.metrics_constants import TRAIN_PREFIX, VALIDATION_PREFIX
from InnerEye.Common.type_annotations import DictStrFloat
@ -21,6 +22,9 @@ class StoringLogger(LightningLoggerBase):
super().__init__()
self.results: Dict[int, DictStrFloat] = {}
self.hyperparams: Any = None
# Fields to store diagnostics for unit testing
self.train_diagnostics: List[Any] = []
self.val_diagnostics: List[Any] = []
@rank_zero_only
def log_metrics(self, metrics: DictStrFloat, step: Optional[int] = None) -> None:
@ -44,7 +48,7 @@ class StoringLogger(LightningLoggerBase):
self.hyperparams = params
def experiment(self) -> Any:
return ""
return None
def name(self) -> Any:
return ""
@ -93,6 +97,48 @@ class StoringLogger(LightningLoggerBase):
"""
return {epoch: self.extract_by_prefix(epoch, prefix_filter) for epoch in self.epochs}
def get_metric(self, is_training: bool, metric_type: str) -> List[float]:
"""
Gets a scalar metric out of either the list of training or the list of validation results. This returns
the value that a specific metric attains in all of the epochs.
:param is_training: If True, read metrics that have a "train/" prefix, otherwise those that have a "val/"
prefix.
:param metric_type: The metric to extract.
:return: A list of floating point numbers, with one entry per epoch in the training or validation results.
"""
full_metric_name = (TRAIN_PREFIX if is_training else VALIDATION_PREFIX) + metric_type
return [self.results[epoch][full_metric_name] for epoch in self.epochs]
def get_train_metric(self, metric_type: str) -> List[float]:
"""
Gets a scalar metric from the list of training results. This returns
the value that a specific metric attains in all of the epochs.
:param metric_type: The metric to extract.
:return: A list of floating point numbers, with one entry per epoch in the training results.
"""
return self.get_metric(is_training=True, metric_type=metric_type)
def get_val_metric(self, metric_type: str) -> List[float]:
"""
Gets a scalar metric from the list of validation results. This returns
the value that a specific metric attains in all of the epochs.
:param metric_type: The metric to extract.
:return: A list of floating point numbers, with one entry per epoch in the validation results.
"""
return self.get_metric(is_training=False, metric_type=metric_type)
def train_results_per_epoch(self) -> List[DictStrFloat]:
"""
Gets the full set of training metrics that the logger stores, as a list of dictionaries per epoch.
"""
return list(self.to_metrics_dicts(prefix_filter=TRAIN_PREFIX).values())
def val_results_per_epoch(self) -> List[DictStrFloat]:
"""
Gets the full set of validation metrics that the logger stores, as a list of dictionaries per epoch.
"""
return list(self.to_metrics_dicts(prefix_filter=VALIDATION_PREFIX).values())
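# Illustrative usage sketch, not part of this change: the StoringLogger that model_train returns for
# built-in InnerEye models can be queried per epoch, for example in a test. The metric name "Loss"
# and the container/checkpoint_handler variables are hypothetical.
#
#    trainer, storing_logger = model_train(checkpoint_handler, container=container)
#    assert storing_logger is not None
#    train_loss_per_epoch = storing_logger.get_train_metric("Loss")
#    assert len(train_loss_per_epoch) == container.num_epochs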
class AzureMLLogger(LightningLoggerBase):
"""
@ -115,7 +161,7 @@ class AzureMLLogger(LightningLoggerBase):
pass
def experiment(self) -> Any:
return ""
return None
def name(self) -> Any:
return ""

Просмотреть файл

@ -21,6 +21,7 @@ from InnerEye.ML.lightning_metrics import Accuracy05, AccuracyAtOptimalThreshold
OptimalThreshold, ScalarMetricsBase
from InnerEye.ML.metrics import compute_dice_across_patches
from InnerEye.ML.metrics_dict import DataframeLogger, MetricsDict, SequenceMetricsDict
from InnerEye.ML.model_config_base import ModelConfigBase
from InnerEye.ML.scalar_config import ScalarModelBase
from InnerEye.ML.sequence_config import SequenceModelBase
from InnerEye.ML.utils import image_util, metrics_util, model_util
@ -129,9 +130,9 @@ class SegmentationLightning(InnerEyeLightning):
if isinstance(center_indices, torch.Tensor):
center_indices = center_indices.cpu().numpy()
if is_training:
self.train_diagnostics.append(center_indices)
self.storing_logger.train_diagnostics.append(center_indices)
else:
self.val_diagnostics.append(center_indices)
self.storing_logger.val_diagnostics.append(center_indices)
# if self.train_val_params.in_training_mode:
# # store the sample train patch from this epoch for visualization
# if batch_index == self.example_to_save and self.config.store_dataset_sample:
@ -380,3 +381,23 @@ def transfer_batch_to_device(batch: Any, device: torch.device) -> Any:
return batch
else:
return move_data_to_device(batch, device)
def create_lightning_model(config: ModelConfigBase, set_optimizer_and_scheduler: bool = True) -> InnerEyeLightning:
"""
Creates a PyTorch Lightning model that matches the provided InnerEye model configuration object.
The `optimizer` and `l_rate_scheduler` object of the Lightning model will also be populated.
:param set_optimizer_and_scheduler: If True (default), initialize the optimizer and LR scheduler of the model.
If False, skip that step (this is only meant to be used for unit tests.)
:param config: An InnerEye model configuration object
:return: A PyTorch Lightning model object.
"""
if config.is_segmentation_model:
model: InnerEyeLightning = SegmentationLightning(config)
elif config.is_scalar_model:
model = ScalarLightning(config)
else:
raise NotImplementedError(f"Don't know how to handle config of type {type(config)}")
if set_optimizer_and_scheduler:
model.set_optimizer_and_scheduler(config)
return model

Просмотреть файл

@ -8,7 +8,7 @@ import subprocess
import sys
from pathlib import Path
from time import sleep
from typing import Optional, Tuple, TypeVar
from typing import Any, Dict, Optional, Tuple, TypeVar
import numpy as np
import torch
@ -18,27 +18,18 @@ from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.plugins import DDPPlugin
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from InnerEye.Azure.azure_util import RUN_CONTEXT
from InnerEye.Common.common_util import SUBJECT_METRICS_FILE_NAME, logging_section
from InnerEye.Common.metrics_constants import TRAIN_PREFIX, VALIDATION_PREFIX
from InnerEye.Azure.azure_util import RUN_CONTEXT, is_offline_run_context
from InnerEye.Common.common_util import SUBJECT_METRICS_FILE_NAME, change_working_directory
from InnerEye.Common.resource_monitor import ResourceMonitor
from InnerEye.ML.common import ModelExecutionMode, RECOVERY_CHECKPOINT_FILE_NAME, cleanup_checkpoint_folder
from InnerEye.ML.config import SegmentationModelBase
from InnerEye.ML.deep_learning_config import VISUALIZATION_FOLDER
from InnerEye.ML.lightning_base import TrainingAndValidationDataLightning
from InnerEye.ML.lightning_helpers import create_lightning_model
from InnerEye.ML.deep_learning_config import ARGS_TXT, VISUALIZATION_FOLDER
from InnerEye.ML.lightning_base import InnerEyeContainer, InnerEyeLightning
from InnerEye.ML.lightning_container import LightningContainer
from InnerEye.ML.lightning_loggers import AzureMLLogger, StoringLogger
from InnerEye.ML.lightning_models import SUBJECT_OUTPUT_PER_RANK_PREFIX, ScalarLightning, \
get_subject_output_file_per_rank
from InnerEye.ML.model_config_base import ModelConfigBase
from InnerEye.ML.utils import ml_util
from InnerEye.ML.utils.checkpoint_handling import CheckpointHandler
from InnerEye.ML.utils.model_util import generate_and_print_model_summary
from InnerEye.ML.utils.training_util import ModelTrainingResults
from InnerEye.ML.visualizers.patch_sampling import visualize_random_crops_for_dataset
MAX_ITEM_LOAD_TIME_SEC = 0.5
MAX_LOAD_TIME_WARNINGS = 3
TEMP_PREFIX = "temp/"
T = TypeVar('T')
@ -68,23 +59,38 @@ def upload_output_file_as_temp(file_path: Path, outputs_folder: Path) -> None:
upload_name = TEMP_PREFIX + str(file_path.relative_to(outputs_folder))
RUN_CONTEXT.upload_file(upload_name, path_or_stream=str(file_path))
def create_lightning_trainer(config: ModelConfigBase,
def write_args_file(config: Any, outputs_folder: Path) -> None:
"""
Writes the given config to disk in plain text in the default output folder.
"""
output = str(config)
outputs_folder.mkdir(exist_ok=True, parents=True)
dst = outputs_folder / ARGS_TXT
dst.write_text(output)
logging.info(output)
def create_lightning_trainer(container: LightningContainer,
resume_from_checkpoint: Optional[Path] = None,
num_nodes: int = 1) -> Tuple[Trainer, StoringLogger]:
num_nodes: int = 1,
**kwargs: Dict[str, Any]) -> \
Tuple[Trainer, Optional[StoringLogger]]:
"""
Creates a PyTorch Lightning Trainer object for the given model container. It creates checkpoint handlers
and loggers. That includes a diagnostic logger for use in unit tests, which is also returned as the second
return value; it is None for containers that are not built-in InnerEye models.
:param config: The model configuration.
:param container: The container with model and data.
:param resume_from_checkpoint: If provided, training resumes from this checkpoint point.
:param num_nodes: The number of nodes to use in distributed training.
:param kwargs: Any additional keyword arguments will be passed to the constructor of Trainer.
:return: A tuple [Trainer object, diagnostic logger]
"""
# For now, stick with the legacy behaviour of always saving only the last epoch checkpoint. For large segmentation
# models, this still appears to be the best way of choosing them because validation loss on the relatively small
# training patches is not stable enough. Going by the validation loss somehow works for the Prostate model, but
# not for the HeadAndNeck model.
best_checkpoint_callback = ModelCheckpoint(dirpath=str(config.checkpoint_folder),
best_checkpoint_callback = ModelCheckpoint(dirpath=str(container.checkpoint_folder),
# filename=BEST_CHECKPOINT_FILE_NAME,
# monitor=f"{VALIDATION_PREFIX}{MetricType.LOSS.value}",
# save_top_k=1,
@ -93,144 +99,141 @@ def create_lightning_trainer(config: ModelConfigBase,
# Store 1 recovery checkpoint every recovery_checkpoint_save_interval epochs. Due to a bug in Lightning, this
# will still write alternate files recovery.ckpt and recovery-v0.ckpt, which are cleaned up later in
# cleanup_checkpoint_folder
recovery_checkpoint_callback = ModelCheckpoint(dirpath=str(config.checkpoint_folder),
recovery_checkpoint_callback = ModelCheckpoint(dirpath=str(container.checkpoint_folder),
filename=RECOVERY_CHECKPOINT_FILE_NAME,
period=config.recovery_checkpoint_save_interval
period=container.recovery_checkpoint_save_interval
)
num_gpus = torch.cuda.device_count() if config.use_gpu else 0
num_gpus = torch.cuda.device_count() if container.use_gpu else 0
logging.info(f"Number of available GPUs: {num_gpus}")
if config.max_num_gpus >= 0 and config.max_num_gpus < num_gpus:
num_gpus = config.max_num_gpus
if 0 <= container.max_num_gpus < num_gpus:
num_gpus = container.max_num_gpus
logging.info(f"Restricting the number of GPUs to {num_gpus}")
# Accelerator should be "ddp" when running large models in AzureML (when using DDP_spawn, we get out of GPU memory).
# For unit tests, only "ddp_spawn" works
accelerator = "ddp" if num_gpus * num_nodes > 1 else None
plugins = [InnerEyeDDPPlugin(num_nodes=num_nodes, sync_batchnorm=True)] if num_gpus * num_nodes > 1 else None
logging.info(f"Using {num_gpus} GPUs with accelerator '{accelerator}'")
storing_logger = StoringLogger()
tensorboard_logger = TensorBoardLogger(save_dir=str(config.logs_folder), name="Lightning", version="")
loggers = [storing_logger, tensorboard_logger, AzureMLLogger()]
# This leads to problems with run termination.
# if not is_offline_run_context(RUN_CONTEXT):
# mlflow_logger = MLFlowLogger(experiment_name=RUN_CONTEXT.experiment.name,
# tracking_uri=RUN_CONTEXT.experiment.workspace.get_mlflow_tracking_uri())
# # The MLFlow logger needs to get its ID from the AzureML run context, otherwise there will be two sets of
# # results for each run, one from native AzureML and one from the MLFlow logger.
# mlflow_logger._run_id = RUN_CONTEXT.id
# loggers.append(mlflow_logger)
tensorboard_logger = TensorBoardLogger(save_dir=str(container.logs_folder), name="Lightning", version="")
loggers = [tensorboard_logger, AzureMLLogger()]
storing_logger: Optional[StoringLogger]
if isinstance(container, InnerEyeContainer):
storing_logger = StoringLogger()
loggers.append(storing_logger)
else:
storing_logger = None
# Use 32bit precision when running on CPU. Otherwise, make it depend on use_mixed_precision flag.
precision = 32 if num_gpus == 0 else 16 if config.use_mixed_precision else 32
precision = 32 if num_gpus == 0 else 16 if container.use_mixed_precision else 32
# The next two flags control the settings in torch.backends.cudnn.deterministic and torch.backends.cudnn.benchmark
# https://pytorch.org/docs/stable/notes/randomness.html
# For the classification models, we observed only a small performance deterioration (increase in 10sec on total
# training time of 22min) when switching to deterministic.
if config.pl_deterministic:
if container.pl_deterministic:
deterministic = True
benchmark = False
else:
deterministic = False
benchmark = True
trainer = Trainer(default_root_dir=str(config.outputs_folder),
# Read out additional model-specific args here.
# We probably want to keep essential ones like numgpu and logging.
trainer = Trainer(default_root_dir=str(container.outputs_folder),
deterministic=deterministic,
benchmark=benchmark,
accelerator=accelerator,
max_epochs=config.num_epochs,
num_sanity_val_steps=config.pl_num_sanity_val_steps,
max_epochs=container.num_epochs,
num_sanity_val_steps=container.pl_num_sanity_val_steps,
callbacks=[best_checkpoint_callback, recovery_checkpoint_callback],
logger=loggers,
progress_bar_refresh_rate=0, # Disable the progress bar completely
progress_bar_refresh_rate=container.pl_progress_bar_refresh_rate,
num_nodes=num_nodes,
gpus=num_gpus,
precision=precision,
sync_batchnorm=True,
terminate_on_nan=config.detect_anomaly,
terminate_on_nan=container.detect_anomaly,
resume_from_checkpoint=str(resume_from_checkpoint) if resume_from_checkpoint else None,
plugins=plugins
)
plugins=plugins,
**kwargs)
return trainer, storing_logger
def model_train(config: ModelConfigBase,
checkpoint_handler: CheckpointHandler,
num_nodes: int = 1) -> ModelTrainingResults:
def start_resource_monitor(config: LightningContainer) -> ResourceMonitor:
# initialize and start GPU monitoring
gpu_tensorboard = config.logs_folder / "gpu_utilization"
# Result file in CSV format should NOT live in the logs folder, the streaming upload that is
# used for this folder might corrupt the file.
gpu_csv = config.outputs_folder / "gpu_utilization"
gpu_csv.mkdir(parents=True, exist_ok=True)
logging.info(f"Starting resource monitor. GPU utilization will be written to Tensorboard in "
f"{gpu_tensorboard}, aggregate metrics to {gpu_csv}")
resource_monitor = ResourceMonitor(interval_seconds=config.monitoring_interval_seconds,
tensorboard_folder=gpu_tensorboard,
csv_results_folder=gpu_csv)
resource_monitor.start()
return resource_monitor
def model_train(checkpoint_handler: CheckpointHandler,
container: LightningContainer,
num_nodes: int = 1) -> Tuple[Trainer, Optional[StoringLogger]]:
"""
The main training loop. It creates the Pytorch model based on the configuration options passed in,
creates a Pytorch Lightning trainer, and trains the model.
If a checkpoint was specified, then it loads the checkpoint before resuming training.
:param config: The arguments which specify all required information.
:param checkpoint_handler: Checkpoint handler object to find checkpoint paths for model initialization
:param num_nodes: The number of nodes to use in distributed training.
:param container: A container object that holds the training data in PyTorch Lightning format
and the model to train.
:return: A tuple of [Trainer, StoringLogger]. Trainer is the Lightning Trainer object that was used for fitting
the model. The StoringLogger object is returned when training an InnerEye built-in model; it is None when
fitting other models.
"""
# Get the path to the checkpoint to recover from
checkpoint_path = checkpoint_handler.get_recovery_path_train()
# This reads the dataset file, and possibly sets required pre-processing objects, like one-hot encoder
# for categorical features, that need to be available before creating the model.
config.read_dataset_if_needed()
lightning_model = container.model
container.before_training_on_all_ranks()
resource_monitor: Optional[ResourceMonitor] = None
# Execute some bookkeeping tasks only once if running distributed:
if is_rank_zero():
logging.info(f"Model checkpoints are saved at {container.checkpoint_folder}")
container.before_training_on_rank_zero()
write_args_file(container.config if isinstance(container, InnerEyeContainer) else container,
outputs_folder=container.outputs_folder)
if container.monitoring_interval_seconds > 0:
resource_monitor = start_resource_monitor(container)
# Create the trainer object. Backup the environment variables before doing that, in case we need to run a second
# training in the unit tests.
old_environ = dict(os.environ)
seed_everything(config.get_effective_random_seed())
trainer, storing_logger = create_lightning_trainer(config, checkpoint_path, num_nodes=num_nodes)
# Set random seeds just before training. For segmentation models, we have
# something that changes the random seed in the before_training_on_rank_zero hook.
seed_everything(container.get_effective_random_seed())
trainer, storing_logger = create_lightning_trainer(container,
checkpoint_path,
num_nodes=num_nodes,
**container.get_trainer_arguments())
logging.info(f"GLOBAL_RANK: {os.getenv('GLOBAL_RANK')}, LOCAL_RANK {os.getenv('LOCAL_RANK')}. "
f"trainer.global_rank: {trainer.global_rank}")
logging.debug("Creating the PyTorch model.")
lightning_model = create_lightning_model(config)
lightning_model.storing_logger = storing_logger
# InnerEye models use this logger for diagnostics
if isinstance(lightning_model, InnerEyeLightning):
if storing_logger is None:
raise ValueError("InnerEye models require the storing_logger for diagnostics")
lightning_model.storing_logger = storing_logger
resource_monitor = None
# Execute some bookkeeping tasks only once if running distributed:
if is_rank_zero():
config.write_args_file()
logging.info(str(config))
# Save the dataset files for later use in cross validation analysis
config.write_dataset_files()
logging.info(f"Model checkpoints are saved at {config.checkpoint_folder}")
# set the random seed for all libraries
ml_util.set_random_seed(config.get_effective_random_seed(), "Patch visualization")
# Visualize how patches are sampled for segmentation models. This changes the random generator, but we don't
# want training to depend on how many patients we visualized, and hence set the random seed again right after.
if isinstance(config, SegmentationModelBase):
with logging_section("Visualizing the effect of sampling random crops for training"):
visualize_random_crops_for_dataset(config)
# Print out a detailed breakdown of layers, memory consumption and time.
generate_and_print_model_summary(config, lightning_model.model)
if config.monitoring_interval_seconds > 0:
# initialize and start GPU monitoring
gpu_tensorboard = config.logs_folder / "gpu_utilization"
# Result file in CSV format should NOT live in the logs folder, the streaming upload that is
# used for this folder might corrupt the file.
gpu_csv = config.outputs_folder / "gpu_utilization"
gpu_csv.mkdir(parents=True, exist_ok=True)
logging.info(f"Starting resource monitor. GPU utilization will be written to Tensorboard in "
f"{gpu_tensorboard}, aggregate metrics to {gpu_csv}")
resource_monitor = ResourceMonitor(interval_seconds=config.monitoring_interval_seconds,
tensorboard_folder=gpu_tensorboard,
csv_results_folder=gpu_csv)
resource_monitor.start()
# Training loop
logging.info("Starting training")
lightning_data = TrainingAndValidationDataLightning(config) # type: ignore
# When trying to store the config object in the constructor, it does not appear to get stored at all, later
# reference of the object simply fail. Hence, have to set explicitly here.
lightning_data.config = config
trainer.fit(lightning_model, datamodule=lightning_data)
trainer.logger.close() # type: ignore
lightning_model.close_all_loggers()
# When training models that are not built-in InnerEye models, we have no guarantee that they write
# files to the right folder. Best guess is to change the current working directory to where files should go.
data_module = container.get_data_module()
with change_working_directory(container.outputs_folder):
trainer.fit(lightning_model, datamodule=data_module)
trainer.logger.close() # type: ignore
world_size = getattr(trainer, "world_size", 0)
is_azureml_run = not config.is_offline_run
is_azureml_run = not is_offline_run_context(RUN_CONTEXT)
# Per-subject model outputs for regression models are written per rank, and need to be aggregated here.
# Each thread per rank will come here, and upload its files to the run outputs. Rank 0 will later download them.
if is_azureml_run and world_size > 1 and isinstance(lightning_model, ScalarLightning):
upload_output_file_as_temp(lightning_model.train_subject_outputs_logger.csv_path, config.outputs_folder)
upload_output_file_as_temp(lightning_model.val_subject_outputs_logger.csv_path, config.outputs_folder)
upload_output_file_as_temp(lightning_model.train_subject_outputs_logger.csv_path, container.outputs_folder)
upload_output_file_as_temp(lightning_model.val_subject_outputs_logger.csv_path, container.outputs_folder)
# DDP will start multiple instances of the runner, one for each GPU. Those should terminate here after training.
# We can now use the global_rank of the Lightining model, rather than environment variables, because DDP has set
# all necessary properties.
@ -239,7 +242,7 @@ def model_train(config: ModelConfigBase,
sys.exit()
logging.info("Choosing the best checkpoint and removing redundant files.")
cleanup_checkpoint_folder(config.checkpoint_folder)
cleanup_checkpoint_folder(container.checkpoint_folder)
# Lightning modifies a ton of environment variables. If we first run training and then the test suite,
# those environment variables will mislead the training runs in the test suite, and make them crash.
# Hence, restore the original environment after training.
@ -254,17 +257,9 @@ def model_train(config: ModelConfigBase,
for rank in range(world_size):
for mode in [ModelExecutionMode.TRAIN, ModelExecutionMode.VAL]:
file = mode.value + "/" + get_subject_output_file_per_rank(rank)
RUN_CONTEXT.download_file(name=TEMP_PREFIX + file, output_file_path=config.outputs_folder / file)
RUN_CONTEXT.download_file(name=TEMP_PREFIX + file, output_file_path=container.outputs_folder / file)
# Concatenate all temporary file per execution mode
aggregate_and_create_subject_metrics_file(config.outputs_folder)
model_training_results = ModelTrainingResults(
train_results_per_epoch=list(storing_logger.to_metrics_dicts(prefix_filter=TRAIN_PREFIX).values()),
val_results_per_epoch=list(storing_logger.to_metrics_dicts(prefix_filter=VALIDATION_PREFIX).values()),
train_diagnostics=lightning_model.train_diagnostics,
val_diagnostics=lightning_model.val_diagnostics,
optimal_temperature_scale_values_per_checkpoint_epoch=[]
)
aggregate_and_create_subject_metrics_file(container.outputs_folder)
logging.info("Finished training")
@ -272,20 +267,20 @@ def model_train(config: ModelConfigBase,
# checkpoints correctly.
checkpoint_handler.additional_training_done()
# Upload visualization directory to AML run context to be able to see it
# in the Azure UI.
if config.max_batch_grad_cam > 0 and config.visualization_folder.exists():
RUN_CONTEXT.upload_folder(name=VISUALIZATION_FOLDER, path=str(config.visualization_folder))
# Upload visualization directory to AML run context to be able to see it in the Azure UI.
if isinstance(container, InnerEyeContainer):
if container.config.max_batch_grad_cam > 0 and container.visualization_folder.exists():
RUN_CONTEXT.upload_folder(name=VISUALIZATION_FOLDER, path=str(container.visualization_folder))
if resource_monitor:
logging.info("Shutting down the resource monitor process.")
if not config.is_offline_run:
if is_azureml_run:
for gpu_name, metrics_per_gpu in resource_monitor.read_aggregate_metrics().items():
# Log as a table, with GPU being the first column
RUN_CONTEXT.log_row("GPU utilization", GPU=gpu_name, **metrics_per_gpu)
resource_monitor.kill()
return model_training_results
return trainer, storing_logger
def aggregate_and_create_subject_metrics_file(outputs_folder: Path) -> None:
@ -298,16 +293,15 @@ def aggregate_and_create_subject_metrics_file(outputs_folder: Path) -> None:
for mode in [ModelExecutionMode.TRAIN, ModelExecutionMode.VAL]:
temp_files = (outputs_folder / mode.value).rglob(SUBJECT_OUTPUT_PER_RANK_PREFIX + "*")
result_file = outputs_folder / mode.value / SUBJECT_METRICS_FILE_NAME
result_file = result_file.open("a")
for i, file in enumerate(temp_files):
temp_file_contents = file.read_text()
if i == 0:
# Copy the first file as-is, including the first line with the column headers
result_file.write(temp_file_contents)
else:
# For all files but the first one, cut off the header line.
result_file.write(os.linesep + os.linesep.join(temp_file_contents.splitlines()[1:]))
result_file.close()
with result_file.open("a") as f:
for i, file in enumerate(temp_files):
temp_file_contents = file.read_text()
if i == 0:
# Copy the first file as-is, including the first line with the column headers
f.write(temp_file_contents)
else:
# For all files but the first one, cut off the header line.
f.write(os.linesep + os.linesep.join(temp_file_contents.splitlines()[1:]))
class InnerEyeDDPPlugin(DDPPlugin):

Просмотреть файл

@ -21,7 +21,7 @@ from InnerEye.ML.dataset.full_image_dataset import load_dataset_sources
from InnerEye.ML.deep_learning_config import ARGS_TXT
from InnerEye.ML.photometric_normalization import PhotometricNormalization
from InnerEye.ML.run_ml import MLRunner
from InnerEye.ML.utils.config_util import ModelConfigLoader
from InnerEye.ML.utils.config_loader import ModelConfigLoader
from InnerEye.ML.utils.io_util import load_images_from_dataset_source
@ -73,7 +73,7 @@ def main(yaml_file_path: Path) -> None:
In addition, the arguments '--image_channel' and '--gt_channel' must be specified (see below).
"""
config, runner_config, args = get_configs(SegmentationModelBase(should_validate=False), yaml_file_path)
local_dataset = MLRunner(config, runner_config).mount_or_download_dataset()
local_dataset = MLRunner(config, azure_config=runner_config).mount_or_download_dataset()
assert local_dataset is not None
dataframe = pd.read_csv(local_dataset / DATASET_CSV_FILE_NAME)
normalizer_config = NormalizeAndVisualizeConfig(**args)

Просмотреть файл

@ -449,7 +449,7 @@ class InferenceBatch(CTImagesMaskedBatch):
@inbatch_parallel(init='indices', post='_post_custom_components', target='threads')
def set_component(self, batch_idx: int, component: InferenceBatch.Components, data: np.ndarray) \
-> Dict[InferenceBatch.Components, Any]:
-> Dict[str, Any]:
logging.debug("Updated data in pipeline component: {}, for batch: {}.".format(component.value, batch_idx))
return {
component.value: {'type': component.value, 'data': data}

Просмотреть файл

@ -16,6 +16,9 @@ from azureml._restclient.constants import RunStatus
from azureml.core import Environment, Run
from azureml.core.model import Model
from azureml.data import FileDataset
from pytorch_lightning import LightningModule, Trainer, seed_everything
from pytorch_lightning.utilities.cloud_io import load as pl_load
from torch.utils.data import DataLoader
from InnerEye.Azure import azure_util
from InnerEye.Azure.azure_config import AzureConfig
@ -23,30 +26,33 @@ from InnerEye.Azure.azure_runner import ENVIRONMENT_VERSION, INPUT_DATA_KEY, get
from InnerEye.Azure.azure_util import CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY, \
DEFAULT_CROSS_VALIDATION_SPLIT_INDEX, EFFECTIVE_RANDOM_SEED_KEY_NAME, IS_ENSEMBLE_KEY_NAME, \
MODEL_ID_KEY_NAME, PARENT_RUN_CONTEXT, PARENT_RUN_ID_KEY_NAME, RUN_CONTEXT, RUN_RECOVERY_FROM_ID_KEY_NAME, \
RUN_RECOVERY_ID_KEY_NAME, create_run_recovery_id, get_results_blob_path, merge_conda_files
RUN_RECOVERY_ID_KEY_NAME, create_run_recovery_id, is_offline_run_context, \
merge_conda_files
from InnerEye.Common import fixed_paths
from InnerEye.Common.build_config import ExperimentResultLocation, build_information_to_dot_net_json_file
from InnerEye.Common.common_util import BASELINE_COMPARISONS_FOLDER, BASELINE_WILCOXON_RESULTS_FILE, \
CROSSVAL_RESULTS_FOLDER, ENSEMBLE_SPLIT_NAME, FULL_METRICS_DATAFRAME_FILE, METRICS_AGGREGATES_FILE, ModelProcessing, \
CROSSVAL_RESULTS_FOLDER, ENSEMBLE_SPLIT_NAME, FULL_METRICS_DATAFRAME_FILE, METRICS_AGGREGATES_FILE, \
ModelProcessing, \
OTHER_RUNS_SUBDIR_NAME, SCATTERPLOTS_SUBDIR_NAME, SUBJECT_METRICS_FILE_NAME, \
get_best_epoch_results_path, is_windows, logging_section, print_exception, remove_file_or_directory
from InnerEye.Common.fixed_paths import INNEREYE_PACKAGE_NAME, PYTHON_ENVIRONMENT_NAME
change_working_directory, get_best_epoch_results_path, is_windows, logging_section, logging_to_file, \
print_exception, remove_file_or_directory
from InnerEye.Common.fixed_paths import INNEREYE_PACKAGE_NAME, LOG_FILE_NAME, PYTHON_ENVIRONMENT_NAME
from InnerEye.ML.common import ModelExecutionMode
from InnerEye.ML.config import SegmentationModelBase
from InnerEye.ML.deep_learning_config import CHECKPOINT_FOLDER, FINAL_ENSEMBLE_MODEL_FOLDER, FINAL_MODEL_FOLDER, \
ModelCategory, MultiprocessingStartMethod
from InnerEye.ML.deep_learning_config import CHECKPOINT_FOLDER, DeepLearningConfig, FINAL_ENSEMBLE_MODEL_FOLDER, \
FINAL_MODEL_FOLDER, ModelCategory, MultiprocessingStartMethod
from InnerEye.ML.lightning_base import InnerEyeContainer
from InnerEye.ML.lightning_container import InnerEyeInference, LightningContainer
from InnerEye.ML.metrics import InferenceMetrics, InferenceMetricsForSegmentation
from InnerEye.ML.model_config_base import ModelConfigBase
from InnerEye.ML.model_inference_config import ModelInferenceConfig
from InnerEye.ML.model_testing import model_test
from InnerEye.ML.model_training import model_train
from InnerEye.ML.reports.notebook_report import get_ipynb_report_name, generate_classification_crossval_notebook, \
from InnerEye.ML.model_training import create_lightning_trainer, model_train
from InnerEye.ML.reports.notebook_report import generate_classification_crossval_notebook, \
generate_classification_multilabel_notebook, generate_classification_notebook, generate_segmentation_notebook, \
reports_folder
get_ipynb_report_name, reports_folder
from InnerEye.ML.runner import ModelDeploymentHookSignature, PostCrossValidationHookSignature, get_all_environment_files
from InnerEye.ML.scalar_config import ScalarModelBase
from InnerEye.ML.sequence_config import SequenceModelBase
from InnerEye.ML.utils import ml_util
from InnerEye.ML.utils.checkpoint_handling import CheckpointHandler
from InnerEye.ML.visualizers import activation_maps
from InnerEye.ML.visualizers.plot_cross_validation import \
@ -78,7 +84,8 @@ def download_dataset(azure_dataset_id: str,
contains a dataset csv file, no download is started.
:param azure_dataset_id: The name of a dataset that is registered in the AzureML workspace.
:param target_folder: The folder in which to download the dataset from Azure.
:param dataset_csv: Name of the csv file describing the dataset.
:param dataset_csv: Name of the csv file describing the dataset. This is only used to check if the dataset has been
downloaded already.
:param azure_config: All Azure-related configuration options.
:return: A path on the local machine that contains the dataset.
"""
@ -88,11 +95,18 @@ def download_dataset(azure_dataset_id: str,
raise ValueError(f"Expected to get a FileDataset, but got {type(azure_dataset)}")
# The downloaded dataset may already exist from a previous run.
expected_dataset_path = target_folder / azure_dataset_id
expected_dataset_file = expected_dataset_path / dataset_csv
logging.info(f"Model training will use dataset '{azure_dataset_id}' in Azure.")
if expected_dataset_path.is_dir() and expected_dataset_file.is_file():
logging.info(f"The dataset appears to be downloaded already in {expected_dataset_path}. Skipping.")
return expected_dataset_path
if expected_dataset_path.is_dir():
if dataset_csv:
if (expected_dataset_path / dataset_csv).is_file():
logging.info(f"The file {dataset_csv} is already downloaded in {expected_dataset_path}. Skipping.")
return expected_dataset_path
else:
existing_files = sum(1 for _ in expected_dataset_path.rglob("*"))
if existing_files > 1:
logging.info(f"There are already {existing_files} files in {expected_dataset_path}. Skipping.")
return expected_dataset_path
logging.info("Starting to download the dataset - WARNING, this could take very long!")
with logging_section("Downloading dataset"):
t0 = time.perf_counter()
@ -121,15 +135,20 @@ def log_metrics(val_metrics: Optional[InferenceMetricsForSegmentation],
class MLRunner:
def __init__(self,
model_config: ModelConfigBase,
model_config: Optional[DeepLearningConfig] = None,
container: Optional[LightningContainer] = None,
azure_config: Optional[AzureConfig] = None,
project_root: Optional[Path] = None,
post_cross_validation_hook: Optional[PostCrossValidationHookSignature] = None,
model_deployment_hook: Optional[ModelDeploymentHookSignature] = None) -> None:
model_deployment_hook: Optional[ModelDeploymentHookSignature] = None,
output_subfolder: str = "") -> None:
"""
Driver class to run an ML experiment. Note that the project root argument MUST be supplied when using InnerEye
as a package!
:param model_config: Model related configurations
:param model_config: If None, run the training as per the `container` argument (bring-your-own-model). If not
None, this is the model configuration for a built-in InnerEye model.
:param container: The LightningContainer object to use for training. If None, assume that the training is
for a built-in InnerEye model.
:param azure_config: Azure related configurations
:param project_root: Project root. This should only be omitted if calling run_ml from the test suite. Supplying
it is crucial when using InnerEye as a package or submodule!
@ -138,50 +157,112 @@ class MLRunner:
:param model_deployment_hook: an optional function for deploying a model in an application-specific way.
If present, it should take a model config (SegmentationModelBase), an AzureConfig, and an AzureML
Model as arguments, and return an optional Path and a further object of any type.
:param output_subfolder: If provided, the output folder structure will have an additional subfolder,
when running outside AzureML.
"""
if model_config is not None and container is not None:
raise ValueError("Only one of the two arguments 'model_config', 'container' must be provided.")
self.model_config = model_config
if container is None:
assert isinstance(model_config, ModelConfigBase), \
"When using a built-in InnerEye model, the configuration should be an instance of ModelConfigBase"
container = InnerEyeContainer(model_config)
self.container = container
self.azure_config: AzureConfig = azure_config or AzureConfig()
self.project_root: Path = project_root or fixed_paths.repository_root_directory()
self.post_cross_validation_hook = post_cross_validation_hook
self.model_deployment_hook = model_deployment_hook
self.output_subfolder = output_subfolder
self._has_setup_run = False
def setup(self, use_mount_or_download_dataset: bool = True) -> None:
"""
If the present object is using one of the InnerEye built-in models, create a (fake) container for it
and call the setup method. It sets the random seeds, and then creates the actual Lightning modules.
:param use_mount_or_download_dataset: If True, try to download or mount the dataset that is used by the model.
If False, assume that the dataset is already available (this should only be used for unit tests).
"""
if self._has_setup_run:
return
if (not self.azure_config.only_register_model) and use_mount_or_download_dataset:
# Set local_dataset to the mounted path specified in azure_runner.py, if any, or download it if that fails
# and config.local_dataset was not already set.
# This must happen before container setup because that could already read datasets.
self.container.local_dataset = self.mount_or_download_dataset()
# Ensure that we use fixed seeds before initializing the PyTorch models
seed_everything(self.container.get_effective_random_seed())
# Creating the folder structure must happen before the LightningModule is created, because the output
# parameters of the container will be copied into the module.
if self.output_subfolder:
# This codepath is only executed for cross validation runs outside AzureML: The folder structure
# uses an existing folder structure set by the caller, and just a subfolder is added.
self.container.file_system_config = self.container.file_system_config.add_subfolder(self.output_subfolder)
else:
self.container.create_filesystem(self.project_root)
# A lot of the code for the built-in InnerEye models expects the output paths directly in the config files.
if isinstance(self.container, InnerEyeContainer):
self.container.config.local_dataset = self.container.local_dataset
self.container.config.file_system_config = self.container.file_system_config
self.container.setup()
self.container.create_lightning_module_and_store()
self._has_setup_run = True
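A hedged usage sketch of the constructor and setup() method above, driving MLRunner directly with a bring-your-own Lightning container. MyContainer and its import path stand in for a user-defined LightningContainer subclass and are purely hypothetical.

from pathlib import Path

from InnerEye.Azure.azure_config import AzureConfig
from InnerEye.ML.run_ml import MLRunner
from my_project.configs import MyContainer  # hypothetical user-defined LightningContainer

runner = MLRunner(model_config=None,
                  container=MyContainer(),
                  azure_config=AzureConfig(),
                  project_root=Path.cwd())
runner.setup()  # mounts/downloads the dataset, fixes random seeds, creates the Lightning module
runner.run()    # training, inference and report generation as described below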
@property
def is_offline_run(self) -> bool:
"""
Returns True if the present run is outside of AzureML, and False if it is inside of AzureML.
:return:
"""
return is_offline_run_context(RUN_CONTEXT)
@property
def innereye_config(self) -> DeepLearningConfig:
"""
Gets the model configuration object for all built-in InnerEye models. Raises an exception if the present
object trains a LightningContainer that is not a built-in InnerEye model.
"""
if self.model_config is None or not isinstance(self.model_config, DeepLearningConfig):
raise ValueError("This property should only be used with built-in InnerEye models, but model "
f"configuration is of type {type(self.model_config)}")
return self.model_config
def start_logging_to_file(self) -> None:
if self.container is None:
self.setup()
logging_to_file(self.container.logs_folder / LOG_FILE_NAME)
def is_offline_cross_val_parent_run(self) -> bool:
"""
Returns true if the current run is an offline run with cross validation splits > 0
and cross_validation_split_index == DEFAULT_CROSS_VALIDATION_SPLIT_INDEX (ie: a parent)
"""
return self.model_config.cross_validation_split_index == DEFAULT_CROSS_VALIDATION_SPLIT_INDEX and \
self.model_config.perform_cross_validation and self.model_config.is_offline_run
return self.container.cross_validation_split_index == DEFAULT_CROSS_VALIDATION_SPLIT_INDEX and \
self.container.perform_cross_validation and self.is_offline_run
def spawn_offline_cross_val_classification_child_runs(self) -> None:
"""
Trains and Tests k models based on their respective data splits sequentially.
Stores the results on the Validation set to the outputs directory of the parent run.
"""
_config = self.model_config
assert isinstance(_config, ScalarModelBase)
parent_run_file_system = _config.file_system_config
assert isinstance(self.innereye_config, ScalarModelBase)
def _spawn_run(cross_val_split_index: int) -> None:
split_model_config = copy.deepcopy(_config)
assert isinstance(split_model_config, ScalarModelBase)
split_model_config.cross_validation_split_index = cross_val_split_index
_local_split_folder_name = str(cross_val_split_index)
split_model_config.file_system_config = parent_run_file_system.add_subfolder(_local_split_folder_name)
split_config = copy.deepcopy(self.innereye_config)
split_config.cross_validation_split_index = cross_val_split_index
logging.info(f"Running model train and test on cross validation split: {cross_val_split_index}")
split_ml_runner = MLRunner(model_config=split_model_config,
split_ml_runner = MLRunner(model_config=split_config,
container=None,
azure_config=self.azure_config,
project_root=self.project_root,
post_cross_validation_hook=self.post_cross_validation_hook,
model_deployment_hook=self.model_deployment_hook)
model_deployment_hook=self.model_deployment_hook,
output_subfolder=str(cross_val_split_index))
split_ml_runner.run()
for i in range(_config.number_of_cross_validation_splits):
for i in range(self.innereye_config.number_of_cross_validation_splits):
_spawn_run(i)
config_and_files = get_config_and_results_for_offline_runs(self.model_config)
config_and_files = get_config_and_results_for_offline_runs(self.innereye_config)
plot_cross_validation_from_files(config_and_files, Path(config_and_files.config.outputs_directory))
def set_run_tags_from_parent(self) -> None:
@ -208,8 +289,8 @@ class MLRunner:
]
new_tags = {tag: run_tags_parent.get(tag, "") for tag in tags_to_copy}
new_tags[RUN_RECOVERY_ID_KEY_NAME] = create_run_recovery_id(run=RUN_CONTEXT)
new_tags[CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY] = str(self.model_config.cross_validation_split_index)
new_tags[EFFECTIVE_RANDOM_SEED_KEY_NAME] = str(self.model_config.get_effective_random_seed())
new_tags[CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY] = str(self.container.cross_validation_split_index)
new_tags[EFFECTIVE_RANDOM_SEED_KEY_NAME] = str(self.container.get_effective_random_seed())
RUN_CONTEXT.set_tags(new_tags)
def run(self) -> None:
@ -217,66 +298,113 @@ class MLRunner:
Driver function to run an ML experiment. If an offline cross validation run is requested, then
this function is recursively called for each cross validation split.
"""
self.setup()
if self.is_offline_cross_val_parent_run():
if self.model_config.is_segmentation_model:
if self.innereye_config.is_segmentation_model:
raise NotImplementedError("Offline cross validation is only supported for classification models.")
self.spawn_offline_cross_val_classification_child_runs()
return
# Get the AzureML context in which the script is running
if not self.model_config.is_offline_run and PARENT_RUN_CONTEXT is not None:
if not self.is_offline_run and PARENT_RUN_CONTEXT is not None:
logging.info("Setting tags from parent run.")
self.set_run_tags_from_parent()
self.save_build_info_for_dotnet_consumers()
# Set data loader start method
self.set_multiprocessing_start_method()
# configure recovery container if provided
checkpoint_handler = CheckpointHandler(model_config=self.model_config,
checkpoint_handler = CheckpointHandler(container=self.container,
azure_config=self.azure_config,
project_root=self.project_root,
run_context=RUN_CONTEXT)
checkpoint_handler.download_recovery_checkpoints_or_weights()
trainer: Optional[Trainer] = None
# do training and inference, unless the "only register" switch is set (which requires a run_recovery
# to be valid).
if not self.azure_config.only_register_model:
# Set local_dataset to the mounted path specified in azure_runner.py, if any, or download it if that fails
# and config.local_dataset was not already set.
self.model_config.local_dataset = self.mount_or_download_dataset()
# Check for existing dataset.csv file in the correct locations. Skip that if a dataset has already been
# loaded (typically only during tests)
if self.model_config.dataset_data_frame is None:
assert self.model_config.local_dataset is not None
ml_util.validate_dataset_paths(
self.model_config.local_dataset,
self.model_config.dataset_csv)
# train a new model if required
if self.azure_config.train:
with logging_section("Model training"):
model_train(self.model_config, checkpoint_handler, num_nodes=self.azure_config.num_nodes)
else:
self.model_config.write_dataset_files()
trainer, _ = model_train(checkpoint_handler,
container=self.container,
num_nodes=self.azure_config.num_nodes)
# log the number of epochs used for model training
RUN_CONTEXT.log(name="Train epochs", value=self.container.num_epochs)
elif isinstance(self.container, InnerEyeContainer):
self.innereye_config.write_dataset_files()
self.create_activation_maps()
# log the number of epochs used for model training
RUN_CONTEXT.log(name="Train epochs", value=self.model_config.num_epochs)
if isinstance(self.container, InnerEyeContainer):
# Inference for the InnerEye built-in models
# We specify the ModelProcessing as DEFAULT here even if the run_recovery points to an ensemble run, because
# the current run is a single one. See the documentation of ModelProcessing for more details.
self.run_inference_and_register_model(checkpoint_handler, ModelProcessing.DEFAULT)
# We specify the ModelProcessing as DEFAULT here even if the run_recovery points to an ensemble run, because
# the current run is a single one. See the documentation of ModelProcessing for more details.
self.run_inference_and_register_model(checkpoint_handler, ModelProcessing.DEFAULT)
if self.container.generate_report:
self.generate_report(ModelProcessing.DEFAULT)
if self.model_config.generate_report:
self.generate_report(ModelProcessing.DEFAULT)
# If this is a cross validation run, and the present run is child run 0, then wait for the sibling runs,
# build the ensemble model, and write a report for that.
if self.container.number_of_cross_validation_splits > 0:
should_wait_for_other_child_runs = (not self.is_offline_run) and \
self.container.cross_validation_split_index == 0
if should_wait_for_other_child_runs:
self.wait_for_runs_to_finish()
self.create_ensemble_model_and_run_inference()
else:
# Inference for all models that are specified via LightningContainers.
self.run_inference_for_lightning_models(checkpoint_handler.get_checkpoints_to_test(), trainer)
# We can't enforce that files are written to the output folder, hence change the working directory manually
with change_working_directory(self.container.outputs_folder):
self.container.create_report()
# If this is a cross validation run, and the present run is child run 0, then wait for the sibling runs,
# build the ensemble model, and write a report for that.
if self.model_config.number_of_cross_validation_splits > 0:
if self.model_config.should_wait_for_other_cross_val_child_runs():
self.wait_for_runs_to_finish()
self.create_ensemble_model()
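The branches above wrap report creation and inference in change_working_directory so that models with no notion of an output folder still write their files into outputs/. The helper itself is not part of this diff; a minimal context manager with the behaviour relied upon here could look as follows.

import os
from contextlib import contextmanager
from pathlib import Path
from typing import Generator

@contextmanager
def change_working_directory(path: Path) -> Generator[None, None, None]:
    """Temporarily change the working directory, restoring it even if the body raises."""
    old_cwd = os.getcwd()
    os.chdir(str(path))
    try:
        yield
    finally:
        os.chdir(old_cwd)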
def run_inference_for_lightning_models(self, checkpoint_paths: List[Path], trainer: Optional[Trainer]) -> None:
"""
Run inference on the test set for all models that are specified via a LightningContainer.
"""
if len(checkpoint_paths) != 1:
raise ValueError(f"This method expects exactly 1 checkpoint for inference, but got {len(checkpoint_paths)}")
lightning_model = self.container.model
# Run the customized inference code only if the "inference" step has been overridden
if isinstance(lightning_model, InnerEyeInference) and \
type(lightning_model).inference_step != InnerEyeInference.inference_step:
logging.info("Running inference via the InnerEyeInference.inference_step method")
# Read the data modules before changing the working directory, in case the code relies on relative paths
data = self.container.get_inference_data_module()
dataloaders: List[Tuple[DataLoader, ModelExecutionMode]] = []
if self.container.perform_validation_and_test_set_inference:
dataloaders.append((data.test_dataloader(), ModelExecutionMode.TEST)) # type: ignore
dataloaders.append((data.val_dataloader(), ModelExecutionMode.VAL)) # type: ignore
if self.container.perform_training_set_inference:
dataloaders.append((data.train_dataloader(), ModelExecutionMode.TRAIN)) # type: ignore
map_location = "gpu" if self.container.use_gpu else "cpu"
checkpoint = pl_load(checkpoint_paths[0], map_location=map_location)
lightning_model.load_state_dict(checkpoint['state_dict'])
lightning_model.eval()
with change_working_directory(self.container.outputs_folder):
lightning_model.on_inference_start()
for loader, split in dataloaders:
logging.info(f"Starting inference on {split.value} set")
lightning_model.on_inference_epoch_start(dataset_split=split, is_ensemble_model=False)
for batch_idx, item in enumerate(loader):
model_output = lightning_model.forward(item[0])
lightning_model.inference_step(item, batch_idx, model_output=model_output)
lightning_model.on_inference_epoch_end()
lightning_model.on_inference_end()
elif type(lightning_model).test_step != LightningModule.test_step:
# Run Lightning's built-in test procedure if the `test_step` method has been overridden
logging.info("Running inference via the LightningModule.test_step method")
trainer = trainer or create_lightning_trainer(self.container)[0]
# When training models that are not built-in InnerEye models, we have no guarantee that they write
# files to the right folder. Best guess is to change the current working directory to where files should go.
with change_working_directory(self.container.outputs_folder):
trainer.test(self.container.model,
test_dataloaders=self.container.get_data_module().test_dataloader(),
ckpt_path=str(checkpoint_paths[0]))
logging.info("Finished inference.")
else:
logging.warning("None of the suitable test methods is overridden. Skipping inference completely.")
def run_inference_and_register_model(self, checkpoint_handler: CheckpointHandler,
model_proc: ModelProcessing) -> None:
@ -311,17 +439,17 @@ class MLRunner:
model (from the run we recovered) should already have been registered, so we should only
do so if this run is specifically for that purpose.
"""
if self.model_config.is_offline_run:
if self.is_offline_run:
return False
return self.azure_config.train or self.azure_config.only_register_model
def create_activation_maps(self) -> None:
if self.model_config.is_segmentation_model and self.model_config.activation_map_layers is not None:
if self.innereye_config.is_segmentation_model and self.innereye_config.activation_map_layers is not None:
logging.info("Extracting activation maps for layer")
activation_maps.extract_activation_maps(self.model_config)
activation_maps.extract_activation_maps(self.innereye_config) # type: ignore
logging.info("Successfully extracted and saved activation maps")
def mount_or_download_dataset(self) -> Path:
def mount_or_download_dataset(self) -> Optional[Path]:
"""
Makes the dataset that the model uses available on the executing machine. If the present training run is outside
of AzureML, it expects that either the model has a `local_dataset` field set, in which case no action will be
@ -331,53 +459,46 @@ class MLRunner:
mounted or downloaded.
Returns the path of the dataset on the executing machine.
"""
azure_dataset_id = self.model_config.azure_dataset_id
if self.model_config.is_offline_run:
azure_dataset_id = self.container.azure_dataset_id
local_dataset = self.container.local_dataset
if self.is_offline_run:
# A dataset, either local or in Azure, is required for the built-in InnerEye models. When models are
# specified via a LightningContainer, these dataset fields are optional, because the container datasets
# could be downloaded even from the web.
is_dataset_required = isinstance(self.container, InnerEyeContainer)
# The present run is outside of AzureML: If local_dataset is set, use that as the path to the data.
# Otherwise, download the dataset specified by the azure_dataset_id
local_dataset = self.model_config.local_dataset
if (not azure_dataset_id) and (local_dataset is None):
raise ValueError("The model must contain either local_dataset or azure_dataset_id.")
if is_dataset_required:
if (not azure_dataset_id) and (local_dataset is None):
raise ValueError("The model must contain either local_dataset or azure_dataset_id.")
if local_dataset:
expected_dir = Path(local_dataset)
if not expected_dir.is_dir():
raise FileNotFoundError(f"The model uses a dataset in {expected_dir}, but that does not exist.")
logging.info(f"Model training will use the local dataset provided in {expected_dir}")
return expected_dir
return download_dataset(azure_dataset_id=azure_dataset_id,
target_folder=self.project_root / fixed_paths.DATASETS_DIR_NAME,
dataset_csv=self.model_config.dataset_csv,
azure_config=self.azure_config)
if azure_dataset_id:
dataset_csv = ""
if isinstance(self.model_config, DeepLearningConfig):
dataset_csv = self.model_config.dataset_csv
return download_dataset(azure_dataset_id=azure_dataset_id,
target_folder=self.project_root / fixed_paths.DATASETS_DIR_NAME,
dataset_csv=dataset_csv, azure_config=self.azure_config)
return None
# Inside of AzureML, datasets can be either mounted or downloaded.
if not azure_dataset_id:
raise ValueError("The model must contain azure_dataset_id for running on AML")
mounted = try_to_mount_input_dataset()
if not mounted:
raise ValueError("Unable to mount or download input dataset.")
return mounted
def save_build_info_for_dotnet_consumers(self) -> None:
results_container = get_results_blob_path(RUN_CONTEXT.id)
result_location = ExperimentResultLocation(
azure_job_name=RUN_CONTEXT.id,
dataset_folder=self.model_config.azure_dataset_id,
results_container_name=results_container,
commandline_overrides=str(self.model_config.overrides),
dataset_uri=self.model_config.azure_dataset_id,
results_uri="",
)
# Fill in the missing information in the build config (everything that is not available at the time
# of evoking the runner), and then save in the format needed for the .NET consumers
build_information_to_dot_net_json_file(
self.azure_config, result_location, folder=self.model_config.outputs_folder)
if azure_dataset_id:
mounted = try_to_mount_input_dataset()
if not mounted:
raise ValueError("Unable to mount or download input dataset.")
return mounted
return None
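The comment above notes that LightningContainer models may not need local_dataset or azure_dataset_id at all, because their data module can fetch data itself. A sketch of such a data module follows; the URL and file name are placeholders and not part of the toolbox.

import urllib.request
from pathlib import Path

import torch
from pytorch_lightning import LightningDataModule
from torch.utils.data import DataLoader, TensorDataset


class SelfDownloadingDataModule(LightningDataModule):
    def __init__(self, root: Path = Path("downloaded_data")) -> None:
        super().__init__()
        self.root = root

    def prepare_data(self) -> None:
        # Runs once per node: fetch the raw data from a public location.
        self.root.mkdir(parents=True, exist_ok=True)
        target = self.root / "data.csv"
        if not target.exists():
            urllib.request.urlretrieve("https://example.com/data.csv", target)  # placeholder URL

    def train_dataloader(self) -> DataLoader:
        # Parsing of data.csv is omitted; synthetic tensors keep the sketch short.
        features, labels = torch.rand(32, 10), torch.rand(32, 1)
        return DataLoader(TensorDataset(features, labels), batch_size=8)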
def set_multiprocessing_start_method(self) -> None:
"""
Set the (PyTorch) multiprocessing start method.
"""
method = self.model_config.multiprocessing_start_method
method = self.container.multiprocessing_start_method
if is_windows():
if method != MultiprocessingStartMethod.spawn:
logging.warning(f"Cannot set multiprocessing start method to '{method.name}' "
@ -402,7 +523,7 @@ class MLRunner:
logging.warning("Abandoning model registration - no valid checkpoint paths found")
return
if not self.model_config.is_offline_run:
if not self.is_offline_run:
split_index = RUN_CONTEXT.get_tags().get(CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY, None)
if split_index == DEFAULT_CROSS_VALIDATION_SPLIT_INDEX:
RUN_CONTEXT.tag(IS_ENSEMBLE_KEY_NAME, str(model_proc == ModelProcessing.ENSEMBLE_CREATION))
@ -447,7 +568,7 @@ class MLRunner:
model_subfolder = FINAL_MODEL_FOLDER if model_proc == ModelProcessing.DEFAULT else FINAL_ENSEMBLE_MODEL_FOLDER
# This is the path under which AzureML will know the files: Either "final_model" or "final_ensemble_model"
artifacts_path = model_subfolder
final_model_folder = self.model_config.file_system_config.run_folder / model_subfolder
final_model_folder = self.innereye_config.file_system_config.run_folder / model_subfolder
# Copy all code from project and InnerEye into the model folder, and copy over checkpoints.
# This increases the size of the data stored for the run. The other option would be to store all checkpoints
# right in the final model folder - however, then that would also contain any other checkpoints that the model
@ -467,7 +588,7 @@ class MLRunner:
# When registering the model on the run, we need to provide a relative path inside of the run's output
# folder in `model_path`
model = run_to_register_on.register_model(
model_name=self.model_config.model_name,
model_name=self.innereye_config.model_name,
model_path=artifacts_path,
tags=RUN_CONTEXT.get_tags(),
description=model_description
@ -487,9 +608,9 @@ class MLRunner:
logging.info(f"Registered {model_proc.value} model: {model.name}, with Id: {model.id}")
# create a version of the model for deployment if the hook is provided
if self.model_deployment_hook is not None:
assert isinstance(self.model_config, SegmentationModelBase)
assert isinstance(self.innereye_config, SegmentationModelBase)
deployment_result = self.model_deployment_hook(
self.model_config, self.azure_config, model, model_proc)
self.innereye_config, self.azure_config, model, model_proc)
return model, deployment_result
@staticmethod
@ -539,17 +660,17 @@ class MLRunner:
try:
# Checkpoints live in a folder structure in the checkpoint folder. There can be multiple of
# them, with identical names, coming from an ensemble run. Hence, preserve their folder structure.
checkpoint_relative = checkpoint.relative_to(self.model_config.checkpoint_folder)
checkpoint_relative = checkpoint.relative_to(self.innereye_config.checkpoint_folder)
except ValueError:
raise ValueError(f"Checkpoint file {checkpoint} was expected to be in a subfolder of "
f"{self.model_config.checkpoint_folder}")
f"{self.innereye_config.checkpoint_folder}")
# Checkpoints go into a newly created folder "checkpoints" inside of the model folder
relative_checkpoint_paths.append(str(Path(CHECKPOINT_FOLDER) / checkpoint_relative))
else:
raise ValueError(f"Expected an absolute path to a checkpoint file, but got: {checkpoint}")
model_folder.mkdir(parents=True, exist_ok=True)
model_inference_config = ModelInferenceConfig(model_name=self.model_config.model_name,
model_configs_namespace=self.model_config.__class__.__module__,
model_inference_config = ModelInferenceConfig(model_name=self.innereye_config.model_name,
model_configs_namespace=self.innereye_config.__class__.__module__,
checkpoint_paths=relative_checkpoint_paths)
# Inference configuration must live in the root folder of the registered model
full_path_to_config = model_folder / fixed_paths.MODEL_INFERENCE_JSON_FILE_NAME
@ -591,11 +712,12 @@ class MLRunner:
val_metrics = None
test_metrics = None
config = self.model_config
config = self.innereye_config
def run_model_test(data_split: ModelExecutionMode) -> Optional[InferenceMetrics]:
return model_test(config, data_split=data_split, checkpoint_handler=checkpoint_handler,
return model_test(config, data_split=data_split, checkpoint_handler=checkpoint_handler, # type: ignore
model_proc=model_proc)
if config.perform_validation_and_test_set_inference:
# perform inference on test set
test_metrics = run_model_test(ModelExecutionMode.TEST)
@ -610,7 +732,7 @@ class MLRunner:
# log the metrics to AzureML experiment if possible. When doing ensemble runs, log to the Hyperdrive parent run,
# so that we get the metrics of child run 0 and the ensemble separated.
if config.is_segmentation_model and not config.is_offline_run:
if config.is_segmentation_model and not self.is_offline_run:
run_for_logging = PARENT_RUN_CONTEXT if model_proc.ENSEMBLE_CREATION else RUN_CONTEXT
log_metrics(val_metrics=val_metrics, test_metrics=test_metrics, # type: ignore
train_metrics=train_metrics, run_context=run_for_logging) # type: ignore
@ -633,9 +755,9 @@ class MLRunner:
:return: True if all sibling runs of the current run have finished (they either completed successfully,
or failed). False if any of them is still pending (running or queued).
"""
if (not self.model_config.is_offline_run) \
if (not self.is_offline_run) \
and (azure_util.is_cross_validation_child_run(RUN_CONTEXT)):
n_splits = self.model_config.get_total_number_of_cross_validation_runs()
n_splits = self.innereye_config.get_total_number_of_cross_validation_runs()
child_runs = azure_util.fetch_child_runs(PARENT_RUN_CONTEXT,
expected_number_cross_validation_splits=n_splits)
pending_runs = [x.id for x in child_runs
@ -648,14 +770,14 @@ class MLRunner:
else:
raise NotImplementedError("are_sibling_runs_finished only works for cross validation runs in AzureML.")
def create_ensemble_model(self) -> None:
def create_ensemble_model_and_run_inference(self) -> None:
"""
Create an ensemble model from the results of the sibling runs of the present run. The present run here will
be cross validation child run 0.
"""
assert PARENT_RUN_CONTEXT, "This function should only be called in a Hyperdrive run"
with logging_section("Downloading checkpoints from sibling runs"):
checkpoint_handler = CheckpointHandler(model_config=self.model_config,
checkpoint_handler = CheckpointHandler(container=self.container,
azure_config=self.azure_config,
project_root=self.project_root,
run_context=PARENT_RUN_CONTEXT)
@ -665,13 +787,13 @@ class MLRunner:
model_proc=ModelProcessing.ENSEMBLE_CREATION)
crossval_dir = self.plot_cross_validation_and_upload_results()
if self.model_config.generate_report:
if self.innereye_config.generate_report:
self.generate_report(ModelProcessing.ENSEMBLE_CREATION)
# CrossValResults should have been uploaded to the parent run, so we don't need it here.
remove_file_or_directory(crossval_dir)
# We can also remove OTHER_RUNS under the root, as it is no longer useful and only contains copies of files
# available elsewhere. However, first we need to upload relevant parts of OTHER_RUNS/ENSEMBLE.
other_runs_dir = self.model_config.outputs_folder / OTHER_RUNS_SUBDIR_NAME
other_runs_dir = self.innereye_config.outputs_folder / OTHER_RUNS_SUBDIR_NAME
other_runs_ensemble_dir = other_runs_dir / ENSEMBLE_SPLIT_NAME
if PARENT_RUN_CONTEXT is not None:
if other_runs_ensemble_dir.exists():
@ -690,9 +812,9 @@ class MLRunner:
from InnerEye.ML.visualizers.plot_cross_validation import crossval_config_from_model_config, \
plot_cross_validation, unroll_aggregate_metrics
# perform aggregation as cross val splits are now ready
plot_crossval_config = crossval_config_from_model_config(self.model_config)
plot_crossval_config = crossval_config_from_model_config(self.innereye_config)
plot_crossval_config.run_recovery_id = PARENT_RUN_CONTEXT.tags[RUN_RECOVERY_ID_KEY_NAME]
plot_crossval_config.outputs_directory = self.model_config.outputs_folder
plot_crossval_config.outputs_directory = self.innereye_config.outputs_folder
plot_crossval_config.azure_config = self.azure_config
cross_val_results_root = plot_cross_validation(plot_crossval_config)
if isinstance(self.model_config, ScalarModelBase) and not isinstance(self.model_config, SequenceModelBase):
@ -701,10 +823,10 @@ class MLRunner:
full_metrics_csv = cross_val_results_root / FULL_METRICS_DATAFRAME_FILE
generate_classification_crossval_notebook(notebook_path, self.model_config, full_metrics_csv)
if self.post_cross_validation_hook:
self.post_cross_validation_hook(self.model_config, cross_val_results_root)
self.post_cross_validation_hook(self.innereye_config, cross_val_results_root)
# upload results to the parent run's outputs so that the files are visible inside the AzureML UI.
PARENT_RUN_CONTEXT.upload_folder(name=CROSSVAL_RESULTS_FOLDER, path=str(cross_val_results_root))
if self.model_config.is_scalar_model:
if self.innereye_config.is_scalar_model:
try:
aggregates = pd.read_csv(cross_val_results_root / METRICS_AGGREGATES_FILE)
unrolled_aggregate_metrics = unroll_aggregate_metrics(aggregates)
@ -715,7 +837,7 @@ class MLRunner:
return cross_val_results_root
def generate_report(self, model_proc: ModelProcessing) -> None:
config = self.model_config
config = self.innereye_config
if config.model_category not in [ModelCategory.Segmentation, ModelCategory.Classification]:
logging.info(f"No reporting available for a model with category {config.model_category}")
return
@ -753,7 +875,8 @@ class MLRunner:
if len(config.class_names) > 1:
generate_classification_multilabel_notebook(
result_notebook=reports_dir / get_ipynb_report_name(f"{config.model_category.value}_multilabel"),
result_notebook=reports_dir / get_ipynb_report_name(
f"{config.model_category.value}_multilabel"),
config=config,
train_metrics=path_to_best_epoch_train,
val_metrics=path_to_best_epoch_val,

View file

@ -4,50 +4,62 @@
# ------------------------------------------------------------------------------------------
import os
import sys
import warnings
from pathlib import Path
# Suppress all errors here because the imports placed after code cause loads of warnings. We can't suppress
# just those individual warnings.
# flake8: noqa
# Workaround for an issue with how AzureML and Pytorch Lightning interact: When spawning additional processes for DDP,
# the working directory is not correctly picked up in sys.path
print("Starting InnerEye runner.")
print(f"Starting InnerEye runner at {sys.argv[0]}")
innereye_root = Path(__file__).absolute().parent.parent.parent
if (innereye_root / "InnerEye").is_dir():
innereye_root_str = str(innereye_root)
if innereye_root_str not in sys.path:
print(f"Adding to sys.path: {innereye_root_str}")
print(f"Adding InnerEye folder to sys.path: {innereye_root_str}")
sys.path.insert(0, innereye_root_str)
# We change the current working directory before starting the actual training. However, this throws off starting
# the child training threads because sys.argv[0] is a relative path when running in AzureML. Turn that into an absolute
# path.
runner_path = Path(sys.argv[0])
if not runner_path.is_absolute():
sys.argv[0] = str(runner_path.absolute())
import logging
from pathlib import Path
from typing import Any, Callable, List, Optional, Tuple
from typing import Any, Optional, Tuple
from azureml._base_sdk_common import user_agent
from azureml.core import Model, Run
from azureml.core import Run
from InnerEye.Azure import azure_util
from InnerEye.Azure.azure_config import AzureConfig, ParserResult, SourceConfig
from InnerEye.Azure.azure_runner import create_runner_parser, parse_args_and_add_yaml_variables, \
parse_arguments, set_environment_variables_for_multi_node, submit_to_azureml
from InnerEye.Azure.azure_util import is_run_and_child_runs_completed
from InnerEye.Azure.azure_util import get_all_environment_files, is_run_and_child_runs_completed
from InnerEye.Azure.run_pytest import download_pytest_result, run_pytest
from InnerEye.Common import fixed_paths
from InnerEye.Common.common_util import FULL_METRICS_DATAFRAME_FILE, METRICS_AGGREGATES_FILE, \
ModelProcessing, disable_logging_to_file, is_linux, logging_to_file, logging_to_stdout, print_exception
disable_logging_to_file, is_linux, logging_to_stdout
from InnerEye.Common.generic_parsing import GenericConfig
from InnerEye.ML.common import DATASET_CSV_FILE_NAME
from InnerEye.ML.config import SegmentationModelBase
from InnerEye.ML.config import ModelDeploymentHookSignature, PostCrossValidationHookSignature
from InnerEye.ML.deep_learning_config import DeepLearningConfig
from InnerEye.ML.model_config_base import ModelConfigBase
from InnerEye.ML.utils.config_util import ModelConfigLoader
from InnerEye.ML.utils.config_loader import ModelConfigLoader
LOG_FILE_NAME = "stdout.txt"
try:
# This import can fail when the code runs inside the azure_runner.yml Conda environment, that we use
# for the PR builds
from InnerEye.ML.lightning_container import LightningContainer
PostCrossValidationHookSignature = Callable[[ModelConfigBase, Path], None]
ModelDeploymentHookSignature = Callable[[SegmentationModelBase, AzureConfig, Model, ModelProcessing], Any]
has_torch = True
except ModuleNotFoundError as ex:
has_torch = False
def may_initialize_rpdb() -> None:
def initialize_rpdb() -> None:
"""
On Linux only, import and initialize rpdb, to enable remote debugging if necessary.
"""
@ -85,23 +97,10 @@ def suppress_logging_noise() -> None:
os.environ['MKL_THREADING_LAYER'] = 'GNU'
def get_all_environment_files(project_root: Path) -> List[Path]:
"""
Returns a list of all Conda environment files that should be used. This is firstly the InnerEye conda file,
and possibly a second environment.yml file that lives at the project root folder.
:param project_root: The root folder of the code that starts the present training run.
:return: A list with 1 or 2 entries that are conda environment files.
"""
innereye_yaml = fixed_paths.get_environment_yaml_file()
project_yaml = project_root / fixed_paths.ENVIRONMENT_YAML_FILE_NAME
files = [innereye_yaml]
if innereye_yaml != project_yaml:
files.append(project_yaml)
return files
class Runner:
"""
This class contains the high-level logic to start a training run: choose a model configuration by name,
submit to AzureML if needed, or otherwise start the actual training and test loop.
:param project_root: The root folder that contains all of the source code that should be executed.
:param yaml_config_file: The path to the YAML file that contains values to supply into sys.argv.
:param post_cross_validation_hook: A function to call after waiting for completion of cross validation runs.
@ -116,17 +115,17 @@ class Runner:
project_root: Path,
yaml_config_file: Path,
post_cross_validation_hook: Optional[PostCrossValidationHookSignature] = None,
model_deployment_hook: Optional[ModelDeploymentHookSignature] = None,
command_line_args: Optional[List[str]] = None):
model_deployment_hook: Optional[ModelDeploymentHookSignature] = None):
self.project_root = project_root
self.yaml_config_file = yaml_config_file
self.post_cross_validation_hook = post_cross_validation_hook
self.model_deployment_hook = model_deployment_hook
self.command_line_args = command_line_args
# model_config and azure_config are placeholders for now, and are set properly when command line args are
# parsed.
self.model_config: ModelConfigBase = ModelConfigBase(azure_dataset_id="")
self.model_config: Optional[DeepLearningConfig] = None
self.azure_config: AzureConfig = AzureConfig()
# This should be typed as LightningContainer, but we don't always have that imported
self.lightning_container: Any = None
def parse_and_load_model(self) -> ParserResult:
"""
@ -138,47 +137,84 @@ class Runner:
"""
# Create a parser that will understand only the args we need for an AzureConfig
parser1 = create_runner_parser()
parser1_result = parse_args_and_add_yaml_variables(parser1,
yaml_config_file=self.yaml_config_file,
project_root=self.project_root,
args=self.command_line_args,
fail_on_unknown_args=False)
azure_config = AzureConfig(**parser1_result.args)
parser_result = parse_args_and_add_yaml_variables(parser1,
yaml_config_file=self.yaml_config_file,
project_root=self.project_root,
fail_on_unknown_args=False)
azure_config = AzureConfig(**parser_result.args)
azure_config.project_root = self.project_root
self.azure_config = azure_config
self.model_config = None # type: ignore
self.model_config = None
self.lightning_container = None
if not azure_config.model:
raise ValueError("Parameter 'model' needs to be set to tell InnerEye which model to run.")
model_config_loader: ModelConfigLoader = ModelConfigLoader(**parser1_result.args)
# Create the model as per the "model" commandline option
model_config = model_config_loader.create_model_config_from_name(
model_name=azure_config.model
)
# This model will be either a classification model or a segmentation model. Those have different
# fields that could be overridden on the command line. Create a parser that understands the fields we need
# for the actual model type. We feed this parser with the YAML settings and commandline arguments that the
# first parser did not recognize.
parser2 = type(model_config).create_argparser()
parser2_result = parse_arguments(parser2,
settings_from_yaml=parser1_result.unknown_settings_from_yaml,
args=parser1_result.unknown,
fail_on_unknown_args=True)
# Apply the overrides and validate. Overrides can come from either YAML settings or the commandline.
model_config.apply_overrides(parser1_result.unknown_settings_from_yaml)
model_config.apply_overrides(parser2_result.overrides)
model_config.validate()
# Set the file system related configs, they might be affected by the overrides that were applied.
logging.info("Creating the adjusted output folder structure.")
model_config.create_filesystem(self.project_root)
model_config_loader: ModelConfigLoader = ModelConfigLoader(**parser_result.args)
# Create the model as per the "model" commandline option. This can return either a built-in config
# of type DeepLearningConfig, or a LightningContainer.
config_or_container = model_config_loader.create_model_config_from_name(model_name=azure_config.model)
def parse_overrides_and_apply(c: object, previous_parser_result: ParserResult) -> ParserResult:
assert isinstance(c, GenericConfig)
parser = type(c).create_argparser()
# For each parser, feed in the unknown settings from the previous parser. All commandline args should
# be consumed by name, hence fail if there is something that is still unknown.
parser_result = parse_arguments(parser,
settings_from_yaml=previous_parser_result.unknown_settings_from_yaml,
args=previous_parser_result.unknown,
fail_on_unknown_args=True)
# Apply the overrides and validate. Overrides can come from either YAML settings or the commandline.
c.apply_overrides(parser_result.known_settings_from_yaml)
c.apply_overrides(parser_result.overrides)
c.validate()
return parser_result
# Now create a parser that understands overrides at model/container level.
parser_result = parse_overrides_and_apply(config_or_container, parser_result)
if has_torch and isinstance(config_or_container, LightningContainer):
self.lightning_container = config_or_container
elif isinstance(config_or_container, DeepLearningConfig):
# Built-in InnerEye models: A fake container for these models will be created in MLRunner
self.model_config = config_or_container
else:
raise ValueError(f"Don't know how to handle a loaded configuration of type {type(config_or_container)}")
if azure_config.extra_code_directory:
exist = "exists" if Path(azure_config.extra_code_directory).exists() else "does not exist"
logging.info(f"extra_code_directory is {azure_config.extra_code_directory}, which {exist}")
else:
logging.info("extra_code_directory is unset")
self.model_config = model_config
return parser2_result
return parser_result
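A standalone illustration of the two-stage parsing that parse_and_load_model performs: the first parser consumes Azure-level arguments, whatever it does not recognize is handed to a model/container-level parser, and anything still left over is an error. The argument names below are examples only.

import argparse

# Stage 1: Azure-level arguments; everything unknown is kept for the next stage.
azure_parser = argparse.ArgumentParser()
azure_parser.add_argument("--model", type=str)
azure_args, leftover = azure_parser.parse_known_args(["--model", "HelloContainer", "--num_epochs", "2"])

# Stage 2: model/container-level arguments must consume all remaining args.
model_parser = argparse.ArgumentParser()
model_parser.add_argument("--num_epochs", type=int)
model_args, unknown = model_parser.parse_known_args(leftover)
if unknown:
    # Mirrors fail_on_unknown_args=True above.
    raise ValueError(f"Unknown arguments: {unknown}")
print(azure_args.model, model_args.num_epochs)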
def run(self) -> Tuple[ModelConfigBase, Optional[Run]]:
def _get_property_from_config_or_container(self, name: str) -> Any:
"""
Reads out a property or attribute from either the model configuration (if that is a built-in InnerEye
model) or the lightning container.
:param name: The name of the property to read.
:return: The property value, coming from either the model config or the container.
"""
if isinstance(self.model_config, DeepLearningConfig):
return getattr(self.model_config, name)
elif self.lightning_container is not None:
return getattr(self.lightning_container, name)
else:
raise ValueError(f"Did not expect config of type {type(self.model_config)} and container of type "
f"{type(self.lightning_container)}")
@property
def perform_cross_validation(self) -> bool:
"""
Returns True if cross validation will be performed as part of the training procedure.
"""
return self._get_property_from_config_or_container("perform_cross_validation")
@property
def azure_dataset_id(self) -> str:
"""
Returns the name of the Azure dataset that should be used.
"""
return self._get_property_from_config_or_container("azure_dataset_id")
def run(self) -> Tuple[Optional[DeepLearningConfig], Optional[Run]]:
"""
The main entry point for training and testing models from the commandline. This chooses a model to train
via a commandline argument, runs training or testing, and writes all required info to disk and logs.
@ -188,10 +224,12 @@ class Runner:
# Usually, when we set logging to DEBUG, we want diagnostics about the model
# build itself, but not the tons of debug information that AzureML submissions create.
logging_to_stdout(logging.INFO)
may_initialize_rpdb()
initialize_rpdb()
user_agent.append(azure_util.INNEREYE_SDK_NAME, azure_util.INNEREYE_SDK_VERSION)
self.parse_and_load_model()
if self.model_config is not None and self.model_config.perform_cross_validation:
if self.perform_cross_validation:
if self.lightning_container is not None:
raise NotImplementedError("Cross validation for LightningContainer models is not yet supported.")
# force hyperdrive usage if performing cross validation
self.azure_config.hyperdrive = True
run_object: Optional[Run] = None
@ -208,23 +246,24 @@ class Runner:
"""
# The adal package creates a logging.info line each time it gets an authentication token, avoid that.
logging.getLogger('adal-python').setLevel(logging.WARNING)
if not self.model_config.azure_dataset_id:
raise ValueError("When running on AzureML, the 'azure_dataset_id' property must be set.")
model_config_overrides = str(self.model_config.overrides)
# PyJWT prints out warnings that are beyond our control
warnings.filterwarnings("ignore", category=DeprecationWarning)
if isinstance(self.model_config, DeepLearningConfig) and not self.azure_dataset_id:
raise ValueError("When running an InnerEye built-in model in AzureML, the 'azure_dataset_id' "
"property must be set.")
hyperdrive_func = lambda run_config: self.model_config.get_hyperdrive_config(run_config) # type: ignore
source_config = SourceConfig(
root_folder=self.project_root,
entry_script=Path(sys.argv[0]).resolve(),
conda_dependencies_files=get_all_environment_files(self.project_root),
hyperdrive_config_func=lambda run_config: self.model_config.get_hyperdrive_config(run_config),
# For large jobs, upload of results times out frequently because of large checkpoint files. Default is 600
hyperdrive_config_func=hyperdrive_func,
# For large jobs, upload of results can time out because of large checkpoint files. Default is 600
upload_timeout_seconds=86400,
)
source_config.set_script_params_except_submit_flag()
assert self.model_config.azure_dataset_id is not None # to stop mypy complaining about next line
azure_run = submit_to_azureml(self.azure_config, source_config, model_config_overrides,
self.model_config.azure_dataset_id)
azure_run = submit_to_azureml(self.azure_config, source_config, self.azure_dataset_id)
logging.info("Job submission to AzureML done.")
if self.azure_config.pytest_mark:
if self.azure_config.pytest_mark and self.azure_config.wait_for_completion:
# The AzureML job can optionally run pytest. Attempt to download it to the current directory.
# A build step will pick up that file and publish it to Azure DevOps.
# If pytest_mark is set, this file must exist.
@ -246,21 +285,17 @@ class Runner:
# build itself, but not the tons of debug information that AzureML submissions create.
logging_to_stdout(self.azure_config.log_level)
suppress_logging_noise()
error_messages = []
# For the PR build in AzureML, we can either run pytest, or train the simple PR model. Running both
# only works when using DDP_spawn, but that has the side effect of messing up memory consumption of the
# large models.
if self.azure_config.pytest_mark:
try:
outputs_folder = Path.cwd() / fixed_paths.DEFAULT_AML_UPLOAD_DIR
pytest_passed, results_file_path = run_pytest(self.azure_config.pytest_mark, outputs_folder)
if not pytest_passed:
pytest_failures = f"Not all PyTest tests passed. See {results_file_path}"
logging.error(pytest_failures)
error_messages.append(pytest_failures)
except Exception as ex:
print_exception(ex, "Unable to run PyTest.")
error_messages.append(f"Unable to run PyTest: {ex}")
outputs_folder = Path.cwd() / fixed_paths.DEFAULT_AML_UPLOAD_DIR
pytest_passed, results_file_path = run_pytest(self.azure_config.pytest_mark, outputs_folder)
if not pytest_passed:
# Terminate if pytest has failed. This makes the smoke test in
# PR builds fail if pytest fails.
pytest_failures = f"Not all PyTest tests passed. See {results_file_path}"
raise ValueError(pytest_failures)
else:
# Set environment variables for multi-node training if needed.
# In particular, the multi-node environment variables should NOT be set in single node
@ -268,20 +303,14 @@ class Runner:
# (https://github.com/microsoft/InnerEye-DeepLearning/issues/395)
if self.azure_config.num_nodes > 1:
set_environment_variables_for_multi_node()
logging.info("Creating the output folder structure.")
ml_runner = self.create_ml_runner()
ml_runner.setup()
ml_runner.start_logging_to_file()
try:
logging_to_file(self.model_config.logs_folder / LOG_FILE_NAME)
try:
self.create_ml_runner().run()
except Exception as ex:
print_exception(ex, "Model training/testing failed.")
error_messages.append(f"Training failed: {ex}")
ml_runner.run()
finally:
disable_logging_to_file()
# Terminate if pytest or model training has failed. This makes the smoke test in
# PR builds fail if pytest fails.
if error_messages:
raise ValueError(
f"At least one component of the runner failed: {os.linesep} {os.linesep.join(error_messages)}")
def create_ml_runner(self) -> Any:
"""
@ -289,10 +318,11 @@ class Runner:
"""
# This import statement cannot be at the beginning of the file because it will cause import
# of packages that are not available inside the azure_runner.yml environment, in particular pytorch.
# That is also why we specify the return type as Any rather than MLRunner.
# That is also why we specify the return type is Any rather than MLRunner.
from InnerEye.ML.run_ml import MLRunner
return MLRunner(
model_config=self.model_config,
container=self.lightning_container,
azure_config=self.azure_config,
project_root=self.project_root,
post_cross_validation_hook=self.post_cross_validation_hook,
@ -319,17 +349,15 @@ def default_post_cross_validation_hook(config: ModelConfigBase, root_folder: Pat
def run(project_root: Path,
yaml_config_file: Path,
post_cross_validation_hook: Optional[PostCrossValidationHookSignature] = None,
model_deployment_hook: Optional[ModelDeploymentHookSignature] = None,
command_line_args: Optional[List[str]] = None) -> \
Tuple[ModelConfigBase, Optional[Run]]:
model_deployment_hook: Optional[ModelDeploymentHookSignature] = None) -> \
Tuple[Optional[DeepLearningConfig], Optional[Run]]:
"""
The main entry point for training and testing models from the commandline. This chooses a model to train
via a commandline argument, runs training or testing, and writes all required info to disk and logs.
:return: If submitting to AzureML, returns the model configuration that was used for training,
including commandline overrides applied (if any). For details on the arguments, see the constructor of Runner.
"""
runner = Runner(project_root, yaml_config_file, post_cross_validation_hook,
model_deployment_hook, command_line_args)
runner = Runner(project_root, yaml_config_file, post_cross_validation_hook, model_deployment_hook)
return runner.run()

View file

@ -17,7 +17,7 @@ from InnerEye.ML.common import ModelExecutionMode
from InnerEye.ML.config import SegmentationModelBase
from InnerEye.ML.plotting import segmentation_and_groundtruth_plot, surface_distance_ground_truth_plot
from InnerEye.ML.utils import surface_distance_utils as sd_util
from InnerEye.ML.utils.config_util import ModelConfigLoader
from InnerEye.ML.utils.config_loader import ModelConfigLoader
from InnerEye.ML.utils.csv_util import get_worst_performing_outliers, load_csv
from InnerEye.ML.utils.image_util import multi_label_array_to_binary
from InnerEye.ML.utils.io_util import load_nifti_image
@ -105,10 +105,8 @@ def main() -> None:
if config_model is None:
raise ValueError("The name of the model to train must be given in the --model argument.")
model_config = ModelConfigLoader[SegmentationModelBase]().create_model_config_from_name(
config_model,
overrides=parser_result.overrides
)
model_config = ModelConfigLoader().create_model_config_from_name(config_model)
model_config.apply_overrides(parser_result.overrides, should_validate=True)
execution_mode = surface_distance_config.execution_mode
run_mode = surface_distance_config.run_mode

View file

@ -5,6 +5,7 @@
import logging
import os
import uuid
from builtins import property
from pathlib import Path
from typing import List, Optional
from urllib.parse import urlparse
@ -15,7 +16,8 @@ from azureml.core import Run
from InnerEye.Azure.azure_config import AzureConfig
from InnerEye.Common import fixed_paths
from InnerEye.ML.deep_learning_config import DeepLearningConfig, WEIGHTS_FILE
from InnerEye.ML.deep_learning_config import OutputParams, WEIGHTS_FILE
from InnerEye.ML.lightning_container import LightningContainer
from InnerEye.ML.utils.run_recovery import RunRecovery
@ -25,22 +27,29 @@ class CheckpointHandler:
azure config and model config.
"""
def __init__(self, model_config: DeepLearningConfig, azure_config: AzureConfig,
def __init__(self, container: LightningContainer, azure_config: AzureConfig,
project_root: Path, run_context: Optional[Run] = None):
self.azure_config = azure_config
self.model_config = model_config
self.container = container
self.run_recovery: Optional[RunRecovery] = None
self.project_root = project_root
self.run_context = run_context
self.local_weights_path: Optional[Path] = None
self.has_continued_training = False
@property
def output_params(self) -> OutputParams:
"""
Gets the part of the configuration that is responsible for output paths.
"""
return self.container
def download_checkpoints_from_hyperdrive_child_runs(self, hyperdrive_parent_run: Run) -> None:
"""
Downloads the best checkpoints from all child runs of a Hyperdrive parent run. This is used to gather results
for ensemble creation.
"""
self.run_recovery = RunRecovery.download_best_checkpoints_from_child_runs(self.model_config,
self.run_recovery = RunRecovery.download_best_checkpoints_from_child_runs(self.output_params,
hyperdrive_parent_run)
# Check paths are good, just in case
for path in self.run_recovery.checkpoints_roots:
@ -55,11 +64,11 @@ class CheckpointHandler:
"""
if self.azure_config.run_recovery_id:
run_to_recover = self.azure_config.fetch_run(self.azure_config.run_recovery_id.strip())
self.run_recovery = RunRecovery.download_all_checkpoints_from_run(self.model_config, run_to_recover)
self.run_recovery = RunRecovery.download_all_checkpoints_from_run(self.output_params, run_to_recover)
else:
self.run_recovery = None
if self.model_config.weights_url or self.model_config.local_weights_path:
if self.container.weights_url or self.container.local_weights_path:
self.local_weights_path = self.get_and_save_modified_weights()
def additional_training_done(self) -> None:
@ -74,11 +83,11 @@ class CheckpointHandler:
checkpoint from there, otherwise use the checkpoints from the current run.
:return: Constructed checkpoint path to recover from.
"""
if self.model_config.start_epoch > 0 and not self.run_recovery:
start_epoch = self.container.start_epoch
if start_epoch > 0 and not self.run_recovery:
raise ValueError("Start epoch is > 0, but no run recovery object has been provided to resume training.")
if self.run_recovery and self.model_config.start_epoch == 0:
if self.run_recovery and start_epoch == 0:
raise ValueError("Run recovery set, but start epoch is 0. Please provide start epoch > 0 (for which a "
"checkpoint was saved in the previous run) to resume training from that run.")
@ -88,7 +97,7 @@ class CheckpointHandler:
checkpoints = self.run_recovery.get_recovery_checkpoint_paths()
if len(checkpoints) > 1:
raise ValueError(f"Recovering training of ensemble runs is not supported. Found more than one "
f"checkpoint for epoch {self.model_config.start_epoch}")
f"checkpoint for epoch {start_epoch}")
return checkpoints[0]
elif self.local_weights_path:
return self.local_weights_path
@ -129,7 +138,7 @@ class CheckpointHandler:
if self.has_continued_training:
# Checkpoint is from the current run, whether a new run or a run recovery which has been doing more
# training, so we look for it there.
checkpoint_from_current_run = self.model_config.get_path_to_best_checkpoint()
checkpoint_from_current_run = self.output_params.get_path_to_best_checkpoint()
if checkpoint_from_current_run.is_file():
logging.info("Using checkpoints from current run.")
checkpoint_paths = [checkpoint_from_current_run]
@ -172,7 +181,7 @@ class CheckpointHandler:
target_folder = self.project_root / fixed_paths.MODEL_WEIGHTS_DIR_NAME
target_folder.mkdir(exist_ok=True)
url = self.model_config.weights_url
url = self.container.weights_url
# assign the same filename as in the download url if possible, so that we can check for duplicates
# If that fails, map to a random uuid
@ -198,9 +207,9 @@ class CheckpointHandler:
"""
Get the path to the local weights to use or download them and set local_weights_path
"""
if self.model_config.local_weights_path:
weights_path = self.model_config.local_weights_path
elif self.model_config.weights_url:
if self.container.local_weights_path:
weights_path = self.container.local_weights_path
elif self.container.weights_url:
weights_path = self.download_weights()
else:
raise ValueError("Cannot download/modify weights - neither local_weights_path nor weights_url is set in"
@ -219,8 +228,8 @@ class CheckpointHandler:
if not weights_path or not weights_path.is_file():
raise FileNotFoundError(f"Could not find the weights file at {weights_path}")
modified_weights = self.model_config.load_checkpoint_and_modify(weights_path)
target_file = self.model_config.outputs_folder / WEIGHTS_FILE
modified_weights = self.container.load_checkpoint_and_modify(weights_path)
target_file = self.output_params.outputs_folder / WEIGHTS_FILE
torch.save(modified_weights, target_file)
return target_file
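The call above delegates to the container's load_checkpoint_and_modify, which gives bring-your-own-model users a hook to adapt third-party weights before they are saved as WEIGHTS_FILE. A sketch of such an override follows; the parameter name is assumed from the call site above, and the key renaming is purely illustrative.

from pathlib import Path
from typing import Any, Dict

import torch

from InnerEye.ML.lightning_container import LightningContainer


class ContainerWithPretrainedWeights(LightningContainer):  # hypothetical user container
    def load_checkpoint_and_modify(self, path_to_checkpoint: Path) -> Dict[str, Any]:
        checkpoint = torch.load(str(path_to_checkpoint), map_location="cpu")
        if "state_dict" in checkpoint:
            # Example adaptation: drop a "module." prefix left over from DataParallel training.
            checkpoint["state_dict"] = {key.replace("module.", "", 1): value
                                        for key, value in checkpoint["state_dict"].items()}
        return checkpoint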
@ -228,4 +237,4 @@ class CheckpointHandler:
"""
Returns true if the optimizer should be loaded from checkpoint. Looks at the model config to determine this.
"""
return self.model_config.start_epoch > 0
return self.container.start_epoch > 0

View file

@ -5,21 +5,19 @@
import importlib
import inspect
import logging
from importlib._bootstrap import ModuleSpec
from importlib.util import find_spec
from pathlib import Path
from typing import Any, Dict, Generic, List, Optional, TypeVar
from typing import Any, Dict, List, Optional
import param
from importlib._bootstrap import ModuleSpec
from InnerEye.Common.common_util import path_to_namespace
from InnerEye.Common.generic_parsing import GenericConfig
from InnerEye.ML.model_config_base import ModelConfigBase
C = TypeVar('C', bound=ModelConfigBase)
from InnerEye.ML.deep_learning_config import DeepLearningConfig
class ModelConfigLoader(GenericConfig, Generic[C]):
class ModelConfigLoader(GenericConfig):
"""
Helper class to manage model config loading
"""
@ -43,20 +41,21 @@ class ModelConfigLoader(GenericConfig, Generic[C]):
from InnerEye.ML import configs
return configs.__name__
def create_model_config_from_name(self, model_name: str, overrides: Optional[Dict[str, Any]] = None) -> C:
def create_model_config_from_name(self, model_name: str) -> DeepLearningConfig:
"""
Returns a segmentation or classification model configuration for a model of the given name.
Returns a model configuration for a model of the given name. This can be either a segmentation or
classification configuration for an InnerEye built-in model, or a LightningContainer.
To avoid having to import torch here, there are no references to LightningContainer.
Searches the provided search modules recursively for a class member called <model_name>.
:param model_name: Name of the model for which to get the config.
:param overrides: Model properties to override.
"""
if not model_name:
raise ValueError("Unable to load a model configuration because the model name is missing.")
configs: Dict[str, C] = {}
configs: Dict[str, DeepLearningConfig] = {}
def _get_model_config(module_spec: ModuleSpec) -> Optional[C]:
def _get_model_config(module_spec: ModuleSpec) -> Optional[DeepLearningConfig]:
"""
Given a module specification check to see if it has a class property with
the <model_name> provided, and instantiate that config class with the
@ -66,6 +65,7 @@ class ModelConfigLoader(GenericConfig, Generic[C]):
"""
# noinspection PyBroadException
try:
logging.debug(f"Importing {module_spec.name}")
target_module = importlib.import_module(module_spec.name)
# The "if" clause checks that obj is a class, of the desired name, that is
# defined in this module rather than being imported into it (and hence potentially
@ -74,7 +74,7 @@ class ModelConfigLoader(GenericConfig, Generic[C]):
if inspect.isclass(obj)
and name == model_name
and inspect.getmodule(obj) == target_module)
logging.info(f"Found class {_class.name} in file {module_spec.origin}")
logging.info(f"Found class {_class} in file {module_spec.origin}")
# ignore the exception which will occur if the provided module cannot be loaded
# or the loaded module does not have the required class as a member
except Exception as e:
@ -82,13 +82,7 @@ class ModelConfigLoader(GenericConfig, Generic[C]):
if exception_text != "":
logging.warning(f"(from attempt to import module {module_spec.name}): {exception_text}")
return None
model_config: ModelConfigBase = _class()
# apply the overrides to the model
if overrides is not None:
model_config.apply_overrides(overrides)
# The parameters have presumably changed, so we need to re-validate.
model_config.validate()
model_config: DeepLearningConfig = _class()
return model_config
def _search_recursively_and_store(module_search_spec: ModuleSpec) -> None:
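For orientation, a short usage sketch of this loader, matching how it is exercised in the tests later in this commit (the namespace and model name below are examples, not requirements):

from InnerEye.ML.utils.config_loader import ModelConfigLoader

# Search the default InnerEye.ML.configs namespace plus an additional namespace.
loader = ModelConfigLoader(model_configs_namespace="Tests.ML.configs")
# The result is either a built-in InnerEye model configuration or a LightningContainer.
config = loader.create_model_config_from_name("DummyModel")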


@ -261,7 +261,7 @@ def load_dicom_image(path: PathOrString) -> np.ndarray:
"""
ds = dicom.dcmread(path)
pixels = ds.pixel_array
bits_stored = ds.BitsStored
bits_stored = int(ds.BitsStored) # type: ignore
if ds.PhotometricInterpretation == PhotometricInterpretation.MONOCHROME1.value:
pixel_repr = ds.PixelRepresentation
if pixel_repr == 0: # unsigned
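As background for the MONOCHROME1 branch above: in that photometric interpretation the minimum stored value is rendered as white, so images are conventionally inverted. A sketch of that inversion for unsigned data, stated as the general DICOM convention rather than the exact code in this function:

import numpy as np

def invert_monochrome1_unsigned(pixels: np.ndarray, bits_stored: int) -> np.ndarray:
    # For unsigned MONOCHROME1 data, map each value v to (2**bits_stored - 1) - v,
    # so that low stored values become dark and high values become bright.
    max_value = 2 ** bits_stored - 1
    return max_value - pixels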


@ -9,7 +9,7 @@ from typing import Dict, List
from torch.optim.lr_scheduler import CosineAnnealingLR, ExponentialLR, LambdaLR, MultiStepLR, StepLR, _LRScheduler
from torch.optim.optimizer import Optimizer
from InnerEye.ML.deep_learning_config import DeepLearningConfig, LRSchedulerType, LRWarmUpType
from InnerEye.ML.deep_learning_config import LRSchedulerType, LRWarmUpType, OptimizerParams
def get_current_learning_rates(optimizer: Optimizer) -> List[float]:
@ -23,6 +23,7 @@ class LinearWarmUp(_LRScheduler):
"""
Implements linear warmup up to a given initial learning rate.
"""
def __init__(self, optimizer: Optimizer, warmup_epochs: int, final_lr: float, last_epoch: int = -1):
if warmup_epochs < 0:
raise ValueError("The number of warmup epochs must be >= 0.")
@ -60,9 +61,10 @@ class SchedulerWithWarmUp(_LRScheduler):
of the normal schedulers.
"""
def __init__(self, args: DeepLearningConfig, optimizer: Optimizer, last_epoch: int = -1):
def __init__(self, args: OptimizerParams, optimizer: Optimizer, num_epochs: int, last_epoch: int = -1):
self.optimizer = optimizer
self.last_epoch = last_epoch
self.num_epochs = num_epochs
self.warmup_epochs = 0 if args.l_rate_warmup == LRWarmUpType.NoWarmUp else args.l_rate_warmup_epochs
self._scheduler = self.get_scheduler(args)
# This must be called after self.get_scheduler, because we want the optimizer to have the learning rate
@ -75,12 +77,12 @@ class SchedulerWithWarmUp(_LRScheduler):
self.min_l_rate = args.min_l_rate
super().__init__(optimizer, last_epoch)
def get_scheduler(self, args: DeepLearningConfig) -> _LRScheduler:
def get_scheduler(self, args: OptimizerParams) -> _LRScheduler:
"""
Create the LR scheduler that will be used after warmup, based on the config params.
"""
scheduler: _LRScheduler
epochs_after_warmup = args.num_epochs - self.warmup_epochs
epochs_after_warmup = self.num_epochs - self.warmup_epochs
if args.l_rate_scheduler == LRSchedulerType.Exponential:
scheduler = ExponentialLR(optimizer=self.optimizer,
gamma=args.l_rate_exponential_gamma,
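To make the warmup bookkeeping concrete, here is a small standalone sketch of linear warmup implemented with a plain LambdaLR, independent of the SchedulerWithWarmUp class above (the model, learning rate and epoch counts are placeholders):

import torch
from torch.optim.lr_scheduler import LambdaLR

model = torch.nn.Linear(4, 1)  # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
warmup_epochs = 5

def warmup_factor(epoch: int) -> float:
    # Scale the learning rate linearly from 1/warmup_epochs up to 1.0, then keep it constant.
    return (epoch + 1) / warmup_epochs if epoch < warmup_epochs else 1.0

scheduler = LambdaLR(optimizer, lr_lambda=warmup_factor)
for epoch in range(10):
    # ... one epoch of training would go here ...
    optimizer.step()
    scheduler.step()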


@ -125,7 +125,7 @@ def get_number_of_voxels_per_class(labels: torch.Tensor) -> torch.Tensor:
if len(labels.shape) == 4:
labels = labels[None, ...]
return torch.tensor(np.count_nonzero(labels.cpu().numpy(), axis=(2, 3, 4)))
return torch.count_nonzero(labels, dim=(2, 3, 4))
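A quick self-contained check of the torch.count_nonzero call above, assuming labels of shape [batch, classes, Z, Y, X]; shapes and values are made up, and a PyTorch version where count_nonzero accepts a tuple of dimensions is assumed:

import torch

labels = torch.zeros(2, 3, 4, 4, 4)  # [batch=2, classes=3, Z=4, Y=4, X=4]
labels[0, 1, :2] = 1                 # 2 * 4 * 4 = 32 foreground voxels for class 1 of sample 0
per_class = torch.count_nonzero(labels, dim=(2, 3, 4))
assert per_class.shape == (2, 3)
assert per_class[0, 1].item() == 32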
def get_label_overlap_stats(labels: np.ndarray, label_names: List[str]) -> Dict[str, int]:


@ -19,7 +19,7 @@ from InnerEye.ML.config import ModelArchitectureConfig, PaddingMode, Segmentatio
basic_size_shrinkage
from InnerEye.ML.dataset.scalar_sample import ScalarItem
from InnerEye.ML.dataset.sequence_sample import ClassificationItemSequence
from InnerEye.ML.deep_learning_config import DeepLearningConfig, OptimizerType
from InnerEye.ML.deep_learning_config import OptimizerParams, OptimizerType
from InnerEye.ML.model_config_base import ModelConfigBase
from InnerEye.ML.models.architectures.base_model import BaseSegmentationModel, CropSizeConstraints
from InnerEye.ML.models.architectures.complex import ComplexModel
@ -38,7 +38,7 @@ from InnerEye.ML.utils.temperature_scaling import ModelWithTemperature
from InnerEye.ML.visualizers.model_summary import ModelSummary
def create_optimizer(config: DeepLearningConfig, parameters: Iterator[Parameter]) -> torch.optim.Optimizer:
def create_optimizer(config: OptimizerParams, parameters: Iterator[Parameter]) -> torch.optim.Optimizer:
# Select optimizer type
if config.optimizer_type in [OptimizerType.Adam, OptimizerType.AMSGrad]:
return torch.optim.Adam(parameters, config.l_rate,


@ -15,7 +15,7 @@ from InnerEye.Azure.azure_util import RUN_CONTEXT, download_outputs_from_run, fe
from InnerEye.Common.common_util import OTHER_RUNS_SUBDIR_NAME, check_properties_are_not_none
from InnerEye.ML.common import BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX, \
create_recovery_checkpoint_path, get_best_checkpoint_path
from InnerEye.ML.deep_learning_config import CHECKPOINT_FOLDER, DeepLearningConfig
from InnerEye.ML.deep_learning_config import CHECKPOINT_FOLDER, OutputParams
@dataclass(frozen=True)
@ -26,7 +26,7 @@ class RunRecovery:
checkpoints_roots: List[Path]
@staticmethod
def download_best_checkpoints_from_child_runs(config: DeepLearningConfig, run: Run) -> RunRecovery:
def download_best_checkpoints_from_child_runs(config: OutputParams, run: Run) -> RunRecovery:
"""
Downloads the best checkpoints from all child runs of the provided Hyperdrive parent run.
The checkpoints for the sibling runs will go into folder 'OTHER_RUNS/<cross_validation_split>'
@ -61,7 +61,7 @@ class RunRecovery:
return RunRecovery(checkpoints_roots=child_runs_checkpoints_roots)
@staticmethod
def download_all_checkpoints_from_run(config: DeepLearningConfig, run: Run) -> RunRecovery:
def download_all_checkpoints_from_run(config: OutputParams, run: Run) -> RunRecovery:
"""
Downloads all checkpoints of the provided run: The best checkpoint and the recovery checkpoint.
A single folder inside the checkpoints folder will be created that contains the downloaded checkpoints.


@ -1,51 +0,0 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
from dataclasses import dataclass, field
from typing import Any, List
from InnerEye.Common.type_annotations import DictStrFloat
@dataclass(frozen=True)
class ModelTrainingResults:
"""
Stores the results from training, with the results on training and validation data for each training epoch.
"""
train_results_per_epoch: List[DictStrFloat]
val_results_per_epoch: List[DictStrFloat]
train_diagnostics: Any
val_diagnostics: Any
optimal_temperature_scale_values_per_checkpoint_epoch: List[float] = field(default_factory=list)
def get_metric(self, is_training: bool, metric_type: str) -> List[float]:
"""
Gets a scalar metric out of either the list of training or the list of validation results. This returns
the value that a specific metric attains in all of the epochs.
:param is_training: If True, read metrics from the `train_results_per_epoch` field, if False read from the
`val_results_per_epoch` field.
:param metric_type: The metric to extract.
:return: A list of floating point numbers, with one entry per entry in the training or validation results.
"""
metrics = self.train_results_per_epoch if is_training else self.val_results_per_epoch
return [m[metric_type] for m in metrics]
def get_training_metric(self, metric_type: str) -> List[float]:
"""
Gets a scalar metric from the list of training results. This returns
the value that a specific metric attains in all of the epochs.
:param metric_type: The metric to extract.
:return: A list of floating point numbers, with one entry per entry in the training results.
"""
return self.get_metric(is_training=True, metric_type=metric_type)
def get_validation_metric(self, metric_type: str) -> List[float]:
"""
Gets a scalar metric from the list of validation results. This returns
the value that a specific metric attains in all of the epochs.
:param metric_type: The metric to extract.
:return: A list of floating point numbers, with one entry per entry in the validation results.
"""
return self.get_metric(is_training=False, metric_type=metric_type)


@ -16,8 +16,7 @@ from InnerEye.ML.dataset.cropping_dataset import CroppingDataset
from InnerEye.ML.dataset.full_image_dataset import FullImageDataset
from InnerEye.ML.dataset.sample import Sample
from InnerEye.ML.plotting import resize_and_save, scan_with_transparent_overlay
from InnerEye.ML.utils import augmentation, io_util, ml_util
from InnerEye.ML.utils.config_util import ModelConfigLoader
from InnerEye.ML.utils import augmentation, io_util
# The name of the folder inside the default outputs folder that will hold plots that show the effect of
# sampling random patches
from InnerEye.ML.utils.image_util import get_unit_image_header
@ -113,7 +112,7 @@ def visualize_random_crops_for_dataset(config: SegmentationModelBase, output_fol
for training. Visualizations are stored in both Nifti format, and as 3 PNG thumbnail files, in the output folder.
:param config: The model configuration.
:param output_folder: The folder in which the visualizations should be written. If not provided, use a subfolder
"patch_sampling" in the models's default output folder
"patch_sampling" in the model's default output folder
"""
dataset_splits = config.get_dataset_splits()
# Load a sample using the full image data loader
@ -123,24 +122,3 @@ def visualize_random_crops_for_dataset(config: SegmentationModelBase, output_fol
for sample_index in range(count):
sample = full_image_dataset.get_samples_at_index(index=sample_index)[0]
visualize_random_crops(sample, config, output_folder=output_folder)
def main(args: CheckPatchSamplingConfig) -> None:
# Identify paths to inputs and outputs
commandline_args = {
"train_batch_size": 1,
"local_dataset": Path(args.local_dataset)
}
output_folder = Path(args.output_folder)
output_folder.mkdir(parents=True, exist_ok=True)
# Create a config file
config = ModelConfigLoader[SegmentationModelBase]().create_model_config_from_name(
args.model_name, overrides=commandline_args)
config.show_patch_sampling = args.number_samples
ml_util.set_random_seed(config.random_seed)
visualize_random_crops_for_dataset(config, output_folder=output_folder)
if __name__ == "__main__":
main(CheckPatchSamplingConfig.parse_args())
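With the main() entry point above removed, a hedged sketch of how visualize_random_crops_for_dataset could still be driven from a small script; the model name, dataset path and the module path of this file are assumptions, while the loader call follows the new signature used elsewhere in this commit:

from pathlib import Path

from InnerEye.ML.utils import ml_util
from InnerEye.ML.utils.config_loader import ModelConfigLoader
# Assumed module path for the function defined in this file:
from InnerEye.ML.visualizers.patch_sampling import visualize_random_crops_for_dataset

config = ModelConfigLoader().create_model_config_from_name("Lung")  # placeholder model name
config.local_dataset = Path("/data/lung")                           # placeholder dataset folder
config.show_patch_sampling = 10
ml_util.set_random_seed(config.random_seed)
visualize_random_crops_for_dataset(config, output_folder=Path("patch_sampling"))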


@ -11,6 +11,7 @@ On the modelling side, this toolbox supports
- Segmentation models
- Classification and regression models
- Sequence models
- Adding cloud support to any PyTorch Lightning model, via a [bring-your-own-model setup](docs/bring_your_own_model.md)
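To give a flavour of that bring-your-own-model setup, a minimal toy container is sketched below; the model and data module are placeholders, and docs/bring_your_own_model.md remains the authoritative reference:

import torch
from pytorch_lightning import LightningDataModule, LightningModule
from torch.utils.data import DataLoader, TensorDataset

from InnerEye.ML.lightning_container import LightningContainer

class TinyRegression(LightningModule):
    """A toy 1D regression model, for illustration only."""

    def __init__(self) -> None:
        super().__init__()
        self.model = torch.nn.Linear(1, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.model(x)

    def training_step(self, batch, batch_idx):  # type: ignore
        x, y = batch
        loss = torch.nn.functional.mse_loss(self.forward(x), y)
        self.log("loss", loss)
        return loss

    def configure_optimizers(self) -> torch.optim.Optimizer:
        return torch.optim.Adam(self.parameters(), lr=1e-2)

class TinyDataModule(LightningDataModule):
    """A toy data module that serves a handful of (x, x) pairs."""

    def train_dataloader(self, *args, **kwargs) -> DataLoader:
        x = torch.arange(10, dtype=torch.float32).unsqueeze(1)
        return DataLoader(TensorDataset(x, x), batch_size=2)

class TinyContainer(LightningContainer):
    """A hypothetical container that makes the toy model trainable via the InnerEye runner."""

    def __init__(self) -> None:
        super().__init__()
        self.num_epochs = 10

    def create_model(self) -> LightningModule:
        return TinyRegression()

    def get_data_module(self) -> LightningDataModule:
        return TinyDataModule()

Such a container is then selected on the runner commandline by its class name, in the same way as the built-in configurations.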
Classification, regression, and sequence models can be built with only images as inputs, or a combination of images
and non-imaging data as input. This supports typical use cases on medical data where measurements, biomarkers,


@ -41,7 +41,7 @@ from InnerEye.Scripts import submit_for_inference
from Tests.ML.util import assert_nifti_content, get_default_azure_config, get_nifti_shape
FALLBACK_ENSEMBLE_RUN = "refs_pull_432_merge:HD_3af84e4a-0043-4260-8be2-04ce9ab09b1f"
FALLBACK_SINGLE_RUN = "refs_pull_407_merge_1614271518_cdbeb28e"
FALLBACK_SINGLE_RUN = "refs_pull_407_merge:refs_pull_407_merge_1614271518_cdbeb28e"
FALLBACK_2NODE_RUN = "refs_pull_385_merge:refs_pull_385_merge_1612421371_ba12a007"
FALLBACK_CV_GLAUCOMA = "refs_pull_432_merge_1618332810_b5d10d74"


@ -1,68 +0,0 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import pytest
from InnerEye.Azure.azure_config import AzureConfig
from InnerEye.Azure.azure_util import CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY
from InnerEye.Common.build_config import BUILDINFORMATION_JSON, ExperimentResultLocation, \
build_information_to_dot_net_json, build_information_to_dot_net_json_file
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.config import SegmentationModelBase
from InnerEye.ML.scalar_config import ScalarModelBase
def test_build_config(test_output_dirs: OutputFolderForTests) -> None:
"""
Test that json with build information is created correctly.
"""
config = AzureConfig(
build_number=42,
build_user="user",
build_branch="branch",
build_source_id="00deadbeef",
build_source_author="author",
tag="tag",
model="model")
result_location = ExperimentResultLocation(azure_job_name="job")
net_json = build_information_to_dot_net_json(config, result_location)
expected = '{"BuildNumber": 42, "BuildRequestedFor": "user", "BuildSourceBranchName": "branch", ' \
'"BuildSourceVersion": "00deadbeef", "BuildSourceAuthor": "author", "ModelName": "model", ' \
'"ResultsContainerName": null, "ResultsUri": null, "DatasetFolder": null, "DatasetFolderUri": null, ' \
'"AzureBatchJobName": "job"}'
assert expected == net_json
result_folder = test_output_dirs.root_dir / "buildinfo"
build_information_to_dot_net_json_file(config, result_location, folder=result_folder)
result_file = result_folder / BUILDINFORMATION_JSON
assert result_file.exists()
assert result_file.read_text() == expected
def test_fields_are_set() -> None:
"""
Tests that expected fields are set when creating config classes.
"""
expected = [("hello", None), ("world", None)]
config = SegmentationModelBase(
should_validate=False,
ground_truth_ids=[x[0] for x in expected],
largest_connected_component_foreground_classes=expected
)
assert hasattr(config, CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY)
assert config.largest_connected_component_foreground_classes == expected
@pytest.mark.cpu_and_gpu
def test_dataset_reader_workers() -> None:
"""
Test to make sure the number of dataset reader workers are set correctly
"""
config = ScalarModelBase(
should_validate=False,
num_dataset_reader_workers=-1
)
if config.is_offline_run:
assert config.num_dataset_reader_workers == -1
else:
assert config.num_dataset_reader_workers == 0


@ -2,6 +2,7 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
from pathlib import Path
from unittest import mock
import pytest
@ -12,62 +13,71 @@ from InnerEye.Common.fixed_paths import DEFAULT_AML_UPLOAD_DIR, DEFAULT_LOGS_DIR
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.config import PhotometricNormalizationMethod, SegmentationModelBase
from InnerEye.ML.runner import Runner
from Tests.ML.configs.DummyModel import DummyModel
@pytest.mark.parametrize("is_default_namespace", [True, False])
@pytest.mark.parametrize("is_container", [True, False])
@pytest.mark.parametrize("is_offline_run", [True, False])
@pytest.mark.parametrize("set_output_to", [True, False])
def test_create_ml_runner_args(is_default_namespace: bool,
def test_create_ml_runner_args(is_container: bool,
test_output_dirs: OutputFolderForTests,
is_offline_run: bool,
set_output_to: bool) -> None:
"""Test round trip parsing of commandline arguments:
From arguments to the Azure runner to the arguments of the ML runner, checking that
whatever is passed on can be correctly parsed."""
whatever is passed on can be correctly parsed. It also checks that the output files go into the right place
in local runs and in AzureML."""
logging_to_stdout()
model_name = "Lung"
model_name = "DummyContainerWithPlainLightning" if is_container else "DummyModel"
if is_container:
dataset_folder = Path("download")
else:
local_dataset = DummyModel().local_dataset
assert local_dataset is not None
dataset_folder = local_dataset
outputs_folder = test_output_dirs.root_dir
project_root = fixed_paths.repository_root_directory()
if is_default_namespace:
model_configs_namespace = None
else:
model_configs_namespace = "Tests.ML.configs"
model_name = "DummyModel"
model_configs_namespace = "Tests.ML.configs"
args_list = [f"--model={model_name}", "--train=True", "--l_rate=100.0",
"--norm_method=Simple Norm", "--subscription_id", "Test1", "--tenant_id=Test2",
"--application_id", "Test3", "--azureml_datastore", "Test5",
"--pytest_mark", "gpu"]
"--subscription_id", "Test1", "--tenant_id=Test2",
"--application_id", "Test3", "--azureml_datastore", "Test5"]
# toggle the output_to flag off only for online runs
if set_output_to or is_offline_run:
args_list.append(f"--output_to={outputs_folder}")
if not is_container:
args_list.append("--norm_method=Simple Norm")
if not is_default_namespace:
args_list.append(f"--model_configs_namespace={model_configs_namespace}")
args_list.append(f"--model_configs_namespace={model_configs_namespace}")
with mock.patch("sys.argv", [""] + args_list):
with mock.patch("InnerEye.ML.deep_learning_config.is_offline_run_context", return_value=is_offline_run):
runner = Runner(project_root=project_root, yaml_config_file=fixed_paths.SETTINGS_YAML_FILE)
runner.parse_and_load_model()
azure_config = runner.azure_config
model_config = runner.model_config
with mock.patch("InnerEye.ML.run_ml.MLRunner.run", return_value=None):
with mock.patch("InnerEye.ML.run_ml.MLRunner.mount_or_download_dataset", return_value=dataset_folder):
runner = Runner(project_root=project_root, yaml_config_file=fixed_paths.SETTINGS_YAML_FILE)
runner.parse_and_load_model()
# Only when calling config.create_filesystem we expect to see the correct paths, and this happens
# inside run_in_situ
runner.run_in_situ()
azure_config = runner.azure_config
container_or_legacy_config = runner.lightning_container if is_container else runner.model_config
assert azure_config.model == model_name
assert model_config.l_rate == 100.0
assert model_config.norm_method == PhotometricNormalizationMethod.SimpleNorm
if not is_container:
assert container_or_legacy_config.norm_method == PhotometricNormalizationMethod.SimpleNorm
if set_output_to or is_offline_run:
# The actual output folder must be a subfolder of the folder given on the commandline. The folder will contain
# a timestamp, that will start with the year number, hence will start with 20...
assert str(model_config.outputs_folder).startswith(str(outputs_folder / "20"))
assert model_config.logs_folder == (model_config.outputs_folder / DEFAULT_LOGS_DIR_NAME)
assert str(container_or_legacy_config.outputs_folder).startswith(str(outputs_folder / "20"))
assert container_or_legacy_config.logs_folder == \
(container_or_legacy_config.outputs_folder / DEFAULT_LOGS_DIR_NAME)
else:
# For runs inside AzureML, the output folder is the project root (the root of the folders that are
# included in the snapshot). The "outputs_to" argument will be ignored.
assert model_config.outputs_folder == (project_root / DEFAULT_AML_UPLOAD_DIR)
assert model_config.logs_folder == (project_root / DEFAULT_LOGS_DIR_NAME)
assert container_or_legacy_config.outputs_folder == (project_root / DEFAULT_AML_UPLOAD_DIR)
assert container_or_legacy_config.logs_folder == (project_root / DEFAULT_LOGS_DIR_NAME)
assert not hasattr(model_config, "azureml_datastore")
assert azure_config.pytest_mark == "gpu"
assert not hasattr(container_or_legacy_config, "azureml_datastore")
def test_overridable_properties() -> None:
@ -146,6 +156,7 @@ def test_parsing_with_custom_yaml(test_output_dirs: OutputFolderForTests) -> Non
yaml_config_file=yaml_file)
loader_result = runner.parse_and_load_model()
assert runner.azure_config is not None
assert runner.model_config is not None
# This is only present in yaml
# This is present in yaml and command line, and the latter should be used.
assert runner.azure_config.tenant_id == "bar"


@ -8,7 +8,7 @@ from typing import Any, List, Optional, Tuple
import param
import pytest
from InnerEye.Common.generic_parsing import GenericConfig, IntTuple
from InnerEye.Common.generic_parsing import GenericConfig, IntTuple, create_from_matching_params
class ParamEnum(Enum):
@ -57,6 +57,7 @@ def test_create_parser() -> None:
"""
Check that parse_args works as expected, with both non default and default values.
"""
def check(arg: List[str], expected_key: str, expected_value: Any) -> None:
parsed = ParamClass.parse_args(arg)
assert getattr(parsed, expected_key) == expected_value
@ -127,3 +128,44 @@ def test_int_tuple_validation(value_idx_0: Any, value_idx_1: Any, value_idx_2: A
m.int_tuple = (value_idx_0, value_idx_1, value_idx_2)
else:
m.int_tuple = (value_idx_0, value_idx_1, value_idx_2)
class ClassFrom(param.Parameterized):
foo = param.String("foo")
bar = param.Integer(1)
baz = param.String("baz")
_private = param.String("private")
constant = param.String("constant", constant=True)
class ClassTo(param.Parameterized):
foo = param.String("foo2")
bar = param.Integer(2)
_private = param.String("private2")
constant = param.String("constant2", constant=True)
class NotParameterized:
foo = 1
def test_create_from_matching_params() -> None:
"""
Test if Parameterized objects can be cloned by looking at matching fields.
"""
class_from = ClassFrom()
class_to = create_from_matching_params(class_from, cls_=ClassTo)
assert isinstance(class_to, ClassTo)
assert class_to.foo == "foo"
assert class_to.bar == 1
# Constant fields should not be touched
assert class_to.constant == "constant2"
# Private fields must be copied over.
assert class_to._private == "private"
# Baz is only present in the "from" object, and should not be copied to the new object
assert not hasattr(class_to, "baz")
with pytest.raises(ValueError) as ex:
create_from_matching_params(class_from, NotParameterized)
assert "subclass of param.Parameterized" in str(ex)
assert "NotParameterized" in str(ex)


@ -2,11 +2,14 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import os
from pathlib import Path
import pytest
from InnerEye.Common import common_util
from InnerEye.Common.common_util import check_is_any_of, is_private_field_name, namespace_to_path, \
from InnerEye.Common.common_util import change_working_directory, check_is_any_of, is_private_field_name, \
namespace_to_path, \
path_to_namespace, print_exception
from InnerEye.Common.fixed_paths_for_tests import full_ml_test_data_path, tests_root_directory
from InnerEye.Common.output_directories import OutputFolderForTests
@ -106,3 +109,18 @@ def test_path_to_namespace(is_external: bool, test_output_dirs: OutputFolderForT
path=full_ml_test_data_path(),
root=tests_root_directory().parent
) == test_data.__name__
def test_change_dir(test_output_dirs: OutputFolderForTests) -> None:
"""
Test the context manager for changing directories.
"""
os.chdir(test_output_dirs.root_dir)
assert Path.cwd() == test_output_dirs.root_dir
new_dir = test_output_dirs.root_dir / "foo"
new_dir.mkdir()
with change_working_directory(new_dir):
assert Path.cwd() == new_dir
Path("bar.txt").touch()
assert Path.cwd() == test_output_dirs.root_dir
assert (new_dir / "bar.txt").is_file()


@ -0,0 +1,78 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
# Suppress all flake8 errors here: the imports that come after executable code below cause loads of warnings,
# and we can't suppress individual warnings only.
# flake8: noqa
import shutil
from pathlib import Path
from typing import Any, Optional
from _pytest.monkeypatch import MonkeyPatch
from pytorch_lightning import LightningDataModule, LightningModule
from InnerEye.Common.common_util import add_folder_to_sys_path_if_needed
from InnerEye.ML.configs.other.fastmri_varnet import VarNetWithImageLogging
from InnerEye.ML.lightning_container import LightningContainer
add_folder_to_sys_path_if_needed("fastMRI")
from fastmri.data import SliceDataset
from fastmri.data.subsample import create_mask_for_mask_type
from fastmri.data.transforms import VarNetDataTransform
from fastmri.pl_modules import FastMriDataModule
# This import can fail if written as "from tests.create_temp_data", even though fastMRI is already in the path.
from fastMRI.tests.create_temp_data import create_temp_data
class FastMriRandomData(FastMriDataModule):
def __init__(self) -> None:
data_path = Path.cwd() / "data"
if data_path.is_dir():
shutil.rmtree(str(data_path))
data_path.mkdir(exist_ok=False, parents=True)
_, _, metadata = create_temp_data(data_path)
def retrieve_metadata_mock(a: Any, fname: Any) -> Any:
return metadata[str(fname)]
# This is a bit flaky: we should undo this patch afterwards, but there is no obvious place to do so.
MonkeyPatch().setattr(SliceDataset, "_retrieve_metadata", retrieve_metadata_mock)
mask = create_mask_for_mask_type(mask_type_str="equispaced",
center_fractions=[0.08],
accelerations=[4])
# use random masks for train transform, fixed masks for val transform
train_transform = VarNetDataTransform(mask_func=mask, use_seed=False)
val_transform = VarNetDataTransform(mask_func=mask)
test_transform = VarNetDataTransform()
FastMriDataModule.__init__(self,
data_path=data_path / "knee_data",
challenge="multicoil",
train_transform=train_transform,
val_transform=val_transform,
test_transform=test_transform)
def prepare_data(self, *args: Any, **kwargs: Any) -> None:
print("FastMriRandomData.prepare_data")
def setup(self, stage: Optional[str] = None) -> None:
print("FastMriRandomData.setup")
class FastMriOnRandomData(LightningContainer):
def __init__(self) -> None:
super().__init__()
self.num_epochs = 1
# Restrict to a single GPU, because we have code in dataset creation that could cause race conditions
self.max_num_gpus = 1
def create_model(self) -> LightningModule:
return VarNetWithImageLogging()
def get_data_module(self) -> LightningDataModule:
# local_dataset is set via the commandline to a random folder for unit tests
return FastMriRandomData()


@ -0,0 +1,230 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
from pathlib import Path
from typing import Any, Dict, List, Tuple
import pandas as pd
import param
import torch
from pytorch_lightning import LightningDataModule, LightningModule
from pytorch_lightning.metrics import MeanSquaredError
from torch import Tensor
from torch.nn import Identity
from torch.utils.data import DataLoader, Dataset
from InnerEye.Common.fixed_paths_for_tests import full_ml_test_data_path
from InnerEye.ML.common import ModelExecutionMode
from InnerEye.ML.lightning_container import InnerEyeInference, LightningContainer, LightningModuleWithOptimizer
class DummyContainerWithDatasets(LightningContainer):
def __init__(self, has_local_dataset: bool = False, has_azure_dataset: bool = False):
super().__init__()
self.local_dataset = full_ml_test_data_path("lightning_module_data") if has_local_dataset else None
self.azure_dataset_id = "azure_dataset" if has_azure_dataset else ""
def create_model(self) -> LightningModule:
return LightningModuleWithOptimizer()
class DummyContainerWithAzureDataset(DummyContainerWithDatasets):
def __init__(self) -> None:
super().__init__(has_azure_dataset=True)
class DummyContainerWithoutDataset(DummyContainerWithDatasets):
pass
class DummyContainerWithLocalDataset(DummyContainerWithDatasets):
def __init__(self) -> None:
super().__init__(has_local_dataset=True)
class DummyContainerWithAzureAndLocalDataset(DummyContainerWithDatasets):
def __init__(self) -> None:
super().__init__(has_local_dataset=True, has_azure_dataset=True)
class InferenceWithParameters(LightningModule):
model_param = param.String(default="bar")
def __init__(self, container_param: str):
super().__init__()
class DummyContainerWithParameters(LightningContainer):
container_param = param.String(default="foo")
def __init__(self) -> None:
super().__init__()
def create_model(self) -> LightningModule:
return InferenceWithParameters(self.container_param)
class DummyRegressionPlainLightning(LightningModuleWithOptimizer):
"""
A class that only implements plain Lightning training and test steps. Ideally, we want to support importing any
plain Lightning module without further methods added. This class inherits from LightningModuleWithOptimizer, but
does not implement the inference_step method.
"""
def __init__(self, in_features: int = 1, *args: Any, **kwargs: Any):
super().__init__(*args, **kwargs)
self.l_rate = 1e-1
activation = Identity()
layers = [
torch.nn.Linear(in_features=in_features, out_features=1, bias=True),
activation
]
self.model = torch.nn.Sequential(*layers) # type: ignore
def forward(self, x: Tensor) -> Tensor: # type: ignore
return self.model(x)
def training_step(self, batch: Any, *args: Any, **kwargs: Any) -> torch.Tensor: # type: ignore
input, target = batch
prediction = self.forward(input)
loss = torch.nn.functional.mse_loss(prediction, target)
self.log("loss", loss, on_epoch=True, on_step=True)
return loss
def test_step(self, batch, batch_idx) -> torch.Tensor: # type: ignore
Path("test_step.txt").touch()
input, target = batch
prediction = self.forward(input)
loss = torch.nn.functional.mse_loss(prediction, target)
self.log("test_loss", loss, on_epoch=True, on_step=True)
return loss
def on_test_epoch_end(self) -> None:
Path("on_test_epoch_end.txt").touch()
pass
class DummyRegression(DummyRegressionPlainLightning, InnerEyeInference):
def __init__(self, in_features: int = 1, *args, **kwargs) -> None: # type: ignore
super().__init__(in_features=in_features, *args, **kwargs) # type: ignore
self.l_rate = 1e-1
self.dataset_split = ModelExecutionMode.TRAIN
activation = Identity()
layers = [
torch.nn.Linear(in_features=in_features, out_features=1, bias=True),
activation
]
self.model = torch.nn.Sequential(*layers) # type: ignore
def forward(self, x: Tensor) -> Tensor: # type: ignore
return self.model(x)
def training_step(self, batch, *args, **kwargs) -> torch.Tensor: # type: ignore
input, target = batch
prediction = self.forward(input)
loss = torch.nn.functional.mse_loss(prediction, target)
self.log("loss", loss, on_epoch=True, on_step=True)
return loss
def on_inference_start(self) -> None:
Path("on_inference_start.txt").touch()
self.inference_mse: Dict[ModelExecutionMode, float] = {}
def on_inference_epoch_start(self, dataset_split: ModelExecutionMode, is_ensemble_model: bool) -> None:
self.dataset_split = dataset_split
Path(f"on_inference_start_{self.dataset_split.value}.txt").touch()
self.mse = MeanSquaredError()
def inference_step(self, item: Tuple[Tensor, Tensor], batch_idx: int, model_output: torch.Tensor) -> None:
input, target = item
prediction = self.forward(input)
self.mse(prediction, target)
with Path(f"inference_step_{self.dataset_split.value}.txt").open(mode="a") as f:
f.write(f"{prediction.item()},{target.item()}\n")
def on_inference_epoch_end(self) -> None:
Path(f"on_inference_end_{self.dataset_split.value}.txt").touch()
self.inference_mse[self.dataset_split] = self.mse.compute().item()
self.mse.reset()
def on_inference_end(self) -> None:
Path("on_inference_end.txt").touch()
df = pd.DataFrame(columns=["Split", "MSE"],
data=[[split.value, mse] for split, mse in self.inference_mse.items()])
df.to_csv("metrics_per_split.csv", index=False)
class FixedDataset(Dataset):
def __init__(self, inputs_and_targets: List[Tuple[Any, Any]]):
super().__init__()
self.inputs_and_targets = inputs_and_targets
def __len__(self) -> int:
return len(self.inputs_and_targets)
def __getitem__(self, item: int) -> Tuple[Tensor, Tensor]:
input = torch.tensor([float(self.inputs_and_targets[item][0])])
target = torch.tensor([float(self.inputs_and_targets[item][1])])
return input, target
class FixedRegressionData(LightningDataModule):
def __init__(self) -> None:
super().__init__()
self.train_data = [(i, i) for i in range(1, 20, 3)]
self.val_data = [(i, i) for i in range(2, 20, 3)]
self.test_data = [(i, i) for i in range(3, 20, 3)]
def train_dataloader(self, *args: Any, **kwargs: Any) -> DataLoader:
return DataLoader(FixedDataset(self.train_data)) # type: ignore
def val_dataloader(self, *args: Any, **kwargs: Any) -> DataLoader:
return DataLoader(FixedDataset(self.val_data)) # type: ignore
def test_dataloader(self, *args: Any, **kwargs: Any) -> DataLoader:
return DataLoader(FixedDataset(self.test_data)) # type: ignore
class DummyContainerWithModel(LightningContainer):
def __init__(self) -> None:
super().__init__()
self.perform_training_set_inference = True
self.num_epochs = 50
self.l_rate = 1e-1
def setup(self) -> None:
assert self.local_dataset is not None
(self.local_dataset / "setup.txt").touch()
def create_model(self) -> LightningModule:
return DummyRegression()
def get_data_module(self) -> LightningDataModule:
return FixedRegressionData() # type: ignore
def create_report(self) -> None:
Path("create_report.txt").touch()
class DummyContainerWithInvalidTrainerArguments(LightningContainer):
def create_model(self) -> LightningModule:
return DummyRegression()
def get_trainer_arguments(self) -> Dict[str, Any]:
return {"no_such_argument": 1}
class DummyContainerWithPlainLightning(LightningContainer):
def __init__(self) -> None:
super().__init__()
self.num_epochs = 100
self.l_rate = 1e-2
def create_model(self) -> LightningModule:
return DummyRegressionPlainLightning()
def get_data_module(self) -> LightningDataModule:
return FixedRegressionData() # type: ignore


@ -21,7 +21,6 @@ from InnerEye.ML.deep_learning_config import TemperatureScalingConfig
from InnerEye.ML.lightning_models import transfer_batch_to_device
from InnerEye.ML.model_config_base import ModelTransformsPerExecutionMode
from InnerEye.ML.model_testing import create_metrics_dict_for_scalar_models
from InnerEye.ML.model_training import model_train
from InnerEye.ML.models.architectures.classification.image_encoder_with_mlp import ImageEncoder, ImagingFeatureType
from InnerEye.ML.models.architectures.sequential.rnn_classifier import RNNClassifier, RNNClassifierWithEncoder
from InnerEye.ML.run_ml import MLRunner
@ -34,7 +33,7 @@ from InnerEye.ML.utils.io_util import ImageAndSegmentations
from InnerEye.ML.utils.model_util import create_model_with_temperature_scaling, get_scalar_model_inputs_and_labels
from InnerEye.ML.utils.split_dataset import DatasetSplits
from InnerEye.ML.visualizers.grad_cam_hooks import VisualizationMaps
from Tests.ML.util import get_default_azure_config, get_default_checkpoint_handler
from Tests.ML.util import get_default_azure_config, model_train_unittest
SCAN_SIZE = (6, 64, 60)
@ -213,8 +212,7 @@ def test_rnn_classifier_via_config_1(use_combined_model: bool,
image_and_seg = ImageAndSegmentations[np.ndarray](images=np.random.uniform(0, 1, SCAN_SIZE),
segmentations=np.random.randint(0, 2, SCAN_SIZE))
with mock.patch('InnerEye.ML.utils.io_util.load_image_in_known_formats', return_value=image_and_seg):
model_train(config, get_default_checkpoint_handler(model_config=config,
project_root=test_output_dirs.root_dir))
model_train_unittest(config, dirs=test_output_dirs)
@pytest.mark.skipif(common_util.is_windows(), reason="Has issues on windows build")
@ -247,7 +245,7 @@ def test_run_ml_with_sequence_model(use_combined_model: bool,
with mock.patch('InnerEye.ML.utils.io_util.load_image_in_known_formats', return_value=image_and_seg):
azure_config = get_default_azure_config()
azure_config.train = True
MLRunner(config, azure_config).run()
MLRunner(config, azure_config=azure_config).run()
@pytest.mark.skipif(common_util.is_windows(), reason="Too slow on windows")
@ -379,8 +377,7 @@ def test_rnn_classifier_via_config_2(test_output_dirs: OutputFolderForTests) ->
config.num_epochs = 2
config.set_output_to(test_output_dirs.root_dir)
config.dataset_data_frame = _get_mock_sequence_dataset(dataset_contents)
results = model_train(config, get_default_checkpoint_handler(model_config=config,
project_root=test_output_dirs.root_dir))
results, _ = model_train_unittest(config, dirs=test_output_dirs)
actual_train_loss = results.get_metric(is_training=True, metric_type=MetricType.LOSS.value)[-1]
actual_val_loss = results.get_metric(is_training=False, metric_type=MetricType.LOSS.value)[-1]
@ -455,7 +452,7 @@ def test_run_ml_with_multi_label_sequence_model(test_output_dirs: OutputFolderFo
config.max_batch_grad_cam = 1
azure_config = get_default_azure_config()
azure_config.train = True
MLRunner(config, azure_config).run()
MLRunner(config, azure_config=azure_config).run()
# The metrics file should have one entry per epoch per subject per prediction target,
# for all the 3 prediction targets.
metrics_file = config.outputs_folder / "Train" / SUBJECT_METRICS_FILE_NAME


@ -20,7 +20,6 @@ from InnerEye.Common.type_annotations import TupleInt3
from InnerEye.ML.dataset.scalar_dataset import ScalarDataset
from InnerEye.ML.lightning_models import transfer_batch_to_device
from InnerEye.ML.model_config_base import ModelTransformsPerExecutionMode
from InnerEye.ML.model_training import model_train
from InnerEye.ML.models.architectures.classification.image_encoder_with_mlp import ImageEncoderWithMlp, \
ImagingFeatureType
from InnerEye.ML.run_ml import MLRunner
@ -34,7 +33,7 @@ from InnerEye.ML.utils.model_util import create_model_with_temperature_scaling,
from InnerEye.ML.utils.split_dataset import DatasetSplits
from InnerEye.ML.visualizers.grad_cam_hooks import VisualizationMaps
from InnerEye.ML.visualizers.model_summary import ModelSummary
from Tests.ML.util import get_default_azure_config, get_default_checkpoint_handler
from Tests.ML.util import get_default_azure_config, model_train_unittest
class ImageEncoder(ScalarModelBase):
@ -222,8 +221,7 @@ S3,week1,scan3.npy,True,6,60,Male,Val2
summarizer.generate_summary(input_sizes=input_size)
config.local_dataset = dataset_folder
config.validate()
model_train(config, checkpoint_handler=get_default_checkpoint_handler(model_config=config,
project_root=Path(test_output_dirs.root_dir)))
model_train_unittest(config, dirs=test_output_dirs)
# No further asserts here because the models are still in experimental state. Most errors would come
# from having invalid model architectures, which would throw runtime errors during training.
@ -231,13 +229,13 @@ S3,week1,scan3.npy,True,6,60,Male,Val2
@pytest.mark.skipif(common_util.is_windows(), reason="Too slow on windows")
@pytest.mark.gpu
@pytest.mark.parametrize(["encode_channels_jointly", "aggregation_type", "imaging_feature_type"],
[(False, AggregationType.Average, ImagingFeatureType.Segmentation),
(True, AggregationType.Average, ImagingFeatureType.Segmentation),
(False, AggregationType.Average, ImagingFeatureType.ImageAndSegmentation),
(True, AggregationType.Average, ImagingFeatureType.ImageAndSegmentation),
(True, AggregationType.GatedPooling, ImagingFeatureType.ImageAndSegmentation),
(True, AggregationType.MixPooling, ImagingFeatureType.ImageAndSegmentation),
(True, AggregationType.ZAdaptive3dAvg, ImagingFeatureType.ImageAndSegmentation)])
[(False, AggregationType.Average, ImagingFeatureType.Segmentation),
(True, AggregationType.Average, ImagingFeatureType.Segmentation),
(False, AggregationType.Average, ImagingFeatureType.ImageAndSegmentation),
(True, AggregationType.Average, ImagingFeatureType.ImageAndSegmentation),
(True, AggregationType.GatedPooling, ImagingFeatureType.ImageAndSegmentation),
(True, AggregationType.MixPooling, ImagingFeatureType.ImageAndSegmentation),
(True, AggregationType.ZAdaptive3dAvg, ImagingFeatureType.ImageAndSegmentation)])
def test_image_encoder_with_segmentation(test_output_dirs: OutputFolderForTests,
encode_channels_jointly: bool,
aggregation_type: AggregationType,
@ -274,7 +272,7 @@ def test_image_encoder_with_segmentation(test_output_dirs: OutputFolderForTests,
with mock.patch('InnerEye.ML.utils.io_util.load_image_in_known_formats', return_value=image_and_seg):
azure_config = get_default_azure_config()
azure_config.train = True
MLRunner(config, azure_config).run()
MLRunner(config, azure_config=azure_config).run()
# No further asserts here because the models are still in experimental state. Most errors would come
# from having invalid model architectures, which would throw runtime errors during training.
# Verified manually that the cross entropy on the Val set that appears during training, and the


@ -11,13 +11,11 @@ import pytest
from InnerEye.Common import common_util
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.common import DATASET_CSV_FILE_NAME
from InnerEye.ML.model_training import model_train
from InnerEye.ML.models.architectures.classification.image_encoder_with_mlp import create_mlp
from InnerEye.ML.run_ml import MLRunner
from InnerEye.ML.scalar_config import ScalarLoss, ScalarModelBase
from InnerEye.ML.utils.split_dataset import DatasetSplits
from Tests.ML.util import get_default_checkpoint_handler
from Tests.ML.util import model_train_unittest
class NonImageEncoder(ScalarModelBase):
@ -73,11 +71,11 @@ def test_non_image_encoder(test_output_dirs: OutputFolderForTests,
config.max_batch_grad_cam = 1
config.validate()
# run model training
checkpoint_handler = get_default_checkpoint_handler(model_config=config,
project_root=Path(test_output_dirs.root_dir))
model_train(config, checkpoint_handler=checkpoint_handler)
_, checkpoint_handler = model_train_unittest(config, dirs=test_output_dirs)
# run model inference
MLRunner(config).model_inference_train_and_test(checkpoint_handler=checkpoint_handler)
runner = MLRunner(config)
runner.setup()
runner.model_inference_train_and_test(checkpoint_handler=checkpoint_handler)
assert config.get_total_number_of_non_imaging_features() == 18


@ -4,18 +4,17 @@
# ------------------------------------------------------------------------------------------
import logging
from pathlib import Path
import pytest
from InnerEye.Common.common_util import logging_to_stdout
from InnerEye.Common.metrics_constants import MetricType
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML import model_testing, model_training
from InnerEye.ML import model_testing
from InnerEye.ML.common import ModelExecutionMode
from InnerEye.ML.metrics import InferenceMetricsForClassification
from Tests.ML.configs.ClassificationModelForTesting2D import ClassificationModelForTesting2D
from Tests.ML.util import get_default_checkpoint_handler
from Tests.ML.util import model_train_unittest
@pytest.mark.parametrize("use_mixed_precision", [False])
@ -31,10 +30,7 @@ def test_train_2d_classification_model(test_output_dirs: OutputFolderForTests,
# Train for 4 epochs, checkpoints at epochs 2 and 4
config.num_epochs = 4
config.use_mixed_precision = use_mixed_precision
checkpoint_handler = get_default_checkpoint_handler(model_config=config,
project_root=Path(test_output_dirs.root_dir))
model_training_result = model_training.model_train(config, checkpoint_handler=checkpoint_handler)
model_training_result, checkpoint_handler = model_train_unittest(config, dirs=test_output_dirs)
assert model_training_result is not None
expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]


@ -4,16 +4,20 @@
# ------------------------------------------------------------------------------------------
import logging
from typing import List
from unittest import mock
import pytest
from azureml.core import Run
from InnerEye.Common.common_util import logging_to_stdout, namespace_to_path
from InnerEye.ML.config import SegmentationModelBase
from InnerEye.ML.model_training import generate_and_print_model_summary
from InnerEye.ML.utils.config_util import ModelConfigLoader
from InnerEye.ML.utils.model_util import create_model_with_temperature_scaling
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.lightning_container import LightningContainer
from InnerEye.ML.utils.config_loader import ModelConfigLoader
from InnerEye.ML.utils.model_util import create_model_with_temperature_scaling, generate_and_print_model_summary
from Tests.ML.configs.DummyModel import DummyModel
from Tests.ML.util import get_model_loader
from Tests.ML.configs.lightning_test_containers import DummyContainerWithInvalidTrainerArguments, \
DummyContainerWithParameters
from Tests.ML.util import default_runner, get_model_loader, model_loader_including_tests, model_train_unittest
def find_models() -> List[str]:
@ -48,7 +52,7 @@ def test_load_all_configs(model_name: str) -> None:
"""
logger = logging.getLogger()
logger.setLevel(logging.INFO)
config = ModelConfigLoader[SegmentationModelBase]().create_model_config_from_name(model_name)
config = ModelConfigLoader().create_model_config_from_name(model_name)
assert config.model_name == model_name, "Mismatch between definition .py file and model name"
if config.is_segmentation_model:
# Reduce the feature channels to a minimum, to make tests run fast on CPU.
@ -98,12 +102,90 @@ def test_config_loader_as_in_registration() -> None:
During model registration, the model config namespace is read out from the present model. Ensure that we
can create a config loader that has that value as an input.
"""
loader1 = ModelConfigLoader[SegmentationModelBase]()
loader1 = ModelConfigLoader()
model_name = "BasicModel2Epochs"
model = loader1.create_model_config_from_name(model_name)
assert model is not None
namespace = model.__module__
loader2 = ModelConfigLoader[SegmentationModelBase](model_configs_namespace=namespace)
loader2 = ModelConfigLoader(model_configs_namespace=namespace)
assert len(loader2.module_search_specs) == 2
model2 = loader2.create_model_config_from_name(model_name)
assert model2 is not None
def test_config_loader_on_lightning_container() -> None:
"""
Test if the config loader can load a model that is neither classification nor segmentation.
"""
# First test if the container can be instantiated at all (it is tricky to get that right when the inheritance changes)
DummyContainerWithParameters()
logging_to_stdout(log_level=logging.DEBUG)
model = model_loader_including_tests.create_model_config_from_name("DummyContainerWithParameters")
assert model is not None
@pytest.mark.parametrize("container_name", ["DummyContainerWithAzureDataset",
"DummyContainerWithoutDataset",
"DummyContainerWithLocalDataset",
"DummyContainerWithAzureAndLocalDataset"])
def test_submit_container_to_azureml(container_name: str) -> None:
"""
Test if we can get the config loader to load a Lightning container model, and get it through the AzureML
submission process.
"""
runner = default_runner()
mock_run = Run.get_context()
args = ["", f"--model={container_name}", "--azureml=True", "--model_configs_namespace=Tests.ML.configs"]
with mock.patch("sys.argv", args):
with mock.patch("InnerEye.Azure.azure_runner.get_dataset_consumption", return_value=None):
with mock.patch("azureml.core.Experiment.submit", return_value=mock_run):
loaded_config, actual_run = runner.run()
assert actual_run == mock_run
assert loaded_config is None
assert isinstance(runner.lightning_container, LightningContainer)
def test_load_container_with_arguments() -> None:
"""
Test if we can load a container and override a value in it via the commandline. Parameters can only be set at
container level, not at model level.
"""
DummyContainerWithParameters()
runner = default_runner()
args = ["", "--model=DummyContainerWithParameters", "--container_param=param1",
"--model_configs_namespace=Tests.ML.configs"]
with mock.patch("sys.argv", args):
runner.parse_and_load_model()
assert isinstance(runner.lightning_container, DummyContainerWithParameters)
assert runner.lightning_container.container_param == "param1"
# Overriding model parameters should not work
args = ["", "--model=DummyContainerWithParameters", "--model_param=param2",
"--model_configs_namespace=Tests.ML.configs"]
with pytest.raises(ValueError) as ex:
with mock.patch("sys.argv", args):
runner.parse_and_load_model()
assert "model_param" in str(ex)
def test_load_invalid_container() -> None:
"""
Test that loading a container fails if one of the parameters is not valid.
"""
DummyContainerWithParameters()
runner = default_runner()
args = ["", "--model=DummyContainerWithParameters", "--number_of_cross_validation_splits=1",
"--model_configs_namespace=Tests.ML.configs"]
with pytest.raises(ValueError) as ex:
with mock.patch("sys.argv", args):
runner.parse_and_load_model()
assert "At least two splits required to perform cross validation, but got 1" in str(ex)
def test_run_model_with_invalid_trainer_arguments(test_output_dirs: OutputFolderForTests) -> None:
"""
Test that the trainer_arguments in a LightningContainer are passed to the trainer, by checking that an invalid argument raises an error.
"""
container = DummyContainerWithInvalidTrainerArguments()
with pytest.raises(Exception) as ex:
model_train_unittest(config=None, dirs=test_output_dirs, lightning_container=container)
assert "no_such_argument" in str(ex)


@ -16,10 +16,11 @@ import torch
from InnerEye.Common import common_util, fixed_paths
from InnerEye.Common.common_util import BEST_EPOCH_FOLDER_NAME, CROSSVAL_RESULTS_FOLDER, EPOCH_METRICS_FILE_NAME, \
METRICS_AGGREGATES_FILE, SUBJECT_METRICS_FILE_NAME, get_best_epoch_results_path, logging_to_stdout
from InnerEye.Common.fixed_paths import LOG_FILE_NAME
from InnerEye.Common.fixed_paths_for_tests import full_ml_test_data_path
from InnerEye.Common.metrics_constants import LoggingColumns, MetricType
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML import model_testing, model_training, runner
from InnerEye.ML import model_testing, runner
from InnerEye.ML.common import ModelExecutionMode
from InnerEye.ML.configs.classification.DummyMulticlassClassification import DummyMulticlassClassification
from InnerEye.ML.dataset.scalar_dataset import ScalarDataset
@ -30,12 +31,13 @@ from InnerEye.ML.reports.notebook_report import generate_classification_multilab
generate_classification_notebook, get_html_report_name, get_ipynb_report_name
from InnerEye.ML.run_ml import MLRunner
from InnerEye.ML.scalar_config import ScalarLoss, ScalarModelBase
from InnerEye.ML.utils.config_util import ModelConfigLoader
from InnerEye.ML.utils.config_loader import ModelConfigLoader
from InnerEye.ML.visualizers.plot_cross_validation import EpochMetricValues, get_config_and_results_for_offline_runs, \
unroll_aggregate_metrics
from Tests.ML.configs.ClassificationModelForTesting import ClassificationModelForTesting
from Tests.ML.configs.DummyModel import DummyModel
from Tests.ML.util import get_default_azure_config, get_default_checkpoint_handler, machine_has_gpu
from Tests.ML.util import get_default_azure_config, machine_has_gpu, \
model_train_unittest
@pytest.mark.cpu_and_gpu
@ -50,20 +52,20 @@ def test_train_classification_model(class_name: str, test_output_dirs: OutputFol
config = ClassificationModelForTesting()
config.class_names = [class_name]
config.set_output_to(test_output_dirs.root_dir)
checkpoint_handler = get_default_checkpoint_handler(model_config=config,
project_root=Path(test_output_dirs.root_dir))
# Train for 4 epochs, checkpoints at epochs 2 and 4
config.num_epochs = 4
model_training_result = model_training.model_train(config, checkpoint_handler=checkpoint_handler)
model_training_result, checkpoint_handler = model_train_unittest(config, dirs=test_output_dirs)
assert model_training_result is not None
expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]
expected_train_loss = [0.686614, 0.686465, 0.686316, 0.686167]
expected_val_loss = [0.737061, 0.736691, 0.736321, 0.735952]
# Ensure that all metrics are computed on both training and validation set
assert len(model_training_result.train_results_per_epoch) == config.num_epochs
assert len(model_training_result.val_results_per_epoch) == config.num_epochs
assert len(model_training_result.train_results_per_epoch[0]) >= 11
assert len(model_training_result.val_results_per_epoch[0]) >= 11
train_results_per_epoch = model_training_result.train_results_per_epoch()
val_results_per_epoch = model_training_result.val_results_per_epoch()
assert len(train_results_per_epoch) == config.num_epochs
assert len(val_results_per_epoch) == config.num_epochs
assert len(train_results_per_epoch[0]) >= 11
assert len(val_results_per_epoch[0]) >= 11
for metric in [MetricType.ACCURACY_AT_THRESHOLD_05,
MetricType.ACCURACY_AT_OPTIMAL_THRESHOLD,
@ -74,10 +76,8 @@ def test_train_classification_model(class_name: str, test_output_dirs: OutputFol
MetricType.SECONDS_PER_BATCH,
MetricType.SECONDS_PER_EPOCH,
MetricType.SUBJECT_COUNT]:
assert metric.value in model_training_result.train_results_per_epoch[0], \
f"{metric.value} not in training"
assert metric.value in model_training_result.val_results_per_epoch[0], \
f"{metric.value} not in validation"
assert metric.value in train_results_per_epoch[0], f"{metric.value} not in training"
assert metric.value in val_results_per_epoch[0], f"{metric.value} not in validation"
actual_train_loss = model_training_result.get_metric(is_training=True, metric_type=MetricType.LOSS.value)
actual_val_loss = model_training_result.get_metric(is_training=False, metric_type=MetricType.LOSS.value)
@ -144,6 +144,7 @@ def test_train_classification_model(class_name: str, test_output_dirs: OutputFol
"""
check_log_file(inference_metrics_path, inference_metrics_expected, ignore_columns=[])
@pytest.mark.skipif(common_util.is_windows(), reason="Has OOM issues on windows build")
@pytest.mark.cpu_and_gpu
def test_train_classification_multilabel_model(test_output_dirs: OutputFolderForTests) -> None:
@ -155,35 +156,33 @@ def test_train_classification_multilabel_model(test_output_dirs: OutputFolderFor
logging_to_stdout(logging.DEBUG)
config = DummyMulticlassClassification()
config.set_output_to(test_output_dirs.root_dir)
checkpoint_handler = get_default_checkpoint_handler(model_config=config,
project_root=Path(test_output_dirs.root_dir))
# Train for 4 epochs, checkpoints at epochs 2 and 4
config.num_epochs = 4
model_training_result = model_training.model_train(config, checkpoint_handler=checkpoint_handler)
model_training_result, checkpoint_handler = model_train_unittest(config, dirs=test_output_dirs)
assert model_training_result is not None
expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]
expected_train_loss = [0.699870228767395, 0.6239662170410156, 0.551329493522644, 0.4825132489204407]
expected_val_loss = [0.6299371719360352, 0.5546272993087769, 0.4843321740627289, 0.41909298300743103]
# Ensure that all metrics are computed on both training and validation set
assert len(model_training_result.train_results_per_epoch) == config.num_epochs
assert len(model_training_result.val_results_per_epoch) == config.num_epochs
assert len(model_training_result.train_results_per_epoch[0]) >= 11
assert len(model_training_result.val_results_per_epoch[0]) >= 11
train_results_per_epoch = model_training_result.train_results_per_epoch()
val_results_per_epoch = model_training_result.val_results_per_epoch()
assert len(train_results_per_epoch) == config.num_epochs
assert len(val_results_per_epoch) == config.num_epochs
assert len(train_results_per_epoch[0]) >= 11
assert len(val_results_per_epoch[0]) >= 11
for class_name in config.class_names:
for metric in [MetricType.ACCURACY_AT_THRESHOLD_05,
MetricType.ACCURACY_AT_OPTIMAL_THRESHOLD,
MetricType.AREA_UNDER_PR_CURVE,
MetricType.AREA_UNDER_ROC_CURVE,
MetricType.CROSS_ENTROPY]:
assert f'{metric.value}/{class_name}' in model_training_result.train_results_per_epoch[
0], f"{metric.value} not in training"
assert f'{metric.value}/{class_name}' in model_training_result.val_results_per_epoch[
0], f"{metric.value} not in validation"
assert f'{metric.value}/{class_name}' in train_results_per_epoch[0], f"{metric.value} not in training"
assert f'{metric.value}/{class_name}' in val_results_per_epoch[0], f"{metric.value} not in validation"
for metric in [MetricType.LOSS,
MetricType.SECONDS_PER_EPOCH,
MetricType.SUBJECT_COUNT]:
assert metric.value in model_training_result.train_results_per_epoch[0], f"{metric.value} not in training"
assert metric.value in model_training_result.val_results_per_epoch[0], f"{metric.value} not in validation"
assert metric.value in train_results_per_epoch[0], f"{metric.value} not in training"
assert metric.value in val_results_per_epoch[0], f"{metric.value} not in validation"
actual_train_loss = model_training_result.get_metric(is_training=True, metric_type=MetricType.LOSS.value)
actual_val_loss = model_training_result.get_metric(is_training=False, metric_type=MetricType.LOSS.value)
@ -265,13 +264,12 @@ def test_run_ml_with_classification_model(test_output_dirs: OutputFolderForTests
logging_to_stdout()
azure_config = get_default_azure_config()
azure_config.train = True
config: ScalarModelBase = ModelConfigLoader[ScalarModelBase]() \
.create_model_config_from_name(model_name)
config: ScalarModelBase = ModelConfigLoader().create_model_config_from_name(model_name)
config.number_of_cross_validation_splits = number_of_offline_cross_validation_splits
config.set_output_to(test_output_dirs.root_dir)
# Trying to run DDP from the test suite hangs, hence restrict to single GPU.
config.max_num_gpus = 1
MLRunner(config, azure_config).run()
MLRunner(config, azure_config=azure_config).run()
_check_offline_cross_validation_output_files(config)
if config.perform_cross_validation:
@ -306,7 +304,7 @@ def test_run_ml_with_segmentation_model(test_output_dirs: OutputFolderForTests)
config.set_output_to(test_output_dirs.root_dir)
azure_config = get_default_azure_config()
azure_config.train = True
MLRunner(config, azure_config).run()
MLRunner(config, azure_config=azure_config).run()
@pytest.mark.skipif(common_util.is_windows(), reason="Has OOM issues on windows build")
@ -319,7 +317,7 @@ def test_runner1(test_output_dirs: OutputFolderForTests) -> None:
set_from_commandline = 12345
scalar1 = '["label"]'
model_name = "DummyClassification"
initial_config = ModelConfigLoader[ScalarModelBase]().create_model_config_from_name(model_name)
initial_config = ModelConfigLoader().create_model_config_from_name(model_name)
assert initial_config.non_image_feature_channels == []
output_root = str(test_output_dirs.root_dir)
args = ["",
@ -338,7 +336,7 @@ def test_runner1(test_output_dirs: OutputFolderForTests) -> None:
assert config.get_effective_random_seed() == set_from_commandline
assert config.non_image_feature_channels == ["label"]
assert str(config.outputs_folder).startswith(output_root)
assert (config.logs_folder / runner.LOG_FILE_NAME).exists()
assert (config.logs_folder / LOG_FILE_NAME).exists()
@pytest.mark.skipif(common_util.is_windows(), reason="Has OOM issues on windows build")
@ -456,7 +454,9 @@ def _compute_scalar_metrics(output_values_list: List[List[float]],
def test_is_offline_cross_val_parent_run(offline_parent_cv_run: bool) -> None:
train_config = DummyModel()
train_config.number_of_cross_validation_splits = 2 if offline_parent_cv_run else 0
assert MLRunner(train_config).is_offline_cross_val_parent_run() == offline_parent_cv_run
runner = MLRunner(train_config)
runner.setup()
assert runner.is_offline_cross_val_parent_run() == offline_parent_cv_run
def _check_offline_cross_validation_output_files(train_config: ScalarModelBase) -> None:
@ -487,12 +487,15 @@ def _check_offline_cross_validation_output_files(train_config: ScalarModelBase)
_dataset_splits.train[train_config.subject_column].unique())
_test_dataset_split_count = len(_dataset_splits.test[train_config.subject_column].unique())
_aggregates_csv = pd.read_csv(aggregate_metrics_path)
_aggregates_csv_test = _aggregates_csv.loc[_aggregates_csv[LoggingColumns.DataSplit.value] == ModelExecutionMode.TEST.value]
_aggregates_csv_train_val = _aggregates_csv.loc[_aggregates_csv[LoggingColumns.DataSplit.value] != ModelExecutionMode.TEST.value]
_aggregates_csv_test = _aggregates_csv.loc[
_aggregates_csv[LoggingColumns.DataSplit.value] == ModelExecutionMode.TEST.value]
_aggregates_csv_train_val = _aggregates_csv.loc[
_aggregates_csv[LoggingColumns.DataSplit.value] != ModelExecutionMode.TEST.value]
_counts_for_splits_train_val = list(_aggregates_csv_train_val[LoggingColumns.SubjectCount.value])
_counts_for_splits_test = list(_aggregates_csv_test[LoggingColumns.SubjectCount.value])
assert all([x == _val_dataset_split_count for x in _counts_for_splits_train_val])
assert all([x == _test_dataset_split_count * train_config.number_of_cross_validation_splits for x in _counts_for_splits_test])
assert all([x == _test_dataset_split_count * train_config.number_of_cross_validation_splits for x in
_counts_for_splits_test])
_epochs = list(_aggregates_csv_train_val[LoggingColumns.Epoch.value].astype(int))
# Each epoch is recorded twice: once for the training split and once for the validation split

Просмотреть файл

@ -11,8 +11,8 @@ from InnerEye.Common.common_util import is_windows
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.Common.type_annotations import TupleInt3
from InnerEye.ML.config import SegmentationModelBase
from InnerEye.ML.lightning_helpers import create_lightning_model, load_from_checkpoint_and_adjust_for_inference
from InnerEye.ML.lightning_models import SegmentationLightning
from InnerEye.ML.lightning_helpers import load_from_checkpoint_and_adjust_for_inference
from InnerEye.ML.lightning_models import SegmentationLightning, create_lightning_model
from InnerEye.ML.pipelines.inference import InferencePipeline
from InnerEye.ML.utils import image_util
from Tests.ML.utils.test_model_util import create_model_and_store_checkpoint

Просмотреть файл

@ -12,8 +12,7 @@ from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.Common.type_annotations import TupleInt3
from InnerEye.ML.dataset.sample import GeneralSampleMetadata
from InnerEye.ML.dataset.scalar_sample import ScalarItem
from InnerEye.ML.lightning_helpers import create_lightning_model
from InnerEye.ML.lightning_models import ScalarLightning
from InnerEye.ML.lightning_models import ScalarLightning, create_lightning_model
from InnerEye.ML.models.architectures.base_model import DeviceAwareModule
from InnerEye.ML.pipelines.scalar_inference import ScalarEnsemblePipeline, ScalarInferencePipeline, \
ScalarInferencePipelineBase

Просмотреть файл

@ -1,30 +0,0 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import argparse
from InnerEye.ML.model_config_base import ModelConfigBase
from Tests.ML.util import get_model_loader
MODEL_NAME = "DummyModelWithOverrideGroups"
LOADER = get_model_loader("Tests.ML.configs")
def test_script_params_override() -> None:
# these are the parameters from the command line that should override
# the initial parameters
parser = argparse.ArgumentParser()
parser.add_argument("--l_rate",
help="The name of the model to train/test.",
type=float,
default=1.0)
args = parser.parse_args("")
try:
config: ModelConfigBase = LOADER.create_model_config_from_name(model_name=MODEL_NAME, overrides=vars(args))
# check that the values were changed
assert config.l_rate == args.l_rate
except ValueError:
# (Temporarily) handle the case where there is no Lung config.
pass

Просмотреть файл

@ -7,10 +7,13 @@ from typing import List, Optional, Union
import pytest
import torch
from pandas import DataFrame
from InnerEye.Azure.azure_util import CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.common import ModelExecutionMode
from InnerEye.ML.config import ModelArchitectureConfig, SegmentationModelBase, equally_weighted_classes
from InnerEye.ML.deep_learning_config import DeepLearningConfig
from InnerEye.ML.models.architectures.base_model import BaseSegmentationModel
from InnerEye.ML.scalar_config import ScalarModelBase
from InnerEye.ML.utils import ml_util
@ -127,6 +130,35 @@ def test_equally_weighted_classes_fails(num_fg_clases: int, background_weight: O
equally_weighted_classes(classes, background_weight)
def test_fields_are_set() -> None:
"""
Tests that expected fields are set when creating config classes.
"""
expected = [("hello", None), ("world", None)]
config = SegmentationModelBase(
should_validate=False,
ground_truth_ids=[x[0] for x in expected],
largest_connected_component_foreground_classes=expected
)
assert hasattr(config, CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY)
assert config.largest_connected_component_foreground_classes == expected
@pytest.mark.cpu_and_gpu
def test_dataset_reader_workers() -> None:
"""
Test to make sure the number of dataset reader workers is set correctly
"""
config = ScalarModelBase(
should_validate=False,
num_dataset_reader_workers=-1
)
if config.is_offline_run:
assert config.num_dataset_reader_workers == -1
else:
assert config.num_dataset_reader_workers == 0
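# Editorial sketch, not part of the diff: the behaviour the test above pins down, expressed as a
# hypothetical standalone helper. Inside AzureML the number of dataset reader workers is forced
# to 0, while offline runs keep the user-provided value.
def clamp_num_dataset_reader_workers(requested: int, is_offline_run: bool) -> int:
    """Return the number of dataloader worker processes to use for this run."""
    return requested if is_offline_run else 0

assert clamp_num_dataset_reader_workers(-1, is_offline_run=True) == -1
assert clamp_num_dataset_reader_workers(-1, is_offline_run=False) == 0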
def create_dataset_csv(test_output_dirs: OutputFolderForTests) -> Path:
"""Create dummy dataset csv file for tests,
deleting any pre-existing file."""
@ -176,34 +208,46 @@ def test_dataset_csv_with_ScalarModelBase(
assert model_config.dataset_data_frame is not None
validate_dataset_paths(model_config)
def test_unet3_num_downsampling_paths() -> None:
for num_downsampling_paths in range(1, 5):
j = int(2**num_downsampling_paths)
j = int(2 ** num_downsampling_paths)
# Test that num_downsampling_paths for the built UNet3D
# is set via the model configuration
crop_size = (j, j, j)
config = SegmentationModelBase(
architecture=ModelArchitectureConfig.UNet3D,
image_channels=["ct"],
feature_channels=[1],
crop_size=crop_size,
num_downsampling_paths=num_downsampling_paths,
should_validate=False)
architecture=ModelArchitectureConfig.UNet3D,
image_channels=["ct"],
feature_channels=[1],
crop_size=crop_size,
num_downsampling_paths=num_downsampling_paths,
should_validate=False)
network = build_net(config)
assert network.num_downsampling_paths == num_downsampling_paths
# Test that exception is raised if crop size is smaller than is allowed
# by num_downsampling_paths
too_small_crop_size = (j//2, j//2, j//2)
too_small_crop_size = (j // 2, j // 2, j // 2)
ex_msg = f"Crop size is not valid. The required minimum is {crop_size}"
config = SegmentationModelBase(
architecture=ModelArchitectureConfig.UNet3D,
image_channels=["ct"],
feature_channels=[1],
crop_size=too_small_crop_size,
num_downsampling_paths=num_downsampling_paths,
should_validate=False)
architecture=ModelArchitectureConfig.UNet3D,
image_channels=["ct"],
feature_channels=[1],
crop_size=too_small_crop_size,
num_downsampling_paths=num_downsampling_paths,
should_validate=False)
with pytest.raises(ValueError) as ex:
network = build_net(config)
build_net(config)
assert ex_msg in str(ex)
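# Editorial sketch, not part of the diff: the constraint exercised above. With N downsampling
# paths, a UNet3D halves the spatial resolution N times, so every crop dimension must be at
# least 2 ** N.
def minimum_crop_edge(num_downsampling_paths: int) -> int:
    return 2 ** num_downsampling_paths

assert minimum_crop_edge(1) == 2
assert minimum_crop_edge(4) == 16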
def test_config_str() -> None:
"""
Check if dataframe fields are omitted from the string conversion of a config object.
"""
config = DeepLearningConfig()
df = DataFrame(columns=["foobar"], data=[1.0, 2.0])
config.dataset_data_frame = df
s = str(config)
assert "foobar" not in s, f"Incorrect output: {s}"

Просмотреть файл

@ -0,0 +1,100 @@
5.749202728271484375e+00,1.045434713363647461e+00
7.637505531311035156e+00,1.481036424636840820e+00
9.329424858093261719e+00,1.917572140693664551e+00
7.494448661804199219e+00,1.591190218925476074e+00
9.689485549926757812e+00,1.971193432807922363e+00
1.534464955329895020e+00,2.667523026466369629e-01
9.561051368713378906e+00,1.685984492301940918e+00
8.846873283386230469e+00,1.842781662940979004e+00
5.290946006774902344e+00,1.096875548362731934e+00
6.057560443878173828e+00,1.215266227722167969e+00
9.478215217590332031e+00,2.150293827056884766e+00
3.349403142929077148e+00,6.767664551734924316e-01
4.251931190490722656e+00,8.223311901092529297e-01
9.133169174194335938e+00,1.817616820335388184e+00
5.868637561798095703e-01,7.238323986530303955e-02
4.395127296447753906e-02,-1.203215569257736206e-01
4.473074913024902344e+00,1.031657218933105469e+00
9.513977050781250000e+00,1.858699083328247070e+00
1.762510538101196289e+00,5.511313676834106445e-01
3.059309720993041992e+00,7.563391923904418945e-01
1.646029353141784668e+00,1.496532708406448364e-01
4.897529602050781250e+00,1.009216904640197754e+00
5.573034763336181641e+00,1.063154220581054688e+00
6.369268894195556641e-01,1.111971661448478699e-01
4.231463432312011719e+00,7.547589540481567383e-01
2.101852893829345703e+00,4.300535917282104492e-01
6.729665279388427734e+00,1.531016945838928223e+00
7.961721420288085938e+00,1.478816986083984375e+00
2.669513702392578125e+00,5.368775129318237305e-01
4.574956893920898438e+00,8.576059341430664062e-01
7.437694072723388672e-01,2.781907916069030762e-01
5.983660221099853516e+00,1.187077164649963379e+00
9.752596855163574219e+00,2.027858018875122070e+00
9.338418960571289062e+00,2.025038957595825195e+00
6.751420497894287109e+00,1.303008437156677246e+00
9.554377555847167969e+00,1.930274367332458496e+00
3.040063381195068359e-01,1.029240339994430542e-02
2.818799018859863281e-01,2.614871561527252197e-01
5.994813919067382812e+00,1.070150852203369141e+00
5.795848369598388672e-01,-4.384742677211761475e-02
3.211182355880737305e+00,5.655854344367980957e-01
8.615511894226074219e+00,1.730698943138122559e+00
4.950296401977539062e+00,9.849137663841247559e-01
1.632133126258850098e+00,4.643072187900543213e-01
5.252981662750244141e+00,1.020202517509460449e+00
6.792118072509765625e+00,1.392252922058105469e+00
2.290313720703125000e+00,2.924301028251647949e-01
3.329365253448486328e+00,8.425519466400146484e-01
3.469936370849609375e+00,6.026793718338012695e-01
8.790910243988037109e-02,-2.861313149333000183e-02
3.999347686767578125e+00,7.818984389305114746e-01
2.891576290130615234e-01,1.004043519496917725e-01
5.804258346557617188e+00,1.216606140136718750e+00
3.836791992187500000e+00,9.209365844726562500e-01
5.516016006469726562e+00,1.077136993408203125e+00
7.987973213195800781e+00,1.436331987380981445e+00
8.790102958679199219e+00,1.966201663017272949e+00
7.359976291656494141e+00,1.414163231849670410e+00
3.334070444107055664e+00,6.647097468376159668e-01
6.998687744140625000e+00,1.236205577850341797e+00
4.366195201873779297e+00,8.169019818305969238e-01
5.620658397674560547e-02,1.258963048458099365e-01
4.941163539886474609e+00,8.424454331398010254e-01
5.723971366882324219e+00,1.067836642265319824e+00
6.601081371307373047e+00,1.462573528289794922e+00
2.285490512847900391e+00,3.970748484134674072e-01
3.233198642730712891e+00,5.250911116600036621e-01
8.005992889404296875e+00,1.783252477645874023e+00
9.988401412963867188e+00,2.108428716659545898e+00
3.176209926605224609e+00,6.435566544532775879e-01
8.746008872985839844e+00,1.664346933364868164e+00
1.034811139106750488e+00,7.043153047561645508e-02
4.228623390197753906e+00,7.815699577331542969e-01
8.936402320861816406e+00,1.719643831253051758e+00
6.312811374664306641e+00,1.115693926811218262e+00
2.998808622360229492e+00,6.670392155647277832e-01
4.147662162780761719e+00,8.732877969741821289e-01
7.289369106292724609e+00,1.516813516616821289e+00
7.059363842010498047e+00,1.442374944686889648e+00
8.922320365905761719e+00,1.986880540847778320e+00
2.708734989166259766e+00,5.354607105255126953e-01
7.997574329376220703e+00,1.465035080909729004e+00
3.965347290039062500e+00,8.214159011840820312e-01
6.266443729400634766e+00,1.197527050971984863e+00
1.101133823394775391e+00,6.622195243835449219e-02
5.336141586303710938e+00,9.389448165893554688e-01
9.122401237487792969e+00,1.791326642036437988e+00
5.871895790100097656e+00,1.136480450630187988e+00
7.560163497924804688e+00,1.564274787902832031e+00
9.699577093124389648e-01,3.698585033416748047e-01
6.118352413177490234e+00,1.192414522171020508e+00
2.855588197708129883e+00,7.185647487640380859e-01
7.776415348052978516e-01,1.955290585756301880e-01
5.909432888031005859e+00,1.285851240158081055e+00
1.471748352050781250e+00,4.017572104930877686e-01
7.064949989318847656e+00,1.341601967811584473e+00
4.807097911834716797e+00,1.043601632118225098e+00
6.570946693420410156e+00,1.190045952796936035e+00
4.360112667083740234e+00,7.959681153297424316e-01
9.832940101623535156e+00,1.984794259071350098e+00

Просмотреть файл

@ -3,21 +3,27 @@
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import logging
import shutil
from pathlib import Path
from typing import List
from typing import Any, List, Optional
from unittest import mock
import pytest
from InnerEye.Azure.azure_config import AzureConfig
from InnerEye.Azure.azure_util import get_results_blob_path
from InnerEye.Common import fixed_paths
from InnerEye.Common.common_util import OTHER_RUNS_SUBDIR_NAME, logging_section, logging_to_stdout
from InnerEye.Common.fixed_paths_for_tests import full_ml_test_data_path
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.common import DATASET_CSV_FILE_NAME
from InnerEye.ML.deep_learning_config import DeepLearningConfig
from InnerEye.ML.lightning_container import LightningContainer
from InnerEye.ML.model_config_base import ModelConfigBase
from InnerEye.ML.run_ml import MLRunner
from InnerEye.ML.utils.run_recovery import RunRecovery
from Tests.AfterTraining.test_after_training import FALLBACK_ENSEMBLE_RUN, FALLBACK_SINGLE_RUN, get_most_recent_run
from Tests.ML.configs.DummyModel import DummyModel
from Tests.ML.configs.lightning_test_containers import DummyContainerWithDatasets
from Tests.ML.util import get_default_azure_config
logging_to_stdout(logging.DEBUG)
@ -35,10 +41,6 @@ def runner_config() -> AzureConfig:
return config
def test_get_results_blob_path() -> None:
assert get_results_blob_path("some_run_id") == "azureml/ExperimentRun/dcid.some_run_id"
def check_single_checkpoint(downloaded_checkpoints: List[Path]) -> None:
assert len(downloaded_checkpoints) == 1
assert downloaded_checkpoints[0].is_file()
@ -77,18 +79,22 @@ def test_download_best_checkpoints_ensemble_run(test_output_dirs: OutputFolderFo
def test_download_azureml_dataset(test_output_dirs: OutputFolderForTests) -> None:
dataset_name = "test-dataset"
config = ModelConfigBase(should_validate=False)
config = DummyModel()
config.local_dataset = None
config.azure_dataset_id = ""
azure_config = get_default_azure_config()
runner = MLRunner(config, azure_config)
runner.project_root = test_output_dirs.root_dir
runner = MLRunner(config, azure_config=azure_config)
# If the model has neither local_dataset or azure_dataset_id, mount_or_download_dataset should fail.
with pytest.raises(ValueError):
runner.mount_or_download_dataset()
# This mounting call must happen before any other operations on the container, because even the model
# creation may already need access to the dataset.
with pytest.raises(ValueError) as ex:
runner.setup()
assert ex.value.args[0] == "The model must contain either local_dataset or azure_dataset_id."
runner.project_root = test_output_dirs.root_dir
# Pointing the model to a dataset folder that does not exist should raise an Exception
fake_folder = runner.project_root / "foo"
runner.model_config.local_dataset = fake_folder
runner.container.local_dataset = fake_folder
with pytest.raises(FileNotFoundError):
runner.mount_or_download_dataset()
@ -98,8 +104,8 @@ def test_download_azureml_dataset(test_output_dirs: OutputFolderForTests) -> Non
assert local_dataset == fake_folder
# Pointing the model to a dataset in Azure should trigger a download
runner.model_config.local_dataset = None
runner.model_config.azure_dataset_id = dataset_name
runner.container.local_dataset = None
runner.container.azure_dataset_id = dataset_name
with logging_section("Starting download"):
result_path = runner.mount_or_download_dataset()
# Download goes into <project_root> / "datasets" / "test_dataset"
@ -115,3 +121,127 @@ def test_download_azureml_dataset(test_output_dirs: OutputFolderForTests) -> Non
for file in ["ct", "esophagus", "heart", "lung_l", "lung_r", "spinalcord"]:
f = (sub_folder / file).with_suffix(".nii.gz")
assert f.is_file()
def _test_mount_for_lightning_container(test_output_dirs: OutputFolderForTests,
is_offline_run: bool,
local_dataset: Optional[Path],
azure_dataset: str,
is_lightning_model: bool) -> LightningContainer:
config: Optional[DeepLearningConfig] = None
container: Optional[LightningContainer] = None
if is_lightning_model:
container = DummyContainerWithDatasets()
container.azure_dataset_id = azure_dataset
container.local_dataset = local_dataset
else:
config = DummyModel()
config.azure_dataset_id = azure_dataset
config.local_dataset = local_dataset
# The legacy InnerEye models require an existing dataset_csv file present in the dataset folder. Create that.
download_path = test_output_dirs.root_dir / "downloaded"
mount_path = test_output_dirs.root_dir / "mounted"
if not is_lightning_model:
for path in [download_path, mount_path]:
path.mkdir(exist_ok=True)
shutil.copy(full_ml_test_data_path(DATASET_CSV_FILE_NAME), path / DATASET_CSV_FILE_NAME)
with mock.patch("InnerEye.ML.run_ml.MLRunner.is_offline_run", is_offline_run):
with mock.patch("InnerEye.ML.run_ml.download_dataset", return_value=download_path):
with mock.patch("InnerEye.ML.run_ml.try_to_mount_input_dataset", return_value=mount_path):
runner = MLRunner(config, container=container,
azure_config=None, project_root=test_output_dirs.root_dir)
runner.setup()
return runner.container
@pytest.mark.parametrize(("is_lightning_model", "expected_error"),
[
# A built-in InnerEye model must have either local dataset or azure dataset provided.
(False, "The model must contain either local_dataset or azure_dataset_id"),
# ... but this is OK for Lightning container models. A Lightning container could simply
# download its data from the web before training.
(True, "")
])
def test_mount_failing_offline_runs(test_output_dirs: OutputFolderForTests,
is_lightning_model: bool,
expected_error: str) -> None:
"""
Test cases where MLRunner.mount_or_download_dataset raises an exception when running outside AzureML.
"""
def run() -> Any:
return _test_mount_for_lightning_container(test_output_dirs=test_output_dirs,
is_offline_run=True,
local_dataset=None,
azure_dataset="",
is_lightning_model=is_lightning_model)
if expected_error:
with pytest.raises(ValueError) as ex:
run()
assert expected_error in str(ex)
else:
assert run().local_dataset is None
def test_mount_in_azureml1(test_output_dirs: OutputFolderForTests) -> None:
"""
Test that, when running inside AzureML, an InnerEye built-in model with an Azure dataset gets that dataset mounted.
"""
container = _test_mount_for_lightning_container(test_output_dirs=test_output_dirs,
is_offline_run=False,
local_dataset=None,
azure_dataset="foo",
is_lightning_model=False)
assert "mounted" in str(container.local_dataset)
def test_mount_in_azureml2(test_output_dirs: OutputFolderForTests) -> None:
"""
Test that, when running inside AzureML, a Lightning container without any dataset keeps local_dataset set to None.
"""
container = _test_mount_for_lightning_container(test_output_dirs=test_output_dirs,
is_offline_run=False,
local_dataset=None,
azure_dataset="",
is_lightning_model=True)
assert container.local_dataset is None
def test_mount_or_download(test_output_dirs: OutputFolderForTests) -> None:
"""
Tests the different combinations of local and Azure datasets, with InnerEye built-in models and Lightning container models.
"""
root = test_output_dirs.root_dir
for is_lightning_model in [True, False]:
# With runs outside of AzureML, an AML dataset should get downloaded.
container = _test_mount_for_lightning_container(test_output_dirs=test_output_dirs,
is_offline_run=True,
local_dataset=None,
azure_dataset="foo",
is_lightning_model=is_lightning_model)
assert "downloaded" in str(container.local_dataset)
# For all InnerEye built-in models, the paths from container level need to be copied down to legacy config
# level.
if not is_lightning_model:
assert container.config.local_dataset == container.local_dataset
# With runs in AzureML, an AML dataset should get mounted.
container = _test_mount_for_lightning_container(test_output_dirs=test_output_dirs,
is_offline_run=False,
local_dataset=None,
azure_dataset="foo",
is_lightning_model=is_lightning_model)
assert "mounted" in str(container.local_dataset)
if not is_lightning_model:
assert container.config.local_dataset == container.local_dataset
# With runs outside of AzureML, a local dataset should be used as-is. Azure dataset ID is ignored here.
shutil.copy(full_ml_test_data_path(DATASET_CSV_FILE_NAME), root / DATASET_CSV_FILE_NAME)
container = _test_mount_for_lightning_container(test_output_dirs=test_output_dirs,
is_offline_run=True,
local_dataset=root,
azure_dataset="",
is_lightning_model=is_lightning_model)
assert container.local_dataset == root
if not is_lightning_model:
assert container.config.local_dataset == container.local_dataset
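# Editorial sketch, not part of the diff: the decision matrix these tests pin down, written as a
# small standalone function (field names mirror the ones used above; the string return values are
# placeholders for the download/mount actions).
from pathlib import Path
from typing import Optional, Union

def resolve_dataset(local_dataset: Optional[Path],
                    azure_dataset_id: str,
                    is_offline_run: bool) -> Optional[Union[Path, str]]:
    if local_dataset is not None:
        # An explicit local dataset is always used as-is; any Azure dataset ID is ignored.
        return local_dataset
    if not azure_dataset_id:
        # Lightning containers may legitimately run without any dataset.
        return None
    # Azure datasets are downloaded when running outside AzureML, and mounted inside AzureML.
    return "download" if is_offline_run else "mount"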

Просмотреть файл

@ -0,0 +1,229 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
from io import StringIO
from unittest import mock
import pandas as pd
import pytest
from pytorch_lightning import LightningModule
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.common import ModelExecutionMode
from InnerEye.ML.deep_learning_config import ARGS_TXT, DatasetParams, WorkflowParams
from InnerEye.ML.lightning_base import InnerEyeContainer
from InnerEye.ML.lightning_container import LightningContainer
from InnerEye.ML.model_config_base import ModelConfigBase
from InnerEye.ML.run_ml import MLRunner
from Tests.ML.configs.DummyModel import DummyModel
from Tests.ML.configs.lightning_test_containers import DummyContainerWithModel, DummyContainerWithPlainLightning
from Tests.ML.util import default_runner
def test_run_container_in_situ(test_output_dirs: OutputFolderForTests) -> None:
"""
Test if we can get the config loader to load a Lightning container model, and then train locally.
"""
runner = default_runner()
local_dataset = test_output_dirs.root_dir / "dataset"
local_dataset.mkdir()
args = ["", "--model=DummyContainerWithModel", "--model_configs_namespace=Tests.ML.configs",
f"--output_to={test_output_dirs.root_dir}", f"--local_dataset={local_dataset}"]
with mock.patch("sys.argv", args):
loaded_config, actual_run = runner.run()
assert actual_run is None
assert isinstance(runner.lightning_container, DummyContainerWithModel)
# Test if the outputs folder is relative to the folder that we specified via the commandline
runner.lightning_container.outputs_folder.relative_to(test_output_dirs.root_dir)
results = runner.lightning_container.outputs_folder
# Test that the setup method has been called
assert runner.lightning_container.local_dataset is not None
assert (runner.lightning_container.local_dataset / "setup.txt").is_file()
# Test if all the files that are written during inference exist. Data for all 3 splits must be processed
assert (results / "on_inference_start.txt").is_file()
assert (results / "on_inference_end.txt").is_file()
for mode in ModelExecutionMode:
assert (results / f"on_inference_start_{mode.value}.txt").is_file()
assert (results / f"on_inference_end_{mode.value}.txt").is_file()
step_results = results / f"inference_step_{mode.value}.txt"
assert step_results.is_file()
# We should have one line per data item, and there are around 6 of them
result_lines = [line for line in step_results.read_text().splitlines() if line.strip()]
assert len(result_lines) >= 5
metrics_per_split = pd.read_csv(results / "metrics_per_split.csv")
# Training should have reduced the MSE to pretty much zero.
expected = pd.read_csv(StringIO("""Split,MSE
Test,1e-7
Val,1e-7
Train,1e-7"""))
pd.testing.assert_frame_equal(metrics_per_split, expected, check_less_precise=True)
# Test if we have an args file that lists all parameters
args_file = (results / ARGS_TXT).read_text()
assert "Container:" in args_file
assert "adam_betas" in args_file
# Report generation must run
assert (results / "create_report.txt").is_file()
def test_run_container_with_plain_lightning_in_situ(test_output_dirs: OutputFolderForTests) -> None:
"""
Test if we can train a plain Lightning model, without any additional methods defined, end-to-end.
"""
runner = default_runner()
local_dataset = test_output_dirs.root_dir / "dataset"
local_dataset.mkdir()
args = ["", "--model=DummyContainerWithPlainLightning", "--model_configs_namespace=Tests.ML.configs",
f"--output_to={test_output_dirs.root_dir}", f"--local_dataset={local_dataset}"]
with mock.patch("sys.argv", args):
loaded_config, actual_run = runner.run()
assert actual_run is None
assert isinstance(runner.lightning_container, DummyContainerWithPlainLightning)
# Test if the outputs folder is relative to the folder that we specified via the commandline
runner.lightning_container.outputs_folder.relative_to(test_output_dirs.root_dir)
results = runner.lightning_container.outputs_folder
# Test if all the files that are written during inference exist.
assert not (results / "on_inference_start.txt").is_file()
assert (results / "test_step.txt").is_file()
def test_innereye_container_init() -> None:
"""
Test if the constructor of the InnerEye container copies attributes as expected.
"""
# The constructor should copy all fields that belong to either WorkflowParams or DatasetParams from the
# config object to the container.
for (attrib, type_) in [("weights_url", WorkflowParams), ("azure_dataset_id", DatasetParams)]:
config = ModelConfigBase()
assert hasattr(type_, attrib)
assert hasattr(config, attrib)
setattr(config, attrib, "foo")
container = InnerEyeContainer(config)
assert getattr(container, attrib) == "foo"
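# Editorial sketch, not part of the diff: the copying behaviour checked above, assuming plain
# attribute assignment for each shared field name.
from typing import Tuple

def copy_shared_fields(config: ModelConfigBase,
                       container: InnerEyeContainer,
                       field_names: Tuple[str, ...] = ("weights_url", "azure_dataset_id")) -> None:
    for name in field_names:
        setattr(container, name, getattr(config, name))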
def test_create_fastmri_container() -> None:
"""
Test if we can create a model that uses the fastMRI submodule. This is effectively just testing module imports,
and that the submodule has been checked out correctly.
"""
from InnerEye.ML.configs.other.fastmri_varnet import VarNetWithImageLogging
from Tests.ML.configs.fastmri_random import FastMriOnRandomData
FastMriOnRandomData()
VarNetWithImageLogging()
@pytest.mark.gpu
def test_run_fastmri_container(test_output_dirs: OutputFolderForTests) -> None:
"""
Test if we can run the fastMRI model end-to-end. This takes about 2 minutes on a CPU machine, hence it is only
run in AzureML.
"""
runner = default_runner()
dataset_dir = test_output_dirs.root_dir / "dataset"
dataset_dir.mkdir(parents=True)
args = ["", "--model=FastMriOnRandomData",
f"--output_to={test_output_dirs.root_dir}",
"--model_configs_namespace=Tests.ML.configs"]
with mock.patch("sys.argv", args):
loaded_config, actual_run = runner.run()
assert actual_run is None
from Tests.ML.configs.fastmri_random import FastMriOnRandomData
assert isinstance(runner.lightning_container, FastMriOnRandomData)
def test_model_name_is_set(test_output_dirs: OutputFolderForTests) -> None:
container = DummyContainerWithModel()
container.local_dataset = test_output_dirs.root_dir
runner = MLRunner(model_config=None, container=container)
runner.setup()
expected_name = "DummyContainerWithModel"
assert runner.container._model_name == expected_name
assert expected_name in str(runner.container.outputs_folder)
def test_model_name_for_innereye_container() -> None:
"""
Test if the InnerEye container picks up the name of the model correctly. The name will impact the output folder
structure that is created.
"""
expected_name = "DummyModel"
model = DummyModel()
assert model.model_name == expected_name
container = InnerEyeContainer(model)
assert container.model_name == expected_name
class DummyContainerWithFields(LightningContainer):
def __init__(self) -> None:
super().__init__()
self.perform_training_set_inference = True
self.num_epochs = 123456
self.l_rate = 1e-2
def create_model(self) -> LightningModule:
return LightningModule()
def test_container_to_str() -> None:
"""
Test what the string representation of a container looks like.
"""
c = DummyContainerWithFields()
# Set any other field that is not done via the params library
c.foo = "bar"
s = str(c)
print(s)
assert "foo" in s
assert "bar" in s
assert "param" not in s
assert "initialized" not in s
assert "123456" in s
def test_file_system_with_subfolders(test_output_dirs: OutputFolderForTests) -> None:
"""
Test if a subfolder can be created within the output folder structure, for use with cross validation.
"""
model = DummyModel()
model.set_output_to(test_output_dirs.root_dir)
container = InnerEyeContainer(model)
# File system should be copied from model config to container
assert container.file_system_config == model.file_system_config
runner = MLRunner(model_config=model)
runner.setup()
assert str(runner.container.outputs_folder).endswith(model.model_name)
output_subfolder = "foo"
expected_folder = runner.container.outputs_folder / output_subfolder
runner = MLRunner(model_config=model, output_subfolder=output_subfolder)
runner.setup()
assert runner.container.outputs_folder == expected_folder
def test_optim_params1(test_output_dirs: OutputFolderForTests) -> None:
"""
Test if the optimizer parameters are read correctly for InnerEye configs.
"""
model = DummyModel()
model.set_output_to(test_output_dirs.root_dir)
runner = MLRunner(model_config=model)
runner.setup()
lightning_model = runner.container.model
optim, _ = lightning_model.configure_optimizers()
assert optim[0].param_groups[0]["lr"] == 1e-3
def test_optim_params2(test_output_dirs: OutputFolderForTests) -> None:
"""
Test if the optimizer parameters are read correctly for containers.
"""
container = DummyContainerWithModel()
container.local_dataset = test_output_dirs.root_dir
runner = MLRunner(model_config=None, container=container)
runner.setup()
lightning_model = runner.container.model
optim, _ = lightning_model.configure_optimizers()
expected_lr = 1e-1
assert container.l_rate == expected_lr
assert optim[0].param_groups[0]["lr"] == expected_lr
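# Editorial sketch, not part of the diff: what the assertions above inspect. configure_optimizers
# returns optimizers whose param_groups expose the learning rate under the "lr" key.
import torch

_params = [torch.nn.Parameter(torch.zeros(1))]
_optimizer = torch.optim.Adam(_params, lr=1e-1)
assert _optimizer.param_groups[0]["lr"] == 1e-1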

Просмотреть файл

@ -14,9 +14,8 @@ from InnerEye.ML.common import BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX, ModelExecu
from InnerEye.ML.configs.classification.DummyClassification import DummyClassification
from InnerEye.ML.metrics import InferenceMetricsForClassification
from InnerEye.ML.model_testing import model_test
from InnerEye.ML.model_training import model_train
from InnerEye.ML.utils.run_recovery import RunRecovery
from Tests.ML.util import get_default_checkpoint_handler
from Tests.ML.util import get_default_checkpoint_handler, model_train_unittest
# @pytest.mark.parametrize("mean_teacher_model", [True, False])
@ -35,10 +34,8 @@ def test_recover_testing_from_run_recovery(mean_teacher_model: bool,
os.makedirs(str(config.outputs_folder))
config.recovery_checkpoint_save_interval = 2
checkpoint_handler = get_default_checkpoint_handler(model_config=config,
project_root=test_output_dirs.root_dir)
train_results = model_train(config, checkpoint_handler=checkpoint_handler)
assert len(train_results.train_results_per_epoch) == config.num_epochs
train_results, checkpoint_handler = model_train_unittest(config, dirs=test_output_dirs)
assert len(train_results.train_results_per_epoch()) == config.num_epochs
# Run inference on this
test_results = model_test(config=config, data_split=ModelExecutionMode.TEST, checkpoint_handler=checkpoint_handler)

Просмотреть файл

@ -19,7 +19,6 @@ from InnerEye.Common.common_util import SUBJECT_METRICS_FILE_NAME, is_windows, l
from InnerEye.Common.fixed_paths_for_tests import full_ml_test_data_path
from InnerEye.Common.metrics_constants import MetricType, TrackedMetrics, VALIDATION_PREFIX
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML import model_training
from InnerEye.ML.common import BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX, DATASET_CSV_FILE_NAME, ModelExecutionMode, \
RECOVERY_CHECKPOINT_FILE_NAME_WITH_SUFFIX, \
STORED_CSV_FILE_NAMES
@ -27,15 +26,15 @@ from InnerEye.ML.config import MixtureLossComponent, SegmentationLoss
from InnerEye.ML.configs.classification.DummyClassification import DummyClassification
from InnerEye.ML.dataset.sample import CroppedSample
from InnerEye.ML.deep_learning_config import DeepLearningConfig
from InnerEye.ML.model_training import aggregate_and_create_subject_metrics_file, model_train
from InnerEye.ML.lightning_loggers import StoringLogger
from InnerEye.ML.model_training import aggregate_and_create_subject_metrics_file
from InnerEye.ML.models.losses.mixture import MixtureLoss
from InnerEye.ML.utils.io_util import load_nifti_image
from InnerEye.ML.utils.model_util import create_segmentation_loss_function
from InnerEye.ML.utils.run_recovery import RunRecovery
from InnerEye.ML.utils.training_util import ModelTrainingResults
from InnerEye.ML.visualizers.patch_sampling import PATCH_SAMPLING_FOLDER
from Tests.ML.configs.DummyModel import DummyModel
from Tests.ML.util import get_default_checkpoint_handler, machine_has_gpu
from Tests.ML.util import get_default_checkpoint_handler, machine_has_gpu, model_train_unittest
config_path = full_ml_test_data_path()
base_path = full_ml_test_data_path()
@ -102,22 +101,24 @@ def _test_model_train(output_dirs: OutputFolderForTests,
train_config.recovery_checkpoint_save_interval = 1
if machine_has_gpu:
expected_train_losses = [0.4553468, 0.454904]
expected_val_losses = [0.4553881, 0.4553041]
expected_train_losses = [0.4552919, 0.4548529]
expected_val_losses = [0.455389, 0.455306]
else:
expected_train_losses = [0.4553469, 0.4548947]
expected_val_losses = [0.4553880, 0.4553041]
expected_train_losses = [0.4552919, 0.4548538]
expected_val_losses = [0.4553891, 0.4553060]
loss_absolute_tolerance = 1e-6
expected_learning_rates = [train_config.l_rate, 5.3589e-4]
checkpoint_handler = get_default_checkpoint_handler(model_config=train_config,
project_root=Path(output_dirs.root_dir))
model_training_result = model_training.model_train(train_config,
checkpoint_handler=checkpoint_handler)
assert isinstance(model_training_result, ModelTrainingResults)
model_training_result, _ = model_train_unittest(train_config, dirs=output_dirs)
assert isinstance(model_training_result, StoringLogger)
actual_train_losses = model_training_result.get_train_metric(MetricType.LOSS.value)
actual_val_losses = model_training_result.get_val_metric(MetricType.LOSS.value)
print("actual_train_losses = {}".format(actual_train_losses))
print("actual_val_losses = {}".format(actual_val_losses))
def assert_all_close(metric: str, expected: List[float], **kwargs: Any) -> None:
actual = model_training_result.get_training_metric(metric)
actual = model_training_result.get_train_metric(metric)
assert np.allclose(actual, expected, **kwargs), f"Mismatch for {metric}: Got {actual}, expected {expected}"
# check to make sure training batches are NOT all the same across epochs
@ -135,28 +136,24 @@ def _test_model_train(output_dirs: OutputFolderForTests,
# and be the same across 'region' and 'region_1' because they derive from the same Nifti files.
# The following values are read off directly from the results of compute_dice_across_patches in the training loop
# This checks that averages are computed correctly, and that metric computers are reset after each epoch.
train_voxels = [[83092.0, 83212.0, 82946.0], [83000.0, 82881.0, 83309.0]]
train_voxels = [[82860.0, 83212.0, 83087.0], [82831.0, 82900.0, 83212.0]]
val_voxels = [[82765.0, 83212.0], [82765.0, 83212.0]]
_check_voxel_count(model_training_result.train_results_per_epoch, _mean_list(train_voxels), "Train")
_check_voxel_count(model_training_result.val_results_per_epoch, _mean_list(val_voxels), "Val")
_check_voxel_count(model_training_result.train_results_per_epoch(), _mean_list(train_voxels), "Train")
_check_voxel_count(model_training_result.val_results_per_epoch(), _mean_list(val_voxels), "Val")
actual_train_losses = model_training_result.get_training_metric(MetricType.LOSS.value)
actual_val_losses = model_training_result.get_validation_metric(MetricType.LOSS.value)
print("actual_train_losses = {}".format(actual_train_losses))
print("actual_val_losses = {}".format(actual_val_losses))
assert np.allclose(actual_train_losses, expected_train_losses, atol=loss_absolute_tolerance), "Train losses"
assert np.allclose(actual_val_losses, expected_val_losses, atol=loss_absolute_tolerance), "Val losses"
# Check that the metric we track for Hyperdrive runs is actually written.
assert TrackedMetrics.Val_Loss.value.startswith(VALIDATION_PREFIX)
tracked_metric = TrackedMetrics.Val_Loss.value[len(VALIDATION_PREFIX):]
for val_result in model_training_result.val_results_per_epoch:
for val_result in model_training_result.val_results_per_epoch():
assert tracked_metric in val_result
# The following values are read off directly from the results of compute_dice_across_patches in the
# training loop. Results are slightly different for CPU, hence use a larger tolerance there.
dice_tolerance = 1e-4 if machine_has_gpu else 4.5e-4
train_dice_region = [[0.0, 0.0, 4.0282e-04], [0.0309, 0.0334, 0.0961]]
train_dice_region1 = [[0.4806, 0.4800, 0.4832], [0.4812, 0.4842, 0.4663]]
# training loop. Results are slightly different for GPU, hence use a larger tolerance there.
dice_tolerance = 1e-3 if machine_has_gpu else 4.5e-4
train_dice_region = [[0.0, 0.0, 4.0282e-04], [0.0372, 0.0388, 0.1091]]
train_dice_region1 = [[0.4785, 0.4807, 0.4834], [0.4832, 0.4800, 0.4628]]
# There appears to be some amount of non-determinism here: when using a tolerance of 1e-4, we get occasional
# test failures on Linux in the cloud (not on Windows, not on AzureML). It is unclear where this comes from. Even
# when failing here, the losses match up to the expected tolerance.
@ -192,10 +189,10 @@ def _test_model_train(output_dirs: OutputFolderForTests,
assert len(list(sampling_folder.rglob("*.png"))) == 3 * train_config.show_patch_sampling
# Time per epoch: Test that we have all these times logged.
model_training_result.get_training_metric(MetricType.SECONDS_PER_EPOCH.value)
model_training_result.get_validation_metric(MetricType.SECONDS_PER_EPOCH.value)
model_training_result.get_validation_metric(MetricType.SECONDS_PER_BATCH.value)
model_training_result.get_training_metric(MetricType.SECONDS_PER_BATCH.value)
model_training_result.get_train_metric(MetricType.SECONDS_PER_EPOCH.value)
model_training_result.get_val_metric(MetricType.SECONDS_PER_EPOCH.value)
model_training_result.get_val_metric(MetricType.SECONDS_PER_BATCH.value)
model_training_result.get_train_metric(MetricType.SECONDS_PER_BATCH.value)
# Issue #372
# # Test for saving of example images
@ -323,9 +320,7 @@ def test_recover_training_mean_teacher_model(test_output_dirs: OutputFolderForTe
# First round of training
config.num_epochs = 2
checkpoint_handler = get_default_checkpoint_handler(model_config=config,
project_root=test_output_dirs.root_dir)
model_train(config, checkpoint_handler=checkpoint_handler)
model_train_unittest(config, dirs=test_output_dirs)
assert len(list(config.checkpoint_folder.glob("*.*"))) == 2
# Restart training from previous run
@ -336,9 +331,13 @@ def test_recover_training_mean_teacher_model(test_output_dirs: OutputFolderForTe
# make it seem like run recovery objects have been downloaded
checkpoint_root = config.checkpoint_folder / "old_run"
shutil.copytree(str(original_checkpoint_folder), str(checkpoint_root))
# Create a new checkpoint handler and set run_recovery to the copied checkpoints
checkpoint_handler = get_default_checkpoint_handler(model_config=config,
project_root=test_output_dirs.root_dir)
checkpoint_handler.run_recovery = RunRecovery([checkpoint_root])
model_train(config, checkpoint_handler=checkpoint_handler)
model_train_unittest(config, dirs=test_output_dirs, checkpoint_handler=checkpoint_handler)
# remove recovery checkpoints
shutil.rmtree(checkpoint_root)
assert len(list(config.checkpoint_folder.glob("*.*"))) == 2

Просмотреть файл

@ -4,7 +4,7 @@
# ------------------------------------------------------------------------------------------
import logging
from pathlib import Path
from typing import Any, List, Optional, Union
from typing import Any, List, Optional, Tuple, Union
import numpy as np
import pytest
@ -15,15 +15,21 @@ from azureml.core import Workspace
from InnerEye.Azure.azure_config import AzureConfig
from InnerEye.Common import fixed_paths
from InnerEye.Common.fixed_paths_for_tests import full_ml_test_data_path
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.Common.type_annotations import PathOrString, TupleInt3
from InnerEye.ML.config import SegmentationModelBase
from InnerEye.ML.dataset.full_image_dataset import PatientDatasetSource
from InnerEye.ML.dataset.sample import PatientMetadata, Sample
from InnerEye.ML.deep_learning_config import DeepLearningConfig
from InnerEye.ML.lightning_base import InnerEyeContainer
from InnerEye.ML.lightning_container import LightningContainer
from InnerEye.ML.lightning_loggers import StoringLogger
from InnerEye.ML.model_training import model_train
from InnerEye.ML.photometric_normalization import PhotometricNormalization
from InnerEye.ML.run_ml import MLRunner
from InnerEye.ML.runner import Runner
from InnerEye.ML.utils import io_util
from InnerEye.ML.utils.checkpoint_handling import CheckpointHandler
from InnerEye.ML.utils.config_util import ModelConfigLoader
from InnerEye.ML.utils.config_loader import ModelConfigLoader
from InnerEye.ML.utils.io_util import ImageHeader, ImageWithHeader
from InnerEye.ML.utils.ml_util import is_gpu_available
@ -185,12 +191,12 @@ def assert_binary_files_match(actual_file: Path, expected_file: Path) -> None:
DummyPatientMetadata = PatientMetadata(patient_id='42')
def get_model_loader(namespace: Optional[str] = None) -> ModelConfigLoader[SegmentationModelBase]:
def get_model_loader(namespace: Optional[str] = None) -> ModelConfigLoader:
"""
Returns a ModelConfigLoader for segmentation models, with the given non-default namespace (if not None)
to search under.
"""
return ModelConfigLoader[SegmentationModelBase](model_configs_namespace=namespace)
return ModelConfigLoader(model_configs_namespace=namespace)
def get_default_azure_config() -> AzureConfig:
@ -206,7 +212,9 @@ def get_default_checkpoint_handler(model_config: DeepLearningConfig, project_roo
Gets a checkpoint handler, using the given model config and the default azure configuration.
"""
azure_config = get_default_azure_config()
return CheckpointHandler(azure_config=azure_config, model_config=model_config,
lightning_container = InnerEyeContainer(model_config)
return CheckpointHandler(azure_config=azure_config,
container=lightning_container,
project_root=project_root)
@ -216,3 +224,46 @@ def get_default_workspace() -> Workspace:
:return:
"""
return get_default_azure_config().get_workspace()
def model_train_unittest(config: Optional[DeepLearningConfig],
dirs: OutputFolderForTests,
checkpoint_handler: Optional[CheckpointHandler] = None,
lightning_container: Optional[LightningContainer] = None) -> \
Tuple[StoringLogger, CheckpointHandler]:
"""
A shortcut for running model training in the unit test suite. It runs training for the given config, with the
default checkpoint handler initialized to point to the test output folder specified in dirs.
:param config: The configuration of the model to train.
:param dirs: The test fixture that provides an output folder for the test.
:param checkpoint_handler: The checkpoint handler that should be used for training. If not provided, it will be
created via get_default_checkpoint_handler.
:param lightning_container: An optional LightningContainer object that will be passed through to the training routine.
:return: A tuple of (storing logger that holds all training metrics, the checkpoint handler that was used).
"""
runner = MLRunner(model_config=config, container=lightning_container)
# Setup will set random seeds before model creation, and store the created model in the container.
# The container initialized in this way is then used for training below.
# For all tests running in AzureML, we need to skip the downloading of datasets that would otherwise happen,
# because all unit test configs come with their own local dataset already.
runner.setup(use_mount_or_download_dataset=False)
if checkpoint_handler is None:
azure_config = get_default_azure_config()
checkpoint_handler = CheckpointHandler(azure_config=azure_config,
container=runner.container,
project_root=dirs.root_dir)
_, storing_logger = model_train(checkpoint_handler=checkpoint_handler,
container=runner.container)
return storing_logger, checkpoint_handler # type: ignore
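# Editorial sketch, not part of the diff: typical use of model_train_unittest inside a test,
# assuming DummyClassification is acceptable to import here for illustration.
def _example_training_smoke_test(test_output_dirs: OutputFolderForTests) -> None:
    from InnerEye.ML.configs.classification.DummyClassification import DummyClassification
    config = DummyClassification()
    config.set_output_to(test_output_dirs.root_dir)
    config.num_epochs = 2
    storing_logger, checkpoint_handler = model_train_unittest(config, dirs=test_output_dirs)
    assert len(storing_logger.train_results_per_epoch()) == config.num_epochs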
def default_runner() -> Runner:
"""
Create an InnerEye Runner object with the default settings, pointing to the repository root and
default settings files.
"""
return Runner(project_root=fixed_paths.repository_root_directory(),
yaml_config_file=fixed_paths.SETTINGS_YAML_FILE)
model_loader_including_tests = get_model_loader(namespace="Tests.ML.configs")

Просмотреть файл

@ -5,6 +5,7 @@
import os
from pathlib import Path
from unittest import mock
from urllib.parse import urlparse
import pytest
@ -49,20 +50,20 @@ def test_use_local_weights_file(test_output_dirs: OutputFolderForTests) -> None:
assert not checkpoint_handler.local_weights_path
# weights from local_weights_path and weights_url will be modified if needed and stored at this location
expected_path = checkpoint_handler.model_config.outputs_folder / WEIGHTS_FILE
expected_path = checkpoint_handler.output_params.outputs_folder / WEIGHTS_FILE
# Set a weights_path
checkpoint_handler.azure_config.run_recovery_id = ""
config.weights_url = EXTERNAL_WEIGHTS_URL_EXAMPLE
checkpoint_handler.container.weights_url = EXTERNAL_WEIGHTS_URL_EXAMPLE
checkpoint_handler.download_recovery_checkpoints_or_weights()
assert checkpoint_handler.local_weights_path == expected_path
assert checkpoint_handler.local_weights_path.is_file()
# set a local_weights_path
config.weights_url = ""
checkpoint_handler.container.weights_url = ""
local_weights_path = test_output_dirs.root_dir / "exist.pth"
create_checkpoint_file(local_weights_path)
config.local_weights_path = local_weights_path
checkpoint_handler.container.local_weights_path = local_weights_path
checkpoint_handler.download_recovery_checkpoints_or_weights()
assert checkpoint_handler.local_weights_path == expected_path
@ -113,35 +114,35 @@ def test_get_recovery_path_train(test_output_dirs: OutputFolderForTests) -> None
assert checkpoint_handler.get_recovery_path_train() is None
# weights from local_weights_path and weights_url will be modified if needed and stored at this location
expected_path = checkpoint_handler.model_config.outputs_folder / WEIGHTS_FILE
expected_path = checkpoint_handler.output_params.outputs_folder / WEIGHTS_FILE
# Set a weights_url to get checkpoint from
checkpoint_handler.azure_config.run_recovery_id = ""
config.weights_url = EXTERNAL_WEIGHTS_URL_EXAMPLE
checkpoint_handler.container.weights_url = EXTERNAL_WEIGHTS_URL_EXAMPLE
checkpoint_handler.download_recovery_checkpoints_or_weights()
assert checkpoint_handler.local_weights_path == expected_path
config.start_epoch = 0
checkpoint_handler.container.start_epoch = 0
assert checkpoint_handler.get_recovery_path_train() == expected_path
# Can't resume training from an external checkpoint
config.start_epoch = 20
checkpoint_handler.container.start_epoch = 20
with pytest.raises(ValueError) as ex:
checkpoint_handler.get_recovery_path_train()
assert ex.value.args == "Start epoch is > 0, but no run recovery object has been provided to resume training."
assert ex.value.args[0] == "Start epoch is > 0, but no run recovery object has been provided to resume training."
# Set a local_weights_path to get checkpoint from
config.weights_url = ""
checkpoint_handler.container.weights_url = ""
local_weights_path = test_output_dirs.root_dir / "exist.pth"
create_checkpoint_file(local_weights_path)
config.local_weights_path = local_weights_path
checkpoint_handler.container.local_weights_path = local_weights_path
checkpoint_handler.download_recovery_checkpoints_or_weights()
assert checkpoint_handler.local_weights_path == expected_path
config.start_epoch = 0
checkpoint_handler.container.start_epoch = 0
assert checkpoint_handler.get_recovery_path_train() == expected_path
# Can't resume training from an external checkpoint
config.start_epoch = 20
checkpoint_handler.container.start_epoch = 20
with pytest.raises(ValueError) as ex:
checkpoint_handler.get_recovery_path_train()
assert ex.value.args == "Start epoch is > 0, but no run recovery object has been provided to resume training."
assert ex.value.args[0] == "Start epoch is > 0, but no run recovery object has been provided to resume training."
@pytest.mark.after_training_single_run
@ -162,7 +163,7 @@ def test_get_recovery_path_train_single_run(test_output_dirs: OutputFolderForTes
assert "Run recovery set, but start epoch is 0" in ex.value.args[0]
# Run recovery with start epoch provided should succeed
config.start_epoch = 20
checkpoint_handler.container.start_epoch = 20
expected_path = create_recovery_checkpoint_path(path=config.checkpoint_folder / run_recovery_id.split(":")[1])
assert checkpoint_handler.get_recovery_path_train() == expected_path
@ -200,7 +201,7 @@ def test_get_best_checkpoint_single_run(test_output_dirs: OutputFolderForTests)
checkpoint_handler.azure_config.run_recovery_id = run_recovery_id
checkpoint_handler.download_recovery_checkpoints_or_weights()
config.start_epoch = 1
checkpoint_handler.container.start_epoch = 1
# There is no checkpoint in the current run - use the one from run_recovery
checkpoint_paths = checkpoint_handler.get_best_checkpoint()
expected_checkpoint = config.checkpoint_folder / run_recovery_id.split(":")[1] \
@ -246,16 +247,16 @@ def test_get_checkpoints_to_test(test_output_dirs: OutputFolderForTests) -> None
# so the local weights should be used ignoring any epochs to test
local_weights_path = test_output_dirs.root_dir / "exist.pth"
create_checkpoint_file(local_weights_path)
config.local_weights_path = local_weights_path
manage_recovery.container.local_weights_path = local_weights_path
manage_recovery.download_recovery_checkpoints_or_weights()
checkpoint_and_paths = manage_recovery.get_checkpoints_to_test()
assert checkpoint_and_paths
assert len(checkpoint_and_paths) == 1
assert checkpoint_and_paths[0] == manage_recovery.model_config.outputs_folder / WEIGHTS_FILE
assert checkpoint_and_paths[0] == manage_recovery.output_params.outputs_folder / WEIGHTS_FILE
config.start_epoch = 1
manage_recovery.container.start_epoch = 1
manage_recovery.additional_training_done()
config.checkpoint_folder.mkdir()
manage_recovery.container.checkpoint_folder.mkdir()
# Copy checkpoint to make it seem like training has happened
expected_checkpoint = config.checkpoint_folder / BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX
@ -325,13 +326,13 @@ def test_get_local_weights_path_or_download(test_output_dirs: OutputFolderForTes
# If local_weights_path folder exists, get_local_weights_path_or_download should not do anything.
local_weights_path = manage_recovery.project_root / "exist.pth"
create_checkpoint_file(local_weights_path)
manage_recovery.model_config.local_weights_path = local_weights_path
manage_recovery.container.local_weights_path = local_weights_path
returned_weights_path = manage_recovery.get_local_weights_path_or_download()
assert local_weights_path == returned_weights_path
# Pointing the model to a URL should trigger a download
config.local_weights_path = None
config.weights_url = EXTERNAL_WEIGHTS_URL_EXAMPLE
manage_recovery.container.local_weights_path = None
manage_recovery.container.weights_url = EXTERNAL_WEIGHTS_URL_EXAMPLE
downloaded_weights = manage_recovery.get_local_weights_path_or_download()
# Download goes into <project_root> / "modelweights" / "resnet18-5c106cde.pth"
expected_path = manage_recovery.project_root / MODEL_WEIGHTS_DIR_NAME / \
@ -361,7 +362,7 @@ def test_get_and_modify_local_weights(test_output_dirs: OutputFolderForTests) ->
assert "neither local_weights_path nor weights_url is set in the model config" in ex.value.args[0]
# Pointing the model to a local_weights_path that does not exist will raise an error.
config.local_weights_path = manage_recovery.project_root / "non_exist"
manage_recovery.container.local_weights_path = manage_recovery.project_root / "non_exist"
with pytest.raises(FileNotFoundError) as file_ex:
manage_recovery.get_and_save_modified_weights()
assert "Could not find the weights file" in file_ex.value.args[0]
@ -369,39 +370,42 @@ def test_get_and_modify_local_weights(test_output_dirs: OutputFolderForTests) ->
# Test that weights are properly modified when a local_weights_path is set
# set a method to modify weights:
ModelConfigBase.load_checkpoint_and_modify = lambda self, path_to_checkpoint: {"modified": "local", # type: ignore
"path": path_to_checkpoint}
# Set the local_weights_path to an empty file, which will be passed to modify_checkpoint
local_weights_path = manage_recovery.project_root / "exist.pth"
create_checkpoint_file(local_weights_path)
config.local_weights_path = local_weights_path
weights_path = manage_recovery.get_and_save_modified_weights()
expected_path = manage_recovery.model_config.outputs_folder / WEIGHTS_FILE
# read from weights_path and check that the dict has been written
assert weights_path.is_file()
assert expected_path == weights_path
read = torch.load(str(weights_path))
assert read.keys() == {"modified", "path"}
assert read["modified"] == "local"
assert read["path"] == local_weights_path
# clean up
weights_path.unlink()
with mock.patch.object(ModelConfigBase,
'load_checkpoint_and_modify',
lambda self, path_to_checkpoint: {"modified": "local", # type: ignore
"path": path_to_checkpoint}):
# Set the local_weights_path to an empty file, which will be passed to modify_checkpoint
local_weights_path = manage_recovery.project_root / "exist.pth"
create_checkpoint_file(local_weights_path)
manage_recovery.container.local_weights_path = local_weights_path
weights_path = manage_recovery.get_and_save_modified_weights()
expected_path = manage_recovery.output_params.outputs_folder / WEIGHTS_FILE
# read from weights_path and check that the dict has been written
assert weights_path.is_file()
assert expected_path == weights_path
read = torch.load(str(weights_path))
assert read.keys() == {"modified", "path"}
assert read["modified"] == "local"
assert read["path"] == local_weights_path
# clean up
weights_path.unlink()
# Test that weights are properly modified when weights_url is set
# set a different method to modify weights, to avoid using old files from other tests:
ModelConfigBase.load_checkpoint_and_modify = lambda self, path_to_checkpoint: {"modified": "url", # type: ignore
"path": path_to_checkpoint}
# Set the weights_url to the sample pytorch URL, which will be passed to modify_checkpoint
config.local_weights_path = None
config.weights_url = EXTERNAL_WEIGHTS_URL_EXAMPLE
weights_path = manage_recovery.get_and_save_modified_weights()
expected_path = manage_recovery.model_config.outputs_folder / WEIGHTS_FILE
# read from weights_path and check that the dict has been written
assert weights_path.is_file()
assert expected_path == weights_path
read = torch.load(str(weights_path))
assert read.keys() == {"modified", "path"}
assert read["modified"] == "url"
assert read["path"] == manage_recovery.project_root / MODEL_WEIGHTS_DIR_NAME / \
os.path.basename(urlparse(EXTERNAL_WEIGHTS_URL_EXAMPLE).path)
with mock.patch.object(ModelConfigBase,
'load_checkpoint_and_modify',
lambda self, path_to_checkpoint: {"modified": "url", "path": path_to_checkpoint}):
# Set the weights_url to the sample pytorch URL, which will be passed to modify_checkpoint
manage_recovery.container.local_weights_path = None
manage_recovery.container.weights_url = EXTERNAL_WEIGHTS_URL_EXAMPLE
weights_path = manage_recovery.get_and_save_modified_weights()
expected_path = manage_recovery.output_params.outputs_folder / WEIGHTS_FILE
# read from weights_path and check that the dict has been written
assert weights_path.is_file()
assert expected_path == weights_path
read = torch.load(str(weights_path))
assert read.keys() == {"modified", "path"}
assert read["modified"] == "url"
assert read["path"] == manage_recovery.project_root / MODEL_WEIGHTS_DIR_NAME / \
os.path.basename(urlparse(EXTERNAL_WEIGHTS_URL_EXAMPLE).path)


@ -161,7 +161,7 @@ def _create_lr_scheduler_and_optimizer(config: SegmentationModelBase, optimizer:
if optimizer is None:
optimizer = _create_dummy_optimizer(config)
# create lr scheduler
lr_scheduler = SchedulerWithWarmUp(config, optimizer)
lr_scheduler = SchedulerWithWarmUp(config, optimizer, num_epochs=config.num_epochs)
return lr_scheduler, optimizer
@ -215,7 +215,7 @@ def test_lr_scheduler_with_warmup(warmup_epochs: int, expected_values: List[floa
l_rate_warmup_epochs=warmup_epochs,
l_rate_warmup=LRWarmUpType.Linear,
should_validate=False)
scheduler = SchedulerWithWarmUp(config, optimizer)
scheduler = SchedulerWithWarmUp(config, optimizer, num_epochs=config.num_epochs)
lrs = enumerate_scheduler(scheduler, 4)
assert lrs == expected_values


@ -15,7 +15,9 @@ from InnerEye.ML.common import BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX, LAST_CHECK
LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX, RECOVERY_CHECKPOINT_FILE_NAME, RECOVERY_CHECKPOINT_FILE_NAME_WITH_SUFFIX, \
cleanup_checkpoint_folder, keep_best_checkpoint, keep_latest
from InnerEye.ML.config import SegmentationModelBase
from InnerEye.ML.lightning_helpers import create_lightning_model, load_from_checkpoint_and_adjust_for_inference
from InnerEye.ML.lightning_base import InnerEyeContainer
from InnerEye.ML.lightning_helpers import load_from_checkpoint_and_adjust_for_inference
from InnerEye.ML.lightning_models import create_lightning_model
from InnerEye.ML.model_config_base import ModelConfigBase
from InnerEye.ML.model_training import create_lightning_trainer
from Tests.ML.configs.ClassificationModelForTesting import ClassificationModelForTesting
@ -34,7 +36,8 @@ def create_model_and_store_checkpoint(config: ModelConfigBase, checkpoint_path:
:param config: The model configuration.
:param checkpoint_path: The path and filename of the checkpoint file.
"""
trainer, _ = create_lightning_trainer(config)
container = InnerEyeContainer(config)
trainer, _ = create_lightning_trainer(container)
model = create_lightning_model(config)
if machine_has_gpu:
model = model.cuda() # type: ignore


@ -397,7 +397,7 @@ def test_run_ml_with_multi_label_sequence_in_crossval(test_output_dirs: OutputFo
config.number_of_cross_validation_splits = 2
azure_config = get_default_azure_config()
azure_config.train = True
MLRunner(config, azure_config).run()
MLRunner(config, azure_config=azure_config).run()
def test_load_files_with_prediction_target() -> None:


@ -148,3 +148,18 @@ jobs:
parameters:
pytest_mark: after_training_glaucoma_cv_run
test_run_title: tests_after_training_glaucoma_cv_run
- job: TrainHelloWorld
variables:
- name: model
value: 'HelloWorld'
- name: tag
value: 'HelloWorldPR'
pool:
vmImage: 'ubuntu-18.04'
steps:
- template: train_template.yml
parameters:
wait_for_completion: 'True'
pytest_mark: ''
max_run_duration: '1h'


@ -1,6 +1,7 @@
steps:
- checkout: self
lfs: true
submodules: true
- bash: |
if [ $(Agent.OS) = 'Windows_NT' ]


@ -0,0 +1,229 @@
# Bring Your Own PyTorch Lightning Model
The InnerEye toolbox is capable of training any PyTorch Lightning (PL) model inside of AzureML, making
use of all the usual InnerEye toolbox features:
- Working with different models in the same codebase, and selecting one by name
- Distributed training in AzureML
- Logging via AzureML's native capabilities
- Training on a local GPU machine or inside of AzureML without code changes
- Supplying commandline overrides for model configuration elements, to quickly queue many jobs
This can be used by:
- Defining a special container class that encapsulates the PyTorch Lightning model to train, and the data that should
be used for training and testing.
- Adding essential trainer parameters like the number of epochs to that container.
- Invoking the InnerEye runner and providing the name of the container class, like this:
`python InnerEye/ML/runner.py --model=MyContainer`. To train in AzureML, just add a `--azureml=True` flag.
There is a fully working example, [HelloContainer](../InnerEye/ML/configs/other/HelloContainer.py), that implements
a simple 1-dimensional regression model from data stored in a CSV file. You can run it
from the command line with `python InnerEye/ML/runner.py --model=HelloContainer`.
## Setup
In order to use these capabilities, you need to implement a class deriving from `LightningContainer`. This class
encapsulates everything that is needed for training with PyTorch Lightning:
- The `create_model` method needs to return an instance of a `LightningModule` subclass that has
all the usual PyTorch Lightning methods required for training, like the `training_step` and `forward` methods. This
object needs to adhere to additional constraints; see below.
- The `get_data_module` method of the container needs to return a `LightningDataModule` that has the data loaders for
training and validation data.
- The optional `get_inference_data_module` method returns a `LightningDataModule` that is used to read the data for inference
(that is, evaluating the trained model). By default, this returns the same data as `get_data_module`, but you
can override this for special models like segmentation models that are trained on equal-sized image patches, but
evaluated on full images of varying size.
Your class needs to be defined in a Python file in the `InnerEye/ML/configs` folder; otherwise it won't be picked up
correctly. If you'd like to have your model defined in a different folder, please specify the Python namespace via
the `--model_configs_namespace` argument. For example, use `--model_configs_namespace=My.Own.configs` if your
model configuration classes reside in folder `My/Own/configs` from the repository root.
*Example*:
```python
from pathlib import Path
from torch.utils.data import DataLoader
from pytorch_lightning import LightningModule, LightningDataModule
from InnerEye.ML.lightning_container import LightningContainer
class MyLightningModel(LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = ...

    def training_step(self, *args, **kwargs):
        ...

    def forward(self, *args, **kwargs):
        ...

    def configure_optimizers(self):
        ...

    def test_step(self, *args, **kwargs):
        ...

class MyDataModule(LightningDataModule):
    def __init__(self, root_path: Path):
        super().__init__()
        # All data should be read from the folder given in self.root_path
        self.root_path = root_path

    def train_dataloader(self, *args, **kwargs) -> DataLoader:
        ...

    def val_dataloader(self, *args, **kwargs) -> DataLoader:
        # The data should be read off self.root_path
        ...

    def test_dataloader(self, *args, **kwargs) -> DataLoader:
        # The data should be read off self.root_path
        ...

class MyContainer(LightningContainer):
    def __init__(self):
        super().__init__()
        self.azure_dataset_id = "folder_name_in_azure_blob_storage"
        self.local_dataset = Path("/some/local/path")
        self.num_epochs = 42

    def create_model(self) -> LightningModule:
        return MyLightningModel()

    def get_data_module(self) -> LightningDataModule:
        return MyDataModule(root_path=self.local_dataset)
```
Where does the data for training come from?
- When training a model on a local box or VM, the data is read from the `local_dataset` folder that you define in the
container.
- When training a model in AzureML, the code searches for a folder called `folder_name_in_azure_blob_storage` in
Azure blob storage. That folder is then downloaded or mounted. The local download path is then written to the `local_dataset`
field of the container, and hence you can always read data from `self.local_dataset`.
- Alternatively, you can use the `prepare_data` method of a `LightningDataModule` to download data from the web,
for example (see the sketch below). In this case, you don't need to define any of the `local_dataset` or `azure_dataset_id` fields.
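As a rough illustration of that alternative, a self-downloading data module could look like the sketch below. The URL
and file names are placeholders for this example only, not something provided by the toolbox:
```python
from pathlib import Path
from urllib.request import urlretrieve

from torch.utils.data import DataLoader
from pytorch_lightning import LightningDataModule

# Placeholder URL, purely for illustration.
DATA_URL = "https://example.com/my_dataset.csv"

class SelfDownloadingDataModule(LightningDataModule):
    def __init__(self, download_dir: Path = Path("downloaded_data")):
        super().__init__()
        self.download_dir = download_dir

    def prepare_data(self, *args, **kwargs) -> None:
        # Lightning calls this once per node before training starts: a good place to download data.
        self.download_dir.mkdir(parents=True, exist_ok=True)
        target = self.download_dir / "my_dataset.csv"
        if not target.exists():
            urlretrieve(DATA_URL, str(target))

    def train_dataloader(self, *args, **kwargs) -> DataLoader:
        ...  # Build the DataLoader from the files in self.download_dir

    def val_dataloader(self, *args, **kwargs) -> DataLoader:
        ...
```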
In the `MyContainer` example above, training is done for 42 epochs. After the model is trained, it will be evaluated on the test set
via PyTorch Lightning's [built-in test functionality](https://pytorch-lightning.readthedocs.io/en/latest/common/trainer.html?highlight=trainer.test#test).
See below for an alternative way of running the evaluation on the test set.
### Outputting files during training
The Lightning model returned by `create_model` needs to write its output files to the current working directory.
When running the InnerEye toolbox outside of AzureML, the toolbox will change the current working directory to a
newly created output folder, with a name that contains the timestamp and the model name.
When running the InnerEye toolbox in AzureML, the folder structure will be set up such that all files written
to the current working directory are later uploaded to Azure blob storage at the end of the AzureML job. The files
will also be available later via the AzureML UI.
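As a minimal sketch (the hook and file name below are illustrative, not prescribed by the toolbox), a model could
append a small summary file in the working directory at the end of every epoch:
```python
from pathlib import Path
from pytorch_lightning import LightningModule

class ModelThatWritesFiles(LightningModule):
    # Only the output-related hook is shown; training_step etc. are omitted for brevity.
    # The file name "epoch_summary.txt" is just an example.
    def training_epoch_end(self, outputs) -> None:
        # The toolbox has already changed the working directory to the run's output folder,
        # so relative paths end up in the right place (and get uploaded when running in AzureML).
        summary_file = Path.cwd() / "epoch_summary.txt"
        with summary_file.open("a") as f:
            f.write(f"Finished epoch {self.current_epoch}\n")
```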
### Trainer arguments
All arguments that control the PyTorch Lightning `Trainer` object are defined in the class `TrainerParams`. A
`LightningContainer` object inherits from this class. The most essential one is the `num_epochs` field, which controls
the `max_epochs` argument of the `Trainer`.
Usage example:
```python
from pytorch_lightning import LightningModule, LightningDataModule
from InnerEye.ML.lightning_container import LightningContainer

class MyContainer(LightningContainer):
    def __init__(self):
        super().__init__()
        self.num_epochs = 42

    def create_model(self) -> LightningModule:
        return MyLightningModel()

    def get_data_module(self) -> LightningDataModule:
        return MyDataModule(root_path=self.local_dataset)
```
For further details on how the `TrainerParams` are used, refer to the `create_lightning_trainer` method in
[InnerEye/ML/model_training.py](../InnerEye/ML/model_training.py).
### Optimizer and LR scheduler arguments
There are two possible ways of choosing the optimizer and LR scheduler:
- The Lightning model returned by `create_model` can define its own `configure_optimizers` method, with the same
signature as `LightningModule.configure_optimizers`. This is the typical way of configuring it for Lightning models.
- Alternatively, the model can inherit from `LightningModuleWithOptimizer`. This class implements a
`configure_optimizers` method that uses settings defined in the `OptimizerParams` class. These settings are all
available from the command line, and you can, for example, start a new run with a different learning rate by
supplying the additional commandline flag `--l_rate=1e-2`.
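A rough sketch of the second option is shown below. It assumes that `LightningModuleWithOptimizer` can be imported
from `InnerEye.ML.lightning_container`, like the other classes used in this document:
```python
import torch
# Assumption: LightningModuleWithOptimizer lives in InnerEye.ML.lightning_container.
from InnerEye.ML.lightning_container import LightningModuleWithOptimizer

class ModelWithDefaultOptimizer(LightningModuleWithOptimizer):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(2, 1)

    def forward(self, x):
        return self.layer(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        return torch.nn.functional.mse_loss(self(x), y)

    # No configure_optimizers method needed: the inherited implementation builds the
    # optimizer and LR scheduler from the OptimizerParams settings, so flags like
    # --l_rate=1e-2 take effect without further code.
```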
### Evaluating the trained model
The InnerEye toolbox provides two possible routes for evaluating the trained model:
You can either use PyTorch Lightning's built-in capabilities, via the `test_step` method. If the model that is
returned by `create_model` implements the `test_step` method, the InnerEye toolbox will use the `trainer.test` method
(see [docs](https://pytorch-lightning.readthedocs.io/en/latest/common/trainer.html?highlight=trainer.test#test)).
In this case, the best checkpoint from training will be used. The test data is read via the data loader created
by the `test_dataloader` method of the `LightningDataModule` that is used for training/validation.
Alternatively, the model can implement the methods defined in `InnerEyeInference`. In this case, the methods will be
called in this order:
```
model.on_inference_start()
for dataset_split in [Train, Val, Test]:
    model.on_inference_epoch_start(dataset_split, is_ensemble_model=False)
    for batch_idx, item in enumerate(dataloader[dataset_split]):
        model_outputs = model.forward(item)
        model.inference_step(item, batch_idx, model_outputs)
    model.on_inference_epoch_end()
model.on_inference_end()
```
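A skeleton of that second route could look like the following. The method names follow the call order above; the exact
signatures are defined by the `InnerEyeInference` class in the toolbox, so treat this as an outline rather than a
drop-in implementation:
```python
# Signatures sketched from the call order above; check InnerEyeInference for the exact definitions.
class ModelWithInference(MyLightningModel):  # MyLightningModel as defined earlier in this document
    def on_inference_start(self) -> None:
        # Reset any state that accumulates results across all dataset splits.
        ...

    def on_inference_epoch_start(self, dataset_split, is_ensemble_model: bool) -> None:
        # Called once per dataset split (Train/Val/Test).
        ...

    def inference_step(self, item, batch_idx, model_outputs) -> None:
        # Compute and accumulate whatever metrics you need from the model outputs.
        ...

    def on_inference_epoch_end(self) -> None:
        # Aggregate the metrics for the current split, e.g. write a file to Path.cwd().
        ...

    def on_inference_end(self) -> None:
        ...
```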
## Overriding properties on the commandline
You can define hyperparameters that affect data and/or model, as in the following code snippet:
```python
import param
from pytorch_lightning import LightningModule
from InnerEye.ML.lightning_container import LightningContainer

class DummyContainerWithParameters(LightningContainer):
    num_layers = param.Integer(default=4)

    def create_model(self) -> LightningModule:
        return MyLightningModel(self.num_layers)
    ...
```
All parameters added in this form will be automatically accessible from the commandline; there is no need to define
a separate argument parser. When starting training, you can add a flag like `--num_layers=7`.
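For example, assuming the `DummyContainerWithParameters` class above is saved in a file under `InnerEye/ML/configs`,
you could start a run with a different number of layers via
`python InnerEye/ML/runner.py --model=DummyContainerWithParameters --num_layers=7`.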
## Examples
### Setting only the required fields
```python
from pytorch_lightning import LightningModule, LightningDataModule
from InnerEye.ML.lightning_container import LightningContainer

class Container1(LightningContainer):
    def __init__(self):
        super().__init__()
        self.azure_dataset_id = "some_folder_in_azure"
        self.num_epochs = 20

    def create_model(self) -> LightningModule:
        return MyLightningModel()

    def get_data_module(self) -> LightningDataModule:
        # This should read data from self.local_dataset. Before training, the data folder "some_folder_in_azure"
        # (given by self.azure_dataset_id) will be downloaded or mounted, and its local path set in
        # self.local_dataset
        return MyDataModule(root_folder=self.local_dataset)
```
### Adding additional arguments for the PyTorch Lightning trainer
```python
from typing import Dict, Any
from pytorch_lightning import LightningModule, LightningDataModule
from InnerEye.ML.lightning_container import LightningContainer

class Container2(LightningContainer):
    def __init__(self):
        super().__init__()
        self.azure_dataset_id = "some_folder_in_azure"
        self.num_epochs = 20

    def create_model(self) -> LightningModule:
        return MyLightningModel()

    def get_data_module(self) -> LightningDataModule:
        # This should read data from self.local_dataset. Before training, the data folder "some_folder_in_azure"
        # (given by self.azure_dataset_id) will be downloaded or mounted, and its local path set in
        # self.local_dataset
        return MyDataModule(root_folder=self.local_dataset)

    def get_trainer_arguments(self) -> Dict[str, Any]:
        # These arguments will be passed through to the Lightning trainer.
        return {"gradient_clip_val": 1, "limit_train_batches": 10}
```


@ -27,7 +27,7 @@ dependencies:
- lightning-bolts==0.3.1
- matplotlib==3.3.0
- mlflow==1.12.1
- mypy==0.770
- mypy==0.812
- mypy-extensions==0.4.3
- numba==0.51.2
- numpy==1.19.1
@ -47,6 +47,7 @@ dependencies:
- pytorch-lightning==1.2.8
- rich==5.1.1
- rpdb==0.1.6
- runstats==1.8.0
- scikit-image==0.17.2
- scikit-learn==0.23.2
- scipy==1.5.2

fastMRI Submodule

@ -0,0 +1 @@
Subproject commit f2070aeb7a5e7d1b0e45c6aad247d18d074705a8


@ -5,10 +5,10 @@
import os
import subprocess
import sys
from pathlib import Path
from typing import List
from shutil import which
from argparse import ArgumentParser
from pathlib import Path
from shutil import which
from typing import List
def run_mypy(files: List[str], mypy_executable_path: str) -> int:
@ -23,51 +23,24 @@ def run_mypy(files: List[str], mypy_executable_path: str) -> int:
:return: maximum return code from any of the mypy runs
"""
return_code = 0
iteration = 1
while files:
dirs = sorted(set(os.path.dirname(file) or "." for file in files))
print(f"Iteration {iteration}: running mypy on {len(files)} files in {len(dirs)} directories")
# Set of files we are hoping to see mentioned in the mypy log.
files_to_do = set(files)
for index, dir in enumerate(dirs, 1):
# Adding "--no-site-packages" might be necessary if there are errors in site packages,
# but it may stop inconsistencies with site packages being spotted.
command = [mypy_executable_path, "--config=mypy.ini", "--verbose", dir]
print(f"Processing directory {index:2d} of {len(dirs)}: {Path(dir).absolute()}")
print(f"Running mypy on {len(files)} files")
for index, file in enumerate(files):
print(f"Processing {(index+1):2d} of {len(files)}: {file}")
file_path = Path(file)
mypy_args = []
if file_path.is_file():
mypy_args = [file]
elif file_path.is_dir():
# There is a bug in recent mypy versions, complaining about duplicate files when telling
# mypy to scan a directory. Telling it to scan a namespace avoids this bug.
mypy_args = ["-p", file.replace(os.path.sep, ".")]
else:
print("Skipping.")
if mypy_args:
command = [mypy_executable_path, "--config=mypy.ini", *mypy_args]
# We pipe stdout and then print it, otherwise lines can appear in the wrong order in builds.
process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
process = subprocess.run(command)
return_code = max(return_code, process.returncode)
for line in process.stdout.split("\n"):
if line and not line.startswith("Success: "):
tokens = line.split(":")
if line.startswith("Found") or len(tokens) < 2:
print(line)
else:
print(f"{Path.cwd() / tokens[0]}:{':'.join(tokens[1:])}")
# Remove from files_to_do every Python file that's reported as processed in the log.
for line in process.stderr.split("\n"):
tokens = line.split()
if len(tokens) == 4 and tokens[0] == "LOG:" and tokens[1] == "Parsing":
name = tokens[2]
elif len(tokens) == 7 and tokens[:4] == ["LOG:", "Metadata", "fresh", "for"]:
name = tokens[-1]
else:
continue
if name.endswith(".py"):
if name.startswith("./") or name.startswith(".\\"):
name = name[2:]
files_to_do.discard(name)
# If we didn't manage to discard any files, there's no point continuing. This should not occur, but if
# it does, we don't want to continue indefinitely.
if len(files_to_do) == len(files):
print("No further files appear to have been checked! Unchecked files are:")
for file in sorted(files_to_do):
print(f" {file}")
return_code = max(return_code, 1)
break
files = sorted(files_to_do)
iteration += 1
return return_code
@ -83,17 +56,11 @@ def main() -> int:
args = parser.parse_args()
current_dir = Path(".")
if args.files:
file_list = [Path(arg) for arg in args.files if arg.endswith(".py")]
file_list = args.files
else:
# We don't want to check the files in the submodule if any, partly because they should already have
# been checked in the original repo, and partly because we don't want the module name clashes mypy would
# otherwise report.
submodule_name = "innereye-deeplearning"
files = set(current_dir.glob('*.py'))
for path in current_dir.glob('*'):
if path.name != submodule_name:
files.update(path.rglob('*.py'))
file_list = list(files)
file_list = list(str(f) for f in current_dir.glob('*.py'))
for dir in ["InnerEye", "Tests", "TestsOutsidePackage", "TestSubmodule"]:
file_list.append(dir)
mypy = args.mypy or which("mypy")
if not mypy:


@ -26,7 +26,7 @@ from InnerEye.ML.model_testing import DEFAULT_RESULT_IMAGE_NAME
from InnerEye.ML.photometric_normalization import PhotometricNormalization
from InnerEye.ML.pipelines.ensemble import EnsemblePipeline
from InnerEye.ML.pipelines.inference import FullImageInferencePipelineBase, InferencePipeline
from InnerEye.ML.utils.config_util import ModelConfigLoader
from InnerEye.ML.utils.config_loader import ModelConfigLoader
from InnerEye.ML.utils.io_util import ImageWithHeader, load_nifti_image, reverse_tuple_float3, store_as_ubyte_nifti, \
load_dicom_series_and_save
@ -67,8 +67,7 @@ def init_from_model_inference_json(model_folder: Path, use_gpu: bool = True) ->
logging.info(f'model_inference_config: {model_inference_config}')
full_path_to_checkpoints = [model_folder / x for x in model_inference_config.checkpoint_paths]
logging.info(f'full_path_to_checkpoints: {full_path_to_checkpoints}')
loader = ModelConfigLoader[SegmentationModelBase](
model_configs_namespace=model_inference_config.model_configs_namespace)
loader = ModelConfigLoader(model_configs_namespace=model_inference_config.model_configs_namespace)
model_config = loader.create_model_config_from_name(model_name=model_inference_config.model_name)
return create_inference_pipeline(model_config, full_path_to_checkpoints, use_gpu)