Mirror of https://github.com/microsoft/hi-ml.git

Adding cross-validation to the hi-ml runner (#198)

* Adding cross-validation
* Simplified model config loading for models in the histopathology folder
* Editor setup for VSCode
* Simplified pytest usage, also from within VSCode

Parent: 92d2f22650
Commit: 314433cb95
@@ -0,0 +1,16 @@
/.git
/.github
/azure-pipelines
/docs
/.idea
.pytest_cache
.mypy_cache
logs
outputs
config.json
*.egg-info
# Temporary files generated from conda merging
temp_environment-*
.config
.vscode
node_modules
@@ -2,7 +2,7 @@
omit =
    **/pytest
    **/__init__.py
    */hello_container_2.py
    **/temp_config_for_unittests.py

[html]
skip_empty = true
@@ -75,6 +75,8 @@ jobs:
    strategy:
      matrix:
        folder: [ hi-ml, hi-ml-azure, hi-ml-histopathology ]
      # This will let all parts of the matrix run, to collect as many errors as possible, rather than aborting after first fail
      fail-fast: false
    steps:
      - uses: actions/checkout@v2
        with:

@@ -85,7 +87,7 @@ jobs:
        with:
          python-version: ${{ env.pythonVersion }}

      - name: Install dependencies
      - name: Install hi-ml-azure in editable mode
        if: ${{ matrix.folder != 'hi-ml-azure' }}
        run: |
          cd hi-ml-azure

@@ -93,6 +95,14 @@ jobs:
          # Install local package in editable mode
          make pip_local

      - name: Install hi-ml in editable mode
        if: ${{ matrix.folder != 'hi-ml-azure' && matrix.folder != 'hi-ml' }}
        run: |
          cd hi-ml

          # Install local package in editable mode
          make pip_local

      - name: Test with pytest, fast only
        run: |
          cd ${{ matrix.folder }}

@@ -108,7 +118,7 @@ jobs:
    needs: [ pytest_fast ]
    strategy:
      matrix:
        folder: [ hi-ml, hi-ml-azure, hi-ml-histopathology ]
        folder: [ hi-ml, hi-ml-azure ]
    steps:
      - uses: actions/checkout@v2
        with:

@@ -134,7 +144,7 @@ jobs:
    needs: [ build-python ]
    strategy:
      matrix:
        folder: [ hi-ml, hi-ml-azure, hi-ml-histopathology ]
        folder: [ hi-ml, hi-ml-azure ]
        packageName: [ '*.whl', '*.tar.gz' ]
    steps:
      - uses: actions/checkout@v2
@@ -199,6 +209,61 @@ jobs:
        with:
          folder: ${{ matrix.folder }}

  test-project-folders:
    runs-on: ubuntu-18.04
    needs: [ build-python ]
    strategy:
      matrix:
        folder: [ hi-ml-histopathology ]
        packageName: [ '*.whl' ]
    steps:
      - uses: actions/checkout@v2
        with:
          lfs: true

      - name: Set up Python ${{ env.pythonVersion }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ env.pythonVersion }}

      - name: PIP upgrade
        run: |
          cd hi-ml-azure
          make pip_upgrade

      - name: Download hi-ml-azure
        id: download_himlazure
        uses: ./.github/actions/download_package_artifacts
        with:
          folder: hi-ml-azure
          package_type: ${{ matrix.packageName }}

      - name: Download hi-ml
        id: download_himl
        uses: ./.github/actions/download_package_artifacts
        with:
          folder: hi-ml
          package_type: ${{ matrix.packageName }}

      - name: Install dependencies
        run: |
          cd hi-ml-azure
          pip install ${{ steps.download_himlazure.outputs.package_filename }}
          cd ../hi-ml
          pip install ${{ steps.download_himl.outputs.package_filename }}

      - name: Run tests
        run: |
          cd ${{ matrix.folder }}
          make pip_local
          make pytest_and_coverage

      - name: Upload coverage artifacts
        uses: ./.github/actions/upload_coverage_artifacts
        if: ${{ matrix.packageName == '*.whl' }}
        with:
          folder: ${{ matrix.folder }}

  build-coverage:
    runs-on: ubuntu-18.04
@@ -150,4 +150,11 @@ node_modules/
!.github/actions/format_coverage/dist/
package-lock.json

logs/
logs/

# Temporary files generated from conda merging
temp_environment-*
# Temporary files from unittesting
temp_config_for_unittests.py
# Temp file from building requirements for histo
temp_requirements.txt
@@ -2,7 +2,6 @@
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$">
      <sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/hi-ml-azure/src" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/hi-ml/src" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/hi-ml-azure/testazure" isTestSource="false" />
@@ -0,0 +1,8 @@
{
    "recommendations": [
        "njpwerner.autodocstring",
        "doi.fileheadercomment",
        "ms-python.python",
        "ms-python.vscode-pylance"
    ]
}
@@ -11,6 +11,54 @@
            "program": "${file}",
            "console": "integratedTerminal"
        },
        {
            "name": "Python: Run the HelloWorld model locally",
            "type": "python",
            "request": "launch",
            "program": "${workspaceFolder}/hi-ml/src/health_ml/runner.py",
            "args": [
                "--model=HelloWorld"
            ],
            "console": "integratedTerminal"
        },
        {
            "name": "Python: Run the HelloWorld model in AzureML",
            "type": "python",
            "request": "launch",
            "program": "${workspaceFolder}/hi-ml/src/health_ml/runner.py",
            "args": [
                "--model=HelloWorld",
                "--azureml",
                "--cluster=lite-testing-ds2",
            ],
            "console": "integratedTerminal"
        },
        {
            "name": "Python: Run HelloWorld with cross-validation",
            "type": "python",
            "request": "launch",
            "program": "${workspaceFolder}/hi-ml/src/health_ml/runner.py",
            "args": [
                "--model=HelloWorld",
                "--azureml",
                "--cluster=lite-testing-ds2",
                "--crossval_count=2"
            ],
            "console": "integratedTerminal"
        },
        {
            "name": "Python: Run DeepSMILECrck in AzureML",
            "type": "python",
            "request": "launch",
            "program": "${workspaceFolder}/hi-ml/src/health_ml/runner.py",
            "cwd": "${workspaceFolder}/hi-ml-histopathology",
            "args": [
                "--model=histopathology.configs.classification.DeepSMILECrck",
                "--azureml",
                "--cluster=lite-testing-ds2",
            ],
            "console": "integratedTerminal"
        },
        {
            "name": "Elevate: HI-ML-AZURE Local",
            "type": "python",
@@ -1,4 +1,5 @@
{
    "autoDocstring.docstringFormat": "sphinx",
    "fileHeaderComment.parameter": {
        "*": {
            "line": "-------------------------------------------------------------------------------------------",

@@ -49,4 +50,20 @@
    "[python]": {
        "editor.rulers": [120],
    },
    "python.analysis.extraPaths": [
        "./hi-ml-azure/src",
        "./hi-ml-azure/testazure",
        "./hi-ml/src",
        "./hi-ml/testhiml",
        "./hi-ml-histopathology/src",
        "./hi-ml-histopathology/testhisto",
        "./hi-ml-histopathology/testSSL",
    ],
    "terminal.integrated.defaultProfile.windows": "Command Prompt",
    "terminal.integrated.env.windows": {
        "PYTHONPATH": "${workspaceFolder}/hi-ml/src:${workspaceFolder}/hi-ml-azure/src:${workspaceFolder}/hi-ml-histopathology/src"
    },
    "terminal.integrated.env.linux": {
        "PYTHONPATH": "${workspaceFolder}/hi-ml/src:${workspaceFolder}/hi-ml-azure/src:${workspaceFolder}/hi-ml-histopathology/src"
    }
}
@@ -20,12 +20,17 @@ the environment file since it is necessary for the augmentations.
- ([#181](https://github.com/microsoft/hi-ml/pull/181)) Add computational pathology tools in hi-ml-histopathology folder.
- ([#187](https://github.com/microsoft/hi-ml/pull/187)) Add mean pooling layer for MIL.
- ([#186](https://github.com/microsoft/hi-ml/pull/186)) Add inference to hi-ml runner.
- ([#198](https://github.com/microsoft/hi-ml/pull/198)) Add cross-validation to hi-ml runner.
- ([#198](https://github.com/microsoft/hi-ml/pull/198)) Improved editor setup for VSCode.

### Changed
- ([#198](https://github.com/microsoft/hi-ml/pull/198)) The model config loader is now more flexible: it can accept either a fully qualified class name or just the top-level module name and class (like `histopathology.DeepSMILECrck`).
- ([#198](https://github.com/microsoft/hi-ml/pull/198)) The runner raises an error when the Conda environment file contains a pip include (`-r`) statement.

- ([#196](https://github.com/microsoft/hi-ml/pull/196)) Show the current workspace name in the error message.

### Fixed
- ([#198](https://github.com/microsoft/hi-ml/pull/198)) Dependencies for the histopathology folder are no longer specified in `test_requirements.txt`, but correctly in the histopathology Conda environment.
- ([#188](https://github.com/microsoft/hi-ml/pull/188)) Updated DeepSMILE models. They are now up to date with innereye-dl.
- ([#179](https://github.com/microsoft/hi-ml/pull/179)) HEDJitter was jittering the D channel as well. StainNormalization was relying on skimage.
- ([#195](https://github.com/microsoft/hi-ml/pull/195)) Fix a DeepMIL metrics bug whereby hard labels were used instead of probabilities.
README.md
@@ -6,13 +6,11 @@ This toolbox aims at providing low-level and high-level building blocks for Mach
practitioners. It helps to simplify and streamline work on deep learning models for healthcare and life sciences,
by providing tested components (data loaders, pre-processing), deep learning models, and cloud integration tools.

This toolbox is still in very early stages, and presently offers only the cloud integration components. ML components
will be added in the next few weeks.
This repository consists of two Python packages, as well as project-specific codebases:

This toolbox consists of two Python projects:

* [hi-ml-azure](https://pypi.org/project/hi-ml-azure/) - providing helper functions for running in AzureML.
* [hi-ml](https://pypi.org/project/hi-ml/) - providing ML components.
* PyPi package [hi-ml-azure](https://pypi.org/project/hi-ml-azure/) - providing helper functions for running in AzureML.
* PyPi package [hi-ml](https://pypi.org/project/hi-ml/) - providing ML components.
* hi-ml-histopathology: Models and workflows for working with histopathology images

## Getting started

@@ -24,6 +22,10 @@ For just the AzureML helper functions:

* Install from `pypi` via `pip`, by running `pip install hi-ml-azure`

For the histopathology workflows, please follow the instructions [here](hi-ml-histopathology/README.md).

If you would like to contribute to the code, please check the [developer guide](docs/source/developers.md).

## Documentation

The detailed package documentation, with examples and API reference, is on
@@ -12,6 +12,10 @@ If in doubt, reach out to the core `hi-ml` team before starting your work.

Please look through the existing folder structure to find a good home for your contribution.

## Setting up your dev environment

Please see the detailed instructions [here](developers.md).

## Submitting a Pull Request

If you'd like to submit a PR to the codebase, please ensure you:
@@ -1,5 +1,13 @@
# Notes for developers

## Development environment

We suggest using Visual Studio Code (VSCode), available for multiple platforms [here](https://code.visualstudio.com/).
On Windows systems, we recommend using WSL, the Windows Subsystem for Linux, because some PyTorch features are not available on Windows.
Inside VSCode, please install the extensions that are recommended for this project - they are available in `.vscode/extensions.json` in the
repository root.


## Creating a Conda environment

To create a separate Conda environment with all packages that `hi-ml` requires for running and testing,
@@ -15,6 +23,9 @@ outside the Conda environment. For WSL, these are the required steps (see also
[here](https://docs.microsoft.com/en-us/windows/dev-environment/javascript/nodejs-on-wsl)):
```shell
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.38.0/install.sh | bash
```
Close your terminal and re-open it, then run:
```shell
nvm install node
npm install -g pyright
```
@@ -103,3 +114,13 @@ To create a new package release, follow these steps:
* Click "Auto-generate release notes" to pull in the titles of the Pull Requests since the last release.
* Before the auto-generated "What's changed" section, add a few sentences that summarize what's new.
* Click "Publish release"


## Troubleshooting

### Debugging a test in VSCode fails on Windows

* Symptom: Debugging just does not seem to do anything.
* Check: The Debug Console shows the error `from _sqlite3 import *: ImportError: DLL load failed: The specified module could not be found.`
* Fix: [see here](https://stackoverflow.com/questions/54876404/unable-to-import-sqlite3-using-anaconda-python):
  Run `conda info --envs` to see where your Conda environment lives, then place `sqlite3.dll` into the `DLLs` folder inside of the environment.
@@ -11,17 +11,36 @@ use of these features:
This can be used by invoking the hi-ml runner and providing the name of the container class, like this:
`himl-runner --model=MyContainer`.

There is a fully working example [HelloContainer](../../hi-ml/src/health-ml/configs/hello_container.py), that
There is a fully working example [HelloContainer](../../hi-ml/src/health-ml/configs/hello_world.py), that
implements a simple 1-dimensional regression model from data stored in a CSV file. You can run that
from the command line by `himl-runner --model=HelloContainer`.
from the command line by `himl-runner --model=HelloWorld`.

# Running ML experiments in Azure ML
## Specifying the model to run

The `--model` argument specifies the name of a class that should be used for model training. The class needs to
be a subclass of `LightningContainer`, see below. There are different ways of telling the runner where to find
that class:
* If just providing a single class name, like `--model=HelloWorld`, the class is expected somewhere in the
  `health_ml.configs` namespace. It can be in any module/folder inside of that namespace.
* If the class is outside of the `health_ml.configs` namespace (as would be normal if using the `himl-runner` from a package),
  you need to provide some "hints" where to start searching. It is enough to provide the start of the namespace string:
  for example, `--model histopathology.PandaImageNetMIL` is effectively telling the runner to search for the
  `PandaImageNetMIL` class _anywhere_ in the `histopathology` namespace. You can think of this as
  `histopathology.*.PandaImageNetMIL`.
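To make these rules concrete, here is a hedged sketch (not part of this commit) of a minimal model config. The import path of `LightningContainer` and the class body are assumptions for illustration only:

```python
# Hedged sketch: a minimal model config class. The LightningContainer import
# path is an assumption; the method body is illustrative.
from health_ml.lightning_container import LightningContainer


class MyNewModel(LightningContainer):  # hypothetical example class
    def create_model(self):
        # Return the PyTorch Lightning model to train, as described in this document.
        ...
```

If a file containing this class lives anywhere inside the `health_ml.configs` namespace, the model can be run via `himl-runner --model=MyNewModel`; otherwise a namespace hint is needed, as described above.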
## Running ML experiments in Azure ML

To train in AzureML, add a `--azureml` flag. Use the flag `--cluster` to specify the name of the cluster
in your Workspace that you want to submit the job to. So the whole command would look like:
`himl-runner --model=HelloContainer --cluster=my_cluster_name --azureml`. You can also specify `--num_nodes` if
you wish to distribute the model training.

When starting the runner, you need to do that from a directory that contains all the code that your experiment needs:
the current working directory will be used as the root of all data that will be copied to AzureML to run your experiment.
(The only exception to this rule is if you start the runner from within an enlistment of the HI-ML GitHub repository.)

AzureML needs to know which Python/Conda environment it should use. For that, the runner expects a file `environment.yml`
in the current working directory, that contains a Conda environment definition.

## Setup - creating your model config file
@@ -140,7 +159,7 @@ By default, config files will be looked for in the folder "health_ml.configs". T
that live elsewhere, use a fully qualified name for the parameter `--model` - e.g. "MyModule.Configs.my_config.py"


### Outputting files during training
## Outputting files during training

The Lightning model returned by `create_model` needs to write its output files to the current working directory.
When running inside of AzureML, the output folders will be directly under the project root. If not running inside
@@ -150,7 +169,7 @@ When running in AzureML, the folder structure will be set up such that all files
to the current working directory are later uploaded to Azure blob storage at the end of the AzureML job. The files
will also be later available via the AzureML UI.

### Trainer arguments
## Trainer arguments
All arguments that control the PyTorch Lightning `Trainer` object are defined in the class `TrainerParams`. A
`LightningContainer` object inherits from this class. The most essential one is the `max_epochs` field, which controls
the `max_epochs` argument of the `Trainer`.
@@ -1,3 +1,5 @@
# This environment definition contains all packages to run hi-ml and hi-ml-azure development work, building and
# testing
name: himl
channels:
  - defaults

@@ -5,7 +7,10 @@ channels:
dependencies:
  - pip=20.1.1
  - python=3.7.3
  - pytorch=1.8.0
  - pytorch=1.10.0
  - cudatoolkit=11.3.1
  - pip:
    - -r hi-ml-azure/run_requirements.txt
    - -r hi-ml/run_requirements.txt
    - -r build_requirements.txt
    - -r test_requirements.txt
@@ -1,8 +1,10 @@
[pytest]
testpaths = testazure
norecursedirs = outputs
log_cli = True
log_cli_level = DEBUG
adopts = --strict-markers
addopts = --strict-markers
markers =
    fast: Tests that should run very fast, and can act as smoke tests to see if something goes terribly wrong.
    slow: Tests that are slow to run and not crucial to the build.
    timeout: Tests will terminate and fail if not completed within this length of time.
    timeout: Tests will terminate and fail if not completed within this length of time.
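For illustration, a hedged sketch of how the markers declared above are applied in test code (the test function is hypothetical); such tests can then be selected with the standard `pytest -m fast` filter:

```python
# Hedged sketch: applying the 'fast' marker declared in pytest.ini.
import pytest


@pytest.mark.fast
def test_smoke() -> None:
    """A quick smoke test, selectable via `pytest -m fast`."""
    assert 1 + 1 == 2
```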
@@ -1,7 +1,8 @@
azureml-sdk==1.32.0
azureml-tensorboard==1.32.0
conda-merge==0.1.5
pandas==1.3.4
param==1.9.3
ruamel.yaml==0.16.12
tensorboard==2.6.0
azureml-sdk>=1.36.0
azureml-tensorboard>=1.36.0
conda-merge>=0.1.5
pandas>=1.3.4
param>=1.12
pysocks>=1.5.8
ruamel.yaml>=0.16.12
tensorboard>=2.6.0
@@ -27,7 +27,7 @@ from azureml.data.dataset_consumption_config import DatasetConsumptionConfig
from azureml.train.hyperdrive import HyperDriveConfig, GridParameterSampling, PrimaryMetricGoal, choice
from azureml.dataprep.fuse.daemon import MountContext

from health_azure.utils import (create_python_environment, create_run_recovery_id, _find_file,
from health_azure.utils import (create_python_environment, create_run_recovery_id, find_file_in_parent_to_pythonpath,
                                is_run_and_child_runs_completed, is_running_in_azure_ml, register_environment,
                                run_duration_string_to_seconds, to_azure_friendly_string, RUN_CONTEXT, get_workspace,
                                PathOrString, DEFAULT_ENVIRONMENT_VARIABLES)

@@ -178,7 +178,7 @@ def create_run_configuration(workspace: Workspace,


def create_crossval_hyperdrive_config(num_splits: int,
                                      cross_val_index_arg_name: str = "cross_validation_split_index",
                                      cross_val_index_arg_name: str = "crossval_index",
                                      metric_name: str = "val/loss") -> HyperDriveConfig:
    """
    Creates an Azure ML HyperDriveConfig object for running cross validation. Note: this config expects a metric
@@ -186,20 +186,21 @@ def create_crossval_hyperdrive_config(num_splits: int,
    https://docs.microsoft.com/en-us/azure/machine-learning/how-to-tune-hyperparameters#log-metrics-for-hyperparameter-tuning))

    :param num_splits: The number of splits for k-fold cross validation
    :param cross_val_index_arg_name: The name of the argument received by each of the child runs that indicates which
        split that child represents.
    :param cross_val_index_arg_name: The name of the commandline argument that each of the child runs gets, to
        indicate which split they should work on.
    :param metric_name: The name of the metric that the HyperDriveConfig will compare runs by. Please note that it is
        your responsibility to make sure a metric with this name is logged to the Run in your training script
    :return: an Azure ML HyperDriveConfig object
    """
    logging.info(f"Creating a HyperDriveConfig. Please be aware that this expects to find the metric {metric_name}"
                 f" logged to the Run during your training script.")
    logging.info(f"Creating a HyperDriveConfig. Please note that this expects to find the specified "
                 f"metric '{metric_name}' logged to AzureML from your training script (for example, using the "
                 f"AzureMLLogger with Pytorch Lightning)")
    parameter_dict = {
        cross_val_index_arg_name: choice(list(range(num_splits))),
    }
    return HyperDriveConfig(
        run_config=ScriptRunConfig(""),
        hyperparameter_sampling=GridParameterSampling(
            {
                cross_val_index_arg_name: choice(list(range(num_splits)))
            }),
        hyperparameter_sampling=GridParameterSampling(parameter_dict),
        primary_metric_name=metric_name,
        primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
        max_total_runs=num_splits
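As a hedged usage sketch (not part of this commit): the config created above can be handed to `submit_to_azure_if_needed`, which then submits one child run per cross-validation fold. The cluster name below is a hypothetical placeholder, and other submission arguments are omitted.

```python
# Hedged sketch: create a 5-fold cross-validation HyperDrive config and submit it.
from pathlib import Path

from health_azure.himl import create_crossval_hyperdrive_config, submit_to_azure_if_needed

crossval_config = create_crossval_hyperdrive_config(
    num_splits=5,                               # one child run per fold
    cross_val_index_arg_name="crossval_index",  # each child run receives --crossval_index=<k>
    metric_name="val/loss",                     # must be logged to AzureML by the training script
)
run_info = submit_to_azure_if_needed(
    compute_cluster_name="my-cluster",          # hypothetical cluster name
    conda_environment_file=Path("environment.yml"),
    hyperdrive_config=crossval_config,
    submit_to_azureml=True,
)
```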
@@ -337,7 +338,8 @@ def submit_to_azure_if_needed(  # type: ignore
        submit_to_azureml: Optional[bool] = None,
        tags: Optional[Dict[str, str]] = None,
        after_submission: Optional[Callable[[Run], None]] = None,
        hyperdrive_config: Optional[HyperDriveConfig] = None
        hyperdrive_config: Optional[HyperDriveConfig] = None,
        create_output_folders: bool = True,
        ) -> AzureRunInfo:  # pragma: no cover
    """
    Submit a folder to Azure, if needed and run it.

@@ -390,6 +392,7 @@ def submit_to_azure_if_needed(  # type: ignore
        for local execution (i.e., return immediately) will be executed. If not provided (None), submission to AzureML
        will be triggered if the commandline flag '--azureml' is present in sys.argv
    :param hyperdrive_config: A configuration object for Hyperdrive (hyperparameter search).
    :param create_output_folders: If True (default), create folders "outputs" and "logs" in the current working folder.
    :return: If the script is submitted to AzureML then we terminate python as the script should be executed in AzureML,
        otherwise we return a AzureRunInfo object.
    """

@@ -448,7 +451,7 @@ def submit_to_azure_if_needed(  # type: ignore
    workspace = get_workspace(aml_workspace, workspace_config_path)

    if conda_environment_file is None:
        conda_environment_file = _find_file(CONDA_ENVIRONMENT_FILE)
        conda_environment_file = find_file_in_parent_to_pythonpath(CONDA_ENVIRONMENT_FILE)
    conda_environment_file = _str_to_path(conda_environment_file)

    logging.info(f"Loaded AzureML workspace {workspace.name}")
@@ -0,0 +1,48 @@
#  ------------------------------------------------------------------------------------------
#  Copyright (c) Microsoft Corporation. All rights reserved.
#  Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
#  ------------------------------------------------------------------------------------------

import logging
from pathlib import Path

ENVIRONMENT_YAML_FILE_NAME = "environment.yml"

REPO_HIML_FOLDER = "hi-ml"
REPO_HIML_AZURE_FOLDER = "hi-ml-azure"


def is_himl_used_from_git_repo() -> bool:
    """Returns False if HI-ML was installed as a package into site-packages. Returns True if the HI-ML codebase is
    used from a clone of the full git repository.

    :return: False if HI-ML is installed as a package, True if used via source from git.
    :rtype: bool
    """
    health_ml_root = Path(__file__).parent.parent
    logging.debug(f"health_ml root: {health_ml_root}")
    if health_ml_root.parent.stem == "site-packages":
        return False
    himl_root = health_ml_root.parent.parent
    # These two folders are present in the top-level folder of the git repo
    expected_folders = [REPO_HIML_FOLDER, REPO_HIML_AZURE_FOLDER]
    all_folders_exist = all((himl_root / folder).is_dir() for folder in expected_folders)
    if all_folders_exist:
        return True
    raise ValueError(
        "Unable to determine the installation status: Code is not used from site-packages, but the "
        "expected top-level folders are not present?"
    )


def git_repo_root_folder() -> Path:
    """
    Attempts to return the path to the top-level hi-ml repo that contains the hi-ml and hi-ml-azure packages.
    This top-level repo will only be present if hi-ml has been installed as a git submodule, or the repo has
    been directly downloaded. Otherwise (e.g. if hi-ml has been installed as a pip package) this raises a ValueError.

    :return: Path to the himl root dir if it exists.
    """
    if not is_himl_used_from_git_repo():
        raise ValueError("This function can only be used if the HI-ML package is used directly from the git repo.")
    return Path(__file__).parent.parent.parent.parent
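A hedged sketch of how these helpers can be combined, for example to locate the repository-level Conda environment file when the code is used from a git clone:

```python
# Hedged usage sketch for the helpers defined above.
from health_azure.paths import (ENVIRONMENT_YAML_FILE_NAME, git_repo_root_folder,
                                is_himl_used_from_git_repo)

if is_himl_used_from_git_repo():
    env_file = git_repo_root_folder() / ENVIRONMENT_YAML_FILE_NAME
    print(f"Repository-level environment definition: {env_file}")
else:
    print("hi-ml is installed as a package; no repository root available")
```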
@@ -65,6 +65,13 @@ RUN_CONTEXT = Run.get_context()
PARENT_RUN_CONTEXT = getattr(RUN_CONTEXT, "parent", None)
WORKSPACE_CONFIG_JSON = "config.json"

# Names for sections in a Conda environment definition
CONDA_NAME = "name"
CONDA_CHANNELS = "channels"
CONDA_DEPENDENCIES = "dependencies"
CONDA_PIP = "pip"


# By default, define several environment variables that work around known issues in the software stack
DEFAULT_ENVIRONMENT_VARIABLES = {
    "AZUREML_OUTPUT_UPLOAD_TIMEOUT_SEC": "3600",
@@ -112,14 +119,15 @@ class GenericConfig(param.Parameterized):
        """
        # check if illegal arguments are passed in
        legal_params = self.get_overridable_parameters()
        illegal = [k for k, v in params.items() if (k in self.params().keys()) and (k not in legal_params)]
        current_param_names = self.param.values().keys()
        illegal = [k for k, v in params.items() if (k in current_param_names) and (k not in legal_params)]

        if illegal:
            raise ValueError(f"The following parameters cannot be overridden as they are either "
                             f"readonly, constant, or private members : {illegal}")
        if throw_if_unknown_param:
            # check if parameters not defined by the config class are passed in
            unknown = [k for k, v in params.items() if (k not in self.params().keys())]
            unknown = [k for k, v in params.items() if (k not in current_param_names)]
            if unknown:
                raise ValueError(f"The following parameters do not exist: {unknown}")
        # set known arguments
@@ -358,7 +366,7 @@ def get_overridable_parameters(config: Any) -> Dict[str, param.Parameter]:
    :return: A dictionary of parameter names and their definitions.
    """
    assert isinstance(config, param.Parameterized)
    return dict((k, v) for k, v in config.params().items()
    return dict((k, v) for k, v in config.param.params().items()
                if reason_not_overridable(v) is None)
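A hedged sketch of what this helper returns (`MyConfig` is a hypothetical class): constant or readonly parameters are filtered out, so only plain parameters are reported as overridable.

```python
# Hedged sketch: list the overridable parameters of a param-based config.
import param

from health_azure.utils import get_overridable_parameters


class MyConfig(param.Parameterized):  # hypothetical config class
    learning_rate = param.Number(1e-3, doc="Initial learning rate")
    run_id = param.String("", constant=True)  # constant -> not overridable

overridable = get_overridable_parameters(MyConfig())
print(sorted(overridable))  # 'learning_rate' should be listed, 'run_id' should not
```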
@@ -424,16 +432,17 @@ def report_on_overrides(config: Any, overrides_to_apply: Dict[str, Any], keys_to
    :param keys_to_ignore: set of dictionary keys not to report on
    """
    assert isinstance(config, param.Parameterized)
    current_params = config.param.params()
    for key, desired in overrides_to_apply.items():
        if key in keys_to_ignore:
            continue
        actual = getattr(config, key, None)
        if actual == desired:
            continue
        if key not in config.params():
        if key not in current_params:
            reason = "parameter is undefined"
        else:
            val = config.params()[key]
            val = current_params[key]
            reason = reason_not_overridable(val)  # type: ignore
        if reason is None:
            reason = "for UNKNOWN REASONS"

@@ -456,7 +465,7 @@ def create_from_matching_params(from_object: param.Parameterized, cls_: Type[T])
    c = cls_()
    if not isinstance(c, param.Parameterized):
        raise ValueError(f"The created object must be a subclass of param.Parameterized, but got {type(c)}")
    for param_name, p in c.params().items():
    for param_name, p in c.param.params().items():
        if not p.constant and not p.readonly:
            setattr(c, param_name, getattr(from_object, param_name))
    return c
@@ -648,38 +657,39 @@ def determine_run_id_type(run_or_recovery_id: str) -> str:
    return run_or_recovery_id


def _find_file(file_name: str, stop_at_pythonpath: bool = True) -> Optional[Path]:
def find_file_in_parent_folders(file_name: str, stop_at_path: List[Path]) -> Optional[Path]:
    """Searches for a file of the given name in the current working directory, or any of its parent folders.
    Searching stops if either the file is found, or no parent folder can be found, or the search has reached any
    of the given folders in stop_at_path.

    :param file_name: The name of the file to find.
    :param stop_at_path: A list of folders. If any of them is reached, search stops.
    :return: The absolute path of the file if found, or None if it was not found.
    """
    """
    Recurse up the file system, starting at the current working directory, to find a file. Optionally stop when we hit
    the PYTHONPATH root (defaults to stopping).

    :param file_name: The file name of the file to find.
    :param stop_at_pythonpath: (Defaults to True.) Whether to stop at the PYTHONPATH root.
    :return: The path to the file, or None if it cannot be found.
    """

    def return_file_or_parent(
            start_at: Path,
            file_name: str,
            stop_at_pythonpath: bool,
            pythonpaths: List[Path]) -> Optional[Path]:

        logging.info(f"Searching for file {file_name} in {start_at}")
    def return_file_or_parent(start_at: Path) -> Optional[Path]:
        logging.debug(f"Searching for file {file_name} in {start_at}")
        expected = start_at / file_name
        if expected.is_file() and expected.name == file_name:
            return expected
        if start_at.parent == start_at or start_at in pythonpaths:
        if start_at.parent == start_at or start_at in stop_at_path:
            return None
        return return_file_or_parent(start_at.parent, file_name, stop_at_pythonpath, pythonpaths)
        return return_file_or_parent(start_at.parent)

    return return_file_or_parent(start_at=Path.cwd())


def find_file_in_parent_to_pythonpath(file_name: str) -> Optional[Path]:
    """
    Recurse up the file system, starting at the current working directory, to find a file. Stop when we hit
    any of the folders in PYTHONPATH.

    :param file_name: The file name of the file to find.
    :return: The path to the file, or None if it cannot be found.
    """
    pythonpaths: List[Path] = []
    if 'PYTHONPATH' in os.environ:
        pythonpaths = [Path(path_string) for path_string in os.environ['PYTHONPATH'].split(os.pathsep)]
    return return_file_or_parent(
        start_at=Path.cwd(),
        file_name=file_name,
        stop_at_pythonpath=stop_at_pythonpath,
        pythonpaths=pythonpaths)
    return find_file_in_parent_folders(file_name=file_name, stop_at_path=pythonpaths)


def get_workspace(aml_workspace: Optional[Workspace] = None, workspace_config_path: Optional[Path] = None) -> Workspace:
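A hedged usage sketch for the new search helper (`my_settings.yml` is a hypothetical file name): the search walks upwards from the working directory and stops at any folder listed in `PYTHONPATH`.

```python
# Hedged sketch: search parent folders for a file, stopping at PYTHONPATH roots.
from health_azure.utils import find_file_in_parent_to_pythonpath

settings = find_file_in_parent_to_pythonpath("my_settings.yml")
if settings is None:
    print("File not found between the working directory and the PYTHONPATH root")
else:
    print(f"Found: {settings}")
```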
@@ -705,7 +715,7 @@ def get_workspace(aml_workspace: Optional[Workspace] = None, workspace_config_pa
        return aml_workspace

    if workspace_config_path is None:
        workspace_config_path = _find_file(WORKSPACE_CONFIG_JSON)
        workspace_config_path = find_file_in_parent_to_pythonpath(WORKSPACE_CONFIG_JSON)
    if workspace_config_path:
        logging.info(f"Using the workspace config file {str(workspace_config_path.absolute())}")
    else:

@@ -715,7 +725,9 @@ def get_workspace(aml_workspace: Optional[Workspace] = None, workspace_config_pa
        raise ValueError("Workspace config path is not a path, check your input.")
    elif workspace_config_path.is_file():
        auth = get_authentication()
        return Workspace.from_config(path=str(workspace_config_path), auth=auth)
        workspace = Workspace.from_config(path=str(workspace_config_path), auth=auth)
        logging.info(f"Logged into AzureML workspace {workspace.name}")
        return workspace

    raise ValueError("Workspace config file does not exist or cannot be read.")
@@ -902,6 +914,52 @@ def _retrieve_unique_deps(dependencies: List[str], keep_method: str = "first") -
    return unique_deps_list


def _get_pip_dependencies(parsed_yaml: Any) -> Optional[Tuple[int, List[Any]]]:
    """Gets the first pip dependencies section of a Conda yaml file. Returns the index at which the pip section
    was found, and the pip section itself. If no pip section was found, returns None.
    """
    if CONDA_DEPENDENCIES in parsed_yaml:
        for i, dep in enumerate(parsed_yaml.get(CONDA_DEPENDENCIES)):
            if isinstance(dep, dict) and CONDA_PIP in dep:
                return i, dep[CONDA_PIP]
    return None


def is_pip_include_dependency(package: str) -> bool:
    """Returns True if the given package name (as used in a Conda environment file) relies on PIP includes,
    in the format "-r requirements.txt".

    :param package: The name of the PIP dependency to check.
    :return: True if the package name is a PIP include statement.
    """
    return package.strip().startswith("-r ")


def is_conda_file_with_pip_include(conda_file: Path) -> Tuple[bool, Dict]:
    """Checks if the given Conda environment file uses the "include" syntax in the pip section, like
    `-r requirements.txt`. If it uses pip includes, the function returns True and a modified Conda yaml
    without all the pip include statements. If no pip include statements are found, it returns False and the
    unmodified Conda yaml.

    :param conda_file: The path of a Conda environment file.
    :return: True if the file uses pip includes, False if not. The second return value is the modified Conda
        environment without the PIP include statements.
    """
    conda_yaml = conda_merge.read_file(str(conda_file))
    pip_dep = _get_pip_dependencies(conda_yaml)
    if pip_dep is not None:
        pip_index, pip = pip_dep
        pip_without_include = [package for package in pip if not is_pip_include_dependency(package)]
        if len(pip) != len(pip_without_include):
            if len(pip_without_include) == 0:
                # Avoid an empty PIP dependencies section, this causes a failure in conda_merge
                conda_yaml.get(CONDA_DEPENDENCIES).pop(pip_index)
            else:
                conda_yaml.get(CONDA_DEPENDENCIES)[pip_index] = {CONDA_PIP: pip_without_include}
            return True, conda_yaml
    return False, conda_yaml
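A hedged usage sketch (`env.yml` is a hypothetical path): callers get back both the include-detection flag and a cleaned YAML structure that is safe to merge.

```python
# Hedged sketch: detect pip "-r" includes and obtain a stripped environment.
from pathlib import Path

from health_azure.utils import is_conda_file_with_pip_include

uses_include, cleaned_yaml = is_conda_file_with_pip_include(Path("env.yml"))
if uses_include:
    print("Conda file contains pip '-r' includes; they are absent from cleaned_yaml")
```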
def merge_conda_files(conda_files: List[Path], result_file: Path, pip_files: Optional[List[Path]] = None,
                      pip_clash_keep_method: str = "first") -> None:
    """
@@ -914,43 +972,42 @@ def merge_conda_files(conda_files: List[Path], result_file: Path, pip_files: Opt
    :param pip_clash_keep_method: If two or more pip packages are specified with the same name, this determines
        which one should be kept. Current options: ['first', 'last']
    """
    env_definitions = [conda_merge.read_file(str(f)) for f in conda_files]
    env_definitions: List[Any] = []
    for file in conda_files:
        _, pip_without_include = is_conda_file_with_pip_include(file)
        env_definitions.append(pip_without_include)
    unified_definition = {}
    NAME = "name"
    CHANNELS = "channels"
    DEPENDENCIES = "dependencies"

    extra_pip_deps = []
    for pip_file in pip_files or []:
        with open(pip_file, "r") as f_path:
            additional_pip_deps = [d for d in f_path.read().split("\n") if d]
            extra_pip_deps.extend(additional_pip_deps)
        additional_pip_deps = [d for d in pip_file.read_text().split("\n") if d and not is_pip_include_dependency(d)]
        extra_pip_deps.extend(additional_pip_deps)

    name = conda_merge.merge_names(env.get(NAME) for env in env_definitions)
    name = conda_merge.merge_names(env.get(CONDA_NAME) for env in env_definitions)
    if name:
        unified_definition[NAME] = name
        unified_definition[CONDA_NAME] = name

    try:
        channels = conda_merge.merge_channels(env.get(CHANNELS) for env in env_definitions)
        channels = conda_merge.merge_channels(env.get(CONDA_CHANNELS) for env in env_definitions)
    except conda_merge.MergeError:
        logging.error("Failed to merge channel priorities.")
        raise
    if channels:
        unified_definition[CHANNELS] = channels
        unified_definition[CONDA_CHANNELS] = channels

    try:
        deps_to_merge = [env.get(DEPENDENCIES) for env in env_definitions]
        deps_to_merge = [env.get(CONDA_DEPENDENCIES) for env in env_definitions]
        if len(extra_pip_deps) > 0:
            deps_to_merge.extend([[{"pip": extra_pip_deps}]])
            deps_to_merge.append([{CONDA_PIP: extra_pip_deps}])
        deps = conda_merge.merge_dependencies(deps_to_merge)

        # Remove duplicated pip packages from merged dependencies sections. Note that for a package that is
        # duplicated, the first value encountered will be retained.
        pip_deps_entries = [d for d in deps if isinstance(d, dict) and "pip" in d]  # type: ignore
        pip_deps_entries = [d for d in deps if isinstance(d, dict) and CONDA_PIP in d]  # type: ignore
        if len(pip_deps_entries) == 0:
            raise ValueError("Didn't find a dictionary with the key 'pip' in the list of dependencies")
        pip_deps_entry: Dict[str, List[str]] = pip_deps_entries[0]
        pip_deps = pip_deps_entry["pip"]
        pip_deps = pip_deps_entry[CONDA_PIP]
        # temporarily remove pip dependencies from deps to be added back after deduplication
        deps.remove(pip_deps_entry)

@@ -960,13 +1017,13 @@ def merge_conda_files(conda_files: List[Path], result_file: Path, pip_files: Opt
        unique_pip_deps = _retrieve_unique_deps(pip_deps, keep_method=pip_clash_keep_method)

        # finally add back the deduplicated list of dependencies
        unique_deps.append({"pip": unique_pip_deps})  # type: ignore
        unique_deps.append({CONDA_PIP: unique_pip_deps})  # type: ignore

    except conda_merge.MergeError:
        logging.error("Failed to merge dependencies.")
        raise
    if unique_deps:
        unified_definition[DEPENDENCIES] = unique_deps
        unified_definition[CONDA_DEPENDENCIES] = unique_deps
    else:
        raise ValueError("No dependencies found in any of the conda files.")
@@ -1439,7 +1496,7 @@ def _get_runs_from_script_config(script_config: AmlRunScriptConfig, workspace: W
    if script_config.run is None:
        if script_config.experiment is None:
            # default to latest run file
            latest_run_file = _find_file("most_recent_run.txt")
            latest_run_file = find_file_in_parent_to_pythonpath("most_recent_run.txt")
            if latest_run_file is None:
                raise ValueError("Could not find most_recent_run.txt")
            runs = [get_most_recent_run(latest_run_file, workspace)]

@@ -1678,7 +1735,7 @@ def aml_workspace_for_unittests() -> Workspace:
    is found, the workspace details are read from environment variables. Authentication information is also read
    from environment variables.
    """
    config_json = _find_file(WORKSPACE_CONFIG_JSON)
    config_json = find_file_in_parent_to_pythonpath(WORKSPACE_CONFIG_JSON)
    if config_json is not None:
        return Workspace.from_config(path=str(config_json))
    else:
@@ -1,6 +0,0 @@
name: test-env
dependencies:
  - pip=20.1.1
  - python=3.7.3
  - pip:
    - -r ../test_requirements.txt
@@ -3,13 +3,19 @@
#  Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
#  ------------------------------------------------------------------------------------------
import shutil
import sys
import uuid
from pathlib import Path
from typing import Generator

import pytest

from health_azure.himl import _package_setup
full_folder = str(Path(__file__).parent.parent / "src")
if full_folder not in sys.path:
    print(f"Adding to sys.path for running hi-ml-azure: {full_folder}")
    sys.path.insert(0, str(full_folder))

from health_azure.himl import _package_setup  # noqa: E402


def outputs_for_tests() -> Path:
@@ -29,6 +29,7 @@ from azureml.core import Experiment, Run, ScriptRunConfig, Workspace
from azureml.core.authentication import ServicePrincipalAuthentication
from azureml.core.environment import CondaDependencies
from azureml.data.azure_storage_datastore import AzureBlobDatastore
from health_azure import paths

import health_azure.utils as util
from health_azure.himl import AML_IGNORE_FILE, append_to_amlignore

@@ -61,10 +62,10 @@ def test_find_file(tmp_path: Path) -> None:
    start_path.mkdir(exist_ok=False)
    where_are_we_now = Path.cwd()
    os.chdir(start_path)
    found_file = util._find_file(file_name, False)
    found_file = util.find_file_in_parent_to_pythonpath(file_name)
    assert found_file
    with mock.patch.dict(os.environ, {"PYTHONPATH": str(python_root.absolute())}):
        found_file = util._find_file(file_name)
        found_file = util.find_file_in_parent_to_pythonpath(file_name)
        assert not found_file
    os.chdir(where_are_we_now)
@@ -384,6 +385,110 @@ dependencies:
        util.merge_conda_files(files, merged_file)


def test_merge_conda_pip_include(random_folder: Path) -> None:
    """
    Tests the logic to exclude PIP include statements from Conda environments.
    """
    env1 = """
channels:
  - default
dependencies:
  - conda_both=3.0
  - pip:
    - -r requirements.txt
    - foo==1.0
"""
    file1 = random_folder / "env1.yml"
    file1.write_text(env1)
    merged_file = random_folder / "merged.yml"
    util.merge_conda_files([file1], merged_file)
    merged_contents = merged_file.read_text()
    assert "-r requirements.txt" not in merged_contents

    file2 = random_folder / "requirements.txt"
    file2.write_text("package==1.0.0")
    merged_file2 = random_folder / "merged2.yml"
    util.merge_conda_files([file1], merged_file2, pip_files=[file2])
    merged_contents2 = merged_file2.read_text()
    assert merged_contents2 == """channels:
- default
dependencies:
- conda_both=3.0
- pip:
  - foo==1.0
  - package==1.0.0
"""


def test_merge_conda_pip_include2(random_folder: Path) -> None:
    """
    Tests the logic to exclude PIP include statements from Conda environments, on the root-level environment file.
    """
    if paths.is_himl_used_from_git_repo():
        root_yaml = paths.git_repo_root_folder() / paths.ENVIRONMENT_YAML_FILE_NAME
        requirements = paths.git_repo_root_folder() / "hi-ml-azure" / "run_requirements.txt"
        merged_file2 = random_folder / "merged2.yml"
        util.merge_conda_files([root_yaml], merged_file2, pip_files=[requirements])


def assert_pip_length(yaml: Any, expected_length: int) -> None:
    """Checks if the pip dependencies section of a Conda YAML file has the expected number of entries."""
    pip = util._get_pip_dependencies(yaml)
    assert pip is not None
    assert len(pip[1]) == expected_length


@pytest.mark.fast
def test_pip_include_1() -> None:
    """Test if Conda files that use PIP include are handled correctly. This uses the top-level environment.yml
    file in the repository.
    """
    if paths.is_himl_used_from_git_repo():
        root_yaml = paths.git_repo_root_folder() / paths.ENVIRONMENT_YAML_FILE_NAME
        assert root_yaml.is_file()
        original_yaml = conda_merge.read_file(root_yaml)
        # At the time of writing, the top-level environment file only had 4 include statements in the pip
        # section, they should all be filtered out.
        assert_pip_length(original_yaml, 4)
        uses_pip_include, modified_yaml = util.is_conda_file_with_pip_include(root_yaml)
        assert uses_pip_include
        pip = util._get_pip_dependencies(modified_yaml)
        # The pip section of the top-level yaml has nothing but include statements, so after filtering the
        # pip section is empty. In this case, no pip section should be present at all.
        assert pip is None


@pytest.mark.fast
def test_pip_include_2(tmp_path: Path) -> None:
    """Test if Conda files that use PIP include are recognized."""
    # Environment file without a "-r" include statement
    conda_str = """name: simple-envpip
dependencies:
  - pip:
    - azureml-sdk==1.23.0
  - more_conda
"""
    tmp_conda = tmp_path / "env.yml"
    tmp_conda.write_text(conda_str)
    uses_pip_include, modified_yaml = util.is_conda_file_with_pip_include(tmp_conda)
    assert not uses_pip_include
    assert_pip_length(modified_yaml, 1)

    # Environment file that has a "-r" include statement
    conda_str = """name: simple-env
dependencies:
  - pip:
    - -r foo.txt
    - any_package
"""
    tmp_conda.write_text(conda_str)
    uses_pip_include, modified_yaml = util.is_conda_file_with_pip_include(tmp_conda)
    assert uses_pip_include
    assert util._get_pip_dependencies(modified_yaml) == (0, ["any_package"])


@pytest.mark.parametrize(["s", "expected"],
                         [
                             ("1s", 1),
@@ -392,6 +497,7 @@ dependencies:
                             ("1.0d", 24 * 3600),
                             ("", None),
                         ])  # NOQA
@pytest.mark.fast
def test_run_duration(s: str, expected: Optional[float]) -> None:
    actual = util.run_duration_string_to_seconds(s)
    assert actual == expected

@@ -399,11 +505,13 @@ def test_run_duration(s: str, expected: Optional[float]) -> None:
        assert isinstance(actual, int)


@pytest.mark.fast
def test_run_duration_fails() -> None:
    with pytest.raises(Exception):
        util.run_duration_string_to_seconds("17b")


@pytest.mark.fast
def test_repository_root() -> None:
    root = repository_root()
    assert (root / "SECURITY.md").is_file()
@@ -17,9 +17,12 @@ pip_build:
pip_test:
	$(call call_parent,pip_test)

# pip install local package in editable mode for development and testing
# pip install all requirements for histo, read off the Conda file. This is somewhat hacky,
# we could also build a full Conda before starting the tests. Unclear about the performance
# impact of that.
call_pip_local:
	ls
	sed -e '1,/pip:/ d' environment.yml | cut -d "-" -f 2- > temp_requirements.txt
	pip install -r temp_requirements.txt

# pip upgrade and install local package in editable mode
pip_local: pip_upgrade call_pip_local

@@ -38,13 +41,6 @@ clean:
	rm -vrf ./testhisto/testhisto/test_outputs ./testhistotestSSL/test_ouputs
	rm -vf ./coverage ./coverage.txt ./coverage.xml

# build package, assuming build requirements already installed
call_build:
	ls

# pip install build requirements and build package
build: pip_build call_build

# run flake8, assuming test requirements already installed
call_flake8:
	flake8 --count --statistics --config=../.flake8 .

@@ -91,4 +87,4 @@ call_pytest_and_coverage:
pytest_and_coverage: pip_test call_pytest_and_coverage

# install test requirements and run all tests
test_all: pip_test call_flake8 call_mypy call_pytest_and_coverage
test_all: pip_test call_flake8 call_mypy call_pytest_and_coverage
@@ -0,0 +1,9 @@
# Histopathology Models and Workflows

## Getting started

- Build environment
- Download config to AzureML workspace
- Run a first workflow.

To be completed.
@@ -4,29 +4,29 @@ channels:
  - pytorch
  - conda-forge
dependencies:
  - cudatoolkit=11.1
  - cudatoolkit=11.3.1
  - pip=20.1.1
  - python=3.7.3
  - pytorch=1.10.0
  - python-blosc==1.7.0
  - torchvision=0.11.1
  - pip:
    - -r ../test_requirements.txt
    - azureml-sdk==1.36.0
    - cryptography==3.3.2
    - docker==4.3.1
    - flask==2.0.1
    - gputil==1.4.0
    - hi-ml>=0.1.12
    - joblib==0.16.0
    - jupyter==1.0.0
    - jupyter-client==6.1.5
    - lightning-bolts==0.4.0
    - mlflow==1.17.0
    - monai==0.6.0
    - more-itertools==8.10.0
    - mypy-extensions==0.4.3
    - numba==0.51.2
    - numpy==1.19.1
    - opencv-python-headless==4.5.1.48
    - pandas==1.3.4
    - pillow==9.0.0
    - psutil==5.7.2
    - pydicom==2.0.0
    - pyflakes==2.2.0
    - PyJWT==1.7.1
    - rich==5.1.1

@@ -34,8 +34,12 @@ dependencies:
    - runstats==1.8.0
    - scikit-image==0.17.2
    - scipy==1.5.2
    - simpleitk==1.2.4
    - six==1.15.0
    - stopit==1.1.2
    - tabulate==0.8.7
    - torchprof==1.3.3
    - torch>=1.10.0
    - torchvision>=0.11.1
    - torchmetrics==0.6.0
    - umap-learn==0.5.2
    - yacs==0.1.8
@@ -3,6 +3,6 @@ testpaths = testhisto testSSL
norecursedirs = docs logs outputs test_data
log_cli = true
log_cli_level = DEBUG
adopts = --strict-markers
addopts = --strict-markers
markers =
    fast: Tests that should run very fast, and can act as smoke tests to see if something goes terribly wrong.
@@ -51,6 +51,8 @@ class BaseMIL(LightningContainer):
                                                 "`none` (default), `cpu`, `gpu`")
    encoding_chunk_size: int = param.Integer(0, doc="If > 0 performs encoding in chunks, by loading "
                                                    "encoding_chunk_size tiles per chunk")
    is_finetune: bool = param.Boolean(False, doc="If True, fine-tune the encoder during training. If False, "
                                                 "keep the encoder frozen.")
    # local_dataset (used as data module root_path) is declared in DatasetParams superclass

    @property
@@ -58,8 +58,8 @@ class DeepSMILECrck(BaseMIL):
            # declared in TrainerParams:
            max_epochs=50,
            # declared in WorkflowParams:
            # number_of_cross_validation_splits=5,
            # cross_validation_split_index=0,
            # crossval_count=5,
            # crossval_index=0,
            # declared in OptimizerParams:
            l_rate=5e-4,
            weight_decay=1e-4,

@@ -122,8 +122,8 @@ class DeepSMILECrck(BaseMIL):
            cache_mode=self.cache_mode,
            precache_location=self.precache_location,
            cache_dir=self.cache_dir,
            number_of_cross_validation_splits=self.number_of_cross_validation_splits,
            cross_validation_split_index=self.cross_validation_split_index,
            crossval_count=self.crossval_count,
            crossval_index=self.crossval_index,
        )

    def get_callbacks(self) -> List[Callback]:
@@ -60,8 +60,8 @@ class DeepSMILEPanda(BaseMIL):
            # use_mixed_precision = True,

            # declared in WorkflowParams:
            number_of_cross_validation_splits=5,
            cross_validation_split_index=0,
            crossval_count=5,
            crossval_index=0,

            # declared in OptimizerParams:
            l_rate=5e-4,

@@ -125,8 +125,8 @@ class DeepSMILEPanda(BaseMIL):
            cache_mode=self.cache_mode,
            precache_location=self.precache_location,
            cache_dir=self.cache_dir,
            # number_of_cross_validation_splits=self.number_of_cross_validation_splits,
            # cross_validation_split_index=self.cross_validation_split_index,
            # crossval_count=self.crossval_count,
            # crossval_index=self.crossval_index,
        )

        # TODO: move self.class_names somewhere else since this is almost an exact copy of create_model in BaseMIL
@@ -39,8 +39,8 @@ class TilesDataModule(LightningDataModule):
                 cache_mode: CacheMode = CacheMode.NONE,
                 precache_location: CacheLocation = CacheLocation.NONE,
                 cache_dir: Optional[Path] = None,
                 number_of_cross_validation_splits: int = 0,
                 cross_validation_split_index: int = 0) -> None:
                 crossval_count: int = 0,
                 crossval_index: int = 0) -> None:
        """
        :param root_path: Root directory of the source dataset.
        :param max_bag_size: Upper bound on number of tiles in each loaded bag. If 0 (default),

@@ -67,8 +67,8 @@ class TilesDataModule(LightningDataModule):
            device it was saved from;
            If cache_mode is `DISK` precache_location `CPU` and `GPU` are equivalent.
        :param cache_dir: The directory onto which to cache data if caching is enabled.
        :param number_of_cross_validation_splits: Number of folds to perform.
        :param cross_validation_split_index: Index of the cross validation split to be performed.
        :param crossval_count: Number of folds to perform.
        :param crossval_index: Index of the cross validation split to be performed.
        """
        if precache_location is not CacheLocation.NONE and cache_mode is CacheMode.NONE:
            raise ValueError("Can only pre-cache if caching is enabled")

@@ -85,8 +85,8 @@ class TilesDataModule(LightningDataModule):
        self.precache_location = precache_location
        self.cache_dir = cache_dir
        self.batch_size = batch_size
        self.number_of_cross_validation_splits = number_of_cross_validation_splits
        self.cross_validation_split_index = cross_validation_split_index
        self.crossval_count = crossval_count
        self.crossval_index = crossval_index
        self.train_dataset, self.val_dataset, self.test_dataset = self.get_splits()
        self.class_weights = self.train_dataset.get_class_weights()
        self.seed = seed
@@ -31,9 +31,9 @@ class TcgaCrckTilesDataModule(TilesDataModule):
                                        group_column=trainval_dataset.SLIDE_ID_COLUMN,
                                        random_seed=5)

        # if self.number_of_cross_validation_splits > 1:
        # if self.crossval_count > 1:
        #     # Function get_k_fold_cross_validation_splits() will concatenate train and val splits
        #     splits = splits.get_k_fold_cross_validation_splits(self.number_of_cross_validation_splits)
        #     splits = splits.get_k_fold_cross_validation_splits(self.crossval_count)
        #     [self.cross_validation_split_index]

        return (TcgaCrck_TilesDataset(self.root_path, dataset_df=splits.train),
@@ -116,7 +116,7 @@ def test_ssl_container_cifar10_resnet_simclr() -> None:
    - checkpoint loading and ImageClassifier module creation
    - training of image classifier for one epoch.
    """
    model_namespace_simclr = "hi-ml-histopathology.SSL.configs.CIFAR10SimCLR"
    model_namespace_simclr = "SSL.configs.CIFAR10SimCLR"
    args = common_test_args + [f"--model={model_namespace_simclr}"]
    runner = default_runner()
    with check_config_json(Path.cwd()):

@@ -160,7 +160,7 @@ def test_ssl_container_cifar10_resnet_simclr() -> None:
    assert SslOnlineEvaluatorHiml.EVALUATOR_STATE_NAME in callback_state

    # Now run the actual SSL classifier off the stored checkpoint
    model_namespace_cifar = "hi-ml-histopathology.SSL.configs.SSLClassifierCIFAR"
    model_namespace_cifar = "SSL.configs.SSLClassifierCIFAR"
    args = common_test_args + [f"--model={model_namespace_cifar}",
                               f"--local_ssl_weights_path={checkpoint_path}"]
    with check_config_json(Path.cwd()):

@@ -180,7 +180,7 @@ def test_load_ssl_container_cifar10_cifar100_resnet_byol() -> None:
    Tests that the parameters fed into the BYOL model and online evaluator are
    indeed the ones we fed through our command line args
    """
    model_namespace_byol = "hi-ml-histopathology.SSL.configs.CIFAR10CIFAR100BYOL"
    model_namespace_byol = "SSL.configs.CIFAR10CIFAR100BYOL"
    args = common_test_args + [f"--model={model_namespace_byol}"]
    runner = default_runner()
    with mock.patch("sys.argv", args):

@@ -199,7 +199,7 @@ def test_ssl_container_rsna() -> None:
    runner = default_runner()
    path_to_cxr_test_dataset = TEST_OUTPUTS_PATH / "cxr_test_dataset"
    # Test training of SSL model
    model_namespace_byol = "hi-ml-histopathology.SSL.configs.NIH_RSNA_BYOL"
    model_namespace_byol = "SSL.configs.NIH_RSNA_BYOL"
    args = common_test_args + [f"--model={model_namespace_byol}",
                               f"--local_datasets={str(path_to_cxr_test_dataset)},{str(path_to_cxr_test_dataset)}",
                               "--use_balanced_binary_loss_for_linear_head=True",

@@ -249,7 +249,7 @@ def test_ssl_container_rsna() -> None:

    # Check that we are able to load the checkpoint and create classifier model
    checkpoint_path = loaded_config.checkpoint_folder / LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX
    model_namespace_cxr = "hi-ml-histopathology.SSL.configs.CXRImageClassifier"
    model_namespace_cxr = "SSL.configs.CXRImageClassifier"
    args = common_test_args + [f"--model={model_namespace_cxr}",
                               f"--local_datasets={str(path_to_cxr_test_dataset)}",
                               "--use_balanced_binary_loss_for_linear_head=True",
@@ -0,0 +1,12 @@
# This environment definition only specifies python and pytorch itself. It will be picked up automatically
# for AzureML jobs that are started from within the hi-ml folder. The submission logic will automatically add the
# pip packages required by health_azure and health_ml
name: himl-basic
channels:
  - defaults
  - pytorch
dependencies:
  - pip=20.1.1
  - python=3.7.3
  - pytorch=1.10.0
  - cudatoolkit=11.3.1

@@ -1,6 +1,8 @@
[pytest]
testpaths = testhiml
norecursedirs = outputs test_data
log_cli = True
log_cli_level = DEBUG
adopts = --strict-markers
addopts = --strict-markers
markers =
    fast: Tests that should run very fast, and can act as smoke tests to see if something goes terribly wrong.
@@ -1,10 +1,10 @@
dataclasses-json==0.5.2
hi-ml-azure>=0.1.8
jinja2==3.0.2
matplotlib==3.4.3
opencv-python-headless==4.5.1.48
pandas==1.3.4
pytorch-lightning==1.5.5
rpdb==0.1.6
torchvision==0.11.1
torch>=1.8
jinja2>=3.0.2
matplotlib>=3.4.3
opencv-python-headless>=4.5.1.48
pandas>=1.3.4
pytorch-lightning>=1.5.5
rpdb>=0.1.6
torchvision>=0.11.1
torch>=1.10.0

@@ -5,7 +5,6 @@
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import torch
from pytorch_lightning import LightningDataModule, LightningModule
from torchmetrics import MeanAbsoluteError
@@ -16,63 +15,91 @@ from torch.utils.data import DataLoader, Dataset
from health_ml.lightning_container import LightningContainer


class HelloDataset(Dataset):
def _create_1d_regression_dataset(n: int = 100, seed: int = 0) -> torch.Tensor:
    """Creates a simple 1-D dataset of a noisy linear function.

    :param n: The number of datapoints to generate, defaults to 100
    :type n: int, optional
    :param seed: Random number generator seed, defaults to 0
    :type seed: int, optional
    :return: A tensor that contains X values in [:, 0] and Y values in [:, 1]
    :rtype: torch.Tensor
    """
    A simple 1-dim regression task, read from a data file stored in the test data folder.
    torch.manual_seed(seed)
    x = torch.rand((n, 1)) * 10
    y = 0.2 * x + 0.1 * torch.randn(x.size())
    xy = torch.cat((x, y), dim=1)
    return xy


def _split_crossval(xy: torch.Tensor, crossval_count: int, crossval_index: int) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    # Creating the data file:
    # import numpy as np
    # import torch
    #
    # N = 100
    # x = torch.rand((N, 1)) * 10
    # y = 0.2 * x + 0.1 * torch.randn(x.size())
    # xy = torch.cat((x, y), dim=1)
    # np.savetxt("health_ml/configs/hellocontainer.csv", xy.numpy(), delimiter=",")
    def __init__(self, raw_data: List[List[float]]) -> None:
    Generates a split of the given dataset along the first dimension for cross-validation.

    :param xy: The data that should be split. The split will be generated across dimension 0.
    :type xy: torch.Tensor
    :param crossval_count: The number of splits in total
    :type crossval_count: int
    :param crossval_index: The index of the split that should be generated (0 <= crossval_index < crossval_count)
    :type crossval_index: int
    :return: A tuple of (training data, validation data)
    :rtype: Tuple[torch.Tensor, torch.Tensor]
    """
    n = xy.shape[0]
    split_size = n // crossval_count
    val_start = crossval_index * split_size
    val_end = (crossval_index + 1) * split_size
    # The training data is everything before and after the validation fold
    train1_start = 0
    train1_end = val_start
    train2_start = val_end if crossval_index < (crossval_count - 1) else 0
    train2_end = n if crossval_index < (crossval_count - 1) else 0
    val = xy[val_start:val_end]
    train = torch.cat([xy[train1_start:train1_end], xy[train2_start:train2_end]])
    return (train, val)
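
A quick worked example of the resulting split sizes (a sketch, not part of the committed file): with 100 rows and 5 folds, fold 1 validates on rows 20..39 and trains on the remaining 80 rows:

    xy = _create_1d_regression_dataset(n=100)
    train, val = _split_crossval(xy, crossval_count=5, crossval_index=1)
    assert val.shape[0] == 20    # rows 20..39 form the validation fold
    assert train.shape[0] == 80  # all remaining rows are used for training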


class HelloWorldDataset(Dataset):
    """
    A simple 1-dim regression task
    """

    def __init__(self, xy: torch.Tensor) -> None:
        """
        Creates the 1-dim regression dataset.

        :param raw_data: The raw data. This must be numeric data which can be converted into a tensor.
        See the static method from_path_and_indexes for an example call.
        :param xy: The raw data, x in the first column, y in the second column
        """
        super().__init__()  # type: ignore
        self.data = torch.tensor(raw_data, dtype=torch.float)
        self.xy = xy

    def __len__(self) -> int:
        return self.data.shape[0]
        return self.xy.shape[0]

    def __getitem__(self, item: int) -> Dict[str, torch.Tensor]:
        return {'x': self.data[item][0:1], 'y': self.data[item][1:2]}

    @staticmethod
    def from_path_and_indexes(
            root_folder: Path,
            start_index: int,
            end_index: int) -> 'HelloDataset':
        """
        Static method to instantiate a HelloDataset from the root folder with the start and end indexes.

        :param root_folder: The folder in which the data file lives ("hellocontainer.csv")
        :param start_index: The first row to read.
        :param end_index: The last row to read (exclusive)
        :return: A new instance based on the root folder and the start and end indexes.
        """
        raw_data = np.loadtxt(root_folder / "hellocontainer.csv", delimiter=",")[start_index:end_index]
        return HelloDataset(raw_data)
        return {"x": self.xy[item][0:1], "y": self.xy[item][1:2]}


class HelloDataModule(LightningDataModule):
class HelloWorldDataModule(LightningDataModule):
    """
    A data module that gives the training, validation and test data for a simple 1-dim regression task.
    """
    def __init__(
            self,
            root_folder: Path) -> None:

    def __init__(self, crossval_count: int, crossval_index: int) -> None:
        super().__init__()
        self.train = HelloDataset.from_path_and_indexes(root_folder, start_index=0, end_index=50)
        self.val = HelloDataset.from_path_and_indexes(root_folder, start_index=50, end_index=70)
        self.test = HelloDataset.from_path_and_indexes(root_folder, start_index=70, end_index=100)
        n_total = 200
        xy = _create_1d_regression_dataset(n=n_total)
        n_test = 40
        n_val = 50
        self.test = HelloWorldDataset(xy=xy[:n_test])
        if crossval_count <= 1:
            self.val = HelloWorldDataset(xy=xy[n_test:(n_test + n_val)])
            self.train = HelloWorldDataset(xy=xy[(n_test + n_val):])
        else:
            # This could be done via a library function like sklearn's KFold function, but we don't want to add
            # scikit-learn as a dependency just for this example.
            train, val = _split_crossval(xy[n_test:], crossval_count=crossval_count, crossval_index=crossval_index)
            self.val = HelloWorldDataset(xy=val)
            self.train = HelloWorldDataset(xy=train)

    def prepare_data(self, *args: Any, **kwargs: Any) -> None:
        pass
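
A minimal usage sketch for the new data module (illustrative only; the runner normally constructs it via the container's get_data_module):

    # Plain fixed split when cross-validation is disabled
    data_module = HelloWorldDataModule(crossval_count=1, crossval_index=0)
    # 5-fold cross-validation, this instance trains against fold 3
    data_module = HelloWorldDataModule(crossval_count=5, crossval_index=3)
    print(len(data_module.train), len(data_module.val), len(data_module.test))  # 128 32 40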

@@ -126,8 +153,9 @@ class HelloRegression(LightningModule):
        self.log("loss", loss, on_epoch=True, on_step=False)
        return loss

    def validation_step(self, batch: Dict[str, torch.Tensor], *args: Any,  # type: ignore
                        **kwargs: Any) -> torch.Tensor:
    def validation_step(  # type: ignore
        self, batch: Dict[str, torch.Tensor], *args: Any, **kwargs: Any
    ) -> torch.Tensor:
        """
        This method is part of the standard PyTorch Lightning interface. For an introduction, please see
        https://pytorch-lightning.readthedocs.io/en/stable/starter/converting.html

@@ -208,7 +236,7 @@ class HelloRegression(LightningModule):
        Path("test_mae.txt").write_text(str(self.test_mae.compute().item()))


class HelloContainer(LightningContainer):
class HelloWorld(LightningContainer):
    """
    An example container for using the hi-ml runner. This container has methods
    to generate the actual Lightning model, and read out the datamodule that will be used for training.

@@ -231,5 +259,7 @@ class HelloContainer(LightningContainer):
    # in turn contains 3 data loaders for training, validation, and test set.
    def get_data_module(self) -> LightningDataModule:
        assert self.local_dataset_dir is not None
        return HelloDataModule(
            root_folder=self.local_dataset_dir)  # type: ignore
        # If you would like to use the built-in cross-validation functionality that runs training in parallel,
        # you need to provide the crossval parameters in the LightningContainer to the datamodule. The
        # datamodule must carry out appropriate splitting of the data.
        return HelloWorldDataModule(crossval_count=self.crossval_count, crossval_index=self.crossval_index)
@@ -10,15 +10,18 @@ from pathlib import Path
from typing import List, Optional

import param
from azureml.train.hyperdrive import HyperDriveConfig
from param import Parameterized

from health_azure import create_crossval_hyperdrive_config
from health_azure.utils import RUN_CONTEXT, PathOrString, is_running_in_azure_ml

from health_ml.utils import fixed_paths
from health_ml.utils.common_utils import (CHECKPOINT_FOLDER,
                                          create_unique_timestamp_id,
                                          DEFAULT_AML_UPLOAD_DIR,
                                          DEFAULT_LOGS_DIR_NAME, is_windows, parse_model_id_and_version)
                                          DEFAULT_LOGS_DIR_NAME,
                                          parse_model_id_and_version)
from health_ml.utils.type_annotations import TupleFloat2


@@ -43,16 +46,6 @@ class LRSchedulerType(Enum):
    MultiStep = "MultiStep"


@unique
class MultiprocessingStartMethod(Enum):
    """
    Different methods for starting data loader processes.
    """
    fork = "fork"
    forkserver = "forkserver"
    spawn = "spawn"


@unique
class OptimizerType(Enum):
    """
@@ -79,7 +72,7 @@ class ExperimentFolderHandler(Parameterized):
    def create(project_root: Path,
               is_offline_run: bool,
               model_name: str,
               output_to: Path = Path()) -> ExperimentFolderHandler:
               output_to: Optional[Path] = None) -> ExperimentFolderHandler:
        """
        Creates a new object that holds output folder configurations. When running inside of AzureML, the output
        folders will be directly under the project root. If not running inside AzureML, a folder with a timestamp

@@ -96,8 +89,7 @@ class ExperimentFolderHandler(Parameterized):
        """
        if not project_root.is_absolute():
            raise ValueError(f"The project root is required to be an absolute path, but got {project_root}")
        # output_to by default will be Path(), which is not None, but Path().stem is an empty string
        if is_offline_run or output_to.stem:
        if is_offline_run or output_to:
            if output_to:
                logging.info(f"All results will be written to the specified output folder {output_to}")
                root = Path(output_to).absolute()
@@ -140,20 +132,17 @@ class WorkflowParams(param.Parameterized):
    model_id: str = param.String(default="",
                                 doc="A model id string in the form 'model name:version' "
                                     "to use a registered model for inference.")
    multiprocessing_start_method: MultiprocessingStartMethod = \
        param.ClassSelector(class_=MultiprocessingStartMethod,
                            default=(MultiprocessingStartMethod.spawn if is_windows()
                                     else MultiprocessingStartMethod.fork),
                            doc="Method to be used to start child processes in pytorch. Should be one of forkserver, "
                                "fork or spawn. If not specified, fork is used on Linux and spawn on Windows. "
                                "Set to forkserver as a possible remedy for stuck jobs.")
    regression_test_folder: Optional[Path] = \
        param.ClassSelector(class_=Path, default=None, allow_None=True,
                            doc="A path to a folder that contains a set of files. At the end of training and "
                                "model evaluation, all files given in that folder must be present in the job's output "
                                "folder, and their contents must match exactly. When running in AzureML, you need to "
                                "ensure that this folder is part of the snapshot that gets uploaded. The path should "
                                "be relative to the repository root directory.")
    crossval_count: int = param.Integer(default=1, bounds=(0, None),
                                        doc="The number of splits to use when doing cross-validation. "
                                            "Use 1 to disable cross-validation.")
    crossval_index: int = param.Integer(default=0, bounds=(0, None),
                                        doc="When doing cross-validation, this is the index of the current "
                                            "split. Valid values: 0 .. (crossval_count - 1)")
    hyperdrive: bool = param.Boolean(False, doc="If True, use the Hyperdrive configuration specified in the "
                                                "LightningContainer to run hyperparameter tuning. If False, just "
                                                "run a plain single training job.")
    CROSSVAL_INDEX_ARG_NAME = "crossval_index"
    CROSSVAL_COUNT_ARG_NAME = "crossval_count"

    def validate(self) -> None:
        if sum([bool(param) for param in [self.weights_url, self.local_weights_path, self.model_id]]) > 1:
@@ -162,6 +151,10 @@ class WorkflowParams(param.Parameterized):
        if self.model_id:
            parse_model_id_and_version(self.model_id)

        if self.crossval_count > 1:
            if not (0 <= self.crossval_index < self.crossval_count):
                raise ValueError(f"Attribute crossval_index out of bounds (crossval_count = {self.crossval_count})")
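
For example, with crossval_count=5 the valid fold indices are 0 through 4; anything outside that range is rejected (a behavioural sketch, not part of the file):

    params = WorkflowParams(crossval_count=5, crossval_index=4)
    params.validate()  # passes: 4 is the last valid fold index
    params.crossval_index = 5
    params.validate()  # raises ValueError: crossval_index out of bounds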

    @property
    def is_running_in_aml(self) -> bool:
        """

@@ -180,21 +173,37 @@ class WorkflowParams(param.Parameterized):
            seed = self.random_seed
        return seed

    @property
    def is_crossvalidation_enabled(self) -> bool:
        """
        Returns True if the present parameters indicate that cross-validation should be used.
        """
        return self.crossval_count > 1

    def get_crossval_hyperdrive_config(self) -> HyperDriveConfig:
        # For cross-validation, the name of the metric to monitor does not matter, because no early termination
        # or suchlike is specified.
        return create_crossval_hyperdrive_config(num_splits=self.crossval_count,
                                                 cross_val_index_arg_name=self.CROSSVAL_INDEX_ARG_NAME,
                                                 metric_name="val/loss")
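
Together with the runner changes further below, this is what turns a single training job into one HyperDrive child run per fold. A cross-validation run can then be requested from the command line; the cluster name is a placeholder assumption:

    python hi-ml/src/health_ml/runner.py --model=HelloWorld --crossval_count=5 --cluster=<your-cluster-name>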


class DatasetParams(param.Parameterized):
    azure_datasets: List[str] = param.List(default=[], class_=str,
                                           doc="If provided, the ID of one or more datasets to use when running in"
                                               " AzureML.This dataset must exist as a folder of the same name in the"
                                               " 'datasets' container in the datasets storage account. This dataset"
                                               " will be mounted and made available at the 'local_dataset' path"
                                               " when running in AzureML.")
                                               " AzureML. This dataset must exist as a folder of the same name "
                                               "in the 'datasets' container in the datasets storage account. This "
                                               "dataset will be mounted and made available at the 'local_dataset' "
                                               "path when running in AzureML.")
    local_datasets: List[Path] = param.List(default=[], class_=Path,
                                            doc="A list of one or more paths to the dataset to use, when training"
                                                " outside of Azure ML.")
    dataset_mountpoints: List[Path] = param.List(default=[], class_=Path,
                                                 doc="The path at which the AzureML dataset should be made available "
                                                     "via mounting or downloading. This only affects jobs running in "
                                                     "AzureML. If empty, use a random mount/download point.")
                                                 doc="The path at which the AzureML dataset should be made "
                                                     "available via mounting or downloading. This only affects "
                                                     "jobs running in AzureML. If empty, use a random "
                                                     "mount/download point.")

    def validate(self) -> None:
        if (not self.azure_datasets) and (not self.local_datasets):
@@ -207,10 +216,10 @@ class DatasetParams(param.Parameterized):


class OutputParams(param.Parameterized):
    output_to: Path = param.ClassSelector(class_=Path, default=Path(),
                                          doc="If provided, the run outputs will be written to the given folder. If "
                                              "not provided, outputs will go into a subfolder of the project root "
                                              "folder.")
    output_to: Optional[Path] = param.ClassSelector(class_=Path, default=None,
                                                    doc="If provided, the run outputs will be written to the given "
                                                        "folder. If not provided, outputs will go into a subfolder "
                                                        "of the project root folder.")
    file_system_config: ExperimentFolderHandler = param.ClassSelector(default=ExperimentFolderHandler(),
                                                                      class_=ExperimentFolderHandler,
                                                                      instantiate=False,

@@ -229,14 +238,15 @@ class OutputParams(param.Parameterized):

    def set_output_to(self, output_to: PathOrString) -> None:
        """
        Adjusts the file system settings in the present object such that all outputs are written to the given folder.
        Adjusts the file system settings in the present object such that all outputs are written to the given
        folder.

        :param output_to: The absolute path to a folder that should contain the outputs.
        """
        self.output_to = Path(output_to)
        self.create_filesystem()
        self.create_filesystem(project_root=fixed_paths.repository_root_directory())

    def create_filesystem(self, project_root: Path = fixed_paths.repository_root_directory()) -> None:
    def create_filesystem(self, project_root: Path) -> None:
        """
        Creates new file system settings (outputs folder, logs folder) based on the information stored in the
        present object. If any of the folders do not yet exist, they are created.
@@ -268,7 +278,8 @@

class OptimizerParams(param.Parameterized):
    l_rate: float = param.Number(1e-4, doc="The initial learning rate", bounds=(0, None))
    _min_l_rate: float = param.Number(0.0, doc="The minimum learning rate for the Polynomial and Cosine schedulers.",
    _min_l_rate: float = param.Number(0.0,
                                      doc="The minimum learning rate for the Polynomial and Cosine schedulers.",
                                      bounds=(0.0, None))
    l_rate_scheduler: LRSchedulerType = param.ClassSelector(default=LRSchedulerType.Polynomial,
                                                            class_=LRSchedulerType,

@@ -338,19 +349,20 @@ class TrainerParams(param.Parameterized):
    autosave_every_n_val_epochs: int = param.Integer(1, bounds=(0, None),
                                                     doc="Save epoch checkpoints every N validation epochs. "
                                                         "If pl_check_val_every_n_epoch > 1, this means that "
                                                         "checkpoints are saved every N * pl_check_val_every_n_epoch "
                                                         "training epochs.")
                                                         "checkpoints are saved every "
                                                         "N * pl_check_val_every_n_epoch training epochs.")
    detect_anomaly: bool = param.Boolean(False, doc="If true, test gradients for anomalies (NaN or Inf) during "
                                                    "training.")
    use_mixed_precision: bool = param.Boolean(False, doc="If true, mixed precision training is activated during "
                                                         "training.")
    max_num_gpus: int = param.Integer(default=-1, doc="The maximum number of GPUs to use. If set to a value < 0, use "
                                                      "all available GPUs. In distributed training, this is the "
                                                      "maximum number of GPUs per node.")
    max_num_gpus: int = param.Integer(default=-1,
                                      doc="The maximum number of GPUs to use. If set to a value < 0, use "
                                          "all available GPUs. In distributed training, this is the "
                                          "maximum number of GPUs per node.")
    pl_progress_bar_refresh_rate: Optional[int] = \
        param.Integer(default=None,
                      doc="PyTorch Lightning trainer flag 'progress_bar_refresh_rate': How often to refresh progress "
                          "bar (in steps). Value 0 disables progress bar. Value None chooses automatically.")
                      doc="PyTorch Lightning trainer flag 'progress_bar_refresh_rate': How often to refresh the "
                          "progress bar (in steps). Value 0 disables the progress bar. If None, choose automatically.")
    pl_num_sanity_val_steps: int = \
        param.Integer(default=0,
                      doc="PyTorch Lightning trainer flag 'num_sanity_val_steps': Number of validation "
@@ -358,8 +370,8 @@ class TrainerParams(param.Parameterized):
    pl_deterministic: bool = \
        param.Boolean(default=False,
                      doc="Controls the PyTorch Lightning trainer flags 'deterministic' and 'benchmark'. If "
                          "'pl_deterministic' is True, results are perfectly reproducible. If False, they are not, but "
                          "you may see training speed increases.")
                          "'pl_deterministic' is True, results are perfectly reproducible. If False, they are not, "
                          "but you may see training speed increases.")
    pl_find_unused_parameters: bool = \
        param.Boolean(default=False,
                      doc="Controls the PyTorch Lightning flag 'find_unused_parameters' for the DDP plugin. "

@@ -382,9 +394,9 @@ class TrainerParams(param.Parameterized):
    monitor_loading: bool = param.Boolean(default=False,
                                          doc="If True, add the BatchTimeCallback callback to the Lightning trainer "
                                              "object. This will monitor how long individual batches take to load.")
    additional_env_files: List[str] = param.List(class_=Path, default=[],
                                                 doc="Additional conda environment (.yml) files to merge into the"
                                                     " overall environment definition")
    additional_env_files: List[Path] = param.List(class_=Path, default=[],
                                                  doc="Additional conda environment (.yml) files to merge into the"
                                                      " overall environment definition")

    @property
    def use_gpu(self) -> bool:

@@ -411,5 +423,6 @@ class TrainerParams(param.Parameterized):
            num_gpus = self.max_num_gpus
            logging.info(f"Restricting the number of GPUs to {num_gpus}")
        elif self.max_num_gpus > num_gpus:
            logging.warning(f"You requested max_num_gpus {self.max_num_gpus} but there are only {num_gpus} available.")
            logging.warning(
                f"You requested max_num_gpus {self.max_num_gpus} but there are only {num_gpus} available.")
        return num_gpus
@@ -1,11 +1,10 @@
import param
from typing import Optional


class ExperimentConfig(param.Parameterized):
    cluster: Optional[str] = param.String(default=None, allow_None=True,
                                          doc="The name of the GPU or CPU cluster inside the AzureML workspace "
                                              "that should execute the job.")
    cluster: str = param.String(default="", allow_None=False,
                                doc="The name of the GPU or CPU cluster inside the AzureML workspace "
                                    "that should execute the job.")
    num_nodes: int = param.Integer(default=1, doc="The number of virtual machines that will be allocated for this "
                                                  "job in AzureML.")
    model: str = param.String(doc="The fully qualified name of the model to train/test, e.g. "
@@ -30,6 +30,7 @@ class LightningContainer(WorkflowParams,
    should be trained is returned by the `get_model` method. The training data must be returned in the form of
    a LightningDataModule, by the `get_data_module` method.
    """

    def __init__(self, **kwargs: Any) -> None:
        super().__init__(**kwargs)
        self._model: Optional[LightningModule] = None
@@ -77,12 +78,20 @@ class LightningContainer(WorkflowParams,
        """
        return []

    def get_parameter_search_hyperdrive_config(self, _: ScriptRunConfig) -> HyperDriveConfig:  # type: ignore
    def get_parameter_tuning_config(self, run_config: ScriptRunConfig) -> HyperDriveConfig:  # type: ignore
        """
        Parameter search is not implemented. It should be implemented in a sub class if needed.
        Returns a configuration for hyperparameter tuning via AzureML's Hyperdrive capability.
        Hyperparameter tuning can be triggered on the commandline via the "--hyperdrive" flag.
        Override this method in your LightningContainer to use hyperparameter tuning.

        The HyperDriveConfig object needs to specify which parameters should be searched over, and which
        metric should be monitored.

        :param run_config: The ScriptRunConfig object that needs to be passed into the constructor of
            HyperDriveConfig.
        """
        raise NotImplementedError("Parameter search is not implemented. It should be implemented in"
                                  "a sub class if needed.")
        raise NotImplementedError("Parameter search is not implemented. Please override 'get_parameter_tuning_config' "
                                  "in your model container.")

    def update_experiment_config(self, experiment_config: ExperimentConfig) -> None:
        """
@@ -148,14 +157,17 @@ class LightningContainer(WorkflowParams,
        self._model._optimizer_params = create_from_matching_params(self, OptimizerParams)
        self._model._trainer_params = create_from_matching_params(self, TrainerParams)

    def get_hyperdrive_config(self, run_config: ScriptRunConfig) -> HyperDriveConfig:
    def get_hyperdrive_config(self) -> Optional[HyperDriveConfig]:
        """
        Returns the HyperDrive config for either parameter search
        Returns the HyperDrive config for either hyperparameter tuning or cross-validation.

        :param run_config: AzureML estimator
        :return: HyperDriveConfigs
        :return: A configuration object for HyperDrive
        """
        return self.get_parameter_search_hyperdrive_config(run_config)
        if self.is_crossvalidation_enabled:
            return self.get_crossval_hyperdrive_config()
        if self.hyperdrive:
            return self.get_parameter_tuning_config(ScriptRunConfig(source_directory=""))
        return None

    def load_model_checkpoint(self, checkpoint_path: Path) -> None:
        """
@@ -19,11 +19,10 @@ from health_azure.utils import (ENV_GLOBAL_RANK, ENV_LOCAL_RANK, ENV_NODE_RANK,
from health_ml.lightning_container import LightningContainer
from health_ml.utils import AzureMLLogger, AzureMLProgressBar
from health_ml.utils.checkpoint_utils import cleanup_checkpoints
from health_ml.utils.common_utils import AUTOSAVE_CHECKPOINT_FILE_NAME, EXPERIMENT_SUMMARY_FILE
from health_ml.utils.common_utils import (AUTOSAVE_CHECKPOINT_FILE_NAME, EXPERIMENT_SUMMARY_FILE,
                                          change_working_directory)
from health_ml.utils.lightning_loggers import StoringLogger

TEMP_PREFIX = "temp/"

T = TypeVar('T')
@@ -213,9 +212,11 @@ def model_train(checkpoint_path: Optional[Path],
    logging.info(f"Environment variables: {rank_info}. trainer.global_rank: {trainer.global_rank}")

    # get recovery checkpoint if it exists

    logging.info("Starting training")
    trainer.fit(lightning_model, datamodule=data_module)
    # Change to the outputs folder so that the model can write to the current working directory, and still everything
    # is put into the right place in AzureML (only the contents of the "outputs" folder is treated as a result file)
    with change_working_directory(container.outputs_folder):
        trainer.fit(lightning_model, datamodule=data_module)
    assert trainer.logger is not None
    trainer.logger.finalize('success')
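
change_working_directory is the context manager imported above from health_ml.utils.common_utils; a minimal sketch of the pattern it implements (illustrative, not the repository's exact code):

    import os
    from contextlib import contextmanager
    from pathlib import Path
    from typing import Generator

    @contextmanager
    def change_working_directory(folder: Path) -> Generator:
        # Switch into the target folder, and always restore the previous working directory
        old_cwd = os.getcwd()
        os.chdir(str(folder))
        try:
            yield
        finally:
            os.chdir(old_cwd)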

@@ -20,7 +20,7 @@ from health_ml.model_trainer import create_lightning_trainer, model_train
from health_ml.utils import fixed_paths
from health_ml.utils.checkpoint_utils import CheckpointHandler
from health_ml.utils.common_utils import (
    EFFECTIVE_RANDOM_SEED_KEY_NAME, logging_section,
    EFFECTIVE_RANDOM_SEED_KEY_NAME, change_working_directory, logging_section,
    RUN_RECOVERY_ID_KEY, RUN_RECOVERY_FROM_ID_KEY_NAME)
from health_ml.utils.lightning_loggers import StoringLogger
from health_ml.utils.type_annotations import PathOrString
@@ -181,10 +181,13 @@ class MLRunner:
            trainer, _ = create_lightning_trainer(self.container, num_nodes=1)

            self.container.load_model_checkpoint(checkpoint_path=checkpoint_paths[0])
            # Change the current working directory to ensure that test files go to the right folder
            data_module = self.container.get_data_module()

            _ = trainer.test(self.container.model, datamodule=data_module)
            # Change to the outputs folder so that the model can write to the current working directory, and still
            # everything is put into the right place in AzureML (there, only the contents of the "outputs" folder
            # is retained)
            with change_working_directory(self.container.outputs_folder):
                _ = trainer.test(self.container.model, datamodule=data_module)

        else:
            logging.warning("None of the suitable test methods is overridden. Skipping inference completely.")
@@ -9,27 +9,23 @@ import param
import sys
import uuid
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, Optional, Tuple

import matplotlib
from azureml.core import Workspace

# Add hi-ml packages to sys.path so that AML can find them
# Optionally add the histopathology module, if this exists
# Add hi-ml packages to sys.path so that AML can find them if we are using the runner directly from the git repo
himl_root = Path(__file__).absolute().parent.parent.parent.parent
print(f"Starting the himl runner at {himl_root}")
print(f"health_ml pkg root: {himl_root}")
health_ml_pkg = himl_root / "hi-ml" / "src"
health_azure_pkg = himl_root / "hi-ml-azure" / "src"
health_histopathology_dir = himl_root / "hi-ml-histopathology" / "src"

if health_histopathology_dir.exists():
    sys.path.insert(0, str(health_histopathology_dir))
sys.path.insert(0, str(health_azure_pkg))
sys.path.insert(0, str(health_ml_pkg))
print(f"sys path: {sys.path}")
folders_to_add = [himl_root / "hi-ml" / "src",
                  himl_root / "hi-ml-azure" / "src",
                  himl_root / "hi-ml-histopathology" / "src"]
for folder in folders_to_add:
    if folder.is_dir():
        sys.path.insert(0, str(folder))

from health_azure import AzureRunInfo, submit_to_azure_if_needed  # noqa: E402
from health_azure.datasets import create_dataset_configs  # noqa: E402
from health_azure.paths import is_himl_used_from_git_repo  # noqa: E402
from health_azure.utils import (get_workspace, is_local_rank_zero, merge_conda_files,  # noqa: E402
                                set_environment_variables_for_multi_node, create_argparser, parse_arguments,
                                ParserResult, apply_overrides)
@@ -38,12 +34,11 @@ from health_ml.experiment_config import ExperimentConfig  # noqa: E402
from health_ml.lightning_container import LightningContainer  # noqa: E402
from health_ml.run_ml import MLRunner  # noqa: E402
from health_ml.utils import fixed_paths  # noqa: E402
from health_ml.utils.common_utils import (get_all_environment_files,  # noqa: E402
from health_ml.utils.common_utils import (check_conda_environments, get_all_environment_files,  # noqa: E402
                                          get_all_pip_requirements_files,
                                          is_linux, logging_to_stdout)
from health_ml.utils.config_loader import ModelConfigLoader  # noqa: E402


DEFAULT_DOCKER_BASE_IMAGE = "mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04"
@@ -142,8 +137,7 @@ class Runner:
        self.experiment_config = experiment_config
        if not experiment_config.model:
            raise ValueError("Parameter 'model' needs to be set to specify which model to run.")
        print(f"Creating model loader with the following args: {parser_result.args}")
        model_config_loader: ModelConfigLoader = ModelConfigLoader(**parser_result.args)
        model_config_loader: ModelConfigLoader = ModelConfigLoader()
        # Create the model as per the "model" commandline option. This is a LightningContainer.
        container = model_config_loader.create_model_config_from_name(model_name=experiment_config.model)
@@ -161,6 +155,23 @@ class Runner:

        return parser_result_

    def validate(self) -> None:
        """
        Runs sanity checks on the whole experiment.
        """
        if not self.experiment_config.azureml:
            if self.lightning_container.hyperdrive:
                logging.info("You have turned on HyperDrive for parameter tuning. This can only be run in "
                             "AzureML, so submission to AzureML has been switched on.")
                self.experiment_config.azureml = True
            if self.lightning_container.is_crossvalidation_enabled:
                logging.info("You have turned on cross-validation. This can only be run in AzureML, "
                             "so submission to AzureML has been switched on.")
                self.experiment_config.azureml = True
            if self.experiment_config.cluster:
                logging.info("You have provided a compute cluster name, hence submission to AzureML has been "
                             "switched on.")
                self.experiment_config.azureml = True

    def run(self) -> Tuple[LightningContainer, AzureRunInfo]:
        """
        The main entry point for training and testing models from the commandline. This chooses a model to train
@@ -174,6 +185,7 @@ class Runner:
        logging_to_stdout(logging.INFO if is_local_rank_zero() else "ERROR")
        initialize_rpdb()
        self.parse_and_load_model()
        self.validate()
        azure_run_info = self.submit_to_azureml_if_needed()
        self.run_in_situ(azure_run_info)
        return self.lightning_container, azure_run_info
@@ -191,30 +203,18 @@ class Runner:
        entry_script = Path(sys.argv[0]).resolve()
        script_params = sys.argv[1:]

        additional_conda_env_files = self.lightning_container.additional_env_files
        additional_env_files: Optional[List[Path]]
        if additional_conda_env_files is not None:
            additional_env_files = [Path(f) for f in additional_conda_env_files]
        else:
            additional_env_files = None

        conda_dependencies_files = get_all_environment_files(self.project_root,
                                                             additional_files=additional_env_files)
        pip_requirements_files = get_all_pip_requirements_files()

        # Merge the project-specific dependencies with the packages and write unified definition
        # to temp file. In case of version conflicts, the package version in the outer project is given priority.
        temp_conda: Optional[Path] = None
        if len(conda_dependencies_files) > 1 or len(pip_requirements_files) > 0:
            temp_conda = root_folder / f"temp_environment-{uuid.uuid4().hex[:8]}.yml"
            merge_conda_files(conda_dependencies_files, temp_conda, pip_files=pip_requirements_files)

        # TODO: Update environment variables
        environment_variables: Dict[str, Any] = {}

        # get default datastore from provided workspace
        workspace = get_workspace()
        default_datastore = workspace.get_default_datastore().name
        # Get the default datastore from the provided workspace. Authentication can take a few seconds, hence only do
        # that if we are really submitting to AzureML.
        workspace: Optional[Workspace] = None
        if self.experiment_config.azureml:
            try:
                workspace = get_workspace()
            except ValueError:
                logging.warning("No configuration file for an AzureML workspace was found.")
        default_datastore = workspace.get_default_datastore().name if workspace is not None else ""

        local_datasets = self.lightning_container.local_datasets
        all_local_datasets = [Path(p) for p in local_datasets] if len(local_datasets) > 0 else []
@@ -223,27 +223,48 @@ class Runner:
            all_dataset_mountpoints=self.lightning_container.dataset_mountpoints,
            all_local_datasets=all_local_datasets,  # type: ignore
            datastore=default_datastore)
        if self.lightning_container.is_crossvalidation_enabled and not self.experiment_config.azureml:
            raise ValueError("Cross-validation is only supported when submitting the job to AzureML.")
        hyperdrive_config = self.lightning_container.get_hyperdrive_config()
        temp_conda: Optional[Path] = None
        try:
            if self.experiment_config.azureml:
                conda_files = get_all_environment_files(root_folder,
                                                        additional_files=self.lightning_container.additional_env_files)
                check_conda_environments(conda_files)
                # This adds all pip packages required by hi-ml and hi-ml-azure in case the code is used directly from
                # source (submodule) rather than installed as a package.
                pip_requirements_files = get_all_pip_requirements_files()

                # Merge the project-specific dependencies with the packages and write the unified definition to a
                # temp file.
                if len(conda_files) > 1 or len(pip_requirements_files) > 0:
                    temp_conda = root_folder / f"temp_environment-{uuid.uuid4().hex[:8]}.yml"
                    merge_conda_files(conda_files, temp_conda, pip_files=pip_requirements_files)

                if workspace is None:
                    raise ValueError("Unable to submit the script to AzureML because no workspace configuration file "
                                     "(config.json) was found.")
                if not self.experiment_config.cluster:
                    raise ValueError("You need to specify a cluster name via '--cluster NAME' to submit"
                    raise ValueError("You need to specify a cluster name via '--cluster NAME' to submit "
                                     "the script to run in AzureML")
            azure_run_info = submit_to_azure_if_needed(
                entry_script=entry_script,
                snapshot_root_directory=root_folder,
                script_params=script_params,
                conda_environment_file=temp_conda or conda_dependencies_files[0],
                conda_environment_file=temp_conda or conda_files[0],
                aml_workspace=workspace,
                compute_cluster_name=self.experiment_config.cluster,
                environment_variables=environment_variables,
                default_datastore=default_datastore,
                experiment_name=self.lightning_container.name,  # create_experiment_name(),
                experiment_name=self.lightning_container.model_name,  # create_experiment_name(),
                input_datasets=input_datasets,  # type: ignore
                num_nodes=self.experiment_config.num_nodes,
                wait_for_completion=False,
                ignored_folders=[],
                submit_to_azureml=self.experiment_config.azureml,
                docker_base_image=DEFAULT_DOCKER_BASE_IMAGE,
                hyperdrive_config=hyperdrive_config,
                create_output_folders=False,
                tags=additional_run_tags(
                    commandline_args=" ".join(script_params))
            )
@@ -252,7 +273,7 @@ class Runner:
                input_datasets=input_datasets,  # type: ignore
                submit_to_azureml=False)
        finally:
            if temp_conda:
            if temp_conda and temp_conda.is_file():
                temp_conda.unlink()
        # submit_to_azure_if_needed calls sys.exit after submitting to AzureML. We only reach this when running
        # the script locally or in AzureML.
@@ -296,7 +317,7 @@ def run(project_root: Path) -> Tuple[LightningContainer, AzureRunInfo]:


def main() -> None:
    run(project_root=fixed_paths.repository_root_directory())
    run(project_root=fixed_paths.repository_root_directory() if is_himl_used_from_git_repo() else Path.cwd())


if __name__ == '__main__':
@@ -10,10 +10,11 @@ from typing import Any, Generator, Iterable, List, Optional, Union

import torch
from torch.nn import Module
from health_azure import utils
from health_azure import paths
from health_azure.paths import ENVIRONMENT_YAML_FILE_NAME, git_repo_root_folder, is_himl_used_from_git_repo

from health_azure.utils import PathOrString

from health_ml.utils import fixed_paths
from health_azure.utils import PathOrString, is_conda_file_with_pip_include


MAX_PATH_LENGTH = 260
@@ -25,15 +26,17 @@ string_to_path = lambda x: None if (x is None or len(x.strip()) == 0) else Path(
# file and directory names
CHECKPOINT_SUFFIX = ".ckpt"
AUTOSAVE_CHECKPOINT_FILE_NAME = "autosave"
AUTOSAVE_CHECKPOINT_CANDIDATES = [AUTOSAVE_CHECKPOINT_FILE_NAME + CHECKPOINT_SUFFIX,
                                  AUTOSAVE_CHECKPOINT_FILE_NAME + "-v1" + CHECKPOINT_SUFFIX]
AUTOSAVE_CHECKPOINT_CANDIDATES = [
    AUTOSAVE_CHECKPOINT_FILE_NAME + CHECKPOINT_SUFFIX,
    AUTOSAVE_CHECKPOINT_FILE_NAME + "-v1" + CHECKPOINT_SUFFIX,
]
CHECKPOINT_FOLDER = "checkpoints"
DEFAULT_AML_UPLOAD_DIR = "outputs"
DEFAULT_LOGS_DIR_NAME = "logs"
EXPERIMENT_SUMMARY_FILE = "experiment_summary.txt"

# run recovery
RUN_RECOVERY_ID_KEY = 'run_recovery_id'
RUN_RECOVERY_ID_KEY = "run_recovery_id"
RUN_RECOVERY_FROM_ID_KEY_NAME = "recovered_from"

# other
@@ -45,6 +48,7 @@ class ModelExecutionMode(Enum):
    """
    Model execution mode
    """

    TRAIN = "Train"
    TEST = "Test"
    VAL = "Val"

@@ -111,8 +115,7 @@ def _add_formatter(handler: logging.StreamHandler) -> None:
    """
    Adds a logging formatter that includes the timestamp and the logging level.
    """
    formatter = logging.Formatter(fmt="%(asctime)s %(levelname)-8s %(message)s",
                                  datefmt="%Y-%m-%dT%H:%M:%SZ")
    formatter = logging.Formatter(fmt="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%dT%H:%M:%SZ")
    # noinspection PyTypeHints
    formatter.converter = time.gmtime  # type: ignore
    handler.setFormatter(formatter)

@@ -129,6 +132,7 @@ def logging_section(gerund: str) -> Generator:
    :param gerund: string expressing what happens in this section of the log.
    """
    from time import time

    logging.info("")
    msg = f"**** STARTING: {gerund} "
    logging.info(msg + (100 - len(msg)) * "*")

@@ -152,14 +156,14 @@ def is_windows() -> bool:
    """
    Returns True if the host operating system is Windows.
    """
    return os.name == 'nt'
    return os.name == "nt"


def is_linux() -> bool:
    """
    Returns True if the host operating system is a flavour of Linux.
    """
    return os.name == 'posix'
    return os.name == "posix"


def check_properties_are_not_none(obj: Any, ignore: Optional[List[str]] = None) -> None:
@@ -206,16 +210,31 @@ def _create_generator(seed: Optional[int] = None) -> torch.Generator:
def get_all_environment_files(project_root: Path, additional_files: Optional[List[Path]] = None) -> List[Path]:
    """
    Returns a list of all Conda environment files that should be used. This is just an
    environment.yml file that lives at the project root folder, plus any additional files provided.
    environment.yml file that lives at the project root folder, plus any additional files provided in the model.

    :param project_root: The root folder of the code that starts the present training run.
    :param additional_files: Optional list of additional environment files to merge
    :return: A list with 1 entry that is the root level repo's conda environment files.
    :return: A list of Conda environment files to use.
    """
    env_files = []
    project_yaml = project_root / fixed_paths.ENVIRONMENT_YAML_FILE_NAME
    if project_yaml.exists():
    project_yaml = project_root / paths.ENVIRONMENT_YAML_FILE_NAME
    if paths.is_himl_used_from_git_repo():
        logging.info("Searching for Conda files in the parent folders")
        git_repo_root = paths.git_repo_root_folder()
        env_file = utils.find_file_in_parent_folders(
            file_name=paths.ENVIRONMENT_YAML_FILE_NAME, stop_at_path=[git_repo_root]
        )
        assert env_file is not None, "Expected to find at least the environment definition file at the repo root"
        logging.info(f"Using Conda environment in {env_file}")
        env_files.append(env_file)
    elif project_yaml.exists():
        logging.info(f"Using Conda environment in current folder: {project_yaml}")
        env_files.append(project_yaml)

    if not env_files and not additional_files:
        raise ValueError(
            "No Conda environment files were found in the repository, and none were specified in the model itself."
        )
    if additional_files:
        for additional_file in additional_files:
            if additional_file.exists():
@@ -223,26 +242,44 @@ def get_all_environment_files(project_root: Path, additional_files: Optional[Lis
    return env_files


def check_conda_environments(env_files: List[Path]) -> None:
    """Tests if all conda environment files are valid. In particular, they must not contain "include" statements
    in the pip section.

    :param env_files: The list of Conda environment YAML files to check.
    """
    if is_himl_used_from_git_repo():
        repo_root_yaml: Optional[Path] = git_repo_root_folder() / ENVIRONMENT_YAML_FILE_NAME
    else:
        repo_root_yaml = None
    for file in env_files:
        has_pip_include, _ = is_conda_file_with_pip_include(file)
        # PIP include statements are only valid when reading from the repository root YAML file, because we
        # are manually adding the included files in get_all_pip_requirements_files
        if has_pip_include and file != repo_root_yaml:
            raise ValueError(
                f"The Conda environment definition in {file} uses '-r' to reference pip requirements "
                "files. This does not work in AzureML. Please add the pip dependencies directly."
            )
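
An example of an environment definition that this check would reject (a hypothetical file, shown only to illustrate the failure mode): the pip section pulls in a separate requirements file via '-r', which AzureML cannot resolve:

    name: my-model-env
    dependencies:
      - python=3.7.3
      - pip:
          - -r requirements.txt   # rejected: '-r' includes do not work in AzureML
          - torch>=1.10.0         # fine: pip dependencies listed directly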


def get_all_pip_requirements_files() -> List[Path]:
    """
    If the root-level hi-ml directory is available (e.g. it has been installed as a submodule or
    downloaded directly into a parent repo) then we must add its pip requirements to any environment
    definition. This function returns a list of the necessary pip requirements files. If the hi-ml
    root directory does not exist (e.g. hi-ml has been installed as a pip package), this is not necessary,
    and so this function returns None)
    and so this function returns an empty list).

    :return: A list of pip requirements files in the hi-ml and hi-ml-azure packages if relevant,
        or else an empty list
    """
    files = []
    himl_root_dir = fixed_paths.himl_root_dir()
    if himl_root_dir is not None:
        himl_yaml = himl_root_dir / "hi-ml" / "run_requirements.txt"
        himl_az_yaml = himl_root_dir / "hi-ml-azure" / "run_requirements.txt"
        files.append(himl_yaml)
        files.append(himl_az_yaml)
        return files
    return []
    if paths.is_himl_used_from_git_repo():
        git_root = paths.git_repo_root_folder()
        for folder in [Path("hi-ml") / "run_requirements.txt", Path("hi-ml-azure") / "run_requirements.txt"]:
            files.append(git_root / folder)
    return files


def create_unique_timestamp_id() -> str:
@@ -270,8 +307,7 @@ def parse_model_id_and_version(model_id_and_version: str) -> None:
    expected format
    """
    if len(model_id_and_version.split(":")) != 2:
        raise ValueError(
            f"model id should be in the form 'model_name:version', got {model_id_and_version}")
        raise ValueError(f"model id should be in the form 'model_name:version', got {model_id_and_version}")


@contextmanager
@@ -7,171 +7,157 @@ from __future__ import annotations
import importlib
import inspect
import logging
import sys
from importlib._bootstrap import ModuleSpec
from importlib.util import find_spec
from pathlib import Path
from typing import Any, Dict, List, Optional
from typing import Dict, List, Optional

import param
from importlib._bootstrap import ModuleSpec

from health_azure.utils import PathOrString
from health_ml.lightning_container import LightningContainer
from health_ml.utils import fixed_paths


class ModelConfigLoader(param.Parameterized):
class ModelConfigLoader:
    """
    Helper class to manage model config loading.
    """

    def __init__(self, **params: Any):
        super().__init__(**params)
        default_module = self.get_default_search_module()
        self.module_search_specs: List[ModuleSpec] = [importlib.util.find_spec(default_module)]  # type: ignore
        self._find_module_search_specs()
    def __init__(self) -> None:
        pass

    def _find_module_search_specs(self) -> None:
        """
        Given the fully qualified model name, append the root folder to the system path (so that the config
        file can be discovered) and try to find a spec for the specified module. If found, appends the spec
        to self.module_search_specs
        """
        model_namespace_parts = self.model.split(".")
        if len(model_namespace_parts) == 1:
            # config must be in the default path. This is already in module_search_specs, so we don't need to do
            # anything
            return
        else:
            # Get the root folder of the fully qualified model name and ensure it is in the path to enable
            # discovery of the config file
            root_namespace = str(Path(model_namespace_parts[0]).absolute())
            if root_namespace not in sys.path:
                print(f"Adding {str(root_namespace)} to path")
                sys.path.insert(0, str(root_namespace))

            # Strip the root folder (now in the path) and the class name from the model namespace, leaving the
            # module name - e.g. "mymodule.configs"
            model_namespace = ".".join([str(p) for p in model_namespace_parts[1:-1]])  # type: ignore

            custom_spec = importlib.util.find_spec(model_namespace)  # type: ignore
            if custom_spec is None:
                raise ValueError(f"Search namespace {model_namespace} was not found.")
            self.module_search_specs.append(custom_spec)

    @staticmethod
    def get_default_search_module() -> str:
    def default_module_spec(self) -> ModuleSpec:
        from health_ml import configs  # type: ignore
        return configs.__name__

        default_module = configs.__name__
        return find_spec(default_module)

    def find_module_search_specs(self, model_name: str) -> ModuleSpec:
        """
        Given the model name (either only the class name or fully qualified), return the ModuleSpec that should
        be used for loading. If the model name is only the class name, the function will return the result of
        calling default_module_spec. Otherwise, this will return the module of the (fully qualified) model name.
        """
        model_namespace_parts = model_name.split(".")
        if len(model_namespace_parts) == 1:
            # config must be in the default path, nothing to be done
            return self.default_module_spec()

        module_name = ".".join(model_namespace_parts[:-1])
        logging.debug(f"Getting specification for module {module_name}")
        try:
            custom_spec: Optional[ModuleSpec] = find_spec(module_name)
        except Exception:
            custom_spec = None
        if custom_spec is None:
            raise ValueError(f"Module {module_name} was not found.")
        return custom_spec
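
The effect, sketched: a bare class name resolves against the default health_ml.configs module, while a dotted name is resolved as a module path (the return values in comments are illustrative):

    loader = ModelConfigLoader()
    loader.find_module_search_specs("HelloWorld")                 # spec for health_ml.configs
    loader.find_module_search_specs("SSL.configs.CIFAR10SimCLR")  # spec for the SSL.configs module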

+    def _get_model_config(self, module_spec: ModuleSpec, model_name: str) -> Optional[LightningContainer]:
+        """
+        Given a module specification, check to see if the module has a class member with the
+        given <model_name>, and if so, instantiate that config class. Otherwise, return None.
+
+        :param module_spec: The module in which to look for the class.
+        :param model_name: The name of the class to find.
+        :return: Instantiated model config if it was found.
+        """
+        # noinspection PyBroadException
+        try:
+            logging.debug(f"Importing {module_spec.name}")
+            target_module = importlib.import_module(module_spec.name)
+            # The "if" clause checks that obj is a class, of the desired name, that is
+            # defined in this module rather than being imported into it (and hence potentially
+            # being found twice).
+            _class = next(
+                obj
+                for name, obj in inspect.getmembers(target_module)
+                if inspect.isclass(obj) and name == model_name and inspect.getmodule(obj) == target_module
+            )
+            logging.info(f"Found class {_class} in file {module_spec.origin}")
+        # ignore the exception which will occur if the provided module cannot be loaded
+        # or the loaded module does not have the required class as a member
+        except Exception as e:
+            exception_text = str(e)
+            if exception_text != "":
+                logging.warning(f"Error when trying to import module {module_spec.name}: {exception_text}")
+            return None
+        model_config = _class()
+        return model_config
+
+    def _search_recursively_and_store(self, module_spec: ModuleSpec, model_name: str) -> Dict[str, LightningContainer]:
+        """
+        Given a root namespace (e.g., A.B.C), search recursively in all child namespaces
+        for a class member with the given <model_name>. Each class that is found is
+        instantiated and added to the returned dictionary, keyed by its namespace.
+
+        :param module_spec: The spec of the root module from which the search starts.
+        :param model_name: The name of the class to find.
+        :return: A dictionary mapping namespace to instantiated config.
+        """
+        configs: Dict[str, LightningContainer] = {}
+        root_namespace = module_spec.name
+        namespaces_to_search: List[str] = []
+        if module_spec.submodule_search_locations:
+            logging.debug(
+                f"Searching through {len(module_spec.submodule_search_locations)} folders that match namespace "
+                f"{module_spec.name}: {module_spec.submodule_search_locations}"
+            )
+            for root in module_spec.submodule_search_locations:
+                # List all python files in all the dirs under root, except for private dirs (prefixed with .)
+                all_py_files = [x for x in Path(root).rglob("*.py") if ".." not in str(x)]
+                for f in all_py_files:
+                    if f.is_file() and "__pycache__" not in str(f) and f.name != "setup.py":
+                        sub_namespace = path_to_namespace(f, root=root)
+                        namespaces_to_search.append(root_namespace + "." + sub_namespace)
+        elif module_spec.origin:
+            # The module search spec already points to a python file: Search only that.
+            namespaces_to_search.append(module_spec.name)
+        else:
+            raise ValueError(f"Unable to process module spec: {module_spec}")
+
+        for n in namespaces_to_search:  # type: ignore
+            _module_spec = None
+            # noinspection PyBroadException
+            try:
+                _module_spec = find_spec(n)  # type: ignore
+            except Exception:
+                continue
+
+            if _module_spec:
+                config = self._get_model_config(_module_spec, model_name=model_name)
+                if config:
+                    configs[n] = config  # type: ignore
+        return configs
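For a package spec, the loop above turns every Python file under the package's search locations into a candidate namespace. A rough, self-contained equivalent of just the discovery step (assuming health_ml is installed; files are printed instead of imported):

    from importlib.util import find_spec
    from pathlib import Path

    spec = find_spec("health_ml.configs")
    assert spec is not None and spec.submodule_search_locations
    for root in spec.submodule_search_locations:
        for f in Path(root).rglob("*.py"):
            if "__pycache__" not in str(f) and f.name != "setup.py":
                print(f)  # each such file becomes a namespace via path_to_namespace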

     def create_model_config_from_name(self, model_name: str) -> LightningContainer:
         """
         Returns a model configuration for a model of the given name.
-        To avoid having to import torch here, there are no references to LightningContainer.
-        Searching for a class member called <model_name> in the search modules provided recursively.

-        :param model_name: Fully qualified name of the model for which to get the configs for - i.e.
-        mymodule.configs.MyConfig
+        :param model_name: Class name (for example, "HelloWorld") if the model config is in the default search
+        namespace, or the fully qualified name of the model, like mymodule.configs.MyConfig
         """
         if not model_name:
             raise ValueError("Unable to load a model configuration because the model name is missing.")
-
-        # get the class name from the fully qualified name
-        model_name = model_name.split(".")[-1]
-
-        configs: Dict[str, LightningContainer] = {}
-
-        def _get_model_config(module_spec: ModuleSpec) -> Optional[LightningContainer]:
-            """
-            Given a module specification check to see if it has a class property with
-            the <model_name> provided, and instantiate that config class with the
-            provided <config_overrides>. Otherwise, return None.
-
-            :param module_spec:
-            :return: Instantiated model config if it was found.
-            """
-            # noinspection PyBroadException
-            try:
-                logging.debug(f"Importing {module_spec.name}")
-                target_module = importlib.import_module(module_spec.name)
-                # The "if" clause checks that obj is a class, of the desired name, that is
-                # defined in this module rather than being imported into it (and hence potentially
-                # being found twice).
-                _class = next(obj for name, obj in inspect.getmembers(target_module)
-                              if inspect.isclass(obj)
-                              and name == model_name  # noqa: W503
-                              and inspect.getmodule(obj) == target_module)  # noqa: W503
-                logging.info(f"Found class {_class} in file {module_spec.origin}")
-            # ignore the exception which will occur if the provided module cannot be loaded
-            # or the loaded module does not have the required class as a member
-            except Exception as e:
-                exception_text = str(e)
-                if exception_text != "":
-                    logging.warning(f"(from attempt to import module {module_spec.name}): {exception_text}")
-                return None
-            model_config = _class()
-            return model_config
-
-        def _search_recursively_and_store(module_search_spec: ModuleSpec) -> None:
-            """
-            Given a root namespace eg: A.B.C searches recursively in all child namespaces
-            for class property with the <model_name> provided. If found, this is
-            instantiated with the provided overrides, and added to the configs dictionary.
-
-            :param module_search_spec:
-            """
-            root_namespace = module_search_spec.name
-            namespaces_to_search: List[str] = []
-            if module_search_spec.submodule_search_locations:
-                logging.debug(f"Searching through {len(module_search_spec.submodule_search_locations)} folders that "
-                              f"match namespace {module_search_spec.name}: "
-                              f"{module_search_spec.submodule_search_locations}")
-                for root in module_search_spec.submodule_search_locations:
-                    # List all python files in all the dirs under root, except for private dirs (prefixed with .)
-                    all_py_files = [x for x in Path(root).rglob("*.py") if ".." not in str(x)]
-                    for f in all_py_files:
-                        if f.is_file() and "__pycache__" not in str(f) and f.name != "setup.py":
-                            sub_namespace = path_to_namespace(f, root=root)
-                            namespaces_to_search.append(root_namespace + "." + sub_namespace)
-            elif module_search_spec.origin:
-                # The module search spec already points to a python file: Search only that.
-                namespaces_to_search.append(module_search_spec.name)
-            else:
-                raise ValueError(f"Unable to process module spec: {module_search_spec}")
-
-            for n in namespaces_to_search:  # type: ignore
-                _module_spec = None
-                # noinspection PyBroadException
-                try:
-                    _module_spec = find_spec(n)  # type: ignore
-                except Exception:
-                    pass
-
-                if _module_spec:
-                    config = _get_model_config(_module_spec)
-                    if config:
-                        configs[n] = config  # type: ignore
-
-        for search_spec in self.module_search_specs:
-            _search_recursively_and_store(search_spec)
+        logging.info(f"Trying to locate model {model_name}")
+        name_parts = model_name.split(".")
+        class_name = name_parts[-1]
+        module_spec = self.find_module_search_specs(model_name)
+        configs = self._search_recursively_and_store(module_spec=module_spec, model_name=class_name)
         if len(configs) == 0:
-            raise ValueError(
-                f"Model name {model_name} was not found in search namespaces: "
-                f"{[s.name for s in self.module_search_specs]}.")
+            raise ValueError(f"Model '{model_name}' was not found in search namespace {module_spec.name}")
         elif len(configs) > 1:
-            raise ValueError(
-                f"Multiple instances of model name {model_name} were found in namespaces: {configs.keys()}.")
+            raise ValueError(
+                f"Multiple instances of model '{model_name}' were found in namespaces: {[*configs.keys()]}"
+            )
         else:
             return list(configs.values())[0]


-def path_to_namespace(path: Path, root: PathOrString = fixed_paths.repository_root_directory()) -> str:
+def path_to_namespace(path: Path, root: Path) -> str:
     """
-    Given a path (in form R/A/B/C) and an optional root directory R, create a namespace A.B.C.
-    If root is provided, then path must be a relative child to it.
+    Given a path (in form R/A/B/C) and a root directory R, create a namespace string A.B.C.
+    The path must be located under the root directory.

     :param path: Path to convert to namespace
-    :param root: Path prefix to remove from namespace (default is project root)
-    :return:
+    :param root: Path prefix to remove from namespace.
+    :return: A Python namespace string
     """
     return ".".join([Path(x).stem for x in path.relative_to(root).parts])
@@ -4,61 +4,21 @@
 # ------------------------------------------------------------------------------------------
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Optional

-from health_azure.utils import PathOrString
-
-ENVIRONMENT_YAML_FILE_NAME = "environment.yml"
+from health_azure.paths import git_repo_root_folder, is_himl_used_from_git_repo


-def get_environment_yaml_file() -> Path:
-    """
-    Returns the path where the environment.yml file is located, in the repository root directory.
-    The function throws an exception if the file is not found.
-
-    :return: The full path to the environment files.
-    """
-    # The environment file is copied into the package folder in setup.py.
-    root_dir = repository_root_directory()
-    env = root_dir / ENVIRONMENT_YAML_FILE_NAME
-    if not env.exists():
-        raise ValueError(f"File {ENVIRONMENT_YAML_FILE_NAME} was not found in the repository root "
-                         f"{root_dir}.")
-    return env
-
-
-def repository_root_directory(path: Optional[PathOrString] = None) -> Path:
+def repository_root_directory() -> Path:
     """
     Gets the full path to the root directory that holds the present repository.
+    This function should only be called if the repository is available (for example, in unit tests).
+    It will raise a ValueError if the repo is not available.

-    :param path: if provided, a relative path to append to the absolute path to the repository root.
     :return: The full path to the repository's root directory, with symlinks resolved if any.
     """
-    root = Path.cwd()
-    if path:
-        full_path = root / path
-        assert full_path.exists(), f"Path {full_path} doesn't exist"
-        return root / path
-    else:
-        return root
-
-
-def himl_root_dir() -> Optional[Path]:
-    """
-    Attempts to return the path to the top-level hi-ml repo that contains the hi-ml and hi-ml-azure packages.
-    This top level repo will only be present if hi-ml has been installed as a git submodule, or the repo has
-    been directly downloaded. Otherwise (e.g. if hi-ml has been installed as a pip package) returns None.
-
-    :return: Path to the himl root dir if it exists, else None
-    """
-    health_ml_root = Path(__file__).parent.parent
-    print(f"health ml root: {health_ml_root}")
-    if health_ml_root.parent.stem == "site-packages":
-        return None
-    himl_root = health_ml_root.parent.parent.parent
-    assert (himl_root / "hi-ml").is_dir(), f"no such dir {himl_root / 'hi-ml'}"
-    assert (himl_root / "hi-ml-azure").is_dir(), f"no such dir {himl_root / 'hi-ml-azure'}"
-    return himl_root
+    if is_himl_used_from_git_repo():
+        return git_repo_root_folder()
+    raise ValueError("This function should not be used if hi-ml is used as an installed package.")


 @dataclass(frozen=True)

@@ -66,6 +26,7 @@ class OutputFolderForTests:
     """
    Data class for the output directories for a given test
     """
+
     root_dir: Path

     def create_file_or_folder_path(self, file_or_folder_name: str) -> Path:
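With this change, repository_root_directory delegates to health_azure.paths and fails fast outside a checkout. A short sketch of the new contract (the import path health_ml.utils.fixed_paths is assumed from this file's location):

    from health_ml.utils.fixed_paths import repository_root_directory

    try:
        root = repository_root_directory()  # valid when running from a git checkout of hi-ml
    except ValueError:
        root = None  # hi-ml was installed as a pip package: there is no repository root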

@@ -1,10 +0,0 @@
-name: test-env
-channels:
-  - defaults
-  - pytorch
-dependencies:
-  - pip=20.1.1
-  - python=3.7.3
-  - pytorch=1.8.0
-  - pip:
-      - -r ../test_requirements.txt

@@ -0,0 +1,14 @@
+import logging
+import sys
+from pathlib import Path
+
+
+root = Path(__file__).parent.parent.parent
+for folder in ["hi-ml-azure", "hi-ml"]:
+    full_folder = str(root / folder / "src")
+    if full_folder not in sys.path:
+        print(f"Adding to sys.path for running hi-ml: {full_folder}")
+        sys.path.insert(0, full_folder)
+
+# Matplotlib is very talkative in DEBUG mode
+logging.getLogger('matplotlib').setLevel(logging.INFO)

@@ -1,139 +1,88 @@
 import shutil
 from pathlib import Path
-from typing import Any

 import pytest

-from health_azure.utils import is_running_on_azure_agent
+from health_ml.configs import hello_world as hello_config
 from health_ml.lightning_container import LightningContainer
 from health_ml.utils.config_loader import ModelConfigLoader, path_to_namespace
-from testhiml.utils.fixed_paths_for_tests import full_test_data_path, tests_root_directory


-@pytest.fixture(scope="module")
-def config_loader() -> ModelConfigLoader:
-    return ModelConfigLoader(**{"model": "HelloContainer"})
+def test_find_module_search_specs() -> None:
+    config_loader = ModelConfigLoader()
+    module_spec = config_loader.find_module_search_specs(model_name="health_ml.utils.config_loader.Foo")
+    assert module_spec.name == "health_ml.utils.config_loader"
+    module_spec = config_loader.find_module_search_specs(model_name="DoesNotExist")
+    assert module_spec.name == "health_ml.configs"


-@pytest.fixture(scope="module")
-def hello_config() -> Any:
-    from health_ml.configs import hello_container  # type: ignore
-    assert Path(hello_container.__file__).exists(), "Can't find hello_container config"
-    return hello_container
+def test_get_default_search_module() -> None:
+    config_loader = ModelConfigLoader()
+    search_module = config_loader.default_module_spec()
+    assert search_module.name == "health_ml.configs"


-def test_find_module_search_specs(config_loader: ModelConfigLoader) -> None:
-    # By default, property module_search_specs includes the default config path - health_ml.configs
-    len_search_specs_before = len(config_loader.module_search_specs)
-    assert any([m.name == "health_ml.configs" for m in config_loader.module_search_specs])
-    config_loader._find_module_search_specs()
-    # nothing should have been added to module_search_specs
-    assert len(config_loader.module_search_specs) == len_search_specs_before
-
-
-def test_find_module_search_specs_outside_default_dir() -> None:
-    if is_running_on_azure_agent():
-        return
-    model_name = "NewConfig"
-
-    dummy_config_dir = Path.cwd() / "test_configs"
-    dummy_config_dir.mkdir()
-    dummy_config_path = dummy_config_dir / "new_config.py"
-    dummy_config = f"""class {model_name}:
-    def __init__(self):
-        pass
-"""
-    dummy_config_path.touch()
-    dummy_config_path.write_text(dummy_config)
-
-    dummy_config_namespace = f"test_configs.new_config.{model_name}"
-    config_loader2 = ModelConfigLoader(**{"model": f"{dummy_config_namespace}"})
-    # The root "testhiml" should now be in the system path and the module "outputs" should be in
-    # module_search_specs. This won't be in the previous results, since the default path was used.
-    # The default search_spec (health_ml.configs) should also be in the results for the new loader.
-    assert any([m.name == "new_config" for m in config_loader2.module_search_specs])
-    assert any([m.name == "health_ml.configs" for m in config_loader2.module_search_specs])
-
-    # If the file doesn't exist but the parent module does, the module will still be appended to
-    # module_search_specs at this stage
-    config_loader3 = ModelConfigLoader(**{"model": "test_configs.new_config.idontexist"})
-    assert any([m.name == "new_config" for m in config_loader3.module_search_specs])
-
-    # If the parent module doesn't exist, an Exception should be raised
-    with pytest.raises(Exception) as e:
-        ModelConfigLoader(**{"model": "testhiml.idontexist.idontexist"})
-    assert "was not found" in str(e)
-
-    shutil.rmtree(dummy_config_dir)
-
-
-def test_get_default_search_module(config_loader: ModelConfigLoader) -> None:
-    search_module = config_loader.get_default_search_module()
-    assert search_module == "health_ml.configs"


-def test_create_model_config_from_name(config_loader: ModelConfigLoader, hello_config: Any
-                                       ) -> None:
+def test_create_model_config_from_name_errors() -> None:
+    config_loader = ModelConfigLoader()
     # if no model name is given, an exception should be raised
     with pytest.raises(Exception) as e:
         config_loader.create_model_config_from_name("")
     assert "the model name is missing" in str(e)

     # if no config is found matching the model name, an exception should be raised
     with pytest.raises(Exception) as e:
         config_loader.create_model_config_from_name("idontexist")
-    assert "was not found in search namespaces" in str(e)
+    assert "was not found in search namespace" in str(e)

-    # if > 1 config is found matching the model name, an exception should be raised
-    config_name = "HelloContainer"
-    hello_config_path = Path(hello_config.__file__)
-    duplicate_config_file = hello_config_path.parent / "hello_container_2.py"
-    duplicate_config_file.touch()
-    shutil.copyfile(str(hello_config_path), str(duplicate_config_file))
-    with pytest.raises(Exception) as e:
-        config_loader.create_model_config_from_name(config_name)
-    assert "Multiple instances of model name " in str(e)
-    duplicate_config_file.unlink()
+    with pytest.raises(Exception) as e:
+        config_loader.create_model_config_from_name("testhiml.idontexist.idontexist")
+    assert "Module testhiml.idontexist was not found" in str(e)


+def test_create_model_config_from_name_duplicates() -> None:
+    config_loader = ModelConfigLoader()
+    config_name = "HelloWorld"
     # if exactly one config is found, expect a LightningContainer to be returned
     container = config_loader.create_model_config_from_name(config_name)
     assert isinstance(container, LightningContainer)
     assert container.model_name == config_name


-def test_config_in_dif_location(tmp_path: Path, hello_config: Any) -> None:
-    himl_root = Path(hello_config.__file__).parent.parent
-    model_name = "HelloContainer"
-    new_config_path = himl_root / "hello_container_to_delete.py"
-    new_config_path.touch()
     # if > 1 config is found matching the model name, an exception should be raised
     hello_config_path = Path(hello_config.__file__)
-    shutil.copyfile(str(hello_config_path), str(new_config_path))
-    config_loader = ModelConfigLoader(model=model_name)
-
-    # Trying to find this config should now cause an exception as it should find it in both "health_ml" and
-    # in "health_ml.configs"
+    # This file must be excluded from coverage reports, check .coveragerc
+    duplicate_config_file = hello_config_path.parent / "temp_config_for_unittests.py"
+    shutil.copyfile(hello_config_path, duplicate_config_file)
     with pytest.raises(Exception) as e:
-        config_loader.create_model_config_from_name(model_name)
-    assert "Multiple instances of model name HelloContainer were found in namespaces: " \
-           "dict_keys(['health_ml.configs.hello_container', 'health_ml.hello_container_to_delete']) " in str(e)
-    new_config_path.unlink()
+        config_loader.create_model_config_from_name(config_name)
+    assert "Multiple instances of model " in str(e)
+    duplicate_config_file.unlink()


-@pytest.mark.parametrize("is_external", [True, False])
-def test_path_to_namespace(is_external: bool) -> None:
+def test_path_to_namespace() -> None:
     """
-    A test to check conversion between namespace to path for InnerEye and external namespaces
+    A test to check conversion between paths and python namespaces.
     """
-    tests_root_dir = tests_root_directory()
-    if is_external:
-        folder_name = "logs"
-        full_folder = tests_root_dir / folder_name
-        assert path_to_namespace(
-            path=full_folder,
-            root=tests_root_dir
-        ) == folder_name
-    else:
-        assert path_to_namespace(
-            path=full_test_data_path(),
-            root=tests_root_dir
-        ) == "test_data"
+    assert path_to_namespace(Path("/foo/bar/baz"), root=Path("/foo")) == "bar.baz"


+def test_config_fully_qualified() -> None:
+    """
+    Test if we can load model configs when giving a full Python namespace.
+    """
+    # This name was deliberately chosen to be outside the default search namespace
+    model_name = "health_ml.utils.config_loader.ModelConfigLoader"
+    config_loader = ModelConfigLoader()
+    model = config_loader.create_model_config_from_name(model_name=model_name)
+    assert type(model).__name__ == "ModelConfigLoader"


+def test_config_fully_qualified_invalid() -> None:
+    """
+    Test error handling when the given namespace is too long.
+    """
+    namespace = "health_ml.utils.config_loader.foo"
+    model_name = namespace + ".Foo"
+    config_loader = ModelConfigLoader()
+    with pytest.raises(ValueError) as ex:
+        config_loader.create_model_config_from_name(model_name=model_name)
+    assert f"Module {namespace} was not found" in str(ex)

@@ -60,6 +60,7 @@ def test_workflow_params_get_effective_random_seed() -> None:
     assert seed == params.random_seed


+@pytest.mark.fast
 def test_validate_dataset_params() -> None:
     # DatasetParams cannot be initialized if neither azure_datasets nor local_datasets is set
     with pytest.raises(ValueError) as ex:

@@ -78,11 +79,11 @@ def test_validate_dataset_params() -> None:
     # local datasets and dataset_mountpoints must be Paths
     with pytest.raises(Exception) as e:
         DatasetParams(local_datasets=["foo"])
-    assert "is not an instance of" in str(e)
+    assert "items must be instances of type <class 'pathlib.Path'>" in str(e)

     with pytest.raises(Exception) as e:
         DatasetParams(dataset_mountpoints=["foo"])
-    assert "is not an instance of" in str(e)
+    assert "items must be instances of type <class 'pathlib.Path'>" in str(e)

     # The following should be okay
     DatasetParams(local_datasets=[Path("foo")]).validate()

@@ -5,7 +5,7 @@ from unittest.mock import MagicMock, patch, Mock
 from pytorch_lightning import Callback, Trainer
 from pytorch_lightning.callbacks import GradientAccumulationScheduler, ModelCheckpoint, ModelSummary, TQDMProgressBar

-from health_ml.configs.hello_container import HelloContainer  # type: ignore
+from health_ml.configs.hello_world import HelloWorld  # type: ignore
 from health_ml.lightning_container import LightningContainer
 from health_ml.model_trainer import (create_lightning_trainer, write_experiment_summary_file, model_train)
 from health_ml.utils.common_utils import EXPERIMENT_SUMMARY_FILE

@@ -65,8 +65,8 @@ def test_create_lightning_trainer_with_callbacks() -> None:
         callbacks = [MyCallback()]
         return {"callbacks": callbacks}

-    model_name = "HelloContainer"
-    model_config_loader = ModelConfigLoader(model=model_name)
+    model_name = "HelloWorld"
+    model_config_loader = ModelConfigLoader()
     container = model_config_loader.create_model_config_from_name(model_name)
     container.monitor_gpu = False
     container.monitor_loading = False

@@ -88,7 +88,7 @@ def test_create_lightning_trainer_with_callbacks() -> None:


 def test_model_train() -> None:
-    container = HelloContainer()
+    container = HelloWorld()
     container.create_lightning_module_and_store()

     with patch.object(container, "get_data_module"):

@@ -5,7 +5,7 @@ import pytest
 from typing import Generator, Tuple
 from unittest.mock import patch

-from health_ml.configs.hello_container import HelloContainer
+from health_ml.configs.hello_world import HelloWorld
 from health_ml.experiment_config import ExperimentConfig
 from health_ml.lightning_container import LightningContainer
 from health_ml.run_ml import MLRunner

@@ -34,7 +34,7 @@ def ml_runner() -> Generator:
 @pytest.fixture(scope="module")
 def ml_runner_with_container() -> Generator:
     experiment_config = ExperimentConfig(model="HelloContainer")
-    container = HelloContainer()
+    container = HelloWorld()
     runner = MLRunner(experiment_config=experiment_config, container=container)
     runner.setup()
     yield runner

@@ -92,10 +92,12 @@ def test_run_inference(ml_runner_with_container: MLRunner, tmp_path: Path) -> None:
     """
     Test that run_inference gets called as expected.
     """
-    def _expected_files_exist() -> int:
+    def _expected_files_exist() -> bool:
         output_dir = ml_runner_with_container.container.outputs_folder
-        expected_files = [Path("test_mse.txt"), Path("test_mae.txt")]
-        return sum([p.exists() for p in expected_files] + [output_dir.is_dir()])
+        if not output_dir.is_dir():
+            return False
+        expected_files = ["test_mse.txt", "test_mae.txt"]
+        return all([(output_dir / p).exists() for p in expected_files])

     # create the test data
     import numpy as np

@@ -112,7 +114,7 @@ def test_run_inference(ml_runner_with_container: MLRunner, tmp_path: Path) -> None:
     assert not expected_ckpt_path.exists()
     # update the container to look for test data at this location
     ml_runner_with_container.container.local_dataset_dir = tmp_path
-    assert _expected_files_exist() == 0
+    assert not _expected_files_exist()

     actual_train_ckpt_path = ml_runner_with_container.checkpoint_handler.get_recovery_or_checkpoint_path_train()
     assert actual_train_ckpt_path is None

@@ -123,8 +125,8 @@ def test_run_inference(ml_runner_with_container: MLRunner, tmp_path: Path) -> None:
     actual_test_ckpt_path = ml_runner_with_container.checkpoint_handler.get_checkpoints_to_test()
     assert actual_test_ckpt_path == [expected_ckpt_path]
     assert actual_test_ckpt_path[0].exists()
-    # After training, the outputs directory should now exist
-    assert _expected_files_exist() == 3
+    # After training, the outputs directory should now exist and contain the 2 error files
+    assert _expected_files_exist()

     # if no checkpoint handler, no checkpoint paths will be saved and these are required for
     # inference so ValueError will be raised

@@ -132,6 +134,3 @@ def test_run_inference(ml_runner_with_container: MLRunner, tmp_path: Path) -> None:
         ml_runner_with_container.checkpoint_handler = None  # type: ignore
         ml_runner_with_container.run()
     assert "expects exactly 1 checkpoint for inference, but got 0" in str(e)
-
-    Path("test_mae.txt").unlink()
-    Path("test_mse.txt").unlink()

@@ -8,8 +8,11 @@ from typing import List, Optional
 from unittest.mock import patch, MagicMock

 import pytest
+from azureml.train.hyperdrive import HyperDriveConfig

 from health_azure import AzureRunInfo, DatasetConfig
+from health_ml.configs.hello_world import HelloWorld
+from health_ml.deep_learning_config import WorkflowParams
 from health_ml.lightning_container import LightningContainer
 from health_ml.runner import Runner

@@ -21,12 +24,12 @@ def mock_runner(tmp_path: Path) -> Runner:

 @pytest.mark.parametrize("model_name, cluster, num_nodes, should_raise_value_error", [
-    ("HelloContainer", "dummyCluster", 1, False),
+    ("HelloWorld", "dummyCluster", 1, False),
     ("", "", None, True),
-    ("HelloContainer", "", None, False),
+    ("HelloWorld", "", None, False),
     ("a", None, 0, True),
     (None, "b", 10, True),
-    ("HelloContainer", "b", 10, False)
+    ("HelloWorld", "b", 10, False)
 ])
 def test_parse_and_load_model(mock_runner: Runner, model_name: Optional[str], cluster: Optional[str],
                               num_nodes: Optional[int], should_raise_value_error: bool) -> None:

@@ -63,7 +66,7 @@ def test_parse_and_load_model(mock_runner: Runner, model_name: Optional[str], cluster: Optional[str],


 def test_run(mock_runner: Runner) -> None:
-    model_name = "HelloContainer"
+    model_name = "HelloWorld"
     arguments = ["", f"--model={model_name}"]
     with patch("health_ml.runner.Runner.run_in_situ") as mock_run_in_situ:
         with patch("health_ml.runner.get_workspace"):

@@ -113,3 +116,94 @@ def test_submit_to_azureml_if_needed(mock_get_workspace: MagicMock,
     assert run_info.input_datasets == []
     assert run_info.is_running_in_azure_ml is False
     assert run_info.output_folder is None
+
+
+def test_crossvalidation_flag() -> None:
+    """
+    Checks the basic use of the flags that trigger cross validation.
+    """
+    container = HelloWorld()
+    assert not container.is_crossvalidation_enabled
+    container.crossval_count = 2
+    assert container.is_crossvalidation_enabled
+    container.validate()
+    # Validation should fail if the cross validation index is out of bounds
+    container.crossval_index = container.crossval_count
+    with pytest.raises(ValueError):
+        container.validate()
+
+
+def test_crossval_config() -> None:
+    """
+    Check if the flags to trigger Hyperdrive runs work as expected.
+    """
+    mock_tuning_config = "foo"
+    container = HelloWorld()
+    with patch("health_ml.configs.hello_world.HelloWorld.get_parameter_tuning_config",
+               return_value=mock_tuning_config):
+        # Without any flags set, no Hyperdrive config should be returned
+        assert container.get_hyperdrive_config() is None
+        # To trigger a hyperparameter search, the commandline flag for hyperdrive must be present
+        container.hyperdrive = True
+        assert container.get_hyperdrive_config() == mock_tuning_config
+        # Triggering cross validation works by just setting crossval_count
+        container.hyperdrive = False
+        container.crossval_count = 2
+        assert container.is_crossvalidation_enabled
+        crossval_config = container.get_hyperdrive_config()
+        assert isinstance(crossval_config, HyperDriveConfig)
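For reference, a cross-validation parameter space of the kind asserted here can be written out with the AzureML SDK directly. This is a sketch, not necessarily the exact wiring inside get_hyperdrive_config (GridParameterSampling and choice are azureml.train.hyperdrive APIs; the argument name crossval_index matches WorkflowParams.CROSSVAL_INDEX_ARG_NAME used below):

    from azureml.train.hyperdrive import GridParameterSampling, choice

    crossval_count = 2
    # Grid sampling over the fold index: one child run per value 0..crossval_count-1
    sampling = GridParameterSampling({"crossval_index": choice(*range(crossval_count))})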
+
+
+def test_crossval_argument_names() -> None:
+    """
+    Cross validation uses hardcoded argument names; check that they match the field names.
+    """
+    container = HelloWorld()
+    crossval_count = 8
+    crossval_index = 5
+    container.crossval_count = crossval_count
+    container.crossval_index = crossval_index
+    assert getattr(container, container.CROSSVAL_INDEX_ARG_NAME) == crossval_index
+
+
+def test_submit_to_azure_hyperdrive(mock_runner: Runner) -> None:
+    """
+    Test if the hyperdrive configurations are passed to the submission function.
+    """
+    model_name = "HelloWorld"
+    crossval_count = 2
+    arguments = ["", f"--model={model_name}", "--cluster=foo", "--crossval_count", str(crossval_count)]
+    with patch("health_ml.runner.Runner.run_in_situ") as mock_run_in_situ:
+        with patch("health_ml.runner.get_workspace"):
+            with patch.object(sys, "argv", arguments):
+                with patch("health_ml.runner.submit_to_azure_if_needed") as mock_submit_to_aml:
+                    mock_runner.run()
+    mock_run_in_situ.assert_called_once()
+    mock_submit_to_aml.assert_called_once()
+    # call_args is a tuple of (args, kwargs)
+    call_kwargs = mock_submit_to_aml.call_args[1]
+    # Submission to AzureML should have been turned on because a cluster name was supplied
+    assert mock_runner.experiment_config.azureml
+    assert call_kwargs["submit_to_azureml"]
+    # Check details of the Hyperdrive config
+    hyperdrive_config = call_kwargs["hyperdrive_config"]
+    parameter_space = hyperdrive_config._generator_config["parameter_space"]
+    assert parameter_space[WorkflowParams.CROSSVAL_INDEX_ARG_NAME] == ["choice", [list(range(crossval_count))]]
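The expected value in the assertion above is HyperDrive's serialized form of a grid choice; the nesting is easy to misread, so spelled out (pure Python, no AzureML needed):

    crossval_count = 2
    # ["choice", [values]] where values is the single list of admissible fold indices
    assert ["choice", [list(range(crossval_count))]] == ["choice", [[0, 1]]]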
+
+
+def test_run_hello_world(mock_runner: Runner) -> None:
+    """
+    Test running a model end-to-end via the commandline runner.
+    """
+    model_name = "HelloWorld"
+    arguments = ["", f"--model={model_name}"]
+    with patch("health_ml.runner.get_workspace") as mock_get_workspace:
+        with patch.object(sys, "argv", arguments):
+            mock_runner.run()
+    # get_workspace should not be called when using the runner outside AzureML, to not go through the
+    # time-consuming auth
+    mock_get_workspace.assert_not_called()
+    # Summary.txt is written at start, the other files during inference
+    expected_files = ["experiment_summary.txt", "test_mae.txt", "test_mse.txt"]
+    for file in expected_files:
+        assert (mock_runner.lightning_container.outputs_folder / file).is_file(), f"Missing file: {file}"

@@ -14,16 +14,26 @@
     "reportPrivateImportUsage": false,
     "executionEnvironments": [
         {
-            "root": "hi-ml/src"
+            "root": "hi-ml/src",
+            "extraPaths": [
+                "hi-ml-azure/src"
+            ]
         },
         {
-            "root": "hi-ml/testhiml"
+            "root": "hi-ml/testhiml",
+            "extraPaths": [
+                "hi-ml-azure/src",
+                "hi-ml/src",
+            ]
         },
         {
             "root": "hi-ml-azure/src"
         },
         {
-            "root": "hi-ml-azure/testazure"
+            "root": "hi-ml-azure/testazure",
+            "extraPaths": [
+                "hi-ml-azure/src",
+            ]
         }
     ]
 }

@@ -1,30 +1,10 @@
-coverage==5.5
-conda-merge==0.1.5
-flake8==3.8.4
-gitpython==3.1.7
-lightning-bolts==0.4.0
-matplotlib==3.3.0
-monai==0.6.0
-more-itertools==8.10.0
-mypy==0.910
-opencv-python-headless==4.5.1.48
-pandas==1.3.4
-param==1.9.3
-pillow==9.0.0
-pydicom==2.0.0
-pylint==2.9.5
+black==22.1.0
+coverage==6.3.2
+flake8==4.0.1
+mypy==0.931
+pylint==2.12.2
 pycobertura==2.0.1
 pytest==6.2.2
 pytest-cov==2.11.1
 pytest-timeout==2.0.1
-pytorch-lightning==1.5.5
-ruamel.yaml==0.16.12
-rpdb==0.1.6
-scikit-learn==1.0
-seaborn==0.10.1
-simpleitk==1.2.4
-torch==1.10.0
-torchmetrics==0.6.0
-torchvision==0.11.1
-types-requests==2.25.6
-yacs==0.1.8
+scikit-learn  # This is needed to make pyright pass on the docs folder