diff --git a/.idea/runConfigurations/Template__Run_ML_on_local_machine.xml b/.idea/runConfigurations/Template__Run_ML_on_local_machine.xml
index da3d151c..f940a0ea 100644
--- a/.idea/runConfigurations/Template__Run_ML_on_local_machine.xml
+++ b/.idea/runConfigurations/Template__Run_ML_on_local_machine.xml
@@ -12,7 +12,7 @@
-
+
diff --git a/InnerEye/ML/configs/segmentation/HelloWorld.py b/InnerEye/ML/configs/segmentation/HelloWorld.py
new file mode 100644
index 00000000..1c771f4e
--- /dev/null
+++ b/InnerEye/ML/configs/segmentation/HelloWorld.py
@@ -0,0 +1,119 @@
+# ------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
+# ------------------------------------------------------------------------------------------
+from random import Random
+from typing import Any
+
+from azureml.train.estimator import Estimator
+from azureml.train.hyperdrive import BanditPolicy, HyperDriveConfig, PrimaryMetricGoal, RandomParameterSampling, uniform
+from networkx.tests.test_convert_pandas import pd
+
+from InnerEye.ML.common import TrackedMetrics
+from InnerEye.ML.config import PhotometricNormalizationMethod, SegmentationModelBase, equally_weighted_classes
+from InnerEye.ML.utils.model_metadata_util import generate_random_colours_list
+from InnerEye.ML.utils.split_dataset import DatasetSplits
+from Tests.fixed_paths_for_tests import full_ml_test_data_path
+
+
+class HelloWorld(SegmentationModelBase):
+ """
+ This is a very basic model that is pre-configured to train on the CPU for 2 epochs on a dummy dataset
+ ../Tests/ML/test_data/dataset.csv
+
+ The aim of this config is to demonstrate how to:
+ 1) Subclass SegmentationModelBase which is the base config for all segmentation model configs
+ 2) Configure the UNet3D implemented in this package
+ 3) Configure Azure HyperDrive based parameter search
+
+ - Train from the command line: python InnerEye/ML/runner.py --model=HelloWorld
+ - With AzureML set up, run a parameter search: python InnerEye/ML/runner.py --model=HelloWorld --hyperdrive=True
+
+ In this example, the model is trained on 2 input image channels channel1 and channel2, and
+ predicts 2 foreground classes region, region_1.
+ """
+
+ def __init__(self, **kwargs: Any) -> None:
+ fg_classes = ["region", "region_1"]
+ super().__init__(
+ # Data definition - in this section we define where to load the dataset from
+ local_dataset=full_ml_test_data_path(),
+
+ # Model definition - in this section we define what model to use and some related configurations
+ architecture="UNet3D",
+ feature_channels=[4],
+ crop_size=(64, 64, 64),
+ image_channels=["channel1", "channel2"],
+ ground_truth_ids=fg_classes,
+ class_weights=equally_weighted_classes(fg_classes, background_weight=0.02),
+ mask_id="mask",
+
+ # Model training and testing - in this section we define configurations pertaining to the model
+ # training loop (ie: batch size, how many epochs to train, number of epochs to save)
+ # and testing (ie: how many epochs to test)
+ use_gpu=False,
+ num_dataload_workers=0,
+ train_batch_size=2,
+ start_epoch=0,
+ num_epochs=2,
+ save_start_epoch=1,
+ save_step_epochs=1,
+ test_start_epoch=2,
+ test_diff_epochs=1,
+ test_step_epochs=1,
+ use_mixed_precision=True,
+
+ # Pre-processing - in this section we define how to normalize our inputs, in this case we are doing
+ # CT Level and Window based normalization.
+ norm_method=PhotometricNormalizationMethod.CtWindow,
+ level=50,
+ window=200,
+
+ # Post-processing - in this section we define our post processing configurations, in this case
+ # we are filling holes in the generated segmentation masks for all of the foreground classes.
+ fill_holes=[True] * len(fg_classes),
+
+ # Output - in this section we define settings that determine what our output looks like; in this case
+ # we define the structure names and colours to use.
+ ground_truth_ids_display_names=fg_classes,
+ colours=generate_random_colours_list(Random(5), len(fg_classes)),
+ )
+ self.add_and_validate(kwargs)  # presumably validates and applies the caller's overrides - confirm
+
+ # NOTE(review): `pd` is imported via networkx.tests.test_convert_pandas above; prefer `import pandas as pd`.
+ def get_model_train_test_dataset_splits(self, dataset_df: pd.DataFrame) -> DatasetSplits:
+ """Split by fixed subject IDs: 1-3 for training, 4-5 for validation, 6 for testing."""
+ return DatasetSplits.from_subject_ids(
+ df=dataset_df,
+ train_ids=[1, 2, 3],
+ val_ids=[4, 5],
+ test_ids=[6],
+ )
+
+ def get_parameter_search_hyperdrive_config(self, estimator: Estimator) -> HyperDriveConfig:
+ """
+ Specify an Azure HyperDrive configuration.
+ Tutorial: https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-tune-hyperparameters
+ API reference:
+ https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.hyperdrive?view=azure-ml-py
+ :param estimator: The estimator (configured PyTorch environment) of the experiment.
+ :return: An Azure HyperDrive run configuration.
+ """
+ parameter_space = {
+ 'l_rate': uniform(0.0005, 0.01)
+ }
+
+ param_sampling = RandomParameterSampling(parameter_space)
+
+ # early-terminate poor runs. NOTE(review): delay_evaluation=10 > num_epochs=2 - can this ever trigger?
+ early_termination_policy = BanditPolicy(slack_factor=0.15, evaluation_interval=1, delay_evaluation=10)
+
+ return HyperDriveConfig(
+ estimator=estimator,
+ hyperparameter_sampling=param_sampling,
+ policy=early_termination_policy,
+ primary_metric_name=TrackedMetrics.Val_Loss.value,
+ primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
+ max_total_runs=10,
+ max_concurrent_runs=2
+ )
diff --git a/InnerEye/ML/scalar_config.py b/InnerEye/ML/scalar_config.py
index 971df767..bb633944 100644
--- a/InnerEye/ML/scalar_config.py
+++ b/InnerEye/ML/scalar_config.py
@@ -490,7 +490,7 @@ class ScalarModelBase(ModelConfigBase):
self.number_of_cross_validation_splits_per_fold))),
})
else:
- super().get_cross_validation_hyperdrive_sampler()
+ return super().get_cross_validation_hyperdrive_sampler()
def should_wait_for_other_cross_val_child_runs(self) -> bool:
"""
diff --git a/README.md b/README.md
index 6ddbdd89..1022c38b 100644
--- a/README.md
+++ b/README.md
@@ -59,8 +59,13 @@ After that, you need to set up your Python environment:
`conda env create --file environment.yml`
- Activate the environment by running `conda activate InnerEye`
+Now try to run the Hello World segmentation model by running
+`python InnerEye/ML/runner.py --model=HelloWorld`.
+If that works: Congratulations! You have successfully built your first model using the InnerEye toolbox.
+
Detailed instructions, including setup in Azure, are here:
1. [Setting up your environment](docs/environment.md)
+1. [Training a Hello World segmentation model](docs/hello_world_model.md)
1. [Setting up Azure Machine Learning](docs/setting_up_aml.md)
1. [Creating a dataset](docs/creating_dataset.md)
1. [Building models in Azure ML](docs/building_models.md)
diff --git a/Tests/Common/test_build_config.py b/Tests/Common/test_build_config.py
index d6aff607..67ee7f11 100644
--- a/Tests/Common/test_build_config.py
+++ b/Tests/Common/test_build_config.py
@@ -7,7 +7,7 @@ from typing import Any
import pytest
from azureml.train.estimator import Estimator
-from azureml.train.hyperdrive import BanditPolicy, HyperDriveConfig, PrimaryMetricGoal, RandomParameterSampling, \
+from azureml.train.hyperdrive import HyperDriveConfig, PrimaryMetricGoal, RandomParameterSampling, \
choice, \
uniform
@@ -117,6 +117,22 @@ def test_dataset_reader_workers() -> None:
assert config.num_dataset_reader_workers == 0
+@pytest.mark.parametrize("number_of_cross_validation_splits_per_fold", [0, 2])
+def test_get_total_number_of_cross_validation_runs(number_of_cross_validation_splits_per_fold: int) -> None:
+    """Total run count is splits * splits_per_fold when sub-fold cross validation is on, else just splits."""
+    config = ScalarModelBase(should_validate=False)
+    config.number_of_cross_validation_splits = 2
+    config.number_of_cross_validation_splits_per_fold = number_of_cross_validation_splits_per_fold
+    assert config.perform_cross_validation
+    if number_of_cross_validation_splits_per_fold <= 0:
+        assert not config.perform_sub_fold_cross_validation
+        assert config.get_total_number_of_cross_validation_runs() == config.number_of_cross_validation_splits
+        return
+    assert config.perform_sub_fold_cross_validation
+    total_runs = config.get_total_number_of_cross_validation_runs()
+    assert total_runs == config.number_of_cross_validation_splits * number_of_cross_validation_splits_per_fold
+
+
@pytest.mark.parametrize("number_of_cross_validation_splits", [0, 2])
@pytest.mark.parametrize("number_of_cross_validation_splits_per_fold", [0, 2])
def test_get_hyperdrive_config(number_of_cross_validation_splits: int,
@@ -147,8 +163,14 @@ def test_get_hyperdrive_config(number_of_cross_validation_splits: int,
assert hd_config.estimator.source_directory == source_config.root_folder
assert hd_config.estimator.run_config.script == source_config.entry_script
assert hd_config.estimator._script_params == source_config.script_params
- assert hd_config._max_total_runs == config.get_total_number_of_cross_validation_runs() \
- if config.perform_cross_validation else HYPERDRIVE_TOTAL_RUNS
+
+ if number_of_cross_validation_splits > 0 and number_of_cross_validation_splits_per_fold > 0:
+ assert hd_config._max_total_runs == number_of_cross_validation_splits * \
+ number_of_cross_validation_splits_per_fold
+ elif number_of_cross_validation_splits > 0:
+ assert hd_config._max_total_runs == number_of_cross_validation_splits
+ else:
+ assert hd_config._max_total_runs == HYPERDRIVE_TOTAL_RUNS
if config.perform_cross_validation:
# check sampler is as expected
@@ -169,22 +191,12 @@ def test_get_hyperdrive_config(number_of_cross_validation_splits: int,
 def _create_dummy_hyperdrive_param_search_config(estimator: Estimator) -> HyperDriveConfig:
- parameter_space = {
- 'l_rate': uniform(0.0005, 0.01)
- }
-
- param_sampling = RandomParameterSampling(parameter_space)
-
- # early terminate poorly performing runs
- early_termination_policy = BanditPolicy(slack_factor=0.15, evaluation_interval=1, delay_evaluation=10)
-
- config = HyperDriveConfig(estimator=estimator,
- hyperparameter_sampling=param_sampling,
- policy=early_termination_policy,
- primary_metric_name=TrackedMetrics.Val_Loss.value,
- primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
- max_total_runs=HYPERDRIVE_TOTAL_RUNS,
- max_concurrent_runs=8
- )
-
- return config
+ return HyperDriveConfig(  # random search over 'l_rate'; no early-termination policy is passed
+ estimator=estimator,
+ hyperparameter_sampling=RandomParameterSampling({
+ 'l_rate': uniform(0.0005, 0.01)
+ }),
+ primary_metric_name=TrackedMetrics.Val_Loss.value,
+ primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
+ max_total_runs=HYPERDRIVE_TOTAL_RUNS
+ )
diff --git a/docs/hello_world_model.md b/docs/hello_world_model.md
new file mode 100644
index 00000000..5b238a82
--- /dev/null
+++ b/docs/hello_world_model.md
@@ -0,0 +1,12 @@
+# Training a Hello World segmentation model
+
+In the configs folder, you will find a config file called [HelloWorld.py](../InnerEye/ML/configs/segmentation/HelloWorld.py).
+We have created this file to demonstrate how to:
+
+1. Subclass SegmentationModelBase which is the base config for all segmentation model configs
+1. Configure the UNet3D implemented in this package
+1. Configure Azure HyperDrive based parameter search
+
+- This model can be trained from the command line: `python InnerEye/ML/runner.py --model=HelloWorld`
+- If you have set up AzureML then parameter search can be performed for this model by running:
+`python InnerEye/ML/runner.py --model=HelloWorld --hyperdrive=True`