Add a Hello World segmentation model + fix HyperDrive config (#234)
Add a Hello World segmentation model Fixed cross validation hyperdrive bug + add test coverage
This commit is contained in:
Родитель
8b0b47941a
Коммит
e26066c2df
|
@ -12,7 +12,7 @@
|
|||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<option name="SCRIPT_NAME" value="InnerEye/ML/runner.py" />
|
||||
<option name="PARAMETERS" value="--model=GlaucomaPublic" />
|
||||
<option name="PARAMETERS" value="--model=HelloWorld" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
<option name="EMULATE_TERMINAL" value="false" />
|
||||
<option name="MODULE_MODE" value="false" />
|
||||
|
|
|
@ -0,0 +1,119 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
from random import Random
|
||||
from typing import Any
|
||||
|
||||
from azureml.train.estimator import Estimator
|
||||
from azureml.train.hyperdrive import BanditPolicy, HyperDriveConfig, PrimaryMetricGoal, RandomParameterSampling, uniform
|
||||
from networkx.tests.test_convert_pandas import pd
|
||||
|
||||
from InnerEye.ML.common import TrackedMetrics
|
||||
from InnerEye.ML.config import PhotometricNormalizationMethod, SegmentationModelBase, equally_weighted_classes
|
||||
from InnerEye.ML.utils.model_metadata_util import generate_random_colours_list
|
||||
from InnerEye.ML.utils.split_dataset import DatasetSplits
|
||||
from Tests.fixed_paths_for_tests import full_ml_test_data_path
|
||||
|
||||
|
||||
class HelloWorld(SegmentationModelBase):
    """
    A deliberately small segmentation model config, set up to train on the CPU for 2 epochs on the
    dummy dataset ../Tests/ML/test_data/dataset.csv

    It is meant as a worked example of how to:
    1) Subclass SegmentationModelBase, the base config for all segmentation model configs
    2) Configure the UNet3D implemented in this package
    3) Configure an Azure HyperDrive based parameter search

    - To train this model from the commandline, run: python InnerEye/ML/runner.py --model=HelloWorld
    - If AzureML has been set up, a parameter search for this model can be started by running:
    python InnerEye/ML/runner.py --model=HelloWorld --hyperdrive=True

    The model is trained on 2 input image channels (channel1 and channel2) and predicts
    2 foreground classes (region and region_1).
    """

    def __init__(self, **kwargs: Any) -> None:
        """
        Sets all fixed settings of the Hello World model, then hands kwargs to add_and_validate.

        :param kwargs: Additional settings, passed on unchanged to add_and_validate.
        """
        foreground_classes = ["region", "region_1"]
        num_foreground_classes = len(foreground_classes)

        # The helper calls below run in the same order in which they would be evaluated
        # if they were written inline as arguments of the super().__init__ call.
        dataset_location = full_ml_test_data_path()
        weights_per_class = equally_weighted_classes(foreground_classes, background_weight=0.02)
        hole_filling_flags = [True] * num_foreground_classes
        display_colours = generate_random_colours_list(Random(5), num_foreground_classes)

        super().__init__(
            # Data definition: where the dataset is loaded from.
            local_dataset=dataset_location,

            # Model definition: which architecture to use, and the settings that go with it.
            architecture="UNet3D",
            feature_channels=[4],
            crop_size=(64, 64, 64),
            image_channels=["channel1", "channel2"],
            ground_truth_ids=foreground_classes,
            class_weights=weights_per_class,
            mask_id="mask",

            # Training and testing: settings for the training loop (batch size, number of epochs,
            # which epochs to save) and for testing (which epochs to test).
            use_gpu=False,
            num_dataload_workers=0,
            train_batch_size=2,
            start_epoch=0,
            num_epochs=2,
            save_start_epoch=1,
            save_step_epochs=1,
            test_start_epoch=2,
            test_diff_epochs=1,
            test_step_epochs=1,
            use_mixed_precision=True,

            # Pre-processing: inputs are normalized with a CT level and window.
            norm_method=PhotometricNormalizationMethod.CtWindow,
            level=50,
            window=200,

            # Post-processing: hole filling is switched on for every foreground class.
            fill_holes=hole_filling_flags,

            # Output: the structure names and colours used for the generated segmentations.
            ground_truth_ids_display_names=foreground_classes,
            colours=display_colours,
        )
        self.add_and_validate(kwargs)

    def get_model_train_test_dataset_splits(self, dataset_df: pd.DataFrame) -> DatasetSplits:
        """
        Splits the dataset by fixed subject IDs: 1, 2, 3 for training, 4 and 5 for validation, 6 for testing.

        :param dataset_df: The dataset that should be split.
        :return: The splits as created by DatasetSplits.from_subject_ids.
        """
        subject_ids = {"train_ids": [1, 2, 3], "val_ids": [4, 5], "test_ids": [6]}
        return DatasetSplits.from_subject_ids(df=dataset_df, **subject_ids)

    def get_parameter_search_hyperdrive_config(self, estimator: Estimator) -> HyperDriveConfig:
        """
        Creates the Azure HyperDrive configuration for the parameter search of this model.
        A tutorial is available at
        https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-tune-hyperparameters
        and the API reference at
        https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.hyperdrive?view=azure-ml-py

        :param estimator: The estimator (configured PyTorch environment) of the experiment.
        :return: An Azure HyperDrive run configuration that randomly samples 'l_rate' and minimizes
        the TrackedMetrics.Val_Loss metric.
        """
        random_sampling = RandomParameterSampling({'l_rate': uniform(0.0005, 0.01)})
        # Bandit policy: terminates runs that perform poorly compared to the best run so far.
        bandit_policy = BanditPolicy(slack_factor=0.15, evaluation_interval=1, delay_evaluation=10)
        return HyperDriveConfig(
            estimator=estimator,
            hyperparameter_sampling=random_sampling,
            policy=bandit_policy,
            primary_metric_name=TrackedMetrics.Val_Loss.value,
            primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
            max_total_runs=10,
            max_concurrent_runs=2
        )
|
|
@ -490,7 +490,7 @@ class ScalarModelBase(ModelConfigBase):
|
|||
self.number_of_cross_validation_splits_per_fold))),
|
||||
})
|
||||
else:
|
||||
super().get_cross_validation_hyperdrive_sampler()
|
||||
return super().get_cross_validation_hyperdrive_sampler()
|
||||
|
||||
def should_wait_for_other_cross_val_child_runs(self) -> bool:
|
||||
"""
|
||||
|
|
|
@ -59,8 +59,13 @@ After that, you need to set up your Python environment:
|
|||
`conda env create --file environment.yml`
|
||||
- Activate the environment by running `conda activate InnerEye`
|
||||
|
||||
Now try to run the Hello World segmentation model, by running
|
||||
`python InnerEye/ML/runner.py --model=HelloWorld`
|
||||
If that works: Congratulations! You have successfully built your first model using the InnerEye toolbox.
|
||||
|
||||
Detailed instructions, including setup in Azure, are here:
|
||||
1. [Setting up your environment](docs/environment.md)
|
||||
1. [Training a Hello World segmentation model](docs/hello_world_model.md)
|
||||
1. [Setting up Azure Machine Learning](docs/setting_up_aml.md)
|
||||
1. [Creating a dataset](docs/creating_dataset.md)
|
||||
1. [Building models in Azure ML](docs/building_models.md)
|
||||
|
|
|
@ -7,7 +7,7 @@ from typing import Any
|
|||
|
||||
import pytest
|
||||
from azureml.train.estimator import Estimator
|
||||
from azureml.train.hyperdrive import BanditPolicy, HyperDriveConfig, PrimaryMetricGoal, RandomParameterSampling, \
|
||||
from azureml.train.hyperdrive import HyperDriveConfig, PrimaryMetricGoal, RandomParameterSampling, \
|
||||
choice, \
|
||||
uniform
|
||||
|
||||
|
@ -117,6 +117,22 @@ def test_dataset_reader_workers() -> None:
|
|||
assert config.num_dataset_reader_workers == 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize("number_of_cross_validation_splits_per_fold", [0, 2])
def test_get_total_number_of_cross_validation_runs(number_of_cross_validation_splits_per_fold: int) -> None:
    """
    Checks the total number of cross validation runs for a model with 2 cross validation splits,
    both without sub-fold cross validation (0 splits per fold) and with it (2 splits per fold).
    """
    model_config = ScalarModelBase(should_validate=False)
    model_config.number_of_cross_validation_splits = 2
    model_config.number_of_cross_validation_splits_per_fold = number_of_cross_validation_splits_per_fold
    assert model_config.perform_cross_validation

    # Sub-fold cross validation must be reported exactly when splits per fold is positive.
    expect_sub_folds = number_of_cross_validation_splits_per_fold > 0
    assert bool(model_config.perform_sub_fold_cross_validation) == expect_sub_folds

    total_runs = model_config.get_total_number_of_cross_validation_runs()
    expected_runs = model_config.number_of_cross_validation_splits
    if expect_sub_folds:
        # With sub-folds, each of the splits is expected to spawn one run per sub-fold.
        expected_runs = expected_runs * number_of_cross_validation_splits_per_fold
    assert total_runs == expected_runs
|
||||
|
||||
|
||||
@pytest.mark.parametrize("number_of_cross_validation_splits", [0, 2])
|
||||
@pytest.mark.parametrize("number_of_cross_validation_splits_per_fold", [0, 2])
|
||||
def test_get_hyperdrive_config(number_of_cross_validation_splits: int,
|
||||
|
@ -147,8 +163,14 @@ def test_get_hyperdrive_config(number_of_cross_validation_splits: int,
|
|||
assert hd_config.estimator.source_directory == source_config.root_folder
|
||||
assert hd_config.estimator.run_config.script == source_config.entry_script
|
||||
assert hd_config.estimator._script_params == source_config.script_params
|
||||
assert hd_config._max_total_runs == config.get_total_number_of_cross_validation_runs() \
|
||||
if config.perform_cross_validation else HYPERDRIVE_TOTAL_RUNS
|
||||
|
||||
if number_of_cross_validation_splits > 0 and number_of_cross_validation_splits_per_fold > 0:
|
||||
assert hd_config._max_total_runs == number_of_cross_validation_splits * \
|
||||
number_of_cross_validation_splits_per_fold
|
||||
elif number_of_cross_validation_splits > 0:
|
||||
assert hd_config._max_total_runs == number_of_cross_validation_splits
|
||||
else:
|
||||
assert hd_config._max_total_runs == HYPERDRIVE_TOTAL_RUNS
|
||||
|
||||
if config.perform_cross_validation:
|
||||
# check sampler is as expected
|
||||
|
@ -169,22 +191,12 @@ def test_get_hyperdrive_config(number_of_cross_validation_splits: int,
|
|||
|
||||
|
||||
def _create_dummy_hyperdrive_param_search_config(estimator: Estimator) -> HyperDriveConfig:
    """
    Creates a minimal HyperDrive configuration for use in tests. It randomly samples 'l_rate',
    minimizes the TrackedMetrics.Val_Loss metric, and sets no early termination policy.

    :param estimator: The estimator that the HyperDrive configuration should use.
    :return: A HyperDriveConfig with max_total_runs set to HYPERDRIVE_TOTAL_RUNS.
    """
    return HyperDriveConfig(
        estimator=estimator,
        hyperparameter_sampling=RandomParameterSampling({
            'l_rate': uniform(0.0005, 0.01)
        }),
        primary_metric_name=TrackedMetrics.Val_Loss.value,
        primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
        max_total_runs=HYPERDRIVE_TOTAL_RUNS
    )
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
# Training a Hello World segmentation model
|
||||
|
||||
In the configs folder, you will find a config file called [HelloWorld.py](../InnerEye/ML/configs/segmentation/HelloWorld.py).
|
||||
We have created this file to demonstrate how to:
|
||||
|
||||
1. Subclass SegmentationModelBase which is the base config for all segmentation model configs
|
||||
1. Configure the UNet3D implemented in this package
|
||||
1. Configure Azure HyperDrive based parameter search
|
||||
|
||||
- This model can be trained from the command line: `python InnerEye/ML/runner.py --model=HelloWorld`
|
||||
- If you have set up AzureML then parameter search can be performed for this model by running:
|
||||
`python InnerEye/ML/runner.py --model=HelloWorld --hyperdrive=True`
|
Загрузка…
Ссылка в новой задаче