From 5d5e55e314fae9f43cb16846fbd807c663cd89ef Mon Sep 17 00:00:00 2001 From: Clemens Siebler Date: Wed, 3 Feb 2021 10:03:19 +0100 Subject: [PATCH] Documentation updates (#368) Co-authored-by: Javier Co-authored-by: Anton Schwaighofer --- docs/building_models.md | 8 +++++--- docs/setting_up_aml.md | 9 +++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/docs/building_models.md b/docs/building_models.md index 02e960e6..5bd474b4 100755 --- a/docs/building_models.md +++ b/docs/building_models.md @@ -48,7 +48,7 @@ class Prostate(ProstateBase): def __init__(self) -> None: super().__init__( ground_truth_ids=["femur_r", "femur_l", "rectum", "prostate"], - azure_dataset_id="id-of-your-blob-containing-prostate-data") + azure_dataset_id="name-of-your-AML-dataset-with-prostate-data") ``` The allowed parameters and their meanings are defined in [`SegmentationModelBase`](/InnerEye/ML/config.py). The class name must be the same as the basename of the file containing it, so `Prostate.py` must contain `Prostate`. @@ -64,12 +64,14 @@ class HeadAndNeck(HeadAndNeckBase): def __init__(self) -> None: super().__init__( ground_truth_ids=["parotid_l", "parotid_r", "smg_l", "smg_r", "spinal_cord"] - azure_dataset_id="id-of-your-blob-containing-prostate-data") + azure_dataset_id="name-of-your-AML-dataset-with-head-and-neck-data") ``` ### Training a new model -* Set up your model configuration as above. +* Set up your model configuration as above and update `azure_dataset_id` to the name of your Dataset in the AML workspace. +It is enough to put your dataset into blob storage. The dataset should be contained in a folder at the root of the datasets container. +The InnerEye runner will check if there is a dataset in the AzureML workspace already, and if not, generate it directly from blob storage. 
* Train a new model, for example `Prostate`: ```shell script diff --git a/docs/setting_up_aml.md b/docs/setting_up_aml.md index 86f57890..316947f1 100644 --- a/docs/setting_up_aml.md +++ b/docs/setting_up_aml.md @@ -64,8 +64,13 @@ You can skip this if you have chosen automatic deployment above. We recommend using [low priority](https://docs.microsoft.com/en-us/azure/batch/batch-low-pri-vms) clusters, since they only cost a fraction of the dedicated VMs. -As a reference, the Prostate model and the Head and Neck model require VMs with 4 GPUs with at least 16GB of memory -per GPU, for example `Standard_ND24s`, `Standard_NC24s_v3` or `Standard_NC24s_v2`. +As a reference: +* The Prostate, HeadAndNeck, and the Lung model require VMs with 4 GPUs with at least 24GB of memory +per GPU, for example `Standard_ND24s` (4 GPUs, 24GB per GPU). +* It is possible to train all of these models on machines with fewer GPUs, or GPUs with less memory. If using GPUs with +less memory, some model parameters will need to be adjusted. As a starting point, we would suggest reducing the `train_batch_size`, +and if that is not sufficient, reducing the `crop_size`, bearing in mind though that the size of the crops has a large +impact on the model's accuracy. You need to ensure that your Azure subscription actually has a quota for accessing GPU machines. To see your quota, find your newly created AzureML workspace in the [Azure portal](http://portal.azure.com), using the search bar at the