Ignore local dataset argument when running inside AML runs (#238)

This PR modifies `mount_or_download_dataset` so that the `local_dataset` argument is ignored inside AzureML (AML) runs; it is only used for local (offline) runs.
This commit is contained in:
melanibe 2020-09-22 17:01:54 +02:00 коммит произвёл GitHub
Родитель e26066c2df
Коммит a112b399fe
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
4 изменённых файлов: 16 добавлений и 12 удалений

Просмотреть файл

@@ -190,8 +190,7 @@ class DeepLearningConfig(GenericConfig, CudaAwareConfig):
"usually set from the class name.") "usually set from the class name.")
random_seed: int = param.Integer(42, doc="The seed to use for all random number generators.") random_seed: int = param.Integer(42, doc="The seed to use for all random number generators.")
azure_dataset_id: Optional[str] = param.String(None, allow_None=True, azure_dataset_id: str = param.String(doc="If provided, the ID of the dataset to use. This dataset must exist as a "
doc="The ID of the dataset to use. This dataset must exist as a "
"folder of the same name in the 'datasets' " "folder of the same name in the 'datasets' "
"container in the datasets storage account.") "container in the datasets storage account.")
local_dataset: Optional[Path] = param.ClassSelector(class_=Path, local_dataset: Optional[Path] = param.ClassSelector(class_=Path,

Просмотреть файл

@@ -393,23 +393,27 @@ class MLRunner:
mounted or downloaded. mounted or downloaded.
Returns the path of the dataset on the executing machine. Returns the path of the dataset on the executing machine.
""" """
azure_dataset_id = self.model_config.azure_dataset_id
if is_offline_run_context(RUN_CONTEXT):
# The present run is outside of AzureML: If local_dataset is set, use that as the path to the data.
# Otherwise, download the dataset specified by the azure_dataset_id
local_dataset = self.model_config.local_dataset local_dataset = self.model_config.local_dataset
if (not azure_dataset_id) and (local_dataset is None):
raise ValueError("The model must contain either local_dataset or azure_dataset_id.")
if local_dataset: if local_dataset:
expected_dir = Path(local_dataset) expected_dir = Path(local_dataset)
if not expected_dir.is_dir(): if not expected_dir.is_dir():
raise FileNotFoundError(f"The model uses a dataset in {expected_dir}, but that does not exist.") raise FileNotFoundError(f"The model uses a dataset in {expected_dir}, but that does not exist.")
logging.info(f"Model training will use the local dataset provided in {expected_dir}") logging.info(f"Model training will use the local dataset provided in {expected_dir}")
return expected_dir return expected_dir
azure_dataset_id = self.model_config.azure_dataset_id
if not azure_dataset_id:
raise ValueError("The model must contain either local_dataset or azure_dataset_id.")
if is_offline_run_context(RUN_CONTEXT):
# The present run is outside of AzureML: If local_dataset is set, use that as the path to the data.
# Otherwise, download the dataset specified by the azure_dataset_id
return download_dataset(azure_dataset_id=azure_dataset_id, return download_dataset(azure_dataset_id=azure_dataset_id,
target_folder=self.project_root / fixed_paths.DATASETS_DIR_NAME, target_folder=self.project_root / fixed_paths.DATASETS_DIR_NAME,
azure_config=self.azure_config) azure_config=self.azure_config)
# Inside of AzureML, datasets can be either mounted or downloaded. # Inside of AzureML, datasets can be either mounted or downloaded.
if not azure_dataset_id:
raise ValueError("The model must contain azure_dataset_id for running on AML")
mounted = try_to_mount_input_dataset(RUN_CONTEXT) mounted = try_to_mount_input_dataset(RUN_CONTEXT)
if not mounted: if not mounted:
raise ValueError("Unable to mount or download input dataset.") raise ValueError("Unable to mount or download input dataset.")

Просмотреть файл

@@ -61,6 +61,7 @@ class ImageEncoder(ScalarModelBase):
l_rate=1e-1, l_rate=1e-1,
use_mixed_precision=True, use_mixed_precision=True,
aggregation_type=aggregation_type, aggregation_type=aggregation_type,
azure_dataset_id="test-dataset",
**kwargs **kwargs
) )
self.encode_channels_jointly = encode_channels_jointly self.encode_channels_jointly = encode_channels_jointly

Просмотреть файл

@@ -2,7 +2,7 @@ name: PR-$(Date:yyyyMMdd)$(Rev:-r)
variables: variables:
model: 'BasicModel2Epochs' model: 'BasicModel2Epochs'
train: 'True' train: 'True'
more_switches: '--log_level=DEBUG' more_switches: '--log_level=DEBUG --local_dataset=F:\local'
run_recovery_id: '' run_recovery_id: ''
tags: 'PR' tags: 'PR'
user_friendly_name: 'PR build' user_friendly_name: 'PR build'