Ignore local dataset argument when running inside AML runs (#238)
This PR modifies `mount_or_download_dataset` so that the `local_dataset` argument is ignored when running inside an AzureML (AML) run; `local_dataset` is now only used for local runs.
This commit is contained in:
Родитель
e26066c2df
Коммит
a112b399fe
|
@ -190,8 +190,7 @@ class DeepLearningConfig(GenericConfig, CudaAwareConfig):
|
|||
"usually set from the class name.")
|
||||
|
||||
random_seed: int = param.Integer(42, doc="The seed to use for all random number generators.")
|
||||
azure_dataset_id: Optional[str] = param.String(None, allow_None=True,
|
||||
doc="The ID of the dataset to use. This dataset must exist as a "
|
||||
azure_dataset_id: str = param.String(doc="If provided, the ID of the dataset to use. This dataset must exist as a "
|
||||
"folder of the same name in the 'datasets' "
|
||||
"container in the datasets storage account.")
|
||||
local_dataset: Optional[Path] = param.ClassSelector(class_=Path,
|
||||
|
|
|
@ -393,23 +393,27 @@ class MLRunner:
|
|||
mounted or downloaded.
|
||||
Returns the path of the dataset on the executing machine.
|
||||
"""
|
||||
local_dataset = self.model_config.local_dataset
|
||||
if local_dataset:
|
||||
expected_dir = Path(local_dataset)
|
||||
if not expected_dir.is_dir():
|
||||
raise FileNotFoundError(f"The model uses a dataset in {expected_dir}, but that does not exist.")
|
||||
logging.info(f"Model training will use the local dataset provided in {expected_dir}")
|
||||
return expected_dir
|
||||
azure_dataset_id = self.model_config.azure_dataset_id
|
||||
if not azure_dataset_id:
|
||||
raise ValueError("The model must contain either local_dataset or azure_dataset_id.")
|
||||
|
||||
if is_offline_run_context(RUN_CONTEXT):
|
||||
# The present run is outside of AzureML: If local_dataset is set, use that as the path to the data.
|
||||
# Otherwise, download the dataset specified by the azure_dataset_id
|
||||
local_dataset = self.model_config.local_dataset
|
||||
if (not azure_dataset_id) and (local_dataset is None):
|
||||
raise ValueError("The model must contain either local_dataset or azure_dataset_id.")
|
||||
if local_dataset:
|
||||
expected_dir = Path(local_dataset)
|
||||
if not expected_dir.is_dir():
|
||||
raise FileNotFoundError(f"The model uses a dataset in {expected_dir}, but that does not exist.")
|
||||
logging.info(f"Model training will use the local dataset provided in {expected_dir}")
|
||||
return expected_dir
|
||||
return download_dataset(azure_dataset_id=azure_dataset_id,
|
||||
target_folder=self.project_root / fixed_paths.DATASETS_DIR_NAME,
|
||||
azure_config=self.azure_config)
|
||||
|
||||
# Inside of AzureML, datasets can be either mounted or downloaded.
|
||||
if not azure_dataset_id:
|
||||
raise ValueError("The model must contain azure_dataset_id for running on AML")
|
||||
mounted = try_to_mount_input_dataset(RUN_CONTEXT)
|
||||
if not mounted:
|
||||
raise ValueError("Unable to mount or download input dataset.")
|
||||
|
|
|
@ -61,6 +61,7 @@ class ImageEncoder(ScalarModelBase):
|
|||
l_rate=1e-1,
|
||||
use_mixed_precision=True,
|
||||
aggregation_type=aggregation_type,
|
||||
azure_dataset_id="test-dataset",
|
||||
**kwargs
|
||||
)
|
||||
self.encode_channels_jointly = encode_channels_jointly
|
||||
|
|
|
@ -2,7 +2,7 @@ name: PR-$(Date:yyyyMMdd)$(Rev:-r)
|
|||
variables:
|
||||
model: 'BasicModel2Epochs'
|
||||
train: 'True'
|
||||
more_switches: '--log_level=DEBUG'
|
||||
more_switches: '--log_level=DEBUG --local_dataset=F:\local'
|
||||
run_recovery_id: ''
|
||||
tags: 'PR'
|
||||
user_friendly_name: 'PR build'
|
||||
|
|
Загрузка…
Ссылка в новой задаче