Ignore local dataset argument when running inside AML runs (#238)
This PR modifies `mount_or_download_dataset` so that the `local_dataset` argument is ignored inside AML runs; `local_dataset` is only used for local runs.
This commit is contained in:
Родитель
e26066c2df
Коммит
a112b399fe
|
@ -190,8 +190,7 @@ class DeepLearningConfig(GenericConfig, CudaAwareConfig):
|
||||||
"usually set from the class name.")
|
"usually set from the class name.")
|
||||||
|
|
||||||
random_seed: int = param.Integer(42, doc="The seed to use for all random number generators.")
|
random_seed: int = param.Integer(42, doc="The seed to use for all random number generators.")
|
||||||
azure_dataset_id: Optional[str] = param.String(None, allow_None=True,
|
azure_dataset_id: str = param.String(doc="If provided, the ID of the dataset to use. This dataset must exist as a "
|
||||||
doc="The ID of the dataset to use. This dataset must exist as a "
|
|
||||||
"folder of the same name in the 'datasets' "
|
"folder of the same name in the 'datasets' "
|
||||||
"container in the datasets storage account.")
|
"container in the datasets storage account.")
|
||||||
local_dataset: Optional[Path] = param.ClassSelector(class_=Path,
|
local_dataset: Optional[Path] = param.ClassSelector(class_=Path,
|
||||||
|
|
|
@ -393,23 +393,27 @@ class MLRunner:
|
||||||
mounted or downloaded.
|
mounted or downloaded.
|
||||||
Returns the path of the dataset on the executing machine.
|
Returns the path of the dataset on the executing machine.
|
||||||
"""
|
"""
|
||||||
local_dataset = self.model_config.local_dataset
|
|
||||||
if local_dataset:
|
|
||||||
expected_dir = Path(local_dataset)
|
|
||||||
if not expected_dir.is_dir():
|
|
||||||
raise FileNotFoundError(f"The model uses a dataset in {expected_dir}, but that does not exist.")
|
|
||||||
logging.info(f"Model training will use the local dataset provided in {expected_dir}")
|
|
||||||
return expected_dir
|
|
||||||
azure_dataset_id = self.model_config.azure_dataset_id
|
azure_dataset_id = self.model_config.azure_dataset_id
|
||||||
if not azure_dataset_id:
|
|
||||||
raise ValueError("The model must contain either local_dataset or azure_dataset_id.")
|
|
||||||
if is_offline_run_context(RUN_CONTEXT):
|
if is_offline_run_context(RUN_CONTEXT):
|
||||||
# The present run is outside of AzureML: If local_dataset is set, use that as the path to the data.
|
# The present run is outside of AzureML: If local_dataset is set, use that as the path to the data.
|
||||||
# Otherwise, download the dataset specified by the azure_dataset_id
|
# Otherwise, download the dataset specified by the azure_dataset_id
|
||||||
|
local_dataset = self.model_config.local_dataset
|
||||||
|
if (not azure_dataset_id) and (local_dataset is None):
|
||||||
|
raise ValueError("The model must contain either local_dataset or azure_dataset_id.")
|
||||||
|
if local_dataset:
|
||||||
|
expected_dir = Path(local_dataset)
|
||||||
|
if not expected_dir.is_dir():
|
||||||
|
raise FileNotFoundError(f"The model uses a dataset in {expected_dir}, but that does not exist.")
|
||||||
|
logging.info(f"Model training will use the local dataset provided in {expected_dir}")
|
||||||
|
return expected_dir
|
||||||
return download_dataset(azure_dataset_id=azure_dataset_id,
|
return download_dataset(azure_dataset_id=azure_dataset_id,
|
||||||
target_folder=self.project_root / fixed_paths.DATASETS_DIR_NAME,
|
target_folder=self.project_root / fixed_paths.DATASETS_DIR_NAME,
|
||||||
azure_config=self.azure_config)
|
azure_config=self.azure_config)
|
||||||
|
|
||||||
# Inside of AzureML, datasets can be either mounted or downloaded.
|
# Inside of AzureML, datasets can be either mounted or downloaded.
|
||||||
|
if not azure_dataset_id:
|
||||||
|
raise ValueError("The model must contain azure_dataset_id for running on AML")
|
||||||
mounted = try_to_mount_input_dataset(RUN_CONTEXT)
|
mounted = try_to_mount_input_dataset(RUN_CONTEXT)
|
||||||
if not mounted:
|
if not mounted:
|
||||||
raise ValueError("Unable to mount or download input dataset.")
|
raise ValueError("Unable to mount or download input dataset.")
|
||||||
|
|
|
@ -61,6 +61,7 @@ class ImageEncoder(ScalarModelBase):
|
||||||
l_rate=1e-1,
|
l_rate=1e-1,
|
||||||
use_mixed_precision=True,
|
use_mixed_precision=True,
|
||||||
aggregation_type=aggregation_type,
|
aggregation_type=aggregation_type,
|
||||||
|
azure_dataset_id="test-dataset",
|
||||||
**kwargs
|
**kwargs
|
||||||
)
|
)
|
||||||
self.encode_channels_jointly = encode_channels_jointly
|
self.encode_channels_jointly = encode_channels_jointly
|
||||||
|
|
|
@ -2,7 +2,7 @@ name: PR-$(Date:yyyyMMdd)$(Rev:-r)
|
||||||
variables:
|
variables:
|
||||||
model: 'BasicModel2Epochs'
|
model: 'BasicModel2Epochs'
|
||||||
train: 'True'
|
train: 'True'
|
||||||
more_switches: '--log_level=DEBUG'
|
more_switches: '--log_level=DEBUG --local_dataset=F:\local'
|
||||||
run_recovery_id: ''
|
run_recovery_id: ''
|
||||||
tags: 'PR'
|
tags: 'PR'
|
||||||
user_friendly_name: 'PR build'
|
user_friendly_name: 'PR build'
|
||||||
|
|
Загрузка…
Ссылка в новой задаче