Fix for invalid "." dataset location in AzureML (#161)

* Catch "." and raise an exception
This commit is contained in:
Anton Schwaighofer 2021-11-23 15:40:59 +00:00 коммит произвёл GitHub
Родитель c7bdf6ac0a
Коммит 145e7dc9a2
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файлов: 46 добавлений и 8 удалений

Просмотреть файл

@ -21,6 +21,7 @@ the section headers (Added/Changed/...) and incrementing the package version.
- ([#164](https://github.com/microsoft/hi-ml/pull/164)) Look in more locations for std out from AzureML run.
### Fixed
- ([#161](https://github.com/microsoft/hi-ml/pull/161)) Empty string as target folder for a dataset creates an invalid mounting path for the dataset in AzureML (fixes #160)
### Removed

Просмотреть файл

@ -95,7 +95,7 @@ class DatasetConfig:
Defaults: False (downloading) for datasets that are script inputs, True (mounting) for datasets that are
script outputs.
:param target_folder: The folder into which the dataset should be downloaded or mounted. If left empty, a
random folder on /tmp will be chosen.
random folder on /tmp will be chosen. Do NOT use "." as the target_folder.
:param local_folder: The folder on the local machine at which the dataset is available. This
is used only for runs outside of AzureML. If this is empty then the target_folder will be used to
mount or download the dataset.
@ -109,8 +109,11 @@ class DatasetConfig:
self.datastore = datastore
self.version = version
self.use_mounting = use_mounting
self.target_folder = Path(target_folder) if target_folder is not None else None
self.local_folder = Path(local_folder) if local_folder is not None else None
# If target_folder is "" then convert to None
self.target_folder = Path(target_folder) if target_folder else None
if str(self.target_folder) == ".":
raise ValueError("Can't mount or download a dataset to the current working directory.")
self.local_folder = Path(local_folder) if local_folder else None
def to_input_dataset_local(self, workspace: Optional[Workspace]) -> Tuple[Optional[Path], Optional[MountContext]]:
"""
@ -137,7 +140,6 @@ class DatasetConfig:
azureml_dataset = get_or_create_dataset(workspace=workspace,
dataset_name=self.name,
datastore_name=self.datastore)
target_path = self.target_folder or Path(tempfile.mkdtemp())
use_mounting = self.use_mounting if self.use_mounting is not None else False
if use_mounting:
@ -170,7 +172,9 @@ class DatasetConfig:
dataset_name=self.name,
datastore_name=self.datastore)
named_input = azureml_dataset.as_named_input(_input_dataset_key(index=dataset_index))
path_on_compute = str(self.target_folder) if self.target_folder is not None else None
# If running on windows then self.target_folder may be a WindowsPath, make sure it is
# in posix format for Azure.
path_on_compute = self.target_folder.as_posix() if self.target_folder is not None else None
use_mounting = False if self.use_mounting is None else self.use_mounting
if use_mounting:
status += "mounted at "
@ -202,7 +206,7 @@ class DatasetConfig:
destination=(datastore, self.name + "/"))
# TODO: Can we get tags into here too?
dataset = dataset.register_on_complete(name=self.name)
if self.target_folder is not None:
if self.target_folder:
raise ValueError("Output datasets can't have a target_folder set.")
use_mounting = True if self.use_mounting is None else self.use_mounting
if use_mounting:

Просмотреть файл

@ -5,15 +5,18 @@
"""
Test the data input and output functionality
"""
from pathlib import Path
from unittest import mock
from azureml.exceptions._azureml_exception import UserErrorException
from health_azure.utils import PathOrString
import pytest
from azureml._restclient.exceptions import ServiceException
from azureml.core import Dataset
from azureml.data import FileDataset, OutputFileDatasetConfig
from azureml.data.azure_storage_datastore import AzureBlobDatastore
from azureml.data.dataset_consumption_config import DatasetConsumptionConfig
from azureml._restclient.exceptions import ServiceException
from azureml.exceptions._azureml_exception import UserErrorException
from health_azure.datasets import (DatasetConfig, _input_dataset_key, _output_dataset_key,
_replace_string_datasets, get_datastore, get_or_create_dataset)
from testazure.util import DEFAULT_DATASTORE, DEFAULT_WORKSPACE
@ -81,6 +84,36 @@ def test_dataset_input() -> None:
assert aml_dataset.mode == "mount"
@pytest.mark.parametrize("target_folder", ["", None])
def test_dataset_input_target_empty(target_folder: PathOrString) -> None:
    """
    An empty string or None as the target folder must leave path_on_compute unset,
    rather than turning it into ".".
    """
    aml_workspace = DEFAULT_WORKSPACE.workspace
    # The "hello_world" dataset has to be present in the workspace already, or at least in blob storage.
    config = DatasetConfig(name="hello_world", datastore=DEFAULT_DATASTORE, target_folder=target_folder)
    consumption_config = config.to_input_dataset(workspace=aml_workspace, dataset_index=1)
    assert isinstance(consumption_config, DatasetConsumptionConfig)
    assert consumption_config.path_on_compute is None
@pytest.mark.parametrize("target_folder", [
    ".",
    Path(),
    Path("."),
])
def test_dataset_invalid_target(target_folder: PathOrString) -> None:
    """
    Passing in "." as a target_folder should raise an exception.

    All three parameter values are spellings of the current working directory:
    the string ".", an empty Path(), and Path(".").
    """
    with pytest.raises(ValueError) as ex:
        DatasetConfig(name="hello_world", datastore=DEFAULT_DATASTORE, target_folder=target_folder)
    # Check the message of the raised exception itself (ex.value), not the string form of
    # the ExceptionInfo wrapper that pytest.raises returns.
    assert "current working directory" in str(ex.value)
def test_dataset_output() -> None:
"""
Test turning a dataset setup object to an actual AML output dataset.