зеркало из https://github.com/microsoft/hi-ml.git
Fix for invalid "." dataset location in AzureML (#161)
* Catch "." and raise an exception
This commit is contained in:
Родитель
c7bdf6ac0a
Коммит
145e7dc9a2
|
@ -21,6 +21,7 @@ the section headers (Added/Changed/...) and incrementing the package version.
|
|||
- ([164](https://github.com/microsoft/hi-ml/pull/164)) Look in more locations for std out from AzureML run.
|
||||
|
||||
### Fixed
|
||||
- ([#161](https://github.com/microsoft/hi-ml/pull/161)) Empty string as target folder for a dataset creates an invalid mounting path for the dataset in AzureML (fixes #160)
|
||||
|
||||
### Removed
|
||||
|
||||
|
|
|
@ -95,7 +95,7 @@ class DatasetConfig:
|
|||
Defaults: False (downloading) for datasets that are script inputs, True (mounting) for datasets that are
|
||||
script outputs.
|
||||
:param target_folder: The folder into which the dataset should be downloaded or mounted. If left empty, a
|
||||
random folder on /tmp will be chosen.
|
||||
random folder on /tmp will be chosen. Do NOT use "." as the target_folder.
|
||||
:param local_folder: The folder on the local machine at which the dataset is available. This
|
||||
is used only for runs outside of AzureML. If this is empty then the target_folder will be used to
|
||||
mount or download the dataset.
|
||||
|
@ -109,8 +109,11 @@ class DatasetConfig:
|
|||
self.datastore = datastore
|
||||
self.version = version
|
||||
self.use_mounting = use_mounting
|
||||
self.target_folder = Path(target_folder) if target_folder is not None else None
|
||||
self.local_folder = Path(local_folder) if local_folder is not None else None
|
||||
# If target_folder is "" then convert to None
|
||||
self.target_folder = Path(target_folder) if target_folder else None
|
||||
if str(self.target_folder) == ".":
|
||||
raise ValueError("Can't mount or download a dataset to the current working directory.")
|
||||
self.local_folder = Path(local_folder) if local_folder else None
|
||||
|
||||
def to_input_dataset_local(self, workspace: Optional[Workspace]) -> Tuple[Optional[Path], Optional[MountContext]]:
|
||||
"""
|
||||
|
@ -137,7 +140,6 @@ class DatasetConfig:
|
|||
azureml_dataset = get_or_create_dataset(workspace=workspace,
|
||||
dataset_name=self.name,
|
||||
datastore_name=self.datastore)
|
||||
|
||||
target_path = self.target_folder or Path(tempfile.mkdtemp())
|
||||
use_mounting = self.use_mounting if self.use_mounting is not None else False
|
||||
if use_mounting:
|
||||
|
@ -170,7 +172,9 @@ class DatasetConfig:
|
|||
dataset_name=self.name,
|
||||
datastore_name=self.datastore)
|
||||
named_input = azureml_dataset.as_named_input(_input_dataset_key(index=dataset_index))
|
||||
path_on_compute = str(self.target_folder) if self.target_folder is not None else None
|
||||
# If running on windows then self.target_folder may be a WindowsPath, make sure it is
|
||||
# in posix format for Azure.
|
||||
path_on_compute = self.target_folder.as_posix() if self.target_folder is not None else None
|
||||
use_mounting = False if self.use_mounting is None else self.use_mounting
|
||||
if use_mounting:
|
||||
status += "mounted at "
|
||||
|
@ -202,7 +206,7 @@ class DatasetConfig:
|
|||
destination=(datastore, self.name + "/"))
|
||||
# TODO: Can we get tags into here too?
|
||||
dataset = dataset.register_on_complete(name=self.name)
|
||||
if self.target_folder is not None:
|
||||
if self.target_folder:
|
||||
raise ValueError("Output datasets can't have a target_folder set.")
|
||||
use_mounting = True if self.use_mounting is None else self.use_mounting
|
||||
if use_mounting:
|
||||
|
|
|
@ -5,15 +5,18 @@
|
|||
"""
|
||||
Test the data input and output functionality
|
||||
"""
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
from azureml.exceptions._azureml_exception import UserErrorException
|
||||
from health_azure.utils import PathOrString
|
||||
|
||||
import pytest
|
||||
from azureml._restclient.exceptions import ServiceException
|
||||
from azureml.core import Dataset
|
||||
from azureml.data import FileDataset, OutputFileDatasetConfig
|
||||
from azureml.data.azure_storage_datastore import AzureBlobDatastore
|
||||
from azureml.data.dataset_consumption_config import DatasetConsumptionConfig
|
||||
from azureml._restclient.exceptions import ServiceException
|
||||
from azureml.exceptions._azureml_exception import UserErrorException
|
||||
|
||||
from health_azure.datasets import (DatasetConfig, _input_dataset_key, _output_dataset_key,
|
||||
_replace_string_datasets, get_datastore, get_or_create_dataset)
|
||||
from testazure.util import DEFAULT_DATASTORE, DEFAULT_WORKSPACE
|
||||
|
@ -81,6 +84,36 @@ def test_dataset_input() -> None:
|
|||
assert aml_dataset.mode == "mount"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("target_folder", ["", None])
def test_dataset_input_target_empty(target_folder: PathOrString) -> None:
    """
    An empty or missing target folder must leave path_on_compute unset, rather than turning it into ".".
    """
    aml_workspace = DEFAULT_WORKSPACE.workspace
    # The dataset "hello_world" has to be present in the workspace already, or at least in blob storage.
    config = DatasetConfig(name="hello_world", datastore=DEFAULT_DATASTORE, target_folder=target_folder)
    consumption_config = config.to_input_dataset(workspace=aml_workspace, dataset_index=1)
    assert isinstance(consumption_config, DatasetConsumptionConfig)
    assert consumption_config.path_on_compute is None
|
||||
|
||||
|
||||
@pytest.mark.parametrize("target_folder", [
    ".",
    Path(),
    Path("."),
])
def test_dataset_invalid_target(target_folder: PathOrString) -> None:
    """
    Passing in "." as a target_folder should raise an exception.
    """
    with pytest.raises(ValueError) as ex:
        DatasetConfig(name="hello_world", datastore=DEFAULT_DATASTORE, target_folder=target_folder)
    # `ex` is pytest's ExceptionInfo wrapper; the raised ValueError (and its message) is on `ex.value`.
    assert "current working directory" in str(ex.value)
|
||||
|
||||
|
||||
def test_dataset_output() -> None:
|
||||
"""
|
||||
Test turning a dataset setup object to an actual AML output dataset.
|
||||
|
|
Загрузка…
Ссылка в новой задаче