зеркало из https://github.com/microsoft/hi-ml.git
BUG: Prevent creating v2 data asset without specifying datastore (#854)
Closes #786. It should now be impossible to create new data assets without also specifying a datastore by its specific name, in both v1 and v2 pathways. Default datastore interactions in all other code paths have been left unchanged.
This commit is contained in:
Родитель
2613b2eaeb
Коммит
2a11d5f665
|
@ -128,6 +128,11 @@ def _get_or_create_v1_dataset(datastore_name: str, dataset_name: str, workspace:
|
|||
try:
|
||||
azureml_dataset = _retrieve_v1_dataset(dataset_name, workspace)
|
||||
except UserErrorException:
|
||||
if datastore_name == "":
|
||||
raise ValueError(
|
||||
"When creating a new dataset, a datastore name must be provided. Please specify a datastore name using "
|
||||
"the --datastore flag"
|
||||
)
|
||||
azureml_dataset = _create_v1_dataset(datastore_name, dataset_name, workspace)
|
||||
return azureml_dataset
|
||||
|
||||
|
@ -193,15 +198,17 @@ def _create_v2_data_asset(
|
|||
:param datastore_name: The name of the datastore in which to create or update the Data asset.
|
||||
:param data_asset_name: The name of the data asset to be created.
|
||||
:param version: The version of the data asset to be created.
|
||||
:raises ValueError: If no data asset name is provided to define the new asset.
|
||||
:raises ValueError: If no datastore name is provided to define where to create the data.
|
||||
:return: The created or updated Data asset.
|
||||
"""
|
||||
if not data_asset_name:
|
||||
if data_asset_name == "":
|
||||
raise ValueError("Cannot create data asset with empty name.")
|
||||
|
||||
if not datastore_name:
|
||||
default_datastore = ml_client.datastores.get_default()
|
||||
datastore_name = default_datastore.name
|
||||
if datastore_name == "":
|
||||
raise ValueError(
|
||||
"Cannot create data asset with empty datastore name. Please specify a datastore name using the --datastore flag."
|
||||
)
|
||||
|
||||
logging.info(
|
||||
f"Creating a new Data asset from data in folder '{data_asset_name}' in the datastore '{datastore_name}'"
|
||||
|
|
|
@ -310,6 +310,16 @@ def test_get_or_create_v1_dataset() -> None:
|
|||
mocks["_create_v1_dataset"].assert_called_once()
|
||||
|
||||
|
||||
def test_get_or_create_v1_dataset_empty_datastore_name() -> None:
    """Check that creating a v1 dataset with an empty datastore name is rejected.

    Calls ``_get_or_create_v1_dataset`` with ``""`` as the datastore name and
    expects a ``ValueError`` whose message asks for a datastore name.

    NOTE(review): this presumably relies on no dataset named "foo" existing in
    the default workspace, so that retrieval fails and the creation path is
    taken -- confirm against the workspace used in CI.
    """
    workspace = DEFAULT_WORKSPACE.workspace
    datastore = ""
    dataset_name = "foo"
    with pytest.raises(ValueError) as ex:
        _get_or_create_v1_dataset(datastore, dataset_name, workspace)

    # Inspect the raised exception itself (``ex.value``) rather than the
    # ``ExceptionInfo`` wrapper, whose string form is a repr-style summary.
    assert "When creating a new dataset, a datastore name must be provided." in str(ex.value)
|
||||
|
||||
|
||||
def test_get_or_create_v2_data_asset() -> None:
|
||||
def _mock_error_from_retrieve_v2_data_asset(
|
||||
ml_client: MLClient,
|
||||
|
@ -464,9 +474,15 @@ def test_retrieving_v2_data_asset_does_not_increment() -> None:
|
|||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
["asset_name", "version"], [(TEST_DATA_ASSET_NAME, None), (TEST_DATA_ASSET_NAME, "1"), ("", 1)]
|
||||
["asset_name", "datastore_name", "version"],
|
||||
[
|
||||
(TEST_DATA_ASSET_NAME, TEST_DATASTORE_NAME, None),
|
||||
(TEST_DATA_ASSET_NAME, "", None),
|
||||
(TEST_DATA_ASSET_NAME, TEST_DATASTORE_NAME, "1"),
|
||||
("", TEST_DATASTORE_NAME, 1),
|
||||
],
|
||||
)
|
||||
def test_create_v2_data_asset(asset_name: str, version: Optional[str]) -> None:
|
||||
def test_create_v2_data_asset(asset_name: str, datastore_name: str, version: Optional[str]) -> None:
|
||||
try:
|
||||
data_asset = _create_v2_data_asset(
|
||||
ml_client=TEST_ML_CLIENT,
|
||||
|
@ -483,6 +499,8 @@ def test_create_v2_data_asset(asset_name: str, version: Optional[str]) -> None:
|
|||
except ValueError as ex:
|
||||
if asset_name == "":
|
||||
assert "Cannot create data asset with empty name." in str(ex)
|
||||
elif datastore_name == "":
|
||||
assert "Cannot create data asset with empty datastore name." in str(ex)
|
||||
else:
|
||||
pytest.fail(f"Unexpected error: {ex}")
|
||||
|
||||
|
|
|
@ -1782,7 +1782,12 @@ def test_create_v2_inputs(already_exists: bool) -> None:
|
|||
|
||||
for use_mounting in [True, False]:
|
||||
mock_input_dataconfigs = [
|
||||
DatasetConfig(name="dummy_dataset", use_mounting=use_mounting, version=int(mock_data_version))
|
||||
DatasetConfig(
|
||||
name="dummy_dataset",
|
||||
datastore="dummy_datastore",
|
||||
use_mounting=use_mounting,
|
||||
version=int(mock_data_version),
|
||||
)
|
||||
]
|
||||
inputs = himl.create_v2_inputs(mock_ml_client, mock_input_dataconfigs)
|
||||
assert isinstance(inputs, Dict)
|
||||
|
|
Загрузка…
Ссылка в новой задаче