BUG: Prevent creating v2 data asset without specifying datastore (#854)

Closes #786 

It should now be impossible to create new data assets without also
specifying a datastore by its specific name, in both the v1 and v2 pathways.

Default datastore interactions in all other code paths have been left
unchanged.
This commit is contained in:
Peter Hessey 2023-04-07 09:59:49 +01:00 коммит произвёл GitHub
Родитель 2613b2eaeb
Коммит 2a11d5f665
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файлов: 37 добавлений и 7 удалений

Просмотреть файл

@ -128,6 +128,11 @@ def _get_or_create_v1_dataset(datastore_name: str, dataset_name: str, workspace:
try:
azureml_dataset = _retrieve_v1_dataset(dataset_name, workspace)
except UserErrorException:
if datastore_name == "":
raise ValueError(
"When creating a new dataset, a datastore name must be provided. Please specify a datastore name using "
"the --datastore flag"
)
azureml_dataset = _create_v1_dataset(datastore_name, dataset_name, workspace)
return azureml_dataset
@ -193,15 +198,17 @@ def _create_v2_data_asset(
:param datastore_name: The name of the datastore in which to create or update the Data asset.
:param data_asset_name: The name of the data asset to be created.
:param version: The version of the data asset to be created.
:raises ValueError: If no data asset name is provided to define the new asset.
:raises ValueError: If no datastore name is provided to define where to create the data.
:return: The created or updated Data asset.
"""
if not data_asset_name:
if data_asset_name == "":
raise ValueError("Cannot create data asset with empty name.")
if not datastore_name:
default_datastore = ml_client.datastores.get_default()
datastore_name = default_datastore.name
if datastore_name == "":
raise ValueError(
"Cannot create data asset with empty datastore name. Please specify a datastore name using the --datastore flag."
)
logging.info(
f"Creating a new Data asset from data in folder '{data_asset_name}' in the datastore '{datastore_name}'"

Просмотреть файл

@ -310,6 +310,16 @@ def test_get_or_create_v1_dataset() -> None:
mocks["_create_v1_dataset"].assert_called_once()
def test_get_or_create_v1_dataset_empty_datastore_name() -> None:
    """
    Test that _get_or_create_v1_dataset raises a ValueError with a helpful message when it is
    called with an empty datastore name.
    """
    workspace = DEFAULT_WORKSPACE.workspace
    datastore = ""
    # NOTE(review): presumably no dataset called "foo" exists in the test workspace, so that the
    # creation code path is taken - confirm.
    dataset_name = "foo"
    with pytest.raises(ValueError) as ex:
        _get_or_create_v1_dataset(datastore, dataset_name, workspace)
    # The assertion must sit after the `with` block: statements following the raising call inside
    # the block are never executed. Inspect the raised exception itself (`ex.value`) rather than
    # the ExceptionInfo wrapper.
    assert "When creating a new dataset, a datastore name must be provided." in str(ex.value)
def test_get_or_create_v2_data_asset() -> None:
def _mock_error_from_retrieve_v2_data_asset(
ml_client: MLClient,
@ -464,9 +474,15 @@ def test_retrieving_v2_data_asset_does_not_increment() -> None:
@pytest.mark.parametrize(
["asset_name", "version"], [(TEST_DATA_ASSET_NAME, None), (TEST_DATA_ASSET_NAME, "1"), ("", 1)]
["asset_name", "datastore_name", "version"],
[
(TEST_DATA_ASSET_NAME, TEST_DATASTORE_NAME, None),
(TEST_DATA_ASSET_NAME, "", None),
(TEST_DATA_ASSET_NAME, TEST_DATASTORE_NAME, "1"),
("", TEST_DATASTORE_NAME, 1),
],
)
def test_create_v2_data_asset(asset_name: str, version: Optional[str]) -> None:
def test_create_v2_data_asset(asset_name: str, datastore_name: str, version: Optional[str]) -> None:
try:
data_asset = _create_v2_data_asset(
ml_client=TEST_ML_CLIENT,
@ -483,6 +499,8 @@ def test_create_v2_data_asset(asset_name: str, version: Optional[str]) -> None:
except ValueError as ex:
if asset_name == "":
assert "Cannot create data asset with empty name." in str(ex)
elif datastore_name == "":
assert "Cannot create data asset with empty datastore name." in str(ex)
else:
pytest.fail(f"Unexpected error: {ex}")

Просмотреть файл

@ -1782,7 +1782,12 @@ def test_create_v2_inputs(already_exists: bool) -> None:
for use_mounting in [True, False]:
mock_input_dataconfigs = [
DatasetConfig(name="dummy_dataset", use_mounting=use_mounting, version=int(mock_data_version))
DatasetConfig(
name="dummy_dataset",
datastore="dummy_datastore",
use_mounting=use_mounting,
version=int(mock_data_version),
)
]
inputs = himl.create_v2_inputs(mock_ml_client, mock_input_dataconfigs)
assert isinstance(inputs, Dict)