зеркало из https://github.com/microsoft/hi-ml.git
cleanup
This commit is contained in:
Родитель
aebaf3e91e
Коммит
4070f31434
|
@ -7,10 +7,10 @@ import tempfile
|
|||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
|
||||
|
||||
from azure.ai.ml import MLClient
|
||||
from azure.ai.ml import MLClient, Input
|
||||
from azure.ai.ml.entities import Data
|
||||
from azure.ai.ml.entities import Datastore as V2Datastore
|
||||
from azure.ai.ml.constants import AssetTypes
|
||||
from azure.ai.ml.constants import AssetTypes, InputOutputModes
|
||||
from azure.ai.ml.operations import DatastoreOperations
|
||||
from azure.core.exceptions import HttpResponseError, ResourceNotFoundError
|
||||
from azureml.core import Dataset, Workspace, Datastore
|
||||
|
@ -476,6 +476,71 @@ class DatasetConfig:
|
|||
logger.info(status)
|
||||
return result
|
||||
|
||||
def to_data_asset(self, ml_client: MLClient) -> Data:
|
||||
"""Retrieves or creates a v2 Data Asset using the provided MLClient object, using the settings in the present object.
|
||||
|
||||
:param ml_client: An MLClient object.
|
||||
:return: The data asset associated with the dataset described in the present object.
|
||||
"""
|
||||
version = self.version
|
||||
logger.info(
|
||||
f"Trying to access data asset {self.name} version {version}, datastore {self.datastore}"
|
||||
)
|
||||
|
||||
# if version is None, this function gets the latest version
|
||||
data_asset: Data = _get_or_create_v2_data_asset(
|
||||
ml_client,
|
||||
self.datastore,
|
||||
self.name,
|
||||
version=str(version) if version else None,
|
||||
)
|
||||
if not data_asset.path:
|
||||
raise ValueError(f"Data asset {data_asset.id} has no path.")
|
||||
|
||||
return data_asset
|
||||
|
||||
|
||||
def create_v2_inputs(ml_client: MLClient, input_datasets: List[DatasetConfig]) -> Dict[str, Input]:
|
||||
"""
|
||||
Create a dictionary of Azure ML v2 Input objects, required for passing input data in to an AML job
|
||||
|
||||
:param ml_client: An MLClient object.
|
||||
:param input_datasets: A list of DatasetConfigs to convert to Inputs.
|
||||
:return: A dictionary in the format "input_name": Input.
|
||||
"""
|
||||
result = {}
|
||||
for i, dataset in enumerate(input_datasets):
|
||||
data_asset = dataset.to_data_asset(ml_client)
|
||||
result[f"{V2_INPUT_ASSET_IDENTIFIER}{i}"] = Input(
|
||||
type=data_asset.type,
|
||||
path=data_asset.path,
|
||||
mode=InputOutputModes.MOUNT if dataset.use_mounting else InputOutputModes.DOWNLOAD,
|
||||
path
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
def create_v2_outputs(ml_client: MLClient, output_datasets: List[DatasetConfig]) -> Dict[str, Output]:
|
||||
"""
|
||||
Create a dictionary of Azure ML v2 Output objects, required for passing output data in to an AML job
|
||||
|
||||
:ml_client: An MLClient object.
|
||||
:param output_datasets: A list of DatasetConfigs to convert to Outputs.
|
||||
:return: A dictionary in the format "output_name": Output.
|
||||
"""
|
||||
|
||||
output_assets = [get_data_asset_from_config(ml_client, output_dataset) for output_dataset in output_datasets]
|
||||
return {
|
||||
# Data assets can be of type "uri_folder", "uri_file", "mltable", all of which are value types in Input
|
||||
f"{V2_OUTPUT_ASSET_IDENTIFIER}{i}": Output( # type: ignore
|
||||
type=data_asset.type, # type: ignore
|
||||
path=data_asset.path,
|
||||
mode=InputOutputModes.MOUNT, # hard-coded to mount for now, as this is the only mode that doesn't break
|
||||
)
|
||||
for i, data_asset in enumerate(output_assets)
|
||||
}
|
||||
|
||||
|
||||
|
||||
StrOrDatasetConfig = Union[str, DatasetConfig]
|
||||
|
||||
|
|
|
@ -20,8 +20,7 @@ from pathlib import Path
|
|||
from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Union
|
||||
|
||||
from azure.ai.ml import Input, MLClient, Output, command
|
||||
from azure.ai.ml.constants import InputOutputModes
|
||||
from azure.ai.ml.entities import Command, Data
|
||||
from azure.ai.ml.entities import Command
|
||||
from azure.ai.ml.entities import Environment as EnvironmentV2
|
||||
from azure.ai.ml.entities import Job, Sweep, UserIdentityConfiguration
|
||||
from azure.ai.ml.entities._job.distribution import DistributionConfiguration, MpiDistribution, PyTorchDistribution
|
||||
|
@ -641,74 +640,6 @@ def _str_to_path(s: Optional[PathOrString]) -> Optional[Path]:
|
|||
return s
|
||||
|
||||
|
||||
def get_data_asset_from_config(ml_client: MLClient, dataset_config: DatasetConfig) -> Data:
|
||||
"""Given a list of dataset configs, generates and returns a list of data assets.
|
||||
|
||||
:param ml_client: An MLClient object.
|
||||
:param dataset_list: The list of datasets to create data assets for.
|
||||
:raises ValueError: Raised if a data asset has no path.
|
||||
:return: A list of data assets.
|
||||
"""
|
||||
|
||||
version = dataset_config.version
|
||||
logger.info(
|
||||
f"Trying to access data asset {dataset_config.name} version {version}, datastore {dataset_config.datastore}"
|
||||
)
|
||||
|
||||
# if version is None, this function gets the latest version
|
||||
data_asset: Data = _get_or_create_v2_data_asset(
|
||||
ml_client,
|
||||
dataset_config.datastore,
|
||||
dataset_config.name,
|
||||
version=str(version) if version else None,
|
||||
)
|
||||
if not data_asset.path:
|
||||
raise ValueError(f"Data asset {data_asset.id} has no path.")
|
||||
|
||||
return data_asset
|
||||
|
||||
|
||||
def create_v2_inputs(ml_client: MLClient, input_datasets: List[DatasetConfig]) -> Dict[str, Input]:
|
||||
"""
|
||||
Create a dictionary of Azure ML v2 Input objects, required for passing input data in to an AML job
|
||||
|
||||
:param ml_client: An MLClient object.
|
||||
:param input_datasets: A list of DatasetConfigs to convert to Inputs.
|
||||
:return: A dictionary in the format "input_name": Input.
|
||||
"""
|
||||
input_assets = [get_data_asset_from_config(ml_client, input_dataset) for input_dataset in input_datasets]
|
||||
# Data assets can be of type "uri_folder", "uri_file", "mltable", all of which are value types in Input
|
||||
return {
|
||||
f"{V2_INPUT_ASSET_IDENTIFIER}{i}": Input( # type: ignore
|
||||
type=data_asset.type, # type: ignore
|
||||
path=data_asset.path,
|
||||
mode=InputOutputModes.MOUNT if input_datasets[i].use_mounting else InputOutputModes.DOWNLOAD,
|
||||
)
|
||||
for i, data_asset in enumerate(input_assets)
|
||||
}
|
||||
|
||||
|
||||
def create_v2_outputs(ml_client: MLClient, output_datasets: List[DatasetConfig]) -> Dict[str, Output]:
|
||||
"""
|
||||
Create a dictionary of Azure ML v2 Output objects, required for passing output data in to an AML job
|
||||
|
||||
:ml_client: An MLClient object.
|
||||
:param output_datasets: A list of DatasetConfigs to convert to Outputs.
|
||||
:return: A dictionary in the format "output_name": Output.
|
||||
"""
|
||||
|
||||
output_assets = [get_data_asset_from_config(ml_client, output_dataset) for output_dataset in output_datasets]
|
||||
return {
|
||||
# Data assets can be of type "uri_folder", "uri_file", "mltable", all of which are value types in Input
|
||||
f"{V2_OUTPUT_ASSET_IDENTIFIER}{i}": Output( # type: ignore
|
||||
type=data_asset.type, # type: ignore
|
||||
path=data_asset.path,
|
||||
mode=InputOutputModes.MOUNT, # hard-coded to mount for now, as this is the only mode that doesn't break
|
||||
)
|
||||
for i, data_asset in enumerate(output_assets)
|
||||
}
|
||||
|
||||
|
||||
def submit_to_azure_if_needed( # type: ignore
|
||||
compute_cluster_name: str = "",
|
||||
entry_script: Optional[PathOrString] = None,
|
||||
|
|
Загрузка…
Ссылка в новой задаче