# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

import logging
import os
import shutil
import tempfile
import uuid
from pathlib import Path
from typing import Tuple

from attr import dataclass
from azureml.core import Datastore, Environment, Experiment, Model, ScriptRunConfig
from azureml.core.runconfig import RunConfiguration
from azureml.core.workspace import WORKSPACE_DEFAULT_BLOB_STORE_NAME, Workspace

from azure_config import AzureConfig
from source_config import SourceConfig

ENVIRONMENT_VERSION = "1"
|
|
DEFAULT_RESULT_IMAGE_NAME = "segmentation.dcm.zip"
|
|
DEFAULT_DATA_FOLDER = "data"
|
|
SCORE_SCRIPT = "score.py"
|
|
RUN_SCORING_SCRIPT = "download_model_and_run_scoring.py"
|
|
# The property in the model registry that holds the name of the Python environment
|
|
PYTHON_ENVIRONMENT_NAME = "python_environment_name"
|
|
IMAGEDATA_FILE_NAME = "imagedata.zip"
|
|
|
|
|
|
@dataclass
|
|
class SubmitForInferenceConfig:
|
|
"""
|
|
Inference config class.
|
|
"""
|
|
model_id: str
|
|
image_data: bytes
|
|
experiment_name: str
|
|
|
|
|
|
def create_run_config(azure_config: AzureConfig,
|
|
source_config: SourceConfig,
|
|
environment_name: str) -> ScriptRunConfig:
|
|
"""
|
|
Creates a configuration to run the InnerEye training script in AzureML.
|
|
:param azure_config: azure related configurations to use for model scale-out behaviour
|
|
:param source_config: configurations for model execution, such as name and execution mode
|
|
:param environment_name: If specified, try to retrieve the existing Python environment with this name. If that
|
|
is not found, create one from the Conda files provided in `source_config`. This parameter is meant to be used
|
|
when running inference for an existing model.
|
|
:return: The configured script run.
|
|
"""
|
|
# AzureML seems to sometimes expect the entry script path in Linux format, hence convert to posix path
|
|
entry_script_relative_path = source_config.entry_script.relative_to(source_config.root_folder).as_posix()
|
|
logging.info(f"Entry script {entry_script_relative_path} ({source_config.entry_script} "
|
|
f"relative to source directory {source_config.root_folder})")
|
|
max_run_duration = 43200 # 12 hours in seconds
|
|
workspace = azure_config.get_workspace()
|
|
run_config = RunConfiguration(script=entry_script_relative_path, arguments=source_config.script_params)
|
|
env = Environment.get(azure_config.get_workspace(), name=environment_name, version=ENVIRONMENT_VERSION)
|
|
logging.info(f"Using existing Python environment '{env.name}'.")
|
|
run_config.environment = env
|
|
run_config.target = azure_config.cluster
|
|
run_config.max_run_duration_seconds = max_run_duration
|
|
# Use blob storage for storing the source, rather than the FileShares section of the storage account.
|
|
run_config.source_directory_data_store = workspace.datastores.get(WORKSPACE_DEFAULT_BLOB_STORE_NAME).name
|
|
script_run_config = ScriptRunConfig(source_directory=str(source_config.root_folder), run_config=run_config)
|
|
return script_run_config
|
|
|
|
|
|
def submit_for_inference(args: SubmitForInferenceConfig, workspace: Workspace, azure_config: AzureConfig) -> Tuple[str, str]:
|
|
"""
|
|
Create and submit an inference to AzureML, and optionally download the resulting segmentation.
|
|
:param args: configuration, see SubmitForInferenceConfig
|
|
:param workspace: Azure ML workspace.
|
|
:param azure_config: An object with all necessary information for accessing Azure.
|
|
:return: Azure Run Id (and the target path on the datastore, including the uuid, for a unit
|
|
test to ensure that the image data zip is overwritten after infernece)
|
|
"""
|
|
logging.info("Identifying model")
|
|
model = Model(workspace=workspace, id=args.model_id)
|
|
model_id = model.id
|
|
logging.info(f"Identified model {model_id}")
|
|
|
|
source_directory = tempfile.TemporaryDirectory()
|
|
source_directory_path = Path(source_directory.name)
|
|
logging.info(f"Building inference run submission in {source_directory_path}")
|
|
|
|
image_folder = source_directory_path / DEFAULT_DATA_FOLDER
|
|
image_folder.mkdir(parents=True, exist_ok=True)
|
|
image_path = image_folder / IMAGEDATA_FILE_NAME
|
|
image_path.write_bytes(args.image_data)
|
|
|
|
image_datastore = Datastore(workspace, azure_config.datastore_name)
|
|
target_path = f"{azure_config.image_data_folder}/{str(uuid.uuid4())}"
|
|
image_datastore.upload_files(files=[str(image_path)], target_path=target_path, overwrite=False, show_progress=False)
|
|
image_path.unlink()
|
|
|
|
# Retrieve the name of the Python environment that the training run used. This environment
|
|
# should have been registered. If no such environment exists, it will be re-create from the
|
|
# Conda files provided.
|
|
python_environment_name = model.tags.get(PYTHON_ENVIRONMENT_NAME, "")
|
|
if python_environment_name == "":
|
|
raise ValueError(
|
|
f"Model ID: {model_id} does not contain an environment tag {PYTHON_ENVIRONMENT_NAME}")
|
|
|
|
# Copy the scoring script from the repository. This will start the model download from Azure,
|
|
# and invoke the scoring script.
|
|
entry_script = source_directory_path / Path(RUN_SCORING_SCRIPT).name
|
|
current_file_path = Path(os.path.dirname(os.path.realpath(__file__)))
|
|
shutil.copyfile(current_file_path / str(RUN_SCORING_SCRIPT), str(entry_script))
|
|
source_config = SourceConfig(
|
|
root_folder=source_directory_path,
|
|
entry_script=entry_script,
|
|
script_params=["--model_id", model_id,
|
|
"--script_name", SCORE_SCRIPT,
|
|
"--datastore_name", azure_config.datastore_name,
|
|
"--datastore_image_path", str(Path(target_path) / IMAGEDATA_FILE_NAME)])
|
|
run_config = create_run_config(azure_config, source_config, environment_name=python_environment_name)
|
|
exp = Experiment(workspace=workspace, name=args.experiment_name)
|
|
run = exp.submit(run_config)
|
|
logging.info(f"Submitted run {run.id} in experiment {run.experiment.name}")
|
|
logging.info(f"Run URL: {run.get_portal_url()}")
|
|
source_directory.cleanup()
|
|
logging.info(f"Deleted submission directory {source_directory_path}")
|
|
return run.id, target_path
|