# InnerEye-Inference/submit_for_inference.py
#
# 128 lines
# 6.3 KiB
# Python

# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import logging
import os
import shutil
import tempfile
import uuid
from pathlib import Path
from typing import Tuple
from attr import dataclass
from azureml.core import Experiment, Model, ScriptRunConfig, Environment, Datastore
from azureml.core.runconfig import RunConfiguration
from azureml.core.workspace import WORKSPACE_DEFAULT_BLOB_STORE_NAME, Workspace
from azure_config import AzureConfig
from source_config import SourceConfig
# Version of the registered Python environment that create_run_config requests from the workspace.
ENVIRONMENT_VERSION = "1"
# NOTE(review): presumably the file name of the inference result; not referenced in the visible code.
DEFAULT_RESULT_IMAGE_NAME = "segmentation.dcm.zip"
# Sub-folder of the temporary submission directory into which the image data is written before upload.
DEFAULT_DATA_FOLDER = "data"
# Value passed as `--script_name` to the entry script.
SCORE_SCRIPT = "score.py"
# Script located next to this file; it is copied into the submission directory and used as the entry script.
RUN_SCORING_SCRIPT = "download_model_and_run_scoring.py"
# The tag on the registered model that holds the name of the Python environment
PYTHON_ENVIRONMENT_NAME = "python_environment_name"
# File name under which the submitted image data is written locally and uploaded to the datastore.
IMAGEDATA_FILE_NAME = "imagedata.zip"
@dataclass
class SubmitForInferenceConfig:
    """
    Settings that describe a single inference submission.
    """
    # Id of the registered AzureML model that the inference run should use.
    model_id: str
    # Raw bytes of the image data; written to disk as IMAGEDATA_FILE_NAME and uploaded to the datastore.
    image_data: bytes
    # Name of the AzureML experiment that the run is submitted to.
    experiment_name: str
def create_run_config(azure_config: AzureConfig,
                      source_config: SourceConfig,
                      environment_name: str) -> ScriptRunConfig:
    """
    Creates a configuration to run the entry script given in `source_config` in AzureML.
    :param azure_config: Azure related configuration; provides the workspace and the compute cluster to run on.
    :param source_config: Configuration for the script execution: root folder, entry script and script arguments.
    :param environment_name: Name of an existing Python environment registered in the workspace. It is retrieved
    with version ENVIRONMENT_VERSION; no fallback creation of the environment is attempted here.
    :return: The configured script run.
    """
    # AzureML seems to sometimes expect the entry script path in Linux format, hence convert to posix path
    entry_script_relative_path = source_config.entry_script.relative_to(source_config.root_folder).as_posix()
    logging.info(f"Entry script {entry_script_relative_path} ({source_config.entry_script} "
                 f"relative to source directory {source_config.root_folder})")
    max_run_duration = 43200  # 12 hours in seconds
    # Look up the workspace once and reuse it for both the environment and the datastore lookup.
    workspace = azure_config.get_workspace()
    run_config = RunConfiguration(script=entry_script_relative_path, arguments=source_config.script_params)
    env = Environment.get(workspace, name=environment_name, version=ENVIRONMENT_VERSION)
    logging.info(f"Using existing Python environment '{env.name}'.")
    run_config.environment = env
    run_config.target = azure_config.cluster
    run_config.max_run_duration_seconds = max_run_duration
    # Use blob storage for storing the source, rather than the FileShares section of the storage account.
    run_config.source_directory_data_store = workspace.datastores.get(WORKSPACE_DEFAULT_BLOB_STORE_NAME).name
    return ScriptRunConfig(source_directory=str(source_config.root_folder), run_config=run_config)
def submit_for_inference(args: SubmitForInferenceConfig,
                         workspace: Workspace,
                         azure_config: AzureConfig) -> Tuple[str, str]:
    """
    Create and submit an inference run to AzureML.
    The image data is uploaded to the datastore named in `azure_config` under a fresh uuid-named folder, and a
    run that executes RUN_SCORING_SCRIPT is submitted to the experiment `args.experiment_name`.
    :param args: configuration, see SubmitForInferenceConfig
    :param workspace: Azure ML workspace.
    :param azure_config: An object with all necessary information for accessing Azure.
    :return: Azure Run Id (and the target path on the datastore, including the uuid, for a unit
    test to ensure that the image data zip is overwritten after inference)
    :raises ValueError: If the model does not carry the tag PYTHON_ENVIRONMENT_NAME.
    """
    logging.info("Identifying model")
    model = Model(workspace=workspace, id=args.model_id)
    model_id = model.id
    logging.info(f"Identified model {model_id}")

    # Retrieve the name of the Python environment that the training run used. This environment
    # should have been registered. Check this before anything is uploaded, so that a model
    # without the tag does not leave orphaned image data behind in the datastore.
    python_environment_name = model.tags.get(PYTHON_ENVIRONMENT_NAME, "")
    if not python_environment_name:
        raise ValueError(
            f"Model ID: {model_id} does not contain an environment tag {PYTHON_ENVIRONMENT_NAME}")

    # The context manager removes the submission directory even if upload or submission fails.
    with tempfile.TemporaryDirectory() as source_directory:
        source_directory_path = Path(source_directory)
        logging.info(f"Building inference run submission in {source_directory_path}")

        image_folder = source_directory_path / DEFAULT_DATA_FOLDER
        image_folder.mkdir(parents=True, exist_ok=True)
        image_path = image_folder / IMAGEDATA_FILE_NAME
        image_path.write_bytes(args.image_data)

        image_datastore = Datastore(workspace, azure_config.datastore_name)
        target_path = f"{azure_config.image_data_folder}/{str(uuid.uuid4())}"
        image_datastore.upload_files(files=[str(image_path)], target_path=target_path,
                                     overwrite=False, show_progress=False)
        # Remove the local copy: the folder is used as the run's source directory below, and the
        # image data has already been uploaded to the datastore.
        image_path.unlink()

        # Copy the scoring script from the repository. This will start the model download from Azure,
        # and invoke the scoring script.
        entry_script = source_directory_path / Path(RUN_SCORING_SCRIPT).name
        current_file_path = Path(__file__).resolve().parent
        shutil.copyfile(current_file_path / str(RUN_SCORING_SCRIPT), str(entry_script))

        # Datastore paths use forward slashes, hence convert to posix format independent of the local OS.
        datastore_image_path = (Path(target_path) / IMAGEDATA_FILE_NAME).as_posix()
        source_config = SourceConfig(
            root_folder=source_directory_path,
            entry_script=entry_script,
            script_params=["--model_id", model_id,
                           "--script_name", SCORE_SCRIPT,
                           "--datastore_name", azure_config.datastore_name,
                           "--datastore_image_path", datastore_image_path])
        run_config = create_run_config(azure_config, source_config, environment_name=python_environment_name)
        exp = Experiment(workspace=workspace, name=args.experiment_name)
        run = exp.submit(run_config)
        logging.info(f"Submitted run {run.id} in experiment {run.experiment.name}")
        logging.info(f"Run URL: {run.get_portal_url()}")

    logging.info(f"Deleted submission directory {source_directory_path}")
    return run.id, target_path