|
|
|
@ -1,453 +1,453 @@
|
|
|
|
|
{
|
|
|
|
|
"cells": [
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"# Create a multimodel deployment using a custom container"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"In this example we serve two models from the same endpoint and same deployment. \n",
|
|
|
|
|
"\n",
|
|
|
|
|
"Both model files are registered as a single model asset on Azure and loaded simultaneously in the scoring script. The scoring script parses each request for a \"model\" field and routes the payload accordingly. \n",
|
|
|
|
|
"\n",
|
|
|
|
|
"A custom container is not necessary for multimodel deployment - the custom container used here simply adds Python requirements via `pip` to an Azure Inference Minimal base image. An equivalent multimodel deployment can be created using a conda file-based environment."
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"## 1. Configure parameters, assets, and clients"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 1.1 Set workspace details"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"subscription_id = \"<SUBSCRIPTION_ID>\"\n",
|
|
|
|
|
"resource_group = \"<RESOURCE_GROUP>\"\n",
|
|
|
|
|
"workspace_name = \"<AML_WORKSPACE_NAME>\""
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 1.2 Set endpoint details"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"import random\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"endpoint_name = f\"multimod-{random.randint(0,10000)}\""
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 1.3 Set asset paths\n",
|
|
|
|
|
"Define the directories containing the two model files as well as a directory which contains the scoring script"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"import os\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"base_path = \"../../../../../cli/endpoints/online/custom-container/minimal/multimodel\"\n",
|
|
|
|
|
"models_path = os.path.join(base_path, \"models\")\n",
|
|
|
|
|
"code_path = os.path.join(base_path, \"code\")\n",
|
|
|
|
|
"test_data_path = os.path.join(base_path, \"test-data\")"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 1.4 Examine the models folder\n",
|
|
|
|
|
"The models folder contains two models which will be loaded simultaneously by the scoring script."
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"import os\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"os.listdir(models_path)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 1.5 Examine the scoring script\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"The scoring script loads both models into a dictionary keyed on their name in the `init` function. In the run function, each request is parsed for a `model` key in the JSON to choose the model. The `data` payload is then passed to the appropriate model.\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"```python \n",
|
|
|
|
|
"import joblib\n",
|
|
|
|
|
"import os\n",
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
"from pathlib import Path\n",
|
|
|
|
|
"import json\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"models = None\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def init():\n",
|
|
|
|
|
" global models\n",
|
|
|
|
|
" model_dir = Path(os.getenv(\"AZUREML_MODEL_DIR\")) / \"models\"\n",
|
|
|
|
|
" models = {m[:-4]: joblib.load(model_dir / m) for m in os.listdir(model_dir)}\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def run(data):\n",
|
|
|
|
|
" data = json.loads(data)\n",
|
|
|
|
|
" model = models[data[\"model\"]]\n",
|
|
|
|
|
" payload = pd.DataFrame(data[\"data\"])\n",
|
|
|
|
|
" try:\n",
|
|
|
|
|
" ret = model.predict(payload)\n",
|
|
|
|
|
" return pd.DataFrame(ret).to_json()\n",
|
|
|
|
|
" except KeyError:\n",
|
|
|
|
|
" raise KeyError(\"No such model\")\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"``` "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 1.6 Examine the Dockerfile\n",
|
|
|
|
|
"The dockerfile is located at `base_path` / `minimal-multimodel.dockerfile`. It uses the AzureML Inference Minimal CPU image as a base and adds relevant dependencies for the scoring script.\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"```docker\n",
|
|
|
|
|
"FROM mcr.microsoft.com/azureml/minimal-ubuntu20.04-py38-cpu-inference:latest\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"RUN pip install pandas numpy scikit-learn joblib\n",
|
|
|
|
|
"```"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 1.7 Create an MLClient instance"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"from azure.ai.ml import MLClient\n",
|
|
|
|
|
"from azure.ai.ml.entities import (\n",
|
|
|
|
|
" ManagedOnlineEndpoint,\n",
|
|
|
|
|
" ManagedOnlineDeployment,\n",
|
|
|
|
|
" Model,\n",
|
|
|
|
|
" CodeConfiguration,\n",
|
|
|
|
|
" Environment,\n",
|
|
|
|
|
" BuildContext,\n",
|
|
|
|
|
" ProbeSettings,\n",
|
|
|
|
|
")\n",
|
|
|
|
|
"from azure.identity import DefaultAzureCredential\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"credential = DefaultAzureCredential()\n",
|
|
|
|
|
"ml_client = MLClient(\n",
|
|
|
|
|
" credential,\n",
|
|
|
|
|
" subscription_id=subscription_id,\n",
|
|
|
|
|
" resource_group_name=resource_group,\n",
|
|
|
|
|
" workspace_name=workspace_name,\n",
|
|
|
|
|
")"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"credential = DefaultAzureCredential()\n",
|
|
|
|
|
"ml_client = MLClient(\n",
|
|
|
|
|
" credential,\n",
|
|
|
|
|
" subscription_id=subscription_id,\n",
|
|
|
|
|
" resource_group_name=resource_group,\n",
|
|
|
|
|
" workspace_name=workspace_name,\n",
|
|
|
|
|
")"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"## 2. Create an endpoint"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 2.1 Define and create the endpoint"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"endpoint = ManagedOnlineEndpoint(name=endpoint_name)\n",
|
|
|
|
|
"poller = ml_client.online_endpoints.begin_create_or_update(endpoint)\n",
|
|
|
|
|
"poller.wait()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 2.2 Confirm that creation was successful"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"from azure.ai.ml.exceptions import DeploymentException\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"status = poller.status()\n",
|
|
|
|
|
"if status != \"Succeeded\":\n",
|
|
|
|
|
" raise DeploymentException(status)\n",
|
|
|
|
|
"else:\n",
|
|
|
|
|
" print(\"Endpoint creation succeeded\")\n",
|
|
|
|
|
" endpoint = poller.result()\n",
|
|
|
|
|
" print(endpoint)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"## 3. Create the deployment"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 3.1 Define the deployment"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"deployment = ManagedOnlineDeployment(\n",
|
|
|
|
|
" name=\"custom-container-multimodel\",\n",
|
|
|
|
|
" endpoint_name=endpoint_name,\n",
|
|
|
|
|
" model=Model(name=\"minimal-multimodel\", path=models_path),\n",
|
|
|
|
|
" code_configuration=CodeConfiguration(\n",
|
|
|
|
|
" code=code_path, scoring_script=\"minimal-multimodel-score.py\"\n",
|
|
|
|
|
" ),\n",
|
|
|
|
|
" environment=Environment(\n",
|
|
|
|
|
" name=\"minimal-multimodel\",\n",
|
|
|
|
|
" build=BuildContext(\n",
|
|
|
|
|
" path=base_path,\n",
|
|
|
|
|
" dockerfile_path=\"minimal-multimodel.dockerfile\",\n",
|
|
|
|
|
" ),\n",
|
|
|
|
|
" inference_config={\n",
|
|
|
|
|
" \"liveness_route\": {\"path\": \"/\", \"port\": 5001},\n",
|
|
|
|
|
" \"readiness_route\": {\"path\": \"/\", \"port\": 5001},\n",
|
|
|
|
|
" \"scoring_route\": {\"path\": \"/score\", \"port\": 5001},\n",
|
|
|
|
|
" },\n",
|
|
|
|
|
" ),\n",
|
|
|
|
|
" instance_type=\"Standard_DS3_v2\",\n",
|
|
|
|
|
" instance_count=1,\n",
|
|
|
|
|
")"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 3.2 Create the deployment"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"poller = ml_client.online_deployments.begin_create_or_update(deployment)\n",
|
|
|
|
|
"poller.wait()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 3.3 Confirm that creation was successful"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"status = poller.status()\n",
|
|
|
|
|
"if status != \"Succeeded\":\n",
|
|
|
|
|
" raise DeploymentException(status)\n",
|
|
|
|
|
"else:\n",
|
|
|
|
|
" print(\"Deployment creation succeeded\")\n",
|
|
|
|
|
" deployment = poller.result()\n",
|
|
|
|
|
" print(deployment)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 3.4 Set traffic to 100% "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"endpoint.traffic = {\"custom-container-multimodel\": 100}\n",
|
|
|
|
|
"poller = ml_client.begin_create_or_update(endpoint)\n",
|
|
|
|
|
"poller.wait()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"## 4. Test the endpoint\n",
|
|
|
|
|
"The `model` JSON field in both JSON payloads indicates which model to score."
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 4.1 Test the diabetes model"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"import json\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"res = ml_client.online_endpoints.invoke(\n",
|
|
|
|
|
" endpoint_name, request_file=os.path.join(test_data_path, \"diabetes-test-data.json\")\n",
|
|
|
|
|
")\n",
|
|
|
|
|
"print(json.loads(res))"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 4.2 Test the iris model"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"res = ml_client.online_endpoints.invoke(\n",
|
|
|
|
|
" endpoint_name, request_file=os.path.join(test_data_path, \"iris-test-data.json\")\n",
|
|
|
|
|
")\n",
|
|
|
|
|
"print(json.loads(res))"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"## 5. Delete assets"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 5.1 Delete the endpoint"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"poller = ml_client.online_endpoints.begin_delete(name=endpoint_name)"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
"display_name": "Python 3.10 - SDK V2",
|
|
|
|
|
"language": "python",
|
|
|
|
|
"name": "python310-sdkv2"
|
|
|
|
|
},
|
|
|
|
|
"language_info": {
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
"version": 3
|
|
|
|
|
},
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
"name": "python",
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
|
"version": "3.10.6"
|
|
|
|
|
},
|
|
|
|
|
"vscode": {
|
|
|
|
|
"interpreter": {
|
|
|
|
|
"hash": "e530ce6154f972640d3e5b626ff5929e0848c7598c5ca98c96181f27d47882a4"
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 2
|
|
|
|
|
}
|
|
|
|
|
"cells": [
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"# Create a multimodel deployment using a custom container"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"In this example we serve two models from the same endpoint and same deployment. \n",
|
|
|
|
|
"\n",
|
|
|
|
|
"Both model files are registered as a single model asset on Azure and loaded simultaneously in the scoring script. The scoring script parses each request for a \"model\" field and routes the payload accordingly. \n",
|
|
|
|
|
"\n",
|
|
|
|
|
"A custom container is not necessary for multimodel deployment - the custom container used here simply adds Python requirements via `pip` to an Azure Inference Minimal base image. An equivalent multimodel deployment can be created using a conda file-based environment."
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"## 1. Configure parameters, assets, and clients"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 1.1 Set workspace details"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"subscription_id = \"<SUBSCRIPTION_ID>\"\n",
|
|
|
|
|
"resource_group = \"<RESOURCE_GROUP>\"\n",
|
|
|
|
|
"workspace_name = \"<AML_WORKSPACE_NAME>\""
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 1.2 Set endpoint details"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"import random\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"endpoint_name = f\"multimod-{random.randint(0,10000)}\""
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 1.3 Set asset paths\n",
|
|
|
|
|
"Define the directories containing the two model files as well as a directory which contains the scoring script"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"import os\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"base_path = \"../../../../../cli/endpoints/online/custom-container/minimal/multimodel\"\n",
|
|
|
|
|
"models_path = os.path.join(base_path, \"models\")\n",
|
|
|
|
|
"code_path = os.path.join(base_path, \"code\")\n",
|
|
|
|
|
"test_data_path = os.path.join(base_path, \"test-data\")"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 1.4 Examine the models folder\n",
|
|
|
|
|
"The models folder contains two models which will be loaded simultaneously by the scoring script."
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"import os\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"os.listdir(models_path)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 1.5 Examine the scoring script\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"The scoring script loads both models into a dictionary keyed on their name in the `init` function. In the run function, each request is parsed for a `model` key in the JSON to choose the model. The `data` payload is then passed to the appropriate model.\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"```python \n",
|
|
|
|
|
"import joblib\n",
|
|
|
|
|
"import os\n",
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
"from pathlib import Path\n",
|
|
|
|
|
"import json\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"models = None\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def init():\n",
|
|
|
|
|
" global models\n",
|
|
|
|
|
" model_dir = Path(os.getenv(\"AZUREML_MODEL_DIR\")) / \"models\"\n",
|
|
|
|
|
" models = {m[:-4]: joblib.load(model_dir / m) for m in os.listdir(model_dir)}\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def run(data):\n",
|
|
|
|
|
" data = json.loads(data)\n",
|
|
|
|
|
" model = models[data[\"model\"]]\n",
|
|
|
|
|
" payload = pd.DataFrame(data[\"data\"])\n",
|
|
|
|
|
" try:\n",
|
|
|
|
|
" ret = model.predict(payload)\n",
|
|
|
|
|
" return pd.DataFrame(ret).to_json()\n",
|
|
|
|
|
" except KeyError:\n",
|
|
|
|
|
" raise KeyError(\"No such model\")\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"``` "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 1.6 Examine the Dockerfile\n",
|
|
|
|
|
"The dockerfile is located at `base_path` / `minimal-multimodel.dockerfile`. It uses the AzureML Inference Minimal CPU image as a base and adds relevant dependencies for the scoring script.\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"```docker\n",
|
|
|
|
|
"FROM mcr.microsoft.com/azureml/minimal-ubuntu22.04-py39-cpu-inference:latest\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"RUN pip install pandas numpy scikit-learn joblib\n",
|
|
|
|
|
"```"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 1.7 Create an MLClient instance"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"from azure.ai.ml import MLClient\n",
|
|
|
|
|
"from azure.ai.ml.entities import (\n",
|
|
|
|
|
" ManagedOnlineEndpoint,\n",
|
|
|
|
|
" ManagedOnlineDeployment,\n",
|
|
|
|
|
" Model,\n",
|
|
|
|
|
" CodeConfiguration,\n",
|
|
|
|
|
" Environment,\n",
|
|
|
|
|
" BuildContext,\n",
|
|
|
|
|
" ProbeSettings,\n",
|
|
|
|
|
")\n",
|
|
|
|
|
"from azure.identity import DefaultAzureCredential\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"credential = DefaultAzureCredential()\n",
|
|
|
|
|
"ml_client = MLClient(\n",
|
|
|
|
|
" credential,\n",
|
|
|
|
|
" subscription_id=subscription_id,\n",
|
|
|
|
|
" resource_group_name=resource_group,\n",
|
|
|
|
|
" workspace_name=workspace_name,\n",
|
|
|
|
|
")"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"credential = DefaultAzureCredential()\n",
|
|
|
|
|
"ml_client = MLClient(\n",
|
|
|
|
|
" credential,\n",
|
|
|
|
|
" subscription_id=subscription_id,\n",
|
|
|
|
|
" resource_group_name=resource_group,\n",
|
|
|
|
|
" workspace_name=workspace_name,\n",
|
|
|
|
|
")"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"## 2. Create an endpoint"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 2.1 Define and create the endpoint"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"endpoint = ManagedOnlineEndpoint(name=endpoint_name)\n",
|
|
|
|
|
"poller = ml_client.online_endpoints.begin_create_or_update(endpoint)\n",
|
|
|
|
|
"poller.wait()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 2.2 Confirm that creation was successful"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"from azure.ai.ml.exceptions import DeploymentException\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"status = poller.status()\n",
|
|
|
|
|
"if status != \"Succeeded\":\n",
|
|
|
|
|
" raise DeploymentException(status)\n",
|
|
|
|
|
"else:\n",
|
|
|
|
|
" print(\"Endpoint creation succeeded\")\n",
|
|
|
|
|
" endpoint = poller.result()\n",
|
|
|
|
|
" print(endpoint)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"## 3. Create the deployment"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 3.1 Define the deployment"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"deployment = ManagedOnlineDeployment(\n",
|
|
|
|
|
" name=\"custom-container-multimodel\",\n",
|
|
|
|
|
" endpoint_name=endpoint_name,\n",
|
|
|
|
|
" model=Model(name=\"minimal-multimodel\", path=models_path),\n",
|
|
|
|
|
" code_configuration=CodeConfiguration(\n",
|
|
|
|
|
" code=code_path, scoring_script=\"minimal-multimodel-score.py\"\n",
|
|
|
|
|
" ),\n",
|
|
|
|
|
" environment=Environment(\n",
|
|
|
|
|
" name=\"minimal-multimodel\",\n",
|
|
|
|
|
" build=BuildContext(\n",
|
|
|
|
|
" path=base_path,\n",
|
|
|
|
|
" dockerfile_path=\"minimal-multimodel.dockerfile\",\n",
|
|
|
|
|
" ),\n",
|
|
|
|
|
" inference_config={\n",
|
|
|
|
|
" \"liveness_route\": {\"path\": \"/\", \"port\": 5001},\n",
|
|
|
|
|
" \"readiness_route\": {\"path\": \"/\", \"port\": 5001},\n",
|
|
|
|
|
" \"scoring_route\": {\"path\": \"/score\", \"port\": 5001},\n",
|
|
|
|
|
" },\n",
|
|
|
|
|
" ),\n",
|
|
|
|
|
" instance_type=\"Standard_DS3_v2\",\n",
|
|
|
|
|
" instance_count=1,\n",
|
|
|
|
|
")"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 3.2 Create the deployment"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"poller = ml_client.online_deployments.begin_create_or_update(deployment)\n",
|
|
|
|
|
"poller.wait()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 3.3 Confirm that creation was successful"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"status = poller.status()\n",
|
|
|
|
|
"if status != \"Succeeded\":\n",
|
|
|
|
|
" raise DeploymentException(status)\n",
|
|
|
|
|
"else:\n",
|
|
|
|
|
" print(\"Deployment creation succeeded\")\n",
|
|
|
|
|
" deployment = poller.result()\n",
|
|
|
|
|
" print(deployment)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 3.4 Set traffic to 100% "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"endpoint.traffic = {\"custom-container-multimodel\": 100}\n",
|
|
|
|
|
"poller = ml_client.begin_create_or_update(endpoint)\n",
|
|
|
|
|
"poller.wait()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"## 4. Test the endpoint\n",
|
|
|
|
|
"The `model` JSON field in both JSON payloads indicates which model to score."
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 4.1 Test the diabetes model"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"import json\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"res = ml_client.online_endpoints.invoke(\n",
|
|
|
|
|
" endpoint_name, request_file=os.path.join(test_data_path, \"diabetes-test-data.json\")\n",
|
|
|
|
|
")\n",
|
|
|
|
|
"print(json.loads(res))"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 4.2 Test the iris model"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"res = ml_client.online_endpoints.invoke(\n",
|
|
|
|
|
" endpoint_name, request_file=os.path.join(test_data_path, \"iris-test-data.json\")\n",
|
|
|
|
|
")\n",
|
|
|
|
|
"print(json.loads(res))"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"## 5. Delete assets"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 5.1 Delete the endpoint"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"poller = ml_client.online_endpoints.begin_delete(name=endpoint_name)"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
"display_name": "Python 3.10 - SDK V2",
|
|
|
|
|
"language": "python",
|
|
|
|
|
"name": "python310-sdkv2"
|
|
|
|
|
},
|
|
|
|
|
"language_info": {
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
"version": 3
|
|
|
|
|
},
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
"name": "python",
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
|
"version": "3.10.6"
|
|
|
|
|
},
|
|
|
|
|
"vscode": {
|
|
|
|
|
"interpreter": {
|
|
|
|
|
"hash": "e530ce6154f972640d3e5b626ff5929e0848c7598c5ca98c96181f27d47882a4"
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 2
|
|
|
|
|
}
|
|
|
|
|