Changed azureml version + adjusted image conversion functions + improved paths for unit tests + changed test for ACI deployment

This commit is contained in:
Alexandra Teste 2019-03-28 15:49:37 -07:00
Parent 07485e26eb
Commit 4f3d9d63ba
9 changed files: 133 additions and 74 deletions

View file

@ -23,7 +23,7 @@ dependencies:
- jupyter>=1.0.0
- pytest>=3.6.4
- pip:
- azureml-sdk[notebooks,contrib]==1.0.10
- azureml-sdk[notebooks,contrib]==1.0.21
- black>=18.6b4
- papermill>=0.15.0
- ipywebrtc

View file

@ -114,12 +114,14 @@
"from azureml.exceptions import ProjectSystemException, UserErrorException\n",
"\n",
"# Computer Vision repository\n",
"sys.path.append(\"../../\")\n",
"sys.path.extend([\"..\", \"../..\", \"../../..\"])\n",
"# This \"sys.path.extend()\" statement allows us to move up the directory hierarchy \n",
"# and access the utils_ic and utils_cv packages\n",
"from utils_cv.generate_deployment_env import generate_yaml\n",
"from utils_ic.common import ic_root_path\n",
"from utils_ic.constants import IMAGENET_IM_SIZE\n",
"from utils_ic.imagenet_models import model_to_learner\n",
"from utils_ic.image_conversion import ims2json\n",
"sys.path.append(\"../../../\")\n",
"from utils_cv.generate_deployment_env import generate_yaml"
"from utils_ic.image_conversion import ims2strlist\n",
"from utils_ic.imagenet_models import model_to_learner"
]
},
{
@ -385,7 +387,8 @@
"outputs": [],
"source": [
"# Initialize the run\n",
"run = experiment.start_logging()"
"run = experiment.start_logging(snapshot_directory=None)\n",
"# \"snapshot_directory=None\" prevents a snapshot from being saved -- this helps keep the amount of storage used low"
]
},
{
@ -412,7 +415,7 @@
{
"data": {
"text/plain": [
"<azureml._restclient.models.batch_artifact_content_information_dto.BatchArtifactContentInformationDto at 0x1d3d41d5748>"
"<azureml._restclient.models.batch_artifact_content_information_dto.BatchArtifactContentInformationDto at 0x234476f2a20>"
]
},
"execution_count": 10,
@ -476,8 +479,8 @@
"text": [
"Model:\n",
" --> Name: im_classif_resnet18\n",
" --> ID: im_classif_resnet18:24\n",
" --> Path:azureml-models\\im_classif_resnet18\\24\\im_classif_resnet18.pkl\n"
" --> ID: im_classif_resnet18:66\n",
" --> Path:azureml-models\\im_classif_resnet18\\66\\im_classif_resnet18.pkl\n"
]
}
],
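For reference, the registration summary above is typically produced by a call along the lines of the sketch below (not shown in this diff); the model path is an assumption and the notebook's actual call may differ.

# Hedged sketch: registering the serialized learner with the workspace.
# "outputs/im_classif_resnet18.pkl" is an assumed path, not taken from this diff.
model = run.register_model(
    model_name="im_classif_resnet18",
    model_path="outputs/im_classif_resnet18.pkl",
)
print(f"Model:\n --> Name: {model.name}\n --> ID: {model.id}")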
@ -566,11 +569,11 @@
{
"data": {
"text/html": [
"<table style=\"width:100%\"><tr><th>Experiment</th><th>Id</th><th>Type</th><th>Status</th><th>Details Page</th><th>Docs Page</th></tr><tr><td>image-classifier-webservice</td><td>7188bb7c-c5a6-426e-9146-ce3c1405d57f</td><td></td><td>Completed</td><td><a href=\"https://mlworkspace.azure.ai/portal/subscriptions/b8c23406-f9b5-4ccb-8a65-a8cb5dcd6a5a/resourceGroups/alteste-rg/providers/Microsoft.MachineLearningServices/workspaces/ws2_tutorials2/experiments/image-classifier-webservice/runs/7188bb7c-c5a6-426e-9146-ce3c1405d57f\" target=\"_blank\" rel=\"noopener\">Link to Azure Portal</a></td><td><a href=\"https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.run.Run?view=azure-ml-py\" target=\"_blank\" rel=\"noopener\">Link to Documentation</a></td></tr></table>"
"<table style=\"width:100%\"><tr><th>Experiment</th><th>Id</th><th>Type</th><th>Status</th><th>Details Page</th><th>Docs Page</th></tr><tr><td>image-classifier-webservice</td><td>c4ab4a1f-89a6-4a44-a14f-f4b995a2e1d8</td><td></td><td>Completed</td><td><a href=\"https://mlworkspace.azure.ai/portal/subscriptions/b8c23406-f9b5-4ccb-8a65-a8cb5dcd6a5a/resourceGroups/alteste-rg/providers/Microsoft.MachineLearningServices/workspaces/ws2_tutorials2/experiments/image-classifier-webservice/runs/c4ab4a1f-89a6-4a44-a14f-f4b995a2e1d8\" target=\"_blank\" rel=\"noopener\">Link to Azure Portal</a></td><td><a href=\"https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.run.Run?view=azure-ml-py\" target=\"_blank\" rel=\"noopener\">Link to Documentation</a></td></tr></table>"
],
"text/plain": [
"Run(Experiment: image-classifier-webservice,\n",
"Id: 7188bb7c-c5a6-426e-9146-ce3c1405d57f,\n",
"Id: c4ab4a1f-89a6-4a44-a14f-f4b995a2e1d8,\n",
"Type: None,\n",
"Status: Completed)"
]
@ -710,9 +713,9 @@
}
],
"source": [
"# Create a deployment-specific yaml file from the image_classification/environment.yml\n",
"# Create a deployment-specific yaml file from image_classification/environment.yml\n",
"generate_yaml(\n",
" directory='../../', \n",
" directory=ic_root_path(), \n",
" ref_filename='environment.yml',\n",
" needed_libraries=['pytorch', 'spacy', 'fastai', 'dataclasses'],\n",
" conda_filename='myenv.yml'\n",
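The generated myenv.yml is what gets baked into the Docker image whose creation log appears below. A rough sketch of that step, assuming the ContainerImage API from azureml-sdk 1.0.21 (the scoring script name and variable names are assumptions, not taken from this diff):

# Sketch only: build the Docker image that serves the registered model
from azureml.core.image import ContainerImage

image_config = ContainerImage.image_configuration(
    execution_script="score.py",   # assumed name of the scoring script created earlier
    runtime="python",
    conda_file="myenv.yml",        # the file generate_yaml() just wrote
)
docker_image = ContainerImage.create(
    workspace=ws,
    name="image-classif-resnet18-f48",
    models=[model],                # the model registered during the experiment run
    image_config=image_config,
)
docker_image.wait_for_creation(show_output=True)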
@ -782,9 +785,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Running........................................................................................................................\n",
"SucceededImage creation operation finished for image image-classif-resnet18-f48:12, operation \"Succeeded\"\n",
"Wall time: 11min 26s\n"
"Running....................................................................................................................\n",
"SucceededImage creation operation finished for image image-classif-resnet18-f48:27, operation \"Succeeded\"\n",
"Wall time: 10min 54s\n"
]
}
],
@ -921,7 +924,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Running..................................\n",
"Running.......................................\n",
"SucceededACI service creation operation finished, operation \"Succeeded\"\n"
]
}
@ -1003,7 +1006,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"A service typically expects input data to be in a JSON serializable format. Here, we use our own `ims2json()` function to transform our .jpg images into strings of bytes."
"A service typically expects input data to be in a JSON serializable format. Here, we use our own `ims2jstrlist()` function to transform our .jpg images into strings of bytes."
]
},
{
@ -1013,8 +1016,10 @@
"outputs": [],
"source": [
"# Convert images to json object\n",
"images_fname_list = [os.path.join('test_images', 'im_11.jpg'), os.path.join('test_images', 'im_97.jpg')]\n",
"test_samples = ims2json(images_fname_list, current_directory)"
"images_fname_list = [os.path.join(ic_root_path(), 'notebooks', 'deployment', 'test_images', 'im_11.jpg'), \n",
" os.path.join(ic_root_path(), 'notebooks', 'deployment', 'test_images', 'im_97.jpg')]\n",
"im_string_list = ims2strlist(images_fname_list)\n",
"test_samples = json.dumps({\"data\": im_string_list})"
]
},
{
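On the service side, the scoring script has to reverse this encoding. A minimal sketch of that decoding, assuming Pillow is available in the scoring environment (the actual score.py used by this notebook may differ):

# Illustration only: decode the payload built with ims2strlist() + json.dumps()
import base64
import json
from io import BytesIO

from PIL import Image  # assumption: Pillow is installed in the scoring environment


def decode_request(raw_body: str) -> list:
    """Turn the {"data": [base64 strings]} payload back into PIL images."""
    im_string_list = json.loads(raw_body)["data"]
    images = []
    for im_string in im_string_list:
        im_bytes = base64.b64decode(im_string)  # undo the client-side b64encode()
        images.append(Image.open(BytesIO(im_bytes)))
    return images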
@ -1082,15 +1087,19 @@
"name": "stdout",
"output_type": "stream",
"text": [
"POST to url: http://20.42.37.68:80/score\n",
"Prediction: [{\"label\": \"water_bottle\", \"probability\": \"0.8001841306686401\"}, {\"label\": \"water_bottle\", \"probability\": \"0.68577641248703\"}]\n"
]
}
],
"source": [
"# Send the same test data\n",
"headers = {'Content-Type':'application/json'}\n",
"payload = {\"data\": im_string_list}\n",
"resp = requests.post(service.scoring_uri, json=payload)\n",
"\n",
"resp = requests.post(service.scoring_uri, test_samples, headers=headers)\n",
"# Alternative way of sending the test data\n",
"# headers = {'Content-Type':'application/json'}\n",
"# resp = requests.post(service.scoring_uri, test_samples, headers=headers)\n",
"\n",
"print(f\"POST to url: {service.scoring_uri}\")\n",
"print(f\"Prediction: {resp.text}\")"
@ -1107,9 +1116,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"As we discussed above, Azure Container Instances are typically used to develop and test deployments. They are typically configured with CPUs, which may be insufficient in a production environment, when many requests per second need to be served. They also require the user to [manage resources](https://docs.microsoft.com/en-us/azure/container-instances/container-instances-container-groups#deployment), which may be complicated and time consuming. \n",
"As we discussed above, Azure Container Instances are typically used to develop and test deployments. They are typically configured with CPUs, which usually suffice when the number of requests per second is not too high. When working with several instances, we can configure them further by specifically [allocating CPU resources](https://docs.microsoft.com/en-us/azure/container-instances/container-instances-container-groups#deployment) to each of them.\n",
"\n",
"For production requirements, we recommend deploying models to Azure Kubernetes Service (AKS). It is a convenient infrastructure as it manages hosted Kubernetes environments, and makes it easy to deploy and manage containerized applications without container orchestration expertise.\n",
"For production requirements, i.e. when &gt; 100 requests per second are expected, we recommend deploying models to Azure Kubernetes Service (AKS). It is a convenient infrastructure as it manages hosted Kubernetes environments, and makes it easy to deploy and manage containerized applications without container orchestration expertise.\n",
"\n",
"We will see an example of this in the next notebook (to be published)."
]
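As a preview of that notebook, an AKS deployment with the same SDK would look roughly like the sketch below; the cluster and service names are placeholders, and docker_image stands for the image built earlier in this notebook:

# Sketch only: deploy the same Docker image to an AKS cluster
from azureml.core.compute import AksCompute, ComputeTarget
from azureml.core.webservice import AksWebservice, Webservice

# Provision a small AKS cluster attached to the workspace (names and sizes are assumptions)
prov_config = AksCompute.provisioning_configuration(agent_count=3, vm_size="Standard_D3_v2")
aks_target = ComputeTarget.create(ws, "imgclassif-aks", prov_config)
aks_target.wait_for_completion(show_output=True)

# Deploy the existing image behind an AKS-backed web service
aks_config = AksWebservice.deploy_configuration()
aks_service = Webservice.deploy_from_image(
    workspace=ws,
    name="image-classif-aks-svc",
    image=docker_image,
    deployment_config=aks_config,
    deployment_target=aks_target,
)
aks_service.wait_for_deployment(show_output=True)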
@ -1118,7 +1127,13 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## 8. Clean up <a id=\"clean\"></a>"
"## 8. Clean up <a id=\"clean\"></a>\n",
"\n",
"Throughout the notebook, we used a workspace and Azure container instances.\n",
"\n",
"When we first created our workspace, 4 extra resources were automatically added to it: a [container registry](https://azure.microsoft.com/en-us/pricing/details/container-registry/), a [storage account](https://azure.microsoft.com/en-us/pricing/details/storage/blobs/), [Application Insights](https://azure.microsoft.com/en-us/pricing/details/monitor/) and a [key vault](https://azure.microsoft.com/en-us/pricing/details/key-vault/), each with its own cost. In this notebook, we also hosted our web service on container instances. Overall, assuming it took us about 1 hour to go through this notebook, and assuming that our web service was up for 30 minutes (so we could have time to test it), this tutorial cost us a few dollars.\n",
"\n",
"In order not to incur extra costs, let's now delete the resources we no longer need."
]
},
{
@ -1170,7 +1185,7 @@
"source": [
"### 8.C Workspace deletion <a id=\"wsdel\"></a>\n",
"\n",
"When we first created our workspace, 4 extra resources were automatically added to it: a [container registry](https://azure.microsoft.com/en-us/pricing/details/container-registry/), a [storage account](https://azure.microsoft.com/en-us/pricing/details/storage/blobs/), [Application Insights](https://azure.microsoft.com/en-us/pricing/details/monitor/) and a [key vault](https://azure.microsoft.com/en-us/pricing/details/key-vault/), each with its own cost. If our goal is to continue using our workspace, we should keep it available. On the contrary, if we plan on no longer using it and its associated resources, we can delete it.\n",
"If our goal is to continue using our workspace, we should keep it available. On the contrary, if we plan on no longer using it and its associated resources, we can delete it.\n",
"\n",
"<i><b>Note:</b> Deleting the workspace will delete all the experiments, outputs, models, Docker images, deployments, etc. that we created in that workspace</i>"
]
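If we do decide to tear everything down, the workspace and its four dependent resources can be removed in one call; a sketch, assuming azureml-sdk 1.0.21:

# Illustration only: delete the workspace together with its auto-created resources
# (container registry, storage account, Application Insights, key vault)
ws.delete(delete_dependent_resources=True, no_wait=False)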

View file

@ -87,13 +87,14 @@ from azureml.core.webservice import AciWebservice, Webservice
from azureml.exceptions import ProjectSystemException, UserErrorException
# Computer Vision repository
sys.path.append("../../")
from utils_ic.constants import IMAGENET_IM_SIZE
from utils_ic.imagenet_models import model_to_learner
from utils_ic.image_conversion import ims2json
sys.path.append("../../../")
sys.path.extend(["..", "../..", "../../.."])
# This "sys.path.extend()" statement allows us to move up the directory hierarchy
# and access the utils_ic and utils_cv packages
from utils_cv.generate_deployment_env import generate_yaml
from utils_ic.common import ic_root_path
from utils_ic.constants import IMAGENET_IM_SIZE
from utils_ic.image_conversion import ims2strlist
from utils_ic.imagenet_models import model_to_learner
# ## 4. Azure workspace <a id="workspace"></a>
@ -253,7 +254,8 @@ print(
# Initialize the run
run = experiment.start_logging()
run = experiment.start_logging(snapshot_directory=None)
# "snapshot_directory=None" prevents a snapshot from being saved -- this helps keep the amount of storage used low
# Now that we have launched our run, we can see our experiment on the Azure portal, under `Experiments` (in the left-hand side list).
@ -374,9 +376,9 @@ get_ipython().run_cell_magic(
# In[19]:
# Create a deployment-specific yaml file from the image_classification/environment.yml
# Create a deployment-specific yaml file from image_classification/environment.yml
generate_yaml(
directory="../../",
directory=ic_root_path(),
ref_filename="environment.yml",
needed_libraries=["pytorch", "spacy", "fastai", "dataclasses"],
conda_filename="myenv.yml",
@ -544,17 +546,22 @@ print(
# ### 7.A Using the `run` API <a id="api"></a>
# A service typically expects input data to be in a JSON serializable format. Here, we use our own `ims2json()` function to transform our .jpg images into strings of bytes.
# A service typically expects input data to be in a JSON serializable format. Here, we use our own `ims2strlist()` function to convert our .jpg images into base64-encoded strings, which we then wrap into a JSON-serializable object.
# In[28]:
# Convert images to json object
images_fname_list = [
os.path.join("test_images", "im_11.jpg"),
os.path.join("test_images", "im_97.jpg"),
os.path.join(
ic_root_path(), "notebooks", "deployment", "test_images", "im_11.jpg"
),
os.path.join(
ic_root_path(), "notebooks", "deployment", "test_images", "im_97.jpg"
),
]
test_samples = ims2json(images_fname_list, current_directory)
im_string_list = ims2strlist(images_fname_list)
test_samples = json.dumps({"data": im_string_list})
# In[29]:
@ -584,9 +591,12 @@ for k in range(len(result)):
# Send the same test data
headers = {"Content-Type": "application/json"}
payload = {"data": im_string_list}
resp = requests.post(service.scoring_uri, json=payload)
resp = requests.post(service.scoring_uri, test_samples, headers=headers)
# Alternative way of sending the test data
# headers = {'Content-Type':'application/json'}
# resp = requests.post(service.scoring_uri, test_samples, headers=headers)
print(f"POST to url: {service.scoring_uri}")
print(f"Prediction: {resp.text}")
@ -594,13 +604,19 @@ print(f"Prediction: {resp.text}")
# ### 7.C Notes on web service deployment <a id="notes"></a>
# As we discussed above, Azure Container Instances are typically used to develop and test deployments. They are typically configured with CPUs, which may be insufficient in a production environment, when many requests per second need to be served. They also require the user to [manage resources](https://docs.microsoft.com/en-us/azure/container-instances/container-instances-container-groups#deployment), which may be complicated and time consuming.
# As we discussed above, Azure Container Instances are typically used to develop and test deployments. They are typically configured with CPUs, which usually suffice when the number of requests per second is not too high. When working with several instances, we can configure them further by specifically [allocating CPU resources](https://docs.microsoft.com/en-us/azure/container-instances/container-instances-container-groups#deployment) to each of them.
#
# For production requirements, we recommend deploying models to Azure Kubernetes Service (AKS). It is a convenient infrastructure as it manages hosted Kubernetes environments, and makes it easy to deploy and manage containerized applications without container orchestration expertise.
# For production requirements, i.e. when more than 100 requests per second are expected, we recommend deploying models to Azure Kubernetes Service (AKS). It is a convenient infrastructure, as it manages hosted Kubernetes environments and makes it easy to deploy and manage containerized applications without container orchestration expertise.
#
# We will see an example of this in the next notebook (to be published).
# ## 8. Clean up <a id="clean"></a>
#
# Throughout the notebook, we used a workspace and Azure container instances.
#
# When we first created our workspace, 4 extra resources were automatically added to it: a [container registry](https://azure.microsoft.com/en-us/pricing/details/container-registry/), a [storage account](https://azure.microsoft.com/en-us/pricing/details/storage/blobs/), [Application Insights](https://azure.microsoft.com/en-us/pricing/details/monitor/) and a [key vault](https://azure.microsoft.com/en-us/pricing/details/key-vault/), each with its own cost. In this notebook, we also hosted our web service on container instances. Overall, assuming it took us about 1 hour to go through this notebook, and assuming that our web service was up for 30 minutes (so we could have time to test it), this tutorial cost us a few dollars.
#
# In order not to incur extra costs, let's now delete the resources we no longer need.
# ### 8.A Service termination <a id="svcterm"></a>
#
@ -626,7 +642,7 @@ service.delete()
# ### 8.C Workspace deletion <a id="wsdel"></a>
#
# When we first created our workspace, 4 extra resources were automatically added to it: a [container registry](https://azure.microsoft.com/en-us/pricing/details/container-registry/), a [storage account](https://azure.microsoft.com/en-us/pricing/details/storage/blobs/), [Application Insights](https://azure.microsoft.com/en-us/pricing/details/monitor/) and a [key vault](https://azure.microsoft.com/en-us/pricing/details/key-vault/), each with its own cost. If our goal is to continue using our workspace, we should keep it available. On the contrary, if we plan on no longer using it and its associated resources, we can delete it.
# If our goal is to continue using our workspace, we should keep it available. On the contrary, if we plan on no longer using it and its associated resources, we can delete it.
#
# <i><b>Note:</b> Deleting the workspace will delete all the experiments, outputs, models, Docker images, deployments, etc. that we created in that workspace</i>

View file

@ -4,24 +4,25 @@
import os
from utils_ic.datasets import Urls, unzip_url
from utils_ic.image_conversion import im2base64, ims2json
from utils_ic.image_conversion import im2base64, ims2strlist
def test_im2base64():
def test_ims2strlist():
""" Tests extraction of image content and conversion into string"""
data_path = unzip_url(Urls.fridge_objects_path, exist_ok=True)
im_list = [
os.path.join("can", "im_1.jpg"),
os.path.join("carton", "im_62.jpg"),
os.path.join(data_path, "can", "im_1.jpg"),
os.path.join(data_path, "carton", "im_62.jpg"),
]
input_to_service = ims2json(im_list, data_path)
assert isinstance(input_to_service, str)
assert input_to_service[0:11] == '{"data": ["'
im_string_list = ims2strlist(im_list)
# input_to_service = json.dumps({"data": im_string_list})
assert isinstance(im_string_list, list)
# assert input_to_service[0:11] == '{"data": ["'
def test_ims2json():
def test_im2base64():
""" Tests extraction of image content and conversion into bytes"""
data_path = unzip_url(Urls.fridge_objects_path, exist_ok=True)
im_name = os.path.join("can", "im_1.jpg")
im_content = im2base64(im_name, data_path)
im_name = os.path.join(data_path, "can", "im_1.jpg")
im_content = im2base64(im_name)
assert isinstance(im_content, bytes)
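A slightly stronger version of test_ims2strlist() could also check the length and element types of the returned list, for example:

# Possible additional assertions (illustration, not part of this commit)
assert len(im_string_list) == len(im_list)
assert all(isinstance(im_string, str) for im_string in im_string_list)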

View file

@ -2,7 +2,10 @@
# https://github.com/Microsoft/Recommenders/tree/master/tests
import glob
import os
import papermill as pm
import shutil
from utils_ic.datasets import Urls, unzip_url
# Unless manually modified, python3 should be
@ -42,6 +45,32 @@ def test_deploy_1_notebook_run(notebooks):
OUTPUT_NOTEBOOK,
parameters=dict(
PM_VERSION=pm.__version__,
DATA_PATH=unzip_url(Urls.fridge_objects_path, exist_ok=True)),
DATA_PATH=unzip_url(Urls.fridge_objects_path, exist_ok=True),
),
kernel_name=KERNEL_NAME,
)
try:
os.remove("myenv.yml")
except OSError:
pass
try:
os.remove("score.py")
except OSError:
pass
try:
os.remove("output.ipynb")
except OSError:
pass
# There should be only one .pkl file, but its name may vary
file_list = glob.glob("./*.pkl")
for filePath in file_list:
try:
os.remove(filePath)
except OSError:
pass
shutil.rmtree(os.path.join(os.getcwd(), "azureml-models"))
shutil.rmtree(os.path.join(os.getcwd(), "models"))
shutil.rmtree(os.path.join(os.getcwd(), "outputs"))
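Note that shutil.rmtree() raises if a directory was never created, e.g. when the notebook run fails early; a more defensive variant, if that matters, would be:

# Defensive cleanup sketch: skip directories that do not exist
for dir_name in ("azureml-models", "models", "outputs"):
    shutil.rmtree(os.path.join(os.getcwd(), dir_name), ignore_errors=True)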

View file

@ -2,24 +2,25 @@
# Licensed under the MIT License.
# python regular libraries
import json
import os
# import json
# import os
from pathlib import Path
from typing import Union
from base64 import b64encode
def im2base64(im_name: str, im_dir: str) -> bytes:
def im2base64(im_path: Union[Path, str]) -> bytes:
"""
Args:
im_name (string): Image file name
im_dir (string): Image directory name
im_path (Path or string): Path to the image
Returns: im_bytes
"""
with open(os.path.join(im_dir, im_name), "rb") as image:
with open(im_path, "rb") as image:
# Extract image bytes
im_content = image.read()
# Convert bytes into a string
@ -28,22 +29,19 @@ def im2base64(im_name: str, im_dir: str) -> bytes:
return im_bytes
def ims2json(im_list: list, im_dir: str) -> json:
def ims2strlist(im_path_list: list) -> list:
"""
Args:
im_list (list of strings): List of image file names
im_dir (string): Directory name
im_path_list (list of strings): List of image paths
Returns: input_to_service: String containing the based64-encoded images
Returns: im_string_list: List containing base64-encoded images
decoded into strings
"""
im_string_list = []
for im_name in im_list:
im_string_list.append(im2base64(im_name, im_dir).decode("utf-8"))
for im_path in im_path_list:
im_string_list.append(im2base64(im_path).decode("utf-8"))
input_to_service = json.dumps({"data": im_string_list})
return input_to_service
return im_string_list
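A quick way to sanity-check the refactored functions is a base64 round trip; the image paths below are placeholders:

# Round-trip check (illustration only): each string should decode back to the original bytes
import base64

from utils_ic.image_conversion import ims2strlist

im_paths = ["test_images/im_11.jpg", "test_images/im_97.jpg"]  # assumed example paths
im_string_list = ims2strlist(im_paths)

for im_path, im_string in zip(im_paths, im_string_list):
    with open(im_path, "rb") as f:
        assert base64.b64decode(im_string) == f.read()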

View file

View file

View file

@ -4,16 +4,16 @@
import os
import sys
sys.path.append("../../")
sys.path.extend([".", "..", "../..", "../../.."])
from utils_cv.generate_deployment_env import generate_yaml
from utils_ic.common import ic_root_path
def test_generate_yaml():
"""Tests creation of deployment-specific yaml file
from existing image_classification/environment.yml"""
generate_yaml(
directory="../../image_classification",
directory=ic_root_path(),
ref_filename="environment.yml",
needed_libraries=["fastai", "pytorch"],
conda_filename="mytestyml.yml",