* remove wildcard 01 notebook and sweep.py tool

* remove wildcards for multilabel notebook

* 03 notebook wildcard removal

* deployment on aci wildcard removal

* sweeper remove wildcard

* web testing notebook remove wildcard

* clear notebook output

* only change import cells in notebook 21

* update flake8 to not allow wildcard imports
This commit is contained in:
JS 2019-06-06 19:13:40 -04:00 committed by GitHub
Parent 4d0ef6e0d9
Commit af937b45e5
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 1430 additions and 1668 deletions

View file

@ -6,11 +6,10 @@
# E266 Too many leading '#' for block comment
# E501 Line too long (82 > 79 characters)
# W503 Line break occurred before a binary operator
# F403 'from module import *' used; unable to detect undefined names
# F405 '<function>' may be undefined, or defined from star imports
# E402 module level import not at top of file
# E731 do not assign a lambda expression, use a def
# F821 undefined name 'get_ipython' --> from generated python files using nbconvert
ignore = E203, E266, E501, W503, F403, F405, E402, E731, F821
ignore = E203, E266, E501, W503, F405, E402, E731, F821
max-line-length = 79
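As an illustrative sketch (not part of this diff), the flake8 change means star imports are now flagged and the notebooks must use the explicit form, as in the import cells updated below:

```python
# Old style -- now reported as F403 ('from module import *' used)
# and F405 (name may be undefined, or defined from star imports):
# from fastai.vision import *

# New style -- every name is imported explicitly:
from fastai.vision import models, ImageList, imagenet_stats, cnn_learner
```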

.gitignore vendored
View file

@ -129,6 +129,7 @@ data
*.pkl
# aml notebooks outputs
*/notebooks/.azureml/
*/notebooks/aml_config*
*/notebooks/azureml-models
*/notebooks/myenv.yml

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -102,7 +102,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Import `fastai`. For now, we'll import all (`import *`) so that we can easily use different utilities provided by the fast.ai library."
"Import all the functions we need."
]
},
{
@ -118,8 +118,10 @@
"from utils_cv.classification.data import Urls, is_data_multilabel\n",
"from utils_cv.common.data import unzip_url\n",
"from utils_cv.classification.model import hamming_accuracy\n",
"from fastai.vision import *\n",
"from fastai.metrics import accuracy"
"from fastai.metrics import accuracy\n",
"from fastai.vision import (\n",
" models, ImageList, imagenet_stats, cnn_learner, get_transforms, open_image\n",
")"
]
},
{
@ -223,7 +225,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@ -269,7 +271,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@ -286,7 +288,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@ -305,18 +307,9 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading: \"https://download.pytorch.org/models/resnet18-5c106cde.pth\" to C:\\Users\\jehrling/.torch\\models\\resnet18-5c106cde.pth\n",
"100%|████████████████████████████████████████████████████████████████| 46827520/46827520 [00:02<00:00, 22633424.82it/s]\n"
]
}
],
"outputs": [],
"source": [
"learn = cnn_learner(data, ARCHITECTURE, metrics=metric)"
]
@ -330,7 +323,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 11,
"metadata": {},
"outputs": [
{
@ -349,31 +342,31 @@
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>1.869025</td>\n",
" <td>1.423374</td>\n",
" <td>0.423077</td>\n",
" <td>04:04</td>\n",
" <td>2.058926</td>\n",
" <td>1.514164</td>\n",
" <td>0.269231</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>1.818973</td>\n",
" <td>1.426767</td>\n",
" <td>0.230769</td>\n",
" <td>02:27</td>\n",
" <td>1.965364</td>\n",
" <td>1.429903</td>\n",
" <td>0.346154</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>1.738750</td>\n",
" <td>1.409257</td>\n",
" <td>0.307692</td>\n",
" <td>02:39</td>\n",
" <td>1.909731</td>\n",
" <td>1.397431</td>\n",
" <td>0.269231</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>1.712961</td>\n",
" <td>1.407489</td>\n",
" <td>0.192308</td>\n",
" <td>02:43</td>\n",
" <td>1.835540</td>\n",
" <td>1.414460</td>\n",
" <td>0.269231</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
@ -399,7 +392,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
@ -415,7 +408,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 13,
"metadata": {},
"outputs": [
{
@ -434,87 +427,87 @@
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>1.407487</td>\n",
" <td>1.343539</td>\n",
" <td>0.230769</td>\n",
" <td>03:20</td>\n",
" <td>1.746515</td>\n",
" <td>1.363381</td>\n",
" <td>0.346154</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>1.415772</td>\n",
" <td>1.165356</td>\n",
" <td>0.384615</td>\n",
" <td>03:11</td>\n",
" <td>1.634118</td>\n",
" <td>1.201214</td>\n",
" <td>0.346154</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>1.353353</td>\n",
" <td>0.818305</td>\n",
" <td>0.807692</td>\n",
" <td>02:50</td>\n",
" <td>1.531497</td>\n",
" <td>0.884183</td>\n",
" <td>0.730769</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>1.167449</td>\n",
" <td>0.554827</td>\n",
" <td>0.884615</td>\n",
" <td>02:49</td>\n",
" <td>1.317602</td>\n",
" <td>0.626355</td>\n",
" <td>0.807692</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>1.021737</td>\n",
" <td>0.375777</td>\n",
" <td>0.923077</td>\n",
" <td>03:09</td>\n",
" <td>1.158468</td>\n",
" <td>0.490393</td>\n",
" <td>0.884615</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>0.890174</td>\n",
" <td>0.290593</td>\n",
" <td>0.923077</td>\n",
" <td>03:37</td>\n",
" <td>0.998016</td>\n",
" <td>0.432764</td>\n",
" <td>0.884615</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>6</td>\n",
" <td>0.771805</td>\n",
" <td>0.256439</td>\n",
" <td>0.961538</td>\n",
" <td>02:54</td>\n",
" <td>0.889387</td>\n",
" <td>0.369686</td>\n",
" <td>0.923077</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>7</td>\n",
" <td>0.685205</td>\n",
" <td>0.245022</td>\n",
" <td>0.961538</td>\n",
" <td>02:54</td>\n",
" <td>0.797426</td>\n",
" <td>0.307707</td>\n",
" <td>0.923077</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>8</td>\n",
" <td>0.601584</td>\n",
" <td>0.237433</td>\n",
" <td>0.961538</td>\n",
" <td>03:07</td>\n",
" <td>0.705103</td>\n",
" <td>0.292405</td>\n",
" <td>0.923077</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>9</td>\n",
" <td>0.535286</td>\n",
" <td>0.239410</td>\n",
" <td>0.630186</td>\n",
" <td>0.280770</td>\n",
" <td>0.961538</td>\n",
" <td>03:49</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>10</td>\n",
" <td>0.475175</td>\n",
" <td>0.237056</td>\n",
" <td>0.566886</td>\n",
" <td>0.273385</td>\n",
" <td>0.961538</td>\n",
" <td>03:54</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>11</td>\n",
" <td>0.440129</td>\n",
" <td>0.237084</td>\n",
" <td>0.519313</td>\n",
" <td>0.274469</td>\n",
" <td>0.961538</td>\n",
" <td>02:51</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
@ -558,7 +551,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 14,
"metadata": {},
"outputs": [
{
@ -585,7 +578,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
@ -595,14 +588,14 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"419 ms ± 10 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
"12.3 ms ± 98.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
@ -622,7 +615,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
@ -631,7 +624,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 18,
"metadata": {},
"outputs": [
{
@ -692,7 +685,7 @@
"\n",
"```python\n",
"import pandas as pd\n",
"from utils_ic.parameter_sweeper import add_value_labels\n",
"from utils_cv.classification.parameter_sweeper import add_value_labels\n",
"%matplotlib inline\n",
"\n",
"df = pd.DataFrame({\n",
@ -925,7 +918,7 @@
"\n",
"```python\n",
"import pandas as pd\n",
"from utils_ic.parameter_sweeper import add_value_labels\n",
"from utils_cv.classification.parameter_sweeper import add_value_labels\n",
"%matplotlib inline\n",
"\n",
"df = pd.DataFrame({\n",
@ -1022,7 +1015,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "cvbp",
"display_name": "Python (cvbp)",
"language": "python",
"name": "cvbp"
},

View file

@ -71,8 +71,7 @@
"import sys\n",
"\n",
"# fast.ai\n",
"from fastai.vision import *\n",
"import torchvision.models as models\n",
"from fastai.vision import models\n",
"\n",
"# Azure\n",
"import azureml.core\n",
@ -501,13 +500,14 @@
"# Copyright (c) Microsoft. All rights reserved.\n",
"# Licensed under the MIT license.\n",
"\n",
"import os\n",
"import json\n",
"\n",
"from base64 import b64decode\n",
"from io import BytesIO\n",
"\n",
"from azureml.core.model import Model\n",
"from fastai.vision import *\n",
"from fastai.vision import load_learner, open_image\n",
"\n",
"def init():\n",
" global model\n",
@ -531,7 +531,7 @@
" result.append({\"label\": str(pred_class), \"probability\": str(outputs[pred_idx].item())})\n",
" except Exception as e:\n",
" result.append({\"label\": str(e), \"probability\": ''})\n",
" return result\n"
" return result"
]
},
{

View file

@ -61,12 +61,13 @@
"\n",
"# Regular python libraries\n",
"import inspect\n",
"import json\n",
"import os\n",
"import requests\n",
"import sys\n",
"\n",
"# fast.ai\n",
"from fastai.vision import *\n",
"from fastai.vision import open_image\n",
"\n",
"# Azure\n",
"import azureml.core\n",

View file

@ -1,668 +0,0 @@
#!/usr/bin/env python
# coding: utf-8
# <i>Copyright (c) Microsoft Corporation. All rights reserved.</i>
#
# <i>Licensed under the MIT License.</i>
#
# # Deployment of a model as a service with Azure Container Instances
# ## Table of contents <a id="table_of_content"></a>
#
# 1. [Introduction](#intro)
# 1. [Pre-requisites](#pre-reqs)
# 1. [Library import](#libraries)
# 1. [Azure workspace](#workspace)
# 1. [SDK version](#sdk)
# 1. [Workspace creation](#ws)
# 1. [Model retrieval and export](#model)
# 1. [Model deployment on Azure](#deploy)
# 1. [Model registration](#register)
# 1. [Without experiment](#noexp)
# 1. [With an experiment](#exp)
# 1. [Scoring script](#scoring)
# 1. [Environment setup](#env)
# 1. [Computational resources](#compute)
# 1. [Web service deployment](#websvc)
# 1. [Testing of the web service](#test)
# 1. [Using the run API](#api)
# 1. [Via a raw HTTP request](#http)
# 1. [Notes on web service deployment](#notes)
# 1. [Clean-up](#clean)
# 1. [Service termination](#svcterm)
# 1. [Image deletion](#imdel)
# 1. [Workspace deletion](#wsdel)
# 1. [Next steps](#next-steps)
# ## 1. Introduction <a id="intro"></a>
#
# Building a machine learning model with high precision and/or recall is very satisfying. However, it is not necessarily the end of the story. This model may need to go into production to be called in real time, and serve results to our end users. How do we go about doing that? In this notebook, we will learn:
# - how to register a model on Azure
# - how to create a Docker image that contains our model
# - how to deploy a web service on [Azure Container Instances](https://azure.microsoft.com/en-us/services/container-instances/) using this Docker image
# - how to test that our service works well, from within the notebook.
#
# <img src="media/ACI_diagram_2.jpg" width="500" style="float: left;" alt="Web service deployment workflow">
# ## 2. Pre-requisites
# <a id="pre-reqs"></a>
#
# For this notebook to run properly on our machine, the following should already be in place:
#
# * Local machine setup
# * We need to set up the "cvbp" conda environment. [These instructions](https://github.com/Microsoft/ComputerVisionBestPractices/blob/staging/image_classification/README.md) explain how to do that.
#
#
# * Azure subscription setup
# * We also need an account on the Azure platform. If we do not have one, we first need to:
# * [Create an account](https://azure.microsoft.com/en-us/free/services/machine-learning/)
# * [Add a subscription](https://ms.portal.azure.com/#blade/Microsoft_Azure_Billing/SubscriptionsBlade) -- _We can start with a free one_
# * [Add a resource group](https://ms.portal.azure.com/#blade/HubsExtension/Resources/resourceType/Microsoft.Resources%2Fsubscriptions%2FresourceGroups)
# ## 3. Library import <a id="libraries"></a>
# Throughout this notebook, we will be using a variety of libraries. We are listing them here for better readability.
# In[1]:
# For automatic reloading of modified libraries
get_ipython().run_line_magic("reload_ext", "autoreload")
get_ipython().run_line_magic("autoreload", "2")
# Regular python libraries
import os
import requests
import sys
# fast.ai
from fastai.vision import *
import torchvision.models as models
# Azure
import azureml.core
from azureml.core import Experiment, Workspace
from azureml.core.image import ContainerImage
from azureml.core.model import Model
from azureml.core.webservice import AciWebservice, Webservice
from azureml.exceptions import ProjectSystemException, UserErrorException
# Computer Vision repository
sys.path.extend(["..", "../..", "../../.."])
# This "sys.path.extend()" statement allows us to move up the directory hierarchy
# and access the utils_ic and utils_cv packages
from utils_cv.generate_deployment_env import generate_yaml
from utils_ic.common import ic_root_path
from utils_ic.constants import IMAGENET_IM_SIZE
from utils_ic.image_conversion import ims2strlist
from utils_ic.imagenet_models import model_to_learner
# ## 4. Azure workspace <a id="workspace"></a>
# ### 4.A SDK version <a id="sdk"></a>
#
# Before we start, let's check which version of the Azure SDK we are working with.
# In[2]:
# Check core SDK version number
print(f"Azure ML SDK Version: {azureml.core.VERSION}")
# ### 4.B Workspace creation <a id="ws"></a>
# Now that we have our environment and proper libraries in place, let's load an existing workspace or create a new one on our Azure account, and save it to a local configuration file (`./aml_config/config.json`).
#
# If it is the first time we create a workspace, or if we are missing our `config.json` file, we need to provide the appropriate:
# - subscription ID: the ID of the Azure subscription we are using
# - resource group: the name of the resource group in which our workspace resides
# - workspace_region: the geographical area in which our workspace resides (examples are available [here](https://azure.microsoft.com/en-us/global-infrastructure/geographies/))
# - workspace_name: the name of the workspace we want to create or retrieve.
# In[3]:
# Let's define these variables here - These pieces of information can be found on the portal
subscription_id = os.getenv("SUBSCRIPTION_ID", default="<our_subscription_id>")
resource_group = os.getenv("RESOURCE_GROUP", default="<our_resource_group>")
workspace_name = os.getenv(
"WORKSPACE_NAME", default="<our_workspace_name>"
) # (e.g. "myworkspace")
workspace_region = os.getenv(
"WORKSPACE_REGION", default="<our_workspace_region>"
) # (e.g. "westus2")
try:
# Let's load the workspace from the configuration file
ws = Workspace.from_config()
print("Workspace was loaded successfully from the configuration file")
except (UserErrorException, ProjectSystemException):
# or directly from Azure, if it already exists (exist_ok=True).
# If it does not exist, let's create a workspace from scratch
ws = Workspace.create(
name=workspace_name,
subscription_id=subscription_id,
resource_group=resource_group,
location=workspace_region,
create_resource_group=True,
exist_ok=True,
)
ws.write_config()
print("Workspace was loaded successfully from Azure")
# Let's check that the workspace is properly loaded
# In[4]:
# Print the workspace attributes
print(
f"Workspace name: {ws.name}\n Azure region: {ws.location}\n Subscription id: {ws.subscription_id}\n Resource group: {ws.resource_group}"
)
# We can see this workspace on the Azure portal by sequentially clicking on:
# - Resource groups, and clicking the one we referenced above
# <img src="media/resource_group.jpg" width="800" alt="Azure portal view of resource group">
# - Workspace_name
# <img src="media/workspace.jpg" width="800" alt="Azure portal view of workspace">
# ## 5. Model retrieval and export <a id="model"></a>
#
# For demonstration purposes, we will use here a ResNet18 model, pretrained on ImageNet. The following steps would be the same if we had trained a model locally (cf. [**01_training_introduction.ipynb**](https://github.com/Microsoft/ComputerVisionBestPractices/blob/staging/image_classification/notebooks/01_training_introduction.ipynb) notebook for details).
#
# Let's first retrieve the model.
# In[5]:
learn = model_to_learner(models.resnet18(pretrained=True), IMAGENET_IM_SIZE)
# To be able to use this model, we need to export it to our local machine. We store it in an `outputs/` subfolder.
# In[6]:
current_directory = os.getcwd()
output_folder = os.path.join(current_directory, "outputs")
MODEL_NAME = (
"im_classif_resnet18"
) # Name we will give our model both locally and on Azure
PICKLED_MODEL_NAME = MODEL_NAME + ".pkl"
os.makedirs(output_folder, exist_ok=True)
learn.export(os.path.join(output_folder, PICKLED_MODEL_NAME))
# ## 6. Model deployment on Azure <a id="deploy"></a>
# ### 6.A Model registration <a id="register"></a>
#
# Our final goal is to deploy our model as a web service. To do so, we need to first register it in our workspace, i.e. place it in our workspace's model registry. We can do this in 2 ways:
# 1. register the model directly
# 2. upload the model on Azure and then register it there.
#
# The advantage of the first method is that it does not require the setup of an experiment or of any runs. The advantage of the second method is that we can keep track of the models that we used or trained in a given experiment, and understand where the ones we ended up registering come from.
#
# The cells below show each of the methods.
# #### 6.A.a Without experiment <a id="noexp"></a>
#
# We leverage the `register` method from the Azure ML `Model` object. For that, we just need the location of the model we saved on our local machine, its name and our workspace object.
# In[7]:
model = Model.register(
model_path=os.path.join("outputs", PICKLED_MODEL_NAME),
model_name=MODEL_NAME,
tags={"Model": "Pretrained ResNet18"},
description="Image classifier",
workspace=ws,
)
# #### 6.A.b With an experiment <a id="exp"></a>
#
# An experiment contains a series of trials called `Runs`. A run typically contains some tasks, such as training a model, etc. Through a run's methods, we can log several metrics such as training and test loss and accuracy, and even tag our run. The full description of the run class is available [here](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.run.run?view=azure-ml-py). In our case, however, we just need the run to attach our model file to our workspace and experiment.
#
# We do this by using `run.upload_file()` and `run.register_model()`, which takes:
# - a `model_name` that represents what our model does
# - and the `model_path` relative to the run.
#
# Using `run.upload_file()` and specifying the `outputs/` folder allows us to check the presence of the uploaded model on the Azure portal. This is especially convenient when we want to try different versions of a model, or even different models entirely, and keep track of them all, even if we end up registering only the best performing one.
#
# Let's first create a new experiment. If an experiment with the same name already exists in our workspace, the run we will generate will be recorded under that already existing experiment.
# In[8]:
# Create a new/Retrieve an existing experiment
experiment_name = "image-classifier-webservice"
experiment = Experiment(workspace=ws, name=experiment_name)
print(
f"New/Existing experiment:\n --> Name: {experiment.name}\n --> Workspace name: {experiment.workspace.name}"
)
# In[9]:
# Initialize the run
run = experiment.start_logging(snapshot_directory=None)
# "snapshot_directory=None" prevents a snapshot from being saved -- this helps keep the amount of storage used low
# Now that we have launched our run, we can see our experiment on the Azure portal, under `Experiments` (in the left-hand side list).
#
# <img src="media/experiment.jpg" width="800" alt="Azure portal view of experiment">
# We can now attach our local model to our workspace and experiment.
# In[10]:
# Upload the model (.pkl) file to Azure
run.upload_file(
name=os.path.join("outputs", PICKLED_MODEL_NAME),
path_or_stream=os.path.join(
current_directory, "outputs", PICKLED_MODEL_NAME
),
)
# In[11]:
# Register the model with the workspace
model = run.register_model(
model_name=MODEL_NAME,
model_path=os.path.join("outputs", PICKLED_MODEL_NAME),
tags={"Model": "Pretrained ResNet18"},
)
# !!! We need to make sure that the model name we use here is the same as in the scoring script below !!!
# Now that the model is uploaded and registered, we can see it on the Azure platform, under `Outputs` and `Models`
#
# <div class="inline-block">
# <img src="media/uploaded_model.jpg" width="800" alt="Azure portal view of the Outputs/ folder">
# </div>
#
# <div class="inline-block">
# <img src="media/models.jpg" width="800" alt="Azure portal view of the Models section">
# </div>
# We can also check that it is programmatically accessible
# In[12]:
print(
f"Model:\n --> Name: {model.name}\n --> ID: {model.id}\n --> Path:{model._get_model_path_remote(model.name, model.version, ws)}"
)
# In[13]:
run.get_file_names()
# If we are also interested in verifying which model we uploaded, we can download it to our local machine
# In[14]:
model.download()
# <i><b>Note:</b> If we ran the cells in both the "with an experiment" and "without experiment" sections, we got 2 iterations of the same model registered on Azure. This is not a problem as any operation that we perform on the "model" object, later on, will be associated with the latest version of the model that we registered. To clean things up, we can go to the portal, select the model we do not want and click the "Delete" button. In general, we would register the model using only one of these 2 methods. </i>
# We are all done with our model registration, so we can close our run.
# In[15]:
# Close the run
run.complete()
# In[16]:
# Access the portal
run
# ### 6.B Scoring script <a id="scoring"></a>
# For the web service to return predictions on a given input image, we need to provide it with instructions on how to use the model we just registered. These instructions are stored in the scoring script.
#
# This script must contain two required functions, `init()` and `run(input_data)`:
# - In the `init()` function, we typically load the model into a global object. This function is executed only once when the Docker container is started.
# - In the `run(input_data)` function, the model is used to predict a value based on the input data. The input and output of `run` typically use JSON as serialization and de-serialization format but we are not limited to that.
#
# <i><b>Note:</b> The "run()" function here is different from the "run" object we created in our experiment</i>
#
# This file must also be stored in the current directory.
# In[17]:
scoring_script = "score.py"
# In[18]:
get_ipython().run_cell_magic(
"writefile",
"$scoring_script",
'# Copyright (c) Microsoft. All rights reserved.\n# Licensed under the MIT license.\n\nimport json\n\nfrom base64 import b64decode\nfrom io import BytesIO\n\nfrom azureml.core.model import Model\nfrom fastai.vision import *\n\ndef init():\n global model\n model_path = Model.get_model_path(model_name=\'im_classif_resnet18\')\n # ! We cannot use MODEL_NAME here otherwise the execution on Azure will fail !\n \n model_dir_path, model_filename = os.path.split(model_path)\n model = load_learner(path=model_dir_path, fname=model_filename)\n\n\ndef run(raw_data):\n\n # Expects raw_data to be a list within a json file\n result = [] \n \n for im_string in json.loads(raw_data)[\'data\']:\n im_bytes = b64decode(im_string)\n try:\n im = open_image(BytesIO(im_bytes))\n pred_class, pred_idx, outputs = model.predict(im)\n result.append({"label": str(pred_class), "probability": str(outputs[pred_idx].item())})\n except Exception as e:\n result.append({"label": str(e), "probability": \'\'})\n return result',
)
# ### 6.C Environment setup <a id="env"></a>
#
# In order to make predictions on the Azure platform, it is important to create an environment as similar as possible to the one in which the model was trained. Here, we use a fast.ai pretrained model that also requires pytorch and a few other libraries. To re-create this environment, we use a [Docker container](https://www.docker.com/resources/what-container). We configure it via a yaml file that will contain all the conda dependencies needed by the model. This yaml file is a subset of `image_classification/environment.yml`.
#
# <i><b>Note:</b> If we had trained our model locally, we would have created a yaml file that contains the same libraries as what is installed on our local machine.</i>
# In[19]:
# Create a deployment-specific yaml file from image_classification/environment.yml
generate_yaml(
directory=ic_root_path(),
ref_filename="environment.yml",
needed_libraries=["pytorch", "spacy", "fastai", "dataclasses"],
conda_filename="myenv.yml",
)
# Note: Take a look at the generate_yaml() function for details on how to create your yaml file from scratch
# There are different ways of creating a Docker image on Azure. Here, we create it separately from the service it will be used by. This way of proceeding gives us direct access to the Docker image object. Thus, if the service deployment fails, but the Docker image gets deployed successfully, we can try deploying the service again, without having to create a new image all over again.
# In[20]:
# Configure the Docker image
image_config = ContainerImage.image_configuration(
execution_script="score.py",
runtime="python",
conda_file="myenv.yml",
description="Image with fast.ai Resnet18 model (fastai 1.0.48)",
tags={
"training set": "ImageNet",
"architecture": "CNN ResNet18",
"type": "Pretrained",
},
)
# In[21]:
# Create the Docker image
docker_image = ContainerImage.create(
name="image-classif-resnet18-f48",
models=[model], # the model is passed as part of a list
image_config=image_config,
workspace=ws,
)
# The image name should not contain more than 32 characters, and should not contain any spaces, dots or underscores
# A Docker image can contain several model objects. Here, we just have one.
# In[22]:
get_ipython().run_cell_magic(
"time",
"",
"docker_image.wait_for_creation(show_output = True) # This can take up to 12 min",
)
# When the image gets successfully created, we expect to see:
#
# `Creating image
# Running .....
# SucceededImage creation operation finished for image <docker_image_name>, operation "Succeeded"
# Wall time: Xmin`
#
# It happens, sometimes, that the deployment of the Docker image fails. Re-running the previous command typically solves the problem. If it doesn't, however, we can run the following one and inspect the deployment logs.
# In[23]:
print(ws.images["image-classif-resnet18-f48"].image_build_log_uri)
# ### 6.D Computational resources <a id="compute"></a>
# In this notebook, we use [Azure Container Instances](https://docs.microsoft.com/en-us/azure/container-instances/container-instances-overview) (ACI) which are good for quick and [cost-effective](https://azure.microsoft.com/en-us/pricing/details/container-instances/) development/test deployment scenarios.
#
# To set them up properly, we need to indicate the number of CPU cores and the amount of memory we want to allocate to our web service. Optional tags and descriptions are also available for us to identify the instances in AzureML when looking at the `Compute` tab in the Azure Portal.
#
# <i><b>Note:</b> For production workloads, it is better to use [Azure Kubernetes Service](https://docs.microsoft.com/en-us/azure/aks/) (AKS) instead. We will demonstrate how to do this in the next notebook (to be published).<i>
# In[24]:
# Create a deployment configuration with 1 CPU and 5 gigabytes of RAM
aci_config = AciWebservice.deploy_configuration(
cpu_cores=1,
memory_gb=5,
tags={"webservice": "image classification model (fastai 1.0.48)"},
description="This service classifies images into 1000 different groups.",
)
# ### 6.E Web service deployment <a id="websvc"></a>
# The final step to deploying our web service is to call `WebService.deploy_from_image()`. This function uses the Docker image and the deployment configuration we created above to perform the following:
#
# - Deploy the docker image to an Azure Container Instance
# - Call the `init()` function in our scoring file
# - Provide an HTTP endpoint for scoring calls
#
# The `deploy_from_image` method requires the following parameters:
#
# - workspace: the workspace containing the service
# - name: a unique name used to identify the service in the workspace
# - image: a docker image object that contains the environment needed for scoring/inference
# - deployment_config: a configuration object describing the compute type
#
# Azure Container Instances have no associated ComputeTarget, so we do not specify any here. Remember, we already provided information on the number of CPUs and the amount of memory needed in the service configuration file above.
#
# <i><b>Note:</b> The web service creation can take a few minutes</i>
# In[25]:
# Define how to deploy the web service
service_name = "im-classif-websvc"
service = Webservice.deploy_from_image(
workspace=ws,
name=service_name,
image=docker_image,
deployment_config=aci_config,
)
# An alternative way of deploying the service is to deploy from the model directly. In that case, we would need to provide the docker image configuration object (image_config), and our list of models (just one of them here).
# The advantage of `deploy_from_image` over <a href="https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.webservice(class)?view=azure-ml-py#deploy-from-model-workspace--name--models--image-config--deployment-config-none--deployment-target-none-">deploy_from_model</a> is that the former allows us
# to re-use the same Docker image in case the deployment of this service fails, or even for other
# types of deployments, as we will see in the next notebook (to be published).
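# For illustration only (not part of the original notebook): a hedged sketch of the
# `deploy_from_model` alternative mentioned above. It reuses the `ws`, `model`, `image_config`
# and `aci_config` objects defined earlier in this notebook; the service name is made up.
service_from_model = Webservice.deploy_from_model(
    workspace=ws,
    name="im-classif-websvc-from-model",
    models=[model],  # list of registered Model objects
    image_config=image_config,  # Docker image configuration from section 6.C
    deployment_config=aci_config,  # ACI configuration from section 6.D
)
service_from_model.wait_for_deployment(show_output=True)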
# In[26]:
# Deploy the web service
service.wait_for_deployment(show_output=True)
# When successful, we expect to see the following:
#
# `
# Creating service
# Running .....
# SucceededACI service creation operation finished, operation "Succeeded"`
#
# In the case where the deployment is not successful, we can look at the image and service logs to debug. [These instructions](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-troubleshoot-deployment) can also be helpful.
# In[ ]:
# Access the service logs
# print(service.get_logs())
# In[27]:
# Retrieve the service status
print(
f"Service {service.name} is _{service.state}_ and available at {service.scoring_uri}"
)
# We can also check the presence and status of both our new Docker image and web service on the Azure portal, under the `Images` and `Deployments` tabs, respectively.
#
#
# <img src="media/docker_images.jpg" width="800" alt="Azure portal view of the Images section">
# <img src="media/deployments.jpg" width="800" alt="Azure portal view of the Deployments section">
# ## 7. Testing of the web service <a id="test"></a>
# Our web service is now up and running. To make sure that it is working as expected, let's test it.
#
# We first need to retrieve test images and to pre-process them into the format expected by our model. A service typically expects input data to be in a JSON serializable format. Here, we use our own `ims2strlist()` function to transform our .jpg images into strings of bytes.
# In[28]:
# Convert images to json object
images_fname_list = [
os.path.join(
ic_root_path(), "notebooks", "deployment", "test_images", "im_11.jpg"
),
os.path.join(
ic_root_path(), "notebooks", "deployment", "test_images", "im_97.jpg"
),
]
im_string_list = ims2strlist(images_fname_list)
test_samples = json.dumps({"data": im_string_list})
# ### 7.A Using the `run` API <a id="api"></a>
#
# Our data are now properly formatted. We can send them to our web service.
# In[29]:
# Predict using the deployed model
result = service.run(test_samples)
# In[30]:
# Plot the results
actual_labels = ["milk_bottle", "water_bottle"]
for k in range(len(result)):
title = "{}/{} - {}%".format(
actual_labels[k],
result[k]["label"],
round(100.0 * float(result[k]["probability"]), 2),
)
open_image(images_fname_list[k]).show(title=title)
# ### 7.B Via a raw HTTP request <a id="http"></a>
# In[31]:
# Send the same test data
payload = {"data": im_string_list}
resp = requests.post(service.scoring_uri, json=payload)
# Alternative way of sending the test data
# headers = {'Content-Type':'application/json'}
# resp = requests.post(service.scoring_uri, test_samples, headers=headers)
print(f"POST to url: {service.scoring_uri}")
print(f"Prediction: {resp.text}")
# ### 7.C Notes on web service deployment <a id="notes"></a>
# As we discussed above, Azure Container Instances tend to be used to develop and test deployments. They are typically configured with CPUs, which usually suffice when the number of requests per second is not too high. When working with several instances, we can configure them further by specifically [allocating CPU resources](https://docs.microsoft.com/en-us/azure/container-instances/container-instances-container-groups#deployment) to each of them.
#
# For production requirements, i.e. when &gt; 100 requests per second are expected, we recommend deploying models to Azure Kubernetes Service (AKS). It is a convenient infrastructure as it manages hosted Kubernetes environments, and makes it easy to deploy and manage containerized applications without container orchestration expertise. It also supports deployments with CPU clusters and deployments with GPU clusters, the latter of which are [more economical and efficient](https://azure.microsoft.com/en-us/blog/gpus-vs-cpus-for-deployment-of-deep-learning-models/) when serving complex models such as deep neural networks, and/or when traffic to the endpoint is high.
#
# We will see an example of this in the next notebook (to be published).
# ## 8. Clean up <a id="clean"></a>
#
# Throughout the notebook, we used a workspace and Azure container instances.
#
# When we first created our workspace, 4 extra resources were automatically added to it:
# - A container registry, which hosts our Docker images
# - A storage account, in which our output files get stored
# - Application Insights, which allows us to monitor the health of and traffic to our web service, as we will see in the next notebook
# - A key vault, which stores our credentials.
#
# In this notebook, we also hosted our web service on container instances. Overall, during the time it took us to run this notebook (assuming ~ 1h), the cost we incurred was less than $3.
#
# To get a better sense of pricing, we can refer to this [calculator](https://azure.microsoft.com/en-us/pricing/calculator/). We can also navigate to the [Cost Management + Billing](https://ms.portal.azure.com/#blade/Microsoft_Azure_Billing/ModernBillingMenuBlade/Overview) pane on the portal, click on our subscription ID, and click on the Cost Analysis tab to check our credit usage.
#
# In order not to incur extra costs, let's now delete the resources we no longer need.
# ### 8.A Service termination <a id="svcterm"></a>
#
# Now that we have verified that our web service works well on ACI, we can delete it. This helps reduce [costs](https://azure.microsoft.com/en-us/pricing/details/container-instances/), since the container group we were paying for no longer exists, and allows us to keep our workspace clean.
# In[32]:
service.delete()
# At this point, the main resource we are paying for is the <b>Standard</b> Azure Container Registry (ACR), which contains our Docker image, and came as a default when we created our workspace. Details on pricing are available [here](https://azure.microsoft.com/en-us/pricing/details/container-registry/).
# ### 8.B Image deletion <a id="imdel"></a>
#
# We may decide to use our Docker image in a separate ACI or even in an AKS deployment. In that case, we should keep it available in our workspace. However, if we no longer have a use for it, we can delete it.
# In[ ]:
# docker_image.delete()
# ### 8.C Workspace deletion <a id="wsdel"></a>
#
# If our goal is to continue using our workspace, we should keep it available. On the contrary, if we plan on no longer using it and its associated resources, we can delete it.
#
# <i><b>Note:</b> Deleting the workspace will delete all the experiments, outputs, models, Docker images, deployments, etc. that we created in that workspace</i>
# In[ ]:
# ws.delete(delete_dependent_resources=True)
# This deletes our workspace, the container registry, the account storage, Application Insights and the key vault
# ## 9. Next steps <a id="next-steps"></a>
#
# In the next notebook (to be published), we will leverage the same Docker image, and deploy our model on AKS. We will also learn how a Flask app, with an interactive user interface, can be used to call our web service.

View file

@ -2,42 +2,55 @@
# coding: utf-8
# <i>Copyright (c) Microsoft Corporation. All rights reserved.</i>
#
#
# <i>Licensed under the MIT License.</i>
# # Introduction to Training Image Classification Models
# In this notebook, we will give an introduction to using [fast.ai](https://www.fast.ai/) for image classification. We will use a small dataset of four differenet beverages to train and evaluate a model. We'll also cover one of the most common ways to store your data in your file system for image classification modelling.
# # Training an Image Classification Model
#
# In this notebook, we give an introduction to training an image classification model using [fast.ai](https://www.fast.ai/). Using a small dataset of four different beverage packages, we demonstrate training and evaluating a CNN image classification model. We also cover one of the most common ways to store data on a file system for this type of problem.
#
# ## Initialization
# In[1]:
# Ensure edits to libraries are loaded and plotting is shown in the notebook.
get_ipython().run_line_magic('reload_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')
get_ipython().run_line_magic('matplotlib', 'inline')
get_ipython().run_line_magic("reload_ext", "autoreload")
get_ipython().run_line_magic("autoreload", "2")
get_ipython().run_line_magic("matplotlib", "inline")
# Import fastai. For now, we'll import all (`from fastai.vision import *`) so that we can easily use different utilies provided by the fastai library.
# Import all functions we need.
# In[2]:
import sys
sys.path.append("../../")
import numpy as np
from pathlib import Path
import papermill as pm
import scrapbook as sb
# fastai and torch
import fastai
from fastai.vision import *
from fastai.vision import (
models,
ImageList,
imagenet_stats,
partial,
cnn_learner,
ClassificationInterpretation,
to_np,
)
from fastai.metrics import accuracy
# local modules
from utils_cv.classification.model import TrainMetricsRecorder
from utils_cv.classification.plot import plot_pr_roc_curves
from utils_cv.classification.results_widget import ResultsWidget
from utils_cv.classification.widget import ResultsWidget
from utils_cv.classification.data import Urls
from utils_cv.common.data import unzip_url
from utils_cv.common.gpu import which_processor
@ -46,30 +59,31 @@ print(f"Fast.ai version = {fastai.__version__}")
which_processor()
# This shows your machine's GPUs (if has any) and which computing device fastai/torch is using. The output cells here show the run results on [Azure DSVM](https://azure.microsoft.com/en-us/services/virtual-machines/data-science-virtual-machines/) Standard NC6.
# This shows your machine's GPUs (if it has any) and the computing device `fastai/torch` is using. We suggest using an [Azure DSVM](https://azure.microsoft.com/en-us/services/virtual-machines/data-science-virtual-machines/) Standard NC6 as an on-demand GPU compute resource.
# Set some parameters. We'll use the `unzip_url` helper function to download and unzip our data.
# Next, set some model runtime parameters. We use the `unzip_url` helper function to download and unzip the data used in this example notebook.
# In[13]:
# In[3]:
DATA_PATH = unzip_url(Urls.fridge_objects_path, exist_ok=True)
EPOCHS = 5
DATA_PATH = unzip_url(Urls.fridge_objects_path, exist_ok=True)
EPOCHS = 5
LEARNING_RATE = 1e-4
IMAGE_SIZE = 299
BATCH_SIZE = 16
ARCHITECTURE = models.resnet50
IMAGE_SIZE = 299
BATCH_SIZE = 16
ARCHITECTURE = models.resnet50
# ---
#
# # Prepare Image Classification Dataset
#
# In this notebook, we use a toy dataset called *Fridge Objects*, which consists of 134 images of 4 classes of beverage container `{can, carton, milk bottle, water bottle}` photos taken on different backgrounds. The helper function downloads and unzips the data set to the `image_classification/data` directory.
#
# Set that directory in the `path` variable for ease of use throughout the notebook.
# ## 1. Prepare Image Classification Dataset
# In this notebook, we'll use a toy dataset called *Fridge Objects*, which consists of 134 images of can, carton, milk bottle and water bottle photos taken with different backgrounds. With our helper function, the data set will be downloaded and unzip to `image_classification/data`.
#
# Let's set that directory to our `path` variable, which we'll use throughout the notebook, and checkout what's inside:
# In[14]:
# In[4]:
path = Path(DATA_PATH)
@ -81,9 +95,9 @@ path.ls()
# - `/milk_bottle`
# - `/carton`
# - `/can`
# The most common data format for multiclass image classification is to have a folder named after each label, with the corresponding images inside:
#
#
# This is most common data format for multiclass image classification. Each folder title corresponds to the image label for the images contained inside:
#
# ```
# /images
# +-- can (class 1)
@ -96,109 +110,112 @@ path.ls()
# | +-- ...
# +-- ...
# ```
#
# and our data is already structured in that format!
#
# Our data is already organized in this structure.
# ## 2. Load Images
# # Load Images
#
# In `fastai`, an `ImageDataBunch` can easily use multiple images (mini-batches) during training time. We create the `ImageDataBunch` by using [data_block apis](https://docs.fast.ai/data_block.html).
#
# For training and validation, we randomly split the data in an `8:2` ratio, holding 80% of the data for training and 20% for validation.
#
# To use fastai, we want to create `ImageDataBunch` so that the library can easily use multiple images (mini-batches) during training time. We create an ImageDataBunch by using fastai's [data_block apis](https://docs.fast.ai/data_block.html).
#
# For training and validation, we randomly split the data by 8:2, where 80% of the data is for training and the rest for validation.
# In[15]:
# In[5]:
data = (ImageList.from_folder(path)
.split_by_rand_pct(valid_pct=0.2, seed=10)
.label_from_folder()
.transform(size=IMAGE_SIZE)
.databunch(bs=BATCH_SIZE)
.normalize(imagenet_stats))
data = (
ImageList.from_folder(path)
.split_by_rand_pct(valid_pct=0.2, seed=10)
.label_from_folder()
.transform(size=IMAGE_SIZE)
.databunch(bs=BATCH_SIZE)
.normalize(imagenet_stats)
)
# Lets take a look at our data using the databunch we created.
# We examine some sample data using the `databunch` we created.
# In[16]:
# In[6]:
data.show_batch(rows=3, figsize=(15,11))
data.show_batch(rows=3, figsize=(15, 11))
# Lets see all available classes:
# Show all available classes:
# In[17]:
# In[7]:
print(f'number of classes: {data.c}')
print(f"number of classes: {data.c}")
print(data.classes)
# We can also see how many images we have in our training and validation set.
# Show the number of images in the training and validation set.
# In[18]:
# In[8]:
data.batch_stats
# In this notebook, we don't use test set. You can add it by using [add_test](https://docs.fast.ai/data_block.html#LabelLists.add_test). Please note that in the **fastai** framework, test datasets have no labels - this is the unknown data to be predicted. If you want to validate your model on a test dataset with labels, you probably need to use it as a validation set.
# In a standard analysis, we would split the data into train, validation, and test sets. For this example, we do not use a test set, but one could be added using the [add_test](https://docs.fast.ai/data_block.html#LabelLists.add_test) method, as sketched below. Note that in the `fastai` framework, test sets do not include labels, as they represent the unknown data to be predicted. The validation set includes labels, so it can be used to measure the model's performance on observations not used for training.
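# A hedged sketch (not part of the original notebook) of how an unlabeled test set could be
# attached with `add_test`; the `path/"test"` folder is hypothetical and does not ship with
# the Fridge Objects dataset.
data_with_test = (
    ImageList.from_folder(path)
    .split_by_rand_pct(valid_pct=0.2, seed=10)
    .label_from_folder()
    .add_test(ImageList.from_folder(path / "test"))  # test items carry no labels in fastai
    .transform(size=IMAGE_SIZE)
    .databunch(bs=BATCH_SIZE)
    .normalize(imagenet_stats)
)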
# ## 3. Train a Model
# # Train a Model
# For the model, we use a convolutional neural network (CNN). Specifically, we'll use **ResNet50** architecture. You can find more details about ResNet from [here](https://arxiv.org/abs/1512.03385).
#
# When training a model, there are many hypter parameters to select, such as the learning rate, the model architecture, layers to tune, and many more. With fastai, we can use the `create_cnn` function that allows us to specify the model architecture and performance indicator (metric). At this point, we already benefit from transfer learning since we download the parameters used to train on [ImageNet](http://www.image-net.org/).
#
# Note, we use a custom callback `TrainMetricsRecorder` to track the accuracy on the training set during training, since fast.ai's default [recorder class](https://docs.fast.ai/basic_train.html#Recorder) only supports tracking accuracy on the validation set.
# For this image classifier, we use a **ResNet50** convolutional neural network (CNN) architecture. You can find more details about ResNet [here](https://arxiv.org/abs/1512.03385).
#
# When training a CNN, there is an almost infinite number of ways to construct the model architecture. We need to determine how many and what type of layers to include and how many nodes make up each layer. Other hyperparameters that control the training of those layers are also important and add to the overall complexity of neural net methods. With `fastai`, we use the `cnn_learner` function to specify the model architecture and performance metric. We use a transfer learning approach, reusing the CNN architecture and initializing the model parameters with weights pretrained on [ImageNet](http://www.image-net.org/).
#
# In this work, we use a custom callback `TrainMetricsRecorder` to track the model accuracy on the training set as we tune the model. This is for instruction only; fast.ai's default [recorder class](https://docs.fast.ai/basic_train.html#Recorder) only supports tracking model accuracy on the validation set.
# In[19]:
# In[9]:
learn = cnn_learner(
data,
ARCHITECTURE,
metrics=[accuracy],
callback_fns=[partial(TrainMetricsRecorder, show_graph=True)]
callback_fns=[partial(TrainMetricsRecorder, show_graph=True)],
)
# Unfreeze our CNN since we're training all the layers.
# Use the `unfreeze` method to allow us to retrain all the CNN layers with the <i>Fridge Objects</i> data set.
# In[20]:
# In[10]:
learn.unfreeze()
# We can call the `fit` function to train the dnn.
# The `fit` function trains the CNN using the parameters specified above.
# In[21]:
# In[11]:
learn.fit(EPOCHS, LEARNING_RATE)
# In[22]:
# In[12]:
# You can plot loss by using the default callback Recorder.
learn.recorder.plot_losses()
# ## 4. Evaluate the Model
# # Validate the model
#
# To validate the model, calculate the model accuracy using the validation set.
# To evaluate our model, lets take a look at the accuracy on the validation set.
# In[23]:
# In[13]:
_, metric = learn.validate(learn.data.valid_dl, metrics=[accuracy])
print(f'Accuracy on validation set: {100*float(metric):3.2f}')
print(f"Accuracy on validation set: {100*float(metric):3.2f}")
# Now, analyze the classification results by using `ClassificationInterpretation` module.
# The `ClassificationInterpretation` module is used to analyze the model classification results.
# In[24]:
# In[14]:
interp = ClassificationInterpretation.from_learner(learn)
@ -206,25 +223,27 @@ interp = ClassificationInterpretation.from_learner(learn)
pred_scores = to_np(interp.probs)
# To see details of each sample and prediction results, we use our widget helper class `ResultsWidget`. The widget shows each test image along with its ground truth label and model's prediction scores. We can use this tool to see how our model predicts each image and debug the model if needed.
#
# To see these details use the widget helper class `ResultsWidget`. The widget shows test images along with the ground truth label and model prediction score. With this tool, it's possible to see how the model predicts each image and debug the model if needed.
#
# <img src="https://cvbp.blob.core.windows.net/public/images/ic_widget.png" width="600"/>
# <center><i>Image Classification Result Widget</i></center>
# In[25]:
# In[15]:
w_results = ResultsWidget(
dataset=learn.data.valid_ds,
y_score=pred_scores,
y_label=[data.classes[x] for x in np.argmax(pred_scores, axis=1)]
y_label=[data.classes[x] for x in np.argmax(pred_scores, axis=1)],
)
display(w_results.show())
# We can plot precision-recall and ROC curves for each class as well. Please note that these plots are not too interesting here, since the dataset is easy and thus the accuracy is close to 100%.
# Aside from accuracy, precision and recall are important metrics in classification settings. Precision quantifies what fraction of the predicted labels is correct, while recall quantifies what fraction of the true labels the model recovers. Since the two are linked, there is a trade-off between optimizing for precision and optimizing for recall, and they can be plotted against each other to show that trade-off graphically.
#
# In multiclass settings, we plot precision-recall and [ROC](https://en.wikipedia.org/wiki/Receiver_operating_characteristic) curves for each class. In this example, the dataset is not complex and the accuracy is close to 100%. In more difficult settings, these figures will be more interesting.
# In[26]:
# In[16]:
# True labels of the validation set. We convert to numpy array for plotting.
@ -232,20 +251,43 @@ true_labels = to_np(interp.y_true)
plot_pr_roc_curves(true_labels, pred_scores, data.classes)
# Let's take a close look how our model confused some of the samples (if any). The most common way to do that is to use a confusion matrix.
# A confusion matrix details the number of images on which the model succeeded or failed. For each class, the matrix lists correct classifications along the diagonal, and incorrect ones off-diagonal. This gives a detailed look at which classes the model confuses with one another.
# In[27]:
# In[17]:
interp.plot_confusion_matrix()
# When evaluating our results, we want to see where the model messes up, and whether or not we can do better. So we're interested in seeing images where the model predicted the image incorrectly but with high confidence (images with the highest loss).
# When evaluating our results, we want to see where the model makes mistakes and if we can help it improve.
# In[28]:
# In[18]:
interp.plot_top_losses(9, figsize=(15,11))
interp.plot_top_losses(9, figsize=(15, 11))
# That's pretty much it! Now you can bring your own dataset and train your model on them easily.
# In[19]:
# The following code is used by the notebook "24_run_notebook_on_azureml.ipynb" to log metrics when using papermill or scrapbook
# to run this notebook. We can comment out this cell when we are running this notebook directly.
training_losses = [x.numpy().ravel()[0] for x in learn.recorder.losses]
training_accuracy = [x[0].numpy().ravel()[0] for x in learn.recorder.metrics]
# pm.record may get deprecated and completely replaced by sb.glue:
# https://github.com/nteract/scrapbook#papermills-deprecated-record-feature
try:
sb.glue("training_loss", training_losses)
sb.glue("training_accuracy", training_accuracy)
sb.glue("Accuracy on validation set:", 100 * float(metric))
except Exception:
pm.record("training_loss", training_losses)
pm.record("training_accuracy", training_accuracy)
pm.record("Accuracy on validation set:", 100 * float(metric))
# # Conclusion
#
# Using the concepts introduced in this notebook, you can bring your own dataset and train an image classifier to detect objects of interest for your specific setting.

View file

@ -5,13 +5,13 @@
#
# <i>Licensed under the MIT License.</i>
# # Multilabel Classification
# # Multi-label Classification
#
# In this notebook, we will look at the best practices for doing multilabel classification.
#
# In the previous notebook, we performed multi-class/single-label classification, which assumes that each image is assigned to only one label: an animal can be either an dog or a cat but not both at the same time. Multi-label classification on the other hand, will assume that each image can contain or represent multiple different labels: a landscape can be labeled both gloomy (weather) and of a beach (subject).
# In the previous notebook, we performed multi-class/single-label classification, where each image is assigned to only one label. For single-label classification, a picture of a single animal can be either a dog or a cat but not both at the same time. For multi-label classification, each image can contain or represent multiple different labels: a landscape can be labeled both gloomy (weather) and of a beach (subject).
#
# In this notebook, we'll train a multilabel classifier and examine how best to structure data for multilabel classification problems as well as learn about new ways to evaluate our results.
# In this notebook, we'll train a multi-label classifier. We will also examine how best to structure data for multi-label classification problems, and learn about new ways to evaluate our results.
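# Illustrative sketch (not from the notebook): in multi-label problems a prediction can be
# partially correct, so a Hamming-style accuracy scores each label decision independently.
import numpy as np

y_true = np.array([[1, 0, 1], [0, 1, 0]])  # two images, three candidate labels each
y_pred = np.array([[1, 0, 0], [0, 1, 0]])  # the first prediction misses one label
print((y_true == y_pred).mean())  # 5 of 6 label decisions are correct -> ~0.83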
# In[1]:
@ -22,7 +22,7 @@ get_ipython().run_line_magic("autoreload", "2")
get_ipython().run_line_magic("matplotlib", "inline")
# Import fastai and other libraries needed. For now, we'll import all (`import *`) so that we can easily use different utilies provided by the fastai library.
# Import all functions we need.
# In[2]:
@ -35,25 +35,28 @@ import warnings
warnings.filterwarnings("ignore")
import inspect
import numpy as np
import pandas as pd
from pathlib import Path
# fastai and torch
# fastai
import fastai
from fastai.vision import *
from fastai.vision import (
models,
ImageList,
imagenet_stats,
cnn_learner,
partial,
)
# local modules
from utils_cv.classification.model import (
TrainMetricsRecorder,
hamming_loss,
zero_one_loss,
)
from utils_cv.classification.plot import (
plot_pr_roc_curves,
plot_loss_thresholds,
hamming_accuracy,
zero_one_accuracy,
get_optimal_threshold,
)
from utils_cv.classification.plot import plot_thresholds
from utils_cv.classification.data import Urls
from utils_cv.common.data import unzip_url
from utils_cv.common.gpu import which_processor
@ -62,7 +65,7 @@ print(f"Fast.ai version = {fastai.__version__}")
which_processor()
# Like before, we set some parameters. This time, we can use one of the multilabel datasets that comes with this repo.
# Like before, we set some parameters. This time, we can use one of the multi-label datasets that come with this repo.
# In[3]:
@ -70,18 +73,18 @@ which_processor()
DATA_PATH = unzip_url(Urls.multilabel_fridge_objects_path, exist_ok=True)
EPOCHS = 10
LEARNING_RATE = 1e-4
IMAGE_SIZE = 299
IM_SIZE = 300
BATCH_SIZE = 16
ARCHITECTURE = models.resnet50
ARCHITECTURE = models.resnet18
# ---
# ## 1. Preparing Image Data for Multilabel Classification
# ## 1. Prepare Image Data for Multi-label Classification
#
# In this notebook, we'll look at different kinds of beverages. In the repo, under `data`, we've downloaded a directory titled: __multilabelFridgeObjects__.
#
# Lets set that directory to our `path` variable, which we'll use throughout the notebook. We'll also inspect what's inside to get an understanding of how to structure images for multilabel classification.
# Let's set that directory to our `path` variable, which we'll use throughout the notebook. We'll also inspect what's inside to get an understanding of how to structure images for multi-label classification.
# In[4]:
@ -107,9 +110,9 @@ df = pd.read_csv(path / "labels.csv")
df.sample(5)
# As shown above, the contents of the csv file is a mapping of the filename to the labels. Since this is a multilabel classification problem, each image can be associated to multiple labels.
# As shown above, the contents of the csv file are a mapping of each filename to its labels. Since this is a multi-label classification problem, each image can be associated with multiple labels.
#
# This is one of the most common data formats for multilabel image classification; one csv file that contains the mapping of labels to a folder of images:
# This is one of the most common data formats for multi-label image classification; one csv file that contains the mapping of labels to a folder of images:
#
# ```
# /images
@ -126,7 +129,7 @@ df.sample(5)
# __Loading data__
#
# Now that we know the structure of our data, let's use fast.ai's data block APIs to create our databunches so that we can easily load mini-batches of data from our filesystem into our trainer.
# Now that we know the structure of our data, let's use fast.ai's data block APIs to create our databunches so that we can easily load mini-batches of data from our file system into our trainer.
# In[7]:
@ -134,23 +137,23 @@ df.sample(5)
np.random.seed(42)
data = (
ImageList.from_csv(path, "labels.csv", folder="images")
.random_split_by_pct(0.2)
.split_by_rand_pct(0.2, seed=10)
.label_from_df(label_delim=" ")
.transform(size=299)
.databunch(bs=32)
.transform(size=IM_SIZE)
.databunch(bs=BATCH_SIZE)
.normalize(imagenet_stats)
)
# Let's break down the code:
#
# The first thing we need to do is to create an `ImageList`, and we'll do so by creating it from a csv file (`from_csv`). Then we want to do a random split (`random_split_by_pct`) so that we have our validation set. For this method, we've also set a random seed (`np.random.seed(42)`) so that our validation set is consistent. Finally we want to get our labels from the df (`label_from_df`) that comes from the csv file. Since our labels are space-separated in the csv file, we want to specify that our labels will be delimited by a space (`label_delim=' '`).
# The first thing we need to do is to create an `ImageList`, and we'll do so by creating it from a csv file (`from_csv`). Then we want to do a random split (`split_by_rand_pct`) so that we have our validation set. For this method, we've also set a random seed (`np.random.seed(42)`) so that our validation set is consistent. Finally we want to get our labels from the dataframe df (`label_from_df`) that we built from the csv file. Since our labels are space-separated in the csv file, we want to specify that our labels will be delimited by a space (`label_delim=' '`).
#
# In the second part, we use the `ImageList` we created and apply a transformation on it (`transform`) so that all images are resized to 299X299. Then we turn it into a databunch, which is basically the kind of object fastai's trainer uses to load mini-batches of data. Finally we'll normalize the databunch (`normalize(imagenet_stats)`) to the imagenet parameters.
# In the second part, we use the `ImageList` we created and apply a transformation on it (`transform`) so that all images are resized to 300x300 (`IM_SIZE`). Then we turn it into a databunch, which is basically the kind of object fast.ai's trainer uses to load mini-batches of data. Finally, we'll normalize the databunch (`normalize(imagenet_stats)`) to the ImageNet parameters.
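# As an optional sanity check (a sketch, not one of the notebook's cells), we can pull a single mini-batch out of the databunch and confirm that each target is a multi-hot vector with one column per class:
#
# ```python
# x, y = data.one_batch()
# print(x.shape)  # e.g. torch.Size([16, 3, 300, 300]) -> batch x channels x IM_SIZE x IM_SIZE
# print(y.shape)  # e.g. torch.Size([16, 4])           -> batch x number of classes
# ```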
# __Inpsect data__
# __Inspect data__
#
# To make sure our data is correctly loaded, lets print out the number of classes, and each of the class labels.
# To make sure our data is correctly loaded, let's print out the number of classes, and each of the class labels.
# In[8]:
@ -159,7 +162,7 @@ print(f"number of classes: {data.c}")
print(data.classes)
# We can also call `batch_stats` on our databunch object to get a view on how the data is split between training and validation.
# We can also call `batch_stats` on our databunch object to get a view of how the data is split between training and validation.
# In[9]:
@ -167,7 +170,7 @@ print(data.classes)
data.batch_stats
# Lets get a sample of what the data looks like.
# Let's get a sample of what the data looks like.
# In[10]:
@ -175,95 +178,76 @@ data.batch_stats
data.show_batch(rows=3, figsize=(15, 11))
# # 3. Training our multilabel classifier
# # 3. Training our multi-label classifier
#
# One of the main differences between training a multilabel classifier and a single-label classifier is how we may want to evaluate our model. In a single-label (multi-class) classification model, we often use a model's accuracy to see how well a model performs. But _accuracy_ as an evaluation metric isn't specific enough when it comes to multilabel classification problems.
# One of the main differences between training a multi-label classifier and a single-label classifier is the _evaluation metric(s)_ we use to evaluate our model.
#
# __The Problem With Accuracy__
#
# For multilabel classification problems, a misclassification is not binary: right or wrong. Instead a prediction containing a subset of the correct labels we're looking for is better than one that contains none of them. For example, in an image that is labelled both 'rainy' and 'forest', it is usually better to predict one correct label than neither of the correct labels.
# In traditional classification, accuracy is the most common evaluation criterion. But for multi-label classification problems, accuracy isn't as straightforward a concept. Do we care about label-level accuracy or do we care about image-level accuracy?
#
# One of the other problems when it comes to calculating accuracy is that the softmax activation function does not work well for multilabel classification problems. In single-label classification, we usually use a softmax function on the output of our neural network because we want to express a dependency across the labels; if the picture is likely of a _dog_, then it is unlikely of a _cat_. By applying a softmax on the output, we force the sum of the values to 1, enforcing this dependency.
# A prediction that contains a subset of the correct labels we're looking for can sometimes be better than one that contains none of them. For example, in an image that is labelled both 'rainy' and 'forest', it is usually better to predict one correct label than neither of the correct labels. However, sometimes we may still want to consider the prediction a misclassification unless _all_ the labels are correctly predicted. For multi-label classification, we'll use _hamming accuracy_ and _zero-one accuracy_ which we can respectively think of as label-level accuracy and image-level accuracy.
#
# For multilabel classification, label likelihoods are independent from each other; the likelihood of an image being _rainy_ is independent from the likelihood of it being a _forest_. Instead of the softmax function, we can use the sigmoid activation function to normalize our result while preserving the independent relationship of each label.
# __Hamming Accuracy & Zero-one Accuracy__
#
# One of the most common ways to evaluate a multi-label classification problem is by using the __hamming accuracy__, which we can think of as the fraction of correctly predicted labels out of the total number of labels.
#
# __Hamming Loss__
# Zero-one accuracy is a much harsher evaluation metric than hamming accuracy. The __zero-one accuracy__ counts the entire set of labels for a given sample as incorrect if it does not exactly match the true set of labels. Hamming accuracy is more forgiving since it penalizes only the individual labels that are wrong.
#
# One of the most common ways to evaluate a multilabel classification problem is by using the hamming loss, which we can think of as the fraction of wrong labels to the total number of labels.
#
# For example, lets say our validation set contains 4 images and the results looks as such:
# Let's look at these two metrics with a concrete example. Let's say our validation set contains 5 images, each with 4 possible labels, and the results look as follows:
# ```
# +-------+------------------+------------------+------------------+
# | Image | y_true: | y_pred: | hamming_loss: |
# |-------+------------------+------------------+------------------+
# | im_01 | [[1, 0, 0, 1], | [[1, 0, 0, 0], | [[0, 0, 0, 1], |
# | im_02 | [1, 0, 1, 1], | [1, 1, 1, 1], | [0, 1, 0, 0], |
# | im_03 | [0, 1, 0, 0], | [0, 1, 0, 0], | [0, 0, 0, 0], |
# | im_04 | [1, 1, 0, 0]] | [1, 1, 1, 0]] | [0, 0, 1, 0]] |
# +-------+------------------+------------------+------------------+
# | | = 3/25 incorrect |
# +-------+------------------+------------------+------------------+
# +-------+------------------+------------------+------------------+------------------+
# | Image | y_true: | y_pred: | hamming_acc: | zero_one_acc: |
# |-------+------------------+------------------+------------------+------------------+
# | im_01 | [[1, 0, 0, 1], | [[1, 0, 0, 0], | [[0, 0, 0, 1], | [[1], |
# | im_02 | [1, 0, 1, 1], | [1, 1, 1, 1], | [0, 1, 0, 0], | [1], |
# | im_03 | [0, 1, 0, 0], | [0, 1, 0, 0], | [0, 0, 0, 0], | [0], |
# | im_04 | [0, 1, 1, 0], | [0, 1, 1, 0], | [0, 0, 0, 0], | [0], |
# | im_05 | [1, 1, 0, 0]] | [1, 1, 1, 0]] | [0, 0, 1, 0]] | [1]] |
# +-------+------------------+------------------+------------------+------------------+
# | | = 3/20 incorrect | = 3/5 incorrect |
# +-------+------------------+------------------+------------------+------------------+
# ```
# In this case, the predictions has 3 out of a total of 16 predictions that are not true, so the hamming loss is __0.1875__.
# In this case, 3 out of a total of 20 label predictions are incorrect, so the hamming accuracy is 17/20, or __0.85__. While there are only 3 misclassified labels, each of the misclassifications happens on a different image, so 3 of the 5 images are scored as incorrect and the zero-one accuracy is 2/5, or __0.4__. Compared to hamming accuracy, zero-one accuracy is clearly a much less forgiving metric.
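# To make the arithmetic concrete, here is a minimal numpy sketch (not part of the notebook code) that reproduces the two scores from the table above:
#
# ```python
# import numpy as np
#
# y_true = np.array([[1, 0, 0, 1],
#                    [1, 0, 1, 1],
#                    [0, 1, 0, 0],
#                    [0, 1, 1, 0],
#                    [1, 1, 0, 0]])
# y_pred = np.array([[1, 0, 0, 0],
#                    [1, 1, 1, 1],
#                    [0, 1, 0, 0],
#                    [0, 1, 1, 0],
#                    [1, 1, 1, 0]])
#
# # hamming accuracy: fraction of individual labels that match
# print((y_true == y_pred).mean())              # 17/20 = 0.85
#
# # zero-one accuracy: fraction of images whose whole label set matches
# print((y_true == y_pred).all(axis=1).mean())  # 2/5 = 0.4
# ```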
#
# __Zero-one Loss__
#
# Zero-one loss is a much harsher evaluation metric than hamming loss. The zero-one loss will classify an entire set of labels for a given sample incorrect if it does not entirely match the true set of labels. Hamming loss is more forgiving since it penalizes only the individual labels themselves.
#
# Once again, lets say our validation set contains 4 images and the results looks as such:
# ```
# +-------+------------------+------------------+------------------+
# | Image | y_true: | y_pred: | zero_one_loss: |
# |-------+------------------+------------------+------------------+
# | im_01 | [[1, 0, 0, 1], | [[1, 0, 0, 0], | [[1], |
# | im_02 | [1, 0, 1, 1], | [1, 1, 1, 1], | [1], |
# | im_03 | [0, 1, 0, 0], | [0, 1, 0, 0], | [0], |
# | im_04 | [1, 1, 0, 0]] | [1, 1, 1, 0]] | [1]] |
# +-------+------------------+------------------+------------------+
# | | = 3/4 incorrect |
# +-------+------------------+------------------+------------------+
# ```
# In this case, the predictions have only classified 3 individual labels incorrectly. But since we're using zero-one loss, and each of those misclassifications are in a different set, we end up with a zero-one loss of __0.75__. If we compare this to hamming loss, we can see that it is a much less forgiving metric.
#
# While hamming loss and zero-one loss are a common evaluation metric for multilabel classification, note that it may not be ideal for all multilabel classification problems. For each problem, you need to access what you're evaluating your model against to see if it is a good fit.
# While hamming and zero-one accuracies are common evaluation metrics for multi-label classification, note that they may not be ideal for all multi-label classification problems. For each problem, you need to assess what you're evaluating your model against to see if these metrics are a good fit.
# ---
# The following section covers training a model with fastai. It is very similar to the previous notebook, except for the evaluation metric that is used.
#
# If we want to take advantage of using Hamming Loss, we'll need to define our own evaluation metric. To do this, we'll need to create a custom function that will takes a `y_pred` and a `y_true`, and returns a single metric.
# Since this is a multi-label classification problem, we'll want to use hamming and zero-one accuracy as our evaluation metrics. Unlike traditional accuracy, fast.ai does not provide these metrics in its library, so we have to define them. To create our own metrics, we'll need to define a custom function that takes `y_pred` and `y_true`, and returns a single metric.
#
# > Since we've defined our hamming loss and zero-one loss functions in the `utils_cv.classification.models` module, lets just print out them out to see what they looks like.
# We've defined the hamming and zero-one accuracy functions in the `utils_cv.classification.model` module so we can use them directly when defining our `cnn_learner`.
#
# > To inspect the implementation of these functions, you can run:
# > - `print(inspect.getsource(hamming_accuracy))`
# > - `print(inspect.getsource(zero_one_accuracy))`
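# For illustration only, such a metric could look like the following sketch (`example_hamming_accuracy` is a hypothetical name, not the repo implementation; the 0.2 default mirrors the threshold used later in this notebook):
#
# ```python
# import torch
#
# def example_hamming_accuracy(y_pred, y_true, threshold=0.2):
#     # binarize the sigmoid outputs, then take the fraction of
#     # individual labels that match the ground truth
#     return ((y_pred > threshold).float() == y_true.float()).float().mean()
# ```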
# In[11]:
print(inspect.getsource(hamming_loss))
# In[12]:
print(inspect.getsource(zero_one_loss))
# We'll use the `cnn_learner` function to create our CNN, passing in our custom metric functions.
# In[13]:
learn = cnn_learner(
data,
ARCHITECTURE,
metrics=[hamming_loss, zero_one_loss],
metrics=[hamming_accuracy, zero_one_accuracy],
callback_fns=[partial(TrainMetricsRecorder, show_graph=True)],
)
# Unfreeze our CNN since we're training all the layers.
# For multi-label classification, we need to use a different loss function, but you'll notice that we do not specify which to use. This is because fast.ai uses the passed-in databunch to detect that this is a multi-label classification problem (that each x maps to a y that has multiple labels) and automatically sets the appropriate loss function. In this case, we see that fast.ai has chosen to use PyTorch's [`BCEWithLogitsLoss`](https://pytorch.org/docs/0.3.0/nn.html#bcewithlogitsloss), which uses the sigmoid activation instead of a softmax.
#
# For further details, we can inspect the loss function by calling `learn.loss_func??`. You can read more about how the [loss function differs in multi-label classification](#appendix-loss-function) in the appendix at the bottom of this notebook.
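# As a small, self-contained illustration (a sketch, independent of the learner above), `BCEWithLogitsLoss` is equivalent to applying a sigmoid to each output and then computing binary cross entropy per label:
#
# ```python
# import torch
# import torch.nn as nn
#
# logits = torch.tensor([[-1.2, 0.2, 1.9, -0.3, 3.4]])  # raw model outputs for one image
# targets = torch.tensor([[0., 1., 1., 0., 1.]])         # multi-hot ground truth
#
# loss = nn.BCEWithLogitsLoss()(logits, targets)
# manual = nn.BCELoss()(torch.sigmoid(logits), targets)  # sigmoid + binary cross entropy
# assert torch.allclose(loss, manual)
# ```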
# In[14]:
# In[12]:
learn.loss_func
# We can now continue to train the model like we did in the previous notebook. We need to unfreeze our CNN since we're training all the layers.
# In[13]:
learn.unfreeze()
@ -271,13 +255,13 @@ learn.unfreeze()
# We can call the `fit` function to train the DNN.
# In[15]:
# In[14]:
learn.fit(EPOCHS, LEARNING_RATE)
# In[16]:
# In[15]:
learn.recorder.plot_losses()
@ -287,47 +271,95 @@ learn.recorder.plot_losses()
# The learner comes with a handy function `show_results` that will show one mini-batch of the validation set. We can use that to get an intuitive sense of what is being predicted correctly and what is not.
# In[17]:
# In[16]:
learn.show_results(rows=3, figsize=(15, 10))
# To concretely evaluate our model, lets take a look at the hamming loss on the validation set. We can think of this value as the percentage of the total incorrect classifications out of the total possible classifications.
# To quantitatively evaluate our model, let's take a look at the hamming and zero-one accuracies on the validation set.
# In[17]:
_, hl, zol = learn.validate(
learn.data.valid_dl, metrics=[hamming_accuracy, zero_one_accuracy]
)
print(f"Hamming Accuracy on validation set: {float(hl):3.2f}")
print(f"Zero-one Accuracy on validation set: {float(zol):3.2f}")
# We've calculated the hamming and the zero-one accuracies on our validation set with the default probability threshold of 0.2. However, this default value may not be optimal. We can use the `plot_thresholds` function to plot the evaluation metric at different levels of thresholds. If, for example, we were interested in the zero-one accuracy, but we noticed that the default threshold is far from the optimal point, we may consider using a different threshold when we perform inferencing. Let's plot the zero-one accuracy at various thresholds to see what the optimal threshold is.
#
# Note that the threshold represents a trade-off between specificity and sensitivity. The higher the threshold, the higher the _specificity_. The lower the threshold, the higher the _sensitivity_.
# In[18]:
_, hl, zol = learn.validate(
learn.data.valid_dl, metrics=[hamming_loss, zero_one_loss]
)
print(f"Hamming Loss on validation set: {float(hl):3.2f}")
print(f"Zero-one Loss on validation set: {float(zol):3.2f}")
interp = learn.interpret()
plot_thresholds(zero_one_accuracy, interp.probs, interp.y_true)
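# The plot above is essentially a threshold sweep; for reference, a minimal sketch of the same idea computed by hand with the `interp` object from the previous cell:
#
# ```python
# import numpy as np
#
# for th in np.linspace(0.1, 0.9, 9):
#     acc = zero_one_accuracy(interp.probs, interp.y_true, threshold=th)
#     print(f"threshold={th:.1f}  zero-one accuracy={float(acc):.2f}")
# ```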
# We've calculated the hamming loss on our validation set with the default probability threshold of 0.2. However, this default value may not be the most optimal value. We can use the `plot_loss_thresholds` function to plot the evaluation metric at different levels of thresholds. If, for example, we were interested in the zero-one loss, but we noticed that the default threshold is far from the minimum, we may consider using a different threshold when we perform inferencing. Lets plot the zero-one loss at various thresholds to what the most optimal threshold is.
#
# Note that the threshold represents a trade-off between specificity and sensitivity. The higher the threshold, the higher the _specificity_. The lower the threshold, the higher the _sensivity_.
# To get the threshold that will yield the best score for the metric we're using, we've created a helper function: `get_optimal_threshold`.
# In[19]:
interp = learn.interpret()
plot_loss_thresholds(zero_one_loss, interp.probs, interp.y_true)
optimal_threshold = get_optimal_threshold(
zero_one_accuracy, interp.probs, interp.y_true
)
optimal_threshold
# We can clearly see that the default threshold value of 0.2 is not the mininum. Lets move the threshold to achieve a better loss.
# With this threshold, we can then re-score our validation set using the zero_one_accuracy evaluation metric function.
# In[20]:
zero_one_loss(interp.probs, interp.y_true, threshold=0.3)
zero_one_accuracy(interp.probs, interp.y_true, threshold=optimal_threshold)
# Other than looking at zero-one loss and hamming loss, we can also plot the precision-recall and ROC curves for each class.
# # Conclusion
# Multi-label classification is different from traditional classification when it comes to how we organize and load our data, and when deciding on which evaluation metric(s) to use. You can now bring your own multi-label dataset and train a multi-label classifier.
# In[21]:
# ---
# True labels of the validation set. We convert to numpy array for plotting.
plot_pr_roc_curves(to_np(interp.y_true), to_np(interp.probs), data.classes)
# # Appendix
#
# ## Loss Function <a name="appendix-loss-function"></a>
#
# The loss function is one of the important differences between doing single-label and multi-label classification. Fast.ai automatically detects which loss function to use depending on the dataset.
#
# __Softmax for Single-label Classification__
#
# For single-label multi-class classification, one of the most common ways to optimize our model is with [cross entropy loss](https://pytorch.org/docs/0.3.0/nn.html#crossentropyloss), which uses a softmax function to give a probability distribution around the n possible classes. This allows us to express a dependency across the labels; if the picture is likely of a _dog_, then it is unlikely of a _cat_. In fact, fast.ai automatically uses the cross entropy loss function when it detects, from your databunch, that your dataset has multiple classes and each image only has one correct label, ie multi-class/single-label classification.
#
# For example, let's say we're trying to predict which animal a given image is of. In this example, we pass the model output values from the last layer of the network through a softmax function to get the joint probability distribution (notice it sums to 1) of the classes. To get our prediction, we simply use the highest probability, in this case, a horse.
# ```
# +--------+---------------+---------+------------+
# | labels | model_output | softmax | prediction |
# |--------+---------------+---------+------------+
# | cat | 1.6 | 0.086 | 0 |
# | dog | 0.4 | 0.026 | 0 |
# | horse | 3.9 | 0.864 | 1 |
# | mouse | 0.3 | 0.024 | 0 |
# +--------+---------------+---------+------------+
# ```
#
# __Sigmoid and a Threshold for Multi-label Classification__
#
# A loss function that uses softmax doesn't work for multi-label classification. In single-label classification, by applying a softmax on the output, we get the joint-likelihood among the labels. However, for multi-label classification, label likelihoods are independent from each other; the likelihood of an image being _rainy_ is independent from the likelihood of it being a _forest_. Instead of the softmax function, we can use the sigmoid function to normalize our result while preserving the independent relationship of each label.
#
# For example, let's say we're trying to predict features of a scenic image. In this example, we pass the model output values from the last layer of the network through a sigmoid function to give us independent probabilities (notice they do _not_ sum to 1). Based on the set threshold, we can then make a prediction on a given label. In this case, we predict 'rainy', 'cloudy', and 'misty' since these labels pass the threshold of 0.5.
# ```
# Threshold = 0.5
# +-----------+---------------+---------+------------+
# | labels | model_output | sigmoid | prediction |
# |-----------+---------------+---------+------------+
# | windy | -1.2 | 0.231 | 0 |
# | rainy | 0.2 | 0.550 | 1 |
# | cloudy | 1.9 | 0.870 | 1 |
# | sunny | -0.3 | 0.426 | 0 |
# | misty | 3.4 | 0.968 | 1 |
# +-----------+---------------+---------+------------+
# ```
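# To make the two activation functions concrete, here is a small numpy sketch (not part of the notebook code) that reproduces the softmax and sigmoid columns from the two tables above (small differences are rounding):
#
# ```python
# import numpy as np
#
# def softmax(x):
#     e = np.exp(x - x.max())
#     return e / e.sum()
#
# def sigmoid(x):
#     return 1 / (1 + np.exp(-x))
#
# single_label_logits = np.array([1.6, 0.4, 3.9, 0.3])        # cat, dog, horse, mouse
# multi_label_logits = np.array([-1.2, 0.2, 1.9, -0.3, 3.4])  # windy, rainy, cloudy, sunny, misty
#
# print(softmax(single_label_logits))       # sums to 1; 'horse' dominates with ~0.86
# print(sigmoid(multi_label_logits))        # independent probabilities; do not sum to 1
# print(sigmoid(multi_label_logits) > 0.5)  # [False  True  True False  True] -> rainy, cloudy, misty
# ```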


@ -1,561 +0,0 @@
#!/usr/bin/env python
# coding: utf-8
# <i>Copyright (c) Microsoft Corporation. All rights reserved.</i>
#
# <i>Licensed under the MIT License.</i>
# # Building Models for Accuracy VS Speed
#
# The goal of this notebook is to understand how to train a model with different parameters to achieve either a highly accurate but slow model, or a model with fast inference speed but with lower accuracy.
#
# As practitioners of computer vision, we want to be able to control what to optimize when building our models. Unless you are building a model for a Kaggle competition, it is unlikely that you can build your model with only its accuracy in mind.
#
# For example, in an IoT setting, where the inferencing device has limited computational capabilities, we need to design our models to have a small memory footprint. In contrast, medical situations often require the highest possible accuracy because the cost of mis-classification could impact the well-being of a patient. In this scenario, the accuracy of the model can not be compromised.
#
# We have conducted various experiments on multiple diverse datasets to find parameters which work well on a wide variety of settings, for e.g. high accuracy or fast inference. In this notebook, we provide these parameters, so that your initial models can be trained without any parameter tuning. For most datasets, these parameters are close to optimal, so there won't need to change them much. In the second part of the notebook, we will give guidelines as to what parameters could be fine-tuned and how they impact the model, and which parameters typically do not have a big influence
#
# It is recommended that you first train your model with the default parameters, evaluate the results, and then only as needed, try fine tuning parameters to achieve better results.
# ## Table of Contents:
# * [Training a High Accuracy or a Fast Inference Speed Classifier ](#model)
# * [Choosing between two types of models](#choosing)
# * [Pre-processing](#preprocessing)
# * [Training](#training)
# * [Evaluation](#evaluation)
# * [Fine tuning our models](#finetuning)
# * [DNN Architecture](#dnn)
# * [Key Parameters](#key-parameters)
# * [Other Parameters](#other-parameters)
# * [Testing Parameters](#testing-parameters)
# * [Appendix](#appendix)
# * [Learning Rate](#appendix-learning-rate)
# * [Image Size](#appendix-imsize)
# * [How we got good parameters](#appendix-good-parameters)
# ## Training a High Accuracy or a Fast Inference Speed Classifier <a name="model"></a>
# Lets first verify our fastai version:
# In[1]:
import fastai
fastai.__version__
# Ensure edits to libraries are loaded and plotting is shown in the notebook.
# In[2]:
get_ipython().run_line_magic("reload_ext", "autoreload")
get_ipython().run_line_magic("autoreload", "2")
get_ipython().run_line_magic("matplotlib", "inline")
# Import fastai. For now, we'll import all (import *) so that we can easily use different utilies provided by the fastai library.
# In[3]:
import sys
sys.path.append("../../")
import os
from pathlib import Path
from utils_cv.classification.data import Urls
from utils_cv.common.data import unzip_url
from fastai.vision import *
from fastai.metrics import accuracy
# Now that we've set up our notebook, lets set the hyperparameters based on which model type was selected.
# ### Choosing between two types of models <a name="choosing"></a>
# For most scenarios, computer vision practitioners want to create a high accuracy model, a fast-inference model or a small size model. Set your `MODEL_TYPE` variable to one of the following: `"high_accuracy"`, `"fast_inference"`, or `"small_size"`.
#
# For this notebook, we'll be using the FridgeObjects dataset as we did in the [previous notebook](01_training_introduction.ipynb). You can replace the `DATA_PATH` variable with your own data by passing its path.
#
# When choosing your batch size, its worth noting that even mid-level GPUs run out of memory when training a deeper resnet models at larger image resolutions. If you get an _out of memory_ error, try reducing the batch size by a factor of 2, and try again.
# In[4]:
# Choose between "high_accuracy", "fast_inference", or "small_size"
MODEL_TYPE = "fast_inference"
# Path to your data
DATA_PATH = unzip_url(Urls.fridge_objects_path, exist_ok=True)
# Epochs to train for
EPOCHS_HEAD = 4
EPOCHS_BODY = 12
LEARNING_RATE = 1e-4
BATCH_SIZE = 16
# Make sure that only one is set to True
# In[5]:
assert MODEL_TYPE in ["high_accuracy", "fast_inference", "small_size"]
# Set parameters based on your selected model.
# In[6]:
if MODEL_TYPE == "high_accuracy":
ARCHITECTURE = models.resnet50
IM_SIZE = 500
if MODEL_TYPE == "fast_inference":
ARCHITECTURE = models.resnet18
IM_SIZE = 300
if MODEL_TYPE == "small_size":
ARCHITECTURE = models.squeezenet1_1
IM_SIZE = 300
# ### Pre-processing <a name="preprocessing"></a>
#
# JPEG decoding represents a bottleneck on systems with powerful GPUs and can slow training significantly, often by a factor of 2-3x, and sometimes by much more. We therefore recommend creating a down-sized copy of the dataset if training otherwise takes too long, or if running training multiple times e.g. to evaluate different parameters. After running the following function, update the `DATA_PATH` variable (to `out_dir`) so that this notebook uses the resized images.
#
# ```python
# from utils_cv.classification.data import downsize_imagelist
#
# downsize_imagelist(im_list = ImageList.from_folder(Path(DATA_PATH)),
# out_dir = "downsized_images",
# max_dim = IM_SIZE)
# ```
# ### Training <a name="training"></a>
#
# We'll now re-apply the same steps we did in the [training introduction](01_training_introduction.ipynb) notebook here.
# Load our data.
# In[7]:
data = (
ImageList.from_folder(Path(DATA_PATH))
.split_by_rand_pct(valid_pct=0.2, seed=10)
.label_from_folder()
.transform(tfms=get_transforms(), size=IM_SIZE)
.databunch(bs=16)
.normalize(imagenet_stats)
)
# Create our learner.
# In[8]:
learn = cnn_learner(data, ARCHITECTURE, metrics=accuracy)
# Train the last layer for a few epochs.
# In[9]:
learn.fit_one_cycle(EPOCHS_HEAD, LEARNING_RATE)
# Unfreeze the layers
# In[10]:
learn.unfreeze()
# Fine tune the network for the remaining epochs.
# In[11]:
learn.fit_one_cycle(EPOCHS_BODY, LEARNING_RATE)
# ### Evaluation <a name="evaluation"></a>
# In this section, we test our model on the following characteristics:
# - accuracy
# - inference speed
# - parameter export size / memory footprint required
#
#
# Refer back to the [training introduction](01_training_introduction.ipynb) to learn about other ways to evaluate the model.
# #### Accuracy
# To keep things simple, we just take a look at the final accuracy on the validation set.
# In[12]:
_, metric = learn.validate(learn.data.valid_dl, metrics=[accuracy])
print(f"Accuracy on validation set: {float(metric)}")
# #### Inference speed
#
# Use the model to inference and time how long it takes.
# In[13]:
im = open_image(f"{(Path(DATA_PATH)/learn.data.classes[0]).ls()[0]}")
# In[14]:
get_ipython().run_cell_magic("timeit", "", "learn.predict(im)")
# #### Memory footprint
#
# Export our model and inspect the size of the file.
# In[15]:
learn.export(f"{MODEL_TYPE}")
# In[16]:
size_in_mb = os.path.getsize(Path(DATA_PATH) / MODEL_TYPE) / (1024 * 1024.0)
print(f"'{MODEL_TYPE}' is {round(size_in_mb, 2)}MB.")
# ---
# ## Fine tuning parameters <a name="finetuning"></a>
#
# If you use the parameters provided in the repo along with the defaults that Fastai provides, you can get good results across a wide variety of datasets. However, as is true for most machine learning projects, getting the best possible results for a new dataset requires tuning the parameters that you use. The following section provides guidelines on how to optimize for accuracy, inference speed, or model size on a given dataset. We'll go through the parameters that will make the largest impact on your model as well as the parameters that may not be worth tweaking.
#
# Generally speaking, models for image classification comes with a trade-off between training time versus model accuracy. The four parameters that most affect this trade-off are the DNN architecture, image resolution, learning rate, and number of epochs. DNN architecture and image resolution will additionally affect the model's inference time and memory footprint. As a rule of thumb, deeper networks with high image resolution will achieve higher accuracy at the cost of large model sizes and low training and inference speeds. Shallow networks with low image resolution will result in models with fast inference speed, fast training speeds and low model sizes at the cost of the model's accuracy.
# ### DNN Architectures <a name="dnn"></a>
#
# When choosing at an architecture, we want to make sure it fits our requirements for accuracy, memory footprint, inference speed and training speeds. Some DNNs have hundreds of layers and end up having quite a large memory footprint with millions of parameters to tune, while others are compact and small enough to fit onto memory limited edge devices.
#
# Lets take a __squeezenet1_1__ model, a __resnet18__ model and __resnet50__ model and compare the differences based on our experiment that is based of a diverse set of 6 different datasets. (More about the datasets in the appendix below)
#
# ![architecture_comparisons](media/architecture_comparisons.png)
#
# As you can see from the graphs above, there is a clear trade-off when deciding between the models.
#
# In terms of accuracy, __resnet50__ out-performs the rest, but it also suffers from having the highest memory footprint, and the longest training and inference times. On the other end of the spectrum, __squeezenet1_1__ performs the worst in terms fo accuracy, but has by far the smallest memory footprint.
#
# Generally speaking, given enough data, the deeper DNN and the higher the image resolution, the higher the accuracy you'll be able to achieve with your model.
#
# ---
#
# <details><summary>See the code to generate the graphs</summary>
# <p>
#
# #### Code snippet to generate graphs in this cell
#
# ```python
# import pandas as pd
# from utils_ic.parameter_sweeper import add_value_labels
# %matplotlib inline
#
# df = pd.DataFrame({
# "accuracy": [.9472, .9190, .8251],
# "training_duration": [385.3, 280.5, 272.5],
# "inference_duration": [34.2, 27.8, 27.6],
# "memory": [99, 45, 4.9],
# "model": ['resnet50', 'resnet18', 'squeezenet1_1'],
# }).set_index("model")
#
# ax1, ax2, ax3, ax4 = df.plot.bar(
# rot=90, subplots=True, legend=False, figsize=(8,10)
# )
#
# for ax in [ax1, ax2, ax3, ax4]:
# for i in [0, 1, 2]:
# if i==0: ax.get_children()[i].set_color('r')
# if i==1: ax.get_children()[i].set_color('g')
# if i==2: ax.get_children()[i].set_color('b')
#
# ax1.set_title("Accuracy (%)")
# ax2.set_title("Training Duration (seconds)")
# ax3.set_title("Inference Time (seconds)")
# ax4.set_title("Memory Footprint (mb)")
#
# ax1.set_ylabel("%")
# ax2.set_ylabel("seconds")
# ax3.set_ylabel("seconds")
# ax4.set_ylabel("mb")
#
# ax1.set_ylim(top=df["accuracy"].max() * 1.3)
# ax2.set_ylim(top=df["training_duration"].max() * 1.3)
# ax3.set_ylim(top=df["inference_duration"].max() * 1.3)
# ax4.set_ylim(top=df["memory"].max() * 1.3)
#
# add_value_labels(ax1, percentage=True)
# add_value_labels(ax2)
# add_value_labels(ax3)
# add_value_labels(ax4)
# ```
#
# </p>
# </details>
#
# ### Key Parameters <a name="key-parameters"></a>
# This section examines some of the key parameters when training a deep learning model for image classification. The table below shows default parameters we recommend using.
#
# | Parameter | Default Value |
# | --- | --- |
# | Learning Rate | 1e-4 |
# | Epochs | 15 |
# | Batch Size | 16 |
# | Image Size | 300 X 300 |
#
# __Learning rate__
#
# Learning rate step size is used when optimizing your model with gradient descent and tends to be one of the most important parameters to set when training your model. If your learning rate is set too low, training will progress very slowly since we're only making tiny updates to the weights in your network. However, if your learning rate is too high, it can cause undesirable divergent behavior in your loss function. Generally speaking, choosing a learning rate of 1e-4 was shown to work pretty well for most datasets. If you want to reduce training time (by training for fewer epochs), you can try setting the learning rate to 5e-3, but if you notice a spike in the training or validation loss, you may want to try reducing your learning rate.
#
# You can learn more about learning rate in the [appendix below](#appendix-learning-rate).
#
# __Epochs__
#
# When it comes to choosing the number of epochs, a common question is - _Won't too many epochs cause overfitting_? It turns out that the accuracy on the test set typically does not get worse, even if training for too many epochs. Unless you are working with small datasets, using around 15 epochs tends to work pretty well in most cases.
#
#
# __Batch Size__
#
# Batch size is the number of training samples you use in order to make one update to the model parameters. A batch size of 16 or 32 works well for most cases. The higher the batch size, the faster training will be, but at the expense of an increased DNN memory consumption. Depending on your dataset and the GPU you have, you can start with a batch size of 32, and move down to 16 if your GPU doesn't have enough memory. After a certain increase in batch size, improvements to training speed become marginal, hence we found 16 (or 32) to be a good trade-off between training speed and memory consumption. If you reduce the batch size, you may also have to reduce the learning rate.
#
# __Image size__
#
# The default image size is __300 X 300__ pixels. Using higher image resolution of, for example, __500 X 500__ or even higher, can improve the accuracy of the model but at the cost of longer training and inference times.
#
# You can learn more about the impact of image resolution in the [appendix below](#appendix-imsize).
#
# ### Other Parameters <a name="other-parameters"></a>
#
# In this section, we examine some of the other common hyperparameters when dealing with DNNs. The key take-away is that the exact value of these parameters does not have a big impact on the model's performance, training/inference speed, or memory footprint.
#
# | Parameter | Good Default Value |
# | --- | --- |
# | Dropout | 0.5 or (0.5 on the final layer and 0.25 on all previous layers) |
# | Weight Decay | 0.01 |
# | Momentum | 0.9 or (min=0.85 and max=0.95 when using cyclical momentum) |
#
# __Dropout__
#
# Dropout is a way to discard activations at random when training your model. It is a way to keep the model from over-fitting on the training data. In Fastai, dropout is by default set to 0.5 on the final layer, and 0.25 on all previous layers. Unless there is clear evidence of over-fitting, dropout tends to work well at this default so there is no need to change it much.
#
# __Weight decay (L2 regularization)__
#
# Weight decay is a regularization term applied when minimizing the network's loss. We can think of it as a penalty applied to the weights after an update. This will help prevent the weights from growing too big. In Fastai, the default weight decay is 0.01, which is what we should leave it at.
#
# __Momentum__
#
# Momentum is a way to reach convergence faster when training our model. It is a way to incorporate a weighted average of the most recent updates to the current update. Fastai implements cyclical momentum when calling `fit_one_cycle()`, so the momentum will fluctuate over the course of the training cycle, hence we need a min and max value for momentum.
#
# When using `fit_one_cycle()`, the default value of max=0.95 and min=0.85 is shown to work well. If using `fit()`, the default value of 0.9 has been shown to work well. These defaults provided by Fastai represent a good trade-off between training speed and the ability of the model to converge to a good solution
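# For reference, a minimal sketch of where these parameters appear in the fastai v1 API (assuming the `ps`, `wd` and `moms` keyword arguments; the values shown are simply the defaults discussed above):
#
# ```python
# learn = cnn_learner(data, ARCHITECTURE, metrics=accuracy, ps=0.5, wd=0.01)
# learn.fit_one_cycle(EPOCHS_BODY, LEARNING_RATE, moms=(0.95, 0.85))
# ```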
# ### Testing Parameters <a name="testing-parameters"></a>
# If you want to fine tune parameters and test different parameters, you can use the ParameterSweeper module to find the best parameters. See the [exploring hyperparameters notebook](./11_exploring_hyperparameters.ipynb) for more information.
# ---
# # Appendix <a name="appendix"></a>
# ### Learning Rate <a name="appendix-learning-rate"></a>
#
# One way to mitigate against a low learning rate is to make sure that you're training for many epochs. But this can take a long time.
#
# So, to efficiently build a model, we need to make sure that our learning rate is in the correct range so that we can train for as few epochs as possible. To find a good default learning rate, we've tested various learning rates on 6 different datasets, training the full network for 3 or 15 epochs.
#
# ![lr_comparisons](media/lr_comparisons.png)
#
# <details><summary><em>Understanding the diagram</em></summary>
# <p>
#
# > In the figure on the left, which shows the results of the different learning rates on different datasets at 15 epochs, we can see that a learning rate of 1e-4 does the best overall. But this may not be the case for every dataset. If you look carefully, there is a pretty significant variance between the datasets and it may be possible that a learning rate of 1e-3 works better than a learning rate of 1e-4 for some datasets. In the figure on the right, both 1e-4 and 1e-3 seem to work well. At 15 epochs, the results of 1e-4 are only slightly better than those of 1e-3. However, at 3 epochs, a learning rate of 1e-3 outperforms the learning rate of 1e-4. This makes sense: since we're limiting the training to only 3 epochs, the model that can update its weights more quickly will perform better. As a result, we may lean towards using higher learning rates (such as 1e-3) if we want to minimize the training time, and lower learning rates (such as 1e-4) if training time is not constrained.
#
# </p>
# </details>
#
# In both figures, we can see that learning rates of 1e-3 and 1e-4 tend to work the best across the different datasets and the two settings for epochs. We observe that training using only 3 epochs gives inferior results compared to 15 epochs. Generally speaking, a learning rate of 5e-3 was shown to work pretty well for most datasets. However, for some datasets, a learning rate of 5e-3 will cause the training to diverge. In those cases, try a lower learning rate, like 1e-4.
#
# Fastai has implemented [one cycle policy with cyclical momentum](https://arxiv.org/abs/1803.09820) which requires a maximum learning rate since the learning rate will shift up and down over its training duration. Instead of calling `fit()`, we simply call `fit_one_cycle()`.
#
# ---
#
# <details><summary>See the code to generate the graphs</summary>
# <p>
#
# #### Code snippet to generate graphs in this cell
#
# ```python
# import matplotlib.pyplot as plt
# %matplotlib inline
#
# df_dataset_comp = pd.DataFrame({
# "fashionTexture": [0.8749, 0.8481, 0.2491, 0.670318, 0.1643],
# "flickrLogos32Subset": [0.9069, 0.9064, 0.2179, 0.7175, 0.1073],
# "food101Subset": [0.9294, 0.9127, 0.6891, 0.9090, 0.555827],
# "fridgeObjects": [0.9591, 0.9727, 0.272727, 0.6136, 0.181818],
# "lettuce": [0.8992, 0.9104, 0.632, 0.8192, 0.5120],
# "recycle_v3": [0.9527, 0.9581, 0.766, 0.8591, 0.2876],
# "learning_rate": [0.000100, 0.001000, 0.010000, 0.000010, 0.000001]
# }).set_index("learning_rate")
#
# df_epoch_comp = pd.DataFrame({
# "3_epochs": [0.823808, 0.846394, 0.393808, 0.455115, 0.229120],
# "15_epochs": [0.920367, 0.918067, 0.471138, 0.764786, 0.301474],
# "learning_rate": [0.000100, 0.001000, 0.010000, 0.000010, 0.000001]
# }).set_index("learning_rate")
#
# plt.figure(1)
# ax1 = plt.subplot(121)
# ax2 = plt.subplot(122)
#
# vals = ax2.get_yticks()
#
# df_dataset_comp.sort_index().plot(kind='bar', rot=0, figsize=(15, 6), ax=ax1)
# vals = ax1.get_yticks()
# ax1.set_yticklabels(['{:,.2%}'.format(x) for x in vals])
# ax1.set_ylim(0,1)
# ax1.set_ylabel("Accuracy (%)")
# ax1.set_title("Accuracy of Learning Rates by Datasets @ 15 Epochs")
# ax1.legend(loc=2)
#
# df_epoch_comp.sort_index().plot(kind='bar', rot=0, figsize=(15, 6), ax=ax2)
# ax2.set_yticklabels(['{:,.2%}'.format(x) for x in vals])
# ax2.set_ylim(0,1)
# ax2.set_title("Accuracy of Learning Rates by Epochs")
# ax2.legend(loc=2)
# ```
#
# </p>
# </details>
# ### Image Resolution <a name="appendix-imsize"></a>
#
# A model's input image resolution tends to affect its accuracy. Usually, convolutional neural networks are able to take advantage of higher resolution images. This is especially true if the object-of-interest is small in the image.
#
# But how does it impact some of the other aspects of the model?
#
# It turns out that the image size doesn't affect the model's memory footprint, but it has a huge effect on GPU memory. Image size also has a direct impact on training and inference speeds. An increase in image size will result in slower inference speeds.
#
# ![imsize_comparisons](media/imsize_comparisons.png)
#
# From the results, we can see that an increase in image resolution from __300 X 300__ to __500 X 500__ will increase the performance marginally at the cost of a longer training duration and slower inference speed.
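# In practice, trying a higher resolution only means changing the `size` argument of the transform. A sketch re-using the variables defined earlier in this notebook (499 matches the experiments above):
#
# ```python
# data_hi_res = (
#     ImageList.from_folder(Path(DATA_PATH))
#     .split_by_rand_pct(valid_pct=0.2, seed=10)
#     .label_from_folder()
#     .transform(tfms=get_transforms(), size=499)
#     .databunch(bs=BATCH_SIZE)
#     .normalize(imagenet_stats)
# )
# learn = cnn_learner(data_hi_res, ARCHITECTURE, metrics=accuracy)
# ```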
#
# ---
#
# <details><summary>See the code to generate the graphs</summary>
# <p>
#
# #### Code snippet to generate graphs in this cell
#
# ```python
# import pandas as pd
# from utils_ic.parameter_sweeper import add_value_labels
# %matplotlib inline
#
# df = pd.DataFrame({
# "accuracy": [.9472, .9394, .9190, .9164, .8366, .8251],
# "training_duration": [385.3, 218.8, 280.5, 184.9, 272.5, 182.3],
# "inference_duration": [34.2, 23.2, 27.8, 17.8, 27.6, 17.3],
# "model": ['resnet50 X 499', 'resnet50 X 299', 'resnet18 X 499', 'resnet18 X 299', 'squeezenet1_1 X 499', 'squeezenet1_1 X 299'],
# }).set_index("model"); df
#
# ax1, ax2, ax3 = df.plot.bar(
# rot=90, subplots=True, legend=False, figsize=(12, 12)
# )
#
# for i in range(len(df)):
# if i < len(df)/3:
# ax1.get_children()[i].set_color('r')
# ax2.get_children()[i].set_color('r')
# ax3.get_children()[i].set_color('r')
# if i >= len(df)/3 and i < 2*len(df)/3:
# ax1.get_children()[i].set_color('g')
# ax2.get_children()[i].set_color('g')
# ax3.get_children()[i].set_color('g')
# if i >= 2*len(df)/3:
# ax1.get_children()[i].set_color('b')
# ax2.get_children()[i].set_color('b')
# ax3.get_children()[i].set_color('b')
#
# ax1.set_title("Accuracy (%)")
# ax2.set_title("Training Duration (seconds)")
# ax3.set_title("Inference Speed (seconds)")
#
# ax1.set_ylabel("%")
# ax2.set_ylabel("seconds")
# ax3.set_ylabel("seconds")
#
# ax1.set_ylim(top=df["accuracy"].max() * 1.2)
# ax2.set_ylim(top=df["training_duration"].max() * 1.2)
# ax3.set_ylim(top=df["inference_duration"].max() * 1.2)
#
# add_value_labels(ax1, percentage=True)
# add_value_labels(ax2)
# add_value_labels(ax3)
# ```
#
# </p>
# </details>
# ### How we found good default parameters <a name="appendix-good-parameters"></a>
#
# To explore the characteristics of a model, we - the computer vision repo team - have conducted various experiments to explore the impact of different hyperparameters on a model's _accuracy_, _training duration_, _inference speed_, and _memory footprint_. In this notebook, we used the results of our experiments to give us concrete evidence when it comes to understanding which parameters work and which don't.
#
# #### Datasets <a name="datasets"></a>
#
# For our experiments, we relied on a set of six different classification datasets. When selecting these datasets, we wanted to have a variety of image types with different amounts of data and number of classes.
#
# | Dataset Name | Number of Images | Number of Classes |
# | --- | --- | --- |
# | food101Subset | 5000 | 5 |
# | flickrLogos32Subset | 2740 | 33 |
# | fashionTexture | 1716 | 11 |
# | recycle_v3 | 564 | 11 |
# | lettuce | 380 | 2 |
# | fridgeObjects | 134 | 4 |
#
# #### Model Characteristics <a name="model-characteristics"></a>
#
# In our experiment, we look at these characteristics to evaluate the impact of various parameters. Here is how we calculated each of the following metrics:
#
# - __Accuracy__
#
# Accuracy is our evaluation metric for the model. It represents the average accuracy over 5 runs for our six different datasets.
#
#
# - __Training Duration__
#
# The training duration is how long it takes to train the model. It represents the average duration over 5 runs for our six different datasets.
#
#
# - __Inference Speed__
#
# The inference speed is the time it takes the model to run 1000 predictions.
#
#
# - __Memory Footprint__
#
# The memory footprint is the size of the model parameters saved as a pickled file. This can be achieved by running `learn.export(...)` and examining the size of the exported file.
#


@ -0,0 +1,588 @@
#!/usr/bin/env python
# coding: utf-8
# <i>Copyright (c) Microsoft Corporation. All rights reserved.</i>
#
# <i>Licensed under the MIT License.</i>
# # Building Models for Accuracy vs. Speed
#
# The goal of this notebook is to understand how to train a model with different parameters to achieve either a highly accurate model that is slow at inference time, or a model with fast inference speed but lower accuracy.
#
# For example, in IoT settings the inferencing device has limited computational capabilities. This means we need to design our models to have a small memory footprint. In contrast, medical scenarios often require the highest possible accuracy because the cost of mis-classification could impact the well-being of a patient. In this scenario, the accuracy of the model can not be compromised.
#
# We have conducted various experiments on diverse datasets to find parameters which work well in a wide variety of settings balancing high accuracy or fast inference. In this notebook, we provide these parameters so that your initial models can be trained without any parameter tuning. For most datasets, these parameters are close to optimal. In the second part of the notebook, we provide guidelines on how to fine-tune these parameters based on how they impact the model.
#
# We recommend first training your model with the default parameters, evaluating the results, and then fine-tuning parameters to achieve better results as necessary.
# ## Table of Contents:
# * [Training a High Accuracy, Fast Inference, or Small Size Classifier](#model)
# * [Choosing between two types of models](#choosing)
# * [Pre-processing](#preprocessing)
# * [Training](#training)
# * [Evaluation](#evaluation)
# * [Fine tuning our models](#finetuning)
# * [DNN architectures](#dnn)
# * [Key parameters](#key-parameters)
# * [Additional parameters](#other-parameters)
# * [Testing parameters](#testing-parameters)
# * [Appendix](#appendix)
# * [Learning rate](#appendix-learning-rate)
# * [Image size](#appendix-imsize)
# * [How we found good parameters](#appendix-good-parameters)
# # Training a High Accuracy, Fast Inference, or Small Size Classifier <a name="model"></a>
# Let's first verify our fast.ai version:
# In[1]:
import fastai
fastai.__version__
# Ensure edits to libraries are loaded and plotting is shown in the notebook.
# In[2]:
get_ipython().run_line_magic("reload_ext", "autoreload")
get_ipython().run_line_magic("autoreload", "2")
get_ipython().run_line_magic("matplotlib", "inline")
# Import all the functions we need.
# In[3]:
import sys
sys.path.append("../../")
import os
from pathlib import Path
from utils_cv.classification.data import Urls, is_data_multilabel
from utils_cv.common.data import unzip_url
from utils_cv.classification.model import hamming_accuracy
from fastai.metrics import accuracy
from fastai.vision import (
models,
ImageList,
imagenet_stats,
cnn_learner,
get_transforms,
open_image,
)
# Now that we've set up our notebook, let's set the hyperparameters based on which model type was selected.
# ## Choosing between types of models <a name="choosing"></a>
# For most scenarios, computer vision practitioners want to create a high accuracy model, a fast-inference model or a small size model. Set your `MODEL_TYPE` variable to one of the following: `"high_accuracy"`, `"fast_inference"`, or `"small_size"`.
#
# We will use the `FridgeObjects` dataset from a [previous notebook](01_training_introduction.ipynb) again. You can replace the `DATA_PATH` variable with your own data.
#
# When choosing the batch size, remember that even mid-level GPUs run out of memory when training a deeper ResNet model with larger image resolutions. If you get an _out of memory_ error, try reducing the batch size by a factor of 2.
# In[4]:
# Choose between "high_accuracy", "fast_inference", or "small_size"
MODEL_TYPE = "fast_inference"
# Path to your data
DATA_PATH = unzip_url(Urls.fridge_objects_path, exist_ok=True)
# Epochs to train for
EPOCHS_HEAD = 4
EPOCHS_BODY = 12
LEARNING_RATE = 1e-4
BATCH_SIZE = 16
# Make sure that `MODEL_TYPE` is correctly set.
# In[5]:
assert MODEL_TYPE in ["high_accuracy", "fast_inference", "small_size"]
# Set parameters based on your selected model.
# In[6]:
if MODEL_TYPE == "high_accuracy":
ARCHITECTURE = models.resnet50
IM_SIZE = 500
if MODEL_TYPE == "fast_inference":
ARCHITECTURE = models.resnet18
IM_SIZE = 300
if MODEL_TYPE == "small_size":
ARCHITECTURE = models.squeezenet1_1
IM_SIZE = 300
# We'll automatically determine if your dataset is a multi-label or traditional (single-label) classification problem. To do so, we'll use the `is_data_multilabel` helper function. In order to detect whether or not a dataset is multi-label, the helper function will check to see if the datapath contains a csv file that has a column 'labels' where the values are space-delimited. You can inspect the function by calling `is_data_multilabel??`.
#
# This function assumes that your multi-label dataset is structured in the recommended format shown in the [multilabel notebook](02_multilabel_classification.ipynb).
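# For intuition, a rough sketch of that kind of check might look as follows (`looks_multilabel` is a hypothetical helper, not the actual `is_data_multilabel` implementation; it assumes a `labels.csv` as described above):
#
# ```python
# import pandas as pd
# from pathlib import Path
#
# def looks_multilabel(data_path):
#     csv_path = Path(data_path) / "labels.csv"
#     if not csv_path.is_file():
#         return False
#     df = pd.read_csv(csv_path)
#     return "labels" in df.columns and df["labels"].astype(str).str.contains(" ").any()
# ```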
# In[7]:
multilabel = is_data_multilabel(DATA_PATH)
metric = accuracy if not multilabel else hamming_accuracy
# ## Pre-processing <a name="preprocessing"></a>
#
# JPEG decoding represents a performance bottleneck on systems with powerful GPUs which can slow down training significantly. We recommend creating a down-sized copy of the dataset if training takes too long, or if you require multiple training runs to evaluate different parameters.
#
# The following function will automate image downsizing.
# ```python
# from utils_cv.classification.data import downsize_imagelist
#
# downsize_imagelist(im_list = ImageList.from_folder(Path(DATA_PATH)),
# out_dir = "downsized_images",
# max_dim = IM_SIZE)
# ```
#
# Once complete, update the `DATA_PATH` variable to point to `out_dir` so that this notebook uses these resized images.
#
# ## Training <a name="training"></a>
#
# We'll now re-apply the same steps we did in the [01_training_introduction](01_training_introduction.ipynb) notebook here.
# Load the data:
# In[8]:
label_list = (
(
ImageList.from_folder(Path(DATA_PATH))
.split_by_rand_pct(valid_pct=0.2, seed=10)
.label_from_folder()
)
if not multilabel
else (
ImageList.from_csv(Path(DATA_PATH), "labels.csv", folder="images")
.split_by_rand_pct(valid_pct=0.2, seed=10)
.label_from_df(label_delim=" ")
)
)
# In[9]:
data = (
label_list.transform(tfms=get_transforms(), size=IM_SIZE)
.databunch(bs=BATCH_SIZE)
.normalize(imagenet_stats)
)
# Create the learner.
# In[10]:
learn = cnn_learner(data, ARCHITECTURE, metrics=metric)
# Train the last layer for a few epochs.
# In[11]:
learn.fit_one_cycle(EPOCHS_HEAD, LEARNING_RATE)
# Unfreeze the layers.
# In[12]:
learn.unfreeze()
# Fine-tune the network for the remaining epochs.
# In[13]:
learn.fit_one_cycle(EPOCHS_BODY, LEARNING_RATE)
# ## Evaluation <a name="evaluation"></a>
# In [01_training introduction](01_training_introduction.ipynb), we demonstrated evaluating a CV model using the performance metrics for precision, recall and ROC. In this section, we will evaluate our model using the following characteristics:
# - accuracy (performance)
# - inference speed
# - parameter export size / memory footprint required
# ### Performance
# To keep things simple, we just look at the final evaluation metric on the validation set.
# In[14]:
_, score = learn.validate(learn.data.valid_dl, metrics=[metric])
print(f"{metric.__name__} on validation set: {float(score)}")
# ### Inference speed
#
# Time model inference speed.
# In[15]:
im_folder = learn.data.classes[0] if not multilabel else "images"
im = open_image(f"{(Path(DATA_PATH)/im_folder).ls()[0]}")
# In[16]:
get_ipython().run_cell_magic("timeit", "", "learn.predict(im)")
# ### Memory footprint
#
# Export the model to inspect the size of the model file.
# In[17]:
learn.export(f"{MODEL_TYPE}")
# In[18]:
size_in_mb = os.path.getsize(Path(DATA_PATH) / MODEL_TYPE) / (1024 * 1024.0)
print(f"'{MODEL_TYPE}' is {round(size_in_mb, 2)}MB.")
# ---
# # Fine-tuning parameters <a name="finetuning"></a>
#
# If you use the default parameters we have provided, you can get good results across a wide variety of datasets. However, as in most machine learning projects, getting the best possible results for a new dataset often requires tuning the parameters further. The following section provides guidelines on optimizing for accuracy, inference speed, or model size for a given dataset. We'll go through the parameters that will make the largest impact on your model as well as the parameters that may not be worth modifying.
#
# Generally speaking, models for image classification come with a trade-off between training time versus model accuracy. The four parameters that have the biggest impact on this trade-off are the DNN architecture, image resolution, learning rate, and number of epochs. DNN architecture and image resolution will additionally affect the model's inference time and memory footprint. As a rule of thumb, deeper networks with high image resolution will achieve higher accuracy at the cost of large model sizes and low training and inference speeds. Shallow networks with low image resolution will result in models with fast inference speed, fast training speeds and low model sizes at the cost of the model accuracy.
# ## DNN architectures <a name="dnn"></a>
#
# When choosing an architecture, we want to make sure it fits our requirements for accuracy, memory footprint, inference speed and training speeds. Some DNNs have hundreds of layers and end up with a large memory footprint and millions of parameters to tune, while others are compact and small enough to fit onto memory limited edge devices.
#
# Let's take a __squeezenet1_1__ model, a __resnet18__ model and a __resnet50__ model and compare these using an experiment over a diverse set of 6 datasets. (More about the datasets in the appendix below.)
#
# ![architecture_comparisons](media/architecture_comparisons.png)
#
# As you can see from the graph, there is a clear trade-off when deciding between the models.
#
# In terms of accuracy, __resnet50__ outperforms the rest, but it also suffers from having the highest memory footprint, and the longest training and inference times. Alternatively, __squeezenet1_1__ performs the worst in terms of accuracy, but has the smallest memory footprint.
#
# Generally speaking, given enough data, the deeper DNN and the higher the image resolution, the higher the accuracy you'll be able to achieve with your model.
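#
# To make the switch concrete, here is a minimal sketch (reusing the `data` and `metric` objects defined earlier in this notebook) showing that changing architectures only means changing the model passed to `cnn_learner`:
#
# ```python
# from fastai.vision import cnn_learner, models
#
# # Pick the architecture that matches your accuracy / size / speed requirements.
# learn = cnn_learner(data, models.squeezenet1_1, metrics=metric)  # smallest footprint
# # learn = cnn_learner(data, models.resnet18, metrics=metric)     # balanced trade-off
# # learn = cnn_learner(data, models.resnet50, metrics=metric)     # highest accuracy
# ```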
#
# ---
#
# <details><summary>See the code to generate the graphs</summary>
# <p>
#
# ### Code snippet to generate graphs in this cell
#
# ```python
# import pandas as pd
# from utils_cv.classification.parameter_sweeper import add_value_labels
# %matplotlib inline
#
# df = pd.DataFrame({
# "accuracy": [.9472, .9190, .8251],
# "training_duration": [385.3, 280.5, 272.5],
# "inference_duration": [34.2, 27.8, 27.6],
# "memory": [99, 45, 4.9],
# "model": ['resnet50', 'resnet18', 'squeezenet1_1'],
# }).set_index("model")
#
# ax1, ax2, ax3, ax4 = df.plot.bar(
# rot=90, subplots=True, legend=False, figsize=(8,10)
# )
#
# for ax in [ax1, ax2, ax3, ax4]:
# for i in [0, 1, 2]:
# if i==0: ax.get_children()[i].set_color('r')
# if i==1: ax.get_children()[i].set_color('g')
# if i==2: ax.get_children()[i].set_color('b')
#
# ax1.set_title("Accuracy (%)")
# ax2.set_title("Training Duration (seconds)")
# ax3.set_title("Inference Time (seconds)")
# ax4.set_title("Memory Footprint (mb)")
#
# ax1.set_ylabel("%")
# ax2.set_ylabel("seconds")
# ax3.set_ylabel("seconds")
# ax4.set_ylabel("mb")
#
# ax1.set_ylim(top=df["accuracy"].max() * 1.3)
# ax2.set_ylim(top=df["training_duration"].max() * 1.3)
# ax3.set_ylim(top=df["inference_duration"].max() * 1.3)
# ax4.set_ylim(top=df["memory"].max() * 1.3)
#
# add_value_labels(ax1, percentage=True)
# add_value_labels(ax2)
# add_value_labels(ax3)
# add_value_labels(ax4)
# ```
#
# </p>
# </details>
#
# ## Key parameters <a name="key-parameters"></a>
# This section examines some of the key parameters used when training a deep learning model for image classification. The table below shows the default parameters we recommend:
#
# | Parameter | Default Value |
# | --- | --- |
# | Learning Rate | 1e-4 |
# | Epochs | 15 |
# | Batch Size | 16 |
# | Image Size | 300 X 300 |
#
# __Learning rate__
#
# Learning rate, or step size, controls how much the weights are updated in each step of gradient descent and tends to be one of the most important parameters to set when training your model. If your learning rate is set too low, training will progress very slowly since we're only making tiny updates to the weights in your network. However, if your learning rate is too high, it can cause undesirable divergent behavior in your loss function. Generally speaking, a learning rate of 1e-4 has been shown to work well for most datasets. If you want to reduce training time (by training for fewer epochs), you can try setting the learning rate to 5e-3, but if you notice a spike in the training or validation loss, you may want to try reducing your learning rate.
#
# The learning rate section of [appendix below](#appendix-learning-rate) has more detail.
#
# __Epochs__
#
# An _epoch_ is one full pass of the training data through the network. When it comes to choosing the number of epochs, a common question is: _won't too many epochs cause overfitting?_ It turns out that the accuracy on the test set typically does not get worse, even when training for too many epochs. Unless you are working with small datasets, using around 15 epochs tends to work well in most cases.
#
#
# __Batch Size__
#
# Batch size is the number of training samples used to make one update to the model parameters. A batch size of 16 or 32 works well for most cases. Larger batch sizes help speed up training, but at the expense of increased GPU memory consumption. Depending on your dataset and the GPU you have, you can start with a batch size of 32 and move down to 16 if your GPU doesn't have enough memory. Beyond a certain batch size, improvements to training speed become marginal, hence we found 16 (or 32) to be a good trade-off between training speed and memory consumption. If you reduce the batch size, you may also have to reduce the learning rate.
#
# __Image size__
#
# The default image size is __300 X 300__ pixels. Using higher image resolutions can help improve model accuracy but will result in longer training and inference times.
#
# The [appendix below](#appendix-imsize) discusses the impact of image resolution in detail.
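#
# As a recap, here is a hedged sketch of where each of these four parameters plugs into the fast.ai calls used earlier in this notebook (`label_list`, `ARCHITECTURE`, and the upper-case constants are the ones defined above):
#
# ```python
# data = (
#     label_list.transform(tfms=get_transforms(), size=IM_SIZE)  # image size
#     .databunch(bs=BATCH_SIZE)                                   # batch size
#     .normalize(imagenet_stats)
# )
# learn = cnn_learner(data, ARCHITECTURE, metrics=metric)
# learn.fit_one_cycle(EPOCHS_HEAD, LEARNING_RATE)  # epochs and learning rate (head only)
# learn.unfreeze()
# learn.fit_one_cycle(EPOCHS_BODY, LEARNING_RATE)  # epochs and learning rate (full network)
# ```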
#
# ## Additional parameters <a name="other-parameters"></a>
#
# There are many other hyperparameters that can be tuned for DNNs, though in our experience the exact values of these parameters do not have a large impact on model performance, training/inference speed, or memory footprint.
#
# | Parameter | Good Default Value |
# | --- | --- |
# | Dropout | 0.5 or (0.5 on the final layer and 0.25 on all previous layers) |
# | Weight Decay | 0.01 |
# | Momentum | 0.9 or (min=0.85 and max=0.95 when using cyclical momentum) |
#
# __Dropout__
#
# Dropout discards activations at random during training. It is a way to keep the model from over-fitting on the training data. In fast.ai, dropout is set to 0.5 by default on the final layer and 0.25 on all other layers. Unless there is clear evidence of over-fitting, these defaults tend to work well.
#
# __Weight decay (L2 regularization)__
#
# Weight decay is a regularization term applied to help minimize the network loss function. We can think of it as a penalty applied to the weights after an update, which prevents the weights from growing too large (the model may not converge if the weights get too large). In fast.ai, the default weight decay is 0.01, which we find to be almost always acceptable.
#
# __Momentum__
#
# Momentum is a way to accelerate convergence when training a model. Momentum uses a weighted average of the most recent updates applied to the current update. Fast.ai implements cyclical momentum when calling `fit_one_cycle()`, so the momentum will fluctuate over the course of the training cycle. We control this by setting a min and max value for the momentum.
#
# When using `fit_one_cycle()`, the default values of max=0.95 and min=0.85 are known to work well. If using `fit()`, the default value of 0.9 has been shown to work well. These defaults represent a good trade-off between training speed and the ability of the model to converge to a good solution.
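#
# As a hedged sketch (using the fast.ai v1 keyword arguments `ps`, `wd` and `moms`, with the default values from the table above), these knobs can be set when creating the learner and when calling `fit_one_cycle()`:
#
# ```python
# learn = cnn_learner(
#     data, ARCHITECTURE, metrics=metric,
#     ps=0.5,   # dropout on the final layer
#     wd=0.01,  # weight decay (L2 regularization)
# )
# learn.fit_one_cycle(
#     EPOCHS_HEAD, LEARNING_RATE,
#     moms=(0.95, 0.85),  # cyclical momentum (max, min)
# )
# ```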
# ## Testing parameters <a name="testing-parameters"></a>
# The `ParameterSweeper` module can be used to search over the parameter space to locate the "best" value for that parameter. See the [exploring hyperparameters notebook](./11_exploring_hyperparameters.ipynb) for more information.
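#
# For orientation, a hedged sketch of a sweep is shown below; only `ParameterSweeper()` and its `parameters` property appear directly in this document, so the `update_parameters` and `run` calls here are assumptions -- check the linked notebook for the exact API:
#
# ```python
# from utils_cv.classification.parameter_sweeper import (
#     ParameterSweeper, clean_sweeper_df, plot_sweeper_df,
# )
#
# sweeper = ParameterSweeper()
# print(sweeper.parameters)  # inspect the default values
#
# # Assumed API -- see 11_exploring_hyperparameters.ipynb for the exact calls
# sweeper.update_parameters(learning_rate=[1e-3, 1e-4], im_size=[299, 499])
# df = sweeper.run(datasets=[DATA_PATH], reps=3)
# plot_sweeper_df(clean_sweeper_df(df))
# ```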
# ---
# # Appendix <a name="appendix"></a>
# ## Learning rate <a name="appendix-learning-rate"></a>
#
# Setting a low learning rate requires training for many epochs to reach convergence. However, each additional epoch directly increases the model training time in a linear fashion. To efficiently build a model, it helps to set the learning rate in the correct range. To demonstrate this, we've tested various learning rates on 6 different datasets, training the full network for 3 or 15 epochs.
#
# ![lr_comparisons](media/lr_comparisons.png)
#
# <details><summary><em>Understanding the diagram</em></summary>
# <p>
#
# > The figure on the left shows results of different learning rates on different datasets at 15 epochs. We see that a learning rate of 1e-4 results in the best overall accuracy for the datasets we have tested. Notice there is a fairly significant variance between the datasets, and a learning rate of 1e-3 may work better for some datasets.
# In the figure on the right, at 15 epochs, the results of 1e-4 are only slightly better than those of 1e-3. However, at only 3 epochs, a learning rate of 1e-3 outperforms the smaller learning rates. This makes sense since we're limiting the training to only 3 epochs, so a model that updates its weights more quickly should perform better. Effectively, a larger learning rate brings the model closer to convergence within the limited number of epochs. This result indicates that higher learning rates (such as 1e-3) may help minimize the training time, while lower learning rates (such as 1e-5) may be better if training time is not constrained.
#
# </p>
# </details>
#
# In both figures, we can see that learning rates of 1e-3 and 1e-4 tend to work well in general. We observe that training with 3 epochs results in lower accuracy compared to 15 epochs. In some cases, smaller learning rates may prevent the DNN from converging.
#
# Fast.ai has implemented the [one cycle policy with cyclical momentum](https://arxiv.org/abs/1803.09820), which varies the learning rate over the course of training. The method takes a maximum learning rate value as an argument, which helps it avoid convergence problems. Replace the `fit()` method with `fit_one_cycle()` to use this capability.
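#
# A minimal illustration of the switch (assuming the learner and constants defined earlier in this notebook):
#
# ```python
# # Fixed learning rate for every batch:
# # learn.fit(EPOCHS_HEAD, lr=LEARNING_RATE)
#
# # One cycle policy -- pass the *maximum* learning rate instead:
# learn.fit_one_cycle(EPOCHS_HEAD, max_lr=LEARNING_RATE)
# ```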
#
# ---
#
# <details><summary>See the code to generate the graphs</summary>
# <p>
#
# ### Code snippet to generate graphs in this cell
#
# ```python
# import matplotlib.pyplot as plt
# %matplotlib inline
#
# df_dataset_comp = pd.DataFrame({
# "fashionTexture": [0.8749, 0.8481, 0.2491, 0.670318, 0.1643],
# "flickrLogos32Subset": [0.9069, 0.9064, 0.2179, 0.7175, 0.1073],
# "food101Subset": [0.9294, 0.9127, 0.6891, 0.9090, 0.555827],
# "fridgeObjects": [0.9591, 0.9727, 0.272727, 0.6136, 0.181818],
# "lettuce": [0.8992, 0.9104, 0.632, 0.8192, 0.5120],
# "recycle_v3": [0.9527, 0.9581, 0.766, 0.8591, 0.2876],
# "learning_rate": [0.000100, 0.001000, 0.010000, 0.000010, 0.000001]
# }).set_index("learning_rate")
#
# df_epoch_comp = pd.DataFrame({
# "3_epochs": [0.823808, 0.846394, 0.393808, 0.455115, 0.229120],
# "15_epochs": [0.920367, 0.918067, 0.471138, 0.764786, 0.301474],
# "learning_rate": [0.000100, 0.001000, 0.010000, 0.000010, 0.000001]
# }).set_index("learning_rate")
#
# plt.figure(1)
# ax1 = plt.subplot(121)
# ax2 = plt.subplot(122)
#
# vals = ax2.get_yticks()
#
# df_dataset_comp.sort_index().plot(kind='bar', rot=0, figsize=(15, 6), ax=ax1)
# vals = ax1.get_yticks()
# ax1.set_yticklabels(['{:,.2%}'.format(x) for x in vals])
# ax1.set_ylim(0,1)
# ax1.set_ylabel("Accuracy (%)")
# ax1.set_title("Accuracy of Learning Rates by Datasets @ 15 Epochs")
# ax1.legend(loc=2)
#
# df_epoch_comp.sort_index().plot(kind='bar', rot=0, figsize=(15, 6), ax=ax2)
# ax2.set_yticklabels(['{:,.2%}'.format(x) for x in vals])
# ax2.set_ylim(0,1)
# ax2.set_title("Accuracy of Learning Rates by Epochs")
# ax2.legend(loc=2)
# ```
#
# </p>
# </details>
# ## Image resolution <a name="appendix-imsize"></a>
#
# A model's input image resolution also impacts model accuracy. Usually, convolutional neural networks are able to take advantage of higher resolution images, especially if the object-of-interest is small in the overall image. But how does image size impact other model aspects?
#
# We find that image size doesn't significantly affect the model's memory footprint given the same network architecture, but it has a huge effect on GPU memory. Image size also impacts training and inference speeds.
#
# ![imsize_comparisons](media/imsize_comparisons.png)
#
# From the results, we can see that an increase in image resolution from __300 X 300__ to __500 X 500__ will increase the performance marginally at the cost of a longer training duration and slower inference speed.
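#
# In code, switching resolutions only means changing the `size` argument of the transform step; a hedged sketch, reusing `label_list` and `BATCH_SIZE` from earlier in this notebook:
#
# ```python
# # Higher resolution: potentially higher accuracy, but slower training/inference
# # and higher GPU memory usage.
# data_hi_res = (
#     label_list.transform(tfms=get_transforms(), size=499)
#     .databunch(bs=BATCH_SIZE)
#     .normalize(imagenet_stats)
# )
# ```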
#
# ---
#
# <details><summary>See the code to generate the graphs</summary>
# <p>
#
# ### Code snippet to generate graphs in this cell
#
# ```python
# import pandas as pd
# from utils_cv.classification.parameter_sweeper import add_value_labels
# %matplotlib inline
#
# df = pd.DataFrame({
# "accuracy": [.9472, .9394, .9190, .9164, .8366, .8251],
# "training_duration": [385.3, 218.8, 280.5, 184.9, 272.5, 182.3],
# "inference_duration": [34.2, 23.2, 27.8, 17.8, 27.6, 17.3],
# "model": ['resnet50 X 499', 'resnet50 X 299', 'resnet18 X 499', 'resnet18 X 299', 'squeezenet1_1 X 499', 'squeezenet1_1 X 299'],
# }).set_index("model"); df
#
# ax1, ax2, ax3 = df.plot.bar(
# rot=90, subplots=True, legend=False, figsize=(12, 12)
# )
#
# for i in range(len(df)):
# if i < len(df)/3:
# ax1.get_children()[i].set_color('r')
# ax2.get_children()[i].set_color('r')
# ax3.get_children()[i].set_color('r')
# if i >= len(df)/3 and i < 2*len(df)/3:
# ax1.get_children()[i].set_color('g')
# ax2.get_children()[i].set_color('g')
# ax3.get_children()[i].set_color('g')
# if i >= 2*len(df)/3:
# ax1.get_children()[i].set_color('b')
# ax2.get_children()[i].set_color('b')
# ax3.get_children()[i].set_color('b')
#
# ax1.set_title("Accuracy (%)")
# ax2.set_title("Training Duration (seconds)")
# ax3.set_title("Inference Speed (seconds)")
#
# ax1.set_ylabel("%")
# ax2.set_ylabel("seconds")
# ax3.set_ylabel("seconds")
#
# ax1.set_ylim(top=df["accuracy"].max() * 1.2)
# ax2.set_ylim(top=df["training_duration"].max() * 1.2)
# ax3.set_ylim(top=df["inference_duration"].max() * 1.2)
#
# add_value_labels(ax1, percentage=True)
# add_value_labels(ax2)
# add_value_labels(ax3)
# ```
#
# </p>
# </details>
# ## How we found good default parameters <a name="appendix-good-parameters"></a>
#
# We conducted various experiments to explore the impact of different hyperparameters on a model's _accuracy_, _training duration_, _inference speed_, and _memory footprint_.
#
# ### Datasets <a name="datasets"></a>
#
# For our experiments, we relied on a set of six different classification datasets. When selecting these datasets, we wanted a variety of image types with different amounts of data and numbers of classes.
#
# | Dataset Name | Number of Images | Number of Classes |
# | --- | --- | --- |
# | food101Subset | 5000 | 5 |
# | flickrLogos32Subset | 2740 | 33 |
# | fashionTexture | 1716 | 11 |
# | recycle_v3 | 564 | 11 |
# | lettuce | 380 | 2 |
# | fridgeObjects | 134 | 4 |
#
# ### Model Characteristics <a name="model-characteristics"></a>
#
# In our experiment, we look at these characteristics to evaluate the impact of various parameters. Here is how we calculated each of the following metrics:
#
# - __Accuracy__ metric is averaged over 5 runs for each dataset.
#
#
# - __Training Duration__ metric is the average duration over 5 runs for each dataset.
#
#
# - __Inference Speed__ is the time it takes the model to run 1000 predictions.
#
#
# - __Memory Footprint__ is the size of the model pickle file output from the `learn.export(...)` method.
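#
# For reference, a hedged sketch of how the inference-speed and memory-footprint numbers can be reproduced with the objects already defined in this notebook (the notebook itself times a single prediction with `%timeit`; timing 1000 predictions is shown here for illustration):
#
# ```python
# import os
# import time
# from pathlib import Path
#
# # Inference speed: time 1000 predictions on a single image
# start = time.time()
# for _ in range(1000):
#     learn.predict(im)
# print(f"1000 predictions took {time.time() - start:.1f} seconds")
#
# # Memory footprint: size of the exported model file
# learn.export(MODEL_TYPE)
# size_in_mb = os.path.getsize(Path(DATA_PATH) / MODEL_TYPE) / (1024 * 1024.0)
# print(f"'{MODEL_TYPE}' is {round(size_in_mb, 2)}MB.")
# ```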
#
# In[ ]:

View file

@ -11,7 +11,7 @@
#
# ## Table of Contents
#
# * [Testing parameter](#hyperparam)
# * [Testing hyperparameters](#hyperparam)
# * [Using Python](#python)
# * [Using the CLI](#cli)
# * [Visualizing the results](#visualize)
@ -20,7 +20,7 @@
# ## Testing hyperparameters <a name="hyperparam"></a>
#
# Lets say we want to learn more about __how different learning rates and different image sizes affect our model's accuracy when restricted to 10 epochs__, and we want to build an experiment to test out these hyperparameters. We also want to try these parameters out on two different variations of the dataset - one where the images are kept raw (maybe there is a watermark on the image) and one where the images have been altered (the same dataset where there was some attempt to remove the watermark).
# Let's say we want to learn more about __how different learning rates and different image sizes affect our model's accuracy when restricted to 10 epochs__, and we want to build an experiment to test out these hyperparameters. We also want to try these parameters out on two different variations of the dataset - one where the images are kept raw (maybe there is a watermark on the image) and one where the images have been altered (the same dataset where there was some attempt to remove the watermark).
#
# In this notebook, we'll walk through how we use the Parameter Sweeper module with the following:
#
@ -58,7 +58,11 @@ import sys
sys.path.append("../../")
from utils_cv.classification.data import Urls
from utils_cv.common.data import unzip_url
from utils_cv.classification.parameter_sweeper import *
from utils_cv.classification.parameter_sweeper import (
ParameterSweeper,
clean_sweeper_df,
plot_sweeper_df,
)
# To use the Parameter Sweeper tool for single label classification, we'll need to make sure that the data is stored such that images are sorted into their classes inside of a subfolder. In this notebook, we'll use the Fridge Objects dataset, which is already stored in the correct format. We also want to use the Fridge Objects Watermarked dataset. We want to see whether the original images (which are watermarked) will perform just as well as the non-watermarked images.
@ -88,7 +92,7 @@ EPOCHS = [10]
sweeper = ParameterSweeper()
# Before we start testing, it's a good idea to see what the default parameters Are. We can use a the property `parameters` to easily see those default values.
# Before we start testing, it's a good idea to see what the default parameters are. We can use the property `parameters` to easily see those default values.
# In[6]:

View file

@ -49,8 +49,7 @@ import os
import sys
# fast.ai
from fastai.vision import *
import torchvision.models as models
from fastai.vision import models
# Azure
import azureml.core
@ -281,7 +280,7 @@ scoring_script = "score.py"
get_ipython().run_cell_magic(
"writefile",
"$scoring_script",
'# Copyright (c) Microsoft. All rights reserved.\n# Licensed under the MIT license.\n\nimport json\n\nfrom base64 import b64decode\nfrom io import BytesIO\n\nfrom azureml.core.model import Model\nfrom fastai.vision import *\n\ndef init():\n global model\n model_path = Model.get_model_path(model_name=\'im_classif_resnet18\')\n # ! We cannot use the *model_name* variable here otherwise the execution on Azure will fail !\n \n model_dir_path, model_filename = os.path.split(model_path)\n model = load_learner(path=model_dir_path, fname=model_filename)\n\n\ndef run(raw_data):\n\n # Expects raw_data to be a list within a json file\n result = [] \n \n for im_string in json.loads(raw_data)[\'data\']:\n im_bytes = b64decode(im_string)\n try:\n im = open_image(BytesIO(im_bytes))\n pred_class, pred_idx, outputs = model.predict(im)\n result.append({"label": str(pred_class), "probability": str(outputs[pred_idx].item())})\n except Exception as e:\n result.append({"label": str(e), "probability": \'\'})\n return result',
'# Copyright (c) Microsoft. All rights reserved.\n# Licensed under the MIT license.\n\nimport os\nimport json\n\nfrom base64 import b64decode\nfrom io import BytesIO\n\nfrom azureml.core.model import Model\nfrom fastai.vision import load_learner, open_image\n\ndef init():\n global model\n model_path = Model.get_model_path(model_name=\'im_classif_resnet18\')\n # ! We cannot use the *model_name* variable here otherwise the execution on Azure will fail !\n \n model_dir_path, model_filename = os.path.split(model_path)\n model = load_learner(path=model_dir_path, fname=model_filename)\n\n\ndef run(raw_data):\n\n # Expects raw_data to be a list within a json file\n result = [] \n \n for im_string in json.loads(raw_data)[\'data\']:\n im_bytes = b64decode(im_string)\n try:\n im = open_image(BytesIO(im_bytes))\n pred_class, pred_idx, outputs = model.predict(im)\n result.append({"label": str(pred_class), "probability": str(outputs[pred_idx].item())})\n except Exception as e:\n result.append({"label": str(e), "probability": \'\'})\n return result',
)
@ -292,7 +291,7 @@ get_ipython().run_cell_magic(
# In[17]:
# Create a deployment-specific yaml file from image_classification/environment.yml
# Create a deployment-specific yaml file from classification/environment.yml
try:
generate_yaml(
directory=os.path.join(root_path(), "classification"),
@ -338,7 +337,7 @@ except WebserviceException:
# Create the Docker image
try:
docker_image = ContainerImage.create(
name="image-classif-resnet18-f48-2",
name="image-classif-resnet18-f48",
models=[model],
image_config=image_config,
workspace=ws,

View file

@ -44,12 +44,13 @@ get_ipython().run_line_magic("autoreload", "2")
# Regular python libraries
import inspect
import json
import os
import requests
import sys
# fast.ai
from fastai.vision import *
from fastai.vision import open_image
# Azure
import azureml.core

View file

@ -11,8 +11,9 @@ sys.path.append(
)
import argparse
import time
from typing import Dict, List, Any
from utils_cv.classification.parameter_sweeper import *
from utils_cv.classification.parameter_sweeper import ParameterSweeper
from argparse import RawTextHelpFormatter, Namespace
argparse_desc_msg = """

View file

@ -2,7 +2,12 @@
# Licensed under the MIT License.
import pytest
from utils_cv.classification.parameter_sweeper import *
import pandas as pd
from utils_cv.classification.parameter_sweeper import (
ParameterSweeper,
clean_sweeper_df,
plot_sweeper_df,
)
def _test_sweeper_run(df: pd.DataFrame, df_length: int):