diff --git a/.ci/azure-pipeline-azureml-notebook-test.yml b/.ci/azure-pipeline-azureml-notebook-test.yml index 79c415a..a9eee3f 100644 --- a/.ci/azure-pipeline-azureml-notebook-test.yml +++ b/.ci/azure-pipeline-azureml-notebook-test.yml @@ -13,10 +13,6 @@ variables: value : 'reports/test-unit.xml' trigger: none -pr: -- staging -- master - jobs: - job: AzureMLNotebookTest timeoutInMinutes: 300 @@ -61,4 +57,4 @@ jobs: inputs: testResultsFiles: '**/test-*.xml' failTaskOnFailedTests: true - condition: succeededOrFailed() \ No newline at end of file + condition: succeededOrFailed() diff --git a/.ci/repo_metrics_pipeline.yml b/.ci/repo_metrics_pipeline.yml index 40a0c81..9a3b660 100644 --- a/.ci/repo_metrics_pipeline.yml +++ b/.ci/repo_metrics_pipeline.yml @@ -1,3 +1,23 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +# More info on scheduling: https://docs.microsoft.com/en-us/azure/devops/pipelines/build/triggers?view=azure-devops&tabs=yaml#scheduled-triggers +# Implementing the scheduler from the dashboard +# Uncomment in case it wants to be done from using the yml +# schedules: +# - cron: "56 22 * * *" +# displayName: Daily track of metrics +# branches: +# include: +# - master +# always: true + + +# no PR builds +pr: none + +# no CI trigger +trigger: none jobs: - job: Repometrics @@ -5,7 +25,6 @@ jobs: vmImage: 'ubuntu-16.04' steps: - - task: UsePythonVersion@0 inputs: versionSpec: '3.6' @@ -13,13 +32,13 @@ jobs: - script: | cp tools/repo_metrics/config_template.py tools/repo_metrics/config.py - sed -i ''s/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/$(github_token)/g'' tools/repo_metrics/config.py - sed -i ''s/XXXXXXXXXXXXXXXXXXXXXXXXX/$(cosmosdb_connectionstring)/g'' tools/repo_metrics/config.py + sed -i 's##$(github_token)#' tools/repo_metrics/config.py + sed -i "s##`echo '$(cosmosdb_connectionstring)' | sed 's@&@\\\\&@g'`#" tools/repo_metrics/config.py displayName: Configure CosmosDB Connection - script: | - python 
-m pip install python-dateutil>=2.80 pymongo>=3.8.0 gitpython>2.1.11 requests>=2.21.0 - python tools/repo_metrics/track_metrics.py --github_repo "https://github.com/microsoft/ComputerVision" --save_to_database + python -m pip install 'python-dateutil>=2.8.0' 'pymongo>=3.8.0' 'gitpython>2.1.11' 'requests>=2.21.0' + python tools/repo_metrics/track_metrics.py --github_repo 'https://github.com/microsoft/ComputerVision' --save_to_database displayName: Python script to record stats diff --git a/.ci/templates/unit-test-steps.yml b/.ci/templates/unit-test-steps.yml index a8d1769..da25e58 100644 --- a/.ci/templates/unit-test-steps.yml +++ b/.ci/templates/unit-test-steps.yml @@ -5,6 +5,10 @@ steps: echo "##vso[task.prependpath]/data/anaconda/bin" displayName: Add Conda to PATH +- bash: | + rm -rf /data/anaconda/envs/cv + displayName: 'Remove conda env in case it was not created correctly' + - bash: | conda env create -f environment.yml source activate cv diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index bc4172f..7d0fac4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -20,12 +20,12 @@ When you submit a pull request, a CLA-bot will automatically determine whether y ## Steps to Contributing Here are the basic steps to get started with your first contribution. Please reach out with any questions. -1. Use [open issues](https://github.com/Microsoft/Recommenders/issues) to discuss the proposed changes. Create an issue describing changes if necessary to collect feedback. Also, please use provided labels to tag issues so everyone can easily sort issues of interest. +1. Use [open issues](https://github.com/Microsoft/ComputerVision/issues) to discuss the proposed changes. Create an issue describing changes if necessary to collect feedback. Also, please use provided labels to tag issues so everyone can easily sort issues of interest. 1. [Fork the repo](https://help.github.com/articles/fork-a-repo/) so you can make and test local changes. 1. Create a new branch for the issue. 
We suggest prefixing the branch with your username and then a descriptive title: (e.g. gramhagen/update_contributing_docs) 1. Create a test that replicates the issue. 1. Make code changes. -1. Ensure unit tests pass and code style / formatting is consistent (see [wiki](https://github.com/Microsoft/Recommenders/wiki/Coding-Guidelines#python-and-docstrings-style) for more details). +1. Ensure unit tests pass and code style / formatting is consistent, and follows the [Zen of Python](https://github.com/Microsoft/Recommenders/wiki/Coding-Guidelines#the-zen-of-python). 1. We use [pre-commit](https://pre-commit.com/) package to run our pre-commit hooks. We use black formatter and flake8 linting on each commit. In order to set up pre-commit on your machine, follow the steps here, please note that you only need to run these steps the first time you use pre-commit for this project. * Update your conda environment, pre-commit is part of the yaml file or just do @@ -49,7 +49,6 @@ Here are the basic steps to get started with your first contribution. Please rea Note: We use the staging branch to land all new features, so please remember to create the Pull Request against staging. -Once the features included in a milestone are complete we will merge staging into master and make a release. See the wiki for more detail about our [merge strategy](https://github.com/Microsoft/Recommenders/wiki/Strategy-to-merge-the-code-to-master-branch). ## Working with Notebooks @@ -77,8 +76,6 @@ nbdiff notebook_1.ipynb notebook_2.ipynb We strive to maintain high quality code to make the utilities in the repository easy to understand, use, and extend. We also work hard to maintain a friendly and constructive environment. We've found that having clear expectations on the development process and consistent style helps to ensure everyone can contribute and collaborate effectively. 
-Please review the [coding guidelines](https://github.com/Microsoft/Recommenders/wiki/Coding-Guidelines) wiki page to see more details about the expectations for development approach and style. - We follow the Google docstring guidlines outlined on this [styleguide](https://github.com/google/styleguide/blob/gh-pages/pyguide.md#38-comments-and-docstrings) page. For example: ```python def bite(n:int, animal:animal_object) -> bool: @@ -103,7 +100,7 @@ This project has adopted the [Microsoft Open Source Code of Conduct](https://ope For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. -Apart from the official Code of Conduct developed by Microsoft, in the Recommenders team we adopt the following behaviors, to ensure a great working environment: +Apart from the official Code of Conduct developed by Microsoft, we adopt the following behaviors, to ensure a great working environment: #### Do not point fingers Let’s be constructive. For example: "This method is missing docstrings" instead of "YOU forgot to put docstrings". diff --git a/README.md b/README.md index 22ca245..f775c55 100644 --- a/README.md +++ b/README.md @@ -10,9 +10,10 @@ The current main priority is to support image classification. Additionally, we a ## Getting Started -To get started on your local machine: +To get started: -1. Install Anaconda with Python >= 3.6. [Miniconda](https://conda.io/miniconda.html) is a quick way to get started. +1. (Optional) Create an Azure Data Science Virtual Machine with e.g. a V100 GPU ([instructions](https://docs.microsoft.com/en-us/azure/machine-learning/data-science-virtual-machine/provision-deep-learning-dsvm), [price table](https://azure.microsoft.com/en-us/pricing/details/virtual-machines/windows/)). +1. Install Anaconda with Python >= 3.6. [Miniconda](https://conda.io/miniconda.html). 
This step can be skipped if working on a Data Science Virtual Machine. 1. Clone the repository ``` git clone https://github.com/Microsoft/ComputerVision diff --git a/classification/notebooks/11_exploring_hyperparameters.ipynb b/classification/notebooks/11_exploring_hyperparameters.ipynb index 97478b5..bbd07d9 100644 --- a/classification/notebooks/11_exploring_hyperparameters.ipynb +++ b/classification/notebooks/11_exploring_hyperparameters.ipynb @@ -22,6 +22,7 @@ "source": [ "In this notebook, we'll cover how to test different hyperparameters for a particular dataset and how to benchmark different parameters across a group of datasets.\n", "\n", + "For an example of how to scale up with remote GPU clusters on Azure Machine Learning, please view [24_exploring_hyperparameters_on_azureml.ipynb](24_exploring_hyperparameters_on_azureml.ipynb).\n", "## Table of Contents\n", "\n", "* [Testing hyperparameters](#hyperparam)\n", @@ -52,7 +53,7 @@ "metadata": {}, "source": [ "Ensure edits to libraries are loaded and plotting is shown in the notebook." - ] + ] }, { "cell_type": "code", diff --git a/classification/notebooks/24_exploring_hyperparameters_on_azureml.ipynb b/classification/notebooks/24_exploring_hyperparameters_on_azureml.ipynb index 05f2807..a887cde 100644 --- a/classification/notebooks/24_exploring_hyperparameters_on_azureml.ipynb +++ b/classification/notebooks/24_exploring_hyperparameters_on_azureml.ipynb @@ -20,16 +20,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In this notebook, we'll cover how to test different hyperparameters for a particular dataset and how to benchmark different parameters across a group of datasets using AzureML" + "In this notebook, we'll cover how to test different hyperparameters for a particular dataset and how to benchmark different parameters across a group of datasets using AzureML. 
We assume familiarity with the basic concepts and parameters, which are discussed in the [01_training_introduction.ipynb](01_training_introduction.ipynb), [02_multilabel_classification.ipynb](02_multilabel_classification.ipynb) and [03_training_accuracy_vs_speed.ipynb](03_training_accuracy_vs_speed.ipynb) notebooks. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Similar to [11_exploring_hyperparameters.ipynb](https://github.com/microsoft/ComputerVision/blob/master/classification/notebooks/11_exploring_hyperparameters.ipynb), we will learn more about how different learning rates and different image sizes affect our model's accuracy when restricted to 10 epochs, and we want to build an AzureML experiment to test out these hyperparameters. \n", + "Similar to [11_exploring_hyperparameters.ipynb](https://github.com/microsoft/ComputerVision/blob/master/classification/notebooks/11_exploring_hyperparameters.ipynb), we will learn more about how different learning rates and different image sizes affect our model's accuracy when restricted to 16 epochs, and we want to build an AzureML experiment to test out these hyperparameters. \n", "\n", - "We will be using a ResNet50 model to classify a set of images into 4 categories - 'can', 'carton', 'milk_bottle', 'water_bottle'. We will then conduct hyper-parameter tuning to find the best set of parameters for this model. For this,\n", + "We will be using a ResNet18 model to classify a set of images into 4 categories: 'can', 'carton', 'milk_bottle', 'water_bottle'. We will then conduct hyper-parameter tuning to find the best set of parameters for this model. 
For this,\n", "we present an overall process of utilizing AzureML, specifically [Hyperdrive](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.hyperdrive?view=azure-ml-py) component to run this tuning in parallel (and not successively).We demonstrate the following key steps: \n", "* Configure AzureML Workspace\n", "* Create Remote Compute Target (GPU cluster)\n", @@ -43,15 +43,7 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "SDK version: 1.0.48\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "import sys\n", @@ -65,15 +57,14 @@ "from azureml.core.compute import ComputeTarget, AmlCompute\n", "from azureml.core.compute_target import ComputeTargetException\n", "import azureml.data\n", - "from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, PrimaryMetricGoal, choice\n", "from azureml.train.estimator import Estimator\n", - "\n", + "from azureml.train.hyperdrive import (\n", + " RandomParameterSampling, BanditPolicy, HyperDriveConfig, PrimaryMetricGoal, choice, uniform\n", + ")\n", "import azureml.widgets as widgets\n", "\n", "from utils_cv.classification.data import Urls\n", - "from utils_cv.common.data import unzip_url\n", - "\n", - "print(\"SDK version:\", azureml.core.VERSION)" + "from utils_cv.common.data import unzip_url" ] }, { @@ -98,8 +89,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 1. 
Config AzureML workspace\n", - "Below we setup AzureML workspace and get all its details as follows:" + "We now define some parameters which will be used in this notebook:" ] }, { @@ -116,36 +106,37 @@ "subscription_id = \"YOUR_SUBSCRIPTION_ID\"\n", "resource_group = \"YOUR_RESOURCE_GROUP_NAME\" \n", "workspace_name = \"YOUR_WORKSPACE_NAME\" \n", - "workspace_region = \"YOUR_WORKSPACE_REGION\" #Possible values eastus, eastus2 and so on.\n", + "workspace_region = \"YOUR_WORKSPACE_REGION\" #Possible values eastus, eastus2, etc.\n", "\n", - "max_total_runs=50\n" + "# Choose a size for our cluster and the maximum number of nodes\n", + "VM_SIZE = \"STANDARD_NC6\" #\"STANDARD_NC6S_V3\"\n", + "MAX_NODES = 12\n", + "\n", + "# Hyperparameter search space\n", + "IM_SIZES = [150, 300]\n", + "LEARNING_RATE_MAX = 1e-3\n", + "LEARNING_RATE_MIN = 1e-5\n", + "MAX_TOTAL_RUNS = 10 #Set to higher value to test more parameter combinations\n", + "\n", + "# Image data\n", + "DATA = unzip_url(Urls.fridge_objects_path, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Config AzureML workspace\n", + "Below we set up (or load an existing) AzureML workspace, and get all its details as follows. Note that the resource group and workspace will get created if they do not yet exist. For more information regarding the AzureML workspace see also the [20_azure_workspace_setup.ipynb](20_azure_workspace_setup.ipynb) notebook.\n", + "\n", + "To simplify clean-up (see end of this notebook), we recommend creating a new resource group to run this notebook." 
] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING - Warning: Falling back to use azure cli login credentials.\n", - "If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.\n", - "Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Workspace name: smoketestwsnew\n", - "Workspace region: eastus2\n", - "Subscription id: 0ca618d2-22a8-413a-96d0-0f1b531129c3\n", - "Resource group: smoketestnew11\n" - ] - } - ], + "outputs": [], "source": [ "from utils_cv.common.azureml import get_or_create_workspace\n", "\n", @@ -167,9 +158,9 @@ "metadata": {}, "source": [ "### 2. Create Remote Target\n", - "We create a GPU cluster as our remote compute target. If a cluster with the same name already exists in our workspace, the script will load it instead. We can see [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-set-up-training-targets#compute-targets-for-training) to learn more about setting up a compute target on different locations.\n", + "We create a GPU cluster as our remote compute target. If a cluster with the same name already exists in our workspace, the script will load it instead. This [link](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-set-up-training-targets#compute-targets-for-training) provides more information about how to set up a compute target on different locations.\n", "\n", - "This notebook selects STANDARD_NC6 virtual machine (VM) and sets its priority as 'lowpriority' to reduce costs." + "By default, the VM size is set to use _STANDARD_NC6_ machines. 
However, if quota is available, our recommendation is to use _STANDARD_NC6S_V3_ machines which come with the much faster V100 GPU." ] }, { @@ -181,37 +172,32 @@ "name": "stdout", "output_type": "stream", "text": [ - "Found existing compute target.\n", - "{'currentNodeCount': 1, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 1, 'preemptedNodeCount': 0}, 'allocationState': 'Resizing', 'allocationStateTransitionTime': '2019-07-22T04:40:41.047000+00:00', 'errors': None, 'creationTime': '2019-07-22T02:26:37.808395+00:00', 'modifiedTime': '2019-07-22T02:26:53.969636+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_NC6'}\n" + "Creating a new compute target...\n", + "Creating\n", + "Succeeded\n", + "AmlCompute wait for completion finished\n", + "Minimum number of nodes requested have been provisioned\n", + "{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2019-08-06T15:57:12.457000+00:00', 'errors': None, 'creationTime': '2019-08-06T15:56:43.315467+00:00', 'modifiedTime': '2019-08-06T15:57:25.740370+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 12, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_NC6'}\n" ] } ], "source": [ - "# choose a name for our cluster\n", - "cluster_name = \"gpu-cluster-nc6\"\n", - "# Remote compute (cluster) configuration. 
If you want to reduce costs even more, set these to small.\n", - "# For example, using Standard_DS1_v2 instead of using STANDARD_NC6\n", - "VM_SIZE = 'STANDARD_NC6'\n", - "VM_PRIORITY = 'lowpriority'\n", - "\n", - "# Cluster nodes\n", - "MIN_NODES = 0\n", - "MAX_NODES = 4\n", + "CLUSTER_NAME = \"gpu-cluster\"\n", "\n", "try:\n", - " # Retrieve if a compute target with the same cluster_name already exists\n", - " compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n", + " # Retrieve if a compute target with the same cluster name already exists\n", + " compute_target = ComputeTarget(workspace=ws, name=CLUSTER_NAME)\n", " print('Found existing compute target.')\n", + " \n", "except ComputeTargetException:\n", " # If it doesn't already exist, we create a new one with the name provided\n", " print('Creating a new compute target...')\n", " compute_config = AmlCompute.provisioning_configuration(vm_size=VM_SIZE,\n", - " min_nodes=MIN_NODES,\n", + " min_nodes=0,\n", " max_nodes=MAX_NODES)\n", "\n", " # create the cluster\n", - " compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n", - "\n", + " compute_target = ComputeTarget.create(ws, CLUSTER_NAME, compute_config)\n", " compute_target.wait_for_completion(show_output=True)\n", "\n", "# we can use get_status() to get a detailed status for the current cluster. 
\n", @@ -228,327 +214,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Uploading an estimated of 138 files\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/cvbp_milk_bottle.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/cvbp_water_bottle.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/example.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects.zip\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/1.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/10.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/11.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/12.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/13.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/14.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/15.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/16.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/17.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/18.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/19.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/2.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/20.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/21.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/22.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/23.jpg\n", - 
"Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/24.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/25.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/26.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/27.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/28.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/29.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/3.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/30.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/31.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/32.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/4.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/5.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/15.jpg, 1 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/6.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/32.jpg, 2 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/7.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/cvbp_water_bottle.jpg, 3 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/8.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/11.jpg, 4 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/9.jpg\n", - "Uploaded 
/Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/6.jpg, 5 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/33.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/cvbp_milk_bottle.jpg, 6 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/34.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/14.jpg, 7 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/35.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/5.jpg, 8 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/36.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/example.jpg, 9 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/37.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/18.jpg, 10 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/38.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/7.jpg, 11 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/39.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/9.jpg, 12 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/40.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/36.jpg, 13 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/41.jpg\n", - "Uploaded 
/Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/13.jpg, 14 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/42.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/34.jpg, 15 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/43.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/39.jpg, 16 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/44.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/33.jpg, 17 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/45.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/41.jpg, 18 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/23.jpg, 19 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/1.jpg, 20 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/46.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/47.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/12.jpg, 21 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/48.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/49.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/16.jpg, 22 files out of an estimated total of 138\n", - "Uploading 
/Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/50.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/31.jpg, 23 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/51.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/26.jpg, 24 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/52.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/47.jpg, 25 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/53.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/44.jpg, 26 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/54.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/49.jpg, 27 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/55.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/53.jpg, 28 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/56.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/29.jpg, 29 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/57.jpg\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/56.jpg, 30 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/58.jpg\n", - "Uploaded 
/Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/43.jpg, 31 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/59.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/54.jpg, 32 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/60.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/8.jpg, 33 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/61.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/55.jpg, 34 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/62.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/40.jpg, 35 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/63.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/10.jpg, 36 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/64.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/58.jpg, 37 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/100.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/42.jpg, 38 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/101.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/59.jpg, 39 files out of an estimated total of 138\n", - "Uploading 
/Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/65.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/100.jpg, 40 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/66.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/62.jpg, 41 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/67.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/48.jpg, 42 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/68.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/45.jpg, 43 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/69.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/51.jpg, 44 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/70.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/50.jpg, 45 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/71.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/46.jpg, 46 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/72.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/60.jpg, 47 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/73.jpg\n", - "Uploaded 
/Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/66.jpg, 48 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/74.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/65.jpg, 49 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/75.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/25.jpg, 50 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/76.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/73.jpg, 51 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/77.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/67.jpg, 52 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/78.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/77.jpg, 53 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/79.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/52.jpg, 54 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/80.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/22.jpg, 55 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/81.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/75.jpg, 56 files out of an estimated total of 138\n", - 
"Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/82.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/63.jpg, 57 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/83.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/78.jpg, 58 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/84.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/101.jpg, 59 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/85.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/79.jpg, 60 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/86.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/74.jpg, 61 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/87.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/61.jpg, 62 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/88.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/57.jpg, 63 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/89.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/69.jpg, 64 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/90.jpg\n", - "Uploaded 
/Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/70.jpg, 65 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/91.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/68.jpg, 66 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/92.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/83.jpg, 67 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/93.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/72.jpg, 68 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/94.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/81.jpg, 69 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/95.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/64.jpg, 70 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/3.jpg, 71 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/96.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/97.jpg\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/84.jpg, 72 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/35.jpg, 73 files out of an estimated total of 138\n", - "Uploading 
/Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/98.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/99.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/71.jpg, 74 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/102.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/85.jpg, 75 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/103.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/17.jpg, 76 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/104.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/87.jpg, 77 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/105.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/94.jpg, 78 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/106.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/97.jpg, 79 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/107.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/2.jpg, 80 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/108.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/88.jpg, 81 files out of an estimated total of 138\n", - "Uploading 
/Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/109.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/28.jpg, 82 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/110.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/21.jpg, 83 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/111.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/27.jpg, 84 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/112.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/106.jpg, 85 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/113.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/86.jpg, 86 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/114.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/108.jpg, 87 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/115.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/102.jpg, 88 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/116.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/20.jpg, 89 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/117.jpg\n", - "Uploaded 
/Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/76.jpg, 90 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/118.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/104.jpg, 91 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/119.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/90.jpg, 92 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/120.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/91.jpg, 93 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/121.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/93.jpg, 94 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/122.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/109.jpg, 95 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/123.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/113.jpg, 96 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/124.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/19.jpg, 97 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/125.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/92.jpg, 98 files out of 
an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/126.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/122.jpg, 99 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/127.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/80.jpg, 100 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/128.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/96.jpg, 101 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/119.jpg, 102 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/24.jpg, 103 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/129.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/130.jpg\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/131.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/116.jpg, 104 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/132.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/120.jpg, 105 files out of an estimated total of 138\n", - "Uploading /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/133.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/121.jpg, 106 files out of an estimated total of 138\n", - "Uploading 
/Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/134.jpg\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/89.jpg, 107 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/103.jpg, 108 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/123.jpg, 109 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/128.jpg, 110 files out of an estimated total of 138\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/118.jpg, 111 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/127.jpg, 112 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/126.jpg, 113 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/38.jpg, 114 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/133.jpg, 115 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/134.jpg, 116 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/124.jpg, 117 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/99.jpg, 118 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/131.jpg, 119 files out of an estimated total of 138\n", - 
"Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/132.jpg, 120 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/82.jpg, 121 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/129.jpg, 122 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/98.jpg, 123 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/111.jpg, 124 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/117.jpg, 125 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/107.jpg, 126 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/4.jpg, 127 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/114.jpg, 128 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/milk_bottle/95.jpg, 129 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/125.jpg, 130 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/110.jpg, 131 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/105.jpg, 132 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/112.jpg, 133 files out of an estimated total of 138\n", - "Uploaded 
/Users/richinjain/projects/ComputerVision/data/fridgeObjects/can/30.jpg, 134 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/130.jpg, 135 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/carton/37.jpg, 136 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects/water_bottle/115.jpg, 137 files out of an estimated total of 138\n", - "Uploaded /Users/richinjain/projects/ComputerVision/data/fridgeObjects.zip, 138 files out of an estimated total of 138\n", - "Uploaded 138 files\n" - ] - }, - { - "data": { - "text/plain": [ - "$AZUREML_DATAREFERENCE_f63fbd85fa17436fa173eb6034cd9eb5" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "# Note, all the files under DATA will be uploaded to the data store\n", - "DATA = unzip_url(Urls.fridge_objects_path, exist_ok=True)\n", - "REPS = 3\n", - "\n", "# Retrieving default datastore that got automatically created when we setup a workspace\n", "ds = ws.get_default_datastore()\n", "\n", @@ -594,7 +263,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Overwriting /Users/richinjain/projects/ComputerVision/classification/notebooks/hyperparameter/train.py\n" + "Overwriting C:\\Users\\pabuehle\\Desktop\\ComputerVision\\classification\\notebooks\\hyperparameter/train.py\n" ] } ], @@ -615,69 +284,66 @@ "\n", "run = Run.get_context()\n", "\n", + "\n", + "#------------------------------------------------------------------\n", "# Define parameters that we are going to use for training\n", - "ARCHITECTURE = models.resnet50\n", + "ARCHITECTURE = models.resnet18\n", + "EPOCHS_HEAD = 4\n", + "EPOCHS_BODY = 12\n", + "BATCH_SIZE = 16\n", + "#------------------------------------------------------------------\n", + "\n", "\n", "# Parse arguments passed by 
Hyperdrive\n", "parser = argparse.ArgumentParser()\n", "\n", - "\n", "# Data path\n", "parser.add_argument('--data-folder', type=str, dest='DATA_DIR', help=\"Datastore path\")\n", "parser.add_argument('--im_size', type=int, dest='IM_SIZE')\n", "parser.add_argument('--learning_rate', type=float, dest='LEARNING_RATE')\n", - "\n", "args = parser.parse_args()\n", "params = vars(args)\n", "\n", "if params['IM_SIZE'] is None:\n", " raise ValueError(\"Image Size empty\")\n", - " \n", "if params['LEARNING_RATE'] is None:\n", " raise ValueError(\"Learning Rate empty\")\n", - "\n", "if params['DATA_DIR'] is None:\n", " raise ValueError(\"Data folder empty\")\n", - " \n", "\n", + "# Getting training and validation data\n", "path = params['DATA_DIR'] + '/data/fridgeObjects'\n", - "\n", - "# Getting training and validation data and training the CNN as done in 01_training_introduction.ipynb\n", "data = (ImageList.from_folder(path)\n", - " .split_by_rand_pct(valid_pct=0.2, seed=10)\n", + " .split_by_rand_pct(valid_pct=0.5, seed=10)\n", " .label_from_folder() \n", " .transform(size=params['IM_SIZE']) \n", - " .databunch(bs=16) \n", + " .databunch(bs=BATCH_SIZE) \n", " .normalize(imagenet_stats))\n", "\n", + "# Get model and run training\n", "learn = cnn_learner(\n", " data,\n", " ARCHITECTURE,\n", " metrics=[accuracy]\n", ")\n", - "\n", - "epochs=1 # Change the value to 10 to see multiple runs, defaulting to 1 for quick run of notebook.\n", + "learn.fit_one_cycle(EPOCHS_HEAD, params['LEARNING_RATE'])\n", "learn.unfreeze()\n", - "learn.fit(epochs, params['LEARNING_RATE'])\n", + "learn.fit_one_cycle(EPOCHS_BODY, params['LEARNING_RATE'])\n", "\n", + "# Add log entries\n", "training_losses = [x.numpy().ravel()[0] for x in learn.recorder.losses]\n", - "accuracy = [x[0].numpy().ravel()[0] for x in learn.recorder.metrics][-1]\n", - "\n", - "#run.log_list('training_loss', training_losses)\n", - "#run.log_list('validation_loss', learn.recorder.val_losses)\n", - "#run.log_list('error_rate', 
error_rate)\n", + "accuracy = [100*x[0].numpy().ravel()[0] for x in learn.recorder.metrics][-1]\n", "run.log('data_dir',params['DATA_DIR'])\n", "run.log('im_size', params['IM_SIZE'])\n", "run.log('learning_rate', params['LEARNING_RATE'])\n", "run.log('accuracy', float(accuracy)) # Logging our primary metric 'accuracy'\n", "\n", + "# Save trained model\n", "current_directory = os.getcwd()\n", "output_folder = os.path.join(current_directory, 'outputs')\n", - "MODEL_NAME = 'im_classif_resnet50' # Name we will give our model both locally and on Azure\n", - "PICKLED_MODEL_NAME = MODEL_NAME + '.pkl'\n", + "model_name = 'im_classif_resnet' # Name we will give our model both locally and on Azure\n", "os.makedirs(output_folder, exist_ok=True)\n", - "\n", - "learn.export(os.path.join(output_folder, PICKLED_MODEL_NAME))" + "learn.export(os.path.join(output_folder, model_name + \".pkl\"))" ] }, { @@ -686,8 +352,6 @@ "source": [ "### 5. Setup and run Hyperdrive experiment\n", "\n", - "Next step is to prepare scripts that AzureML Hyperdrive will use to train and evaluate models with selected hyperparameters. To run the model notebook from the Hyperdrive Run, all we need is to prepare an entry script which parses the hyperparameter arguments, passes them to the notebook, and records the results of the notebook to AzureML Run logs. \n", - "\n", "#### 5.1 Create Experiment \n", "Experiment is the main entry point into experimenting with AzureML. To create new Experiment or get the existing one, we pass our experimentation name 'hyperparameter-tuning'.\n" ] @@ -708,10 +372,7 @@ "source": [ "#### 5.2. Define search space\n", "\n", - "Now we define the search space of hyperparameters. For example, if you want to test different batch sizes of {64, 128, 256}, you can use azureml.train.hyperdrive.choice(64, 128, 256). To search from a continuous space, use uniform(start, end). 
For more options, see [Hyperdrive parameter expressions](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.hyperdrive.parameter_expressions?view=azure-ml-py).\n", - "\n", - "In this notebook we use the ResNet50 architecture, and fix the number of epochs to 10.\n", - "In the search space, we set different learning rates and image sizes. Details about the hyperparameters can be found in [11_exploring_hyperparameters.ipynb notebook](https://github.com/microsoft/ComputerVision/blob/master/classification/notebooks/11_exploring_hyperparameters.ipynb).\n", + "Now we define the search space of hyperparameters. As shown below, to test discrete parameter values use 'choice()', and for uniform sampling use 'uniform()'. For more options, see [Hyperdrive parameter expressions](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.hyperdrive.parameter_expressions?view=azure-ml-py).\n", "\n", "Hyperdrive provides three different parameter sampling methods: 'RandomParameterSampling', 'GridParameterSampling', and 'BayesianParameterSampling'. Details about each method can be found [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-tune-hyperparameters). Here, we use the 'RandomParameterSampling'." 
] @@ -722,20 +383,13 @@ "metadata": {}, "outputs": [], "source": [ - "IM_SIZES = [299, 499]\n", - "LEARNING_RATES = [1e-3, 1e-4, 1e-5]\n", - "\n", "# Hyperparameter search space\n", "param_sampling = RandomParameterSampling( {\n", - " '--learning_rate': choice(LEARNING_RATES),\n", + " '--learning_rate': uniform(LEARNING_RATE_MIN, LEARNING_RATE_MAX),\n", " '--im_size': choice(IM_SIZES)\n", " }\n", ")\n", "\n", - "primary_metric_name = 'accuracy'\n", - "primary_metric_goal = PrimaryMetricGoal.MAXIMIZE\n", - "max_concurrent_runs=4\n", - "\n", "early_termination_policy = BanditPolicy(slack_factor=0.15, evaluation_interval=1, delay_evaluation=20)" ] }, @@ -781,7 +435,7 @@ "- early termination policy, in this case we use [Bandit Policy](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-tune-hyperparameters#bandit-policy)\n", "- primary metric name reported by our runs, in this case it is accuracy \n", "- the goal, which determines whether the primary metric has to be maximized/minimized, in this case it is to maximize our accuracy \n", - "- number of total child-runs, in this case it is 4\n", + "- number of total child-runs\n", "\n", "The bigger the search space, the more child-runs get triggered for better results." 
] @@ -795,10 +449,10 @@ "hyperdrive_run_config = HyperDriveConfig(estimator=est,\n", " hyperparameter_sampling=param_sampling,\n", " policy=early_termination_policy,\n", - " primary_metric_name=primary_metric_name,\n", - " primary_metric_goal=primary_metric_goal,\n", - " max_total_runs=max_total_runs,\n", - " max_concurrent_runs= max_concurrent_runs)" + " primary_metric_name='accuracy',\n", + " primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,\n", + " max_total_runs=MAX_TOTAL_RUNS,\n", + " max_concurrent_runs=MAX_NODES)" ] }, { @@ -816,7 +470,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "fff89f7fb8284f24a94932ca876cbae2", + "model_id": "5c51804ba4794f3aa163354fef634c59", "version_major": 2, "version_minor": 0 }, @@ -826,25 +480,12 @@ }, "metadata": {}, "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9a3c69449ea34e48a4e0c884ced34538", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…" - ] - }, - "metadata": {}, - "output_type": "display_data" } ], "source": [ "# Now we submit the Run to our experiment. 
\n", "hyperdrive_run = exp.submit(config=hyperdrive_run_config)\n", + "\n", "# We can see the experiment progress from this notebook by using \n", "widgets.RunDetails(hyperdrive_run).show()" ] @@ -857,18 +498,18 @@ { "data": { "text/plain": [ - "{'runId': 'hyperparameter-tuning_1563770544897',\n", - " 'target': 'gpu-cluster-nc6',\n", + "{'runId': 'hyperparameter-tuning_1565107066432',\n", + " 'target': 'gpu-cluster',\n", " 'status': 'Completed',\n", - " 'startTimeUtc': '2019-07-22T04:42:25.393015Z',\n", - " 'endTimeUtc': '2019-07-22T04:49:58.250673Z',\n", + " 'startTimeUtc': '2019-08-06T15:57:46.90426Z',\n", + " 'endTimeUtc': '2019-08-06T16:13:21.185098Z',\n", " 'properties': {'primary_metric_config': '{\"name\": \"accuracy\", \"goal\": \"maximize\"}',\n", " 'runTemplate': 'HyperDrive',\n", " 'azureml.runsource': 'hyperdrive',\n", " 'platform': 'AML',\n", - " 'baggage': 'eyJvaWQiOiAiNmY1Yjc5M2UtZjhiOS00NGY0LTk0N2YtNTg3N2ZjMDFjZmFjIiwgInRpZCI6ICI3MmY5ODhiZi04NmYxLTQxYWYtOTFhYi0yZDdjZDAxMWRiNDciLCAidW5hbWUiOiAiMDRiMDc3OTUtOGRkYi00NjFhLWJiZWUtMDJmOWUxYmY3YjQ2In0',\n", - " 'ContentSnapshotId': 'a63feca7-742e-49c3-b568-9cf6a53b34c3'},\n", - " 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://smoketesstorage0231aa20c.blob.core.windows.net/azureml/ExperimentRun/dcid.hyperparameter-tuning_1563770544897/azureml-logs/hyperdrive.txt?sv=2018-03-28&sr=b&sig=LL8Fx6UZhJ9jddaqS1xeR%2BHi98wUHPZ%2FYuAxGH3Y39I%3D&st=2019-07-22T04%3A39%3A59Z&se=2019-07-22T12%3A49%3A59Z&sp=r'}}" + " 'baggage': 'eyJvaWQiOiAiNWFlYTJmMzAtZjQxZC00ZDA0LWJiOGUtOWU0NGUyZWQzZGQ2IiwgInRpZCI6ICI3MmY5ODhiZi04NmYxLTQxYWYtOTFhYi0yZDdjZDAxMWRiNDciLCAidW5hbWUiOiAiMDRiMDc3OTUtOGRkYi00NjFhLWJiZWUtMDJmOWUxYmY3YjQ2In0',\n", + " 'ContentSnapshotId': 'c662f56a-ff58-432e-b732-8a3bc6818778'},\n", + " 'logFiles': {'azureml-logs/hyperdrive.txt': 
'https://pabuehlestorage1c7e31216.blob.core.windows.net/azureml/ExperimentRun/dcid.hyperparameter-tuning_1565107066432/azureml-logs/hyperdrive.txt?sv=2018-11-09&sr=b&sig=8D2gwxb%2BYn7nbzgGVHE7QSzJ%2FG7C1swzmLD7%2Fior2vE%3D&st=2019-08-06T17%3A36%3A08Z&se=2019-08-07T01%3A46%3A08Z&sp=r'}}" ] }, "execution_count": 14, @@ -877,7 +518,7 @@ } ], "source": [ - "hyperdrive_run.wait_for_completion()\n" + "hyperdrive_run.wait_for_completion()" ] }, { @@ -911,15 +552,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "* Best Run Id:hyperparameter-tuning_1563770544897_0\n", + "* Best Run Id:hyperparameter-tuning_1565107066432_8\n", "Run(Experiment: hyperparameter-tuning,\n", - "Id: hyperparameter-tuning_1563770544897_0,\n", + "Id: hyperparameter-tuning_1565107066432_8,\n", "Type: azureml.scriptrun,\n", "Status: Completed)\n", "\n", "* Best hyperparameters:\n", - "{'--data-folder': '$AZUREML_DATAREFERENCE_workspaceblobstore', '--im_size': '299', '--learning_rate': '0.001'}\n", - "Accuracy = 0.26923078298568726\n" + "{'--data-folder': '$AZUREML_DATAREFERENCE_workspaceblobstore', '--im_size': '150', '--learning_rate': '0.000552896672441507'}\n", + "Accuracy = 92.53731369972229\n" ] } ], @@ -956,8 +597,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Downloading outputs/im_classif_resnet50.pkl..\n", - "119547037146038801333356\n" + "Downloading outputs/im_classif_resnet.pkl..\n" ] } ], @@ -968,11 +608,10 @@ "os.makedirs(output_folder, exist_ok=True)\n", "\n", "for f in best_run.get_file_names():\n", - " if f.startswith('outputs/im_classif_resnet50'):\n", + " if f.startswith('outputs/im_classif_resnet'):\n", " print(\"Downloading {}..\".format(f))\n", - " best_run.download_file('outputs/im_classif_resnet50.pkl')\n", - "saved_model =joblib.load('im_classif_resnet50.pkl')\n", - "print(saved_model)" + " best_run.download_file('outputs/im_classif_resnet.pkl')\n", + "saved_model =joblib.load('im_classif_resnet.pkl')" ] }, { @@ -984,12 +623,27 @@ 
"saved_model.predict(image)\n", "```" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 7. Clean up\n", + "\n", + "To avoid unnecessary expenses, all resources which were created in this notebook need to get deleted once parameter search is concluded. To simplify this clean-up step, we recommend creating a new resource group to run this notebook. This resource group can then be deleted, e.g. using the Azure Portal, which will remove all created resources." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { - "celltoolbar": "Tags", "kernelspec": { - "display_name": "cv", + "display_name": "Python (cv)", "language": "python", "name": "cv" }, diff --git a/similarity/README.md b/similarity/README.md index 59a76bf..e089ebc 100644 --- a/similarity/README.md +++ b/similarity/README.md @@ -6,6 +6,12 @@ The majority of state-of-the-art systems for image similarity use DNNs to comput A major difference between modern image similarity approaches is how the DNN is trained. A simple but quite powerful approach is to use a standard image classification loss - this is the approach taken in this repository, and explained in the [classification](../classification/README.md) folder. More accurate similarity measures are based on DNNs which are trained explicitly for image similarity, such as the [FaceNet](https://arxiv.org/pdf/1503.03832.pdf) work which uses a Siamese network architecture. FaceNet-like approaches will be added to this repository at a later point. + +## Frequently asked questions + +Answers to Frequently Asked Questions such as "How many images do I need to train a model?" or "How to annotate images?" can be found in the [FAQ.md](FAQ.md) file. For image classification specified questions, see the [FAQ.md](../classification/FAQ.md) in the classification folder. 
+ + ## Notebooks We provide several notebooks to show how image similarity algorithms can be designed and evaluated. @@ -14,11 +20,10 @@ We provide several notebooks to show how image similarity algorithms can be desi | --- | --- | | [00_webcam.ipynb](./notebooks/00_webcam.ipynb)| Quick start notebook which demonstrates how to build an image retrieval system using a single image or webcam as input. | [01_training_and_evaluation_introduction.ipynb](./notebooks/01_training_and_evaluation_introduction.ipynb)| Notebook which explains the basic concepts around model training and evaluation, based on using DNNs trained for image classification.| +| [11_exploring_hyperparameters.ipynb](notebooks/11_exploring_hyperparameters.ipynb)| Finds optimal model parameters using grid search. | + ## Coding guidelines See the [coding guidelines](../classification/#coding-guidelines) in the image classification folder. -## Frequently asked questions - -Answers to Frequently Asked Questions such as "How many images do I need to train a model?" or "How to annotate images?" can be found in the [FAQ.md](FAQ.md) file. For image classification specified questions, see the [FAQ.md](../classification/FAQ.md) in the classification folder. diff --git a/similarity/notebooks/11_exploring_hyperparameters.ipynb b/similarity/notebooks/11_exploring_hyperparameters.ipynb new file mode 100644 index 0000000..60ba418 --- /dev/null +++ b/similarity/notebooks/11_exploring_hyperparameters.ipynb @@ -0,0 +1,974 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Testing different Hyperparameters and Benchmarking" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this notebook, we'll cover how to test different hyperparameters for a particular dataset and how to benchmark different parameters across a group of datasets. Note that this re-uses functionality which was already introduced and described in the [classification/notebooks/11_exploring_hyperparameters.ipynb](../../classification/notebooks/11_exploring_hyperparameters.ipynb) notebook. **Please refer to that notebook for all explanations, which this notebook will not repeat.**\n", + "\n", + "For an example of how to scale up with remote GPU clusters on Azure Machine Learning, please view [24_exploring_hyperparameters_on_azureml.ipynb](../../classification/notebooks/24_exploring_hyperparameters_on_azureml.ipynb)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing hyperparameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Ensure edits to libraries are loaded and plotting is shown in the notebook." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%reload_ext autoreload\n", + "%autoreload 2\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We start by importing the utilities we need." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'1.0.48'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import sys\n", + "import numpy as np\n", + "import scrapbook as sb\n", + "import fastai\n", + "from fastai.vision import DatasetType\n", + "\n", + "sys.path.append(\"../../\")\n", + "from utils_cv.classification.data import Urls\n", + "from utils_cv.common.data import unzip_url\n", + "from utils_cv.classification.parameter_sweeper import ParameterSweeper, clean_sweeper_df, plot_sweeper_df\n", + "from utils_cv.similarity.data import comparative_set_builder\n", + "from utils_cv.similarity.metrics import positive_image_ranks\n", + "from utils_cv.similarity.model import compute_features_learner\n", + "\n", + "fastai.__version__" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define the datasets and parameters we will use in this notebook." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "DATA_PATHS = [unzip_url(Urls.fridge_objects_path, exist_ok=True), unzip_url(Urls.fridge_objects_watermark_path, exist_ok=True)]\n", + "REPS = 3\n", + "LEARNING_RATES = [1e-3, 1e-4, 1e-5]\n", + "IM_SIZES = [300, 500]\n", + "EPOCHS = [10]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Similarity accuracy metric\n", + "\n", + "For image classification, we used the percentage of correctly labeled images to measure accuracy. For image retrieval, our measure is the rank of the positive example among a large number of negatives. This was described in the [01_training_and_evaluation_introduction.ipynb](01_training_and_evaluation_introduction.ipynb) notebook, and we will re-use some of the code from that notebook in the definition of the _retrieval_rank()_ function below."
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def retrieval_rank(learn):\n", + " data = learn.data\n", + "\n", + " # Build multiple sets of comparative images from the validation images\n", + " comparative_sets = comparative_set_builder(\n", + " data.valid_ds, num_sets=1000, num_negatives=99\n", + " )\n", + "\n", + " # Compute DNN features for all validation images\n", + " embedding_layer = learn.model[1][6]\n", + " valid_features = compute_features_learner(\n", + " data, DatasetType.Valid, learn, embedding_layer\n", + " )\n", + "\n", + " # For each comparative set compute the distances between the query image and all reference images\n", + " for cs in comparative_sets:\n", + " cs.compute_distances(valid_features)\n", + "\n", + " # Compute the median rank of the positive example over all comparative sets\n", + " ranks = positive_image_ranks(comparative_sets)\n", + " median_rank = np.median(ranks)\n", + " return median_rank" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using Python " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We start by creating the Parameter Sweeper object. Before we start testing, it's a good idea to see what the default parameters are. We can use the property `parameters` to easily see those default values."
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "OrderedDict([('learning_rate', [0.0001]),\n", + " ('epochs', [15]),\n", + " ('batch_size', [16]),\n", + " ('im_size', [299]),\n", + " ('architecture',\n", + " [)>]),\n", + " ('transform', [True]),\n", + " ('dropout', [0.5]),\n", + " ('weight_decay', [0.01]),\n", + " ('training_schedule',\n", + " []),\n", + " ('discriminative_lr', [False]),\n", + " ('one_cycle_policy', [True])])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sweeper = ParameterSweeper(metric_name=\"rank\")\n", + "sweeper.parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that we know the defaults, we can pass it the parameters we want to test, and run the parameter sweep." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "this Learner object self-destroyed - it still exists, but no longer usable\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
durationrank
0PARAMETERS [learning_rate: 0.0001]|[epochs: 10]|[batch_size: 16]|[im_size: 300]|[arch: resnet18]|[transforms: True]|[dropout: 0.5]|[weight_decay: 0.01]|[training_schedule: head_first_then_body]|[discriminative_lr: False]|[one_cycle_policy: True]fridgeObjects20.31468711.0
fridgeObjectsWatermark21.80125218.0
PARAMETERS [learning_rate: 0.0001]|[epochs: 10]|[batch_size: 16]|[im_size: 500]|[arch: resnet18]|[transforms: True]|[dropout: 0.5]|[weight_decay: 0.01]|[training_schedule: head_first_then_body]|[discriminative_lr: False]|[one_cycle_policy: True]fridgeObjects29.78926910.0
fridgeObjectsWatermark28.81683821.0
PARAMETERS [learning_rate: 0.001]|[epochs: 10]|[batch_size: 16]|[im_size: 300]|[arch: resnet18]|[transforms: True]|[dropout: 0.5]|[weight_decay: 0.01]|[training_schedule: head_first_then_body]|[discriminative_lr: False]|[one_cycle_policy: True]fridgeObjects21.5935791.0
fridgeObjectsWatermark18.5680011.0
PARAMETERS [learning_rate: 0.001]|[epochs: 10]|[batch_size: 16]|[im_size: 500]|[arch: resnet18]|[transforms: True]|[dropout: 0.5]|[weight_decay: 0.01]|[training_schedule: head_first_then_body]|[discriminative_lr: False]|[one_cycle_policy: True]fridgeObjects30.7427571.0
fridgeObjectsWatermark29.5538851.0
PARAMETERS [learning_rate: 1e-05]|[epochs: 10]|[batch_size: 16]|[im_size: 300]|[arch: resnet18]|[transforms: True]|[dropout: 0.5]|[weight_decay: 0.01]|[training_schedule: head_first_then_body]|[discriminative_lr: False]|[one_cycle_policy: True]fridgeObjects18.39206128.0
fridgeObjectsWatermark18.45886131.0
PARAMETERS [learning_rate: 1e-05]|[epochs: 10]|[batch_size: 16]|[im_size: 500]|[arch: resnet18]|[transforms: True]|[dropout: 0.5]|[weight_decay: 0.01]|[training_schedule: head_first_then_body]|[discriminative_lr: False]|[one_cycle_policy: True]fridgeObjects30.84810728.0
fridgeObjectsWatermark29.85236232.0
1PARAMETERS [learning_rate: 0.0001]|[epochs: 10]|[batch_size: 16]|[im_size: 300]|[arch: resnet18]|[transforms: True]|[dropout: 0.5]|[weight_decay: 0.01]|[training_schedule: head_first_then_body]|[discriminative_lr: False]|[one_cycle_policy: True]fridgeObjects21.64626115.0
fridgeObjectsWatermark21.59038110.0
PARAMETERS [learning_rate: 0.0001]|[epochs: 10]|[batch_size: 16]|[im_size: 500]|[arch: resnet18]|[transforms: True]|[dropout: 0.5]|[weight_decay: 0.01]|[training_schedule: head_first_then_body]|[discriminative_lr: False]|[one_cycle_policy: True]fridgeObjects32.75498314.0
fridgeObjectsWatermark32.17298523.0
PARAMETERS [learning_rate: 0.001]|[epochs: 10]|[batch_size: 16]|[im_size: 300]|[arch: resnet18]|[transforms: True]|[dropout: 0.5]|[weight_decay: 0.01]|[training_schedule: head_first_then_body]|[discriminative_lr: False]|[one_cycle_policy: True]fridgeObjects20.3322461.0
fridgeObjectsWatermark18.1994351.0
PARAMETERS [learning_rate: 0.001]|[epochs: 10]|[batch_size: 16]|[im_size: 500]|[arch: resnet18]|[transforms: True]|[dropout: 0.5]|[weight_decay: 0.01]|[training_schedule: head_first_then_body]|[discriminative_lr: False]|[one_cycle_policy: True]fridgeObjects30.5250141.0
fridgeObjectsWatermark32.1388383.0
PARAMETERS [learning_rate: 1e-05]|[epochs: 10]|[batch_size: 16]|[im_size: 300]|[arch: resnet18]|[transforms: True]|[dropout: 0.5]|[weight_decay: 0.01]|[training_schedule: head_first_then_body]|[discriminative_lr: False]|[one_cycle_policy: True]fridgeObjects21.81931622.0
fridgeObjectsWatermark20.91263722.0
PARAMETERS [learning_rate: 1e-05]|[epochs: 10]|[batch_size: 16]|[im_size: 500]|[arch: resnet18]|[transforms: True]|[dropout: 0.5]|[weight_decay: 0.01]|[training_schedule: head_first_then_body]|[discriminative_lr: False]|[one_cycle_policy: True]fridgeObjects30.99799836.5
fridgeObjectsWatermark31.90061126.0
2PARAMETERS [learning_rate: 0.0001]|[epochs: 10]|[batch_size: 16]|[im_size: 300]|[arch: resnet18]|[transforms: True]|[dropout: 0.5]|[weight_decay: 0.01]|[training_schedule: head_first_then_body]|[discriminative_lr: False]|[one_cycle_policy: True]fridgeObjects18.01549613.0
fridgeObjectsWatermark19.94110515.0
PARAMETERS [learning_rate: 0.0001]|[epochs: 10]|[batch_size: 16]|[im_size: 500]|[arch: resnet18]|[transforms: True]|[dropout: 0.5]|[weight_decay: 0.01]|[training_schedule: head_first_then_body]|[discriminative_lr: False]|[one_cycle_policy: True]fridgeObjects27.32765729.0
fridgeObjectsWatermark27.34250514.0
PARAMETERS [learning_rate: 0.001]|[epochs: 10]|[batch_size: 16]|[im_size: 300]|[arch: resnet18]|[transforms: True]|[dropout: 0.5]|[weight_decay: 0.01]|[training_schedule: head_first_then_body]|[discriminative_lr: False]|[one_cycle_policy: True]fridgeObjects21.6607621.0
fridgeObjectsWatermark21.6554341.0
PARAMETERS [learning_rate: 0.001]|[epochs: 10]|[batch_size: 16]|[im_size: 500]|[arch: resnet18]|[transforms: True]|[dropout: 0.5]|[weight_decay: 0.01]|[training_schedule: head_first_then_body]|[discriminative_lr: False]|[one_cycle_policy: True]fridgeObjects32.0692941.0
fridgeObjectsWatermark34.7714101.0
PARAMETERS [learning_rate: 1e-05]|[epochs: 10]|[batch_size: 16]|[im_size: 300]|[arch: resnet18]|[transforms: True]|[dropout: 0.5]|[weight_decay: 0.01]|[training_schedule: head_first_then_body]|[discriminative_lr: False]|[one_cycle_policy: True]fridgeObjects21.24450225.0
fridgeObjectsWatermark18.23778930.0
PARAMETERS [learning_rate: 1e-05]|[epochs: 10]|[batch_size: 16]|[im_size: 500]|[arch: resnet18]|[transforms: True]|[dropout: 0.5]|[weight_decay: 0.01]|[training_schedule: head_first_then_body]|[discriminative_lr: False]|[one_cycle_policy: True]fridgeObjects28.96686128.0
fridgeObjectsWatermark29.37790434.0
\n", + "
" + ], + "text/plain": [ + " duration \\\n", + "0 PARAMETERS [learning_rate: 0.0001]|[epochs: 10]... fridgeObjects 20.314687 \n", + " fridgeObjectsWatermark 21.801252 \n", + " PARAMETERS [learning_rate: 0.0001]|[epochs: 10]... fridgeObjects 29.789269 \n", + " fridgeObjectsWatermark 28.816838 \n", + " PARAMETERS [learning_rate: 0.001]|[epochs: 10]|... fridgeObjects 21.593579 \n", + " fridgeObjectsWatermark 18.568001 \n", + " PARAMETERS [learning_rate: 0.001]|[epochs: 10]|... fridgeObjects 30.742757 \n", + " fridgeObjectsWatermark 29.553885 \n", + " PARAMETERS [learning_rate: 1e-05]|[epochs: 10]|... fridgeObjects 18.392061 \n", + " fridgeObjectsWatermark 18.458861 \n", + " PARAMETERS [learning_rate: 1e-05]|[epochs: 10]|... fridgeObjects 30.848107 \n", + " fridgeObjectsWatermark 29.852362 \n", + "1 PARAMETERS [learning_rate: 0.0001]|[epochs: 10]... fridgeObjects 21.646261 \n", + " fridgeObjectsWatermark 21.590381 \n", + " PARAMETERS [learning_rate: 0.0001]|[epochs: 10]... fridgeObjects 32.754983 \n", + " fridgeObjectsWatermark 32.172985 \n", + " PARAMETERS [learning_rate: 0.001]|[epochs: 10]|... fridgeObjects 20.332246 \n", + " fridgeObjectsWatermark 18.199435 \n", + " PARAMETERS [learning_rate: 0.001]|[epochs: 10]|... fridgeObjects 30.525014 \n", + " fridgeObjectsWatermark 32.138838 \n", + " PARAMETERS [learning_rate: 1e-05]|[epochs: 10]|... fridgeObjects 21.819316 \n", + " fridgeObjectsWatermark 20.912637 \n", + " PARAMETERS [learning_rate: 1e-05]|[epochs: 10]|... fridgeObjects 30.997998 \n", + " fridgeObjectsWatermark 31.900611 \n", + "2 PARAMETERS [learning_rate: 0.0001]|[epochs: 10]... fridgeObjects 18.015496 \n", + " fridgeObjectsWatermark 19.941105 \n", + " PARAMETERS [learning_rate: 0.0001]|[epochs: 10]... fridgeObjects 27.327657 \n", + " fridgeObjectsWatermark 27.342505 \n", + " PARAMETERS [learning_rate: 0.001]|[epochs: 10]|... fridgeObjects 21.660762 \n", + " fridgeObjectsWatermark 21.655434 \n", + " PARAMETERS [learning_rate: 0.001]|[epochs: 10]|... 
fridgeObjects 32.069294 \n", + " fridgeObjectsWatermark 34.771410 \n", + " PARAMETERS [learning_rate: 1e-05]|[epochs: 10]|... fridgeObjects 21.244502 \n", + " fridgeObjectsWatermark 18.237789 \n", + " PARAMETERS [learning_rate: 1e-05]|[epochs: 10]|... fridgeObjects 28.966861 \n", + " fridgeObjectsWatermark 29.377904 \n", + "\n", + " rank \n", + "0 PARAMETERS [learning_rate: 0.0001]|[epochs: 10]... fridgeObjects 11.0 \n", + " fridgeObjectsWatermark 18.0 \n", + " PARAMETERS [learning_rate: 0.0001]|[epochs: 10]... fridgeObjects 10.0 \n", + " fridgeObjectsWatermark 21.0 \n", + " PARAMETERS [learning_rate: 0.001]|[epochs: 10]|... fridgeObjects 1.0 \n", + " fridgeObjectsWatermark 1.0 \n", + " PARAMETERS [learning_rate: 0.001]|[epochs: 10]|... fridgeObjects 1.0 \n", + " fridgeObjectsWatermark 1.0 \n", + " PARAMETERS [learning_rate: 1e-05]|[epochs: 10]|... fridgeObjects 28.0 \n", + " fridgeObjectsWatermark 31.0 \n", + " PARAMETERS [learning_rate: 1e-05]|[epochs: 10]|... fridgeObjects 28.0 \n", + " fridgeObjectsWatermark 32.0 \n", + "1 PARAMETERS [learning_rate: 0.0001]|[epochs: 10]... fridgeObjects 15.0 \n", + " fridgeObjectsWatermark 10.0 \n", + " PARAMETERS [learning_rate: 0.0001]|[epochs: 10]... fridgeObjects 14.0 \n", + " fridgeObjectsWatermark 23.0 \n", + " PARAMETERS [learning_rate: 0.001]|[epochs: 10]|... fridgeObjects 1.0 \n", + " fridgeObjectsWatermark 1.0 \n", + " PARAMETERS [learning_rate: 0.001]|[epochs: 10]|... fridgeObjects 1.0 \n", + " fridgeObjectsWatermark 3.0 \n", + " PARAMETERS [learning_rate: 1e-05]|[epochs: 10]|... fridgeObjects 22.0 \n", + " fridgeObjectsWatermark 22.0 \n", + " PARAMETERS [learning_rate: 1e-05]|[epochs: 10]|... fridgeObjects 36.5 \n", + " fridgeObjectsWatermark 26.0 \n", + "2 PARAMETERS [learning_rate: 0.0001]|[epochs: 10]... fridgeObjects 13.0 \n", + " fridgeObjectsWatermark 15.0 \n", + " PARAMETERS [learning_rate: 0.0001]|[epochs: 10]... 
fridgeObjects 29.0 \n", + " fridgeObjectsWatermark 14.0 \n", + " PARAMETERS [learning_rate: 0.001]|[epochs: 10]|... fridgeObjects 1.0 \n", + " fridgeObjectsWatermark 1.0 \n", + " PARAMETERS [learning_rate: 0.001]|[epochs: 10]|... fridgeObjects 1.0 \n", + " fridgeObjectsWatermark 1.0 \n", + " PARAMETERS [learning_rate: 1e-05]|[epochs: 10]|... fridgeObjects 25.0 \n", + " fridgeObjectsWatermark 30.0 \n", + " PARAMETERS [learning_rate: 1e-05]|[epochs: 10]|... fridgeObjects 28.0 \n", + " fridgeObjectsWatermark 34.0 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sweeper.update_parameters(learning_rate=LEARNING_RATES, im_size=IM_SIZES, epochs=EPOCHS)\n", + "df = sweeper.run(datasets=DATA_PATHS, reps=REPS, metric_fct=retrieval_rank); \n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Visualize Results " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When we read in multi-index dataframe, index 0 represents the run number, index 1 represents a single permutation of parameters, and index 2 represents the dataset. To see the results, show the df using the `clean_sweeper_df` helper function. This will display all the hyperparameters in a nice, readable way." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "df = clean_sweeper_df(df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Since we've run our benchmarking over 3 repetitions, we may want to just look at the averages across the different __run numbers__." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
P: [learning_rate: 0.0001] [im_size: 300]P: [learning_rate: 0.0001] [im_size: 500]P: [learning_rate: 0.001] [im_size: 300]P: [learning_rate: 0.001] [im_size: 500]P: [learning_rate: 1e-05] [im_size: 300]P: [learning_rate: 1e-05] [im_size: 500]
fridgeObjectsfridgeObjectsWatermarkfridgeObjectsfridgeObjectsWatermarkfridgeObjectsfridgeObjectsWatermarkfridgeObjectsfridgeObjectsWatermarkfridgeObjectsfridgeObjectsWatermarkfridgeObjectsfridgeObjectsWatermark
duration19.99214821.11091329.95730329.44410921.19552919.4742931.11235532.15471120.48529319.20309630.27098930.376959
rank13.00000014.33333317.66666719.3333331.0000001.000001.0000001.66666725.00000027.66666730.83333330.666667
\n", + "
" + ], + "text/plain": [ + " P: [learning_rate: 0.0001] [im_size: 300] \\\n", + " fridgeObjects fridgeObjectsWatermark \n", + "duration 19.992148 21.110913 \n", + "rank 13.000000 14.333333 \n", + "\n", + " P: [learning_rate: 0.0001] [im_size: 500] \\\n", + " fridgeObjects fridgeObjectsWatermark \n", + "duration 29.957303 29.444109 \n", + "rank 17.666667 19.333333 \n", + "\n", + " P: [learning_rate: 0.001] [im_size: 300] \\\n", + " fridgeObjects fridgeObjectsWatermark \n", + "duration 21.195529 19.47429 \n", + "rank 1.000000 1.00000 \n", + "\n", + " P: [learning_rate: 0.001] [im_size: 500] \\\n", + " fridgeObjects fridgeObjectsWatermark \n", + "duration 31.112355 32.154711 \n", + "rank 1.000000 1.666667 \n", + "\n", + " P: [learning_rate: 1e-05] [im_size: 300] \\\n", + " fridgeObjects fridgeObjectsWatermark \n", + "duration 20.485293 19.203096 \n", + "rank 25.000000 27.666667 \n", + "\n", + " P: [learning_rate: 1e-05] [im_size: 500] \n", + " fridgeObjects fridgeObjectsWatermark \n", + "duration 30.270989 30.376959 \n", + "rank 30.833333 30.666667 " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.mean(level=(1,2)).T" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print the average accuracy over the different runs for each dataset independently." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ax = df.mean(level=(1,2))[\"rank\"].unstack().plot(kind='bar', figsize=(12, 6))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Additionally, we may want simply to see which set of hyperparameters perform the best across the different __datasets__. We can do that by averaging the results of the different datasets." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
P: [learning_rate: 0.0001] [im_size: 300]P: [learning_rate: 0.0001] [im_size: 500]P: [learning_rate: 0.001] [im_size: 300]P: [learning_rate: 0.001] [im_size: 500]P: [learning_rate: 1e-05] [im_size: 300]P: [learning_rate: 1e-05] [im_size: 500]
duration20.55153029.70070620.3349131.63353319.84419430.323974
rank13.66666718.5000001.000001.33333326.33333330.750000
\n", + "
" + ], + "text/plain": [ + " P: [learning_rate: 0.0001] [im_size: 300] \\\n", + "duration 20.551530 \n", + "rank 13.666667 \n", + "\n", + " P: [learning_rate: 0.0001] [im_size: 500] \\\n", + "duration 29.700706 \n", + "rank 18.500000 \n", + "\n", + " P: [learning_rate: 0.001] [im_size: 300] \\\n", + "duration 20.33491 \n", + "rank 1.00000 \n", + "\n", + " P: [learning_rate: 0.001] [im_size: 500] \\\n", + "duration 31.633533 \n", + "rank 1.333333 \n", + "\n", + " P: [learning_rate: 1e-05] [im_size: 300] \\\n", + "duration 19.844194 \n", + "rank 26.333333 \n", + "\n", + " P: [learning_rate: 1e-05] [im_size: 500] \n", + "duration 30.323974 \n", + "rank 30.750000 " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.mean(level=(1)).T" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To make it easier to see which permutation did the best, we can plot the results using the `plot_sweeper_df` helper function. This plot will help us easily see which parameters offer the highest accuracies." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plot_sweeper_df(df.mean(level=(1)), sort_by=\"rank\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "application/scrapbook.scrap.json+json": { + "data": 36, + "encoder": "json", + "name": "nr_elements", + "version": 1 + } + }, + "metadata": { + "scrapbook": { + "data": true, + "display": false, + "name": "nr_elements" + } + }, + "output_type": "display_data" + }, + { + "data": { + "application/scrapbook.scrap.json+json": { + "data": [ + 13.666666666666666, + 18.5, + 1, + 1.3333333333333333, + 26.333333333333332, + 30.75 + ], + "encoder": "json", + "name": "ranks", + "version": 1 + } + }, + "metadata": { + "scrapbook": { + "data": true, + "display": false, + "name": "ranks" + } + }, + "output_type": "display_data" + }, + { + "data": { + "application/scrapbook.scrap.json+json": { + "data": 34.771409999999996, + "encoder": "json", + "name": "max_duration", + "version": 1 + } + }, + "metadata": { + "scrapbook": { + "data": true, + "display": false, + "name": "max_duration" + } + }, + "output_type": "display_data" + }, + { + "data": { + "application/scrapbook.scrap.json+json": { + "data": 18.015496, + "encoder": "json", + "name": "min_duration", + "version": 1 + } + }, + "metadata": { + "scrapbook": { + "data": true, + "display": false, + "name": "min_duration" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "# Preserve some of the notebook outputs\n", + "sb.glue(\"nr_elements\", len(df))\n", + "sb.glue(\"ranks\", list(df.mean(level=(1))[\"rank\"]))\n", + "sb.glue(\"max_duration\", df.max().duration)\n", + "sb.glue(\"min_duration\", df.min().duration)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (cv)", + "language": "python", + "name": "cv" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/conftest.py b/tests/conftest.py index 835c559..731666c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -85,7 +85,7 @@ def classification_notebooks(): ), "24_exploring_hyperparameters_on_azureml": os.path.join( folder_notebooks, "24_exploring_hyperparameters_on_azureml.ipynb" - ) + ), } return paths @@ -100,6 +100,9 @@ def similarity_notebooks(): "01": os.path.join( folder_notebooks, "01_training_and_evaluation_introduction.ipynb" ), + "11": os.path.join( + folder_notebooks, "11_exploring_hyperparameters.ipynb" + ), } return paths @@ -252,14 +255,16 @@ def testing_databunch(tmp_session): def pytest_addoption(parser): - parser.addoption("--subscription_id", - help="Azure Subscription Id to create resources in") - parser.addoption("--resource_group", - help="Name of the resource group") - parser.addoption("--workspace_name", - help="Name of Azure ML Workspace") - parser.addoption("--workspace_region", - help="Azure region to create the workspace in") + parser.addoption( + "--subscription_id", + help="Azure Subscription Id to create resources in", + ) + parser.addoption("--resource_group", help="Name of the resource group") + parser.addoption("--workspace_name", help="Name of Azure ML Workspace") + parser.addoption( + "--workspace_region", help="Azure region to create the workspace in" + ) + @pytest.fixture def subscription_id(request): diff --git a/tests/integration/similarity/test_integration_similarity_notebooks.py b/tests/integration/similarity/test_integration_similarity_notebooks.py index f261e16..a3fa929 100644 --- a/tests/integration/similarity/test_integration_similarity_notebooks.py +++ b/tests/integration/similarity/test_integration_similarity_notebooks.py @@ -23,3 +23,23 @@ def 
test_01_notebook_run(similarity_notebooks): nb_output = sb.read_notebook(OUTPUT_NOTEBOOK) assert nb_output.scraps["median_rank"].data <= 10 + + +@pytest.mark.notebooks +@pytest.mark.linuxgpu +def test_11_notebook_run(similarity_notebooks, tiny_ic_data_path): + notebook_path = similarity_notebooks["11"] + pm.execute_notebook( + notebook_path, + OUTPUT_NOTEBOOK, + parameters=dict( + PM_VERSION=pm.__version__, + # Speed up testing since otherwise would take ~12 minutes on V100 + DATA_PATHS=[tiny_ic_data_path], + REPS=1, + IM_SIZES=[60, 100], + ), + kernel_name=KERNEL_NAME, + ) + nb_output = sb.read_notebook(OUTPUT_NOTEBOOK) + assert min(nb_output.scraps["ranks"].data) <= 30 \ No newline at end of file diff --git a/tests/smoke/test_azureml_notebooks.py b/tests/smoke/test_azureml_notebooks.py index cf623bf..0b63b08 100644 --- a/tests/smoke/test_azureml_notebooks.py +++ b/tests/smoke/test_azureml_notebooks.py @@ -113,9 +113,11 @@ def test_24_notebook_run( subscription_id, resource_group, workspace_name, - workspace_region + workspace_region, ): - notebook_path = classification_notebooks["24_exploring_hyperparameters_on_azureml"] + notebook_path = classification_notebooks[ + "24_exploring_hyperparameters_on_azureml" + ] pm.execute_notebook( notebook_path, OUTPUT_NOTEBOOK, @@ -125,8 +127,9 @@ def test_24_notebook_run( resource_group=resource_group, workspace_name=workspace_name, workspace_region=workspace_region, - epochs=1, - max_total_runs=1 + MAX_NODES=2, + MAX_TOTAL_RUNS=1, + IM_SIZES=[30, 40], ), kernel_name=KERNEL_NAME, ) diff --git a/tests/unit/similarity/test_similarity_notebooks.py b/tests/unit/similarity/test_similarity_notebooks.py index ab1bd3b..e281d64 100644 --- a/tests/unit/similarity/test_similarity_notebooks.py +++ b/tests/unit/similarity/test_similarity_notebooks.py @@ -47,4 +47,21 @@ def test_01_notebook_run(similarity_notebooks, tiny_ic_data_path): ), kernel_name=KERNEL_NAME, ) - nb_output = sb.read_notebook(OUTPUT_NOTEBOOK) + + +@pytest.mark.notebooks 
+def test_11_notebook_run(similarity_notebooks, tiny_ic_data_path): + notebook_path = similarity_notebooks["11"] + pm.execute_notebook( + notebook_path, + OUTPUT_NOTEBOOK, + parameters=dict( + PM_VERSION=pm.__version__, + DATA_PATHS=[tiny_ic_data_path], + REPS=1, + LEARNING_RATES=[1e-4], + IM_SIZES=[30], + EPOCHS=[1], + ), + kernel_name=KERNEL_NAME, + ) diff --git a/tools/repo_metrics/README.md b/tools/repo_metrics/README.md index 102529d..6f684b9 100644 --- a/tools/repo_metrics/README.md +++ b/tools/repo_metrics/README.md @@ -1,8 +1,8 @@ # Repository Metrics -[![Build Status](https://dev.azure.com/best-practices/computervision/_apis/build/status/repo-metrics?branchName=master)](https://dev.azure.com/best-practices/computervision/_build/latest?definitionId=27&branchName=master) +[![Build Status](https://dev.azure.com/best-practices/computervision/_apis/build/status/repo-metrics?branchName=staging)](https://dev.azure.com/best-practices/computervision/_build/latest?definitionId=27&branchName=staging) -We developed a script that allows us to track the metrics of the ComputerVisionBestPractices repo. Some of the metrics we can track are listed here: +We developed a script that allows us to track the repo metrics. 
Some of the metrics we can track are listed here: * Number of stars * Number of forks @@ -10,17 +10,27 @@ We developed a script that allows us to track the metrics of the ComputerVisionB * Number of views * Number of lines of code -To see the full list of metrics, see [git_stats.py](scripts/repo_metrics/git_stats.py) +To see the full list of metrics, see [git_stats.py](git_stats.py) The first step is to set up the credentials, copy the configuration file and fill up the credentials of GitHub and CosmosDB: - cp scripts/repo_metrics/config_template.py scripts/repo_metrics/config.py + cp tools/repo_metrics/config_template.py tools/repo_metrics/config.py To track the current state of the repository and save it to CosmosDB: - python scripts/repo_metrics/track_metrics.py --github_repo "https://github.com/Microsoft/ComputerVision" --save_to_database + python tools/repo_metrics/track_metrics.py --github_repo "https://github.com/Microsoft/ComputerVision" --save_to_database To track an event related to this repository and save it to CosmosDB: - python scripts/repo_metrics/track_metrics.py --event "Today we did our first blog of the project" --event_date 2018-12-01 --save_to_database + python tools/repo_metrics/track_metrics.py --event "Today we did our first blog of the project" --event_date 2018-12-01 --save_to_database + + +### Setting up Azure CosmosDB + +The API that is used to track the GitHub metrics is the [Mongo API](https://docs.microsoft.com/en-us/azure/cosmos-db/mongodb-introduction). + +The database name and collection names are defined in the [config file](config_template.py). There are two main collections, defined as `COLLECTION_GITHUB_STATS` and `COLLECTION_EVENTS`, to store the information described in the previous section. + +**IMPORTANT NOTE**: If the database and the collections are created directly through the portal, a common partition key should be defined. We recommend using `date` as the partition key. 
+ diff --git a/tools/repo_metrics/config_template.py b/tools/repo_metrics/config_template.py index 03efb45..1b6b42b 100644 --- a/tools/repo_metrics/config_template.py +++ b/tools/repo_metrics/config_template.py @@ -3,10 +3,12 @@ # Github token # More info: https://help.github.com/articles/creating-a-personal-access-token-for-the-command-line/ -GITHUB_TOKEN = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" +GITHUB_TOKEN = "" # CosmosDB Mongo API -CONNECTION_STRING = "mongodb://XXXXXXXXXXXXXXXXXXXXXXXXX.documents.azure.com:10255/?ssl=true&replicaSet=globaldb" +# * Azure Portal: Settings -> Connection String -> PRIMARY CONNECTION STRING +# * For example, 'mongodb://<username>:<password>@<host>:<port>/?ssl=true&replicaSet=globaldb' +CONNECTION_STRING = "" DATABASE = "cv_stats" COLLECTION_GITHUB_STATS = "github_stats" COLLECTION_EVENTS = "events" diff --git a/tools/repo_metrics/track_metrics.py b/tools/repo_metrics/track_metrics.py index 5f5b181..b0e40fd 100644 --- a/tools/repo_metrics/track_metrics.py +++ b/tools/repo_metrics/track_metrics.py @@ -14,7 +14,6 @@ import logging from datetime import datetime from dateutil.parser import isoparse from pymongo import MongoClient -from datetime import datetime from tools.repo_metrics.git_stats import Github from tools.repo_metrics.config import ( GITHUB_TOKEN, @@ -32,6 +31,7 @@ log = logging.getLogger() def parse_args(): """Argument parser. + Returns: obj: Parser. """ @@ -61,12 +61,14 @@ def parse_args(): def connect(uri="mongodb://localhost"): """Mongo connector. + Args: uri (str): Connection string. + Returns: obj: Mongo client. """ - client = MongoClient(uri, serverSelectionTimeoutMS=1000) + client = MongoClient(uri, serverSelectionTimeoutMS=5000) # Send a query to the server to see if the connection is working. try: @@ -78,9 +80,11 @@ def connect(uri="mongodb://localhost"): def event_as_dict(event, date): """Encodes an string event input as a dictionary with the date. + Args: event (str): Details of a event. date (datetime): Date of the event. 
+ Returns: dict: Dictionary with the event and the date. """ @@ -89,8 +93,10 @@ def event_as_dict(event, date): def github_stats_as_dict(github): """Encodes Github statistics as a dictionary with the date. + Args: obj: Github object. + Returns: dict: Dictionary with Github details and the date. """ @@ -125,6 +131,7 @@ def github_stats_as_dict(github): def tracker(args): """Main function to track metrics. + Args: args (obj): Parsed arguments. """ diff --git a/utils_cv/classification/parameter_sweeper.py b/utils_cv/classification/parameter_sweeper.py index 63ee471..8ce8ad7 100644 --- a/utils_cv/classification/parameter_sweeper.py +++ b/utils_cv/classification/parameter_sweeper.py @@ -195,7 +195,7 @@ class ParameterSweeper: one_cycle_policy=True, ) - def __init__(self, **kwargs) -> None: + def __init__(self, metric_name="accuracy", **kwargs) -> None: """ Initialize class with default params if kwargs is empty. Otherwise, initialize params with kwargs. @@ -214,6 +214,8 @@ class ParameterSweeper: one_cycle_policy=[self.default_params.get("one_cycle_policy")], ) + self.metric_name = metric_name + self.param_order = tuple(self.params.keys()) self.update_parameters(**kwargs) @@ -411,8 +413,8 @@ class ParameterSweeper: Otherwise overwrite the corresponding self.params key. """ for k, v in kwargs.items(): - if k not in self.params.keys(): - raise Exception("Parameter {k} is invalid.") + if k not in set(self.params.keys()): + raise Exception(f"Parameter {k} is invalid.") if v is None: continue self.params[k] = v @@ -420,7 +422,11 @@ class ParameterSweeper: return self def run( - self, datasets: List[Path], reps: int = 3, early_stopping: bool = False + self, + datasets: List[Path], + reps: int = 3, + early_stopping: bool = False, + metric_fct=None, ) -> pd.DataFrame: """ Performs the experiment. 
Iterates through the number of specified , the list permutations @@ -440,8 +446,8 @@ class ParameterSweeper: res = dict() for rep in range(reps): - res[rep] = dict() + for i, permutation in enumerate(self.permutations): print( f"Running {i+1} of {len(self.permutations)} permutations. " @@ -462,15 +468,20 @@ class ParameterSweeper: dataset, permutation, early_stopping ) - _, metric = learn.validate( - learn.data.valid_dl, metrics=[accuracy] - ) + if metric_fct is None: + _, metric = learn.validate( + learn.data.valid_dl, metrics=[accuracy] + ) + + else: + metric = metric_fct(learn) res[rep][stringified_permutation][data_name][ "duration" ] = duration + res[rep][stringified_permutation][data_name][ - "accuracy" + self.metric_name ] = float(metric) learn.destroy()