Adds python files from jupytext. Updates gitignore

This commit is contained in:
msalvaris 2018-10-08 09:58:52 +00:00
Parent 63795c6cf7
Commit 05ce5d72f4
10 changed files: 1149 additions and 16 deletions

3
.gitignore vendored
View file

@@ -105,4 +105,5 @@ ENV/
synset.txt
*.ckpt
*.tar.gz
flaskwebapp
flaskwebapp
driver.py

107
Pytorch/00_DevelopModel.py Normal file
View file

@@ -0,0 +1,107 @@
# ---
# jupyter:
# jupytext_format_version: '1.3'
# jupytext_formats: py:light
# kernelspec:
# display_name: Python [conda env:AKSDeploymentPytorch]
# language: python
# name: conda-env-AKSDeploymentPytorch-py
# language_info:
# codemirror_mode:
# name: ipython
# version: 3
# file_extension: .py
# mimetype: text/x-python
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.6.6
# ---
# # Develop Model
# In this notebook, we will go through the steps to load the ResNet152 model, pre-process the images into the required format and call the model to find the top predictions.
# +
import torch
import torch.nn as nn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import PIL
from PIL import Image
import wget
# -
print(torch.__version__)
print(torchvision.__version__)
!wget "http://data.dmlc.ml/mxnet/models/imagenet/synset.txt"
# We first load the pretrained ResNet152 model from torchvision. This can take about 10s.
# %%time
model = models.resnet152(pretrained=True)
# You can print the summary of the model in the cell below. We cleared the output here for brevity. When you run the cell you should see a list of the layers and, at the bottom of the output, the size of the model in terms of number of parameters.
model = model.cuda()
print(model)
# Let's test our model with an image of a Lynx.
wget.download('https://upload.wikimedia.org/wikipedia/commons/thumb/6/68/Lynx_lynx_poing.jpg/220px-Lynx_lynx_poing.jpg')
img_path = '220px-Lynx_lynx_poing.jpg'
print(Image.open(img_path).size)
Image.open(img_path)
# Below, we load the image, resize it to (224, 224) and preprocess it using torchvision transforms with the standard ImageNet normalization.
img = Image.open(img_path).convert('RGB')
preprocess_input = transforms.Compose([
torchvision.transforms.Resize((224, 224), interpolation=PIL.Image.BICUBIC),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
img = Image.open(img_path)
img = preprocess_input(img)
# Now, let's call the model on our image to predict the top 3 labels. This will take a few seconds.
def create_label_lookup():
    with open('synset.txt', 'r') as f:
        label_list = [l.rstrip() for l in f]

    def _label_lookup(*label_locks):
        return [label_list[l] for l in label_locks]

    return _label_lookup
label_lookup = create_label_lookup()
softmax = nn.Softmax(dim=1).cuda()
model = model.eval()
# %%time
with torch.no_grad():
    img = img.expand(1, 3, 224, 224)
    image_gpu = img.type(torch.float).cuda()
    outputs = model(image_gpu)
    probabilities = softmax(outputs)
label_lookup = create_label_lookup()
probabilities_numpy = probabilities.cpu().numpy().squeeze()
top_results = np.flip(np.sort(probabilities_numpy), 0)[:3]
labels = label_lookup(*np.flip(probabilities_numpy.argsort(),0)[:3])
dict(zip(labels, top_results))
# The top guess is Lynx with probability about 99%. We can now move on to [developing the model API](01_DevelopModelDriver.ipynb).

165
Pytorch/01_DevelopModelDriver.py Normal file
View file

@@ -0,0 +1,165 @@
# ---
# jupyter:
# jupytext_format_version: '1.3'
# jupytext_formats: py:light
# kernelspec:
# display_name: Python [conda env:AKSDeploymentPytorch]
# language: python
# name: conda-env-AKSDeploymentPytorch-py
# language_info:
# codemirror_mode:
# name: ipython
# version: 3
# file_extension: .py
# mimetype: text/x-python
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.6.6
# ---
# # Develop Model Driver
# In this notebook, we will develop the API that will call our model. This module initializes the model, transforms the input so that it is in the appropriate format and defines the scoring method that will produce the predictions. The API will expect the input to be in JSON format. Once a request is received, the API will convert the JSON-encoded request body into image data. There are two main functions in the API: the first loads the model and returns a scoring function; the second processes the images and uses the scoring function to score them.
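# As a rough sketch of the request body the driver expects: the outer `input` key is what the scoring endpoint reads, while the inner key name (`image` here) is just an arbitrary identifier chosen by the caller. This cell is illustrative only and assumes the Lynx image downloaded in the previous notebook is still in the working directory.
import base64
with open('220px-Lynx_lynx_poing.jpg', 'rb') as f:
    example_request = {'input': {'image': base64.b64encode(f.read()).decode('utf-8')}}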
import logging
from testing_utilities import img_url_to_json
# We use the writefile magic to write the contents of the below cell to driver.py which includes the driver methods.
# +
# %%writefile driver.py
import base64
import json
import logging
import os
import timeit as t
from io import BytesIO
from pprint import pprint
import numpy as np
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, models, transforms
import PIL
from PIL import Image, ImageOps
_LABEL_FILE = os.getenv('LABEL_FILE', "synset.txt")
_NUMBER_RESULTS = 3
def _create_label_lookup(label_path):
    with open(label_path, 'r') as f:
        label_list = [l.rstrip() for l in f]

    def _label_lookup(*label_locks):
        return [label_list[l] for l in label_locks]

    return _label_lookup


def _load_model():
    # Load the pretrained model and move it to the GPU
    model = models.resnet152(pretrained=True)
    model = model.cuda()
    softmax = nn.Softmax(dim=1).cuda()
    model = model.eval()

    preprocess_input = transforms.Compose([
        torchvision.transforms.Resize((224, 224), interpolation=PIL.Image.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    def predict_for(image):
        image = preprocess_input(image)
        with torch.no_grad():
            image = image.expand(1, 3, 224, 224)
            image_gpu = image.type(torch.float).cuda()
            outputs = model(image_gpu)
            pred_proba = softmax(outputs)
        return pred_proba.cpu().numpy().squeeze()

    return predict_for


def _base64img_to_pil_image(base64_img_string):
    if base64_img_string.startswith('b\''):
        base64_img_string = base64_img_string[2:-1]
    base64Img = base64_img_string.encode('utf-8')

    # Preprocess the input data
    startPreprocess = t.default_timer()

    decoded_img = base64.b64decode(base64Img)
    img_buffer = BytesIO(decoded_img)

    # Load image with PIL (RGB)
    pil_img = Image.open(img_buffer).convert('RGB')
    return pil_img


def create_scoring_func(label_path=_LABEL_FILE):
    logger = logging.getLogger("model_driver")

    start = t.default_timer()
    labels_for = _create_label_lookup(label_path)
    predict_for = _load_model()
    end = t.default_timer()

    loadTimeMsg = "Model loading time: {0} ms".format(round((end-start)*1000, 2))
    logger.info(loadTimeMsg)

    def call_model(image, number_results=_NUMBER_RESULTS):
        pred_proba = predict_for(image).squeeze()
        # np.argsort sorts ascending, so flipping gives indices ordered from highest to lowest probability
        selected_results = np.flip(np.argsort(pred_proba), 0)[:number_results]
        labels = labels_for(*selected_results)
        return list(zip(labels, pred_proba[selected_results].astype(np.float64)))

    return call_model


def get_model_api():
    logger = logging.getLogger("model_driver")
    scoring_func = create_scoring_func()

    def process_and_score(images_dict, number_results=_NUMBER_RESULTS):
        start = t.default_timer()

        results = {}
        for key, base64_img_string in images_dict.items():
            rgb_image = _base64img_to_pil_image(base64_img_string)
            results[key] = scoring_func(rgb_image, number_results=number_results)

        end = t.default_timer()
        logger.info("Predictions: {0}".format(results))
        logger.info("Predictions took {0} ms".format(round((end-start)*1000, 2)))
        return (results, 'Computed in {0} ms'.format(round((end-start)*1000, 2)))

    return process_and_score


def version():
    return torch.__version__
# -
# Let's test the module.
logging.basicConfig(level=logging.DEBUG)
# We run the file driver.py which will bring everything into the context of the notebook.
# %run driver.py
# We will use the same Lynx image we used earlier to check that our driver works as expected.
IMAGEURL = "https://upload.wikimedia.org/wikipedia/commons/thumb/6/68/Lynx_lynx_poing.jpg/220px-Lynx_lynx_poing.jpg"
predict_for = get_model_api()
jsonimg = img_url_to_json(IMAGEURL)
json_load_img = json.loads(jsonimg)
body = json_load_img['input']
resp = predict_for(body)
pprint(resp[0])
# Next, we can move on to [building our docker image](02_BuildImage.ipynb).

File diff suppressed because one or more lines are too long

291
Pytorch/02_BuildImage.py Normal file
View file

@@ -0,0 +1,291 @@
# ---
# jupyter:
# jupytext_format_version: '1.3'
# jupytext_formats: py:light
# kernelspec:
# display_name: Python [conda env:AKSDeploymentKeras]
# language: python
# name: conda-env-AKSDeploymentKeras-py
# language_info:
# codemirror_mode:
# name: ipython
# version: 3
# file_extension: .py
# mimetype: text/x-python
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.5.5
# ---
# # Build Docker image
# In this notebook, we will build the docker container that contains the ResNet152 model, Flask web application, model driver and all dependencies.
# Make sure you have logged in using docker login.
import os
from os import path
import json
import shutil
from dotenv import set_key, get_key
# We will be using the following Docker information to push the image to docker hub.
# +
# "YOUR_DOCKER_LOGIN"
# + {"tags": ["parameters"]}
# %%writefile .env
# This cell is tagged `parameters`
# Please modify the values below as you see fit
# Your docker login and image repository name
# -
set_key('.env', 'docker_login', 'masalvar')
set_key('.env', 'image_repo', 'pytorch-gpu')
!cat .env
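# After the two `set_key` calls above, the `.env` file should contain entries roughly like the following (values will reflect your own login and repository name):
#
# docker_login="masalvar"
# image_repo="pytorch-gpu"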
os.makedirs('flaskwebapp', exist_ok=True)
os.makedirs(os.path.join('flaskwebapp', 'nginx'), exist_ok=True)
os.makedirs(os.path.join('flaskwebapp', 'etc'), exist_ok=True)
shutil.copy('synset.txt', 'flaskwebapp')
shutil.copy('driver.py', 'flaskwebapp')
os.listdir('flaskwebapp')
# Below, we create the module for the Flask web application.
# +
# %%writefile flaskwebapp/app.py
from flask import Flask, request, Response
import logging
import json
import driver

app = Flask(__name__)
predict_for = driver.get_model_api()


@app.route("/score", methods=['POST'])
def scoreRRS():
    """ Endpoint for scoring
    """
    if request.headers['Content-Type'] != 'application/json':
        return Response(json.dumps({}), status=415, mimetype='application/json')
    request_input = request.json['input']
    response = predict_for(request_input)
    return json.dumps({'result': response})


@app.route("/")
def healthy():
    return "Healthy"


# PyTorch version
@app.route('/version', methods=['GET'])
def version_request():
    return driver.version()


if __name__ == "__main__":
    app.run(host='0.0.0.0', port=5000)
# +
# %%writefile flaskwebapp/wsgi.py
from app import app as application


def create():
    print("Initialising")
    application.run(host='127.0.0.1', port=5000)
# -
# Here, we write the configuration for Nginx, which proxies requests from port **80** to port **5000**.
# %%writefile flaskwebapp/nginx/app
server {
    listen 80;
    server_name _;

    location / {
        include proxy_params;
        proxy_pass http://127.0.0.1:5000;
        proxy_connect_timeout 5000s;
        proxy_read_timeout 5000s;
    }
}
# +
# %%writefile flaskwebapp/gunicorn_logging.conf
[loggers]
keys=root, gunicorn.error
[handlers]
keys=console
[formatters]
keys=json
[logger_root]
level=INFO
handlers=console
[logger_gunicorn.error]
level=ERROR
handlers=console
propagate=0
qualname=gunicorn.error
[handler_console]
class=StreamHandler
formatter=json
args=(sys.stdout, )
[formatter_json]
class=jsonlogging.JSONFormatter
# +
# %%writefile flaskwebapp/kill_supervisor.py
import sys
import os
import signal
def write_stdout(s):
    sys.stdout.write(s)
    sys.stdout.flush()


# this function is modified from the code and knowledge found here: http://supervisord.org/events.html#example-event-listener-implementation
def main():
    while 1:
        write_stdout('READY\n')
        # wait for the event on stdin that supervisord will send
        line = sys.stdin.readline()
        write_stdout('Killing supervisor with this event: ' + line)
        try:
            # supervisord writes its pid to its file from which we read it here, see supervisord.conf
            pidfile = open('/tmp/supervisord.pid', 'r')
            pid = int(pidfile.readline())
            os.kill(pid, signal.SIGQUIT)
        except Exception as e:
            write_stdout('Could not kill supervisor: ' + e.strerror + '\n')
        write_stdout('RESULT 2\nOK')


main()
# +
# %%writefile flaskwebapp/etc/supervisord.conf
[supervisord]
logfile=/tmp/supervisord.log ; (main log file;default $CWD/supervisord.log)
logfile_maxbytes=50MB ; (max main logfile bytes b4 rotation;default 50MB)
logfile_backups=10 ; (num of main logfile rotation backups;default 10)
loglevel=info ; (log level;default info; others: debug,warn,trace)
pidfile=/tmp/supervisord.pid ; (supervisord pidfile;default supervisord.pid)
nodaemon=true ; (start in foreground if true;default false)
minfds=1024 ; (min. avail startup file descriptors;default 1024)
minprocs=200 ; (min. avail process descriptors;default 200)
[program:gunicorn]
command=bash -c "gunicorn --workers 1 -m 007 --timeout 100000 --capture-output --error-logfile - --log-level debug --log-config gunicorn_logging.conf \"wsgi:create()\""
directory=/code
redirect_stderr=true
stdout_logfile =/dev/stdout
stdout_logfile_maxbytes=0
startretries=2
startsecs=20
[program:nginx]
command=/usr/sbin/nginx -g "daemon off;"
startretries=2
startsecs=5
priority=3
[eventlistener:program_exit]
command=python kill_supervisor.py
directory=/code
events=PROCESS_STATE_FATAL
priority=2
# -
# We create a custom image based on NVIDIA's CUDA 9 image and install only the necessary dependencies, in order to keep the image as small as possible.
# %%writefile flaskwebapp/requirements.txt
Pillow==5.0.0
click==6.7
configparser==3.5.0
Flask==0.12.2
gunicorn==19.6.0
json-logging-py==0.2
MarkupSafe==1.0
olefile==0.44
requests==2.12.3
# +
# %%writefile flaskwebapp/dockerfile
FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
RUN echo "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list
RUN mkdir /code
WORKDIR /code
ADD . /code/
ADD etc /etc
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
ca-certificates \
cmake \
curl \
git \
nginx \
supervisor \
wget && \
rm -rf /var/lib/apt/lists/*
ENV PYTHON_VERSION=3.6
RUN curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
chmod +x ~/miniconda.sh && \
~/miniconda.sh -b -p /opt/conda && \
rm ~/miniconda.sh && \
/opt/conda/bin/conda create -y --name py$PYTHON_VERSION python=$PYTHON_VERSION numpy scipy pandas scikit-learn && \
/opt/conda/bin/conda clean -ya
ENV PATH /opt/conda/envs/py$PYTHON_VERSION/bin:$PATH
ENV LD_LIBRARY_PATH /opt/conda/envs/py$PYTHON_VERSION/lib:/usr/local/cuda/lib64/:$LD_LIBRARY_PATH
ENV PYTHONPATH /code/:$PYTHONPATH
RUN rm /etc/nginx/sites-enabled/default && \
cp /code/nginx/app /etc/nginx/sites-available/ && \
ln -s /etc/nginx/sites-available/app /etc/nginx/sites-enabled/ && \
/opt/conda/bin/conda install -c pytorch pytorch==0.4.1 && \
pip install --upgrade pip && \
pip install torchvision==0.2.1 && \
pip install -r /code/requirements.txt && \
/opt/conda/bin/conda clean -yt
EXPOSE 80
CMD ["supervisord", "-c", "/code/etc/supervisord.conf"]
# -
# The image name below refers to our Docker Hub account. If you wish to push the image to your own account, make sure you change the docker login.
image_name = get_key('.env', 'docker_login') + '/' +get_key('.env', 'image_repo')
application_path = 'flaskwebapp'
docker_file_location = path.join(application_path, 'dockerfile')
# Next, we build our docker image. The output of this cell is cleared from this notebook as it is quite long due to all the installations required to build the image. However, you should make sure you see 'Successfully built' and 'Successfully tagged' messages in the last line of the output when you run the cell.
!docker build -t $image_name -f $docker_file_location $application_path --no-cache
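# As an optional sanity check (not part of the original flow), you can list the image to confirm the build produced a local tag before pushing:
!docker images $image_name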
# Below we push the image we created to our Docker Hub registry. Make sure you have already logged in to the appropriate Docker Hub account using the docker login command; if you haven't logged in to the appropriate account you will get an error.
!docker push $image_name
print('Docker image name {}'.format(image_name))
# We can now [test our image locally](03_TestLocally.ipynb).

96
Pytorch/03_TestLocally.py Normal file
View file

@@ -0,0 +1,96 @@
# ---
# jupyter:
# jupytext_format_version: '1.3'
# jupytext_formats: py:light
# kernelspec:
# display_name: Python [conda env:AKSDeploymentPytorch]
# language: python
# name: conda-env-AKSDeploymentPytorch-py
# language_info:
# codemirror_mode:
# name: ipython
# version: 3
# file_extension: .py
# mimetype: text/x-python
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.6.6
# ---
# # Test web application locally
#
# This notebook pulls some images and tests them against the local web app running inside the Docker container we made previously.
# %load_ext autoreload
# %autoreload 2
import os
import matplotlib.pyplot as plt
import numpy as np
from testing_utilities import to_img, img_url_to_json, plot_predictions
import requests
from dotenv import get_key
# %matplotlib inline
image_name = get_key('.env', 'docker_login') + '/' +get_key('.env', 'image_repo')
image_name
# Run the Docker container in the background and open port 80. Notice that we are using the nvidia-docker command rather than docker.
# + {"active": "ipynb", "language": "bash"}
# nvidia-docker run -p 80:80 $1
# -
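# As an alternative sketch, the container can also be started detached directly from the notebook using the `image_name` variable defined above (this assumes the nvidia-docker runtime is installed):
# !nvidia-docker run -d -p 80:80 $image_name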
# Wait a few seconds for the application to spin up and then check that everything works.
!curl 'http://0.0.0.0:80/'
!curl 'http://0.0.0.0:80/version' # Reports the PyTorch version
# Pull an image of a Lynx to test our local web app with.
IMAGEURL = "https://upload.wikimedia.org/wikipedia/commons/thumb/6/68/Lynx_lynx_poing.jpg/220px-Lynx_lynx_poing.jpg"
plt.imshow(to_img(IMAGEURL))
jsonimg = img_url_to_json(IMAGEURL)
jsonimg[:100]
headers = {'content-type': 'application/json'}
# %time r = requests.post('http://0.0.0.0:80/score', data=jsonimg, headers=headers)
print(r)
r.json()
# Let's try a few more images.
images = ('https://upload.wikimedia.org/wikipedia/commons/thumb/6/68/Lynx_lynx_poing.jpg/220px-Lynx_lynx_poing.jpg',
'https://upload.wikimedia.org/wikipedia/commons/3/3a/Roadster_2.5_windmills_trimmed.jpg',
'https://upload.wikimedia.org/wikipedia/commons/thumb/e/e6/Harmony_of_the_Seas_%28ship%2C_2016%29_001.jpg/1920px-Harmony_of_the_Seas_%28ship%2C_2016%29_001.jpg',
'http://yourshot.nationalgeographic.com/u/ss/fQYSUbVfts-T7pS2VP2wnKyN8wxywmXtY0-FwsgxpiZv_E9ZfPsNV5B0ER8-bOdruvNfMD5EbP4SznWz4PYn/',
'https://cdn.arstechnica.net/wp-content/uploads/2012/04/bohol_tarsier_wiki-4f88309-intro.jpg',
'http://i.telegraph.co.uk/multimedia/archive/03233/BIRDS-ROBIN_3233998b.jpg')
url = 'http://0.0.0.0:80/score'
results = [requests.post(url, data=img_url_to_json(img), headers=headers) for img in images]
plot_predictions(images, results)
# Next let's quickly check what the request response performance is for the locally running Docker container.
image_data = list(map(img_url_to_json, images)) # Retrieve the images and data
timer_results = list()
for img in image_data:
    res = %timeit -r 1 -o -q requests.post(url, data=img, headers=headers)
    timer_results.append(res.best)
timer_results
print('Average time taken: {0:4.2f} ms'.format(10**3 * np.mean(timer_results)))
# + {"active": "ipynb", "language": "bash"}
# docker stop $(docker ps -q)
# -
# We can now [deploy our web application on AKS](04_DeployOnAKS.ipynb).

238
Pytorch/04_DeployOnAKS.py Normal file
View file

@@ -0,0 +1,238 @@
# ---
# jupyter:
# anaconda-cloud: {}
# jupytext_format_version: '1.3'
# jupytext_formats: py:light
# kernelspec:
# display_name: Python [conda env:AKSDeploymentPytorch]
# language: python
# name: conda-env-AKSDeploymentPytorch-py
# language_info:
# codemirror_mode:
# name: ipython
# version: 3
# file_extension: .py
# mimetype: text/x-python
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.6.6
# ---
# ### Deploy Web App on Azure Container Service (AKS)
# In this notebook, we will set up an Azure Container Service which will be managed by Kubernetes. We will then take the Docker image we created earlier that contains our app and deploy it to the AKS cluster. Then, we will check everything is working by sending an image to it and getting it scored.
#
# The process is split into the following steps:
# * [Define our resource names](#section1)
# * [Login to Azure](#section2)
# * [Create resource group and create AKS](#section3)
# * [Connect to AKS](#section4)
# * [Deploy our app](#section5)
#
# This guide is designed to be run on Linux and requires that the Azure CLI is installed.
import os
import json
from testing_utilities import write_json_to_file
from dotenv import set_key, get_key
# <a id='section1'></a>
# ## Setup
# Below are the various name definitions for the resources needed to set up AKS.
set_key('.env', 'selected_subscription', 'Team Danielle Internal')
set_key('.env', 'resource_group', 'msaksrg')
set_key('.env', 'aks_name', 'msaks')
set_key('.env', 'location', 'eastus')
# + {"tags": ["parameters"]}
# # %%writefile --append .env
# # This cell is tagged `parameters`
# # Please modify the values below as you see fit
# # If you have multiple subscriptions select the subscription you want to use
# selected_subscription = "Team Danielle Internal"
# # Resource group, name and location for AKS cluster.
# resource_group = "mabouaks"
# aks_name = "mabouaks"
# location = "eastus"
# -
image_name = get_key('.env', 'docker_login') + '/' +get_key('.env', 'image_repo')
# <a id='section2'></a>
# ## Azure account login
# If you are not already logged in to an Azure account, the command below will initiate a login. It will pop up a browser where you can select an Azure account.
# + {"active": "ipynb", "language": "bash"}
# list=`az account list -o table`
# if [ "$list" == '[]' ] || [ "$list" == '' ]; then
# az login -o table
# else
# az account list -o table
# fi
# -
!az account set --subscription "{get_key('.env', 'selected_subscription')}"
!az account show
!az provider register -n Microsoft.ContainerService
!az provider show -n Microsoft.ContainerService
# <a id='section3'></a>
# ## Create resource group and create AKS
# ### Create resource group
# Azure encourages the use of resource groups to organise all the Azure components you deploy. That way it is easier to find them, and we can also delete a number of resources simply by deleting the group.
!az group create --name {get_key('.env', 'resource_group')} \
--location {get_key('.env', 'location')}
# Below, we create the AKS cluster in the resource group we created earlier. This can take up to 15 minutes.
!az aks create --resource-group {get_key('.env', 'resource_group')} \
--name {get_key('.env', 'aks_name')} \
--node-count 1 \
--generate-ssh-keys \
-s Standard_NC6
# ### Install kubectl CLI
#
# To connect to the Kubernetes cluster, we will use kubectl, the Kubernetes command-line client. To install, run the following:
!sudo az aks install-cli
# <a id='section4'></a>
# ## Connect to AKS cluster
#
# To configure kubectl to connect to the Kubernetes cluster, run the following command:
!az aks get-credentials --resource-group {get_key('.env', 'resource_group')} --name {get_key('.env', 'aks_name')}
# Let's verify connection by listing the nodes.
!kubectl get nodes
# Let's check the pods on our cluster.
!kubectl get pods --all-namespaces
# <a id='section5'></a>
# ## Deploy application
#
# Below we define our Kubernetes manifest file for our service and load balancer. Note that we have to specify the volume mounts to the drivers that are located on the node.
#
# +
app_template = {
    "apiVersion": "apps/v1beta1",
    "kind": "Deployment",
    "metadata": {
        "name": "azure-dl"
    },
    "spec": {
        "replicas": 1,
        "template": {
            "metadata": {
                "labels": {
                    "app": "azure-dl"
                }
            },
            "spec": {
                "containers": [
                    {
                        "name": "azure-dl",
                        "image": image_name,
                        "env": [
                            {
                                "name": "LD_LIBRARY_PATH",
                                "value": "$LD_LIBRARY_PATH:/usr/local/nvidia/lib64:/opt/conda/envs/py3.6/lib"
                            }
                        ],
                        "ports": [
                            {
                                "containerPort": 80,
                                "name": "model"
                            }
                        ],
                        "volumeMounts": [
                            {
                                "mountPath": "/usr/local/nvidia",
                                "name": "nvidia"
                            }
                        ],
                        "resources": {
                            "requests": {
                                "alpha.kubernetes.io/nvidia-gpu": 1
                            },
                            "limits": {
                                "alpha.kubernetes.io/nvidia-gpu": 1
                            }
                        }
                    }
                ],
                "volumes": [
                    {
                        "name": "nvidia",
                        "hostPath": {
                            "path": "/usr/local/nvidia"
                        }
                    }
                ]
            }
        }
    }
}

service_temp = {
    "apiVersion": "v1",
    "kind": "Service",
    "metadata": {
        "name": "azure-dl"
    },
    "spec": {
        "type": "LoadBalancer",
        "ports": [
            {
                "port": 80
            }
        ],
        "selector": {
            "app": "azure-dl"
        }
    }
}
# -
write_json_to_file(app_template, 'az-dl.json') # We write the deployment template to the json file
write_json_to_file(service_temp, 'az-dl.json', mode='a') # We append the load balancer (service) template to the json file
# Let's check the manifest created.
!cat az-dl.json
# Next, we will use kubectl create command to deploy our application.
!kubectl create -f az-dl.json
# Let's check if the pod is deployed.
!kubectl get pods --all-namespaces
# If anything goes wrong you can use the commands below to observe the events on the node as well as review the logs.
!kubectl get events
pod_json = !kubectl get pods -o json
pod_dict = json.loads(''.join(pod_json))
!kubectl logs {pod_dict['items'][0]['metadata']['name']}
# It can take a few minutes for the service to populate the EXTERNAL-IP field. This will be the IP you use to call the service. You can also specify an IP to use; please see the AKS documentation for further details.
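# If the EXTERNAL-IP field still shows pending, one option (a small optional sketch) is to watch the service until the address is assigned; interrupt the kernel to stop the watch.
# !kubectl get service azure-dl --watch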
!kubectl get service azure-dl
# Next, we will [test our web application](05_TestWebApp.ipynb) deployed on AKS.

88
Pytorch/05_TestWebApp.py Normal file
View file

@@ -0,0 +1,88 @@
# ---
# jupyter:
# anaconda-cloud: {}
# jupytext_format_version: '1.3'
# jupytext_formats: py:light
# kernelspec:
# display_name: Python [conda env:AKSDeploymentPytorch]
# language: python
# name: conda-env-AKSDeploymentPytorch-py
# language_info:
# codemirror_mode:
# name: ipython
# version: 3
# file_extension: .py
# mimetype: text/x-python
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.6.6
# ---
# ### Test deployed web application
# This notebook pulls some images and tests them against the deployed web application.
import matplotlib.pyplot as plt
import numpy as np
from testing_utilities import img_url_to_json, to_img, plot_predictions
import requests
import json
# %matplotlib inline
service_json = !kubectl get service azure-dl -o json
service_dict = json.loads(''.join(service_json))
app_url = service_dict['status']['loadBalancer']['ingress'][0]['ip']
scoring_url = 'http://{}/score'.format(app_url)
version_url = 'http://{}/version'.format(app_url)
# Quickly check the web application is working
!curl $version_url # Reports the PyTorch version
# Pull an image of a Lynx to test it
IMAGEURL = "https://upload.wikimedia.org/wikipedia/commons/thumb/6/68/Lynx_lynx_poing.jpg/220px-Lynx_lynx_poing.jpg"
plt.imshow(to_img(IMAGEURL))
# headers = {'content-type': 'application/json','X-Marathon-App-Id': app_id}
headers = {'content-type': 'application/json'}
jsonimg = img_url_to_json(IMAGEURL)
# Run the request twice since the first time takes a little longer due to the loading of the model.
r = requests.post(scoring_url, data=jsonimg, headers=headers)
# %time r = requests.post(scoring_url, data=jsonimg, headers=headers)
r.json()
# From the results above we can see that the model correctly classifies this as a Lynx.
# The computation took around 70 ms and the whole round trip around 240 ms. The round trip time will depend on where the requests are being made.
# Let's try a few more images.
images = ('https://upload.wikimedia.org/wikipedia/commons/thumb/6/68/Lynx_lynx_poing.jpg/220px-Lynx_lynx_poing.jpg',
'https://upload.wikimedia.org/wikipedia/commons/3/3a/Roadster_2.5_windmills_trimmed.jpg',
'https://upload.wikimedia.org/wikipedia/commons/thumb/e/e6/Harmony_of_the_Seas_%28ship%2C_2016%29_001.jpg/1920px-Harmony_of_the_Seas_%28ship%2C_2016%29_001.jpg',
'http://yourshot.nationalgeographic.com/u/ss/fQYSUbVfts-T7pS2VP2wnKyN8wxywmXtY0-FwsgxpiZv_E9ZfPsNV5B0ER8-bOdruvNfMD5EbP4SznWz4PYn/',
'https://cdn.arstechnica.net/wp-content/uploads/2012/04/bohol_tarsier_wiki-4f88309-intro.jpg',
'http://i.telegraph.co.uk/multimedia/archive/03233/BIRDS-ROBIN_3233998b.jpg')
results = [requests.post(scoring_url, data=img_url_to_json(img), headers=headers) for img in images]
plot_predictions(images, results)
# The labels predicted by our model seem to be consistent with the images supplied.
# Next, let's quickly check what the request response performance is for our deployed model.
image_data = list(map(img_url_to_json, images)) # Retrieve the images and data
timer_results = list()
for img in image_data:
    res = %timeit -r 1 -o -q requests.post(scoring_url, data=img, headers=headers)
    timer_results.append(res.best)
timer_results
print('Average time taken: {0:4.2f} ms'.format(10**3 * np.mean(timer_results)))
# We have tested that the model works and we can move on to the [next notebook to get a sense of its throughput](06_SpeedTestWebApp.ipynb).

121
Pytorch/06_SpeedTestWebApp.py Normal file
View file

@@ -0,0 +1,121 @@
# ---
# jupyter:
# anaconda-cloud: {}
# jupytext_format_version: '1.3'
# jupytext_formats: py:light
# kernelspec:
# display_name: Python [conda env:AKSDeploymentPytorch]
# language: python
# name: conda-env-AKSDeploymentPytorch-py
# language_info:
# codemirror_mode:
# name: ipython
# version: 3
# file_extension: .py
# mimetype: text/x-python
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.6.6
# ---
# ### Load Test deployed web application
# This notebook pulls some images and tests them against the deployed web application. We submit requests asynchronously, which should reduce the contribution of latency.
# +
import asyncio
import json
import urllib.request
from timeit import default_timer
import aiohttp
import matplotlib.pyplot as plt
from testing_utilities import to_img, gen_variations_of_one_image
from tqdm import tqdm
# %matplotlib inline
# -
print(aiohttp.__version__)
# We will test our deployed service with 100 calls. We will only have 4 requests concurrently at any time. We have only deployed one pod on one node and increasing the number of concurrent calls does not really increase throughput. Feel free to try different values and see how the service responds.
NUMBER_OF_REQUESTS = 100 # Total number of requests
CONCURRENT_REQUESTS = 4 # Number of requests at a time
# Get the IP address of our service
service_json = !kubectl get service azure-dl -o json
service_dict = json.loads(''.join(service_json))
app_url = service_dict['status']['loadBalancer']['ingress'][0]['ip']
scoring_url = 'http://{}/score'.format(app_url)
version_url = 'http://{}/version'.format(app_url)
!curl $version_url # Reports the PyTorch version
IMAGEURL = "https://upload.wikimedia.org/wikipedia/commons/thumb/6/68/Lynx_lynx_poing.jpg/220px-Lynx_lynx_poing.jpg"
plt.imshow(to_img(IMAGEURL))
# Here, we use variations of the same image to test the service.
url_list = [[scoring_url, jsonimg] for jsonimg in gen_variations_of_one_image(IMAGEURL, NUMBER_OF_REQUESTS)]
def decode(result):
    return json.loads(result.decode("utf-8"))


async def fetch(url, session, data, headers):
    start_time = default_timer()
    async with session.request('post', url, data=data, headers=headers) as response:
        resp = await response.read()
        elapsed = default_timer() - start_time
        return resp, elapsed


async def bound_fetch(sem, url, session, data, headers):
    # Getter function with semaphore.
    async with sem:
        return await fetch(url, session, data, headers)


async def await_with_progress(coros):
    results = []
    for f in tqdm(asyncio.as_completed(coros), total=len(coros)):
        result = await f
        results.append((decode(result[0]), result[1]))
    return results


async def run(url_list, num_concurrent=CONCURRENT_REQUESTS):
    headers = {'content-type': 'application/json'}
    tasks = []
    # create instance of Semaphore
    sem = asyncio.Semaphore(num_concurrent)

    # Create a client session so that we don't open a new connection for each request.
    async with aiohttp.ClientSession() as session:
        for url, data in url_list:
            # pass Semaphore and session to every POST request
            task = asyncio.ensure_future(bound_fetch(sem, url, session, data, headers))
            tasks.append(task)
        return await await_with_progress(tasks)
# Below we run the 100 requests against our deployed service
loop = asyncio.get_event_loop()
start_time = default_timer()
complete_responses = loop.run_until_complete(asyncio.ensure_future(run(url_list, num_concurrent=CONCURRENT_REQUESTS)))
elapsed = default_timer() - start_time
print('Total Elapsed {}'.format(elapsed))
print('Avg time taken {0:4.2f} ms'.format(1000*elapsed/len(url_list)))
# Below we can see the output of some of our calls
complete_responses[:3]
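# Each element of `complete_responses` is a `(decoded_json, elapsed_seconds)` pair. Based on how the Flask app and driver build the reply, a decoded response looks roughly like the sketch below (the labels and timings are illustrative; the `image` key mirrors the key used in the request payload):
#
# ({'result': [{'image': [['n02127052 lynx, catamount', 0.99],
#                         ['n02128385 leopard, Panthera pardus', 0.004],
#                         ['n02123159 tiger cat', 0.003]]},
#              'Computed in 70.0 ms']},
#  0.25)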
num_successful = [i[0]['result'][0]['image'][0][0] for i in complete_responses].count('n02127052 lynx, catamount')
print('Successful {} out of {}'.format(num_successful, len(url_list)))
# Example response
plt.imshow(to_img(IMAGEURL))
complete_responses[0]
# To tear down the cluster and all related resources go to the [tear down the cluster](07_TearDown.ipynb) notebook.

40
Pytorch/07_TearDown.py Normal file
View file

@@ -0,0 +1,40 @@
# ---
# jupyter:
# jupytext_format_version: '1.3'
# jupytext_formats: py:light
# kernelspec:
# display_name: Python [conda env:AKSDeploymentPytorch]
# language: python
# name: conda-env-AKSDeploymentPytorch-py
# language_info:
# codemirror_mode:
# name: ipython
# version: 3
# file_extension: .py
# mimetype: text/x-python
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.6.6
# ---
# # Tear it all down
# Once you are done with your cluster you can use the following two commands to destroy it all.
from dotenv import get_key
# First, delete the application.
!kubectl delete -f az-dl.json
# Next, you delete the AKS cluster. This step may take a few minutes.
get_key('.env', 'resource_group')
!az aks delete -n {get_key('.env', 'aks_name')} \
-g {get_key('.env', 'resource_group')} \
-y
# Finally, you should delete the resource group. This also deletes the AKS cluster and can be used instead of the above command if the resource group is only used for this purpose.
!az group delete --name {get_key('.env', 'resource_group')} -y