Adds python files from jupytext. Updates gitignore

This commit is contained in:
msalvaris 2018-10-08 09:58:52 +00:00
Parent 63795c6cf7
Commit 05ce5d72f4
10 changed files: 1149 additions and 16 deletions

3
.gitignore vendored
View file

@@ -105,4 +105,5 @@ ENV/
synset.txt
*.ckpt
*.tar.gz
flaskwebapp
flaskwebapp
driver.py

107
Pytorch/00_DevelopModel.py Normal file
View file

@@ -0,0 +1,107 @@
# ---
# jupyter:
# jupytext_format_version: '1.3'
# jupytext_formats: py:light
# kernelspec:
# display_name: Python [conda env:AKSDeploymentPytorch]
# language: python
# name: conda-env-AKSDeploymentPytorch-py
# language_info:
# codemirror_mode:
# name: ipython
# version: 3
# file_extension: .py
# mimetype: text/x-python
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.6.6
# ---
# # Develop Model
# In this notebook, we will go through the steps to load the ResNet152 model, pre-process the images into the required format and call the model to find the top predictions.
# +
import torch
import torch.nn as nn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import PIL
from PIL import Image
import wget
# -
print(torch.__version__)
print(torchvision.__version__)
!wget "http://data.dmlc.ml/mxnet/models/imagenet/synset.txt"
# We first load the pretrained ResNet152 model from torchvision. This can take about 10s.
# %%time
model = models.resnet152(pretrained=True)
# You can print the summary of the model in the cell below. We cleared the output here for brevity. When you run the cell you should see a list of the layers and, at the bottom of the output, the size of the model in terms of number of parameters.
model = model.cuda()
print(model)
# Let's test our model with an image of a Lynx.
wget.download('https://upload.wikimedia.org/wikipedia/commons/thumb/6/68/Lynx_lynx_poing.jpg/220px-Lynx_lynx_poing.jpg')
img_path = '220px-Lynx_lynx_poing.jpg'
print(Image.open(img_path).size)
Image.open(img_path)
# Below, we load the image, resize it to (224, 224) and preprocess it using torchvision transforms with the standard ImageNet normalization.
img = Image.open(img_path).convert('RGB')
preprocess_input = transforms.Compose([
torchvision.transforms.Resize((224, 224), interpolation=PIL.Image.BICUBIC),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
img = Image.open(img_path)
img = preprocess_input(img)
# Now, let's call the model on our image to predict the top 3 labels. This will take a few seconds.
def create_label_lookup():
    with open('synset.txt', 'r') as f:
        label_list = [l.rstrip() for l in f]

    def _label_lookup(*label_locks):
        return [label_list[l] for l in label_locks]

    return _label_lookup
label_lookup = create_label_lookup()
softmax = nn.Softmax(dim=1).cuda()
model = model.eval()
# %%time
with torch.no_grad():
    img = img.expand(1, 3, 224, 224)
    image_gpu = img.type(torch.float).cuda()
    outputs = model(image_gpu)
    probabilities = softmax(outputs)
label_lookup = create_label_lookup()
probabilities_numpy = probabilities.cpu().numpy().squeeze()
top_results = np.flip(np.sort(probabilities_numpy), 0)[:3]
labels = label_lookup(*np.flip(probabilities_numpy.argsort(),0)[:3])
dict(zip(labels, top_results))
# The top guess is Lynx with probability about 99%. We can now move on to [developing the model API](01_DevelopModelDriver.ipynb).

165
Pytorch/01_DevelopModelDriver.py Normal file
View file

@@ -0,0 +1,165 @@
# ---
# jupyter:
# jupytext_format_version: '1.3'
# jupytext_formats: py:light
# kernelspec:
# display_name: Python [conda env:AKSDeploymentPytorch]
# language: python
# name: conda-env-AKSDeploymentPytorch-py
# language_info:
# codemirror_mode:
# name: ipython
# version: 3
# file_extension: .py
# mimetype: text/x-python
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.6.6
# ---
# # Develop Model Driver
# In this notebook, we will develop the API that will call our model. This module initializes the model, transforms the input so that it is in the appropriate format and defines the scoring method that will produce the predictions. The API will expect the input to be in JSON format. Once a request is received, the API will convert the JSON-encoded request body into image data. There are two main functions in the API: the first loads the model and returns a scoring function; the second processes the images and uses the scoring function to score them.
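# As a rough sketch of the request body the driver expects: the outer `input` key is what the scoring endpoint reads, while the inner key name (`image` here) is just an arbitrary identifier chosen by the caller. This cell is illustrative only and assumes the Lynx image downloaded in the previous notebook is still in the working directory.
import base64
with open('220px-Lynx_lynx_poing.jpg', 'rb') as f:
    example_request = {'input': {'image': base64.b64encode(f.read()).decode('utf-8')}}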
import logging
from testing_utilities import img_url_to_json
# We use the writefile magic to write the contents of the below cell to driver.py which includes the driver methods.
# +
# %%writefile driver.py
import base64
import json
import logging
import os
import timeit as t
from io import BytesIO
from pprint import pprint
import numpy as np
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, models, transforms
import PIL
from PIL import Image, ImageOps
_LABEL_FILE = os.getenv('LABEL_FILE', "synset.txt")
_NUMBER_RESULTS = 3
def _create_label_lookup(label_path):
    with open(label_path, 'r') as f:
        label_list = [l.rstrip() for l in f]

    def _label_lookup(*label_locks):
        return [label_list[l] for l in label_locks]

    return _label_lookup


def _load_model():
    # Load the pretrained model and move it to the GPU
    model = models.resnet152(pretrained=True)
    model = model.cuda()
    softmax = nn.Softmax(dim=1).cuda()
    model = model.eval()

    preprocess_input = transforms.Compose([
        torchvision.transforms.Resize((224, 224), interpolation=PIL.Image.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    def predict_for(image):
        image = preprocess_input(image)
        with torch.no_grad():
            image = image.expand(1, 3, 224, 224)
            image_gpu = image.type(torch.float).cuda()
            outputs = model(image_gpu)
            pred_proba = softmax(outputs)
        return pred_proba.cpu().numpy().squeeze()

    return predict_for


def _base64img_to_pil_image(base64_img_string):
    if base64_img_string.startswith('b\''):
        base64_img_string = base64_img_string[2:-1]
    base64Img = base64_img_string.encode('utf-8')

    # Preprocess the input data
    startPreprocess = t.default_timer()

    decoded_img = base64.b64decode(base64Img)
    img_buffer = BytesIO(decoded_img)

    # Load image with PIL (RGB)
    pil_img = Image.open(img_buffer).convert('RGB')
    return pil_img


def create_scoring_func(label_path=_LABEL_FILE):
    logger = logging.getLogger("model_driver")

    start = t.default_timer()
    labels_for = _create_label_lookup(label_path)
    predict_for = _load_model()
    end = t.default_timer()

    loadTimeMsg = "Model loading time: {0} ms".format(round((end-start)*1000, 2))
    logger.info(loadTimeMsg)

    def call_model(image, number_results=_NUMBER_RESULTS):
        pred_proba = predict_for(image).squeeze()
        # np.argsort sorts ascending, so flipping gives indices ordered from highest to lowest probability
        selected_results = np.flip(np.argsort(pred_proba), 0)[:number_results]
        labels = labels_for(*selected_results)
        return list(zip(labels, pred_proba[selected_results].astype(np.float64)))

    return call_model


def get_model_api():
    logger = logging.getLogger("model_driver")
    scoring_func = create_scoring_func()

    def process_and_score(images_dict, number_results=_NUMBER_RESULTS):
        start = t.default_timer()

        results = {}
        for key, base64_img_string in images_dict.items():
            rgb_image = _base64img_to_pil_image(base64_img_string)
            results[key] = scoring_func(rgb_image, number_results=number_results)

        end = t.default_timer()
        logger.info("Predictions: {0}".format(results))
        logger.info("Predictions took {0} ms".format(round((end-start)*1000, 2)))
        return (results, 'Computed in {0} ms'.format(round((end-start)*1000, 2)))

    return process_and_score


def version():
    return torch.__version__
# -
# Let's test the module.
logging.basicConfig(level=logging.DEBUG)
# We run the file driver.py which will bring everything into the context of the notebook.
# %run driver.py
# We will use the same Lynx image we used earlier to check that our driver works as expected.
IMAGEURL = "https://upload.wikimedia.org/wikipedia/commons/thumb/6/68/Lynx_lynx_poing.jpg/220px-Lynx_lynx_poing.jpg"
predict_for = get_model_api()
jsonimg = img_url_to_json(IMAGEURL)
json_load_img = json.loads(jsonimg)
body = json_load_img['input']
resp = predict_for(body)
pprint(resp[0])
# Next, we can move on to [building our docker image](02_BuildImage.ipynb).

File diff suppressed because one or more lines are too long

291
Pytorch/02_BuildImage.py Normal file
View file

@@ -0,0 +1,291 @@
# ---
# jupyter:
# jupytext_format_version: '1.3'
# jupytext_formats: py:light
# kernelspec:
# display_name: Python [conda env:AKSDeploymentKeras]
# language: python
# name: conda-env-AKSDeploymentKeras-py
# language_info:
# codemirror_mode:
# name: ipython
# version: 3
# file_extension: .py
# mimetype: text/x-python
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.5.5
# ---
# # Build Docker image
# In this notebook, we will build the docker container that contains the ResNet152 model, Flask web application, model driver and all dependencies.
# Make sure you have logged in using docker login.
import os
from os import path
import json
import shutil
from dotenv import set_key, get_key
# We will be using the following Docker information to push the image to docker hub.
# +
# "YOUR_DOCKER_LOGIN"
# + {"tags": ["parameters"]}
# %%writefile .env
# This cell is tagged `parameters`
# Please modify the values below as you see fit
# Your docker login and image repository name
# -
set_key('.env', 'docker_login', 'masalvar')
set_key('.env', 'image_repo', 'pytorch-gpu')
!cat .env
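# After the two `set_key` calls above, the `.env` file should contain entries roughly like the following (values will reflect your own login and repository name):
#
# docker_login="masalvar"
# image_repo="pytorch-gpu"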
os.makedirs('flaskwebapp', exist_ok=True)
os.makedirs(os.path.join('flaskwebapp', 'nginx'), exist_ok=True)
os.makedirs(os.path.join('flaskwebapp', 'etc'), exist_ok=True)
shutil.copy('synset.txt', 'flaskwebapp')
shutil.copy('driver.py', 'flaskwebapp')
os.listdir('flaskwebapp')
# Below, we create the module for the Flask web application.
# +
# %%writefile flaskwebapp/app.py
from flask import Flask, request, Response
import logging
import json
import driver

app = Flask(__name__)
predict_for = driver.get_model_api()


@app.route("/score", methods=['POST'])
def scoreRRS():
    """ Endpoint for scoring
    """
    if request.headers['Content-Type'] != 'application/json':
        return Response(json.dumps({}), status=415, mimetype='application/json')
    request_input = request.json['input']
    response = predict_for(request_input)
    return json.dumps({'result': response})


@app.route("/")
def healthy():
    return "Healthy"


# PyTorch version
@app.route('/version', methods=['GET'])
def version_request():
    return driver.version()


if __name__ == "__main__":
    app.run(host='0.0.0.0', port=5000)
# +
# %%writefile flaskwebapp/wsgi.py
from app import app as application


def create():
    print("Initialising")
    application.run(host='127.0.0.1', port=5000)
# -
# Here, we write the configuration for Nginx, which proxies requests from port **80** to port **5000**.
# %%writefile flaskwebapp/nginx/app
server {
    listen 80;
    server_name _;

    location / {
        include proxy_params;
        proxy_pass http://127.0.0.1:5000;
        proxy_connect_timeout 5000s;
        proxy_read_timeout 5000s;
    }
}
# +
# %%writefile flaskwebapp/gunicorn_logging.conf
[loggers]
keys=root, gunicorn.error
[handlers]
keys=console
[formatters]
keys=json
[logger_root]
level=INFO
handlers=console
[logger_gunicorn.error]
level=ERROR
handlers=console
propagate=0
qualname=gunicorn.error
[handler_console]
class=StreamHandler
formatter=json
args=(sys.stdout, )
[formatter_json]
class=jsonlogging.JSONFormatter
# +
# %%writefile flaskwebapp/kill_supervisor.py
import sys
import os
import signal
def write_stdout(s):
    sys.stdout.write(s)
    sys.stdout.flush()


# this function is modified from the code and knowledge found here: http://supervisord.org/events.html#example-event-listener-implementation
def main():
    while 1:
        write_stdout('READY\n')
        # wait for the event on stdin that supervisord will send
        line = sys.stdin.readline()
        write_stdout('Killing supervisor with this event: ' + line)
        try:
            # supervisord writes its pid to its file from which we read it here, see supervisord.conf
            pidfile = open('/tmp/supervisord.pid', 'r')
            pid = int(pidfile.readline())
            os.kill(pid, signal.SIGQUIT)
        except Exception as e:
            write_stdout('Could not kill supervisor: ' + e.strerror + '\n')
        write_stdout('RESULT 2\nOK')


main()
# +
# %%writefile flaskwebapp/etc/supervisord.conf
[supervisord]
logfile=/tmp/supervisord.log ; (main log file;default $CWD/supervisord.log)
logfile_maxbytes=50MB ; (max main logfile bytes b4 rotation;default 50MB)
logfile_backups=10 ; (num of main logfile rotation backups;default 10)
loglevel=info ; (log level;default info; others: debug,warn,trace)
pidfile=/tmp/supervisord.pid ; (supervisord pidfile;default supervisord.pid)
nodaemon=true ; (start in foreground if true;default false)
minfds=1024 ; (min. avail startup file descriptors;default 1024)
minprocs=200 ; (min. avail process descriptors;default 200)
[program:gunicorn]
command=bash -c "gunicorn --workers 1 -m 007 --timeout 100000 --capture-output --error-logfile - --log-level debug --log-config gunicorn_logging.conf \"wsgi:create()\""
directory=/code
redirect_stderr=true
stdout_logfile =/dev/stdout
stdout_logfile_maxbytes=0
startretries=2
startsecs=20
[program:nginx]
command=/usr/sbin/nginx -g "daemon off;"
startretries=2
startsecs=5
priority=3
[eventlistener:program_exit]
command=python kill_supervisor.py
directory=/code
events=PROCESS_STATE_FATAL
priority=2
# -
# We create a custom image based on NVIDIA's CUDA 9 image and install only the necessary dependencies, in order to keep the image as small as possible.
# %%writefile flaskwebapp/requirements.txt
Pillow==5.0.0
click==6.7
configparser==3.5.0
Flask==0.12.2
gunicorn==19.6.0
json-logging-py==0.2
MarkupSafe==1.0
olefile==0.44
requests==2.12.3
# +
# %%writefile flaskwebapp/dockerfile
FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
RUN echo "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list
RUN mkdir /code
WORKDIR /code
ADD . /code/
ADD etc /etc
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
ca-certificates \
cmake \
curl \
git \
nginx \
supervisor \
wget && \
rm -rf /var/lib/apt/lists/*
ENV PYTHON_VERSION=3.6
RUN curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
chmod +x ~/miniconda.sh && \
~/miniconda.sh -b -p /opt/conda && \
rm ~/miniconda.sh && \
/opt/conda/bin/conda create -y --name py$PYTHON_VERSION python=$PYTHON_VERSION numpy scipy pandas scikit-learn && \
/opt/conda/bin/conda clean -ya
ENV PATH /opt/conda/envs/py$PYTHON_VERSION/bin:$PATH
ENV LD_LIBRARY_PATH /opt/conda/envs/py$PYTHON_VERSION/lib:/usr/local/cuda/lib64/:$LD_LIBRARY_PATH
ENV PYTHONPATH /code/:$PYTHONPATH
RUN rm /etc/nginx/sites-enabled/default && \
cp /code/nginx/app /etc/nginx/sites-available/ && \
ln -s /etc/nginx/sites-available/app /etc/nginx/sites-enabled/ && \
/opt/conda/bin/conda install -c pytorch pytorch==0.4.1 && \
pip install --upgrade pip && \
pip install torchvision==0.2.1 && \
pip install -r /code/requirements.txt && \
/opt/conda/bin/conda clean -yt
EXPOSE 80
CMD ["supervisord", "-c", "/code/etc/supervisord.conf"]
# -
# The image name below refers to our Docker Hub account. If you wish to push the image to your own account, make sure you change the docker login.
image_name = get_key('.env', 'docker_login') + '/' +get_key('.env', 'image_repo')
application_path = 'flaskwebapp'
docker_file_location = path.join(application_path, 'dockerfile')
# Next, we build our docker image. The output of this cell is cleared from this notebook as it is quite long due to all the installations required to build the image. However, you should make sure you see 'Successfully built' and 'Successfully tagged' messages in the last line of the output when you run the cell.
!docker build -t $image_name -f $docker_file_location $application_path --no-cache
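# As an optional sanity check (not part of the original flow), you can list the image to confirm the build produced a local tag before pushing:
!docker images $image_name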
# Below we push the image we created to our Docker Hub registry. Make sure you have already logged in to the appropriate Docker Hub account using the docker login command; if you haven't logged in to the appropriate account you will get an error.
!docker push $image_name
print('Docker image name {}'.format(image_name))
# We can now [test our image locally](03_TestLocally.ipynb).

96
Pytorch/03_TestLocally.py Normal file
View file

@@ -0,0 +1,96 @@
# ---
# jupyter:
# jupytext_format_version: '1.3'
# jupytext_formats: py:light
# kernelspec:
# display_name: Python [conda env:AKSDeploymentPytorch]
# language: python
# name: conda-env-AKSDeploymentPytorch-py
# language_info:
# codemirror_mode:
# name: ipython
# version: 3
# file_extension: .py
# mimetype: text/x-python
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.6.6
# ---
# # Test web application locally
#
# This notebook pulls some images and tests them against the local web app running inside the Docker container we made previously.
# %load_ext autoreload
# %autoreload 2
import os
import matplotlib.pyplot as plt
import numpy as np
from testing_utilities import to_img, img_url_to_json, plot_predictions
import requests
from dotenv import get_key
# %matplotlib inline
image_name = get_key('.env', 'docker_login') + '/' +get_key('.env', 'image_repo')
image_name
# Run the Docker container in the background and open port 80. Notice that we are using the nvidia-docker command rather than docker.
# + {"active": "ipynb", "language": "bash"}
# nvidia-docker run -p 80:80 $1
# -
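# As an alternative sketch, the container can also be started detached directly from the notebook using the `image_name` variable defined above (this assumes the nvidia-docker runtime is installed):
# !nvidia-docker run -d -p 80:80 $image_name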
# Wait a few seconds for the application to spin up and then check that everything works.
!curl 'http://0.0.0.0:80/'
!curl 'http://0.0.0.0:80/version' # Reports the PyTorch version
# Pull an image of a Lynx to test our local web app with.
IMAGEURL = "https://upload.wikimedia.org/wikipedia/commons/thumb/6/68/Lynx_lynx_poing.jpg/220px-Lynx_lynx_poing.jpg"
plt.imshow(to_img(IMAGEURL))
jsonimg = img_url_to_json(IMAGEURL)
jsonimg[:100]
headers = {'content-type': 'application/json'}
# %time r = requests.post('http://0.0.0.0:80/score', data=jsonimg, headers=headers)
print(r)
r.json()
# Let's try a few more images.
images = ('https://upload.wikimedia.org/wikipedia/commons/thumb/6/68/Lynx_lynx_poing.jpg/220px-Lynx_lynx_poing.jpg',
'https://upload.wikimedia.org/wikipedia/commons/3/3a/Roadster_2.5_windmills_trimmed.jpg',
'https://upload.wikimedia.org/wikipedia/commons/thumb/e/e6/Harmony_of_the_Seas_%28ship%2C_2016%29_001.jpg/1920px-Harmony_of_the_Seas_%28ship%2C_2016%29_001.jpg',
'http://yourshot.nationalgeographic.com/u/ss/fQYSUbVfts-T7pS2VP2wnKyN8wxywmXtY0-FwsgxpiZv_E9ZfPsNV5B0ER8-bOdruvNfMD5EbP4SznWz4PYn/',
'https://cdn.arstechnica.net/wp-content/uploads/2012/04/bohol_tarsier_wiki-4f88309-intro.jpg',
'http://i.telegraph.co.uk/multimedia/archive/03233/BIRDS-ROBIN_3233998b.jpg')
url = 'http://0.0.0.0:80/score'
results = [requests.post(url, data=img_url_to_json(img), headers=headers) for img in images]
plot_predictions(images, results)
# Next let's quickly check what the request response performance is for the locally running Docker container.
image_data = list(map(img_url_to_json, images)) # Retrieve the images and data
timer_results = list()
for img in image_data:
    res = %timeit -r 1 -o -q requests.post(url, data=img, headers=headers)
    timer_results.append(res.best)
timer_results
print('Average time taken: {0:4.2f} ms'.format(10**3 * np.mean(timer_results)))
# + {"active": "ipynb", "language": "bash"}
# docker stop $(docker ps -q)
# -
# We can now [deploy our web application on AKS](04_DeployOnAKS.ipynb).

238
Pytorch/04_DeployOnAKS.py Normal file
View file

@@ -0,0 +1,238 @@
# ---
# jupyter:
# anaconda-cloud: {}
# jupytext_format_version: '1.3'
# jupytext_formats: py:light
# kernelspec:
# display_name: Python [conda env:AKSDeploymentPytorch]
# language: python
# name: conda-env-AKSDeploymentPytorch-py
# language_info:
# codemirror_mode:
# name: ipython
# version: 3
# file_extension: .py
# mimetype: text/x-python
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.6.6
# ---
# ### Deploy Web App on Azure Container Service (AKS)
# In this notebook, we will set up an Azure Container Service which will be managed by Kubernetes. We will then take the Docker image we created earlier that contains our app and deploy it to the AKS cluster. Then, we will check everything is working by sending an image to it and getting it scored.
#
# The process is split into the following steps:
# * [Define our resource names](#section1)
# * [Login to Azure](#section2)
# * [Create resource group and create AKS](#section3)
# * [Connect to AKS](#section4)
# * [Deploy our app](#section5)
#
# This guide is designed to be run on Linux and requires that the Azure CLI is installed.
import os
import json
from testing_utilities import write_json_to_file
from dotenv import set_key, get_key
# <a id='section1'></a>
# ## Setup
# Below are the various name definitions for the resources needed to set up AKS.
set_key('.env', 'selected_subscription', 'Team Danielle Internal')
set_key('.env', 'resource_group', 'msaksrg')
set_key('.env', 'aks_name', 'msaks')
set_key('.env', 'location', 'eastus')
# + {"tags": ["parameters"]}
# # %%writefile --append .env
# # This cell is tagged `parameters`
# # Please modify the values below as you see fit
# # If you have multiple subscriptions select the subscription you want to use
# selected_subscription = "Team Danielle Internal"
# # Resource group, name and location for AKS cluster.
# resource_group = "mabouaks"
# aks_name = "mabouaks"
# location = "eastus"
# -
image_name = get_key('.env', 'docker_login') + '/' +get_key('.env', 'image_repo')
# <a id='section2'></a>
# ## Azure account login
# If you are not already logged in to an Azure account, the command below will initiate a login. It will pop up a browser where you can select an Azure account.
# + {"active": "ipynb", "language": "bash"}
# list=`az account list -o table`
# if [ "$list" == '[]' ] || [ "$list" == '' ]; then
# az login -o table
# else
# az account list -o table
# fi
# -
!az account set --subscription "{get_key('.env', 'selected_subscription')}"
!az account show
!az provider register -n Microsoft.ContainerService
!az provider show -n Microsoft.ContainerService
# <a id='section3'></a>
# ## Create resource group and create AKS
# ### Create resource group
# Azure encourages the use of resource groups to organise all the Azure components you deploy. That way it is easier to find them, and we can also delete a number of resources simply by deleting the group.
!az group create --name {get_key('.env', 'resource_group')} \
--location {get_key('.env', 'location')}
# Below, we create the AKS cluster in the resource group we created earlier. This can take up to 15 minutes.
!az aks create --resource-group {get_key('.env', 'resource_group')} \
--name {get_key('.env', 'aks_name')} \
--node-count 1 \
--generate-ssh-keys \
-s Standard_NC6
# ### Install kubectl CLI
#
# To connect to the Kubernetes cluster, we will use kubectl, the Kubernetes command-line client. To install, run the following:
!sudo az aks install-cli
# <a id='section4'></a>
# ## Connect to AKS cluster
#
# To configure kubectl to connect to the Kubernetes cluster, run the following command:
!az aks get-credentials --resource-group {get_key('.env', 'resource_group')} --name {get_key('.env', 'aks_name')}
# Let's verify connection by listing the nodes.
!kubectl get nodes
# Let's check the pods on our cluster.
!kubectl get pods --all-namespaces
# <a id='section5'></a>
# ## Deploy application
#
# Below we define our Kubernetes manifest file for our service and load balancer. Note that we have to specify the volume mounts to the drivers that are located on the node.
#
# +
app_template = {
    "apiVersion": "apps/v1beta1",
    "kind": "Deployment",
    "metadata": {
        "name": "azure-dl"
    },
    "spec": {
        "replicas": 1,
        "template": {
            "metadata": {
                "labels": {
                    "app": "azure-dl"
                }
            },
            "spec": {
                "containers": [
                    {
                        "name": "azure-dl",
                        "image": image_name,
                        "env": [
                            {
                                "name": "LD_LIBRARY_PATH",
                                "value": "$LD_LIBRARY_PATH:/usr/local/nvidia/lib64:/opt/conda/envs/py3.6/lib"
                            }
                        ],
                        "ports": [
                            {
                                "containerPort": 80,
                                "name": "model"
                            }
                        ],
                        "volumeMounts": [
                            {
                                "mountPath": "/usr/local/nvidia",
                                "name": "nvidia"
                            }
                        ],
                        "resources": {
                            "requests": {
                                "alpha.kubernetes.io/nvidia-gpu": 1
                            },
                            "limits": {
                                "alpha.kubernetes.io/nvidia-gpu": 1
                            }
                        }
                    }
                ],
                "volumes": [
                    {
                        "name": "nvidia",
                        "hostPath": {
                            "path": "/usr/local/nvidia"
                        }
                    }
                ]
            }
        }
    }
}

service_temp = {
    "apiVersion": "v1",
    "kind": "Service",
    "metadata": {
        "name": "azure-dl"
    },
    "spec": {
        "type": "LoadBalancer",
        "ports": [
            {
                "port": 80
            }
        ],
        "selector": {
            "app": "azure-dl"
        }
    }
}
# -
write_json_to_file(app_template, 'az-dl.json') # We write the deployment template to the json file
write_json_to_file(service_temp, 'az-dl.json', mode='a') # We append the load balancer (service) template to the json file
# Let's check the manifest created.
!cat az-dl.json
# Next, we will use kubectl create command to deploy our application.
!kubectl create -f az-dl.json
# Let's check if the pod is deployed.
!kubectl get pods --all-namespaces
# If anything goes wrong you can use the commands below to observe the events on the node as well as review the logs.
!kubectl get events
pod_json = !kubectl get pods -o json
pod_dict = json.loads(''.join(pod_json))
!kubectl logs {pod_dict['items'][0]['metadata']['name']}
# It can take a few minutes for the service to populate the EXTERNAL-IP field. This will be the IP you use to call the service. You can also specify an IP to use; please see the AKS documentation for further details.
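# If the EXTERNAL-IP field still shows pending, one option (a small optional sketch) is to watch the service until the address is assigned; interrupt the kernel to stop the watch.
# !kubectl get service azure-dl --watch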
!kubectl get service azure-dl
# Next, we will [test our web application](05_TestWebApp.ipynb) deployed on AKS.

88
Pytorch/05_TestWebApp.py Normal file
View file

@@ -0,0 +1,88 @@
# ---
# jupyter:
# anaconda-cloud: {}
# jupytext_format_version: '1.3'
# jupytext_formats: py:light
# kernelspec:
# display_name: Python [conda env:AKSDeploymentPytorch]
# language: python
# name: conda-env-AKSDeploymentPytorch-py
# language_info:
# codemirror_mode:
# name: ipython
# version: 3
# file_extension: .py
# mimetype: text/x-python
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.6.6
# ---
# ### Test deployed web application
# This notebook pulls some images and tests them against the deployed web application.
import matplotlib.pyplot as plt
import numpy as np
from testing_utilities import img_url_to_json, to_img, plot_predictions
import requests
import json
# %matplotlib inline
service_json = !kubectl get service azure-dl -o json
service_dict = json.loads(''.join(service_json))
app_url = service_dict['status']['loadBalancer']['ingress'][0]['ip']
scoring_url = 'http://{}/score'.format(app_url)
version_url = 'http://{}/version'.format(app_url)
# Quickly check the web application is working
!curl $version_url # Reports the PyTorch version
# Pull an image of a Lynx to test it
IMAGEURL = "https://upload.wikimedia.org/wikipedia/commons/thumb/6/68/Lynx_lynx_poing.jpg/220px-Lynx_lynx_poing.jpg"
plt.imshow(to_img(IMAGEURL))
# headers = {'content-type': 'application/json','X-Marathon-App-Id': app_id}
headers = {'content-type': 'application/json'}
jsonimg = img_url_to_json(IMAGEURL)
# Run the request twice since the first time takes a little longer due to the loading of the model.
r = requests.post(scoring_url, data=jsonimg, headers=headers)
# %time r = requests.post(scoring_url, data=jsonimg, headers=headers)
r.json()
# From the results above we can see that the model correctly classifies this as a Lynx.
# The computation took around 70 ms and the whole round trip around 240 ms. The round trip time will depend on where the requests are being made.
# Let's try a few more images.
images = ('https://upload.wikimedia.org/wikipedia/commons/thumb/6/68/Lynx_lynx_poing.jpg/220px-Lynx_lynx_poing.jpg',
'https://upload.wikimedia.org/wikipedia/commons/3/3a/Roadster_2.5_windmills_trimmed.jpg',
'https://upload.wikimedia.org/wikipedia/commons/thumb/e/e6/Harmony_of_the_Seas_%28ship%2C_2016%29_001.jpg/1920px-Harmony_of_the_Seas_%28ship%2C_2016%29_001.jpg',
'http://yourshot.nationalgeographic.com/u/ss/fQYSUbVfts-T7pS2VP2wnKyN8wxywmXtY0-FwsgxpiZv_E9ZfPsNV5B0ER8-bOdruvNfMD5EbP4SznWz4PYn/',
'https://cdn.arstechnica.net/wp-content/uploads/2012/04/bohol_tarsier_wiki-4f88309-intro.jpg',
'http://i.telegraph.co.uk/multimedia/archive/03233/BIRDS-ROBIN_3233998b.jpg')
results = [requests.post(scoring_url, data=img_url_to_json(img), headers=headers) for img in images]
plot_predictions(images, results)
# The labels predicted by our model seem to be consistent with the images supplied.
# Next, let's quickly check what the request response performance is for our deployed model.
image_data = list(map(img_url_to_json, images)) # Retrieve the images and data
timer_results = list()
for img in image_data:
    res = %timeit -r 1 -o -q requests.post(scoring_url, data=img, headers=headers)
    timer_results.append(res.best)
timer_results
print('Average time taken: {0:4.2f} ms'.format(10**3 * np.mean(timer_results)))
# We have tested that the model works and we can move on to the [next notebook to get a sense of its throughput](06_SpeedTestWebApp.ipynb).

121
Pytorch/06_SpeedTestWebApp.py Normal file
View file

@@ -0,0 +1,121 @@
# ---
# jupyter:
# anaconda-cloud: {}
# jupytext_format_version: '1.3'
# jupytext_formats: py:light
# kernelspec:
# display_name: Python [conda env:AKSDeploymentPytorch]
# language: python
# name: conda-env-AKSDeploymentPytorch-py
# language_info:
# codemirror_mode:
# name: ipython
# version: 3
# file_extension: .py
# mimetype: text/x-python
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.6.6
# ---
# ### Load Test deployed web application
# This notebook pulls some images and tests them against the deployed web application. We submit requests asynchronously, which should reduce the contribution of latency.
# +
import asyncio
import json
import urllib.request
from timeit import default_timer
import aiohttp
import matplotlib.pyplot as plt
from testing_utilities import to_img, gen_variations_of_one_image
from tqdm import tqdm
# %matplotlib inline
# -
print(aiohttp.__version__)
# We will test our deployed service with 100 calls. We will only have 4 requests concurrently at any time. We have only deployed one pod on one node and increasing the number of concurrent calls does not really increase throughput. Feel free to try different values and see how the service responds.
NUMBER_OF_REQUESTS = 100 # Total number of requests
CONCURRENT_REQUESTS = 4 # Number of requests at a time
# Get the IP address of our service
service_json = !kubectl get service azure-dl -o json
service_dict = json.loads(''.join(service_json))
app_url = service_dict['status']['loadBalancer']['ingress'][0]['ip']
scoring_url = 'http://{}/score'.format(app_url)
version_url = 'http://{}/version'.format(app_url)
!curl $version_url # Reports the PyTorch version
IMAGEURL = "https://upload.wikimedia.org/wikipedia/commons/thumb/6/68/Lynx_lynx_poing.jpg/220px-Lynx_lynx_poing.jpg"
plt.imshow(to_img(IMAGEURL))
# Here, we use variations of the same image to test the service.
url_list = [[scoring_url, jsonimg] for jsonimg in gen_variations_of_one_image(IMAGEURL, NUMBER_OF_REQUESTS)]
def decode(result):
    return json.loads(result.decode("utf-8"))


async def fetch(url, session, data, headers):
    start_time = default_timer()
    async with session.request('post', url, data=data, headers=headers) as response:
        resp = await response.read()
        elapsed = default_timer() - start_time
        return resp, elapsed


async def bound_fetch(sem, url, session, data, headers):
    # Getter function with semaphore.
    async with sem:
        return await fetch(url, session, data, headers)


async def await_with_progress(coros):
    results = []
    for f in tqdm(asyncio.as_completed(coros), total=len(coros)):
        result = await f
        results.append((decode(result[0]), result[1]))
    return results


async def run(url_list, num_concurrent=CONCURRENT_REQUESTS):
    headers = {'content-type': 'application/json'}
    tasks = []
    # create instance of Semaphore
    sem = asyncio.Semaphore(num_concurrent)

    # Create a client session so that we don't open a new connection for each request.
    async with aiohttp.ClientSession() as session:
        for url, data in url_list:
            # pass Semaphore and session to every POST request
            task = asyncio.ensure_future(bound_fetch(sem, url, session, data, headers))
            tasks.append(task)
        return await await_with_progress(tasks)
# Below we run the 100 requests against our deployed service
loop = asyncio.get_event_loop()
start_time = default_timer()
complete_responses = loop.run_until_complete(asyncio.ensure_future(run(url_list, num_concurrent=CONCURRENT_REQUESTS)))
elapsed = default_timer() - start_time
print('Total Elapsed {}'.format(elapsed))
print('Avg time taken {0:4.2f} ms'.format(1000*elapsed/len(url_list)))
# Below we can see the output of some of our calls
complete_responses[:3]
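# Each element of `complete_responses` is a `(decoded_json, elapsed_seconds)` pair. Based on how the Flask app and driver build the reply, a decoded response looks roughly like the sketch below (the labels and timings are illustrative; the `image` key mirrors the key used in the request payload):
#
# ({'result': [{'image': [['n02127052 lynx, catamount', 0.99],
#                         ['n02128385 leopard, Panthera pardus', 0.004],
#                         ['n02123159 tiger cat', 0.003]]},
#              'Computed in 70.0 ms']},
#  0.25)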
num_successful = [i[0]['result'][0]['image'][0][0] for i in complete_responses].count('n02127052 lynx, catamount')
print('Successful {} out of {}'.format(num_successful, len(url_list)))
# Example response
plt.imshow(to_img(IMAGEURL))
complete_responses[0]
# To tear down the cluster and all related resources go to the [tear down the cluster](07_TearDown.ipynb) notebook.

40
Pytorch/07_TearDown.py Normal file
View file

@@ -0,0 +1,40 @@
# ---
# jupyter:
# jupytext_format_version: '1.3'
# jupytext_formats: py:light
# kernelspec:
# display_name: Python [conda env:AKSDeploymentPytorch]
# language: python
# name: conda-env-AKSDeploymentPytorch-py
# language_info:
# codemirror_mode:
# name: ipython
# version: 3
# file_extension: .py
# mimetype: text/x-python
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.6.6
# ---
# # Tear it all down
# Once you are done with your cluster you can use the following two commands to destroy it all.
from dotenv import get_key
# First, delete the application.
!kubectl delete -f az-dl.json
# Next, you delete the AKS cluster. This step may take a few minutes.
get_key('.env', 'resource_group')
!az aks delete -n {get_key('.env', 'aks_name')} \
-g {get_key('.env', 'resource_group')} \
-y
# Finally, you should delete the resource group. This also deletes the AKS cluster and can be used instead of the above command if the resource group is only used for this purpose.
!az group delete --name {get_key('.env', 'resource_group')} -y