diff --git a/Keras_Tensorflow/04_DeployOnAKS.ipynb b/Keras_Tensorflow/04_DeployOnAKS.ipynb index bd46c62..2b1ead1 100644 --- a/Keras_Tensorflow/04_DeployOnAKS.ipynb +++ b/Keras_Tensorflow/04_DeployOnAKS.ipynb @@ -25,7 +25,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -51,17 +51,12 @@ }, { "cell_type": "code", -<<<<<<< Updated upstream - "execution_count": 2, + "execution_count": null, "metadata": { "tags": [ "parameters" ] }, -======= - "execution_count": null, - "metadata": {}, ->>>>>>> Stashed changes "outputs": [], "source": [ "%%writefile --append .env\n", @@ -79,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -109,10 +104,10 @@ "source": [ "%%bash\n", "list=`az account list -o table`\n", - "if [ \"$list\" == '[]' ] || [ \"$list\" == '' ]; then \n", + "if [ \"$list\" == '[]' ] || [ \"$list\" == '' ]; then\n", " az login -o table\n", "else\n", - " az account list -o table \n", + " az account list -o table\n", "fi" ] }, @@ -222,24 +217,9 @@ }, { "cell_type": "code", -<<<<<<< Updated upstream - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33mDownloading client to /usr/local/bin/kubectl from https://storage.googleapis.com/kubernetes-release/release/v1.11.1/bin/linux/amd64/kubectl\u001b[0m\n", - "\u001b[33mPlease ensure that /usr/local/bin is in your search PATH, so the `kubectl` command can be found.\u001b[0m\n" - ] - } - ], -======= "execution_count": null, "metadata": {}, "outputs": [], ->>>>>>> Stashed changes "source": [ "!sudo env \"PATH=$PATH\" az aks install-cli" ] @@ -276,24 +256,9 @@ }, { "cell_type": "code", -<<<<<<< Updated upstream - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "NAME STATUS ROLES AGE VERSION\r\n", -
"aks-nodepool1-28016997-0 Ready agent 60d v1.9.6\r\n" - ] - } - ], -======= "execution_count": null, "metadata": {}, "outputs": [], ->>>>>>> Stashed changes "source": [ "!kubectl get nodes" ] @@ -307,31 +272,9 @@ }, { "cell_type": "code", -<<<<<<< Updated upstream - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "NAMESPACE NAME READY STATUS RESTARTS AGE\r\n", - "kube-system azureproxy-79c5db744-r5ggd 1/1 Running 2 60d\r\n", - "kube-system heapster-55f855b47-4m7xr 2/2 Running 0 60d\r\n", - "kube-system kube-dns-v20-7c556f89c5-4z4z6 3/3 Running 0 60d\r\n", - "kube-system kube-dns-v20-7c556f89c5-mp5fh 3/3 Running 0 60d\r\n", - "kube-system kube-proxy-k8t2c 1/1 Running 0 60d\r\n", - "kube-system kube-svc-redirect-z6ppp 1/1 Running 8 60d\r\n", - "kube-system kubernetes-dashboard-546f987686-8krxm 1/1 Running 2 60d\r\n", - "kube-system tunnelfront-695bcbdc68-t4l8t 1/1 Running 34 60d\r\n" - ] - } - ], -======= "execution_count": null, "metadata": {}, "outputs": [], ->>>>>>> Stashed changes "source": [ "!kubectl get pods --all-namespaces" ] @@ -352,11 +295,7 @@ }, { "cell_type": "code", -<<<<<<< Updated upstream - "execution_count": 11, -======= "execution_count": null, ->>>>>>> Stashed changes "metadata": {}, "outputs": [], "source": [ @@ -442,24 +381,7 @@ }, { "cell_type": "code", -<<<<<<< Updated upstream - "execution_count": 12, -======= "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "def write_json_to_file(json_dict, filename, mode='w'):\n", - " with open(filename, mode) as outfile:\n", - " json.dump(json_dict, outfile, indent=4, sort_keys=True)\n", - " outfile.write('\\n\\n')" - ] - }, - { - "cell_type": "code", - "execution_count": null, ->>>>>>> Stashed changes "metadata": {}, "outputs": [], "source": [ @@ -468,11 +390,7 @@ }, { "cell_type": "code", -<<<<<<< Updated upstream - "execution_count": 13, -======= "execution_count": null, ->>>>>>> 
Stashed changes "metadata": {}, "outputs": [], "source": [ @@ -488,101 +406,9 @@ }, { "cell_type": "code", -<<<<<<< Updated upstream - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\r\n", - " \"apiVersion\": \"apps/v1beta1\",\r\n", - " \"kind\": \"Deployment\",\r\n", - " \"metadata\": {\r\n", - " \"name\": \"azure-dl\"\r\n", - " },\r\n", - " \"spec\": {\r\n", - " \"replicas\": 1,\r\n", - " \"template\": {\r\n", - " \"metadata\": {\r\n", - " \"labels\": {\r\n", - " \"app\": \"azure-dl\"\r\n", - " }\r\n", - " },\r\n", - " \"spec\": {\r\n", - " \"containers\": [\r\n", - " {\r\n", - " \"env\": [\r\n", - " {\r\n", - " \"name\": \"LD_LIBRARY_PATH\",\r\n", - " \"value\": \"$LD_LIBRARY_PATH:/usr/local/nvidia/lib64:/opt/conda/envs/py3.5/lib\"\r\n", - " }\r\n", - " ],\r\n", - " \"image\": \"caia/kerastf-gpu\",\r\n", - " \"name\": \"azure-dl\",\r\n", - " \"ports\": [\r\n", - " {\r\n", - " \"containerPort\": 80,\r\n", - " \"name\": \"model\"\r\n", - " }\r\n", - " ],\r\n", - " \"resources\": {\r\n", - " \"limits\": {\r\n", - " \"alpha.kubernetes.io/nvidia-gpu\": 1\r\n", - " },\r\n", - " \"requests\": {\r\n", - " \"alpha.kubernetes.io/nvidia-gpu\": 1\r\n", - " }\r\n", - " },\r\n", - " \"volumeMounts\": [\r\n", - " {\r\n", - " \"mountPath\": \"/usr/local/nvidia\",\r\n", - " \"name\": \"nvidia\"\r\n", - " }\r\n", - " ]\r\n", - " }\r\n", - " ],\r\n", - " \"volumes\": [\r\n", - " {\r\n", - " \"hostPath\": {\r\n", - " \"path\": \"/usr/local/nvidia\"\r\n", - " },\r\n", - " \"name\": \"nvidia\"\r\n", - " }\r\n", - " ]\r\n", - " }\r\n", - " }\r\n", - " }\r\n", - "}\r\n", - "\r\n", - "{\r\n", - " \"apiVersion\": \"v1\",\r\n", - " \"kind\": \"Service\",\r\n", - " \"metadata\": {\r\n", - " \"name\": \"azure-dl\"\r\n", - " },\r\n", - " \"spec\": {\r\n", - " \"ports\": [\r\n", - " {\r\n", - " \"port\": 80\r\n", - " }\r\n", - " ],\r\n", - " \"selector\": {\r\n", - " \"app\": \"azure-dl\"\r\n", - " },\r\n", - " 
\"type\": \"LoadBalancer\"\r\n", - " }\r\n", - "}\r\n", - "\r\n" - ] - } - ], -======= "execution_count": null, "metadata": {}, "outputs": [], ->>>>>>> Stashed changes "source": [ "!cat az-dl.json" ] @@ -596,24 +422,9 @@ }, { "cell_type": "code", -<<<<<<< Updated upstream - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "deployment.apps/azure-dl created\n", - "service/azure-dl created\n" - ] - } - ], -======= "execution_count": null, "metadata": {}, "outputs": [], ->>>>>>> Stashed changes "source": [ "!kubectl create -f az-dl.json" ] @@ -627,32 +438,9 @@ }, { "cell_type": "code", -<<<<<<< Updated upstream - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "NAMESPACE NAME READY STATUS RESTARTS AGE\r\n", - "default azure-dl-5f6b7dfb6f-kbxz6 1/1 Running 0 6m\r\n", - "kube-system azureproxy-79c5db744-r5ggd 1/1 Running 2 60d\r\n", - "kube-system heapster-55f855b47-4m7xr 2/2 Running 0 60d\r\n", - "kube-system kube-dns-v20-7c556f89c5-4z4z6 3/3 Running 0 60d\r\n", - "kube-system kube-dns-v20-7c556f89c5-mp5fh 3/3 Running 0 60d\r\n", - "kube-system kube-proxy-k8t2c 1/1 Running 0 60d\r\n", - "kube-system kube-svc-redirect-z6ppp 1/1 Running 8 60d\r\n", - "kube-system kubernetes-dashboard-546f987686-8krxm 1/1 Running 2 60d\r\n", - "kube-system tunnelfront-695bcbdc68-t4l8t 1/1 Running 34 60d\r\n" - ] - } - ], -======= "execution_count": null, "metadata": {}, "outputs": [], ->>>>>>> Stashed changes "source": [ "!kubectl get pods --all-namespaces" ] @@ -666,41 +454,9 @@ }, { "cell_type": "code", -<<<<<<< Updated upstream - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LAST SEEN FIRST SEEN COUNT NAME KIND SUBOBJECT TYPE REASON SOURCE MESSAGE\r\n", - "46m 46m 1 aks-nodepool1-28016997-0.1548a985ff48b23b Node Normal RegisteredNode node-controller Node 
aks-nodepool1-28016997-0 event: Registered Node aks-nodepool1-28016997-0 in Controller\r\n", - "35m 35m 1 aks-nodepool1-28016997-0.1548aa2258f34dc5 Node Normal RegisteredNode node-controller Node aks-nodepool1-28016997-0 event: Registered Node aks-nodepool1-28016997-0 in Controller\r\n", - "6m 6m 1 azure-dl-5f6b7dfb6f-kbxz6.1548abbc1c661966 Pod Normal Scheduled default-scheduler Successfully assigned azure-dl-5f6b7dfb6f-kbxz6 to aks-nodepool1-28016997-0\r\n", - "6m 6m 1 azure-dl-5f6b7dfb6f-kbxz6.1548abbc256b0973 Pod Normal SuccessfulMountVolume kubelet, aks-nodepool1-28016997-0 MountVolume.SetUp succeeded for volume \"nvidia\" \r\n", - "6m 6m 1 azure-dl-5f6b7dfb6f-kbxz6.1548abbc2754e88a Pod Normal SuccessfulMountVolume kubelet, aks-nodepool1-28016997-0 MountVolume.SetUp succeeded for volume \"default-token-crgnj\" \r\n", - "4m 6m 2 azure-dl-5f6b7dfb6f-kbxz6.1548abbc5412d897 Pod spec.containers{azure-dl} Normal Pulling kubelet, aks-nodepool1-28016997-0 pulling image \"caia/kerastf-gpu\"\r\n", - "4m 4m 1 azure-dl-5f6b7dfb6f-kbxz6.1548abd437671289 Pod spec.containers{azure-dl} Warning Failed kubelet, aks-nodepool1-28016997-0 Failed to pull image \"caia/kerastf-gpu\": rpc error: code = Canceled desc = context canceled\r\n", - "4m 4m 1 azure-dl-5f6b7dfb6f-kbxz6.1548abd437675041 Pod spec.containers{azure-dl} Warning Failed kubelet, aks-nodepool1-28016997-0 Error: ErrImagePull\r\n", - "4m 4m 1 azure-dl-5f6b7dfb6f-kbxz6.1548abd479665ad8 Pod Normal SandboxChanged kubelet, aks-nodepool1-28016997-0 Pod sandbox changed, it will be killed and re-created.\r\n", - "4m 4m 3 azure-dl-5f6b7dfb6f-kbxz6.1548abd4bccc3504 Pod spec.containers{azure-dl} Normal BackOff kubelet, aks-nodepool1-28016997-0 Back-off pulling image \"caia/kerastf-gpu\"\r\n", - "4m 4m 3 azure-dl-5f6b7dfb6f-kbxz6.1548abd4bccc6574 Pod spec.containers{azure-dl} Warning Failed kubelet, aks-nodepool1-28016997-0 Error: ImagePullBackOff\r\n", - "2m 2m 1 azure-dl-5f6b7dfb6f-kbxz6.1548abf021a8ab22 Pod 
spec.containers{azure-dl} Normal Pulled kubelet, aks-nodepool1-28016997-0 Successfully pulled image \"caia/kerastf-gpu\"\r\n", - "2m 2m 1 azure-dl-5f6b7dfb6f-kbxz6.1548abf02e88d586 Pod spec.containers{azure-dl} Normal Created kubelet, aks-nodepool1-28016997-0 Created container\r\n", - "2m 2m 1 azure-dl-5f6b7dfb6f-kbxz6.1548abf037241533 Pod spec.containers{azure-dl} Normal Started kubelet, aks-nodepool1-28016997-0 Started container\r\n", - "6m 6m 1 azure-dl-5f6b7dfb6f.1548abbc1bbcf974 ReplicaSet Normal SuccessfulCreate replicaset-controller Created pod: azure-dl-5f6b7dfb6f-kbxz6\r\n", - "6m 6m 1 azure-dl.1548abbc1aaaccda Deployment Normal ScalingReplicaSet deployment-controller Scaled up replica set azure-dl-5f6b7dfb6f to 1\r\n", - "6m 6m 1 azure-dl.1548abbc284ca303 Service Normal EnsuringLoadBalancer service-controller Ensuring load balancer\r\n", - "2m 2m 1 azure-dl.1548abeeedade8ad Service Normal EnsuredLoadBalancer service-controller Ensured load balancer\r\n" - ] - } - ], -======= "execution_count": null, "metadata": {}, "outputs": [], ->>>>>>> Stashed changes "source": [ "!kubectl get events" ] @@ -714,11 +470,7 @@ }, { "cell_type": "code", -<<<<<<< Updated upstream - "execution_count": 19, -======= "execution_count": null, ->>>>>>> Stashed changes "metadata": {}, "outputs": [], "source": [ @@ -728,113 +480,18 @@ }, { "cell_type": "code", -<<<<<<< Updated upstream - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2018-08-07 17:58:41,382 CRIT Supervisor running as root (no user in config file)\r\n", - "2018-08-07 17:58:41,384 INFO supervisord started with pid 1\r\n", - "2018-08-07 17:58:42,387 INFO spawned: 'program_exit' with pid 9\r\n", - "2018-08-07 17:58:42,388 INFO spawned: 'nginx' with pid 10\r\n", - "2018-08-07 17:58:42,390 INFO spawned: 'gunicorn' with pid 11\r\n", - "2018-08-07 17:58:43,422 INFO success: program_exit entered RUNNING state, process has stayed up for > than 1 
seconds (startsecs)\r\n", - "2018-08-07 17:58:44.007138: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA\r\n", - "2018-08-07 17:58:44.191739: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1392] Found device 0 with properties: \r\n", - "name: Tesla K80 major: 3 minor: 7 memoryClockRate(GHz): 0.8235\r\n", - "pciBusID: ddde:00:00.0\r\n", - "totalMemory: 11.17GiB freeMemory: 11.10GiB\r\n", - "2018-08-07 17:58:44.191801: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1471] Adding visible gpu devices: 0\r\n", - "2018-08-07 17:58:44.533627: I tensorflow/core/common_runtime/gpu/gpu_device.cc:952] Device interconnect StreamExecutor with strength 1 edge matrix:\r\n", - "2018-08-07 17:58:44.533679: I tensorflow/core/common_runtime/gpu/gpu_device.cc:958] 0 \r\n", - "2018-08-07 17:58:44.533694: I tensorflow/core/common_runtime/gpu/gpu_device.cc:971] 0: N \r\n", - "2018-08-07 17:58:44.533952: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1084] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10761 MB memory) -> physical GPU (device: 0, name: Tesla K80, pci bus id: ddde:00:00.0, compute capability: 3.7)\r\n", - "2018-08-07 17:58:47,537 INFO success: nginx entered RUNNING state, process has stayed up for > than 5 seconds (startsecs)\r\n", - "2018-08-07 17:59:02,553 INFO success: gunicorn entered RUNNING state, process has stayed up for > than 20 seconds (startsecs)\r\n", - "Downloading data from https://github.com/adamcasson/resnet152/releases/download/v0.1/resnet152_weights_tf.h5\r\n", - "\r\n", - " 8192/243179624 [..............................] - ETA: 2s\r\n", - " 2670592/243179624 [..............................] - ETA: 4s\r\n", - " 8912896/243179624 [>.............................] - ETA: 2s\r\n", - " 16171008/243179624 [>.............................] - ETA: 2s\r\n", - " 23740416/243179624 [=>............................] 
- ETA: 1s\r\n", - " 32129024/243179624 [==>...........................] - ETA: 1s\r\n", - " 40280064/243179624 [===>..........................] - ETA: 1s\r\n", - " 48472064/243179624 [====>.........................] - ETA: 1s\r\n", - " 56614912/243179624 [=====>........................] - ETA: 1s\r\n", - " 64569344/243179624 [======>.......................] - ETA: 1s\r\n", - " 70533120/243179624 [=======>......................] - ETA: 1s\r\n", - " 74948608/243179624 [========>.....................] - ETA: 1s\r\n", - " 77963264/243179624 [========>.....................] - ETA: 1s\r\n", - " 83402752/243179624 [=========>....................] - ETA: 1s\r\n", - " 88875008/243179624 [=========>....................] - ETA: 1s\r\n", - " 95723520/243179624 [==========>...................] - ETA: 1s\r\n", - "101130240/243179624 [===========>..................] - ETA: 1s\r\n", - "106102784/243179624 [============>.................] - ETA: 1s\r\n", - "110903296/243179624 [============>.................] - ETA: 1s\r\n", - "116129792/243179624 [=============>................] - ETA: 1s\r\n", - "121176064/243179624 [=============>................] - ETA: 1s\r\n", - "126164992/243179624 [==============>...............] - ETA: 0s\r\n", - "130932736/243179624 [===============>..............] - ETA: 1s\r\n", - "137437184/243179624 [===============>..............] - ETA: 0s\r\n", - "144523264/243179624 [================>.............] - ETA: 0s\r\n", - "152428544/243179624 [=================>............] - ETA: 0s\r\n", - "158539776/243179624 [==================>...........] - ETA: 0s\r\n", - "163553280/243179624 [===================>..........] - ETA: 0s\r\n", - "168517632/243179624 [===================>..........] - ETA: 0s\r\n", - "173539328/243179624 [====================>.........] - ETA: 0s\r\n", - "178610176/243179624 [=====================>........] - ETA: 0s\r\n", - "183623680/243179624 [=====================>........] 
- ETA: 0s\r\n", - "188628992/243179624 [======================>.......] - ETA: 0s\r\n", - "193658880/243179624 [======================>.......] - ETA: 0s\r\n", - "196231168/243179624 [=======================>......] - ETA: 0s\r\n", - "203907072/243179624 [========================>.....] - ETA: 0s\r\n", - "210108416/243179624 [========================>.....] - ETA: 0s\r\n", - "215138304/243179624 [=========================>....] - ETA: 0s\r\n", - "220168192/243179624 [==========================>...] - ETA: 0s\r\n", - "225148928/243179624 [==========================>...] - ETA: 0s\r\n", - "230211584/243179624 [===========================>..] - ETA: 0s\r\n", - "235200512/243179624 [============================>.] - ETA: 0s\r\n", - "239239168/243179624 [============================>.] - ETA: 0s\r\n", - "243187712/243179624 [==============================] - 2s 0us/step\r\n", - "{\"path\": \"/code/driver.py\", \"message\": \"Model loading time: 34161.21 ms\", \"timestamp\": \"2018-08-07T17:59:18.129430Z\", \"logger\": \"model_driver\", \"host\": \"azure-dl-5f6b7dfb6f-kbxz6\", \"level\": \"INFO\", \"stack_info\": null, \"tags\": []}\r\n", - "Initialising\r\n", - "{\"msg\": \" * Running on %s://%s:%d/ %s\", \"path\": \"/opt/conda/envs/py3.5/lib/python3.5/site-packages/werkzeug/_internal.py\", \"message\": \" * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)\", \"timestamp\": \"2018-08-07T17:59:18.134555Z\", \"logger\": \"werkzeug\", \"host\": \"azure-dl-5f6b7dfb6f-kbxz6\", \"level\": \"INFO\", \"stack_info\": null, \"tags\": []}\r\n" - ] - } - ], -======= "execution_count": null, "metadata": {}, "outputs": [], ->>>>>>> Stashed changes "source": [ "!kubectl logs {pod_dict['items'][0]['metadata']['name']}" ] }, { "cell_type": "code", -<<<<<<< Updated upstream - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE\r\n", - "azure-dl 1 1 1 1 6m\r\n" - ] - 
} - ], -======= "execution_count": null, "metadata": {}, "outputs": [], ->>>>>>> Stashed changes "source": [ "!kubectl get deployment" ] @@ -848,24 +505,9 @@ }, { "cell_type": "code", -<<<<<<< Updated upstream - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE\r\n", - "azure-dl LoadBalancer 10.0.86.30 40.117.74.122 80:31341/TCP 6m\r\n" - ] - } - ], -======= "execution_count": null, "metadata": {}, "outputs": [], ->>>>>>> Stashed changes "source": [ "!kubectl get service azure-dl" ] @@ -874,52 +516,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ -<<<<<<< Updated upstream "Next, we will [test our web application deployed on AKS](05_TestWebApp.ipynb)." -======= - "Next, we will [test our web application deployed on AKS](05_TestWebApp.ipynb). Once, we are done with all the notebooks of the tutorial, below instructions can be used to delete the cluster and free resources." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Tear it all down" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Once you are done with your cluster you can use the following two commands to destroy it all." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!kubectl delete -f az-dl.json" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!az aks delete -n $aks_name -g $resource_group -y" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!az group delete --name $resource_group -y" ->>>>>>> Stashed changes ] } ],