{
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Impact Investigation\n",
        "1. Run all cells.\n",
        "1. View report at the bottom."
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "# Default inputs for the investigation; this cell carries the `parameters` tag.\n",
        "# NOTE: locationName is reassigned from the LocationName query result further down.\n",
        "su = \"tfs-cus-1\"\n",
        "start = \"2019-07-20T16:00:00.0000000Z\"\n",
        "end = \"2019-07-20T16:33:36.0000000Z\"\n",
        "url = \"https://notebooksv2.azure.com/yaananth/projects/06OasuNRs6rK/delays.ipynb\"\n",
        "baseUrl = \"https://notebooksv2.azure.com/yaananth/projects/06OasuNRs6rK\"\n",
        "service = \"tfs\"\n",
        "hub = \"Build\"\n",
        "locationName = \"tfsprodcus1\""
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "collapsed": false,
        "inputHidden": false,
        "outputHidden": false,
        "tags": [
          "parameters"
        ]
      }
    },
    {
      "cell_type": "code",
      "source": [
        "%%capture\n",
        "!pip install nimport azure-kusto-notebooks"
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "collapsed": false,
        "inputHidden": false,
        "outputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Import the things we use\n",
        "\n",
        "# Note you can also use kql https://docs.microsoft.com/en-us/azure/data-explorer/kqlmagic\n",
        "# %kql is single line magic\n",
        "# %%kql is cell magic\n",
        "\n",
        "# https://nbviewer.jupyter.org/github/ipython/ipython/blob/4.0.x/examples/IPython%20Kernel/Rich%20Output.ipynb#HTML\n",
        "# https://ipython.readthedocs.io/en/stable/interactive/magics.html\n",
        "from IPython.display import display, HTML, Markdown, Javascript, clear_output\n",
        "\n",
        "# http://pandas-docs.github.io/pandas-docs-travis/user_guide/reshaping.html\n",
        "import pandas as pd\n",
        "pd.options.display.html.table_schema = True\n",
        "from pandas import Series, DataFrame\n",
        "from datetime import datetime, timedelta, timezone\n",
        "from urllib.parse import urlencode, quote_plus\n",
        "from requests.utils import requote_uri\n",
        "import time\n",
        "import numpy as np\n",
        "from matplotlib import pyplot as plt\n",
        "from nimport.utils import tokenize, open_nb\n",
        "import json\n",
        "import os\n",
        "import calendar as cal\n",
        "import concurrent.futures\n",
        "from azure.kusto.notebooks import utils as akn"
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "collapsed": false,
        "inputHidden": false,
        "outputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Parameters passed along with every query file to akn.execute_file below.\n",
        "params = {\n",
        "    \"su\": su,\n",
        "    \"start\": start,\n",
        "    \"end\": end,\n",
        "    \"url\": url,\n",
        "    \"baseUrl\": baseUrl,\n",
        "    \"service\": service\n",
        "}\n",
        "# Prefix paths with 'devops-pipelines' unless the working directory is already that folder.\n",
        "root = 'devops-pipelines' if os.path.basename(os.getcwd()) != 'devops-pipelines' else ''\n",
        "queryPath = os.path.join(root, 'queries')"
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "collapsed": false,
        "inputHidden": false,
        "outputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# authenticate kusto client\n",
        "# you will need to copy the token into a browser window for AAD auth. \n",
        "client = akn.get_client('https://vso.kusto.windows.net', 'VSO')"
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "collapsed": false,
        "inputHidden": false,
        "outputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# authenticate a second kusto client, this one against the IcM cluster (used for the active incidents query)\n",
        "# you will need to copy the token into a browser window for AAD auth. \n",
        "icm_client = akn.get_client('https://icmcluster.kusto.windows.net', 'IcMDataWarehouse')"
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Paths of the query files used by this investigation.\n",
        "q_loc = os.path.join(queryPath, \"LocationName.csl\")\n",
        "q_whatChanged = os.path.join(queryPath, \"WhatChanged.csl\")\n",
        "q_vipSwap = os.path.join(queryPath, \"VIPSwap.csl\")\n",
        "\n",
        "impactPath = os.path.join(queryPath, \"impact\")\n",
        "q_commands = os.path.join(impactPath, \"CommandsReason.csl\")\n",
        "q_commandsAT = os.path.join(impactPath, \"CommandsAT.csl\")\n",
        "q_commandsDb = os.path.join(impactPath, \"CommandsDb.csl\")\n",
        "\n",
        "# Submit the queries to a thread pool so they run concurrently.\n",
        "with concurrent.futures.ThreadPoolExecutor() as executor:\n",
        "    # materialize location name immediately as we need this for other queries\n",
        "    p1 = executor.submit(akn.execute_file, client, 'VSO', q_loc, params)\n",
        "    locationNameResult = akn.to_dataframe_from_future(p1)\n",
        "    locationName = locationNameResult[\"Tenant\"][0]\n",
        "    params[\"locationName\"] = locationName\n",
        "\n",
        "    p2 = executor.submit(akn.execute_file, client, 'VSO', q_whatChanged, params)\n",
        "    p3 = executor.submit(akn.execute_file, client, 'VSO', q_vipSwap, params)\n",
        "\n",
        "    p4 = executor.submit(akn.execute_file, client, 'VSO', q_commandsAT, params)\n",
        "    p5 = executor.submit(akn.execute_file, client, 'VSO', q_commandsDb, params)\n",
        "    p6 = executor.submit(akn.execute_file, client, 'VSO', q_commands, params)\n",
        "\n",
        "    p7 = executor.submit(akn.execute_file, icm_client, 'IcmDataWarehouse',\n",
        "                         os.path.join(queryPath, 'ActiveIncidents.csl'), params)\n",
        "\n",
        "# Turn each future into a DataFrame for the report cell.\n",
        "q_whatChanged_df = akn.to_dataframe_from_future(p2)\n",
        "\n",
        "vipSwapResultDf = akn.to_dataframe_from_future(p3)\n",
        "\n",
        "q_commandsAT_df = akn.to_dataframe_from_future(p4)\n",
        "\n",
        "q_commandsDb_df = akn.to_dataframe_from_future(p5)\n",
        "\n",
        "q_commands_df = akn.to_dataframe_from_future(p6)\n",
        "\n",
        "q_activeIncidentsResultDf = akn.to_dataframe_from_future(p7)"
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "collapsed": false,
        "inputHidden": false,
        "outputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def reason_frequency(df, reason):\n",
        "    \"\"\"Return the Frequency of the first row whose Reason equals `reason`, or 0 when no row matches.\"\"\"\n",
        "    matches = df.loc[df[\"Reason\"] == reason, \"Frequency\"]\n",
        "    return matches.iloc[0] if len(matches) > 0 else 0\n",
        "\n",
        "\n",
        "print('=' * 50)\n",
        "print('Report!')\n",
        "print('=' * 50, '\\n\\n')\n",
        "\n",
        "# jarvis params\n",
        "# start/end are shifted by -10 / +10 through akn.get_time (units are defined by that helper)\n",
        "jarvisParams = {\n",
        "    'su': su,\n",
        "    'start': akn.get_time(start, -10),\n",
        "    'end': akn.get_time(end, 10),\n",
        "    'service': service\n",
        "}\n",
        "\n",
        "# jarvis\n",
        "# The adjacent string literals are joined first, then %-formatted with jarvisParams.\n",
        "jarvisLink = \"\"\"https://jarvis-west.dc.ad.msft.net/dashboard/VSO-ServiceInsights/DevOpsReports/TFS DevOpsReports\"\"\" \\\n",
        "    \"\"\"?overrides=[{\"query\":\"//*[id='Service']\",\"key\":\"value\",\"replacement\":\"%(service)s\"},\"\"\" \\\n",
        "    \"\"\"{\"query\":\"//*[id='RoleInstance']\",\"key\":\"value\",\"replacement\":\"\"},\"\"\" \\\n",
        "    \"\"\"{\"query\":\"//*[id='ScaleUnit']\",\"key\":\"value\",\"replacement\":\"%(su)s\"}]\"\"\" \\\n",
        "    \"\"\"&globalStartTime=%(start)s&globalEndTime=%(end)s&pinGlobalTimeRange=true\"\"\" % jarvisParams\n",
        "print('Jarvis dashboard link:\\n', requote_uri(jarvisLink), '\\n')\n",
        "\n",
        "#\n",
        "# vip swap\n",
        "print()\n",
        "print('Vip Swap? =============================')\n",
        "if len(vipSwapResultDf.index) > 0:\n",
        "    viptime = vipSwapResultDf[\"TIMESTAMP\"][0]\n",
        "    starttime = akn.to_datetime(start)\n",
        "    # drop tzinfo on both sides so the two timestamps can be subtracted\n",
        "    delta = starttime.replace(tzinfo=None) - viptime.replace(tzinfo=None)\n",
        "    if delta.total_seconds() > 0:\n",
        "        print(\"\"\"VIP SWAP happened: %s days %s hours %s minutes ago (%s) (issue start: %s)\"\"\" % (delta.days, delta.seconds//3600, (delta.seconds//60) % 60, viptime, start))\n",
        "    else:\n",
        "        print('...no swaps recorded in the given time range')\n",
        "else:\n",
        "    print('...no swaps recorded in the given time range')\n",
        "\n",
        "# slow failed reason analysis\n",
        "print()\n",
        "print('Is it slow commands or failed commands? =============================')\n",
        "freq = q_commands_df[\"Frequency\"]\n",
        "coefficientOfVariance = freq.std()/freq.mean()\n",
        "# a missing 'failed' or 'slow' row counts as 0 instead of raising IndexError\n",
        "failedCount = reason_frequency(q_commands_df, \"failed\")\n",
        "slowCount = reason_frequency(q_commands_df, \"slow\")\n",
        "reason = \"failed or slow\"\n",
        "if coefficientOfVariance > 0.5:\n",
        "    reason = \"failed\" if failedCount > slowCount else \"slow\"\n",
        "else:\n",
        "    print(\"Slow and failed commands are too close, both might be contributing...\")\n",
        "print(\"Probably due to %s commands; Failed - %s, Slow - %s\" % (reason, failedCount, slowCount))\n",
        "\n",
        "# slow failed reason for AT?\n",
        "print()\n",
        "print('Is it %s because of AT? =============================' % (reason))\n",
        "failed = q_commandsAT_df[q_commandsAT_df[\"Reason\"] == \"failed\"]\n",
        "slow = q_commandsAT_df[q_commandsAT_df[\"Reason\"] == \"slow\"]\n",
        "# look at every row unless a single reason was singled out above\n",
        "data = q_commandsAT_df\n",
        "if reason == \"failed\":\n",
        "    data = failed\n",
        "elif reason == \"slow\":\n",
        "    data = slow\n",
        "\n",
        "coefficientOfVariance = data[\"Frequency\"].std()/data[\"Frequency\"].mean()\n",
        "\n",
        "if coefficientOfVariance > 0.5:\n",
        "    print(\"Found variance in AT's for %s commands\" % (reason))\n",
        "    print(data.head(30))\n",
        "else:\n",
        "    print(\"Seems be same across AT's for %s commands\" % (reason))\n",
        "\n",
        "# slow failed reason for Db?\n",
        "print()\n",
        "print('Is it %s because of Db? =============================' % (reason))\n",
        "failed = q_commandsDb_df[q_commandsDb_df[\"Reason\"] == \"failed\"]\n",
        "slow = q_commandsDb_df[q_commandsDb_df[\"Reason\"] == \"slow\"]\n",
        "data = q_commandsDb_df\n",
        "if reason == \"failed\":\n",
        "    data = failed\n",
        "elif reason == \"slow\":\n",
        "    data = slow\n",
        "\n",
        "coefficientOfVariance = data[\"Frequency\"].std()/data[\"Frequency\"].mean()\n",
        "\n",
        "if coefficientOfVariance > 0.5:\n",
        "    print(\"Found variance in Db's for %s commands\" % (reason))\n",
        "    print(\"Suffix '%s' to database server name\" % (\".database.windows.net\"))\n",
        "    print(\"Prefix '%s' to database name\" % (params[\"service\"] + \"_\" + params[\"locationName\"] + \"_\"))\n",
        "    print(data.head(30))\n",
        "else:\n",
        "    print(\"Seems be same across Db's for %s commands\" % (reason))\n",
        "\n",
        "# what changed? analysis\n",
        "print()\n",
        "print('What changed? =============================')\n",
        "if len(q_whatChanged_df.index) == 0:\n",
        "    print(\"No relevant changes found...\")\n",
        "else:\n",
        "    up_prefix = \"\"\n",
        "    mit_prefix = \"\"\n",
        "    text = \"\"\n",
        "    for index, row in q_whatChanged_df.iterrows():\n",
        "        title = row.title.lower()\n",
        "        if 'upgrade' in title:\n",
        "            if not up_prefix:\n",
        "                up_prefix += \"Looks like, there's upgrade in progress...\\n\"\n",
        "            text += \"\"\"%s %s %s \\n\"\"\" % (row.TIMESTAMP, row.title, row.buildNumber)\n",
        "        if 'mitigation' in title:\n",
        "            if not mit_prefix:\n",
        "                mit_prefix += \"Looks like, there are some mitigations by health agent...\\n\"\n",
        "            # every entry ends with a newline so consecutive rows do not run together\n",
        "            text += \"\"\"%s %s %s \\n\"\"\" % (row.TIMESTAMP, row.title, row.buildNumber)\n",
        "\n",
        "    if text:\n",
        "        print(up_prefix + mit_prefix + text)\n",
        "    else:\n",
        "        # nothing matched 'upgrade' or 'mitigation'; show the raw rows instead\n",
        "        print(q_whatChanged_df)\n",
        "\n",
        "# active incidents?\n",
        "print()\n",
        "print('Active incidents? =============================')\n",
        "# count incidents whose title does not contain the impact monitor's own name\n",
        "otherIncidentsCount = 0\n",
        "for index, row in q_activeIncidentsResultDf.iterrows():\n",
        "    if \"TFS Customer Impact Monitor\" not in row.Title:\n",
        "        otherIncidentsCount += 1\n",
        "\n",
        "if otherIncidentsCount > 0:\n",
        "    print(\"We found some incidents during the time period, check if they are related...\")\n",
        "    # styling\n",
        "    # NOTE(review): `text` is accepted but unused, and the URL column built below is not\n",
        "    # among the printed columns - confirm whether a rendered link was intended.\n",
        "    def make_clickable(url, text):\n",
        "        return '{0}'.format(url)\n",
        "\n",
        "    newDf = q_activeIncidentsResultDf.assign(URL=[*map(lambda x: make_clickable(\"\"\"https://icm.ad.msft.net/imp/v3/incidents/details/%s/home\"\"\" % (x), \"ICMLink\"), q_activeIncidentsResultDf.IncidentId)])\n",
        "    print(\"ICM link to copy - \" + \"https://icm.ad.msft.net/imp/v3/incidents/details/INCIDENTID/home\")\n",
        "    print(newDf[['IncidentId','Severity','Title']])\n",
        "else:\n",
        "    print(\"No active incidents that could be related are found...\")"
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "collapsed": false,
        "inputHidden": false,
        "outputHidden": false
      }
    }
  ],
  "metadata": {
    "kernel_info": {
      "name": "python3"
    },
    "kernelspec": {
      "name": "python3",
      "language": "python",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python",
      "version": "3.7.4",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    },
    "nteract": {
      "version": "0.14.5"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}