560 строки
25 KiB
Plaintext
560 строки
25 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"# Orchestration delays Investigation\n",
|
|
"1. Run all cells.\n",
|
|
"1. Scroll down to check for any authentication messages.\n",
|
|
"1. View report at the bottom."
|
|
],
|
|
"metadata": {}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# These are just defaults; they will be overwritten if you use the nimport pip package\n",
|
|
"start = \"2019-10-15T20:21:54.0330000Z\"\n",
|
|
"end = \"2019-10-15T20:52:21.5370169Z\"\n",
|
|
"service = \"pipelines\"\n",
|
|
"su = \"pipelines-ghub-eus2-2\"\n",
|
|
"hub = \"Actions\"\n",
|
|
"url = \"https://notebooksv2.azure.com/yaananth/projects/06OasuNRs6rK/delays.ipynb\"\n",
|
|
"baseUrl = \"https://notebooksv2.azure.com/yaananth/projects/06OasuNRs6rK\""
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"inputHidden": false,
|
|
"outputHidden": false,
|
|
"tags": [
|
|
"parameters"
|
|
]
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"%%capture\n",
|
|
"!pip install --upgrade nimport azure-kusto-notebooks"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"inputHidden": false,
|
|
"outputHidden": false,
|
|
"tags": [
|
|
"debug"
|
|
]
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Import the things we use\n",
|
|
"\n",
|
|
"# Note you can also use kql https://docs.microsoft.com/en-us/azure/data-explorer/kqlmagic\n",
|
|
"# %kql is single line magic\n",
|
|
"# %%kql is cell magic\n",
|
|
"\n",
|
|
"# https://nbviewer.jupyter.org/github/ipython/ipython/blob/4.0.x/examples/IPython%20Kernel/Rich%20Output.ipynb#HTML\n",
|
|
"# https://ipython.readthedocs.io/en/stable/interactive/magics.html\n",
|
|
"from IPython.display import display, HTML, Markdown, Javascript, clear_output\n",
|
|
"\n",
|
|
"# http://pandas-docs.github.io/pandas-docs-travis/user_guide/reshaping.html\n",
|
|
"import pandas as pd\n",
|
|
"pd.options.display.html.table_schema = True\n",
|
|
"from pandas import Series, DataFrame\n",
|
|
"from datetime import datetime, timedelta, timezone\n",
|
|
"from urllib.parse import urlencode, quote_plus\n",
|
|
"from requests.utils import requote_uri\n",
|
|
"import time\n",
|
|
"import numpy as np\n",
|
|
"from matplotlib import pyplot as plt\n",
|
|
"from nimport.utils import tokenize, open_nb\n",
|
|
"import json\n",
|
|
"import os\n",
|
|
"import calendar as cal\n",
|
|
"import concurrent.futures\n",
|
|
"from azure.kusto.notebooks import utils as akn"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"inputHidden": false,
|
|
"outputHidden": false
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"params = {\n",
|
|
" \"su\": su,\n",
|
|
" \"start\": start,\n",
|
|
" \"end\": end,\n",
|
|
" \"url\": url,\n",
|
|
" \"baseUrl\": baseUrl,\n",
|
|
" \"service\": service,\n",
|
|
" \"hub\": hub\n",
|
|
"}\n",
|
|
"root = 'devops-pipelines' if os.path.basename(os.getcwd()) != 'devops-pipelines' else ''\n",
|
|
"queryPath = os.path.join(root, 'queries')"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"inputHidden": false,
|
|
"outputHidden": false
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# authenticate kusto client\n",
|
|
"# you will need to copy the token into a browser window for AAD auth. \n",
|
|
"client = akn.get_client('https://vso.kusto.windows.net')"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"inputHidden": false,
|
|
"outputHidden": false
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# authenticate the ICM kusto client (a separate cluster from the VSO one above)\n",
|
|
"# you will need to copy the token into a browser window for AAD auth. \n",
|
|
"icm_client = akn.get_client('https://icmcluster.kusto.windows.net')"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"inputHidden": false,
|
|
"outputHidden": false
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"q_loc = os.path.join(queryPath, \"LocationName.csl\")\n",
|
|
"q_whatChanged = os.path.join(queryPath, \"WhatChanged.csl\")\n",
|
|
"q_haActions = os.path.join(queryPath, \"HealthAgentActions.csl\")\n",
|
|
"q_mdm = os.path.join(queryPath, \"MDMAccount.csl\")\n",
|
|
"\n",
|
|
"delaysPath = os.path.join(queryPath, \"delays\")\n",
|
|
"q_affectedAccounts = os.path.join(delaysPath, \"AffectedAccounts.csl\")\n",
|
|
"q_abusers = os.path.join(delaysPath, \"Abusers.csl\")\n",
|
|
"q_affAccounts = os.path.join(delaysPath, \"AffectedAccounts.csl\")\n",
|
|
"q_delayedAccountsAreAbusers = os.path.join(delaysPath, \"DelayedAccountsAreAbusers.csl\")\n",
|
|
"q_whatDelayed = os.path.join(delaysPath, \"WhatDelayed.csl\")\n",
|
|
"q_load = os.path.join(delaysPath, \"Load.csl\")\n",
|
|
"\n",
|
|
"with concurrent.futures.ThreadPoolExecutor() as executor:\n",
|
|
" # materialize location name immediately as we need this for other queries\n",
|
|
" p1 = executor.submit(akn.execute_file, client, 'VSO', q_loc, params)\n",
|
|
" locationNameResult = akn.to_dataframe_from_future(p1)\n",
|
|
" locationName = locationNameResult[\"Tenant\"][0]\n",
|
|
" params[\"locationName\"] = locationName\n",
|
|
" p2 = executor.submit(akn.execute_file, client, 'VSO', q_whatChanged, params)\n",
|
|
" p4 = executor.submit(akn.execute_file, client, 'VSO', q_haActions, params) \n",
|
|
" \n",
|
|
" p5 = executor.submit(akn.execute_file, client, 'VSO', q_affectedAccounts, params)\n",
|
|
" p6 = executor.submit(akn.execute_file, client, 'VSO', q_abusers, params)\n",
|
|
" p7 = executor.submit(akn.execute_file, client, 'VSO', q_affAccounts, params)\n",
|
|
" p8 = executor.submit(akn.execute_file, client, 'VSO', q_delayedAccountsAreAbusers, params)\n",
|
|
" p9 = executor.submit(akn.execute_file, client, 'VSO', q_whatDelayed, params)\n",
|
|
" p10 = executor.submit(akn.execute_file, client, 'VSO', q_load, params)\n",
|
|
" \n",
|
|
" p11 = executor.submit(akn.execute_file, icm_client, 'IcmDataWarehouse', \n",
|
|
" os.path.join(queryPath, 'ActiveIncidents.csl'), params)\n",
|
|
" p12 = executor.submit(akn.execute_file, client, 'VSO', q_mdm, params)\n",
|
|
"\n",
|
|
"q_whatChanged_df = akn.to_dataframe_from_future(p2)\n",
|
|
"q_haActions_df = akn.to_dataframe_from_future(p4)\n",
|
|
"q_affectedAccountsResultDf = akn.to_dataframe_from_future(p5)\n",
|
|
"\n",
|
|
"abusersDf = akn.to_dataframe_from_future(p6)\n",
|
|
"finalabusersList = np.intersect1d(q_affectedAccountsResultDf[\"HostId\"].values, abusersDf[\"HostId\"].values);\n",
|
|
"\n",
|
|
"q_affAccounts_df = akn.to_dataframe_from_future(p7)\n",
|
|
"q_delayedAccountsAreAbusers_df = akn.to_dataframe_from_future(p8)\n",
|
|
"q_whatDelayedResultDf = akn.to_dataframe_from_future(p9)\n",
|
|
"q_loadResultDf = akn.to_dataframe_from_future(p10)\n",
|
|
"\n",
|
|
"q_activeIncidentsResultDf = akn.to_dataframe_from_future(p11)\n",
|
|
"\n",
|
|
"q_mdmDf = akn.to_dataframe_from_future(p12)\n",
|
|
"params[\"mdmAccount\"] = q_mdmDf[\"monitoringAccount\"][0]"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"inputHidden": false,
|
|
"outputHidden": false
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"q_spike = os.path.join(delaysPath, \"OrchestrationLogSpike.csl\")\n",
|
|
"q_parallelism = os.path.join(delaysPath, \"Parallelism.csl\")\n",
|
|
"with concurrent.futures.ThreadPoolExecutor() as executor:\n",
|
|
"    # Submit the spike and parallelism queries for every delayed host before waiting on\n",
|
|
"    # any result, so that both sets of queries can run concurrently.\n",
|
|
"    delayedHostIds = q_delayedAccountsAreAbusers_df[\"HostId\"].values\n",
|
|
"    sfs = [executor.submit(akn.execute_file, client, 'VSO', q_spike,\n",
|
|
"                           {**params, \"hostId\": hostId}) for hostId in delayedHostIds]\n",
|
|
"    pfs = [executor.submit(akn.execute_file, client, 'VSO', q_parallelism,\n",
|
|
"                           {**params, \"hostId\": hostId}) for hostId in delayedHostIds]\n",
|
|
"    # as_completed yields futures as they finish, so results are in completion order\n",
|
|
"    sfsResults = [f.result() for f in concurrent.futures.as_completed(sfs)]\n",
|
|
"    pfsResults = [f.result() for f in concurrent.futures.as_completed(pfs)]\n",
|
|
"\n",
|
|
"# convert to data frames\n",
|
|
"s_primary_results = [s.primary_results[0] for s in sfsResults]\n",
|
|
"spikeResultsDfs = None\n",
|
|
"\n",
|
|
"p_primary_results = [s.primary_results[0] for s in pfsResults]\n",
|
|
"parResultsDfs = None\n",
|
|
"\n",
|
|
"with concurrent.futures.ThreadPoolExecutor() as executor:\n",
|
|
" s_dataframe_futures = [executor.submit(akn.to_dataframe, r) for r in s_primary_results]\n",
|
|
" spikeResultsDfs = [dff.result() for dff in concurrent.futures.as_completed(s_dataframe_futures)]\n",
|
|
" p_dataframe_futures = [executor.submit(akn.to_dataframe, r) for r in p_primary_results]\n",
|
|
" parResultsDfs = [dff.result() for dff in concurrent.futures.as_completed(p_dataframe_futures)]\n",
|
|
"# drop references to the raw Kusto responses now that they have been converted to data frames\n",
|
|
"sfsResults = None\n",
|
|
"pfsResults = None"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"inputHidden": false,
|
|
"outputHidden": false
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"q_loadPerHost = os.path.join(delaysPath, \"LoadPerHost.csl\")\n",
|
|
"# utility functions\n",
|
|
"from itertools import groupby\n",
|
|
"content = ''\n",
|
|
"def r(*args):\n",
|
|
" '''construct a markdown report'''\n",
|
|
" global content\n",
|
|
" content += ''.join([str(a) for a in args]) + '\\n'\n",
|
|
"\n",
|
|
"startTime = akn.to_datetime(start)\n",
|
|
"t0 = startTime.replace(tzinfo=None)\n",
|
|
"\n",
|
|
"# report! \n",
|
|
"r('# OK SO WHAT HAPPENED')\n",
|
|
"r('|parameter|value|')\n",
|
|
"r('|---|---|')\n",
|
|
"r('|startTime|', startTime, '|')\n",
|
|
"r('|endTime|', akn.to_datetime(end), '|')\n",
|
|
"r('|scale unit|', su, '|')\n",
|
|
"r('|service|', service, '|')\n",
|
|
"\n",
|
|
"# jarvis params\n",
|
|
"jarvisParams = {\n",
|
|
" 'su': su, \n",
|
|
" 'start': akn.get_time(start, -10), \n",
|
|
" 'end': akn.get_time(end, 10), \n",
|
|
" 'service': service,\n",
|
|
" 'location': locationName,\n",
|
|
" 'account': params[\"mdmAccount\"]\n",
|
|
"}\n",
|
|
"\n",
|
|
"# abuse detection?\n",
|
|
"r('## What users are impacted?')\n",
|
|
"if len(finalabusersList) > 0:\n",
|
|
" r('INSIGHT: Found abusers -- this alert is likely a false alarm.')\n",
|
|
"r(akn.pandas_df_to_markdown_table(q_delayedAccountsAreAbusers_df)) \n",
|
|
"\n",
|
|
"\n",
|
|
"\n",
|
|
"# what changed? analysis\n",
|
|
"r('## What changed?')\n",
|
|
"if q_whatChanged_df.empty:\n",
|
|
" r(\"...no relevant config changes recorded during this period.\")\n",
|
|
"else:\n",
|
|
" # compute relative times and relevant changes\n",
|
|
" history = q_whatChanged_df\n",
|
|
" history['RelativeSeconds'] = history.apply(lambda row: (row.TIMESTAMP.replace(tzinfo=None) - t0).total_seconds(), axis=1)\n",
|
|
" relevant = history[abs(history.RelativeSeconds) < 3600]\n",
|
|
" \n",
|
|
" # analysis\n",
|
|
" upgrade = False\n",
|
|
" mitigation = False\n",
|
|
" vip_swap = False\n",
|
|
" ffs = False\n",
|
|
" for t in relevant.title.values:\n",
|
|
" l = t.lower()\n",
|
|
" upgrade = upgrade or 'upgrade' in l\n",
|
|
" mitigation = mitigation or 'mitigation' in l\n",
|
|
" vip_swap = vip_swap or 'vip' in l\n",
|
|
" ffs = ffs or 'feature flag' in l\n",
|
|
" \n",
|
|
" if upgrade:\n",
|
|
" r('INSIGHT: there were database upgrades in progress')\n",
|
|
" if mitigation:\n",
|
|
" r('INSIGHT: there were mitigations in progress')\n",
|
|
" if vip_swap:\n",
|
|
" r('INSIGHT: there was a vip swap just before this period.')\n",
|
|
" if ffs:\n",
|
|
" r('INSIGHT: there were feature flag changes right before this period.')\n",
|
|
" \n",
|
|
" # full table\n",
|
|
" r(akn.pandas_df_to_markdown_table(relevant[['TIMESTAMP', 'RelativeSeconds', 'title']]))\n",
|
|
" \n",
|
|
" \n",
|
|
"# active incidents?\n",
|
|
"r('## Active incidents?')\n",
|
|
"otherIncidentsCount = 0\n",
|
|
"\n",
|
|
"# count the incidents whose title does not contain 'Kalypso: Build Orchestrator Delays ICM'\n",
|
|
"if q_activeIncidentsResultDf is not None and not q_activeIncidentsResultDf.empty:\n",
|
|
"    for index, row in q_activeIncidentsResultDf.iterrows():\n",
|
|
"        if row.Title.find(\"Kalypso: Build Orchestrator Delays ICM\") == -1:\n",
|
|
"            otherIncidentsCount += 1\n",
|
|
"\n",
|
|
"# report at the top level so this section always gets a body, even when the\n",
|
|
"# incident query returned no rows (or no frame at all)\n",
|
|
"if otherIncidentsCount > 0:\n",
|
|
"    r(\"INSIGHT: There were incidents recorded during this period. These might be related:\")\n",
|
|
"    # render each incident id as a markdown link to its ICM page\n",
|
|
"    newDf = q_activeIncidentsResultDf.assign(URL=[*map(lambda x: \"\"\"[%s](https://icm.ad.msft.net/imp/v3/incidents/details/%s/home)\"\"\" % (x,x), q_activeIncidentsResultDf.IncidentId)])\n",
|
|
"    r(\"\\n\")\n",
|
|
"    r(akn.pandas_df_to_markdown_table(newDf[['URL','Severity','Title']]))\n",
|
|
"else:\n",
|
|
"    r(\"...no relevant incidents during this period.\")\n",
|
|
" \n",
|
|
" \n",
|
|
"r('## Queue Load')\n",
|
|
"ar = q_loadResultDf[q_loadResultDf[\"Name\"] == \"DTPlanQueued\"].values[:, 2]\n",
|
|
"queuedGreatherThan500 = np.where(ar > 500)\n",
|
|
"ar_max = np.amax(ar) if len(ar) else '?'\n",
|
|
"if len(queuedGreatherThan500[0]) > 0:\n",
|
|
" r('INSIGHT: There was a high rate of jobs queued during this period (max: ', ar_max, ' / minute)...')\n",
|
|
"else: \n",
|
|
" r('...everything looks good? (max: ', ar_max, ' / minute)')\n",
|
|
"\n",
|
|
" \n",
|
|
"r('## Parallelism')\n",
|
|
"for parResultsDf in parResultsDfs:\n",
|
|
" if len(parResultsDf.C.values) > 0: \n",
|
|
" usage = parResultsDf.C.values[0]\n",
|
|
" times = parResultsDf.sampleTime.values[0]\n",
|
|
" hostId = parResultsDf.HostId[0]\n",
|
|
" maxindex = np.argmax(usage)\n",
|
|
" maxvalue = usage[maxindex]\n",
|
|
" atTime = times[maxindex]\n",
|
|
" results = {value: len(list(freq)) for value, freq in groupby(sorted(usage))}\n",
|
|
" printed = False\n",
|
|
" r(\"\"\"\\nFor host: **%s**...\"\"\" % (hostId))\n",
|
|
" for key, value in results.items():\n",
|
|
" if key > 10:\n",
|
|
" r(\"\"\"\\nRunning plans (per 1min) %s : number of occurences during incident time %s\"\"\"%(key, value))\n",
|
|
" printed = True\n",
|
|
" if not printed:\n",
|
|
" r(\"\\nNothing found greater than 10\")\n",
|
|
" else:\n",
|
|
" r(\"\\n-\")\n",
|
|
" \n",
|
|
"\n",
|
|
"r('## Orchestration phase Load')\n",
|
|
"for spikeResultDf in spikeResultsDfs:\n",
|
|
" countResult = spikeResultDf.C.describe()\n",
|
|
" hostId = spikeResultDf[\"HostId\"].values[0]\n",
|
|
" upper = countResult[\"75%\"]\n",
|
|
" lower = countResult[\"25%\"]\n",
|
|
"    # Wondering what's going on here? We flag anomalies as values above Q3 + 1.5 * IQR, see https://www.purplemath.com/modules/boxwhisk3.htm\n",
|
|
" IQR = upper - lower\n",
|
|
" countResultOfInterest = spikeResultDf[spikeResultDf[\"C\"] > upper + 1.5 * IQR ].head(5)\n",
|
|
" unqCommands = list(dict.fromkeys(countResultOfInterest[\"Command\"].values).keys())\n",
|
|
" if len(unqCommands) > 0:\n",
|
|
" r(\"\"\"INSIGHT: Found anomalies for these phases in order highest to lowest for host: **%s**\"\"\" % hostId)\n",
|
|
"\n",
|
|
" # print commands table\n",
|
|
" r(akn.pandas_df_to_markdown_table(countResultOfInterest[[\"Command\", \"C\"]])) \n",
|
|
" \n",
|
|
" \n",
|
|
" if \"PlanCompleted\" in unqCommands:\n",
|
|
" if \"StartPlan\" in unqCommands or \"PlanStarted\" in unqCommands:\n",
|
|
" r(\"\\nTIP: An unusual number of plans were started during this period.\")\n",
|
|
" else:\n",
|
|
" r(\"\\nTIP: Jobs that are queued long ago might have completed now... creating this spike\") \n",
|
|
" \n",
|
|
" newParams = dict(params)\n",
|
|
" newParams[\"command\"] = next(iter(unqCommands)) \n",
|
|
" newParams[\"hostId\"] = hostId\n",
|
|
" r(akn.details_md('Kusto query for analyzing spike:', \n",
|
|
" tokenize(os.path.join(os.path.join(queryPath, \"delays\"), \"OrchestrationLogSpikeTip.csl\"), newParams)))\n",
|
|
" r(akn.details_md('Kusto for analyzing load:', tokenize(q_loadPerHost, newParams)))\n",
|
|
" \n",
|
|
" else:\n",
|
|
" r('...everything looks good?') \n",
|
|
" \n",
|
|
"# ja load\n",
|
|
"r()\n",
|
|
"r('## JA Load')\n",
|
|
"# raw string: the counter path contains literal backslashes (\\J and \\T are not valid Python escapes)\n",
|
|
"q_whatDelayedResultPendingJobsDf = q_whatDelayedResultDf[q_whatDelayedResultDf.Pivot == r\"\\JobService(_Total)\\Total Pending Jobs\"]\n",
|
|
"pendingGreaterThan50Result = np.where(q_whatDelayedResultPendingJobsDf.avg_CounterValue.values > 50)\n",
|
|
"if len(pendingGreaterThan50Result[0]) > 0:\n",
|
|
" max_pending_jobs = np.max(q_whatDelayedResultPendingJobsDf.avg_CounterValue.values)\n",
|
|
" r(\"INSIGHT: There was a high number of pending jobs during this period (max was %s). Note that this is for jobs including all priorities (even low priority ones)\" % (max_pending_jobs)) \n",
|
|
" \n",
|
|
" open_nb(os.path.join(root, 'ja.ipynb'), params, redirect=False)\n",
|
|
" jaUrl = baseUrl + \"/devops-pipelines/ja.ipynb\"\n",
|
|
" r('\\n\\n[JobAgent investigation notebook](', requote_uri(jaUrl), ')')\n",
|
|
"\n",
|
|
" jaJarvisLink = \"\"\"https://jarvis-west.dc.ad.msft.net/dashboard/VSO-ServiceInsights/PlatformViews/Compute-JA\"\"\" \\\n",
|
|
" \"\"\"?overrides=[{\"query\":\"//*[id='Service']\",\"key\":\"value\",\"replacement\":\"%(service)s\"},\"\"\" \\\n",
|
|
" \"\"\"{\"query\":\"//*[id='RoleInstance']\",\"key\":\"value\",\"replacement\":\"\"},\"\"\" \\\n",
|
|
" \"\"\"{\"query\":\"//*[id='LocationName']\",\"key\":\"value\",\"replacement\":\"%(location)s\"},\"\"\" \\\n",
|
|
" \"\"\"{\"query\":\"//dataSources\",\"key\":\"namespace\",\"replacement\":\"%(su)s\"},\"\"\" \\\n",
|
|
" \"\"\"{\"query\":\"//dataSources\",\"key\":\"account\",\"replacement\":\"%(account)s\"},\"\"\" \\\n",
|
|
" \"\"\"{\"query\":\"//*[id='ApplicationEndpoint']\",\"key\":\"regex\",\"replacement\":\"*%(location)s*\"},\"\"\" \\\n",
|
|
" \"\"\"{\"query\":\"//*[id='ScaleUnit']\",\"key\":\"value\",\"replacement\":\"%(su)s\"}]\"\"\" \\\n",
|
|
" \"\"\"&globalStartTime=%(start)s&globalEndTime=%(end)s&pinGlobalTimeRange=true\"\"\" % jarvisParams\n",
|
|
" r('\\n\\n[JobAgent health dashboard](', requote_uri(jaJarvisLink), ')')\n",
|
|
"else:\n",
|
|
" r('...everything looks good?')\n",
|
|
" \n",
|
|
" \n",
|
|
" \n",
|
|
" \n",
|
|
"# more analysis? \n",
|
|
"r('## What should we look at next?')\n",
|
|
"url = baseUrl + \"/devops-pipelines/sla.ipynb\"\n",
|
|
"SLAParams = {\n",
|
|
" \"triggerTime\": params[\"start\"],\n",
|
|
" \"scaleUnit\": params[\"su\"],\n",
|
|
" \"service\": params[\"service\"],\n",
|
|
" \"lookback\": \"1h\",\n",
|
|
" \"region\": \"\"\n",
|
|
"}\n",
|
|
"open_nb(os.path.join(root, 'sla.ipynb'), SLAParams, redirect=False)\n",
|
|
"r('\\n\\n[SLA investigation notebook](', requote_uri(url), ')') \n",
|
|
"\n",
|
|
"url = baseUrl + \"/devops-pipelines/impact.ipynb\"\n",
|
|
"open_nb(os.path.join(root, 'impact.ipynb'), params, redirect=False)\n",
|
|
"r('\\n\\n[Customer impact investigation notebook](', requote_uri(url), ')') \n",
|
|
"\n",
|
|
"# Scale unit health\n",
|
|
"jarvisLink = \"\"\"https://jarvis-west.dc.ad.msft.net/dashboard/VSO-ServiceInsights/DevOpsReports/DevOpsReports\"\"\" \\\n",
|
|
" \"\"\"?overrides=[{\"query\":\"//*[id='Service']\",\"key\":\"value\",\"replacement\":\"%(service)s\"},\"\"\" \\\n",
|
|
" \"\"\"{\"query\":\"//*[id='RoleInstance']\",\"key\":\"value\",\"replacement\":\"\"},\"\"\" \\\n",
|
|
" \"\"\"{\"query\":\"//*[id='LocationName']\",\"key\":\"value\",\"replacement\":\"%(location)s\"},\"\"\" \\\n",
|
|
" \"\"\"{\"query\":\"//dataSources\",\"key\":\"namespace\",\"replacement\":\"%(su)s\"},\"\"\" \\\n",
|
|
" \"\"\"{\"query\":\"//dataSources\",\"key\":\"account\",\"replacement\":\"%(account)s\"},\"\"\" \\\n",
|
|
" \"\"\"{\"query\":\"//*[id='ApplicationEndpoint']\",\"key\":\"regex\",\"replacement\":\"*%(location)s*\"},\"\"\" \\\n",
|
|
" \"\"\"{\"query\":\"//*[id='ScaleUnit']\",\"key\":\"value\",\"replacement\":\"%(su)s\"}]\"\"\" \\\n",
|
|
" \"\"\"&globalStartTime=%(start)s&globalEndTime=%(end)s&pinGlobalTimeRange=true\"\"\" % jarvisParams;\n",
|
|
"r('\\n\\n[Scale unit health dashboard (' + su + ', ' + service + ')](', requote_uri(jarvisLink), ')')\n",
|
|
"\n",
|
|
"\n",
|
|
"Markdown(content)\n",
|
|
"# print(content)"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"inputHidden": false,
|
|
"outputHidden": false
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# visualize delays\n",
|
|
"import plotly\n",
|
|
"from plotly import graph_objs as go\n",
|
|
"delays = go.Scatter(\n",
|
|
" x=q_affAccounts_df[\"PreciseTimeStamp\"],\n",
|
|
" y=q_affAccounts_df[\"MessageDelayInSeconds\"],\n",
|
|
" mode = 'lines',\n",
|
|
" name = 'Delays in seconds',\n",
|
|
" text= q_affAccounts_df['Name']\n",
|
|
")\n",
|
|
"\n",
|
|
"# one marker per recorded change, drawn on a fixed y=50 line\n",
|
|
"# NOTE(review): the report cell reads this frame's 'title' column; confirm that 'Name' exists as well\n",
|
|
"changed = go.Scatter(\n",
|
|
"    x=q_whatChanged_df[\"TIMESTAMP\"],\n",
|
|
"    y=np.repeat(50, len(q_whatChanged_df[\"TIMESTAMP\"].values)),\n",
|
|
"    mode = 'lines+markers',\n",
|
|
"    name = 'What Changed',\n",
|
|
"    text = q_whatChanged_df[\"Name\"],\n",
|
|
"    marker=dict(\n",
|
|
"        size=32,\n",
|
|
"        # deterministic per-point color index (one value per row) instead of unseeded random noise\n",
|
|
"        color = np.arange(len(q_whatChanged_df)),\n",
|
|
"        colorscale='Viridis'\n",
|
|
"    )\n",
|
|
")\n",
|
|
"\n",
|
|
"mitigations = go.Scatter(\n",
|
|
" x=q_haActions_df[\"PreciseTimeStamp\"],\n",
|
|
" y=np.repeat(50, len(q_haActions_df[\"PreciseTimeStamp\"].values)),\n",
|
|
" mode = 'markers',\n",
|
|
" name = 'Mitigations',\n",
|
|
" text = q_haActions_df[[\"MitigationName\", \"RoleInstance\"]].apply(lambda x: ''.join(x), axis=1),\n",
|
|
" marker = dict(\n",
|
|
" size = 10,\n",
|
|
" color = 'rgba(152, 0, 0, .8)',\n",
|
|
" line = dict(\n",
|
|
" width = 2,\n",
|
|
" color = 'rgb(0, 0, 0)'\n",
|
|
" )\n",
|
|
" )\n",
|
|
")\n",
|
|
"\n",
|
|
"data = [delays, changed, mitigations]\n",
|
|
"plotly.offline.iplot(data)"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"inputHidden": false,
|
|
"outputHidden": false
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [],
|
|
"outputs": [],
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"inputHidden": false,
|
|
"outputHidden": false
|
|
}
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernel_info": {
|
|
"name": "python3"
|
|
},
|
|
"kernelspec": {
|
|
"name": "python3",
|
|
"language": "python",
|
|
"display_name": "Python 3"
|
|
},
|
|
"language_info": {
|
|
"name": "python",
|
|
"version": "3.7.4",
|
|
"mimetype": "text/x-python",
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"pygments_lexer": "ipython3",
|
|
"nbconvert_exporter": "python",
|
|
"file_extension": ".py"
|
|
},
|
|
"nteract": {
|
|
"version": "0.14.5"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 0
|
|
}
|