devops-pipelines/sla.ipynb

310 строки
9.8 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# SLA Investigation\n",
"1. Run all cells (click on Menu > Cell > Run All Cells).\n",
"2. View the report at the bottom."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"inputHidden": false,
"outputHidden": false,
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"# inputs for the investigation (this cell is tagged 'parameters')\n",
"triggerTime = '2019-07-20T16:00:00.0000000Z'  # converted with akn.to_kusto_datetime and sent as TriggerTime\n",
"scaleUnit = 'tfs-cus-1'  # sent to the SLA query as ScaleUnit\n",
"service = 'tfs'  # sent to the SLA query as Service"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"inputHidden": false,
"outputHidden": false
},
"outputs": [],
"source": [
"%%capture \n",
"\n",
"# install/upgrade pip and the azure-kusto-notebooks package\n",
"!pip install --upgrade pip azure-kusto-notebooks\n",
"\n",
"# standard library\n",
"import concurrent.futures\n",
"import datetime\n",
"import os\n",
"import sys\n",
"\n",
"# third party\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"from azure.kusto.notebooks import utils as akn\n",
"\n",
"pd.options.display.html.table_schema = True\n",
"\n",
"# the query files are addressed relative to the workspace root,\n",
"# so step up one level when the kernel starts inside the notebook's own folder\n",
"if os.path.split(os.getcwd())[1] == 'devops-pipelines':\n",
"    os.chdir(os.pardir)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"inputHidden": false,
"outputHidden": false
},
"outputs": [],
"source": [
"# create the kusto client that the query cells below reuse\n",
"# authentication is interactive: you will need to copy the token into a browser window for AAD auth\n",
"client = akn.get_client(\n",
"    'https://vso.kusto.windows.net',\n",
"    'VSO',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"inputHidden": false,
"outputHidden": false
},
"outputs": [],
"source": [
"# find orchestrations that violate SLA\n",
"query = os.path.join('devops-pipelines', 'queries', 'sla', 'SLADurationAnalysis.csl')\n",
"\n",
"# Service and ScaleUnit are wrapped in double quotes before being handed to the query\n",
"params = dict(\n",
"    TriggerTime=akn.to_kusto_datetime(triggerTime),\n",
"    Service='\"' + service + '\"',\n",
"    ScaleUnit='\"' + scaleUnit + '\"',\n",
")\n",
"violations = akn.execute_file(client, database='VSO', path=query, params=params)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"inputHidden": false,
"outputHidden": false
},
"outputs": [],
"source": [
"# collect problematic orchestration ids\n",
"result = violations.primary_results[0]\n",
"oid_column_index = next((c.ordinal for c in result.columns if c.column_name == 'OrchestrationId'), None)\n",
"su_column_index = next((c.ordinal for c in result.columns if c.column_name == 'ScaleUnit'), None)\n",
"\n",
"# fail here with a readable message rather than on the first row lookup with a None index\n",
"if oid_column_index is None or su_column_index is None:\n",
"    raise KeyError('SLADurationAnalysis result is missing the OrchestrationId or ScaleUnit column')\n",
"\n",
"# group the orchestration ids by scale unit\n",
"by_su = {}\n",
"for r in result.rows:\n",
"    by_su.setdefault(r[su_column_index], []).append(r[oid_column_index])\n",
"\n",
"# scale unit(s) with the most rows; ties are all kept, in sorted order.\n",
"# with no rows at all this leaves max_problems == 0 and max_scale_units == []\n",
"max_problems = max((len(oids) for oids in by_su.values()), default=0)\n",
"max_scale_units = sorted(su for su, oids in by_su.items() if len(oids) == max_problems)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false,
"inputHidden": false,
"outputHidden": false
},
"outputs": [],
"source": [
"# collect visualization data sets: one SLAVisualization query per row of the SLA query result\n",
"query = os.path.join('devops-pipelines', 'queries', 'sla', 'SLAVisualization.csl')\n",
"with concurrent.futures.ThreadPoolExecutor() as executor:\n",
"    history_futures = [\n",
"        executor.submit(akn.execute_file, client, 'VSO', query, {\n",
"            'ScaleUnit': '\"' + r[su_column_index] + '\"',\n",
"            'OrchestrationId': '\"' + r[oid_column_index] + '\"'\n",
"        })\n",
"        for r in result.rows\n",
"    ]\n",
"    # read the futures in submission order (not completion order) so the histories,\n",
"    # and therefore the plots, come out in the same order as result.rows on every run\n",
"    histories = [f.result() for f in history_futures]\n",
"\n",
"# convert to data frames, again keeping submission order\n",
"primary_results = [h.primary_results[0] for h in histories]\n",
"with concurrent.futures.ThreadPoolExecutor() as executor:\n",
"    dataframe_futures = [executor.submit(akn.to_dataframe, pr) for pr in primary_results]\n",
"    dataframes = [f.result() for f in dataframe_futures]\n",
"histories = None  # drop the reference to the raw query results\n",
"\n",
"# try to filter out false positives: a history only counts as a violation\n",
"# when every phase listed in required_phases was recorded for it\n",
"required_phases = ('RunAgentJob.SendJob', 'RunAgentJob.JobCompleted')\n",
"filtered_dataframes = [\n",
"    df for df in dataframes\n",
"    if all(p in df['PhaseName'].values for p in required_phases)\n",
"]\n",
"number_of_false_positives = len(dataframes) - len(filtered_dataframes)\n",
"dataframes = filtered_dataframes\n",
"number_of_violations = len(dataframes)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false,
"inputHidden": false,
"outputHidden": false
},
"outputs": [],
"source": [
"# what was the worst phase?\n",
"# start from placeholders so the report cell still runs, and never shows values left over\n",
"# from an earlier execution, when there is nothing to rank\n",
"worst_phaseName = 'unknown'\n",
"worst_count = 0\n",
"worst_team = 'unknown'\n",
"if dataframes:\n",
"    combined = pd.concat(dataframes, ignore_index=True)\n",
"    # count the Level == 2 rows per phase and keep the phase with the highest count\n",
"    worst_df = combined.loc[combined['Level'] == 2].groupby(['PhaseName']).size().to_frame('Count').nlargest(1, 'Count')\n",
"    if not worst_df.empty:  # without any Level == 2 row, index[0] would raise IndexError\n",
"        worst_phaseName = worst_df.index[0]\n",
"        worst_count = worst_df.iat[0, 0]\n",
"        # the part of the phase name before the first '.' is what the report assigns the incident to\n",
"        worst_team = worst_phaseName.split('.')[0]"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false,
"inputHidden": false,
"outputHidden": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INSIGHT: we detected 10 false positives.\n",
"INSIGHT! There are 2 plans out of SLA.\n",
"INSIGHT: the most problems (8) are in su3\n",
"INSIGHT: There might be a problem with RunAgentJob.SendJob. It was the slowest in 2 of the 2 SLA violations.\n",
"ACTION: open icm against scale units: ['su3'] , assign it to: RunAgentJob\n"
]
}
],
"source": [
"# print the findings gathered by the cells above\n",
"print('INSIGHT: we detected {} likely false positives.'.format(number_of_false_positives))\n",
"if number_of_violations > 0:\n",
"    print('INSIGHT! There are {} plans out of SLA.'.format(number_of_violations))\n",
"    # note: max_problems was counted over every row of the SLA query result,\n",
"    # i.e. before the false positives were filtered out\n",
"    print('INSIGHT: the most problems ({}) are in {}'.format(max_problems, ', '.join(max_scale_units)))\n",
"    print('INSIGHT: There might be a problem with {}. It was the slowest in {} of the {} SLA violations.'.format(\n",
"        worst_phaseName, worst_count, number_of_violations))\n",
"    print('ACTION: open icm against scale units: {} , assign it to: {}'.format(max_scale_units, worst_team))\n",
"else:\n",
"    print('INSIGHT: no problems detected')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"inputHidden": false,
"outputHidden": false
},
"outputs": [],
"source": [
"# view all histories: one bar chart of PercentDifference per phase for each violation\n",
"%matplotlib inline\n",
"\n",
"plt.rcdefaults()\n",
"\n",
"# plt.subplots needs at least one row, so only build the figure when there is something to draw\n",
"if number_of_violations > 0:\n",
"    # squeeze=False always returns a 2-D array of axes, so a single violation needs no special case\n",
"    fig, axes = plt.subplots(nrows=number_of_violations,\n",
"                             ncols=1,\n",
"                             figsize=(8, 6 * number_of_violations),\n",
"                             constrained_layout=True,\n",
"                             squeeze=False)\n",
"\n",
"    vdf = akn.to_dataframe(violations.primary_results[0])\n",
"    for ax, df in zip(axes[:, 0], dataframes):\n",
"        ax.axhline(0, color='k')\n",
"\n",
"        x = df['PhaseName']\n",
"        xpos = np.arange(len(x))\n",
"        y = df['PercentDifference']\n",
"        plan_id = df['PlanId'].iloc[0]\n",
"\n",
"        # look up this plan in the SLA query result for the title details\n",
"        # NOTE(review): assumes every plotted plan id has a row in that result - confirm\n",
"        violation_row = vdf.loc[vdf['PlanId'] == plan_id]\n",
"        title = '\\n'.join([\n",
"            'plan id:' + str(plan_id),\n",
"            'scale unit:' + str(violation_row['ScaleUnit'].iloc[0]),\n",
"            'definition:' + str(df['DefinitionName'].iloc[0]),\n",
"            'plan duration: ' + str(violation_row['PlanDuration'].iloc[0]),\n",
"            'sla duration: ' + str(violation_row['TotalSLADuration'].iloc[0]),\n",
"        ])\n",
"        ax.title.set_text(title)\n",
"\n",
"        ax.bar(x=xpos, height=y)\n",
"        ax.set_xticks(xpos)\n",
"        ax.set_xticklabels(x, rotation=45, ha=\"right\")\n",
"\n",
"# output_filename = 'analysis.svg'\n",
"# plt.savefig(output_filename, format='svg')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"inputHidden": false,
"outputHidden": false
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
},
"nteract": {
"version": "nteract-on-jupyter@2.1.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}