devops-pipelines/sla.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# SLA Investigation\n",
    "1. Run all cells! (click on Menu > Cell > Run All Cells)\n",
    "2. View the report at the bottom."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "inputHidden": false,
    "outputHidden": false,
    "tags": [
     "parameters"
    ]
   },
   "outputs": [],
   "source": [
    "# Default inputs for the investigation. This cell carries the 'parameters' tag,\n",
    "# presumably so a runner such as papermill can override the values -- confirm.\n",
    "triggerTime = '2019-07-20T16:00:00.0000000Z'  # converted with akn.to_kusto_datetime for the SLA query\n",
    "scaleUnit = 'tfs-cus-1'  # passed to the SLA query as ScaleUnit\n",
    "service = 'tfs'  # passed to the SLA query as Service"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "inputHidden": false,
    "outputHidden": false
   },
   "outputs": [],
   "source": [
    "%%capture \n",
    "\n",
    "# install packages, setup workspace root\n",
    "import sys\n",
    "\n",
    "# Invoke pip through the interpreter that runs this kernel so the packages are\n",
    "# installed into the environment the imports below resolve against; a bare\n",
    "# `pip` on PATH can belong to a different Python installation.\n",
    "!\"{sys.executable}\" -m pip install --upgrade pip azure-kusto-notebooks\n",
    "\n",
    "import os\n",
    "import datetime\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "pd.options.display.html.table_schema = True\n",
    "import concurrent.futures\n",
    "from azure.kusto.notebooks import utils as akn\n",
    "\n",
    "# The query files are addressed relative to the workspace root, so step up one\n",
    "# level when the kernel was started inside the notebook's own folder.\n",
    "current_dir_name = os.path.basename(os.getcwd())\n",
    "if current_dir_name == 'devops-pipelines':\n",
    "    os.chdir(os.pardir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "inputHidden": false,
    "outputHidden": false
   },
   "outputs": [],
   "source": [
    "# Create the Kusto client that the query cells below reuse.\n",
    "# Authentication is interactive: you will need to copy the token into a\n",
    "# browser window for AAD auth.\n",
    "cluster_url = 'https://vso.kusto.windows.net'\n",
    "client = akn.get_client(cluster_url, 'VSO')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "inputHidden": false,
    "outputHidden": false
   },
   "outputs": [],
   "source": [
    "# find orchestrations that violate SLA\n",
    "query = os.path.join('devops-pipelines', 'queries', 'sla', 'SLADurationAnalysis.csl')\n",
    "\n",
    "# string parameters are wrapped in double quotes before they are handed to the query\n",
    "quoted_service = '\"' + service + '\"'\n",
    "quoted_scale_unit = '\"' + scaleUnit + '\"'\n",
    "params = dict(\n",
    "    TriggerTime=akn.to_kusto_datetime(triggerTime),\n",
    "    Service=quoted_service,\n",
    "    ScaleUnit=quoted_scale_unit,\n",
    ")\n",
    "\n",
    "violations = akn.execute_file(client, database='VSO', path=query, params=params)\n",
    "# violations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "inputHidden": false,
    "outputHidden": false
   },
   "outputs": [],
   "source": [
    "# collect problematic orchestration ids\n",
    "result = violations.primary_results[0]\n",
    "oid_column_index = next((c.ordinal for c in result.columns if c.column_name == 'OrchestrationId'), None)\n",
    "su_column_index = next((c.ordinal for c in result.columns if c.column_name == 'ScaleUnit'), None)\n",
    "if oid_column_index is None or su_column_index is None:\n",
    "    # fail here with a clear message instead of an obscure lookup error on the first row\n",
    "    raise ValueError('SLA query result is missing the OrchestrationId or ScaleUnit column')\n",
    "\n",
    "# group the orchestration ids by scale unit\n",
    "by_su = {}\n",
    "for r in result.rows:\n",
    "    by_su.setdefault(r[su_column_index], []).append(r[oid_column_index])\n",
    "\n",
    "# scale unit(s) with the highest number of flagged orchestrations; ties are all\n",
    "# kept and sorted by name, and an empty result yields 0 and an empty list\n",
    "max_problems = max((len(oids) for oids in by_su.values()), default=0)\n",
    "max_scale_units = sorted(su for su, oids in by_su.items() if len(oids) == max_problems)\n",
    "\n",
    "# for su, oids in by_su.items():\n",
    "#     print(su)\n",
    "#     for oid in oids:\n",
    "#         print('   ', oid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": false,
    "inputHidden": false,
    "outputHidden": false
   },
   "outputs": [],
   "source": [
    "# collect visualization data sets: one query per violation row, run in parallel\n",
    "query = os.path.join('devops-pipelines', 'queries', 'sla', 'SLAVisualization.csl')\n",
    "with concurrent.futures.ThreadPoolExecutor() as executor:\n",
    "    history_futures = [\n",
    "        executor.submit(akn.execute_file, client, 'VSO', query, {\n",
    "            'ScaleUnit': '\"' + r[su_column_index] + '\"',\n",
    "            'OrchestrationId': '\"' + r[oid_column_index] + '\"'\n",
    "        })\n",
    "        for r in result.rows\n",
    "    ]\n",
    "    # Read the results in submission order rather than completion order: the\n",
    "    # queries still run in parallel, but the data frames (and the charts drawn\n",
    "    # from them) follow the order of the violation rows on every run.\n",
    "    histories = [f.result() for f in history_futures]\n",
    "\n",
    "# convert to data frames (executor.map also yields its results in input order)\n",
    "primary_results = [h.primary_results[0] for h in histories]\n",
    "with concurrent.futures.ThreadPoolExecutor() as executor:\n",
    "    dataframes = list(executor.map(akn.to_dataframe, primary_results))\n",
    "histories = None  # drop the reference to the raw query results\n",
    "\n",
    "# A plan is only kept as a violation when both required phases were recorded;\n",
    "# everything else is counted as a likely false positive.\n",
    "required_phases = ('RunAgentJob.SendJob', 'RunAgentJob.JobCompleted')\n",
    "filtered_dataframes = [df for df in dataframes if all(p in df['PhaseName'].values for p in required_phases)]\n",
    "number_of_false_positives = len(dataframes) - len(filtered_dataframes)\n",
    "dataframes = filtered_dataframes\n",
    "number_of_violations = len(dataframes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": false,
    "inputHidden": false,
    "outputHidden": false
   },
   "outputs": [],
   "source": [
    "# what was the worst phase?\n",
    "# Defaults keep the report cell below working when no phase can be singled out.\n",
    "worst_phaseName = 'unknown'\n",
    "worst_count = 0\n",
    "worst_team = 'unknown'\n",
    "if dataframes:\n",
    "    combined = pd.concat(dataframes, ignore_index=True)\n",
    "    # count the rows with Level == 2 per phase and keep the most frequent phase\n",
    "    worst_df = combined.loc[combined['Level'] == 2].groupby(['PhaseName']).size().to_frame('Count').nlargest(1, 'Count')\n",
    "    # without any Level == 2 row the frame is empty and index[0] would raise IndexError\n",
    "    if not worst_df.empty:\n",
    "        worst_phaseName = worst_df.index[0]\n",
    "        worst_count = worst_df.iat[0, 0]\n",
    "        # the part before the first '.' is what the report names as the assignee\n",
    "        worst_team = worst_phaseName.split('.')[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": false,
    "inputHidden": false,
    "outputHidden": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INSIGHT: we detected 10 false positives.\n",
      "INSIGHT! There are 2 plans out of SLA.\n",
      "INSIGHT: the most problems (8) are in su3\n",
      "INSIGHT: There might be a problem with RunAgentJob.SendJob. It was the slowest in 2 of the 2 SLA violations.\n",
      "ACTION: open icm against scale units: ['su3'] , assign it to: RunAgentJob\n"
     ]
    }
   ],
   "source": [
    "# report: summarize the findings and the suggested follow-up action\n",
    "print('INSIGHT: we detected', number_of_false_positives, 'likely false positives.')\n",
    "if number_of_violations > 0:\n",
    "    print('INSIGHT! There are', number_of_violations, 'plans out of SLA.')\n",
    "    busiest_scale_units = ', '.join(max_scale_units)\n",
    "    print('INSIGHT: the most problems (' + str(max_problems) + ')', 'are in', busiest_scale_units)\n",
    "    print('INSIGHT: There might be a problem with', worst_phaseName + '.',\n",
    "          'It was the slowest in', worst_count, 'of the', number_of_violations, 'SLA violations.')\n",
    "    print('ACTION: open icm against scale units:', max_scale_units, ', assign it to:', worst_team)\n",
    "else:\n",
    "    print('INSIGHT: no problems detected')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "inputHidden": false,
    "outputHidden": false
   },
   "outputs": [],
   "source": [
    "# view all histories: one bar chart per plan that is out of SLA\n",
    "%matplotlib inline\n",
    "\n",
    "plt.rcdefaults()\n",
    "\n",
    "# plt.subplots() cannot build a grid with zero rows, so only draw when there is\n",
    "# at least one violation; otherwise \"Run All\" would fail on the clean case.\n",
    "if number_of_violations > 0:\n",
    "    # squeeze=False always returns a 2-D array of axes, so a single plan needs\n",
    "    # no special handling\n",
    "    fig, axes = plt.subplots(nrows=number_of_violations,\n",
    "                             ncols=1,\n",
    "                             figsize=(8, 6 * number_of_violations),\n",
    "                             constrained_layout=True,\n",
    "                             squeeze=False)\n",
    "\n",
    "    vdf = akn.to_dataframe(violations.primary_results[0])\n",
    "    for ax, df in zip(axes[:, 0], dataframes):\n",
    "        ax.axhline(0, color='k')\n",
    "\n",
    "        x = df['PhaseName']\n",
    "        xpos = np.arange(len(x))\n",
    "        y = df['PercentDifference']\n",
    "        plan_id = df['PlanId'].iloc[0]\n",
    "\n",
    "        # look up the matching row of the SLA query result for the title details\n",
    "        violation_row = vdf.loc[vdf['PlanId'] == plan_id]\n",
    "        title = '\\n'.join([\n",
    "            'plan id:' + str(plan_id),\n",
    "            'scale unit:'     + str(violation_row['ScaleUnit'].iloc[0]),\n",
    "            'definition:'     + str(df['DefinitionName'].iloc[0]),\n",
    "            'plan duration: ' + str(violation_row['PlanDuration'].iloc[0]),\n",
    "            'sla duration: '  + str(violation_row['TotalSLADuration'].iloc[0]),\n",
    "        ])\n",
    "        ax.title.set_text(title)\n",
    "\n",
    "        ax.bar(x=xpos, height=y)\n",
    "        ax.set_ylabel('PercentDifference')\n",
    "        ax.set_xticks(xpos)\n",
    "        ax.set_xticklabels(x, rotation=45, ha='right')\n",
    "\n",
    "# output_filename = 'analysis.svg'\n",
    "# plt.savefig(output_filename, format='svg')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "inputHidden": false,
    "outputHidden": false
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  },
  "nteract": {
   "version": "nteract-on-jupyter@2.1.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}