# Impact Investigation
1. Run all cells.
1. View report at the bottom.

In [None]:
# Investigation parameters for this run; the cells below read these names directly.
# Scale unit under investigation (also substituted into the Jarvis 'ScaleUnit' override).
su = "tfs-cus-1"
# Boundaries of the impact window, as ISO-8601 strings with a trailing "Z".
start = "2019-07-20T16:00:00.0000000Z"
end = "2019-07-20T16:33:36.0000000Z"
# Notebook and project addresses; in this notebook they are only forwarded through `params`.
url = "https://notebooksv2.azure.com/yaananth/projects/06OasuNRs6rK/delays.ipynb"
baseUrl = "https://notebooksv2.azure.com/yaananth/projects/06OasuNRs6rK"
# Service name; used in the query parameters and in the Jarvis dashboard link.
service = "tfs"
# NOTE(review): `hub` is not referenced by any other cell visible in this notebook.
hub = "Build"
# Placeholder only: replaced by the "Tenant" value returned by LocationName.csl in the query cell.
locationName = "tfsprodcus1"

In [None]:
%%capture
!pip install nimport azure-kusto-notebooks

In [None]:
# Import the things we use

# Note you can also use kql https://docs.microsoft.com/en-us/azure/data-explorer/kqlmagic
# %kql is single line magic
# %%kql is cell magic

# https://nbviewer.jupyter.org/github/ipython/ipython/blob/4.0.x/examples/IPython%20Kernel/Rich%20Output.ipynb#HTML
# https://ipython.readthedocs.io/en/stable/interactive/magics.html
from IPython.display import display, HTML, Markdown, Javascript, clear_output

# http://pandas-docs.github.io/pandas-docs-travis/user_guide/reshaping.html
import pandas as pd
pd.options.display.html.table_schema = True
from pandas import Series, DataFrame
from datetime import datetime, timedelta, timezone
from urllib.parse import urlencode, quote_plus
from requests.utils import requote_uri
import time
import numpy as np
from matplotlib import pyplot as plt
from nimport.utils import tokenize, open_nb
import json
import os
import calendar as cal
import concurrent.futures
from azure.kusto.notebooks import utils as akn

In [None]:
# Parameters handed to every query file executed further down.
params = dict(
    su=su,
    start=start,
    end=end,
    url=url,
    baseUrl=baseUrl,
    service=service,
)

# The query files sit under devops-pipelines/queries; no prefix is needed
# when the working directory already is devops-pipelines.
insideRepoFolder = os.path.basename(os.getcwd()) == 'devops-pipelines'
root = '' if insideRepoFolder else 'devops-pipelines'
queryPath = os.path.join(root, 'queries')

In [None]:
# Authenticate the Kusto client for the VSO cluster; every 'VSO' query below is run through it.
# You will need to copy the token into a browser window for AAD auth.
client = akn.get_client('https://vso.kusto.windows.net', 'VSO')

In [None]:
# Authenticate a second Kusto client, this one for the IcM cluster (used by the active-incidents query).
# You will need to copy the token into a browser window for AAD auth.
# NOTE(review): the name is spelled 'IcMDataWarehouse' here but 'IcmDataWarehouse' where the
# ActiveIncidents query is submitted — confirm which casing is intended.
icm_client = akn.get_client('https://icmcluster.kusto.windows.net', 'IcMDataWarehouse')

In [None]:
# Locations of the query files used by this investigation.
q_loc = os.path.join(queryPath, "LocationName.csl")
q_whatChanged = os.path.join(queryPath, "WhatChanged.csl")
q_vipSwap = os.path.join(queryPath, "VIPSwap.csl")
q_activeIncidents = os.path.join(queryPath, "ActiveIncidents.csl")

impactPath = os.path.join(queryPath, "impact")
q_commands = os.path.join(impactPath, "CommandsReason.csl")
q_commandsAT = os.path.join(impactPath, "CommandsAT.csl")
q_commandsDb = os.path.join(impactPath, "CommandsDb.csl")

with concurrent.futures.ThreadPoolExecutor() as executor:
    # materialize location name immediately as we need this for other queries
    p1 = executor.submit(akn.execute_file, client, 'VSO', q_loc, params)
    locationNameResult = akn.to_dataframe_from_future(p1)
    if len(locationNameResult.index) == 0:
        # Fail with a readable message instead of an opaque KeyError from indexing an empty column.
        raise ValueError(
            "LocationName query returned no rows for scale unit %r; "
            "cannot resolve the location name needed by the other queries" % (params["su"],)
        )
    # Positional access: take the first row regardless of how the result is indexed.
    locationName = locationNameResult["Tenant"].iloc[0]
    params["locationName"] = locationName

    # The remaining queries are independent of each other, so submit them all before waiting.
    p2 = executor.submit(akn.execute_file, client, 'VSO', q_whatChanged, params)
    p3 = executor.submit(akn.execute_file, client, 'VSO', q_vipSwap, params)

    p4 = executor.submit(akn.execute_file, client, 'VSO', q_commandsAT, params)
    p5 = executor.submit(akn.execute_file, client, 'VSO', q_commandsDb, params)
    p6 = executor.submit(akn.execute_file, client, 'VSO', q_commands, params)

    # NOTE(review): the IcM client was created with 'IcMDataWarehouse' (capital M);
    # confirm which casing of the database name is intended.
    p7 = executor.submit(akn.execute_file, icm_client, 'IcmDataWarehouse',
                         q_activeIncidents, params)

# Leaving the `with` block waits for the submitted work; now turn each future into a DataFrame.
q_whatChanged_df = akn.to_dataframe_from_future(p2)

vipSwapResultDf = akn.to_dataframe_from_future(p3)

q_commandsAT_df = akn.to_dataframe_from_future(p4)

q_commandsDb_df = akn.to_dataframe_from_future(p5)

q_commands_df = akn.to_dataframe_from_future(p6)

q_activeIncidentsResultDf = akn.to_dataframe_from_future(p7)

In [None]:
# Report banner
separator = '=' * 50
print(separator)
print('Report!')
print(separator, '\n\n')

# jarvis params: the dashboard time range is the impact window shifted by
# -10 / +10 through akn.get_time (units are whatever akn.get_time defines)
jarvisParams = dict(
    su=su,
    start=akn.get_time(start, -10),
    end=akn.get_time(end, 10),
    service=service,
)

# jarvis: the pieces are concatenated into one template first, then the
# %(name)s placeholders are filled in from jarvisParams
jarvisTemplate = (
    """https://jarvis-west.dc.ad.msft.net/dashboard/VSO-ServiceInsights/DevOpsReports/TFS DevOpsReports"""
    """?overrides=[{"query":"//*[id='Service']","key":"value","replacement":"%(service)s"},"""
    """{"query":"//*[id='RoleInstance']","key":"value","replacement":""},"""
    """{"query":"//*[id='ScaleUnit']","key":"value","replacement":"%(su)s"}]"""
    """&globalStartTime=%(start)s&globalEndTime=%(end)s&pinGlobalTimeRange=true"""
)
jarvisLink = jarvisTemplate % jarvisParams
print('Jarvis dashboard link:\n', requote_uri(jarvisLink), '\n')

#
# vip swap: only the first returned row is considered, and it is reported
# only when it lies before the issue start
print()
print('Vip Swap? =============================')
swapMessage = '...no swaps recorded in the given time range'
if vipSwapResultDf.shape[0] > 0:
    viptime = vipSwapResultDf["TIMESTAMP"][0]
    starttime = akn.to_datetime(start)
    # tz info is dropped on both sides before subtracting
    delta = starttime.replace(tzinfo=None) - viptime.replace(tzinfo=None)
    if delta.total_seconds() > 0:
        wholeHours, leftoverSeconds = divmod(delta.seconds, 3600)
        swapMessage = "VIP SWAP happened: %s days %s hours %s minutes ago (%s) (issue start: %s)" % (
            delta.days, wholeHours, leftoverSeconds // 60, viptime, start)
print(swapMessage)
    
# slow failed reason analysis
def _reason_frequency(commands_df, reason_name):
    """Return the first "Frequency" value whose "Reason" equals reason_name.

    Returns 0 when the frame has no row for that reason, so a window with
    only slow (or only failed) commands does not raise IndexError.
    """
    matching = commands_df.loc[commands_df["Reason"] == reason_name, "Frequency"]
    return matching.iloc[0] if len(matching) > 0 else 0


print()
print('Is it slow commands or failed commands? =============================')
freq = q_commands_df["Frequency"]
# Relative spread of the frequencies; this is NaN when there are fewer than
# two rows because the sample standard deviation is undefined there.
coefficientOfVariance = freq.std()/freq.mean()
failedCount = _reason_frequency(q_commands_df, "failed")
slowCount = _reason_frequency(q_commands_df, "slow")
reason = "failed or slow"
# With fewer than two rows the spread cannot be computed; if the counts still
# differ, one reason is simply absent and the other one is the answer.
oneSided = len(freq) < 2 and failedCount != slowCount
if coefficientOfVariance > 0.5 or oneSided:
    reason = "failed" if failedCount > slowCount else "slow"
else:
    print("Slow and failed commands are too close, both might be contributing...")
print("Probably due to %s commands; Failed - %s, Slow - %s" % (reason, failedCount, slowCount))

# slow failed reason for AT?
print()
print('Is it {} because of AT? ============================='.format(reason))
atReasons = q_commandsAT_df["Reason"]
failed = q_commandsAT_df[atReasons == "failed"]
slow = q_commandsAT_df[atReasons == "slow"]
# any reason other than "failed" / "slow" keeps the unfiltered frame
data = {"failed": failed, "slow": slow}.get(reason, q_commandsAT_df)

atFrequency = data["Frequency"]
coefficientOfVariance = atFrequency.std()/atFrequency.mean()

# written as "not >" so that a NaN spread lands in the "same across" branch
if not coefficientOfVariance > 0.5:
    print("Seems be same across AT's for {} commands".format(reason))
else:
    print("Found variance in AT's for {} commands".format(reason))
    print(data.head(30))
    
# slow failed reason for Db?
print()
print('Is it {} because of Db? ============================='.format(reason))
dbReasons = q_commandsDb_df["Reason"]
failed = q_commandsDb_df[dbReasons == "failed"]
slow = q_commandsDb_df[dbReasons == "slow"]
# any reason other than "failed" / "slow" keeps the unfiltered frame
data = {"failed": failed, "slow": slow}.get(reason, q_commandsDb_df)

dbFrequency = data["Frequency"]
coefficientOfVariance = dbFrequency.std()/dbFrequency.mean()

# written as "not >" so that a NaN spread lands in the "same across" branch
if not coefficientOfVariance > 0.5:
    print("Seems be same across Db's for {} commands".format(reason))
else:
    print("Found variance in Db's for {} commands".format(reason))
    # hints for turning the short names in the table into full names
    print("Suffix '{}' to database server name".format(".database.windows.net"))
    dbNamePrefix = "".join([params["service"], "_", params["locationName"], "_"])
    print("Prefix '{}' to database name".format(dbNamePrefix))
    print(data.head(30))
    
# what changed? analysis
# Lists the change rows whose title mentions an upgrade or a mitigation; when
# there are rows but none match, the raw table is printed instead.
print()
print('What changed? =============================')
if len(q_whatChanged_df.index) == 0:
    print("No relevant changes found...")
else:
    up_prefix = ""
    mit_prefix = ""
    # One line per matching row; both kinds use the same newline-terminated
    # layout so consecutive rows never run together.
    rowFormat = "%s %s %s \n"
    matchedRows = []
    for index, row in q_whatChanged_df.iterrows():
        loweredTitle = row.title.lower()
        if 'upgrade' in loweredTitle:
            if not up_prefix:
                up_prefix = "Looks like, there's upgrade in progress...\n"
            matchedRows.append(rowFormat % (row.TIMESTAMP, row.title, row.buildNumber))
        # Not `elif`: a title mentioning both words is listed once per match.
        if 'mitigation' in loweredTitle:
            if not mit_prefix:
                mit_prefix = "Looks like, there are some mitigations by health agent...\n"
            matchedRows.append(rowFormat % (row.TIMESTAMP, row.title, row.buildNumber))
    text = "".join(matchedRows)

    if text:
        print(up_prefix + mit_prefix + text)
    else:
        print(q_whatChanged_df)
        
# active incidents?
print()
print('Active incidents? =============================')
# incidents whose title contains the impact monitor's own name are not counted
monitorTitle = "TFS Customer Impact Monitor"
otherIncidentsCount = sum(
    1
    for _, incident in q_activeIncidentsResultDf.iterrows()
    if incident.Title.find(monitorTitle) == -1
)

if otherIncidentsCount <= 0:
    print("No active incidents that could be related are found...")
else:
    print("We found some incidents during the time period, check if they are related...")

    # styling
    def make_clickable(url, text):
        """Return url formatted as a string; text is accepted but not used."""
        return '{0}'.format(url)

    incidentUrlTemplate = "https://icm.ad.msft.net/imp/v3/incidents/details/%s/home"
    incidentLinks = [
        make_clickable(incidentUrlTemplate % (incidentId), "ICMLink")
        for incidentId in q_activeIncidentsResultDf.IncidentId
    ]
    # the URL column is attached to newDf but is not among the printed columns
    newDf = q_activeIncidentsResultDf.assign(URL=incidentLinks)
    print("ICM link to copy - " + "https://icm.ad.msft.net/imp/v3/incidents/details/INCIDENTID/home")
    print(newDf[['IncidentId','Severity','Title']])