hpc-analytics/HpcVisualization.py

import adal
import pyodbc
import requests
import json
from datetime import datetime, timedelta
import time
import random

# Configuration related
USER_ACCOUNT = r'******@microsoft.com'
USER_PASSWORD = r'******'

# PowerBI related configuration
REDIRECT_URI = r'https://login.live.com/oauth20_desktop.srf'
RESOURCE_URI = r'https://analysis.windows.net/powerbi/api'
AUTHORITY_URI = r'https://login.microsoftonline.com/common'
CLIENT_ID = r'******'

REPORT_NAME = r'HpcVisualizationReport'
DATASET_NAME = r'HpcVisualizationDataset'

REPORT_FILE_NAME = r'HpcVisualization.pbix'
DATASET_FILE_NAME = r'HpcDataset.json'

# SQL Sever related configuration
HOST_NAME = r'******'

# Local state related configuration
RECORD_FILE = r'HpcWorkRecord.json'

# Global variables
access_token = None
refresh_token = None
dataset_id = None
work_record = None
epoch = datetime(1970, 1, 1, 8)

# Authentication related functions
def acquire_token():    
    global access_token
    global refresh_token
    auth_context = adal.AuthenticationContext(AUTHORITY_URI)
    token = auth_context.acquire_token_with_username_password(resource=RESOURCE_URI, client_id=CLIENT_ID, username=USER_ACCOUNT, password=USER_PASSWORD)
    access_token = token['accessToken']
    refresh_token = token['refreshToken']
    print("acuqire_token ok")

def refresh_access_token():
    global access_token
    global refresh_token
    auth_context = adal.AuthenticationContext(AUTHORITY_URI)
    token = auth_context.acquire_token_with_refresh_token(refresh_token=refresh_token, client_id=CLIENT_ID, resource=RESOURCE_URI)
    access_token = token['accessToken']
    refresh_token = token['refreshToken']
    print("refresh_token ok")

# PowerBI service related functions
def push_report(report_file_name, report_name):
    url = f'https://api.powerbi.com/v1.0/myorg/imports?datasetDisplayName={report_name}'
    headers = {'Authorization': f'Bearer {access_token}', 'Content-Type': 'multipart/form-data'}
    report_file = open(report_file_name, 'rb')
    files = {'file': report_file}
    resp = requests.post(url, headers=headers, files = files)
    report_file.close()
    print(f"push_report {resp}")

    url = 'https://api.powerbi.com/v1.0/myorg/reports'
    headers = {'Authorization': f'Bearer {access_token}'}
    resp = requests.get(url, headers=headers)
    reports = resp.json()['value']
    for report in reports:
        if report['name'] == report_name:
            return report['id']
    return '0'

def push_dataset(dataset_file_name):
    url = 'https://api.powerbi.com/v1.0/myorg/datasets'
    headers = {'Authorization': f'Bearer {access_token}', 'Content-Type': 'application/json'}
    dataset_file = open(dataset_file_name, 'r')
    dataset = json.load(dataset_file)
    resp = requests.post(url, headers=headers, json=dataset)
    dataset_file.close()
    print(f"push_dataset {resp}")
    return resp.json()['id']

def rebind_report(report_id, dataset_id):
    url = f'https://api.powerbi.com/v1.0/myorg/reports/{report_id}/Rebind'
    headers = {'Authorization': f'Bearer {access_token}', 'Content-Type': 'application/json'}
    body = {'datasetId': dataset_id}
    resp = requests.post(url, headers=headers, json=body)
    print(f"rebind_report {resp}")

def set_dataset_id():
    global dataset_id
    url = 'https://api.powerbi.com/v1.0/myorg/datasets'
    headers = {'Authorization': f'Bearer {access_token}'}
    resp = requests.get(url, headers=headers)
    datasets = resp.json()['value']
    for dataset in datasets:
        if dataset['name'] == DATASET_NAME:
            dataset_id = dataset['id']
            print("set_dataset_id ok")
            return
    
    # target dataset does not exist, push it
    report_id = push_report(REPORT_FILE_NAME, REPORT_NAME)
    dataset_id = push_dataset(DATASET_FILE_NAME)
    rebind_report(report_id, dataset_id)
    print("set_dataset_id ok")

def post_row(table_name, row):
    url = f'https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/tables/{table_name}/rows'
    headers = {'Authorization': f'Bearer {access_token}'}
    body = {"rows": [row]}
    print(row)
    resp = requests.post(url, headers=headers, json=body)
    print(f"post_row to {table_name} {resp}\n")

# SQL server connection
def connect_sql():
    connString = 'Driver={SQL Server};' + f'Server={HOST_NAME}; Database=HPCScheduler; Trusted_connection=yes;'
    connection = pyodbc.connect(connString)
    return connection.cursor()    

# Work record in case of crash
def load_work_record():
    global work_record
    record_file = open(RECORD_FILE, 'r')
    work_record = json.load(record_file)
    record_file.close()

def save_work_record():
    global work_record
    record_file = open(RECORD_FILE, 'w')
    json.dump(work_record, record_file)
    record_file.close()  

# Following functions deal with data push to PowerBI service 
# JobInfo table related functions
def get_job_user(job_id):
    # random result for test
    users = ['Alice', 'Bob', 'Jenny', 'Vencent', 'FAREAST/t-wem', 'Jack']
    return users[random.randint(0, 5)]

    sql_cursor = connect_sql()
    sql_cursor.execute(f"SELECT CredentialId FROM dbo.Job WHERE Id = '{job_id}'")
    row = sql_cursor.fetchone()
    if row == None or row[0] == None:
        return 'Nobody'
    credential_id = row[0]
    sql_cursor.execute(f"SELECT OwnerId FROM dbo.Credential WHERE Id = '{credential_id}'")
    row = sql_cursor.fetchone()
    if row == None or row[0] == None:
        return 'Nobody'
    owner_id = row[0]
    sql_cursor.execute(f"SELECT Name FROM dbo.SID WHERE Id = '{owner_id}'")
    row = sql_cursor.fetchone()
    if row == None or row[0] == None:
        return 'Nobody'
    return row[0]

def update_job_info_table():
    global work_record
    last_id = work_record['JobInfo_LastId']
    sql_cursor = connect_sql()
    sql_cursor.execute("SELECT Id, JobId, RequeueId, SubmitTime, StartTime, EndTime FROM dbo.JobHistory " +
                       f"WHERE Id > '{last_id}' and Event = 1")
    row = sql_cursor.fetchone()
    while row and row[0] != None:
        work_record['JobInfo_LastId'] = row[0]
        save_work_record()
        item = {
            "JobId": row[1],
            "User": get_job_user(row[1]),
            "SubmitDate": row[3].strftime('%Y-%m-%d 00:00:00.0'),
            "SubmitTime": row[3].hour,
            "RequeueCount": row[2],
            "WaitingTime": (row[4] - row[3]).total_seconds(),
            "RunningTime": (row[5] - row[4]).total_seconds()
        }
        post_row('JobInfo', item)
        row = sql_cursor.fetchone()

# JobCost table related functions
def get_node_price(node_id):
    return 1.0

def update_job_cost_for_job(job_id):
    events = []
    sql_cursor = connect_sql()
    sql_cursor.execute("SELECT NodeId, StartTime, EndTime FROM dbo.AllocationHistory " +
                       f"WHERE JobId = {job_id} and TaskId = 0")
    row = sql_cursor.fetchone()
    while row and row[0] != None:
        events.append({"time": (row[1] - epoch).total_seconds() * 1000.0, "node_id": row[0], "change": "add"})
        events.append({"time": (row[2] - epoch).total_seconds() * 1000.0, "node_id": row[0], "change": "sub"})
        row = sql_cursor.fetchone()
    
    item = {
        "JobId": job_id,
        "TimeMs": 0,
        "CoresPrev": 0,
        "CoresCurr": 0,
        "PricePrev": 0.0,
        "PriceCurr": 0.0
    }
    for event in sorted(events, key=lambda k: k['time']):
        if event['time'] != item['TimeMs']:
            if item['TimeMs'] != 0: 
                post_row("JobCost", item)
            item['TimeMs'] = event['time']
            item['CoresPrev'] = item['CoresCurr']
            item['PricePrev'] = item['PriceCurr']
        if event['change'] == 'add':
            item['CoresCurr'] += 1
            item['PriceCurr'] += get_node_price(event['node_id'])
        else:
            item['CoresCurr'] -= 1
            item['PriceCurr'] -= get_node_price(event['node_id'])
    post_row("JobCost", item)

def update_job_cost_table():
    global work_record
    last_id = work_record['JobCost_LastId']
    sql_cursor = connect_sql()
    sql_cursor.execute(f"SELECT Id, JobId FROM dbo.JobHistory WHERE Id > '{last_id}' and Event = 1")
    row = sql_cursor.fetchone()
    while row and row[0] != None:
        work_record['JobCost_LastId'] = row[0]
        save_work_record()
        job_id = row[1]
        update_job_cost_for_job(job_id)
        row = sql_cursor.fetchone()

# TaskBill table related functions
def calculate_task_bill_for_task(task_id, total_requeue_count):
    effected = 0.0
    wasted = 0.0
    sql_cursor = connect_sql()
    sql_cursor.execute("SELECT NodeId, TaskRequeueCount, StartTime, EndTime FROM dbo.AllocationHistory " +
                       f"WHERE TaskId = '{task_id}'")
    row = sql_cursor.fetchone()
    while row and row[0] != None:
        price = get_node_price(row[0])
        duration = (row[3] - row[2]).total_seconds()
        if row[1] == total_requeue_count:
            effected += duration * price
        else:
            wasted += duration * price
        row = sql_cursor.fetchone()
    return {"Effected": effected, "Wasted": wasted}

def update_task_bill_for_job(job_id):
    sql_cursor = connect_sql()
    sql_cursor.execute(f"SELECT Id, RequeueCount FROM dbo.Task WHERE ParentJobId = '{job_id}'")
    row = sql_cursor.fetchone()
    while row and row[0] != None:
        task_id = row[0]
        requeue_count = row[1]
        item = calculate_task_bill_for_task(task_id, requeue_count)
        item['JobId'] = job_id
        item['TaskId'] = task_id
        item['RequeueCount'] = requeue_count
        post_row('TaskBill', item)
        row = sql_cursor.fetchone()

def update_task_bill_table():
    global work_record
    last_id = work_record['TaskBill_LastId']
    sql_cursor = connect_sql()
    sql_cursor.execute(f"SELECT Id, JobId FROM dbo.JobHistory WHERE Id > '{last_id}' and Event = 1")
    row = sql_cursor.fetchone()
    while row and row[0] != None:
        work_record['TaskBill_LastId'] = row[0]
        save_work_record()
        job_id = row[1]
        update_task_bill_for_job(job_id)
        row = sql_cursor.fetchone()

# NodeAllocation table related functions
def get_node_size(node_id):
    sql_cursor = connect_sql()
    sql_cursor.execute(f"SELECT NumCores FROM dbo.Node WHERE Id = '{node_id}'")
    row = sql_cursor.fetchone()
    if row and row[0] != None:
        return row[0]

def update_node_allocation_table():
    global work_record
    last_id = work_record['NodeAllocation_LastId']
    max_start_time = datetime(3000, 1, 1)
    sql_cursor = connect_sql()
    sql_cursor.execute("SELECT MIN(StartTime) FROM dbo.AllocationHistory WHERE EndTime IS NULL")
    row = sql_cursor.fetchone()
    while row and row[0] != None:
        max_start_time = row[0].strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
        row = sql_cursor.fetchone()

    sql_cursor.execute("SELECT Id, NodeId, CoreId, JobId, StartTime, EndTime FROM dbo.AllocationHistory " + \
                       f"WHERE Id > '{last_id}' and StartTime < '{max_start_time}' and TaskId = 0")
    row = sql_cursor.fetchone()
    while row and row[0] != None:
        work_record['NodeAllocation_LastId'] = row[0]
        save_work_record()
        item = {
            "NodeId": row[1], 
            "NodeSize": get_node_size(row[1]),
            "CoreId": row[2],
            "JobId": row[3],
            "StartTimeMs": (row[4] - epoch).total_seconds() * 1000.0,
            "EndTimeMs": (row[5] - epoch).total_seconds() * 1000.0,
            "Time": row[4].strftime('%Y-%m-%d %H:00:00.0')
        }
        post_row('NodeAllocation', item)
        row = sql_cursor.fetchone()

# ClusterUtility table related functions
def calculate_node_utility_for_hour(node_id, right_time):
    capacity = 0.0
    usage = 0.0
    left_time = right_time - timedelta(hours = 1)
    sql_cursor = connect_sql()
    # calculate capacity of the node in term of core*hour
    sql_cursor.execute(f"SELECT NumCores FROM dbo.NodeName WHERE Id = {node_id}")
    row = sql_cursor.fetchone()
    if row == None or row[0] == None:
        return
    node_size = row[0]
    sql_cursor.execute("SELECT Event, EventTime FROM dbo.NodeHistory WHERE " +
                       f"NodeId = {node_id} and (Event = 3 or Event = 4)")
    row = sql_cursor.fetchone()
    previous_time = left_time
    while row and row[0] != None:
        event = row[0]
        current_time = row[1]
        if current_time >= left_time and current_time <= right_time:
            if event == 4:
                capacity += (current_time - previous_time).total_seconds() / 3600 * node_size
            previous_time = current_time
        elif current_time > right_time and previous_time < right_time:
            if event == 4:
                capacity += (right_time - previous_time).total_seconds() / 3600 * node_size
            previous_time = right_time
        row = sql_cursor.fetchone()
    if event == 3:
        capacity += (right_time - previous_time).total_seconds() / 3600 * node_size

    # calculate usage of the node in term of core*hour
    sql_cursor.execute("SELECT StartTime, EndTime FROM dbo.AllocationHistory WHERE " +
                       f"NodeId = {node_id} and TaskId = 0 and " +
                       f"StartTime < '{right_time}' and EndTime > '{left_time}'")
    row = sql_cursor.fetchone()
    while row and row[0] != None:
        start_time = row[0]
        end_time = row[1]
        if start_time < left_time:
            start_time = left_time
        if end_time > right_time:
            end_time = right_time
        usage += (end_time - start_time).total_seconds() / 3600 * 1
        row = sql_cursor.fetchone()
    
    return {"Capacity": capacity, "Usage": usage}

def update_cluster_utility_for_hour(current_time):
    sql_cursor = connect_sql()
    sql_cursor.execute("SELECT DISTINCT(NodeId) FROM dbo.NodeHistory")
    nodes = []
    row = sql_cursor.fetchone()
    while row and row[0] != None:
        nodes.append(row[0])
        row = sql_cursor.fetchone()

    item = {
        "Time": (current_time - timedelta(hours=1)).strftime('%Y-%m-%d %H:00:00.0'),
        "Capacity": 0.0,
        "Usage": 0.0,
        "Utility": 0.0
    }    
    for node in nodes:
        utility = calculate_node_utility_for_hour(node, current_time)
        item['Capacity'] += utility['Capacity']
        item['Usage'] += utility['Usage']
    
    if item['Capacity'] > 0.0:
        item['Utility'] = item['Usage'] / item['Capacity']
    post_row('ClusterUtility', item)

def update_cluster_utility_table():
    global work_record
    last_time = datetime.strptime(work_record['ClusterUtility_LastTime'], '%Y-%m-%d %H:%M:%S.%f')
    sql_cursor = connect_sql()
    sql_cursor.execute("SELECT MIN(StartTime) FROM dbo.AllocationHistory")
    row = sql_cursor.fetchone()
    if row == None or row[0] == None:
        return
    round_time = row[0].replace(microsecond=0, second=0, minute=0)
    if last_time < round_time:
        last_time = round_time

    next_time = last_time + timedelta(hours=1)   
    while True:     
        sql_cursor.execute("SELECT MAX(StartTime) FROM dbo.AllocationHistory")
        row = sql_cursor.fetchone()
        if row == None or row[0] == None or row[0] <= next_time:
            return
        work_record['ClusterUtility_LastTime'] = next_time.strftime('%Y-%m-%d %H:%M:%S.%f')
        save_work_record()
        update_cluster_utility_for_hour(next_time)
        next_time += timedelta(hours=1)


# main loop
if __name__ == '__main__':
    previous_time = datetime.now()
    acquire_token()
    set_dataset_id()
    load_work_record()
    while True:
        update_job_info_table()
        update_job_cost_table()
        update_task_bill_table()
        update_node_allocation_table()
        update_cluster_utility_table()
        print("A turn finished")
        current_time = datetime.now()
        # refresh token every 50 minutes, as it expires at 60 minutes
        if current_time - previous_time > timedelta(minutes=50):
            previous_time = current_time
            refresh_access_token()
        time.sleep(10)
first commit 2018-09-28 11:21:19 +03:00			`import adal`
			`import pyodbc`
			`import requests`
			`import json`
			`from datetime import datetime, timedelta`
			`import time`
			`import random`

			`# Configuration related`
			`USER_ACCOUNT = r'******@microsoft.com'`
			`USER_PASSWORD = r'******'`

			`# PowerBI related configuration`
			`REDIRECT_URI = r'https://login.live.com/oauth20_desktop.srf'`
			`RESOURCE_URI = r'https://analysis.windows.net/powerbi/api'`
			`AUTHORITY_URI = r'https://login.microsoftonline.com/common'`
			`CLIENT_ID = r'******'`

			`REPORT_NAME = r'HpcVisualizationReport'`
			`DATASET_NAME = r'HpcVisualizationDataset'`

			`REPORT_FILE_NAME = r'HpcVisualization.pbix'`
			`DATASET_FILE_NAME = r'HpcDataset.json'`

			`# SQL Sever related configuration`
			`HOST_NAME = r'******'`

			`# Local state related configuration`
			`RECORD_FILE = r'HpcWorkRecord.json'`

			`# Global variables`
			`access_token = None`
			`refresh_token = None`
			`dataset_id = None`
			`work_record = None`
			`epoch = datetime(1970, 1, 1, 8)`

			`# Authentication related functions`
			`def acquire_token():`
			`global access_token`
			`global refresh_token`
			`auth_context = adal.AuthenticationContext(AUTHORITY_URI)`
			`token = auth_context.acquire_token_with_username_password(resource=RESOURCE_URI, client_id=CLIENT_ID, username=USER_ACCOUNT, password=USER_PASSWORD)`
			`access_token = token['accessToken']`
			`refresh_token = token['refreshToken']`
			`print("acuqire_token ok")`

			`def refresh_access_token():`
			`global access_token`
			`global refresh_token`
			`auth_context = adal.AuthenticationContext(AUTHORITY_URI)`
			`token = auth_context.acquire_token_with_refresh_token(refresh_token=refresh_token, client_id=CLIENT_ID, resource=RESOURCE_URI)`
			`access_token = token['accessToken']`
			`refresh_token = token['refreshToken']`
			`print("refresh_token ok")`

			`# PowerBI service related functions`
			`def push_report(report_file_name, report_name):`
			`url = f'https://api.powerbi.com/v1.0/myorg/imports?datasetDisplayName={report_name}'`
			`headers = {'Authorization': f'Bearer {access_token}', 'Content-Type': 'multipart/form-data'}`
			`report_file = open(report_file_name, 'rb')`
			`files = {'file': report_file}`
			`resp = requests.post(url, headers=headers, files = files)`
			`report_file.close()`
			`print(f"push_report {resp}")`

			`url = 'https://api.powerbi.com/v1.0/myorg/reports'`
			`headers = {'Authorization': f'Bearer {access_token}'}`
			`resp = requests.get(url, headers=headers)`
			`reports = resp.json()['value']`
			`for report in reports:`
			`if report['name'] == report_name:`
			`return report['id']`
			`return '0'`

			`def push_dataset(dataset_file_name):`
			`url = 'https://api.powerbi.com/v1.0/myorg/datasets'`
			`headers = {'Authorization': f'Bearer {access_token}', 'Content-Type': 'application/json'}`
			`dataset_file = open(dataset_file_name, 'r')`
			`dataset = json.load(dataset_file)`
			`resp = requests.post(url, headers=headers, json=dataset)`
			`dataset_file.close()`
			`print(f"push_dataset {resp}")`
			`return resp.json()['id']`

			`def rebind_report(report_id, dataset_id):`
			`url = f'https://api.powerbi.com/v1.0/myorg/reports/{report_id}/Rebind'`
			`headers = {'Authorization': f'Bearer {access_token}', 'Content-Type': 'application/json'}`
			`body = {'datasetId': dataset_id}`
			`resp = requests.post(url, headers=headers, json=body)`
			`print(f"rebind_report {resp}")`

			`def set_dataset_id():`
			`global dataset_id`
			`url = 'https://api.powerbi.com/v1.0/myorg/datasets'`
			`headers = {'Authorization': f'Bearer {access_token}'}`
			`resp = requests.get(url, headers=headers)`
			`datasets = resp.json()['value']`
			`for dataset in datasets:`
			`if dataset['name'] == DATASET_NAME:`
			`dataset_id = dataset['id']`
			`print("set_dataset_id ok")`
			`return`

			`# target dataset does not exist, push it`
			`report_id = push_report(REPORT_FILE_NAME, REPORT_NAME)`
			`dataset_id = push_dataset(DATASET_FILE_NAME)`
			`rebind_report(report_id, dataset_id)`
			`print("set_dataset_id ok")`

			`def post_row(table_name, row):`
			`url = f'https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/tables/{table_name}/rows'`
			`headers = {'Authorization': f'Bearer {access_token}'}`
			`body = {"rows": [row]}`
			`print(row)`
			`resp = requests.post(url, headers=headers, json=body)`
			`print(f"post_row to {table_name} {resp}\n")`

			`# SQL server connection`
			`def connect_sql():`
			`connString = 'Driver={SQL Server};' + f'Server={HOST_NAME}; Database=HPCScheduler; Trusted_connection=yes;'`
			`connection = pyodbc.connect(connString)`
			`return connection.cursor()`

			`# Work record in case of crash`
			`def load_work_record():`
			`global work_record`
			`record_file = open(RECORD_FILE, 'r')`
			`work_record = json.load(record_file)`
			`record_file.close()`

			`def save_work_record():`
			`global work_record`
			`record_file = open(RECORD_FILE, 'w')`
			`json.dump(work_record, record_file)`
			`record_file.close()`

			`# Following functions deal with data push to PowerBI service`
			`# JobInfo table related functions`
			`def get_job_user(job_id):`
			`# random result for test`
			`users = ['Alice', 'Bob', 'Jenny', 'Vencent', 'FAREAST/t-wem', 'Jack']`
			`return users[random.randint(0, 5)]`

			`sql_cursor = connect_sql()`
			`sql_cursor.execute(f"SELECT CredentialId FROM dbo.Job WHERE Id = '{job_id}'")`
			`row = sql_cursor.fetchone()`
			`if row == None or row[0] == None:`
			`return 'Nobody'`
			`credential_id = row[0]`
			`sql_cursor.execute(f"SELECT OwnerId FROM dbo.Credential WHERE Id = '{credential_id}'")`
			`row = sql_cursor.fetchone()`
			`if row == None or row[0] == None:`
			`return 'Nobody'`
			`owner_id = row[0]`
			`sql_cursor.execute(f"SELECT Name FROM dbo.SID WHERE Id = '{owner_id}'")`
			`row = sql_cursor.fetchone()`
			`if row == None or row[0] == None:`
			`return 'Nobody'`
			`return row[0]`

			`def update_job_info_table():`
			`global work_record`
			`last_id = work_record['JobInfo_LastId']`
			`sql_cursor = connect_sql()`
			`sql_cursor.execute("SELECT Id, JobId, RequeueId, SubmitTime, StartTime, EndTime FROM dbo.JobHistory " +`
			`f"WHERE Id > '{last_id}' and Event = 1")`
			`row = sql_cursor.fetchone()`
			`while row and row[0] != None:`
			`work_record['JobInfo_LastId'] = row[0]`
			`save_work_record()`
			`item = {`
			`"JobId": row[1],`
			`"User": get_job_user(row[1]),`
			`"SubmitDate": row[3].strftime('%Y-%m-%d 00:00:00.0'),`
			`"SubmitTime": row[3].hour,`
			`"RequeueCount": row[2],`
			`"WaitingTime": (row[4] - row[3]).total_seconds(),`
			`"RunningTime": (row[5] - row[4]).total_seconds()`
			`}`
			`post_row('JobInfo', item)`
			`row = sql_cursor.fetchone()`

			`# JobCost table related functions`
			`def get_node_price(node_id):`
			`return 1.0`

			`def update_job_cost_for_job(job_id):`
			`events = []`
			`sql_cursor = connect_sql()`
			`sql_cursor.execute("SELECT NodeId, StartTime, EndTime FROM dbo.AllocationHistory " +`
			`f"WHERE JobId = {job_id} and TaskId = 0")`
			`row = sql_cursor.fetchone()`
			`while row and row[0] != None:`
			`events.append({"time": (row[1] - epoch).total_seconds() * 1000.0, "node_id": row[0], "change": "add"})`
			`events.append({"time": (row[2] - epoch).total_seconds() * 1000.0, "node_id": row[0], "change": "sub"})`
			`row = sql_cursor.fetchone()`

			`item = {`
			`"JobId": job_id,`
			`"TimeMs": 0,`
			`"CoresPrev": 0,`
			`"CoresCurr": 0,`
			`"PricePrev": 0.0,`
			`"PriceCurr": 0.0`
			`}`
			`for event in sorted(events, key=lambda k: k['time']):`
			`if event['time'] != item['TimeMs']:`
			`if item['TimeMs'] != 0:`
			`post_row("JobCost", item)`
			`item['TimeMs'] = event['time']`
			`item['CoresPrev'] = item['CoresCurr']`
			`item['PricePrev'] = item['PriceCurr']`
			`if event['change'] == 'add':`
			`item['CoresCurr'] += 1`
			`item['PriceCurr'] += get_node_price(event['node_id'])`
			`else:`
			`item['CoresCurr'] -= 1`
			`item['PriceCurr'] -= get_node_price(event['node_id'])`
			`post_row("JobCost", item)`

			`def update_job_cost_table():`
			`global work_record`
			`last_id = work_record['JobCost_LastId']`
			`sql_cursor = connect_sql()`
			`sql_cursor.execute(f"SELECT Id, JobId FROM dbo.JobHistory WHERE Id > '{last_id}' and Event = 1")`
			`row = sql_cursor.fetchone()`
			`while row and row[0] != None:`
			`work_record['JobCost_LastId'] = row[0]`
			`save_work_record()`
			`job_id = row[1]`
			`update_job_cost_for_job(job_id)`
			`row = sql_cursor.fetchone()`

			`# TaskBill table related functions`
			`def calculate_task_bill_for_task(task_id, total_requeue_count):`
			`effected = 0.0`
			`wasted = 0.0`
			`sql_cursor = connect_sql()`
			`sql_cursor.execute("SELECT NodeId, TaskRequeueCount, StartTime, EndTime FROM dbo.AllocationHistory " +`
			`f"WHERE TaskId = '{task_id}'")`
			`row = sql_cursor.fetchone()`
			`while row and row[0] != None:`
			`price = get_node_price(row[0])`
			`duration = (row[3] - row[2]).total_seconds()`
			`if row[1] == total_requeue_count:`
			`effected += duration * price`
			`else:`
			`wasted += duration * price`
			`row = sql_cursor.fetchone()`
			`return {"Effected": effected, "Wasted": wasted}`

			`def update_task_bill_for_job(job_id):`
			`sql_cursor = connect_sql()`
			`sql_cursor.execute(f"SELECT Id, RequeueCount FROM dbo.Task WHERE ParentJobId = '{job_id}'")`
			`row = sql_cursor.fetchone()`
			`while row and row[0] != None:`
			`task_id = row[0]`
			`requeue_count = row[1]`
			`item = calculate_task_bill_for_task(task_id, requeue_count)`
			`item['JobId'] = job_id`
			`item['TaskId'] = task_id`
			`item['RequeueCount'] = requeue_count`
			`post_row('TaskBill', item)`
			`row = sql_cursor.fetchone()`

			`def update_task_bill_table():`
			`global work_record`
			`last_id = work_record['TaskBill_LastId']`
			`sql_cursor = connect_sql()`
			`sql_cursor.execute(f"SELECT Id, JobId FROM dbo.JobHistory WHERE Id > '{last_id}' and Event = 1")`
			`row = sql_cursor.fetchone()`
			`while row and row[0] != None:`
			`work_record['TaskBill_LastId'] = row[0]`
			`save_work_record()`
			`job_id = row[1]`
			`update_task_bill_for_job(job_id)`
			`row = sql_cursor.fetchone()`

			`# NodeAllocation table related functions`
			`def get_node_size(node_id):`
			`sql_cursor = connect_sql()`
			`sql_cursor.execute(f"SELECT NumCores FROM dbo.Node WHERE Id = '{node_id}'")`
			`row = sql_cursor.fetchone()`
			`if row and row[0] != None:`
			`return row[0]`

			`def update_node_allocation_table():`
			`global work_record`
			`last_id = work_record['NodeAllocation_LastId']`
			`max_start_time = datetime(3000, 1, 1)`
			`sql_cursor = connect_sql()`
			`sql_cursor.execute("SELECT MIN(StartTime) FROM dbo.AllocationHistory WHERE EndTime IS NULL")`
			`row = sql_cursor.fetchone()`
			`while row and row[0] != None:`
			`max_start_time = row[0].strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]`
			`row = sql_cursor.fetchone()`

			`sql_cursor.execute("SELECT Id, NodeId, CoreId, JobId, StartTime, EndTime FROM dbo.AllocationHistory " + \`
			`f"WHERE Id > '{last_id}' and StartTime < '{max_start_time}' and TaskId = 0")`
			`row = sql_cursor.fetchone()`
			`while row and row[0] != None:`
			`work_record['NodeAllocation_LastId'] = row[0]`
			`save_work_record()`
			`item = {`
			`"NodeId": row[1],`
			`"NodeSize": get_node_size(row[1]),`
			`"CoreId": row[2],`
			`"JobId": row[3],`
			`"StartTimeMs": (row[4] - epoch).total_seconds() * 1000.0,`
			`"EndTimeMs": (row[5] - epoch).total_seconds() * 1000.0,`
			`"Time": row[4].strftime('%Y-%m-%d %H:00:00.0')`
			`}`
			`post_row('NodeAllocation', item)`
			`row = sql_cursor.fetchone()`

			`# ClusterUtility table related functions`
			`def calculate_node_utility_for_hour(node_id, right_time):`
			`capacity = 0.0`
			`usage = 0.0`
			`left_time = right_time - timedelta(hours = 1)`
			`sql_cursor = connect_sql()`
			`# calculate capacity of the node in term of core*hour`
			`sql_cursor.execute(f"SELECT NumCores FROM dbo.NodeName WHERE Id = {node_id}")`
			`row = sql_cursor.fetchone()`
			`if row == None or row[0] == None:`
			`return`
			`node_size = row[0]`
			`sql_cursor.execute("SELECT Event, EventTime FROM dbo.NodeHistory WHERE " +`
			`f"NodeId = {node_id} and (Event = 3 or Event = 4)")`
			`row = sql_cursor.fetchone()`
			`previous_time = left_time`
			`while row and row[0] != None:`
			`event = row[0]`
			`current_time = row[1]`
			`if current_time >= left_time and current_time <= right_time:`
			`if event == 4:`
			`capacity += (current_time - previous_time).total_seconds() / 3600 * node_size`
			`previous_time = current_time`
			`elif current_time > right_time and previous_time < right_time:`
			`if event == 4:`
			`capacity += (right_time - previous_time).total_seconds() / 3600 * node_size`
			`previous_time = right_time`
			`row = sql_cursor.fetchone()`
			`if event == 3:`
			`capacity += (right_time - previous_time).total_seconds() / 3600 * node_size`

			`# calculate usage of the node in term of core*hour`
			`sql_cursor.execute("SELECT StartTime, EndTime FROM dbo.AllocationHistory WHERE " +`
			`f"NodeId = {node_id} and TaskId = 0 and " +`
			`f"StartTime < '{right_time}' and EndTime > '{left_time}'")`
			`row = sql_cursor.fetchone()`
			`while row and row[0] != None:`
			`start_time = row[0]`
			`end_time = row[1]`
			`if start_time < left_time:`
			`start_time = left_time`
			`if end_time > right_time:`
			`end_time = right_time`
			`usage += (end_time - start_time).total_seconds() / 3600 * 1`
			`row = sql_cursor.fetchone()`

			`return {"Capacity": capacity, "Usage": usage}`

			`def update_cluster_utility_for_hour(current_time):`
			`sql_cursor = connect_sql()`
			`sql_cursor.execute("SELECT DISTINCT(NodeId) FROM dbo.NodeHistory")`
			`nodes = []`
			`row = sql_cursor.fetchone()`
			`while row and row[0] != None:`
			`nodes.append(row[0])`
			`row = sql_cursor.fetchone()`

			`item = {`
			`"Time": (current_time - timedelta(hours=1)).strftime('%Y-%m-%d %H:00:00.0'),`
			`"Capacity": 0.0,`
			`"Usage": 0.0,`
			`"Utility": 0.0`
			`}`
			`for node in nodes:`
			`utility = calculate_node_utility_for_hour(node, current_time)`
			`item['Capacity'] += utility['Capacity']`
			`item['Usage'] += utility['Usage']`

			`if item['Capacity'] > 0.0:`
			`item['Utility'] = item['Usage'] / item['Capacity']`
			`post_row('ClusterUtility', item)`

			`def update_cluster_utility_table():`
			`global work_record`
			`last_time = datetime.strptime(work_record['ClusterUtility_LastTime'], '%Y-%m-%d %H:%M:%S.%f')`
			`sql_cursor = connect_sql()`
			`sql_cursor.execute("SELECT MIN(StartTime) FROM dbo.AllocationHistory")`
			`row = sql_cursor.fetchone()`
			`if row == None or row[0] == None:`
			`return`
			`round_time = row[0].replace(microsecond=0, second=0, minute=0)`
			`if last_time < round_time:`
			`last_time = round_time`

			`next_time = last_time + timedelta(hours=1)`
			`while True:`
			`sql_cursor.execute("SELECT MAX(StartTime) FROM dbo.AllocationHistory")`
			`row = sql_cursor.fetchone()`
			`if row == None or row[0] == None or row[0] <= next_time:`
			`return`
			`work_record['ClusterUtility_LastTime'] = next_time.strftime('%Y-%m-%d %H:%M:%S.%f')`
			`save_work_record()`
			`update_cluster_utility_for_hour(next_time)`
			`next_time += timedelta(hours=1)`


			`# main loop`
			`if __name__ == '__main__':`
			`previous_time = datetime.now()`
			`acquire_token()`
			`set_dataset_id()`
			`load_work_record()`
			`while True:`
			`update_job_info_table()`
			`update_job_cost_table()`
			`update_task_bill_table()`
			`update_node_allocation_table()`
			`update_cluster_utility_table()`
			`print("A turn finished")`
			`current_time = datetime.now()`
			`# refresh token every 50 minutes, as it expires at 60 minutes`
			`if current_time - previous_time > timedelta(minutes=50):`
			`previous_time = current_time`
			`refresh_access_token()`
			`time.sleep(10)`