#!/usr/bin/env python3

# Copyright (c) Microsoft Corporation
#
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

# stdlib imports
import argparse
import copy
import datetime
import json
import pathlib
import subprocess
import sys
# non-stdlib imports
import azure.cosmosdb.table as azuretable

# global defines
_PARTITION_KEY = None
_TABLE_NAME = None


def _create_credentials(config: dict) -> azuretable.TableService:
    """Create authenticated table client
    :param dict config: configuration dict
    :rtype: azure.cosmosdb.table.TableService
    :return: table client
    """
    global _PARTITION_KEY, _TABLE_NAME
    _PARTITION_KEY = '{}${}'.format(
        config['credentials']['batch']['account'],
        config['pool_specification']['id'])
    try:
        sep = config['batch_shipyard']['storage_entity_prefix']
        if sep is None or len(sep) == 0:
            raise KeyError()
    except KeyError:
        sep = 'shipyard'
    _TABLE_NAME = sep + 'perf'
    ssel = config['batch_shipyard']['storage_account_settings']
    table_client = azuretable.TableService(
        account_name=config['credentials']['storage'][ssel]['account'],
        account_key=config['credentials']['storage'][ssel]['account_key'],
        endpoint_suffix=config['credentials']['storage'][ssel]['endpoint'])
    return table_client
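
# illustrative sketch of the config keys read by _create_credentials above;
# all values are hypothetical placeholders, not real account data:
#   {
#     'credentials': {
#       'batch': {'account': 'mybatchaccount'},
#       'storage': {
#         'mystoragelink': {
#           'account': 'mystorageaccount',
#           'account_key': 'cHVyZWx5aHlwb3RoZXRpY2Fs',
#           'endpoint': 'core.windows.net',
#         },
#       },
#     },
#     'batch_shipyard': {
#       'storage_entity_prefix': 'shipyard',
#       'storage_account_settings': 'mystoragelink',
#     },
#     'pool_specification': {'id': 'mypool'},
#   }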


def _compute_delta_t(
        data: dict, nodeid: str, event1: str, event1_pos: int, event2: str,
        event2_pos: int) -> float:
    """Compute time delta between two events
    :param dict data: data
    :param str nodeid: node id
    :param str event1: event1
    :param int event1_pos: event1 position in stream
    :param str event2: event2
    :param int event2_pos: event2 position in stream
    :rtype: float
    :return: delta t of events
    """
    # attempt to get directly recorded diff
    try:
        return data[nodeid][event2][event2_pos]['message']['diff']
    except (TypeError, KeyError):
        return (data[nodeid][event2][event2_pos]['timestamp'] -
                data[nodeid][event1][event1_pos]['timestamp']).total_seconds()
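
# note: a 'diff' recorded directly in the end event's message takes
# precedence; the timestamp subtraction above is only the fallback when no
# diff was logged (KeyError) or the message is None (TypeError). e.g., with
# hypothetical data where event2's message is {'diff': 0.25}, the stored
# timestamps are ignored and 0.25 is returned.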


def _parse_message(event: str, msg: str) -> dict:
    """Parse message
    :param str event: event
    :param str msg: message
    :rtype: dict
    :return: dict of message entries
    """
    parts = msg.split(',')
    m = {}
    for part in parts:
        tmp = part.split('=')
        if tmp[0] == 'size':
            if event == 'cascade:pull-end':
                sz = tmp[1].split()
                sz[0] = float(sz[0])
                if sz[1] == 'kB':
                    sz[0] *= 1024
                elif sz[1] == 'MB':
                    sz[0] *= 1024 * 1024
                elif sz[1] == 'GB':
                    sz[0] *= 1024 * 1024 * 1024
                elif sz[1] == 'TB':
                    sz[0] *= 1024 * 1024 * 1024 * 1024
                tmp[1] = sz[0]
            m[tmp[0]] = int(tmp[1])
        elif tmp[0] == 'nglobalresources':
            m[tmp[0]] = int(tmp[1])
        elif tmp[0] == 'diff':
            m[tmp[0]] = float(tmp[1])
        else:
            m[tmp[0]] = tmp[1]
    return m
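
# example with a hypothetical 'cascade:pull-end' message:
#   _parse_message('cascade:pull-end', 'img=alpine,size=2.5 MB,diff=0.75')
#   -> {'img': 'alpine', 'size': 2621440, 'diff': 0.75}
# the human-readable docker size is converted to bytes for pull-end events
# only; other events are expected to log size as a plain integer.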


def _diff_events(
        data: dict, nodeid: str, event: str, end_event: str, timing: dict,
        prefix: str, sizes: dict = None) -> None:
    """Diff start and end event
    :param dict data: data
    :param str nodeid: node id
    :param str event: start event
    :param str end_event: end event
    :param dict timing: timing dict
    :param str prefix: prefix
    :param dict sizes: sizes dict
    """
    for i in range(len(data[nodeid][event])):
        # torrent start -> load start may not always exist due to pull
        if (event == 'cascade:torrent-start' and
                end_event == 'cascade:load-start' and
                end_event not in data[nodeid]):
            return
        # find end event for this img
        subevent = data[nodeid][event][i]
        img = subevent['message']['img']
        found = False
        for j in range(len(data[nodeid][end_event])):
            pei = data[
                nodeid][end_event][j]['message']['img']
            if pei == img:
                timing[prefix + img] = _compute_delta_t(
                    data, nodeid, event, i, end_event, j)
                if sizes is not None and img not in sizes:
                    try:
                        if event == 'cascade:load-start':
                            sizes[img] = data[
                                nodeid][event][i]['message']['size']
                        else:
                            sizes[img] = data[
                                nodeid][end_event][j]['message']['size']
                    except KeyError:
                        pass
                found = True
                break
        if not found and event != 'cascade:torrent-start':
            raise RuntimeError(
                'could not find corresponding event for {}:{}'.format(
                    event, img))
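
# pairing sketch (hypothetical events): for a node that logged
# 'cascade:pull-start' and 'cascade:pull-end' for img 'alpine',
#   _diff_events(data, nodeid, 'cascade:pull-start', 'cascade:pull-end',
#                timing, 'pull:', sizes)
# matches the two entries by img and records timing['pull:alpine'];
# torrent-start events without a matching load-start are tolerated since
# the image may have been pulled instead.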


def coalesce_data(table_client: azuretable.TableService) -> tuple:
    """Coalesce perf data from table
    :param azure.cosmosdb.table.TableService table_client: table client
    :rtype: tuple
    :return: (data, sizes, offer, sku)
    """
    print('graphing data from {} with pk={}'.format(
        _TABLE_NAME, _PARTITION_KEY))
    entities = table_client.query_entities(
        _TABLE_NAME, filter='PartitionKey eq \'{}\''.format(_PARTITION_KEY))
    data = {}
    # process events
    for ent in entities:
        nodeid = ent['NodeId']
        event = ent['Event']
        if nodeid not in data:
            data[nodeid] = {}
        if event not in data[nodeid]:
            data[nodeid][event] = []
        ev = {
            'timestamp': datetime.datetime.fromtimestamp(
                float(ent['RowKey'])),
        }
        try:
            ev['message'] = _parse_message(event, ent['Message'])
        except KeyError:
            ev['message'] = None
        data[nodeid][event].append(ev)
    del entities
    sizes = {}
    offer = None
    sku = None
    for nodeid in data:
        if offer is None:
            offer = data[nodeid]['nodeprep:start'][0]['message']['offer']
            sku = data[nodeid]['nodeprep:start'][0]['message']['sku']
        # calculate dt timings
        timing = {
            'nodeprep': _compute_delta_t(
                data, nodeid, 'nodeprep:start', 0, 'nodeprep:end', 0),
            'global_resources_loaded': _compute_delta_t(
                data, nodeid, 'cascade:start', 0, 'cascade:gr-done', 0),
        }
        try:
            timing['docker_install'] = _compute_delta_t(
                data, nodeid, 'nodeprep:start', 0, 'privateregistry:start', 0)
        except KeyError:
            # when no private registry setup exists, install time is
            # equivalent to nodeprep time
            timing['docker_install'] = timing['nodeprep']
        try:
            timing['private_registry_setup'] = _compute_delta_t(
                data, nodeid, 'privateregistry:start', 0,
                'privateregistry:end', 0)
        except KeyError:
            timing['private_registry_setup'] = 0
        try:
            timing['docker_shipyard_container_pull'] = _compute_delta_t(
                data, nodeid, 'shipyard:pull-start', 0,
                'shipyard:pull-end', 0)
        except KeyError:
            timing['docker_shipyard_container_pull'] = 0
        data[nodeid]['start'] = data[
            nodeid]['nodeprep:start'][0]['timestamp'].timestamp()
        data[nodeid].pop('nodeprep:start')
        data[nodeid].pop('nodeprep:end')
        data[nodeid].pop('privateregistry:start', None)
        data[nodeid].pop('privateregistry:end', None)
        data[nodeid].pop('shipyard:pull-start', None)
        data[nodeid].pop('shipyard:pull-end', None)
        data[nodeid].pop('cascade:start')
        data[nodeid].pop('cascade:gr-done')
        for event in data[nodeid]:
            # print(event, data[nodeid][event])
            if event == 'cascade:pull-start':
                _diff_events(
                    data, nodeid, event, 'cascade:pull-end', timing, 'pull:',
                    sizes)
            elif event == 'cascade:save-start':
                _diff_events(
                    data, nodeid, event, 'cascade:save-end', timing, 'save:',
                    sizes)
            elif event == 'cascade:torrent-start':
                _diff_events(
                    data, nodeid, event, 'cascade:load-start', timing,
                    'torrent:')
            elif event == 'cascade:load-start':
                _diff_events(
                    data, nodeid, event, 'cascade:load-end', timing,
                    'load:', sizes)
        data[nodeid].pop('cascade:pull-start', None)
        data[nodeid].pop('cascade:pull-end', None)
        data[nodeid].pop('cascade:save-start', None)
        data[nodeid].pop('cascade:save-end', None)
        data[nodeid].pop('cascade:torrent-start', None)
        data[nodeid].pop('cascade:load-start', None)
        data[nodeid].pop('cascade:load-end', None)
        data[nodeid]['timing'] = timing
    return data, sizes, offer, sku
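
# resulting per-node shape (sketch; the exact timing keys depend on which
# events were actually recorded for the node):
#   data[nodeid] = {
#       'start': <posix timestamp of nodeprep:start>,
#       'timing': {'nodeprep': ..., 'docker_install': ...,
#                  'global_resources_loaded': ...,
#                  'pull:<img>': ..., 'save:<img>': ...,
#                  'load:<img>': ..., 'torrent:<img>': ...},
#   }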


def graph_data(data: dict, sizes: dict, offer: str, sku: str) -> None:
    """Graph data via gnuplot
    :param dict data: timing data
    :param dict sizes: size data
    :param str offer: offer
    :param str sku: sku
    """
    print(sizes)
    # create data file
    dat_fname = _PARTITION_KEY.replace('$', '-') + '.dat'
    mintime = float(sys.maxsize)
    maxtime = 0.0
    rdata = {}
    for nodeid in data:
        start = data[nodeid]['start']
        if start in rdata:
            raise RuntimeError('cannot create reverse mapping')
        rdata[start] = nodeid
        if start < mintime:
            mintime = start
        if start > maxtime:
            maxtime = start
    print('nodeready variance:', maxtime - mintime)
    total_gr = 0
    total_ac = 0
    with open(dat_fname, 'w') as f:
        f.write(
            'NodePrepStartTime NodeId NodePrep+DockerInstall '
            'PrivateRegistrySetup ShipyardContainerPull GlobalResourcesLoad '
            'TotalPull TotalSave TotalLoad TotalTorrent\n')
        for start in sorted(rdata):
            nodeid = rdata[start]
            pull = 0
            save = 0
            load = 0
            torrent = 0
            for event in data[nodeid]['timing']:
                if event.startswith('pull:'):
                    pull += data[nodeid]['timing'][event]
                elif event.startswith('save:'):
                    save += data[nodeid]['timing'][event]
                elif event.startswith('load:'):
                    load += data[nodeid]['timing'][event]
                elif event.startswith('torrent:'):
                    torrent += data[nodeid]['timing'][event]
            acquisition = pull + torrent + load
            total_ac += acquisition
            print(nodeid, data[nodeid]['timing'])
            f.write(
                ('{0} {1} {2} {3} {4} {5} {6:.5f} {7:.5f} {8:.5f} '
                 '{9:.5f}\n').format(
                     datetime.datetime.fromtimestamp(start).strftime(
                         '%Y-%m-%d-%H:%M:%S.%f'),
                     nodeid,
                     data[nodeid]['timing']['docker_install'],
                     data[nodeid]['timing']['private_registry_setup'],
                     data[nodeid]['timing']['docker_shipyard_container_pull'],
                     data[nodeid]['timing']['global_resources_loaded'],
                     pull,
                     save,
                     load,
                     torrent)
            )
            total_gr += data[nodeid]['timing']['global_resources_loaded']
    print('total gr: {} avg: {}'.format(total_gr, total_gr / len(data)))
    print('total acq: {} avg: {}'.format(total_ac, total_ac / len(data)))
    # create plot file
    plot_fname = _PARTITION_KEY.replace('$', '-') + '.plot'
    with open(plot_fname, 'w') as f:
        f.write('set terminal pngcairo enhanced transparent crop\n')
        f.write(
            ('set title "Shipyard Performance for {} ({} {})" '
             'font ", 10"\n').format(
                 _PARTITION_KEY.split('$')[-1], offer, sku))
        f.write(
            'set key top right horizontal autotitle columnhead '
            'font ", 7"\n')
        f.write('set xtics rotate by 45 right font ", 7"\n')
        f.write('set ytics font ", 8"\n')
        f.write('set xlabel "Node Prep Start Time" font ", 8"\n')
        f.write('set ylabel "Seconds" font ", 8"\n')
        f.write('set format x "%H:%M:%.3S"\n')
        f.write('set xdata time\n')
        f.write('set timefmt "%Y-%m-%d-%H:%M:%S"\n')
        f.write('set style fill solid\n')
        f.write('set boxwidth {0:.5f} absolute\n'.format(
            (maxtime - mintime) / 100.0))
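        # the four 'plot' entries below draw cumulative sums as overlapping
        # solid boxes, largest first ($3+$4+$5+$6 down to $3), so the png
        # reads as a stacked bar per node: docker install, private registry
        # setup, shipyard container pull, and global resources load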
        f.write('plot "{}" using 1:($3+$4+$5+$6) with boxes, \\\n'.format(
            dat_fname))
        f.write('\t"" using 1:($3+$4+$5) with boxes, \\\n')
        f.write('\t"" using 1:($3+$4) with boxes, \\\n')
        f.write('\t"" using 1:3 with boxes\n')
    png_fname = _PARTITION_KEY.replace('$', '-') + '.png'
    subprocess.check_call(
        'gnuplot {} > {}'.format(plot_fname, png_fname), shell=True)
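
# note: graphing shells out to gnuplot and redirects stdout to the png, so
# gnuplot must be installed and on PATH; output file names are derived from
# the partition key, e.g. <batchaccount>-<poolid>.dat/.plot/.png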


def merge_dict(dict1: dict, dict2: dict) -> dict:
    """Recursively merge dictionaries: dict2 on to dict1. This differs
    from dict.update() in that values that are dicts are recursively merged.
    Note that only dict value types are merged, not lists, etc.

    :param dict dict1: dictionary to merge to
    :param dict dict2: dictionary to merge with
    :rtype: dict
    :return: merged dictionary
    """
    if not isinstance(dict1, dict) or not isinstance(dict2, dict):
        raise ValueError('dict1 or dict2 is not a dictionary')
    result = copy.deepcopy(dict1)
    for k, v in dict2.items():
        if k in result and isinstance(result[k], dict):
            result[k] = merge_dict(result[k], v)
        else:
            result[k] = copy.deepcopy(v)
    return result
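
# example:
#   merge_dict({'a': {'b': 1}}, {'a': {'c': 2}, 'd': 3})
#   -> {'a': {'b': 1, 'c': 2}, 'd': 3}
# when the existing value is not a dict, the value from dict2 replaces it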


def main():
    """Main function"""
    # get command-line args
    args = parseargs()

    if args.configdir is not None:
        if args.credentials is None:
            args.credentials = str(
                pathlib.Path(args.configdir, 'credentials.json'))
        if args.config is None:
            args.config = str(pathlib.Path(args.configdir, 'config.json'))
        if args.pool is None:
            args.pool = str(pathlib.Path(args.configdir, 'pool.json'))

    if args.credentials is None:
        raise ValueError('credentials json not specified')
    if args.config is None:
        raise ValueError('config json not specified')
    if args.pool is None:
        raise ValueError('pool json not specified')

    with open(args.credentials, 'r') as f:
        config = json.load(f)
    with open(args.config, 'r') as f:
        config = merge_dict(config, json.load(f))
    with open(args.pool, 'r') as f:
        config = merge_dict(config, json.load(f))
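    # note: merge_dict gives the later file precedence, so config.json
    # overrides credentials.json and pool.json overrides both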

    # create storage credentials
    table_client = _create_credentials(config)
    # graph data
    data, sizes, offer, sku = coalesce_data(table_client)
    graph_data(data, sizes, offer, sku)


def parseargs():
    """Parse program arguments
    :rtype: argparse.Namespace
    :return: parsed arguments
    """
    parser = argparse.ArgumentParser(
        description='Batch Shipyard perf graph generator')
    parser.add_argument(
        '--configdir', help='json config dir')
    parser.add_argument(
        '--credentials', help='credentials json config')
    parser.add_argument(
        '--config', help='general json config')
    parser.add_argument(
        '--pool', help='pool json config')
    return parser.parse_args()
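
# example invocation (file name hypothetical; use this script's actual name):
#   ./graph_perf.py --configdir ./config
# or with explicit paths:
#   ./graph_perf.py --credentials credentials.json --config config.json \
#       --pool pool.json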


if __name__ == '__main__':
    main()