Move blocking pull/save/load to thread code

Fred Park 2016-08-15 15:16:45 -07:00
Parent d4c44f811c
Commit d29349f7ec
2 changed files with 292 additions and 209 deletions
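The commit has no description body, so here is a brief, hedged illustration of the pattern the diffs below introduce: blocking docker pull/save/load calls are moved out of asyncio coroutines into threading.Thread subclasses (DockerSaveThread, DockerLoadThread in the first file), so the event loop that polls the global-resources queue and renews message leases is not stalled by long-running subprocesses. The sketch is illustrative only; PullThread, the alpine:latest image, and the polling coroutine are stand-ins and not code from this commit.

    # minimal sketch (not part of the commit): run a blocking docker pull in a
    # worker thread while the asyncio event loop keeps servicing other work
    import asyncio
    import subprocess
    import threading


    class PullThread(threading.Thread):
        """Hypothetical stand-in for the DockerSaveThread added in this commit."""

        def __init__(self, image):
            threading.Thread.__init__(self)
            self.image = image

        def run(self):
            # the blocking subprocess call executes off the event loop thread
            subprocess.check_call(
                'docker pull {}'.format(self.image), shell=True)


    async def poll():
        # the event loop stays responsive while the pull runs in the thread;
        # in cascade this is where lease renewal and queue polling continue
        thr = PullThread('alpine:latest')
        thr.start()
        while thr.is_alive():
            await asyncio.sleep(1)


    if __name__ == '__main__':
        asyncio.get_event_loop().run_until_complete(poll())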

View File

@@ -9,7 +9,10 @@ import hashlib
 import os
 import pathlib
 import random
+import subprocess
 import sys
+import threading
+import time
 # non-stdlib imports
 import azure.common
 import azure.storage.blob as azureblob
@@ -35,9 +38,17 @@ _NODEID = os.environ['AZ_BATCH_NODE_ID']
 _SHARED_DIR = os.environ['AZ_BATCH_NODE_SHARED_DIR']
 _TORRENT_DIR = pathlib.Path(_SHARED_DIR, '.torrents')
 _PARTITION_KEY = '{}${}'.format(_BATCHACCOUNT, _POOLID)
+_LR_LOCK_ASYNC = asyncio.Lock()
+_PT_LOCK = threading.Lock()
+_DIRECTDL_LOCK = threading.Lock()
+_SELF_REGISTRY_PTR = None
+_ENABLE_P2P = True
+_NON_P2P_CONCURRENT_DOWNLOADING = True
+_REGISTRIES = {}
 # mutable global state
 _CBHANDLES = {}
 _QUEUE_MESSAGES = {}
+_DHT_ROUTERS = []
 _PREFIX = None
 _STORAGE_CONTAINERS = {
     'table_dht': None,
@@ -47,16 +58,12 @@ _STORAGE_CONTAINERS = {
     'table_globalresources': None,
     'queue_globalresources': None,
 }
-_SELF_REGISTRY_PTR = None
-_REGISTRIES = {}
 _TORRENTS = {}
+_PENDING_TORRENTS = {}
 _TORRENT_REVERSE_LOOKUP = {}
-_DIRECTDL = {}
-_DHT_ROUTERS = []
-_LR_LOCK_ASYNC = asyncio.Lock()
+_DIRECTDL = []
+_DIRECTDL_DOWNLOADING = []
 _GR_DONE = False
-_ENABLE_P2P = True
-_NON_P2P_CONCURRENT_DOWNLOADING = True


 def _setup_container_names(sep: str):
@@ -97,9 +104,10 @@ def _create_credentials() -> tuple:
     return blob_client, queue_client, table_client


-def generate_torrent(incl_file: pathlib.Path) -> dict:
+def generate_torrent(incl_file: pathlib.Path, resource_hash: str) -> dict:
     """Generate torrent file for a given file and write it to disk
     :param pathlib.Path incl_file: file to include in torrent
+    :param str resource_hash: resource hash
     :rtype: tuple
     :return: (torrent file as pathlib, torrent file encoded as base64,
         torrent file data sha1 hash)
@@ -113,7 +121,7 @@ def generate_torrent(incl_file: pathlib.Path) -> dict:
     torrent_data = libtorrent.bencode(torrent)
     torrent_b64 = base64.b64encode(torrent_data).decode('ascii')
     torrent_sha1 = hashlib.sha1(torrent_data).hexdigest()
-    fp = _TORRENT_DIR / '{}.torrent'.format(torrent_sha1)
+    fp = _TORRENT_DIR / '{}.torrent'.format(resource_hash)
     with fp.open('wb') as f:
         f.write(torrent_data)
     return fp, torrent_b64, torrent_sha1
@@ -160,8 +168,6 @@ def _renew_queue_message_lease(
     :param str queue_key: queue name key index into _STORAGE_CONTAINERS
     :param str msg_id: message id
     """
-    print('updating queue message id={} pr={}'.format(
-        msg_id, _QUEUE_MESSAGES[msg_id].pop_receipt))
     msg = queue_client.update_message(
         _STORAGE_CONTAINERS[queue_key],
         message_id=msg_id,
@@ -172,8 +178,6 @@ def _renew_queue_message_lease(
             'update message failed for id={} pr={}'.format(
                 msg_id, _QUEUE_MESSAGES[msg_id].pop_receipt))
     _QUEUE_MESSAGES[msg_id].pop_receipt = msg.pop_receipt
-    print('queue message updated id={} pr={}'.format(
-        msg_id, _QUEUE_MESSAGES[msg_id].pop_receipt))
     _CBHANDLES[queue_key] = loop.call_later(
         15, _renew_queue_message_lease, loop, queue_client, queue_key, msg_id)

@@ -210,33 +214,168 @@ async def _record_perf_async(loop, event, message):
         print('could not record perf to storage for event: {}'.format(event))


+def _record_perf(event, message):
+    subprocess.check_call(
+        'perf.py cascade {ev} --prefix {pr} --message "{msg}"'.format(
+            ev=event, pr=_PREFIX, msg=message), shell=True)
+
+
+class DockerSaveThread(threading.Thread):
+    def __init__(self, queue_client, resource, msg_id):
+        threading.Thread.__init__(self)
+        self.queue_client = queue_client
+        self.resource = resource
+        self.msg_id = msg_id
+        with _DIRECTDL_LOCK:
+            _DIRECTDL_DOWNLOADING.append(self.resource)
+
+    def run(self):
+        file = None
+        resource_hash = hashlib.sha1(self.resource.encode('utf8')).hexdigest()
+        if self.resource.startswith(_DOCKER_TAG):
+            if len(_REGISTRIES) < 1:
+                raise RuntimeError(
+                    ('{} image specified for global resource, but there are '
+                     'no registries available').format(self.resource))
+            image = self.resource[
+                self.resource.find(_DOCKER_TAG) + len(_DOCKER_TAG):]
+            registry = None
+            _record_perf('pull-start', 'img={}'.format(image))
+            start = datetime.datetime.now()
+            while True:
+                # pick random registry to download from
+                registry = _REGISTRIES[_pick_random_registry_key()]
+                print('pulling image {} from {}'.format(image, registry))
+                if registry == 'registry.hub.docker.com':
+                    proc = subprocess.Popen(
+                        'docker pull {}'.format(image), shell=True)
+                else:
+                    proc = subprocess.Popen(
+                        'docker pull {}/{}'.format(registry, image),
+                        shell=True)
+                proc.wait()
+                if proc.returncode == 0:
+                    break
+                else:
+                    print('docker pull non-zero rc: {}'.format(
+                        proc.returncode))
+                    time.sleep(1)
+            # tag image to remove registry ip
+            if registry != 'registry.hub.docker.com':
+                subprocess.check_call(
+                    'docker tag {}/{} {}'.format(registry, image, image),
+                    shell=True)
+            diff = (datetime.datetime.now() - start).total_seconds()
+            print('took {} sec to pull docker image {} from {}'.format(
+                diff, image, registry))
+            _record_perf('pull-end', 'img={},diff={}'.format(image, diff))
+            # save docker image to seed to torrent
+            if _ENABLE_P2P:
+                _record_perf('save-start', 'img={}'.format(image))
+                start = datetime.datetime.now()
+                file = _TORRENT_DIR / '{}.tar.gz'.format(resource_hash)
+                print('saving docker image {} to {} for seeding'.format(
+                    image, file))
+                subprocess.check_call(
+                    'docker save {} | gzip -c > {}'.format(image, file),
+                    shell=True)
+                print('docker image {} saved for seeding'.format(image))
+                diff = (datetime.datetime.now() - start).total_seconds()
+                print('took {} sec to save docker image {} to {}'.format(
+                    diff, image, file.parent))
+                _record_perf('save-end', 'img={},size={},diff={}'.format(
+                    image, file.stat().st_size, diff))
+        else:
+            # TODO download via blob, explode uri to get container/blob
+            # use download to path into /tmp and move to _TORRENT_DIR
+            raise NotImplementedError()
+        # generate torrent file
+        if _ENABLE_P2P:
+            start = datetime.datetime.now()
+            torrent_file, torrent_b64, torrent_sha1 = generate_torrent(
+                file, resource_hash)
+            diff = (datetime.datetime.now() - start).total_seconds()
+            print('took {} sec to generate torrent file: {}'.format(
+                diff, torrent_file))
+            start = datetime.datetime.now()
+            # add to torrent dict (effectively enqueues for torrent start)
+            entity = {
+                'PartitionKey': _PARTITION_KEY,
+                'RowKey': resource_hash,
+                'Resource': self.resource,
+                'TorrentFileBase64': torrent_b64,
+                'TorrentFileSHA1': torrent_sha1,
+                'FileSizeBytes': file.stat().st_size,
+                # 'FileSHA1': compute_sha1_for_file(file),
+            }
+            with _PT_LOCK:
+                _PENDING_TORRENTS[self.resource] = {
+                    'entity': entity,
+                    'torrent_file': torrent_file,
+                    'started': False,
+                    'seed': True,
+                    'loaded': True,
+                    'loading': False,
+                    'registered': False,
+                }
+                _TORRENT_REVERSE_LOOKUP[resource_hash] = self.resource
+            # wait until torrent has started
+            print('waiting for torrent {} to start'.format(self.resource))
+            while (self.resource not in _TORRENTS or
+                   not _TORRENTS[self.resource]['started']):
+                time.sleep(0.1)
+            diff = (datetime.datetime.now() - start).total_seconds()
+            print('took {} sec for {} torrent to start'.format(
+                diff, self.resource))
+        # cancel callback
+        if _ENABLE_P2P or not _NON_P2P_CONCURRENT_DOWNLOADING:
+            _CBHANDLES['queue_globalresources'].cancel()
+            _CBHANDLES.pop('queue_globalresources')
+        # release queue message
+        self.queue_client.update_message(
+            _STORAGE_CONTAINERS['queue_globalresources'],
+            message_id=self.msg_id,
+            pop_receipt=_QUEUE_MESSAGES[self.msg_id].pop_receipt,
+            visibility_timeout=0)
+        _QUEUE_MESSAGES.pop(self.msg_id)
+        print('queue message released for {}'.format(self.resource))
+        # remove from downloading list
+        with _DIRECTDL_LOCK:
+            _DIRECTDL_DOWNLOADING.remove(self.resource)
+            _DIRECTDL.remove(self.resource)
+
+
 async def _direct_download_resources_async(
         loop, blob_client, queue_client, table_client, ipaddress):
     # iterate through downloads to see if there are any torrents available
-    # TODO allow multiple downloads
-    rmdl = []
-    for dl in _DIRECTDL:
-        if _check_resource_has_torrent(loop, table_client, dl, False):
-            rmdl.append(dl)
-    if len(rmdl) > 0:
-        for dl in rmdl:
-            _DIRECTDL.pop(dl, None)
-    if len(_DIRECTDL) == 0:
-        return
+    with _DIRECTDL_LOCK:
+        if len(_DIRECTDL) == 0:
+            return
     # go through queue and find resources we can download
     msg = None
+    rmdl = []
     _release_list = []
     while True:
         msgs = queue_client.get_messages(
-            _STORAGE_CONTAINERS['queue_globalresources'], num_messages=32,
+            _STORAGE_CONTAINERS['queue_globalresources'], num_messages=1,
             visibility_timeout=45)
         if len(msgs) == 0:
             break
-        for _msg in msgs:
-            if _msg.content in _DIRECTDL and msg is None:
-                msg = _msg
-            else:
-                _release_list.append(_msg)
+        with _DIRECTDL_LOCK:
+            for _msg in msgs:
+                if (msg is None and _msg.content in _DIRECTDL and
+                        _msg.content not in _DIRECTDL_DOWNLOADING):
+                    # TODO modify this to work with concurrent source downloads
+                    # check number of seeds
+                    nseeds = _get_torrent_num_seeds(table_client, _msg.content)
+                    # TODO determine a good number of seeds to cut off directdl
+                    if nseeds < 3:
+                        msg = _msg
+                    else:
+                        rmdl.append(_msg.content)
+                        _release_list.append(_msg)
+                else:
+                    _release_list.append(_msg)
         if msg is not None:
             break
     # renew lease and create renew callback
@@ -256,126 +395,19 @@ async def _direct_download_resources_async(
             pop_receipt=_msg.pop_receipt,
             visibility_timeout=0)
     del _release_list
+    # remove messages out of rmdl
+    if len(rmdl) > 0:
+        with _DIRECTDL_LOCK:
+            for dl in rmdl:
+                try:
+                    _DIRECTDL.remove(dl)
+                except ValueError:
+                    pass
     if msg is None:
         return
-    file = None
-    # download data
-    resource = msg.content
-    resource_hash = hashlib.sha1(resource.encode('utf8')).hexdigest()
-    if resource.startswith(_DOCKER_TAG):
-        if len(_REGISTRIES) < 1:
-            raise RuntimeError(
-                ('{} image specified for global resource, but there are '
-                 'no registries available').format(resource))
-        image = resource[resource.find(_DOCKER_TAG) + len(_DOCKER_TAG):]
-        registry = None
-        await _record_perf_async(loop, 'pull-start', 'img={}'.format(image))
-        start = datetime.datetime.now()
-        while True:
-            # pick random registry to download from
-            registry = _REGISTRIES[_pick_random_registry_key()]
-            print('pulling image {} from {}'.format(image, registry))
-            if registry == 'registry.hub.docker.com':
-                proc = await asyncio.subprocess.create_subprocess_shell(
-                    'docker pull {}'.format(image), loop=loop)
-            else:
-                proc = await asyncio.subprocess.create_subprocess_shell(
-                    'docker pull {}/{}'.format(registry, image), loop=loop)
-            await proc.wait()
-            if proc.returncode == 0:
-                break
-            else:
-                print('docker pull non-zero rc: {}'.format(
-                    proc.returncode))
-                await asyncio.sleep(1)
-        # tag image to remove registry ip
-        if registry != 'registry.hub.docker.com':
-            proc = await asyncio.subprocess.create_subprocess_shell(
-                'docker tag {}/{} {}'.format(registry, image, image),
-                loop=loop)
-            await proc.wait()
-            if proc.returncode != 0:
-                raise RuntimeError('docker tag non-zero rc: {}'.format(
-                    proc.returncode))
-        diff = (datetime.datetime.now() - start).total_seconds()
-        print('took {} sec to pull docker image {} from {}'.format(
-            diff, image, registry))
-        await _record_perf_async(loop, 'pull-end', 'img={},diff={}'.format(
-            image, diff))
-        # save docker image to seed to torrent
-        if _ENABLE_P2P:
-            await _record_perf_async(loop, 'save-start', 'img={}'.format(
-                image))
-            start = datetime.datetime.now()
-            file = _TORRENT_DIR / '{}.tar.gz'.format(resource_hash)
-            print('saving docker image {} to {} for seeding'.format(
-                image, file))
-            proc = await asyncio.subprocess.create_subprocess_shell(
-                'docker save {} | gzip -c > {}'.format(image, file), loop=loop)
-            await proc.wait()
-            if proc.returncode != 0:
-                raise RuntimeError('docker save non-zero rc: {}'.format(
-                    proc.returncode))
-            else:
-                print('docker image {} saved for seeding'.format(image))
-            diff = (datetime.datetime.now() - start).total_seconds()
-            print('took {} sec to save docker image {} to {}'.format(
-                diff, image, file.parent))
-            await _record_perf_async(
-                loop, 'save-end', 'img={},size={},diff={}'.format(
-                    image, file.stat().st_size, diff))
-    else:
-        # TODO download via blob, explode uri to get container/blob
-        # use download to path into /tmp and move to _TORRENT_DIR
-        raise NotImplementedError()
-    # generate torrent file
-    if _ENABLE_P2P:
-        start = datetime.datetime.now()
-        future = loop.run_in_executor(None, generate_torrent, file)
-        torrent_file, torrent_b64, torrent_sha1 = await future
-        diff = (datetime.datetime.now() - start).total_seconds()
-        print('took {} sec to generate torrent file: {}'.format(
-            diff, torrent_file))
-        start = datetime.datetime.now()
-        # add to torrent dict (effectively enqueues for torrent start)
-        entity = {
-            'PartitionKey': _PARTITION_KEY,
-            'RowKey': resource_hash,
-            'Resource': resource,
-            'TorrentFileBase64': torrent_b64,
-            'TorrentFileSHA1': torrent_sha1,
-            'FileSizeBytes': file.stat().st_size,
-            # 'FileSHA1': compute_sha1_for_file(file),
-        }
-        _TORRENTS[resource] = {
-            'entity': entity,
-            'torrent_file': torrent_file,
-            'started': False,
-            'seed': True,
-            'loaded': True,
-            'registered': False,
-        }
-        _TORRENT_REVERSE_LOOKUP[resource_hash] = resource
-        # wait until torrent has started
-        print('waiting for torrent {} to start'.format(resource))
-        while not _TORRENTS[resource]['started']:
-            await asyncio.sleep(0.1)
-        diff = (datetime.datetime.now() - start).total_seconds()
-        print('took {} sec for {} torrent to start'.format(diff, resource))
-    # cancel callback
-    if _ENABLE_P2P or not _NON_P2P_CONCURRENT_DOWNLOADING:
-        _CBHANDLES['queue_globalresources'].cancel()
-        _CBHANDLES.pop('queue_globalresources')
-    # release queue message
-    queue_client.update_message(
-        _STORAGE_CONTAINERS['queue_globalresources'],
-        message_id=msg.id,
-        pop_receipt=_QUEUE_MESSAGES[msg.id].pop_receipt,
-        visibility_timeout=0)
-    _QUEUE_MESSAGES.pop(msg.id)
-    print('queue message released for {}'.format(resource))
-    # remove resources from download list
-    _DIRECTDL.pop(resource)
+    # pull and save docker image in thread
+    thr = DockerSaveThread(queue_client, msg.content, msg.id)
+    thr.start()


 def _merge_service(
@@ -470,6 +502,32 @@ def bootstrap_dht_nodes(
         loop.call_later(1, bootstrap_dht_nodes, loop, table_client, ipaddress)


+class DockerLoadThread(threading.Thread):
+    def __init__(self, resource):
+        threading.Thread.__init__(self)
+        self.resource = resource
+        _TORRENTS[self.resource]['seed'] = True
+        _TORRENTS[self.resource]['loading'] = True
+
+    def run(self):
+        print('loading resource: {}'.format(self.resource))
+        resource_hash = hashlib.sha1(self.resource.encode('utf8')).hexdigest()
+        image = self.resource[
+            self.resource.find(_DOCKER_TAG) + len(_DOCKER_TAG):]
+        file = _TORRENT_DIR / '{}.tar.gz'.format(resource_hash)
+        _record_perf('load-start', 'img={},size={}'.format(
+            image, file.stat().st_size))
+        start = datetime.datetime.now()
+        print('loading docker image {} from {}'.format(image, file))
+        subprocess.check_call(
+            'gunzip -c {} | docker load'.format(file), shell=True)
+        diff = (datetime.datetime.now() - start).total_seconds()
+        print('took {} sec to load docker image from {}'.format(diff, file))
+        _record_perf('load-end', 'img={},diff={}'.format(image, diff))
+        _TORRENTS[self.resource]['loading'] = False
+        _TORRENTS[self.resource]['loaded'] = True
+
+
 async def _load_and_register_async(
         loop: asyncio.BaseEventLoop,
         table_client: azure.storage.table.TableService,
@@ -482,41 +540,18 @@ async def _load_and_register_async(
             if _TORRENTS[resource]['started']:
                 if _TORRENTS[resource]['handle'].is_seed():
                     # docker load image
-                    if not _TORRENTS[resource]['loaded']:
-                        resource_hash = hashlib.sha1(
-                            resource.encode('utf8')).hexdigest()
-                        image = resource[
-                            resource.find(_DOCKER_TAG) + len(_DOCKER_TAG):]
-                        file = _TORRENT_DIR / '{}.tar.gz'.format(resource_hash)
-                        await _record_perf_async(
-                            loop, 'load-start', 'img={},size={}'.format(
-                                image, file.stat().st_size))
-                        start = datetime.datetime.now()
-                        print('loading docker image {} from {}'.format(
-                            image, file))
-                        proc = await \
-                            asyncio.subprocess.create_subprocess_shell(
-                                'gunzip -c {} | docker load'.format(file),
-                                loop=loop)
-                        await proc.wait()
-                        if proc.returncode != 0:
-                            raise RuntimeError(
-                                'docker load non-zero rc: {}'.format(
-                                    proc.returncode))
-                        _TORRENTS[resource]['loaded'] = True
-                        diff = (datetime.datetime.now() -
-                                start).total_seconds()
-                        print(('took {} sec to load docker image '
-                               'from {}').format(diff, file))
-                        await _record_perf_async(
-                            loop, 'load-end', 'img={},diff={}'.format(
-                                image, diff))
+                    if (not _TORRENTS[resource]['loaded'] and
+                            not _TORRENTS[resource]['loading']):
+                        thr = DockerLoadThread(resource)
+                        thr.start()
                     # register to services table
-                    if not _TORRENTS[resource]['registered']:
-                        _merge_service(table_client, resource)
-                        _TORRENTS[resource]['registered'] = True
-                    else:
-                        nfinished += 1
+                    if (_TORRENTS[resource]['loaded'] and
+                            not _TORRENTS[resource]['loading']):
+                        if not _TORRENTS[resource]['registered']:
+                            _merge_service(table_client, resource)
+                            _TORRENTS[resource]['registered'] = True
+                        else:
+                            nfinished += 1
         if not _GR_DONE and nfinished == nglobalresources:
             await _record_perf_async(
                 loop, 'gr-done',
@@ -535,6 +570,11 @@ async def manage_torrents_async(
         if not _GR_DONE and not _LR_LOCK_ASYNC.locked():
             asyncio.ensure_future(_load_and_register_async(
                 loop, table_client, nglobalresources))
+        # move pending torrents into torrents
+        with _PT_LOCK:
+            for pt in _PENDING_TORRENTS:
+                _TORRENTS[pt] = _PENDING_TORRENTS[pt]
+            _PENDING_TORRENTS.clear()
         # start applicable torrent sessions
         for resource in _TORRENTS:
             if _TORRENTS[resource]['started']:
@@ -586,6 +626,20 @@ async def download_monitor_async(
         await asyncio.sleep(1)


+def _get_torrent_num_seeds(
+        table_client: azure.storage.table.TableService,
+        resource: str) -> int:
+    try:
+        rk = hashlib.sha1(resource.encode('utf8')).hexdigest()
+        se = table_client.get_entity(
+            _STORAGE_CONTAINERS['table_services'],
+            _PARTITION_KEY, rk)
+        numseeds = len(se['VmList'].split(','))
+    except azure.common.AzureMissingResourceHttpError:
+        numseeds = 0
+    return numseeds
+
+
 def _check_resource_has_torrent(
         loop: asyncio.BaseEventLoop,
         table_client: azure.storage.table.TableService,
@@ -600,7 +654,8 @@ def _check_resource_has_torrent(
             _PARTITION_KEY, rk)
     except azure.common.AzureMissingResourceHttpError:
         if add_to_dict:
-            _DIRECTDL[resource] = None
+            with _DIRECTDL_LOCK:
+                _DIRECTDL.append(resource)
         return False
     else:
         # write torrent file to disk
@@ -608,15 +663,17 @@ def _check_resource_has_torrent(
         torrent_file = _TORRENT_DIR / '{}.torrent'.format(entity['RowKey'])
         with open(str(torrent_file), 'wb') as f:
             f.write(torrent)
-        _TORRENTS[resource] = {
-            'entity': entity,
-            'torrent_file': torrent_file,
-            'started': False,
-            'seed': False,
-            'loaded': False,
-            'registered': False,
-        }
-        _TORRENT_REVERSE_LOOKUP[entity['RowKey']] = resource
+        with _PT_LOCK:
+            _PENDING_TORRENTS[resource] = {
+                'entity': entity,
+                'torrent_file': torrent_file,
+                'started': False,
+                'seed': False,
+                'loaded': False,
+                'loading': False,
+                'registered': False,
+            }
+            _TORRENT_REVERSE_LOOKUP[entity['RowKey']] = resource
         print('found torrent for resource {}'.format(resource))
         return True


View File

@@ -62,11 +62,16 @@ def _parse_message(msg):
     return m


-def _diff_events(data, nodeid, event, end_event, timing, prefix):
+def _diff_events(data, nodeid, event, end_event, timing, prefix, sizes=None):
     for i in range(0, len(data[nodeid][event])):
+        # torrent start -> load start may not always exist due to pull
+        if (event == 'cascade:torrent-start' and
+                end_event == 'cascade:load-start' and
+                end_event not in data[nodeid]):
+            return
+        # find end event for this img
         subevent = data[nodeid][event][i]
         img = subevent['message']['img']
-        # find end event for this img
         found = False
         for j in range(0, len(data[nodeid][end_event])):
             pei = data[
@@ -74,15 +79,21 @@ def _diff_events(data, nodeid, event, end_event, timing, prefix):
             if pei == img:
                 timing[prefix + img] = _compute_delta_t(
                     data, nodeid, event, i, end_event, j)
+                if sizes is not None and img not in sizes:
+                    if event == 'cascade:load-start':
+                        sizes[img] = data[nodeid][event][j]['message']['size']
+                    else:
+                        sizes[img] = data[
+                            nodeid][end_event][j]['message']['size']
                 found = True
                 break
-        if not found:
+        if not found and event != 'cascade:torrent-start':
            raise RuntimeError(
                'could not find corresponding event for {}:{}'.format(
-                    subevent, img))
+                    event, img))


-def graph_data(table_client):
+def coalesce_data(table_client):
     print('graphing data from {} with pk={}'.format(
         _TABLE_NAME, _PARTITION_KEY))
     entities = table_client.query_entities(
@@ -106,8 +117,8 @@ def graph_data(table_client):
             ev['message'] = None
         data[nodeid][event].append(ev)
     del entities
+    sizes = {}
     for nodeid in data:
-        print(nodeid)
         # calculate dt timings
         timing = {
             'docker_install': _compute_delta_t(
@@ -132,19 +143,33 @@ def graph_data(table_client):
                 _diff_events(
                     data, nodeid, event, 'cascade:pull-end', timing, 'pull:')
             elif event == 'cascade:save-start':
-                pass
-            elif event == 'cascade:save-end':
-                # message will contain size info
-                pass
+                _diff_events(
+                    data, nodeid, event, 'cascade:save-end', timing, 'save:',
+                    sizes)
             elif event == 'cascade:torrent-start':
-                pass
+                _diff_events(
+                    data, nodeid, event, 'cascade:load-start', timing,
+                    'torrent:')
             elif event == 'cascade:load-start':
-                # load start also marks torrent-seed
-                # message will contain size info
-                pass
-            elif event == 'cascade:load-end':
-                pass
-        print(timing)
+                _diff_events(
+                    data, nodeid, event, 'cascade:load-end', timing,
+                    'load:', sizes)
+        data[nodeid].pop('cascade:pull-start', None)
+        data[nodeid].pop('cascade:pull-end', None)
+        data[nodeid].pop('cascade:save-start', None)
+        data[nodeid].pop('cascade:save-end', None)
+        data[nodeid].pop('cascade:torrent-start')
+        data[nodeid].pop('cascade:load-start', None)
+        data[nodeid].pop('cascade:load-end', None)
+        data[nodeid]['timing'] = timing
+    return data, sizes
+
+
+def graph_data(data, sizes):
+    print(sizes)
+    for nodeid in data:
+        print(nodeid)
+        print(data[nodeid])


 def merge_dict(dict1, dict2):
@@ -189,7 +214,8 @@ def main():
     # create storage credentials
     table_client = _create_credentials(config)
     # graph data
-    graph_data(table_client)
+    data, sizes = coalesce_data(table_client)
+    graph_data(data, sizes)


 def parseargs():