Add more configurable options for P2P mode
- Add WIP of graph.py
This commit is contained in:
Parent: b186ae3099
Commit: d4c44f811c
cascade.py (44 changes)
@@ -55,6 +55,8 @@ _DIRECTDL = {}
 _DHT_ROUTERS = []
 _LR_LOCK_ASYNC = asyncio.Lock()
 _GR_DONE = False
+_ENABLE_P2P = True
+_NON_P2P_CONCURRENT_DOWNLOADING = True
 
 
 def _setup_container_names(sep: str):
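The two new globals are plain module-level switches that main() later overwrites from the command line. As a rough sketch of how they partition the behavior of this file (illustrative helper only, not part of the commit):

# illustrative only: cascade.py spreads this decision across several functions
def _transfer_mode(enable_p2p: bool, non_p2p_concurrent: bool) -> str:
    if enable_p2p:
        # seed and leech images via torrents
        return 'torrent'
    elif non_p2p_concurrent:
        # all nodes pull directly from the registry in parallel
        return 'direct-concurrent'
    else:
        # nodes serialize direct pulls via the queue message lease
        return 'direct-serialized'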
@@ -211,6 +213,7 @@ async def _record_perf_async(loop, event, message):
 async def _direct_download_resources_async(
         loop, blob_client, queue_client, table_client, ipaddress):
     # iterate through downloads to see if there are any torrents available
     # TODO allow multiple downloads
+    rmdl = []
     for dl in _DIRECTDL:
         if _check_resource_has_torrent(loop, table_client, dl, False):
@@ -238,6 +241,9 @@ async def _direct_download_resources_async(
             break
     # renew lease and create renew callback
     if msg is not None:
+        if _NON_P2P_CONCURRENT_DOWNLOADING:
+            _release_list.append(msg)
+        else:
             _QUEUE_MESSAGES[msg.id] = msg
             _CBHANDLES['queue_globalresources'] = loop.call_later(
                 15, _renew_queue_message_lease, loop, queue_client,
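The handle stashed in _CBHANDLES drives lease renewal: loop.call_later arms a 15-second timer whose callback renews the queue message lease and re-arms itself until cancelled. A minimal self-rescheduling sketch (the actual renewal call is elided; the body here is illustrative, not the cascade.py implementation):

_CBHANDLES = {}

def _renew_queue_message_lease(loop, queue_client, key, msg_id):
    # illustrative body: renew the message's visibility timeout via
    # queue_client here, then re-arm the timer so the lease is renewed
    # again in 15 seconds ('loop' is the asyncio event loop)
    _CBHANDLES[key] = loop.call_later(
        15, _renew_queue_message_lease, loop, queue_client, key, msg_id)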
@@ -297,6 +303,7 @@ async def _direct_download_resources_async(
         await _record_perf_async(loop, 'pull-end', 'img={},diff={}'.format(
             image, diff))
         # save docker image to seed to torrent
+        if _ENABLE_P2P:
             await _record_perf_async(loop, 'save-start', 'img={}'.format(
                 image))
             start = datetime.datetime.now()
@@ -314,13 +321,15 @@ async def _direct_download_resources_async(
             diff = (datetime.datetime.now() - start).total_seconds()
             print('took {} sec to save docker image {} to {}'.format(
                 diff, image, file.parent))
-            await _record_perf_async(loop, 'save-end', 'img={},diff={}'.format(
-                image, diff))
+            await _record_perf_async(
+                loop, 'save-end', 'img={},size={},diff={}'.format(
+                    image, file.stat().st_size, diff))
         else:
             # TODO download via blob, explode uri to get container/blob
             # use download to path into /tmp and move to _TORRENT_DIR
             raise NotImplementedError()
     # generate torrent file
+    if _ENABLE_P2P:
         start = datetime.datetime.now()
         future = loop.run_in_executor(None, generate_torrent, file)
         torrent_file, torrent_b64, torrent_sha1 = await future
@@ -354,6 +363,7 @@ async def _direct_download_resources_async(
     diff = (datetime.datetime.now() - start).total_seconds()
     print('took {} sec for {} torrent to start'.format(diff, resource))
     # cancel callback
+    if _ENABLE_P2P or not _NON_P2P_CONCURRENT_DOWNLOADING:
         _CBHANDLES['queue_globalresources'].cancel()
         _CBHANDLES.pop('queue_globalresources')
     # release queue message
@@ -477,10 +487,11 @@ async def _load_and_register_async(
             resource.encode('utf8')).hexdigest()
         image = resource[
             resource.find(_DOCKER_TAG) + len(_DOCKER_TAG):]
-        await _record_perf_async(
-            loop, 'load-start', 'img={}'.format(image))
-        start = datetime.datetime.now()
         file = _TORRENT_DIR / '{}.tar.gz'.format(resource_hash)
+        await _record_perf_async(
+            loop, 'load-start', 'img={},size={}'.format(
+                image, file.stat().st_size))
+        start = datetime.datetime.now()
         print('loading docker image {} from {}'.format(
             image, file))
         proc = await \
@@ -561,8 +572,11 @@ async def download_monitor_async(
         ipaddress: str,
         nglobalresources: int):
     # begin async manage torrent sessions
+    if _ENABLE_P2P:
         asyncio.ensure_future(
-            manage_torrents_async(loop, table_client, ipaddress, nglobalresources))
+            manage_torrents_async(
+                loop, table_client, ipaddress, nglobalresources)
+        )
     while True:
         # check if there are any direct downloads
         if len(_DIRECTDL) > 0:
@@ -577,6 +591,8 @@ def _check_resource_has_torrent(
         table_client: azure.storage.table.TableService,
         resource: str,
         add_to_dict: bool=False) -> bool:
+    if not _ENABLE_P2P:
+        return False
     try:
         rk = hashlib.sha1(resource.encode('utf8')).hexdigest()
         entity = table_client.get_entity(
@@ -619,6 +635,7 @@ def distribute_global_resources(
     :param str ipaddress: ip address
     """
     # set torrent session port listen
+    if _ENABLE_P2P:
         print('creating torrent session on {}:{}'.format(
             ipaddress, _DEFAULT_PORT_BEGIN))
         _TORRENT_SESSION.listen_on(_DEFAULT_PORT_BEGIN, _DEFAULT_PORT_END)
@@ -665,8 +682,11 @@ async def _get_ipaddress_async(loop: asyncio.BaseEventLoop) -> str:
 
 def main():
     """Main function"""
+    global _ENABLE_P2P, _NON_P2P_CONCURRENT_DOWNLOADING
     # get command-line args
     args = parseargs()
+    _ENABLE_P2P = args.torrent
+    _NON_P2P_CONCURRENT_DOWNLOADING = args.nonp2pcd
 
     # get event loop
     if _ON_WINDOWS:
@@ -694,8 +714,11 @@ def main():
 
     # get registry list
     global _REGISTRIES, _SELF_REGISTRY_PTR
+    try:
        _REGISTRIES = [line.rstrip('\n') for line in open(
            '.cascade_private_registries.txt', 'r')]
+    except Exception:
+        pass
     if len(_REGISTRIES) == 0:
         _REGISTRIES.append('registry.hub.docker.com')
     for i in range(0, len(_REGISTRIES)):
@@ -717,10 +740,17 @@ def parseargs():
     """
     parser = argparse.ArgumentParser(
         description='Cascade: Azure Batch P2P File/Image Replicator')
+    parser.set_defaults(ipaddress=None, nonp2pcd=False, torrent=True)
     parser.add_argument(
-        'ipaddress', nargs='?', default=None, help='ip address')
+        'ipaddress', nargs='?', help='ip address')
     parser.add_argument(
         '--prefix', help='storage container prefix')
+    parser.add_argument(
+        '--no-torrent', action='store_false', dest='torrent',
+        help='disable peer-to-peer transfer')
+    parser.add_argument(
+        '--nonp2pcd', action='store_true',
+        help='non-p2p concurrent downloading')
     return parser.parse_args()
 
 
 if __name__ == '__main__':
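With parser.set_defaults plus the store_false/store_true pair, both new flags default to the previous behavior and invert cleanly; for example (argv values illustrative):

# given the parser built in parseargs() above:
args = parser.parse_args(['10.0.0.4', '--no-torrent', '--nonp2pcd'])
# -> ipaddress='10.0.0.4', torrent=False, nonp2pcd=True
args = parser.parse_args([])
# -> ipaddress=None, torrent=True, nonp2pcd=False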
graph.py (new file, 211 lines)

@@ -0,0 +1,211 @@
#!/usr/bin/env python3

# stdlib imports
import argparse
import copy
import datetime
import json
import os
# non-stdlib imports
import azure.storage.table as azuretable

# global defines
_STORAGEACCOUNT = os.getenv('STORAGEACCOUNT')
_STORAGEACCOUNTKEY = os.getenv('STORAGEACCOUNTKEY')
_BATCHACCOUNT = None
_POOLID = None
_PARTITION_KEY = None
_TABLE_NAME = None


def _create_credentials(config: dict):
    """Create authenticated clients
    :param dict config: configuration dict
    :rtype: azure.storage.table.TableService
    :return: table client
    """
    global _STORAGEACCOUNT, _STORAGEACCOUNTKEY, _BATCHACCOUNT, _POOLID, \
        _PARTITION_KEY, _TABLE_NAME
    _STORAGEACCOUNT = config['credentials']['storage_account']
    _STORAGEACCOUNTKEY = config['credentials']['storage_account_key']
    _BATCHACCOUNT = config['credentials']['batch_account']
    _POOLID = config['poolspec']['id']
    _PARTITION_KEY = '{}${}'.format(_BATCHACCOUNT, _POOLID)
    _TABLE_NAME = config['storage_entity_prefix'] + 'perf'
    table_client = azuretable.TableService(
        account_name=_STORAGEACCOUNT,
        account_key=_STORAGEACCOUNTKEY,
        endpoint_suffix=config['credentials']['storage_endpoint'])
    return table_client
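_create_credentials implies this minimal configuration shape (key names taken from the lookups above; values are placeholders):

config = {
    'credentials': {
        'storage_account': 'mystorageaccount',   # placeholder
        'storage_account_key': 'key==',          # placeholder
        'storage_endpoint': 'core.windows.net',
        'batch_account': 'mybatchaccount',       # placeholder
    },
    'poolspec': {'id': 'mypool'},                # placeholder
    'storage_entity_prefix': 'shipyard',         # placeholder
}
table_client = _create_credentials(config)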
def _compute_delta_t(data, nodeid, event1, event1_pos, event2, event2_pos):
    # attempt to get directly recorded diff
    try:
        return data[nodeid][event2][event2_pos]['message']['diff']
    except (TypeError, KeyError):
        return (data[nodeid][event2][event2_pos]['timestamp'] -
                data[nodeid][event1][event1_pos]['timestamp']).total_seconds()
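_compute_delta_t prefers a diff recorded in the event message itself and only falls back to subtracting timestamps. A toy illustration using the data shape built by graph_data below:

import datetime

data = {'node0': {
    'cascade:pull-start': [{'timestamp': datetime.datetime(2016, 9, 1, 0, 0, 0),
                            'message': {'img': 'alpine'}}],
    'cascade:pull-end': [{'timestamp': datetime.datetime(2016, 9, 1, 0, 0, 12),
                          'message': {'img': 'alpine', 'diff': 11.7}}],
}}
# the recorded diff (11.7) wins over the 12 s timestamp delta
assert _compute_delta_t(
    data, 'node0', 'cascade:pull-start', 0, 'cascade:pull-end', 0) == 11.7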
def _parse_message(msg):
    parts = msg.split(',')
    m = {}
    for part in parts:
        tmp = part.split('=')
        if tmp[0] == 'size' or tmp[0] == 'nglobalresources':
            m[tmp[0]] = int(tmp[1])
        elif tmp[0] == 'diff':
            m[tmp[0]] = float(tmp[1])
        else:
            m[tmp[0]] = tmp[1]
    return m
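_parse_message splits the comma-delimited key=value pairs emitted by the perf events and coerces the size/diff/nglobalresources fields, e.g.:

m = _parse_message('img=alpine,size=2048,diff=1.5')
# -> {'img': 'alpine', 'size': 2048, 'diff': 1.5}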
def _diff_events(data, nodeid, event, end_event, timing, prefix):
    for i in range(0, len(data[nodeid][event])):
        subevent = data[nodeid][event][i]
        img = subevent['message']['img']
        # find end event for this img
        found = False
        for j in range(0, len(data[nodeid][end_event])):
            pei = data[
                nodeid][end_event][j]['message']['img']
            if pei == img:
                timing[prefix + img] = _compute_delta_t(
                    data, nodeid, event, i, end_event, j)
                found = True
                break
        if not found:
            raise RuntimeError(
                'could not find corresponding event for {}:{}'.format(
                    subevent, img))


def graph_data(table_client):
    print('graphing data from {} with pk={}'.format(
        _TABLE_NAME, _PARTITION_KEY))
    entities = table_client.query_entities(
        _TABLE_NAME, filter='PartitionKey eq \'{}\''.format(_PARTITION_KEY))
    data = {}
    # process events
    for ent in entities:
        nodeid = ent['NodeId']
        event = ent['Event']
        if nodeid not in data:
            data[nodeid] = {}
        if event not in data[nodeid]:
            data[nodeid][event] = []
        ev = {
            'timestamp': datetime.datetime.fromtimestamp(
                float(ent['RowKey'])),
        }
        try:
            ev['message'] = _parse_message(ent['Message'])
        except KeyError:
            ev['message'] = None
        data[nodeid][event].append(ev)
    del entities
    for nodeid in data:
        print(nodeid)
        # calculate dt timings
        timing = {
            'docker_install': _compute_delta_t(
                data, nodeid, 'nodeprep:start', 0, 'privateregistry:start', 0),
            'private_registry_setup': _compute_delta_t(
                data, nodeid, 'privateregistry:start', 0,
                'privateregistry:end', 0),
            'nodeprep': _compute_delta_t(
                data, nodeid, 'nodeprep:start', 0, 'nodeprep:end', 0),
            'global_resources_loaded': _compute_delta_t(
                data, nodeid, 'cascade:start', 0, 'cascade:gr-done', 0),
        }
        data[nodeid].pop('nodeprep:start')
        data[nodeid].pop('nodeprep:end')
        data[nodeid].pop('privateregistry:start')
        data[nodeid].pop('privateregistry:end')
        data[nodeid].pop('cascade:start')
        data[nodeid].pop('cascade:gr-done')
        for event in data[nodeid]:
            # print(event, data[nodeid][event])
            if event == 'cascade:pull-start':
                _diff_events(
                    data, nodeid, event, 'cascade:pull-end', timing, 'pull:')
            elif event == 'cascade:save-start':
                pass
            elif event == 'cascade:save-end':
                # message will contain size info
                pass
            elif event == 'cascade:torrent-start':
                pass
            elif event == 'cascade:load-start':
                # load start also marks torrent-seed
                # message will contain size info
                pass
            elif event == 'cascade:load-end':
                pass
        print(timing)
def merge_dict(dict1, dict2):
    """Recursively merge dictionaries: dict2 on to dict1. This differs
    from dict.update() in that values that are dicts are recursively merged.
    Note that only dict value types are merged, not lists, etc.

    Code adapted from:
    https://www.xormedia.com/recursively-merge-dictionaries-in-python/

    :param dict dict1: dictionary to merge to
    :param dict dict2: dictionary to merge with
    :rtype: dict
    :return: merged dictionary
    """
    if not isinstance(dict1, dict) or not isinstance(dict2, dict):
        raise ValueError('dict1 or dict2 is not a dictionary')
    result = copy.deepcopy(dict1)
    for k, v in dict2.items():
        if k in result and isinstance(result[k], dict):
            result[k] = merge_dict(result[k], v)
        else:
            result[k] = copy.deepcopy(v)
    return result
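merge_dict is what lets the action config overlay the global settings file in main() below without clobbering sibling keys, e.g.:

d1 = {'credentials': {'storage_account': 'a'}, 'poolspec': {'id': 'p'}}
d2 = {'credentials': {'storage_account_key': 'k'}}
merge_dict(d1, d2)
# -> {'credentials': {'storage_account': 'a', 'storage_account_key': 'k'},
#     'poolspec': {'id': 'p'}}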
def main():
    """Main function"""
    # get command-line args
    args = parseargs()

    if args.settings is None:
        raise ValueError('global settings not specified')
    if args.config is None:
        raise ValueError('config settings for action not specified')

    with open(args.settings, 'r') as f:
        config = json.load(f)
    with open(args.config, 'r') as f:
        config = merge_dict(config, json.load(f))

    # create storage credentials
    table_client = _create_credentials(config)
    # graph data
    graph_data(table_client)


def parseargs():
    """Parse program arguments
    :rtype: argparse.Namespace
    :return: parsed arguments
    """
    parser = argparse.ArgumentParser(
        description='Shipyard perf graph generator')
    parser.add_argument(
        '--settings',
        help='global settings json file config. required for all actions')
    parser.add_argument(
        '--config',
        help='json file config for option. required for all actions')
    return parser.parse_args()


if __name__ == '__main__':
    main()
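So a run merges the two json files and graphs the result, along the lines of (file names illustrative):

./graph.py --settings config.json --config pool.json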
perf.py (9 changes)
@@ -5,6 +5,7 @@ import argparse
 import datetime
 import os
 # non-stdlib imports
+import azure.common
 import azure.storage.table as azuretable
 
 # global defines
@@ -37,7 +38,15 @@ def process_event(table_client, table_name, source, event, ts, message):
         'NodeId': _NODEID,
         'Message': message,
     }
-    table_client.insert_entity(table_name, entity)
+    while True:
+        try:
+            table_client.insert_entity(table_name, entity)
+            break
+        except azure.common.AzureConflictHttpError:
+            if not isinstance(ts, float):
+                ts = float(ts)
+            ts += 0.000001
+            entity['RowKey'] = str(ts)
 
 
 def main():
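Since RowKey is the event timestamp, two events recorded in the same instant collide on insert; the new retry loop nudges the timestamp forward one microsecond at a time until the insert succeeds. For instance (values made up):

ts = 1473004000.5
# insert with RowKey '1473004000.5' raises AzureConflictHttpError
ts += 0.000001
# retry with RowKey '1473004000.500001' succeeds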
nodeprep.sh

@@ -2,6 +2,7 @@
 
 set -o pipefail
 
+nonp2pcd=
 offer=
 p2p=0
 prefix=
@@ -10,12 +11,13 @@ privateregarchive=
 privateregimageid=
 sku=
 
-while getopts "h?a:i:o:p:r:s:t" opt; do
+while getopts "h?a:ci:o:p:r:s:t" opt; do
     case "$opt" in
         h|\?)
             echo "nodeprep.sh parameters"
             echo ""
             echo "-a [registry archive] registry archive file"
+            echo "-c concurrent downloading in non-p2p mode"
             echo "-i [registry image id] registry image id"
             echo "-o [offer] VM offer"
             echo "-p [prefix] storage container prefix"
@@ -28,6 +30,9 @@ while getopts "h?a:i:o:p:r:s:t" opt; do
         a)
             privateregarchive="--regarchive $OPTARG"
             ;;
+        c)
+            nonp2pcd="--nonp2pcd"
+            ;;
         i)
             privateregimageid="--regimageid $OPTARG"
             ;;
@@ -147,11 +152,14 @@ fi
 ./perf.py nodeprep end $prefix
 
 # enable p2p sharing
+torrentflag=
 if [ $p2p -eq 1 ]; then
     # disable DHT connection tracking
     iptables -t raw -I PREROUTING -p udp --dport 6881 -j CT --notrack
     iptables -t raw -I OUTPUT -p udp --sport 6881 -j CT --notrack
-    # start cascade
-    ./perf.py cascade start $prefix --message "ipaddress=$ipaddress"
-    ./cascade.py $ipaddress $prefix > cascade.log &
+else
+    torrentflag="--no-torrent"
 fi
+# start cascade
+./perf.py cascade start $prefix
+./cascade.py $ipaddress $prefix $torrentflag $nonp2pcd > cascade.log &
@@ -303,10 +303,10 @@ def main():
    # get private registries
    registries = get_private_registries(table_client)

    if len(registries) > 0:
        # modify init scripts with registry info
        register_insecure_registries(
            args.offer.lower(), args.sku.lower(), registries)

        # write registry file
        with open('.cascade_private_registries.txt', 'w') as f:
            for registry in registries:
shipyard.py (33 changes)
@@ -105,10 +105,7 @@ def _create_credentials(config: dict) -> tuple:
         _BATCHACCOUNTKEY)
     batch_client = batch.BatchServiceClient(
         credentials,
-        base_url='https://{}.{}.{}'.format(
-            config['credentials']['batch_account'],
-            config['credentials']['batch_account_region'],
-            config['credentials']['batch_endpoint']))
+        base_url=config['credentials']['batch_account_service_url'])
     blob_client = azureblob.BlockBlobService(
         account_name=_STORAGEACCOUNT,
         account_key=_STORAGEACCOUNTKEY,
@@ -189,9 +186,17 @@ def add_pool(
     offer = config['poolspec']['offer']
     sku = config['poolspec']['sku']
     try:
-        p2p = config['peer_to_peer']['enabled']
+        p2p = config['data_replication']['peer_to_peer']['enabled']
     except KeyError:
         p2p = True
+    if not p2p:
+        try:
+            nonp2pcd = config[
+                'data_replication']['non_peer_to_peer_concurrent_downloading']
+        except KeyError:
+            nonp2pcd = True
+    else:
+        nonp2pcd = False
     try:
         preg = 'private' in config['docker_registry']
         pcont = config['docker_registry']['private']['container']
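These lookups imply a config layout along these lines (sketch; the defaults shown are the except-branch fallbacks above):

config['data_replication'] = {
    'peer_to_peer': {
        'enabled': True,                   # assumed True when absent
        'concurrent_source_downloads': 1,  # used in populate_queues below
    },
    # consulted only when peer_to_peer is disabled
    'non_peer_to_peer_concurrent_downloading': True,
}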
@@ -240,7 +245,7 @@
         target_dedicated=config['poolspec']['vm_count'],
         enable_inter_node_communication=True,
         start_task=batchmodels.StartTask(
-            command_line='{} -o {} -s {}{}{}{}{}{}'.format(
+            command_line='{} -o {} -s {}{}{}{}{}{}{}'.format(
                 _NODEPREP_FILE[0], offer, sku,
                 ' -p {}'.format(prefix) if prefix else '',
                 ' -r {}'.format(pcont) if preg else '',
@@ -248,7 +253,8 @@
             if _REGISTRY_FILE[0] else '',
             ' -i {}'.format(_REGISTRY_FILE[2])
             if _REGISTRY_FILE[2] else '',
-            ' -t' if p2p else ''
+            ' -t' if p2p else '',
+            ' -c' if nonp2pcd else '',
         ),
         run_elevated=True,
         wait_for_success=True,
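Because nonp2pcd is forced to False whenever p2p is enabled, -t and -c never appear together in the rendered start task command line. Assuming _NODEPREP_FILE[0] is nodeprep.sh (offer and sku illustrative):

nodeprep.sh -o UbuntuServer -s 16.04.0-LTS -t   (p2p enabled)
nodeprep.sh -o UbuntuServer -s 16.04.0-LTS -c   (p2p off, concurrent direct downloads)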
@@ -494,6 +500,18 @@ def populate_queues(queue_client, table_client, config):
         queue_client.put_message(
             _STORAGE_CONTAINERS['queue_registry'], 'create-{}'.format(i))
     # populate global resources
+    try:
+        p2p = config['data_replication']['peer_to_peer']['enabled']
+    except KeyError:
+        p2p = True
+    if p2p:
+        try:
+            p2pcsd = config['data_replication']['peer_to_peer'][
+                'concurrent_source_downloads']
+        except KeyError:
+            p2pcsd = 1
+    else:
+        p2pcsd = 1
     try:
         for gr in config['global_resources']:
             table_client.insert_or_replace_entity(
@@ -504,6 +522,7 @@ def populate_queues(queue_client, table_client, config):
                     'Resource': gr,
                 }
             )
+            for _ in range(0, p2pcsd):
                 queue_client.put_message(
                     _STORAGE_CONTAINERS['queue_globalresources'], gr)
     except KeyError:
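Each global resource is thus enqueued p2pcsd times, bounding how many nodes can pull it from the source registry concurrently. For example:

# p2pcsd == 3 and config['global_resources'] == ['docker:alpine', 'docker:busybox']
# -> queue_globalresources receives 3 messages per resource, 6 in total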