Add more configurable options for P2P mode

- Add WIP of graph.py
This commit is contained in:
Fred Park 2016-08-12 15:31:05 -07:00
Parent b186ae3099
Commit d4c44f811c
6 changed files with 381 additions and 104 deletions

View file

@@ -55,6 +55,8 @@ _DIRECTDL = {}
_DHT_ROUTERS = []
_LR_LOCK_ASYNC = asyncio.Lock()
_GR_DONE = False
_ENABLE_P2P = True
_NON_P2P_CONCURRENT_DOWNLOADING = True
def _setup_container_names(sep: str):
@@ -211,6 +213,7 @@ async def _record_perf_async(loop, event, message):
async def _direct_download_resources_async(
loop, blob_client, queue_client, table_client, ipaddress):
# iterate through downloads to see if there are any torrents available
# TODO allow multiple downloads
rmdl = []
for dl in _DIRECTDL:
if _check_resource_has_torrent(loop, table_client, dl, False):
@@ -238,6 +241,9 @@ async def _direct_download_resources_async(
break
# renew lease and create renew callback
if msg is not None:
if _NON_P2P_CONCURRENT_DOWNLOADING:
_release_list.append(msg)
else:
_QUEUE_MESSAGES[msg.id] = msg
_CBHANDLES['queue_globalresources'] = loop.call_later(
15, _renew_queue_message_lease, loop, queue_client,
@@ -297,6 +303,7 @@ async def _direct_download_resources_async(
await _record_perf_async(loop, 'pull-end', 'img={},diff={}'.format(
image, diff))
# save docker image to seed to torrent
if _ENABLE_P2P:
await _record_perf_async(loop, 'save-start', 'img={}'.format(
image))
start = datetime.datetime.now()
@@ -314,13 +321,15 @@ async def _direct_download_resources_async(
diff = (datetime.datetime.now() - start).total_seconds()
print('took {} sec to save docker image {} to {}'.format(
diff, image, file.parent))
await _record_perf_async(loop, 'save-end', 'img={},diff={}'.format(
image, diff))
await _record_perf_async(
loop, 'save-end', 'img={},size={},diff={}'.format(
image, file.stat().st_size, diff))
else:
# TODO download via blob, explode uri to get container/blob
# use download to path into /tmp and move to _TORRENT_DIR
raise NotImplementedError()
# generate torrent file
if _ENABLE_P2P:
start = datetime.datetime.now()
future = loop.run_in_executor(None, generate_torrent, file)
torrent_file, torrent_b64, torrent_sha1 = await future
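Saving the image and generating its torrent are now both gated on _ENABLE_P2P, and generate_torrent is pushed to the default executor so hashing the saved tarball does not block the event loop. The commit does not show generate_torrent itself; a minimal sketch with the python libtorrent bindings, returning the (torrent_file, torrent_b64, torrent_sha1) tuple unpacked above (every detail here is assumed, not the actual implementation):

    import base64
    import hashlib
    import libtorrent

    def generate_torrent(incl_file):
        # sketch: single-file torrent over the saved image tarball
        fs = libtorrent.file_storage()
        libtorrent.add_files(fs, str(incl_file))
        tor = libtorrent.create_torrent(fs)
        libtorrent.set_piece_hashes(tor, str(incl_file.parent))
        data = libtorrent.bencode(tor.generate())
        torrent_file = incl_file.parent / (incl_file.name + '.torrent')
        with open(str(torrent_file), 'wb') as f:
            f.write(data)
        return (torrent_file, base64.b64encode(data).decode('ascii'),
                hashlib.sha1(data).hexdigest())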
@@ -354,6 +363,7 @@ async def _direct_download_resources_async(
diff = (datetime.datetime.now() - start).total_seconds()
print('took {} sec for {} torrent to start'.format(diff, resource))
# cancel callback
if _ENABLE_P2P or not _NON_P2P_CONCURRENT_DOWNLOADING:
_CBHANDLES['queue_globalresources'].cancel()
_CBHANDLES.pop('queue_globalresources')
# release queue message
@@ -477,10 +487,11 @@ async def _load_and_register_async(
resource.encode('utf8')).hexdigest()
image = resource[
resource.find(_DOCKER_TAG) + len(_DOCKER_TAG):]
await _record_perf_async(
loop, 'load-start', 'img={}'.format(image))
start = datetime.datetime.now()
file = _TORRENT_DIR / '{}.tar.gz'.format(resource_hash)
await _record_perf_async(
loop, 'load-start', 'img={},size={}'.format(
image, file.stat().st_size))
start = datetime.datetime.now()
print('loading docker image {} from {}'.format(
image, file))
proc = await \
@@ -561,8 +572,11 @@ async def download_monitor_async(
ipaddress: str,
nglobalresources: int):
# begin async manage torrent sessions
if _ENABLE_P2P:
asyncio.ensure_future(
manage_torrents_async(loop, table_client, ipaddress, nglobalresources))
manage_torrents_async(
loop, table_client, ipaddress, nglobalresources)
)
while True:
# check if there are any direct downloads
if len(_DIRECTDL) > 0:
@@ -577,6 +591,8 @@ def _check_resource_has_torrent(
table_client: azure.storage.table.TableService,
resource: str,
add_to_dict: bool=False) -> bool:
if not _ENABLE_P2P:
return False
try:
rk = hashlib.sha1(resource.encode('utf8')).hexdigest()
entity = table_client.get_entity(
@@ -619,6 +635,7 @@ def distribute_global_resources(
:param str ipaddress: ip address
"""
# set torrent session port listen
if _ENABLE_P2P:
print('creating torrent session on {}:{}'.format(
ipaddress, _DEFAULT_PORT_BEGIN))
_TORRENT_SESSION.listen_on(_DEFAULT_PORT_BEGIN, _DEFAULT_PORT_END)
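With P2P disabled, the session is never bound, so a non-P2P node opens no listener on the torrent port range. For context, the bootstrap implied by the surrounding globals (_TORRENT_SESSION, _DHT_ROUTERS, the port constants) looks roughly like this sketch; the port values and router list are assumptions, not taken from the commit:

    import libtorrent

    _DEFAULT_PORT_BEGIN = 6881  # assumed values
    _DEFAULT_PORT_END = 6891
    _TORRENT_SESSION = libtorrent.session()
    _TORRENT_SESSION.listen_on(_DEFAULT_PORT_BEGIN, _DEFAULT_PORT_END)
    for host, port in [('router.bittorrent.com', 6881)]:  # assumed router
        _TORRENT_SESSION.add_dht_router(host, port)
    _TORRENT_SESSION.start_dht()

This also ties in with the iptables --notrack rules for udp/6881 added in nodeprep.sh below.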
@@ -665,8 +682,11 @@ async def _get_ipaddress_async(loop: asyncio.BaseEventLoop) -> str:
def main():
"""Main function"""
global _ENABLE_P2P, _NON_P2P_CONCURRENT_DOWNLOADING
# get command-line args
args = parseargs()
_ENABLE_P2P = args.torrent
_NON_P2P_CONCURRENT_DOWNLOADING = args.nonp2pcd
# get event loop
if _ON_WINDOWS:
@@ -694,8 +714,11 @@ def main():
# get registry list
global _REGISTRIES, _SELF_REGISTRY_PTR
try:
_REGISTRIES = [line.rstrip('\n') for line in open(
'.cascade_private_registries.txt', 'r')]
except Exception:
pass
if len(_REGISTRIES) == 0:
_REGISTRIES.append('registry.hub.docker.com')
for i in range(0, len(_REGISTRIES)):
@@ -717,10 +740,17 @@ def parseargs():
"""
parser = argparse.ArgumentParser(
description='Cascade: Azure Batch P2P File/Image Replicator')
parser.set_defaults(ipaddress=None, nonp2pcd=False, torrent=True)
parser.add_argument(
'ipaddress', nargs='?', default=None, help='ip address')
'ipaddress', nargs='?', help='ip address')
parser.add_argument(
'--prefix', help='storage container prefix')
parser.add_argument(
'--no-torrent', action='store_false', dest='torrent',
help='disable peer-to-peer transfer')
parser.add_argument(
'--nonp2pcd', action='store_true',
help='non-p2p concurrent downloading')
return parser.parse_args()
if __name__ == '__main__':
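Net effect on the CLI: ipaddress stays positional, --no-torrent clears args.torrent (which set_defaults seeds as True), and --nonp2pcd opts into concurrent downloading when P2P is off. A hypothetical invocation that disables torrents but keeps concurrent pulls (address and prefix illustrative):

    ./cascade.py 10.0.0.4 --prefix shipyard --no-torrent --nonp2pcd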

graph.py (new executable file, 211 additions)
View file

@@ -0,0 +1,211 @@
#!/usr/bin/env python3
# stdlib imports
import argparse
import copy
import datetime
import json
import os
# non-stdlib imports
import azure.storage.table as azuretable
# global defines
_STORAGEACCOUNT = os.getenv('STORAGEACCOUNT')
_STORAGEACCOUNTKEY = os.getenv('STORAGEACCOUNTKEY')
_BATCHACCOUNT = None
_POOLID = None
_PARTITION_KEY = None
_TABLE_NAME = None
def _create_credentials(config: dict):
"""Create authenticated clients
:param dict config: configuration dict
:rtype: azure.storage.table.TableService
:return: table client
"""
global _STORAGEACCOUNT, _STORAGEACCOUNTKEY, _BATCHACCOUNT, _POOLID, \
_PARTITION_KEY, _TABLE_NAME
_STORAGEACCOUNT = config['credentials']['storage_account']
_STORAGEACCOUNTKEY = config['credentials']['storage_account_key']
_BATCHACCOUNT = config['credentials']['batch_account']
_POOLID = config['poolspec']['id']
_PARTITION_KEY = '{}${}'.format(_BATCHACCOUNT, _POOLID)
_TABLE_NAME = config['storage_entity_prefix'] + 'perf'
table_client = azuretable.TableService(
account_name=_STORAGEACCOUNT,
account_key=_STORAGEACCOUNTKEY,
endpoint_suffix=config['credentials']['storage_endpoint'])
return table_client
def _compute_delta_t(data, nodeid, event1, event1_pos, event2, event2_pos):
# attempt to get directly recorded diff
try:
return data[nodeid][event2][event2_pos]['message']['diff']
except (TypeError, KeyError):
return (data[nodeid][event2][event2_pos]['timestamp'] -
data[nodeid][event1][event1_pos]['timestamp']).total_seconds()
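_compute_delta_t prefers a diff recorded in the end event's message and falls back to subtracting the two event timestamps. A tiny illustration with fabricated data (the TypeError path fires because the end event's message is None):

    data = {'tvm0': {
        'pull:start': [{'timestamp': datetime.datetime(2016, 8, 12, 15, 0, 0),
                        'message': {'img': 'busybox'}}],
        'pull:end': [{'timestamp': datetime.datetime(2016, 8, 12, 15, 0, 2, 500000),
                      'message': None}],
    }}
    _compute_delta_t(data, 'tvm0', 'pull:start', 0, 'pull:end', 0)  # -> 2.5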
def _parse_message(msg):
parts = msg.split(',')
m = {}
for part in parts:
tmp = part.split('=')
if tmp[0] == 'size' or tmp[0] == 'nglobalresources':
m[tmp[0]] = int(tmp[1])
elif tmp[0] == 'diff':
m[tmp[0]] = float(tmp[1])
else:
m[tmp[0]] = tmp[1]
return m
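This inverts the 'k=v,k=v' perf messages cascade.py now emits (e.g. 'img={},size={},diff={}' above), coercing size and nglobalresources to int and diff to float:

    _parse_message('img=busybox,size=1113554,diff=0.42')
    # -> {'img': 'busybox', 'size': 1113554, 'diff': 0.42}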
def _diff_events(data, nodeid, event, end_event, timing, prefix):
for i in range(0, len(data[nodeid][event])):
subevent = data[nodeid][event][i]
img = subevent['message']['img']
# find end event for this img
found = False
for j in range(0, len(data[nodeid][end_event])):
pei = data[
nodeid][end_event][j]['message']['img']
if pei == img:
timing[prefix + img] = _compute_delta_t(
data, nodeid, event, i, end_event, j)
found = True
break
if not found:
raise RuntimeError(
'could not find corresponding event for {}:{}'.format(
subevent, img))
def graph_data(table_client):
print('graphing data from {} with pk={}'.format(
_TABLE_NAME, _PARTITION_KEY))
entities = table_client.query_entities(
_TABLE_NAME, filter='PartitionKey eq \'{}\''.format(_PARTITION_KEY))
data = {}
# process events
for ent in entities:
nodeid = ent['NodeId']
event = ent['Event']
if nodeid not in data:
data[nodeid] = {}
if event not in data[nodeid]:
data[nodeid][event] = []
ev = {
'timestamp': datetime.datetime.fromtimestamp(
float(ent['RowKey'])),
}
try:
ev['message'] = _parse_message(ent['Message'])
except KeyError:
ev['message'] = None
data[nodeid][event].append(ev)
del entities
for nodeid in data:
print(nodeid)
# calculate dt timings
timing = {
'docker_install': _compute_delta_t(
data, nodeid, 'nodeprep:start', 0, 'privateregistry:start', 0),
'private_registry_setup': _compute_delta_t(
data, nodeid, 'privateregistry:start', 0,
'privateregistry:end', 0),
'nodeprep': _compute_delta_t(
data, nodeid, 'nodeprep:start', 0, 'nodeprep:end', 0),
'global_resources_loaded': _compute_delta_t(
data, nodeid, 'cascade:start', 0, 'cascade:gr-done', 0),
}
data[nodeid].pop('nodeprep:start')
data[nodeid].pop('nodeprep:end')
data[nodeid].pop('privateregistry:start')
data[nodeid].pop('privateregistry:end')
data[nodeid].pop('cascade:start')
data[nodeid].pop('cascade:gr-done')
for event in data[nodeid]:
# print(event, data[nodeid][event])
if event == 'cascade:pull-start':
_diff_events(
data, nodeid, event, 'cascade:pull-end', timing, 'pull:')
elif event == 'cascade:save-start':
pass
elif event == 'cascade:save-end':
# message will contain size info
pass
elif event == 'cascade:torrent-start':
pass
elif event == 'cascade:load-start':
# load start also marks torrent-seed
# message will contain size info
pass
elif event == 'cascade:load-end':
pass
print(timing)
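Only the pull events are paired so far; the save/load/torrent branches are placeholders, consistent with the "WIP" note in the commit message. If they are later filled in following the pull pattern, each would presumably be one more _diff_events call, e.g. (sketch, not in this commit):

    elif event == 'cascade:load-start':
        _diff_events(
            data, nodeid, event, 'cascade:load-end', timing, 'load:')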
def merge_dict(dict1, dict2):
"""Recursively merge dictionaries: dict2 on to dict1. This differs
from dict.update() in that values that are dicts are recursively merged.
Note that only dict value types are merged, not lists, etc.
Code adapted from:
https://www.xormedia.com/recursively-merge-dictionaries-in-python/
:param dict dict1: dictionary to merge to
:param dict dict2: dictionary to merge with
:rtype: dict
:return: merged dictionary
"""
if not isinstance(dict1, dict) or not isinstance(dict2, dict):
raise ValueError('dict1 or dict2 is not a dictionary')
result = copy.deepcopy(dict1)
for k, v in dict2.items():
if k in result and isinstance(result[k], dict):
result[k] = merge_dict(result[k], v)
else:
result[k] = copy.deepcopy(v)
return result
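For example, merging a per-pool config onto global settings, which is exactly what main() below does with --settings and --config:

    merge_dict({'credentials': {'storage_account': 'sa'}},
               {'credentials': {'batch_account': 'ba'}})
    # -> {'credentials': {'storage_account': 'sa', 'batch_account': 'ba'}}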
def main():
"""Main function"""
# get command-line args
args = parseargs()
if args.settings is None:
raise ValueError('global settings not specified')
if args.config is None:
raise ValueError('config settings for action not specified')
with open(args.settings, 'r') as f:
config = json.load(f)
with open(args.config, 'r') as f:
config = merge_dict(config, json.load(f))
# create storage credentials
table_client = _create_credentials(config)
# graph data
graph_data(table_client)
def parseargs():
"""Parse program arguments
:rtype: argparse.Namespace
:return: parsed arguments
"""
parser = argparse.ArgumentParser(
description='Shipyard perf graph generator')
parser.add_argument(
'--settings',
help='global settings json file config. required for all actions')
parser.add_argument(
'--config',
help='json file config for option. required for all actions')
return parser.parse_args()
if __name__ == '__main__':
main()
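Both flags are required by main(), which raises ValueError if either is missing; a hypothetical run (file names illustrative):

    ./graph.py --settings config.json --config pool.json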

View file

@@ -5,6 +5,7 @@ import argparse
import datetime
import os
# non-stdlib imports
import azure.common
import azure.storage.table as azuretable
# global defines
@@ -37,7 +38,15 @@ def process_event(table_client, table_name, source, event, ts, message):
'NodeId': _NODEID,
'Message': message,
}
while True:
try:
table_client.insert_entity(table_name, entity)
break
except azure.common.AzureConflictHttpError:
if not isinstance(ts, float):
ts = float(ts)
ts += 0.000001
entity['RowKey'] = str(ts)
def main():
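Since RowKey is the event timestamp as a string (graph.py reads it back via datetime.datetime.fromtimestamp(float(ent['RowKey']))), two events landing on the same microsecond for one partition collide; the retry loop nudges the timestamp by 1 µs until the insert succeeds. A fragment of how one iteration plays out (values illustrative):

    ts = 100.000001             # conflicting RowKey '100.000001'
    ts += 0.000001
    entity['RowKey'] = str(ts)  # '100.000002', then retried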

View file

@@ -2,6 +2,7 @@
set -o pipefail
nonp2pcd=
offer=
p2p=0
prefix=
@@ -10,12 +11,13 @@ privateregarchive=
privateregimageid=
sku=
while getopts "h?a:i:o:p:r:s:t" opt; do
while getopts "h?a:ci:o:p:r:s:t" opt; do
case "$opt" in
h|\?)
echo "nodeprep.sh parameters"
echo ""
echo "-a [registry archive] registry archive file"
echo "-c concurrent downloading in non-p2p mode"
echo "-i [registry image id] registry image id"
echo "-o [offer] VM offer"
echo "-p [prefix] storage container prefix"
@@ -28,6 +30,9 @@ while getopts "h?a:i:o:p:r:s:t" opt; do
a)
privateregarchive="--regarchive $OPTARG"
;;
c)
nonp2pcd="--nonp2pcd"
;;
i)
privateregimageid="--regimageid $OPTARG"
;;
@@ -147,11 +152,14 @@ fi
./perf.py nodeprep end $prefix
# enable p2p sharing
torrentflag=
if [ $p2p -eq 1 ]; then
# disable DHT connection tracking
iptables -t raw -I PREROUTING -p udp --dport 6881 -j CT --notrack
iptables -t raw -I OUTPUT -p udp --sport 6881 -j CT --notrack
# start cascade
./perf.py cascade start $prefix --message "ipaddress=$ipaddress"
./cascade.py $ipaddress $prefix > cascade.log &
else
torrentflag="--no-torrent"
fi
# start cascade
./perf.py cascade start $prefix
./cascade.py $ipaddress $prefix $torrentflag $nonp2pcd > cascade.log &
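The new -c switch forwards --nonp2pcd to cascade.py, while omitting -t leaves p2p=0 and appends --no-torrent; add_pool below assembles both flags into the start task. A hypothetical non-P2P start task command (offer/sku values illustrative):

    ./nodeprep.sh -o UbuntuServer -s 16.04.0-LTS -c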

View file

@@ -303,10 +303,10 @@ def main():
# get private registries
registries = get_private_registries(table_client)
if len(registries) > 0:
# modify init scripts with registry info
register_insecure_registries(
args.offer.lower(), args.sku.lower(), registries)
# write registry file
with open('.cascade_private_registries.txt', 'w') as f:
for registry in registries:

View file

@@ -105,10 +105,7 @@ def _create_credentials(config: dict) -> tuple:
_BATCHACCOUNTKEY)
batch_client = batch.BatchServiceClient(
credentials,
base_url='https://{}.{}.{}'.format(
config['credentials']['batch_account'],
config['credentials']['batch_account_region'],
config['credentials']['batch_endpoint']))
base_url=config['credentials']['batch_account_service_url'])
blob_client = azureblob.BlockBlobService(
account_name=_STORAGEACCOUNT,
account_key=_STORAGEACCOUNTKEY,
@@ -189,9 +186,17 @@ def add_pool(
offer = config['poolspec']['offer']
sku = config['poolspec']['sku']
try:
p2p = config['peer_to_peer']['enabled']
p2p = config['data_replication']['peer_to_peer']['enabled']
except KeyError:
p2p = True
if not p2p:
try:
nonp2pcd = config[
'data_replication']['non_peer_to_peer_concurrent_downloading']
except KeyError:
nonp2pcd = True
else:
nonp2pcd = False
try:
preg = 'private' in config['docker_registry']
pcont = config['docker_registry']['private']['container']
@@ -240,7 +245,7 @@ def add_pool(
target_dedicated=config['poolspec']['vm_count'],
enable_inter_node_communication=True,
start_task=batchmodels.StartTask(
command_line='{} -o {} -s {}{}{}{}{}{}'.format(
command_line='{} -o {} -s {}{}{}{}{}{}{}'.format(
_NODEPREP_FILE[0], offer, sku,
' -p {}'.format(prefix) if prefix else '',
' -r {}'.format(pcont) if preg else '',
@@ -248,7 +253,8 @@ def add_pool(
if _REGISTRY_FILE[0] else '',
' -i {}'.format(_REGISTRY_FILE[2])
if _REGISTRY_FILE[2] else '',
' -t' if p2p else ''
' -t' if p2p else '',
' -c' if nonp2pcd else '',
),
run_elevated=True,
wait_for_success=True,
@@ -494,6 +500,18 @@ def populate_queues(queue_client, table_client, config):
queue_client.put_message(
_STORAGE_CONTAINERS['queue_registry'], 'create-{}'.format(i))
# populate global resources
try:
p2p = config['data_replication']['peer_to_peer']['enabled']
except KeyError:
p2p = True
if p2p:
try:
p2pcsd = config['data_replication']['peer_to_peer'][
'concurrent_source_downloads']
except KeyError:
p2pcsd = 1
else:
p2pcsd = 1
try:
for gr in config['global_resources']:
table_client.insert_or_replace_entity(
@@ -504,6 +522,7 @@ def populate_queues(queue_client, table_client, config):
'Resource': gr,
}
)
for _ in range(0, p2pcsd):
queue_client.put_message(
_STORAGE_CONTAINERS['queue_globalresources'], gr)
except KeyError:
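These knobs come from a reorganized config section: peer_to_peer now lives under data_replication, non_peer_to_peer_concurrent_downloading is consulted only when P2P is disabled, and concurrent_source_downloads controls how many copies of each global-resource message are enqueued (the range(0, p2pcsd) loop lets that many nodes pull the same resource from the source concurrently). A hypothetical config fragment using only the keys read above:

    "data_replication": {
        "peer_to_peer": {
            "enabled": true,
            "concurrent_source_downloads": 2
        },
        "non_peer_to_peer_concurrent_downloading": false
    }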