This commit is contained in:
miguelgfierro 2018-09-05 16:29:40 +01:00
Родитель dd70f958ca
Коммит e1c9753be7
1 изменённых файлов: 61 добавлений и 13 удалений

Просмотреть файл

@ -141,6 +141,50 @@ def generate_job_dict(image_name,
}
def generate_job_dict_cntk(image_name,
                           command,
                           node_count=2,
                           processes_per_node=4,
                           script_path="$AZ_BATCHAI_INPUT_SCRIPTS/imagenet_cntk.py"):
    """Build the Batch AI job manifest dictionary for a CNTK job.

    Args:
        image_name: Container image stored under
            ``containerSettings.imageSourceRegistry.image``.
        command: Not used when building the manifest; the entry point is
            taken from ``script_path``. The parameter is kept so existing
            callers keep working.
            NOTE(review): confirm whether this should be forwarded to the
            job (it is currently ignored).
        node_count: Value stored as ``nodeCount``.
        processes_per_node: Value stored as ``cntkSettings.processCount``.
        script_path: Value stored as ``cntkSettings.pythonScriptFilePath``.
            Defaults to the script path that used to be hard-coded here.

    Returns:
        dict: The job manifest, ready to be serialized as JSON.
    """
    return {
        "$schema": "https://raw.githubusercontent.com/Azure/BatchAI/master/schemas/2018-03-01/job.json",
        "properties": {
            "nodeCount": node_count,
            "cntkSettings": {
                "pythonScriptFilePath": script_path,
                "processCount": processes_per_node,
            },
            "environmentVariables": [
                {
                    "name": "DISTRIBUTED",
                    "value": "True",
                },
            ],
            "stdOutErrPathPrefix": "$AZ_BATCHAI_MOUNT_ROOT/extfs",
            "inputDirectories": [
                {
                    "id": "SCRIPTS",
                    "path": "$AZ_BATCHAI_MOUNT_ROOT/extfs/scripts",
                },
                # TRAIN and TEST both point at the same mount path.
                {
                    "id": "TRAIN",
                    "path": "$AZ_BATCHAI_MOUNT_ROOT/imagenet",
                },
                {
                    "id": "TEST",
                    "path": "$AZ_BATCHAI_MOUNT_ROOT/imagenet",
                },
            ],
            "outputDirectories": [
                {
                    "id": "MODEL",
                    "pathPrefix": "$AZ_BATCHAI_MOUNT_ROOT/extfs",
                    "pathSuffix": "Models",
                },
            ],
            "containerSettings": {
                "imageSourceRegistry": {
                    "image": image_name,
                },
            },
        },
    }
def write_json_to_file(json_dict, filename, mode='w'):
    """Serialize ``json_dict`` as JSON and write it to ``filename``.

    The output uses a 4-space indent and sorted keys. Serialization happens
    before the file is opened, so a value that cannot be encoded raises
    without truncating or partially writing ``filename``.

    Args:
        json_dict: Object to serialize.
        filename: Path of the file to write.
        mode: Mode passed to ``open`` (defaults to ``'w'``).

    Raises:
        TypeError: If ``json_dict`` contains a value that ``json`` cannot
            serialize.
    """
    # Encode first: opening in 'w' mode truncates the target immediately,
    # which would destroy an existing file if encoding then failed.
    serialized = json.dumps(json_dict, indent=4, sort_keys=True)
    with open(filename, mode) as outfile:
        outfile.write(serialized)
@ -155,8 +199,10 @@ def synthetic_data_job(image_name,
total_processes=None,
processes_per_node=4,
synthetic_length=1281167):
logger.info('Creating manifest for job with synthetic data {} with {} image...'.format(filename, image_name))
total_processes = processes_per_node * node_count if total_processes is None else total_processes
logger.info('Creating manifest for job with synthetic data {} with {} image...'.format(
filename, image_name))
total_processes = processes_per_node * \
node_count if total_processes is None else total_processes
command = _prepare_command(mpitype,
total_processes,
processes_per_node,
@ -178,8 +224,10 @@ def imagenet_data_job(image_name,
node_count=2,
total_processes=None,
processes_per_node=4):
logger.info('Creating manifest for job with real data {} with {} image...'.format(filename, image_name))
total_processes = processes_per_node * node_count if total_processes is None else total_processes
logger.info('Creating manifest for job with real data {} with {} image...'.format(
filename, image_name))
total_processes = processes_per_node * \
node_count if total_processes is None else total_processes
command = _prepare_command(mpitype,
total_processes,
processes_per_node,