Updates for final working version
This commit is contained in:
Родитель
d279779908
Коммит
dec79e118b
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
2
Makefile
2
Makefile
|
@ -41,6 +41,8 @@ create-service-principal:
|
|||
select-subscription:
|
||||
az login -o table
|
||||
az account set --subscription "$(SELECTED_SUBSCRIPTION)"
|
||||
ln -s /anaconda/envs/py35/bin/conda /home/mat/repos/deep_bait/envs/default/bin/conda
|
||||
|
||||
|
||||
create-storage:
|
||||
@echo "Creating storage account"
|
||||
|
|
|
@ -45,6 +45,7 @@ variables:
|
|||
TENANT:
|
||||
SUBSCRIPTION_ID:
|
||||
STORAGE_ACCOUNT_KEY:
|
||||
|
||||
downloads:
|
||||
DATA:
|
||||
filename: data/cifar-10-python.tar.gz
|
||||
|
@ -75,7 +76,9 @@ env_specs:
|
|||
packages:
|
||||
- anaconda-project
|
||||
- pip:
|
||||
- msrest==0.4.29
|
||||
- olefile
|
||||
- keyring
|
||||
- msrestazure
|
||||
- fire==0.1.2
|
||||
- toolz==0.8.2
|
||||
- requests==2.18.4
|
||||
|
|
|
@ -395,7 +395,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.2"
|
||||
"version": "3.6.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
|
@ -41,7 +41,7 @@
|
|||
"from os import path\n",
|
||||
"from utils import cifar_for_library, yield_mb, create_logger, Timer\n",
|
||||
"from nb_logging import NotebookLogger, output_to, error_to\n",
|
||||
"from gpumon import db_log_context\n",
|
||||
"from gpumon.influxdb import log_context\n",
|
||||
"import codecs\n",
|
||||
"\n",
|
||||
"from influxdb import InfluxDBClient"
|
||||
|
@ -301,7 +301,7 @@
|
|||
],
|
||||
"source": [
|
||||
"with Timer() as t:\n",
|
||||
" with db_log_context(LOGGER_URL, '8086', LOGGER_USRENAME, LOGGER_PASSWORD, LOGGER_DB, LOGGER_SERIES, \n",
|
||||
" with log_context(LOGGER_URL, LOGGER_USRENAME, LOGGER_PASSWORD, LOGGER_DB, LOGGER_SERIES, \n",
|
||||
" node_id=node_id, task_id=task_id, job_id=job_id):\n",
|
||||
" for j in range(EPOCHS):\n",
|
||||
" for data, label in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):\n",
|
||||
|
@ -377,7 +377,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.4"
|
||||
"version": "3.6.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
|
@ -1,18 +1,18 @@
|
|||
''' Script that sets everything up and introduces helper functions into the namespace
|
||||
'''
|
||||
|
||||
import logging
|
||||
logging.basicConfig(level=logging.ERROR)
|
||||
import os
|
||||
from glob import iglob
|
||||
from itertools import chain
|
||||
from os import path
|
||||
from pprint import pprint
|
||||
|
||||
import utilities as ut
|
||||
import azure.mgmt.batchai.models as models
|
||||
|
||||
import utilities as ut
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
NODE_COUNT = 10
|
||||
CLUSTER_NAME = 'mync6'
|
||||
|
@ -274,7 +274,7 @@ def download_files(job_name, output_id, output_folder=None):
|
|||
if output_folder:
|
||||
logger.info('Downloading files to {}'.format(output_folder))
|
||||
|
||||
files = client.jobs.list_output_files(config.group_name, job_name, models.JobsListOutputFilesOptions(output_id))
|
||||
files = client.jobs.list_output_files(config.group_name, job_name, models.JobsListOutputFilesOptions(outputdirectoryid=output_id))
|
||||
for file in files:
|
||||
logger.info('Downloading {}'.format(file.name))
|
||||
file_name = path.join(output_folder, file.name) if output_folder else file.name
|
||||
|
|
10
utilities.py
10
utilities.py
|
@ -12,8 +12,8 @@ import requests
|
|||
from azure.common.credentials import ServicePrincipalCredentials
|
||||
from azure.storage.file import FileService
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
POLLING_INTERVAL_SEC = 5
|
||||
|
||||
|
@ -72,7 +72,7 @@ class OutputStreamer:
|
|||
files = self.client.jobs.list_output_files(
|
||||
self.resource_group, self.job_name,
|
||||
models.JobsListOutputFilesOptions(
|
||||
self.output_directory_id))
|
||||
outputdirectoryid=self.output_directory_id))
|
||||
if not files:
|
||||
return
|
||||
else:
|
||||
|
@ -248,12 +248,12 @@ def create_job(config, cluster_id, job_name, image_name, command, number_of_vms=
|
|||
|
||||
parameters = models.job_create_parameters.JobCreateParameters(
|
||||
location=config.location,
|
||||
cluster=models.ResourceId(cluster_id),
|
||||
cluster=models.ResourceId(id=cluster_id),
|
||||
node_count=number_of_vms,
|
||||
input_directories=input_directories,
|
||||
std_out_err_path_prefix=std_output_path_prefix,
|
||||
output_directories=output_directories,
|
||||
container_settings=models.ContainerSettings(models.ImageSourceRegistry(image=image_name)),
|
||||
container_settings=models.ContainerSettings(image_source_registry=models.ImageSourceRegistry(image=image_name)),
|
||||
custom_toolkit_settings=models.CustomToolkitSettings(command_line=command))
|
||||
|
||||
|
||||
|
@ -268,7 +268,7 @@ def wait_for_job(config, job_name):
|
|||
|
||||
def setup_cluster(config):
|
||||
client = client_from(config)
|
||||
container_setting_for = lambda img: models.ContainerSettings(image_source_registry=models.ImageSourceRegistry(img))
|
||||
container_setting_for = lambda img: models.ContainerSettings(image_source_registry=models.ImageSourceRegistry(image=img))
|
||||
container_settings = [container_setting_for(img) for img in config.image_names]
|
||||
|
||||
volumes = create_volume(config.storage_account['name'],config.storage_account['key'], config.fileshare_name, config.fileshare_mount_point)
|
||||
|
|
Загрузка…
Ссылка в новой задаче