diff --git a/.gitignore b/.gitignore index e649f55..9681806 100644 --- a/.gitignore +++ b/.gitignore @@ -103,8 +103,6 @@ venv.bak/ # mypy .mypy_cache/ -*/TensorFlow_benchmark/src/* - */.vscode/* .vscode/settings.json diff --git a/Makefile b/Makefile index 2de2625..85f2eaf 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ include .dev_env cookiecutter: ifdef subscription_id - cd ../ && cookiecutter AMLDistCC --no-input \ + cd ../ && cookiecutter DistributedDeepLearning --no-input \ subscription_id=${subscription_id} \ resource_group=mstestdistrg \ data=/mnt/imagenet_test \ diff --git a/{{cookiecutter.project_name}}/TensorFlow_benchmark/src/.gitkeep b/{{cookiecutter.project_name}}/TensorFlow_benchmark/src/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.project_name}}/control/src/aml_compute.py b/{{cookiecutter.project_name}}/control/src/aml_compute.py index 58539a9..978375b 100644 --- a/{{cookiecutter.project_name}}/control/src/aml_compute.py +++ b/{{cookiecutter.project_name}}/control/src/aml_compute.py @@ -79,7 +79,7 @@ def _prepare_environment_definition(dependencies_file, distributed): env_def.python.conda_dependencies = conda_dep env_def.docker.enabled = True env_def.docker.gpu_support = True - env_def.docker.base_image = azureml.core.runconfig.DEFAULT_GPU_IMAGE + env_def.docker.base_image = "mcr.microsoft.com/azureml/base-gpu:intelmpi2018.3-cuda9.0-cudnn7-ubuntu16.04" env_def.docker.shm_size = "8g" env_def.environment_variables["NCCL_SOCKET_IFNAME"] = "eth0" env_def.environment_variables["NCCL_IB_DISABLE"] = 1 @@ -371,7 +371,7 @@ class TFExperimentCLI(ExperimentCLI): script_params, node_count=node_count, process_count_per_node=process_count_per_node, - docker_args=docker_args, + docker_args=docker_args ) # TEMPORARY HACK: Bugs with AML necessitate the code below, once fixed remove estimator.conda_dependencies.remove_pip_package("horovod==0.15.2")