This commit is contained in:
miguelgfierro 2018-09-03 16:48:03 +01:00
Родитель f4e048c1b7 c1e29faa51
Коммит 94937857cf
3 изменённых файлов: 19 добавлений и 114 удалений

Просмотреть файл

@@ -1,74 +1,6 @@
# Ubuntu 16.04, CUDA 9.0
FROM nvidia/cuda:9.0-runtime-ubuntu16.04
# NOTE(review): two FROM lines with no "AS" stage names — the second FROM
# discards everything from the first stage. This looks like a diff render that
# merged the removed base image with the added one; confirm which is intended.
FROM microsoft/cntk:2.5.1-gpu-python3.5-cuda9.0-cudnn7.0
# Version pins consumed by the apt-get and pip install lines below.
ENV CNTK_VERSION=2.5.1
ENV CUDNN_VERSION=7.0.5.15-1+cuda9.0
ENV PYTHON_VERSION=3.5
# Dotless Python version used in the CNTK wheel filename (cp35).
ENV PY_VERSION=35
# NOTE(review): NCCL_VERSION is declared but the libnccl2 install below is
# unpinned — verify whether the pin was meant to be applied there.
ENV NCCL_VERSION=2.1.15-1+cuda9.0
# Register NVIDIA's machine-learning apt repo (source of cuDNN/NCCL packages).
RUN echo "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list
# Build toolchain, OpenMPI headers, image libs, ssh, and the pinned
# Python/cuDNN packages needed to build and run distributed CNTK.
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
cmake \
sudo \
git \
curl \
wget \
ca-certificates \
libopenmpi-dev \
libcudnn7=$CUDNN_VERSION \
libnccl2 \
libnccl-dev \
libjpeg-dev \
libpng-dev \
ssh \
python$PYTHON_VERSION \
python$PYTHON_VERSION-dev
# Make the bare "python" command resolve to the pinned interpreter.
RUN ln -s /usr/bin/python$PYTHON_VERSION /usr/bin/python
# Bootstrap pip for that interpreter, then clean up the installer.
RUN curl -O https://bootstrap.pypa.io/get-pip.py && \
python get-pip.py && \
rm get-pip.py
# Install Open MPI 3.0.0 from source; --enable-orterun-prefix-by-default lets
# remote ranks find the install without PATH/LD_LIBRARY_PATH tweaks.
RUN mkdir /tmp/openmpi && \
cd /tmp/openmpi && \
wget https://www.open-mpi.org/software/ompi/v3.0/downloads/openmpi-3.0.0.tar.gz && \
tar zxf openmpi-3.0.0.tar.gz && \
cd openmpi-3.0.0 && \
./configure --enable-orterun-prefix-by-default && \
make -j $(nproc) all && \
make install && \
ldconfig && \
rm -rf /tmp/openmpi
# Create a wrapper for OpenMPI to allow running as root by default
# (containers typically run as root, which mpirun otherwise refuses).
RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \
echo '#!/bin/bash' > /usr/local/bin/mpirun && \
echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \
chmod a+x /usr/local/bin/mpirun
# Configure OpenMPI to run good defaults:
# --bind-to none --map-by slot --mca btl_tcp_if_exclude lo,docker0
RUN echo "hwloc_base_binding_policy = none" >> /usr/local/etc/openmpi-mca-params.conf && \
echo "rmaps_base_mapping_policy = slot" >> /usr/local/etc/openmpi-mca-params.conf && \
echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf
# Set default NCCL parameters: verbose logging, and never route ring traffic
# over the docker bridge interface.
RUN echo NCCL_DEBUG=INFO >> /etc/nccl.conf && \
echo NCCL_SOCKET_IFNAME=^docker0 >> /etc/nccl.conf
# Install CNTK from the official GPU wheel plus the Python data-science stack.
RUN pip install --no-cache-dir https://cntk.ai/PythonWheel/GPU/cntk_gpu-$CNTK_VERSION-cp$PY_VERSION-cp${PY_VERSION}m-linux_x86_64.whl h5py scipy jupyter ipykernel numpy toolz pandas scikit-learn
# Allow OpenSSH to talk to containers without asking for confirmation
# (mpirun launches remote ranks over ssh; an interactive host-key prompt
# would hang the job).
RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \
echo "    StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
WORKDIR /root
# Expose the CNTK binaries and libraries shipped in the base image.
ENV PATH /cntk/cntk/bin:/root/anaconda3/envs/cntk-py35/bin:$PATH
ENV LD_LIBRARY_PATH /cntk/cntk/lib:/cntk/cntk/dependencies/lib:$LD_LIBRARY_PATH

Просмотреть файл

@@ -1,46 +1,9 @@
# Help text printed by `make help`. Comments must stay outside the define
# body — anything between define/endef becomes part of the message.
define PROJECT_HELP_MSG
Usage:
make help show this message
make build make CNTK image with Open MPI
make run-mpi run training using Open MPI image
make push push CNTK image with Open MPI
endef
export PROJECT_HELP_MSG
# Host directory with the ImageNet data that is bind-mounted into containers.
DATA_DIR:=/mnt/imagenet
PWD:=$(shell pwd)
# Synthetic-data switches; presumably read by the training script — confirm.
FAKE:='False'
FAKE_DATA_LENGTH:=1281167
image-open:=hoaphumanoid/cntk:distributed
open-path:=$(PWD)/Docker
# \$$ keeps the $AZ_BATCHAI_... expansion for the shell at run time instead of make.
script:=\$$AZ_BATCHAI_INPUT_SCRIPTS/imagenet_cntk.py
include ../include/build.mk
# Bind mounts: training script, input data, model checkpoints, job output.
setup_volumes:=-v $(PWD)/src/execution:/mnt/script \
-v $(DATA_DIR):/mnt/input \
-v $(DATA_DIR)/temp/model:/mnt/model \
-v $(DATA_DIR)/temp/output:/mnt/output
# Batch-AI-style environment variables the training script reads.
setup_environment:=--env AZ_BATCHAI_INPUT_TRAIN='/mnt/input' \
--env AZ_BATCHAI_INPUT_TEST='/mnt/input' \
--env AZ_BATCHAI_OUTPUT_MODEL='/mnt/model' \
--env AZ_BATCHAI_JOB_TEMP_DIR='/mnt/output'
name_prefix:=hoaphumanoid
# Run training under mpirun with 2 local processes; $(1) is the image name.
define execute_mpi
nvidia-docker run -it \
$(setup_volumes) \
$(setup_environment) \
$(1) bash -c "mpirun -np 2 -H localhost:2 python /mnt/script/ImagenetEstimatorCNTK.py"
endef
help:
echo "$$PROJECT_HELP_MSG" | less
build:
docker build -t $(name_prefix)/cntk Docker
run-mpi:
$(call execute_mpi, $(name_prefix)/cntk)
push:
docker push $(name_prefix)/cntk
# NOTE(review): run-mpi is missing from .PHONY, and the recipe lines above
# appear to have lost their leading tabs in this diff rendering — verify both
# against the repository copy before reuse.
.PHONY: help build push

Просмотреть файл

@@ -69,6 +69,13 @@ define execute
$(1) bash -c "python $(2)"
endef
# Start a Jupyter notebook server inside image $(1), publishing port 8888.
# --shm-size=8g enlarges /dev/shm (docker's 64MB default is too small for
# data-loader workers). Comments stay outside the define body on purpose.
define execute_jupyter
nvidia-docker run -p 8888:8888 -it \
--shm-size="8g" \
$(setup_volumes) \
$(setup_environment) \
$(1) bash -c "jupyter notebook --ip=* --no-browser --allow-root"
endef
# Page the project help message (PROJECT_HELP_MSG is defined/exported earlier
# in this makefile, outside this hunk).
help:
echo "$$PROJECT_HELP_MSG" | less
@@ -87,6 +94,9 @@ run-mpi-intel:
# Single-process training run of $(script) inside $(image-open), via the
# execute helper defined earlier in this makefile.
run:
$(call execute, $(image-open), $(script))
# Interactive Jupyter session in the same image.
run-jupyter:
$(call execute_jupyter, $(image-open))
# Publish the image to the registry.
push:
docker push $(image-open)