# The devel image is needed because /usr/include/cudnn.h is required
# for compiling libctc_decoder_with_kenlm.so
FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04

# >> START Install base software

# Get basic packages
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    wget \
    git \
    python3 \
    python3-dev \
    python3-pip \
    python3-wheel \
    python3-numpy \
    libcurl3-dev \
    ca-certificates \
    gcc \
    sox \
    libsox-fmt-mp3 \
    htop \
    nano \
    swig \
    cmake \
    libboost-all-dev \
    zlib1g-dev \
    libbz2-dev \
    liblzma-dev \
    locales \
    pkg-config \
    libpng-dev \
    libsox-dev \
    libmagic-dev \
    libgsm1-dev \
    libltdl-dev \
    openjdk-8-jdk \
    bash-completion \
    g++ \
    unzip

RUN ln -s -f /usr/bin/python3 /usr/bin/python

# Install NCCL 2.3
RUN apt-get install -qq -y --allow-downgrades --allow-change-held-packages libnccl2=2.3.7-1+cuda10.0 libnccl-dev=2.3.7-1+cuda10.0

# Install Bazel
RUN curl -LO "https://github.com/bazelbuild/bazel/releases/download/0.24.1/bazel_0.24.1-linux-x86_64.deb"
RUN dpkg -i bazel_*.deb

# Install CUDA CLI Tools
RUN apt-get install -qq -y cuda-command-line-tools-10-0

# Install pip
RUN wget https://bootstrap.pypa.io/get-pip.py && \
    python3 get-pip.py && \
    rm get-pip.py

# << END Install base software

# >> START Configure TensorFlow Build

# Clone TensorFlow from Mozilla repo
RUN git clone https://github.com/mozilla/tensorflow/
WORKDIR /tensorflow
RUN git checkout r1.14

# GPU Environment Setup
ENV TF_NEED_CUDA 1
ENV TF_CUDA_PATHS "/usr/local/cuda,/usr/lib/x86_64-linux-gnu/"
ENV TF_CUDA_VERSION 10.0
ENV TF_CUDNN_VERSION 7
ENV TF_CUDA_COMPUTE_CAPABILITIES 6.0
ENV TF_NCCL_VERSION 2.3

# Common Environment Setup
ENV TF_BUILD_CONTAINER_TYPE GPU
ENV TF_BUILD_OPTIONS OPT
ENV TF_BUILD_DISABLE_GCP 1
ENV TF_BUILD_ENABLE_XLA 0
ENV TF_BUILD_PYTHON_VERSION PYTHON3
ENV TF_BUILD_IS_OPT OPT
ENV TF_BUILD_IS_PIP PIP

# Other Parameters
ENV CC_OPT_FLAGS -mavx -mavx2 -msse4.1 -msse4.2 -mfma
ENV TF_NEED_GCP 0
ENV TF_NEED_HDFS 0
ENV TF_NEED_JEMALLOC 1
ENV TF_NEED_OPENCL 0
ENV TF_CUDA_CLANG 0
ENV TF_NEED_MKL 0
ENV TF_ENABLE_XLA 0
ENV TF_NEED_AWS 0
ENV TF_NEED_KAFKA 0
ENV TF_NEED_NGRAPH 0
ENV TF_DOWNLOAD_CLANG 0
ENV TF_NEED_TENSORRT 0
ENV TF_NEED_GDR 0
ENV TF_NEED_VERBS 0
ENV TF_NEED_OPENCL_SYCL 0

ENV PYTHON_BIN_PATH /usr/bin/python3.6
ENV PYTHON_LIB_PATH /usr/lib/python3.6/dist-packages

# << END Configure TensorFlow Build

# >> START Configure Bazel

# Running bazel inside a `docker build` command causes trouble, cf:
#   https://github.com/bazelbuild/bazel/issues/134
# The easiest solution is to set up a bazelrc file forcing --batch.
RUN echo "startup --batch" >>/etc/bazel.bazelrc
# Similarly, we need to work around sandboxing issues:
#   https://github.com/bazelbuild/bazel/issues/418
RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
    >>/etc/bazel.bazelrc

# Put CUDA libraries where they are expected to be
RUN mkdir /usr/local/cuda/lib && \
    ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/local/cuda/lib/libnccl.so.2 && \
    ln -s /usr/include/nccl.h /usr/local/cuda/include/nccl.h && \
    ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
    ln -s /usr/include/cudnn.h /usr/local/cuda/include/cudnn.h

# Set library paths
ENV LD_LIBRARY_PATH $LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/lib/x86_64-linux-gnu/:/usr/local/cuda/lib64/stubs/

# << END Configure Bazel

# Copy DeepSpeech repo contents to container's /DeepSpeech
COPY . /DeepSpeech/
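# NOTE: the COPY above pulls in the whole build context. If the local checkout
# contains large artifacts, a .dockerignore keeps builds fast; the entries
# below are illustrative placeholders, not files this repo is known to contain:
#   # .dockerignore
#   # data/
#   # exports/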
# Alternative clone from GitHub
# RUN apt-get update && apt-get install -y git-lfs
# WORKDIR /
# RUN git clone https://github.com/mozilla/DeepSpeech.git

WORKDIR /DeepSpeech
RUN pip3 --no-cache-dir install -r requirements.txt

# Link DeepSpeech native_client libs to tf folder
RUN ln -s /DeepSpeech/native_client /tensorflow

# >> START Build and bind

WORKDIR /tensorflow

# Workaround for the "script not found" issue: https://github.com/tensorflow/tensorflow/issues/471
RUN ./configure

# Using CPU optimizations:
#   -mtune=generic -march=x86-64 -msse -msse2 -msse3 -msse4.1 -msse4.2 -mavx
# Adding the --config=cuda flag to build using CUDA.
# Passing LD_LIBRARY_PATH is required because Bazel doesn't pick it up from the environment.

# Build DeepSpeech
RUN bazel build \
    --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \
    --config=monolithic --config=cuda -c opt --copt=-O3 \
    --copt="-D_GLIBCXX_USE_CXX11_ABI=0" \
    --copt=-mtune=generic --copt=-march=x86-64 \
    --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx \
    --copt=-fvisibility=hidden \
    //native_client:libdeepspeech.so \
    //native_client:generate_trie \
    --verbose_failures \
    --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}

###
### Using TensorFlow upstream should work
###
# # Build TF pip package
# RUN bazel build --config=opt --config=cuda --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-mtune=generic --copt=-march=x86-64 --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx //tensorflow/tools/pip_package:build_pip_package --verbose_failures --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
#
# # Build wheel
# RUN bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
#
# # Install tensorflow from our custom wheel
# RUN pip3 install /tmp/tensorflow_pkg/*.whl

# Copy built libs to /DeepSpeech/native_client
RUN cp /tensorflow/bazel-bin/native_client/generate_trie /DeepSpeech/native_client/ \
    && cp /tensorflow/bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/

# Install TensorFlow
WORKDIR /DeepSpeech/
RUN pip3 install tensorflow-gpu==1.14.0

# Make DeepSpeech and install Python bindings
ENV TFDIR /tensorflow
WORKDIR /DeepSpeech/native_client
RUN make deepspeech

WORKDIR /DeepSpeech/native_client/python
RUN make bindings
RUN pip3 install dist/deepspeech*

WORKDIR /DeepSpeech/native_client/ctcdecode
RUN make
RUN pip3 install dist/*.whl

# << END Build and bind

# Allow Python to print UTF-8
ENV PYTHONIOENCODING UTF-8

# Build KenLM in the /DeepSpeech/native_client/kenlm folder
WORKDIR /DeepSpeech/native_client
RUN rm -rf kenlm \
    && git clone --depth 1 https://github.com/kpu/kenlm && cd kenlm \
    && mkdir -p build \
    && cd build \
    && cmake .. \
    && make -j 4

# Done
WORKDIR /DeepSpeech
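# Example usage (illustrative, not part of the build; the image tag is a
# placeholder and the GPU flag depends on your Docker/nvidia-docker setup):
#   docker build -t deepspeech-gpu .
#   docker run --runtime=nvidia -it deepspeech-gpu /bin/bash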