DeepSpeech/Dockerfile

# Need the devel image because we need /usr/include/cudnn.h
# for compiling libctc_decoder_with_kenlm.so
FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04
# >> START Install base software
# Get basic packages
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    wget \
    git \
    python3 \
    python3-dev \
    python3-pip \
    python3-wheel \
    python3-numpy \
    libcurl3-dev \
    ca-certificates \
    gcc \
    sox \
    libsox-fmt-mp3 \
    htop \
    nano \
    swig \
    cmake \
    libboost-all-dev \
    zlib1g-dev \
    libbz2-dev \
    liblzma-dev \
    locales \
    pkg-config \
    libsox-dev \
    openjdk-8-jdk \
    bash-completion \
    g++ \
    unzip
RUN ln -s -f /usr/bin/python3 /usr/bin/python
# Install NCCL 2.3
RUN apt-get install -qq -y --allow-downgrades --allow-change-held-packages libnccl2=2.3.7-1+cuda10.0 libnccl-dev=2.3.7-1+cuda10.0
# Install Bazel
RUN curl -LO "https://github.com/bazelbuild/bazel/releases/download/0.24.1/bazel_0.24.1-linux-x86_64.deb"
RUN dpkg -i bazel_*.deb
# Install CUDA CLI Tools
RUN apt-get install -qq -y cuda-command-line-tools-10-0
# Install pip
RUN wget https://bootstrap.pypa.io/get-pip.py && \
    python3 get-pip.py && \
    rm get-pip.py
# << END Install base software
# >> START Configure Tensorflow Build
# Clone TensorFlow from the Mozilla repo
RUN git clone https://github.com/mozilla/tensorflow/
WORKDIR /tensorflow
RUN git checkout r1.14
# GPU Environment Setup
ENV TF_NEED_CUDA 1
ENV TF_CUDA_PATHS "/usr/local/cuda,/usr/lib/x86_64-linux-gnu/"
ENV TF_CUDA_VERSION 10.0
ENV TF_CUDNN_VERSION 7
ENV TF_CUDA_COMPUTE_CAPABILITIES 6.0
ENV TF_NCCL_VERSION 2.3
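# (Hedged note, not part of the original file) TF_CUDA_COMPUTE_CAPABILITIES accepts a
# comma-separated list, so a build targeting e.g. both Pascal and Volta GPUs could use:
# ENV TF_CUDA_COMPUTE_CAPABILITIES 6.0,7.0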
# Common Environment Setup
ENV TF_BUILD_CONTAINER_TYPE GPU
ENV TF_BUILD_OPTIONS OPT
ENV TF_BUILD_DISABLE_GCP 1
ENV TF_BUILD_ENABLE_XLA 0
ENV TF_BUILD_PYTHON_VERSION PYTHON3
ENV TF_BUILD_IS_OPT OPT
ENV TF_BUILD_IS_PIP PIP
# Other Parameters
ENV CC_OPT_FLAGS -mavx -mavx2 -msse4.1 -msse4.2 -mfma
ENV TF_NEED_GCP 0
ENV TF_NEED_HDFS 0
ENV TF_NEED_JEMALLOC 1
ENV TF_NEED_OPENCL 0
ENV TF_CUDA_CLANG 0
ENV TF_NEED_MKL 0
ENV TF_ENABLE_XLA 0
ENV TF_NEED_AWS 0
ENV TF_NEED_KAFKA 0
ENV TF_NEED_NGRAPH 0
ENV TF_DOWNLOAD_CLANG 0
ENV TF_NEED_TENSORRT 0
ENV TF_NEED_GDR 0
ENV TF_NEED_VERBS 0
ENV TF_NEED_OPENCL_SYCL 0
ENV PYTHON_BIN_PATH /usr/bin/python3.6
ENV PYTHON_LIB_PATH /usr/lib/python3.6/dist-packages
# << END Configure Tensorflow Build
# >> START Configure Bazel
# Running bazel inside a `docker build` command causes trouble, cf:
# https://github.com/bazelbuild/bazel/issues/134
# The easiest solution is to set up a bazelrc file forcing --batch.
RUN echo "startup --batch" >>/etc/bazel.bazelrc
# Similarly, we need to workaround sandboxing issues:
# https://github.com/bazelbuild/bazel/issues/418
RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
>>/etc/bazel.bazelrc
# Put the CUDA libraries where they are expected to be
RUN mkdir /usr/local/cuda/lib && \
    ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/local/cuda/lib/libnccl.so.2 && \
    ln -s /usr/include/nccl.h /usr/local/cuda/include/nccl.h && \
    ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
    ln -s /usr/include/cudnn.h /usr/local/cuda/include/cudnn.h
# Set library paths
ENV LD_LIBRARY_PATH $LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/lib/x86_64-linux-gnu/:/usr/local/cuda/lib64/stubs/
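# (Optional, hedged sanity check) To verify that the symlinks created above resolve,
# a check along these lines could be enabled:
# RUN ls -l /usr/local/cuda/lib/libnccl.so.2 /usr/local/cuda/include/nccl.h \
#     /usr/local/cuda/lib64/stubs/libcuda.so.1 /usr/local/cuda/include/cudnn.h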
# << END Configure Bazel
# Copy DeepSpeech repo contents to container's /DeepSpeech
COPY . /DeepSpeech/
# Alternatively, clone DeepSpeech from GitHub:
# RUN apt-get update && apt-get install -y git-lfs
# WORKDIR /
# RUN git clone https://github.com/mozilla/DeepSpeech.git
WORKDIR /DeepSpeech
RUN pip3 --no-cache-dir install -r requirements.txt
# Link the DeepSpeech native_client directory into the TensorFlow tree so the //native_client Bazel targets resolve
RUN ln -s /DeepSpeech/native_client /tensorflow
# >> START Build and bind
WORKDIR /tensorflow
# Run the TensorFlow configure step (the TF_* variables set above supply its answers);
# also a fix for the script not being found: https://github.com/tensorflow/tensorflow/issues/471
RUN ./configure
# Using CPU optimizations:
# -mtune=generic -march=x86-64 -msse -msse2 -msse3 -msse4.1 -msse4.2 -mavx.
# The --config=cuda flag enables building with CUDA.
# Passing LD_LIBRARY_PATH explicitly is required because Bazel doesn't pick it up from the environment.
# Build DeepSpeech
RUN bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=cuda -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-mtune=generic --copt=-march=x86-64 --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx --copt=-fvisibility=hidden //native_client:libdeepspeech.so //native_client:generate_trie --verbose_failures --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
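# (Hedged note) The build above should leave libdeepspeech.so and generate_trie under
# bazel-bin/native_client; an optional check before the copy step further down could be:
# RUN ls -l /tensorflow/bazel-bin/native_client/libdeepspeech.so \
#     /tensorflow/bazel-bin/native_client/generate_trie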
###
### Using TensorFlow upstream should work
###
# # Build TF pip package
# RUN bazel build --config=opt --config=cuda --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-mtune=generic --copt=-march=x86-64 --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx //tensorflow/tools/pip_package:build_pip_package --verbose_failures --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
#
# # Build wheel
# RUN bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
#
# # Install tensorflow from our custom wheel
# RUN pip3 install /tmp/tensorflow_pkg/*.whl
# Copy built libs to /DeepSpeech/native_client
RUN cp /tensorflow/bazel-bin/native_client/generate_trie /DeepSpeech/native_client/ \
    && cp /tensorflow/bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/
# Install the prebuilt TensorFlow GPU package
WORKDIR /DeepSpeech/
RUN pip3 install tensorflow-gpu==1.14.0
# Make DeepSpeech and install Python bindings
ENV TFDIR /tensorflow
WORKDIR /DeepSpeech/native_client
RUN make deepspeech
WORKDIR /DeepSpeech/native_client/python
RUN make bindings
RUN pip3 install dist/deepspeech*
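# (Optional, hedged smoke test) A minimal import check of the freshly installed bindings:
# RUN python3 -c "import deepspeech"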
WORKDIR /DeepSpeech/native_client/ctcdecode
RUN make
RUN pip3 install dist/*.whl
# << END Build and bind
# Allow Python to print UTF-8
ENV PYTHONIOENCODING UTF-8
# Build KenLM in /DeepSpeech/native_client/kenlm folder
WORKDIR /DeepSpeech/native_client
RUN rm -rf kenlm \
    && git clone --depth 1 https://github.com/kpu/kenlm && cd kenlm \
    && mkdir -p build \
    && cd build \
    && cmake .. \
    && make -j 4
# Done
WORKDIR /DeepSpeech
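# (Hedged usage note) With nvidia-docker set up on the host, building and entering this
# image might look like the following; the "deepspeech" tag is illustrative:
#   docker build -t deepspeech .
#   docker run --runtime=nvidia -it deepspeech /bin/bash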