# DeepSpeech/Dockerfile.build.tmpl

# Please refer to the USING documentation, "Dockerfile for building from source"

# The -devel variant of the CUDA image is needed because the build requires
# the cuDNN header /usr/include/cudnn.h.
FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04

# Repository and revision of DeepSpeech that are cloned and checked out further
# down in this file.
# NOTE(review): the #...# values look like template placeholders that get
# substituted before this file is built — confirm against the generating script.
ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO# \
    DEEPSPEECH_SHA=#DEEPSPEECH_SHA#

# >> START Install base software

# Get basic packages, make python3/pip3 reachable as python/pip, and install
# Bazel 3.1.0 from the upstream release .deb.
# Every step is chained with `&&` so that a failed package install, a failed
# Bazel download or a failed dpkg run aborts the build instead of being
# silently ignored and surfacing much later as a confusing error.
RUN apt-get update && apt-get install -y --no-install-recommends \
        apt-utils \
        bash-completion \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        g++ \
        gcc \
        git \
        libboost-all-dev \
        libbz2-dev \
        libgsm1-dev \
        libltdl-dev \
        liblzma-dev \
        libmagic-dev \
        libpng-dev \
        libsox-dev \
        libsox-fmt-mp3 \
        locales \
        openjdk-8-jdk \
        pkg-config \
        python3 \
        python3-dev \
        python3-numpy \
        python3-pip \
        python3-wheel \
        sox \
        unzip \
        wget \
        zlib1g-dev \
    && update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 \
    && update-alternatives --install /usr/bin/python python /usr/bin/python3 1 \
    # Install Bazel; -f makes curl fail on an HTTP error instead of saving the error page \
    && curl -fLO "https://github.com/bazelbuild/bazel/releases/download/3.1.0/bazel_3.1.0-linux-x86_64.deb" \
    && dpkg -i bazel_*.deb \
    # Free some space in the same layer that created the files \
    && rm -rf /var/lib/apt/lists/* bazel_*.deb

# << END Install base software

# >> START Configure Tensorflow Build

# Environment consumed by the TensorFlow ./configure step and the builds below.
# Each variable is defined exactly once: TF_NEED_OPENCL and TF_NEED_OPENCL_SYCL
# used to be listed twice (with the same value), which made it unclear which
# entry was authoritative.

# GPU Environment Setup
ENV TF_NEED_ROCM=0 \
    TF_NEED_OPENCL_SYCL=0 \
    TF_NEED_OPENCL=0 \
    TF_NEED_CUDA=1 \
    TF_CUDA_PATHS="/usr,/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu/" \
    TF_CUDA_VERSION=10.1 \
    TF_CUDNN_VERSION=7.6 \
    TF_CUDA_COMPUTE_CAPABILITIES=6.0 \
    TF_NCCL_VERSION=2.8 \
    # Common Environment Setup \
    TF_BUILD_CONTAINER_TYPE=GPU \
    TF_BUILD_OPTIONS=OPT \
    TF_BUILD_DISABLE_GCP=1 \
    TF_BUILD_ENABLE_XLA=0 \
    TF_BUILD_PYTHON_VERSION=PYTHON3 \
    TF_BUILD_IS_OPT=OPT \
    TF_BUILD_IS_PIP=PIP \
    # Build client.cc and install Python client and decoder bindings \
    TFDIR=/DeepSpeech/tensorflow \
    # Allow Python printing utf-8 \
    PYTHONIOENCODING=UTF-8 \
    # Other Parameters \
    CC_OPT_FLAGS="-mavx -mavx2 -msse4.1 -msse4.2 -mfma" \
    TF_NEED_GCP=0 \
    TF_NEED_HDFS=0 \
    TF_NEED_JEMALLOC=1 \
    TF_CUDA_CLANG=0 \
    TF_NEED_MKL=0 \
    TF_ENABLE_XLA=0 \
    TF_NEED_AWS=0 \
    TF_NEED_KAFKA=0 \
    TF_NEED_NGRAPH=0 \
    TF_DOWNLOAD_CLANG=0 \
    TF_NEED_TENSORRT=0 \
    TF_NEED_GDR=0 \
    TF_NEED_VERBS=0 \
    PYTHON_BIN_PATH=/usr/bin/python3.6 \
    PYTHON_LIB_PATH=/usr/local/lib/python3.6/dist-packages

# << END Configure Tensorflow Build

# >> START Configure Bazel

# Bazel misbehaves when run from within `docker build`, see:
#   https://github.com/bazelbuild/bazel/issues/134
# Forcing --batch through a system-wide bazelrc is the simplest way around it.
# The sandboxing problems described in
#   https://github.com/bazelbuild/bazel/issues/418
# are worked around the same way, by selecting the standalone strategies.
RUN printf '%s\n' \
        'startup --batch' \
        'build --spawn_strategy=standalone --genrule_strategy=standalone' \
        >> /etc/bazel.bazelrc

# << END Configure Bazel

WORKDIR /

# Clone DeepSpeech, pin the work tree to $DEEPSPEECH_SHA and bring the
# tensorflow/ and kenlm/ submodules in line with that revision.
#
# The fetch stays best-effort: the revision may already be present after the
# clone, or may be a name that `git fetch origin <ref>` cannot resolve even
# though `git checkout` can (presumably e.g. a remote-tracking branch name —
# verify against whatever fills in the placeholder).
# The checkout and the submodule steps are mandatory: previously a failed
# checkout was ignored (`;`) and the image was silently built from the default
# branch instead of the requested revision.
RUN git clone --recursive "$DEEPSPEECH_REPO" DeepSpeech \
    && cd /DeepSpeech \
    && { git fetch origin "$DEEPSPEECH_SHA" || true; } \
    && git checkout "$DEEPSPEECH_SHA" \
    && git submodule sync tensorflow/ \
    && git submodule update --init tensorflow/ \
    && git submodule sync kenlm/ \
    && git submodule update --init kenlm/

# >> START Build and bind
# Fix for not found script https://github.com/tensorflow/tensorflow/issues/471
# Using CPU optimizations:
# -mtune=generic -march=x86-64 -msse -msse2 -msse3 -msse4.1 -msse4.2 -mavx.
# Adding --config=cuda flag to build using CUDA.

# Passing LD_LIBRARY_PATH explicitly is required because Bazel doesn't pick it up from the environment

# Build libdeepspeech.so from inside the TensorFlow tree:
#   1. run TensorFlow's ./configure,
#   2. build the //native_client:libdeepspeech.so target with CUDA enabled,
#   3. copy the resulting library next to the native client sources,
#   4. empty /root/.cache in the same layer.
RUN cd /DeepSpeech/tensorflow \
    && ./configure \
    && bazel build \
        --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \
        --config=monolithic \
        --config=cuda \
        -c opt \
        --copt=-O3 \
        --copt="-D_GLIBCXX_USE_CXX11_ABI=0" \
        --copt=-mtune=generic \
        --copt=-march=x86-64 \
        --copt=-msse \
        --copt=-msse2 \
        --copt=-msse3 \
        --copt=-msse4.1 \
        --copt=-msse4.2 \
        --copt=-mavx \
        --copt=-fvisibility=hidden \
        --verbose_failures \
        --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH} \
        //native_client:libdeepspeech.so \
    && cp bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/ \
    && rm -fr /root/.cache/*

# Build the `deepspeech` make target of the native client, then the Python
# client bindings and the ctcdecode bindings, installing the wheel(s) each
# bindings build leaves under dist/.
# The steps are `&&`-chained: with `;` a failed `make` or `cd` was ignored and
# only the exit status of the very last `pip3 install` decided whether the
# layer succeeded.
RUN cd /DeepSpeech/native_client \
    && make NUM_PROCESSES="$(nproc)" deepspeech \
    && cd /DeepSpeech/native_client/python \
    && make NUM_PROCESSES="$(nproc)" bindings \
    && pip3 install --no-cache-dir --upgrade dist/*.whl \
    && cd /DeepSpeech/native_client/ctcdecode \
    && make NUM_PROCESSES="$(nproc)" bindings \
    && pip3 install --no-cache-dir --upgrade dist/*.whl

# << END Build and bind

# Build KenLM in /DeepSpeech/kenlm folder, against a private copy of Eigen 3.3.8
# unpacked next to the sources.
# The archive is downloaded to a file and extracted in separate `&&` steps:
# with `wget ... | tar xj;` under /bin/sh a failed download was hidden by the
# pipe, and the `;` ignored a failed extraction as well.
WORKDIR /DeepSpeech/kenlm
RUN wget -O eigen-3.3.8.tar.bz2 https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 \
    && tar xjf eigen-3.3.8.tar.bz2 \
    && rm eigen-3.3.8.tar.bz2 \
    && mkdir -p build \
    && cd build \
    && EIGEN3_ROOT=/DeepSpeech/kenlm/eigen-3.3.8 cmake .. \
    && make -j "$(nproc)"

# Done
WORKDIR /DeepSpeech
Decouple Dockerfile into build and train 2020-06-02 22:23:20 +03:00			`# Please refer to the USING documentation, "Dockerfile for building from source"`

added Dockerfile 2018-04-26 23:25:13 +03:00			`# Need devel version cause we need /usr/include/cudnn.h`
Build DeepSpeech using TensorFlow r2.2 2020-06-02 15:06:38 +03:00			`FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04`
added Dockerfile 2018-04-26 23:25:13 +03:00
Optimize a bit Docker 2021-04-08 23:21:24 +03:00			`ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO# \`
			`DEEPSPEECH_SHA=#DEEPSPEECH_SHA#`
added Dockerfile 2018-04-26 23:25:13 +03:00
			`# >> START Install base software`

			`# Get basic packages`
			`RUN apt-get update && apt-get install -y --no-install-recommends \`
Use KenLM submodule 2021-04-07 23:34:43 +03:00			`apt-utils \`
			`bash-completion \`
			`build-essential \`
			`ca-certificates \`
			`cmake \`
			`curl \`
			`g++ \`
			`gcc \`
			`git \`
			`libbz2-dev \`
			`libboost-all-dev \`
			`libgsm1-dev \`
			`libltdl-dev \`
			`liblzma-dev \`
			`libmagic-dev \`
			`libpng-dev \`
			`libsox-fmt-mp3 \`
			`libsox-dev \`
			`locales \`
			`openjdk-8-jdk \`
			`pkg-config \`
			`python3 \`
			`python3-dev \`
			`python3-pip \`
			`python3-wheel \`
			`python3-numpy \`
			`sox \`
			`unzip \`
			`wget \`
Optimize a bit Docker 2021-04-08 23:21:24 +03:00			`zlib1g-dev; \`
			`update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 && \`
			`update-alternatives --install /usr/bin/python python /usr/bin/python3 1; \`
			`# Install Bazel \`
			`curl -LO "https://github.com/bazelbuild/bazel/releases/download/3.1.0/bazel_3.1.0-linux-x86_64.deb" && dpkg -i bazel_*.deb; \`
			`# Try and free some space \`
			`rm -rf /var/lib/apt/lists/* bazel_*.deb`
Fix #3157: Add CircleCI config 2020-07-15 14:46:13 +03:00
added Dockerfile 2018-04-26 23:25:13 +03:00			`# << END Install base software`

			`# >> START Configure Tensorflow Build`

			`# GPU Environment Setup`
Optimize a bit Docker 2021-04-08 23:21:24 +03:00			`ENV TF_NEED_ROCM=0 \`
			`TF_NEED_OPENCL_SYCL=0 \`
			`TF_NEED_OPENCL=0 \`
			`TF_NEED_CUDA=1 \`
			`TF_CUDA_PATHS="/usr,/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu/" \`
			`TF_CUDA_VERSION=10.1 \`
			`TF_CUDNN_VERSION=7.6 \`
			`TF_CUDA_COMPUTE_CAPABILITIES=6.0 \`
			`TF_NCCL_VERSION=2.8 \`
			`# Common Environment Setup \`
			`TF_BUILD_CONTAINER_TYPE=GPU \`
			`TF_BUILD_OPTIONS=OPT \`
			`TF_BUILD_DISABLE_GCP=1 \`
			`TF_BUILD_ENABLE_XLA=0 \`
			`TF_BUILD_PYTHON_VERSION=PYTHON3 \`
			`TF_BUILD_IS_OPT=OPT \`
			`TF_BUILD_IS_PIP=PIP \`
			`# Build client.cc and install Python client and decoder bindings \`
			`TFDIR=/DeepSpeech/tensorflow \`
			`# Allow Python printing utf-8 \`
			`PYTHONIOENCODING=UTF-8 \`
			`# Other Parameters \`
			`CC_OPT_FLAGS="-mavx -mavx2 -msse4.1 -msse4.2 -mfma" \`
			`TF_NEED_GCP=0 \`
			`TF_NEED_HDFS=0 \`
			`TF_NEED_JEMALLOC=1 \`
			`TF_NEED_OPENCL=0 \`
			`TF_CUDA_CLANG=0 \`
			`TF_NEED_MKL=0 \`
			`TF_ENABLE_XLA=0 \`
			`TF_NEED_AWS=0 \`
			`TF_NEED_KAFKA=0 \`
			`TF_NEED_NGRAPH=0 \`
			`TF_DOWNLOAD_CLANG=0 \`
			`TF_NEED_TENSORRT=0 \`
			`TF_NEED_GDR=0 \`
			`TF_NEED_VERBS=0 \`
			`TF_NEED_OPENCL_SYCL=0 \`
			`PYTHON_BIN_PATH=/usr/bin/python3.6 \`
			`PYTHON_LIB_PATH=/usr/local/lib/python3.6/dist-packages`
added Dockerfile 2018-04-26 23:25:13 +03:00
			`# << END Configure Tensorflow Build`

			`# >> START Configure Bazel`

			# Running bazel inside a `docker build` command causes trouble, cf:
			`# https://github.com/bazelbuild/bazel/issues/134`
			`# The easiest solution is to set up a bazelrc file forcing --batch.`
			`# Similarly, we need to workaround sandboxing issues:`
			`# https://github.com/bazelbuild/bazel/issues/418`
Optimize a bit Docker 2021-04-08 23:21:24 +03:00			`RUN echo "startup --batch" >>/etc/bazel.bazelrc; \`
			`echo "build --spawn_strategy=standalone --genrule_strategy=standalone" >> /etc/bazel.bazelrc`
added Dockerfile 2018-04-26 23:25:13 +03:00
			`# << END Configure Bazel`

Decouple Dockerfile into build and train 2020-06-02 22:23:20 +03:00			`WORKDIR /`
added Dockerfile 2018-04-26 23:25:13 +03:00
Optimize a bit Docker 2021-04-08 23:21:24 +03:00			`RUN git clone --recursive $DEEPSPEECH_REPO DeepSpeech && \`
			`cd /DeepSpeech && \`
			`git fetch origin $DEEPSPEECH_SHA && git checkout $DEEPSPEECH_SHA; \`
			`git submodule sync tensorflow/ && git submodule update --init tensorflow/; \`
			`git submodule sync kenlm/ && git submodule update --init kenlm/`
added Dockerfile 2018-04-26 23:25:13 +03:00
			`# >> START Build and bind`
Update Dockerfile to TensorFlow r1.11 2018-09-25 09:46:27 +03:00			`# Fix for not found script https://github.com/tensorflow/tensorflow/issues/471`
fixed --cuda flag, added cpu optimization flags 2018-05-02 13:45:06 +03:00			`# Using CPU optimizations:`
			`# -mtune=generic -march=x86-64 -msse -msse2 -msse3 -msse4.1 -msse4.2 -mavx.`
			`# Adding --config=cuda flag to build using CUDA.`

added Dockerfile 2018-04-26 23:25:13 +03:00			`# passing LD_LIBRARY_PATH is required cause Bazel doesn't pickup it from environment`

Revert "Merge pull request #3246 from lissyx/fix-docker" This reverts commit c01fda56c058779cc9dba952ce940c47398c4ed3, reversing changes made to 3e99b0d8b2b2d6e47c8ff7eb1dfd9a88eba8e6d8. 2020-08-25 16:35:03 +03:00			`# Build DeepSpeech`
Optimize a bit Docker 2021-04-08 23:21:24 +03:00			`RUN cd /DeepSpeech/tensorflow && ./configure && bazel build \`
Decouple Dockerfile into build and train 2020-06-02 22:23:20 +03:00			`--workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \`
			`--config=monolithic \`
			`--config=cuda \`
			`-c opt \`
			`--copt=-O3 \`
			`--copt="-D_GLIBCXX_USE_CXX11_ABI=0" \`
			`--copt=-mtune=generic \`
			`--copt=-march=x86-64 \`
			`--copt=-msse \`
			`--copt=-msse2 \`
			`--copt=-msse3 \`
			`--copt=-msse4.1 \`
			`--copt=-msse4.2 \`
			`--copt=-mavx \`
			`--copt=-fvisibility=hidden \`
Revert "Merge branch 'rename-real'" This reverts commit ae9fdb183ec6eb422635c0e3a44c0c2ee5732224, reversing changes made to 2eb75b62064ac30c1c537f4174d00b6e521042c5. 2020-08-25 16:37:58 +03:00			`//native_client:libdeepspeech.so \`
Decouple Dockerfile into build and train 2020-06-02 22:23:20 +03:00			`--verbose_failures \`
Optimize a bit Docker 2021-04-08 23:21:24 +03:00			`--action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH} && \`
			`cp bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/ && \`
			`rm -fr /root/.cache/*`
added Dockerfile 2018-04-26 23:25:13 +03:00
Optimize a bit Docker 2021-04-08 23:21:24 +03:00			`RUN cd /DeepSpeech/native_client && make NUM_PROCESSES=$(nproc) deepspeech ; \`
			`cd /DeepSpeech/native_client/python && make NUM_PROCESSES=$(nproc) bindings; \`
			`pip3 install --upgrade dist/*.whl; \`
			`cd /DeepSpeech/native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings; \`
			`pip3 install --upgrade dist/*.whl`
added Dockerfile 2018-04-26 23:25:13 +03:00
			`# << END Build and bind`

Use KenLM submodule 2021-04-07 23:34:43 +03:00			`# Build KenLM in /DeepSpeech/kenlm folder`
			`WORKDIR /DeepSpeech/kenlm`
Optimize a bit Docker 2021-04-08 23:21:24 +03:00			`RUN wget -O - https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 \| tar xj; \`
			`mkdir -p build && \`
Use KenLM submodule 2021-04-07 23:34:43 +03:00			`cd build && \`
			`EIGEN3_ROOT=/DeepSpeech/kenlm/eigen-3.3.8 cmake .. && \`
			`make -j $(nproc)`
added Dockerfile 2018-04-26 23:25:13 +03:00
			`# Done`
Revert "Merge pull request #3246 from lissyx/fix-docker" This reverts commit c01fda56c058779cc9dba952ce940c47398c4ed3, reversing changes made to 3e99b0d8b2b2d6e47c8ff7eb1dfd9a88eba8e6d8. 2020-08-25 16:35:03 +03:00			`WORKDIR /DeepSpeech`