2020-06-02 22:23:20 +03:00
|
|
|
# Please refer to the USING documentation, "Dockerfile for building from source"
|
|
|
|
|
2018-04-26 23:25:13 +03:00
|
|
|
# The "devel" flavour of the CUDA image is required because the build needs
# the cuDNN header /usr/include/cudnn.h.
FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
|
2018-04-26 23:25:13 +03:00
|
|
|
|
2021-04-08 23:21:24 +03:00
|
|
|
# Repository URL and revision to build. The #...# tokens look like template
# placeholders that are substituted before this file is built -- confirm
# against the script that generates the final Dockerfile.
ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO# \
    DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
|
2018-04-26 23:25:13 +03:00
|
|
|
|
|
|
|
# >> START Install base software
|
|
|
|
|
|
|
|
# Get basic packages
|
|
|
|
# Install the OS packages, make python/pip resolve to Python 3, and install
# Bazel 3.1.0. Every step is chained with && so that any failure aborts the
# layer instead of being masked by a later command's exit status.
RUN apt-get update && apt-get install -y --no-install-recommends \
        apt-utils \
        bash-completion \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        g++ \
        gcc \
        git \
        libbz2-dev \
        libboost-all-dev \
        libgsm1-dev \
        libltdl-dev \
        liblzma-dev \
        libmagic-dev \
        libpng-dev \
        libsox-fmt-mp3 \
        libsox-dev \
        locales \
        openjdk-8-jdk \
        pkg-config \
        python3 \
        python3-dev \
        python3-pip \
        python3-wheel \
        python3-numpy \
        sox \
        unzip \
        wget \
        zlib1g-dev \
    # Make the unversioned "pip" and "python" commands point at Python 3
    && update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 \
    && update-alternatives --install /usr/bin/python python /usr/bin/python3 1 \
    # Install Bazel; --fail makes curl exit non-zero on an HTTP error instead
    # of saving the error page as the .deb
    && curl -fLO "https://github.com/bazelbuild/bazel/releases/download/3.1.0/bazel_3.1.0-linux-x86_64.deb" \
    && dpkg -i bazel_*.deb \
    # Free some space in the same layer that created the files
    && rm -rf /var/lib/apt/lists/* bazel_*.deb
|
2020-07-15 14:46:13 +03:00
|
|
|
|
2018-04-26 23:25:13 +03:00
|
|
|
# << END Install base software
|
|
|
|
|
|
|
|
# >> START Configure Tensorflow Build
|
|
|
|
|
|
|
|
# GPU Environment Setup
|
2021-04-08 23:21:24 +03:00
|
|
|
# Environment for the TensorFlow / DeepSpeech build. These are set before
# ./configure is run in the build step; presumably ./configure reads the TF_*
# values instead of prompting -- confirm against the TensorFlow version used.
# TF_NEED_OPENCL and TF_NEED_OPENCL_SYCL were previously declared twice with
# the same value; each is now declared once.
ENV TF_NEED_ROCM=0 \
    TF_NEED_OPENCL_SYCL=0 \
    TF_NEED_OPENCL=0 \
    TF_NEED_CUDA=1 \
    TF_CUDA_PATHS="/usr,/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu/" \
    TF_CUDA_VERSION=10.1 \
    TF_CUDNN_VERSION=7.6 \
    TF_CUDA_COMPUTE_CAPABILITIES=6.0 \
    TF_NCCL_VERSION=2.8 \
    # Common environment setup
    TF_BUILD_CONTAINER_TYPE=GPU \
    TF_BUILD_OPTIONS=OPT \
    TF_BUILD_DISABLE_GCP=1 \
    TF_BUILD_ENABLE_XLA=0 \
    TF_BUILD_PYTHON_VERSION=PYTHON3 \
    TF_BUILD_IS_OPT=OPT \
    TF_BUILD_IS_PIP=PIP \
    # Location of the TensorFlow checkout (the tensorflow/ submodule of the
    # /DeepSpeech clone); presumably read by the native_client build -- confirm
    TFDIR=/DeepSpeech/tensorflow \
    # Allow Python to print UTF-8
    PYTHONIOENCODING=UTF-8 \
    # Other parameters
    CC_OPT_FLAGS="-mavx -mavx2 -msse4.1 -msse4.2 -mfma" \
    TF_NEED_GCP=0 \
    TF_NEED_HDFS=0 \
    TF_NEED_JEMALLOC=1 \
    TF_CUDA_CLANG=0 \
    TF_NEED_MKL=0 \
    TF_ENABLE_XLA=0 \
    TF_NEED_AWS=0 \
    TF_NEED_KAFKA=0 \
    TF_NEED_NGRAPH=0 \
    TF_DOWNLOAD_CLANG=0 \
    TF_NEED_TENSORRT=0 \
    TF_NEED_GDR=0 \
    TF_NEED_VERBS=0 \
    PYTHON_BIN_PATH=/usr/bin/python3.6 \
    PYTHON_LIB_PATH=/usr/local/lib/python3.6/dist-packages
|
2018-04-26 23:25:13 +03:00
|
|
|
|
|
|
|
# << END Configure Tensorflow Build
|
|
|
|
|
|
|
|
# >> START Configure Bazel
|
|
|
|
|
|
|
|
# Running bazel inside a `docker build` command causes trouble, cf:
|
|
|
|
# https://github.com/bazelbuild/bazel/issues/134
|
|
|
|
# The easiest solution is to set up a bazelrc file forcing --batch.
|
|
|
|
# Similarly, we need to workaround sandboxing issues:
|
|
|
|
# https://github.com/bazelbuild/bazel/issues/418
|
2021-04-08 23:21:24 +03:00
|
|
|
# Append the two workaround settings to the system-wide bazelrc:
# force batch mode, and run build actions and genrules without sandboxing.
RUN printf '%s\n' \
        "startup --batch" \
        "build --spawn_strategy=standalone --genrule_strategy=standalone" \
        >> /etc/bazel.bazelrc
|
2018-04-26 23:25:13 +03:00
|
|
|
|
|
|
|
# << END Configure Bazel
|
|
|
|
|
2020-06-02 22:23:20 +03:00
|
|
|
# Clone the repository into /DeepSpeech, pin it to the requested revision and
# initialise the tensorflow/ and kenlm/ submodules. All steps are chained with
# && so that a failed fetch or checkout aborts the build instead of silently
# building whatever commit the clone happened to land on.
WORKDIR /
RUN git clone --recursive "$DEEPSPEECH_REPO" DeepSpeech && \
    cd /DeepSpeech && \
    git fetch origin "$DEEPSPEECH_SHA" && \
    git checkout "$DEEPSPEECH_SHA" && \
    git submodule sync tensorflow/ && \
    git submodule update --init tensorflow/ && \
    git submodule sync kenlm/ && \
    git submodule update --init kenlm/
|
2018-04-26 23:25:13 +03:00
|
|
|
|
|
|
|
# >> START Build and bind
|
2018-09-25 09:46:27 +03:00
|
|
|
# Fix for not found script https://github.com/tensorflow/tensorflow/issues/471
|
2018-05-02 13:45:06 +03:00
|
|
|
# Using CPU optimizations:
|
|
|
|
# -mtune=generic -march=x86-64 -msse -msse2 -msse3 -msse4.1 -msse4.2 -mavx.
|
|
|
|
# Adding --config=cuda flag to build using CUDA.
|
|
|
|
|
2018-04-26 23:25:13 +03:00
|
|
|
# Passing LD_LIBRARY_PATH explicitly is required because Bazel does not pick it up from the environment
|
|
|
|
|
2020-08-25 16:35:03 +03:00
|
|
|
# Build DeepSpeech
|
2021-04-08 23:21:24 +03:00
|
|
|
# Configure TensorFlow, build libdeepspeech.so with CUDA support, copy the
# library next to the native client sources, then drop the cache under
# /root/.cache in the same layer.
RUN cd /DeepSpeech/tensorflow \
    && ./configure \
    && bazel build \
        --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \
        --config=monolithic \
        --config=cuda \
        -c opt \
        --copt=-O3 \
        --copt="-D_GLIBCXX_USE_CXX11_ABI=0" \
        --copt=-mtune=generic \
        --copt=-march=x86-64 \
        --copt=-msse \
        --copt=-msse2 \
        --copt=-msse3 \
        --copt=-msse4.1 \
        --copt=-msse4.2 \
        --copt=-mavx \
        --copt=-fvisibility=hidden \
        //native_client:libdeepspeech.so \
        --verbose_failures \
        --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH} \
    && cp bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/ \
    && rm -fr /root/.cache/*
|
2018-04-26 23:25:13 +03:00
|
|
|
|
2021-04-08 23:21:24 +03:00
|
|
|
# Build the native client binary, then build and install the Python package
# wheel and the ctcdecode wheel. Steps are chained with && so that a failed
# make or pip install fails the image build instead of being masked by the
# exit status of the last command. --no-cache-dir keeps pip's download cache
# out of the layer.
RUN cd /DeepSpeech/native_client && \
    make NUM_PROCESSES=$(nproc) deepspeech && \
    cd /DeepSpeech/native_client/python && \
    make NUM_PROCESSES=$(nproc) bindings && \
    pip3 install --no-cache-dir --upgrade dist/*.whl && \
    cd /DeepSpeech/native_client/ctcdecode && \
    make NUM_PROCESSES=$(nproc) bindings && \
    pip3 install --no-cache-dir --upgrade dist/*.whl
|
2018-04-26 23:25:13 +03:00
|
|
|
|
|
|
|
# << END Build and bind
|
|
|
|
|
2021-04-07 23:34:43 +03:00
|
|
|
# Build KenLM in /DeepSpeech/kenlm against a locally unpacked Eigen 3.3.8.
# The archive is downloaded to a file rather than piped into tar: the default
# /bin/sh has no pipefail, so a failed wget in a pipe would go unnoticed.
# Every step is chained with && so any failure aborts the layer.
WORKDIR /DeepSpeech/kenlm
RUN wget -O eigen-3.3.8.tar.bz2 https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 && \
    tar xjf eigen-3.3.8.tar.bz2 && \
    rm eigen-3.3.8.tar.bz2 && \
    mkdir -p build && \
    cd build && \
    EIGEN3_ROOT=/DeepSpeech/kenlm/eigen-3.3.8 cmake .. && \
    make -j $(nproc)
|
2018-04-26 23:25:13 +03:00
|
|
|
|
|
|
|
# Done: leave the image's working directory at the repository root.
WORKDIR /DeepSpeech
|