This commit is contained in:
Alexandre Lissy 2021-04-08 22:21:24 +02:00
Родитель ab134af507
Коммит acecbc3e3e
4 изменённых файлов: 101 добавлений и 131 удалений

9
.github/workflows/docker.yml поставляемый
Просмотреть файл

@ -14,11 +14,14 @@ jobs:
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
submodules: 'recursive'
fetch-depth: 1
- run: |
make Dockerfile.${{ matrix.template }} \
DEEPSPEECH_REPO=https://github.com/${{ github.repository }} \
DEEPSPEECH_SHA=${{ github.sha }}
- run: |
docker build -t app:${{ matrix.template }} -f Dockerfile.${{ matrix.template }} .
mkdir /tmp/empty
- run: |
cd /tmp/empty; docker build -t app:${{ matrix.template }} -f ${{ github.workspace }}/Dockerfile.${{ matrix.template }} .
- run: |
docker save app:${{ matrix.template}} | zstd -o app_${{ matrix.template }}.zstd

Просмотреть файл

@ -3,8 +3,8 @@
# Need devel version cause we need /usr/include/cudnn.h
FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO#
ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO# \
DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
# >> START Install base software
@ -39,62 +39,59 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
sox \
unzip \
wget \
zlib1g-dev
RUN update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1
# Install Bazel
RUN curl -LO "https://github.com/bazelbuild/bazel/releases/download/3.1.0/bazel_3.1.0-linux-x86_64.deb"
RUN dpkg -i bazel_*.deb
# Try and free some space
RUN rm -rf /var/lib/apt/lists/*
zlib1g-dev && \
# Make pip/python resolve to the Python 3 binaries. \
update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 && \
update-alternatives --install /usr/bin/python python /usr/bin/python3 1 && \
# Install Bazel; every step is chained with && so a failed package install, \
# download or dpkg run aborts the build instead of being silently ignored. \
curl -LO "https://github.com/bazelbuild/bazel/releases/download/3.1.0/bazel_3.1.0-linux-x86_64.deb" && dpkg -i bazel_*.deb && \
# Try and free some space (same layer, so the files never reach the image) \
rm -rf /var/lib/apt/lists/* bazel_*.deb
# << END Install base software
# >> START Configure Tensorflow Build
# GPU Environment Setup
ENV TF_NEED_ROCM 0
ENV TF_NEED_OPENCL_SYCL 0
ENV TF_NEED_OPENCL 0
ENV TF_NEED_CUDA 1
ENV TF_CUDA_PATHS "/usr,/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu/"
ENV TF_CUDA_VERSION 10.1
ENV TF_CUDNN_VERSION 7.6
ENV TF_CUDA_COMPUTE_CAPABILITIES 6.0
ENV TF_NCCL_VERSION 2.8
# Common Environment Setup
ENV TF_BUILD_CONTAINER_TYPE GPU
ENV TF_BUILD_OPTIONS OPT
ENV TF_BUILD_DISABLE_GCP 1
ENV TF_BUILD_ENABLE_XLA 0
ENV TF_BUILD_PYTHON_VERSION PYTHON3
ENV TF_BUILD_IS_OPT OPT
ENV TF_BUILD_IS_PIP PIP
# Other Parameters
ENV CC_OPT_FLAGS -mavx -mavx2 -msse4.1 -msse4.2 -mfma
ENV TF_NEED_GCP 0
ENV TF_NEED_HDFS 0
ENV TF_NEED_JEMALLOC 1
ENV TF_NEED_OPENCL 0
ENV TF_CUDA_CLANG 0
ENV TF_NEED_MKL 0
ENV TF_ENABLE_XLA 0
ENV TF_NEED_AWS 0
ENV TF_NEED_KAFKA 0
ENV TF_NEED_NGRAPH 0
ENV TF_DOWNLOAD_CLANG 0
ENV TF_NEED_TENSORRT 0
ENV TF_NEED_GDR 0
ENV TF_NEED_VERBS 0
ENV TF_NEED_OPENCL_SYCL 0
ENV PYTHON_BIN_PATH /usr/bin/python3.6
ENV PYTHON_LIB_PATH /usr/local/lib/python3.6/dist-packages
# All build configuration is set in a single ENV instruction (one layer).
# Each key is listed exactly once; the values are unchanged.
ENV TF_NEED_ROCM=0 \
    TF_NEED_OPENCL_SYCL=0 \
    TF_NEED_OPENCL=0 \
    TF_NEED_CUDA=1 \
    TF_CUDA_PATHS="/usr,/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu/" \
    TF_CUDA_VERSION=10.1 \
    TF_CUDNN_VERSION=7.6 \
    TF_CUDA_COMPUTE_CAPABILITIES=6.0 \
    TF_NCCL_VERSION=2.8 \
# Common Environment Setup \
    TF_BUILD_CONTAINER_TYPE=GPU \
    TF_BUILD_OPTIONS=OPT \
    TF_BUILD_DISABLE_GCP=1 \
    TF_BUILD_ENABLE_XLA=0 \
    TF_BUILD_PYTHON_VERSION=PYTHON3 \
    TF_BUILD_IS_OPT=OPT \
    TF_BUILD_IS_PIP=PIP \
# Build client.cc and install Python client and decoder bindings \
    TFDIR=/DeepSpeech/tensorflow \
# Allow Python printing utf-8 \
    PYTHONIOENCODING=UTF-8 \
# Other Parameters \
    CC_OPT_FLAGS="-mavx -mavx2 -msse4.1 -msse4.2 -mfma" \
    TF_NEED_GCP=0 \
    TF_NEED_HDFS=0 \
    TF_NEED_JEMALLOC=1 \
    TF_CUDA_CLANG=0 \
    TF_NEED_MKL=0 \
    TF_ENABLE_XLA=0 \
    TF_NEED_AWS=0 \
    TF_NEED_KAFKA=0 \
    TF_NEED_NGRAPH=0 \
    TF_DOWNLOAD_CLANG=0 \
    TF_NEED_TENSORRT=0 \
    TF_NEED_GDR=0 \
    TF_NEED_VERBS=0 \
    PYTHON_BIN_PATH=/usr/bin/python3.6 \
    PYTHON_LIB_PATH=/usr/local/lib/python3.6/dist-packages
# << END Configure Tensorflow Build
@ -103,29 +100,23 @@ ENV PYTHON_LIB_PATH /usr/local/lib/python3.6/dist-packages
# Running bazel inside a `docker build` command causes trouble, cf:
# https://github.com/bazelbuild/bazel/issues/134
# The easiest solution is to set up a bazelrc file forcing --batch.
RUN echo "startup --batch" >>/etc/bazel.bazelrc
# Similarly, we need to workaround sandboxing issues:
# https://github.com/bazelbuild/bazel/issues/418
RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
>>/etc/bazel.bazelrc
# Append both Bazel workarounds to the system bazelrc in one write:
# batch startup, and standalone spawn/genrule strategies (no sandboxing).
RUN printf '%s\n' \
    "startup --batch" \
    "build --spawn_strategy=standalone --genrule_strategy=standalone" \
    >>/etc/bazel.bazelrc
# << END Configure Bazel
WORKDIR /
RUN git clone --recursive $DEEPSPEECH_REPO DeepSpeech
WORKDIR /DeepSpeech
RUN git fetch origin $DEEPSPEECH_SHA && git checkout $DEEPSPEECH_SHA
RUN git submodule sync tensorflow/ && git submodule update --init tensorflow/
RUN git submodule sync kenlm/ && git submodule update --init kenlm/
# Clone the repository, pin it to the requested revision and initialise the
# tensorflow and kenlm submodules. Every step is chained with && so that a
# failed fetch/checkout or submodule update stops the build rather than
# continuing on whatever revision the clone happened to leave checked out.
RUN git clone --recursive "$DEEPSPEECH_REPO" DeepSpeech && \
    cd /DeepSpeech && \
    git fetch origin "$DEEPSPEECH_SHA" && git checkout "$DEEPSPEECH_SHA" && \
    git submodule sync tensorflow/ && git submodule update --init tensorflow/ && \
    git submodule sync kenlm/ && git submodule update --init kenlm/
# >> START Build and bind
WORKDIR /DeepSpeech/tensorflow
# Fix for not found script https://github.com/tensorflow/tensorflow/issues/471
RUN ./configure
# Using CPU optimizations:
# -mtune=generic -march=x86-64 -msse -msse2 -msse3 -msse4.1 -msse4.2 -mavx.
# Adding --config=cuda flag to build using CUDA.
@ -133,7 +124,7 @@ RUN ./configure
# passing LD_LIBRARY_PATH is required cause Bazel doesn't pickup it from environment
# Build DeepSpeech
RUN bazel build \
RUN cd /DeepSpeech/tensorflow && ./configure && bazel build \
--workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \
--config=monolithic \
--config=cuda \
@ -151,36 +142,22 @@ RUN bazel build \
--copt=-fvisibility=hidden \
//native_client:libdeepspeech.so \
--verbose_failures \
--action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
--action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH} && \
cp bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/ && \
rm -fr /root/.cache/*
# Copy built libs to /DeepSpeech/native_client
RUN cp bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/
# Build client.cc and install Python client and decoder bindings
ENV TFDIR /DeepSpeech/tensorflow
RUN nproc
WORKDIR /DeepSpeech/native_client
RUN make NUM_PROCESSES=$(nproc) deepspeech
WORKDIR /DeepSpeech
RUN cd native_client/python && make NUM_PROCESSES=$(nproc) bindings
RUN pip3 install --upgrade native_client/python/dist/*.whl
RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
# Build the native client, then build and install the Python client wheel and
# the CTC decoder wheel. Steps are chained with && so a failed make, cd or
# pip install fails the layer instead of being masked by a later command.
RUN cd /DeepSpeech/native_client && make NUM_PROCESSES=$(nproc) deepspeech && \
    cd /DeepSpeech/native_client/python && make NUM_PROCESSES=$(nproc) bindings && \
    pip3 install --upgrade dist/*.whl && \
    cd /DeepSpeech/native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings && \
    pip3 install --upgrade dist/*.whl
# << END Build and bind
# Allow Python printing utf-8
ENV PYTHONIOENCODING UTF-8
# Build KenLM in /DeepSpeech/kenlm folder
WORKDIR /DeepSpeech/kenlm
RUN wget -O - https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 | tar xj
RUN ls -hal
RUN mkdir -p build && \
RUN wget -O - https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 | tar xj && \
# Only configure and build once the Eigen archive has actually been unpacked; \
# this matches the && chaining used by the training template for the same step. \
    mkdir -p build && \
    cd build && \
    EIGEN3_ROOT=/DeepSpeech/kenlm/eigen-3.3.8 cmake .. && \
    make -j $(nproc)

Просмотреть файл

@ -1,10 +1,9 @@
# Please refer to the TRAINING documentation, "Basic Dockerfile for training"
FROM tensorflow/tensorflow:1.15.4-gpu-py3
ENV DEBIAN_FRONTEND=noninteractive
ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO#
ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
ENV DEBIAN_FRONTEND=noninteractive \
DEEPSPEECH_REPO=#DEEPSPEECH_REPO# \
DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
RUN apt-get update && apt-get install -y --no-install-recommends \
apt-utils \
@ -20,48 +19,39 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
python3-venv \
unzip \
xz-utils \
wget
# We need to remove it because it's breaking deepspeech install later with
# weird errors about setuptools
RUN apt-get purge -y python3-xdg
# Install dependencies for audio augmentation
RUN apt-get install -y --no-install-recommends libopus0 libsndfile1
# Try and free some space
RUN rm -rf /var/lib/apt/lists/*
wget && \
# We need to remove it because it's breaking deepspeech install later with \
# weird errors about setuptools \
apt-get purge -y python3-xdg && \
# Install dependencies for audio augmentation \
apt-get install -y --no-install-recommends libopus0 libsndfile1 && \
# Try and free some space \
rm -rf /var/lib/apt/lists/*
WORKDIR /
RUN git clone $DEEPSPEECH_REPO DeepSpeech
WORKDIR /DeepSpeech
RUN git fetch origin $DEEPSPEECH_SHA && git checkout $DEEPSPEECH_SHA
RUN git submodule sync kenlm/ && git submodule update --init kenlm/
RUN git clone $DEEPSPEECH_REPO DeepSpeech && \
cd /DeepSpeech && git fetch origin $DEEPSPEECH_SHA && git checkout $DEEPSPEECH_SHA && \
git submodule sync kenlm/ && git submodule update --init kenlm/
# Build CTC decoder first, to avoid clashes on incompatible versions upgrades
RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
RUN cd /DeepSpeech/native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings && \
pip3 install --upgrade dist/*.whl
# Prepare deps
RUN pip3 install --upgrade pip==20.2.2 wheel==0.34.2 setuptools==49.6.0
# Install DeepSpeech
# - No need for the decoder since we did it earlier
# - There is already correct TensorFlow GPU installed on the base image,
# we don't want to break that
RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade -e .
# Tool to convert output graph for inference
RUN curl -vsSL https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/linux.amd64.convert_graphdef_memmapped_format.xz | xz -d > convert_graphdef_memmapped_format
RUN chmod +x convert_graphdef_memmapped_format
RUN cd /DeepSpeech && pip3 install --upgrade pip==20.2.2 wheel==0.34.2 setuptools==49.6.0 && \
# Install DeepSpeech \
# - No need for the decoder since we did it earlier \
# - There is already correct TensorFlow GPU installed on the base image, \
# we don't want to break that \
DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade -e . && \
# Tool to convert output graph for inference \
curl -vsSL https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/linux.amd64.convert_graphdef_memmapped_format.xz | xz -d > convert_graphdef_memmapped_format && \
chmod +x convert_graphdef_memmapped_format
# Build KenLM to generate new scorers
WORKDIR /DeepSpeech/kenlm
RUN wget -O - https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 | tar xj
RUN ls -hal
RUN mkdir -p build && \
RUN wget -O - https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 | tar xj && \
mkdir -p build && \
cd build && \
EIGEN3_ROOT=/DeepSpeech/kenlm/eigen-3.3.8 cmake .. && \
make -j $(nproc)

Просмотреть файл

@ -1,5 +1,5 @@
DEEPSPEECH_REPO ?= https://github.com/mozilla/DeepSpeech.git
DEEPSPEECH_SHA ?= origin/master
DEEPSPEECH_SHA ?= master
Dockerfile%: Dockerfile%.tmpl
sed \