зеркало из https://github.com/mozilla/DeepSpeech.git
Optimize a bit Docker
This commit is contained in:
Родитель
ab134af507
Коммит
acecbc3e3e
|
@ -14,11 +14,14 @@ jobs:
|
|||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
submodules: 'recursive'
|
||||
fetch-depth: 1
|
||||
- run: |
|
||||
make Dockerfile.${{ matrix.template }} \
|
||||
DEEPSPEECH_REPO=https://github.com/${{ github.repository }} \
|
||||
DEEPSPEECH_SHA=${{ github.sha }}
|
||||
- run: |
|
||||
docker build -t app:${{ matrix.template }} -f Dockerfile.${{ matrix.template }} .
|
||||
mkdir /tmp/empty
|
||||
- run: |
|
||||
cd /tmp/empty; docker build -t app:${{ matrix.template }} -f ${{ github.workspace }}/Dockerfile.${{ matrix.template }} .
|
||||
- run: |
|
||||
docker save app:${{ matrix.template}} | zstd -o app_${{ matrix.template }}.zstd
|
||||
|
|
|
@ -3,8 +3,8 @@
|
|||
# Need the devel version because we need /usr/include/cudnn.h
|
||||
FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
|
||||
|
||||
ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO#
|
||||
ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
|
||||
ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO# \
|
||||
DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
|
||||
|
||||
# >> START Install base software
|
||||
|
||||
|
@ -39,62 +39,59 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
sox \
|
||||
unzip \
|
||||
wget \
|
||||
zlib1g-dev
|
||||
|
||||
RUN update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1
|
||||
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1
|
||||
|
||||
# Install Bazel
|
||||
RUN curl -LO "https://github.com/bazelbuild/bazel/releases/download/3.1.0/bazel_3.1.0-linux-x86_64.deb"
|
||||
RUN dpkg -i bazel_*.deb
|
||||
|
||||
# Try and free some space
|
||||
RUN rm -rf /var/lib/apt/lists/*
|
||||
zlib1g-dev; \
|
||||
update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 && \
|
||||
update-alternatives --install /usr/bin/python python /usr/bin/python3 1; \
|
||||
# Install Bazel \
|
||||
curl -LO "https://github.com/bazelbuild/bazel/releases/download/3.1.0/bazel_3.1.0-linux-x86_64.deb" && dpkg -i bazel_*.deb; \
|
||||
# Try and free some space \
|
||||
rm -rf /var/lib/apt/lists/* bazel_*.deb
|
||||
|
||||
# << END Install base software
|
||||
|
||||
# >> START Configure Tensorflow Build
|
||||
|
||||
# GPU Environment Setup
|
||||
ENV TF_NEED_ROCM 0
|
||||
ENV TF_NEED_OPENCL_SYCL 0
|
||||
ENV TF_NEED_OPENCL 0
|
||||
ENV TF_NEED_CUDA 1
|
||||
ENV TF_CUDA_PATHS "/usr,/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu/"
|
||||
ENV TF_CUDA_VERSION 10.1
|
||||
ENV TF_CUDNN_VERSION 7.6
|
||||
ENV TF_CUDA_COMPUTE_CAPABILITIES 6.0
|
||||
ENV TF_NCCL_VERSION 2.8
|
||||
|
||||
# Common Environment Setup
|
||||
ENV TF_BUILD_CONTAINER_TYPE GPU
|
||||
ENV TF_BUILD_OPTIONS OPT
|
||||
ENV TF_BUILD_DISABLE_GCP 1
|
||||
ENV TF_BUILD_ENABLE_XLA 0
|
||||
ENV TF_BUILD_PYTHON_VERSION PYTHON3
|
||||
ENV TF_BUILD_IS_OPT OPT
|
||||
ENV TF_BUILD_IS_PIP PIP
|
||||
|
||||
# Other Parameters
|
||||
ENV CC_OPT_FLAGS -mavx -mavx2 -msse4.1 -msse4.2 -mfma
|
||||
ENV TF_NEED_GCP 0
|
||||
ENV TF_NEED_HDFS 0
|
||||
ENV TF_NEED_JEMALLOC 1
|
||||
ENV TF_NEED_OPENCL 0
|
||||
ENV TF_CUDA_CLANG 0
|
||||
ENV TF_NEED_MKL 0
|
||||
ENV TF_ENABLE_XLA 0
|
||||
ENV TF_NEED_AWS 0
|
||||
ENV TF_NEED_KAFKA 0
|
||||
ENV TF_NEED_NGRAPH 0
|
||||
ENV TF_DOWNLOAD_CLANG 0
|
||||
ENV TF_NEED_TENSORRT 0
|
||||
ENV TF_NEED_GDR 0
|
||||
ENV TF_NEED_VERBS 0
|
||||
ENV TF_NEED_OPENCL_SYCL 0
|
||||
|
||||
ENV PYTHON_BIN_PATH /usr/bin/python3.6
|
||||
ENV PYTHON_LIB_PATH /usr/local/lib/python3.6/dist-packages
|
||||
ENV TF_NEED_ROCM=0 \
|
||||
TF_NEED_OPENCL_SYCL=0 \
|
||||
TF_NEED_OPENCL=0 \
|
||||
TF_NEED_CUDA=1 \
|
||||
TF_CUDA_PATHS="/usr,/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu/" \
|
||||
TF_CUDA_VERSION=10.1 \
|
||||
TF_CUDNN_VERSION=7.6 \
|
||||
TF_CUDA_COMPUTE_CAPABILITIES=6.0 \
|
||||
TF_NCCL_VERSION=2.8 \
|
||||
# Common Environment Setup \
|
||||
TF_BUILD_CONTAINER_TYPE=GPU \
|
||||
TF_BUILD_OPTIONS=OPT \
|
||||
TF_BUILD_DISABLE_GCP=1 \
|
||||
TF_BUILD_ENABLE_XLA=0 \
|
||||
TF_BUILD_PYTHON_VERSION=PYTHON3 \
|
||||
TF_BUILD_IS_OPT=OPT \
|
||||
TF_BUILD_IS_PIP=PIP \
|
||||
# Build client.cc and install Python client and decoder bindings \
|
||||
TFDIR=/DeepSpeech/tensorflow \
|
||||
# Allow Python printing utf-8 \
|
||||
PYTHONIOENCODING=UTF-8 \
|
||||
# Other Parameters \
|
||||
CC_OPT_FLAGS="-mavx -mavx2 -msse4.1 -msse4.2 -mfma" \
|
||||
TF_NEED_GCP=0 \
|
||||
TF_NEED_HDFS=0 \
|
||||
TF_NEED_JEMALLOC=1 \
|
||||
TF_NEED_OPENCL=0 \
|
||||
TF_CUDA_CLANG=0 \
|
||||
TF_NEED_MKL=0 \
|
||||
TF_ENABLE_XLA=0 \
|
||||
TF_NEED_AWS=0 \
|
||||
TF_NEED_KAFKA=0 \
|
||||
TF_NEED_NGRAPH=0 \
|
||||
TF_DOWNLOAD_CLANG=0 \
|
||||
TF_NEED_TENSORRT=0 \
|
||||
TF_NEED_GDR=0 \
|
||||
TF_NEED_VERBS=0 \
|
||||
TF_NEED_OPENCL_SYCL=0 \
|
||||
PYTHON_BIN_PATH=/usr/bin/python3.6 \
|
||||
PYTHON_LIB_PATH=/usr/local/lib/python3.6/dist-packages
|
||||
|
||||
# << END Configure Tensorflow Build
|
||||
|
||||
|
@ -103,29 +100,23 @@ ENV PYTHON_LIB_PATH /usr/local/lib/python3.6/dist-packages
|
|||
# Running bazel inside a `docker build` command causes trouble, cf:
|
||||
# https://github.com/bazelbuild/bazel/issues/134
|
||||
# The easiest solution is to set up a bazelrc file forcing --batch.
|
||||
RUN echo "startup --batch" >>/etc/bazel.bazelrc
|
||||
# Similarly, we need to workaround sandboxing issues:
|
||||
# https://github.com/bazelbuild/bazel/issues/418
|
||||
RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
|
||||
>>/etc/bazel.bazelrc
|
||||
RUN echo "startup --batch" >>/etc/bazel.bazelrc; \
|
||||
echo "build --spawn_strategy=standalone --genrule_strategy=standalone" >> /etc/bazel.bazelrc
|
||||
|
||||
# << END Configure Bazel
|
||||
|
||||
WORKDIR /
|
||||
|
||||
RUN git clone --recursive $DEEPSPEECH_REPO DeepSpeech
|
||||
WORKDIR /DeepSpeech
|
||||
RUN git fetch origin $DEEPSPEECH_SHA && git checkout $DEEPSPEECH_SHA
|
||||
RUN git submodule sync tensorflow/ && git submodule update --init tensorflow/
|
||||
RUN git submodule sync kenlm/ && git submodule update --init kenlm/
|
||||
RUN git clone --recursive $DEEPSPEECH_REPO DeepSpeech && \
|
||||
cd /DeepSpeech && \
|
||||
git fetch origin $DEEPSPEECH_SHA && git checkout $DEEPSPEECH_SHA; \
|
||||
git submodule sync tensorflow/ && git submodule update --init tensorflow/; \
|
||||
git submodule sync kenlm/ && git submodule update --init kenlm/
|
||||
|
||||
# >> START Build and bind
|
||||
|
||||
WORKDIR /DeepSpeech/tensorflow
|
||||
|
||||
# Fix for script not found: https://github.com/tensorflow/tensorflow/issues/471
|
||||
RUN ./configure
|
||||
|
||||
# Using CPU optimizations:
|
||||
# -mtune=generic -march=x86-64 -msse -msse2 -msse3 -msse4.1 -msse4.2 -mavx.
|
||||
# Adding --config=cuda flag to build using CUDA.
|
||||
|
@ -133,7 +124,7 @@ RUN ./configure
|
|||
# Passing LD_LIBRARY_PATH is required because Bazel doesn't pick it up from the environment
|
||||
|
||||
# Build DeepSpeech
|
||||
RUN bazel build \
|
||||
RUN cd /DeepSpeech/tensorflow && ./configure && bazel build \
|
||||
--workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \
|
||||
--config=monolithic \
|
||||
--config=cuda \
|
||||
|
@ -151,36 +142,22 @@ RUN bazel build \
|
|||
--copt=-fvisibility=hidden \
|
||||
//native_client:libdeepspeech.so \
|
||||
--verbose_failures \
|
||||
--action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
|
||||
--action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH} && \
|
||||
cp bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/ && \
|
||||
rm -fr /root/.cache/*
|
||||
|
||||
# Copy built libs to /DeepSpeech/native_client
|
||||
RUN cp bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/
|
||||
|
||||
# Build client.cc and install Python client and decoder bindings
|
||||
ENV TFDIR /DeepSpeech/tensorflow
|
||||
|
||||
RUN nproc
|
||||
|
||||
WORKDIR /DeepSpeech/native_client
|
||||
RUN make NUM_PROCESSES=$(nproc) deepspeech
|
||||
|
||||
WORKDIR /DeepSpeech
|
||||
RUN cd native_client/python && make NUM_PROCESSES=$(nproc) bindings
|
||||
RUN pip3 install --upgrade native_client/python/dist/*.whl
|
||||
|
||||
RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
|
||||
RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
|
||||
RUN cd /DeepSpeech/native_client && make NUM_PROCESSES=$(nproc) deepspeech ; \
|
||||
cd /DeepSpeech/native_client/python && make NUM_PROCESSES=$(nproc) bindings; \
|
||||
pip3 install --upgrade dist/*.whl; \
|
||||
cd /DeepSpeech/native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings; \
|
||||
pip3 install --upgrade dist/*.whl
|
||||
|
||||
# << END Build and bind
|
||||
|
||||
# Allow Python printing utf-8
|
||||
ENV PYTHONIOENCODING UTF-8
|
||||
|
||||
# Build KenLM in /DeepSpeech/kenlm folder
|
||||
WORKDIR /DeepSpeech/kenlm
|
||||
RUN wget -O - https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 | tar xj
|
||||
RUN ls -hal
|
||||
RUN mkdir -p build && \
|
||||
RUN wget -O - https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 | tar xj; \
|
||||
mkdir -p build && \
|
||||
cd build && \
|
||||
EIGEN3_ROOT=/DeepSpeech/kenlm/eigen-3.3.8 cmake .. && \
|
||||
make -j $(nproc)
|
||||
|
|
|
@ -1,10 +1,9 @@
|
|||
# Please refer to the TRAINING documentation, "Basic Dockerfile for training"
|
||||
|
||||
FROM tensorflow/tensorflow:1.15.4-gpu-py3
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO#
|
||||
ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
|
||||
ENV DEBIAN_FRONTEND=noninteractive \
|
||||
DEEPSPEECH_REPO=#DEEPSPEECH_REPO# \
|
||||
DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
apt-utils \
|
||||
|
@ -20,48 +19,39 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
python3-venv \
|
||||
unzip \
|
||||
xz-utils \
|
||||
wget
|
||||
|
||||
# We need to remove python3-xdg because it's breaking the deepspeech install later with
|
||||
# weird errors about setuptools
|
||||
RUN apt-get purge -y python3-xdg
|
||||
|
||||
# Install dependencies for audio augmentation
|
||||
RUN apt-get install -y --no-install-recommends libopus0 libsndfile1
|
||||
|
||||
# Try and free some space
|
||||
RUN rm -rf /var/lib/apt/lists/*
|
||||
wget && \
|
||||
# We need to remove python3-xdg because it's breaking the deepspeech install later with \
|
||||
# weird errors about setuptools \
|
||||
apt-get purge -y python3-xdg && \
|
||||
# Install dependencies for audio augmentation \
|
||||
apt-get install -y --no-install-recommends libopus0 libsndfile1 && \
|
||||
# Try and free some space \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /
|
||||
RUN git clone $DEEPSPEECH_REPO DeepSpeech
|
||||
|
||||
WORKDIR /DeepSpeech
|
||||
RUN git fetch origin $DEEPSPEECH_SHA && git checkout $DEEPSPEECH_SHA
|
||||
RUN git submodule sync kenlm/ && git submodule update --init kenlm/
|
||||
RUN git clone $DEEPSPEECH_REPO DeepSpeech && \
|
||||
cd /DeepSpeech && git fetch origin $DEEPSPEECH_SHA && git checkout $DEEPSPEECH_SHA && \
|
||||
git submodule sync kenlm/ && git submodule update --init kenlm/
|
||||
|
||||
# Build CTC decoder first, to avoid clashes on incompatible versions upgrades
|
||||
RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
|
||||
RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
|
||||
RUN cd /DeepSpeech/native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings && \
|
||||
pip3 install --upgrade dist/*.whl
|
||||
|
||||
# Prepare deps
|
||||
RUN pip3 install --upgrade pip==20.2.2 wheel==0.34.2 setuptools==49.6.0
|
||||
|
||||
# Install DeepSpeech
|
||||
# - No need for the decoder since we did it earlier
|
||||
# - There is already correct TensorFlow GPU installed on the base image,
|
||||
# we don't want to break that
|
||||
RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade -e .
|
||||
|
||||
# Tool to convert output graph for inference
|
||||
RUN curl -vsSL https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/linux.amd64.convert_graphdef_memmapped_format.xz | xz -d > convert_graphdef_memmapped_format
|
||||
|
||||
RUN chmod +x convert_graphdef_memmapped_format
|
||||
RUN cd /DeepSpeech && pip3 install --upgrade pip==20.2.2 wheel==0.34.2 setuptools==49.6.0 && \
|
||||
# Install DeepSpeech \
|
||||
# - No need for the decoder since we did it earlier \
|
||||
# - There is already correct TensorFlow GPU installed on the base image, \
|
||||
# we don't want to break that \
|
||||
DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade -e . && \
|
||||
# Tool to convert output graph for inference \
|
||||
curl -vsSL https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/linux.amd64.convert_graphdef_memmapped_format.xz | xz -d > convert_graphdef_memmapped_format && \
|
||||
chmod +x convert_graphdef_memmapped_format
|
||||
|
||||
# Build KenLM to generate new scorers
|
||||
WORKDIR /DeepSpeech/kenlm
|
||||
RUN wget -O - https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 | tar xj
|
||||
RUN ls -hal
|
||||
RUN mkdir -p build && \
|
||||
RUN wget -O - https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 | tar xj && \
|
||||
mkdir -p build && \
|
||||
cd build && \
|
||||
EIGEN3_ROOT=/DeepSpeech/kenlm/eigen-3.3.8 cmake .. && \
|
||||
make -j $(nproc)
|
||||
|
|
2
Makefile
2
Makefile
|
@ -1,5 +1,5 @@
|
|||
DEEPSPEECH_REPO ?= https://github.com/mozilla/DeepSpeech.git
|
||||
DEEPSPEECH_SHA ?= origin/master
|
||||
DEEPSPEECH_SHA ?= master
|
||||
|
||||
Dockerfile%: Dockerfile%.tmpl
|
||||
sed \
|
||||
|
|
Загрузка…
Ссылка в новой задаче