2020-06-02 22:23:20 +03:00
|
|
|
# Please refer to the TRAINING documentation, "Basic Dockerfile for training"
|
|
|
|
|
2020-09-28 11:23:00 +03:00
|
|
|
# Base image: TensorFlow 1.15.4, GPU / Python 3 variant (per the image tag).
FROM tensorflow/tensorflow:1.15.4-gpu-py3

# Build-time only: keeps apt/debconf from prompting during the RUN steps of
# this build without leaving the setting in the environment of containers
# started from the resulting image.
ARG DEBIAN_FRONTEND=noninteractive

# Repository and revision of DeepSpeech to build. The values below are
# template placeholders; presumably they are substituted by the tooling that
# generates the final Dockerfile (TODO confirm against that tooling).
ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO# \
    DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
|
2020-06-02 22:23:20 +03:00
|
|
|
|
|
|
|
# System packages for building and training, installed in a single layer:
#  1. refresh the package index and install the build/tooling packages;
#  2. purge python3-xdg, because it breaks the deepspeech install later with
#     weird errors about setuptools;
#  3. install the dependencies for audio augmentation (libopus0, libsndfile1);
#  4. drop the apt package lists to try and free some space.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        apt-utils \
        bash-completion \
        build-essential \
        cmake \
        curl \
        git \
        libboost-all-dev \
        libbz2-dev \
        liblzma-dev \
        locales \
        python3-venv \
        unzip \
        wget \
        xz-utils \
 && apt-get purge -y python3-xdg \
 && apt-get install -y --no-install-recommends \
        libopus0 \
        libsndfile1 \
 && rm -rf /var/lib/apt/lists/*
|
2020-07-15 14:46:13 +03:00
|
|
|
|
2020-06-02 22:23:20 +03:00
|
|
|
# Clone the DeepSpeech sources into /DeepSpeech, check out the requested
# revision and initialise only the kenlm submodule.
WORKDIR /

# The variables are quoted so that a value containing whitespace or glob
# characters reaches git as a single argument. DEEPSPEECH_SHA is fetched
# explicitly from origin before it is checked out.
RUN git clone "$DEEPSPEECH_REPO" DeepSpeech && \
    cd /DeepSpeech && \
    git fetch origin "$DEEPSPEECH_SHA" && \
    git checkout "$DEEPSPEECH_SHA" && \
    git submodule sync kenlm/ && \
    git submodule update --init kenlm/
|
2020-06-02 22:23:20 +03:00
|
|
|
|
|
|
|
# Build CTC decoder first, to avoid clashes on incompatible versions upgrades.
# The bindings are built with one make process per available CPU and the
# resulting wheel(s) from dist/ are installed. --no-cache-dir keeps pip's
# cache out of the image layer.
RUN cd /DeepSpeech/native_client/ctcdecode && \
    make NUM_PROCESSES="$(nproc)" bindings && \
    pip3 install --no-cache-dir --upgrade dist/*.whl
|
2020-06-02 22:23:20 +03:00
|
|
|
|
|
|
|
# Prepare deps and install DeepSpeech from the checked-out sources (editable).
#  - pip, wheel and setuptools are pinned to fixed versions first.
#  - DS_NODECODER=y: no need for the decoder since we did it earlier.
#  - DS_NOTENSORFLOW=y: there is already correct TensorFlow GPU installed on
#    the base image, we don't want to break that.
#  - --no-cache-dir keeps pip's cache out of the image layer.
#  - convert_graphdef_memmapped_format is the tool to convert the output graph
#    for inference. The archive is downloaded to a file with `curl -f` and
#    decompressed afterwards instead of being piped straight into xz, so an
#    HTTP error or aborted transfer fails the build on curl's own exit status
#    (the default /bin/sh has no pipefail).
RUN cd /DeepSpeech && \
    pip3 install --no-cache-dir --upgrade pip==20.2.2 wheel==0.34.2 setuptools==49.6.0 && \
    DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --no-cache-dir --upgrade -e . && \
    curl -vfsSL -o convert_graphdef_memmapped_format.xz \
        https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/linux.amd64.convert_graphdef_memmapped_format.xz && \
    xz -dc convert_graphdef_memmapped_format.xz > convert_graphdef_memmapped_format && \
    rm convert_graphdef_memmapped_format.xz && \
    chmod +x convert_graphdef_memmapped_format
|
2020-06-18 16:20:28 +03:00
|
|
|
|
2020-06-26 16:07:18 +03:00
|
|
|
# Build KenLM to generate new scorers.
WORKDIR /DeepSpeech/kenlm

# Eigen 3.3.8 is unpacked next to the KenLM sources and handed to CMake through
# EIGEN3_ROOT. The tarball is saved to disk before extraction instead of being
# piped into tar, so a failed download aborts the build on wget's own exit
# status (the default /bin/sh has no pipefail). It is deleted again in the same
# layer. KenLM is then configured and built out-of-tree in build/ with one make
# job per available CPU.
RUN wget -O eigen-3.3.8.tar.bz2 https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 && \
    tar xjf eigen-3.3.8.tar.bz2 && \
    rm eigen-3.3.8.tar.bz2 && \
    mkdir -p build && \
    cd build && \
    EIGEN3_ROOT=/DeepSpeech/kenlm/eigen-3.3.8 cmake .. && \
    make -j "$(nproc)"
|
|
|
|
|
2020-08-25 16:35:03 +03:00
|
|
|
# Switch back to the repository root; the script below is invoked by a path
# relative to it.
WORKDIR /DeepSpeech

# Execute bin/run-ldc93s1.sh as part of the image build, so the build fails if
# the script exits non-zero.
# NOTE(review): presumably a small end-to-end training smoke test; confirm
# against the script itself.
RUN ./bin/run-ldc93s1.sh
|