2020-06-02 22:23:20 +03:00
|
|
|
# Please refer to the TRAINING documentation, "Basic Dockerfile for training"
|
|
|
|
|
|
|
|
# Base image: TensorFlow 1.15.2 with GPU support on Python 3 (as named by the tag).
FROM tensorflow/tensorflow:1.15.2-gpu-py3
|
2020-06-18 16:20:46 +03:00
|
|
|
# Build-time only: keeps apt/debconf non-interactive for the RUN steps of this
# build without persisting the setting into the environment of containers
# started from the image (ENV would leak it into the runtime).
ARG DEBIAN_FRONTEND=noninteractive
|
2020-06-02 22:23:20 +03:00
|
|
|
|
2020-08-10 21:19:50 +03:00
|
|
|
# Repository URL and revision to build from. Both values are template
# placeholders (presumably substituted when this template is rendered; confirm
# against the build tooling).
ENV MOZILLA_VOICE_STT_REPO=#MOZILLA_VOICE_STT_REPO# \
    MOZILLA_VOICE_STT_SHA=#MOZILLA_VOICE_STT_SHA#
|
2020-06-02 22:23:20 +03:00
|
|
|
|
|
|
|
# Install all OS-level dependencies in a single layer.
# - `apt-get update` and every `apt-get install` share one RUN so that no
#   install ever runs against a stale, separately cached package index.
# - libopus0 and libsndfile1 are the dependencies for audio augmentation.
# - python3-xdg is purged because it breaks the deepspeech install later with
#   weird errors about setuptools.
# - The apt lists are deleted in this same RUN: removing them in a later layer
#   would not free any space in the final image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        apt-utils \
        bash-completion \
        build-essential \
        cmake \
        curl \
        git \
        libboost-all-dev \
        libbz2-dev \
        libopus0 \
        libsndfile1 \
        locales \
        python3-venv \
        unzip \
        wget \
    && apt-get purge -y python3-xdg \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
|
2020-06-02 22:23:20 +03:00
|
|
|
# Clone into an explicit absolute path. A bare `git clone <url>` names the
# target directory after the last component of the URL, so a repository or
# fork not called "DeepSpeech" would leave /DeepSpeech empty and make the
# checkout below fail. The variables are quoted so an empty or unusual value
# fails loudly instead of being word-split by the shell.
RUN git clone "$MOZILLA_VOICE_STT_REPO" /DeepSpeech

# All following relative paths are resolved from the repository root.
WORKDIR /DeepSpeech
RUN git checkout "$MOZILLA_VOICE_STT_SHA"
|
2020-06-02 22:23:20 +03:00
|
|
|
|
|
|
|
# Build CTC decoder first, to avoid clashes on incompatible versions upgrades
RUN cd native_client/ctcdecode && make NUM_PROCESSES="$(nproc)" bindings
# Install the wheel(s) found in dist/ after the build above; --no-cache-dir
# keeps pip's cache out of the image layer.
RUN pip3 install --no-cache-dir --upgrade native_client/ctcdecode/dist/*.whl
|
|
|
|
|
|
|
|
# Prepare deps: move pip, wheel and setuptools to pinned versions.
# --no-cache-dir keeps pip's cache out of the image layer.
RUN pip3 install --no-cache-dir --upgrade pip==20.0.2 wheel==0.34.2 setuptools==46.1.3
|
|
|
|
|
2020-08-25 16:35:03 +03:00
|
|
|
# Install DeepSpeech from the current directory in editable mode (-e .)
#  - No need for the decoder since we did it earlier (DS_NODECODER=y)
#  - There is already correct TensorFlow GPU installed on the base image,
#    we don't want to break that (DS_NOTENSORFLOW=y)
#  - --no-cache-dir keeps pip's cache out of the image layer
RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --no-cache-dir --upgrade -e .
|
2020-06-02 22:23:20 +03:00
|
|
|
|
2020-06-18 16:20:28 +03:00
|
|
|
# Fetch the tool used to convert the output graph for inference, placing it in
# the current directory.
RUN python3 util/taskcluster.py \
        --source tensorflow \
        --branch r1.15 \
        --artifact convert_graphdef_memmapped_format \
        --target .
|
2020-06-18 16:20:28 +03:00
|
|
|
|
2020-06-26 16:07:18 +03:00
|
|
|
# KenLM is built from a pinned commit so that new scorers can be generated.
WORKDIR /DeepSpeech/native_client
# Start from a clean tree: drop any existing kenlm directory, clone, pin the
# revision, then do an out-of-tree CMake build in kenlm/build.
RUN rm -rf kenlm \
    && git clone https://github.com/kpu/kenlm \
    && git -C kenlm checkout 87e85e66c99ceff1fab2500a7c60c01da7315eec \
    && mkdir -p kenlm/build \
    && cd kenlm/build \
    && cmake .. \
    && make -j $(nproc)

# Back to the repository root for the remaining steps.
WORKDIR /DeepSpeech
|
2020-06-26 16:07:18 +03:00
|
|
|
|
2020-06-02 22:23:20 +03:00
|
|
|
# Execute the repository's bin/run-ldc93s1.sh script as a build step; the image
# build fails if the script exits non-zero.
# NOTE(review): presumably a small end-to-end training smoke test; confirm
# against the script itself.
RUN ./bin/run-ldc93s1.sh
|