# DeepSpeech/Dockerfile.train.tmpl

# Please refer to the TRAINING documentation, "Basic Dockerfile for training"

# Base image: TensorFlow 1.15.4 with GPU support on Python 3.
FROM tensorflow/tensorflow:1.15.4-gpu-py3

# DEEPSPEECH_REPO / DEEPSPEECH_SHA carry template placeholders; presumably they
# are substituted with the repository URL and revision when this template is
# rendered (confirm against the tooling that generates the final Dockerfile).
# DEBIAN_FRONTEND=noninteractive is set for the apt-get steps that follow.
# NOTE(review): being an ENV, it also persists into the runtime environment.
ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO# \
    DEEPSPEECH_SHA=#DEEPSPEECH_SHA# \
    DEBIAN_FRONTEND=noninteractive

# System packages, all handled in a single layer:
#   1. install the build toolchain and utilities used by later steps;
#   2. purge python3-xdg, because it breaks the later DeepSpeech install with
#      weird errors about setuptools;
#   3. install the runtime libraries needed for audio augmentation
#      (libopus0, libsndfile1);
#   4. drop the apt package lists to try and free some space.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        apt-utils \
        bash-completion \
        build-essential \
        cmake \
        curl \
        git \
        libboost-all-dev \
        libbz2-dev \
        liblzma-dev \
        locales \
        python3-venv \
        unzip \
        wget \
        xz-utils \
    && apt-get purge -y python3-xdg \
    && apt-get install -y --no-install-recommends \
        libopus0 \
        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# Clone the DeepSpeech sources into /DeepSpeech, check out the requested
# revision, and initialise only the kenlm submodule.
WORKDIR /
RUN git clone $DEEPSPEECH_REPO DeepSpeech \
    && cd /DeepSpeech \
    && git fetch origin $DEEPSPEECH_SHA \
    && git checkout $DEEPSPEECH_SHA \
    && git submodule sync kenlm/ \
    && git submodule update --init kenlm/

# Build CTC decoder first, to avoid clashes on incompatible versions upgrades.
# The bindings are built with make and the resulting wheel(s) under dist/ are
# installed with pip.
WORKDIR /DeepSpeech/native_client/ctcdecode
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN make NUM_PROCESSES=$(nproc) bindings && \
    pip3 install --no-cache-dir --upgrade dist/*.whl

# Prepare deps: pin the packaging toolchain, install DeepSpeech in editable
# mode, and fetch the graph conversion tool into /DeepSpeech.
WORKDIR /DeepSpeech
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir --upgrade pip==20.2.2 wheel==0.34.2 setuptools==49.6.0 && \
    # Install DeepSpeech \
    #  - No need for the decoder since we did it earlier \
    #  - There is already correct TensorFlow GPU installed on the base image, \
    #    we don't want to break that \
    DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --no-cache-dir --upgrade -e . && \
    # Tool to convert output graph for inference \
    # Downloaded to a file (with --fail) instead of being piped into xz: the \
    # default shell has no pipefail, so a pipeline would hide a curl failure. \
    curl -fvsSL -o convert_graphdef_memmapped_format.xz https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/linux.amd64.convert_graphdef_memmapped_format.xz && \
    xz -dc convert_graphdef_memmapped_format.xz > convert_graphdef_memmapped_format && \
    rm convert_graphdef_memmapped_format.xz && \
    chmod +x convert_graphdef_memmapped_format

# Build KenLM to generate new scorers.
# Eigen 3.3.8 is unpacked next to the KenLM sources and handed to cmake
# through EIGEN3_ROOT.
WORKDIR /DeepSpeech/kenlm
# The archive is saved to disk before extraction rather than piped into tar:
# the default shell has no pipefail, so a pipeline would hide a wget failure.
# The archive is removed in the same layer so it does not add to image size.
RUN wget -O eigen-3.3.8.tar.bz2 https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 && \
    tar xjf eigen-3.3.8.tar.bz2 && \
    rm eigen-3.3.8.tar.bz2 && \
    mkdir -p build && \
    cd build && \
    EIGEN3_ROOT=/DeepSpeech/kenlm/eigen-3.3.8 cmake .. && \
    make -j $(nproc)

# Return to the repository root so the script below resolves relative to it.
WORKDIR /DeepSpeech

# Execute bin/run-ldc93s1.sh at image build time; a non-zero exit fails the build.
# NOTE(review): presumably a short smoke-test training run on the LDC93S1
# sample — confirm against the script itself.
RUN ./bin/run-ldc93s1.sh
Decouple Dockerfile into build and train 2020-06-02 22:23:20 +03:00			`# Please refer to the TRAINING documentation, "Basic Dockerfile for training"`

Use correct 1.15.4 docker image 2020-09-28 11:23:00 +03:00			`FROM tensorflow/tensorflow:1.15.4-gpu-py3`
Optimize a bit Docker 2021-04-08 23:21:24 +03:00			`ENV DEBIAN_FRONTEND=noninteractive \`
			`DEEPSPEECH_REPO=#DEEPSPEECH_REPO# \`
			`DEEPSPEECH_SHA=#DEEPSPEECH_SHA#`
Decouple Dockerfile into build and train 2020-06-02 22:23:20 +03:00
			`RUN apt-get update && apt-get install -y --no-install-recommends \`
Use KenLM submodule 2021-04-07 23:34:43 +03:00			`apt-utils \`
			`bash-completion \`
			`build-essential \`
			`cmake \`
			`curl \`
			`git \`
			`libboost-all-dev \`
			`libbz2-dev \`
			`liblzma-dev \`
			`locales \`
			`python3-venv \`
			`unzip \`
			`xz-utils \`
Optimize a bit Docker 2021-04-08 23:21:24 +03:00			`wget && \`
			`# We need to remove it because it's breaking deepspeech install later with \`
			`# weird errors about setuptools \`
			`apt-get purge -y python3-xdg && \`
			`# Install dependencies for audio augmentation \`
			`apt-get install -y --no-install-recommends libopus0 libsndfile1 && \`
			`# Try and free some space \`
			`rm -rf /var/lib/apt/lists/*`
Fix #3157: Add CircleCI config 2020-07-15 14:46:13 +03:00
Decouple Dockerfile into build and train 2020-06-02 22:23:20 +03:00			`WORKDIR /`
Optimize a bit Docker 2021-04-08 23:21:24 +03:00			`RUN git clone $DEEPSPEECH_REPO DeepSpeech && \`
			`cd /DeepSpeech && git fetch origin $DEEPSPEECH_SHA && git checkout $DEEPSPEECH_SHA && \`
			`git submodule sync kenlm/ && git submodule update --init kenlm/`
Decouple Dockerfile into build and train 2020-06-02 22:23:20 +03:00
			`# Build CTC decoder first, to avoid clashes on incompatible versions upgrades`
Optimize a bit Docker 2021-04-08 23:21:24 +03:00			`RUN cd /DeepSpeech/native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings && \`
			`pip3 install --upgrade dist/*.whl`
Decouple Dockerfile into build and train 2020-06-02 22:23:20 +03:00
			`# Prepare deps`
Optimize a bit Docker 2021-04-08 23:21:24 +03:00			`RUN cd /DeepSpeech && pip3 install --upgrade pip==20.2.2 wheel==0.34.2 setuptools==49.6.0 && \`
			`# Install DeepSpeech \`
			`# - No need for the decoder since we did it earlier \`
			`# - There is already correct TensorFlow GPU installed on the base image, \`
			`# we don't want to break that \`
			`DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade -e . && \`
			`# Tool to convert output graph for inference \`
			`curl -vsSL https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/linux.amd64.convert_graphdef_memmapped_format.xz \| xz -d > convert_graphdef_memmapped_format && \`
			`chmod +x convert_graphdef_memmapped_format`
Install checkpoint converting tool. 2020-06-18 16:20:28 +03:00
Build kenlm in training container image. 2020-06-26 16:07:18 +03:00			`# Build KenLM to generate new scorers`
Use KenLM submodule 2021-04-07 23:34:43 +03:00			`WORKDIR /DeepSpeech/kenlm`
Optimize a bit Docker 2021-04-08 23:21:24 +03:00			`RUN wget -O - https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 \| tar xj && \`
			`mkdir -p build && \`
Use KenLM submodule 2021-04-07 23:34:43 +03:00			`cd build && \`
			`EIGEN3_ROOT=/DeepSpeech/kenlm/eigen-3.3.8 cmake .. && \`
			`make -j $(nproc)`

Revert "Merge pull request #3246 from lissyx/fix-docker" This reverts commit c01fda56c058779cc9dba952ce940c47398c4ed3, reversing changes made to 3e99b0d8b2b2d6e47c8ff7eb1dfd9a88eba8e6d8. 2020-08-25 16:35:03 +03:00			`WORKDIR /DeepSpeech`
Build kenlm in training container image. 2020-06-26 16:07:18 +03:00
Decouple Dockerfile into build and train 2020-06-02 22:23:20 +03:00			`RUN ./bin/run-ldc93s1.sh`