Skip to content

Commit acecbc3

Browse files
author
Alexandre Lissy
committed
Optimize a bit Docker
1 parent ab134af commit acecbc3

File tree

4 files changed

+101
-131
lines changed

4 files changed

+101
-131
lines changed

.github/workflows/docker.yml

+6-3
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,14 @@ jobs:
1414
steps:
1515
- uses: actions/checkout@v2
1616
with:
17-
fetch-depth: 0
18-
submodules: 'recursive'
17+
fetch-depth: 1
1918
- run: |
2019
make Dockerfile.${{ matrix.template }} \
2120
DEEPSPEECH_REPO=https://github.com/${{ github.repository }} \
2221
DEEPSPEECH_SHA=${{ github.sha }}
2322
- run: |
24-
docker build -t app:${{ matrix.template }} -f Dockerfile.${{ matrix.template }} .
23+
mkdir /tmp/empty
24+
- run: |
25+
cd /tmp/empty; docker build -t app:${{ matrix.template }} -f ${{ github.workspace }}/Dockerfile.${{ matrix.template }} .
26+
- run: |
27+
docker save app:${{ matrix.template}} | zstd -o app_${{ matrix.template }}.zstd

Dockerfile.build.tmpl

+67-90
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
# Need the devel version because we need /usr/include/cudnn.h
44
FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
55

6-
ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO#
7-
ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
6+
ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO# \
7+
DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
88

99
# >> START Install base software
1010

@@ -39,62 +39,59 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
3939
sox \
4040
unzip \
4141
wget \
42-
zlib1g-dev
43-
44-
RUN update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1
45-
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1
46-
47-
# Install Bazel
48-
RUN curl -LO "https://github.com/bazelbuild/bazel/releases/download/3.1.0/bazel_3.1.0-linux-x86_64.deb"
49-
RUN dpkg -i bazel_*.deb
50-
51-
# Try and free some space
52-
RUN rm -rf /var/lib/apt/lists/*
42+
zlib1g-dev; \
43+
update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 && \
44+
update-alternatives --install /usr/bin/python python /usr/bin/python3 1; \
45+
# Install Bazel \
46+
curl -LO "https://github.com/bazelbuild/bazel/releases/download/3.1.0/bazel_3.1.0-linux-x86_64.deb" && dpkg -i bazel_*.deb; \
47+
# Try and free some space \
48+
rm -rf /var/lib/apt/lists/* bazel_*.deb
5349

5450
# << END Install base software
5551

5652
# >> START Configure Tensorflow Build
5753

5854
# GPU Environment Setup
59-
ENV TF_NEED_ROCM 0
60-
ENV TF_NEED_OPENCL_SYCL 0
61-
ENV TF_NEED_OPENCL 0
62-
ENV TF_NEED_CUDA 1
63-
ENV TF_CUDA_PATHS "/usr,/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu/"
64-
ENV TF_CUDA_VERSION 10.1
65-
ENV TF_CUDNN_VERSION 7.6
66-
ENV TF_CUDA_COMPUTE_CAPABILITIES 6.0
67-
ENV TF_NCCL_VERSION 2.8
68-
69-
# Common Environment Setup
70-
ENV TF_BUILD_CONTAINER_TYPE GPU
71-
ENV TF_BUILD_OPTIONS OPT
72-
ENV TF_BUILD_DISABLE_GCP 1
73-
ENV TF_BUILD_ENABLE_XLA 0
74-
ENV TF_BUILD_PYTHON_VERSION PYTHON3
75-
ENV TF_BUILD_IS_OPT OPT
76-
ENV TF_BUILD_IS_PIP PIP
77-
78-
# Other Parameters
79-
ENV CC_OPT_FLAGS -mavx -mavx2 -msse4.1 -msse4.2 -mfma
80-
ENV TF_NEED_GCP 0
81-
ENV TF_NEED_HDFS 0
82-
ENV TF_NEED_JEMALLOC 1
83-
ENV TF_NEED_OPENCL 0
84-
ENV TF_CUDA_CLANG 0
85-
ENV TF_NEED_MKL 0
86-
ENV TF_ENABLE_XLA 0
87-
ENV TF_NEED_AWS 0
88-
ENV TF_NEED_KAFKA 0
89-
ENV TF_NEED_NGRAPH 0
90-
ENV TF_DOWNLOAD_CLANG 0
91-
ENV TF_NEED_TENSORRT 0
92-
ENV TF_NEED_GDR 0
93-
ENV TF_NEED_VERBS 0
94-
ENV TF_NEED_OPENCL_SYCL 0
95-
96-
ENV PYTHON_BIN_PATH /usr/bin/python3.6
97-
ENV PYTHON_LIB_PATH /usr/local/lib/python3.6/dist-packages
55+
ENV TF_NEED_ROCM=0 \
56+
TF_NEED_OPENCL_SYCL=0 \
57+
TF_NEED_OPENCL=0 \
58+
TF_NEED_CUDA=1 \
59+
TF_CUDA_PATHS="/usr,/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu/" \
60+
TF_CUDA_VERSION=10.1 \
61+
TF_CUDNN_VERSION=7.6 \
62+
TF_CUDA_COMPUTE_CAPABILITIES=6.0 \
63+
TF_NCCL_VERSION=2.8 \
64+
# Common Environment Setup \
65+
TF_BUILD_CONTAINER_TYPE=GPU \
66+
TF_BUILD_OPTIONS=OPT \
67+
TF_BUILD_DISABLE_GCP=1 \
68+
TF_BUILD_ENABLE_XLA=0 \
69+
TF_BUILD_PYTHON_VERSION=PYTHON3 \
70+
TF_BUILD_IS_OPT=OPT \
71+
TF_BUILD_IS_PIP=PIP \
72+
# Build client.cc and install Python client and decoder bindings \
73+
TFDIR=/DeepSpeech/tensorflow \
74+
# Allow Python printing utf-8 \
75+
PYTHONIOENCODING=UTF-8 \
76+
# Other Parameters \
77+
CC_OPT_FLAGS="-mavx -mavx2 -msse4.1 -msse4.2 -mfma" \
78+
TF_NEED_GCP=0 \
79+
TF_NEED_HDFS=0 \
80+
TF_NEED_JEMALLOC=1 \
81+
TF_NEED_OPENCL=0 \
82+
TF_CUDA_CLANG=0 \
83+
TF_NEED_MKL=0 \
84+
TF_ENABLE_XLA=0 \
85+
TF_NEED_AWS=0 \
86+
TF_NEED_KAFKA=0 \
87+
TF_NEED_NGRAPH=0 \
88+
TF_DOWNLOAD_CLANG=0 \
89+
TF_NEED_TENSORRT=0 \
90+
TF_NEED_GDR=0 \
91+
TF_NEED_VERBS=0 \
92+
TF_NEED_OPENCL_SYCL=0 \
93+
PYTHON_BIN_PATH=/usr/bin/python3.6 \
94+
PYTHON_LIB_PATH=/usr/local/lib/python3.6/dist-packages
9895

9996
# << END Configure Tensorflow Build
10097

@@ -103,37 +100,31 @@ ENV PYTHON_LIB_PATH /usr/local/lib/python3.6/dist-packages
103100
# Running bazel inside a `docker build` command causes trouble, cf:
104101
# https://github.com/bazelbuild/bazel/issues/134
105102
# The easiest solution is to set up a bazelrc file forcing --batch.
106-
RUN echo "startup --batch" >>/etc/bazel.bazelrc
107103
# Similarly, we need to work around sandboxing issues:
108104
# https://github.com/bazelbuild/bazel/issues/418
109-
RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
110-
>>/etc/bazel.bazelrc
105+
RUN echo "startup --batch" >>/etc/bazel.bazelrc; \
106+
echo "build --spawn_strategy=standalone --genrule_strategy=standalone" >> /etc/bazel.bazelrc
111107

112108
# << END Configure Bazel
113109

114110
WORKDIR /
115111

116-
RUN git clone --recursive $DEEPSPEECH_REPO DeepSpeech
117-
WORKDIR /DeepSpeech
118-
RUN git fetch origin $DEEPSPEECH_SHA && git checkout $DEEPSPEECH_SHA
119-
RUN git submodule sync tensorflow/ && git submodule update --init tensorflow/
120-
RUN git submodule sync kenlm/ && git submodule update --init kenlm/
112+
RUN git clone --recursive $DEEPSPEECH_REPO DeepSpeech && \
113+
cd /DeepSpeech && \
114+
git fetch origin $DEEPSPEECH_SHA && git checkout $DEEPSPEECH_SHA; \
115+
git submodule sync tensorflow/ && git submodule update --init tensorflow/; \
116+
git submodule sync kenlm/ && git submodule update --init kenlm/
121117

122118
# >> START Build and bind
123-
124-
WORKDIR /DeepSpeech/tensorflow
125-
126119
# Fix for not found script https://github.com/tensorflow/tensorflow/issues/471
127-
RUN ./configure
128-
129120
# Using CPU optimizations:
130121
# -mtune=generic -march=x86-64 -msse -msse2 -msse3 -msse4.1 -msse4.2 -mavx.
131122
# Adding --config=cuda flag to build using CUDA.
132123

133124
# Passing LD_LIBRARY_PATH is required because Bazel doesn't pick it up from the environment
134125

135126
# Build DeepSpeech
136-
RUN bazel build \
127+
RUN cd /DeepSpeech/tensorflow && ./configure && bazel build \
137128
--workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \
138129
--config=monolithic \
139130
--config=cuda \
@@ -151,36 +142,22 @@ RUN bazel build \
151142
--copt=-fvisibility=hidden \
152143
//native_client:libdeepspeech.so \
153144
--verbose_failures \
154-
--action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
145+
--action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH} && \
146+
cp bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/ && \
147+
rm -fr /root/.cache/*
155148

156-
# Copy built libs to /DeepSpeech/native_client
157-
RUN cp bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/
158-
159-
# Build client.cc and install Python client and decoder bindings
160-
ENV TFDIR /DeepSpeech/tensorflow
161-
162-
RUN nproc
163-
164-
WORKDIR /DeepSpeech/native_client
165-
RUN make NUM_PROCESSES=$(nproc) deepspeech
166-
167-
WORKDIR /DeepSpeech
168-
RUN cd native_client/python && make NUM_PROCESSES=$(nproc) bindings
169-
RUN pip3 install --upgrade native_client/python/dist/*.whl
170-
171-
RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
172-
RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
149+
RUN cd /DeepSpeech/native_client && make NUM_PROCESSES=$(nproc) deepspeech ; \
150+
cd /DeepSpeech/native_client/python && make NUM_PROCESSES=$(nproc) bindings; \
151+
pip3 install --upgrade dist/*.whl; \
152+
cd /DeepSpeech/native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings; \
153+
pip3 install --upgrade dist/*.whl
173154

174155
# << END Build and bind
175156

176-
# Allow Python printing utf-8
177-
ENV PYTHONIOENCODING UTF-8
178-
179157
# Build KenLM in /DeepSpeech/kenlm folder
180158
WORKDIR /DeepSpeech/kenlm
181-
RUN wget -O - https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 | tar xj
182-
RUN ls -hal
183-
RUN mkdir -p build && \
159+
RUN wget -O - https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 | tar xj; \
160+
mkdir -p build && \
184161
cd build && \
185162
EIGEN3_ROOT=/DeepSpeech/kenlm/eigen-3.3.8 cmake .. && \
186163
make -j $(nproc)

Dockerfile.train.tmpl

+27-37
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
# Please refer to the TRAINING documentation, "Basic Dockerfile for training"
22

33
FROM tensorflow/tensorflow:1.15.4-gpu-py3
4-
ENV DEBIAN_FRONTEND=noninteractive
5-
6-
ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO#
7-
ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
4+
ENV DEBIAN_FRONTEND=noninteractive \
5+
DEEPSPEECH_REPO=#DEEPSPEECH_REPO# \
6+
DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
87

98
RUN apt-get update && apt-get install -y --no-install-recommends \
109
apt-utils \
@@ -20,48 +19,39 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
2019
python3-venv \
2120
unzip \
2221
xz-utils \
23-
wget
24-
25-
# We need to remove it because it's breaking deepspeech install later with
26-
# weird errors about setuptools
27-
RUN apt-get purge -y python3-xdg
28-
29-
# Install dependencies for audio augmentation
30-
RUN apt-get install -y --no-install-recommends libopus0 libsndfile1
31-
32-
# Try and free some space
33-
RUN rm -rf /var/lib/apt/lists/*
22+
wget && \
23+
# We need to remove it because it's breaking deepspeech install later with \
24+
# weird errors about setuptools \
25+
apt-get purge -y python3-xdg && \
26+
# Install dependencies for audio augmentation \
27+
apt-get install -y --no-install-recommends libopus0 libsndfile1 && \
28+
# Try and free some space \
29+
rm -rf /var/lib/apt/lists/*
3430

3531
WORKDIR /
36-
RUN git clone $DEEPSPEECH_REPO DeepSpeech
37-
38-
WORKDIR /DeepSpeech
39-
RUN git fetch origin $DEEPSPEECH_SHA && git checkout $DEEPSPEECH_SHA
40-
RUN git submodule sync kenlm/ && git submodule update --init kenlm/
32+
RUN git clone $DEEPSPEECH_REPO DeepSpeech && \
33+
cd /DeepSpeech && git fetch origin $DEEPSPEECH_SHA && git checkout $DEEPSPEECH_SHA && \
34+
git submodule sync kenlm/ && git submodule update --init kenlm/
4135

4236
# Build CTC decoder first, to avoid clashes caused by incompatible version upgrades
43-
RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
44-
RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
37+
RUN cd /DeepSpeech/native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings && \
38+
pip3 install --upgrade dist/*.whl
4539

4640
# Prepare deps
47-
RUN pip3 install --upgrade pip==20.2.2 wheel==0.34.2 setuptools==49.6.0
48-
49-
# Install DeepSpeech
50-
# - No need for the decoder since we did it earlier
51-
# - There is already correct TensorFlow GPU installed on the base image,
52-
# we don't want to break that
53-
RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade -e .
54-
55-
# Tool to convert output graph for inference
56-
RUN curl -vsSL https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/linux.amd64.convert_graphdef_memmapped_format.xz | xz -d > convert_graphdef_memmapped_format
57-
58-
RUN chmod +x convert_graphdef_memmapped_format
41+
RUN cd /DeepSpeech && pip3 install --upgrade pip==20.2.2 wheel==0.34.2 setuptools==49.6.0 && \
42+
# Install DeepSpeech \
43+
# - No need for the decoder since we did it earlier \
44+
# - There is already correct TensorFlow GPU installed on the base image, \
45+
# we don't want to break that \
46+
DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade -e . && \
47+
# Tool to convert output graph for inference \
48+
curl -vsSL https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/linux.amd64.convert_graphdef_memmapped_format.xz | xz -d > convert_graphdef_memmapped_format && \
49+
chmod +x convert_graphdef_memmapped_format
5950

6051
# Build KenLM to generate new scorers
6152
WORKDIR /DeepSpeech/kenlm
62-
RUN wget -O - https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 | tar xj
63-
RUN ls -hal
64-
RUN mkdir -p build && \
53+
RUN wget -O - https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 | tar xj && \
54+
mkdir -p build && \
6555
cd build && \
6656
EIGEN3_ROOT=/DeepSpeech/kenlm/eigen-3.3.8 cmake .. && \
6757
make -j $(nproc)

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
DEEPSPEECH_REPO ?= https://github.com/mozilla/DeepSpeech.git
2-
DEEPSPEECH_SHA ?= origin/master
2+
DEEPSPEECH_SHA ?= master
33

44
Dockerfile%: Dockerfile%.tmpl
55
sed \

0 commit comments

Comments
 (0)