Skip to content

Commit ab435d4

Browse files
committed
ngc-pytorch: add .dockerignore and optimize Dockerfile
Add vendor/ngc-pytorch/.dockerignore and refactor the Dockerfile to reduce image size and layer count. Changes: chain the apt-key / apt-get update / apt-get install commands into a single RUN; run apt-get clean and remove /var/lib/apt/lists/* in the same layer as each apt install; consolidate the Python package installs (requirements + datasets + mpi4py + mlflow) into one pip invocation; remove temporary build artifacts (/tmp contents, OpenBLAS sources, pip wheel/requirements leftovers) and Python __pycache__ directories; combine the git-lfs and bashtop installation steps into one RUN; add extra cleanup after the JupyterLab build (staging directory, /root/.cache, /root/.npm); and move the small housekeeping tweaks (the /etc/bash.bashrc sed edit and the xterm-256color terminfo symlink) into their own RUN. These edits aim to produce a smaller, cleaner image with fewer intermediate layers.
1 parent 55607c6 commit ab435d4

2 files changed

Lines changed: 45 additions & 37 deletions

File tree

vendor/ngc-pytorch/.dockerignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Exclude unnecessary files from Docker build context
2+
*.md
3+
LICENSE
4+
.git
5+
.gitignore
6+
.dockerignore

vendor/ngc-pytorch/Dockerfile.25.12-pytorch2.10-py312-cuda13.1

Lines changed: 39 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ RUN dpkgArch="$(dpkg --print-architecture)"; \
2222
CPLUS_INCLUDE_PATH=/usr/include/gdal \
2323
C_INCLUDE_PATH=/usr/include/gdal
2424

25-
RUN apt-key adv --refresh-keys --keyserver keyserver.ubuntu.com
26-
RUN apt-get update
27-
RUN apt-get install -y --no-install-recommends \
25+
RUN apt-key adv --refresh-keys --keyserver keyserver.ubuntu.com && \
26+
apt-get update && \
27+
apt-get install -y --no-install-recommends \
2828
automake \
2929
bison \
3030
build-essential \
@@ -159,7 +159,9 @@ RUN apt-get install -y --no-install-recommends \
159159
yasm \
160160
zip \
161161
tcl \
162-
udev
162+
udev && \
163+
apt-get clean && \
164+
rm -rf /var/lib/apt/lists/*
163165

164166
RUN dpkgArch="$(dpkg --print-architecture)"; \
165167
case "${dpkgArch##*-}" in \
@@ -184,7 +186,9 @@ RUN cd /tmp && \
184186
apt-get update && apt-get install -y nodejs && \
185187
npm install -g corepack && \
186188
corepack enable && \
187-
corepack prepare yarn@stable --activate
189+
corepack prepare yarn@stable --activate && \
190+
apt-get clean && \
191+
rm -rf /var/lib/apt/lists/*
188192

189193
# Install CUDA + cuDNN
190194
RUN dpkgArch="$(dpkg --print-architecture)"; \
@@ -208,12 +212,12 @@ RUN dpkgArch="$(dpkg --print-architecture)"; \
208212
git clone -q --branch=v0.3.30 https://github.com/OpenMathLib/OpenBLAS.git && \
209213
cd OpenBLAS && \
210214
make TARGET=${openblasTarget} CROSS=${crossCompile} ${EXTRA_FLAGS} NO_AFFINITY=1 NUM_THREADS=48 FC=gfortran && \
211-
make install
215+
make install && \
216+
rm -rf /tmp/OpenBLAS
212217

213-
# install git-lfs
218+
# install git-lfs + bashtop
214219
WORKDIR /tmp
215-
RUN cd /tmp && \
216-
dpkgArch="$(dpkg --print-architecture)"; \
220+
RUN dpkgArch="$(dpkg --print-architecture)"; \
217221
case "${dpkgArch##*-}" in \
218222
amd64) tarArch='amd64'; dirArch='x64';; \
219223
arm64) tarArch='arm64'; dirArch='aarch64';; \
@@ -223,11 +227,9 @@ RUN cd /tmp && \
223227
curl -sLO "https://github.com/git-lfs/git-lfs/releases/download/v${GIT_LFS_VERSION}/git-lfs-linux-${tarArch}-v${GIT_LFS_VERSION}.tar.gz" && \
224228
tar -zxf "git-lfs-linux-${tarArch}-v${GIT_LFS_VERSION}.tar.gz" && \
225229
cd /tmp/git-lfs-${GIT_LFS_VERSION} && \
226-
bash install.sh
227-
228-
# install bashtop
229-
RUN cd /tmp && \
230-
git clone https://github.com/aristocratos/bashtop.git && \
230+
bash install.sh && \
231+
cd /tmp && \
232+
git clone https://github.com/aristocratos/bashtop.git && \
231233
cd bashtop && \
232234
make install && \
233235
rm -rf /tmp/*
@@ -246,38 +248,40 @@ RUN dpkgArch="$(dpkg --print-architecture)"; \
246248
# remove hwloc-like packages (ImportError: /opt/hpcx/ucc/lib/libucc.so.1: undefined symbol issue)
247249
#RUN apt-get purge -y hwloc-nox libhwloc-plugins
248250

249-
# Python packages installation
251+
# Python packages installation (consolidated: requirements + datasets + mpi4py + mlflow)
250252
COPY ./requirements.25.12.*.txt /tmp/
251253
RUN dpkgArch="$(dpkg --print-architecture)"; \
252254
case "${dpkgArch##*-}" in \
253255
amd64) tarArch='amd64';; \
254256
arm64) tarArch='arm64';; \
255257
*) echo >&2 "error: current architecture ($dpkgArch) does not have a corresponding binary release"; exit 1 ;; \
256258
esac; \
257-
python3 -m pip install --disable-pip-version-check --no-cache-dir -r requirements.25.12.${tarArch}.txt
258-
259-
# install huggingface datasets
260-
WORKDIR /tmp
261-
RUN python3 -m pip install --no-cache-dir datasets
262-
263-
RUN python3 -m pip install --no-cache-dir \
264-
mpi4py==4.1.1 mlflow==3.5.0
259+
python3 -m pip install --disable-pip-version-check --no-cache-dir \
260+
-r requirements.25.12.${tarArch}.txt \
261+
datasets \
262+
mpi4py==4.1.1 mlflow==3.5.0 && \
263+
find /usr/local/lib/python3.12/dist-packages -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true && \
264+
rm -rf /tmp/*
265265

266+
# PyTorch extensions (requires --no-build-isolation)
266267
RUN python3 -m pip install --no-build-isolation --no-cache-dir \
267268
pytorch-lightning \
268269
torch-scatter \
269270
torch-sparse \
270271
torch-cluster \
271272
torch-spline-conv \
272273
torch-geometric \
273-
torchao
274+
torchao && \
275+
find /usr/local/lib/python3.12/dist-packages -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true && \
276+
rm -rf /tmp/*
274277

275278
WORKDIR /tmp
276279
RUN git clone --recursive -q https://github.com/bitsandbytes-foundation/bitsandbytes.git && \
277280
cd /tmp/bitsandbytes && \
278281
cmake -DCOMPUTE_BACKEND=cuda -DCMAKE_CUDA_COMPILER="/usr/local/cuda-13/bin/nvcc" -DCOMPUTE_CAPABILITY="75;80;86;87;89;90;100;103;110;120;121" -S . && \
279282
make && \
280-
python setup.py install
283+
python setup.py install && \
284+
rm -rf /tmp/*
281285

282286
# Install ipython kernelspec
283287
RUN python3 -m ipykernel install --display-name "PyTorch 2.10 (NGC 25.12/Python 3.12) on Backend.AI" && \
@@ -299,7 +303,7 @@ LABEL ai.backend.kernelspec="1" \
299303
ai.backend.runtime-path="/usr/bin/python" \
300304
ai.backend.service-ports="ipython:pty:3000,jupyter:http:8091,jupyterlab:http:8090,vscode:http:8180,tensorboard:http:6006,mlflow-ui:preopen:5000,nniboard:preopen:8080"
301305

302-
# Install Jupyterlab extensions
306+
# Install Jupyterlab extensions + build (merged with cleanup)
303307
RUN python3 -m pip install --no-cache-dir \
304308
jupyter_nbextensions_configurator>=0.6.5 \
305309
jupyter_core \
@@ -324,24 +328,22 @@ RUN python3 -m pip install --no-cache-dir \
324328
jupyter-client==8.6.3 \
325329
jupyter_bokeh==2.0.4 \
326330
markupsafe>=3.0.2 \
327-
jsonschema[format,format-nongpl]>=4.23.0
328-
329-
RUN python3 -m pip install jupyter_lsp markupsafe==3.0.2 jupyterlab_widgets && \
331+
jsonschema[format,format-nongpl]>=4.23.0 && \
332+
python3 -m pip install --no-cache-dir jupyter_lsp markupsafe==3.0.2 jupyterlab_widgets && \
330333
jupyter labextension install --no-build @jupyter-widgets/jupyterlab-manager && \
331334
jupyter labextension install --no-build @jupyter-widgets/controls && \
332335
jupyter labextension install --no-build @jupyterlab/toc-extension && \
333336
jupyter labextension install --no-build @krassowski/jupyterlab-lsp && \
334337
jupyter labextension install @jupyterlab/toc-extension && \
335-
jupyter lab build --dev-build=False --minimize=False
336-
337-
RUN apt-get autoclean && \
338-
sed -i 's/source \/usr\/local\/nvm\/nvm.sh//' /etc/bash.bashrc && \
339-
ln -sf /usr/share/terminfo/x/xterm-color /usr/share/terminfo/x/xterm-256color && \
340-
rm -f /tmp/*.whl /tmp/requirem* && \
341-
rm -rf /var/lib/apt/lists/* && \
342-
rm -rf /root/.cache && \
338+
jupyter lab build --dev-build=False --minimize=False && \
339+
find /usr/local/lib/python3.12/dist-packages -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true && \
340+
rm -rf /usr/local/share/jupyter/lab/staging && \
341+
rm -rf /root/.cache /root/.npm && \
343342
rm -rf /tmp/*
344343

344+
RUN sed -i 's/source \/usr\/local\/nvm\/nvm.sh//' /etc/bash.bashrc && \
345+
ln -sf /usr/share/terminfo/x/xterm-color /usr/share/terminfo/x/xterm-256color
346+
345347
# change permission
346348
RUN chown root:root /usr/lib
347349

0 commit comments

Comments
 (0)