Skip to content

Commit 2f849df

Browse files
committed
add ngc-pytorch:26.03
add ngc-pytorch:26.03 - based on ubuntu24.04 - python 3.12 - pytorch 2.11 - cuda13.2
1 parent 6156d22 commit 2f849df

3 files changed

Lines changed: 906 additions & 0 deletions

File tree

Lines changed: 350 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,350 @@
1+
# Base image: NVIDIA NGC PyTorch 2.11 with Python 3.12
FROM nvcr.io/nvidia/pytorch:26.03-py3

# Versions of the release tarballs fetched by later build steps
# (code-server and git-lfs).
ARG CODE_SERVER_VERSION=4.112.0
ARG GIT_LFS_VERSION=3.7.1
6+
7+
# Environment defaults stored in the image; they apply to the remaining build
# steps as well as to containers started from the image.
ENV LANG=C.UTF-8 \
    DEBIAN_FRONTEND=noninteractive \
    MPLBACKEND=Svg \
    PIP_BREAK_SYSTEM_PACKAGES=1 \
    PIP_IGNORE_INSTALLED=0 \
    PYTHONUNBUFFERED=1
13+
14+
# Library, executable and header search paths.
#
# These are set with ENV on purpose: VAR=value assignments written inside a
# RUN step only exist in that step's shell and are gone when it exits, so they
# never reach later build steps or the final image.
#
# ENV cannot branch on the CPU architecture, so both multiarch directories
# (x86_64 and aarch64) are listed; a directory that does not exist on the
# current architecture is simply skipped during lookup.
#
# The inherited PATH is kept at the end so that entries provided by the base
# image stay reachable.
ENV LD_LIBRARY_PATH="/usr/local/lib/python3.12/dist-packages/torch/lib:/usr/local/lib/python3.12/dist-packages/torch_tensorrt/lib:/usr/local/cuda/compat/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda:/usr/local/cuda/include:/usr/include/x86_64-linux-gnu:/usr/include/aarch64-linux-gnu:/opt/hpcx/ucc/lib:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" \
    PATH="/usr/local/lib/python3.12/dist-packages/torch_tensorrt/bin:/usr/local/mpi/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/ucx/bin:/opt/tensorrt/bin:${PATH}" \
    CPLUS_INCLUDE_PATH=/usr/include/gdal \
    C_INCLUDE_PATH=/usr/include/gdal
24+
25+
# System packages: refresh the apt keyring, install build toolchains, media /
# geospatial / numeric development libraries and command-line utilities, then
# drop the apt caches in the same layer. The list is kept in alphabetical
# order with one package per line.
RUN apt-key adv --refresh-keys --keyserver keyserver.ubuntu.com \
 && apt-get update \
 && apt-get install -y --no-install-recommends \
        automake \
        bison \
        build-essential \
        bzip2 \
        ca-certificates \
        cabextract \
        chrpath \
        devscripts \
        dh-make \
        dirmngr \
        dkms \
        ethtool \
        fakeroot \
        ffmpeg \
        flex \
        fonts-nanum \
        fonts-nanum-coding \
        fonts-nanum-extra \
        g++ \
        gcc \
        gdal-bin \
        gfortran \
        gir1.2-gtk-3.0 \
        git \
        graphviz \
        htop \
        iputils-ping \
        libasound2-dev \
        libatlas-base-dev \
        libavcodec-dev \
        libavformat-dev \
        libboost-dev \
        libboost-filesystem-dev \
        libboost-system-dev \
        libcairo2-dev \
        libcurl4-openssl-dev \
        libdc1394-dev \
        libeigen3-dev \
        libfaac-dev \
        libfftw3-dev \
        libfreetype-dev \
        libfuse2 \
        libgdal-dev \
        libgeos++-dev \
        libgeos-dev \
        libgflags-dev \
        libgirepository1.0-dev \
        libglib2.0-0 \
        libgoogle-glog-dev \
        libgphoto2-dev \
        libgstreamer-plugins-base1.0-dev \
        libgstreamer1.0-dev \
        libgtk-3-dev \
        libhdf5-dev \
        libjemalloc-dev \
        libjpeg-dev \
        liblapack-dev \
        libmnl0 \
        libmp3lame-dev \
        libncurses-dev \
        libnl-3-200 \
        libnl-3-dev \
        libnl-route-3-200 \
        libnl-route-3-dev \
        libnlopt-dev \
        libnuma-dev \
        libopenblas-dev \
        libopencore-amrnb-dev \
        libopencore-amrwb-dev \
        libpci3 \
        libpng-dev \
        libportmidi-dev \
        libproj-dev \
        libprotobuf-dev \
        libsdl-image1.2-dev \
        libsdl-mixer1.2-dev \
        libsdl-ttf2.0-dev \
        libsdl2-dev \
        libsm6 \
        libsmpeg-dev \
        libssl-dev \
        libswresample-dev \
        libswscale-dev \
        libtbb-dev \
        libtheora-dev \
        libtiff-dev \
        libv4l-dev \
        libvorbis-dev \
        libx264-dev \
        libxext6 \
        libxine2-dev \
        libxml2-dev \
        libxrender-dev \
        libxrender1 \
        libxslt1-dev \
        libxvidcore-dev \
        libyaml-dev \
        libzmq3-dev \
        lintian \
        lsof \
        mailcap \
        make \
        media-types \
        mercurial \
        ncurses-term \
        net-tools \
        openjdk-11-jdk \
        openssh-client \
        openssh-server \
        pbzip2 \
        pciutils \
        pdsh \
        pkg-config \
        proj-bin \
        protobuf-compiler \
        pv \
        python-is-python3 \
        python3-gdal \
        python3-xkit \
        screen-resolution-extra \
        subversion \
        sudo \
        swig \
        tcl \
        tk \
        udev \
        unzip \
        v4l-utils \
        vim \
        x264 \
        xvfb \
        xz-utils \
        yasm \
        zip \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*
165+
166+
# Oracle Instant Client (Basic Light): download the archive matching the
# build architecture into /opt/oracle, strip components that are not needed,
# and register the client directory with the dynamic linker.
RUN dpkgArch="$(dpkg --print-architecture)"; \
    case "${dpkgArch##*-}" in \
        amd64) zipSuffix='x64' ;; \
        arm64) zipSuffix='-arm64' ;; \
        *) echo >&2 "error: current architecture ($dpkgArch) does not have a corresponding binary release"; exit 1 ;; \
    esac; \
    clientZip="instantclient-basiclite-linux${zipSuffix}.zip"; \
    mkdir -p /opt/oracle \
 && cd /opt/oracle \
 && wget "https://download.oracle.com/otn_software/linux/instantclient/${clientZip}" \
 && unzip "${clientZip}" \
 && rm -f "${clientZip}" \
 && cd /opt/oracle/instantclient* \
 && rm -f *jdbc* *occi* *mysql* *README *jar uidrvci genezi adrci \
 && echo /opt/oracle/instantclient* > /etc/ld.so.conf.d/oracle-instantclient.conf \
 && ldconfig
182+
183+
# Install Node.js 20.x (NodeSource) + yarn (via corepack)
#
# The NodeSource setup script is saved to a file and executed as a separate
# command instead of being piped into bash. With a pipe and no pipefail, a
# failed download would leave bash reading an empty script and exiting 0, and
# the build would silently continue without the NodeSource repository.
RUN cd /tmp && \
    curl -fsSL https://deb.nodesource.com/setup_20.x -o /tmp/nodesource_setup.sh && \
    bash /tmp/nodesource_setup.sh && \
    rm -f /tmp/nodesource_setup.sh && \
    apt-get update && apt-get install -y nodejs && \
    npm install -g corepack && \
    corepack enable && \
    corepack prepare yarn@stable --activate && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
192+
193+
# Link the versioned cuDNN shared library into the CUDA lib64 directory under
# its unversioned name, refresh the linker cache, and register python3 as an
# alternative for /usr/bin/python.
RUN dpkgArch="$(dpkg --print-architecture)"; \
    case "${dpkgArch##*-}" in \
        amd64) gnuArch='x86_64' ;; \
        arm64) gnuArch='aarch64' ;; \
        *) echo >&2 "error: current architecture ($dpkgArch) does not have a corresponding binary release"; exit 1 ;; \
    esac; \
    cudnnLib="/usr/lib/${gnuArch}-linux-gnu/libcudnn.so.9.20.0"; \
    mkdir -p /usr/local/nvidia/lib \
 && ln -s "${cudnnLib}" /usr/local/cuda/lib64/libcudnn.so \
 && ldconfig \
 && update-alternatives --install /usr/bin/python python /usr/bin/python3 2
204+
205+
WORKDIR /tmp
# Build and install OpenBLAS v0.3.30 from source. The make TARGET, CROSS and
# extra flags are selected per build architecture; the source tree is removed
# in the same layer.
RUN dpkgArch="$(dpkg --print-architecture)"; \
    case "${dpkgArch##*-}" in \
        amd64) blasTarget='CORE2'; blasCross='0'; blasExtra="DYNAMIC_ARCH=1" ;; \
        arm64) blasTarget='ARMV8'; blasCross='1'; blasExtra="" ;; \
        *) echo >&2 "Unsupported: $dpkgArch"; exit 1 ;; \
    esac; \
    git clone -q --branch=v0.3.30 https://github.com/OpenMathLib/OpenBLAS.git \
 && cd OpenBLAS \
 && make TARGET="${blasTarget}" CROSS="${blasCross}" ${blasExtra} NO_AFFINITY=1 NUM_THREADS=48 FC=gfortran \
 && make install \
 && rm -rf /tmp/OpenBLAS
217+
218+
# install git-lfs + bashtop
#
# curl runs with -f so that an HTTP error (for example a release asset that
# does not exist for GIT_LFS_VERSION) fails this step at the download, instead
# of saving the error page as the tarball and failing later inside tar.
WORKDIR /tmp
RUN dpkgArch="$(dpkg --print-architecture)"; \
    case "${dpkgArch##*-}" in \
        amd64) tarArch='amd64' ;; \
        arm64) tarArch='arm64' ;; \
        *) echo >&2 "error: current architecture ($dpkgArch) does not have a corresponding binary release"; exit 1 ;; \
    esac; \
    lfsTarball="git-lfs-linux-${tarArch}-v${GIT_LFS_VERSION}.tar.gz"; \
    curl -fsSLO "https://github.com/git-lfs/git-lfs/releases/download/v${GIT_LFS_VERSION}/${lfsTarball}" && \
    tar -zxf "${lfsTarball}" && \
    cd "/tmp/git-lfs-${GIT_LFS_VERSION}" && \
    bash install.sh && \
    cd /tmp && \
    git clone https://github.com/aristocratos/bashtop.git && \
    cd bashtop && \
    make install && \
    rm -rf /tmp/*
236+
237+
# code-server: unpack the release tarball for the build architecture under
# /usr/local/lib, rename the directory to drop the platform suffix, and link
# the launcher into /usr/local/bin.
RUN dpkgArch="$(dpkg --print-architecture)"; \
    case "${dpkgArch##*-}" in \
        amd64) tarArch='amd64' ;; \
        arm64) tarArch='arm64' ;; \
        *) echo >&2 "error: current architecture ($dpkgArch) does not have a corresponding binary release"; exit 1 ;; \
    esac; \
    csRelease="code-server-${CODE_SERVER_VERSION}-linux-${tarArch}"; \
    csHome="/usr/local/lib/code-server-${CODE_SERVER_VERSION}"; \
    curl -fL "https://github.com/cdr/code-server/releases/download/v${CODE_SERVER_VERSION}/${csRelease}.tar.gz" \
        | tar -C /usr/local/lib -xz \
 && mv "/usr/local/lib/${csRelease}" "${csHome}" \
 && ln -s "${csHome}/bin/code-server" /usr/local/bin/code-server
247+
248+
# remove hwloc-like packages (ImportError: /opt/hpcx/ucc/lib/libucc.so.1: undefined symbol issue)
249+
#RUN apt-get purge -y hwloc-nox libhwloc-plugins
250+
251+
# Python packages installation (consolidated: requirements + datasets + mpi4py + mlflow)
#
# The "|| true" fallback is confined to the __pycache__ cleanup by the
# subshell. Written inline as "pip ... && find ... || true && rm ...", the
# fallback would also apply to a failed pip install and let the build succeed
# with packages missing.
COPY ./requirements.26.03.*.txt /tmp/
RUN dpkgArch="$(dpkg --print-architecture)"; \
    case "${dpkgArch##*-}" in \
        amd64) tarArch='amd64' ;; \
        arm64) tarArch='arm64' ;; \
        *) echo >&2 "error: current architecture ($dpkgArch) does not have a corresponding binary release"; exit 1 ;; \
    esac; \
    python3 -m pip install --disable-pip-version-check --no-cache-dir \
        -r "/tmp/requirements.26.03.${tarArch}.txt" \
        datasets \
        mpi4py==4.1.1 mlflow==3.5.0 && \
    ( find /usr/local/lib/python3.12/dist-packages -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true ) && \
    rm -rf /tmp/*
265+
266+
# PyTorch extensions (requires --no-build-isolation)
#
# The "|| true" fallback is confined to the __pycache__ cleanup by the
# subshell, so a failed pip install fails the build instead of being
# swallowed by the fallback.
RUN python3 -m pip install --no-build-isolation --no-cache-dir \
        pytorch-lightning \
        torch-scatter \
        torch-sparse \
        torch-cluster \
        torch-spline-conv \
        torch-geometric \
        torchao && \
    ( find /usr/local/lib/python3.12/dist-packages -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true ) && \
    rm -rf /tmp/*
277+
278+
WORKDIR /tmp
# Build bitsandbytes from source with the CUDA backend for the listed compute
# capabilities, install it, and remove the build tree in the same layer.
RUN git clone --recursive -q https://github.com/bitsandbytes-foundation/bitsandbytes.git /tmp/bitsandbytes \
 && cd /tmp/bitsandbytes \
 && cmake \
        -DCOMPUTE_BACKEND=cuda \
        -DCMAKE_CUDA_COMPILER="/usr/local/cuda-13/bin/nvcc" \
        -DCOMPUTE_CAPABILITY="75;80;86;87;89;90;100;103;110;120;121" \
        -S . \
 && make \
 && python setup.py install \
 && rm -rf /tmp/*
285+
286+
# Register the IPython kernel spec under its display name and print the
# generated kernel.json to the build log.
RUN python3 -m ipykernel install \
        --display-name "PyTorch 2.11 (NGC 26.03/Python 3.12) on Backend.AI" \
 && cat /usr/local/share/jupyter/kernels/python3/kernel.json
289+
# Backend.AI specifics: service definitions and runner scripts
COPY ./service-defs /etc/backend.ai/service-defs
COPY ./runner-scripts/bootstrap.sh /opt/container/
COPY ./runner-scripts/setup_multinode.py /opt/container/
292+
293+
# Backend.AI image metadata: kernel spec version, feature flags, base distro,
# accelerator, minimum resource slots, runtime, and exposed service ports.
LABEL ai.backend.kernelspec="1" \
      ai.backend.features="batch query uid-match user-input" \
      ai.backend.base-distro="ubuntu24.04" \
      ai.backend.accelerators="cuda" \
      ai.backend.envs.corecount="OPENBLAS_NUM_THREADS,OMP_NUM_THREADS,NPROC" \
      ai.backend.resource.min.cpu="1" \
      ai.backend.resource.min.mem="1g" \
      ai.backend.resource.min.cuda.device="0" \
      ai.backend.resource.min.cuda.shares="0" \
      ai.backend.runtime-type="python" \
      ai.backend.runtime-path="/usr/bin/python" \
      ai.backend.service-ports="ipython:pty:3000,jupyter:http:8091,jupyterlab:http:8090,vscode:http:8180,tensorboard:http:6006,mlflow-ui:preopen:5000,nniboard:preopen:8080"
305+
306+
# Install Jupyterlab extensions + build (merged with cleanup)
#
# Every requirement that carries a version bound or extras is quoted. Left
# unquoted, the shell parses ">=X" as an output redirection (the bound is
# dropped and a file named "=X" is created) and "[...]" as a glob pattern.
# NOTE(review): the lower bounds are now actually enforced by pip; confirm
# that each requested minimum version is published.
#
# The "|| true" fallback is confined to the __pycache__ cleanup by the
# subshell, so a failure in any earlier pip/jupyter command fails the build.
RUN python3 -m pip install --no-cache-dir \
        "jupyter_nbextensions_configurator>=0.6.5" \
        jupyter_core \
        "jupyter-contrib-core>=0.4.2" \
        "jupyter-highlight-selected-word>=0.2.0" \
        "jupyter-server>=2.17.0" \
        "jupyter_lsp>=2.3.0" \
        jupyter-dash \
        jupyter-js-widgets-nbextension \
        jupyter-latex-envs \
        jupyter-packaging \
        jupyterlab \
        "jupyterlab-code-formatter>=3.0.2" \
        jupyterlab-hdf \
        jupyterlab-launcher \
        "jupyterlab-widgets==3.0.16" \
        notebook \
        jupyter-server-mathjax \
        "jupyter-server-proxy>=4.4.0" \
        "jupyter-telemetry==0.1.0" \
        "jupyter>=1.1.1" \
        "jupyter-client==8.6.3" \
        "jupyter_bokeh==2.0.4" \
        "markupsafe>=3.0.2" \
        "jsonschema[format,format-nongpl]>=4.23.0" && \
    python3 -m pip install --no-cache-dir jupyter_lsp "markupsafe==3.0.2" jupyterlab_widgets && \
    jupyter labextension install --no-build @jupyter-widgets/jupyterlab-manager && \
    jupyter labextension install --no-build @jupyter-widgets/controls && \
    jupyter labextension install --no-build @jupyterlab/toc-extension && \
    jupyter labextension install --no-build @krassowski/jupyterlab-lsp && \
    jupyter labextension install @jupyterlab/toc-extension && \
    jupyter lab build --dev-build=False --minimize=False && \
    ( find /usr/local/lib/python3.12/dist-packages -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true ) && \
    rm -rf /usr/local/share/jupyter/lab/staging && \
    rm -rf /root/.cache /root/.npm && \
    rm -rf /tmp/*
343+
344+
# Strip the nvm.sh "source" statement from the system-wide bashrc and point
# the xterm-256color terminfo entry at xterm-color.
RUN sed -i 's|source /usr/local/nvm/nvm.sh||' /etc/bash.bashrc \
 && ln -sf /usr/share/terminfo/x/xterm-color /usr/share/terminfo/x/xterm-256color
346+
347+
# Set the owner and group of /usr/lib to root
RUN chown root:root /usr/lib

# Default working directory for containers started from this image
WORKDIR /home/work

0 commit comments

Comments
 (0)