|
# Base image: NVIDIA NGC PyTorch 2.11 with Python 3.12
FROM nvcr.io/nvidia/pytorch:26.03-py3

# Release versions of the tools that later build steps download.
ARG CODE_SERVER_VERSION=4.112.0
ARG GIT_LFS_VERSION=3.7.1

# Process-wide defaults for apt, pip, matplotlib and Python itself.
ENV LANG=C.UTF-8 \
    DEBIAN_FRONTEND=noninteractive \
    PYTHONUNBUFFERED=1 \
    MPLBACKEND=Svg \
    PIP_BREAK_SYSTEM_PACKAGES=1 \
    PIP_IGNORE_INSTALLED=0
| 13 | + |
# Library, executable and header search paths.
#
# These values used to be written as bare shell assignments at the end of a
# RUN instruction. Assignments with no command only set variables in that one
# build shell and vanish when it exits, so the step had no effect on the image.
# ENV is the instruction that persists them for later build steps and for the
# running container.
#
# ENV cannot branch on the CPU architecture, so both multiarch directories are
# listed; the one that is absent on the current architecture is simply skipped
# by the lookup.
#
# NOTE(review): the previous PATH value did not reference the existing PATH;
# it is appended here so entries provided by the base image are kept - confirm
# this is the intended behaviour.
ENV LD_LIBRARY_PATH="/usr/local/lib/python3.12/dist-packages/torch/lib:/usr/local/lib/python3.12/dist-packages/torch_tensorrt/lib:/usr/local/cuda/compat/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda:/usr/local/cuda/include:/usr/include/x86_64-linux-gnu:/usr/include/aarch64-linux-gnu:/opt/hpcx/ucc/lib:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" \
    PATH="/usr/local/lib/python3.12/dist-packages/torch_tensorrt/bin:/usr/local/mpi/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/ucx/bin:/opt/tensorrt/bin:${PATH}" \
    CPLUS_INCLUDE_PATH=/usr/include/gdal \
    C_INCLUDE_PATH=/usr/include/gdal
| 24 | + |
# System packages: compilers and build tooling, multimedia / geospatial /
# numeric development headers, fonts, and everyday command-line utilities.
# The list is kept in alphabetical order, one package per line, so additions
# and removals show up as single-line diffs. The apt caches are removed in the
# same layer that created them.
RUN apt-key adv --refresh-keys --keyserver keyserver.ubuntu.com \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        automake \
        bison \
        build-essential \
        bzip2 \
        ca-certificates \
        cabextract \
        chrpath \
        devscripts \
        dh-make \
        dirmngr \
        dkms \
        ethtool \
        fakeroot \
        ffmpeg \
        flex \
        fonts-nanum \
        fonts-nanum-coding \
        fonts-nanum-extra \
        g++ \
        gcc \
        gdal-bin \
        gfortran \
        gir1.2-gtk-3.0 \
        git \
        graphviz \
        htop \
        iputils-ping \
        libasound2-dev \
        libatlas-base-dev \
        libavcodec-dev \
        libavformat-dev \
        libboost-dev \
        libboost-filesystem-dev \
        libboost-system-dev \
        libcairo2-dev \
        libcurl4-openssl-dev \
        libdc1394-dev \
        libeigen3-dev \
        libfaac-dev \
        libfftw3-dev \
        libfreetype-dev \
        libfuse2 \
        libgdal-dev \
        libgeos++-dev \
        libgeos-dev \
        libgflags-dev \
        libgirepository1.0-dev \
        libglib2.0-0 \
        libgoogle-glog-dev \
        libgphoto2-dev \
        libgstreamer-plugins-base1.0-dev \
        libgstreamer1.0-dev \
        libgtk-3-dev \
        libhdf5-dev \
        libjemalloc-dev \
        libjpeg-dev \
        liblapack-dev \
        libmnl0 \
        libmp3lame-dev \
        libncurses-dev \
        libnl-3-200 \
        libnl-3-dev \
        libnl-route-3-200 \
        libnl-route-3-dev \
        libnlopt-dev \
        libnuma-dev \
        libopenblas-dev \
        libopencore-amrnb-dev \
        libopencore-amrwb-dev \
        libpci3 \
        libpng-dev \
        libportmidi-dev \
        libproj-dev \
        libprotobuf-dev \
        libsdl-image1.2-dev \
        libsdl-mixer1.2-dev \
        libsdl-ttf2.0-dev \
        libsdl2-dev \
        libsm6 \
        libsmpeg-dev \
        libssl-dev \
        libswresample-dev \
        libswscale-dev \
        libtbb-dev \
        libtheora-dev \
        libtiff-dev \
        libv4l-dev \
        libvorbis-dev \
        libx264-dev \
        libxext6 \
        libxine2-dev \
        libxml2-dev \
        libxrender-dev \
        libxrender1 \
        libxslt1-dev \
        libxvidcore-dev \
        libyaml-dev \
        libzmq3-dev \
        lintian \
        lsof \
        mailcap \
        make \
        media-types \
        mercurial \
        ncurses-term \
        net-tools \
        openjdk-11-jdk \
        openssh-client \
        openssh-server \
        pbzip2 \
        pciutils \
        pdsh \
        pkg-config \
        proj-bin \
        protobuf-compiler \
        pv \
        python-is-python3 \
        python3-gdal \
        python3-xkit \
        screen-resolution-extra \
        subversion \
        sudo \
        swig \
        tcl \
        tk \
        udev \
        unzip \
        v4l-utils \
        vim \
        x264 \
        xvfb \
        xz-utils \
        yasm \
        zip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
| 165 | + |
# Oracle Instant Client (Basic Light): download the archive matching the build
# architecture, unpack it under /opt/oracle, delete the components listed in
# the rm step, and register the directory with the dynamic linker.
RUN dpkgArch="$(dpkg --print-architecture)"; \
    case "${dpkgArch##*-}" in \
        amd64) zipSuffix='x64' ;; \
        arm64) zipSuffix='-arm64' ;; \
        *) echo >&2 "error: current architecture ($dpkgArch) does not have a corresponding binary release"; exit 1 ;; \
    esac; \
    zipName="instantclient-basiclite-linux${zipSuffix}.zip"; \
    mkdir -p /opt/oracle \
    && cd /opt/oracle \
    && wget https://download.oracle.com/otn_software/linux/instantclient/${zipName} \
    && unzip ${zipName} \
    && rm -f ${zipName} \
    && cd /opt/oracle/instantclient* \
    && rm -f *jdbc* *occi* *mysql* *README *jar uidrvci genezi adrci \
    && echo /opt/oracle/instantclient* > /etc/ld.so.conf.d/oracle-instantclient.conf \
    && ldconfig
| 182 | + |
# Install Node.js 20.x from NodeSource, then enable yarn through corepack.
#
# The NodeSource setup script is saved to a file before it is executed. When it
# was piped straight into bash, a failed download left bash reading empty input
# and exiting 0 (the default shell has no pipefail), so the build carried on
# and installed whichever nodejs package the distribution archive provides.
RUN curl -fsSL https://deb.nodesource.com/setup_20.x -o /tmp/nodesource_setup.sh && \
    bash /tmp/nodesource_setup.sh && \
    rm -f /tmp/nodesource_setup.sh && \
    apt-get update && apt-get install -y nodejs && \
    npm install -g corepack && \
    corepack enable && \
    corepack prepare yarn@stable --activate && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
| 192 | + |
# Link cuDNN into the CUDA library directory and register python3 as an
# alternative for /usr/bin/python.
# Nothing is downloaded or installed in this step: it creates
# /usr/local/nvidia/lib, points /usr/local/cuda/lib64/libcudnn.so at the
# multiarch cuDNN 9.20.0 library, and refreshes the linker cache.
# `ln -sf` replaces an existing link instead of aborting the build when the
# path is already present.
RUN dpkgArch="$(dpkg --print-architecture)"; \
    case "${dpkgArch##*-}" in \
        amd64) dirArch='x86_64' ;; \
        arm64) dirArch='aarch64' ;; \
        *) echo >&2 "error: current architecture ($dpkgArch) does not have a corresponding binary release"; exit 1 ;; \
    esac; \
    mkdir -p /usr/local/nvidia/lib && \
    ln -sf /usr/lib/${dirArch}-linux-gnu/libcudnn.so.9.20.0 /usr/local/cuda/lib64/libcudnn.so && \
    ldconfig && \
    update-alternatives --install /usr/bin/python python /usr/bin/python3 2
| 204 | + |
# Build and install OpenBLAS v0.3.30 from source.
# - Only the tagged commit is fetched (--depth 1); the history is never used.
# - The compile runs with one make job per available CPU. NUM_THREADS=48 is an
#   option handed to the OpenBLAS build itself, not a make job count, so the
#   build was previously serial.
# The per-architecture TARGET / CROSS / DYNAMIC_ARCH choices are unchanged.
WORKDIR /tmp
RUN dpkgArch="$(dpkg --print-architecture)"; \
    case "${dpkgArch##*-}" in \
        amd64) openblasTarget='CORE2'; crossCompile='0'; EXTRA_FLAGS="DYNAMIC_ARCH=1" ;; \
        arm64) openblasTarget='ARMV8'; crossCompile='1'; EXTRA_FLAGS="" ;; \
        *) echo >&2 "Unsupported: $dpkgArch"; exit 1 ;; \
    esac; \
    git clone -q --depth 1 --branch=v0.3.30 https://github.com/OpenMathLib/OpenBLAS.git && \
    cd OpenBLAS && \
    make -j"$(nproc)" TARGET=${openblasTarget} CROSS=${crossCompile} ${EXTRA_FLAGS} NO_AFFINITY=1 NUM_THREADS=48 FC=gfortran && \
    make install && \
    rm -rf /tmp/OpenBLAS
| 217 | + |
# Install git-lfs (release tarball for the build architecture) and bashtop.
# - curl runs with -f so an HTTP error fails the step immediately instead of
#   saving the error page under the tarball's name.
# - bashtop is cloned shallowly; only the working tree is needed for
#   `make install`.
# /tmp is emptied at the end so no download or checkout stays in the layer.
WORKDIR /tmp
RUN dpkgArch="$(dpkg --print-architecture)"; \
    case "${dpkgArch##*-}" in \
        amd64) tarArch='amd64' ;; \
        arm64) tarArch='arm64' ;; \
        *) echo >&2 "error: current architecture ($dpkgArch) does not have a corresponding binary release"; exit 1 ;; \
    esac; \
    \
    curl -fsSLO "https://github.com/git-lfs/git-lfs/releases/download/v${GIT_LFS_VERSION}/git-lfs-linux-${tarArch}-v${GIT_LFS_VERSION}.tar.gz" && \
    tar -zxf "git-lfs-linux-${tarArch}-v${GIT_LFS_VERSION}.tar.gz" && \
    cd /tmp/git-lfs-${GIT_LFS_VERSION} && \
    bash install.sh && \
    cd /tmp && \
    git clone --depth 1 https://github.com/aristocratos/bashtop.git && \
    cd bashtop && \
    make install && \
    rm -rf /tmp/*
| 236 | + |
# code-server: stream the release tarball for the build architecture into
# /usr/local/lib, rename the unpacked directory to a version-only name, and
# symlink the launcher into /usr/local/bin.
RUN dpkgArch="$(dpkg --print-architecture)"; \
    case "${dpkgArch##*-}" in \
        amd64) csArch='amd64' ;; \
        arm64) csArch='arm64' ;; \
        *) echo >&2 "error: current architecture ($dpkgArch) does not have a corresponding binary release"; exit 1 ;; \
    esac; \
    csRelease="code-server-${CODE_SERVER_VERSION}-linux-${csArch}"; \
    csHome="/usr/local/lib/code-server-${CODE_SERVER_VERSION}"; \
    curl -fL https://github.com/cdr/code-server/releases/download/v${CODE_SERVER_VERSION}/${csRelease}.tar.gz \
        | tar -C /usr/local/lib -xz \
    && mv /usr/local/lib/${csRelease} ${csHome} \
    && ln -s ${csHome}/bin/code-server /usr/local/bin/code-server
| 247 | + |
| 248 | +# remove hwloc-like packages (ImportError: /opt/hpcx/ucc/lib/libucc.so.1: undefined symbol issue) |
| 249 | +#RUN apt-get purge -y hwloc-nox libhwloc-plugins |
| 250 | + |
# Python packages installation (consolidated: requirements + datasets + mpi4py + mlflow)
#
# The best-effort __pycache__ cleanup is wrapped in { ...; }. In a shell list
# `a && b || true && c` groups as `((a && b) || true) && c`, so the bare
# `|| true` used to swallow a failing `pip install` as well and the layer was
# committed as if it had succeeded. With the grouping only the cleanup itself
# is allowed to fail.
# The requirements file is addressed by absolute path so the step does not
# depend on the working directory in effect at this point.
COPY ./requirements.26.03.*.txt /tmp/
RUN dpkgArch="$(dpkg --print-architecture)"; \
    case "${dpkgArch##*-}" in \
        amd64) tarArch='amd64' ;; \
        arm64) tarArch='arm64' ;; \
        *) echo >&2 "error: current architecture ($dpkgArch) does not have a corresponding binary release"; exit 1 ;; \
    esac; \
    python3 -m pip install --disable-pip-version-check --no-cache-dir \
        -r /tmp/requirements.26.03.${tarArch}.txt \
        datasets \
        mpi4py==4.1.1 mlflow==3.5.0 && \
    { find /usr/local/lib/python3.12/dist-packages -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true; } && \
    rm -rf /tmp/*
| 265 | + |
# PyTorch extensions (requires --no-build-isolation)
#
# The __pycache__ cleanup is grouped in { ...; } so that its `|| true` applies
# to the cleanup only. Ungrouped, `pip ... && find ... || true && rm ...` is
# evaluated as `((pip && find) || true) && rm`, which turned a failed
# `pip install` into a successful build step.
# NOTE(review): if any of these packages was failing to build unnoticed, this
# step will now stop the build at that point.
RUN python3 -m pip install --no-build-isolation --no-cache-dir \
        pytorch-lightning \
        torch-scatter \
        torch-sparse \
        torch-cluster \
        torch-spline-conv \
        torch-geometric \
        torchao && \
    { find /usr/local/lib/python3.12/dist-packages -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true; } && \
    rm -rf /tmp/*
| 277 | + |
# bitsandbytes: clone the upstream repository (with submodules), configure the
# CUDA backend against the CUDA 13 nvcc for the listed compute capabilities,
# compile, install the Python package, and clear /tmp.
# NOTE(review): the clone is not pinned to a tag or commit, so the installed
# revision depends on when the image is built.
WORKDIR /tmp
RUN git clone --recursive -q https://github.com/bitsandbytes-foundation/bitsandbytes.git \
    && cd /tmp/bitsandbytes \
    && cmake \
        -DCOMPUTE_BACKEND=cuda \
        -DCMAKE_CUDA_COMPILER="/usr/local/cuda-13/bin/nvcc" \
        -DCOMPUTE_CAPABILITY="75;80;86;87;89;90;100;103;110;120;121" \
        -S . \
    && make \
    && python setup.py install \
    && rm -rf /tmp/*
| 285 | + |
# Install the IPython kernel spec under its Backend.AI display name, then print
# the generated kernel.json into the build log.
RUN python3 -m ipykernel install \
        --display-name "PyTorch 2.11 (NGC 26.03/Python 3.12) on Backend.AI" \
    && cat /usr/local/share/jupyter/kernels/python3/kernel.json
# Backend.AI integration: service definitions, runner scripts, and the image
# labels that declare kernel features, resource minimums and service ports.
COPY ./service-defs /etc/backend.ai/service-defs
COPY ./runner-scripts/bootstrap.sh ./runner-scripts/setup_multinode.py /opt/container/

LABEL ai.backend.kernelspec="1" \
      ai.backend.base-distro="ubuntu24.04" \
      ai.backend.runtime-type="python" \
      ai.backend.runtime-path="/usr/bin/python" \
      ai.backend.features="batch query uid-match user-input" \
      ai.backend.accelerators="cuda" \
      ai.backend.envs.corecount="OPENBLAS_NUM_THREADS,OMP_NUM_THREADS,NPROC" \
      ai.backend.resource.min.cpu="1" \
      ai.backend.resource.min.mem="1g" \
      ai.backend.resource.min.cuda.device="0" \
      ai.backend.resource.min.cuda.shares="0" \
      ai.backend.service-ports="ipython:pty:3000,jupyter:http:8091,jupyterlab:http:8090,vscode:http:8180,tensorboard:http:6006,mlflow-ui:preopen:5000,nniboard:preopen:8080"
| 305 | + |
# Install Jupyterlab extensions + build (merged with cleanup)
#
# Two shell-level defects are fixed here:
# 1. Every requirement that carries a version specifier is quoted. Unquoted,
#    the shell read `pkg>=1.2` as "run pip with argument `pkg` and redirect
#    stdout to a file named `=1.2`", so the minimum versions were never passed
#    to pip and its output went into stray files in the working directory.
# 2. The __pycache__ cleanup is grouped in { ...; }. Ungrouped, the list
#    `... && find ... || true && rm ...` is evaluated as
#    `((... && find) || true) && rm`, so a failure in any earlier pip or
#    jupyter command was swallowed and the layer still committed.
#
# NOTE(review): with failures no longer masked, any labextension or build
# command that was silently failing will now stop the build - check the log of
# the first build after this change.
# NOTE(review): @jupyterlab/toc-extension is installed twice below, and the
# second call (without --no-build) appears to trigger a build that the
# following `jupyter lab build` repeats - confirm whether it is still needed.
RUN python3 -m pip install --no-cache-dir \
        "jupyter_nbextensions_configurator>=0.6.5" \
        jupyter_core \
        "jupyter-contrib-core>=0.4.2" \
        "jupyter-highlight-selected-word>=0.2.0" \
        "jupyter-server>=2.17.0" \
        "jupyter_lsp>=2.3.0" \
        jupyter-dash \
        jupyter-js-widgets-nbextension \
        jupyter-latex-envs \
        jupyter-packaging \
        jupyterlab \
        "jupyterlab-code-formatter>=3.0.2" \
        jupyterlab-hdf \
        jupyterlab-launcher \
        "jupyterlab-widgets==3.0.16" \
        notebook \
        jupyter-server-mathjax \
        "jupyter-server-proxy>=4.4.0" \
        "jupyter-telemetry==0.1.0" \
        "jupyter>=1.1.1" \
        "jupyter-client==8.6.3" \
        "jupyter_bokeh==2.0.4" \
        "markupsafe>=3.0.2" \
        "jsonschema[format,format-nongpl]>=4.23.0" && \
    python3 -m pip install --no-cache-dir jupyter_lsp "markupsafe==3.0.2" jupyterlab_widgets && \
    jupyter labextension install --no-build @jupyter-widgets/jupyterlab-manager && \
    jupyter labextension install --no-build @jupyter-widgets/controls && \
    jupyter labextension install --no-build @jupyterlab/toc-extension && \
    jupyter labextension install --no-build @krassowski/jupyterlab-lsp && \
    jupyter labextension install @jupyterlab/toc-extension && \
    jupyter lab build --dev-build=False --minimize=False && \
    { find /usr/local/lib/python3.12/dist-packages -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true; } && \
    rm -rf /usr/local/share/jupyter/lab/staging && \
    rm -rf /root/.cache /root/.npm && \
    rm -rf /tmp/*
| 343 | + |
# Remove the nvm sourcing line from the system-wide bashrc and make the
# xterm-256color terminfo entry a symlink to xterm-color.
RUN sed -i 's/source \/usr\/local\/nvm\/nvm.sh//' /etc/bash.bashrc \
    && ln -sf /usr/share/terminfo/x/xterm-color /usr/share/terminfo/x/xterm-256color

# Set the owner and group of /usr/lib to root.
RUN chown root:root /usr/lib

# Default working directory for sessions started from this image.
WORKDIR /home/work
0 commit comments