diff --git a/official-templates/pytorch/Dockerfile b/official-templates/pytorch/Dockerfile index bf5617b..5ac88ce 100644 --- a/official-templates/pytorch/Dockerfile +++ b/official-templates/pytorch/Dockerfile @@ -22,13 +22,13 @@ ENV DEBIAN_FRONTEND=noninteractive \ SHELL=/bin/bash \ PATH=/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/bin:$PATH \ LD_LIBRARY_PATH=/usr/local/nvidia/lib64:$LD_LIBRARY_PATH \ - JUPYTER_PASSWORD=ubuntu + JUPYTER_PASSWORD=yotta # =============================== # Workspace # =============================== WORKDIR / -RUN mkdir -p /workspace && chmod 777 /workspace +RUN mkdir -p /workspace && chmod 777 /workspace /root # =============================== # Base system packages @@ -44,46 +44,16 @@ RUN apt-get update -y && \ build-essential pkg-config \ && echo "en_US.UTF-8 UTF-8" > /etc/locale.gen \ && locale-gen \ - && mkdir -p /var/run/sshd \ + && mkdir -p /var/run/sshd /var/log/supervisor \ + && chmod 700 /var/run/sshd /var/log/supervisor \ + && chmod 755 /var/log \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* # =============================== -# uv (Astral) - Python package manager -# - Install to /usr/local/bin -# - Avoids modifying shell profile (suitable for container/CI) +# Remove ubuntu user (for security: prevent unauthorized SSH access) # =============================== -ARG UV_VERSION="latest" -RUN set -eux; \ - if [ "${UV_VERSION}" = "latest" ]; then \ - curl -LsSf https://astral.sh/uv/install.sh | env UV_UNMANAGED_INSTALL="/usr/local/bin" sh; \ - else \ - curl -LsSf "https://astral.sh/uv/${UV_VERSION}/install.sh" | env UV_UNMANAGED_INSTALL="/usr/local/bin" sh; \ - fi; \ - uv --version - -# =============================== -# Miniconda -# =============================== -ARG MINICONDA_VERSION="py311_24.1.2-0" -ARG CONDA_DIR="/opt/conda" - -RUN set -eux; \ - ARCH="$(uname -m)"; \ - case "${ARCH}" in \ - x86_64) MINICONDA_ARCH="x86_64" ;; \ - aarch64) MINICONDA_ARCH="aarch64" ;; \ - *) echo "Unsupported arch: ${ARCH}" && exit 1 ;; \ - esac; \ - curl -fsSL \ - "https://repo.anaconda.com/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-${MINICONDA_ARCH}.sh" \ - -o /tmp/miniconda.sh; \ - bash /tmp/miniconda.sh -b -p "${CONDA_DIR}"; \ - rm -f /tmp/miniconda.sh; \ - "${CONDA_DIR}/bin/conda" config --system --set auto_activate_base false; \ - "${CONDA_DIR}/bin/conda" clean -afy - -RUN ln -sf /opt/conda/bin/conda /usr/local/bin/conda +RUN userdel -r ubuntu || true # =============================== # Python 3.11 (build from source, with ensurepip) @@ -101,7 +71,7 @@ RUN set -eux; \ && tar -xzf /tmp/Python.tgz -C /tmp/python-src --strip-components=1 \ && rm -f /tmp/Python.tgz \ && cd /tmp/python-src \ - && ./configure --enable-optimizations --with-ensurepip=install \ + && ./configure --with-ensurepip=install \ && make -j"$(nproc)" \ && make altinstall \ && cd / \ @@ -125,15 +95,23 @@ RUN python -m pip install --no-cache-dir \ huggingface-hub datasets # =============================== -# Patch: ensure python3.11 has Jupyter (required by /start.sh) -# Only adds jupyter to the python3.11 environment, does not modify the existing pip install logic +# Build-time assertion: verify Jupyter installation # =============================== -RUN /usr/local/bin/python3.11 -m ensurepip --upgrade && \ - /usr/local/bin/python3.11 -m pip install --no-cache-dir \ - jupyterlab ipywidgets jupyter-archive notebook==7.3.3 +RUN python -c "import jupyter; import notebook; import jupyterlab; print('jupyter ok')" -# Build-time assertion: prevents pushing a broken image -RUN /usr/local/bin/python3.11 -c "import jupyter; import notebook; import jupyterlab; print('python3.11 jupyter ok')" +# =============================== +# Configure JupyterLab: auto-login with token (no password prompt) +# =============================== +RUN mkdir -p /root/.jupyter && printf '%s\n' \ + 'c.ServerApp.token = "yotta"' \ + 'c.ServerApp.password = ""' \ + 'c.ServerApp.allow_remote_access = True' \ + 'c.ServerApp.allow_origin = "*"' \ + 'c.NotebookApp.token = "yotta"' \ + 'c.NotebookApp.password = ""' \ + 'c.NotebookApp.allow_remote_access = True' \ + > /root/.jupyter/jupyter_lab_config.py && \ + chmod 600 /root/.jupyter/jupyter_lab_config.py # =============================== # NCCL tests (build from source, force MPI=0 to avoid mpi.h missing) @@ -145,20 +123,6 @@ RUN set -eux; \ ln -sf /opt/nccl-tests/build/* /usr/local/bin/; \ rm -rf /opt/nccl-tests/.git -# =============================== -# User -# =============================== -RUN useradd -ms /bin/bash ubuntu && \ - usermod -aG sudo ubuntu && \ - echo "ubuntu ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/ubuntu && \ - echo "ubuntu:ubuntu" | chpasswd - -# =============================== -# SSH config (start.sh handles sshd startup; this ensures password login is enabled) -# =============================== -RUN sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config && \ - sed -i 's/PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \ - rm -f /etc/ssh/ssh_host_* # =============================== # CUDA bin convenience @@ -166,10 +130,16 @@ RUN sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/ RUN ln -sf /usr/local/cuda/bin/* /usr/bin/ || true # =============================== -# start.sh (from buildx bake context "scripts") +# Supervisor configuration # =============================== -COPY --from=scripts start.sh /start.sh -RUN chmod 755 /start.sh +RUN mkdir -p /var/log/supervisor /usr/local/bin && \ + chmod 777 /var/log/supervisor /workspace /var/run /var/lib/nginx && \ + mkdir -p /run/sshd && \ + chmod 700 /run/sshd + +COPY --from=scripts start1.sh /start1.sh +RUN chmod 755 /start1.sh && \ + sed -i 's/\r$//' /start1.sh # =============================== # nginx / branding @@ -187,8 +157,8 @@ RUN echo 'cat /etc/yotta.txt' >> /root/.bashrc EXPOSE 22 80 8888 # =============================== -# Entrypoint: root runs start.sh directly (does not modify the shared start.sh) +# Entrypoint: root runs start1.sh with explicit bash (ensures bash syntax works) # =============================== USER root WORKDIR /root -CMD ["/bin/bash", "-lc", "exec /start.sh"] +CMD ["/bin/bash", "-c", "exec /bin/bash /start1.sh"] diff --git a/official-templates/pytorch/docker-bake.hcl b/official-templates/pytorch/docker-bake.hcl index 97b1a0c..14433ec 100644 --- a/official-templates/pytorch/docker-bake.hcl +++ b/official-templates/pytorch/docker-bake.hcl @@ -1,5 +1,5 @@ variable "PUBLISHER" { default = "yottalabsai" } -variable "TAG_SUFFIX" { default = "2026010901" } +variable "TAG_SUFFIX" { default = "2026031701" } group "default" { targets = ["pytorch290"] @@ -15,7 +15,7 @@ target "pytorch290" { dockerfile = "Dockerfile" tags = [ - "${PUBLISHER}/pytorch:2.9.0-py3.11-cuda12.8.1-cudnn-devel-ubuntu22.04" + "${PUBLISHER}/pytorch:${TAG_SUFFIX}" ] contexts = { @@ -25,7 +25,7 @@ target "pytorch290" { } args = { - BASE_IMAGE = "nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04" + BASE_IMAGE = "nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04" PYTHON_VERSION = "3.11.14" TORCH = "torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128" } diff --git a/official-templates/unsloth/Dockerfile b/official-templates/unsloth/Dockerfile index 496a116..850c1f3 100644 --- a/official-templates/unsloth/Dockerfile +++ b/official-templates/unsloth/Dockerfile @@ -1,188 +1,192 @@ # =============================== -# logo + scripts stages +# Build args +# =============================== +ARG PYTHON_VERSION="3.11.14" + +# =============================== +# logo + scripts + proxy stages # =============================== FROM scratch AS logo COPY container-template/yotta.txt yotta.txt FROM scratch AS scripts -COPY container-template/start.sh start.sh +COPY container-template/start1.sh start1.sh + +FROM scratch AS proxy +COPY container-template/proxy/nginx.conf nginx.conf +COPY container-template/proxy/readme.html readme.html # =============================== # main stage # =============================== FROM nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04 -ARG DEBIAN_FRONTEND=noninteractive -ENV TZ=UTC \ +# Re-declare ARGs after FROM +ARG PYTHON_VERSION="3.11.14" + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +ENV DEBIAN_FRONTEND=noninteractive \ + SHELL=/bin/bash \ + TZ=UTC \ LANG=C.UTF-8 \ LC_ALL=C.UTF-8 \ PYTHONUNBUFFERED=1 \ - VENV_PATH=/opt/venv + PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/bin:$PATH \ + LD_LIBRARY_PATH=/usr/local/nvidia/lib64:$LD_LIBRARY_PATH \ + JUPYTER_PASSWORD=yotta -# ------------------------------- +# =============================== +# Workspace +# =============================== +WORKDIR / +RUN mkdir -p /workspace && chmod 777 /workspace /root + +# =============================== # System packages -# ------------------------------- +# =============================== RUN set -eux; \ - apt-get update; \ + apt-get update -y; \ apt-get install -y --no-install-recommends --allow-change-held-packages \ ca-certificates curl wget git \ - sudo \ - openssh-server \ - nginx \ - tini \ - build-essential make g++ \ + build-essential pkg-config \ software-properties-common \ + locales tzdata \ + openssh-server nginx sudo \ vim jq tree htop tmux rsync \ - pciutils iproute2 net-tools lsof procps; \ + zip unzip less procps net-tools lsof \ + pciutils iproute2; \ + echo "en_US.UTF-8 UTF-8" > /etc/locale.gen; \ + locale-gen; \ + mkdir -p /var/run/sshd /var/log/supervisor; \ + chmod 700 /var/run/sshd /var/log/supervisor; \ + chmod 755 /var/log; \ + apt-get clean; \ rm -rf /var/lib/apt/lists/* -# ------------------------------- -# sshd wrapper (unchanged) -# ------------------------------- -RUN <<'EOF' -set -eux -if [ -x /usr/sbin/sshd ] && [ ! -x /usr/sbin/sshd.real ]; then - mv /usr/sbin/sshd /usr/sbin/sshd.real - cat > /usr/sbin/sshd <<'EOSSHD' -#!/usr/bin/env bash -set -euo pipefail -REAL="/usr/sbin/sshd.real" -if [[ " $* " == *" -D "* ]]; then - args=() - for a in "$@"; do - [[ "$a" == "-D" ]] && continue - args+=("$a") - done - exec "$REAL" "${args[@]}" -fi -exec "$REAL" "$@" -EOSSHD - chmod 755 /usr/sbin/sshd -fi -EOF - -# ------------------------------- -# Python 3.11 + venv -# ------------------------------- +# =============================== +# Remove ubuntu user (security: prevent unauthorized SSH access) +# =============================== +RUN userdel -r ubuntu || true + +# =============================== +# Python (build from source, with ensurepip) +# =============================== RUN set -eux; \ - add-apt-repository -y ppa:deadsnakes/ppa; \ - apt-get update; \ + PY_MM="$(echo "${PYTHON_VERSION}" | awk -F. '{print $1"."$2}')"; \ + apt-get update -y; \ apt-get install -y --no-install-recommends \ - python3.11 python3.11-venv python3.11-dev; \ - rm -rf /var/lib/apt/lists/*; \ - python3.11 -m venv "${VENV_PATH}"; \ - "${VENV_PATH}/bin/pip" install -U pip setuptools wheel - -RUN ln -sf "${VENV_PATH}/bin/python3.11" /usr/local/bin/python3.11 && \ - ln -sf "${VENV_PATH}/bin/python" /usr/local/bin/python || true && \ - ln -sf "${VENV_PATH}/bin/pip" /usr/local/bin/pip || true - -RUN echo 'export PATH=/opt/venv/bin:$PATH' >/etc/profile.d/venv.sh && \ - chmod 644 /etc/profile.d/venv.sh && \ - echo 'Defaults secure_path="/opt/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"' \ - >/etc/sudoers.d/99-secure-path - -# ------------------------------- -# Python packages (Unsloth) -# ------------------------------- -RUN set -eux; \ - pip install --no-cache-dir \ + libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev \ + libffi-dev libncursesw5-dev xz-utils tk-dev uuid-dev liblzma-dev; \ + curl -fSL --retry 10 --retry-delay 2 --retry-all-errors \ + "https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz" \ + -o /tmp/Python.tgz; \ + mkdir -p /tmp/python-src; \ + tar -xzf /tmp/Python.tgz -C /tmp/python-src --strip-components=1; \ + rm -f /tmp/Python.tgz; \ + cd /tmp/python-src; \ + ./configure --with-ensurepip=install; \ + make -j"$(nproc)"; \ + make altinstall; \ + cd /; \ + rm -rf /tmp/python-src; \ + ln -sf "/usr/local/bin/python${PY_MM}" /usr/bin/python; \ + ln -sf "/usr/local/bin/python${PY_MM}" /usr/bin/python3; \ + python -m pip install --no-cache-dir --upgrade pip setuptools wheel; \ + apt-get clean; \ + rm -rf /var/lib/apt/lists/* + +# =============================== +# PyTorch (cu128) +# =============================== +RUN python -m pip install --no-cache-dir \ --index-url https://download.pytorch.org/whl/cu128 \ - torch torchvision torchaudio; \ - pip install --no-cache-dir \ - unsloth \ - jupyterlab jupyter_server ipykernel \ + torch torchvision torchaudio + +# =============================== +# Unsloth + dependencies +# =============================== +RUN python -m pip install --no-cache-dir \ + unsloth vllm \ huggingface_hub datasets; \ - pip install --no-cache-dir xformers --no-deps; \ - pip install --no-cache-dir triton + python -m pip install --no-cache-dir xformers --no-deps; \ + python -m pip install --no-cache-dir triton -RUN ln -sf "${VENV_PATH}/bin/jupyter" /usr/local/bin/jupyter || true +# =============================== +# Jupyter + common tools +# =============================== +RUN python -m pip install --no-cache-dir \ + jupyterlab ipywidgets jupyter-archive notebook==7.3.3 ipykernel -# ------------------------------- -# Users -# ------------------------------- -RUN set -eux; \ - useradd -m -s /bin/bash -u 1000 unsloth || true; \ - useradd -m -s /bin/bash -u 1001 ubuntu || true; \ - usermod -aG sudo unsloth; \ - usermod -aG sudo ubuntu; \ - echo "unsloth ALL=(ALL) NOPASSWD:ALL" >/etc/sudoers.d/90-unsloth; \ - echo "ubuntu ALL=(ALL) NOPASSWD:ALL" >/etc/sudoers.d/90-ubuntu; \ - chmod 0440 /etc/sudoers.d/90-unsloth /etc/sudoers.d/90-ubuntu; \ - mkdir -p /workspace /workspace/work /workspace/unsloth-notebooks; \ - chown -R unsloth:unsloth /workspace - -# ========================================================= -# Download Unsloth notebooks templates and flatten structure -# ========================================================= +# =============================== +# Build-time assertion: verify key packages +# (unsloth requires GPU at import time, so only check it's installed) +# =============================== +RUN python -c "import torch; import jupyterlab; from importlib.metadata import version; print(f'unsloth=={version(\"unsloth\")} torch=={torch.__version__} ok')" + +# =============================== +# Configure JupyterLab +# =============================== +RUN mkdir -p /root/.jupyter && printf '%s\n' \ + 'c.ServerApp.token = "yotta"' \ + 'c.ServerApp.password = ""' \ + 'c.ServerApp.allow_remote_access = True' \ + 'c.ServerApp.allow_origin = "*"' \ + 'c.NotebookApp.token = "yotta"' \ + 'c.NotebookApp.password = ""' \ + 'c.NotebookApp.allow_remote_access = True' \ + > /root/.jupyter/jupyter_lab_config.py && \ + chmod 600 /root/.jupyter/jupyter_lab_config.py + +# =============================== +# Download Unsloth notebook templates +# =============================== RUN set -eux; \ cd /tmp; \ git clone --depth=1 https://github.com/unslothai/notebooks.git unsloth-notebooks-src; \ - \ - # Ensure target directory exists mkdir -p /workspace/unsloth-notebooks; \ - \ - # Copy all scripts from original_template to the target directory cp -a unsloth-notebooks-src/original_template/. /workspace/unsloth-notebooks/; \ - \ - # Also copy template.ipynb (if needed) cp -a unsloth-notebooks-src/template.ipynb /workspace/unsloth-notebooks/ || true; \ - \ - # Fix permissions (Jupyter runs as unsloth user) - chown -R unsloth:unsloth /workspace/unsloth-notebooks; \ - \ - # Cleanup rm -rf /tmp/unsloth-notebooks-src +# =============================== +# CUDA bin convenience +# =============================== +RUN ln -sf /usr/local/cuda/bin/* /usr/bin/ || true -# ========================================================= -# Key fix: do not COPY unsloth-notebooks directly -# instead COPY the entire build context, then copy if present, skip if absent -# ========================================================= -COPY . /__build_context__ +# =============================== +# Supervisor dirs +# =============================== +RUN mkdir -p /var/log/supervisor /usr/local/bin && \ + chmod 777 /var/log/supervisor /workspace /var/run /var/lib/nginx && \ + mkdir -p /run/sshd && \ + chmod 700 /run/sshd + +# =============================== +# start1.sh +# =============================== +COPY --from=scripts start1.sh /start1.sh +RUN chmod 755 /start1.sh && \ + sed -i 's/\r$//' /start1.sh + +# =============================== +# nginx / branding +# =============================== +COPY --from=proxy nginx.conf /etc/nginx/nginx.conf +COPY --from=proxy readme.html /usr/share/nginx/html/readme.html +COPY README.md /usr/share/nginx/html/README.md -RUN set -eux; \ - if [ -d /__build_context__/unsloth-notebooks ]; then \ - cp -a /__build_context__/unsloth-notebooks/. /workspace/unsloth-notebooks/; \ - chown -R unsloth:unsloth /workspace/unsloth-notebooks; \ - fi; \ - rm -rf /__build_context__ - -# ------------------------------- -# sshd dirs -# ------------------------------- -RUN mkdir -p /var/run/sshd - -# ------------------------------- -# Branding -# ------------------------------- COPY --from=logo yotta.txt /etc/yotta.txt RUN echo 'cat /etc/yotta.txt' >> /root/.bashrc -# ------------------------------- -# start.sh (must not change) -# ------------------------------- -COPY --from=scripts start.sh /start.sh -RUN chmod 755 /start.sh - -# ------------------------------- -# entry.sh -# ------------------------------- -RUN <<'EOF' -cat > /entry.sh <<'EOENTRY' -#!/usr/bin/env bash -set -euo pipefail -(/usr/sbin/sshd || true) -exec sudo -u unsloth -H bash -lc \ - 'jupyter lab --ip=0.0.0.0 --port=${JUPYTER_PORT:-8888} --no-browser \ - --ServerApp.token="" --ServerApp.password="" \ - --ServerApp.allow_origin="*" \ - --ServerApp.root_dir=/workspace' -EOENTRY -chmod 755 /entry.sh -EOF - -EXPOSE 8888 22 80 - -ENTRYPOINT ["/usr/bin/tini","--"] -CMD ["/start.sh"] +# =============================== +# Ports +# =============================== +EXPOSE 22 80 8888 + +# =============================== +# Entrypoint: root runs start1.sh +# =============================== +USER root +WORKDIR /root +CMD ["/bin/bash", "-c", "exec /bin/bash /start1.sh"] diff --git a/official-templates/unsloth/docker-bake.hcl b/official-templates/unsloth/docker-bake.hcl index 613e078..0fd09fa 100644 --- a/official-templates/unsloth/docker-bake.hcl +++ b/official-templates/unsloth/docker-bake.hcl @@ -22,7 +22,7 @@ target "unsloth" { platforms = ["linux/amd64"] tags = [ - "${PUBLISHER}/unsloth:0.6.9-py3.11-cuda12.1-cudnn-devel-ubuntu22.04", + "${PUBLISHER}/unsloth:0.6.9-py3.11-cuda12.1-cudnn-devel-ubuntu22.04-260407", ] # Keeping contexts here to align with your existing setup