|
| 1 | +Bootstrap: docker |
| 2 | +From: python:3.11-slim |
| 3 | + |
| 4 | +%labels |
| 5 | + TORCH_CUDA_VERSION cu124 |
| 6 | + PYTHON_VERSION 3.11 |
| 7 | + BASE_IMAGE python:3.11-slim |
| 8 | + |
| 9 | +%help |
| 10 | + GPU + CPU image: PhysicsNeMo with PyTorch CUDA 12.4 wheels (~4 GB). |
| 11 | + PyTorch CUDA wheels bundle their own CUDA/cuDNN runtime, so no CUDA |
| 12 | + base image is needed. Works on CPU without --nv; uses NVIDIA GPU with --nv. |
| 13 | + |
| 14 | + Build: |
| 15 | + apptainer build th-holo-gpu.sif docker/gpu.def |
| 16 | + |
| 17 | + Run with GPU passthrough: |
| 18 | + apptainer exec --nv th-holo-gpu.sif python train.py --config-name fno |
| 19 | + apptainer shell --nv th-holo-gpu.sif |
| 20 | + |
| 21 | + Run CPU-only (no --nv needed): |
| 22 | + apptainer exec th-holo-gpu.sif python train.py --config-name fno |
| 23 | + apptainer shell th-holo-gpu.sif |
| 24 | + |
| 25 | + Verify GPU access inside the container: |
| 26 | + apptainer exec --nv th-holo-gpu.sif python -c \ |
| 27 | + "import torch; print(torch.cuda.get_device_name(0))" |
| 28 | + |
| 29 | +%files |
| 30 | + physicsnemo-curator /workspace/physicsnemo-curator |
| 31 | + docker/certs/ /tmp/certs/ |
| 32 | + |
| 33 | +%environment |
| 34 | + export DEBIAN_FRONTEND=noninteractive |
| 35 | + export UV_SYSTEM_PYTHON=1 |
| 36 | + export UV_BREAK_SYSTEM_PACKAGES=1 |
| 37 | + export PYTHONDONTWRITEBYTECODE=1 |
| 38 | + export PYTHONUNBUFFERED=1 |
| 39 | + export SSL_CERT_FILE=/etc/ssl/certs/ca-bundle.pem |
| 40 | + export REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-bundle.pem |
| 41 | + export NODE_EXTRA_CA_CERTS=/etc/ssl/certs/ca-bundle.pem |
| 42 | + |
| 43 | +%post |
| 44 | + export DEBIAN_FRONTEND=noninteractive |
| 45 | + |
| 46 | + apt-get update && apt-get install -y --no-install-recommends \ |
| 47 | + ca-certificates \ |
| 48 | + curl \ |
| 49 | + git \ |
| 50 | + build-essential \ |
| 51 | + libgl1 \ |
| 52 | + libglib2.0-0 \ |
| 53 | + && rm -rf /var/lib/apt/lists/* |
| 54 | + |
| 55 | + # Build the CA bundle BEFORE any download so curl and uv trust custom CAs |
| 56 | + cp /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-bundle.pem 2>/dev/null || true |
| 57 | + found=0 |
| 58 | + for cert_file in /tmp/certs/*; do |
| 59 | + [ -e "${cert_file}" ] || continue |
| 60 | + case "${cert_file}" in |
| 61 | + *.pem|*.crt|*.cer) |
| 62 | + { cat "${cert_file}"; echo; } >> /etc/ssl/certs/ca-bundle.pem |
| 63 | + found=1 ;; |
| 64 | + esac |
| 65 | + done |
| 66 | + if [ "${found}" -eq 0 ]; then |
| 67 | + echo "No custom CA files found under /tmp/certs (supported: .pem/.crt/.cer)" |
| 68 | + fi |
| 69 | + rm -rf /tmp/certs |
| 70 | + |
| 71 | + export SSL_CERT_FILE=/etc/ssl/certs/ca-bundle.pem |
| 72 | + export REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-bundle.pem |
| 73 | + export UV_SYSTEM_PYTHON=1 |
| 74 | + export UV_BREAK_SYSTEM_PACKAGES=1 |
| 75 | + |
| 76 | + # Install uv (curl picks up the bundle via SSL_CERT_FILE exported above) |
| 77 | + curl -fsSL https://astral.sh/uv/0.10.3/install.sh | sh |
| 78 | + cp /root/.local/bin/uv /usr/local/bin/uv |
| 79 | + cp /root/.local/bin/uvx /usr/local/bin/uvx |
| 80 | + |
| 81 | + # Install PyTorch CUDA 12.4 wheels first. The wheels bundle their own |
| 82 | + # CUDA/cuDNN runtime, so no CUDA base image is needed. nvidia-physicsnemo |
| 83 | + # is installed with --no-deps afterwards so its dependency resolution cannot |
| 84 | + # replace the cu124 torch build with the default PyPI torch wheel. |
| 85 | + uv --native-tls pip install --system \ |
| 86 | + torch \ |
| 87 | + torchvision \ |
| 88 | + torchaudio \ |
| 89 | + --index-url https://download.pytorch.org/whl/cu124 |
| 90 | + |
| 91 | + # Install PhysicsNeMo without overwriting the CUDA PyTorch above. |
| 92 | + uv --native-tls pip install --system --no-deps "nvidia-physicsnemo" |
| 93 | + |
| 94 | + # Remaining project dependencies. |
| 95 | + uv --native-tls pip install --system \ |
| 96 | + "hydra-core>=1.3" \ |
| 97 | + "omegaconf>=2.3" \ |
| 98 | + "optuna>=4.0" \ |
| 99 | + "netCDF4" \ |
| 100 | + "scipy" \ |
| 101 | + "zarr" \ |
| 102 | + "pytest>=9.0" |
| 103 | + |
| 104 | + uv --native-tls pip install --system -e /workspace/physicsnemo-curator |
| 105 | + |
| 106 | +%runscript |
| 107 | + exec bash "$@" |
0 commit comments