|
| 1 | +#!/bin/bash |
| 2 | + |
| 3 | +set -ex |
| 4 | + |
| 5 | +# https://raw.githubusercontent.com/pytorch/pytorch/842d51500be144d53f4d046d31169e8f46c063f6/.ci/docker/common/install_cuda.sh |
| 6 | + |
#######################################
# Remove non-essential CUDA components to reduce image size:
#   - Documentation and manual pages
#   - Sample code, demos, and example projects
#   - IDE integration (Nsight Eclipse Edition)
#   - Debugging tools (compute-sanitizer, debugger)
#   - Profiling tools (Nsight Compute, Nsight Systems)
#   - Legacy tools (Visual Profiler)
# This keeps only the essential runtime libraries, headers and development
# tools.
# Arguments:
#   $1 - (optional) CUDA installation root; defaults to /usr/local/cuda.
# Returns:
#   The exit status of rm.
#######################################
function prune_cuda {
  local cuda_root="${1:-/usr/local/cuda}"

  # The root is quoted so it may contain spaces; the version globs are left
  # unquoted on purpose so they expand. Removing a directory also removes the
  # docs/samples nested inside it, so only the top-most paths are listed.
  # With -f, paths (or unmatched globs) that do not exist are ignored.
  rm -rf -- \
    "${cuda_root}"/compute-sanitizer \
    "${cuda_root}"/nsight-compute-*.*.* \
    "${cuda_root}"/nsight-systems-*.*.* \
    "${cuda_root}"/extras/demo_suite \
    "${cuda_root}"/extras/CUPTI/samples \
    "${cuda_root}"/extras/Debugger \
    "${cuda_root}"/libnvvp \
    "${cuda_root}"/nsightee_plugins \
    "${cuda_root}"/doc \
    "${cuda_root}"/samples \
    "${cuda_root}"/share/doc
}
| 32 | + |
#######################################
# Shared implementation behind the install_cudaNNN_stack entry points:
# installs the CUDA toolkit from NVIDIA's runfile, overlays cuDNN from the
# redistributable archive, builds NCCL from source, then prunes the toolkit
# and refreshes the linker cache.
# Globals:
#   CUDNN_VERSION, NCCL_VERSION, CUDA_HOME (written). They are assigned as
#   globals (not `local`) so they stay visible to the caller after return.
# Arguments:
#   $1 - full CUDA toolkit version, e.g. 12.4.1
#   $2 - driver version embedded in the runfile name, e.g. 550.54.15
#   $3 - cuDNN version, e.g. 9.1.0.70
#   $4 - NCCL git tag, e.g. v2.23.4-1
# Returns:
#   Non-zero (or aborts under `set -e`) when any step fails.
#######################################
function _install_cuda_stack {
  local cuda_full="$1"
  local driver_version="$2"
  CUDNN_VERSION="$3"
  NCCL_VERSION="$4"
  CUDA_HOME="/usr/local/cuda"

  local cuda_major="${cuda_full%%.*}"      # 12.4.1 -> 12
  local cuda_major_minor="${cuda_full%.*}" # 12.4.1 -> 12.4
  local runfile="cuda_${cuda_full}_${driver_version}_linux.run"
  local cudnn_archive="cudnn-linux-x86_64-${CUDNN_VERSION}_cuda${cuda_major}-archive"
  local cudnn_url="https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${cudnn_archive}.tar.xz"

  # move cuda-compat and remove existing cuda dir from nvidia/cuda:**.*.*-base-*
  mv /usr/local/cuda/compat /usr/local
  rm -rf /usr/local/cuda-*
  rm -rf /usr/local/cuda

  # install cuda
  # Always download into /tmp so the installer never lands in (or depends on)
  # whatever directory the script happened to be started from.
  cd /tmp
  wget -q "https://developer.download.nvidia.com/compute/cuda/${cuda_full}/local_installers/${runfile}"
  # Running the runfile through sh means no executable bit is required.
  sh "./${runfile}" --toolkit --silent
  rm -f "${runfile}"
  # The installer may already have created /usr/local/cuda as a symlink.
  # -f replaces it and -n stops ln from following it, which would otherwise
  # drop a stray link inside the toolkit directory.
  ln -sfn "/usr/local/cuda-${cuda_major_minor}" /usr/local/cuda
  # bring back cuda-compat
  mv /usr/local/compat /usr/local/cuda/compat

  # install cudnn
  rm -rf /tmp/cudnn
  mkdir -p /tmp/cudnn
  cd /tmp/cudnn
  wget -q "${cudnn_url}" -O "${cudnn_archive}.tar.xz"
  tar xf "${cudnn_archive}.tar.xz"
  cp -a "${cudnn_archive}"/include/* /usr/local/cuda/include/
  cp -a "${cudnn_archive}"/lib/* /usr/local/cuda/lib64/
  # The archive and its extracted tree are no longer needed once copied.
  cd /tmp
  rm -rf /tmp/cudnn

  # install nccl
  # Start from an empty directory: git refuses to clone into an existing
  # non-empty checkout, which matters when several versions are requested
  # in one invocation.
  rm -rf /tmp/nccl
  mkdir -p /tmp/nccl
  cd /tmp/nccl
  git clone -b "${NCCL_VERSION}" --depth 1 https://github.com/NVIDIA/nccl.git
  cd nccl
  make -j src.build
  cp -a build/include/* /usr/local/cuda/include/
  cp -a build/lib/* /usr/local/cuda/lib64/
  # Drop the source checkout and build tree so they do not bloat the image.
  cd /tmp
  rm -rf /tmp/nccl

  prune_cuda
  ldconfig
}

# CUDA 11.8.0 + cuDNN 9.1.0.70 + NCCL v2.21.5-1
function install_cuda118_stack {
  _install_cuda_stack 11.8.0 520.61.05 9.1.0.70 v2.21.5-1
}

# CUDA 12.4.1 + cuDNN 9.1.0.70 + NCCL v2.23.4-1
function install_cuda124_stack {
  _install_cuda_stack 12.4.1 550.54.15 9.1.0.70 v2.23.4-1
}

# CUDA 12.6.3 + cuDNN 9.7.0.66 + NCCL v2.23.4-1
function install_cuda126_stack {
  _install_cuda_stack 12.6.3 560.35.05 9.7.0.66 v2.23.4-1
}

# CUDA 12.8.1 + cuDNN 9.8.0.87 + NCCL v2.26.2-1
function install_cuda128_stack {
  _install_cuda_stack 12.8.1 570.124.06 9.8.0.87 v2.26.2-1
}
| 197 | + |
#######################################
# Install the CUDA stack for every version named on the command line,
# in the order given.
# Arguments:
#   Zero or more of: 11.8, 12.4, 12.6, 12.8
# Outputs:
#   Writes "bad argument <arg>" to stderr for an unrecognised version.
# Returns:
#   Exits the script with status 1 at the first unrecognised argument;
#   versions listed before it have already been installed by then.
#######################################
function main {
  local cuda_version
  for cuda_version in "$@"; do
    case "${cuda_version}" in
      11.8)
        install_cuda118_stack
        ;;
      12.4)
        install_cuda124_stack
        ;;
      12.6)
        install_cuda126_stack
        ;;
      12.8)
        install_cuda128_stack
        ;;
      *)
        # Diagnostics belong on stderr so they are not mixed into stdout.
        printf 'bad argument %s\n' "${cuda_version}" >&2
        exit 1
        ;;
    esac
  done
}

main "$@"
0 commit comments