diff --git a/.github/container/Dockerfile.base b/.github/container/Dockerfile.base
index db14f684e..1954b352d 100644
--- a/.github/container/Dockerfile.base
+++ b/.github/container/Dockerfile.base
@@ -162,6 +162,13 @@ RUN install-nccl-sanity-check.sh
 
 ADD jax-nccl-test parallel-launch /usr/local/bin/
 
+##############################################################################
+## Install a newer Nsight Systems version than is included in cuda-dl-base
+##############################################################################
+
+ADD install-nsight-systems.sh /usr/local/bin/
+RUN install-nsight-systems.sh
+
 ###############################################################################
 ## Install the nsys-jax JAX/XLA-aware profiling scripts, patch Nsight Systems
 ###############################################################################
diff --git a/.github/container/install-nsight-systems.sh b/.github/container/install-nsight-systems.sh
new file mode 100755
index 000000000..1ca264182
--- /dev/null
+++ b/.github/container/install-nsight-systems.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# Replace the CUDA-bundled `nsys` with a pinned nsight-systems-cli package from
+# NVIDIA's devtools apt repository. Invoked from Dockerfile.base at build time.
+set -exuo pipefail
+
+# Remove the symlink that makes `nsys` refer to the CUDA-bundled version:
+rm /usr/local/cuda/bin/nsys
+
+# Repo for newer nsight versions
+UBUNTU_ARCH=$(dpkg --print-architecture)
+UBUNTU_VERSION=$(. /etc/os-release && echo "${ID}${VERSION_ID/./}") # e.g. ubuntu2204
+DEVTOOLS_URL=https://developer.download.nvidia.com/devtools/repos/${UBUNTU_VERSION}/${UBUNTU_ARCH}
+# --fail: stop on an HTTP error instead of saving the error page as the key.
+curl --fail -o /usr/share/keyrings/nvidia.pub "${DEVTOOLS_URL}/nvidia.pub"
+echo "deb [signed-by=/usr/share/keyrings/nvidia.pub] ${DEVTOOLS_URL}/ /" > "/etc/apt/sources.list.d/devtools-${UBUNTU_VERSION}-${UBUNTU_ARCH}.list"
+
+export DEBIAN_FRONTEND=noninteractive
+export TZ=America/Los_Angeles
+
+apt-get update
+apt-get install -y nsight-systems-cli-2025.3.1
+apt-get clean
+
+rm -rf /var/lib/apt/lists/*