Add a script to configure the sccache client to use the build cluster

trxcllnt · trxcllnt · commit 6fd8800d9e32 · 2025-03-19T11:33:41.000-07:00
diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json
@@ -15,10 +15,15 @@
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
+    "./features/src/sccache": {
+      "repository": "trxcllnt/sccache",
+      "version": "0.10.0-rapids.1"
+    },
     "./features/src/utils": {},
     "./features/src/rapids-build-utils": {}
   },
   "overrideFeatureInstallOrder": [
+    "./features/src/sccache",
     "./features/src/utils",
     "./features/src/rapids-build-utils"
   ],
diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json
@@ -24,11 +24,16 @@
       "installcuRAND": true,
       "installcuSPARSE": true
     },
+    "./features/src/sccache": {
+      "repository": "trxcllnt/sccache",
+      "version": "0.10.0-rapids.1"
+    },
     "./features/src/utils": {},
     "./features/src/rapids-build-utils": {}
   },
   "overrideFeatureInstallOrder": [
     "./features/src/cuda",
+    "./features/src/sccache",
     "./features/src/utils",
     "./features/src/rapids-build-utils"
   ],
diff --git a/.devcontainer/cuda12.8-conda/devcontainer.json b/.devcontainer/cuda12.8-conda/devcontainer.json
@@ -37,11 +37,16 @@
       "installnvJPEG": false,
       "pruneStaticLibs": true
     },
+    "./features/src/sccache": {
+      "repository": "trxcllnt/sccache",
+      "version": "0.10.0-rapids.1"
+    },
     "./features/src/utils": {},
     "./features/src/rapids-build-utils": {}
   },
   "overrideFeatureInstallOrder": [
     "./features/src/cuda",
+    "./features/src/sccache",
     "./features/src/utils",
     "./features/src/rapids-build-utils"
   ],
diff --git a/.devcontainer/cuda12.8-pip/devcontainer.json b/.devcontainer/cuda12.8-pip/devcontainer.json
@@ -25,11 +25,16 @@
       "installcuSPARSE": true,
       "installProfilers": true
     },
+    "./features/src/sccache": {
+      "repository": "trxcllnt/sccache",
+      "version": "0.10.0-rapids.1"
+    },
     "./features/src/utils": {},
     "./features/src/rapids-build-utils": {}
   },
   "overrideFeatureInstallOrder": [
     "./features/src/cuda",
+    "./features/src/sccache",
     "./features/src/utils",
     "./features/src/rapids-build-utils"
   ],
diff --git a/.devcontainer/rapids.Dockerfile b/.devcontainer/rapids.Dockerfile
@@ -49,9 +49,18 @@ ENV PYTHONDONTWRITEBYTECODE="1"
 
 ENV SCCACHE_REGION="us-east-2"
 ENV SCCACHE_BUCKET="rapids-sccache-devs"
-ENV SCCACHE_IDLE_TIMEOUT=900
+ENV SCCACHE_DIST_SCHEDULER_URL="https://amd64.linux.sccache.gha-runners.nvidia.com"
+ENV SCCACHE_IDLE_TIMEOUT=1800
 ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs"
 
 ENV HISTFILE="/home/coder/.cache/._bash_history"
 
 ENV LIBCUDF_KERNEL_CACHE_PATH="/home/coder/cudf/cpp/build/${PYTHON_PACKAGE_MANAGER}/cuda-${CUDA_VERSION}/latest/jitify_cache"
+
+# Prevent the sccache server from shutting down
+ENV SCCACHE_IDLE_TIMEOUT=0
+ENV SCCACHE_SERVER_LOG="sccache=debug"
+ENV SCCACHE_S3_KEY_PREFIX=rapids-test-sccache-dist
+# Build as much in parallel as possible
+ENV INFER_NUM_DEVICE_ARCHITECTURES=1
+ENV MAX_DEVICE_OBJ_TO_COMPILE_IN_PARALLEL=20
diff --git a/.github/workflows/build-all-rapids-repos.yml b/.github/workflows/build-all-rapids-repos.yml
@@ -27,7 +27,7 @@ jobs:
     if: needs.check-event.outputs.ok == 'true'
     needs: check-event
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.04
+    uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@rapids-aux-secret-input
     permissions:
       actions: read
       packages: read
@@ -37,22 +37,48 @@ jobs:
     with:
       arch: '["amd64"]'
       cuda: '["12.8"]'
-      node_type: cpu32
+      node_type: cpu4
       extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY
+      rapids-aux-secret-1: GIST_REPO_READ_ORG_GITHUB_TOKEN
       build_command: |
+        # Stop the sccache client
+        SCCACHE_NO_DAEMON=1 sccache --stop-server;
+
+        # Write the sccache client build cluster configuration
+        devcontainer-utils-configure-sccache-dist - <<< "--auth-type 'token' --auth-token '$RAPIDS_AUX_SECRET_1'";
+
+        # Prevent the sccache server from shutting down
         export SCCACHE_IDLE_TIMEOUT=0
-        SCCACHE_NO_DAEMON=1 sccache --stop-server
-        sccache -z;
-        sccache --show-adv-stats;
+
+        # Disable CUB/Thrust arch-dependent namespaces, since this prevents
+        # reusing ptx and cubins from multi-arch compilations in single-arch
+        # subset compilations.
+        for VAR in CFLAGS CXXFLAGS CUDAFLAGS; do
+          export "$VAR=${!VAR:+${!VAR} }-DCUB_DISABLE_NAMESPACE_MAGIC -DCUB_IGNORE_NAMESPACE_MAGIC_ERROR"
+          export "$VAR=${!VAR:+${!VAR} }-DTHRUST_DISABLE_ABI_NAMESPACE -DTHRUST_IGNORE_ABI_NAMESPACE_ERROR"
+        done
+
+        # Restart the sccache client with the new configuration
+        sccache --start-server;
+
+        # Verify cache and dist configuration
+        sccache -s;
+        devcontainer-utils-sccache-dist-status -c 10 -f tsv;
+
+        # Clone all the repos
         clone-all -j$(nproc) -v -q --clone-upstream --single-branch --shallow-submodules;
+
+        # Build all the repos
         build-all                        \
           -v                             \
-          -j$(nproc --ignore=1)          \
+          -j64                           \
           -DBUILD_SHARED_LIBS=ON         \
           -DBUILD_TESTS=ON               \
           -DBUILD_BENCHMARKS=ON          \
           -DBUILD_PRIMS_BENCH=ON         \
           -DRAFT_COMPILE_LIBRARY=ON      \
           -DBUILD_CUGRAPH_MG_TESTS=ON    \
           ;
+
+        # Print cache and dist stats
         sccache --show-adv-stats;
diff --git a/features/src/rapids-build-utils/devcontainer-feature.json b/features/src/rapids-build-utils/devcontainer-feature.json
@@ -1,7 +1,7 @@
 {
   "name": "NVIDIA RAPIDS devcontainer build utilities",
   "id": "rapids-build-utils",
-  "version": "25.4.4",
+  "version": "25.4.5",
   "description": "A feature to install the RAPIDS devcontainer build utilities",
   "containerEnv": {
     "BASH_ENV": "/etc/bash.bash_env"
diff --git a/features/src/rapids-build-utils/opt/rapids-build-utils/bin/get-num-archs-jobs-and-load.sh b/features/src/rapids-build-utils/opt/rapids-build-utils/bin/get-num-archs-jobs-and-load.sh
@@ -4,10 +4,6 @@
 #  rapids-get-num-archs-jobs-and-load [OPTION]...
 #
 # Compute an appropriate total number of jobs, load, and CUDA archs to build in parallel.
-# This routine scales the input `-j` with respect to the `-a` and `-m` values, taking into account the
-# amount of available system memory (free mem + swap), in order to balance the job and arch parallelism.
-#
-# note: This wouldn't be necessary if `nvcc` interacted with the POSIX jobserver.
 #
 # Boolean options:
 #  -h,--help                              Print this text.
@@ -18,16 +14,6 @@
 #  -j,--parallel <num>                    Run <num> parallel compilation jobs.
 #  --max-archs <num>                      Build at most <num> CUDA archs in parallel.
 #                                         (default: 3)
-#  --max-total-system-memory <num>        An upper-bound on the amount of total system memory (in GiB) to use during
-#                                         C++ and CUDA device compilations.
-#                                         Smaller values yield fewer parallel C++ and CUDA device compilations.
-#                                         (default: all available memory)
-#  --max-device-obj-memory-usage <num>    An upper-bound on the amount of memory each CUDA device object compilation
-#                                         is expected to take. This is used to estimate the number of parallel device
-#                                         object compilations that can be launched without hitting the system memory
-#                                         limit.
-#                                         Higher values yield fewer parallel CUDA device object compilations.
-#                                         (default: 1)
 
 # shellcheck disable=SC1091
 . rapids-generate-docstring;
@@ -41,33 +27,22 @@ get_num_archs_jobs_and_load() {
     # shellcheck disable=SC1091
     . devcontainer-utils-debug-output 'rapids_build_utils_debug' 'get-num-archs-jobs-and-load';
 
-    # The return value of nproc is (who knew!) constrained by the
-    # values of OMP_NUM_THREADS and/or OMP_THREAD_LIMIT
-    # Since we want the physical number of processors here, pass --all
-    local -r n_cpus="$(nproc --all)";
+    # nproc --all returns 2x the number of physical cores in Ubuntu24.04+,
+    # so instead we count the number of processors in /proc/cpuinfo
+    local -r n_cpus="$(grep -cP 'processor\s+:' /proc/cpuinfo)";
 
     if test ${#j[@]} -gt 0 && test -z "${j:-}"; then
         j="${n_cpus}";
     fi
 
     parallel="${j:-${JOBS:-${PARALLEL_LEVEL:-1}}}";
     max_archs="${max_archs:-${MAX_DEVICE_OBJ_TO_COMPILE_IN_PARALLEL:-${arch:-}}}";
-    max_device_obj_memory_usage="${max_device_obj_memory_usage:-${MAX_DEVICE_OBJ_MEMORY_USAGE:-1Gi}}";
-
-    local num_re="^[0-9]+$";
-
-    # Assume un-suffixed inputs means gibibytes
-    if [[ "${max_device_obj_memory_usage}" =~ ${num_re} ]]; then
-        max_device_obj_memory_usage="${max_device_obj_memory_usage}Gi";
-    fi
-
-    max_device_obj_memory_usage="$(numfmt --from=auto "${max_device_obj_memory_usage}")";
 
     local n_arch="${archs:-1}";
 
-    # currently: 70-real;75-real;80-real;86-real;90
-    # see: https://github.com/rapidsai/rapids-cmake/blob/branch-24.04/rapids-cmake/cuda/set_architectures.cmake#L54
-    local n_arch_rapids=5;
+    # currently: 70-real;75-real;80-real;86-real;90-real;100-real;120
+    # see: https://github.com/rapidsai/rapids-cmake/blob/branch-25.04/rapids-cmake/cuda/set_architectures.cmake#L59
+    local n_arch_rapids=7;
 
     if test -z "${archs:-}" \
     && test -n "${INFER_NUM_DEVICE_ARCHITECTURES:-}"; then
@@ -102,31 +77,8 @@ get_num_archs_jobs_and_load() {
         n_arch=$((n_arch > max_archs ? max_archs : n_arch));
     fi
 
-    local mem_for_device_objs="$((n_arch * max_device_obj_memory_usage))";
-    local mem_total="${max_total_system_memory:-${MAX_TOTAL_SYSTEM_MEMORY:-}}";
-
-    if test -z "${mem_total}"; then
-        local -r free_mem="$(free --bytes | grep -E '^Mem:' | tr -s '[:space:]' | cut -d' ' -f7 || echo '0')";
-        local -r freeswap="$(free --bytes | grep -E '^Swap:' | tr -s '[:space:]' | cut -d' ' -f4 || echo '0')";
-        mem_total="$((free_mem + freeswap))";
-    # Assume un-suffixed inputs means gibibytes
-    elif [[ "${mem_total}" =~ ${num_re} ]]; then
-        mem_total="${mem_total}Gi";
-    fi
-    mem_total="$(numfmt --from=auto "${mem_total}")";
-
     local n_load=$((parallel > n_cpus ? n_cpus : parallel));
-    # shellcheck disable=SC2155
-    local n_jobs="$(
-        echo "
-scale=0
-max_cpu=(${n_load} / ${n_arch} / 2 * 3)
-max_mem=(${mem_total} / ${mem_for_device_objs})
-if(max_cpu < max_mem) max_cpu else max_mem
-" | bc
-    )"
-    n_jobs=$((n_jobs < 1 ? 1 : n_jobs));
-    n_jobs=$((n_arch > 1 ? n_jobs : n_load));
+    local n_jobs="$((parallel < 1 ? 1 : parallel))";
 
     echo "declare n_arch=${n_arch}";
     echo "declare n_jobs=${n_jobs}";
diff --git a/features/src/rapids-build-utils/opt/rapids-build-utils/bin/tmpl/cpp.configure.tmpl.sh b/features/src/rapids-build-utils/opt/rapids-build-utils/bin/tmpl/cpp.configure.tmpl.sh
@@ -57,6 +57,7 @@ configure_${CPP_LIB}_cpp() {
 
     time (
         export ${CPP_ENV} PATH="$PATH";
+        SCCACHE_NO_DIST_COMPILE=1                         \
         CUDAFLAGS="${CUDAFLAGS:+$CUDAFLAGS }-t=${n_arch}" \
             cmake "${cmake_args[@]}";
         { set +x; } 2>/dev/null; echo -n "lib${CPP_LIB} configure time:";
diff --git a/features/src/utils/devcontainer-feature.json b/features/src/utils/devcontainer-feature.json
@@ -1,7 +1,7 @@
 {
   "name": "devcontainer-utils",
   "id": "utils",
-  "version": "25.4.0",
+  "version": "25.4.1",
   "description": "A feature to install RAPIDS devcontainer utility scripts",
   "containerEnv": {
     "BASH_ENV": "/etc/bash.bash_env"
diff --git a/features/src/utils/install.sh b/features/src/utils/install.sh
@@ -104,6 +104,8 @@ declare -a commands_and_sources=(
     "init-gitlab-cli                    gitlab/cli/init.sh"
     "clone-gitlab-repo                  gitlab/repo/clone.sh"
     "print-missing-gitlab-token-warning gitlab/print-missing-token-warning.sh"
+    "configure-sccache-dist             sccache-dist/configure.sh"
+    "sccache-dist-status                sccache-dist/status.sh"
 )
 
 # Install alternatives
diff --git a/features/src/utils/opt/devcontainer/bin/github/cli/init.sh b/features/src/utils/opt/devcontainer/bin/github/cli/init.sh
@@ -31,7 +31,7 @@ init_github_cli() {
       | tr -d ','                             \
     )";
 
-    local needed_scopes="read:org";
+    local needed_scopes="read:org read:enterprise";
 
     needed_scopes="$(                                                     \
       comm -23                                                            \
diff --git a/features/src/utils/opt/devcontainer/bin/post-attach-command.sh b/features/src/utils/opt/devcontainer/bin/post-attach-command.sh
@@ -5,4 +5,11 @@ if test -z "${SKIP_DEVCONTAINER_UTILS_POST_ATTACH_COMMAND:-}"; then
     . devcontainer-utils-init-git-interactive;
     # shellcheck disable=SC1091
     . devcontainer-utils-creds-s3-init;
+
+    if test -n "${ENABLE_SCCACHE_DIST:-}"; then  # opt-in: runs only when ENABLE_SCCACHE_DIST is set and non-empty
+        devcontainer-utils-configure-sccache-dist - <<< "           \
+            --auth-type 'token'                                     \
+            --auth-token '${SCCACHE_DIST_TOKEN:-$(gh auth token)}'  \
+        ";
+    fi
 fi
diff --git a/features/src/utils/opt/devcontainer/bin/sccache-dist/configure.sh b/features/src/utils/opt/devcontainer/bin/sccache-dist/configure.sh
diff --git a/features/src/utils/opt/devcontainer/bin/sccache-dist/status.sh b/features/src/utils/opt/devcontainer/bin/sccache-dist/status.sh
diff --git a/image/.devcontainer/devcontainer.json b/image/.devcontainer/devcontainer.json

Original file line number	Diff line number	Diff line change
`@@ -1,7 +1,7 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "NVIDIA RAPIDS devcontainer build utilities",`
`3`	`3`	`"id": "rapids-build-utils",`
`4`		`- "version": "25.4.4",`
	`4`	`+ "version": "25.4.5",`
`5`	`5`	`"description": "A feature to install the RAPIDS devcontainer build utilities",`
`6`	`6`	`"containerEnv": {`
`7`	`7`	`"BASH_ENV": "/etc/bash.bash_env"`
Original file line number	Diff line number	Diff line change
`@@ -1,7 +1,7 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "devcontainer-utils",`
`3`	`3`	`"id": "utils",`
`4`		`- "version": "25.4.0",`
	`4`	`+ "version": "25.4.1",`
`5`	`5`	`"description": "A feature to install RAPIDS devcontainer utility scripts",`
`6`	`6`	`"containerEnv": {`
`7`	`7`	`"BASH_ENV": "/etc/bash.bash_env"`
Original file line number	Diff line number	Diff line change
`@@ -104,6 +104,8 @@ declare -a commands_and_sources=(`
`104`	`104`	`"init-gitlab-cli gitlab/cli/init.sh"`
`105`	`105`	`"clone-gitlab-repo gitlab/repo/clone.sh"`
`106`	`106`	`"print-missing-gitlab-token-warning gitlab/print-missing-token-warning.sh"`
	`107`	`+ "configure-sccache-dist sccache-dist/configure.sh"`
	`108`	`+ "sccache-dist-status sccache-dist/status.sh"`
`107`	`109`	`)`
`108`	`110`
`109`	`111`	`# Install alternatives`