Skip to content

Commit b7d8a75

Browse files
committed
add devcontainer-utils-init-sccache-dist script
1 parent 6fd8800 commit b7d8a75

File tree

6 files changed

+153
-81
lines changed

6 files changed

+153
-81
lines changed

.devcontainer/rapids.Dockerfile

+3
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ ENV PYTHONDONTWRITEBYTECODE="1"
4949

5050
ENV SCCACHE_REGION="us-east-2"
5151
ENV SCCACHE_BUCKET="rapids-sccache-devs"
52+
ENV SCCACHE_DIST_CONNECT_TIMEOUT=30
53+
ENV SCCACHE_DIST_REQUEST_TIMEOUT=1800
5254
ENV SCCACHE_DIST_SCHEDULER_URL="https://amd64.linux.sccache.gha-runners.nvidia.com"
5355
ENV SCCACHE_IDLE_TIMEOUT=1800
5456
ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs"
@@ -61,6 +63,7 @@ ENV LIBCUDF_KERNEL_CACHE_PATH="/home/coder/cudf/cpp/build/${PYTHON_PACKAGE_MANAG
6163
ENV SCCACHE_IDLE_TIMEOUT=0
6264
ENV SCCACHE_SERVER_LOG="sccache=debug"
6365
ENV SCCACHE_S3_KEY_PREFIX=rapids-test-sccache-dist
66+
6467
# Build as much in parallel as possible
6568
ENV INFER_NUM_DEVICE_ARCHITECTURES=1
6669
ENV MAX_DEVICE_OBJ_TO_COMPILE_IN_PARALLEL=20

.github/workflows/build-all-rapids-repos.yml

+17-14
Original file line numberDiff line numberDiff line change
@@ -41,29 +41,32 @@ jobs:
4141
extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY
4242
rapids-aux-secret-1: GIST_REPO_READ_ORG_GITHUB_TOKEN
4343
build_command: |
44-
# Stop the sccache client
45-
SCCACHE_NO_DAEMON=1 sccache --stop-server;
46-
47-
# Write the sccache client build cluster configuration
48-
devcontainer-utils-configure-sccache-dist - <<< "--auth-type 'token' --auth-token '$RAPIDS_AUX_SECRET_1'";
49-
5044
# Prevent the sccache server from shutting down
5145
export SCCACHE_IDLE_TIMEOUT=0
5246
53-
# Disable CUB/Thrust arch-dependent namespaces, since this prevents
54-
# reusing ptx and cubins from mult-iarch compilations in single-arch
55-
# subset compilations.
47+
# Infinitely retry transient errors
48+
export SCCACHE_DIST_MAX_RETRIES=inf
49+
50+
# Never fallback to locally compiling
51+
export SCCACHE_DIST_FALLBACK_TO_LOCAL_COMPILE=false
52+
53+
# Disable CUB/Thrust arch-dependent namespaces.
54+
# These prevent reusing ptx and cubins from multi-arch
55+
# compilations in single-arch subset compilations.
5656
for VAR in CFLAGS CXXFLAGS CUDAFLAGS; do
5757
export "$VAR=${!VAR:+${!VAR} }-DCUB_DISABLE_NAMESPACE_MAGIC -DCUB_IGNORE_NAMESPACE_MAGIC_ERROR"
5858
export "$VAR=${!VAR:+${!VAR} }-DTHRUST_DISABLE_ABI_NAMESPACE -DTHRUST_IGNORE_ABI_NAMESPACE_ERROR"
5959
done
6060
61-
# Restart the sccache client with the new configuration
62-
sccache --start-server;
61+
# Configure the sccache client to talk to the build cluster
62+
devcontainer-utils-init-sccache-dist \
63+
--enable-sccache-dist \
64+
- <<< "--auth-token '$RAPIDS_AUX_SECRET_1' \
65+
--auth-type 'token' \
66+
";
6367
64-
# Verify cache and dist configuration
65-
sccache -s;
66-
devcontainer-utils-sccache-dist-status -c 10 -f tsv;
68+
# Verify sccache cache location
69+
sccache --show-adv-stats;
6770
6871
# Clone all the repos
6972
clone-all -j$(nproc) -v -q --clone-upstream --single-branch --shallow-submodules;

features/src/utils/install.sh

+1
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ declare -a commands_and_sources=(
104104
"init-gitlab-cli gitlab/cli/init.sh"
105105
"clone-gitlab-repo gitlab/repo/clone.sh"
106106
"print-missing-gitlab-token-warning gitlab/print-missing-token-warning.sh"
107+
"init-sccache-dist sccache-dist/init.sh"
107108
"configure-sccache-dist sccache-dist/configure.sh"
108109
"sccache-dist-status sccache-dist/status.sh"
109110
)
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
#! /usr/bin/env bash
2+
set -x
23

34
if test -z "${SKIP_DEVCONTAINER_UTILS_POST_ATTACH_COMMAND:-}"; then
45
# shellcheck disable=SC1091
56
. devcontainer-utils-init-git-interactive;
67
# shellcheck disable=SC1091
78
. devcontainer-utils-creds-s3-init;
8-
9-
if test -n "${ENABLE_SCCACHE_DIST+}"; then
10-
devcontainer-utils-configure-sccache-dist - <<< " \
11-
--auth-type 'token' \
12-
--auth-token '${SCCACHE_DIST_TOKEN:-$(gh auth token)}' \
13-
";
14-
fi
9+
# Update sccache client configuration to enable/disable sccache-dist
10+
devcontainer-utils-init-sccache-dist \
11+
${DEVCONTAINER_UTILS_ENABLE_SCCACHE_DIST:+--enable-sccache-dist} \
12+
- <<< "--auth-token '${SCCACHE_DIST_TOKEN:-$(gh auth token)}' \
13+
--auth-type 'token' \
14+
";
1515
fi

features/src/utils/opt/devcontainer/bin/sccache-dist/configure.sh

+54-60
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@
1414
#
1515
# Options that require values:
1616
# --auth-type (token|oauth2_code_grant_pkce|oauth2_implicit) Auth type for build cluster auth.
17-
# (default: "token")
17+
# Disables client auth if omitted (not recommended).
1818
# --auth-token <token> Token used to authenticate with the build cluster when `--auth-type=token`.
19-
# --client-id <id> Client ID used to authenticate with the build cluster when `--auth-type=oauth2_code_grant_pkce|oauth2_implicit`.
20-
# --auth-url <url> Auth URL used to authenticate with the build cluster when `--auth-type=oauth2_code_grant_pkce|oauth2_implicit`.
19+
# --client-id <id> Client ID used to authenticate with the build cluster when `--auth-type=(oauth2_code_grant_pkce|oauth2_implicit)`.
20+
# --auth-url <url> Auth URL used to authenticate with the build cluster when `--auth-type=(oauth2_code_grant_pkce|oauth2_implicit)`.
2121
# --token-url <url> Token URL used to authenticate with the build cluster when `--auth-type=oauth2_code_grant_pkce`.
2222
# --connect-timeout <num> The sccache client HTTP connection timeout.
2323
# (default: 30)
@@ -40,28 +40,24 @@ _configure_sccache_dist() {
4040
. devcontainer-utils-debug-output 'devcontainer_utils_debug' 'sccache configure-sccache-dist';
4141

4242
auth_type="${auth_type-}";
43-
auth_token="${auth_token-}";
44-
max_retries="${max_retries:-${SCCACHE_DIST_MAX_RETRIES:-0}}";
45-
scheduler_url="${scheduler_url:-${SCCACHE_DIST_SCHEDULER_URL:-}}";
46-
connect_timeout="${connect_timeout:-${SCCACHE_DIST_CONNECT_TIMEOUT:-30}}";
47-
request_timeout="${request_timeout:-${SCCACHE_DIST_REQUEST_TIMEOUT:-1800}}";
43+
max_retries="${max_retries:-${SCCACHE_DIST_MAX_RETRIES-}}";
44+
scheduler_url="${scheduler_url:-${SCCACHE_DIST_SCHEDULER_URL-}}";
45+
connect_timeout="${connect_timeout:-${SCCACHE_DIST_CONNECT_TIMEOUT-}}";
46+
request_timeout="${request_timeout:-${SCCACHE_DIST_REQUEST_TIMEOUT-}}";
4847

49-
# local os="$(uname -s)";
50-
# local arch="$(dpkg --print-architecture)";
51-
52-
local connection_pool="false";
48+
local connection_pool="${SCCACHE_DIST_CONNECTION_POOL-}";
5349

5450
if test -n "${use_connection_pool+x}"; then
5551
connection_pool="true";
5652
fi
5753

58-
local fallback_to_local_compile="true";
54+
local fallback_to_local_compile="${SCCACHE_DIST_FALLBACK_TO_LOCAL_COMPILE-}";
5955

6056
if test -n "${no_local_compile_fallback+x}"; then
6157
fallback_to_local_compile="false";
6258
fi
6359

64-
local sccache_conf="$HOME/.config/sccache/config";
60+
local sccache_conf="${SCCACHE_CONF:-"$HOME/.config/sccache/config"}";
6561

6662
mkdir -p "$(dirname "$sccache_conf")";
6763
touch "$sccache_conf";
@@ -94,54 +90,52 @@ _configure_sccache_dist() {
9490
sed -r '/^(\[dist.*\]|dist\.)/,/^$/d' "${sccache_conf}" > "${sccache_conf}.new";
9591

9692
# Write our new values
97-
cat <<EOF >> "${sccache_conf}.new"
98-
[dist]
99-
max_retries = ${max_retries}
100-
fallback_to_local_compile = ${fallback_to_local_compile}
101-
scheduler_url = "${scheduler_url}"
102-
103-
EOF
104-
105-
cat <<EOF >> "${sccache_conf}.new"
106-
[dist.net]
107-
connection_pool = ${connection_pool}
108-
connect_timeout = ${connect_timeout}
109-
request_timeout = ${request_timeout}
110-
111-
EOF
112-
113-
if test "${auth_type}" = token; then
114-
if ! grep -qE "^$" <<< "${auth_token-}"; then
115-
cat <<EOF >> "${sccache_conf}.new"
116-
[dist.auth]
117-
type = "${auth_type}"
118-
token = "${auth_token}"
119-
120-
EOF
93+
if ! grep -qE "^$" <<< "${scheduler_url}"; then
94+
cat <<< "" >> "${sccache_conf}.new";
95+
cat <<< "[dist]" >> "${sccache_conf}.new";
96+
cat <<< "scheduler_url = \"${scheduler_url}\"" >> "${sccache_conf}.new";
97+
if ! grep -qE "^$" <<< "${max_retries}"; then
98+
cat <<< "max_retries = ${max_retries}" >> "${sccache_conf}.new";
99+
fi
100+
if ! grep -qE "^$" <<< "${fallback_to_local_compile}"; then
101+
cat <<< "fallback_to_local_compile = ${fallback_to_local_compile}" >> "${sccache_conf}.new";
102+
fi
103+
104+
cat <<< "[dist.net]" >> "${sccache_conf}.new";
105+
if ! grep -qE "^$" <<< "${connection_pool}"; then
106+
cat <<< "connection_pool = ${connection_pool}" >> "${sccache_conf}.new";
107+
fi
108+
if ! grep -qE "^$" <<< "${connect_timeout}"; then
109+
cat <<< "connect_timeout = "${connect_timeout}"" >> "${sccache_conf}.new";
121110
fi
122-
elif test "${auth_type}" = oauth2_implicit; then
123-
if ! grep -qE "^$" <<< "${auth_url-}" \
124-
&& ! grep -qE "^$" <<< "${client_id-}"; then
125-
cat <<EOF >> "${sccache_conf}.new"
126-
[dist.auth]
127-
type = "${auth_type}"
128-
auth_url = "${auth_url}"
129-
client_id = "${client_id}"
130-
131-
EOF
111+
if ! grep -qE "^$" <<< "${request_timeout}"; then
112+
cat <<< "request_timeout = "${request_timeout}"" >> "${sccache_conf}.new";
132113
fi
133-
elif test "${auth_type}" = oauth2_code_grant_pkce; then
134-
if ! grep -qE "^$" <<< "${auth_url-}" \
135-
&& ! grep -qE "^$" <<< "${client_id-}" \
136-
&& ! grep -qE "^$" <<< "${token_url-}"; then
137-
cat <<EOF >> "${sccache_conf}.new"
138-
[dist.auth]
139-
type = "${auth_type}"
140-
auth_url = "${auth_url}"
141-
client_id = "${client_id}"
142-
token_url = "${token_url}"
143-
144-
EOF
114+
115+
if test "${auth_type}" = token; then
116+
if ! grep -qE "^$" <<< "${auth_token-}"; then
117+
cat <<< "[dist.auth]" >> "${sccache_conf}.new";
118+
cat <<< "type = \"${auth_type}\"" >> "${sccache_conf}.new";
119+
cat <<< "token = \"${auth_token}\"" >> "${sccache_conf}.new";
120+
fi
121+
elif test "${auth_type}" = oauth2_implicit; then
122+
if ! grep -qE "^$" <<< "${auth_url-}" \
123+
&& ! grep -qE "^$" <<< "${client_id-}"; then
124+
cat <<< "[dist.auth]" >> "${sccache_conf}.new";
125+
cat <<< "type = \"${auth_type}\"" >> "${sccache_conf}.new";
126+
cat <<< "auth_url = \"${auth_url}\"" >> "${sccache_conf}.new";
127+
cat <<< "client_id = \"${client_id}\"" >> "${sccache_conf}.new";
128+
fi
129+
elif test "${auth_type}" = oauth2_code_grant_pkce; then
130+
if ! grep -qE "^$" <<< "${auth_url-}" \
131+
&& ! grep -qE "^$" <<< "${client_id-}" \
132+
&& ! grep -qE "^$" <<< "${token_url-}"; then
133+
cat <<< "[dist.auth]" >> "${sccache_conf}.new";
134+
cat <<< "type = \"${auth_type}\"" >> "${sccache_conf}.new";
135+
cat <<< "auth_url = \"${auth_url}\"" >> "${sccache_conf}.new";
136+
cat <<< "client_id = \"${client_id}\"" >> "${sccache_conf}.new";
137+
cat <<< "token_url = \"${token_url}\"" >> "${sccache_conf}.new";
138+
fi
145139
fi
146140
fi
147141
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#!/usr/bin/env bash
2+
3+
# Usage:
4+
# devcontainer-utils-init-sccache-dist [OPTION]...
5+
#
6+
# Write (or, when `--enable-sccache-dist` is omitted, remove) the sccache-dist configuration in `${SCCACHE_CONF:-$HOME/.config/sccache/config}`,
7+
# then restart sccache to ensure the configuration is applied.
8+
#
9+
# Boolean options:
10+
# -h,--help Print this text.
11+
# -e,--enable-sccache-dist Enable sccache-dist. If omitted, disable sccache-dist.
12+
# (default: false)
13+
# --no-local-compile-fallback Disable building locally after retrying transient sccache-dist errors.
14+
# (default: false)
15+
# --use-connection-pool Enable sccache client HTTP connection pool.
16+
# (default: false)
17+
#
18+
# Options that require values:
19+
# --auth-type (token|oauth2_code_grant_pkce|oauth2_implicit) Auth type for build cluster auth.
20+
# Disables client auth if omitted (not recommended).
21+
# --auth-token <token> Token used to authenticate with the build cluster when `--auth-type=token`.
22+
# --client-id <id> Client ID used to authenticate with the build cluster when `--auth-type=(oauth2_code_grant_pkce|oauth2_implicit)`.
23+
# --auth-url <url> Auth URL used to authenticate with the build cluster when `--auth-type=(oauth2_code_grant_pkce|oauth2_implicit)`.
24+
# --token-url <url> Token URL used to authenticate with the build cluster when `--auth-type=oauth2_code_grant_pkce`.
25+
# --connect-timeout <num> The sccache client HTTP connection timeout.
26+
# (default: 30)
27+
# --request-timeout <num> The sccache client HTTP request timeout.
28+
# (default: 1800)
29+
# --scheduler-url <url> URL of the sccache-dist build cluster.
30+
# --max-retries <count> Maximum number of times to retry transient sccache-dist errors.
31+
# Pass `--max-retries inf` to retry infinitely.
32+
# Combining `--max-retries inf` with `--no-local-compile-fallback`
33+
# ensures the sccache client relies exclusively on the build cluster.
34+
# (default: 0)
35+
36+
# Stop the sccache server, add or remove the sccache-dist client configuration,
# then start the server again and print the build cluster status if available.
#
# Arguments: this script's command-line options. `-e,--enable-sccache-dist` is
#            taken here; the array OPTS is forwarded to
#            devcontainer-utils-configure-sccache-dist.
#            NOTE(review): OPTS and enable_sccache_dist are presumably defined
#            by the eval'd devcontainer-utils-parse-args output -- confirm.
# Stdin:     forwarded to devcontainer-utils-configure-sccache-dist (as `-`)
#            when sccache-dist is enabled.
_init_sccache_dist() {
37+
# `local -` keeps the `set` options below scoped to this function (bash >= 4.4).
local -;
38+
set -euo pipefail;
39+
40+
# shellcheck disable=SC1091
41+
. devcontainer-utils-debug-output 'devcontainer_utils_debug' 'sccache init-sccache-dist';
42+
43+
# Parse "$@"; `--take` pulls `-e,--enable-sccache-dist` out for use in this function.
eval "$(devcontainer-utils-parse-args "$0" --take '-e,--enable-sccache-dist' "$@")";
44+
45+
# Stop the sccache client
# Best-effort: output is discarded and a failure to stop is ignored.
46+
sccache --stop-server >/dev/null 2>&1 || true;
47+
48+
if test -n "${enable_sccache_dist+x}"; then
49+
# Add sccache-dist configuration to ~/.config/sccache/config
# The remaining options (OPTS) and this function's stdin are passed through unchanged.
50+
devcontainer-utils-configure-sccache-dist "${OPTS[@]}" - <&0;
51+
else
52+
# Delete sccache-dist configuration from ~/.config/sccache/config
# The SCCACHE_DIST_* variables are set to empty for this one invocation only,
# and no options or stdin are forwarded.
53+
SCCACHE_DIST_MAX_RETRIES= \
54+
SCCACHE_DIST_SCHEDULER_URL= \
55+
SCCACHE_DIST_CONNECT_TIMEOUT= \
56+
SCCACHE_DIST_REQUEST_TIMEOUT= \
57+
SCCACHE_DIST_CONNECTION_POOL= \
58+
SCCACHE_DIST_FALLBACK_TO_LOCAL_COMPILE= \
59+
devcontainer-utils-configure-sccache-dist;
60+
fi
61+
62+
# Restart the sccache client with the new configuration
# Best-effort: output is discarded and a failure to start is ignored.
63+
sccache --start-server >/dev/null 2>&1 || true;
64+
65+
# Verify sccache-dist status and configuration
# The status table is printed only when `sccache --dist-status` yields JSON
# with a non-null `SchedulerStatus`; otherwise this step is silently skipped.
66+
if sccache --dist-status | jq -er '.SchedulerStatus? != null' >/dev/null 2>&1; then
67+
devcontainer-utils-sccache-dist-status -c 10 -f tsv;
68+
fi
69+
}
70+
71+
_init_sccache_dist "$@" <&0;

0 commit comments

Comments
 (0)