Skip to content

Commit 8077ce6

Browse files
authored
Add a script to launch a self-hosted GitHub runner (#116)
* Add a script to launch a self-hosted GitHub runner Signed-off-by: Fabrice Normandin <[email protected]> * Dont set --ephemeral, and dont exit if configured Signed-off-by: Fabrice Normandin <[email protected]> * Adjust the local_actions_runner.sh script Signed-off-by: Fabrice Normandin <[email protected]> * Actually use the new script with github actions Signed-off-by: Fabrice Normandin <[email protected]> * Change comments in the build workflow Signed-off-by: Fabrice Normandin <[email protected]> --------- Signed-off-by: Fabrice Normandin <[email protected]>
1 parent f49651d commit 8077ce6

File tree

4 files changed

+210
-79
lines changed

4 files changed

+210
-79
lines changed

.github/actions-runner-job.sh

Lines changed: 0 additions & 72 deletions
This file was deleted.

.github/launch_actions_runner.sh

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
#!/bin/bash
2+
#SBATCH --nodes=1
3+
#SBATCH --ntasks=1
4+
#SBATCH --cpus-per-task=4
5+
#SBATCH --mem=32G
6+
#SBATCH --gpus=1
7+
#SBATCH --time=00:30:00
8+
#SBATCH --dependency=singleton
9+
#SBATCH --output=logs/runner_%j.out
10+
11+
set -euo pipefail
12+
## This script can be used to launch a new self-hosted GitHub runner.
13+
## It assumes that the SH_TOKEN environment variable contains a GitHub token
14+
## that is used to authenticate with the GitHub API in order to allow launching a new runner.
15+
set -euo pipefail
16+
set -o errexit
17+
set -o nounset
18+
19+
readonly repo="mila-iqia/ResearchTemplate"
20+
readonly action_runner_version="2.317.0"
21+
readonly expected_checksum_for_version="9e883d210df8c6028aff475475a457d380353f9d01877d51cc01a17b2a91161d"
22+
23+
# Check for required commands.
24+
for cmd in curl tar uvx; do
25+
if ! command -v $cmd &> /dev/null; then
26+
echo "Error: $cmd is not installed."
27+
exit 1
28+
fi
29+
done
30+
31+
if [ -z "${SH_TOKEN:-}" ]; then
32+
echo "Error: SH_TOKEN environment variable is not set."
33+
echo "This script requires the SH_TOKEN environment variable be set to a GitHub token with permissions to create new self-hosted runners for the current repository."
34+
echo "To create this token, Follow the docs here: "
35+
echo " - https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-fine-grained-personal-access-token"
36+
echo " - and click here to create the new token: https://github.com/settings/personal-access-tokens/new"
37+
echo "The fine-grained token must have the 'Administration - repository permissions (write)' scope."
38+
exit 1
39+
fi
40+
41+
# Keep the runner files under $SCRATCH when it is set (e.g. on a SLURM cluster).
42+
# Otherwise, fall back to $HOME. Note: SLURM_TMPDIR is not used as the working directory here.
43+
WORKDIR="${SCRATCH:-$HOME}/actions-runners/$repo"
44+
mkdir -p "$WORKDIR"
45+
cd "$WORKDIR"
46+
47+
echo "Setting up self-hosted runner in $WORKDIR"
48+
archive="actions-runner-linux-x64-$action_runner_version.tar.gz"
49+
50+
# Look for the actions-runner archive. Download it if it doesn't exist.
51+
if [ ! -f "$archive" ]; then
52+
curl --fail -o "$archive" \
53+
-L "https://github.com/actions/runner/releases/download/v$action_runner_version/$archive"
54+
fi
55+
56+
# Check the archive integrity.
57+
echo "$expected_checksum_for_version $archive" | shasum -a 256 -c
58+
# Extract the installer
59+
tar xzf $archive
60+
# Use the GitHub API to get a temporary registration token for a new self-hosted runner.
61+
# This requires you to be an admin of the repository and to have the $SH_TOKEN secret set to a
62+
# github token with (ideally only) the appropriate permissions.
63+
# https://docs.github.com/en/rest/actions/self-hosted-runners?apiVersion=2022-11-28#create-a-registration-token-for-a-repository
64+
# Example output:
65+
# {
66+
# "token": "XXXXX",
67+
# "expires_at": "2020-01-22T12:13:35.123-08:00"
68+
# }
69+
# Create a private temporary file to hold the HTTP headers (including the token).
# `mktemp` is used instead of `tempfile`, which is deprecated and only shipped by
# Debian's debianutils, so it is missing on many clusters.
t=$(mktemp) || exit
70+
trap "rm -f -- '$t'" EXIT
71+
72+
# Write headers to the tempfile
73+
cat <<EOF > "$t"
74+
Accept: application/vnd.github+json
75+
Authorization: Bearer $SH_TOKEN
76+
X-GitHub-Api-Version: 2022-11-28
77+
EOF
78+
79+
# Uses `uvx python` to just get python. Assumes that `uv` is already installed.
80+
TOKEN=`curl --fail -L \
81+
-X POST \
82+
-H @$t \
83+
https://api.github.com/repos/$repo/actions/runners/registration-token | \
84+
uvx python -c "import sys, json; print(json.load(sys.stdin)['token'])"`
85+
86+
rm -f -- "$t"
87+
trap - EXIT
88+
89+
90+
# Create the runner and configure it programmatically with the token we just got
91+
# from the GitHub API.
92+
# TODO: Reconfigure it if it doesn't already exist? Or only configure it once?
93+
# TODO: use --ephemeral to run only one job and exit? Or set it up to keep running?
94+
# For now, don't exit if the runner is already configured, and enable more than one job.
95+
./config.sh --url https://github.com/$repo --token $TOKEN \
96+
--unattended --replace --labels self-hosted || true
97+
98+
# BUG: Seems weird that we'd have to export those ourselves. Shouldn't they be set already?
99+
export GITHUB_ACTIONS="true"
100+
export RUNNER_LABELS="self-hosted"
101+
102+
# Launch the actions runner.
103+
exec ./run.sh
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
#!/bin/bash
2+
#SBATCH --nodes=1
3+
#SBATCH --ntasks=1
4+
#SBATCH --cpus-per-task=4
5+
#SBATCH --mem=32G
6+
#SBATCH --gpus=1
7+
#SBATCH --time=00:30:00
8+
#SBATCH --dependency=singleton
9+
#SBATCH --output=logs/runner_%j.out
10+
11+
## This script can be used to launch a new self-hosted GitHub runner.
12+
## It assumes that the SH_TOKEN environment variable contains a GitHub token
13+
## that is used to authenticate with the GitHub API in order to allow launching a new runner.
14+
set -euo pipefail
15+
set -o errexit
16+
set -o nounset
17+
18+
readonly repo="{{github_user}}/{{project_name}}"
19+
readonly action_runner_version="2.317.0"
20+
readonly expected_checksum_for_version="9e883d210df8c6028aff475475a457d380353f9d01877d51cc01a17b2a91161d"
21+
22+
# Check for required commands.
23+
for cmd in curl tar uvx; do
24+
if ! command -v $cmd &> /dev/null; then
25+
echo "Error: $cmd is not installed."
26+
exit 1
27+
fi
28+
done
29+
30+
if [ -z "${SH_TOKEN:-}" ]; then
31+
echo "Error: SH_TOKEN environment variable is not set."
32+
echo "This script requires the SH_TOKEN environment variable be set to a GitHub token with permissions to create new self-hosted runners for the current repository."
33+
echo "To create this token, Follow the docs here: "
34+
echo " - https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-fine-grained-personal-access-token"
35+
echo " - and click here to create the new token: https://github.com/settings/personal-access-tokens/new"
36+
echo "The fine-grained token must have the 'Administration - repository permissions (write)' scope."
37+
exit 1
38+
fi
39+
40+
# Keep the runner files under $SCRATCH when it is set (e.g. on a SLURM cluster).
41+
# Otherwise, fall back to $HOME. Note: SLURM_TMPDIR is not used as the working directory here.
42+
WORKDIR="${SCRATCH:-$HOME}/actions-runners/$repo"
43+
mkdir -p "$WORKDIR"
44+
cd "$WORKDIR"
45+
46+
echo "Setting up self-hosted runner in $WORKDIR"
47+
archive="actions-runner-linux-x64-$action_runner_version.tar.gz"
48+
49+
# Look for the actions-runner archive. Download it if it doesn't exist.
50+
if [ ! -f "$archive" ]; then
51+
curl --fail -o "$archive" \
52+
-L "https://github.com/actions/runner/releases/download/v$action_runner_version/$archive"
53+
fi
54+
55+
# Check the archive integrity.
56+
echo "$expected_checksum_for_version $archive" | shasum -a 256 -c
57+
# Extract the installer
58+
tar xzf $archive
59+
# Use the GitHub API to get a temporary registration token for a new self-hosted runner.
60+
# This requires you to be an admin of the repository and to have the $SH_TOKEN secret set to a
61+
# github token with (ideally only) the appropriate permissions.
62+
# https://docs.github.com/en/rest/actions/self-hosted-runners?apiVersion=2022-11-28#create-a-registration-token-for-a-repository
63+
# Example output:
64+
# {
65+
# "token": "XXXXX",
66+
# "expires_at": "2020-01-22T12:13:35.123-08:00"
67+
# }
68+
# Create a private temporary file to hold the HTTP headers (including the token).
# `mktemp` is used instead of `tempfile`, which is deprecated and only shipped by
# Debian's debianutils, so it is missing on many clusters.
t=$(mktemp) || exit
69+
trap "rm -f -- '$t'" EXIT
70+
71+
# Write headers to the tempfile
72+
cat <<EOF > "$t"
73+
Accept: application/vnd.github+json
74+
Authorization: Bearer $SH_TOKEN
75+
X-GitHub-Api-Version: 2022-11-28
76+
EOF
77+
78+
# Uses `uvx python` to just get python. Assumes that `uv` is already installed.
79+
TOKEN=`curl --fail -L \
80+
-X POST \
81+
-H @$t \
82+
https://api.github.com/repos/$repo/actions/runners/registration-token | \
83+
uvx python -c "import sys, json; print(json.load(sys.stdin)['token'])"`
84+
85+
rm -f -- "$t"
86+
trap - EXIT
87+
88+
# Create the runner and configure it programmatically with the token we just got
89+
# from the GitHub API.
90+
# TODO: Reconfigure it if it doesn't already exist? Or only configure it once?
91+
# TODO: use --ephemeral to run only one job and exit? Or set it up to keep running?
92+
# For now, don't exit if the runner is already configured, and enable more than one job.
93+
./config.sh --url https://github.com/$repo --token $TOKEN \
94+
--unattended --replace --labels self-hosted || true
95+
96+
# BUG: Seems weird that we'd have to export those ourselves. Shouldn't they be set already?
97+
export GITHUB_ACTIONS="true"
98+
export RUNNER_LABELS="self-hosted"
99+
100+
# Launch the actions runner.
101+
exec ./run.sh

.github/workflows/build.yml

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -104,18 +104,17 @@ jobs:
104104
# note: The script will be overwritten by different CI runs, but it shouldn't really
105105
# change, so not a big deal.
106106
# todo: there are some assumptions about the GPU type to use in that script.
107-
run: "scp .github/actions-runner-job.sh ${{ matrix.cluster }}:actions-runner-job.sh"
107+
run: "scp .github/launch_actions_runner.sh ${{ matrix.cluster }}:launch_actions_runner.sh"
108108

109109
- name: Launch Slurm Actions Runner
110110
id: sbatch
111-
# TODO: for DRAC clusters, the account would need to be passed somehow (and obviously not be hard-coded here).
112-
# Maybe the account name on DRAC could be stored as a GitHub secret? But this is fishy.
113-
# Output the job ID to a file so that the next step can use it.
114-
# NOTE: Could also use the --wait flag to wait for the job to finish (and have this run at the same time as the other step).
115-
# TODO: Hard-coded mila-specific GPU to use for the tests.
111+
# TODO: for DRAC clusters, we could set the default slurm account to use with a
112+
# export SBATCH_ACCOUNT=... (the variable sbatch reads; SLURM_ACCOUNT is the srun one) in ~/.bash_aliases.
113+
# TODO: Hard-coded mila-specific GPU to use for running tests.
116114
# This isn't great, but currently necessary for reproducibility tests.
115+
# Output the job ID to a file so that the next step can use it.
117116
run: |
118-
job_id=`ssh ${{ matrix.cluster }} 'cd $SCRATCH && sbatch --gpus=rtx8000:1 --parsable $HOME/actions-runner-job.sh'`
117+
job_id=`ssh ${{ matrix.cluster }} 'cd $SCRATCH && sbatch --gpus=rtx8000:1 --parsable $HOME/launch_actions_runner.sh'`
119118
echo "Submitted job $job_id on the ${{ matrix.cluster }} cluster!"
120119
echo "job_id=$job_id" >> "$GITHUB_OUTPUT"
121120

0 commit comments

Comments
 (0)