Skip to content

Commit a68f7a5

Browse files
authored
Merge pull request #116 from ParaToolsInc/slurm-ci-from-base
Slurm testsuite in GitHub Actions
2 parents 6055436 + f07b1f9 commit a68f7a5

File tree

14 files changed

+352
-1
lines changed

14 files changed

+352
-1
lines changed

.github/workflows/ci.yml

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ on:
1010

1111
permissions:
1212
contents: read
13+
packages: read
1314

1415
concurrency:
1516
group: ci-${{github.ref}}-${{github.event.pull_request.number || github.run_number}}
@@ -104,3 +105,61 @@ jobs:
104105
cd containers/spindle-flux-ubuntu
105106
docker compose down
106107
108+
spindle-slurm-ubuntu:
109+
name: Testsuite (Slurm, Ubuntu)
110+
environment: Spindle CI
111+
runs-on: ubuntu-latest
112+
timeout-minutes: 20
113+
steps:
114+
- name: Check out Spindle
115+
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
116+
117+
- name: Setup Docker Compose
118+
uses: docker/setup-compose-action@364cc21a5de5b1ee4a7f5f9d3fa374ce0ccde746
119+
with:
120+
version: latest
121+
122+
- name: Login to GitHub Container Registry
123+
if: ${{ !env.ACT }}
124+
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
125+
with:
126+
registry: ghcr.io
127+
username: ${{ github.actor }}
128+
password: ${{ secrets.GITHUB_TOKEN }}
129+
130+
- name: Generate MariaDB configuration
131+
id: slurm-ubuntu-mariadb
132+
run: |
133+
cd containers/spindle-slurm-ubuntu/testing
134+
./generate_config.sh
135+
136+
- name: Build spindle-slurm-ubuntu image
137+
id: slurm-ubuntu-build
138+
run: |
139+
cd containers/spindle-slurm-ubuntu/testing
140+
docker compose --progress=plain build
141+
142+
- name: Bring spindle-slurm-ubuntu up
143+
id: slurm-ubuntu-up
144+
run: |
145+
cd containers/spindle-slurm-ubuntu/testing
146+
docker compose up -d --wait --wait-timeout 120
147+
148+
- name: Verify munge works in spindle-slurm-ubuntu
149+
id: slurm-ubuntu-munge
150+
run: |
151+
docker exec slurm-head bash -c 'munge -n | unmunge'
152+
153+
- name: Run spindle-slurm-ubuntu testsuite
154+
id: slurm-ubuntu-testsuite
155+
run: |
156+
docker exec slurm-head bash -c 'cd Spindle-build/testsuite && salloc -n${workers} -N${workers} ./runTests ${workers}'
157+
158+
- name: Bring spindle-slurm-ubuntu down
159+
id: slurm-ubuntu-down
160+
if: ${{ always() }}
161+
continue-on-error: true
162+
run: |
163+
cd containers/spindle-slurm-ubuntu/testing
164+
docker compose down
165+
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Image for one node of the Slurm test cluster used by the Spindle testsuite.
#
# The build context is expected to be the root of the Spindle repository:
# every ${BUILD_ROOT} path below is relative to it, and `COPY .` pulls the
# whole repository into the image.

# NOTE(review): the default tag is `latest`, so builds are not reproducible
# unless BASE_VERSION is overridden with a fixed tag or digest.
ARG BASE_VERSION=latest
FROM ghcr.io/llnl/spindle-slurm-base:${BASE_VERSION}

# Build-time `replicas` is exported to the running container as `workers`.
ARG replicas=4
ENV workers=${replicas}

# Location of this directory relative to the build context
ARG BUILD_ROOT=containers/spindle-slurm-ubuntu/testing

# Slurm daemons run as $SLURM_USER
ARG SLURM_USER=slurm

# Applications run as $USER
ARG USER=slurmuser
ARG UID=1001

# Set up the Slurm install already present in the base image
COPY ${BUILD_ROOT}/scripts/setup_slurm.sh /setup_slurm.sh
COPY ${BUILD_ROOT}/conf/slurm.conf /home/${SLURM_USER}/slurm.conf
COPY ${BUILD_ROOT}/conf/slurmdbd.conf /home/${SLURM_USER}/slurmdbd.conf
COPY ${BUILD_ROOT}/conf/cgroup.conf /home/${SLURM_USER}/cgroup.conf
RUN /setup_slurm.sh

# Slurm without Spank plugin needs passwordless ssh
USER ${USER}
WORKDIR /home/${USER}
COPY ${BUILD_ROOT}/conf/ssh_config /home/${USER}/
COPY ${BUILD_ROOT}/scripts/setup_ssh.sh /home/${USER}/
RUN /home/${USER}/setup_ssh.sh

# Copy the Spindle repo into the container and build it
RUN mkdir -p /home/${USER}/Spindle
COPY . /home/${USER}/Spindle
COPY ${BUILD_ROOT}/scripts/build_spindle.sh /home/${USER}/build_spindle.sh
RUN ./build_spindle.sh

COPY ${BUILD_ROOT}/scripts/entrypoint.sh /home/${USER}/entrypoint.sh
# key=value form; the space-separated `ENV key value` syntax is deprecated.
ENV PATH=/home/${USER}/Spindle-inst/bin:$PATH

# Exec form: bash is started directly instead of through `/bin/sh -c`.
# The relative script path is resolved against the WORKDIR set above.
ENTRYPOINT ["/bin/bash", "./entrypoint.sh"]
39+
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
CgroupPlugin=cgroup/v1
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
ClusterName=linux
2+
ControlMachine=slurm-head
3+
ControlAddr=slurm-head
4+
SlurmUser=slurm
5+
SlurmctldPort=6817
6+
SlurmdPort=6818
7+
AuthType=auth/munge
8+
StateSaveLocation=/var/lib/slurmd
9+
SlurmdSpoolDir=/var/spool/slurmd
10+
SwitchType=switch/none
11+
MpiDefault=none
12+
SlurmctldPidFile=/var/run/slurmd/slurmctld.pid
13+
SlurmdPidFile=/var/run/slurmd/slurmd.pid
14+
ProctrackType=proctrack/linuxproc
15+
TaskPlugin=task/affinity
16+
ReturnToService=2
17+
SlurmctldTimeout=300
18+
SlurmdTimeout=300
19+
InactiveLimit=0
20+
MinJobAge=300
21+
KillWait=30
22+
Waittime=0
23+
SchedulerType=sched/backfill
24+
SelectType=select/cons_tres
25+
SelectTypeParameters=CR_Core_Memory
26+
SlurmctldDebug=3
27+
SlurmctldLogFile=/var/log/slurm/slurmctld.log
28+
SlurmdDebug=3
29+
SlurmdLogFile=/var/log/slurm/slurmd.log
30+
JobCompType=jobcomp/filetxt
31+
JobCompLoc=/var/log/slurm/jobcomp.log
32+
JobAcctGatherType=jobacct_gather/linux
33+
JobAcctGatherFrequency=30
34+
AccountingStorageType=accounting_storage/slurmdbd
35+
AccountingStorageHost=slurm-db
36+
AccountingStoragePort=6819
37+
NodeName=slurm-node-1 NodeAddr=slurm-node-1 CPUs=3 RealMemory=1000 State=UNKNOWN
38+
NodeName=slurm-node-2 NodeAddr=slurm-node-2 CPUs=3 RealMemory=1000 State=UNKNOWN
39+
NodeName=slurm-node-3 NodeAddr=slurm-node-3 CPUs=3 RealMemory=1000 State=UNKNOWN
40+
NodeName=slurm-node-4 NodeAddr=slurm-node-4 CPUs=3 RealMemory=1000 State=UNKNOWN
41+
PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP
42+
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
AuthType=auth/munge
2+
DbdAddr=slurm-db
3+
DbdHost=slurm-db
4+
SlurmUser=slurm
5+
DebugLevel=4
6+
LogFile=/var/log/slurm/slurmdbd.log
7+
PidFile=/var/run/slurmdbd/slurmdbd.pid
8+
StorageType=accounting_storage/mysql
9+
StorageHost=slurm-mariadb
10+
StorageUser=slurm
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Host slurm-*
2+
StrictHostKeyChecking no
3+
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
# `replicas` must match the number of nodes defined in the services section
2+
x-shared-workers:
3+
&workers
4+
replicas: 4
5+
6+
# Base image version to use
7+
x-shared-build-args: &shared-build-args
8+
BASE_VERSION: latest
9+
<<: *workers
10+
11+
# Docker prohibits copying files from outside of the build context.
12+
# In order to be able to copy the whole repo into the container,
13+
# we have to set the context to be the root of the repo.
14+
# We then have to specify the path from there to the Dockerfile.
15+
x-shared-build-context: &shared-build-context
16+
context: ../../..
17+
dockerfile: containers/spindle-slurm-ubuntu/testing/Dockerfile
18+
args: *shared-build-args
19+
20+
# Name of the head node
21+
x-shared-environment: &shared-environment
22+
SLURM_HEAD_NODE: slurm-head
23+
<<: *workers
24+
25+
# The entrypoint runs different services depending
26+
# on the node's role. Valid options are:
27+
# - worker: runs slurmd
28+
# - db: runs slurmdbd
29+
# - ctl: runs slurmctld
30+
x-worker-environment: &worker-environment
31+
SLURM_ROLE: worker
32+
<<: *shared-environment
33+
34+
networks:
35+
slurm:
36+
driver: bridge
37+
38+
# Common parameters for all nodes.
39+
x-shared-node-parameters: &shared-node-parameters
40+
build: *shared-build-context
41+
networks:
42+
- slurm
43+
cap_add:
44+
- SYS_NICE # Required for libnuma
45+
46+
x-healthcheck-parameters: &healthcheck-parameters
47+
start_period: 3s
48+
interval: 3s
49+
timeout: 5s
50+
retries: 5
51+
52+
x-worker-parameters: &worker-node-parameters
53+
<<: *shared-node-parameters
54+
environment: *worker-environment
55+
depends_on:
56+
slurm-head:
57+
condition: service_healthy
58+
healthcheck:
59+
test: ["CMD", "stat", "/var/run/slurmd/slurmd.pid"]
60+
<<: *healthcheck-parameters
61+
62+
services:
63+
slurm-mariadb:
64+
image: mariadb:12
65+
networks:
66+
- slurm
67+
hostname: slurm-mariadb
68+
container_name: slurm-mariadb
69+
env_file: mariadb.env
70+
environment:
71+
MYSQL_RANDOM_ROOT_PASSWORD: "yes"
72+
MYSQL_DATABASE: "slurm_acct_db"
73+
MYSQL_USER: "slurm"
74+
healthcheck:
75+
test: ["CMD", "healthcheck.sh", "--connect", "--innodb_initialized"]
76+
<<: *healthcheck-parameters
77+
78+
slurm-db:
79+
<<: *shared-node-parameters
80+
hostname: slurm-db
81+
container_name: slurm-db
82+
environment:
83+
SLURM_ROLE: db
84+
<<: *shared-environment
85+
depends_on:
86+
slurm-mariadb:
87+
condition: service_healthy
88+
healthcheck:
89+
test: ["CMD", "stat", "/var/run/slurmdbd/slurmdbd.pid"]
90+
<<: *healthcheck-parameters
91+
92+
slurm-head:
93+
<<: *shared-node-parameters
94+
hostname: slurm-head
95+
container_name: slurm-head
96+
tty: true
97+
environment:
98+
SLURM_ROLE: ctl
99+
<<: *shared-environment
100+
depends_on:
101+
slurm-db:
102+
condition: service_healthy
103+
healthcheck:
104+
test: ["CMD", "stat", "/var/run/slurmd/slurmctld.pid"]
105+
<<: *healthcheck-parameters
106+
107+
slurm-node-1:
108+
<<: *worker-node-parameters
109+
hostname: slurm-node-1
110+
container_name: slurm-node-1
111+
112+
slurm-node-2:
113+
<<: *worker-node-parameters
114+
hostname: slurm-node-2
115+
container_name: slurm-node-2
116+
117+
slurm-node-3:
118+
<<: *worker-node-parameters
119+
hostname: slurm-node-3
120+
container_name: slurm-node-3
121+
122+
slurm-node-4:
123+
<<: *worker-node-parameters
124+
hostname: slurm-node-4
125+
container_name: slurm-node-4
126+
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#!/bin/bash
#
# Generate a random password for the MariaDB slurm user
# and set it in config files:
#   - mariadb.env         (env file read by the slurm-mariadb service)
#   - conf/slurmdbd.conf  (copy of the template with StoragePass appended)
#
# Run from the directory that contains conf/slurmdbd.conf.template.

# Stop at the first failing command, unset variable, or failed pipeline
# stage. Without this, a missing template lets the final `echo >>` succeed
# and the script exits 0 with a slurmdbd.conf that holds only StoragePass.
# Tracing (-x) is deliberately not enabled: it would print the password.
set -euo pipefail

# 16 random bytes encode to 24 base64 characters ending in "==", followed by
# a newline; `head -c -3` drops those last three bytes.
MARIADB_PASS=$(openssl rand --base64 16 | head -c -3)
echo "MARIADB_PASSWORD: \"${MARIADB_PASS}\"" > mariadb.env
cp conf/slurmdbd.conf.template conf/slurmdbd.conf
echo "StoragePass=${MARIADB_PASS}" >> conf/slurmdbd.conf
10+
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#!/usr/bin/env bash
#
# Create the application account and give it passwordless sudo.
#
# Uses two variables that this script does not assign itself:
#   USER  name of the account and of its primary group
#   UID   numeric id used for both the user and the group
set -euxo pipefail

# Every expansion is quoted so an unexpected value cannot be word-split or
# glob-expanded into extra arguments.
sudo groupadd -g "${UID}" "${USER}"
sudo useradd -g "${USER}" -u "${UID}" -d "/home/${USER}" -m "${USER}"
# Allow user to run as other users so that munge can be started as the munge user.
# The name is passed as a printf argument rather than spliced into the format
# string; `tee -a` performs the append with root privileges.
printf '%s ALL=(ALL) NOPASSWD: ALL\n' "${USER}" | sudo tee -a /etc/sudoers > /dev/null
sudo adduser "${USER}" sudo
sudo usermod -s /bin/bash "${USER}"
10+
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/usr/bin/env bash
#
# Configure, build and install Spindle out of tree.
#   source tree : /home/${USER}/Spindle
#   build tree  : /home/${USER}/Spindle-build
#   install dir : /home/${USER}/Spindle-inst
#
# USER is not assigned here; with `set -u` the script aborts if it is unset.
set -euxo pipefail

mkdir -p "/home/${USER}/Spindle-build"
cd "/home/${USER}/Spindle-build"

# One option per line so changes to the configuration show up cleanly in diffs.
"/home/${USER}/Spindle/configure" \
    --prefix="/home/${USER}/Spindle-inst" \
    --enable-sec-munge \
    --with-rm=slurm \
    --with-rsh-launch \
    --with-rsh-cmd=/usr/bin/ssh \
    --with-localstorage=/tmp \
    CFLAGS="-O2 -g" \
    CXXFLAGS="-O2 -g"

# Build with one make job per available processor, then install into the prefix.
make -j"$(nproc)"
make install
9+

0 commit comments

Comments
 (0)