Skip to content

Add SPANK plugin symlink test to CI #5126

Add SPANK plugin symlink test to CI

Add SPANK plugin symlink test to CI #5126

Workflow file for this run

# Workflow header: triggers, token permissions and run concurrency.
name: Build All in one job
on:
  # Entry point for callers that reuse this workflow.
  workflow_call:
    inputs:
      unstable:
        description: "Build unstable version"
        required: false
        type: string
        default: "true"
      multi_arch:
        description: "Build for both amd64 and arm64 platforms"
        required: false
        type: string
        default: "false"
  # Manual runs; declares the same two inputs as workflow_call.
  workflow_dispatch:
    inputs:
      unstable:
        description: "Build unstable version"
        required: false
        type: string
        default: "true"
      multi_arch:
        description: "Build for both amd64 and arm64 platforms"
        required: false
        type: string
        default: "false"
  push:
    branches:
      - main
      - soperator-release-*
    tags:
      - "**" # Trigger on any tag
  # pull_request is declared as a separate trigger so that CI can run for
  # pull requests opened from forks.
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
permissions:
  contents: read
  packages: write
  attestations: write
  id-token: write
concurrency:
  # One group per pull request number, or per ref when there is no pull request.
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  # Only pull_request events cancel an in-progress run of the same group.
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
jobs:
changes:
  # Decides whether the build jobs need to run for this event.
  name: Detect Changes
  runs-on: ubuntu-latest
  outputs:
    # True when the path filter reports code changes, and always true for any
    # event other than pull_request.
    should_build: ${{ steps.filter.outputs.has_code_changes == 'true' || github.event_name != 'pull_request' }}
  steps:
    # Pinned to the same commit SHA that the other jobs in this workflow use
    # for actions/checkout, instead of a floating major-version tag.
    - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
    - uses: dorny/paths-filter@v3
      id: filter
      with:
        # With 'every', a changed file must satisfy all patterns of a filter,
        # so the '!…' entries act as exclusions from the leading '**'.
        predicate-quantifier: 'every'
        filters: |
          has_code_changes:
            - '**'
            - '!docs/**'
            - '!CODEOWNERS'
            - '!LICENSE'
            - '!PROJECT'
            - '!README.md'
            - '!SECURITY.md'
pre-build:
  # Produces the `version` artifact (line 1: version, line 2: unstable flag)
  # and verifies that the committed, generated files are up to date.
  needs: [changes]
  if: needs.changes.outputs.should_build == 'true'
  runs-on:
    - self-hosted
    - build
  steps:
    - name: Checkout repository
      uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
      with:
        fetch-depth: 0 # Fetch git history for the VERSION file changes detection
    - name: Install GO
      uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
      with:
        go-version-file: "go.mod"
        cache: false
    - name: Generate version file
      env:
        # The input is handed over through the environment instead of being
        # expanded into the script text. Falls back to 'true' when no input is
        # set.
        UNSTABLE_INPUT: ${{ inputs.unstable || 'true' }}
      run: |
        UNSTABLE="${UNSTABLE_INPUT}"
        echo "Building with unstable=${UNSTABLE}"
        # Truncate on the first write: later steps read this file by line number.
        make get-version UNSTABLE="${UNSTABLE}" > version.txt
        echo "${UNSTABLE}" >> version.txt
    - name: Run make sync-version-from-scratch
      run: |
        make kustomize helmify yq
        make sync-version-from-scratch UNSTABLE=false
    # Fails the job when the previous step left the working tree dirty.
    - name: Check for uncommitted changes
      run: |
        if [[ -n "$(git status --porcelain)" ]]; then
          echo "❌ Uncommitted changes detected after make sync-version-from-scratch"
          git status --porcelain
          git diff
          exit 1
        fi
    - name: Upload version file
      uses: actions/upload-artifact@v6
      with:
        name: version
        path: version.txt
    # Downloads the artifact uploaded above and prints both of its lines.
    - name: Download version artifact
      uses: actions/download-artifact@v7
      with:
        name: version
        path: ./version
    - name: Read version and unstable
      id: read-version
      run: |
        VERSION=$(sed -n '1p' ./version/version.txt)
        UNSTABLE=$(sed -n '2p' ./version/version.txt)
        echo "Version: $VERSION"
        echo "Unstable: $UNSTABLE"
lint:
  # Runs golangci-lint for three GOOS/GOARCH combinations.
  needs:
    - changes
  if: needs.changes.outputs.should_build == 'true'
  runs-on:
    - self-hosted
    - build
  steps:
    - name: Checkout repository
      uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
    - name: Install GO
      uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
      with:
        cache: false
        go-version-file: "go.mod"
    # The lint passes are sequential steps rather than a matrix, so that a task
    # this small does not allocate 3 jobs on the runners.
    - name: golangci-lint on linux/amd64
      uses: golangci/golangci-lint-action@1481404843c368bc19ca9406f87d6e0fc97bdcfd # v7
      env:
        GOOS: linux
        GOARCH: amd64
      with:
        version: v2.5.0 # golangci-lint version; keep in sync with the Makefile.
    - name: golangci-lint on linux/arm64
      uses: golangci/golangci-lint-action@1481404843c368bc19ca9406f87d6e0fc97bdcfd # v7
      env:
        GOOS: linux
        GOARCH: arm64
      with:
        version: v2.5.0 # golangci-lint version; keep in sync with the Makefile.
    - name: golangci-lint on darwin/arm64
      uses: golangci/golangci-lint-action@1481404843c368bc19ca9406f87d6e0fc97bdcfd # v7
      env:
        GOOS: darwin
        GOARCH: arm64
      with:
        version: v2.5.0 # golangci-lint version; keep in sync with the Makefile.
build-docker-images:
  # Runs the test suite, builds and pushes every container image, then checks
  # that the SPANK plugins are present and loadable in the Slurm images.
  needs: [changes, pre-build]
  if: needs.changes.outputs.should_build == 'true'
  runs-on:
    - self-hosted
    - X64
    - build
  steps:
    - name: Checkout repository
      uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
    - name: Install GO
      uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
      with:
        go-version-file: "go.mod"
        cache: false
    - name: Download version artifact
      uses: actions/download-artifact@v7
      with:
        name: version
        path: ./version
    # Line 1 of the artifact is the version, line 2 the unstable flag; both are
    # exported as step outputs.
    - name: Read version and unstable
      id: read-version
      run: |
        VERSION=$(sed -n '1p' ./version/version.txt)
        UNSTABLE=$(sed -n '2p' ./version/version.txt)
        echo "version=$VERSION" >> "$GITHUB_OUTPUT"
        echo "unstable=$UNSTABLE" >> "$GITHUB_OUTPUT"
        echo "Version: $VERSION"
        echo "Unstable: $UNSTABLE"
    - name: Log in to the Github Container registry
      uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    - name: Set up QEMU
      uses: docker/setup-qemu-action@v3
    - name: Build and push Docker images
      shell: bash
      env:
        # The input is handed over through the environment instead of being
        # expanded into the script text. Falls back to 'false' when no input
        # is set.
        MULTI_ARCH_INPUT: ${{ inputs.multi_arch || 'false' }}
      run: |
        UNSTABLE="${{ steps.read-version.outputs.unstable }}"
        MULTI_ARCH="${MULTI_ARCH_INPUT}"
        if [[ "$MULTI_ARCH" == "true" ]]; then
          export PLATFORMS="linux/amd64,linux/arm64"
        else
          export PLATFORMS="linux/amd64"
        fi
        echo "Building for platforms: ${PLATFORMS}"
        IMAGE_VERSION="$(make get-image-version UNSTABLE=${UNSTABLE})"
        VERSION=$(make get-version UNSTABLE=${UNSTABLE})
        OPERATOR_IMAGE_TAG=$(make get-operator-tag-version UNSTABLE=${UNSTABLE})
        # Read the build cache from four fixed locations; write it to a location named after this runner.
        DOCKER_BUILD_ARGS="\
        --cache-from=type=local,src=/mnt/shared-fs/docker/soperator-build-0 \
        --cache-from=type=local,src=/mnt/shared-fs/docker/soperator-build-1 \
        --cache-from=type=local,src=/mnt/shared-fs/docker/soperator-build-2 \
        --cache-from=type=local,src=/mnt/shared-fs/docker/soperator-build-3 \
        --cache-to=type=local,dest=/mnt/shared-fs/docker/${{ runner.name }},mode=max \
        "
        NFS_VERSION=$(make get-nfs-version UNSTABLE=${UNSTABLE})
        make sync-version UNSTABLE=${UNSTABLE}
        echo "Updating CRDs & auto-generated code (included in test step) & run tests"
        make test-coverage UNSTABLE="${UNSTABLE}"
        echo "Building local go base image"
        make docker-build-go-base DOCKER_BUILD_ARGS="${DOCKER_BUILD_ARGS}"
        # Images tagged with OPERATOR_IMAGE_TAG.
        echo "Building and pushing image of the soperatorchecks ${OPERATOR_IMAGE_TAG}"
        make docker-build-and-push DOCKER_BUILD_ARGS="${DOCKER_BUILD_ARGS}" UNSTABLE="${UNSTABLE}" IMAGE_NAME=soperatorchecks DOCKERFILE=soperatorchecks/soperatorchecks.dockerfile IMAGE_VERSION="${OPERATOR_IMAGE_TAG}"
        echo "Building and pushing image of the sconfigcontroller ${OPERATOR_IMAGE_TAG}"
        make docker-build-and-push DOCKER_BUILD_ARGS="${DOCKER_BUILD_ARGS}" UNSTABLE="${UNSTABLE}" IMAGE_NAME=sconfigcontroller DOCKERFILE=sconfigcontroller/sconfigcontroller.dockerfile IMAGE_VERSION="${OPERATOR_IMAGE_TAG}"
        echo "Building and pushing image of the soperator ${OPERATOR_IMAGE_TAG}"
        make docker-build-and-push DOCKER_BUILD_ARGS="${DOCKER_BUILD_ARGS}" UNSTABLE="${UNSTABLE}" IMAGE_NAME=slurm-operator DOCKERFILE=soperator/Dockerfile IMAGE_VERSION="${OPERATOR_IMAGE_TAG}"
        echo "Building and pushing image of the rebooter ${OPERATOR_IMAGE_TAG}"
        make docker-build-and-push DOCKER_BUILD_ARGS="${DOCKER_BUILD_ARGS}" UNSTABLE="${UNSTABLE}" IMAGE_NAME=rebooter DOCKERFILE=rebooter/rebooter.dockerfile IMAGE_VERSION="${OPERATOR_IMAGE_TAG}"
        # Images tagged with IMAGE_VERSION.
        echo "Building and pushing image of the munge ${IMAGE_VERSION}"
        make docker-build-and-push DOCKER_BUILD_ARGS="${DOCKER_BUILD_ARGS}" UNSTABLE="${UNSTABLE}" IMAGE_NAME=munge DOCKERFILE=munge/munge.dockerfile IMAGE_VERSION=${IMAGE_VERSION}
        echo "Building and pushing image of the controller_slurmctld ${IMAGE_VERSION}"
        make docker-build-and-push DOCKER_BUILD_ARGS="${DOCKER_BUILD_ARGS}" UNSTABLE="${UNSTABLE}" IMAGE_NAME=controller_slurmctld DOCKERFILE=controller/slurmctld.dockerfile IMAGE_VERSION=${IMAGE_VERSION}
        echo "Building and pushing image of the controller_slurmdbd ${IMAGE_VERSION}"
        make docker-build-and-push DOCKER_BUILD_ARGS="${DOCKER_BUILD_ARGS}" UNSTABLE="${UNSTABLE}" IMAGE_NAME=controller_slurmdbd DOCKERFILE=accounting/slurmdbd.dockerfile IMAGE_VERSION=${IMAGE_VERSION}
        echo "Building and pushing image of the slurmrestd ${IMAGE_VERSION}"
        make docker-build-and-push DOCKER_BUILD_ARGS="${DOCKER_BUILD_ARGS}" UNSTABLE="${UNSTABLE}" IMAGE_NAME=slurmrestd DOCKERFILE=restd/slurmrestd.dockerfile IMAGE_VERSION=${IMAGE_VERSION}
        echo "Building and pushing image of the soperator-exporter ${IMAGE_VERSION}"
        make docker-build-and-push DOCKER_BUILD_ARGS="${DOCKER_BUILD_ARGS}" UNSTABLE="${UNSTABLE}" IMAGE_NAME=soperator-exporter DOCKERFILE=soperator-exporter/soperator-exporter.dockerfile IMAGE_VERSION=${IMAGE_VERSION}
        echo "Building and pushing image of the slurm_check_job ${IMAGE_VERSION}"
        make docker-build-and-push DOCKER_BUILD_ARGS="${DOCKER_BUILD_ARGS}" UNSTABLE="${UNSTABLE}" IMAGE_NAME=slurm_check_job DOCKERFILE=slurm_check_job/slurm_check_job.dockerfile IMAGE_VERSION=${IMAGE_VERSION}
        echo "Building and pushing image of the k8s_check_job ${IMAGE_VERSION}"
        make docker-build-and-push DOCKER_BUILD_ARGS="${DOCKER_BUILD_ARGS}" UNSTABLE="${UNSTABLE}" IMAGE_NAME=k8s_check_job DOCKERFILE=k8s_check_job/k8s_check_job.dockerfile IMAGE_VERSION=${IMAGE_VERSION}
        echo "Building and pushing image of the login_sshd ${IMAGE_VERSION}"
        make docker-build-and-push DOCKER_BUILD_ARGS="${DOCKER_BUILD_ARGS}" UNSTABLE="${UNSTABLE}" IMAGE_NAME=login_sshd DOCKERFILE=login/sshd.dockerfile IMAGE_VERSION=${IMAGE_VERSION}
        echo "Building and pushing image of the worker_slurmd ${IMAGE_VERSION}"
        make docker-build-and-push DOCKER_BUILD_ARGS="${DOCKER_BUILD_ARGS}" UNSTABLE="${UNSTABLE}" IMAGE_NAME=worker_slurmd DOCKERFILE=worker/slurmd.dockerfile IMAGE_VERSION=${IMAGE_VERSION}
        # The nfs-server image carries its own version.
        echo "Building and pushing image of the nfs-server ${NFS_VERSION}"
        make docker-build-and-push DOCKER_BUILD_ARGS="${DOCKER_BUILD_ARGS}" UNSTABLE="${UNSTABLE}" IMAGE_NAME=nfs-server DOCKERFILE=nfs-server/nfs.dockerfile IMAGE_VERSION=${NFS_VERSION}
        echo "Removing previous jail rootfs tar archive"
        rm -f images/jail_rootfs*.tar
        echo "Building tarball for jail"
        make docker-build-jail DOCKER_BUILD_ARGS="${DOCKER_BUILD_ARGS}" UNSTABLE="${UNSTABLE}" IMAGE_VERSION=${IMAGE_VERSION}
        echo "Building and pushing image of the populate_jail ${IMAGE_VERSION}"
        make docker-build-and-push DOCKER_BUILD_ARGS="${DOCKER_BUILD_ARGS}" UNSTABLE="${UNSTABLE}" IMAGE_NAME=populate_jail DOCKERFILE=populate_jail/populate_jail.dockerfile IMAGE_VERSION=${IMAGE_VERSION}
        echo "Removing jail rootfs tar archive to speedup further docker builds."
        rm -f images/jail_rootfs*.tar
    - name: Test SPANK plugins are accessible
      shell: bash
      run: |
        UNSTABLE="${{ steps.read-version.outputs.unstable }}"
        IMAGE_VERSION="$(make get-image-version UNSTABLE=${UNSTABLE})"
        IMAGE_REPO="$(make get-image-repo UNSTABLE=${UNSTABLE})"
        failed=0
        # test_plugin IMAGE PLUGIN
        # Returns non-zero when /usr/lib/slurm/PLUGIN is missing in IMAGE or
        # when ldd cannot resolve it.
        test_plugin() {
          local image=$1
          local plugin=$2
          local full_image="${IMAGE_REPO}/${image}:${IMAGE_VERSION}"
          local ldd_output
          if ! docker run --rm --entrypoint test "${full_image}" -e "/usr/lib/slurm/${plugin}"; then
            echo "FAIL: ${image} missing /usr/lib/slurm/${plugin}"
            return 1
          fi
          # Verify the shared library is loadable (valid ELF, dependencies satisfied).
          # ldd can exit 0 while still printing "not found" for an unresolved
          # dependency, so the output is checked in addition to the exit status.
          if ! ldd_output="$(docker run --rm --entrypoint ldd "${full_image}" "/usr/lib/slurm/${plugin}" 2>&1)" \
            || grep -q "not found" <<<"${ldd_output}"; then
            echo "FAIL: ${image} has broken /usr/lib/slurm/${plugin}"
            echo "${ldd_output}"
            return 1
          fi
          echo " OK: ${image} has /usr/lib/slurm/${plugin}"
        }
        # Test chroot.so in all slurm images
        for image in slurm_check_job login_sshd worker_slurmd; do
          test_plugin "${image}" "chroot.so" || ((++failed))
        done
        # Test spanknccldebug.so (only in login_sshd and worker_slurmd)
        for image in login_sshd worker_slurmd; do
          test_plugin "${image}" "spanknccldebug.so" || ((++failed))
        done
        echo ""
        if [[ $failed -gt 0 ]]; then
          echo "FAILED: $failed plugin check(s) failed"
          exit 1
        fi
        echo "All SPANK plugin tests passed!"
build-helm-charts:
  # Runs the Helm tests and then releases the charts.
  needs:
    - changes
    - pre-build
  if: needs.changes.outputs.should_build == 'true'
  runs-on:
    - self-hosted
    - X64
    - build
  steps:
    - name: Checkout repository
      uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
    - name: Install GO
      uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
      with:
        cache: false
        go-version-file: "go.mod"
    - name: Download version artifact
      uses: actions/download-artifact@v7
      with:
        path: ./version
        name: version
    # First line of the artifact holds the version, second line the unstable
    # flag; both become step outputs.
    - id: read-version
      name: Read version and unstable
      run: |
        VERSION=$(sed -n '1p' ./version/version.txt)
        UNSTABLE=$(sed -n '2p' ./version/version.txt)
        echo "version=$VERSION" >> "$GITHUB_OUTPUT"
        echo "unstable=$UNSTABLE" >> "$GITHUB_OUTPUT"
        echo "Version: $VERSION"
        echo "Unstable: $UNSTABLE"
    - name: Run Helm Tests
      run: make helmtest
    # Versions are synced before the charts are released.
    - name: Push Helm charts
      run: |
        UNSTABLE=${{ steps.read-version.outputs.unstable }}
        make sync-version UNSTABLE=${UNSTABLE}
        make release-helm UNSTABLE="${UNSTABLE}"
helm-integration-test:
  # Runs the Go integration tests in ./test/integration/ once the images and
  # charts have been built.
  name: Helm Chart Integration Test with Built Images
  needs: [changes, build-docker-images, build-helm-charts]
  if: needs.changes.outputs.should_build == 'true'
  runs-on: ubuntu-latest
  # NOTE(review): this job-wide limit equals the `go test -timeout 10m` used
  # below and also has to cover the setup steps — confirm this is intended.
  timeout-minutes: 10
  steps:
    # checkout and setup-go are pinned to the same commit SHAs that the other
    # jobs in this workflow use, instead of floating major-version tags.
    - name: Checkout code
      uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
    - name: Set up Go
      uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
      with:
        go-version-file: "go.mod"
        cache: true
    - name: Download version artifact
      uses: actions/download-artifact@v7
      with:
        name: version
        path: ./version
    # Line 1 of the artifact is the version, line 2 the unstable flag.
    - name: Read version and unstable
      id: read-version
      run: |
        VERSION=$(sed -n '1p' ./version/version.txt)
        UNSTABLE=$(sed -n '2p' ./version/version.txt)
        echo "version=$VERSION" >> "$GITHUB_OUTPUT"
        echo "unstable=$UNSTABLE" >> "$GITHUB_OUTPUT"
        echo "Version: $VERSION"
        echo "Unstable: $UNSTABLE"
    - name: Install dependencies
      run: |
        go mod download
        go mod verify
    - name: Install kind
      run: make install-kind
    - name: Install flux
      run: make install-flux
    - name: Install yq
      run: make yq
    - name: Run Helm Integration Tests
      env:
        # Quoted so that no YAML loader can read the value as a boolean.
        GO111MODULE: "on"
        UNSTABLE: ${{ steps.read-version.outputs.unstable }}
      run: |
        # UNSTABLE comes from this step's env block.
        make sync-version UNSTABLE="${UNSTABLE}"
        go test -v -timeout 10m -tags=integration ./test/integration/
    # Best-effort diagnostics: every command tolerates failure.
    - name: Get cluster logs on failure
      if: failure() || cancelled()
      run: |
        kubectl cluster-info dump --output-directory=./cluster-logs || true
        kubectl get pods -A -o wide || true
        kubectl get helmreleases -n flux-system || true
        ./bin/flux get all -n flux-system || true
    - name: Cleanup
      if: always()
      run: |
        make kind-delete || true
    - name: Upload test results
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: helm-integration-test-results
        path: |
          test/integration/*.log
          test/integration/*.xml
          cluster-logs/
        retention-days: 1
ci-success:
  # Single aggregate status for the whole workflow; runs even when needed jobs
  # failed, were cancelled or were skipped.
  name: CI Success
  runs-on: ubuntu-latest
  needs:
    - changes
    - pre-build
    - lint
    - build-docker-images
    - build-helm-charts
    - helm-integration-test
  if: always()
  steps:
    - name: Check CI status
      shell: bash
      env:
        SHOULD_BUILD: ${{ needs.changes.outputs.should_build }}
        ANY_FAILED: ${{ contains(needs.*.result, 'failure') }}
        ANY_CANCELLED: ${{ contains(needs.*.result, 'cancelled') }}
      run: |
        # Failures and cancellations are checked first. When the `changes` job
        # itself fails or is cancelled its should_build output is empty, and
        # that must not be mistaken for "nothing to build".
        if [[ "${ANY_FAILED}" == "true" ]]; then
          echo "Some jobs failed"
          exit 1
        fi
        if [[ "${ANY_CANCELLED}" == "true" ]]; then
          echo "Some jobs were cancelled"
          exit 1
        fi
        # If build was skipped (PR with no code changes), that's OK
        if [[ "${SHOULD_BUILD}" != "true" ]]; then
          echo "No code changes in PR - CI skipped successfully"
          exit 0
        fi
        echo "All CI jobs passed!"
notify-failure:
  # Posts a Slack message when a manually dispatched run ends with ci-success
  # in the 'failure' or 'cancelled' state.
  name: Notify Slack on failure
  needs: [ci-success]
  if: |
    always() &&
    github.event_name == 'workflow_dispatch' &&
    (needs.ci-success.result == 'failure' || needs.ci-success.result == 'cancelled')
  runs-on: ubuntu-latest
  steps:
    - name: Send Slack notification
      env:
        SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
        # Context values are handed over through the environment instead of
        # being expanded into the script text, so a ref name containing shell
        # metacharacters is treated as data.
        STATUS: ${{ needs.ci-success.result }}
        REF_NAME: ${{ github.ref_name }}
        RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
      shell: bash
      run: |
        if [[ "$STATUS" == "failure" ]]; then
          EMOJI=":x:"
          COLOR="danger"
        else
          EMOJI=":warning:"
          COLOR="warning"
        fi
        # The \n below stays a literal backslash-n in bash and is the JSON
        # newline escape.
        # NOTE(review): REF_NAME is not JSON-escaped; a ref containing a double
        # quote would produce an invalid payload.
        curl -X POST "$SLACK_WEBHOOK_URL" \
          -H 'Content-type: application/json' \
          --data "{
            \"attachments\": [{
              \"color\": \"${COLOR}\",
              \"title\": \"${EMOJI} Nightly Build ${STATUS}\",
              \"text\": \"Branch: ${REF_NAME}\nWorkflow: <${RUN_URL}|View Run>\",
              \"footer\": \"soperator nightly build\"
            }]
          }"