Skip to content

fix: reuse existing cluster secrets when scaling Talos nodes during update #13402

fix: reuse existing cluster secrets when scaling Talos nodes during update

fix: reuse existing cluster secrets when scaling Talos nodes during update #13402

Workflow file for this run

name: CI - KSail
on:
  push:
    branches:
      - main
  pull_request:
  merge_group:
  workflow_dispatch:
    inputs:
      run_system_tests:
        description: "Run system tests"
        required: false
        type: boolean
        default: true
env:
  # A pipe (|) separator, unlike a comma (,), makes Go fall back to a direct
  # fetch on ANY proxy error (403 included), not only on 404/410. This hardens
  # CI against transient proxy.golang.org outages; go.sum still verifies
  # module integrity.
  GOPROXY: "https://proxy.golang.org|direct"
  # Soft memory ceiling for the Go build toolchain (compiler, linker). The
  # KSail binary links a large dependency tree, and on cache-miss builds the
  # linker can exceed the default runner memory. GOMEMLIMIT makes the GC work
  # harder to stay within budget, which lowers peak RSS.
  GOMEMLIMIT: "6GiB"
concurrency:
  # One group per workflow + ref; a newer run cancels the one in progress.
  group: "ci-ksail-${{ github.workflow }}-${{ github.ref }}"
  cancel-in-progress: true
# No workflow-wide token permissions; the jobs below declare their own.
permissions: {}
jobs:
# cancel-stale-merge-queue — kept for future use when jobs are moved to merge_group
# cancel-stale-merge-queue:
# name: 🧹 Cancel Stale Merge Queue Runs
# if: github.event_name == 'merge_group'
# runs-on: ubuntu-latest
# timeout-minutes: 2
# permissions:
# actions: write
# steps:
# - name: Cancel previous runs for same merge queue entry
# uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
# with:
# script: |
# const ref = context.ref;
# const match = ref.match(/^(refs\/heads\/gh-readonly-queue\/.+)-[0-9a-f]+$/);
# if (!match) {
# console.log('Not a merge queue ref, skipping');
# return;
# }
# const stablePrefix = match[1];
#
# for (const status of ['in_progress', 'queued']) {
# let page = 1;
# while (true) {
# const { data: { workflow_runs: runs } } = await github.rest.actions.listWorkflowRuns({
# owner: context.repo.owner,
# repo: context.repo.repo,
# workflow_id: 'ci.yaml',
# event: 'merge_group',
# status,
# per_page: 100,
# page,
# });
#
# if (runs.length === 0) break;
#
# for (const run of runs) {
# if (run.run_number >= context.runNumber) continue;
# const runRef = `refs/heads/${run.head_branch}`;
# const runMatch = runRef.match(/^(refs\/heads\/gh-readonly-queue\/.+)-[0-9a-f]+$/);
# if (runMatch && runMatch[1] === stablePrefix) {
# console.log(`Cancelling stale run ${run.id} (${runRef})`);
# try {
# await github.rest.actions.cancelWorkflowRun({
# owner: context.repo.owner,
# repo: context.repo.repo,
# run_id: run.id,
# });
# } catch (error) {
# if (error && (error.status === 409 || error.status === 422)) {
# console.log(`Skipping run ${run.id}; it is no longer cancellable (${error.status})`);
# continue;
# }
# throw error;
# }
# }
# }
#
# if (runs.length < 100) break;
# page += 1;
# }
# }
# Hold the pipeline until the GitHub API rate limit is sufficient to proceed.
rate-limit-gate:
  name: ⏳ Rate Limit Gate
  runs-on: ubuntu-latest
  timeout-minutes: 5
  if: github.event_name != 'merge_group'
  permissions:
    contents: read
  steps:
    # The gate is a repository-local action, so the repo is checked out first.
    - name: 📄 Checkout
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        persist-credentials: false
    - uses: ./.github/actions/rate-limit-gate
# Classify the changed files into categories; downstream jobs read the
# per-category outputs to decide whether they need to run.
changes:
  name: 🔍 Detect Changes
  runs-on: ubuntu-latest
  timeout-minutes: 5
  needs:
    - rate-limit-gate
  if: github.event_name != 'merge_group'
  permissions:
    contents: read
    pull-requests: read
  # Each output mirrors the same-named filter of the `filter` step.
  outputs:
    code: ${{ steps.filter.outputs.code }}
    benchmark: ${{ steps.filter.outputs.benchmark }}
    system-test: ${{ steps.filter.outputs.system-test }}
    schema: ${{ steps.filter.outputs.schema }}
    cli: ${{ steps.filter.outputs.cli }}
    vsce: ${{ steps.filter.outputs.vsce }}
    docs: ${{ steps.filter.outputs.docs }}
    docs-deps: ${{ steps.filter.outputs.docs-deps }}
    vsce-deps: ${{ steps.filter.outputs.vsce-deps }}
    copilot-plugin: ${{ steps.filter.outputs.copilot-plugin }}
  steps:
    - name: 📄 Checkout
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        persist-credentials: false
    - name: 🔍 Filter paths
      id: filter
      uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1
      with:
        filters: |
          code:
            - '**/*.go'
            - 'go.mod'
            - 'go.sum'
            - '.github/actions/**'
          benchmark:
            # Limit to packages that contain benchmarks plus module files.
            # Avoids paying ~30 min of benchmark cost on PRs that touch only
            # unrelated Go code. Keep this in sync with the package discovery
            # step in the `benchmark` job (`grep -rl '^func Benchmark'`).
            - 'pkg/apis/cluster/v1alpha1/**/*.go'
            - 'pkg/cli/cmd/cipher/**/*.go'
            - 'pkg/cli/cmd/cluster/**/*.go'
            - 'pkg/client/argocd/**/*.go'
            - 'pkg/client/docker/**/*.go'
            - 'pkg/client/flux/**/*.go'
            - 'pkg/client/helm/**/*.go'
            - 'pkg/client/kubectl/**/*.go'
            - 'pkg/client/kustomize/**/*.go'
            - 'pkg/fsutil/configmanager/ksail/**/*.go'
            - 'pkg/fsutil/marshaller/**/*.go'
            - 'pkg/k8s/readiness/**/*.go'
            - 'pkg/notify/**/*.go'
            - 'pkg/svc/diff/**/*.go'
            - 'pkg/svc/image/**/*.go'
            - 'pkg/svc/registryresolver/**/*.go'
            - 'go.mod'
            - 'go.sum'
            - '.github/workflows/ci.yaml'
          system-test:
            # NOTE: Do not use negative patterns (!) here.
            # dorny/paths-filter v4 uses picomatch where negated patterns
            # like '!**/*_test.go' match ANY file that is not a test file
            # (including .mdx, .json, etc.), causing docs-only PRs to
            # falsely trigger system tests.
            # Use an explicit allowlist of directories whose changes truly
            # require system tests (cluster/workload code, providers,
            # provisioners, installers, client libraries). Chat, MCP,
            # cipher, toolgen, docs generators, and schema generators are
            # intentionally omitted.
            - 'main.go'
            - 'internal/buildmeta/**'
            - 'pkg/apis/**'
            - 'pkg/cli/annotations/**'
            - 'pkg/cli/cmd/cluster/**'
            - 'pkg/cli/cmd/workload/**'
            - 'pkg/cli/cmd/tenant/**'
            - 'pkg/cli/dockerutil/**'
            - 'pkg/cli/editor/**'
            - 'pkg/cli/flags/**'
            - 'pkg/cli/kubeconfig/**'
            - 'pkg/cli/kubeconfighook/**'
            - 'pkg/cli/lifecycle/**'
            - 'pkg/cli/setup/**'
            - 'pkg/cli/ui/asciiart/**'
            - 'pkg/cli/ui/confirm/**'
            - 'pkg/cli/ui/errorhandler/**'
            - 'pkg/cli/ui/picker/**'
            - 'pkg/client/**'
            - 'pkg/di/**'
            - 'pkg/envvar/**'
            - 'pkg/fsutil/**'
            - 'pkg/k8s/**'
            - 'pkg/notify/**'
            - 'pkg/runner/**'
            - 'pkg/svc/detector/**'
            - 'pkg/svc/diff/**'
            - 'pkg/svc/image/**'
            - 'pkg/svc/installer/**'
            - 'pkg/svc/provider/**'
            - 'pkg/svc/provisioner/**'
            - 'pkg/svc/registryresolver/**'
            - 'pkg/svc/state/**'
            - 'pkg/svc/tenant/**'
            - 'pkg/svc/versionresolver/**'
            - 'pkg/timer/**'
            - 'go.mod'
            - 'go.sum'
            - '.github/actions/**'
            - '.github/fixtures/**'
            - '.github/workflows/ci.yaml'
          schema:
            - 'pkg/apis/**/*.go'
            - 'go.mod'
            - 'schemas/gen_schema.go'
          cli:
            - 'cmd/**/*.go'
            - 'pkg/**/*.go'
            - 'go.mod'
            - 'docs/gen_docs.go'
            - 'docs/gen_docs_prose.go'
          vsce:
            - 'vsce/**'
            - '.github/workflows/ci.yaml'
          docs:
            # Matches docs content (MDX, Astro, config) but not Go generators.
            # Avoid negative patterns — see system-test comment above.
            - 'docs/src/**'
            - 'docs/public/**'
            - 'docs/astro.config.*'
            - 'docs/tsconfig.json'
            - 'docs/package*.json'
            - 'docs/.npmrc'
            - '.github/workflows/ci.yaml'
          docs-deps:
            - 'docs/package*.json'
          vsce-deps:
            - 'vsce/package*.json'
          copilot-plugin:
            - 'copilot-plugin/**'
            - '.github/plugin/**'
            - '.claude-plugin/**'
            - '.github/workflows/ci.yaml'
# Calls the shared Go validation workflow; restricted to pushes on main.
ci-go:
  name: ✅ Validate Go Project
  uses: devantler-tech/reusable-workflows/.github/workflows/validate-go-project.yaml@3b781dc75a8218d568ddbd4b92aee971bf9e835d # v3.1.3
  if: github.event_name == 'push' && github.ref == 'refs/heads/main'
  permissions:
    contents: write
    issues: write
    pull-requests: write
  # Secrets are forwarded explicitly to the called workflow.
  secrets:
    CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
    APP_PRIVATE_KEY: ${{ secrets.APP_PRIVATE_KEY }}
# Gate: wait for the org-wide Required Workflow "✅ Validate Go Project" to
# finish before starting expensive system tests. On workflow_dispatch the
# required workflow does not run, so this job is skipped.
wait-for-validate-go:
  name: ⏳ Wait for Go Validation
  runs-on: ubuntu-latest
  # Must stay above the script's own 25-minute polling budget (maxWait).
  timeout-minutes: 30
  needs: [changes]
  # Same-repo pull requests that touch system-test paths only.
  if: >-
    github.event_name == 'pull_request'
    && needs.changes.outputs.system-test == 'true'
    && github.event.pull_request.head.repo.full_name == github.repository
  permissions:
    checks: read
  steps:
    - name: Wait for ✅ Validate Go Project
      uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
      with:
        script: |
          const sha = context.payload.pull_request.head.sha;
          const owner = context.repo.owner;
          const repo = context.repo.repo;
          const prefix = '✅ Validate Go Project';
          const maxWait = 25 * 60 * 1000; // 25 minutes
          const poll = 30 * 1000; // 30 seconds

          // Returns every check run on the PR head commit whose name starts
          // with `prefix`. All pages are fetched: a single page holds at most
          // 100 check runs, and the other jobs reporting on this commit could
          // otherwise push the Go validation checks off the first page.
          const listRelevant = async () => {
            const checkRuns = await github.paginate(github.rest.checks.listForRef, {
              owner, repo, ref: sha, per_page: 100,
            });
            return checkRuns.filter(cr => cr.name.startsWith(prefix));
          };

          const start = Date.now();
          while (Date.now() - start < maxWait) {
            const relevant = await listRelevant();
            if (relevant.length > 0) {
              const pending = relevant.filter(cr => cr.status !== 'completed');
              if (pending.length === 0) {
                // Everything finished: any conclusion other than
                // success/skipped/neutral fails the gate.
                const failed = relevant.filter(cr =>
                  !['success', 'skipped', 'neutral'].includes(cr.conclusion)
                );
                if (failed.length > 0) {
                  const names = failed.map(cr => `${cr.name}: ${cr.conclusion}`).join('\n');
                  core.setFailed(`Go validation failed:\n${names}`);
                  return;
                }
                core.info(`✅ All ${relevant.length} Go validation check(s) passed`);
                return;
              }
              core.info(`⏳ Waiting for ${pending.length}/${relevant.length} check(s)...`);
            } else {
              core.info('⏳ No Go validation checks found yet...');
            }
            await new Promise(r => setTimeout(r, poll));
          }

          // On timeout, succeed gracefully if no checks ever appeared
          // (required workflow may not be configured for this repo).
          const remaining = await listRelevant();
          if (remaining.length === 0) {
            core.warning('No Go validation checks found — required workflow may not apply to this repo');
            return;
          }
          core.setFailed('Timed out waiting for Go validation checks to complete');
# Builds the KSail binary through the repository's cache-ksail-binary action.
build-artifact:
  name: 🏗️ Build KSail Binary
  runs-on: ubuntu-latest
  timeout-minutes: 20
  needs:
    - changes
  # Always on manual dispatch; on pull requests only when Go code, CLI
  # sources, or system-test relevant paths changed.
  if: >-
    github.event_name == 'workflow_dispatch'
    || (github.event_name == 'pull_request'
    && (needs.changes.outputs.code == 'true'
    || needs.changes.outputs.cli == 'true'
    || needs.changes.outputs.system-test == 'true'))
  permissions:
    contents: read
  steps:
    - name: 📄 Checkout
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        persist-credentials: false
    - name: ⚙️ Setup Go
      id: setup-go
      uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
      with:
        go-version-file: go.mod
    # The Go version and a hash over module files plus all Go sources are
    # handed to the cache action as inputs.
    - name: 📦 Cache KSail Binary
      uses: ./.github/actions/cache-ksail-binary
      with:
        go-version: ${{ steps.setup-go.outputs.go-version }}
        source-hash: ${{ hashFiles('go.mod', 'go.sum', '**/*.go') }}
        output-path: /usr/local/bin/ksail
# Regenerates the JSON schema and reference docs, then records any resulting
# working-tree changes as a patch artifact named `generate-patch`.
generate:
  name: 📝 Generate Schema & Docs
  runs-on: ubuntu-latest
  timeout-minutes: 25
  # Depend on build-artifact so the Go build cache is warm by the time
  # `go generate ./docs/...` (and ./schemas/...) re-compiles the cli import
  # graph. On events where build-artifact is skipped (push to main), this
  # falls through and the job still runs.
  needs: [changes, build-artifact]
  if: >-
    !cancelled()
    && github.event_name != 'merge_group'
    && (needs.changes.outputs.schema == 'true' || needs.changes.outputs.cli == 'true')
    && (needs.build-artifact.result == 'success' || needs.build-artifact.result == 'skipped')
  permissions:
    contents: read
  steps:
    # On pull requests check out the PR head (repository and SHA); otherwise
    # the commit that triggered the run.
    - name: 📄 Checkout
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        persist-credentials: false
        repository: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name || github.repository }}
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    - name: ⚙️ Setup Go
      uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
      with:
        go-version-file: go.mod
    - name: 📝 Generate JSON schema
      if: needs.changes.outputs.schema == 'true'
      run: go generate ./schemas/...
    - name: 📚 Generate reference documentation
      if: needs.changes.outputs.cli == 'true'
      run: go generate ./docs/...
    # This step only writes the patch file; the upload is the next step.
    # `git add -N .` marks untracked files as intent-to-add so that newly
    # generated files show up in `git diff`.
    - name: 📝 Create patch
      run: |
        git add -N .
        git diff > /tmp/generate.patch
        if [ -s /tmp/generate.patch ]; then
          echo "Changes detected"
        else
          echo "No changes"
        fi
    - name: 📤 Upload patch
      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
      with:
        name: generate-patch
        path: /tmp/generate.patch
        if-no-files-found: ignore
        retention-days: 1
    # Runs for fork pull requests only: a non-empty patch fails the job and is
    # printed so the contributor can regenerate locally.
    - name: ❌ Fail if generated files are out of date (fork PR)
      if: >-
        github.event_name == 'pull_request'
        && github.event.pull_request.head.repo.full_name != github.repository
      shell: bash
      run: |
        if [ -s /tmp/generate.patch ]; then
          echo "::error::Generated files are out of date. Please run 'go generate ./schemas/...' and 'go generate ./docs/...' locally and commit the results."
          echo ""
          cat /tmp/generate.patch
          exit 1
        fi
# Applies the patch artifact(s) produced earlier in the run and either pushes
# the result to the PR branch or opens a PR against the pushed branch.
auto-commit:
  name: 📤 Auto-Commit Generated Changes
  runs-on: ubuntu-latest
  timeout-minutes: 15
  needs: [changes, generate]
  # `!cancelled()` rather than `always()`: the job must still run when an
  # upstream job was skipped, but a cancelled run (for example one superseded
  # through the workflow's cancel-in-progress concurrency group) must not go
  # on to push generated changes. Fork pull requests are excluded.
  if: >-
    !cancelled()
    && github.event_name != 'merge_group'
    && (github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository)
    && needs.generate.result == 'success'
  permissions:
    contents: write
    pull-requests: write
  steps:
    - name: 🔑 Generate GitHub App Token
      uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1
      id: generate-token
      with:
        client-id: ${{ vars.APP_CLIENT_ID }}
        private-key: ${{ secrets.APP_PRIVATE_KEY }} # zizmor: ignore[secrets-outside-env] - GitHub App token generation, no environment scoping available for app installations
    # Credentials are persisted on purpose: the commit step below pushes with
    # the app token configured here.
    - name: 📄 Checkout
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        ref: ${{ github.head_ref || github.ref_name }}
        persist-credentials: true
        token: ${{ steps.generate-token.outputs.token }}
    - name: 📥 Download patches
      uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
      with:
        pattern: "*-patch"
        merge-multiple: true
        path: /tmp/patches
    # Each non-empty patch is applied with a 3-way merge first, then with
    # `--reject` as a fallback. Any leftover *.rej file fails the job so that
    # a partially applied patch is never committed.
    - name: 📋 Apply patches
      run: |
        set -euo pipefail
        shopt -s nullglob
        for patch in /tmp/patches/*.patch; do
          if [ -s "$patch" ]; then
            patch_name="$(basename "$patch")"
            echo "Applying ${patch_name}..."
            if git apply --3way "$patch"; then
              echo "Applied ${patch_name} with 3-way merge."
            else
              echo "3-way apply failed for ${patch_name}; falling back to direct apply with rejects..." >&2
              if git apply --reject "$patch"; then
                echo "Applied ${patch_name} with rejects."
                rej_files=$(find . -name '*.rej' 2>/dev/null || true)
                if [ -n "$rej_files" ]; then
                  echo "::error::Reject files found after applying ${patch_name}:"
                  echo "${rej_files}"
                  git status --short || true
                  echo "Failing job to avoid committing partially-applied patches." >&2
                  exit 1
                fi
              else
                echo "::error::Failed to apply patch ${patch_name}. See diagnostics above." >&2
                git status --short || true
                exit 1
              fi
            fi
          else
            echo "Skipping empty patch $(basename "$patch")"
          fi
        done
    - name: 📤 Commit and push generated changes (PR branch)
      if: github.event_name == 'pull_request'
      uses: stefanzweifel/git-auto-commit-action@04702edda442b2e678b25b537cec683a1493fcb9 # v7.1.0
      with:
        commit_message: "chore: sync modules and update generated files"
        commit_user_name: generator-bot
        commit_user_email: generator-bot@users.noreply.github.com
        branch: ${{ github.head_ref }}
    # Non-PR events do not commit to the ref directly; a pull request targeting
    # the triggering ref is opened instead.
    - name: 📤 Open PR for generated changes (protected branch)
      if: github.event_name != 'pull_request'
      uses: peter-evans/create-pull-request@5f6978faf089d4d20b00c7766989d076bb2fc7f1 # v8.1.1
      with:
        token: ${{ steps.generate-token.outputs.token }}
        commit-message: "chore: sync modules and update generated files"
        committer: "generator-bot <generator-bot@users.noreply.github.com>"
        branch: chore/sync-generated-files/${{ github.ref_name }}
        base: ${{ github.ref_name }}
        title: "chore: sync modules and update generated files"
        body: |
          Auto-generated changes detected on `${{ github.ref_name }}`.
          This PR updates generated files (schemas, docs, etc.) that were
          not included in the originating commit.
        delete-branch: true
# Runs the repository's npm-audit-and-fix action against the docs package
# whenever the docs dependency manifests change.
audit-docs:
  name: 🔍 Audit Docs Dependencies
  runs-on: ubuntu-latest
  timeout-minutes: 10
  needs: [changes]
  if: github.event_name != 'merge_group' && needs.changes.outputs.docs-deps == 'true'
  permissions:
    contents: write
  steps:
    - name: 📄 Checkout
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        persist-credentials: false
    - name: 🔍 NPM Audit and Fix
      uses: ./.github/actions/npm-audit-and-fix
      with:
        working-directory: docs
        client-id: ${{ vars.APP_CLIENT_ID }}
        app-private-key: ${{ secrets.APP_PRIVATE_KEY }} # zizmor: ignore[secrets-outside-env] - GitHub App token generation, no environment scoping available for app installations
        commit-message: "chore(docs): fix npm audit vulnerabilities"
# Runs the repository's npm-audit-and-fix action against the VSCode extension
# package whenever its dependency manifests change.
audit-vsce:
  name: 🔍 Audit VSCode Extension Dependencies
  runs-on: ubuntu-latest
  timeout-minutes: 10
  needs: [changes]
  if: github.event_name != 'merge_group' && needs.changes.outputs.vsce-deps == 'true'
  permissions:
    contents: write
  steps:
    - name: 📄 Checkout
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        persist-credentials: false
    - name: 🔍 NPM Audit and Fix
      uses: ./.github/actions/npm-audit-and-fix
      with:
        working-directory: vsce
        client-id: ${{ vars.APP_CLIENT_ID }}
        app-private-key: ${{ secrets.APP_PRIVATE_KEY }} # zizmor: ignore[secrets-outside-env] - GitHub App token generation, no environment scoping available for app installations
        commit-message: "chore(vsce): fix npm audit vulnerabilities"
# Pull-request build of the documentation site, run when docs paths change.
build-docs:
  name: 📚 Build Documentation
  runs-on: ubuntu-latest
  timeout-minutes: 10
  needs:
    - changes
  if: github.event_name == 'pull_request' && needs.changes.outputs.docs == 'true'
  permissions:
    contents: read
  # Every `run` step below executes inside the docs package.
  defaults:
    run:
      working-directory: docs
  steps:
    - name: 📄 Checkout
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        persist-credentials: false
    - name: ⚙️ Setup Node.js
      uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
      with:
        node-version: "24"
        cache: npm
        cache-dependency-path: docs/package-lock.json
    - name: 📦 Install dependencies
      run: npm ci
    - name: 🏗️ Build with Astro
      run: npm run build
# Compiles and packages the VSCode extension when files under vsce/ (or this
# workflow) change.
vscode-extension:
  name: 🧩 VSCode Extension
  runs-on: ubuntu-latest
  timeout-minutes: 15
  needs:
    - changes
  if: github.event_name != 'merge_group' && needs.changes.outputs.vsce == 'true'
  permissions:
    contents: read
  # All `run` steps execute inside the extension directory.
  defaults:
    run:
      working-directory: vsce
  steps:
    - name: 📄 Checkout
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        persist-credentials: false
    - name: ⚙️ Setup Node.js
      uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
      with:
        node-version: "24"
        cache: npm
        cache-dependency-path: vsce/package-lock.json
    - name: 📦 Install dependencies
      run: npm ci
    - name: 🏗️ Compile
      run: npm run compile
    - name: 📦 Package extension
      run: npm run package
    # NOTE(review): the job-level `if` already excludes merge_group, so the
    # merge_group half of the two conditions below is currently unreachable.
    - name: 📦 Create VSIX package
      if: github.event_name == 'pull_request' || github.event_name == 'merge_group'
      run: npx @vscode/vsce package --out ksail.vsix
    - name: 📤 Upload VSIX artifact
      if: github.event_name == 'pull_request' || github.event_name == 'merge_group'
      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
      with:
        name: ksail-vsix
        path: ksail.vsix
        retention-days: 7
# Sanity-checks the plugin and marketplace manifests with jq.
copilot-plugin:
  name: 🧩 Copilot CLI Plugin
  runs-on: ubuntu-latest
  timeout-minutes: 5
  needs:
    - changes
  if: github.event_name != 'merge_group' && needs.changes.outputs.copilot-plugin == 'true'
  permissions:
    contents: read
  steps:
    - name: 📄 Checkout
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        persist-credentials: false
    # `jq -e` exits non-zero when the file is not valid JSON or the filter
    # yields false/null, and `set -e` turns that into a step failure.
    - name: 🧪 Validate plugin manifests
      run: |
        set -euo pipefail
        for f in \
          copilot-plugin/plugin.json \
          copilot-plugin/.claude-plugin/plugin.json \
          copilot-plugin/.mcp.json \
          .github/plugin/marketplace.json \
          .claude-plugin/marketplace.json; do
          echo "Validating $f"
          jq -e . "$f" >/dev/null
        done
        # Required fields on the Copilot CLI + Claude Code plugin manifests
        for f in copilot-plugin/plugin.json copilot-plugin/.claude-plugin/plugin.json; do
          jq -e '.name == "ksail" and .version' "$f" >/dev/null
        done
        # Each marketplace must reference the plugin directory
        for f in .github/plugin/marketplace.json .claude-plugin/marketplace.json; do
          jq -e '.plugins[0].source == "./copilot-plugin" and .plugins[0].name == "ksail"' "$f" >/dev/null
        done
        # Skill manifest must exist
        test -f copilot-plugin/skills/ksail/SKILL.md
        # MCP server must point at the ksail binary
        jq -e '.mcpServers.ksail.command == "ksail"' copilot-plugin/.mcp.json >/dev/null
# Runs the repository's warm-helm-cache action and exposes its cache key.
warm-helm-cache:
  name: 🔥 Warm Helm Cache
  runs-on: ubuntu-latest
  timeout-minutes: 20
  needs:
    - changes
    - build-artifact
    - wait-for-validate-go
  # Runs when the Go validation gate passed or was skipped, and either a
  # same-repo PR touches system-test paths or a manual dispatch asked for
  # system tests.
  if: >-
    !cancelled()
    && (needs.wait-for-validate-go.result == 'success' || needs.wait-for-validate-go.result == 'skipped')
    && ((github.event_name == 'pull_request'
    && needs.changes.outputs.system-test == 'true'
    && github.event.pull_request.head.repo.full_name == github.repository)
    || (github.event_name == 'workflow_dispatch' && inputs.run_system_tests == true))
  permissions:
    contents: read
  outputs:
    cache-key: ${{ steps.warm.outputs.cache-key }}
  steps:
    - name: 📄 Checkout
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        persist-credentials: false
    - name: ⚙️ Setup Go
      id: setup-go
      uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
      with:
        go-version-file: go.mod
    # Same inputs as in build-artifact, plus `save: "false"`.
    - name: 📦 Cache KSail Binary
      uses: ./.github/actions/cache-ksail-binary
      with:
        go-version: ${{ steps.setup-go.outputs.go-version }}
        source-hash: ${{ hashFiles('go.mod', 'go.sum', '**/*.go') }}
        output-path: /usr/local/bin/ksail
        save: "false"
    - name: 🔥 Warm Helm chart cache
      id: warm
      uses: ./.github/actions/warm-helm-cache
# Runs the repository's warm-mirror-cache action and exposes its cache key.
warm-mirror-cache:
  name: 🔥 Warm Mirror Cache
  runs-on: ubuntu-latest
  timeout-minutes: 30
  needs:
    - changes
    - build-artifact
    - wait-for-validate-go
  # Runs when the Go validation gate passed or was skipped, and either a
  # same-repo PR touches system-test paths or a manual dispatch asked for
  # system tests.
  if: >-
    !cancelled()
    && (needs.wait-for-validate-go.result == 'success' || needs.wait-for-validate-go.result == 'skipped')
    && ((github.event_name == 'pull_request'
    && needs.changes.outputs.system-test == 'true'
    && github.event.pull_request.head.repo.full_name == github.repository)
    || (github.event_name == 'workflow_dispatch' && inputs.run_system_tests == true))
  permissions:
    contents: read
  outputs:
    cache-key: ${{ steps.warm.outputs.cache-key }}
  steps:
    - name: 📄 Checkout
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        persist-credentials: false
    - name: ⚙️ Setup Go
      id: setup-go
      uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
      with:
        go-version-file: go.mod
    # Same inputs as in build-artifact, plus `save: "false"`.
    - name: 📦 Cache KSail Binary
      uses: ./.github/actions/cache-ksail-binary
      with:
        go-version: ${{ steps.setup-go.outputs.go-version }}
        source-hash: ${{ hashFiles('go.mod', 'go.sum', '**/*.go') }}
        output-path: /usr/local/bin/ksail
        save: "false"
    - name: 🔥 Warm mirror cache
      id: warm
      uses: ./.github/actions/warm-mirror-cache
# Docker-based system tests: every matrix entry gets its own runner with local
# Docker containers. No job-level concurrency group is set; the matrix itself
# is capped by `max-parallel` and aborts on the first failure (`fail-fast`).
system-test-docker:
  name: 🧪 System Test (Docker)
  runs-on: ubuntu-latest
  timeout-minutes: 75
  needs:
    - changes
    - build-artifact
    - warm-helm-cache
    - warm-mirror-cache
    - wait-for-validate-go
  # Runs when the Go validation gate passed or was skipped, and either a
  # same-repo PR touches system-test paths or a manual dispatch asked for
  # system tests.
  if: >-
    !cancelled()
    && (needs.wait-for-validate-go.result == 'success' || needs.wait-for-validate-go.result == 'skipped')
    && ((github.event_name == 'pull_request'
    && needs.changes.outputs.system-test == 'true'
    && github.event.pull_request.head.repo.full_name == github.repository)
    || (github.event_name == 'workflow_dispatch' && inputs.run_system_tests == true))
  permissions:
    contents: read
    packages: write
  strategy:
    fail-fast: true
    max-parallel: 15
    matrix:
      distribution:
        - KWOK
        - Vanilla
        - K3s
        - VCluster
        - Talos
      provider:
        - Docker
      init:
        - true
        - false
      args:
        - ""
        - "--name system-test-cluster --cni Cilium --csi Enabled --load-balancer Enabled --metrics-server Enabled --policy-engine Kyverno --cert-manager Enabled --gitops-engine Flux"
        - "--cni Calico --csi Disabled --load-balancer Disabled --metrics-server Disabled --policy-engine Gatekeeper --gitops-engine ArgoCD"
        - "--gitops-engine Flux --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
        - "--gitops-engine ArgoCD --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
      exclude:
        # init=false primarily validates CLI flag parsing — the provisioner/installer
        # logic is the same as init=true. Keep only default and full-stack variants.
        - distribution: Vanilla
          provider: Docker
          init: false
          args: "--cni Calico --csi Disabled --load-balancer Disabled --metrics-server Disabled --policy-engine Gatekeeper --gitops-engine ArgoCD"
        - distribution: Vanilla
          provider: Docker
          init: false
          args: "--gitops-engine Flux --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
        - distribution: Vanilla
          provider: Docker
          init: false
          args: "--gitops-engine ArgoCD --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
        - distribution: K3s
          provider: Docker
          init: false
          args: "--cni Calico --csi Disabled --load-balancer Disabled --metrics-server Disabled --policy-engine Gatekeeper --gitops-engine ArgoCD"
        - distribution: K3s
          provider: Docker
          init: false
          args: "--gitops-engine Flux --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
        - distribution: K3s
          provider: Docker
          init: false
          args: "--gitops-engine ArgoCD --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
        - distribution: Talos
          provider: Docker
          init: false
          args: "--cni Calico --csi Disabled --load-balancer Disabled --metrics-server Disabled --policy-engine Gatekeeper --gitops-engine ArgoCD"
        - distribution: Talos
          provider: Docker
          init: false
          args: "--gitops-engine Flux --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
        - distribution: Talos
          provider: Docker
          init: false
          args: "--gitops-engine ArgoCD --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
        - distribution: VCluster
          provider: Docker
          init: false
          args: "--cni Calico --csi Disabled --load-balancer Disabled --metrics-server Disabled --policy-engine Gatekeeper --gitops-engine ArgoCD"
        - distribution: VCluster
          provider: Docker
          init: false
          args: "--gitops-engine Flux --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
        - distribution: VCluster
          provider: Docker
          init: false
          args: "--gitops-engine ArgoCD --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
        # KWOK: init=false primarily validates CLI flag parsing (same as other distributions).
        # Additionally, KWOK suppresses Flux so workload push cannot discover a registry
        # via FluxInstance, and init=false provides no ksail.yaml fallback.
        # Exclude all non-default init=false args for KWOK, matching other distributions.
        - distribution: KWOK
          provider: Docker
          init: false
          args: "--name system-test-cluster --cni Cilium --csi Enabled --load-balancer Enabled --metrics-server Enabled --policy-engine Kyverno --cert-manager Enabled --gitops-engine Flux"
        - distribution: KWOK
          provider: Docker
          init: false
          args: "--cni Calico --csi Disabled --load-balancer Disabled --metrics-server Disabled --policy-engine Gatekeeper --gitops-engine ArgoCD"
        - distribution: KWOK
          provider: Docker
          init: false
          args: "--gitops-engine Flux --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
        - distribution: KWOK
          provider: Docker
          init: false
          args: "--gitops-engine ArgoCD --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
      # One extra Talos combination on top of the generated matrix.
      include:
        - distribution: Talos
          provider: Docker
          init: true
          args: "--name system-test-cluster-with-image-verification --image-verification Enabled"
  steps:
    - name: 📄 Checkout
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        persist-credentials: false
    - name: 🧹 Free disk space
      # Targeted `rm -rf` of large hosted-tooling directories that the system
      # tests do not need. Avoids the slow `apt-get remove` path used by
      # endersonmenezes/free-disk-space, which previously consumed 3–9 min per
      # job (with apt autoremove pulling in update-initramfs / man-db rebuilds).
      # Each directory removal is best-effort: missing paths are not an error.
      run: |
        set -euo pipefail
        before=$(df --output=avail -BG / | tail -1 | tr -dc '0-9')
        # Largest offenders on ubuntu-latest (sizes from
        # https://github.com/actions/runner-images). Order by descending size.
        sudo rm -rf \
          /usr/local/lib/android \
          /usr/share/dotnet \
          /opt/ghc \
          /usr/local/share/boost \
          /usr/share/swift \
          /opt/hostedtoolcache/CodeQL \
          /opt/hostedtoolcache/PyPy \
          /opt/hostedtoolcache/Ruby \
          /opt/hostedtoolcache/Python \
          /usr/local/share/chromium \
          /usr/local/share/powershell \
          /usr/local/julia* \
          /usr/local/aws-cli \
          /usr/local/aws-sam-cli \
          /usr/share/gradle* \
          /usr/share/az* \
          /usr/share/miniconda || true
        docker system prune -af --volumes || true
        after=$(df --output=avail -BG / | tail -1 | tr -dc '0-9')
        echo "Free disk space on /: ${before}G -> ${after}G (gained $((after - before))G)"
    # The login is skipped when no Docker Hub username variable is configured.
    - name: 🔐 Login to Docker Hub
      if: ${{ vars.DOCKERHUB_USERNAME != '' }}
      uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
      with:
        username: ${{ vars.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }} # zizmor: ignore[secrets-outside-env] - third-party action input, no environment scoping available
    - name: ⚙️ Setup Go
      id: setup-go
      uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
      with:
        go-version-file: go.mod
        cache: false
    # Same inputs as in build-artifact, plus `save: "false"`.
    - name: 📦 Cache KSail Binary
      uses: ./.github/actions/cache-ksail-binary
      with:
        go-version: ${{ steps.setup-go.outputs.go-version }}
        source-hash: ${{ hashFiles('go.mod', 'go.sum', '**/*.go') }}
        output-path: /usr/local/bin/ksail
        save: "false"
    - name: 📥 Restore Helm Cache
      uses: ./.github/actions/restore-helm-cache
    - name: 📥 Restore Mirror Cache
      uses: ./.github/actions/restore-mirror-cache
    # The matrix values are passed straight through as inputs to the
    # repository's system-test action.
    - name: 🧪 Run KSail System Test
      uses: ./.github/actions/ksail-system-test
      with:
        distribution: ${{ matrix.distribution }}
        provider: ${{ matrix.provider }}
        init: ${{ matrix.init }}
        args: ${{ matrix.args }}
        apply-overlay-path: ".github/fixtures/podinfo-overlay"
        ghcr-user: ${{ github.actor }}
        ghcr-token: ${{ secrets.GITHUB_TOKEN }}
benchmark:
  # Discovers every Go package that declares a benchmark, runs them, reduces the
  # -count=N samples to one line per benchmark, and feeds that to
  # github-action-benchmark. On push the raw and filtered outputs are uploaded
  # as the "bench-results" artifact.
  name: 📊 Benchmark
  needs: [changes]
  if: >-
    (github.event_name == 'push' || github.event_name == 'pull_request')
    && needs.changes.outputs.benchmark == 'true'
  runs-on: ubuntu-latest
  timeout-minutes: 40
  permissions:
    contents: read
    # NOTE(review): presumably needed so comment-on-alert can post on pull
    # requests — confirm.
    pull-requests: write
  outputs:
    # "true" when no package contains a benchmark function, "false" otherwise.
    skip: ${{ steps.discover.outputs.skip }}
  steps:
    - name: 📄 Checkout
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        persist-credentials: false
    - name: ⚙️ Setup Go
      uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
      with:
        go-version-file: go.mod
    - name: 🔍 Discover benchmark packages
      id: discover
      shell: bash
      run: |
        set -euo pipefail
        # List the unique directories of all *_test.go files that declare a
        # top-level Benchmark function. grep exits non-zero when nothing
        # matches, so "|| true" keeps pipefail from aborting the step and
        # leaves pkgs empty instead.
        pkgs=$(grep -rl '^func Benchmark' --include='*_test.go' . \
          | xargs -I{} dirname {} \
          | sort -u || true)
        if [ -z "$pkgs" ]; then
          echo "skip=true" >> "$GITHUB_OUTPUT"
        else
          # Multi-line output: one package directory per line.
          {
            echo "skip=false"
            echo "packages<<EOF"
            echo "$pkgs"
            echo "EOF"
          } >> "$GITHUB_OUTPUT"
        fi
    - name: 🏃 Run benchmarks
      if: steps.discover.outputs.skip == 'false'
      shell: bash
      env:
        PACKAGES: ${{ steps.discover.outputs.packages }}
        # Reduce -count on PRs (-count=3) to cut benchmark wall time while
        # keeping enough samples for the awk averaging step in
        # "Prepare benchmark regression gate input" to produce a stable signal.
        # On push to main (which feeds benchmark-store / the historical chart),
        # keep -count=5 for higher-quality baselines.
        BENCH_COUNT: ${{ github.event_name == 'push' && '5' || '3' }}
      run: |
        set -euo pipefail
        # -run='^$' matches no test names, so only benchmarks execute. The raw
        # output is kept in bench.txt for the later steps.
        echo "$PACKAGES" \
          | xargs go test -bench=. -benchmem -run='^$' -count="${BENCH_COUNT}" -timeout=30m \
          | tee "$RUNNER_TEMP/bench.txt"
    - name: 🔍 Prepare benchmark regression gate input
      if: steps.discover.outputs.skip == 'false'
      shell: bash
      run: |
        set -euo pipefail
        # Filter excluded benchmarks and average -count=N duplicate samples into
        # a single representative value per benchmark.
        #
        # Excluded: BenchmarkCreateTarball_* — I/O-bound; timing is dominated by
        # CI runner disk-cache state and can vary 4-5x between runs (see #4090).
        # Excluded: samples with ns/op < 100 — too fast for reliable measurement
        # on shared runners with CPU clock jitter (see #3698). The check is
        # applied to each sample line before averaging.
        #
        # Averaging: go test -count=N emits N lines per benchmark. Without
        # averaging, github-action-benchmark would compare each of the N lines
        # individually against the single stored baseline, generating N alerts
        # instead of one and skewing the history chart. Averaging collapses them
        # to a single arithmetic mean (truncated to an integer) for a clean 1:1
        # comparison.
        #
        # Grouping: samples are keyed by package AND benchmark name. The package
        # is taken from the most recent "pkg:" line, so identically named
        # benchmarks in different packages are averaged separately instead of
        # being merged into one line under the first package.
        #
        # The original bench.txt is preserved for artifact upload and historical
        # storage.
        awk '
          # Remember the package that the following result lines belong to.
          /^pkg:/ { pkg = $2 }
          # Keep every input line so END can replay the file in order.
          { lines[NR] = $0 }
          /^Benchmark/ && /ns\/op/ {
            if ($1 ~ /^BenchmarkCreateTarball_/) next
            for (i = 1; i < NF; i++) {
              if ($(i+1) == "ns/op" && $i + 0 < 100) next
            }
            # Samples that survive both filters are accumulated under a
            # package-qualified key; filtered samples leave keyof[NR] unset.
            key = pkg SUBSEP $1
            keyof[NR] = key
            count[key]++; iters[key] += $2
            for (i = 3; i <= NF; i++) {
              if ($i == "ns/op") ns[key] += $(i-1)
              if ($i == "B/op") bop[key] += $(i-1)
              if ($i == "allocs/op") alloc[key] += $(i-1)
            }
          }
          END {
            for (i = 1; i <= NR; i++) {
              line = lines[i]
              # Non-result lines (goos, pkg, PASS, ok ...) pass through untouched.
              if (line !~ /^Benchmark/ || line !~ /ns\/op/) { print line; continue }
              # Result line that was excluded by one of the filters above.
              if (!(i in keyof)) continue
              key = keyof[i]
              # Emit the averaged line once, at the position of the first sample.
              if (emitted[key]++) continue
              split(key, part, SUBSEP)
              n = count[key]
              out = part[2] "\t" int(iters[key]/n) "\t" int(ns[key]/n) " ns/op"
              if (key in bop) out = out " " int(bop[key]/n) " B/op"
              if (key in alloc) out = out " " int(alloc[key]/n) " allocs/op"
              print out
            }
          }
        ' "$RUNNER_TEMP/bench.txt" > "$RUNNER_TEMP/bench-filtered.txt"
    - name: 📊 Compare benchmark results
      if: steps.discover.outputs.skip == 'false'
      uses: benchmark-action/github-action-benchmark@a60cea5bc7b49e15c1f58f411161f99e0df48372 # v1.22.0
      with:
        tool: go
        output-file-path: ${{ runner.temp }}/bench-filtered.txt
        gh-pages-branch: benchmark-data
        benchmark-data-dir-path: dev/bench
        github-token: ${{ secrets.GITHUB_TOKEN }}
        # Comparison only: results are never pushed from this job.
        auto-push: false
        alert-threshold: "150%"
        # Empty on pull requests, '200%' for every other event.
        fail-threshold: ${{ github.event_name != 'pull_request' && '200%' || '' }}
        fail-on-alert: false
        comment-on-alert: ${{ github.event_name == 'pull_request' }}
        summary-always: true
    - name: 📤 Upload benchmark results
      # Only pushes upload the artifact.
      if: steps.discover.outputs.skip == 'false' && github.event_name == 'push'
      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
      with:
        name: bench-results
        path: |
          ${{ runner.temp }}/bench.txt
          ${{ runner.temp }}/bench-filtered.txt
        retention-days: 1
benchmark-store:
  # Publishes the filtered benchmark output from the "bench-results" artifact
  # to the benchmark-data branch. Runs on push only, and only when the
  # benchmark job reported skip == 'false'.
  name: 📤 Store Benchmark Data
  runs-on: ubuntu-latest
  timeout-minutes: 10
  needs:
    - changes
    - benchmark
  if: >-
    github.event_name == 'push'
    && needs.changes.outputs.benchmark == 'true'
    && needs.benchmark.outputs.skip == 'false'
  permissions:
    # Write access because this job pushes (auto-push: true below).
    contents: write
  steps:
    - name: 📄 Checkout
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        persist-credentials: false
    - name: 📥 Download benchmark results
      uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
      with:
        path: ${{ runner.temp }}
        name: bench-results
    - name: 📊 Store benchmark data
      uses: benchmark-action/github-action-benchmark@a60cea5bc7b49e15c1f58f411161f99e0df48372 # v1.22.0
      with:
        tool: go
        github-token: ${{ secrets.GITHUB_TOKEN }}
        # Same branch and directory that the benchmark job compares against.
        gh-pages-branch: benchmark-data
        benchmark-data-dir-path: dev/bench
        output-file-path: ${{ runner.temp }}/bench-filtered.txt
        auto-push: true
        # Storage only: alert failures, alert comments and the job summary
        # are all turned off here.
        summary-always: false
        comment-on-alert: false
        fail-on-alert: false
require-checks-in-pr:
  # Single aggregate job: collects the result of every job listed under
  # "needs" and passes them to the require-checks-in-pr action.
  name: CI - Required Checks
  # always() makes this job run even when a needed job failed or was skipped.
  if: always()
  runs-on: ubuntu-latest
  timeout-minutes: 5
  # Keep this list in sync with the job-results entries below (one per job).
  needs:
    - rate-limit-gate
    - changes
    - ci-go
    - wait-for-validate-go
    - build-artifact
    - generate
    - auto-commit
    - warm-helm-cache
    - warm-mirror-cache
    - system-test-docker
    - audit-docs
    - audit-vsce
    - build-docs
    - vscode-extension
    - copilot-plugin
    - benchmark
    - benchmark-store
  steps:
    - uses: devantler-tech/actions/require-checks-in-pr@61ae89ec83b943bd3a486fdcaf91cb66fd6b8760 # feat/require-checks-in-pr
      with:
        # Folded scalar: the results are joined into one space-separated line.
        job-results: >-
          ${{ needs.rate-limit-gate.result }}
          ${{ needs.changes.result }}
          ${{ needs.ci-go.result }}
          ${{ needs.wait-for-validate-go.result }}
          ${{ needs.build-artifact.result }}
          ${{ needs.generate.result }}
          ${{ needs.auto-commit.result }}
          ${{ needs.warm-helm-cache.result }}
          ${{ needs.warm-mirror-cache.result }}
          ${{ needs.system-test-docker.result }}
          ${{ needs.audit-docs.result }}
          ${{ needs.audit-vsce.result }}
          ${{ needs.build-docs.result }}
          ${{ needs.vscode-extension.result }}
          ${{ needs.copilot-plugin.result }}
          ${{ needs.benchmark.result }}
          ${{ needs.benchmark-store.result }}