fix: reuse existing cluster secrets when scaling Talos nodes during update #13402

Workflow file for this run

	# Workflow identity, triggers, shared environment, concurrency and default
	# token permissions for the KSail CI pipeline.
	name: CI - KSail

	on:
	  push:
	    branches: [main]
	  pull_request:
	  merge_group:
	  workflow_dispatch:
	    inputs:
	      run_system_tests:
	        description: "Run system tests"
	        required: false
	        type: boolean
	        default: true

	env:
	  # Use pipe (|) instead of comma (,) so Go falls through to direct on ANY proxy
	  # error (including 403), not just 404/410. Hardens CI against transient
	  # proxy.golang.org outages. go.sum still verifies module integrity.
	  GOPROXY: "https://proxy.golang.org|direct"
	  # Soft memory limit for Go build toolchain (compiler, linker). The KSail
	  # binary links a large dependency tree and the Go linker can exceed the
	  # default runner memory on cache-miss builds. Setting GOMEMLIMIT makes the
	  # GC work harder to stay within budget, reducing peak RSS.
	  GOMEMLIMIT: "6GiB"

	# One active run per workflow + ref; a newer run cancels the older one.
	concurrency:
	  group: "ci-ksail-${{ github.workflow }}-${{ github.ref }}"
	  cancel-in-progress: true

	# No token scopes at workflow level; every job declares its own permissions.
	permissions: {}

	jobs:
	# cancel-stale-merge-queue — kept for future use when jobs are moved to merge_group
	# cancel-stale-merge-queue:
	# name: 🧹 Cancel Stale Merge Queue Runs
	# if: github.event_name == 'merge_group'
	# runs-on: ubuntu-latest
	# timeout-minutes: 2
	# permissions:
	# actions: write
	# steps:
	# - name: Cancel previous runs for same merge queue entry
	# uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
	# with:
	# script: |
	# const ref = context.ref;
	# const match = ref.match(/^(refs\/heads\/gh-readonly-queue\/.+)-[0-9a-f]+$/);
	# if (!match) {
	# console.log('Not a merge queue ref, skipping');
	# return;
	# }
	# const stablePrefix = match[1];
	#
	# for (const status of ['in_progress', 'queued']) {
	# let page = 1;
	# while (true) {
	# const { data: { workflow_runs: runs } } = await github.rest.actions.listWorkflowRuns({
	# owner: context.repo.owner,
	# repo: context.repo.repo,
	# workflow_id: 'ci.yaml',
	# event: 'merge_group',
	# status,
	# per_page: 100,
	# page,
	# });
	#
	# if (runs.length === 0) break;
	#
	# for (const run of runs) {
	# if (run.run_number >= context.runNumber) continue;
	# const runRef = `refs/heads/${run.head_branch}`;
	# const runMatch = runRef.match(/^(refs\/heads\/gh-readonly-queue\/.+)-[0-9a-f]+$/);
	# if (runMatch && runMatch[1] === stablePrefix) {
	# console.log(`Cancelling stale run ${run.id} (${runRef})`);
	# try {
	# await github.rest.actions.cancelWorkflowRun({
	# owner: context.repo.owner,
	# repo: context.repo.repo,
	# run_id: run.id,
	# });
	# } catch (error) {
	# if (error && (error.status === 409 || error.status === 422)) {
	# console.log(`Skipping run ${run.id}; it is no longer cancellable (${error.status})`);
	# continue;
	# }
	# throw error;
	# }
	# }
	# }
	#
	# if (runs.length < 100) break;
	# page += 1;
	# }
	# }

	# Gate job: wait for sufficient GitHub API rate limit before the rest of
	# the pipeline starts. Not run for merge-queue events.
	rate-limit-gate:
	  name: ⏳ Rate Limit Gate
	  runs-on: ubuntu-latest
	  timeout-minutes: 5
	  if: github.event_name != 'merge_group'
	  permissions:
	    contents: read
	  steps:
	    # The gate is a repository-local action, so the repo must be checked out.
	    - name: 📄 Checkout
	      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	      with:
	        persist-credentials: false

	    - uses: ./.github/actions/rate-limit-gate

	# Detect which file categories changed to conditionally run jobs.
	# Each filter below becomes a job output ('true'/'false') that downstream
	# jobs read through `needs.changes.outputs.<name>`.
	changes:
	  name: 🔍 Detect Changes
	  if: github.event_name != 'merge_group'
	  needs: [rate-limit-gate]
	  runs-on: ubuntu-latest
	  timeout-minutes: 5
	  permissions:
	    contents: read
	    pull-requests: read
	  outputs:
	    code: ${{ steps.filter.outputs.code }}
	    benchmark: ${{ steps.filter.outputs.benchmark }}
	    system-test: ${{ steps.filter.outputs.system-test }}
	    schema: ${{ steps.filter.outputs.schema }}
	    cli: ${{ steps.filter.outputs.cli }}
	    vsce: ${{ steps.filter.outputs.vsce }}
	    docs: ${{ steps.filter.outputs.docs }}
	    docs-deps: ${{ steps.filter.outputs.docs-deps }}
	    vsce-deps: ${{ steps.filter.outputs.vsce-deps }}
	    copilot-plugin: ${{ steps.filter.outputs.copilot-plugin }}
	  steps:
	    - name: 📄 Checkout
	      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	      with:
	        persist-credentials: false

	    - name: 🔍 Filter paths
	      uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1
	      id: filter
	      with:
	        # Go-file globs use '**/*.go' so files at any depth match; a
	        # single-level '*/.go' would only match a file literally named '.go'.
	        filters: |
	          code:
	            - '**/*.go'
	            - 'go.mod'
	            - 'go.sum'
	            - '.github/actions/**'
	          benchmark:
	            # Limit to packages that contain benchmarks plus module files.
	            # Avoids paying ~30 min of benchmark cost on PRs that touch only
	            # unrelated Go code. Keep this in sync with the package discovery
	            # step in the `benchmark` job (`grep -rl '^func Benchmark'`).
	            - 'pkg/apis/cluster/v1alpha1/**/*.go'
	            - 'pkg/cli/cmd/cipher/**/*.go'
	            - 'pkg/cli/cmd/cluster/**/*.go'
	            - 'pkg/client/argocd/**/*.go'
	            - 'pkg/client/docker/**/*.go'
	            - 'pkg/client/flux/**/*.go'
	            - 'pkg/client/helm/**/*.go'
	            - 'pkg/client/kubectl/**/*.go'
	            - 'pkg/client/kustomize/**/*.go'
	            - 'pkg/fsutil/configmanager/ksail/**/*.go'
	            - 'pkg/fsutil/marshaller/**/*.go'
	            - 'pkg/k8s/readiness/**/*.go'
	            - 'pkg/notify/**/*.go'
	            - 'pkg/svc/diff/**/*.go'
	            - 'pkg/svc/image/**/*.go'
	            - 'pkg/svc/registryresolver/**/*.go'
	            - 'go.mod'
	            - 'go.sum'
	            - '.github/workflows/ci.yaml'
	          system-test:
	            # NOTE: Do not use negative patterns (!) here.
	            # dorny/paths-filter v4 uses picomatch where negated patterns
	            # like '!**/*_test.go' match ANY file that is not a test file
	            # (including .mdx, .json, etc.), causing docs-only PRs to
	            # falsely trigger system tests.
	            # Use an explicit allowlist of directories whose changes truly
	            # require system tests (cluster/workload code, providers,
	            # provisioners, installers, client libraries). Chat, MCP,
	            # cipher, toolgen, docs generators, and schema generators are
	            # intentionally omitted.
	            - 'main.go'
	            - 'internal/buildmeta/**'
	            - 'pkg/apis/**'
	            - 'pkg/cli/annotations/**'
	            - 'pkg/cli/cmd/cluster/**'
	            - 'pkg/cli/cmd/workload/**'
	            - 'pkg/cli/cmd/tenant/**'
	            - 'pkg/cli/dockerutil/**'
	            - 'pkg/cli/editor/**'
	            - 'pkg/cli/flags/**'
	            - 'pkg/cli/kubeconfig/**'
	            - 'pkg/cli/kubeconfighook/**'
	            - 'pkg/cli/lifecycle/**'
	            - 'pkg/cli/setup/**'
	            - 'pkg/cli/ui/asciiart/**'
	            - 'pkg/cli/ui/confirm/**'
	            - 'pkg/cli/ui/errorhandler/**'
	            - 'pkg/cli/ui/picker/**'
	            - 'pkg/client/**'
	            - 'pkg/di/**'
	            - 'pkg/envvar/**'
	            - 'pkg/fsutil/**'
	            - 'pkg/k8s/**'
	            - 'pkg/notify/**'
	            - 'pkg/runner/**'
	            - 'pkg/svc/detector/**'
	            - 'pkg/svc/diff/**'
	            - 'pkg/svc/image/**'
	            - 'pkg/svc/installer/**'
	            - 'pkg/svc/provider/**'
	            - 'pkg/svc/provisioner/**'
	            - 'pkg/svc/registryresolver/**'
	            - 'pkg/svc/state/**'
	            - 'pkg/svc/tenant/**'
	            - 'pkg/svc/versionresolver/**'
	            - 'pkg/timer/**'
	            - 'go.mod'
	            - 'go.sum'
	            - '.github/actions/**'
	            - '.github/fixtures/**'
	            - '.github/workflows/ci.yaml'
	          schema:
	            - 'pkg/apis/**/*.go'
	            - 'go.mod'
	            - 'schemas/gen_schema.go'
	          cli:
	            - 'cmd/**/*.go'
	            - 'pkg/**/*.go'
	            - 'go.mod'
	            - 'docs/gen_docs.go'
	            - 'docs/gen_docs_prose.go'
	          vsce:
	            - 'vsce/**'
	            - '.github/workflows/ci.yaml'
	          docs:
	            # Matches docs content (MDX, Astro, config) but not Go generators.
	            # Avoid negative patterns — see system-test comment above.
	            - 'docs/src/**'
	            - 'docs/public/**'
	            - 'docs/astro.config.*'
	            - 'docs/tsconfig.json'
	            - 'docs/package*.json'
	            - 'docs/.npmrc'
	            - '.github/workflows/ci.yaml'
	          docs-deps:
	            - 'docs/package*.json'
	          vsce-deps:
	            - 'vsce/package*.json'
	          copilot-plugin:
	            - 'copilot-plugin/**'
	            - '.github/plugin/**'
	            - '.claude-plugin/**'
	            - '.github/workflows/ci.yaml'

	# Calls the shared Go validation reusable workflow, but only for pushes
	# to the main branch.
	ci-go:
	  name: ✅ Validate Go Project
	  uses: devantler-tech/reusable-workflows/.github/workflows/validate-go-project.yaml@3b781dc75a8218d568ddbd4b92aee971bf9e835d # v3.1.3
	  if: github.event_name == 'push' && github.ref == 'refs/heads/main'
	  # Scopes granted to the called workflow.
	  permissions:
	    contents: write
	    issues: write
	    pull-requests: write
	  # Secrets forwarded to the called workflow.
	  secrets:
	    CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
	    APP_PRIVATE_KEY: ${{ secrets.APP_PRIVATE_KEY }}

	# Gate: wait for the org-wide Required Workflow "✅ Validate Go Project" to
	# finish before starting expensive system tests. On workflow_dispatch the
	# required workflow does not run, so this job is skipped.
	wait-for-validate-go:
	  name: ⏳ Wait for Go Validation
	  runs-on: ubuntu-latest
	  timeout-minutes: 30
	  needs: [changes]
	  # Only same-repository pull requests that touch system-test paths.
	  if: >-
	    github.event_name == 'pull_request'
	    && needs.changes.outputs.system-test == 'true'
	    && github.event.pull_request.head.repo.full_name == github.repository
	  permissions:
	    checks: read
	  steps:
	    - name: Wait for ✅ Validate Go Project
	      uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
	      with:
	        script: |
	          const sha = context.payload.pull_request.head.sha;
	          const owner = context.repo.owner;
	          const repo = context.repo.repo;
	          const prefix = '✅ Validate Go Project';
	          const maxWait = 25 * 60 * 1000; // 25 minutes
	          const poll = 30 * 1000; // 30 seconds
	          const start = Date.now();

	          // Fetch ALL check runs on the commit (every page, not just the
	          // first 100) and keep those whose name starts with the prefix.
	          // Without pagination a large job matrix can push the validation
	          // check onto a later page, making it look like it never ran.
	          const listValidationChecks = async () => {
	            const runs = await github.paginate(github.rest.checks.listForRef, {
	              owner, repo, ref: sha, per_page: 100,
	            });
	            return runs.filter(cr => cr.name.startsWith(prefix));
	          };

	          while (Date.now() - start < maxWait) {
	            const relevant = await listValidationChecks();

	            if (relevant.length > 0) {
	              const pending = relevant.filter(cr => cr.status !== 'completed');
	              if (pending.length === 0) {
	                // Everything finished: any conclusion other than
	                // success/skipped/neutral fails this gate.
	                const failed = relevant.filter(cr =>
	                  !['success', 'skipped', 'neutral'].includes(cr.conclusion)
	                );
	                if (failed.length > 0) {
	                  const names = failed.map(cr => `${cr.name}: ${cr.conclusion}`).join('\n');
	                  core.setFailed(`Go validation failed:\n${names}`);
	                  return;
	                }
	                core.info(`✅ All ${relevant.length} Go validation check(s) passed`);
	                return;
	              }
	              core.info(`⏳ Waiting for ${pending.length}/${relevant.length} check(s)...`);
	            } else {
	              core.info('⏳ No Go validation checks found yet...');
	            }

	            await new Promise(r => setTimeout(r, poll));
	          }

	          // On timeout, succeed gracefully if no checks ever appeared
	          // (required workflow may not be configured for this repo).
	          const remaining = await listValidationChecks();
	          if (remaining.length === 0) {
	            core.warning('No Go validation checks found — required workflow may not apply to this repo');
	            return;
	          }
	          core.setFailed('Timed out waiting for Go validation checks to complete');

	# Builds (or restores from cache) the ksail binary. Runs on manual dispatch,
	# or on pull requests whose changes hit the code, cli or system-test filters.
	build-artifact:
	  name: 🏗️ Build KSail Binary
	  runs-on: ubuntu-latest
	  timeout-minutes: 20
	  needs: [changes]
	  if: github.event_name == 'workflow_dispatch' || (github.event_name == 'pull_request' && (needs.changes.outputs.code == 'true' || needs.changes.outputs.cli == 'true' || needs.changes.outputs.system-test == 'true'))
	  permissions:
	    contents: read
	  steps:
	    - name: 📄 Checkout
	      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	      with:
	        persist-credentials: false

	    - name: ⚙️ Setup Go
	      id: setup-go
	      uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
	      with:
	        go-version-file: go.mod

	    - name: 📦 Cache KSail Binary
	      uses: ./.github/actions/cache-ksail-binary
	      with:
	        go-version: ${{ steps.setup-go.outputs.go-version }}
	        # Hash the module files plus every Go source file at any depth, so
	        # any source change produces a new hash.
	        source-hash: ${{ hashFiles('go.mod', 'go.sum', '**/*.go') }}
	        output-path: /usr/local/bin/ksail

	# Regenerates the JSON schema and reference docs, captures the resulting
	# working-tree diff as a patch artifact, and fails fork PRs whose generated
	# files are stale.
	generate:
	  name: 📝 Generate Schema & Docs
	  runs-on: ubuntu-latest
	  timeout-minutes: 25
	  # Depend on build-artifact so the Go build cache is warm by the time
	  # `go generate ./docs/...` (and ./schemas/...) re-compiles the cli import
	  # graph. On events where build-artifact is skipped (push to main), this
	  # falls through and the job still runs.
	  needs: [changes, build-artifact]
	  if: >-
	    !cancelled()
	    && github.event_name != 'merge_group'
	    && (needs.changes.outputs.schema == 'true' || needs.changes.outputs.cli == 'true')
	    && (needs.build-artifact.result == 'success' || needs.build-artifact.result == 'skipped')
	  permissions:
	    contents: read
	  steps:
	    # On pull requests, check out the PR head (repo + SHA); otherwise the
	    # triggering commit of this repository.
	    - name: 📄 Checkout
	      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	      with:
	        persist-credentials: false
	        repository: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name || github.repository }}
	        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}

	    - name: ⚙️ Setup Go
	      uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
	      with:
	        go-version-file: go.mod

	    - name: 📝 Generate JSON schema
	      if: needs.changes.outputs.schema == 'true'
	      run: go generate ./schemas/...

	    - name: 📚 Generate reference documentation
	      if: needs.changes.outputs.cli == 'true'
	      run: go generate ./docs/...

	    # `git add -N .` marks untracked files as intent-to-add so that newly
	    # generated files show up in `git diff` as well.
	    - name: 📤 Upload patch
	      run: |
	        git add -N .
	        git diff > /tmp/generate.patch
	        if [ -s /tmp/generate.patch ]; then
	          echo "Changes detected"
	        else
	          echo "No changes"
	        fi
	    - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
	      with:
	        name: generate-patch
	        path: /tmp/generate.patch
	        if-no-files-found: ignore
	        retention-days: 1

	    # This step only runs for PRs from forks; a non-empty patch means the
	    # generated files are stale, so the job fails.
	    - name: ❌ Fail if generated files are out of date (fork PR)
	      if: >-
	        github.event_name == 'pull_request'
	        && github.event.pull_request.head.repo.full_name != github.repository
	      shell: bash
	      run: |
	        if [ -s /tmp/generate.patch ]; then
	          echo "::error::Generated files are out of date. Please run 'go generate ./schemas/...' and 'go generate ./docs/...' locally and commit the results."
	          echo ""
	          cat /tmp/generate.patch
	          exit 1
	        fi

	# Applies the patch artifacts produced earlier in the run and publishes the
	# result: committed straight to the PR branch for pull requests, or proposed
	# through a new pull request for any other event.
	auto-commit:
	  name: 📤 Auto-Commit Generated Changes
	  runs-on: ubuntu-latest
	  timeout-minutes: 15
	  needs: [changes, generate]
	  # Same-repo PRs and non-PR events only, and only after `generate`
	  # succeeded; never for merge-queue events.
	  if: >-
	    always()
	    && github.event_name != 'merge_group'
	    && (github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository)
	    && needs.generate.result == 'success'
	  permissions:
	    contents: write
	    pull-requests: write
	  steps:
	    - name: 🔑 Generate GitHub App Token
	      uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1
	      id: generate-token
	      with:
	        client-id: ${{ vars.APP_CLIENT_ID }}
	        private-key: ${{ secrets.APP_PRIVATE_KEY }} # zizmor: ignore[secrets-outside-env] - GitHub App token generation, no environment scoping available for app installations

	    # Credentials are persisted here (unlike other jobs) and the checkout
	    # uses the app token generated above.
	    - name: 📄 Checkout
	      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	      with:
	        ref: ${{ github.head_ref || github.ref_name }}
	        persist-credentials: true
	        token: ${{ steps.generate-token.outputs.token }}

	    - name: 📥 Download patches
	      uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
	      with:
	        pattern: "*-patch"
	        merge-multiple: true
	        path: /tmp/patches

	    # Try a 3-way apply first; fall back to `--reject`, and fail the job if
	    # any .rej file is left behind so a partial patch is never committed.
	    - name: 📋 Apply patches
	      run: |
	        set -euo pipefail
	        shopt -s nullglob

	        for patch in /tmp/patches/*.patch; do
	          if [ -s "$patch" ]; then
	            patch_name="$(basename "$patch")"
	            echo "Applying ${patch_name}..."

	            if git apply --3way "$patch"; then
	              echo "Applied ${patch_name} with 3-way merge."
	            else
	              echo "3-way apply failed for ${patch_name}; falling back to direct apply with rejects..." >&2

	              if git apply --reject "$patch"; then
	                echo "Applied ${patch_name} with rejects."
	                rej_files=$(find . -name '*.rej' 2>/dev/null || true)
	                if [ -n "$rej_files" ]; then
	                  echo "::error::Reject files found after applying ${patch_name}:"
	                  echo "${rej_files}"
	                  git status --short || true
	                  echo "Failing job to avoid committing partially-applied patches." >&2
	                  exit 1
	                fi
	              else
	                echo "::error::Failed to apply patch ${patch_name}. See diagnostics above." >&2
	                git status --short || true
	                exit 1
	              fi
	            fi
	          else
	            echo "Skipping empty patch $(basename "$patch")"
	          fi
	        done

	    - name: 📤 Commit and push generated changes (PR branch)
	      if: github.event_name == 'pull_request'
	      uses: stefanzweifel/git-auto-commit-action@04702edda442b2e678b25b537cec683a1493fcb9 # v7.1.0
	      with:
	        commit_message: "chore: sync modules and update generated files"
	        commit_user_name: generator-bot
	        commit_user_email: generator-bot@users.noreply.github.com
	        branch: ${{ github.head_ref }}

	    - name: 📤 Open PR for generated changes (protected branch)
	      if: github.event_name != 'pull_request'
	      uses: peter-evans/create-pull-request@5f6978faf089d4d20b00c7766989d076bb2fc7f1 # v8.1.1
	      with:
	        token: ${{ steps.generate-token.outputs.token }}
	        commit-message: "chore: sync modules and update generated files"
	        committer: "generator-bot <generator-bot@users.noreply.github.com>"
	        branch: chore/sync-generated-files/${{ github.ref_name }}
	        base: ${{ github.ref_name }}
	        title: "chore: sync modules and update generated files"
	        body: |
	          Auto-generated changes detected on `${{ github.ref_name }}`.
	          This PR updates generated files (schemas, docs, etc.) that were
	          not included in the originating commit.
	        delete-branch: true

	# Runs the repository-local npm-audit-and-fix action against `docs` when
	# the docs package manifests changed.
	audit-docs:
	  name: 🔍 Audit Docs Dependencies
	  runs-on: ubuntu-latest
	  timeout-minutes: 10
	  needs: [changes]
	  if: github.event_name != 'merge_group' && needs.changes.outputs.docs-deps == 'true'
	  permissions:
	    contents: write
	  steps:
	    - name: 📄 Checkout
	      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	      with:
	        persist-credentials: false

	    - name: 🔍 NPM Audit and Fix
	      uses: ./.github/actions/npm-audit-and-fix
	      with:
	        working-directory: docs
	        client-id: ${{ vars.APP_CLIENT_ID }}
	        app-private-key: ${{ secrets.APP_PRIVATE_KEY }} # zizmor: ignore[secrets-outside-env] - GitHub App token generation, no environment scoping available for app installations
	        commit-message: "chore(docs): fix npm audit vulnerabilities"

	# Runs the repository-local npm-audit-and-fix action against `vsce` when
	# the extension package manifests changed.
	audit-vsce:
	  name: 🔍 Audit VSCode Extension Dependencies
	  runs-on: ubuntu-latest
	  timeout-minutes: 10
	  needs: [changes]
	  if: github.event_name != 'merge_group' && needs.changes.outputs.vsce-deps == 'true'
	  permissions:
	    contents: write
	  steps:
	    - name: 📄 Checkout
	      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	      with:
	        persist-credentials: false

	    - name: 🔍 NPM Audit and Fix
	      uses: ./.github/actions/npm-audit-and-fix
	      with:
	        working-directory: vsce
	        client-id: ${{ vars.APP_CLIENT_ID }}
	        app-private-key: ${{ secrets.APP_PRIVATE_KEY }} # zizmor: ignore[secrets-outside-env] - GitHub App token generation, no environment scoping available for app installations
	        commit-message: "chore(vsce): fix npm audit vulnerabilities"

	# Pull-request-only check that the docs site installs and builds when the
	# docs filter reports changes.
	build-docs:
	  name: 📚 Build Documentation
	  needs: [changes]
	  if: github.event_name == 'pull_request' && needs.changes.outputs.docs == 'true'
	  runs-on: ubuntu-latest
	  timeout-minutes: 10
	  permissions:
	    contents: read
	  steps:
	    - name: 📄 Checkout
	      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	      with:
	        persist-credentials: false

	    # npm cache is keyed on the docs lockfile.
	    - name: ⚙️ Setup Node.js
	      uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
	      with:
	        node-version: "24"
	        cache: "npm"
	        cache-dependency-path: docs/package-lock.json

	    - name: 📦 Install dependencies
	      run: npm ci
	      working-directory: docs

	    - name: 🏗️ Build with Astro
	      run: npm run build
	      working-directory: docs

	# Installs, compiles and packages the VSCode extension; on pull requests
	# it also builds a VSIX and uploads it as an artifact.
	vscode-extension:
	  name: 🧩 VSCode Extension
	  runs-on: ubuntu-latest
	  timeout-minutes: 15
	  needs: [changes]
	  if: github.event_name != 'merge_group' && needs.changes.outputs.vsce == 'true'
	  permissions:
	    contents: read
	  # Applies to `run` steps only; `uses` steps still resolve paths from the
	  # workspace root.
	  defaults:
	    run:
	      working-directory: vsce
	  steps:
	    - name: 📄 Checkout
	      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	      with:
	        persist-credentials: false

	    - name: ⚙️ Setup Node.js
	      uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
	      with:
	        node-version: "24"
	        cache: "npm"
	        cache-dependency-path: "vsce/package-lock.json"

	    - name: 📦 Install dependencies
	      run: npm ci

	    - name: 🏗️ Compile
	      run: npm run compile

	    - name: 📦 Package extension
	      run: npm run package

	    # Runs inside `vsce/`, so the package is written to vsce/ksail.vsix.
	    - name: 📦 Create VSIX package
	      if: github.event_name == 'pull_request' || github.event_name == 'merge_group'
	      run: npx @vscode/vsce package --out ksail.vsix

	    - name: 📤 Upload VSIX artifact
	      if: github.event_name == 'pull_request' || github.event_name == 'merge_group'
	      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
	      with:
	        name: ksail-vsix
	        # Workspace-relative path: the run-step working directory does not
	        # apply to this action, so the `vsce/` prefix is required.
	        path: vsce/ksail.vsix
	        retention-days: 7

	# Validates the plugin and marketplace manifests with jq: each file must be
	# valid JSON, carry the expected fields, and point at the right plugin
	# directory and binary.
	copilot-plugin:
	  name: 🧩 Copilot CLI Plugin
	  runs-on: ubuntu-latest
	  timeout-minutes: 5
	  needs: [changes]
	  if: github.event_name != 'merge_group' && needs.changes.outputs.copilot-plugin == 'true'
	  permissions:
	    contents: read
	  steps:
	    - name: 📄 Checkout
	      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	      with:
	        persist-credentials: false

	    # `jq -e` exits non-zero when the filter result is false/null, which
	    # together with `set -e` fails the step on the first broken manifest.
	    - name: 🧪 Validate plugin manifests
	      run: |
	        set -euo pipefail
	        for f in \
	          copilot-plugin/plugin.json \
	          copilot-plugin/.claude-plugin/plugin.json \
	          copilot-plugin/.mcp.json \
	          .github/plugin/marketplace.json \
	          .claude-plugin/marketplace.json; do
	          echo "Validating $f"
	          jq -e . "$f" >/dev/null
	        done

	        # Required fields on the Copilot CLI + Claude Code plugin manifests
	        for f in copilot-plugin/plugin.json copilot-plugin/.claude-plugin/plugin.json; do
	          jq -e '.name == "ksail" and .version' "$f" >/dev/null
	        done

	        # Each marketplace must reference the plugin directory
	        for f in .github/plugin/marketplace.json .claude-plugin/marketplace.json; do
	          jq -e '.plugins[0].source == "./copilot-plugin" and .plugins[0].name == "ksail"' "$f" >/dev/null
	        done

	        # Skill manifest must exist
	        test -f copilot-plugin/skills/ksail/SKILL.md

	        # MCP server must point at the ksail binary
	        jq -e '.mcpServers.ksail.command == "ksail"' copilot-plugin/.mcp.json >/dev/null

	# Runs the repository-local warm-helm-cache action and exposes its
	# cache key as a job output. Runs under the same conditions as the
	# system tests.
	warm-helm-cache:
	  name: 🔥 Warm Helm Cache
	  runs-on: ubuntu-latest
	  timeout-minutes: 20
	  needs: [changes, build-artifact, wait-for-validate-go]
	  # Requires the Go validation gate to have passed or been skipped, and
	  # either a same-repo PR touching system-test paths or a manual dispatch
	  # with run_system_tests enabled.
	  if: >-
	    !cancelled()
	    && (needs.wait-for-validate-go.result == 'success' || needs.wait-for-validate-go.result == 'skipped')
	    && (
	      (github.event_name == 'pull_request'
	        && needs.changes.outputs.system-test == 'true'
	        && github.event.pull_request.head.repo.full_name == github.repository)
	      || (github.event_name == 'workflow_dispatch' && inputs.run_system_tests == true)
	    )
	  permissions:
	    contents: read
	  outputs:
	    cache-key: ${{ steps.warm.outputs.cache-key }}
	  steps:
	    - name: 📄 Checkout
	      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	      with:
	        persist-credentials: false

	    - name: ⚙️ Setup Go
	      id: setup-go
	      uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
	      with:
	        go-version-file: go.mod

	    - name: 📦 Cache KSail Binary
	      uses: ./.github/actions/cache-ksail-binary
	      with:
	        go-version: ${{ steps.setup-go.outputs.go-version }}
	        # Module files plus every Go source file at any depth.
	        source-hash: ${{ hashFiles('go.mod', 'go.sum', '**/*.go') }}
	        output-path: /usr/local/bin/ksail
	        save: "false"

	    - name: 🔥 Warm Helm chart cache
	      id: warm
	      uses: ./.github/actions/warm-helm-cache

	# Runs the repository-local warm-mirror-cache action and exposes its
	# cache key as a job output. Runs under the same conditions as the
	# system tests.
	warm-mirror-cache:
	  name: 🔥 Warm Mirror Cache
	  runs-on: ubuntu-latest
	  timeout-minutes: 30
	  needs: [changes, build-artifact, wait-for-validate-go]
	  # Requires the Go validation gate to have passed or been skipped, and
	  # either a same-repo PR touching system-test paths or a manual dispatch
	  # with run_system_tests enabled.
	  if: >-
	    !cancelled()
	    && (needs.wait-for-validate-go.result == 'success' || needs.wait-for-validate-go.result == 'skipped')
	    && (
	      (github.event_name == 'pull_request'
	        && needs.changes.outputs.system-test == 'true'
	        && github.event.pull_request.head.repo.full_name == github.repository)
	      || (github.event_name == 'workflow_dispatch' && inputs.run_system_tests == true)
	    )
	  permissions:
	    contents: read
	  outputs:
	    cache-key: ${{ steps.warm.outputs.cache-key }}
	  steps:
	    - name: 📄 Checkout
	      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	      with:
	        persist-credentials: false

	    - name: ⚙️ Setup Go
	      id: setup-go
	      uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
	      with:
	        go-version-file: go.mod

	    - name: 📦 Cache KSail Binary
	      uses: ./.github/actions/cache-ksail-binary
	      with:
	        go-version: ${{ steps.setup-go.outputs.go-version }}
	        # Module files plus every Go source file at any depth.
	        source-hash: ${{ hashFiles('go.mod', 'go.sum', '**/*.go') }}
	        output-path: /usr/local/bin/ksail
	        save: "false"

	    - name: 🔥 Warm mirror cache
	      id: warm
	      uses: ./.github/actions/warm-mirror-cache

	# Docker-based system tests run in full parallelism with no concurrency
	# constraints — each job gets its own runner with local Docker containers.
	system-test-docker:
	  name: 🧪 System Test (Docker)
	  runs-on: ubuntu-latest
	  timeout-minutes: 75
	  needs:
	    - changes
	    - build-artifact
	    - warm-helm-cache
	    - warm-mirror-cache
	    - wait-for-validate-go
	  # Requires the Go validation gate to have passed or been skipped, and
	  # either a same-repo PR touching system-test paths or a manual dispatch
	  # with run_system_tests enabled.
	  if: >-
	    !cancelled()
	    && (needs.wait-for-validate-go.result == 'success' || needs.wait-for-validate-go.result == 'skipped')
	    && (
	      (github.event_name == 'pull_request'
	        && needs.changes.outputs.system-test == 'true'
	        && github.event.pull_request.head.repo.full_name == github.repository)
	      || (github.event_name == 'workflow_dispatch' && inputs.run_system_tests == true)
	    )
	  permissions:
	    contents: read
	    packages: write
	  strategy:
	    fail-fast: true
	    max-parallel: 15
	    # Full cross product of the four axes below, minus `exclude`, plus the
	    # extra combinations under `include`.
	    matrix:
	      distribution: [KWOK, Vanilla, K3s, VCluster, Talos]
	      provider: [Docker]
	      init: [true, false]
	      args:
	        - ""
	        - "--name system-test-cluster --cni Cilium --csi Enabled --load-balancer Enabled --metrics-server Enabled --policy-engine Kyverno --cert-manager Enabled --gitops-engine Flux"
	        - "--cni Calico --csi Disabled --load-balancer Disabled --metrics-server Disabled --policy-engine Gatekeeper --gitops-engine ArgoCD"
	        - "--gitops-engine Flux --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
	        - "--gitops-engine ArgoCD --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
	      exclude:
	        # init=false primarily validates CLI flag parsing — the provisioner/installer
	        # logic is the same as init=true. Keep only default and full-stack variants.
	        - distribution: Vanilla
	          provider: Docker
	          init: false
	          args: "--cni Calico --csi Disabled --load-balancer Disabled --metrics-server Disabled --policy-engine Gatekeeper --gitops-engine ArgoCD"
	        - distribution: Vanilla
	          provider: Docker
	          init: false
	          args: "--gitops-engine Flux --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
	        - distribution: Vanilla
	          provider: Docker
	          init: false
	          args: "--gitops-engine ArgoCD --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
	        - distribution: K3s
	          provider: Docker
	          init: false
	          args: "--cni Calico --csi Disabled --load-balancer Disabled --metrics-server Disabled --policy-engine Gatekeeper --gitops-engine ArgoCD"
	        - distribution: K3s
	          provider: Docker
	          init: false
	          args: "--gitops-engine Flux --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
	        - distribution: K3s
	          provider: Docker
	          init: false
	          args: "--gitops-engine ArgoCD --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
	        - distribution: Talos
	          provider: Docker
	          init: false
	          args: "--cni Calico --csi Disabled --load-balancer Disabled --metrics-server Disabled --policy-engine Gatekeeper --gitops-engine ArgoCD"
	        - distribution: Talos
	          provider: Docker
	          init: false
	          args: "--gitops-engine Flux --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
	        - distribution: Talos
	          provider: Docker
	          init: false
	          args: "--gitops-engine ArgoCD --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
	        - distribution: VCluster
	          provider: Docker
	          init: false
	          args: "--cni Calico --csi Disabled --load-balancer Disabled --metrics-server Disabled --policy-engine Gatekeeper --gitops-engine ArgoCD"
	        - distribution: VCluster
	          provider: Docker
	          init: false
	          args: "--gitops-engine Flux --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
	        - distribution: VCluster
	          provider: Docker
	          init: false
	          args: "--gitops-engine ArgoCD --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
	        # KWOK: init=false primarily validates CLI flag parsing (same as other distributions).
	        # Additionally, KWOK suppresses Flux so workload push cannot discover a registry
	        # via FluxInstance, and init=false provides no ksail.yaml fallback.
	        # Exclude all non-default init=false args for KWOK, matching other distributions.
	        - distribution: KWOK
	          provider: Docker
	          init: false
	          args: "--name system-test-cluster --cni Cilium --csi Enabled --load-balancer Enabled --metrics-server Enabled --policy-engine Kyverno --cert-manager Enabled --gitops-engine Flux"
	        - distribution: KWOK
	          provider: Docker
	          init: false
	          args: "--cni Calico --csi Disabled --load-balancer Disabled --metrics-server Disabled --policy-engine Gatekeeper --gitops-engine ArgoCD"
	        - distribution: KWOK
	          provider: Docker
	          init: false
	          args: "--gitops-engine Flux --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
	        - distribution: KWOK
	          provider: Docker
	          init: false
	          args: "--gitops-engine ArgoCD --local-registry ghcr.io/devantler-tech/ksail/system-test-manifests"
	      include:
	        - distribution: Talos
	          provider: Docker
	          init: true
	          args: "--name system-test-cluster-with-image-verification --image-verification Enabled"
	  steps:
	    - name: 📄 Checkout
	      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	      with:
	        persist-credentials: false

	    - name: 🧹 Free disk space
	      # Targeted `rm -rf` of large hosted-tooling directories that the system
	      # tests do not need. Avoids the slow `apt-get remove` path used by
	      # endersonmenezes/free-disk-space, which previously consumed 3–9 min per
	      # job (with apt autoremove pulling in update-initramfs / man-db rebuilds).
	      # Each directory removal is best-effort: missing paths are not an error.
	      run: |
	        set -euo pipefail
	        before=$(df --output=avail -BG / | tail -1 | tr -dc '0-9')
	        # Largest offenders on ubuntu-latest (sizes from
	        # https://github.com/actions/runner-images). Order by descending size.
	        sudo rm -rf \
	          /usr/local/lib/android \
	          /usr/share/dotnet \
	          /opt/ghc \
	          /usr/local/share/boost \
	          /usr/share/swift \
	          /opt/hostedtoolcache/CodeQL \
	          /opt/hostedtoolcache/PyPy \
	          /opt/hostedtoolcache/Ruby \
	          /opt/hostedtoolcache/Python \
	          /usr/local/share/chromium \
	          /usr/local/share/powershell \
	          /usr/local/julia* \
	          /usr/local/aws-cli \
	          /usr/local/aws-sam-cli \
	          /usr/share/gradle* \
	          /usr/share/az* \
	          /usr/share/miniconda || true
	        docker system prune -af --volumes || true
	        after=$(df --output=avail -BG / | tail -1 | tr -dc '0-9')
	        echo "Free disk space on /: ${before}G -> ${after}G (gained $((after - before))G)"

	    # Skipped entirely when the DOCKERHUB_USERNAME variable is not set.
	    - name: 🔐 Login to Docker Hub
	      if: ${{ vars.DOCKERHUB_USERNAME != '' }}
	      uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
	      with:
	        username: ${{ vars.DOCKERHUB_USERNAME }}
	        password: ${{ secrets.DOCKERHUB_TOKEN }} # zizmor: ignore[secrets-outside-env] - third-party action input, no environment scoping available

	    - name: ⚙️ Setup Go
	      id: setup-go
	      uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
	      with:
	        go-version-file: go.mod
	        cache: false

	    - name: 📦 Cache KSail Binary
	      uses: ./.github/actions/cache-ksail-binary
	      with:
	        go-version: ${{ steps.setup-go.outputs.go-version }}
	        # Module files plus every Go source file at any depth.
	        source-hash: ${{ hashFiles('go.mod', 'go.sum', '**/*.go') }}
	        output-path: /usr/local/bin/ksail
	        save: "false"

	    - name: 📥 Restore Helm Cache
	      uses: ./.github/actions/restore-helm-cache

	    - name: 📥 Restore Mirror Cache
	      uses: ./.github/actions/restore-mirror-cache

	    - name: 🧪 Run KSail System Test
	      uses: ./.github/actions/ksail-system-test
	      with:
	        distribution: ${{ matrix.distribution }}
	        provider: ${{ matrix.provider }}
	        init: ${{ matrix.init }}
	        args: ${{ matrix.args }}
	        apply-overlay-path: ".github/fixtures/podinfo-overlay"

	        ghcr-user: ${{ github.actor }}
	        ghcr-token: ${{ secrets.GITHUB_TOKEN }}

	# Benchmark job: discovers packages that contain Go benchmarks, runs them,
	# condenses the repeated samples into one line per benchmark, and feeds the
	# result to github-action-benchmark. Runs on push and pull_request events
	# when the changes job reports benchmark == 'true'.
	benchmark:
	  name: 📊 Benchmark
	  needs: [changes]
	  if: >-
	    (github.event_name == 'push' || github.event_name == 'pull_request')
	    && needs.changes.outputs.benchmark == 'true'
	  runs-on: ubuntu-latest
	  timeout-minutes: 40
	  permissions:
	    contents: read
	    pull-requests: write
	  outputs:
	    # 'true' when no benchmark packages were found; read by benchmark-store.
	    skip: ${{ steps.discover.outputs.skip }}
	  steps:
	    - name: 📄 Checkout
	      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	      with:
	        persist-credentials: false

	    - name: ⚙️ Setup Go
	      uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
	      with:
	        go-version-file: go.mod

	    # Emits two step outputs: skip (true/false) and, when benchmarks exist,
	    # packages (a newline-separated list of directories).
	    - name: 🔍 Discover benchmark packages
	      id: discover
	      shell: bash
	      run: |
	        set -euo pipefail
	        # Directories holding *_test.go files that define a Benchmark function.
	        # The trailing "|| true" keeps pipefail from aborting the step when
	        # grep finds nothing.
	        pkgs=$(grep -rl '^func Benchmark' --include='*_test.go' . \
	          | xargs -I{} dirname {} \
	          | sort -u || true)
	        if [ -z "$pkgs" ]; then
	          echo "skip=true" >> "$GITHUB_OUTPUT"
	        else
	          {
	            echo "skip=false"
	            echo "packages<<EOF"
	            echo "$pkgs"
	            echo "EOF"
	          } >> "$GITHUB_OUTPUT"
	        fi

	    - name: 🏃 Run benchmarks
	      if: steps.discover.outputs.skip == 'false'
	      shell: bash
	      env:
	        PACKAGES: ${{ steps.discover.outputs.packages }}
	        # Reduce -count on PRs (-count=3) to shorten benchmark wall time while
	        # keeping enough samples for the awk averaging step in
	        # "Prepare benchmark regression gate input" to produce a stable signal.
	        # On push to main (which feeds benchmark-store / the historical chart),
	        # keep -count=5 for higher-quality baselines.
	        BENCH_COUNT: ${{ github.event_name == 'push' && '5' || '3' }}
	      run: |
	        set -euo pipefail
	        # -run='^$' matches no test names, so only benchmarks execute. Raw
	        # output is kept in bench.txt for the later steps.
	        echo "$PACKAGES" \
	          | xargs go test -bench=. -benchmem -run='^$' -count="${BENCH_COUNT}" -timeout=30m \
	          | tee "$RUNNER_TEMP/bench.txt"

	    - name: 🔍 Prepare benchmark regression gate input
	      if: steps.discover.outputs.skip == 'false'
	      shell: bash
	      run: |
	        set -euo pipefail
	        # Filter excluded benchmarks and average -count=N duplicate samples into
	        # a single representative value per benchmark.
	        #
	        # Excluded: BenchmarkCreateTarball_* — I/O-bound; timing is dominated by
	        # CI runner disk-cache state and can vary 4-5x between runs (see #4090).
	        # Excluded: benchmarks with ns/op < 100 — too fast for reliable measurement
	        # on shared runners with CPU clock jitter (see #3698).
	        #
	        # Averaging: go test -count=N emits N lines per benchmark. Without
	        # averaging, github-action-benchmark would compare each of the N lines
	        # individually against the single stored baseline, generating N alerts
	        # instead of one and skewing the history chart. Averaging collapses them
	        # to a single arithmetic-mean value (truncated to an integer) for a
	        # clean 1:1 comparison.
	        #
	        # The original bench.txt is preserved for artifact upload and historical
	        # storage.
	        awk '
	          # Remember every line so END can replay the file in its original order.
	          { lines[NR] = $0 }
	          # Pass 1: accumulate per-benchmark sums for result lines that are kept.
	          /^Benchmark/ && /ns\/op/ {
	            if ($1 ~ /^BenchmarkCreateTarball_/) next
	            for (i = 1; i <= NF; i++) {
	              if ($(i+1) == "ns/op" && $i + 0 < 100) next
	            }
	            name = $1; count[name]++; iters[name] += $2
	            for (i = 3; i <= NF; i++) {
	              if ($i == "ns/op") ns[name] += $(i-1)
	              if ($i == "B/op") bop[name] += $(i-1)
	              if ($i == "allocs/op") alloc[name] += $(i-1)
	            }
	          }
	          # Pass 2: print non-result lines unchanged and one averaged line per
	          # benchmark, at the position of its first kept sample.
	          END {
	            for (i = 1; i <= NR; i++) {
	              line = lines[i]
	              if (line !~ /^Benchmark/ || line !~ /ns\/op/) { print line; continue }
	              # Use the split() return value as the field count; length() on an
	              # array is not available in every awk.
	              nf = split(line, f)
	              nm = f[1]
	              if (nm ~ /^BenchmarkCreateTarball_/) continue
	              skip = 0
	              for (j = 1; j < nf; j++) {
	                if (f[j+1] == "ns/op" && f[j] + 0 < 100) { skip = 1; break }
	              }
	              if (skip) continue
	              if (!emitted[nm]) {
	                emitted[nm] = 1; n = count[nm]
	                out = nm "\t" int(iters[nm]/n) "\t" int(ns[nm]/n) " ns/op"
	                if (nm in bop) out = out " " int(bop[nm]/n) " B/op"
	                if (nm in alloc) out = out " " int(alloc[nm]/n) " allocs/op"
	                print out
	              }
	            }
	          }
	        ' "$RUNNER_TEMP/bench.txt" > "$RUNNER_TEMP/bench-filtered.txt"

	    # Compare the filtered results with the data on the benchmark-data branch.
	    # auto-push is false, so this step does not publish anything.
	    - name: 📊 Compare benchmark results
	      if: steps.discover.outputs.skip == 'false'
	      uses: benchmark-action/github-action-benchmark@a60cea5bc7b49e15c1f58f411161f99e0df48372 # v1.22.0
	      with:
	        tool: go
	        output-file-path: ${{ runner.temp }}/bench-filtered.txt
	        gh-pages-branch: benchmark-data
	        benchmark-data-dir-path: dev/bench
	        github-token: ${{ secrets.GITHUB_TOKEN }}
	        auto-push: false
	        alert-threshold: "150%"
	        # NOTE(review): fail-on-alert is false below; the action appears to
	        # consult fail-threshold only when fail-on-alert is true — confirm that
	        # this gate is meant to be advisory only.
	        fail-threshold: ${{ github.event_name != 'pull_request' && '200%' || '' }}
	        fail-on-alert: false
	        comment-on-alert: ${{ github.event_name == 'pull_request' }}
	        summary-always: true

	    # Only pushes upload the results; benchmark-store downloads this artifact.
	    - name: 📤 Upload benchmark results
	      if: steps.discover.outputs.skip == 'false' && github.event_name == 'push'
	      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
	      with:
	        name: bench-results
	        path: |
	          ${{ runner.temp }}/bench.txt
	          ${{ runner.temp }}/bench-filtered.txt
	        retention-days: 1

	# Store job: on push events, when the benchmark job actually ran benchmarks
	# (skip == 'false'), download its bench-results artifact and let
	# github-action-benchmark push the filtered results to the benchmark-data
	# branch (auto-push: true). contents: write is granted, presumably for that
	# push.
	benchmark-store:
	  name: 📤 Store Benchmark Data
	  needs: [changes, benchmark]
	  if: >-
	    github.event_name == 'push'
	    && needs.changes.outputs.benchmark == 'true'
	    && needs.benchmark.outputs.skip == 'false'
	  runs-on: ubuntu-latest
	  timeout-minutes: 10
	  permissions:
	    contents: write
	  steps:
	    - name: 📄 Checkout
	      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	      with:
	        persist-credentials: false

	    # Places bench.txt and bench-filtered.txt in the runner temp directory.
	    - name: 📥 Download benchmark results
	      uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
	      with:
	        name: bench-results
	        path: ${{ runner.temp }}

	    # Alerts, comments and summaries are all disabled here; this step only
	    # records data.
	    - name: 📊 Store benchmark data
	      uses: benchmark-action/github-action-benchmark@a60cea5bc7b49e15c1f58f411161f99e0df48372 # v1.22.0
	      with:
	        tool: go
	        output-file-path: ${{ runner.temp }}/bench-filtered.txt
	        gh-pages-branch: benchmark-data
	        benchmark-data-dir-path: dev/bench
	        github-token: ${{ secrets.GITHUB_TOKEN }}
	        auto-push: true
	        fail-on-alert: false
	        comment-on-alert: false
	        summary-always: false

	# Aggregation job: depends on every job listed under needs and passes each
	# job's result to the require-checks-in-pr action. "if: always()" makes it
	# run even when some of those jobs failed or were skipped.
	# Keep the needs list and the job-results list in the same order and in sync.
	require-checks-in-pr:
	  name: CI - Required Checks
	  runs-on: ubuntu-latest
	  timeout-minutes: 5
	  needs:
	    - rate-limit-gate
	    - changes
	    - ci-go
	    - wait-for-validate-go
	    - build-artifact
	    - generate
	    - auto-commit
	    - warm-helm-cache
	    - warm-mirror-cache
	    - system-test-docker
	    - audit-docs
	    - audit-vsce
	    - build-docs
	    - vscode-extension
	    - copilot-plugin
	    - benchmark
	    - benchmark-store
	  if: ${{ always() }}
	  steps:
	    - uses: devantler-tech/actions/require-checks-in-pr@61ae89ec83b943bd3a486fdcaf91cb66fd6b8760 # feat/require-checks-in-pr
	      with:
	        # Folded scalar: the results are joined into one space-separated string.
	        job-results: >-
	          ${{ needs.rate-limit-gate.result }}
	          ${{ needs.changes.result }}
	          ${{ needs.ci-go.result }}
	          ${{ needs.wait-for-validate-go.result }}
	          ${{ needs.build-artifact.result }}
	          ${{ needs.generate.result }}
	          ${{ needs.auto-commit.result }}
	          ${{ needs.warm-helm-cache.result }}
	          ${{ needs.warm-mirror-cache.result }}
	          ${{ needs.system-test-docker.result }}
	          ${{ needs.audit-docs.result }}
	          ${{ needs.audit-vsce.result }}
	          ${{ needs.build-docs.result }}
	          ${{ needs.vscode-extension.result }}
	          ${{ needs.copilot-plugin.result }}
	          ${{ needs.benchmark.result }}
	          ${{ needs.benchmark-store.result }}

# Source: GitHub Actions "Workflow file for this run" view for
# "fix: reuse existing cluster secrets when scaling Talos nodes during update #13402".