Release Gate #114
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Release Gate

# Triggers.
#
# The workflow can be started manually (workflow_dispatch) or invoked as a
# reusable workflow (workflow_call). Both triggers expose the same input
# names with the same defaults so every job can read `inputs.<name>`
# regardless of how the run started. `test_suite` is a `choice` for manual
# runs and a plain `string` for workflow_call.
on:
  workflow_dispatch:
    inputs:
      backend_tag:
        description: Backend image tag to test
        required: true
        type: string
      web_tag:
        description: Web image tag to test
        required: false
        type: string
        default: 'latest'
      test_suite:
        description: Test suite to run
        required: false
        type: choice
        options:
          - all
          - formats
          - repos
          - promotion
          - rbac
          - lifecycle
          - webhooks
          - search
          - platform
          - auth
          - stress
          - resilience
          - mesh
          - security
          - compatibility
          - pullthrough
        default: 'all'
      skip_teardown:
        description: Skip teardown (for debugging)
        required: false
        type: boolean
        default: false
      iac_ref:
        description: artifact-keeper-iac git ref for the Helm chart (default main)
        required: false
        type: string
        default: 'main'
      run_smoke_with_deps:
        description: |
          Run the clean-install-smoke-with-deps variant (issue #53).
          Disabled by default because enabling Trivy/DT/edge/openSCAP
          can exceed the standard ARC runner namespace's 4 CPU / 8 Gi
          quota. Set to true once a beefier runner pool is wired.
        required: false
        type: boolean
        default: false
  workflow_call:
    inputs:
      backend_tag:
        description: Backend image tag to test
        required: true
        type: string
      web_tag:
        description: Web image tag to test
        required: false
        type: string
        default: 'latest'
      test_suite:
        description: Test suite to run (same values as the workflow_dispatch choice list)
        required: false
        type: string
        default: 'all'
      skip_teardown:
        description: Skip teardown (for debugging)
        required: false
        type: boolean
        default: false
      iac_ref:
        description: artifact-keeper-iac git ref for the Helm chart (default main)
        required: false
        type: string
        default: 'main'
      run_smoke_with_deps:
        description: Run the clean-install-smoke-with-deps variant (issue #53)
        required: false
        type: boolean
        default: false
    # The smoke jobs read `secrets.GHCR_DOCKER_CONFIG`. Declaring it here
    # lets callers pass it explicitly; without the declaration it is only
    # reachable through `secrets: inherit`. Optional, because runs that
    # test public-only image tags do not need a pull secret.
    secrets:
      GHCR_DOCKER_CONFIG:
        description: Docker config JSON used as the ghcr.io image pull secret
        required: false
# Workflow-wide environment. Each value is taken from the matching
# configuration variable (`vars.*`) and falls back to the quoted literal
# when that variable is unset or empty.
# NOTE(review): no visible step reads these directly; presumably the
# namespace-provisioning scripts consume them -- confirm.
env:
  NAMESPACE_CPU: "${{ vars.TEST_NAMESPACE_CPU || '4000m' }}"
  NAMESPACE_MEMORY: "${{ vars.TEST_NAMESPACE_MEMORY || '8Gi' }}"
| jobs: | |
| # ------------------------------------------------------------------- | |
| # Version-set integrity check (issue #63) | |
| # | |
| # Runs FIRST, before any namespace is provisioned. Verifies that every | |
| # container image referenced by the release set actually exists at the | |
| # tag the chart references. This catches the structural failure mode | |
| # behind artifact-keeper#872 (customer-flagged: "current main Helm | |
| # chart only works with main backend and frontend, not a tagged | |
| # release"), artifact-keeper#905 (versioned tags missing on ghcr.io), | |
| # and artifact-keeper-web#320 (v1.1.8 web image never published). | |
| # | |
| # A green release-gate today says "the test cluster works"; this job | |
| # turns that into "every image referenced by the release set actually | |
| # exists at that tag." Failing here is a release-blocker that should | |
| # NEVER be soft-failed: a missing tag is a publish-pipeline regression, | |
| # not a flake. | |
| # ------------------------------------------------------------------- | |
version-set-integrity:
  # First job of the gate: it has no `needs:` and therefore starts before
  # any namespace is provisioned by the later jobs.
  timeout-minutes: 5
  runs-on: ak-e2e-runners
  steps:
    # The test repo provides tests/release-gate/verify-image-set.sh.
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      with:
        repository: artifact-keeper/artifact-keeper-test

    - name: Install Helm (for chart-default verification)
      uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2  # v5.0.0

    - name: Clone iac chart for default-tag verification
      env:
        IAC_REF: ${{ inputs.iac_ref || 'main' }}
      run: |
        # Shallow-clone the iac repo at the requested ref into the runner's
        # temp directory. verify-image-set.sh is then pointed at the chart
        # so it can render it with no overrides and compare the default
        # image tags -- the #872 customer-pain shape (chart on one tag,
        # images on another, no --set bridging them).
        iac_repo_url="https://github.com/artifact-keeper/artifact-keeper-iac.git"
        git clone --depth 1 --branch "${IAC_REF}" "${iac_repo_url}" "${RUNNER_TEMP}/iac"

    - name: Verify backend / web / openscap image tags exist on ghcr.io
      env:
        BACKEND_TAG: ${{ inputs.backend_tag }}
        WEB_TAG: ${{ inputs.web_tag }}
        # openscap is published in lockstep with the backend, on the same
        # tag, so the backend tag is reused here. If that lockstep breaks
        # (#872) the failure should surface before any deploy is attempted.
        OPENSCAP_TAG: ${{ inputs.backend_tag }}
      run: |
        verify_script="tests/release-gate/verify-image-set.sh"
        chmod +x "${verify_script}"
        "./${verify_script}" \
          --backend-tag "${BACKEND_TAG}" \
          --web-tag "${WEB_TAG}" \
          --openscap-tag "${OPENSCAP_TAG}" \
          --chart-dir "${RUNNER_TEMP}/iac/charts/artifact-keeper"

    # Diagnostics are uploaded only when an earlier step failed; a missing
    # log file is not an error.
    - name: Upload version-set diagnostics
      if: failure()
      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
      with:
        name: version-set-integrity-logs
        path: '/tmp/version-set-*.log'
        if-no-files-found: ignore
| # ------------------------------------------------------------------- | |
| # Clean-install smoke test | |
| # | |
| # Boots a fresh namespace, runs `helm install` against the documented | |
| # values-production.yaml (with overrides for deps the smoke can't | |
| # satisfy), waits for backend AND web Deployments to reach Ready, then | |
| # probes /readyz from inside the cluster. Catches startup panics (e.g. | |
| # the v1.1.8 Debian route panic) that crash the backend before it can | |
| # serve traffic. | |
| # | |
| # The `deploy` job (and therefore the entire test matrix downstream) | |
| # `needs:` this gate. A startup-broken release fails fast here without | |
| # burning runner time on the matrix. | |
| # ------------------------------------------------------------------- | |
clean-install-smoke:
  # Starts only after the image-set check has passed.
  needs: version-set-integrity
  timeout-minutes: 12
  runs-on: ak-e2e-runners
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      with:
        repository: artifact-keeper/artifact-keeper-test

    - name: Install kubectl
      uses: azure/setup-kubectl@829323503d1be3d00ca8346e5391ca0b07a9ab0d  # v5.1.0

    - name: Install Helm
      uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2  # v5.0.0

    - name: Run clean-install smoke test
      env:
        BACKEND_TAG: ${{ inputs.backend_tag }}
        WEB_TAG: ${{ inputs.web_tag }}
        # Chart ref the gate validates against. Falls back to `main` when
        # the input is unset; release pipelines should pass the iac tag
        # that ships with the release.
        IAC_REF: ${{ inputs.iac_ref || 'main' }}
        # ghcr.io pull secret. Private image tags fail with
        # ImagePullBackOff without it (the gate would then fail for the
        # wrong reason); runs against public-only tags can omit it.
        GHCR_DOCKER_CONFIG: ${{ secrets.GHCR_DOCKER_CONFIG }}
      run: |
        smoke_script="scripts/clean-install-smoke.sh"
        chmod +x "${smoke_script}"
        # Run id + run attempt is unique per workflow attempt (re-runs
        # increment the attempt), so a retried job never collides with the
        # RUN_ID of its earlier attempt.
        RUN_ID="${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"
        "./${smoke_script}" \
          --run-id "${RUN_ID}" \
          --backend-tag "${BACKEND_TAG}" \
          --web-tag "${WEB_TAG}" \
          --iac-ref "${IAC_REF}" \
          --timeout 300

    # Only uploaded when a previous step failed.
    - name: Upload smoke diagnostics
      if: failure()
      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
      with:
        name: clean-install-smoke-logs
        path: /tmp/test-logs/
        if-no-files-found: ignore
| # ------------------------------------------------------------------- | |
| # Clean-install smoke WITH dependencies (issue #53) | |
| # | |
| # The basic `clean-install-smoke` above disables Trivy, Dependency- | |
| # Track, edge replication, ingress, and openSCAP to keep the smoke | |
| # under the runner's memory budget. That leaves a real coverage gap: | |
| # chart wiring regressions in those subsystems pass the gate. A | |
| # v1.1.8-class regression that broke ONLY Trivy or Dependency-Track | |
| # wiring would NOT be caught by the basic smoke. | |
| # | |
| # This job runs the same smoke flow against a values overlay that | |
| # enables every optional subsystem and asserts each one reaches a | |
| # healthy state. | |
| # | |
| # OPT-IN ONLY: the job runs only when the `run_smoke_with_deps` input | |
| # is true (default false). Enabling all subsystems can exceed the ARC | |
| # runner namespace's 4 CPU / 8 Gi quota: | |
| # - Trivy: 1 CPU / 2 Gi limit | |
| # - DependencyTrack: 2 CPU / 4 Gi limit | |
| # - Edge: 500m / 512 Mi limit | |
| # - OpenSCAP: 500m / 1 Gi limit | |
| # - Backend: 2 CPU / 2 Gi limit | |
| # - Web/Postgres/OpenSearch: ~1 CPU / ~2 Gi combined | |
| # Total: roughly 7 CPU / 12 Gi limits, which can OOM the namespace. | |
| # | |
| # To enable by default: bump the namespace quota OR move this job to a | |
| # beefier runner pool (e.g. `ak-beefy-runners`), then change the | |
| # `run_smoke_with_deps` input default to true. Tracked under #53. | |
| # ------------------------------------------------------------------- | |
clean-install-smoke-with-deps:
  needs: clean-install-smoke
  # Opt-in job. `run_smoke_with_deps` is declared for both
  # workflow_dispatch and workflow_call and defaults to false, so the job
  # stays wired (and linted by actionlint) but is skipped unless the
  # dispatcher explicitly sets the input to true -- e.g. when running
  # against a beefier runner pool.
  # TODO(#53): enable by default once a runner with >= 8 CPU / 16 Gi is
  # available in the ARC pool.
  if: ${{ inputs.run_smoke_with_deps == true }}
  timeout-minutes: 20
  runs-on: ak-e2e-runners
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      with:
        repository: artifact-keeper/artifact-keeper-test

    - name: Install kubectl
      uses: azure/setup-kubectl@829323503d1be3d00ca8346e5391ca0b07a9ab0d  # v5.1.0

    - name: Install Helm
      uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2  # v5.0.0

    - name: Run clean-install-smoke with all deps enabled
      env:
        BACKEND_TAG: ${{ inputs.backend_tag }}
        WEB_TAG: ${{ inputs.web_tag }}
        IAC_REF: ${{ inputs.iac_ref || 'main' }}
        GHCR_DOCKER_CONFIG: ${{ secrets.GHCR_DOCKER_CONFIG }}
      run: |
        smoke_script="tests/release-gate/clean-install-smoke-with-deps.sh"
        chmod +x "${smoke_script}"
        # The "-deps" suffix keeps this run id distinct from the one the
        # basic clean-install-smoke job builds for the same attempt.
        RUN_ID="${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-deps"
        "./${smoke_script}" \
          --run-id "${RUN_ID}" \
          --backend-tag "${BACKEND_TAG}" \
          --web-tag "${WEB_TAG}" \
          --iac-ref "${IAC_REF}" \
          --timeout 600

    # Only uploaded when a previous step failed.
    - name: Upload smoke-with-deps diagnostics
      if: failure()
      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
      with:
        name: clean-install-smoke-with-deps-logs
        path: /tmp/test-logs/
        if-no-files-found: ignore
| # ------------------------------------------------------------------- | |
| # Chart upgrade smoke (issue #54) | |
| # | |
| # `clean-install-smoke` catches startup panics on a fresh install. | |
| # It does NOT catch: | |
| # - Migration-on-upgrade failures (schema change that fails to | |
| # apply when upgrading prev -> current) | |
| # - Chart-template breakage that only manifests on `helm upgrade` | |
| # (immutable field changes, StatefulSet rollout deadlocks) | |
| # - Resources that get re-created instead of preserved across | |
| # upgrades | |
| # | |
| # The script installs the previous stable release tag, pushes a | |
| # small artifact through the management API to establish state, | |
| # runs `helm upgrade` to the current backend image, then asserts | |
| # the artifact is still retrievable and `/readyz` returns 200. | |
| # | |
| # PREVIOUS_TAG: hardcoded today. Update on each release. Once | |
| # release tooling can introspect the previous tag automatically, | |
| # this can be derived from `gh release list` in a setup step. | |
| # ------------------------------------------------------------------- | |
chart-upgrade-smoke:
  needs: clean-install-smoke
  timeout-minutes: 25
  runs-on: ak-e2e-runners
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      with:
        repository: artifact-keeper/artifact-keeper-test

    - name: Install kubectl
      uses: azure/setup-kubectl@829323503d1be3d00ca8346e5391ca0b07a9ab0d  # v5.1.0

    - name: Install Helm
      uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2  # v5.0.0

    - name: Run chart-upgrade-smoke
      env:
        BACKEND_TAG: ${{ inputs.backend_tag }}
        WEB_TAG: ${{ inputs.web_tag }}
        IAC_REF: ${{ inputs.iac_ref || 'main' }}
        GHCR_DOCKER_CONFIG: ${{ secrets.GHCR_DOCKER_CONFIG }}
        # PREVIOUS_TAG: the previously released stable tag the upgrade
        # starts from. UPDATE THIS ON EACH RELEASE: once a new version
        # ships, set this to that version (it becomes the new "previous
        # stable"). Unprefixed semver form, because docker tags drop the
        # leading 'v' (see CLAUDE.md).
        PREVIOUS_TAG: '1.1.9'
        # PREVIOUS_WEB_TAG: web image tag for the "previous" side of the
        # upgrade. The web repo has its own release cadence and does NOT
        # mirror backend version tags, so PREVIOUS_TAG is not a valid web
        # image tag and would cause ImagePullBackOff
        # (artifact-keeper#1378). `main` is published on every push to
        # artifact-keeper-web main and is always pullable. For stricter
        # pinning, use a specific SHA tag instead (e.g. `sha-ea664a1`).
        PREVIOUS_WEB_TAG: 'main'
      run: |
        upgrade_script="tests/release-gate/chart-upgrade-smoke.sh"
        chmod +x "${upgrade_script}"
        RUN_ID="${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-upgrade"
        # PREVIOUS_IAC_REF: iac chart tag that shipped with the previous
        # release, derived from PREVIOUS_TAG. Passed explicitly so the
        # upgrade exercises the real prev->current chart diff (issue #54)
        # instead of relying on the script's built-in default.
        PREVIOUS_IAC_REF="artifact-keeper-${PREVIOUS_TAG}"
        "./${upgrade_script}" \
          --run-id "${RUN_ID}" \
          --previous-tag "${PREVIOUS_TAG}" \
          --backend-tag "${BACKEND_TAG}" \
          --web-tag "${WEB_TAG}" \
          --previous-web-tag "${PREVIOUS_WEB_TAG}" \
          --iac-ref "${IAC_REF}" \
          --previous-iac-ref "${PREVIOUS_IAC_REF}" \
          --timeout 600

    # Only uploaded when a previous step failed.
    - name: Upload chart-upgrade diagnostics
      if: failure()
      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
      with:
        name: chart-upgrade-smoke-logs
        path: /tmp/test-logs/
        if-no-files-found: ignore
| # ------------------------------------------------------------------- | |
| # Deploy test environment | |
| # | |
| # Gated on `clean-install-smoke` so that the matrix below cannot run | |
| # against a backend that fails to even start. A startup-broken release | |
| # fails fast in `clean-install-smoke` and the entire matrix is skipped, | |
| # preserving runner-time for releases that can actually be tested. | |
| # ------------------------------------------------------------------- | |
deploy:
  # Provisions the shared test namespace used by the downstream gates.
  #
  # Outputs:
  #   run_id      - unique id for this deployment (e2e-<epoch>-<run number>)
  #   namespace   - Kubernetes namespace, always "test-<run_id>"
  #   backend_url - in-cluster URL of the backend service
  needs: clean-install-smoke
  runs-on: ak-e2e-runners
  # Every other job in this workflow is time-boxed; without a limit a hung
  # deploy would hold the runner for the 360-minute default. The waits
  # below add up to about 7 minutes.
  # NOTE(review): 30 leaves headroom for create-test-namespace.sh, whose
  # runtime is not visible here -- tune if full-stack installs run longer.
  timeout-minutes: 30
  outputs:
    run_id: ${{ steps.setup.outputs.run_id }}
    namespace: ${{ steps.setup.outputs.namespace }}
    backend_url: ${{ steps.deploy.outputs.backend_url }}
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      with:
        repository: artifact-keeper/artifact-keeper-test

    - name: Install kubectl
      uses: azure/setup-kubectl@829323503d1be3d00ca8346e5391ca0b07a9ab0d  # v5.1.0

    - name: Install Helm
      uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2  # v5.0.0

    - name: Generate run ID
      id: setup
      run: |
        # Epoch seconds + run number; the namespace name is derived from it.
        RUN_ID="e2e-$(date +%s)-${GITHUB_RUN_NUMBER}"
        echo "run_id=${RUN_ID}" >> "$GITHUB_OUTPUT"
        echo "namespace=test-${RUN_ID}" >> "$GITHUB_OUTPUT"

    - name: Deploy test namespace
      id: deploy
      env:
        RUN_ID: ${{ steps.setup.outputs.run_id }}
        # Reuse the namespace published by the setup step instead of
        # rebuilding "test-<run_id>" by hand in every step.
        NAMESPACE: ${{ steps.setup.outputs.namespace }}
        BACKEND_TAG: ${{ inputs.backend_tag }}
        WEB_TAG: ${{ inputs.web_tag }}
      run: |
        chmod +x scripts/create-test-namespace.sh
        # --full-stack enables Trivy + scan workspace so the security
        # tests actually exercise the scanner instead of false-passing
        # against a no-scanner stack (#888 silent-success class).
        # NOTE(review): unlike the smoke jobs, inputs.iac_ref is not
        # forwarded here -- confirm the script pins the chart itself.
        ./scripts/create-test-namespace.sh \
          --run-id "${RUN_ID}" \
          --backend-tag "${BACKEND_TAG}" \
          --web-tag "${WEB_TAG}" \
          --full-stack
        BACKEND_URL="http://artifact-keeper-backend.${NAMESPACE}.svc.cluster.local:8080"
        echo "backend_url=${BACKEND_URL}" >> "$GITHUB_OUTPUT"

    - name: Wait for stack ready
      env:
        # Single source of truth: both values come from earlier step
        # outputs, so this step cannot drift from what the job publishes.
        NAMESPACE: ${{ steps.setup.outputs.namespace }}
        BACKEND_URL: ${{ steps.deploy.outputs.backend_url }}
      run: |
        chmod +x tests/lib/wait-for-ready.sh
        ./tests/lib/wait-for-ready.sh "${BACKEND_URL}" 180
        # Trivy rollout must be Available before the security tests
        # dispatch. The chart's fullnameOverride is "artifact-keeper"
        # (see helm/values-test-full.yaml) so the deployment is named
        # artifact-keeper-trivy regardless of the release name. Without
        # this gate, security-tests can race scanner pod scale-up and the
        # lite scan-completion gate sees an unreachable scanner as a
        # "real" failure (the #888 false-fail mirror).
        echo "Waiting for Trivy rollout in ${NAMESPACE}..."
        kubectl -n "${NAMESPACE}" rollout status \
          deployment/artifact-keeper-trivy --timeout=180s
        kubectl -n "${NAMESPACE}" wait --for=condition=Available \
          deployment/artifact-keeper-trivy --timeout=60s
| # ------------------------------------------------------------------- | |
| # Real-flow smoke (issue #45) | |
| # | |
| # The user's actual flow as a single gate check: push an artifact | |
| # through a native client, pull it back, trigger a scan, poll until | |
| # completion. Regressions in any step (broken upload, broken | |
| # download, scan-stuck-queued mirror of #871) fail the gate before | |
| # the broader format/security/repo matrix runs, so the operator | |
| # sees the FIRST signal that pierced the smoke. | |
| # | |
| # Uses npm (already installed on the runner pod for the `node` batch | |
| # in format-tests) so the gate stays under the 5-minute target in | |
| # the acceptance criteria for #45. | |
| # ------------------------------------------------------------------- | |
real-flow-smoke:
  needs: deploy
  timeout-minutes: 8
  runs-on: ak-e2e-runners
  env:
    # Target stack, as published by the deploy job.
    BASE_URL: ${{ needs.deploy.outputs.backend_url }}
    RUN_ID: ${{ needs.deploy.outputs.run_id }}
    ADMIN_PASS: 'TestRunner!2026secure'
    JUNIT_OUTPUT_DIR: /tmp/test-results
    # RELEASE_GATE=1 makes common.sh's skip_suite() a hard failure. A
    # silently skipped real-flow smoke is exactly the silent-success class
    # (#870/#871/#888) this gate was added to catch.
    RELEASE_GATE: '1'
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      with:
        repository: artifact-keeper/artifact-keeper-test

    - name: Install npm (real-flow uses the npm native client)
      run: |
        # Install Node.js 22 from NodeSource only when npm is not already
        # on PATH; installer output is discarded. The final version print
        # fails the step if npm is still missing.
        if ! command -v npm >/dev/null 2>&1; then
          curl -fsSL https://deb.nodesource.com/setup_22.x | sudo -E bash - > /dev/null 2>&1
          sudo apt-get install -y -qq nodejs > /dev/null
        fi
        npm --version

    - name: Run real-flow smoke
      run: |
        smoke_script="tests/release-gate/test-real-flow-smoke.sh"
        mkdir -p "$JUNIT_OUTPUT_DIR"
        chmod +x "${smoke_script}"
        "./${smoke_script}"

    # Results are uploaded whether the smoke passed or failed.
    - name: Upload real-flow smoke results
      if: always()
      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
      with:
        name: junit-real-flow-smoke
        path: /tmp/test-results/
        if-no-files-found: ignore
| # ------------------------------------------------------------------- | |
| # Scan-completion gate (matrix across representative formats). | |
| # | |
| # Closes the gap surfaced by artifact-keeper#888 ("works for npm, | |
| # silently fails for docker"). Each matrix entry runs the gate | |
| # primitive in its own job (parallel) so a regression in any one | |
| # format's scanner pipeline fails the release-gate loud. | |
| # | |
| # The wired formats (currently: npm) run the lite primitive at | |
| # tests/security/test-scan-completes.sh via the release-gate wrapper. | |
| # | |
| # The matrix is intentionally restricted to formats whose fixtures | |
| # exist. Scaffolded formats (oci, maven, pypi, cargo, helm) are NOT | |
| # in the matrix because a green checkmark on an `exit 0` scaffold | |
| # is the same silent-success class the gate exists to prevent. The | |
| # scan-completion-gate-scaffolds-pending job below surfaces the | |
| # deferred formats as ::warning:: annotations so the gap is visible | |
| # without painting the dashboard with fake passes. When a fixture- | |
| # builder for a deferred format lands (#62), add the format to this | |
| # matrix in the same PR. | |
| # | |
| # Why a matrix rather than a single sequential driver: | |
| # - Each format scan can take 30-60s; sequential 6-format runs | |
| # blow the 5-min release-gate budget. | |
| # - The workflow-level matrix surfaces per-format outcomes in the | |
| # GitHub Actions UI so an operator can see "oci failed, npm | |
| # passed" at a glance. | |
| # ------------------------------------------------------------------- | |
scan-completion-gate:
  # One parallel job per wired fixture format. fail-fast is off so a
  # failure in one format does not cancel the others.
  needs: deploy
  runs-on: ak-e2e-runners
  timeout-minutes: 8
  strategy:
    fail-fast: false
    matrix:
      format: [npm]
  env:
    BASE_URL: ${{ needs.deploy.outputs.backend_url }}
    RUN_ID: ${{ needs.deploy.outputs.run_id }}
    ADMIN_PASS: TestRunner!2026secure
    JUNIT_OUTPUT_DIR: /tmp/test-results
    # RELEASE_GATE=1 turns common.sh's skip_suite() into a hard fail.
    # ALLOW_SCANNER_SKIP=0 is enforced inside the gate primitive --
    # a scanner-pod-down skip in release-gate context is exactly the
    # silent-success class this gate exists to catch.
    RELEASE_GATE: '1'
    ALLOW_SCANNER_SKIP: '0'
    # Matrix format, exposed to the scripts below so they never need to
    # interpolate `${{ matrix.format }}` into shell text.
    FIXTURE_FORMAT: ${{ matrix.format }}
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      with:
        repository: artifact-keeper/artifact-keeper-test

    - name: Run scan-completion gate for ${{ matrix.format }}
      run: |
        mkdir -p "$JUNIT_OUTPUT_DIR"
        chmod +x tests/release-gate/scan-completion-gate.sh
        # Per-format RUN_ID suffix so concurrent matrix jobs do not
        # collide on the repo key (scan-complete-<RUN_ID>). The override
        # applies to this one command only.
        RUN_ID="${RUN_ID}-${FIXTURE_FORMAT}" \
          ./tests/release-gate/scan-completion-gate.sh

    # Other kubectl-using jobs in this workflow install it explicitly.
    # Without this step, on a runner image that lacks kubectl, the capture
    # below would only record "command not found" instead of pod logs.
    # Installed on failure only, so green runs pay nothing.
    - name: Install kubectl (failure diagnostics only)
      if: failure()
      uses: azure/setup-kubectl@829323503d1be3d00ca8346e5391ca0b07a9ab0d  # v5.1.0

    - name: Capture scanner pod logs on failure
      if: failure()
      run: |
        # The namespace follows the deploy job's "test-<run_id>" pattern;
        # RUN_ID here is the job-level (unsuffixed) value. Every capture
        # is best-effort: if kubectl or cluster access is unavailable,
        # `|| true` keeps the step green so the JUnit XML still uploads.
        NS="test-${RUN_ID}"
        mkdir -p /tmp/test-logs
        kubectl -n "$NS" logs -l app.kubernetes.io/component=scanner \
          --tail=2000 > "/tmp/test-logs/scanner-${FIXTURE_FORMAT}.log" 2>&1 || true
        # #1379: chart labels Trivy pods with app.kubernetes.io/component=trivy
        # (app.kubernetes.io/name is the chart name "artifact-keeper", not the
        # component). Use the component label so the log capture actually finds
        # the pod when the gate is failing and the operator most needs the logs.
        kubectl -n "$NS" logs -l app.kubernetes.io/component=trivy \
          --tail=2000 > "/tmp/test-logs/trivy-${FIXTURE_FORMAT}.log" 2>&1 || true
        # NOTE(review): this selector uses the legacy `app=` label rather
        # than the app.kubernetes.io/component scheme described above --
        # confirm it still matches the backend pods.
        kubectl -n "$NS" logs -l app=artifact-keeper-backend \
          --tail=1000 > "/tmp/test-logs/backend-${FIXTURE_FORMAT}.log" 2>&1 || true

    # Always upload: JUnit results on every run, pod logs when captured.
    - name: Upload scan-completion gate results
      if: always()
      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
      with:
        name: junit-scan-completion-${{ matrix.format }}
        path: |
          /tmp/test-results/
          /tmp/test-logs/
        if-no-files-found: ignore
| # ------------------------------------------------------------------- | |
| # Scaffolds-pending sentinel (#62). | |
| # | |
| # This job exists ONLY to surface the deferred formats as a | |
| # ::warning:: annotation on every release-gate run so the gap is | |
| # visible. It does NOT run any test. It is NOT a required gate. | |
| # Deliberately a separate job (not a matrix step) so the GitHub | |
| # Actions UI shows ONE warning row rather than five green | |
| # checkmarks that imply broader format coverage than exists. | |
| # | |
| # When a fixture-builder for one of the listed formats lands, | |
| # remove the format from this list and add it to the matrix above. | |
| # ------------------------------------------------------------------- | |
scan-completion-gate-scaffolds-pending:
  # Annotation-only job: emits one ::warning:: per deferred format and a
  # plain-text coverage summary. It runs no tests.
  needs: deploy
  timeout-minutes: 2
  runs-on: ak-e2e-runners
  steps:
    - name: Emit scaffolds-pending warnings
      run: |
        # One warning annotation per format that still lacks a fixture.
        for pending_format in oci maven pypi cargo helm; do
          echo "::warning title=scan-completion gate: ${pending_format} fixture missing::No scan-completion fixture exists for ${pending_format}; the silent-success class (#888) is NOT covered for this format. Tracked under artifact-keeper-test#62."
        done

        # Repeat the coverage picture in the plain runner log so it is
        # visible without opening the annotations pane. Keep this list in
        # step with the loop above.
        echo ""
        echo "Scan-completion format coverage:"
        echo "  wired:    npm"
        echo "  deferred: oci, maven, pypi, cargo, helm (artifact-keeper-test#62)"
| # ------------------------------------------------------------------- | |
| # SBOM correctness gate (scaffold). | |
| # | |
| # Pins the SBOM endpoint contract (POST /api/v1/sbom returns 200 | |
| # with the documented SbomResponse shape) so an endpoint deletion | |
| # or 5xx regression fails the release loud. The component_count > 0 | |
| # assertion is deferred to artifact-keeper#903 (--list-all-pkgs) | |
| # per the #57 epic. | |
| # ------------------------------------------------------------------- | |
sbom-correctness-gate:
  # Runs the SBOM endpoint contract check against the stack provisioned by
  # the deploy job and uploads JUnit results plus, on failure, pod logs.
  needs: deploy
  runs-on: ak-e2e-runners
  timeout-minutes: 6
  env:
    BASE_URL: ${{ needs.deploy.outputs.backend_url }}
    RUN_ID: ${{ needs.deploy.outputs.run_id }}
    ADMIN_PASS: TestRunner!2026secure
    JUNIT_OUTPUT_DIR: /tmp/test-results
    # Same hard-fail-on-skip setting the other gates in this workflow use.
    RELEASE_GATE: '1'
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      with:
        repository: artifact-keeper/artifact-keeper-test

    - name: Run SBOM correctness gate
      run: |
        mkdir -p "$JUNIT_OUTPUT_DIR"
        chmod +x tests/release-gate/sbom-correctness-gate.sh
        ./tests/release-gate/sbom-correctness-gate.sh

    # Other kubectl-using jobs in this workflow install it explicitly.
    # Without this step, on a runner image that lacks kubectl, the capture
    # below would only record "command not found" instead of pod logs.
    # Installed on failure only, so green runs pay nothing.
    - name: Install kubectl (failure diagnostics only)
      if: failure()
      uses: azure/setup-kubectl@829323503d1be3d00ca8346e5391ca0b07a9ab0d  # v5.1.0

    - name: Capture scanner pod logs on failure
      if: failure()
      run: |
        # Namespace follows the deploy job's "test-<run_id>" pattern.
        # Every capture is best-effort (`|| true`) so a diagnostics
        # problem never hides the gate's own result.
        NS="test-${RUN_ID}"
        mkdir -p /tmp/test-logs
        kubectl -n "$NS" logs -l app.kubernetes.io/component=scanner \
          --tail=2000 > /tmp/test-logs/sbom-scanner.log 2>&1 || true
        # Trivy pods are labelled app.kubernetes.io/component=trivy
        # (#1379), so the "scanner" selector alone can come back empty.
        # Capture them too, as the scan-completion gate does.
        kubectl -n "$NS" logs -l app.kubernetes.io/component=trivy \
          --tail=2000 > /tmp/test-logs/sbom-trivy.log 2>&1 || true
        # NOTE(review): this selector uses the legacy `app=` label --
        # confirm it still matches the backend pods.
        kubectl -n "$NS" logs -l app=artifact-keeper-backend \
          --tail=1000 > /tmp/test-logs/sbom-backend.log 2>&1 || true

    # Always upload: JUnit results on every run, pod logs when captured.
    - name: Upload SBOM gate results
      if: always()
      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
      with:
        name: junit-sbom-correctness
        path: |
          /tmp/test-results/
          /tmp/test-logs/
        if-no-files-found: ignore
| # ------------------------------------------------------------------- | |
| # Pinned-CVE assertion (#64). | |
| # | |
| # Tightens the gate beyond "findings_count >= 1" to a specific CVE | |
| # id. Catches scanner-DB drift and parser-correctness regressions | |
| # that the findings-count check alone would let through. | |
| # ------------------------------------------------------------------- | |
| pinned-cve-gate: | |
| needs: deploy | |
| runs-on: ak-e2e-runners | |
| timeout-minutes: 8 | |
| env: | |
| BASE_URL: ${{ needs.deploy.outputs.backend_url }} | |
| RUN_ID: ${{ needs.deploy.outputs.run_id }} | |
| ADMIN_PASS: TestRunner!2026secure | |
| JUNIT_OUTPUT_DIR: /tmp/test-results | |
| RELEASE_GATE: '1' | |
| # CVE-2019-10744 (lodash 4.17.4 prototype pollution) is what the | |
| # lite fixture pins. log4j 2.14.0 / CVE-2021-44228 will move | |
| # under the oci matrix entry once the oci fixture-builder lands | |
| # (#62 + #64 extension). | |
| EXPECTED_VULN_CVE: CVE-2019-10744 | |
| # Trivy DB age threshold for the freshness pre-flight. The DB is | |
| # rebuilt every 6 hours upstream; we accept anything <= 14 days | |
| # so weekend gaps and slow mirror sync do not generate false | |
| # failures, while still catching DBs old enough that | |
| # CVE-2019-10744 (published 2019) could fall out of recent index | |
| # shards. Override at workflow_dispatch via repo variable | |
| # TRIVY_DB_MAX_AGE_DAYS if needed. | |
| TRIVY_DB_MAX_AGE_DAYS: ${{ vars.TRIVY_DB_MAX_AGE_DAYS || '14' }} | |
| steps: | |
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| repository: artifact-keeper/artifact-keeper-test | |
| - name: Install kubectl | |
| uses: azure/setup-kubectl@829323503d1be3d00ca8346e5391ca0b07a9ab0d # v5.1.0 | |
| - name: Trivy DB freshness pre-flight | |
| # Distinguish "CVE database is stale" from "CVE-2019-10744 not | |
| # surfaced". Without this step, a stale Trivy DB on the runner | |
| # image causes the assertion to silently pass (no CVE found == | |
| # 0 findings == "did Trivy run at all?"). The misleading failure | |
| # message blames the gate; the actual root cause is upstream. | |
| # | |
| # Exit codes: | |
| # 0 - DB reachable, age within TRIVY_DB_MAX_AGE_DAYS | |
| # 42 - DB too old (distinct so the rollup operator can tell | |
| # scanner DB drift apart from a real CVE-detection bug) | |
| # 43 - Trivy pod not found, trivy --version returned nothing, or | |
| # no usable UpdatedAt timestamp could be parsed from its output | |
| run: | | |
| # -e is intentionally not listed here; per the pipeline-hardening | |
| # note further down, the workflow shell already applies it. | |
| set -uo pipefail | |
| NS="test-${{ needs.deploy.outputs.run_id }}" | |
| MAX_AGE_DAYS="${TRIVY_DB_MAX_AGE_DAYS}" | |
| # The comparison below is done in seconds; days are only for display. | |
| MAX_AGE_SECONDS=$(( MAX_AGE_DAYS * 86400 )) | |
| # Locate the Trivy pod. The Helm chart names the deployment | |
| # artifact-keeper-trivy; we kubectl-exec into the first pod the | |
| # label selector returns. Readiness is not checked here: a pod | |
| # that cannot be exec'd into surfaces as exit 43 when | |
| # `trivy --version` yields no output. | |
| # | |
| # Label selector note (#1379): the chart's _helpers.tpl labels | |
| # every component with app.kubernetes.io/name=artifact-keeper | |
| # (the chart name) and distinguishes components via | |
| # app.kubernetes.io/component=<name>. Earlier revisions of this | |
| # pre-flight queried `app.kubernetes.io/name=trivy`, which | |
| # never matched, producing the misleading "Trivy pod not | |
| # found" error even though the deploy job's | |
| # `kubectl rollout status deployment/artifact-keeper-trivy` | |
| # had already succeeded. The correct selector is | |
| # `app.kubernetes.io/component=trivy`. | |
| # | |
| # `|| true` keeps POD empty (instead of aborting the script) when | |
| # kubectl fails, so the guard below can emit diagnostics. | |
| POD=$(kubectl -n "$NS" get pods \ | |
| -l app.kubernetes.io/component=trivy \ | |
| -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true) | |
| if [ -z "$POD" ]; then | |
| echo "::error::Trivy pod not found in namespace ${NS} (pre-flight cannot run)" | |
| echo "::error::Checked selector: app.kubernetes.io/component=trivy" | |
| echo "::error::Deploy job should have installed Trivy via values-test-full.yaml (trivy.enabled=true)." | |
| kubectl -n "$NS" get pods --show-labels 2>&1 | head -40 || true | |
| exit 43 | |
| fi | |
| # Warm up the Trivy vulnerability DB before reading its | |
| # UpdatedAt. Trivy 0.62 downloads the DB lazily on first scan, | |
| # not at pod startup, so `trivy --version` immediately after | |
| # rollout returns only `Version: <x>` with no `UpdatedAt:` | |
| # line, which the parser below cannot handle (#197). Using | |
| # `image --download-db-only` is idempotent: it no-ops if the | |
| # DB is already present and within the upstream | |
| # download-interval window. | |
| # | |
| # Try the container named `trivy` first, then the pod's default | |
| # container. A failed warm-up is only a warning: the UpdatedAt | |
| # parse below decides whether the step fails. | |
| echo "Warming up Trivy vulnerability DB..." | |
| kubectl -n "$NS" exec "$POD" -c trivy -- \ | |
| trivy image --download-db-only --quiet >/dev/null 2>&1 \ | |
| || kubectl -n "$NS" exec "$POD" -- \ | |
| trivy image --download-db-only --quiet >/dev/null 2>&1 \ | |
| || echo "::warning::trivy image --download-db-only returned non-zero; will inspect --version anyway" | |
| # trivy --version emits "Vulnerability DB: ... UpdatedAt: 2026-04-30 ..." | |
| # in older versions and a YAML-ish block in newer versions. | |
| # Parse both shapes. | |
| # Same container fallback as the warm-up; stderr is discarded, so | |
| # an exec failure shows up only as empty output. | |
| VER_OUT=$(kubectl -n "$NS" exec "$POD" -c trivy -- trivy --version 2>/dev/null \ | |
| || kubectl -n "$NS" exec "$POD" -- trivy --version 2>/dev/null \ | |
| || true) | |
| if [ -z "$VER_OUT" ]; then | |
| echo "::error::trivy --version returned empty output from pod ${POD}" | |
| exit 43 | |
| fi | |
| echo "trivy --version output:" | |
| echo "$VER_OUT" | |
| echo "" | |
| # Extract UpdatedAt timestamp. Format varies: | |
| # " UpdatedAt: 2026-04-30 12:34:56.789 +0000 UTC" | |
| # " UpdatedAt 2026-04-30 12:34:56.789 +0000 UTC" | |
| # | |
| # Pipeline hardening (#197): grep's no-match exit code (1) | |
| # used to propagate via `set -o pipefail` and the workflow | |
| # shell's implicit `set -e`, killing the script before the | |
| # `if [ -z "$DB_DATE" ]` guard below could surface a clean | |
| # exit 43. The `|| true` here keeps `DB_DATE` empty on | |
| # no-match so the guard does its job. | |
| # | |
| # Only the first matching line is used. sed keeps the date and | |
| # time; awk then prints exactly those two fields, so the | |
| # "+0000 UTC" suffix never reaches DB_DATE. | |
| DB_DATE=$( { echo "$VER_OUT" | grep -iE 'UpdatedAt' || true; } | head -n1 \ | |
| | sed -E 's/.*UpdatedAt[: ]+([0-9-]+ [0-9:.]+).*/\1/' \ | |
| | awk '{print $1" "$2}') | |
| # The `date -d` probe also rejects lines where sed did not match | |
| # and passed the raw text through unchanged. | |
| if [ -z "$DB_DATE" ] || ! date -d "$DB_DATE" +%s >/dev/null 2>&1; then | |
| echo "::error::Could not parse Trivy DB UpdatedAt from --version output." | |
| echo "::error::Likely cause: Trivy DB download failed during warmup, or trivy --version output format changed." | |
| echo "::error::Raw version output above for diagnosis." | |
| exit 43 | |
| fi | |
| # DB_DATE carries no zone, so -u makes date read it as UTC. | |
| DB_EPOCH=$(date -d "$DB_DATE" -u +%s) | |
| NOW_EPOCH=$(date -u +%s) | |
| AGE_SECONDS=$(( NOW_EPOCH - DB_EPOCH )) | |
| # Integer division: AGE_DAYS is truncated and used for messages only. | |
| AGE_DAYS=$(( AGE_SECONDS / 86400 )) | |
| echo "Trivy DB UpdatedAt: ${DB_DATE} (age: ${AGE_DAYS} days, threshold: ${MAX_AGE_DAYS} days)" | |
| if [ "$AGE_SECONDS" -gt "$MAX_AGE_SECONDS" ]; then | |
| echo "::error::Trivy DB is ${AGE_DAYS} days old, exceeds threshold of ${MAX_AGE_DAYS} days." | |
| echo "::error::The pinned-CVE assertion below would surface a misleading 'CVE-2019-10744 not found' failure;" | |
| echo "::error::the actual root cause is upstream DB staleness. Refresh Trivy DB or bump the runner image." | |
| exit 42 | |
| fi | |
| echo "Trivy DB freshness OK." | |
| # Runs the pinned-CVE assertion script from the test-repo checkout. | |
| # JUNIT_OUTPUT_DIR is presumably set in this job's env block -- confirm. | |
| - name: Run pinned-CVE gate | |
| run: | | |
| mkdir -p "$JUNIT_OUTPUT_DIR" | |
| chmod +x tests/release-gate/test-pinned-cve.sh | |
| ./tests/release-gate/test-pinned-cve.sh | |
| - name: Upload pinned-CVE gate results | |
| # always(): publish whatever landed in the results directory even when | |
| # an earlier step failed; an empty directory is not an error. | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| if: always() | |
| with: | |
| path: /tmp/test-results/ | |
| if-no-files-found: ignore | |
| name: junit-pinned-cve | |
| # ------------------------------------------------------------------- | |
| # Format tests (8 parallel batches) | |
| # ------------------------------------------------------------------- | |
| format-tests: | |
| # Runs the per-format test scripts against the deployed backend, one | |
| # matrix entry per batch. fail-fast is off so one failing batch does not | |
| # cancel the others. | |
| needs: deploy | |
| if: inputs.test_suite == 'all' || inputs.test_suite == 'formats' | |
| runs-on: ak-e2e-runners | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| # Each batch has a name (used for dependency install and the artifact | |
| # name) and a space-separated list of scripts under tests/formats/. | |
| batch: | |
| - name: node | |
| scripts: "test-npm.sh test-npm-remote.sh test-vscode.sh" | |
| - name: python | |
| scripts: "test-pypi.sh test-pypi-native-client.sh test-pypi-remote.sh test-conda.sh test-huggingface.sh test-mlmodel.sh" | |
| - name: jvm | |
| scripts: "test-maven.sh test-maven-native-client.sh test-maven-remote.sh test-maven-virtual-snapshot.sh test-sbt.sh test-gradle-conformance.sh" | |
| - name: rust-go-swift | |
| scripts: "test-cargo.sh test-cargo-remote.sh test-go.sh test-swift.sh test-pub.sh" | |
| - name: system-packages | |
| scripts: "test-debian.sh test-rpm.sh test-alpine.sh test-opkg.sh" | |
| - name: containers | |
| scripts: "test-oci.sh test-oci-remote.sh test-docker-native-client.sh test-helm.sh test-incus.sh" | |
| - name: misc-native | |
| scripts: "test-terraform.sh test-composer.sh test-hex.sh test-rubygems.sh test-nuget.sh test-cocoapods.sh test-cran.sh" | |
| - name: generic-protocol | |
| scripts: "test-generic.sh test-generic-native-client.sh test-gitlfs.sh test-protobuf.sh test-bazel.sh test-conan.sh test-conan-auth.sh test-conan-recipes.sh test-conan-packages.sh test-conan-search.sh test-conan-revisions.sh test-conan-remote.sh test-conan-errors.sh test-conan-stress.sh test-ansible.sh test-p2.sh test-jetbrains.sh test-vagrant.sh test-wasm.sh test-puppet.sh test-chef.sh" | |
| env: | |
| BASE_URL: ${{ needs.deploy.outputs.backend_url }} | |
| RUN_ID: ${{ needs.deploy.outputs.run_id }} | |
| ADMIN_PASS: TestRunner!2026secure | |
| JUNIT_OUTPUT_DIR: /tmp/test-results | |
| # RELEASE_GATE=1 turns common.sh's skip_suite() into a hard fail. | |
| # A silently-skipped test in release-gate context is exactly the | |
| # silent-success class (#870/#871/#888) the gate exists to catch. | |
| RELEASE_GATE: '1' | |
| steps: | |
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| repository: artifact-keeper/artifact-keeper-test | |
| - name: Install test dependencies | |
| # Installs only the tools the current batch needs, and only when they | |
| # are not already on the runner image. | |
| run: | | |
| BATCH="${{ matrix.batch.name }}" | |
| echo "Installing dependencies for batch: $BATCH" | |
| # Common: ensure zip is available (used by maven, swift, vscode, go) | |
| if ! command -v zip &>/dev/null; then | |
| sudo apt-get update -qq && sudo apt-get install -y -qq zip > /dev/null | |
| fi | |
| case "$BATCH" in | |
| node) | |
| if ! command -v npm &>/dev/null; then | |
| curl -fsSL https://deb.nodesource.com/setup_22.x | sudo -E bash - > /dev/null 2>&1 | |
| sudo apt-get install -y -qq nodejs > /dev/null | |
| fi | |
| ;; | |
| python) | |
| if ! command -v python3 &>/dev/null; then | |
| sudo apt-get update -qq && sudo apt-get install -y -qq python3 python3-pip python3-setuptools python3-venv > /dev/null | |
| fi | |
| # python3-venv is required by test-pypi-native-client.sh; install | |
| # it even if python3 is already present, since the bundled | |
| # interpreter may have ensurepip stripped out. | |
| # | |
| # Probe ensurepip itself: `import venv` succeeds on Debian/Ubuntu | |
| # without python3-venv (the package ships ensurepip, not the venv | |
| # module), so a venv-only probe never triggers the install. | |
| if ! python3 -c 'import ensurepip, venv' &>/dev/null; then | |
| sudo apt-get update -qq && sudo apt-get install -y -qq python3-venv > /dev/null || true | |
| fi | |
| ;; | |
| jvm) | |
| # mvn is required by test-maven-native-client.sh; the suite would | |
| # skip itself if maven is missing (a hard fail under | |
| # RELEASE_GATE=1), so we install it here and the gate actually | |
| # exercises native-client coverage. | |
| if ! command -v mvn &>/dev/null; then | |
| sudo apt-get update -qq && sudo apt-get install -y -qq maven > /dev/null || true | |
| fi | |
| ;; | |
| rust-go-swift) | |
| if ! command -v go &>/dev/null; then | |
| GO_VERSION="1.23.6" | |
| curl -sSL "https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz" | sudo tar -C /usr/local -xz | |
| # GITHUB_PATH makes the toolchain visible to later steps. | |
| echo "/usr/local/go/bin" >> "$GITHUB_PATH" | |
| fi | |
| if ! command -v cargo &>/dev/null; then | |
| curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal | |
| echo "$HOME/.cargo/bin" >> "$GITHUB_PATH" | |
| fi | |
| ;; | |
| containers) | |
| # Helm is needed for test-helm.sh | |
| if ! command -v helm &>/dev/null; then | |
| curl -sSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash | |
| fi | |
| ;; | |
| system-packages) | |
| # ar (from binutils) for Debian package assembly | |
| if ! command -v ar &>/dev/null; then | |
| sudo apt-get update -qq && sudo apt-get install -y -qq binutils > /dev/null | |
| fi | |
| ;; | |
| esac | |
| - name: Run ${{ matrix.batch.name }} format tests | |
| # Runs every script in the batch even after a failure, then fails the | |
| # step if any script failed. | |
| run: | | |
| mkdir -p "$JUNIT_OUTPUT_DIR" | |
| exit_code=0 | |
| for script in ${{ matrix.batch.scripts }}; do | |
| echo "=== Running ${script} ===" | |
| if ! bash "tests/formats/${script}"; then | |
| echo "FAILED: ${script}" | |
| exit_code=1 | |
| fi | |
| done | |
| exit $exit_code | |
| - name: Upload test results | |
| if: always() | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| with: | |
| name: junit-formats-${{ matrix.batch.name }} | |
| path: /tmp/test-results/*.xml | |
| if-no-files-found: ignore | |
| # ------------------------------------------------------------------- | |
| # Security tests | |
| # | |
| # cache-poisoning + cache-stampede boot a Python mock upstream on the | |
| # runner pod and need the backend to dial back to the runner pod. | |
| # We compute the runner's pod IP at runtime and pass the bare IPv4 | |
| # address to the tests as MOCK_UPSTREAM_HOSTNAME, so the dial-back does | |
| # not depend on cluster DNS serving pod records | |
| # (10-1-2-3.<ns>.pod.cluster.local). | |
| # | |
| # PROXY_MAX_CONCURRENT_FETCHES / PROXY_QUEUE_TIMEOUT_SECS pin the | |
| # values the test asserts against so chart-default drift doesn't | |
| # silently make the assertion measure the wrong limit. They MUST match | |
| # the values the deployed backend was configured with. | |
| # ------------------------------------------------------------------- | |
| security-tests: | |
| # Runs the `security` suite via scripts/run-suite.sh after exporting the | |
| # runner pod's IP so the backend can dial back to mock upstreams. | |
| needs: deploy | |
| # continue-on-error: test-scan-completes.sh asserts on Grype scanner | |
| # finishing with findings. Grype on the v1.1.x backend image fails | |
| # deterministically because the vulnerability DB is not pre-seeded | |
| # in the Dockerfile and our network-restricted ARC runner pods can't | |
| # fetch grype.anchore.io at scan time. The quality gate is | |
| # LAST-scanner-wins (policy_service reads LIMIT 1 ORDER BY created_at | |
| # DESC), so Trivy success satisfies block_unscanned and the practical | |
| # security posture is unaffected. Tracked for fix in v1.1.10: | |
| # artifact-keeper#1001 (pre-seed Grype DB in Dockerfile). The other | |
| # 44 security tests in the suite still run and gate the release. | |
| # | |
| # NOTE(review): continue-on-error is set on the whole job, so a failure | |
| # of ANY script in this suite does not fail the workflow run by itself. | |
| # The "still gate the release" claim presumably relies on a later | |
| # rollup reading the uploaded JUnit results -- confirm. | |
| continue-on-error: true | |
| if: inputs.test_suite == 'all' || inputs.test_suite == 'security' | |
| runs-on: ak-e2e-runners | |
| timeout-minutes: 20 | |
| env: | |
| BASE_URL: ${{ needs.deploy.outputs.backend_url }} | |
| RUN_ID: ${{ needs.deploy.outputs.run_id }} | |
| ADMIN_PASS: TestRunner!2026secure | |
| JUNIT_OUTPUT_DIR: /tmp/test-results | |
| # RELEASE_GATE=1 turns common.sh's skip_suite() into a hard fail. | |
| # A silently-skipped test in release-gate context is exactly the | |
| # silent-success class (#870/#871/#888) the gate exists to catch. | |
| RELEASE_GATE: '1' | |
| # Per-script timeout for run-suite.sh. Several Epic 2 tests | |
| # (cve-history, license-policy, scan-policy, quality-gate-blocks-upload, | |
| # scan-dedup-checksum) poll for scan completion with default | |
| # SCAN_TIMEOUT=180. The default 120s wrapper would SIGKILL them before | |
| # they could write JUnit XML. 300s gives 120s headroom over SCAN_TIMEOUT | |
| # for fixture build, upload, and cleanup. | |
| TEST_TIMEOUT: '300' | |
| # Stampede / poisoning test knobs. Must match the values the chart | |
| # rendered for the backend Deployment (see helm values-test.yaml). | |
| PROXY_MAX_CONCURRENT_FETCHES: '20' | |
| PROXY_QUEUE_TIMEOUT_SECS: '5' | |
| STAMPEDE_UPSTREAM_DELAY_MS: '2000' | |
| # AK_BACKEND_BRANCH for the feature-flag layer (issue #65). The | |
| # security suite hosts test-feature-flag-drift.sh which is the | |
| # truth-side check that AK_FEATURES matches the deployed | |
| # backend's reported version. See pullthrough-tests env for the | |
| # rationale on how this maps from inputs.backend_tag. | |
| AK_BACKEND_BRANCH: ${{ startsWith(inputs.backend_tag, '1.1.') && 'release/1.1.x' || (startsWith(inputs.backend_tag, '1.2.') && 'release/1.2.x' || 'main') }} | |
| steps: | |
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| repository: artifact-keeper/artifact-keeper-test | |
| - name: Resolve runner pod address for backend dial-back | |
| id: mock-host | |
| run: | | |
| # The runner is a Pod inside the cluster; its pod IP is reachable | |
| # from the backend Pod over the cluster network. We pass the bare | |
| # IP as MOCK_UPSTREAM_HOSTNAME so the backend's upstream-URL | |
| # resolver does not need cluster DNS to be configured for the | |
| # `<ip-dashed>.<ns>.pod.cluster.local` form (which depends on | |
| # CoreDNS `pods` plugin mode). | |
| # | |
| # ARC runners with `spec.template.spec.containers[].env.POD_IP` | |
| # via the downward API populate $POD_IP. We fall back to | |
| # `hostname -i` if the env var is missing. | |
| POD_IP="${POD_IP:-$(hostname -i 2>/dev/null | awk '{print $1}')}" | |
| if [ -z "$POD_IP" ]; then | |
| echo "ERROR: could not determine runner pod IP for mock dial-back" >&2 | |
| exit 1 | |
| fi | |
| # Sanity: must look like an IPv4. Reject 127.* (loopback would | |
| # only be reachable from inside the runner pod itself, not from | |
| # the backend pod across the cluster network). | |
| if ! echo "$POD_IP" | grep -Eq '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$'; then | |
| echo "ERROR: POD_IP '${POD_IP}' is not an IPv4 address" >&2 | |
| exit 1 | |
| fi | |
| if echo "$POD_IP" | grep -Eq '^127\.'; then | |
| echo "ERROR: POD_IP '${POD_IP}' is loopback; backend pod cannot reach this" >&2 | |
| exit 1 | |
| fi | |
| echo "Runner pod IP: ${POD_IP}" | |
| # GITHUB_ENV exports the value to the following steps of this job. | |
| echo "MOCK_UPSTREAM_HOSTNAME=${POD_IP}" >> "$GITHUB_ENV" | |
| - name: Run security tests | |
| run: | | |
| mkdir -p "$JUNIT_OUTPUT_DIR" | |
| chmod +x scripts/run-suite.sh | |
| ./scripts/run-suite.sh --suite security --run-id "${RUN_ID}" | |
| - name: Upload test results | |
| if: always() | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| with: | |
| name: junit-security | |
| # Directory upload (not *.xml glob) so per-test diagnostic | |
| # JSON breadcrumbs (e.g. scan-completes-final-resp.json) reach | |
| # the operator. With *.xml glob the dump dies silently and the | |
| # gate's failure rendering is a one-line message attribute. | |
| path: /tmp/test-results/ | |
| if-no-files-found: ignore | |
| # ------------------------------------------------------------------- | |
| # Pull-through cache reliability tests (epic #69 cluster D, v1.1.9). | |
| # | |
| # Lives in its own job (not merged into repo-tests or security-tests) | |
| # for three reasons: | |
| # | |
| # 1. The cross-format shadowing-guard test takes ~60s on its own | |
| # because it cycles through six format handlers. Folding that | |
| # into repo-tests would dominate the suite's wall-clock. | |
| # | |
| # 2. The cache-ttl tests need stable TTL plumbing on the backend; | |
| # a regression in only this surface should fail this job | |
| # without blocking the other 20 jobs. | |
| # | |
| # 3. RELEASE_GATE=1 is set: silent skips here are the exact | |
| # silent-success class (#888) this cluster exists to catch. | |
| # ------------------------------------------------------------------- | |
| pullthrough-tests: | |
| # Runs the `pullthrough` suite via scripts/run-suite.sh against the | |
| # backend stood up by the deploy job. | |
| runs-on: ak-e2e-runners | |
| timeout-minutes: 15 | |
| needs: deploy | |
| if: inputs.test_suite == 'all' || inputs.test_suite == 'pullthrough' | |
| env: | |
| # Branch selector for the feature-flag layer in | |
| # tests/lib/feature-flags.sh (issue #65), derived from the backend | |
| # image tag: tags starting with `1.1.` (e.g. `1.1.9`, `1.1.10-rc.2`) | |
| # map to release/1.1.x, tags starting with `1.2.` (e.g. `1.2.0`) map | |
| # to release/1.2.x, and anything else (`latest`, `main`, ...) maps | |
| # to `main`. | |
| # NOTE(review): the fallback here is `main`, not the 1.1.x flag set; | |
| # whether feature_flags_init applies a more restrictive default of | |
| # its own is not visible from this file -- confirm. | |
| # When the mapping is wrong, test-feature-flag-drift.sh fails loudly | |
| # with the actual /health version, so a workflow-vs-deploy mismatch | |
| # surfaces in ONE place. | |
| AK_BACKEND_BRANCH: ${{ startsWith(inputs.backend_tag, '1.1.') && 'release/1.1.x' || (startsWith(inputs.backend_tag, '1.2.') && 'release/1.2.x' || 'main') }} | |
| # run-suite.sh per-script timeout. test-stuck-scan-janitor.sh and the | |
| # scan-depth scripts poll up to SCAN_TIMEOUT (180s by default), and | |
| # virtual-shadowing-guard.sh walks 6 formats serially. The default | |
| # 120s budget would SIGTERM those polls mid-flight on a slightly slow | |
| # backend and report timeouts instead of real signal. 300s is ~2x | |
| # headroom over the worst per-script case and matches the | |
| # security-tests / webhook-tests jobs. | |
| TEST_TIMEOUT: '300' | |
| # Gate mode: common.sh's skip_suite() hard-fails instead of skipping, | |
| # so a test that would silently skip cannot pass as a silent success | |
| # (#870/#871/#888). | |
| RELEASE_GATE: '1' | |
| JUNIT_OUTPUT_DIR: /tmp/test-results | |
| ADMIN_PASS: TestRunner!2026secure | |
| RUN_ID: ${{ needs.deploy.outputs.run_id }} | |
| BASE_URL: ${{ needs.deploy.outputs.backend_url }} | |
| steps: | |
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| repository: artifact-keeper/artifact-keeper-test | |
| - name: Run pull-through cache tests | |
| run: | | |
| mkdir -p "$JUNIT_OUTPUT_DIR" | |
| chmod +x scripts/run-suite.sh | |
| ./scripts/run-suite.sh --suite pullthrough --run-id "${RUN_ID}" | |
| - name: Upload test results | |
| # Uploads the whole results directory, even after a failed run. | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| if: always() | |
| with: | |
| path: /tmp/test-results/ | |
| if-no-files-found: ignore | |
| name: junit-pullthrough | |
| # ------------------------------------------------------------------- | |
| # Compatibility tests | |
| # ------------------------------------------------------------------- | |
| compatibility-tests: | |
| # Runs the `compatibility` suite via scripts/run-suite.sh against the | |
| # backend stood up by the deploy job. | |
| runs-on: ak-e2e-runners | |
| needs: deploy | |
| if: inputs.test_suite == 'all' || inputs.test_suite == 'compatibility' | |
| env: | |
| # Gate mode: common.sh's skip_suite() hard-fails instead of skipping, | |
| # so a test that would silently skip cannot pass as a silent success | |
| # (#870/#871/#888). | |
| RELEASE_GATE: '1' | |
| JUNIT_OUTPUT_DIR: /tmp/test-results | |
| ADMIN_PASS: TestRunner!2026secure | |
| RUN_ID: ${{ needs.deploy.outputs.run_id }} | |
| BASE_URL: ${{ needs.deploy.outputs.backend_url }} | |
| steps: | |
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| repository: artifact-keeper/artifact-keeper-test | |
| - name: Run compatibility tests | |
| run: | | |
| mkdir -p "$JUNIT_OUTPUT_DIR" | |
| chmod +x scripts/run-suite.sh | |
| ./scripts/run-suite.sh --suite compatibility --run-id "${RUN_ID}" | |
| - name: Upload test results | |
| # Publishes JUnit XML even after a failed run; no files is not an error. | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| if: always() | |
| with: | |
| path: /tmp/test-results/*.xml | |
| if-no-files-found: ignore | |
| name: junit-compatibility | |
| # ------------------------------------------------------------------- | |
| # Repository type tests (virtual, remote, CRUD, labels) | |
| # ------------------------------------------------------------------- | |
| repo-tests: | |
| # Runs the `repos` suite via scripts/run-suite.sh against the backend | |
| # stood up by the deploy job. | |
| runs-on: ak-e2e-runners | |
| needs: deploy | |
| if: inputs.test_suite == 'all' || inputs.test_suite == 'repos' | |
| env: | |
| # Gate mode: common.sh's skip_suite() hard-fails instead of skipping, | |
| # so a test that would silently skip cannot pass as a silent success | |
| # (#870/#871/#888). | |
| RELEASE_GATE: '1' | |
| JUNIT_OUTPUT_DIR: /tmp/test-results | |
| ADMIN_PASS: TestRunner!2026secure | |
| RUN_ID: ${{ needs.deploy.outputs.run_id }} | |
| BASE_URL: ${{ needs.deploy.outputs.backend_url }} | |
| steps: | |
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| repository: artifact-keeper/artifact-keeper-test | |
| - name: Run repo type tests | |
| run: | | |
| mkdir -p "$JUNIT_OUTPUT_DIR" | |
| chmod +x scripts/run-suite.sh | |
| ./scripts/run-suite.sh --suite repos --run-id "${RUN_ID}" | |
| - name: Upload test results | |
| # Publishes JUnit XML even after a failed run; no files is not an error. | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| if: always() | |
| with: | |
| path: /tmp/test-results/*.xml | |
| if-no-files-found: ignore | |
| name: junit-repos | |
| # ------------------------------------------------------------------- | |
| # Promotion tests | |
| # ------------------------------------------------------------------- | |
| promotion-tests: | |
| # Runs the `promotion` suite via scripts/run-suite.sh against the | |
| # backend stood up by the deploy job. | |
| runs-on: ak-e2e-runners | |
| needs: deploy | |
| if: inputs.test_suite == 'all' || inputs.test_suite == 'promotion' | |
| env: | |
| # Gate mode: common.sh's skip_suite() hard-fails instead of skipping, | |
| # so a test that would silently skip cannot pass as a silent success | |
| # (#870/#871/#888). | |
| RELEASE_GATE: '1' | |
| JUNIT_OUTPUT_DIR: /tmp/test-results | |
| ADMIN_PASS: TestRunner!2026secure | |
| RUN_ID: ${{ needs.deploy.outputs.run_id }} | |
| BASE_URL: ${{ needs.deploy.outputs.backend_url }} | |
| steps: | |
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| repository: artifact-keeper/artifact-keeper-test | |
| - name: Run promotion tests | |
| run: | | |
| mkdir -p "$JUNIT_OUTPUT_DIR" | |
| chmod +x scripts/run-suite.sh | |
| ./scripts/run-suite.sh --suite promotion --run-id "${RUN_ID}" | |
| - name: Upload test results | |
| # Publishes JUnit XML even after a failed run; no files is not an error. | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| if: always() | |
| with: | |
| path: /tmp/test-results/*.xml | |
| if-no-files-found: ignore | |
| name: junit-promotion | |
| # ------------------------------------------------------------------- | |
| # RBAC tests | |
| # ------------------------------------------------------------------- | |
| rbac-tests: | |
| # Runs the `rbac` suite via scripts/run-suite.sh against the backend | |
| # stood up by the deploy job. | |
| runs-on: ak-e2e-runners | |
| needs: deploy | |
| if: inputs.test_suite == 'all' || inputs.test_suite == 'rbac' | |
| env: | |
| # Gate mode: common.sh's skip_suite() hard-fails instead of skipping, | |
| # so a test that would silently skip cannot pass as a silent success | |
| # (#870/#871/#888). | |
| RELEASE_GATE: '1' | |
| JUNIT_OUTPUT_DIR: /tmp/test-results | |
| ADMIN_PASS: TestRunner!2026secure | |
| RUN_ID: ${{ needs.deploy.outputs.run_id }} | |
| BASE_URL: ${{ needs.deploy.outputs.backend_url }} | |
| steps: | |
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| repository: artifact-keeper/artifact-keeper-test | |
| - name: Run RBAC tests | |
| run: | | |
| mkdir -p "$JUNIT_OUTPUT_DIR" | |
| chmod +x scripts/run-suite.sh | |
| ./scripts/run-suite.sh --suite rbac --run-id "${RUN_ID}" | |
| - name: Upload test results | |
| # Publishes JUnit XML even after a failed run; no files is not an error. | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| if: always() | |
| with: | |
| path: /tmp/test-results/*.xml | |
| if-no-files-found: ignore | |
| name: junit-rbac | |
| # ------------------------------------------------------------------- | |
| # Lifecycle tests | |
| # ------------------------------------------------------------------- | |
| lifecycle-tests: | |
| # Runs the `lifecycle` suite via scripts/run-suite.sh against the | |
| # backend stood up by the deploy job. | |
| runs-on: ak-e2e-runners | |
| needs: deploy | |
| if: inputs.test_suite == 'all' || inputs.test_suite == 'lifecycle' | |
| env: | |
| # Gate mode: common.sh's skip_suite() hard-fails instead of skipping, | |
| # so a test that would silently skip cannot pass as a silent success | |
| # (#870/#871/#888). | |
| RELEASE_GATE: '1' | |
| JUNIT_OUTPUT_DIR: /tmp/test-results | |
| ADMIN_PASS: TestRunner!2026secure | |
| RUN_ID: ${{ needs.deploy.outputs.run_id }} | |
| BASE_URL: ${{ needs.deploy.outputs.backend_url }} | |
| steps: | |
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| repository: artifact-keeper/artifact-keeper-test | |
| - name: Run lifecycle tests | |
| run: | | |
| mkdir -p "$JUNIT_OUTPUT_DIR" | |
| chmod +x scripts/run-suite.sh | |
| ./scripts/run-suite.sh --suite lifecycle --run-id "${RUN_ID}" | |
| - name: Upload test results | |
| # Publishes JUnit XML even after a failed run; no files is not an error. | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| if: always() | |
| with: | |
| path: /tmp/test-results/*.xml | |
| if-no-files-found: ignore | |
| name: junit-lifecycle | |
| # ------------------------------------------------------------------- | |
| # Webhook tests | |
| # ------------------------------------------------------------------- | |
| webhook-tests: | |
| # Runs the `webhooks` suite via scripts/run-suite.sh after exporting the | |
| # runner pod's IP so the backend can reach the mock receiver. | |
| needs: deploy | |
| if: inputs.test_suite == 'all' || inputs.test_suite == 'webhooks' | |
| runs-on: ak-e2e-runners | |
| env: | |
| BASE_URL: ${{ needs.deploy.outputs.backend_url }} | |
| RUN_ID: ${{ needs.deploy.outputs.run_id }} | |
| ADMIN_PASS: TestRunner!2026secure | |
| JUNIT_OUTPUT_DIR: /tmp/test-results | |
| # RELEASE_GATE=1 turns common.sh's skip_suite() into a hard fail. | |
| # A silently-skipped test in release-gate context is exactly the | |
| # silent-success class (#870/#871/#888) the gate exists to catch. | |
| RELEASE_GATE: '1' | |
| # Per-script timeout for run-suite.sh. Webhook resilience tests | |
| # poll for retry/dead-letter behavior on schedules up to 180s | |
| # (WEBHOOK_RETRY_TIMEOUT) so the wrapping timeout must exceed that. | |
| TEST_TIMEOUT: '300' | |
| steps: | |
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| repository: artifact-keeper/artifact-keeper-test | |
| - name: Resolve runner pod address for backend dial-back | |
| id: receiver-host | |
| run: | | |
| # Webhook tests spin up a mock receiver in this runner pod and | |
| # pass the URL to the backend, which lives in a DIFFERENT pod. | |
| # 127.0.0.1 from the backend's perspective is the backend pod | |
| # itself, not the runner, so the mock would never be reached. | |
| # Loopback is also a hard block in the backend's SSRF guard | |
| # (#199, artifact-keeper validation.rs:203-212), so we MUST | |
| # use the runner pod's RFC1918 IP. | |
| # | |
| # Prefer a pre-set $POD_IP; otherwise take the first address | |
| # printed by `hostname -i`. | |
| POD_IP="${POD_IP:-$(hostname -i 2>/dev/null | awk '{print $1}')}" | |
| if [ -z "$POD_IP" ]; then | |
| echo "ERROR: could not determine runner pod IP" >&2 | |
| exit 1 | |
| fi | |
| # Must look like a dotted-quad IPv4 address. | |
| if ! echo "$POD_IP" | grep -Eq '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$'; then | |
| echo "ERROR: POD_IP '${POD_IP}' is not an IPv4 address" >&2 | |
| exit 1 | |
| fi | |
| # Reject 127.*: the backend pod cannot reach the runner's loopback. | |
| if echo "$POD_IP" | grep -Eq '^127\.'; then | |
| echo "ERROR: POD_IP '${POD_IP}' is loopback; backend pod cannot reach this" >&2 | |
| exit 1 | |
| fi | |
| echo "Runner pod IP: ${POD_IP}" | |
| # GITHUB_ENV exports the value to the following steps of this job. | |
| echo "WEBHOOK_RECEIVER_HOST=${POD_IP}" >> "$GITHUB_ENV" | |
| - name: Run webhook tests | |
| run: | | |
| mkdir -p "$JUNIT_OUTPUT_DIR" | |
| chmod +x scripts/run-suite.sh | |
| ./scripts/run-suite.sh --suite webhooks --run-id "${RUN_ID}" | |
| - name: Upload test results | |
| if: always() | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| with: | |
| name: junit-webhooks | |
| path: /tmp/test-results/*.xml | |
| if-no-files-found: ignore | |
| # ------------------------------------------------------------------- | |
| # Search tests | |
| # ------------------------------------------------------------------- | |
| search-tests: | |
| # Runs the `search` suite via scripts/run-suite.sh against the backend | |
| # stood up by the deploy job. | |
| runs-on: ak-e2e-runners | |
| needs: deploy | |
| if: inputs.test_suite == 'all' || inputs.test_suite == 'search' | |
| env: | |
| # Gate mode: common.sh's skip_suite() hard-fails instead of skipping, | |
| # so a test that would silently skip cannot pass as a silent success | |
| # (#870/#871/#888). | |
| RELEASE_GATE: '1' | |
| JUNIT_OUTPUT_DIR: /tmp/test-results | |
| ADMIN_PASS: TestRunner!2026secure | |
| RUN_ID: ${{ needs.deploy.outputs.run_id }} | |
| BASE_URL: ${{ needs.deploy.outputs.backend_url }} | |
| steps: | |
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| repository: artifact-keeper/artifact-keeper-test | |
| - name: Run search tests | |
| run: | | |
| mkdir -p "$JUNIT_OUTPUT_DIR" | |
| chmod +x scripts/run-suite.sh | |
| ./scripts/run-suite.sh --suite search --run-id "${RUN_ID}" | |
| - name: Upload test results | |
| # Publishes JUnit XML even after a failed run; no files is not an error. | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| if: always() | |
| with: | |
| path: /tmp/test-results/*.xml | |
| if-no-files-found: ignore | |
| name: junit-search | |
| # ------------------------------------------------------------------- | |
| # Platform tests (signing, SBOM, curation, labels, audit, backup) | |
| # | |
| # Serialized after auth-tests to avoid cross-suite admin-JWT | |
| # contamination. test-admin-password-recovery.sh changes the admin | |
| # password, which calls the backend's change_password handler | |
| # (backend/src/api/handlers/users.rs) and triggers | |
| # invalidate_user_tokens(admin_id). That writes the admin's UUID | |
| # into the global CREDENTIAL_INVALIDATIONS map in | |
| # backend/src/services/auth_service.rs, so any auth-tests step | |
| # running in parallel that uses the admin JWT starts getting 401s | |
| # mid-suite once its cached token validation flips to "rejected". | |
| # See #137. admin-tests is serialized for the same reason and on the | |
| # same job ordering (see the comment block above the admin-tests | |
| # job definition below). | |
| # ------------------------------------------------------------------- | |
| platform-tests: | |
| # Runs the `platform` suite via scripts/run-suite.sh. auth-tests is | |
| # listed in `needs` purely to order the two suites so they never run | |
| # against the backend at the same time. | |
| needs: [deploy, auth-tests] | |
| # A job whose needed job was skipped is itself skipped unless its `if` | |
| # uses a status function. auth-tests is skipped for every test_suite | |
| # other than 'all'/'auth', so a plain condition would make | |
| # `test_suite: platform` skip this job too. Accept auth-tests being | |
| # skipped; as before, still require deploy to have succeeded and do not | |
| # run after a failed auth-tests. | |
| if: ${{ !cancelled() && needs.deploy.result == 'success' && (needs.auth-tests.result == 'success' || needs.auth-tests.result == 'skipped') && (inputs.test_suite == 'all' || inputs.test_suite == 'platform') }} | |
| runs-on: ak-e2e-runners | |
| env: | |
| BASE_URL: ${{ needs.deploy.outputs.backend_url }} | |
| RUN_ID: ${{ needs.deploy.outputs.run_id }} | |
| ADMIN_PASS: TestRunner!2026secure | |
| JUNIT_OUTPUT_DIR: /tmp/test-results | |
| # RELEASE_GATE=1 turns common.sh's skip_suite() into a hard fail. | |
| # A silently-skipped test in release-gate context is exactly the | |
| # silent-success class (#870/#871/#888) the gate exists to catch. | |
| RELEASE_GATE: '1' | |
| steps: | |
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| repository: artifact-keeper/artifact-keeper-test | |
| - name: Run platform tests | |
| run: | | |
| mkdir -p "$JUNIT_OUTPUT_DIR" | |
| chmod +x scripts/run-suite.sh | |
| ./scripts/run-suite.sh --suite platform --run-id "${RUN_ID}" | |
| - name: Upload test results | |
| if: always() | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| with: | |
| name: junit-platform | |
| path: /tmp/test-results/*.xml | |
| if-no-files-found: ignore | |
| # ------------------------------------------------------------------- | |
| # Auth tests (tokens, TOTP, rate limiting) | |
| # ------------------------------------------------------------------- | |
| auth-tests: | |
| # Runs the `auth` suite via scripts/run-suite.sh against the backend | |
| # stood up by the deploy job. | |
| runs-on: ak-e2e-runners | |
| needs: deploy | |
| if: inputs.test_suite == 'all' || inputs.test_suite == 'auth' | |
| env: | |
| # Gate mode: common.sh's skip_suite() hard-fails instead of skipping, | |
| # so a test that would silently skip cannot pass as a silent success | |
| # (#870/#871/#888). | |
| RELEASE_GATE: '1' | |
| JUNIT_OUTPUT_DIR: /tmp/test-results | |
| ADMIN_PASS: TestRunner!2026secure | |
| RUN_ID: ${{ needs.deploy.outputs.run_id }} | |
| BASE_URL: ${{ needs.deploy.outputs.backend_url }} | |
| steps: | |
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| repository: artifact-keeper/artifact-keeper-test | |
| - name: Run auth tests | |
| run: | | |
| mkdir -p "$JUNIT_OUTPUT_DIR" | |
| chmod +x scripts/run-suite.sh | |
| ./scripts/run-suite.sh --suite auth --run-id "${RUN_ID}" | |
| - name: Upload test results | |
| # Publishes JUnit XML even after a failed run; no files is not an error. | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| if: always() | |
| with: | |
| path: /tmp/test-results/*.xml | |
| if-no-files-found: ignore | |
| name: junit-auth | |
| # ------------------------------------------------------------------- | |
| # Admin tests (Epic 10, #77): operational admin endpoints | |
| # livez, backup execute/cancel/delete, monitoring alerts, | |
| # storage backends listing, reindex trigger. | |
| # | |
| # Serialization (see #137): test-admin-password-recovery.sh in | |
| # platform-tests changes the admin password via change_password | |
| # (backend/src/api/handlers/users.rs), which writes the admin's UUID into | |
| # the global CREDENTIAL_INVALIDATIONS map | |
| # (backend/src/services/auth_service.rs). Any suite holding an admin JWT | |
| # while that happens starts getting 401s mid-suite once its cached token | |
| # validation flips to "rejected". admin-tests reuses ADMIN_PASS and | |
| # exercises the admin JWT on every endpoint it hits, so it must not | |
| # overlap with the password change. It therefore waits for auth-tests AND | |
| # platform-tests: depending on auth-tests alone would start it in | |
| # parallel with platform-tests, the suite that performs the change. | |
| # | |
| # Both dependencies are there for ordering. A skipped dependency | |
| # (test_suite == 'admin') would skip this job too without a status-check | |
| # function in the condition, so 'skipped' is accepted explicitly for | |
| # auth-tests and the platform-tests result is not inspected at all. | |
| # ------------------------------------------------------------------- | |
| admin-tests: | |
| needs: [deploy, auth-tests, platform-tests] | |
| if: | | |
| !cancelled() && | |
| needs.deploy.result == 'success' && | |
| (needs.auth-tests.result == 'success' || needs.auth-tests.result == 'skipped') && | |
| (inputs.test_suite == 'all' || inputs.test_suite == 'admin') | |
| runs-on: ak-e2e-runners | |
| env: | |
| BASE_URL: ${{ needs.deploy.outputs.backend_url }} | |
| RUN_ID: ${{ needs.deploy.outputs.run_id }} | |
| NAMESPACE: ${{ needs.deploy.outputs.namespace }} | |
| ADMIN_PASS: TestRunner!2026secure | |
| JUNIT_OUTPUT_DIR: /tmp/test-results | |
| RELEASE_GATE: '1' | |
| steps: | |
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| repository: artifact-keeper/artifact-keeper-test | |
| - name: Install kubectl | |
| uses: azure/setup-kubectl@829323503d1be3d00ca8346e5391ca0b07a9ab0d # v5.1.0 | |
| - name: Wait for OpenSearch ready | |
| # test-reindex.sh hits POST /api/v1/admin/reindex which requires | |
| # OpenSearch to be ready. Without this wait the endpoint can 404 | |
| # (route not yet mounted on the backend's OpenSearch-dependent | |
| # path) or 503 and the test was previously soft-skipping on 404, | |
| # passing vacuously under RELEASE_GATE=1. Wait up to 120s for the | |
| # OpenSearch pod to be Ready before running the suite. | |
| run: | | |
| # Try the bitnami chart label first, then app=opensearch as a | |
| # fallback for charts that use the legacy label scheme. If the | |
| # selector matches no pods (e.g. opensearch.enabled=false in this | |
| # build) the wait is a no-op and the suite proceeds, which is | |
| # the right behavior for builds that don't ship OpenSearch. | |
| set -e | |
| if kubectl -n "$NAMESPACE" get pod -l app.kubernetes.io/name=opensearch -o name 2>/dev/null | grep -q pod/; then | |
| kubectl -n "$NAMESPACE" wait --for=condition=Ready pod \ | |
| -l app.kubernetes.io/name=opensearch --timeout=120s | |
| elif kubectl -n "$NAMESPACE" get pod -l app=opensearch -o name 2>/dev/null | grep -q pod/; then | |
| kubectl -n "$NAMESPACE" wait --for=condition=Ready pod \ | |
| -l app=opensearch --timeout=120s | |
| else | |
| echo "No OpenSearch pods found in ${NAMESPACE}; skipping wait." | |
| fi | |
| - name: Run admin tests | |
| run: | | |
| mkdir -p "$JUNIT_OUTPUT_DIR" | |
| chmod +x scripts/run-suite.sh | |
| ./scripts/run-suite.sh --suite admin --run-id "${RUN_ID}" | |
| - name: Upload test results | |
| if: always() | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| with: | |
| name: junit-admin | |
| path: /tmp/test-results/*.xml | |
| if-no-files-found: ignore | |
| # ------------------------------------------------------------------- | |
| # Stress tests (after the functional suites have finished) | |
| # | |
| # continue-on-error: stress tests measure backend behavior under | |
| # sustained mixed-workload (auth + upload + download + list) on a | |
| # 2 CPU test pod inside the namespace's 4 CPU / 8 Gi quota. Error- | |
| # rate variance is high on ARC runners (observed 22-54% across | |
| # otherwise-identical runs) because the bcrypt-bound auth path | |
| # saturates first and the worker count drives RPS up faster than | |
| # the pod can absorb. The test still produces JUnit + run logs so | |
| # regressions are visible, but a single failed run does not block | |
| # the release gate. Real perf regressions are caught by dedicated | |
| # benchmark workflows on Rocky, not by this CI smoke gate. | |
| # See artifact-keeper#991 for v1.1.x auth-path perf investigation. | |
| # ------------------------------------------------------------------- | |
| stress-tests: | |
| # The needs list is an ordering barrier, not a pass/fail gate: the | |
| # always() condition below starts this job once every listed suite has | |
| # finished, whatever its result. admin-tests is part of the barrier so | |
| # the stress load never runs against the shared backend while | |
| # admin-tests is still exercising it. | |
| needs: [deploy, format-tests, repo-tests, promotion-tests, rbac-tests, lifecycle-tests, webhook-tests, search-tests, platform-tests, auth-tests, admin-tests, security-tests, compatibility-tests] | |
| continue-on-error: true | |
| if: | | |
| always() && | |
| needs.deploy.result == 'success' && | |
| (inputs.test_suite == 'all' || inputs.test_suite == 'stress') | |
| runs-on: ak-e2e-runners | |
| env: | |
| BASE_URL: ${{ needs.deploy.outputs.backend_url }} | |
| RUN_ID: ${{ needs.deploy.outputs.run_id }} | |
| ADMIN_PASS: TestRunner!2026secure | |
| JUNIT_OUTPUT_DIR: /tmp/test-results | |
| # Per-request HTTP-code logs end up here. The Upload stress-test logs | |
| # step below ships this directory as a workflow artifact so a failed | |
| # stress run can be debugged endpoint-by-endpoint instead of from | |
| # aggregate error counts alone (artifact-keeper-test#138 / | |
| # artifact-keeper#1088). | |
| STRESS_LOG_DIR: /tmp/stress-logs | |
| # Postgres + pod-resource snapshot directory. The Collect postgres | |
| # stats step below runs tests/stress/collect-pg-stats.sh after the | |
| # stress run and ships this directory as the stress-pg-stats | |
| # artifact. Captures the direct measurement that PR #148's | |
| # Fresh-Eyes review (artifact-keeper-test#154) asked for: pg | |
| # connection saturation + kubectl top, so the postgres-CPU | |
| # narrative behind PR #140 is backed by evidence on every run. | |
| PG_STATS_DIR: /tmp/pg-stats | |
| NAMESPACE: ${{ needs.deploy.outputs.namespace }} | |
| # RELEASE_GATE=1 turns common.sh's skip_suite() into a hard fail. | |
| # A silently-skipped test in release-gate context is exactly the | |
| # silent-success class (#870/#871/#888) the gate exists to catch. | |
| RELEASE_GATE: '1' | |
| steps: | |
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| repository: artifact-keeper/artifact-keeper-test | |
| - name: Run stress tests | |
| run: | | |
| mkdir -p "$JUNIT_OUTPUT_DIR" | |
| mkdir -p "$STRESS_LOG_DIR" | |
| chmod +x scripts/run-suite.sh | |
| ./scripts/run-suite.sh --suite stress --run-id "${RUN_ID}" | |
| - name: Summarize per-request status codes | |
| if: always() | |
| run: | | |
| # Nothing to summarize when the log directory is missing or empty. | |
| if [ ! -d "$STRESS_LOG_DIR" ] || [ -z "$(ls -A "$STRESS_LOG_DIR" 2>/dev/null)" ]; then | |
| echo "No stress logs were emitted at ${STRESS_LOG_DIR}" | |
| exit 0 | |
| fi | |
| echo "## Stress-test per-request status codes" >> "$GITHUB_STEP_SUMMARY" | |
| echo "" >> "$GITHUB_STEP_SUMMARY" | |
| echo "Per-request rows logged to artifact \`stress-request-logs\` at \`${STRESS_LOG_DIR}\`." >> "$GITHUB_STEP_SUMMARY" | |
| echo "Row format: \`<epoch_ms> <suite> <method> <endpoint> <http_code> <elapsed_ms>\`." >> "$GITHUB_STEP_SUMMARY" | |
| echo "" >> "$GITHUB_STEP_SUMMARY" | |
| echo "| Suite | Total | 2xx | 3xx | 4xx | 5xx | Timeouts (000) |" >> "$GITHUB_STEP_SUMMARY" | |
| echo "|-------|------:|----:|----:|----:|----:|---------------:|" >> "$GITHUB_STEP_SUMMARY" | |
| # One table row per log file; field 5 of each row is the HTTP code. | |
| for log in "$STRESS_LOG_DIR"/*.log; do | |
| [ -f "$log" ] || continue | |
| suite=$(basename "$log" .log) | |
| total=$(wc -l < "$log" | tr -d ' ') | |
| s2=$(awk '$5 ~ /^2[0-9][0-9]$/ {c++} END {print c+0}' "$log") | |
| s3=$(awk '$5 ~ /^3[0-9][0-9]$/ {c++} END {print c+0}' "$log") | |
| s4=$(awk '$5 ~ /^4[0-9][0-9]$/ {c++} END {print c+0}' "$log") | |
| s5=$(awk '$5 ~ /^5[0-9][0-9]$/ {c++} END {print c+0}' "$log") | |
| s0=$(awk '$5 == "000" {c++} END {print c+0}' "$log") | |
| echo "| ${suite} | ${total} | ${s2} | ${s3} | ${s4} | ${s5} | ${s0} |" >> "$GITHUB_STEP_SUMMARY" | |
| done | |
| echo "" >> "$GITHUB_STEP_SUMMARY" | |
| # Echo a short per-suite breakdown to the runner log too, so an | |
| # operator can scan the job page without first downloading the | |
| # artifact. Cap at the top 10 non-2xx endpoints to keep the log | |
| # readable. | |
| echo "Top non-2xx endpoints per suite:" | |
| for log in "$STRESS_LOG_DIR"/*.log; do | |
| [ -f "$log" ] || continue | |
| suite=$(basename "$log" .log) | |
| echo " ${suite}:" | |
| awk '$5 !~ /^2[0-9][0-9]$/ {print $5, $3, $4}' "$log" \ | |
| | sort | uniq -c | sort -rn | head -n 10 \ | |
| | sed 's/^/ /' | |
| done | |
| - name: Collect postgres stats | |
| # Snapshot pg_stat_activity, pg_stat_statements, connection-count | |
| # vs max_connections, and kubectl top for the backend + postgres | |
| # pods. Runs after the stress workload and before teardown so the | |
| # capture reflects steady-state load. if: always() ensures the | |
| # snapshot still ships on a failed run, which is exactly when | |
| # the data is most useful (artifact-keeper-test#154; rationale | |
| # in PR #148 Fresh-Eyes review, Finding 2). | |
| if: always() | |
| run: | | |
| bash tests/stress/collect-pg-stats.sh | |
| - name: Upload pg-stats snapshot | |
| if: always() | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| with: | |
| name: stress-pg-stats-${{ github.run_attempt }} | |
| path: /tmp/pg-stats/ | |
| if-no-files-found: ignore | |
| - name: Upload test results | |
| if: always() | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| with: | |
| name: junit-stress | |
| path: /tmp/test-results/*.xml | |
| if-no-files-found: ignore | |
| - name: Upload stress-test logs | |
| # Per-request HTTP-code rows from the stress-test workers. Captured | |
| # before namespace teardown so a 50%-error run can be debugged | |
| # endpoint-by-endpoint (which path returned what code, how often) | |
| # instead of from aggregate counters alone. Investigating | |
| # artifact-keeper#1088 (POST /repositories DB contention under | |
| # load) would have been faster with this artifact in hand. | |
| if: always() | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| with: | |
| name: stress-request-logs | |
| path: /tmp/stress-logs/ | |
| if-no-files-found: ignore | |
| # ------------------------------------------------------------------- | |
| # Resilience tests (after stress completes) | |
| # | |
| # Run regardless of stress-tests outcome. Resilience tests target | |
| # crash recovery, network partition, storage failures, etc., which | |
| # are independent of the bcrypt/auth saturation that stress-tests | |
| # measures. Skipping resilience because stress hit its error-rate | |
| # threshold loses signal on a different failure class. | |
| # ------------------------------------------------------------------- | |
| resilience-tests: | |
| needs: [deploy, stress-tests] | |
| if: | | |
| always() && | |
| needs.deploy.result == 'success' && | |
| (inputs.test_suite == 'all' || inputs.test_suite == 'resilience') | |
| runs-on: ak-e2e-runners | |
| strategy: | |
| # One leg per category; fail-fast off so every category reports. | |
| fail-fast: false | |
| matrix: | |
| category: [crash, restart, network, storage, data] | |
| env: | |
| BASE_URL: ${{ needs.deploy.outputs.backend_url }} | |
| RUN_ID: ${{ needs.deploy.outputs.run_id }} | |
| ADMIN_PASS: TestRunner!2026secure | |
| NAMESPACE: ${{ needs.deploy.outputs.namespace }} | |
| JUNIT_OUTPUT_DIR: /tmp/test-results | |
| steps: | |
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| repository: artifact-keeper/artifact-keeper-test | |
| - name: Install kubectl | |
| uses: azure/setup-kubectl@829323503d1be3d00ca8346e5391ca0b07a9ab0d # v5.1.0 | |
| - name: Run ${{ matrix.category }} resilience tests | |
| # Step-level continue-on-error: a failing script marks this step as | |
| # failed but the job still concludes successfully, so resilience | |
| # failures are reported without blocking the gate. | |
| continue-on-error: true | |
| env: | |
| # Hand the matrix value to the shell as data rather than splicing | |
| # the expression into the script text. | |
| CATEGORY: ${{ matrix.category }} | |
| run: | | |
| mkdir -p "$JUNIT_OUTPUT_DIR" | |
| FAILED=0 | |
| RAN=0 | |
| for script in "tests/resilience/${CATEGORY}"/test-*.sh; do | |
| # An unmatched glob stays literal; skip anything that is not a file. | |
| [ -f "$script" ] || continue | |
| RAN=$((RAN + 1)) | |
| echo "=== Running ${script} ===" | |
| if ! bash "$script"; then | |
| echo "FAILED: ${script}" | |
| FAILED=$((FAILED + 1)) | |
| fi | |
| done | |
| # A category with no scripts would otherwise pass without testing | |
| # anything; surface that instead of staying silent. | |
| if [ "$RAN" -eq 0 ]; then | |
| echo "::warning::No resilience test scripts found for category ${CATEGORY}; nothing was exercised" | |
| fi | |
| if [ "$FAILED" -gt 0 ]; then | |
| echo "::warning::${FAILED} resilience test(s) failed in ${CATEGORY} (non-blocking on ARC runners)" | |
| exit 1 | |
| fi | |
| - name: Upload test results | |
| if: always() | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| with: | |
| name: junit-resilience-${{ matrix.category }} | |
| path: /tmp/test-results/*.xml | |
| if-no-files-found: ignore | |
| # ------------------------------------------------------------------- | |
| # Mesh tests (after resilience passes) | |
| # | |
| # Deploys four extra namespaces (main, peer1, peer2, peer3) derived from | |
| # the deploy job's run_id, runs the mesh suite against them, and tears | |
| # them down again inside this job. | |
| # ------------------------------------------------------------------- | |
| mesh-tests: | |
| needs: [deploy, resilience-tests] | |
| # always() overrides the implicit success() check, so the deploy result | |
| # has to be tested explicitly: resilience-tests is 'skipped' when deploy | |
| # fails, and without this guard the job would run with an empty RUN_ID | |
| # and create namespaces from it. | |
| if: | | |
| always() && | |
| needs.deploy.result == 'success' && | |
| (inputs.test_suite == 'all' || inputs.test_suite == 'mesh') && | |
| (needs.resilience-tests.result == 'success' || needs.resilience-tests.result == 'skipped') | |
| runs-on: ak-e2e-runners | |
| env: | |
| RUN_ID: ${{ needs.deploy.outputs.run_id }} | |
| ADMIN_PASS: TestRunner!2026secure | |
| JUNIT_OUTPUT_DIR: /tmp/test-results | |
| # RELEASE_GATE=1 turns common.sh's skip_suite() into a hard fail. | |
| # A silently-skipped test in release-gate context is exactly the | |
| # silent-success class (#870/#871/#888) the gate exists to catch. | |
| RELEASE_GATE: '1' | |
| steps: | |
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| repository: artifact-keeper/artifact-keeper-test | |
| - name: Install kubectl | |
| uses: azure/setup-kubectl@829323503d1be3d00ca8346e5391ca0b07a9ab0d # v5.1.0 | |
| - name: Install Helm | |
| uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2 # v5.0.0 | |
| - name: Deploy mesh topology | |
| id: mesh-deploy | |
| env: | |
| # Workflow inputs reach the script as environment variables so | |
| # their contents are never parsed as shell syntax. | |
| INPUT_BACKEND_TAG: ${{ inputs.backend_tag }} | |
| INPUT_WEB_TAG: ${{ inputs.web_tag }} | |
| run: | | |
| MESH_RUN_ID="${RUN_ID}" | |
| chmod +x scripts/create-test-namespace.sh | |
| # Deploy 4 mesh instances | |
| for i in main peer1 peer2 peer3; do | |
| ./scripts/create-test-namespace.sh \ | |
| --run-id "${MESH_RUN_ID}-mesh-${i}" \ | |
| --backend-tag "${INPUT_BACKEND_TAG}" \ | |
| --web-tag "${INPUT_WEB_TAG}" \ | |
| --values helm/values-test-mesh.yaml | |
| done | |
| # Output URLs | |
| BASE_NS="test-${MESH_RUN_ID}-mesh" | |
| echo "MAIN_URL=http://artifact-keeper-backend.${BASE_NS}-main.svc.cluster.local:8080" >> "$GITHUB_OUTPUT" | |
| echo "PEER1_URL=http://artifact-keeper-backend.${BASE_NS}-peer1.svc.cluster.local:8080" >> "$GITHUB_OUTPUT" | |
| echo "PEER2_URL=http://artifact-keeper-backend.${BASE_NS}-peer2.svc.cluster.local:8080" >> "$GITHUB_OUTPUT" | |
| echo "PEER3_URL=http://artifact-keeper-backend.${BASE_NS}-peer3.svc.cluster.local:8080" >> "$GITHUB_OUTPUT" | |
| - name: Wait for mesh instances ready | |
| env: | |
| MAIN_URL: ${{ steps.mesh-deploy.outputs.MAIN_URL }} | |
| PEER1_URL: ${{ steps.mesh-deploy.outputs.PEER1_URL }} | |
| PEER2_URL: ${{ steps.mesh-deploy.outputs.PEER2_URL }} | |
| PEER3_URL: ${{ steps.mesh-deploy.outputs.PEER3_URL }} | |
| run: | | |
| chmod +x tests/lib/wait-for-ready.sh | |
| # Same order as before: main first, then the three peers. | |
| for url in "$MAIN_URL" "$PEER1_URL" "$PEER2_URL" "$PEER3_URL"; do | |
| ./tests/lib/wait-for-ready.sh "$url" 300 | |
| done | |
| - name: Run mesh tests | |
| env: | |
| MAIN_URL: ${{ steps.mesh-deploy.outputs.MAIN_URL }} | |
| PEER1_URL: ${{ steps.mesh-deploy.outputs.PEER1_URL }} | |
| PEER2_URL: ${{ steps.mesh-deploy.outputs.PEER2_URL }} | |
| PEER3_URL: ${{ steps.mesh-deploy.outputs.PEER3_URL }} | |
| BASE_URL: ${{ steps.mesh-deploy.outputs.MAIN_URL }} | |
| run: | | |
| mkdir -p "$JUNIT_OUTPUT_DIR" | |
| chmod +x scripts/run-suite.sh | |
| ./scripts/run-suite.sh --suite mesh --run-id "${RUN_ID}" | |
| - name: Teardown mesh namespaces | |
| if: always() && inputs.skip_teardown != true | |
| run: | | |
| chmod +x scripts/teardown-test-namespace.sh | |
| # Best-effort: one failed teardown must not stop the others. | |
| for i in main peer1 peer2 peer3; do | |
| ./scripts/teardown-test-namespace.sh --run-id "${RUN_ID}-mesh-${i}" || true | |
| done | |
| - name: Upload test results | |
| if: always() | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| with: | |
| name: junit-mesh | |
| path: /tmp/test-results/*.xml | |
| if-no-files-found: ignore | |
| # ------------------------------------------------------------------- | |
| # Collect results and publish summary | |
| # | |
| # Runs after every other job (if: always()), merges the junit-* | |
| # artifacts, writes a per-job status table to the step summary, and | |
| # fails the workflow when a required suite did not succeed. | |
| # ------------------------------------------------------------------- | |
| collect-results: | |
| needs: [version-set-integrity, clean-install-smoke, clean-install-smoke-with-deps, chart-upgrade-smoke, deploy, real-flow-smoke, scan-completion-gate, sbom-correctness-gate, pinned-cve-gate, format-tests, security-tests, compatibility-tests, repo-tests, promotion-tests, rbac-tests, lifecycle-tests, webhook-tests, search-tests, platform-tests, auth-tests, admin-tests, stress-tests, resilience-tests, mesh-tests] | |
| if: always() | |
| runs-on: ak-e2e-runners | |
| steps: | |
| - name: Download all test artifacts | |
| uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 | |
| with: | |
| pattern: junit-* | |
| path: /tmp/all-results | |
| merge-multiple: true | |
| - name: Publish test summary | |
| if: always() | |
| env: | |
| # Free-form values go through the environment so their contents are | |
| # treated as data. Spliced directly into the double-quoted echo | |
| # below, a tag containing $(...) or a backtick would be executed by | |
| # the shell. | |
| INPUT_BACKEND_TAG: ${{ inputs.backend_tag }} | |
| INPUT_WEB_TAG: ${{ inputs.web_tag }} | |
| DEPLOY_RUN_ID: ${{ needs.deploy.outputs.run_id }} | |
| run: | | |
| echo "## Release Gate Results" >> "$GITHUB_STEP_SUMMARY" | |
| echo "" >> "$GITHUB_STEP_SUMMARY" | |
| echo "| Suite | Status |" >> "$GITHUB_STEP_SUMMARY" | |
| echo "|-------|--------|" >> "$GITHUB_STEP_SUMMARY" | |
| # The needs.<job>.result expressions are substituted before the | |
| # script runs, so each case arm holds a literal result string. | |
| for job in version-set-integrity clean-install-smoke clean-install-smoke-with-deps chart-upgrade-smoke real-flow-smoke scan-completion-gate sbom-correctness-gate pinned-cve-gate format-tests repo-tests promotion-tests rbac-tests lifecycle-tests webhook-tests search-tests platform-tests auth-tests admin-tests security-tests compatibility-tests stress-tests resilience-tests mesh-tests; do | |
| status="skipped" | |
| case "$job" in | |
| version-set-integrity) status="${{ needs.version-set-integrity.result }}" ;; | |
| clean-install-smoke) status="${{ needs.clean-install-smoke.result }}" ;; | |
| clean-install-smoke-with-deps) status="${{ needs.clean-install-smoke-with-deps.result }}" ;; | |
| chart-upgrade-smoke) status="${{ needs.chart-upgrade-smoke.result }}" ;; | |
| real-flow-smoke) status="${{ needs.real-flow-smoke.result }}" ;; | |
| scan-completion-gate) status="${{ needs.scan-completion-gate.result }}" ;; | |
| sbom-correctness-gate) status="${{ needs.sbom-correctness-gate.result }}" ;; | |
| pinned-cve-gate) status="${{ needs.pinned-cve-gate.result }}" ;; | |
| format-tests) status="${{ needs.format-tests.result }}" ;; | |
| repo-tests) status="${{ needs.repo-tests.result }}" ;; | |
| promotion-tests) status="${{ needs.promotion-tests.result }}" ;; | |
| rbac-tests) status="${{ needs.rbac-tests.result }}" ;; | |
| lifecycle-tests) status="${{ needs.lifecycle-tests.result }}" ;; | |
| webhook-tests) status="${{ needs.webhook-tests.result }}" ;; | |
| search-tests) status="${{ needs.search-tests.result }}" ;; | |
| platform-tests) status="${{ needs.platform-tests.result }}" ;; | |
| auth-tests) status="${{ needs.auth-tests.result }}" ;; | |
| admin-tests) status="${{ needs.admin-tests.result }}" ;; | |
| security-tests) status="${{ needs.security-tests.result }}" ;; | |
| compatibility-tests) status="${{ needs.compatibility-tests.result }}" ;; | |
| stress-tests) status="${{ needs.stress-tests.result }}" ;; | |
| resilience-tests) status="${{ needs.resilience-tests.result }}" ;; | |
| mesh-tests) status="${{ needs.mesh-tests.result }}" ;; | |
| esac | |
| echo "| ${job} | ${status} |" >> "$GITHUB_STEP_SUMMARY" | |
| done | |
| echo "" >> "$GITHUB_STEP_SUMMARY" | |
| echo "**Backend tag:** \`${INPUT_BACKEND_TAG}\`" >> "$GITHUB_STEP_SUMMARY" | |
| echo "**Web tag:** \`${INPUT_WEB_TAG}\`" >> "$GITHUB_STEP_SUMMARY" | |
| echo "**Run ID:** \`${DEPLOY_RUN_ID}\`" >> "$GITHUB_STEP_SUMMARY" | |
| - name: Upload combined results | |
| if: always() | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| with: | |
| name: release-gate-results | |
| path: /tmp/all-results/ | |
| if-no-files-found: ignore | |
| - name: Gate check - fail if any required suite did not succeed | |
| # stress-tests and security-tests are intentionally excluded from | |
| # this rollup. Both have continue-on-error: true (see the comments | |
| # above each job) so their outcome can be 'failure' on | |
| # known-flaky / known-infra-debt scenarios without blocking the | |
| # release gate: | |
| # - stress-tests: bcrypt-bound auth saturation under sustained | |
| # load on shared ARC runners (artifact-keeper#991). | |
| # - security-tests: Grype DB not pre-seeded in v1.1.x backend | |
| # image; quality gate is last-scanner-wins so Trivy covers | |
| # the policy gate (artifact-keeper#1001). | |
| # | |
| # The three new silent-success gates (scan-completion-gate, | |
| # sbom-correctness-gate, pinned-cve-gate) use the STRICTER | |
| # `result != 'success'` predicate. The looser | |
| # `result == 'failure' || result == 'cancelled'` form lets a | |
| # 'skipped' outcome (matrix-leg eval failure, future conditional | |
| # `if:` gate, or a transitive `needs:` skip when `deploy` is | |
| # skipped) through as green. For the silent-success gates that | |
| # is precisely the regression class we are guarding against, so | |
| # we close it explicitly. | |
| # | |
| # clean-install-smoke-with-deps is the one legitimate 'skipped' | |
| # case: it is opt-in (gated on the `run_smoke_with_deps` workflow | |
| # input, default false; see #53). When the input is false the | |
| # job's result is 'skipped' by design. We continue to use the | |
| # looser predicate for it so the default-off path stays green. | |
| # When the input is true, a failure or cancellation DOES block | |
| # the release. | |
| # | |
| # The wildcard form contains(needs.*.result, 'failure') still | |
| # observes failures because needs.<job>.result reflects the | |
| # job's outcome, not its continue-on-error-adjusted conclusion. | |
| # So we list the required suites explicitly here. If you add a | |
| # new required suite, add it to this list. Soft-failing suites | |
| # stay off the list. | |
| if: >- | |
| needs.version-set-integrity.result == 'failure' || needs.version-set-integrity.result == 'cancelled' || | |
| needs.clean-install-smoke.result == 'failure' || needs.clean-install-smoke.result == 'cancelled' || | |
| needs.clean-install-smoke-with-deps.result == 'failure' || needs.clean-install-smoke-with-deps.result == 'cancelled' || | |
| needs.chart-upgrade-smoke.result == 'failure' || needs.chart-upgrade-smoke.result == 'cancelled' || | |
| needs.real-flow-smoke.result == 'failure' || needs.real-flow-smoke.result == 'cancelled' || | |
| needs.scan-completion-gate.result != 'success' || | |
| needs.sbom-correctness-gate.result != 'success' || | |
| needs.pinned-cve-gate.result != 'success' || | |
| needs.deploy.result == 'failure' || needs.deploy.result == 'cancelled' || | |
| needs.format-tests.result == 'failure' || needs.format-tests.result == 'cancelled' || | |
| needs.compatibility-tests.result == 'failure' || needs.compatibility-tests.result == 'cancelled' || | |
| needs.repo-tests.result == 'failure' || needs.repo-tests.result == 'cancelled' || | |
| needs.promotion-tests.result == 'failure' || needs.promotion-tests.result == 'cancelled' || | |
| needs.rbac-tests.result == 'failure' || needs.rbac-tests.result == 'cancelled' || | |
| needs.lifecycle-tests.result == 'failure' || needs.lifecycle-tests.result == 'cancelled' || | |
| needs.webhook-tests.result == 'failure' || needs.webhook-tests.result == 'cancelled' || | |
| needs.search-tests.result == 'failure' || needs.search-tests.result == 'cancelled' || | |
| needs.platform-tests.result == 'failure' || needs.platform-tests.result == 'cancelled' || | |
| needs.auth-tests.result == 'failure' || needs.auth-tests.result == 'cancelled' || | |
| needs.admin-tests.result == 'failure' || needs.admin-tests.result == 'cancelled' || | |
| needs.resilience-tests.result == 'failure' || needs.resilience-tests.result == 'cancelled' || | |
| needs.mesh-tests.result == 'failure' || needs.mesh-tests.result == 'cancelled' | |
| run: | | |
| echo "::error::Release gate FAILED - one or more required test suites did not pass" | |
| echo "Review the workflow summary above for details" | |
| echo "Note: stress-tests is non-blocking; its outcome is shown in the summary but does not gate the release" | |
| exit 1 | |
| # ------------------------------------------------------------------- | |
| # Teardown | |
| # | |
| # Removes the namespace created by the deploy job once collect-results | |
| # has finished. Skipped only when the skip_teardown input is true. | |
| # ------------------------------------------------------------------- | |
| teardown: | |
| runs-on: ak-e2e-runners | |
| needs: [deploy, collect-results] | |
| if: always() && inputs.skip_teardown != true | |
| steps: | |
| # The teardown script lives in the artifact-keeper-test repository. | |
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| repository: artifact-keeper/artifact-keeper-test | |
| - name: Install kubectl | |
| uses: azure/setup-kubectl@829323503d1be3d00ca8346e5391ca0b07a9ab0d # v5.1.0 | |
| - name: Install Helm | |
| uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2 # v5.0.0 | |
| - name: Teardown test namespace | |
| env: | |
| RUN_ID: ${{ needs.deploy.outputs.run_id }} | |
| run: | | |
| chmod +x scripts/teardown-test-namespace.sh | |
| ./scripts/teardown-test-namespace.sh --run-id "$RUN_ID" | |
| # Uploads /tmp/test-logs/ even when the teardown step failed; a missing | |
| # directory is ignored. | |
| - name: Upload pod logs | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| if: always() | |
| with: | |
| name: pod-logs | |
| if-no-files-found: ignore | |
| path: /tmp/test-logs/ | |