Skip to content

Commit 4993529

Browse files
committed
ci: merge E2E jobs into one with overlapped cluster setup
Merge the two E2E jobs (Chainsaw + Go) into a single job that:

1. Starts cluster setup immediately (no dependency on lint/unit)
2. Polls the GitHub API to wait for lint+unit to pass
3. Runs both Chainsaw and Go E2E concurrently on the shared cluster

Before: lint (3m) -> E2E setup (4m) -> tests (5.5m) = ~12.5m critical path
After: max(lint 3m, E2E setup 4m) -> tests (5.5m) = ~9.5m critical path

Saves ~3 minutes per CI run, eliminates duplicate cluster provisioning (one cluster instead of two), and halves the runner cost for E2E.

Signed-off-by: Sebastien Tardif <sebtardif@ncf.ca>
1 parent c0b77f2 commit 4993529

1 file changed

Lines changed: 72 additions & 64 deletions

File tree

.github/workflows/ci.yaml

Lines changed: 72 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -440,18 +440,19 @@ jobs:
440440
with:
441441
paths: test-results/integration.xml
442442

443-
# E2E tests split into two parallel jobs (Chainsaw + Go) for faster wall time.
444-
# Each provisions its own k3d cluster via the shared setup-e2e-cluster action.
445-
# Full K8s version matrix runs nightly; PR CI runs a single version.
446-
# Skipped for Dockerfile-only and workflow-only changes.
447-
test-e2e-chainsaw:
448-
name: E2E Chainsaw
443+
# E2E tests run in a single job with one shared k3d cluster.
444+
# Cluster setup starts immediately (needs: changes only) and overlaps with
445+
# lint/unit. A gate step polls the GitHub API until lint+unit complete,
446+
# then both Chainsaw and Go E2E run concurrently on the shared cluster.
447+
# This saves ~3 min vs two separate jobs blocked on lint/unit.
448+
test-e2e:
449+
name: E2E
449450
runs-on: ${{ vars.RUNNER || 'ubuntu-latest' }}
450451
timeout-minutes: 30
451-
needs: [changes, lint, test-unit]
452+
needs: [changes]
452453
if: needs.changes.outputs.go-source == 'true'
453454
env:
454-
K3D_CLUSTER_NAME: e2e-chainsaw-${{ github.run_id }}-${{ github.run_attempt }}
455+
K3D_CLUSTER_NAME: e2e-${{ github.run_id }}-${{ github.run_attempt }}
455456
steps:
456457
- uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
457458
with:
@@ -461,7 +462,7 @@ jobs:
461462
- uses: ./.github/actions/setup-e2e-cluster
462463
with:
463464
cluster-name: ${{ env.K3D_CLUSTER_NAME }}
464-
kubeconfig-path: ${{ runner.temp }}/attune-e2e-chainsaw-${{ github.run_id }}.kubeconfig
465+
kubeconfig-path: ${{ runner.temp }}/attune-e2e-${{ github.run_id }}.kubeconfig
465466
go-version: ${{ env.GO_VERSION }}
466467
k3d-version: ${{ env.K3D_VERSION }}
467468
k3s-image: ${{ env.K3S_IMAGE }}
@@ -470,74 +471,83 @@ jobs:
470471
prometheus-chart-version: ${{ env.PROMETHEUS_CHART_VERSION }}
471472
stress-ng-image: ${{ env.STRESS_NG_IMAGE }}
472473

474+
# Wait for lint and unit tests before running E2E.
# Cluster setup above already ran in parallel with those jobs.
- name: Wait for lint and unit gate
  env:
    GH_TOKEN: ${{ github.token }}
  shell: bash -Eeuo pipefail {0}
  run: |
    # Gate: poll this workflow run's job list until the "Lint" and
    # "Unit Tests" jobs have both completed, then let the E2E steps proceed.
    #   - exits 1 as soon as either job completes with a conclusion other
    #     than success/skipped
    #   - exits 1 after max_polls polls if the jobs never finish
    expected_jobs=2   # "Lint" and "Unit Tests"
    max_polls=60      # 60 polls, 5 s apart: roughly 5 minutes
    poll_interval=5

    echo "Waiting for lint and test-unit jobs to complete..."
    for (( i = 1; i <= max_polls; i++ )); do
      # GITHUB_REPOSITORY and GITHUB_RUN_ID are default runner variables;
      # reading them from the environment keeps expression templating out
      # of the script text.
      jobs_json=$(gh api "repos/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/jobs?per_page=100" \
        --jq '[.jobs[] | select(.name == "Lint" or .name == "Unit Tests") | {name: .name, status: .status, conclusion: .conclusion}]')

      # One tab-separated record per job. Splitting on tabs (not on any
      # whitespace) keeps job names that contain spaces, such as
      # "Unit Tests", in a single field.
      records=$(jq -r '.[] | [.name, .status, (.conclusion // "null")] | @tsv' <<<"$jobs_json")

      seen=0
      all_done=true
      any_failed=false
      while IFS=$'\t' read -r name status conclusion; do
        # An empty result still yields one blank line from the here-string.
        [[ -n "$name" ]] || continue
        seen=$(( seen + 1 ))
        if [[ "$status" != "completed" ]]; then
          all_done=false
        elif [[ "$conclusion" != "success" && "$conclusion" != "skipped" ]]; then
          echo "::error::$name failed ($conclusion), aborting E2E"
          any_failed=true
        fi
      done <<<"$records"

      if [[ "$any_failed" == "true" ]]; then
        exit 1
      fi

      # Only open the gate once every expected job has actually been seen;
      # an empty or partial job list must not count as "all done".
      if (( seen < expected_jobs )); then
        echo "Only $seen of $expected_jobs gate jobs are listed so far, waiting..."
      elif [[ "$all_done" == "true" ]]; then
        echo "Lint and unit tests passed, proceeding with E2E"
        break
      fi

      if (( i == max_polls )); then
        echo "::error::Timed out waiting for lint/unit (5 min)"
        exit 1
      fi
      sleep "$poll_interval"
    done
514+
473515
- name: Install Chainsaw
474516
uses: kyverno/action-install-chainsaw@1223ef75bedeb59c4e7b5455463d4316e76dff01 # v0.2.15
475-
- shell: bash -Eeuo pipefail -x {0}
476-
run: chainsaw test test/e2e/ --config .chainsaw.yaml
477517

478-
- name: Collect debug info on failure
479-
if: failure()
480-
shell: bash -Eeuo pipefail {0}
518+
# Runs the Chainsaw suite and the Go E2E suite as two background pipelines,
# waits for both, and fails the step if either reported a non-zero status.
- name: Run Chainsaw and Go E2E concurrently
519+
shell: bash -Eeuo pipefail -x {0}
481520
run: |
482-
echo "=== cert-manager pods ==="
483-
kubectl get pods -n cert-manager
484-
echo "=== Operator logs ==="
485-
kubectl logs -n attune-system -l app.kubernetes.io/name=attune --tail=100 || true
486-
echo "=== Pod status ==="
487-
kubectl get pods -A
488-
echo "=== Events ==="
489-
kubectl get events -A --sort-by='.lastTimestamp' | tail -30
521+
mkdir -p test-results
490522
491-
- name: Cleanup k3d cluster
492-
if: always()
493-
shell: bash -Eeuo pipefail {0}
494-
run: |
495-
export PATH="$HOME/.local/bin:$PATH"
496-
k3d cluster delete "$K3D_CLUSTER_NAME" 2>/dev/null || true
497-
rm -f "$KUBECONFIG"
523+
# Start Chainsaw in the background; stderr is merged into stdout and the
# combined stream is copied to test-results/chainsaw.log by tee.
chainsaw test test/e2e/ --config .chainsaw.yaml 2>&1 | tee test-results/chainsaw.log &
524+
# Remember the background pipeline's PID so it can be waited on below.
# NOTE(review): $! refers to tee, the last command of the pipeline; confirm
# that waiting on it reports a failing suite (pipefail is enabled) and is
# not masked by tee's own exit status.
chainsaw_pid=$!
498525
499-
test-e2e-go:
500-
name: E2E Go
501-
runs-on: ${{ vars.RUNNER || 'ubuntu-latest' }}
502-
timeout-minutes: 30
503-
needs: [changes, lint, test-unit]
504-
if: needs.changes.outputs.go-source == 'true'
505-
env:
506-
K3D_CLUSTER_NAME: e2e-go-${{ github.run_id }}-${{ github.run_attempt }}
507-
steps:
508-
- uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
509-
with:
510-
egress-policy: audit
511-
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
526+
# Start the Go E2E suite the same way, logging to test-results/go-e2e.log.
go test -tags=e2e ./test/e2e-go/... -race -count=1 -timeout=15m -v 2>&1 | tee test-results/go-e2e.log &
527+
go_pid=$!
512528
513-
- uses: ./.github/actions/setup-e2e-cluster
514-
with:
515-
cluster-name: ${{ env.K3D_CLUSTER_NAME }}
516-
kubeconfig-path: ${{ runner.temp }}/attune-e2e-go-${{ github.run_id }}.kubeconfig
517-
go-version: ${{ env.GO_VERSION }}
518-
k3d-version: ${{ env.K3D_VERSION }}
519-
k3s-image: ${{ env.K3S_IMAGE }}
520-
cert-manager-version: ${{ env.CERT_MANAGER_VERSION }}
521-
prometheus-image: ${{ env.PROMETHEUS_IMAGE }}
522-
prometheus-chart-version: ${{ env.PROMETHEUS_CHART_VERSION }}
523-
stress-ng-image: ${{ env.STRESS_NG_IMAGE }}
529+
# Default both results to success; the waits below overwrite them on failure.
chainsaw_rc=0
530+
go_rc=0
531+
# "|| rc=$?" records a non-zero status instead of letting errexit abort
# the script, so the second suite is always waited for as well.
wait $chainsaw_pid || chainsaw_rc=$?
532+
wait $go_pid || go_rc=$?
524533
525-
- name: Run Go E2E tests
526-
shell: bash -Eeuo pipefail -x {0}
527-
run: go test -tags=e2e ./test/e2e-go/... -race -count=1 -timeout=15m -v
534+
echo "Chainsaw exit=$chainsaw_rc, Go E2E exit=$go_rc"
535+
# Fail the step when either suite failed.
if (( chainsaw_rc != 0 || go_rc != 0 )); then
536+
exit 1
537+
fi
528538
529539
# Dumps cluster diagnostics into the job log when an earlier step failed.
- name: Collect debug info on failure
  if: failure()
  shell: bash -Eeuo pipefail {0}
  run: |
    # Every query is best-effort: the shell runs with errexit and pipefail,
    # so each command ends in "|| true" to keep one failing kubectl call
    # from aborting or failing the diagnostics step itself.
    echo "=== cert-manager pods ==="
    kubectl get pods -n cert-manager || true
    echo "=== Operator logs ==="
    kubectl logs -n attune-system -l app.kubernetes.io/name=attune --tail=300 || true
    echo "=== Pod status ==="
    kubectl get pods -A || true
    echo "=== Events ==="
    # With pipefail a kubectl error would otherwise surface through the pipe.
    kubectl get events -A --sort-by='.lastTimestamp' | tail -50 || true
541551
542552
- name: Cleanup k3d cluster
543553
if: always()
@@ -706,8 +716,7 @@ jobs:
706716
- test-unit
707717
- test-bench
708718
- test-integration
709-
- test-e2e-chainsaw
710-
- test-e2e-go
719+
- test-e2e
711720
- crd-freshness
712721
- helm-lint
713722
- build
@@ -722,8 +731,7 @@ jobs:
722731
"${{ needs.test-unit.result }}" \
723732
"${{ needs.test-bench.result }}" \
724733
"${{ needs.test-integration.result }}" \
725-
"${{ needs.test-e2e-chainsaw.result }}" \
726-
"${{ needs.test-e2e-go.result }}" \
734+
"${{ needs.test-e2e.result }}" \
727735
"${{ needs.crd-freshness.result }}" \
728736
"${{ needs.helm-lint.result }}" \
729737
"${{ needs.build.result }}" \

0 commit comments

Comments
 (0)