From 887dd428a52d98c54542df7ef0a9d1a60041ea79 Mon Sep 17 00:00:00 2001
From: Sebastien Tardif
Date: Tue, 26 May 2026 14:07:11 -0700
Subject: [PATCH 1/2] ci: parallelize benchmark tests across 3 runners

Split the single Benchmark Tests job into a matrix of 3 parallel jobs,
one per package (controller, metrics, recommendation). This reduces
wall-clock time from ~10 minutes (sum of all) to ~max(controller),
roughly a 2-3x speedup.

Each shard maintains its own baseline cache for benchstat comparison.

Signed-off-by: Sebastien Tardif
---
 .github/workflows/ci.yaml | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index a4f86b6f..5082d176 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -300,11 +300,21 @@ jobs:
           fail_ci_if_error: false
 
   test-bench:
-    name: Benchmark Tests
+    name: Benchmark Tests (${{ matrix.pkg }})
     runs-on: ${{ vars.RUNNER || 'ubuntu-latest' }}
     timeout-minutes: 15
     needs: changes
     if: needs.changes.outputs.go == 'true'
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - pkg: controller
+            path: ./internal/controller/...
+          - pkg: metrics
+            path: ./internal/metrics/...
+          - pkg: recommendation
+            path: ./internal/recommendation/...
     steps:
       - uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
         with:
@@ -324,13 +334,13 @@ jobs:
         uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
         with:
           path: bench-baseline.txt
-          key: bench-baseline-${{ runner.os }}-${{ hashFiles('go.sum') }}
-          restore-keys: bench-baseline-${{ runner.os }}-
+          key: bench-baseline-${{ matrix.pkg }}-${{ runner.os }}-${{ hashFiles('go.sum') }}
+          restore-keys: bench-baseline-${{ matrix.pkg }}-${{ runner.os }}-
 
       - name: Run benchmarks
         shell: bash -Eeuo pipefail -x {0}
         run: |
-          go test ./internal/... -bench=. -benchmem -run='^$' \
+          go test ${{ matrix.path }} -bench=. -benchmem -run='^$' \
             -count=5 -timeout=10m | tee bench-current.txt
 
       - name: Compare with baseline
@@ -360,7 +370,7 @@ jobs:
         uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
         with:
           path: bench-baseline.txt
-          key: bench-baseline-${{ runner.os }}-${{ hashFiles('go.sum') }}
+          key: bench-baseline-${{ matrix.pkg }}-${{ runner.os }}-${{ hashFiles('go.sum') }}
 
   test-integration:
     name: Integration Tests

From c9e0801b9d20937946f0462d87fc2ccb11a1522a Mon Sep 17 00:00:00 2001
From: Sebastien Tardif
Date: Tue, 26 May 2026 14:21:48 -0700
Subject: [PATCH 2/2] ci: split controller benchmarks into core and scale shards

Further split the controller benchmark shard (8m41s) into two parallel
jobs using -bench regex filtering:

- controller-core: fast benchmarks (BuildPrometheusQuery, Reconcile,
  ComputeRecommendations) -- ~1 min expected
- controller-scale: scale benchmarks (ManyWorkloads, ManyPolicies,
  ConcurrentPolicies up to 1000) -- the heavy tail

Total shards: 4 (controller-core, controller-scale, metrics, recommendation)
Expected wall-clock: max(controller-scale) instead of sum(all).

Signed-off-by: Sebastien Tardif
---
Review note: the two controller shards select benchmarks by allow-list
regex; a benchmark under ./internal/controller/... whose name matches
neither pattern is not run by any shard.

 .github/workflows/ci.yaml | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 5082d176..f421564d 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -309,12 +309,18 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - pkg: controller
+          - pkg: controller-core
             path: ./internal/controller/...
+            bench: '^Benchmark(BuildPrometheusQuery|Reconcile$|ComputeRecommendations)'
+          - pkg: controller-scale
+            path: ./internal/controller/...
+            bench: '^BenchmarkReconcile_(Many|Concurrent)'
           - pkg: metrics
             path: ./internal/metrics/...
+            bench: '.'
           - pkg: recommendation
             path: ./internal/recommendation/...
+            bench: '.'
     steps:
       - uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
         with:
@@ -340,7 +346,7 @@ jobs:
       - name: Run benchmarks
         shell: bash -Eeuo pipefail -x {0}
         run: |
-          go test ${{ matrix.path }} -bench=. -benchmem -run='^$' \
+          go test ${{ matrix.path }} -bench='${{ matrix.bench }}' -benchmem -run='^$' \
             -count=5 -timeout=10m | tee bench-current.txt
 
       - name: Compare with baseline