From 887dd428a52d98c54542df7ef0a9d1a60041ea79 Mon Sep 17 00:00:00 2001
From: Sebastien Tardif <sebtardif@ncf.ca>
Date: Tue, 26 May 2026 14:07:11 -0700
Subject: [PATCH 1/2] ci: parallelize benchmark tests across 3 runners

Split the single Benchmark Tests job into a matrix of 3 parallel jobs,
one per package (controller, metrics, recommendation). This reduces
wall-clock time from ~10 minutes (the sum of all packages) to roughly
the duration of the slowest shard (expected to be controller), an
estimated 2-3x speedup.

Each shard maintains its own baseline cache for benchstat comparison.

Signed-off-by: Sebastien Tardif <sebtardif@ncf.ca>
---
 .github/workflows/ci.yaml | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index a4f86b6f..5082d176 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -300,11 +300,21 @@ jobs:
           fail_ci_if_error: false
 
   test-bench:
-    name: Benchmark Tests
+    name: Benchmark Tests (${{ matrix.pkg }})
     runs-on: ${{ vars.RUNNER || 'ubuntu-latest' }}
     timeout-minutes: 15
     needs: changes
     if: needs.changes.outputs.go == 'true'
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - pkg: controller
+            path: ./internal/controller/...
+          - pkg: metrics
+            path: ./internal/metrics/...
+          - pkg: recommendation
+            path: ./internal/recommendation/...
     steps:
       - uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
         with:
@@ -324,13 +334,13 @@ jobs:
         uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
         with:
           path: bench-baseline.txt
-          key: bench-baseline-${{ runner.os }}-${{ hashFiles('go.sum') }}
-          restore-keys: bench-baseline-${{ runner.os }}-
+          key: bench-baseline-${{ matrix.pkg }}-${{ runner.os }}-${{ hashFiles('go.sum') }}
+          restore-keys: bench-baseline-${{ matrix.pkg }}-${{ runner.os }}-
 
       - name: Run benchmarks
         shell: bash -Eeuo pipefail -x {0}
         run: |
-          go test ./internal/... -bench=. -benchmem -run='^$' \
+          go test ${{ matrix.path }} -bench=. -benchmem -run='^$' \
             -count=5 -timeout=10m | tee bench-current.txt
 
       - name: Compare with baseline
@@ -360,7 +370,7 @@ jobs:
         uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
         with:
           path: bench-baseline.txt
-          key: bench-baseline-${{ runner.os }}-${{ hashFiles('go.sum') }}
+          key: bench-baseline-${{ matrix.pkg }}-${{ runner.os }}-${{ hashFiles('go.sum') }}
 
   test-integration:
     name: Integration Tests

From c9e0801b9d20937946f0462d87fc2ccb11a1522a Mon Sep 17 00:00:00 2001
From: Sebastien Tardif <sebtardif@ncf.ca>
Date: Tue, 26 May 2026 14:21:48 -0700
Subject: [PATCH 2/2] ci: split controller benchmarks into core and scale
 shards

Further split the controller benchmark shard (8m41s) into two parallel
jobs using -bench regex filtering:

- controller-core: fast benchmarks (BuildPrometheusQuery,
  ComputeRecommendations, and the exact-name Reconcile benchmark)
  -- expected to take ~1 min
- controller-scale: scale benchmarks selected by
  ^BenchmarkReconcile_(Many|Concurrent) (ManyWorkloads, ManyPolicies,
  ConcurrentPolicies up to 1000) -- the heavy tail

Total shards: 4 (controller-core, controller-scale, metrics, recommendation)
Expected wall-clock: roughly the duration of the slowest shard (expected
to be controller-scale) instead of the sum of all benchmarks.

Signed-off-by: Sebastien Tardif <sebtardif@ncf.ca>
---
 .github/workflows/ci.yaml | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 5082d176..f421564d 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -309,12 +309,18 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - pkg: controller
+          - pkg: controller-core
             path: ./internal/controller/...
+            bench: '^Benchmark(BuildPrometheusQuery|Reconcile$|ComputeRecommendations)'
+          - pkg: controller-scale
+            path: ./internal/controller/...
+            bench: '^BenchmarkReconcile_(Many|Concurrent)'
           - pkg: metrics
             path: ./internal/metrics/...
+            bench: '.'
           - pkg: recommendation
             path: ./internal/recommendation/...
+            bench: '.'
     steps:
       - uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
         with:
@@ -340,7 +346,7 @@ jobs:
       - name: Run benchmarks
         shell: bash -Eeuo pipefail -x {0}
         run: |
-          go test ${{ matrix.path }} -bench=. -benchmem -run='^$' \
+          go test ${{ matrix.path }} -bench='${{ matrix.bench }}' -benchmem -run='^$' \
             -count=5 -timeout=10m | tee bench-current.txt
 
       - name: Compare with baseline