Integrate Bencher for overhead_bench tracking (#150)

fmzbl · web-flow · commit 5968025d5e36 · 2026-04-10T16:04:55.000Z
* bencher integration

* formatter

* remove path annotation

* add comment about bench result units

* make project slug a repo secret

* implement e2e bench

* fix

* make ci run on relative branch

* improve threshold

* remove temp files
diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
@@ -0,0 +1,208 @@
+# Continuous benchmarking with Bencher (https://bencher.dev)
+#
+# PREREQUISITES:
+#   1. Create a project at https://bencher.dev and note its slug.
+#   2. Add the following secrets in GitHub → Settings → Secrets and variables → Actions:
+#      - BENCHER_API_TOKEN: API token from bencher.dev
+#      - BENCHER_PROJECT:   your project slug
+#   3. GITHUB_TOKEN is provided automatically — no setup needed.
+#
+# overhead_bench and e2e_workload use custom harnesses (not Criterion), so they
+# run with --adapter json; overhead_bench's --bmf flag outputs Bencher Metric Format directly.
+
+name: Benchmarks
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+  workflow_dispatch:
+
+permissions:
+  checks: write
+  pull-requests: write
+
+jobs:
+  # Runs on every push to main (and on manual dispatch) to build the statistical baseline.
+  benchmark_main:
+    name: Benchmark (main baseline)
+    if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    env:
+      RUST_BACKTRACE: 1
+      BENCHER_PROJECT: ${{ secrets.BENCHER_PROJECT }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dtolnay/rust-toolchain@stable
+      - uses: Swatinem/rust-cache@v2
+
+      - name: Enable perf_event_open and kallsyms
+        run: |
+          sudo sysctl kernel.perf_event_paranoid=1
+          sudo sysctl kernel.kptr_restrict=0
+
+      - uses: bencherdev/bencher@main
+
+      - name: Benchmark — poll_overhead
+        run: |
+          bencher run \
+            --token '${{ secrets.BENCHER_API_TOKEN }}' \
+            --branch '${{ github.ref_name }}' \
+            --testbed ubuntu-latest \
+            --adapter rust_criterion \
+            "cargo bench --package dial9-tokio-telemetry --bench poll_overhead --features task-dump"
+
+      - name: Benchmark — writer_encode
+        run: |
+          bencher run \
+            --token '${{ secrets.BENCHER_API_TOKEN }}' \
+            --branch '${{ github.ref_name }}' \
+            --testbed ubuntu-latest \
+            --adapter rust_criterion \
+            "cargo bench --package dial9-tokio-telemetry --bench writer_encode"
+
+      - name: Benchmark — codec
+        run: |
+          bencher run \
+            --token '${{ secrets.BENCHER_API_TOKEN }}' \
+            --branch '${{ github.ref_name }}' \
+            --testbed ubuntu-latest \
+            --adapter rust_criterion \
+            "cargo bench --package dial9-trace-format --bench codec"
+
+      - name: Benchmark — overhead_bench
+        run: |
+          bencher run \
+            --token '${{ secrets.BENCHER_API_TOKEN }}' \
+            --branch '${{ github.ref_name }}' \
+            --testbed ubuntu-latest \
+            --adapter json \
+            "cargo bench --bench overhead_bench -- --bmf 10"
+
+      - name: Benchmark — e2e_workload
+        run: |
+          bencher run \
+            --token '${{ secrets.BENCHER_API_TOKEN }}' \
+            --branch '${{ github.ref_name }}' \
+            --testbed ubuntu-latest \
+            --adapter json \
+            "cargo bench --bench e2e_workload -- --bmf 10"
+
+  # Runs on same-repo PRs only. Fork PRs have no access to BENCHER_API_TOKEN, so
+  # without the `if` guard below the job would fail on them instead of being skipped.
+  benchmark_pr:
+    name: Benchmark (PR regression check)
+    if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    env:
+      RUST_BACKTRACE: 1
+      BENCHER_PROJECT: ${{ secrets.BENCHER_PROJECT }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dtolnay/rust-toolchain@stable
+      - uses: Swatinem/rust-cache@v2
+
+      - name: Enable perf_event_open and kallsyms
+        run: |
+          sudo sysctl kernel.perf_event_paranoid=1
+          sudo sysctl kernel.kptr_restrict=0
+
+      - uses: bencherdev/bencher@main
+
+      - name: Benchmark — poll_overhead
+        run: |
+          bencher run \
+            --token '${{ secrets.BENCHER_API_TOKEN }}' \
+            --branch '${{ github.head_ref }}' \
+            --start-point main \
+            --start-point-reset \
+            --testbed ubuntu-latest \
+            --adapter rust_criterion \
+            --threshold-measure latency \
+            --threshold-test percentage \
+            --threshold-upper-boundary 0.25 \
+            --threshold-measure throughput \
+            --threshold-test percentage \
+            --threshold-lower-boundary 0.25 \
+            --error-on-alert \
+            --github-actions '${{ secrets.GITHUB_TOKEN }}' \
+            "cargo bench --package dial9-tokio-telemetry --bench poll_overhead --features task-dump"
+
+      - name: Benchmark — writer_encode
+        run: |
+          bencher run \
+            --token '${{ secrets.BENCHER_API_TOKEN }}' \
+            --branch '${{ github.head_ref }}' \
+            --start-point main \
+            --start-point-reset \
+            --testbed ubuntu-latest \
+            --adapter rust_criterion \
+            --threshold-measure latency \
+            --threshold-test percentage \
+            --threshold-upper-boundary 0.25 \
+            --threshold-measure throughput \
+            --threshold-test percentage \
+            --threshold-lower-boundary 0.25 \
+            --error-on-alert \
+            --github-actions '${{ secrets.GITHUB_TOKEN }}' \
+            "cargo bench --package dial9-tokio-telemetry --bench writer_encode"
+
+      - name: Benchmark — codec
+        run: |
+          bencher run \
+            --token '${{ secrets.BENCHER_API_TOKEN }}' \
+            --branch '${{ github.head_ref }}' \
+            --start-point main \
+            --start-point-reset \
+            --testbed ubuntu-latest \
+            --adapter rust_criterion \
+            --threshold-measure latency \
+            --threshold-test percentage \
+            --threshold-upper-boundary 0.25 \
+            --threshold-measure throughput \
+            --threshold-test percentage \
+            --threshold-lower-boundary 0.25 \
+            --error-on-alert \
+            --github-actions '${{ secrets.GITHUB_TOKEN }}' \
+            "cargo bench --package dial9-trace-format --bench codec"
+
+      - name: Benchmark — overhead_bench
+        run: |
+          bencher run \
+            --token '${{ secrets.BENCHER_API_TOKEN }}' \
+            --branch '${{ github.head_ref }}' \
+            --start-point main \
+            --start-point-reset \
+            --testbed ubuntu-latest \
+            --adapter json \
+            --threshold-measure latency \
+            --threshold-test percentage \
+            --threshold-upper-boundary 0.25 \
+            --threshold-measure throughput \
+            --threshold-test percentage \
+            --threshold-lower-boundary 0.25 \
+            --error-on-alert \
+            --github-actions '${{ secrets.GITHUB_TOKEN }}' \
+            "cargo bench --bench overhead_bench -- --bmf 10"
+
+      - name: Benchmark — e2e_workload
+        run: |
+          bencher run \
+            --token '${{ secrets.BENCHER_API_TOKEN }}' \
+            --branch '${{ github.head_ref }}' \
+            --start-point main \
+            --start-point-reset \
+            --testbed ubuntu-latest \
+            --adapter json \
+            --threshold-measure latency \
+            --threshold-test percentage \
+            --threshold-upper-boundary 0.25 \
+            --threshold-measure throughput \
+            --threshold-test percentage \
+            --threshold-lower-boundary 0.25 \
+            --error-on-alert \
+            --github-actions '${{ secrets.GITHUB_TOKEN }}' \
+            "cargo bench --bench e2e_workload -- --bmf 10"
diff --git a/dial9-tokio-telemetry/Cargo.toml b/dial9-tokio-telemetry/Cargo.toml
@@ -76,6 +76,10 @@ required-features = ["task-dump"]
 name = "overhead_bench"
 harness = false
 
+[[bench]]
+name = "e2e_workload"
+harness = false
+
 [[example]]
 name = "long_sleep"
 required-features = ["task-dump"]
diff --git a/dial9-tokio-telemetry/benches/bmf/mod.rs b/dial9-tokio-telemetry/benches/bmf/mod.rs
@@ -0,0 +1,35 @@
+//! Bencher Metric Format (BMF) helpers.
+//! Spec: <https://bencher.dev/docs/reference/bencher-metric-format/>
+
+use serde::Serialize;
+use std::collections::BTreeMap;
+
+pub type Report = BTreeMap<String, Metric>;
+
+#[derive(Serialize)]
+pub struct Metric {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub latency: Option<Measure>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub throughput: Option<Measure>,
+}
+
+#[derive(Serialize)]
+pub struct Measure {
+    pub value: f64,
+}
+
+impl Metric {
+    pub fn latency(value: f64) -> Self {
+        Self {
+            latency: Some(Measure { value }),
+            throughput: None,
+        }
+    }
+    pub fn throughput(value: f64) -> Self {
+        Self {
+            latency: None,
+            throughput: Some(Measure { value }),
+        }
+    }
+}
diff --git a/dial9-tokio-telemetry/benches/e2e_workload.rs b/dial9-tokio-telemetry/benches/e2e_workload.rs
@@ -0,0 +1,112 @@
+//! Runs a fixed-size mixed CPU/IO workload — modelled on the
+//! `realistic_workload` example.
+
+mod bmf;
+
+#[cfg(target_os = "linux")]
+use dial9_tokio_telemetry::telemetry::CpuProfilingConfig;
+use dial9_tokio_telemetry::telemetry::{RotatingWriter, TracedRuntime};
+use std::time::Instant;
+use tokio::io::{AsyncReadExt, AsyncWriteExt};
+use tokio::net::TcpListener;
+
+const NUM_CLIENTS: usize = 4;
+const REQUESTS_PER_CLIENT: usize = 1_000;
+const NUM_CPU_TASKS: usize = 3;
+const CPU_TASK_ITERATIONS: usize = 20;
+const CPU_ITERS_PER_REQUEST: u64 = 10_000;
+const CPU_ITERS_PER_BURST: u64 = 50_000;
+const TOTAL_REQUESTS: usize = NUM_CLIENTS * REQUESTS_PER_CLIENT;
+
+fn cpu_work(iterations: u64) -> u64 {
+    let mut result = 0u64;
+    for i in 0..iterations {
+        result = result.wrapping_add(i.wrapping_mul(i));
+    }
+    result
+}
+
+async fn workload_server(listener: TcpListener) {
+    loop {
+        let Ok((mut sock, _)) = listener.accept().await else {
+            return;
+        };
+        tokio::spawn(async move {
+            let mut buf = [0u8; 64];
+            let Ok(n) = sock.read(&mut buf).await else {
+                return;
+            };
+            if n == 0 {
+                return;
+            }
+            let checksum = cpu_work(CPU_ITERS_PER_REQUEST);
+            let _ = sock.write_all(&checksum.to_le_bytes()).await;
+        });
+    }
+}
+
+async fn workload_client(port: u16) {
+    for _ in 0..REQUESTS_PER_CLIENT {
+        let mut stream = tokio::net::TcpStream::connect(("127.0.0.1", port))
+            .await
+            .expect("connect");
+        stream.write_all(b"request").await.expect("write");
+        let mut buf = [0u8; 8];
+        stream.read_exact(&mut buf).await.expect("read");
+    }
+}
+
+async fn cpu_task() {
+    for _ in 0..CPU_TASK_ITERATIONS {
+        cpu_work(CPU_ITERS_PER_BURST);
+        tokio::task::yield_now().await;
+    }
+}
+
+fn main() {
+    let mut builder = tokio::runtime::Builder::new_multi_thread();
+    builder.worker_threads(4).enable_all();
+
+    let writer = RotatingWriter::single_file("/tmp/e2e_workload_trace.bin").unwrap();
+    let tb = TracedRuntime::builder().with_task_tracking(true);
+    #[cfg(target_os = "linux")]
+    let tb = tb.with_cpu_profiling(CpuProfilingConfig::default());
+    let (runtime, _guard) = tb.build_and_start(builder, writer).unwrap();
+
+    let start = Instant::now();
+    runtime.block_on(async {
+        let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
+        let port = listener.local_addr().unwrap().port();
+        let server = tokio::spawn(workload_server(listener));
+
+        let clients: Vec<_> = (0..NUM_CLIENTS)
+            .map(|_| tokio::spawn(workload_client(port)))
+            .collect();
+        let cpu_tasks: Vec<_> = (0..NUM_CPU_TASKS)
+            .map(|_| tokio::spawn(cpu_task()))
+            .collect();
+
+        for c in clients {
+            c.await.expect("client");
+        }
+        for t in cpu_tasks {
+            t.await.expect("cpu task");
+        }
+        server.abort();
+    });
+    let wall = start.elapsed();
+
+    drop(_guard);
+
+    let rps = TOTAL_REQUESTS as f64 / wall.as_secs_f64();
+    let mut report = bmf::Report::new();
+    report.insert(
+        "e2e::wall_time_ns".to_string(),
+        bmf::Metric::latency(wall.as_nanos() as f64),
+    );
+    report.insert(
+        "e2e::throughput_rps".to_string(),
+        bmf::Metric::throughput(rps),
+    );
+    println!("{}", serde_json::to_string_pretty(&report).unwrap());
+}
diff --git a/dial9-tokio-telemetry/benches/overhead_bench.rs b/dial9-tokio-telemetry/benches/overhead_bench.rs