# Commit 26f4bfc

Add benchmark suite with GitHub Actions workflow

Parent: d18c51f

File tree: 15 files changed, +868 −2 lines


### .github/workflows/benchmarks.yml

52 additions, 0 deletions (new file)

```yaml
name: benchmarks

on:
  workflow_dispatch:
  schedule:
    - cron: "0 8 * * 1"

permissions:
  contents: read

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  benchmark:
    runs-on: ubuntu-latest
    timeout-minutes: 20

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Bun
        uses: oven-sh/setup-bun@v2
        with:
          bun-version: 1.3.1

      - name: Cache dependencies
        uses: actions/cache@v4
        with:
          path: |
            ~/.bun/install/cache
            node_modules
          key: ${{ runner.os }}-bun-bench-${{ hashFiles('bun.lock') }}
          restore-keys: |
            ${{ runner.os }}-bun-bench-

      - name: Install dependencies
        run: bun install --frozen-lockfile

      - name: Run benchmarks
        run: bun run bench:ci

      - name: Upload benchmark artifacts
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          path: bench-results

      - name: Publish benchmark summary
        run: cat bench-results/summary.md >> "$GITHUB_STEP_SUMMARY"
```
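The workflow's `bun run bench:ci` step invokes a package script that is not part of this diff. A hypothetical sketch of what the `package.json` entries might look like (the script names come from this commit's README; the command bodies, the `build` step, and the pipe into `bench/report.ts` are assumptions):

```json
{
  "scripts": {
    "bench": "bun run build && bun bench/index.ts",
    "bench:ci": "bun run build && bun bench/index.ts --json | bun bench/report.ts"
  }
}
```

The `--json` flag and `bench/index.ts` entry point do appear later in this commit, so only the wiring around them is guessed here.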

### .gitignore

1 addition, 0 deletions

```diff
@@ -4,6 +4,7 @@ node_modules
 # output
 out
 dist
+bench-results
 *.tgz

 # code coverage
```

### bench/README.md

36 additions, 0 deletions (new file)

````markdown
# Benchmarks

The benchmark suite tracks relative performance trends for `tryharder` without turning noisy microbenchmarks into a required PR gate.

## Commands

```bash
bun run bench
bun run bench:ci
```

`bun run bench` builds the package and prints human-readable `mitata` output.

`bun run bench:ci` builds the package, emits structured benchmark JSON, and writes these artifacts:

- `bench-results/latest.json`
- `bench-results/summary.md`

## Benchmark discipline

- Benchmarks import from `dist`, not `src`, so measurements match the published package surface.
- Cases stay deterministic and timer-free. Do not use `sleep`, real timeout expiry, or cancellation races in this suite.
- Reuse task graphs and builder fixtures where setup can stay outside the measured loop.
- Route results through the shared sink in `bench/shared.ts` so the runtime cannot optimize work away.
- Run on the same Bun version when comparing history. This repo pins benchmark runs to `bun@1.3.1`.
- Treat results as trend signals. Do not infer product-level latency from these microbenchmarks.

## How to read these benchmarks

- Treat the suite as an overhead tracker for `tryharder`, not as an end-to-end application latency test.
- Compare like-for-like cases over time. The most useful regression signals are usually `run/function/sync-success`, `runSync/function/success`, `run/object/mapped-error`, `all/two-independent-sync-tasks`, `allSettled/two-successful-tasks`, and `flow/immediate-exit`.
- Use the direct baselines to understand scale, but do not over-index on huge ratios against `baseline/direct-sync-call`. That case is so small that tiny absolute changes can create very large relative multipliers.
- Prefer absolute changes in `ns/iter` or `us/iter` when reading results. A `+200 ns` regression on a hot-path benchmark is usually more meaningful than a percentage quoted without context.
- Read policy benchmarks as incremental overhead on top of execution. `wrap/runSync/success`, `signal/runSync/success`, `timeout/run/success-no-expiry`, and `retry/runSync/succeeds-on-third-attempt` show the cost of enabling those features even when they do not fail. The async control cases `signal/run/async-success-no-abort`, `timeout/run/async-success-no-expiry`, and `signal-timeout/run/async-success-no-abort-no-expiry` are the cases to watch when evaluating `resolveWithAbort()` changes.
- Read orchestration benchmarks as framework cost for very small graphs. `all`, `allSettled`, and `flow` are expected to be much slower than `Promise.all` in these tiny cases because they are doing dependency tracking, cancellation wiring, and result shaping. Compare unused-feature cases with exercised-feature cases like `all/two-independent-sync-tasks-with-signal`, `all/two-independent-sync-tasks-with-disposer`, `allSettled/two-successful-tasks-with-disposer`, and `flow/two-node-dependency-then-exit` to see where fixed setup cost is going.
- Only compare history across runs that use the same Bun version, machine class, and benchmark suite version. Cross-machine numbers are not reliable enough for regression calls.
````

### bench/__tests__/report.test.ts

156 additions, 0 deletions (new file)

```typescript
import { describe, expect, it } from "bun:test"
import {
  normalizeBenchmarkPayload,
  parseRawBenchmarkPayload,
  renderBenchmarkSummary,
} from "../report"

describe("benchmark reporting", () => {
  it("normalizes benchmark payloads into artifact output", () => {
    const payload = parseRawBenchmarkPayload(
      JSON.stringify({
        groups: {
          "all/two-independent-sync-tasks": "orchestration",
          "runSync/function/success": "core",
        },
        results: {
          benchmarks: [
            {
              runs: [
                {
                  name: "runSync/function/success",
                  stats: {
                    avg: 125,
                    samples: [100, 125, 150],
                  },
                },
              ],
            },
            {
              runs: [
                {
                  name: "all/two-independent-sync-tasks",
                  stats: {
                    avg: 250,
                    samples: [200, 250],
                  },
                },
              ],
            },
          ],
          context: {
            cpu: {
              name: "Test CPU",
            },
            version: "1.3.1",
          },
        },
        suiteVersion: 1,
      })
    )

    const artifact = normalizeBenchmarkPayload(payload, {
      arch: "arm64",
      date: "2026-03-07T00:00:00.000Z",
      gitSha: "abc123",
      platform: "darwin",
    })

    expect(artifact).toEqual({
      cases: [
        {
          avgNs: 125,
          group: "core",
          hz: 8_000_000,
          name: "runSync/function/success",
          samples: 3,
        },
        {
          avgNs: 250,
          group: "orchestration",
          hz: 4_000_000,
          name: "all/two-independent-sync-tasks",
          samples: 2,
        },
      ],
      meta: {
        arch: "arm64",
        bunVersion: "1.3.1",
        cpuModel: "Test CPU",
        date: "2026-03-07T00:00:00.000Z",
        gitSha: "abc123",
        platform: "darwin",
        suiteVersion: 1,
      },
    })
  })

  it("throws when benchmark fields are missing", () => {
    expect(() =>
      normalizeBenchmarkPayload({
        groups: {},
        results: {
          benchmarks: [
            {
              runs: [
                {
                  stats: {
                    avg: 125,
                    samples: [100],
                  },
                },
              ],
            },
          ],
        },
      })
    ).toThrow("Benchmark run is missing name")
  })

  it("supports empty benchmark results", () => {
    const artifact = normalizeBenchmarkPayload(
      {
        groups: {},
        results: {
          benchmarks: [],
          context: {},
        },
      },
      {
        arch: "arm64",
        bunVersion: "1.3.1",
        date: "2026-03-07T00:00:00.000Z",
        gitSha: "abc123",
        platform: "darwin",
      }
    )

    expect(artifact.cases).toEqual([])
    expect(renderBenchmarkSummary(artifact)).toContain("No benchmark cases were produced.")
  })

  it("throws on non-finite numeric values", () => {
    expect(() =>
      normalizeBenchmarkPayload({
        groups: {
          "runSync/function/success": "core",
        },
        results: {
          benchmarks: [
            {
              runs: [
                {
                  name: "runSync/function/success",
                  stats: {
                    avg: Number.POSITIVE_INFINITY,
                    samples: [100],
                  },
                },
              ],
            },
          ],
        },
      })
    ).toThrow("Benchmark runSync/function/success is missing stats.avg")
  })
})
```

### bench/constants.ts

1 addition, 0 deletions (new file)

```typescript
export const BENCHMARK_SUITE_VERSION = 2
```

### bench/index.ts

49 additions, 0 deletions (new file)

```typescript
import { run } from "mitata"
import { BENCHMARK_SUITE_VERSION } from "./constants"
import { getBenchmarkGroups } from "./shared"
import { registerCoreBenchmarks } from "./suites/core.bench"
import { registerOrchestrationBenchmarks } from "./suites/orchestration.bench"
import { registerPoliciesBenchmarks } from "./suites/policies.bench"

registerCoreBenchmarks()
registerPoliciesBenchmarks()
registerOrchestrationBenchmarks()

const suppressPrint = () => null
const isJson = process.argv.includes("--json")
const results = await run({
  colors: !isJson,
  format: isJson ? "quiet" : "mitata",
  print: isJson ? suppressPrint : undefined,
  throw: true,
})

if (isJson) {
  process.stdout.write(
    `${JSON.stringify({
      groups: getBenchmarkGroups(),
      results: {
        benchmarks: results.benchmarks.map((trial) => ({
          runs: trial.runs.map((run) => ({
            error: run.error,
            name: run.name,
            stats:
              run.stats === undefined
                ? undefined
                : {
                    avg: run.stats.avg,
                    samples: run.stats.samples.length,
                  },
          })),
        })),
        context: {
          cpu: {
            name: results.context.cpu.name,
          },
          version: Bun.version,
        },
      },
      suiteVersion: BENCHMARK_SUITE_VERSION,
    })}\n`
  )
}
```
