Improvements to reporters to better handle noisy samples

corrideat · corrideat · commit 1f513aee0ecb · 2026-03-13T10:46:35.000Z
diff --git a/example/empty.ts b/example/empty.ts
@@ -0,0 +1,54 @@
+/* Copyright © 2026 Apeleg Limited. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License") with LLVM
+ * exceptions; you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://llvm.org/foundation/relicensing/LICENSE.txt
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { runSuite } from '../src/index.js';
+import advancedReport from '../src/reporters/advanced.js';
+import simpleReport from '../src/reporters/simple.js';
+
+const result = await runSuite({ // top-level await, so this file has to run as an ES module
+	name: 'Empty',
+	functions: [ // five entries, A–E, each with an empty body
+		{
+			name: 'A',
+			fn() {}, // NOTE(review): presumably empty on purpose so timings are pure overhead — confirm
+		},
+		{
+			name: 'B',
+			fn() {},
+		},
+		{
+			name: 'C',
+			fn() {},
+		},
+		{
+			name: 'D',
+			fn() {},
+		},
+		{
+			name: 'E',
+			fn() {},
+		},
+	],
+});
+
+console.log('=== START SIMPLE REPORT ===');
+simpleReport(result);
+console.log('=== END SIMPLE REPORT ===');
+
+console.log(''); // blank line between the two reports
+
+console.log('=== START ADVANCED REPORT ===');
+advancedReport(result); // same `result` object that was passed to simpleReport
+console.log('=== END ADVANCED REPORT ===');
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@apeleghq/benchmark",
-	"version": "1.0.5",
+	"version": "1.0.6",
 	"description": "A statistically rigorous benchmarking library with paired t-tests, baseline correction, and confidence intervals",
 	"type": "module",
 	"main": "./dist/index.cjs",
diff --git a/src/report.ts b/src/report.ts
@@ -65,6 +65,9 @@ function computeFunctionStats(
 		p95: stats.percentile(samples, 95),
 		marginOfError95: tCrit * se,
 		samples,
+		rawMean: stats.mean(rawSamples),
+		rawMedian: stats.median(rawSamples),
+		rawStdDev: stats.stdDev(rawSamples),
 		rawSamples,
 	};
 }
diff --git a/src/reporters/advanced.ts b/src/reporters/advanced.ts
@@ -14,13 +14,24 @@
  */
 
 import pc from 'picocolors';
-import { mean, stdDev } from '../stats.js';
 import type {
 	IFunctionStatistics,
 	IPairedComparison,
 	ISuiteReport,
 } from '../types.js';
 
+function getRatio( // returns a/b, switching from adjusted means to raw means (see branch below)
+	fastest: IFunctionStatistics, // only `fastest.mean` is read, to choose adjusted vs raw
+	a: IFunctionStatistics, // numerator
+	b: IFunctionStatistics, // denominator
+) {
+	if (!(fastest.mean > 0)) { // negated `>`: NaN, zero and negative values all take this branch
+		return a.rawMean / b.rawMean; // NOTE(review): b.rawMean is not checked for zero
+	}
+
+	return a.mean / b.mean; // adjusted means; b.mean is not checked for zero either
+}
+
 // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 //  Constants
 // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
@@ -63,27 +74,27 @@ function lpad(s: string, w: number): string {
 function ft(ms: number): string {
 	const a = Math.abs(ms);
 	const sign = ms < 0 ? '−' : '';
-	if (a === 0) return '0.00 ns';
+	if (a === 0) return '0.000 ns';
 	if (a < 0.000_000_000_001) return `${sign}${(a * 1e15).toFixed(3)} as`;
 	if (a < 0.000_000_001) return `${sign}${(a * 1e12).toFixed(3)} fs`;
 	if (a < 0.000_001) return `${sign}${(a * 1e9).toFixed(3)} ps`;
-	if (a < 0.001) return `${sign}${(a * 1e6).toFixed(2)} ns`;
-	if (a < 1) return `${sign}${(a * 1e3).toFixed(2)} µs`;
+	if (a < 0.001) return `${sign}${(a * 1e6).toFixed(3)} ns`;
+	if (a < 1) return `${sign}${(a * 1e3).toFixed(3)} µs`;
 	if (a < 1000) return `${sign}${a.toFixed(3)} ms`;
 	return `${sign}${(a / 1000).toFixed(3)} s`;
 }
 
 /** Format throughput as operations per second with SI suffix. */
 function fops(ms: number): string {
-	if (ms <= 0) return '∞ op/s';
+	if (ms <= 0) return '∞';
 	const ops = 1000 / ms;
-	if (ops >= 1e18) return `${(ops / 1e18).toFixed(2)}E op/s`;
-	if (ops >= 1e15) return `${(ops / 1e15).toFixed(2)}P op/s`;
-	if (ops >= 1e12) return `${(ops / 1e12).toFixed(2)}T op/s`;
-	if (ops >= 1e9) return `${(ops / 1e9).toFixed(2)}G op/s`;
-	if (ops >= 1e6) return `${(ops / 1e6).toFixed(2)}M op/s`;
-	if (ops >= 1e3) return `${(ops / 1e3).toFixed(2)}K op/s`;
-	return `${ops.toFixed(2)} op/s`;
+	if (ops >= 1e18) return `${(ops / 1e18).toFixed(2)}E`;
+	if (ops >= 1e15) return `${(ops / 1e15).toFixed(2)}P`;
+	if (ops >= 1e12) return `${(ops / 1e12).toFixed(2)}T`;
+	if (ops >= 1e9) return `${(ops / 1e9).toFixed(2)}G`;
+	if (ops >= 1e6) return `${(ops / 1e6).toFixed(2)}M`;
+	if (ops >= 1e3) return `${(ops / 1e3).toFixed(2)}k`;
+	return `${ops.toFixed(2)}`;
 }
 
 /** Locale-formatted integer / number. */
@@ -290,6 +301,24 @@ function renderHeader(suite: Readonly<ISuiteReport>): string[] {
 	L.push('  ' + bar('┃') + ' '.repeat(inner) + bar('┃'));
 	L.push('  ' + bar('┗' + '━'.repeat(inner) + '┛'));
 
+	if (
+		suite.functions.some(
+			(fn) => fn.name !== suite.baselineName && !(fn.mean > 0),
+		)
+	) {
+		L.push('');
+		L.push(
+			'    ' +
+				pc.yellow('⚠') +
+				'  ' +
+				pc.yellow(
+					`Raw ratios shown — some baseline-adjusted values are at or below the noise floor,`,
+				),
+		);
+		L.push('       ' + pc.yellow('making adjusted ratios unreliable.'));
+		L.push('');
+	}
+
 	return L;
 }
 
@@ -328,12 +357,12 @@ function renderWinner(
 				pc.dim(`(${fpv(topComp.pValue)})`),
 		);
 	} else {
-		const ratio2 = second.mean / fastest.mean;
+		const ratio2 = getRatio(fastest, second, fastest);
 		const parts = [pc.dim(`${fmul(ratio2)} faster than ${second.name}`)];
 		if (fns.length > 2) {
 			parts.push(
 				pc.dim(
-					`${fmul(slowest.mean / fastest.mean)} vs ${slowest.name}`,
+					`${fmul(getRatio(fastest, slowest, fastest))} vs ${slowest.name}`,
 				),
 			);
 		}
@@ -389,11 +418,11 @@ function renderLeaderboard(
 			),
 	);
 
-	const maxOps = fastest.mean > 0 ? 1 / fastest.mean : 0;
+	const maxOps = fastest.mean > 0 ? 1 / fastest.mean : 1 / fastest.rawMean;
 
 	for (let i = 0; i < fns.length; i++) {
 		const f = fns[i];
-		const ops = f.mean > 0 ? 1 / f.mean : 0;
+		const ops = fastest.mean > 0 ? 1 / f.mean : 1 / f.rawMean;
 		const ratio = maxOps > 0 ? ops / maxOps : 0;
 
 		const medal = i < 3 ? MEDALS[i] : pc.dim(`#${i + 1}`);
@@ -413,11 +442,9 @@ function renderLeaderboard(
 		let rel: string;
 		if (i === 0) {
 			rel = pc.green(' fastest');
-		} else if (fastest.mean > 0) {
-			const timesSlower = f.mean / fastest.mean;
-			rel = pc.dim(` ${fmul(timesSlower)} slower`);
 		} else {
-			rel = '';
+			const timesSlower = getRatio(fastest, f, fastest);
+			rel = pc.dim(` ${fmul(timesSlower)} slower`);
 		}
 
 		L.push(
@@ -576,7 +603,9 @@ function renderComparisons(
 
 		const aFaster = fA.mean <= fB.mean;
 		const fasterName = aFaster ? c.a : c.b;
-		const ratio = aFaster ? fB.mean / fA.mean : fA.mean / fB.mean;
+		const ratio = aFaster
+			? getRatio(fns[0], fB, fA)
+			: getRatio(fns[0], fA, fB);
 
 		L.push('');
 		L.push('    ' + pc.bold(c.a) + pc.dim(' vs ') + pc.bold(c.b));
@@ -689,7 +718,7 @@ function renderMatrix(
 			}
 
 			// ratio > 1 ⇒ row is faster
-			const ratio = colF.mean / rowF.mean;
+			const ratio = getRatio(fns[0], colF, rowF);
 
 			const comp = comps.find(
 				(cc) =>
@@ -726,8 +755,8 @@ function renderBaseline(
 
 	const L: string[] = [];
 
-	const baseLineMean = mean(baseline.rawSamples);
-	const baselineStdDev = stdDev(baseline.rawSamples);
+	const baseLineMean = baseline.rawMean;
+	const baselineStdDev = baseline.rawStdDev;
 
 	L.push(secLine('Measurement Overhead'));
 	L.push('');
@@ -744,33 +773,31 @@ function renderBaseline(
 		'    ' + pc.dim('All reported times have this overhead subtracted.'),
 	);
 
-	if (fastest.mean > 0) {
-		const ratio = baseLineMean / fastest.mean;
-		if (ratio > 0.1) {
-			L.push('');
-			L.push(
-				'    ' +
-					pc.yellow('⚠') +
-					'  ' +
-					pc.yellow(
-						`Overhead is ${(ratio * 100).toFixed(1)}% of the fastest function.`,
-					),
-			);
-			L.push(
-				'    ' +
-					pc.dim(
-						'   Consider increasing work per iteration for more accurate results.',
-					),
-			);
-		} else {
-			L.push(
-				'    ' +
-					pc.dim(
-						`Overhead is ${(ratio * 100).toFixed(2)}% of the fastest — `,
-					) +
-					pc.green('negligible'),
-			);
-		}
+	const ratio = baseLineMean / fastest.rawMean;
+	if (ratio > 0.1) {
+		L.push('');
+		L.push(
+			'    ' +
+				pc.yellow('⚠') +
+				'  ' +
+				pc.yellow(
+					`Overhead is ${(ratio * 100).toFixed(1)}% of the fastest function.`,
+				),
+		);
+		L.push(
+			'    ' +
+				pc.dim(
+					'   Consider increasing work per iteration for more accurate results.',
+				),
+		);
+	} else {
+		L.push(
+			'    ' +
+				pc.dim(
+					`Overhead is ${(ratio * 100).toFixed(2)}% of the fastest — `,
+				) +
+				pc.green('negligible'),
+		);
 	}
 
 	return L;
diff --git a/src/reporters/simple.ts b/src/reporters/simple.ts
@@ -29,7 +29,6 @@
  */
 
 import pc from 'picocolors';
-import { mean } from '../stats.js';
 import type {
 	IFunctionStatistics,
 	IPairedComparison,
@@ -305,8 +304,7 @@ function emitBaseline(
 	baseline: Readonly<IFunctionStatistics>,
 	unit: Readonly<Unit>,
 ): void {
-	const raw = baseline.rawSamples;
-	const avg = mean(raw);
+	const avg = baseline.rawMean;
 
 	ln(
 		`  ${pc.dim(
diff --git a/src/reporters/xunit.ts b/src/reporters/xunit.ts
@@ -149,6 +149,9 @@ function buildFnCase(
 		safeXml`        <property name="vendor:benchmark.wallClock_ms" value="${wallClockMs}" />`,
 		// ── Per-trial data (JSON arrays — enables downstream re-analysis)
 		safeXml`        <property name="vendor:benchmark.samples" value="${JSON.stringify(f.samples)}" />`,
+		safeXml`        <property name="vendor:benchmark.rawMean_ms" value="${f.rawMean}" />`,
+		safeXml`        <property name="vendor:benchmark.rawMedian_ms" value="${f.rawMedian}" />`,
+		safeXml`        <property name="vendor:benchmark.rawStdDev_ms" value="${f.rawStdDev}" />`,
 		safeXml`        <property name="vendor:benchmark.rawSamples" value="${JSON.stringify(f.rawSamples)}" />`,
 		safeXml`      </properties>`,
 		safeXml`      <system-out>${fnSummaryText(f)}</system-out>`,
diff --git a/src/types.ts b/src/types.ts
@@ -186,6 +186,12 @@ export interface IFunctionStatistics {
 	 */
 	samples: number[];
 
+	/** Arithmetic mean of raw per-iteration times (ms). */
+	rawMean: number;
+	/** Median of raw per-iteration times (ms). */
+	rawMedian: number;
+	/** Raw sample standard deviation (Bessel-corrected) (ms). */
+	rawStdDev: number;
 	/**
 	 * Raw (uncorrected) per-iteration times, one per trial (ms).
 	 * Provided so consumers can inspect or apply their own correction.

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1,6 +1,6 @@` |
| `1` | `1` | `{` |
| `2` | `2` | `"name": "@apeleghq/benchmark",` |
| `3` | | `- "version": "1.0.5",` |
| | `3` | `+ "version": "1.0.6",` |
| `4` | `4` | `"description": "A statistically rigorous benchmarking library with paired t-tests, baseline correction, and confidence intervals",` |
| `5` | `5` | `"type": "module",` |
| `6` | `6` | `"main": "./dist/index.cjs",` |