mui · Janpot · Jun 15, 2026 · Jun 5, 2026 · Jun 5, 2026 · Jun 5, 2026
diff --git a/apps/code-infra-dashboard/src/components/BenchmarkComparisonReportView.tsx b/apps/code-infra-dashboard/src/components/BenchmarkComparisonReportView.tsx
@@ -14,16 +14,22 @@ import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
 import Tooltip from '@mui/material/Tooltip';
 import {
   compareBenchmarkReports,
+  type BenchmarkComparisonInput,
   type BenchmarkComparisonReport,
   type ComparisonItem,
   type DiffValue,
   type BenchmarkDiffSeverity,
 } from '@/lib/benchmark/compareBenchmarkReports';
-import type { BenchmarkReport } from '@/lib/benchmark/types';
-import { formatMs, formatDiffMs, percentFormatter } from '@/utils/formatters';
+import {
+  formatMs,
+  formatMetricNumber,
+  formatMetricDiff,
+  percentFormatter,
+} from '@/utils/formatters';
 
 const SEVERITY_COLOR: Record<BenchmarkDiffSeverity, string> = {
   error: 'error.main',
+  warning: 'warning.main',
   success: 'success.main',
   neutral: 'text.secondary',
 };
@@ -32,6 +38,7 @@ const MIN_BAR_WIDTH_PX = 4;
 
 const DIFF_BAR_COLORS: Record<BenchmarkDiffSeverity, string> = {
   error: 'var(--mui-palette-error-main)',
+  warning: 'var(--mui-palette-warning-main)',
   success: 'var(--mui-palette-success-main)',
   neutral: 'var(--mui-palette-action-disabled)',
 };
@@ -72,13 +79,19 @@ function computeEntryBar(
   return computeDiffBar(comparison.duration, range.min, range.max);
 }
 
-function FormattedDiffMs({ diff, percent = false }: { diff: DiffValue; percent?: boolean }) {
+interface FormattedDiffMsProps {
+  diff: DiffValue;
+  percent?: boolean;
+  format?: Intl.NumberFormatOptions;
+}
+
+function FormattedDiffMs({ diff, percent = false, format }: FormattedDiffMsProps) {
   if (diff.absoluteDiff === 0) {
     return '\u2014';
   }
   return (
     <React.Fragment>
-      {formatDiffMs(diff.absoluteDiff)}
+      {formatMetricDiff(diff.absoluteDiff, format)}
       {percent && (
         <React.Fragment>
           {' '}
@@ -91,12 +104,18 @@ function FormattedDiffMs({ diff, percent = false }: { diff: DiffValue; percent?:
   );
 }
 
-function DiffCell({ diff, sx }: { diff: DiffValue; sx?: object }) {
+interface DiffCellProps {
+  diff: DiffValue;
+  sx?: object;
+  format?: Intl.NumberFormatOptions;
+}
+
+function DiffCell({ diff, sx, format }: DiffCellProps) {
   const color = SEVERITY_COLOR[diff.severity];
   return (
     <Tooltip title={diff.hint} arrow>
       <TableCell align="right" sx={{ color, ...sx }}>
-        <FormattedDiffMs diff={diff} percent />
+        <FormattedDiffMs diff={diff} percent format={format} />
       </TableCell>
     </Tooltip>
   );
@@ -114,6 +133,7 @@ interface ComparisonTableRow {
   valueFill?: number;
   valueColor?: string;
   diffBar?: { left: number; width: number; color: string };
+  format?: Intl.NumberFormatOptions;
 }
 
 const NAME_CELL_SX = {
@@ -181,7 +201,7 @@ function ComparisonTable({
               <TableCell align="right">{row.outliers}</TableCell>
               {hasBase && (
                 <TableCell align="right">
-                  {row.base != null ? formatMs(row.base) : '\u2014'}
+                  {row.base != null ? formatMetricNumber(row.base, row.format) : '\u2014'}
                 </TableCell>
               )}
               {(() => {
@@ -196,10 +216,10 @@ function ComparisonTable({
                     : undefined;
                   return row.removed ? (
                     <TableCell align="right" sx={{ color: 'success.main', ...diffBarSx }}>
-                      <FormattedDiffMs diff={row.comparison} percent />
+                      <FormattedDiffMs diff={row.comparison} percent format={row.format} />
                     </TableCell>
                   ) : (
-                    <DiffCell diff={row.comparison} sx={diffBarSx} />
+                    <DiffCell diff={row.comparison} sx={diffBarSx} format={row.format} />
                   );
                 }
                 return <TableCell align="right">{'\u2014'}</TableCell>;
@@ -246,24 +266,20 @@ function TotalsSummary({ totals }: { totals: BenchmarkComparisonReport['totals']
           </Typography>
         </Typography>
       </Tooltip>
-      {totals.paintDefault && (
-        <Tooltip title={totals.paintDefault.hint} arrow>
-          <Typography variant="body2">
-            <strong>Paint:</strong>{' '}
-            <Typography
-              component="span"
-              variant="body2"
-              sx={{ color: SEVERITY_COLOR[totals.paintDefault.severity] }}
-            >
-              <FormattedDiffMs diff={totals.paintDefault} percent />
-            </Typography>
-          </Typography>
-        </Tooltip>
-      )}
     </Box>
   );
 }
 
+function worstMetricSeverity(metrics: ComparisonItem['metrics']): BenchmarkDiffSeverity {
+  if (metrics.some((row) => row.diff.severity === 'error')) {
+    return 'error';
+  }
+  if (metrics.some((row) => row.diff.severity === 'warning')) {
+    return 'warning';
+  }
+  return 'neutral';
+}
+
 function BenchmarkAccordion({
   comparison,
   hasBase,
@@ -278,6 +294,8 @@ function BenchmarkAccordion({
   entryDiffRange: { min: number; max: number };
 }) {
   const renderCount = comparison.renders.filter((row) => !row.removed).length;
+  const metricCount = comparison.metrics.filter((row) => !row.removed).length;
+  const metricSeverity = worstMetricSeverity(comparison.metrics);
 
   const entryBar = computeEntryBar(comparison, hasBase, entryDiffRange);
 
@@ -345,6 +363,11 @@ function BenchmarkAccordion({
                 </Tooltip>
               )}
             </Typography>
+            {metricCount > 0 && (
+              <Typography variant="body2" sx={{ color: SEVERITY_COLOR[metricSeverity] }}>
+                {metricCount} metrics
+              </Typography>
+            )}
           </Box>
         </Box>
       </AccordionSummary>
@@ -406,16 +429,17 @@ function BenchmarkAccordion({
                     '\u2014'
                   ) : (
                     <React.Fragment>
-                      {formatMs(row.value)}{' '}
+                      {formatMetricNumber(row.value, row.format)}{' '}
                       <Typography component="span" variant="caption" color="text.secondary">
-                        ±{formatMs(row.stdDev)}
+                        ±{formatMetricNumber(row.stdDev, row.format)}
                       </Typography>
                     </React.Fragment>
                   ),
                   outliers: row.removed ? '\u2014' : row.outliers,
                   comparison: row.diff,
                   base: row.diff?.base,
                   removed: row.removed,
+                  format: row.format,
                 };
               })}
             />
@@ -431,8 +455,8 @@ function BenchmarkAccordion({
 }
 
 interface BenchmarkComparisonReportViewProps {
-  value: BenchmarkReport;
-  base: BenchmarkReport | null;
+  value: BenchmarkComparisonInput;
+  base: BenchmarkComparisonInput | null;
 }
 
 export function BenchmarkComparisonReportView({ value, base }: BenchmarkComparisonReportViewProps) {

diff --git a/apps/code-infra-dashboard/src/components/DailyBenchmarkChart.tsx b/apps/code-infra-dashboard/src/components/DailyBenchmarkChart.tsx
@@ -10,7 +10,7 @@ import Tabs from '@mui/material/Tabs';
 import Tab from '@mui/material/Tab';
 import { BarChartPro } from '@mui/x-charts-pro/BarChartPro';
 import { useXScale, useDrawingArea } from '@mui/x-charts-pro/hooks';
-import type { BenchmarkReport } from '@/lib/benchmark/types';
+import type { BenchmarkReport, MetricDefinition } from '@/lib/benchmark/types';
 import { formatMs } from '@/utils/formatters';
 import { useMasterCommits, type GitHubCommit } from '../hooks/useMasterCommits';
 import { useCiReports } from '../hooks/useCiReports';
@@ -81,6 +81,7 @@ interface CommitReportData {
   timestamp: number;
   commit: GitHubCommit;
   report: BenchmarkReport | null;
+  metricDefinitions?: Record<string, MetricDefinition>;
 }
 
 function collectBenchmarkNames(chartData: CommitReportData[]): string[] {
@@ -126,6 +127,7 @@ export default function DailyBenchmarkChart({ repo }: DailyBenchmarkChartProps)
         timestamp,
         commit,
         report: reports[commit.sha]?.report ?? null,
+        metricDefinitions: reports[commit.sha]?.metricDefinitions,
       })),
     [commits, reports],
   );
@@ -154,7 +156,7 @@ export default function DailyBenchmarkChart({ repo }: DailyBenchmarkChartProps)
         return entry.totalDuration;
       }
       if (chartMode === 'paint') {
-        return entry.metrics['paint:default']?.mean ?? null;
+        return entry.metrics['bench:paint']?.mean ?? null;
       }
       return entry.renders.length;
     },
@@ -300,8 +302,10 @@ export default function DailyBenchmarkChart({ repo }: DailyBenchmarkChartProps)
       return null;
     }
     return {
-      value: reportData.report,
-      base: baselineData?.report ?? null,
+      value: { report: reportData.report, metricDefinitions: reportData.metricDefinitions },
+      base: baselineData?.report
+        ? { report: baselineData.report, metricDefinitions: baselineData.metricDefinitions }
+        : null,
       valueCommit: reportData.commit,
       baseCommit: baselineData?.commit ?? null,
     };

diff --git a/apps/code-infra-dashboard/src/lib/benchmark/buildMarkdownReport.test.ts b/apps/code-infra-dashboard/src/lib/benchmark/buildMarkdownReport.test.ts
@@ -1,8 +1,34 @@
 import { describe, it, expect } from 'vitest';
 import { buildBenchmarkMarkdownReport } from './buildMarkdownReport';
-import { compareBenchmarkReports } from './compareBenchmarkReports';
+import { compareBenchmarkReports, type BenchmarkComparisonInput } from './compareBenchmarkReports';
+import type { BenchmarkReportEntry, MetricDefinition } from './types';
 import { makeReport, makeReportFromConfig } from './test-fixtures';
 
+// A single benchmark whose duration/renders are neutral, so the only signal is one metric.
+function metricReport(
+  name: string,
+  mean: number,
+  definitions?: Record<string, MetricDefinition>,
+): BenchmarkComparisonInput {
+  const entry: BenchmarkReportEntry = {
+    iterations: 10,
+    totalDuration: 100,
+    renders: [],
+    metrics: { [name]: { mean, stdDev: 0, outliers: 0 } },
+  };
+  return { report: { Bench: entry }, metricDefinitions: definitions };
+}
+
+const alarmDefinitions: Record<string, MetricDefinition> = {
+  clicks: {
+    kind: 'discrete',
+    format: { maximumFractionDigits: 0 },
+    alarm: { direction: 'lowerIsBetter', error: 1 },
+  },
+  tti: { kind: 'scalar', alarm: { direction: 'lowerIsBetter', warn: 0.1, error: 0.25 } },
+  fps: { kind: 'scalar' }, // informational (no alarm)
+};
+
 describe('buildBenchmarkMarkdownReport', () => {
   it('drops within-noise rows but keeps significant regressions', () => {
     const report = compareBenchmarkReports(
@@ -126,4 +152,38 @@ describe('buildBenchmarkMarkdownReport', () => {
     expect(markdown).not.toContain('No significant changes');
     expect(markdown).toContain('| Test |');
   });
+
+  describe('metric alarms', () => {
+    it('surfaces an error-level metric, making its otherwise-neutral test significant', () => {
+      const report = compareBenchmarkReports(
+        metricReport('clicks', 5, alarmDefinitions), // +2 vs 3, discrete error band 1
+        metricReport('clicks', 3),
+      );
+      const markdown = buildBenchmarkMarkdownReport(report);
+      expect(markdown).not.toContain('No significant changes');
+      expect(markdown).toContain('**Metric alarms**');
+      expect(markdown).toContain('| Bench | clicks |');
+      expect(markdown).toContain('🔺');
+    });
+
+    it('does not surface warning-level metrics (kept on the dashboard only)', () => {
+      const report = compareBenchmarkReports(
+        metricReport('tti', 115, alarmDefinitions), // +15%: past warn (10%), within error (25%) -> warning
+        metricReport('tti', 100),
+      );
+      const markdown = buildBenchmarkMarkdownReport(report);
+      expect(markdown).toContain('No significant changes');
+      expect(markdown).not.toContain('Metric alarms');
+    });
+
+    it('ignores informational metrics — no alarm section, test stays within noise', () => {
+      const report = compareBenchmarkReports(
+        metricReport('fps', 200, alarmDefinitions), // big change but no alarm configured
+        metricReport('fps', 100),
+      );
+      const markdown = buildBenchmarkMarkdownReport(report);
+      expect(markdown).toContain('No significant changes');
+      expect(markdown).not.toContain('Metric alarms');
+    });
+  });
 });
diff --git a/apps/code-infra-dashboard/src/lib/benchmark/buildMarkdownReport.ts b/apps/code-infra-dashboard/src/lib/benchmark/buildMarkdownReport.ts
@@ -1,5 +1,12 @@
-import { formatMs, formatDiffMs, percentFormatter } from '@/utils/formatters';
-import type { BenchmarkComparisonReport, DiffValue } from './compareBenchmarkReports';
+import { formatMs, formatDiffMs, formatMetricDiff, percentFormatter } from '@/utils/formatters';
+import type {
+  BenchmarkComparisonReport,
+  ComparisonEntry,
+  DiffValue,
+} from './compareBenchmarkReports';
+
+// Only error-level alarms surface in the PR comment; warnings stay on the dashboard.
+const isAlarmed = (metric: ComparisonEntry): boolean => metric.diff.severity === 'error';
 
 export interface BuildMarkdownReportOptions {
   maxRows?: number;
@@ -8,6 +15,7 @@ export interface BuildMarkdownReportOptions {
 
 const SEVERITY_PREFIX: Record<string, string> = {
   error: '🔺',
+  warning: '⚠️',
   success: '▼',
 };
 
@@ -42,11 +50,6 @@ export function buildBenchmarkMarkdownReport(
       `**Total duration:** ${formatMs(report.totals.duration.current ?? 0)}${formatDiff(report.totals.duration, 'ms')}`,
       `**Renders:** ${report.totals.renderCount.current ?? 0}${formatDiff(report.totals.renderCount, 'count')}`,
     ];
-    if (report.totals.paintDefault) {
-      totalParts.push(
-        `**Paint:** ${formatMs(report.totals.paintDefault.current ?? 0)}${formatDiff(report.totals.paintDefault, 'ms')}`,
-      );
-    }
     lines.push(totalParts.join(' | '));
     lines.push('');
   }
@@ -56,7 +59,8 @@ export function buildBenchmarkMarkdownReport(
       entry.duration.severity !== 'neutral' ||
       (entry.renderCount?.severity ?? 'neutral') !== 'neutral' ||
       entry.duration.current === null ||
-      entry.duration.base === null,
+      entry.duration.base === null ||
+      entry.metrics.some(isAlarmed),
   );
 
   const detailsLink = reportUrl ? `[details](${reportUrl})` : '';
@@ -99,5 +103,22 @@ export function buildBenchmarkMarkdownReport(
     lines.push(detailsLink);
   }
 
+  // Error-level metric alarms across all tests (independent of the test-table row cap).
+  const alarms = report.entries.flatMap((entry) =>
+    entry.metrics.filter(isAlarmed).map((metric) => ({ test: entry.name, metric })),
+  );
+  if (alarms.length > 0) {
+    lines.push('');
+    lines.push('**Metric alarms**');
+    lines.push('');
+    lines.push('| Test | Metric | Change |');
+    lines.push('|:-----|:-------|-------:|');
+    for (const { test, metric } of alarms) {
+      const prefix = SEVERITY_PREFIX[metric.diff.severity] ?? '';
+      const change = `${prefix} ${formatMetricDiff(metric.diff.absoluteDiff, metric.format)}`;
+      lines.push(`| ${test} | ${metric.name} | ${change} |`);
+    }
+  }
+
   return lines.join('\n');
 }