Add hover tooltips to headline metric cards

mihailoxyz · mihailoxyz · commit b28293a2a0ec · 2026-02-26T16:48:41.000+01:00
diff --git a/docs/app.js b/docs/app.js
@@ -32,9 +32,10 @@ function dateTime(value) {
   return new Date(value).toLocaleString();
 }
 
-function metricCard(title, value, detail) {
+function metricCard(title, value, detail, tooltip) {
+  const tip = tooltip ? ` data-tooltip="${escapeHtml(tooltip)}"` : "";
   return `
-    <article class="metric-card">
+    <article class="metric-card"${tip}>
       <p class="metric-title">${title}</p>
       <p class="metric-value">${value}</p>
       <p class="metric-detail">${detail}</p>
@@ -107,47 +108,56 @@ function renderCards(data) {
     metricCard(
       "Direct",
       pct(overall.direct.pct),
-      `${overall.direct.correct}/${overall.direct.total}`
+      `${overall.direct.correct}/${overall.direct.total}`,
+      "Accuracy when the model answers the question directly (open-ended, no answer choices provided)."
     ),
     metricCard(
       "MCQ with refusal",
       pct(overall.mcq_with_refusal.pct),
-      `${overall.mcq_with_refusal.correct}/${overall.mcq_with_refusal.total}`
+      `${overall.mcq_with_refusal.correct}/${overall.mcq_with_refusal.total}`,
+      "Accuracy on multiple-choice questions where \"I don't know\" is included as an answer option."
     ),
     metricCard(
       "MCQ without refusal",
       pct(overall.mcq_without_refusal.pct),
-      `${overall.mcq_without_refusal.correct}/${overall.mcq_without_refusal.total}`
+      `${overall.mcq_without_refusal.correct}/${overall.mcq_without_refusal.total}`,
+      "Accuracy on multiple-choice questions without an \"I don't know\" option, forcing a best guess."
     ),
     metricCard(
       "MCQ Lift",
       pp(h.mcq_lift_pp),
-      `Direct \u2192 MCQ w/o refusal`
+      `Direct \u2192 MCQ w/o refusal`,
+      "Percentage-point gain when switching from direct (open-ended) to MCQ without refusal. Shows how much answer choices help the model."
     ),
     metricCard(
       "Refusal Gap",
       `${h.refusal_gap_pp}pp`,
-      `MCQ w/o \u2192 MCQ w/ refusal`
+      `MCQ w/o \u2192 MCQ w/ refusal`,
+      "Percentage-point drop from MCQ without refusal to MCQ with refusal. Measures how often the model opts for \"I don't know\" when given the chance."
     ),
     metricCard(
       "MCQ rescue rate",
       pct(rescue.rescued_pct),
-      `${rescue.rescued}/${rescue.direct_wrong} direct misses rescued`
+      `${rescue.rescued}/${rescue.direct_wrong} direct misses rescued`,
+      "Of questions answered wrong in direct mode, the percentage that were answered correctly in MCQ without refusal mode."
     ),
     metricCard(
       "Best repeat",
       pct(h.best_repeat_pct),
-      h.best_repeat_label
+      h.best_repeat_label,
+      "The highest MCQ without refusal accuracy achieved by any single repeat run."
     ),
     metricCard(
       "Always-correct questions",
       pct(consistency.always_correct_pct),
-      `${consistency.always_correct}/${data.totals.questions} questions`
+      `${consistency.always_correct}/${data.totals.questions} questions`,
+      "Questions answered correctly in MCQ without refusal mode across every single repeat run."
     ),
     metricCard(
       "Task groups at 100%",
       pct(h.task_groups_at_100_pct),
-      `${h.task_groups_at_100}/${h.total_task_groups} task groups`
+      `${h.task_groups_at_100}/${h.total_task_groups} task groups`,
+      "Task groups where every question was answered correctly in MCQ without refusal across all runs."
     ),
   ].join("");
 
@@ -674,3 +684,27 @@ init().catch((error) => {
   header.textContent = "Failed to load benchmark data.";
   console.error(error);
 });
+
+// --- Floating tooltip for metric cards: one shared .tip element, filled from the hovered element's data-tooltip attribute ---
+(function () {
+  const tip = document.createElement("div"); // single tooltip element, reused for every hover
+  tip.className = "tip";
+  document.body.appendChild(tip);
+
+  document.addEventListener("mouseover", (e) => { // delegated on document; no per-card listeners are attached
+    const card = e.target.closest("[data-tooltip]"); // the target itself or its nearest ancestor carrying data-tooltip
+    if (!card) return;
+    tip.textContent = card.dataset.tooltip; // assigned as text, so the tooltip string is not parsed as HTML
+    const r = card.getBoundingClientRect();
+    tip.style.left = r.left + r.width / 2 + "px"; // horizontal centre of the card
+    tip.style.top = r.top - 8 + "px"; // 8px above the card's top edge
+    tip.style.transform = "translate(-50%, -100%)"; // shift the tip so its bottom-centre lands on (left, top)
+    tip.classList.add("visible");
+  });
+
+  document.addEventListener("mouseout", (e) => {
+    const card = e.target.closest("[data-tooltip]");
+    if (!card) return;
+    tip.classList.remove("visible"); // runs for every mouseout that originates inside a card, including moves between its children
+  });
+})();
diff --git a/docs/styles.css b/docs/styles.css
@@ -131,6 +131,31 @@ code {
   border: 1px solid var(--border);
   background: var(--surface);
   box-shadow: var(--shadow-soft);
+  cursor: default;
+}
+
+.metric-card[data-tooltip]:hover { /* applies only to cards that carry a data-tooltip attribute */
+  border-color: var(--text-secondary);
+}
+
+.tip { /* floating tooltip box */
+  position: fixed; /* left/top are interpreted relative to the viewport */
+  padding: 8px 12px;
+  max-width: 300px;
+  background: #1a1a1a;
+  color: #f0f0f0;
+  font-size: 12px;
+  line-height: 1.5;
+  border-radius: 6px;
+  z-index: 1000;
+  pointer-events: none; /* the tooltip itself never becomes a mouse-event target */
+  box-shadow: 0 4px 16px rgba(0,0,0,.2);
+  opacity: 0; /* invisible by default; see .tip.visible */
+  transition: opacity 0.15s; /* fade between hidden and visible */
+}
+
+.tip.visible { /* shown state */
+  opacity: 1;
 }
 
 .metric-title {