Skip to content

Commit b28293a

Browse files
committed
Add hover tooltips to headline metric cards
1 parent 5ec70ce commit b28293a

2 files changed

Lines changed: 70 additions & 11 deletions

File tree

docs/app.js

Lines changed: 45 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,10 @@ function dateTime(value) {
3232
return new Date(value).toLocaleString();
3333
}
3434

35-
function metricCard(title, value, detail) {
35+
function metricCard(title, value, detail, tooltip) {
36+
const tip = tooltip ? ` data-tooltip="${escapeHtml(tooltip)}"` : "";
3637
return `
37-
<article class="metric-card">
38+
<article class="metric-card"${tip}>
3839
<p class="metric-title">${title}</p>
3940
<p class="metric-value">${value}</p>
4041
<p class="metric-detail">${detail}</p>
@@ -107,47 +108,56 @@ function renderCards(data) {
107108
metricCard(
108109
"Direct",
109110
pct(overall.direct.pct),
110-
`${overall.direct.correct}/${overall.direct.total}`
111+
`${overall.direct.correct}/${overall.direct.total}`,
112+
"Accuracy when the model answers the question directly (open-ended, no answer choices provided)."
111113
),
112114
metricCard(
113115
"MCQ with refusal",
114116
pct(overall.mcq_with_refusal.pct),
115-
`${overall.mcq_with_refusal.correct}/${overall.mcq_with_refusal.total}`
117+
`${overall.mcq_with_refusal.correct}/${overall.mcq_with_refusal.total}`,
118+
"Accuracy on multiple-choice questions where \"I don't know\" is included as an answer option."
116119
),
117120
metricCard(
118121
"MCQ without refusal",
119122
pct(overall.mcq_without_refusal.pct),
120-
`${overall.mcq_without_refusal.correct}/${overall.mcq_without_refusal.total}`
123+
`${overall.mcq_without_refusal.correct}/${overall.mcq_without_refusal.total}`,
124+
"Accuracy on multiple-choice questions without an \"I don't know\" option, forcing a best guess."
121125
),
122126
metricCard(
123127
"MCQ Lift",
124128
pp(h.mcq_lift_pp),
125-
`Direct \u2192 MCQ w/o refusal`
129+
`Direct \u2192 MCQ w/o refusal`,
130+
"Percentage-point gain when switching from direct (open-ended) to MCQ without refusal. Shows how much answer choices help the model."
126131
),
127132
metricCard(
128133
"Refusal Gap",
129134
`${h.refusal_gap_pp}pp`,
130-
`MCQ w/o \u2192 MCQ w/ refusal`
135+
`MCQ w/o \u2192 MCQ w/ refusal`,
136+
"Percentage-point drop from MCQ without refusal to MCQ with refusal. Measures how often the model opts for \"I don't know\" when given the chance."
131137
),
132138
metricCard(
133139
"MCQ rescue rate",
134140
pct(rescue.rescued_pct),
135-
`${rescue.rescued}/${rescue.direct_wrong} direct misses rescued`
141+
`${rescue.rescued}/${rescue.direct_wrong} direct misses rescued`,
142+
"Of questions answered wrong in direct mode, the percentage that were answered correctly in MCQ without refusal mode."
136143
),
137144
metricCard(
138145
"Best repeat",
139146
pct(h.best_repeat_pct),
140-
h.best_repeat_label
147+
h.best_repeat_label,
148+
"The highest MCQ without refusal accuracy achieved by any single repeat run."
141149
),
142150
metricCard(
143151
"Always-correct questions",
144152
pct(consistency.always_correct_pct),
145-
`${consistency.always_correct}/${data.totals.questions} questions`
153+
`${consistency.always_correct}/${data.totals.questions} questions`,
154+
"Questions answered correctly in MCQ without refusal mode across every single repeat run."
146155
),
147156
metricCard(
148157
"Task groups at 100%",
149158
pct(h.task_groups_at_100_pct),
150-
`${h.task_groups_at_100}/${h.total_task_groups} task groups`
159+
`${h.task_groups_at_100}/${h.total_task_groups} task groups`,
160+
"Task groups where every question was answered correctly in MCQ without refusal across all runs."
151161
),
152162
].join("");
153163

@@ -674,3 +684,27 @@ init().catch((error) => {
674684
header.textContent = "Failed to load benchmark data.";
675685
console.error(error);
676686
});
687+
688+
// --- Floating tooltip for metric cards ---
689+
(function () {
  // Gap, in CSS pixels, kept between the hovered card and the tooltip.
  const GAP_PX = 8;

  // A single tooltip element is shared by every card on the page. The "tip"
  // class styles it; the "visible" class toggled below shows or hides it.
  const tip = document.createElement("div");
  tip.className = "tip";
  document.body.appendChild(tip);

  /**
   * Finds the element that owns the tooltip text for an event target.
   * @param {EventTarget|null} node - Target of a mouse event.
   * @returns {Element|null} Nearest self-or-ancestor with a data-tooltip
   *   attribute, or null when there is none.
   */
  function tooltipCard(node) {
    return node instanceof Element ? node.closest("[data-tooltip]") : null;
  }

  /**
   * Centers the tooltip horizontally on the card and puts it above the card,
   * or below it when there is not enough room above in the viewport.
   * Coordinates come from getBoundingClientRect (viewport-relative), which is
   * what the stylesheet's `position: fixed` on `.tip` expects.
   * @param {Element} card - The hovered element carrying data-tooltip.
   */
  function placeTip(card) {
    const rect = card.getBoundingClientRect();
    tip.style.left = `${rect.left + rect.width / 2}px`;

    // Measure after the text and the horizontal position are set, because
    // both can change how the tooltip wraps. offsetHeight ignores transforms.
    const fitsAbove = rect.top - GAP_PX - tip.offsetHeight >= 0;
    if (fitsAbove) {
      tip.style.top = `${rect.top - GAP_PX}px`;
      tip.style.transform = "translate(-50%, -100%)";
    } else {
      // Previously the tooltip was always placed above and got clipped for
      // cards close to the top edge of the viewport.
      tip.style.top = `${rect.bottom + GAP_PX}px`;
      tip.style.transform = "translate(-50%, 0)";
    }
  }

  // Listeners are delegated on document, so cards rendered later are covered
  // without attaching per-card handlers.
  document.addEventListener("mouseover", (e) => {
    const card = tooltipCard(e.target);
    if (!card) return;
    // mouseover bubbles from the card's children; when the pointer is only
    // moving around inside a card whose tooltip is already shown, there is
    // nothing to update.
    if (tip.classList.contains("visible") && card.contains(e.relatedTarget)) {
      return;
    }
    tip.textContent = card.dataset.tooltip;
    placeTip(card);
    tip.classList.add("visible");
  });

  document.addEventListener("mouseout", (e) => {
    const card = tooltipCard(e.target);
    if (!card) return;
    // relatedTarget is the element being entered; if it is still inside the
    // same card the pointer has not actually left, so keep the tooltip.
    if (card.contains(e.relatedTarget)) return;
    tip.classList.remove("visible");
  });
})();

docs/styles.css

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,31 @@ code {
131131
border: 1px solid var(--border);
132132
background: var(--surface);
133133
box-shadow: var(--shadow-soft);
134+
cursor: default;
135+
}
136+
137+
/* Metric cards that carry a data-tooltip attribute change border color while hovered. */
.metric-card[data-tooltip]:hover {
138+
border-color: var(--text-secondary);
139+
}
140+
141+
/* Floating tooltip element; the script in docs/app.js creates one div with this class and sets its left/top/transform inline. */
.tip {
142+
position: fixed; /* viewport-relative, matching the getBoundingClientRect() values the script assigns */
143+
padding: 8px 12px;
144+
max-width: 300px;
145+
background: #1a1a1a;
146+
color: #f0f0f0;
147+
font-size: 12px;
148+
line-height: 1.5;
149+
border-radius: 6px;
150+
z-index: 1000;
151+
pointer-events: none; /* the tooltip itself never becomes a mouse event target */
152+
box-shadow: 0 4px 16px rgba(0,0,0,.2);
153+
opacity: 0; /* hidden by default; .tip.visible raises this to 1 */
154+
transition: opacity 0.15s;
155+
}
156+
157+
/* Shown state: the script adds "visible" on mouseover of a [data-tooltip] element and removes it on mouseout. */
.tip.visible {
158+
opacity: 1;
134159
}
135160

136161
.metric-title {

0 commit comments

Comments
 (0)