33 < head >
44 < meta charset ="UTF-8 " />
55 < meta name ="viewport " content ="width=device-width, initial-scale=1.0 " />
6- < title > PhyloBioBixBench-Verified-50 | Bio Benchmark Results</ title >
6+ < title > BIOS Benchmark Results</ title >
77 < meta
88 name ="description "
9- content ="Interactive dashboard for PhyloBioBixBench -Verified-50 benchmark results — direct, MCQ, and refusal-aware grading across 50 biology questions. "
9+ content ="Interactive dashboard for BixBench -Verified-50 benchmark results — direct, MCQ, and refusal-aware grading across 50 biology questions. "
1010 />
1111
1212 <!-- Favicon -->
1313 < link rel ="icon " type ="image/svg+xml " href ="assets/favicon.svg " />
1414
1515 <!-- Open Graph -->
1616 < meta property ="og:type " content ="website " />
17- < meta
18- property ="og:title "
19- content ="PhyloBioBixBench-Verified-50 | Bio Benchmark "
20- />
17+ < meta property ="og:title " content ="BIOS Benchmark Results " />
2118 < meta
2219 property ="og:description "
23- content ="Interactive dashboard for PhyloBioBixBench -Verified-50 benchmark results — direct, MCQ, and refusal-aware grading across 50 biology questions. "
20+ content ="Interactive dashboard for BixBench -Verified-50 benchmark results — direct, MCQ, and refusal-aware grading across 50 biology questions. "
2421 />
2522 < meta
2623 property ="og:url "
3027
3128 <!-- Twitter Card -->
3229 < meta name ="twitter:card " content ="summary " />
33- < meta
34- name ="twitter:title "
35- content ="PhyloBioBixBench-Verified-50 | Bio Benchmark "
36- />
30+ < meta name ="twitter:title " content ="BIOS Benchmark Results " />
3731 < meta
3832 name ="twitter:description "
39- content ="Interactive dashboard for PhyloBioBixBench -Verified-50 benchmark results — direct, MCQ, and refusal-aware grading across 50 biology questions. "
33+ content ="Interactive dashboard for BixBench -Verified-50 benchmark results — direct, MCQ, and refusal-aware grading across 50 biology questions. "
4034 />
4135
4236 < link rel ="preconnect " href ="https://fonts.googleapis.com " />
5549 <!-- 1. Hero -->
5650 < header class ="hero ">
5751 < p class ="eyebrow ">
58- < a href ="https://ai.bio.xyz/ " class ="eyebrow-link "> ai.bio.xyz</ a >
52+ < a
53+ href ="https://ai.bio.xyz/ "
54+ class ="eyebrow-link "
55+ target ="_blank "
56+ rel ="noopener noreferrer "
57+ > ai.bio.xyz</ a
58+ >
5959 </ p >
60- < h1 > PhyloBioBixBench -Verified-50</ h1 >
60+ < h1 > BixBench -Verified-50</ h1 >
6161 < div class ="hero-meta ">
6262 < span id ="generated-at "> Loading data...</ span >
6363 < span id ="totals-badge "> </ span >
6464 </ div >
6565 < nav class ="hero-links ">
66- < a href ="https://ai.bio.xyz/ " class ="hero-link "> ai.bio.xyz</ a >
67- < a href ="https://github.com/bio-xyz/bio-benchmark " class ="hero-link "
68- > GitHub</ a
66+ < a
67+ href ="https://ai.bio.xyz/ "
68+ class ="hero-link "
69+ target ="_blank "
70+ rel ="noopener noreferrer "
71+ > < span class ="hero-link-icon " aria-hidden ="true ">
72+ < svg viewBox ="0 0 24 24 " focusable ="false ">
73+ < circle cx ="12 " cy ="12 " r ="9 "> </ circle >
74+ < path d ="M3 12h18 "> </ path >
75+ < path d ="M12 3c2.8 2.4 4.5 5.6 4.5 9s-1.7 6.6-4.5 9 "> </ path >
76+ < path d ="M12 3c-2.8 2.4-4.5 5.6-4.5 9s1.7 6.6 4.5 9 "> </ path >
77+ </ svg >
78+ </ span >
79+ < span class ="hero-link-label "> ai.bio.xyz</ span > </ a
80+ >
81+ < a
82+ href ="https://github.com/bio-xyz/bio-benchmark "
83+ class ="hero-link "
84+ target ="_blank "
85+ rel ="noopener noreferrer "
86+ > < span class ="hero-link-icon " aria-hidden ="true ">
87+ < svg viewBox ="0 0 24 24 " focusable ="false ">
88+ < path
89+ d ="M9 19c-5 1.5-5-2.5-7-3m14 6v-3.87a3.36 3.36 0 0 0-.94-2.61c3.14-.35 6.44-1.54 6.44-7A5.44 5.44 0 0 0 20 4.77 5.07 5.07 0 0 0 19.91 1S18.73.65 16 2.48a13.38 13.38 0 0 0-7 0C6.27.65 5.09 1 5.09 1A5.07 5.07 0 0 0 5 4.77a5.44 5.44 0 0 0-1.5 3.78c0 5.42 3.3 6.61 6.44 7A3.36 3.36 0 0 0 9 18.13V22 "
90+ > </ path >
91+ </ svg >
92+ </ span >
93+ < span class ="hero-link-label "> GitHub</ span > </ a
94+ >
95+ < a
96+ href ="https://huggingface.co/datasets/phylobio/BixBench-Verified-50 "
97+ class ="hero-link "
98+ target ="_blank "
99+ rel ="noopener noreferrer "
100+ > < span class ="hero-link-icon " aria-hidden ="true ">
101+ < svg viewBox ="0 0 24 24 " focusable ="false ">
102+ < path d ="M4 19h16 "> </ path >
103+ < path d ="M6 15.5 10.5 11l3.5 3.5L19 8 "> </ path >
104+ < path d ="m16 8 3-.2-.2 3 "> </ path >
105+ </ svg >
106+ </ span >
107+ < span class ="hero-link-label "> Hugging Face</ span > </ a
69108 >
70109 </ nav >
71110 </ header >
@@ -83,13 +122,19 @@ <h2>Grading Modes</h2>
83122 aria-label ="Headline metrics "
84123 > </ section >
85124
86- <!-- 4. Key Takeaways -->
125+ <!-- 4. Scientific Reporting -->
126+ < section class ="insight-panel " id ="reporting-section ">
127+ < h2 > Scientific Reporting</ h2 >
128+ < div id ="reporting-content " class ="reporting-grid "> </ div >
129+ </ section >
130+
131+ <!-- 5. Key Takeaways -->
87132 < section class ="insight-panel " id ="takeaways-section ">
88133 < h2 > Key Takeaways</ h2 >
89134 < ol id ="takeaways-list " class ="takeaway-list "> </ ol >
90135 </ section >
91136
92- <!-- 5 . Source Runs + Downloads -->
137+ <!-- 6 . Source Runs + Downloads -->
93138 < section class ="panel source-panel ">
94139 < div >
95140 < h2 > Scoped Source Runs</ h2 >
@@ -111,7 +156,7 @@ <h2>Downloads</h2>
111156 </ div >
112157 </ section >
113158
114- <!-- 6 . Performance Image -->
159+ <!-- 7 . Performance Image -->
115160 < section class ="panel ">
116161 < h2 > Existing Performance Figure</ h2 >
117162 < p class ="panel-intro ">
@@ -125,7 +170,7 @@ <h2>Existing Performance Figure</h2>
125170 />
126171 </ section >
127172
128- <!-- 7 . Charts Grid (6 existing + 1 new task group chart) -->
173+ <!-- 8 . Charts Grid (6 existing + 1 new task group chart) -->
129174 < section class ="charts ">
130175 < article class ="panel chart-panel ">
131176 < h2 > Overall Accuracy</ h2 >
@@ -179,7 +224,7 @@ <h2>Task Group Accuracy (32 groups)</h2>
179224 </ article >
180225 </ section >
181226
182- <!-- 8 . Strengths & Weaknesses -->
227+ <!-- 9 . Strengths & Weaknesses -->
183228 < section class ="insight-panel " id ="sw-section ">
184229 < h2 > Strengths & Weaknesses</ h2 >
185230 < div class ="sw-grid ">
@@ -194,7 +239,7 @@ <h3>Weaknesses</h3>
194239 </ div >
195240 </ section >
196241
197- <!-- 9 . All 50 Questions Table -->
242+ <!-- 10 . All 50 Questions Table -->
198243 < section class ="panel ">
199244 < h2 > All 50 Questions</ h2 >
200245 < div class ="table-scroll ">
@@ -215,7 +260,7 @@ <h2>All 50 Questions</h2>
215260 </ div >
216261 </ section >
217262
218- <!-- 10 . All 32 Task Groups Table -->
263+ <!-- 11 . All 32 Task Groups Table -->
219264 < section class ="panel ">
220265 < h2 > All 32 Task Groups</ h2 >
221266 < div class ="table-scroll ">
@@ -235,13 +280,13 @@ <h2>All 32 Task Groups</h2>
235280 </ div >
236281 </ section >
237282
238- <!-- 11 . MCQ Rescue Detail -->
283+ <!-- 12 . MCQ Rescue Detail -->
239284 < section class ="panel " id ="rescue-detail-section ">
240285 < h2 > MCQ Rescue Detail</ h2 >
241286 < div id ="rescue-detail-content "> </ div >
242287 </ section >
243288
244- <!-- 12 . Cross-Run Variability -->
289+ <!-- 13 . Cross-Run Variability -->
245290 < section class ="panel " id ="variability-section ">
246291 < h2 > Cross-Run Variability</ h2 >
247292 < p class ="panel-intro ">
@@ -256,12 +301,6 @@ <h2>Cross-Run Variability</h2>
256301 </ div >
257302 </ section >
258303
259- <!-- 13. Scientific Reporting -->
260- < section class ="insight-panel " id ="reporting-section ">
261- < h2 > Scientific Reporting</ h2 >
262- < div id ="reporting-content " class ="reporting-grid "> </ div >
263- </ section >
264-
265304 <!-- 14. Best Repeats Table (all 11) -->
266305 < section class ="panel ">
267306 < h2 > Best Repeats</ h2 >
0 commit comments