Skip to content

Commit c0b7e9e

Browse files
authored
Merge pull request #5 from TabbyML/feat/add-tasks-page
feat: add tasks page and task aggregation script
2 parents 25b5b72 + c4b43d9 commit c0b7e9e

File tree

9 files changed

+5150
-47
lines changed

site/app/page.tsx

Lines changed: 81 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,8 @@
1-
import { Github, ChevronDown, Trophy, Search, Filter, ArrowUpRight, Terminal } from "lucide-react";
1+
import { Github, ChevronDown, Trophy, Search, Filter, ArrowUpRight, Terminal, ListTree } from "lucide-react";
22
import { clsx, type ClassValue } from "clsx";
33
import { twMerge } from "tailwind-merge";
4-
import resultData from "./result-data";
4+
import Link from "next/link";
5+
import tasksData from "../tasks.json";
56

67
function cn(...inputs: ClassValue[]) {
78
return twMerge(clsx(inputs));
@@ -54,30 +55,69 @@ function ScoreCell({ value }: { value: number }) {
5455
}
5556

5657
export default async function Home() {
57-
const data = Object.entries(resultData.evals)
58-
.map(([key, val], index) => {
59-
let parts = key.split("__");
60-
let agentRaw = parts[0] || "Unknown";
61-
let model = parts[1] || "Unknown";
62-
63-
if (parts.length === 2) {
64-
agentRaw = "Codex";
65-
model = parts[0];
58+
// Process tasks.json to compute leaderboard stats
59+
const statsMap = new Map<string, {
60+
passed: number;
61+
total: number;
62+
totalLatency: number;
63+
latencyCount: number;
64+
model: string;
65+
agent: string;
66+
}>();
67+
68+
Object.values(tasksData).forEach((trials: any[]) => {
69+
trials.forEach(trial => {
70+
// Simplify model name
71+
const modelName = trial.model.split('/').pop() || trial.model;
72+
const agentName = trial.agent.charAt(0).toUpperCase() + trial.agent.slice(1);
73+
74+
const key = `${modelName}-${agentName}`;
75+
76+
if (!statsMap.has(key)) {
77+
statsMap.set(key, {
78+
passed: 0,
79+
total: 0,
80+
totalLatency: 0,
81+
latencyCount: 0,
82+
model: modelName,
83+
agent: agentName
84+
});
6685
}
86+
87+
const stats = statsMap.get(key)!;
88+
stats.total += 1;
89+
if (trial.passed) {
90+
stats.passed += 1;
91+
}
92+
if (trial.latency_sec) {
93+
stats.totalLatency += trial.latency_sec;
94+
stats.latencyCount += 1;
95+
}
96+
});
97+
});
6798

68-
const agent = agentRaw.charAt(0).toUpperCase() + agentRaw.slice(1);
69-
99+
const data = Array.from(statsMap.values())
100+
.map((stats, index) => {
101+
const successRate = stats.total > 0 ? Math.round((stats.passed / stats.total) * 100) : 0;
102+
const avgLatency = stats.latencyCount > 0 ? stats.totalLatency / stats.latencyCount : 0;
70103
return {
71104
id: String(index + 1),
72-
model: model,
73-
agent: agent,
74-
passedEvals: Math.round(val.metrics[0].mean * val.n_trials),
75-
successRate: Math.round(val.metrics[0].mean * 100),
76-
isNew: index === 0,
105+
model: stats.model,
106+
agent: stats.agent,
107+
passedEvals: stats.passed,
108+
successRate: successRate,
109+
avgLatency: avgLatency,
110+
isNew: index === 0, // This logic might need updating if 'isNew' has a specific meaning
77111
};
78112
})
79113
.sort((a, b) => b.successRate - a.successRate);
80114

115+
// Re-assign IDs based on sorted order and adjust isNew
116+
data.forEach((item, index) => {
117+
item.id = String(index + 1);
118+
item.isNew = index === 0; // Keeping the original visual effect for the top item
119+
});
120+
81121
return (
82122
<div className="min-h-screen bg-background text-foreground font-sans selection:bg-primary/20">
83123
{/* Background Gradient Effect */}
@@ -108,24 +148,32 @@ export default async function Home() {
108148
<div className="h-4 w-px bg-border"></div>
109149
<span className="flex items-center gap-2">
110150
<Terminal className="w-4 h-4" />
111-
<span>Last run: {new Date(resultData.startedAt).toLocaleDateString()}</span>
151+
<span>Last run: {new Date().toLocaleDateString()}</span>
112152
</span>
113153
</div>
114154
</div>
115155

116156
{/* Controls & Filters */}
117-
<div className="flex flex-col md:flex-row justify-between items-center mb-6 gap-4">
157+
<div className="flex flex-col md:flex-row justify-between items-start md:items-center mb-6 gap-4">
118158
<h2 className="text-2xl font-semibold flex items-center gap-2">
119159
Agent Performance
120160
</h2>
121161

122-
<div className="flex items-center gap-3">
123-
<div className="relative">
162+
<div className="flex flex-col sm:flex-row items-stretch sm:items-center gap-4 w-full md:w-auto">
163+
<Link
164+
href="./tasks"
165+
className="flex items-center justify-center gap-2 px-4 py-2 border border-border bg-card/50 hover:bg-secondary/50 text-foreground rounded-lg text-sm font-medium transition-colors shadow-sm backdrop-blur-sm whitespace-nowrap"
166+
>
167+
<ListTree className="w-4 h-4" />
168+
View Tasks
169+
</Link>
170+
171+
<div className="relative w-full sm:w-auto">
124172
<Search className="absolute left-3 top-1/2 -translate-y-1/2 w-4 h-4 text-muted-foreground" />
125173
<input
126174
type="text"
127175
placeholder="Search agents..."
128-
className="pl-9 pr-4 py-2 bg-card border border-border rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-primary/20 w-64"
176+
className="pl-9 pr-4 py-2 bg-card border border-border rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-primary/20 w-full sm:w-64 transition-all"
129177
/>
130178
</div>
131179
</div>
@@ -137,10 +185,11 @@ export default async function Home() {
137185
<table className="w-full text-sm text-left">
138186
<thead className="bg-secondary/50 text-muted-foreground font-medium border-b border-border">
139187
<tr>
140-
<th className="px-6 py-4 w-[30%]">Model</th>
141-
<th className="px-6 py-4 w-[20%]">Agent</th>
188+
<th className="px-6 py-4 w-[25%]">Model</th>
189+
<th className="px-6 py-4 w-[15%]">Agent</th>
142190
<th className="px-6 py-4 w-[15%] text-center">Passed</th>
143-
<th className="px-6 py-4 w-[35%]">Success Rate</th>
191+
<th className="px-6 py-4 w-[15%] text-right">Avg Latency</th>
192+
<th className="px-6 py-4 w-[30%]">Success Rate</th>
144193
</tr>
145194
</thead>
146195
<tbody className="divide-y divide-border/50">
@@ -174,8 +223,13 @@ export default async function Home() {
174223
<td className="px-6 py-4 text-center text-muted-foreground font-mono">
175224
{row.passedEvals}
176225
</td>
226+
<td className="px-6 py-4 text-right text-muted-foreground font-mono">
227+
{row.avgLatency > 0 ? `${row.avgLatency.toFixed(1)}s` : '-'}
228+
</td>
177229
<td className="px-6 py-4">
178-
<ScoreCell value={row.successRate} />
230+
<Link href={`./tasks?model=${encodeURIComponent(row.model)}&agent=${encodeURIComponent(row.agent.toLowerCase())}`} className="block w-full hover:opacity-80 transition-opacity">
231+
<ScoreCell value={row.successRate} />
232+
</Link>
179233
</td>
180234
</tr>
181235
))}

site/app/result-data.ts

Lines changed: 0 additions & 18 deletions
This file was deleted.

0 commit comments

Comments
 (0)