Skip to content

Commit f64b018

Browse files
authored
Merge pull request #7 from TabbyML/feat/search-models-agents
feat: support searching models and agents
2 parents f4768bf + 3d1c4c2 commit f64b018

File tree

3 files changed

+282
-243
lines changed

3 files changed

+282
-243
lines changed
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
"use client";
2+
3+
import { useState, useMemo } from "react";
4+
import { Search, Trophy, ListTree } from "lucide-react";
5+
import { clsx, type ClassValue } from "clsx";
6+
import { twMerge } from "tailwind-merge";
7+
import Link from "next/link";
8+
9+
/**
 * Builds one className string from any mix of clsx-style inputs.
 *
 * The inputs are flattened into a single string by `clsx`, and that string is
 * then handed to `twMerge` (tailwind-merge) so that later Tailwind utilities
 * can override earlier conflicting ones.
 *
 * @param inputs - Class values accepted by `clsx`.
 * @returns The merged className string.
 */
function cn(...inputs: ClassValue[]) {
  const joined = clsx(inputs);
  return twMerge(joined);
}
12+
13+
/**
 * One leaderboard row: the aggregated results for a single model/agent pair.
 */
export interface LeaderboardEntry {
  /** Unique row identifier; used as the React key of the table row. */
  id: string;

  /** Model name shown in the "Model" column and matched by the search box. */
  model: string;

  /** Agent name shown in the "Agent" column and matched by the search box. */
  agent: string;

  /** Number of passed evaluations, shown in the "Passed" column. */
  passedEvals: number;

  /** Success rate as a percentage; rendered as `{value}%` and as the bar width. */
  successRate: number;

  /** Average duration in seconds; a value that is not greater than 0 renders as "-". */
  avgLatency: number;

  /** When true, a "NEW" badge is rendered next to the model name. */
  isNew?: boolean;
}
22+
23+
/**
 * Fixed-width horizontal bar whose filled portion is proportional to a percentage.
 *
 * @param value - Fill percentage. Clamped to the 0–100 range; a non-finite
 *   value (NaN, ±Infinity) is treated as 0.
 * @param colorClass - Class name(s) applied to the filled portion of the bar.
 */
function ProgressBar({ value, colorClass }: { value: number; colorClass: string }) {
  // A negative or NaN percentage produces an invalid CSS `width` declaration,
  // which the browser discards -- the inner block then falls back to its
  // default width and the bar would look completely full. Clamp before use.
  const percent = Number.isFinite(value) ? Math.min(100, Math.max(0, value)) : 0;

  return (
    <div className="h-2 w-24 bg-secondary rounded-full overflow-hidden">
      <div
        className={cn("h-full transition-all duration-500 ease-out", colorClass)}
        style={{ width: `${percent}%` }}
      />
    </div>
  );
}
33+
34+
/**
 * Renders a percentage as colored text followed by a matching progress bar.
 *
 * Color tiers (first match wins):
 *   - value >= 90: emerald, bold text
 *   - value >= 75: blue, medium-weight text
 *   - value >= 60: amber
 *   - anything else (including NaN): red
 *
 * @param value - Percentage to display.
 */
function ScoreCell({ value }: { value: number }) {
  // Ordered from the highest threshold down so `find` returns the best tier reached.
  const tiers = [
    { min: 90, bar: "bg-emerald-500", text: "text-emerald-500 font-bold" },
    { min: 75, bar: "bg-blue-500", text: "text-blue-500 font-medium" },
    { min: 60, bar: "bg-amber-500", text: "text-amber-500" },
  ];
  const lowestTier = { bar: "bg-red-500", text: "text-red-500" };

  const { bar, text } = tiers.find((tier) => value >= tier.min) ?? lowestTier;

  return (
    <div className="flex items-center gap-3">
      <span className={cn("w-12 text-right", text)}>{value}%</span>
      <ProgressBar value={value} colorClass={bar} />
    </div>
  );
}
59+
60+
/**
 * Interactive leaderboard: a search box plus a table of model/agent results.
 *
 * Rows are displayed in the order of `data`; the position number and the
 * trophy on the first entry are derived from that order, so `data` is
 * expected to arrive already sorted best-first.
 *
 * @param data - Leaderboard rows to display.
 */
export default function LeaderboardTable({ data }: { data: LeaderboardEntry[] }) {
  const [searchQuery, setSearchQuery] = useState("");

  const visibleRows = useMemo(() => {
    // Attach the rank BEFORE filtering so that a search hit keeps its real
    // position (and only the true #1 gets the trophy) instead of being
    // renumbered from 1 within the filtered list.
    const ranked = data.map((row, index) => ({ row, rank: index + 1 }));

    // Ignore surrounding whitespace; a blank query means "no filter".
    const query = searchQuery.trim().toLowerCase();
    if (!query) {
      return ranked;
    }

    return ranked.filter(
      ({ row }) =>
        row.model.toLowerCase().includes(query) ||
        row.agent.toLowerCase().includes(query)
    );
  }, [data, searchQuery]);

  return (
    <>
      {/* Controls & Filters */}
      <div className="flex flex-col md:flex-row justify-between items-start md:items-center mb-6 gap-4">
        <h2 className="text-2xl font-semibold flex items-center gap-2">
          Agent Performance
        </h2>

        <div className="flex flex-col sm:flex-row items-stretch sm:items-center gap-4 w-full md:w-auto">
          <Link
            href="./tasks"
            className="flex items-center justify-center gap-2 px-4 py-2 border border-border bg-card/50 hover:bg-secondary/50 text-foreground rounded-lg text-sm font-medium transition-colors shadow-sm backdrop-blur-sm whitespace-nowrap"
          >
            <ListTree className="w-4 h-4" />
            View Tasks
          </Link>

          <div className="relative w-full sm:w-auto">
            <Search className="absolute left-3 top-1/2 -translate-y-1/2 w-4 h-4 text-muted-foreground" />
            <input
              type="text"
              aria-label="Search models or agents"
              placeholder="Search models or agents..."
              value={searchQuery}
              onChange={(e) => setSearchQuery(e.target.value)}
              className="pl-9 pr-4 py-2 bg-card border border-border rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-primary/20 w-full sm:w-64 transition-all"
            />
          </div>
        </div>
      </div>

      {/* Leaderboard Table */}
      <div className="rounded-xl border border-border bg-card/50 backdrop-blur-sm shadow-xl overflow-hidden">
        <div className="overflow-x-auto">
          <table className="w-full text-sm text-left">
            <thead className="bg-secondary/50 text-muted-foreground font-medium border-b border-border">
              <tr>
                <th className="px-6 py-4 w-[25%]">Model</th>
                <th className="px-6 py-4 w-[15%]">Agent</th>
                <th className="px-6 py-4 w-[15%] text-center">Passed</th>
                <th className="px-6 py-4 w-[15%] text-right">Avg Duration</th>
                <th className="px-6 py-4 w-[30%]">Success Rate</th>
              </tr>
            </thead>
            <tbody className="divide-y divide-border/50">
              {visibleRows.length === 0 ? (
                <tr>
                  <td colSpan={5} className="px-6 py-8 text-center text-muted-foreground">
                    No results found matching your search.
                  </td>
                </tr>
              ) : (
                visibleRows.map(({ row, rank }) => (
                  <tr
                    key={row.id}
                    className="group hover:bg-secondary/30 transition-colors duration-200"
                  >
                    <td className="px-6 py-4 font-medium text-foreground">
                      {/* The flex container lives on an inner div so the <td> keeps its table-cell layout. */}
                      <div className="flex items-center gap-3">
                        <span className="w-6 text-muted-foreground/50 text-xs">#{rank}</span>
                        <div className="flex flex-col">
                          <span className="flex items-center gap-2">
                            {row.model}
                            {rank === 1 && <Trophy className="w-3 h-3 text-yellow-500" />}
                            {row.isNew && (
                              <span className="px-1.5 py-0.5 rounded text-[10px] font-bold bg-blue-500/10 text-blue-500 border border-blue-500/20">
                                NEW
                              </span>
                            )}
                          </span>
                        </div>
                      </div>
                    </td>
                    <td className="px-6 py-4 text-muted-foreground">
                      <div className="flex items-center gap-2">
                        {/* Avatar: first letter of the agent name */}
                        <div className="w-6 h-6 rounded-full bg-secondary flex items-center justify-center text-[10px] font-bold border border-border">
                          {row.agent.substring(0, 1).toUpperCase()}
                        </div>
                        {row.agent}
                      </div>
                    </td>
                    <td className="px-6 py-4 text-center text-muted-foreground font-mono">
                      {row.passedEvals}
                    </td>
                    <td className="px-6 py-4 text-right text-muted-foreground font-mono">
                      {row.avgLatency > 0 ? `${row.avgLatency.toFixed(1)}s` : '-'}
                    </td>
                    <td className="px-6 py-4">
                      {/* Links to the task list pre-filtered to this model/agent pair */}
                      <Link href={`./tasks?model=${encodeURIComponent(row.model)}&agent=${encodeURIComponent(row.agent.toLowerCase())}`} className="block w-full hover:opacity-80 transition-opacity">
                        <ScoreCell value={row.successRate} />
                      </Link>
                    </td>
                  </tr>
                ))
              )}
            </tbody>
          </table>
        </div>
      </div>
    </>
  );
}

site/app/(home)/page.tsx

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
import { Github, Terminal } from "lucide-react";
2+
import tasksData from "../../tasks.json";
3+
import LeaderboardTable, { type LeaderboardEntry } from "./components/leaderboard-table";
4+
5+
/** Running totals for one model/agent pair while the trials are scanned. */
interface LeaderboardStats {
  passed: number;
  total: number;
  totalLatency: number;
  latencyCount: number;
  model: string;
  agent: string;
}

/**
 * Aggregates raw trial records into leaderboard rows, best success rate first.
 *
 * @param tasks - Object whose values are arrays of trial records. Each record
 *   is read for `model` (string), `agent` (string), `passed` (truthy when the
 *   trial passed) and `latency_sec` (number). Values that are not arrays, and
 *   records without a string `model`/`agent`, are skipped instead of throwing.
 * @returns One entry per model/agent pair, sorted by descending success rate,
 *   with `id` set to the 1-based position and `isNew` set on the first entry.
 */
function buildLeaderboard(tasks: object): LeaderboardEntry[] {
  const statsByPair = new Map<string, LeaderboardStats>();

  const trialGroups: unknown[] = Object.values(tasks);
  for (const group of trialGroups) {
    if (!Array.isArray(group)) continue;

    const trials: unknown[] = group;
    for (const trial of trials) {
      if (typeof trial !== "object" || trial === null) continue;

      const { model, agent, passed, latency_sec: latency } = trial as Record<string, unknown>;
      if (typeof model !== "string" || typeof agent !== "string") continue;

      // Simplify model name: keep only the segment after the last '/'.
      const modelName = model.split('/').pop() || model;
      const agentName = agent.charAt(0).toUpperCase() + agent.slice(1);

      // A JSON-encoded tuple is unambiguous even when a name contains '-'.
      const key = JSON.stringify([modelName, agentName]);

      let stats = statsByPair.get(key);
      if (stats === undefined) {
        stats = {
          passed: 0,
          total: 0,
          totalLatency: 0,
          latencyCount: 0,
          model: modelName,
          agent: agentName,
        };
        statsByPair.set(key, stats);
      }

      stats.total += 1;
      if (passed) {
        stats.passed += 1;
      }
      // Only positive, finite latencies contribute to the average.
      if (typeof latency === "number" && Number.isFinite(latency) && latency > 0) {
        stats.totalLatency += latency;
        stats.latencyCount += 1;
      }
    }
  }

  const rows = Array.from(statsByPair.values(), (stats) => ({
    model: stats.model,
    agent: stats.agent,
    passedEvals: stats.passed,
    successRate: stats.total > 0 ? Math.round((stats.passed / stats.total) * 100) : 0,
    avgLatency: stats.latencyCount > 0 ? stats.totalLatency / stats.latencyCount : 0,
  })).sort((a, b) => b.successRate - a.successRate);

  // IDs are assigned once, after sorting, so they match the displayed order.
  return rows.map((row, index) => ({
    ...row,
    id: String(index + 1),
    isNew: index === 0, // Keeping the original visual effect for the top item
  }));
}

/**
 * Home page: computes the leaderboard from tasks.json during render and passes
 * it to the interactive client-side table.
 */
export default function Home() {
  const data = buildLeaderboard(tasksData);

  return (
    <div className="min-h-screen bg-background text-foreground font-sans selection:bg-primary/20">
      {/* Background Gradient Effect */}
      <div className="fixed inset-0 -z-10 h-full w-full bg-background bg-[radial-gradient(#2a2a2a_1px,transparent_1px)] [background-size:16px_16px] [mask-image:radial-gradient(ellipse_50%_50%_at_50%_50%,#000_70%,transparent_100%)] opacity-20 dark:opacity-40"></div>

      <div className="container mx-auto px-4 py-16 max-w-6xl">
        {/* Header Section */}
        <div className="text-center mb-16 space-y-6">
          <div className="inline-flex items-center justify-center p-1.5 rounded-full bg-secondary/50 backdrop-blur-sm border border-border mb-4">
            <span className="flex h-2 w-2 rounded-full bg-emerald-500 mx-2 animate-pulse"></span>
            <span className="text-xs font-medium px-2">Live Benchmarks</span>
          </div>

          <h1 className="text-5xl md:text-7xl font-bold tracking-tight bg-clip-text text-transparent bg-gradient-to-b from-foreground to-foreground/50 pb-2">
            JJ Benchmark
          </h1>

          <p className="text-lg text-muted-foreground max-w-2xl mx-auto leading-relaxed">
            Performance results of AI coding agents on Jujutsu tasks,
            measuring success rate and execution time with high precision.
          </p>

          <div className="flex items-center justify-center gap-6 text-sm text-muted-foreground pt-4">
            <a href="https://github.com/TabbyML/jj-benchmark" target="_blank" rel="noopener noreferrer" className="flex items-center gap-2 hover:text-primary transition-colors">
              <Github className="w-4 h-4" />
              <span>View on GitHub</span>
            </a>
            <div className="h-4 w-px bg-border"></div>
            <span className="flex items-center gap-2">
              <Terminal className="w-4 h-4" />
              {/* NOTE(review): this is the date the page is rendered, not a timestamp read from tasks.json */}
              <span>Last run: {new Date().toLocaleDateString()}</span>
            </span>
          </div>
        </div>

        {/* Client Component for Interactive Table */}
        <LeaderboardTable data={data} />
      </div>
    </div>
  );
}

0 commit comments

Comments
 (0)