1- import { Github , ChevronDown , Trophy , Search , Filter , ArrowUpRight , Terminal } from "lucide-react" ;
1+ import { Github , ChevronDown , Trophy , Search , Filter , ArrowUpRight , Terminal , ListTree } from "lucide-react" ;
22import { clsx , type ClassValue } from "clsx" ;
33import { twMerge } from "tailwind-merge" ;
4- import resultData from "./result-data" ;
4+ import Link from "next/link" ;
5+ import tasksData from "../tasks.json" ;
56
67function cn ( ...inputs : ClassValue [ ] ) {
78 return twMerge ( clsx ( inputs ) ) ;
@@ -54,30 +55,69 @@ function ScoreCell({ value }: { value: number }) {
5455}
5556
5657export default async function Home ( ) {
57- const data = Object . entries ( resultData . evals )
58- . map ( ( [ key , val ] , index ) => {
59- let parts = key . split ( "__" ) ;
60- let agentRaw = parts [ 0 ] || "Unknown" ;
61- let model = parts [ 1 ] || "Unknown" ;
62-
63- if ( parts . length === 2 ) {
64- agentRaw = "Codex" ;
65- model = parts [ 0 ] ;
58+ // Process tasks.json to compute leaderboard stats
59+ const statsMap = new Map < string , {
60+ passed : number ;
61+ total : number ;
62+ totalLatency : number ;
63+ latencyCount : number ;
64+ model : string ;
65+ agent : string ;
66+ } > ( ) ;
67+
68+ Object . values ( tasksData ) . forEach ( ( trials : any [ ] ) => {
69+ trials . forEach ( trial => {
70+ // Simplify model name
71+ const modelName = trial . model . split ( '/' ) . pop ( ) || trial . model ;
72+ const agentName = trial . agent . charAt ( 0 ) . toUpperCase ( ) + trial . agent . slice ( 1 ) ;
73+
74+ const key = `${ modelName } -${ agentName } ` ;
75+
76+ if ( ! statsMap . has ( key ) ) {
77+ statsMap . set ( key , {
78+ passed : 0 ,
79+ total : 0 ,
80+ totalLatency : 0 ,
81+ latencyCount : 0 ,
82+ model : modelName ,
83+ agent : agentName
84+ } ) ;
6685 }
86+
87+ const stats = statsMap . get ( key ) ! ;
88+ stats . total += 1 ;
89+ if ( trial . passed ) {
90+ stats . passed += 1 ;
91+ }
92+ if ( trial . latency_sec ) {
93+ stats . totalLatency += trial . latency_sec ;
94+ stats . latencyCount += 1 ;
95+ }
96+ } ) ;
97+ } ) ;
6798
68- const agent = agentRaw . charAt ( 0 ) . toUpperCase ( ) + agentRaw . slice ( 1 ) ;
69-
99+ const data = Array . from ( statsMap . values ( ) )
100+ . map ( ( stats , index ) => {
101+ const successRate = stats . total > 0 ? Math . round ( ( stats . passed / stats . total ) * 100 ) : 0 ;
102+ const avgLatency = stats . latencyCount > 0 ? stats . totalLatency / stats . latencyCount : 0 ;
70103 return {
71104 id : String ( index + 1 ) ,
72- model : model ,
73- agent : agent ,
74- passedEvals : Math . round ( val . metrics [ 0 ] . mean * val . n_trials ) ,
75- successRate : Math . round ( val . metrics [ 0 ] . mean * 100 ) ,
76- isNew : index === 0 ,
105+ model : stats . model ,
106+ agent : stats . agent ,
107+ passedEvals : stats . passed ,
108+ successRate : successRate ,
109+ avgLatency : avgLatency ,
110+ isNew : index === 0 , // This logic might need updating if 'isNew' has a specific meaning
77111 } ;
78112 } )
79113 . sort ( ( a , b ) => b . successRate - a . successRate ) ;
80114
115+ // Re-assign IDs based on sorted order and adjust isNew
116+ data . forEach ( ( item , index ) => {
117+ item . id = String ( index + 1 ) ;
118+ item . isNew = index === 0 ; // Keeping the original visual effect for the top item
119+ } ) ;
120+
81121 return (
82122 < div className = "min-h-screen bg-background text-foreground font-sans selection:bg-primary/20" >
83123 { /* Background Gradient Effect */ }
@@ -108,24 +148,32 @@ export default async function Home() {
108148 < div className = "h-4 w-px bg-border" > </ div >
109149 < span className = "flex items-center gap-2" >
110150 < Terminal className = "w-4 h-4" />
111- < span > Last run: { new Date ( resultData . startedAt ) . toLocaleDateString ( ) } </ span >
151+ < span > Last run: { new Date ( ) . toLocaleDateString ( ) } </ span >
112152 </ span >
113153 </ div >
114154 </ div >
115155
116156 { /* Controls & Filters */ }
117- < div className = "flex flex-col md:flex-row justify-between items-center mb-6 gap-4" >
157+ < div className = "flex flex-col md:flex-row justify-between items-start md:items- center mb-6 gap-4" >
118158 < h2 className = "text-2xl font-semibold flex items-center gap-2" >
119159 Agent Performance
120160 </ h2 >
121161
122- < div className = "flex items-center gap-3" >
123- < div className = "relative" >
162+ < div className = "flex flex-col sm:flex-row items-stretch sm:items-center gap-4 w-full md:w-auto" >
163+ < Link
164+ href = "./tasks"
165+ className = "flex items-center justify-center gap-2 px-4 py-2 border border-border bg-card/50 hover:bg-secondary/50 text-foreground rounded-lg text-sm font-medium transition-colors shadow-sm backdrop-blur-sm whitespace-nowrap"
166+ >
167+ < ListTree className = "w-4 h-4" />
168+ View Tasks
169+ </ Link >
170+
171+ < div className = "relative w-full sm:w-auto" >
124172 < Search className = "absolute left-3 top-1/2 -translate-y-1/2 w-4 h-4 text-muted-foreground" />
125173 < input
126174 type = "text"
127175 placeholder = "Search agents..."
128- className = "pl-9 pr-4 py-2 bg-card border border-border rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-primary/20 w-64 "
176+ className = "pl-9 pr-4 py-2 bg-card border border-border rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-primary/20 w-full sm:w-64 transition-all "
129177 />
130178 </ div >
131179 </ div >
@@ -137,10 +185,11 @@ export default async function Home() {
137185 < table className = "w-full text-sm text-left" >
138186 < thead className = "bg-secondary/50 text-muted-foreground font-medium border-b border-border" >
139187 < tr >
140- < th className = "px-6 py-4 w-[30 %]" > Model</ th >
141- < th className = "px-6 py-4 w-[20 %]" > Agent</ th >
188+ < th className = "px-6 py-4 w-[25 %]" > Model</ th >
189+ < th className = "px-6 py-4 w-[15 %]" > Agent</ th >
142190 < th className = "px-6 py-4 w-[15%] text-center" > Passed</ th >
143- < th className = "px-6 py-4 w-[35%]" > Success Rate</ th >
191+ < th className = "px-6 py-4 w-[15%] text-right" > Avg Latency</ th >
192+ < th className = "px-6 py-4 w-[30%]" > Success Rate</ th >
144193 </ tr >
145194 </ thead >
146195 < tbody className = "divide-y divide-border/50" >
@@ -174,8 +223,13 @@ export default async function Home() {
174223 < td className = "px-6 py-4 text-center text-muted-foreground font-mono" >
175224 { row . passedEvals }
176225 </ td >
226+ < td className = "px-6 py-4 text-right text-muted-foreground font-mono" >
227+ { row . avgLatency > 0 ? `${ row . avgLatency . toFixed ( 1 ) } s` : '-' }
228+ </ td >
177229 < td className = "px-6 py-4" >
178- < ScoreCell value = { row . successRate } />
230+ < Link href = { `./tasks?model=${ encodeURIComponent ( row . model ) } &agent=${ encodeURIComponent ( row . agent . toLowerCase ( ) ) } ` } className = "block w-full hover:opacity-80 transition-opacity" >
231+ < ScoreCell value = { row . successRate } />
232+ </ Link >
179233 </ td >
180234 </ tr >
181235 ) ) }
0 commit comments