Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 25 additions & 3 deletions site/app/(home)/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,17 @@ import tasksData from "../../tasks.json";
import zealtConfig from "../../../zealt.json";
import LeaderboardTable, { type LeaderboardEntry } from "./components/leaderboard-table";

type TaskTrial = {
agent: string;
model: string;
passed: boolean;
latency_sec: number | null;
};

type TaskValue = {
trials?: TaskTrial[];
};

export default function Home() {
// Process tasks.json to compute leaderboard stats directly on the server
const statsMap = new Map<string, {
Expand All @@ -14,8 +25,16 @@ export default function Home() {
agent: string;
}>();

Object.values(tasksData).forEach((trials: any[]) => {
trials.forEach(trial => {
Object.values(tasksData as Record<string, unknown>).forEach((taskValue) => {
let trials: TaskTrial[] = [];
if (Array.isArray(taskValue)) {
trials = taskValue as TaskTrial[];
} else if (typeof taskValue === "object" && taskValue !== null) {
const task = taskValue as TaskValue;
trials = Array.isArray(task.trials) ? task.trials : [];
}

trials.forEach((trial) => {
// Simplify model name
const modelName = trial.model.split('/').pop() || trial.model;
const agentName = trial.agent.charAt(0).toUpperCase() + trial.agent.slice(1);
Expand All @@ -33,7 +52,10 @@ export default function Home() {
});
}

const stats = statsMap.get(key)!;
const stats = statsMap.get(key);
if (!stats) {
return;
}
stats.total += 1;
if (trial.passed) {
stats.passed += 1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ export function TrajectoryPage({ title, trajectoryUrl, fallbackUrl }: Trajectory

useEffect(() => {
if (!trajectoryUrl) {
window.location.replace(fallbackUrl);
if (fallbackUrl) {
window.location.replace(fallbackUrl);
}
return;
}

Expand All @@ -32,7 +34,9 @@ export function TrajectoryPage({ title, trajectoryUrl, fallbackUrl }: Trajectory
};

const handleIframeError = () => {
window.location.replace(fallbackUrl);
if (fallbackUrl) {
window.location.replace(fallbackUrl);
}
}

if (trajectoryUrl) {
Expand Down
14 changes: 12 additions & 2 deletions site/app/tasks/[name]/[jobId]/trajectory/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,12 @@ function isTrialEntry(value: unknown): value is TrialEntry {
}

function findTrialEntry(taskName: string, jobId: string): TrialEntry | null {
for (const trials of Object.values(tasksData as Record<string, unknown>)) {
for (const task of Object.values(tasksData as Record<string, unknown>)) {
if (typeof task !== "object" || task === null) {
continue;
}

const trials = (task as { trials?: unknown }).trials;
if (!Array.isArray(trials)) {
continue;
}
Expand Down Expand Up @@ -84,7 +89,12 @@ export const dynamicParams = false;
export function generateStaticParams(): RouteParams[] {
const params: RouteParams[] = [];

for (const trials of Object.values(tasksData as Record<string, unknown>)) {
for (const task of Object.values(tasksData as Record<string, unknown>)) {
if (typeof task !== "object" || task === null) {
continue;
}

const trials = (task as { trials?: unknown }).trials;
if (!Array.isArray(trials)) {
continue;
}
Expand Down
181 changes: 157 additions & 24 deletions site/app/tasks/page.tsx
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
"use client";

import { useState, useMemo, useEffect, Suspense } from "react";
import { Check, X as XIcon, Search, AlertTriangle, ArrowUpDown, ArrowUp, ArrowDown, Filter, X } from "lucide-react";
import { useState, useMemo, useEffect, Suspense, useCallback } from "react";
import {
Check,
X as XIcon,
Search,
AlertTriangle,
ArrowUpDown,
ArrowUp,
ArrowDown,
Filter,
X,
ExternalLink,
} from "lucide-react";
import Link from "next/link";
import { useRouter, useSearchParams, usePathname } from "next/navigation";
import { clsx, type ClassValue } from "clsx";
Expand All @@ -13,17 +24,35 @@ import {
HoverCardContent,
HoverCardTrigger,
} from "@/components/ui/hover-card";
import {
Sheet,
SheetContent,
SheetDescription,
SheetHeader,
SheetTitle,
} from "@/components/ui/sheet";
import {
Drawer,
DrawerContent,
DrawerDescription,
DrawerHeader,
DrawerTitle,
} from "@/components/ui/drawer";
import { Button } from "@/components/ui/button";
import { Skeleton } from "@/components/ui/skeleton";
import { MultiSelect } from "./components/multi-select";
import { BackToTop } from "./components/back-to-top";


function cn(...inputs: ClassValue[]) {
return twMerge(clsx(inputs));
}

// Convert object to array and sort by task name
const tasksData = Object.entries(tasksDataRaw).map(([taskName, trials]) => {
const tasksData = Object.entries(tasksDataRaw).map(([taskName, { trials, instruction }]) => {
return {
taskName,
instruction,
trials: (trials as any[]).map(t => {
const trialNameParts = String(t.trial_name ?? "").split("__");
const taskName = trialNameParts[0] || "";
Expand All @@ -49,9 +78,24 @@ const allTrialsFlat = tasksData.flatMap(task =>
);

const allModels = Array.from(new Set(allTrialsFlat.map(tr => tr.model)));
const allAgents = Array.from(new Set(allTrialsFlat.map(tr => tr.agent)));
const allCombos = Array.from(new Set(allTrialsFlat.map(tr => `${tr.model} (${tr.agent})`))).sort();

function useMediaQuery(query: string) {
const [matches, setMatches] = useState(false);

useEffect(() => {
const media = window.matchMedia(query);
setMatches(media.matches);

const listener = (event: MediaQueryListEvent) => setMatches(event.matches);
media.addEventListener("change", listener);

return () => media.removeEventListener("change", listener);
}, [query]);

return matches;
}

function TasksContent() {
const router = useRouter();
const pathname = usePathname();
Expand All @@ -69,18 +113,13 @@ function TasksContent() {
const selectedAgents = queryAgentStr ? queryAgentStr.split(",") : [];

const [searchQuery, setSearchQuery] = useState(queryQ);
const [selectedTask, setSelectedTask] = useState<string | null>(null);
const [isInstructionOpen, setIsInstructionOpen] = useState(false);
const isDesktop = useMediaQuery("(min-width: 1024px)");

const hasActiveFilters = selectedStatuses.length > 0 || selectedModels.length > 0 || selectedAgents.length > 0 || searchQuery !== "" || querySort !== "default";

// Debounce search query to URL
useEffect(() => {
const timer = setTimeout(() => {
updateParams({ q: searchQuery });
}, 300);
return () => clearTimeout(timer);
}, [searchQuery]);

const updateParams = (updates: Record<string, string | null>) => {
const updateParams = useCallback((updates: Record<string, string | null>) => {
const params = new URLSearchParams(searchParams.toString());
Object.entries(updates).forEach(([key, value]) => {
if (value === null || value === "" || value === "all" || (key === "sort" && value === "default")) {
Expand All @@ -89,8 +128,27 @@ function TasksContent() {
params.set(key, value);
}
});
router.replace(`${pathname}?${params.toString()}`, { scroll: false });
};
const nextQuery = params.toString();
const currentQuery = searchParams.toString();
if (nextQuery === currentQuery) {
return;
}

const nextUrl = nextQuery ? `${pathname}?${nextQuery}` : pathname;
router.replace(nextUrl, { scroll: false });
}, [pathname, router, searchParams]);

// Debounce search query to URL
useEffect(() => {
if (searchQuery === queryQ) {
return;
}

const timer = setTimeout(() => {
updateParams({ q: searchQuery });
}, 300);
return () => clearTimeout(timer);
}, [queryQ, searchQuery, updateParams]);

const activeCombos = useMemo(() => {
const combos = allCombos.filter(combo => {
Expand Down Expand Up @@ -171,7 +229,7 @@ function TasksContent() {
}

const avgDuration = Object.values(comboMap).length > 0
? Object.values(comboMap).reduce((sum: number, t: any) => sum + t.exec_duration, 0) / Object.values(comboMap).length
? Object.values(comboMap).reduce((sum, t) => sum + t.exec_duration, 0) / Object.values(comboMap).length
: 0;

return {
Expand Down Expand Up @@ -217,6 +275,47 @@ function TasksContent() {
return queryOrder === "asc" ? <ArrowUp className="w-3 h-3" /> : <ArrowDown className="w-3 h-3" />;
};

const selectedTaskInstructionUrl = selectedTask
? `${zealtConfig.github_repo}/tree/main/tasks/${selectedTask}/instruction.md`
: "";

const selectedTaskInstruction = selectedTask
? tasksData.find(task => task.taskName === selectedTask)?.instruction || ""
: "";

const instructionBody = (
<>
<div className="min-h-0 flex-1 overflow-y-auto overflow-x-hidden px-5 sm:px-7 py-4 sm:py-5">
{selectedTask ? (
selectedTaskInstruction ? (
<pre className="m-0 p-0 text-xs sm:text-sm leading-6 sm:leading-7 text-foreground/95 whitespace-pre-wrap wrap-break-word font-mono">
{selectedTaskInstruction}
</pre>
) : (
<div className="rounded-lg border border-border/60 bg-secondary/20 px-4 py-3 text-sm text-muted-foreground">
This task has no instruction content.
</div>
)
) : (
<Skeleton className="h-28 w-full" />
)}
</div>

<div className="shrink-0 border-t border-border/60 px-5 sm:px-7 py-3 bg-card/80">
<Button variant="outline" asChild className="h-8 w-full text-xs sm:h-9 sm:w-auto sm:text-sm">
<a
href={selectedTaskInstructionUrl}
target="_blank"
rel="noopener noreferrer"
>
<ExternalLink className="h-4 w-4" />
Open instruction.md
</a>
</Button>
</div>
</>
);

return (
<div className="container mx-auto px-4 sm:px-8 lg:px-12 py-8 max-w-screen-2xl h-[100dvh] flex flex-col overflow-hidden">
{/* Header Section */}
Expand Down Expand Up @@ -326,23 +425,25 @@ function TasksContent() {
</tr>
</thead>
<tbody className="divide-y divide-border/30">
{filteredAndSortedTasks.map((task, index) => (
{filteredAndSortedTasks.map((task) => (
<tr
key={task.taskName}
className="hover:bg-secondary/30 even:bg-secondary/5 transition-colors duration-200 group"
>
<td className="md:sticky left-0 z-20 bg-background border-r border-border/50 p-0 font-mono w-[200px] min-w-[200px] max-w-[200px] md:w-[350px] md:min-w-[350px] md:max-w-[350px] md:shadow-[1px_0_0_rgba(0,0,0,0.05)]">
<a
href={`${zealtConfig.github_repo}/tree/main/tasks/${task.taskName}/instruction.md`}
target="_blank"
rel="noopener noreferrer"
className="group/task flex items-center gap-2 px-3 sm:px-6 py-2 w-full h-full text-foreground hover:text-primary transition-colors focus:outline-none bg-transparent group-even:bg-secondary/5 group-hover:bg-secondary/30"
title={`View ${task.taskName} instruction on GitHub`}
<button
type="button"
onClick={() => {
setSelectedTask(task.taskName);
setIsInstructionOpen(true);
}}
className="group/task flex items-center gap-2 px-3 sm:px-6 py-2 w-full h-full text-foreground hover:text-primary transition-colors focus:outline-none bg-transparent group-even:bg-secondary/5 group-hover:bg-secondary/30 cursor-pointer text-left"
title={`View ${task.taskName} instruction`}
>
<span className="truncate w-full block group-hover/task:underline text-xs md:text-sm">
{task.taskName}
</span>
</a>
</button>
</td>
{activeCombos.map(combo => {
const trial = task.comboMap[combo];
Expand Down Expand Up @@ -421,6 +522,38 @@ function TasksContent() {
)}
</div>

{isDesktop ? (
<Sheet open={isInstructionOpen} onOpenChange={setIsInstructionOpen}>
<SheetContent
side="right"
className="h-full min-h-0 w-[640px] lg:w-[680px] xl:w-[760px] 2xl:w-[820px] max-w-[90vw] border-l border-border/70 bg-card/80 p-0 shadow-[0_0_0_1px_rgba(255,255,255,0.04),0_24px_80px_rgba(0,0,0,0.55)] backdrop-blur-md data-[state=open]:duration-320 data-[state=closed]:duration-220 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=open]:slide-in-from-right data-[state=closed]:slide-out-to-right data-[state=open]:zoom-in-95 data-[state=closed]:zoom-out-95"
>
<div className="flex h-full min-h-0 flex-col">
<SheetHeader className="border-b border-border/60 bg-card/80 px-7 py-5 pr-14">
<SheetTitle className="text-base">{selectedTask || "Task Instruction"}</SheetTitle>
<SheetDescription className="sr-only">{selectedTask}</SheetDescription>
</SheetHeader>
{instructionBody}
</div>
</SheetContent>
</Sheet>
) : (
<Drawer open={isInstructionOpen} onOpenChange={setIsInstructionOpen} direction="bottom">
<DrawerContent className="inset-x-0 bottom-0 h-[76dvh] max-h-[76dvh] rounded-t-2xl border-t border-border/70 bg-card/95 p-0">
<div className="mx-auto mt-3 h-1.5 w-14 rounded-full bg-muted-foreground/40" />
<div className="flex h-full min-h-0 flex-col">
<DrawerHeader className="border-b border-border/60 px-5 pb-4">
<DrawerTitle className="text-base">
{selectedTask || "Task Instruction"}
</DrawerTitle>
<SheetDescription className="sr-only">{selectedTask}</SheetDescription>
</DrawerHeader>
{instructionBody}
</div>
</DrawerContent>
</Drawer>
)}

<BackToTop />
</div>
);
Expand Down
3 changes: 3 additions & 0 deletions site/bun.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading