Skip to content

Commit 91a99d1

Browse files
committed
update: build instruction.md
1 parent 388f13f commit 91a99d1

File tree

72 files changed

+1908
-4126
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

72 files changed

+1908
-4126
lines changed

site/app/(home)/page.tsx

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,17 @@ import tasksData from "../../tasks.json";
33
import zealtConfig from "../../../zealt.json";
44
import LeaderboardTable, { type LeaderboardEntry } from "./components/leaderboard-table";
55

6+
type TaskTrial = {
7+
agent: string;
8+
model: string;
9+
passed: boolean;
10+
latency_sec: number | null;
11+
};
12+
13+
type TaskValue = {
14+
trials?: TaskTrial[];
15+
};
16+
617
export default function Home() {
718
// Process tasks.json to compute leaderboard stats directly on the server
819
const statsMap = new Map<string, {
@@ -14,8 +25,16 @@ export default function Home() {
1425
agent: string;
1526
}>();
1627

17-
Object.values(tasksData).forEach((trials: any[]) => {
18-
trials.forEach(trial => {
28+
Object.values(tasksData as Record<string, unknown>).forEach((taskValue) => {
29+
let trials: TaskTrial[] = [];
30+
if (Array.isArray(taskValue)) {
31+
trials = taskValue as TaskTrial[];
32+
} else if (typeof taskValue === "object" && taskValue !== null) {
33+
const task = taskValue as TaskValue;
34+
trials = Array.isArray(task.trials) ? task.trials : [];
35+
}
36+
37+
trials.forEach((trial) => {
1938
// Simplify model name
2039
const modelName = trial.model.split('/').pop() || trial.model;
2140
const agentName = trial.agent.charAt(0).toUpperCase() + trial.agent.slice(1);
@@ -33,7 +52,10 @@ export default function Home() {
3352
});
3453
}
3554

36-
const stats = statsMap.get(key)!;
55+
const stats = statsMap.get(key);
56+
if (!stats) {
57+
return;
58+
}
3759
stats.total += 1;
3860
if (trial.passed) {
3961
stats.passed += 1;

site/app/tasks/[name]/[jobName]/trajectory/page.tsx

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,12 @@ function isTrialEntry(value: unknown): value is TrialEntry {
4747
}
4848

4949
function findTrialEntry(jobName: string, trialName: string): TrialEntry | null {
50-
for (const trials of Object.values(tasksData as Record<string, unknown>)) {
50+
for (const task of Object.values(tasksData as Record<string, unknown>)) {
51+
if (typeof task !== "object" || task === null) {
52+
continue;
53+
}
54+
55+
const trials = (task as { trials?: unknown }).trials;
5156
if (!Array.isArray(trials)) {
5257
continue;
5358
}
@@ -71,7 +76,12 @@ export const dynamicParams = false;
7176
export function generateStaticParams(): RouteParams[] {
7277
const params: RouteParams[] = [];
7378

74-
for (const trials of Object.values(tasksData as Record<string, unknown>)) {
79+
for (const task of Object.values(tasksData as Record<string, unknown>)) {
80+
if (typeof task !== "object" || task === null) {
81+
continue;
82+
}
83+
84+
const trials = (task as { trials?: unknown }).trials;
7585
if (!Array.isArray(trials)) {
7686
continue;
7787
}

site/app/tasks/page.tsx

Lines changed: 86 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@ import {
1515
} from "lucide-react";
1616
import Link from "next/link";
1717
import { useRouter, useSearchParams, usePathname } from "next/navigation";
18-
import ReactMarkdown from "react-markdown";
19-
import remarkGfm from "remark-gfm";
2018
import { clsx, type ClassValue } from "clsx";
2119
import { twMerge } from "tailwind-merge";
2220
import tasksDataRaw from "../../tasks.json";
@@ -45,22 +43,81 @@ import { Skeleton } from "@/components/ui/skeleton";
4543
import { MultiSelect } from "./components/multi-select";
4644
import { BackToTop } from "./components/back-to-top";
4745

46+
type TaskTrial = {
47+
job_name: string;
48+
trial_name: string;
49+
trajectory_id?: string;
50+
agent: string;
51+
model: string;
52+
provider: string;
53+
passed: boolean;
54+
reward: number | null;
55+
error: boolean;
56+
latency_sec: number | null;
57+
latency_breakdown?: {
58+
env_setup?: number | null;
59+
agent_setup?: number | null;
60+
agent_exec?: number | null;
61+
verifier?: number | null;
62+
};
63+
tokens: {
64+
input: number;
65+
output: number;
66+
cache: number;
67+
};
68+
};
69+
70+
type TaskEntry = {
71+
instruction: string;
72+
trials: TaskTrial[];
73+
};
74+
75+
type NormalizedTrial = TaskTrial & {
76+
exec_duration: number;
77+
};
78+
79+
type NormalizedTask = {
80+
taskName: string;
81+
instruction: string;
82+
trials: NormalizedTrial[];
83+
};
84+
4885
function cn(...inputs: ClassValue[]) {
4986
return twMerge(clsx(inputs));
5087
}
5188

89+
function normalizeTasksData(raw: unknown): NormalizedTask[] {
90+
if (typeof raw !== "object" || raw === null) {
91+
return [];
92+
}
93+
94+
return Object.entries(raw as Record<string, unknown>).map(([taskName, value]) => {
95+
let instruction = "";
96+
let trials: TaskTrial[] = [];
97+
98+
if (Array.isArray(value)) {
99+
trials = value as TaskTrial[];
100+
} else if (typeof value === "object" && value !== null) {
101+
const taskEntry = value as Partial<TaskEntry>;
102+
instruction = typeof taskEntry.instruction === "string" ? taskEntry.instruction : "";
103+
trials = Array.isArray(taskEntry.trials) ? taskEntry.trials : [];
104+
}
105+
106+
return {
107+
taskName,
108+
instruction,
109+
trials: trials.map((t) => ({
110+
...t,
111+
model: t.model.split('/').pop() || t.model,
112+
agent: t.agent.charAt(0).toUpperCase() + t.agent.slice(1),
113+
exec_duration: t.latency_breakdown?.agent_exec || t.latency_sec || 0,
114+
})),
115+
};
116+
}).sort((a, b) => a.taskName.localeCompare(b.taskName));
117+
}
118+
52119
// Convert object to array and sort by task name
53-
const tasksData = Object.entries(tasksDataRaw).map(([taskName, trials]) => {
54-
return {
55-
taskName,
56-
trials: (trials as any[]).map(t => ({
57-
...t,
58-
model: t.model.split('/').pop() || t.model,
59-
agent: t.agent.charAt(0).toUpperCase() + t.agent.slice(1),
60-
exec_duration: t.latency_breakdown?.agent_exec || t.latency_sec || 0
61-
})),
62-
};
63-
}).sort((a, b) => a.taskName.localeCompare(b.taskName));
120+
const tasksData = normalizeTasksData(tasksDataRaw);
64121

65122
const allTrialsFlat = tasksData.flatMap(task =>
66123
task.trials.map(trial => ({
@@ -70,9 +127,7 @@ const allTrialsFlat = tasksData.flatMap(task =>
70127
);
71128

72129
const allModels = Array.from(new Set(allTrialsFlat.map(tr => tr.model)));
73-
const allAgents = Array.from(new Set(allTrialsFlat.map(tr => tr.agent)));
74130
const allCombos = Array.from(new Set(allTrialsFlat.map(tr => `${tr.model} (${tr.agent})`))).sort();
75-
const basePath = (process.env.NEXT_PUBLIC_BASE_PATH || "").replace(/\/$/, "");
76131

77132
function useMediaQuery(query: string) {
78133
const [matches, setMatches] = useState(false);
@@ -109,9 +164,6 @@ function TasksContent() {
109164
const [searchQuery, setSearchQuery] = useState(queryQ);
110165
const [selectedTask, setSelectedTask] = useState<string | null>(null);
111166
const [isInstructionOpen, setIsInstructionOpen] = useState(false);
112-
const [instructionContent, setInstructionContent] = useState("");
113-
const [instructionLoading, setInstructionLoading] = useState(false);
114-
const [instructionError, setInstructionError] = useState<string | null>(null);
115167
const isDesktop = useMediaQuery("(min-width: 1024px)");
116168

117169
const hasActiveFilters = selectedStatuses.length > 0 || selectedModels.length > 0 || selectedAgents.length > 0 || searchQuery !== "" || querySort !== "default";
@@ -124,44 +176,6 @@ function TasksContent() {
124176
return () => clearTimeout(timer);
125177
}, [searchQuery]);
126178

127-
useEffect(() => {
128-
if (!isInstructionOpen || !selectedTask) return;
129-
130-
const controller = new AbortController();
131-
const taskName = selectedTask;
132-
133-
async function loadInstruction() {
134-
try {
135-
setInstructionLoading(true);
136-
setInstructionError(null);
137-
setInstructionContent("");
138-
139-
const response = await fetch(
140-
`${basePath}/task-instructions/${encodeURIComponent(taskName)}/instruction.md`,
141-
{ signal: controller.signal }
142-
);
143-
144-
if (!response.ok) {
145-
throw new Error("Failed to load instruction");
146-
}
147-
148-
const markdown = await response.text();
149-
setInstructionContent(markdown);
150-
} catch {
151-
if (controller.signal.aborted) return;
152-
setInstructionError("Unable to load instruction.md for this task.");
153-
} finally {
154-
if (!controller.signal.aborted) {
155-
setInstructionLoading(false);
156-
}
157-
}
158-
}
159-
160-
loadInstruction();
161-
162-
return () => controller.abort();
163-
}, [isInstructionOpen, selectedTask]);
164-
165179
const updateParams = (updates: Record<string, string | null>) => {
166180
const params = new URLSearchParams(searchParams.toString());
167181
Object.entries(updates).forEach(([key, value]) => {
@@ -205,7 +219,7 @@ function TasksContent() {
205219

206220
const filteredAndSortedTasks = useMemo(() => {
207221
const result = tasksData.map(task => {
208-
const comboMap: Record<string, any> = {};
222+
const comboMap: Record<string, NormalizedTrial> = {};
209223
let hasMatchingTrial = false;
210224
let selectedModelMatchesStatus = false;
211225
let hasSelectedModelTrial = false;
@@ -253,7 +267,7 @@ function TasksContent() {
253267
}
254268

255269
const avgDuration = Object.values(comboMap).length > 0
256-
? Object.values(comboMap).reduce((sum: number, t: any) => sum + t.exec_duration, 0) / Object.values(comboMap).length
270+
? Object.values(comboMap).reduce((sum, t) => sum + t.exec_duration, 0) / Object.values(comboMap).length
257271
: 0;
258272

259273
return {
@@ -303,30 +317,25 @@ function TasksContent() {
303317
? `${zealtConfig.github_repo}/tree/main/tasks/${selectedTask}/instruction.md`
304318
: "";
305319

320+
const selectedTaskInstruction = selectedTask
321+
? tasksData.find(task => task.taskName === selectedTask)?.instruction || ""
322+
: "";
323+
306324
const instructionBody = (
307325
<>
308326
<div className="min-h-0 flex-1 overflow-y-auto overflow-x-hidden px-5 sm:px-7 py-4 sm:py-5">
309-
{instructionLoading ? (
310-
<div className="space-y-3 py-1">
311-
<Skeleton className="h-6 w-[62%]" />
312-
<Skeleton className="h-4 w-full" />
313-
<Skeleton className="h-4 w-[94%]" />
314-
<Skeleton className="h-4 w-[88%]" />
315-
<div className="pt-3 space-y-2">
316-
<Skeleton className="h-4 w-[70%]" />
317-
<Skeleton className="h-24 w-full rounded-lg" />
327+
{selectedTask ? (
328+
selectedTaskInstruction ? (
329+
<pre className="m-0 p-0 text-xs sm:text-sm leading-6 sm:leading-7 text-foreground/95 whitespace-pre-wrap wrap-break-word font-mono">
330+
{selectedTaskInstruction}
331+
</pre>
332+
) : (
333+
<div className="rounded-lg border border-border/60 bg-secondary/20 px-4 py-3 text-sm text-muted-foreground">
334+
This task has no instruction content.
318335
</div>
319-
</div>
320-
) : instructionError ? (
321-
<div className="rounded-lg border border-red-500/30 bg-red-500/5 px-4 py-3 text-sm text-red-600 dark:text-red-400">
322-
{instructionError}
323-
</div>
336+
)
324337
) : (
325-
<article className="text-xs sm:text-sm leading-6 sm:leading-7 text-foreground/95 wrap-break-word [&_a]:text-primary [&_a]:underline [&_blockquote]:border-l-2 [&_blockquote]:border-border [&_blockquote]:pl-4 [&_code]:rounded [&_code]:bg-secondary/70 [&_code]:px-1 [&_code]:py-0.5 [&_h1]:mt-2 [&_h1]:mb-4 [&_h1]:text-xl [&_h1]:font-bold [&_h1]:leading-tight [&_h2]:mt-8 [&_h2]:mb-3 [&_h2]:text-lg [&_h2]:font-semibold [&_h3]:mt-6 [&_h3]:mb-2 [&_h3]:text-base [&_h3]:font-semibold [&_li]:mt-1 [&_ol]:my-3 [&_ol]:list-decimal [&_ol]:pl-6 [&_p]:my-3 [&_pre]:my-4 [&_pre]:overflow-x-auto [&_pre]:rounded-lg [&_pre]:border [&_pre]:border-border/60 [&_pre]:bg-secondary/35 [&_pre]:p-3 [&_pre_code]:bg-transparent [&_pre_code]:p-0 [&_pre_code]:rounded-none [&_pre_code]:shadow-none [&_table]:my-4 [&_table]:w-full [&_table]:border-collapse [&_td]:border [&_td]:border-border/50 [&_td]:px-2 [&_td]:py-1.5 [&_th]:border [&_th]:border-border/50 [&_th]:bg-secondary/40 [&_th]:px-2 [&_th]:py-1.5 [&_ul]:my-3 [&_ul]:list-disc [&_ul]:pl-6">
326-
<ReactMarkdown remarkPlugins={[remarkGfm]}>
327-
{instructionContent || "No markdown content."}
328-
</ReactMarkdown>
329-
</article>
338+
<Skeleton className="h-28 w-full" />
330339
)}
331340
</div>
332341

0 commit comments

Comments
 (0)