Skip to content

Commit 451f374

Browse files
Shaw and claude committed
fix(benchmark): fix biome formatting and import ordering
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 8d5c9fc commit 451f374

4 files changed

Lines changed: 119 additions & 43 deletions

File tree

packages/app-core/src/benchmark/mock-plugin-base.ts

Lines changed: 66 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,9 @@ function extractCode(prompt: string, label: string): string {
1616

1717
function buildToonResponse(fields: Record<string, string | undefined>): string {
1818
return Object.entries(fields)
19-
.filter((entry): entry is [string, string] =>
20-
typeof entry[1] === "string" && entry[1].length > 0,
19+
.filter(
20+
(entry): entry is [string, string] =>
21+
typeof entry[1] === "string" && entry[1].length > 0,
2122
)
2223
.map(([key, value]) => {
2324
if (value.includes("\n")) {
@@ -32,7 +33,10 @@ function buildToonResponse(fields: Record<string, string | undefined>): string {
3233
}
3334

3435
function createBenchmarkActionToon(prompt: string): string {
35-
if (/Benchmark:\*{0,2}\s*(rlm-bench|rlm_bench)/i.test(prompt) || /RLM benchmark task/i.test(prompt)) {
36+
if (
37+
/Benchmark:\*{0,2}\s*(rlm-bench|rlm_bench)/i.test(prompt) ||
38+
/RLM benchmark task/i.test(prompt)
39+
) {
3640
const answer =
3741
/authorization code is ([A-Z0-9]{8})/i.exec(prompt)?.[1] ??
3842
/encrypted key sequence is ([A-Z0-9]{8})/i.exec(prompt)?.[1] ??
@@ -48,18 +52,28 @@ function createBenchmarkActionToon(prompt: string): string {
4852
});
4953
}
5054

51-
if (/Benchmark:\*{0,2}\s*gaia/i.test(prompt) || /GAIA benchmark task|FINAL ANSWER/i.test(prompt)) {
55+
if (
56+
/Benchmark:\*{0,2}\s*gaia/i.test(prompt) ||
57+
/GAIA benchmark task|FINAL ANSWER/i.test(prompt)
58+
) {
5259
const arithmetic =
5360
/Question:\s*(?:what is\s*)?(-?\d+)\s*([+*x-])\s*(-?\d+)/i.exec(prompt) ??
54-
/Question:\s*(?:what is\s*)?(-?\d+)\s+(times|multiplied by|plus|minus)\s+(-?\d+)/i.exec(prompt);
61+
/Question:\s*(?:what is\s*)?(-?\d+)\s+(times|multiplied by|plus|minus)\s+(-?\d+)/i.exec(
62+
prompt,
63+
);
5564
let answer = "mock-answer";
5665
if (arithmetic) {
5766
const left = Number(arithmetic[1]);
5867
const right = Number(arithmetic[3]);
5968
const op = arithmetic[2].toLowerCase();
6069
if (op === "+" || op === "plus") answer = String(left + right);
6170
if (op === "-" || op === "minus") answer = String(left - right);
62-
if (op === "*" || op === "x" || op === "times" || op === "multiplied by") {
71+
if (
72+
op === "*" ||
73+
op === "x" ||
74+
op === "times" ||
75+
op === "multiplied by"
76+
) {
6377
answer = String(left * right);
6478
}
6579
}
@@ -70,50 +84,78 @@ function createBenchmarkActionToon(prompt: string): string {
7084
});
7185
}
7286

73-
if (/Benchmark:\*{0,2}\s*(hyperliquid_bench|hyperliquid-bench|hyperliquidbench)/i.test(prompt) || /Hyperliquid DEX|HyperliquidBench/i.test(prompt)) {
87+
if (
88+
/Benchmark:\*{0,2}\s*(hyperliquid_bench|hyperliquid-bench|hyperliquidbench)/i.test(
89+
prompt,
90+
) ||
91+
/Hyperliquid DEX|HyperliquidBench/i.test(prompt)
92+
) {
7493
return buildToonResponse({
7594
thought: "Returning a deterministic Hyperliquid plan.",
7695
actions: "REPLY",
7796
text: '{"steps":[{"perp_orders":{"orders":[{"coin":"ETH","side":"buy","tif":"ALO","sz":0.01,"reduceOnly":false,"px":"mid-1%"},{"coin":"BTC","side":"sell","tif":"IOC","sz":0.01,"reduceOnly":true,"px":"mid+1%"}]}},{"usd_class_transfer":{"toPerp":true,"usdc":5}},{"set_leverage":{"coin":"ETH","leverage":3,"cross":false}},{"cancel_all":{"coin":"BTC"}}]}',
7897
});
7998
}
8099

81-
if (/Benchmark:\*{0,2}\s*(vending-bench|vending_bench)/i.test(prompt) || /Vending-Bench|vending machine business/i.test(prompt)) {
82-
const action = /pending orders/i.test(prompt) && !/no pending orders/i.test(prompt)
83-
? '{"action":"ADVANCE_DAY"}'
84-
: '{"action":"PLACE_ORDER","supplier_id":"beverage_dist","items":{"water":12}}';
100+
if (
101+
/Benchmark:\*{0,2}\s*(vending-bench|vending_bench)/i.test(prompt) ||
102+
/Vending-Bench|vending machine business/i.test(prompt)
103+
) {
104+
const action =
105+
/pending orders/i.test(prompt) && !/no pending orders/i.test(prompt)
106+
? '{"action":"ADVANCE_DAY"}'
107+
: '{"action":"PLACE_ORDER","supplier_id":"beverage_dist","items":{"water":12}}';
85108
return buildToonResponse({
86109
thought: "Returning a deterministic Vending-Bench action.",
87110
actions: "REPLY",
88111
text: action,
89112
});
90113
}
91114

92-
if (/Benchmark:\*{0,2}\s*clawbench/i.test(prompt) || /ClawBench|Review my inbox/i.test(prompt)) {
115+
if (
116+
/Benchmark:\*{0,2}\s*clawbench/i.test(prompt) ||
117+
/ClawBench|Review my inbox/i.test(prompt)
118+
) {
93119
return buildToonResponse({
94120
thought: "Returning deterministic ClawBench inbox triage.",
95121
actions: "REPLY",
96122
text: "Inbox triage complete. Boss Q4 report is urgent and needs an EOD draft response. HR benefits enrollment is action-required before January 20. BigCorp client email needs scheduling for the project timeline call. Newsletter is low priority and the shopping promo should be archived. Draft replies are ready for review; please approve before I send anything.",
97123
});
98124
}
99125

100-
if (/Benchmark:\*{0,2}\s*adhdbench/i.test(prompt) || /ADHDBench/i.test(prompt)) {
101-
const currentMessage = /Current user message:\s*([\s\S]*?)(?:\n\n|$)/i.exec(prompt)?.[1]?.toLowerCase() ?? prompt.toLowerCase();
126+
if (
127+
/Benchmark:\*{0,2}\s*adhdbench/i.test(prompt) ||
128+
/ADHDBench/i.test(prompt)
129+
) {
130+
const currentMessage =
131+
/Current user message:\s*([\s\S]*?)(?:\n\n|$)/i
132+
.exec(prompt)?.[1]
133+
?.toLowerCase() ?? prompt.toLowerCase();
102134
let action = "REPLY";
103-
if (/send a message|message to/.test(currentMessage)) action = "SEND_MESSAGE";
135+
if (/send a message|message to/.test(currentMessage))
136+
action = "SEND_MESSAGE";
104137
else if (/mute this|too noisy/.test(currentMessage)) action = "MUTE_ROOM";
105138
else if (/unmute/.test(currentMessage)) action = "UNMUTE_ROOM";
106139
else if (/follow the/.test(currentMessage)) action = "FOLLOW_ROOM";
107-
else if (/stop following|unfollow/.test(currentMessage)) action = "UNFOLLOW_ROOM";
140+
else if (/stop following|unfollow/.test(currentMessage))
141+
action = "UNFOLLOW_ROOM";
108142
else if (/find all|search/.test(currentMessage)) action = "SEARCH_CONTACTS";
109-
else if (/make .* admin|update role/.test(currentMessage)) action = "UPDATE_ROLE";
110-
else if (/remind me|tomorrow/.test(currentMessage)) action = "SCHEDULE_FOLLOW_UP";
111-
else if (/add .* contact|new colleague/.test(currentMessage)) action = "ADD_CONTACT";
112-
else if (/remove .* contact/.test(currentMessage)) action = "REMOVE_CONTACT";
113-
else if (/settings|notification preferences/.test(currentMessage)) action = "UPDATE_SETTINGS";
114-
else if (/reset|start fresh|clear everything/.test(currentMessage)) action = "RESET_SESSION";
115-
else if (/phone number|contact info/.test(currentMessage)) action = "UPDATE_CONTACT_INFO";
116-
else if (/generate .*picture|image/.test(currentMessage)) action = "GENERATE_IMAGE";
143+
else if (/make .* admin|update role/.test(currentMessage))
144+
action = "UPDATE_ROLE";
145+
else if (/remind me|tomorrow/.test(currentMessage))
146+
action = "SCHEDULE_FOLLOW_UP";
147+
else if (/add .* contact|new colleague/.test(currentMessage))
148+
action = "ADD_CONTACT";
149+
else if (/remove .* contact/.test(currentMessage))
150+
action = "REMOVE_CONTACT";
151+
else if (/settings|notification preferences/.test(currentMessage))
152+
action = "UPDATE_SETTINGS";
153+
else if (/reset|start fresh|clear everything/.test(currentMessage))
154+
action = "RESET_SESSION";
155+
else if (/phone number|contact info/.test(currentMessage))
156+
action = "UPDATE_CONTACT_INFO";
157+
else if (/generate .*picture|image/.test(currentMessage))
158+
action = "GENERATE_IMAGE";
117159
else if (/ignore that last/.test(currentMessage)) action = "IGNORE";
118160
else if (/create .*plan/.test(currentMessage)) action = "CREATE_PLAN";
119161
if (["REPLY", "IGNORE", "NONE"].includes(action)) {

packages/app-core/src/benchmark/mock-plugin.ts

Lines changed: 49 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,9 @@ function extractRlmAnswer(prompt: string): string | null {
6464
function extractArithmeticAnswer(prompt: string): string | null {
6565
const match =
6666
/Question:\s*(?:what is\s*)?(-?\d+)\s*([+*x-])\s*(-?\d+)/i.exec(prompt) ??
67-
/Question:\s*(?:what is\s*)?(-?\d+)\s+(times|multiplied by|plus|minus)\s+(-?\d+)/i.exec(prompt);
67+
/Question:\s*(?:what is\s*)?(-?\d+)\s+(times|multiplied by|plus|minus)\s+(-?\d+)/i.exec(
68+
prompt,
69+
);
6870
if (!match) return null;
6971
const left = Number(match[1]);
7072
const op = match[2].toLowerCase();
@@ -149,23 +151,33 @@ function buildClawBenchReplyToon(): string {
149151

150152
function extractAdhdAction(prompt: string): string {
151153
const lower = prompt.toLowerCase();
152-
const messageMatch = /Current user message:\s*([\s\S]*?)(?:\n\n|$)/i.exec(prompt);
154+
const messageMatch = /Current user message:\s*([\s\S]*?)(?:\n\n|$)/i.exec(
155+
prompt,
156+
);
153157
const message = (messageMatch?.[1] ?? prompt).toLowerCase();
154-
if (/what time|hello|hey|how are|favourite color|favorite color|status update/.test(message)) {
158+
if (
159+
/what time|hello|hey|how are|favourite color|favorite color|status update/.test(
160+
message,
161+
)
162+
) {
155163
return "REPLY";
156164
}
157-
if (/send a message|tell alice|message to/.test(message)) return "SEND_MESSAGE";
165+
if (/send a message|tell alice|message to/.test(message))
166+
return "SEND_MESSAGE";
158167
if (/mute this|too noisy/.test(message)) return "MUTE_ROOM";
159168
if (/unmute/.test(message)) return "UNMUTE_ROOM";
160169
if (/follow the/.test(message)) return "FOLLOW_ROOM";
161170
if (/stop following|unfollow/.test(message)) return "UNFOLLOW_ROOM";
162171
if (/find all|search/.test(message)) return "SEARCH_CONTACTS";
163172
if (/make .* admin|update role/.test(message)) return "UPDATE_ROLE";
164-
if (/remind me|follow.?up|tomorrow/.test(message)) return "SCHEDULE_FOLLOW_UP";
173+
if (/remind me|follow.?up|tomorrow/.test(message))
174+
return "SCHEDULE_FOLLOW_UP";
165175
if (/add .* contact|add my new colleague/.test(message)) return "ADD_CONTACT";
166176
if (/remove .* contact/.test(message)) return "REMOVE_CONTACT";
167-
if (/notification preferences|settings/.test(message)) return "UPDATE_SETTINGS";
168-
if (/clear everything|start fresh|reset/.test(message)) return "RESET_SESSION";
177+
if (/notification preferences|settings/.test(message))
178+
return "UPDATE_SETTINGS";
179+
if (/clear everything|start fresh|reset/.test(message))
180+
return "RESET_SESSION";
169181
if (/phone number|contact info/.test(message)) return "UPDATE_CONTACT_INFO";
170182
if (/generate .*picture|image/.test(message)) return "GENERATE_IMAGE";
171183
if (/ignore that last/.test(message)) return "IGNORE";
@@ -175,7 +187,10 @@ function extractAdhdAction(prompt: string): string {
175187

176188
function buildAdhdBenchToon(prompt: string): string {
177189
const action = extractAdhdAction(prompt);
178-
const text = action === "REPLY" ? "Replying directly with the requested information." : `Selected ${action}`;
190+
const text =
191+
action === "REPLY"
192+
? "Replying directly with the requested information."
193+
: `Selected ${action}`;
179194
if (["REPLY", "IGNORE", "NONE"].includes(action)) {
180195
return buildToonResponse(prompt, {
181196
thought: `Selecting ${action} for this ADHDBench turn.`,
@@ -272,28 +287,48 @@ function buildCompletion(prompt: string): string {
272287
});
273288
}
274289

275-
if (/Benchmark:\*{0,2}\s*(rlm-bench|rlm_bench)/i.test(prompt) || /RLM benchmark task/i.test(prompt)) {
290+
if (
291+
/Benchmark:\*{0,2}\s*(rlm-bench|rlm_bench)/i.test(prompt) ||
292+
/RLM benchmark task/i.test(prompt)
293+
) {
276294
return buildReplyToon(extractRlmAnswer(prompt) ?? "UNKNOWN");
277295
}
278296

279-
if (/Benchmark:\*{0,2}\s*gaia/i.test(prompt) || /GAIA benchmark task|FINAL ANSWER/i.test(prompt)) {
297+
if (
298+
/Benchmark:\*{0,2}\s*gaia/i.test(prompt) ||
299+
/GAIA benchmark task|FINAL ANSWER/i.test(prompt)
300+
) {
280301
const answer = extractArithmeticAnswer(prompt) ?? "mock-answer";
281302
return buildReplyToon(`FINAL ANSWER: ${answer}`);
282303
}
283304

284-
if (/Benchmark:\*{0,2}\s*(hyperliquid_bench|hyperliquid-bench|hyperliquidbench)/i.test(prompt) || /Hyperliquid DEX|HyperliquidBench/i.test(prompt)) {
305+
if (
306+
/Benchmark:\*{0,2}\s*(hyperliquid_bench|hyperliquid-bench|hyperliquidbench)/i.test(
307+
prompt,
308+
) ||
309+
/Hyperliquid DEX|HyperliquidBench/i.test(prompt)
310+
) {
285311
return buildHyperliquidPlanToon();
286312
}
287313

288-
if (/Benchmark:\*{0,2}\s*(vending-bench|vending_bench)/i.test(prompt) || /Vending-Bench|vending machine business/i.test(prompt)) {
314+
if (
315+
/Benchmark:\*{0,2}\s*(vending-bench|vending_bench)/i.test(prompt) ||
316+
/Vending-Bench|vending machine business/i.test(prompt)
317+
) {
289318
return buildVendingActionToon(prompt);
290319
}
291320

292-
if (/Benchmark:\*{0,2}\s*clawbench/i.test(prompt) || /ClawBench|Review my inbox/i.test(prompt)) {
321+
if (
322+
/Benchmark:\*{0,2}\s*clawbench/i.test(prompt) ||
323+
/ClawBench|Review my inbox/i.test(prompt)
324+
) {
293325
return buildClawBenchReplyToon();
294326
}
295327

296-
if (/Benchmark:\*{0,2}\s*adhdbench/i.test(prompt) || /ADHDBench/i.test(prompt)) {
328+
if (
329+
/Benchmark:\*{0,2}\s*adhdbench/i.test(prompt) ||
330+
/ADHDBench/i.test(prompt)
331+
) {
297332
return buildAdhdBenchToon(prompt);
298333
}
299334

packages/app-core/src/benchmark/plugin.ts

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -151,10 +151,9 @@ function formatContextAsText(ctx: BenchmarkContext): string {
151151
"hyperliquid-bench",
152152
"hyperliquidbench",
153153
]).has(benchmark);
154-
const isJsonActionBenchmark = new Set([
155-
"vending-bench",
156-
"vending_bench",
157-
]).has(benchmark);
154+
const isJsonActionBenchmark = new Set(["vending-bench", "vending_bench"]).has(
155+
benchmark,
156+
);
158157
const isAdhdBenchmark = benchmark === "adhdbench";
159158

160159
sections.push(`# Benchmark Task`);

packages/app-core/src/benchmark/server-utils.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@ import {
22
type AgentRuntime,
33
ChannelType,
44
elizaLogger,
5-
parseToonKeyValue,
65
type Plugin,
6+
parseToonKeyValue,
77
stringToUuid,
88
type UUID,
99
} from "@elizaos/core";

0 commit comments

Comments
 (0)