fix(benchmark): fix biome formatting and import ordering

Shaw · claude · Shaw · commit 451f3749e1ce · 2026-05-05T06:46:44.000-07:00
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
diff --git a/packages/app-core/src/benchmark/mock-plugin-base.ts b/packages/app-core/src/benchmark/mock-plugin-base.ts
@@ -16,8 +16,9 @@ function extractCode(prompt: string, label: string): string {
 
 function buildToonResponse(fields: Record<string, string | undefined>): string {
   return Object.entries(fields)
-    .filter((entry): entry is [string, string] =>
-      typeof entry[1] === "string" && entry[1].length > 0,
+    .filter(
+      (entry): entry is [string, string] =>
+        typeof entry[1] === "string" && entry[1].length > 0,
     )
     .map(([key, value]) => {
       if (value.includes("\n")) {
@@ -32,7 +33,10 @@ function buildToonResponse(fields: Record<string, string | undefined>): string {
 }
 
 function createBenchmarkActionToon(prompt: string): string {
-  if (/Benchmark:\*{0,2}\s*(rlm-bench|rlm_bench)/i.test(prompt) || /RLM benchmark task/i.test(prompt)) {
+  if (
+    /Benchmark:\*{0,2}\s*(rlm-bench|rlm_bench)/i.test(prompt) ||
+    /RLM benchmark task/i.test(prompt)
+  ) {
     const answer =
       /authorization code is ([A-Z0-9]{8})/i.exec(prompt)?.[1] ??
       /encrypted key sequence is ([A-Z0-9]{8})/i.exec(prompt)?.[1] ??
@@ -48,18 +52,28 @@ function createBenchmarkActionToon(prompt: string): string {
     });
   }
 
-  if (/Benchmark:\*{0,2}\s*gaia/i.test(prompt) || /GAIA benchmark task|FINAL ANSWER/i.test(prompt)) {
+  if (
+    /Benchmark:\*{0,2}\s*gaia/i.test(prompt) ||
+    /GAIA benchmark task|FINAL ANSWER/i.test(prompt)
+  ) {
     const arithmetic =
       /Question:\s*(?:what is\s*)?(-?\d+)\s*([+*x-])\s*(-?\d+)/i.exec(prompt) ??
-      /Question:\s*(?:what is\s*)?(-?\d+)\s+(times|multiplied by|plus|minus)\s+(-?\d+)/i.exec(prompt);
+      /Question:\s*(?:what is\s*)?(-?\d+)\s+(times|multiplied by|plus|minus)\s+(-?\d+)/i.exec(
+        prompt,
+      );
     let answer = "mock-answer";
     if (arithmetic) {
       const left = Number(arithmetic[1]);
       const right = Number(arithmetic[3]);
       const op = arithmetic[2].toLowerCase();
       if (op === "+" || op === "plus") answer = String(left + right);
       if (op === "-" || op === "minus") answer = String(left - right);
-      if (op === "*" || op === "x" || op === "times" || op === "multiplied by") {
+      if (
+        op === "*" ||
+        op === "x" ||
+        op === "times" ||
+        op === "multiplied by"
+      ) {
         answer = String(left * right);
       }
     }
@@ -70,50 +84,78 @@ function createBenchmarkActionToon(prompt: string): string {
     });
   }
 
-  if (/Benchmark:\*{0,2}\s*(hyperliquid_bench|hyperliquid-bench|hyperliquidbench)/i.test(prompt) || /Hyperliquid DEX|HyperliquidBench/i.test(prompt)) {
+  if (
+    /Benchmark:\*{0,2}\s*(hyperliquid_bench|hyperliquid-bench|hyperliquidbench)/i.test(
+      prompt,
+    ) ||
+    /Hyperliquid DEX|HyperliquidBench/i.test(prompt)
+  ) {
     return buildToonResponse({
       thought: "Returning a deterministic Hyperliquid plan.",
       actions: "REPLY",
       text: '{"steps":[{"perp_orders":{"orders":[{"coin":"ETH","side":"buy","tif":"ALO","sz":0.01,"reduceOnly":false,"px":"mid-1%"},{"coin":"BTC","side":"sell","tif":"IOC","sz":0.01,"reduceOnly":true,"px":"mid+1%"}]}},{"usd_class_transfer":{"toPerp":true,"usdc":5}},{"set_leverage":{"coin":"ETH","leverage":3,"cross":false}},{"cancel_all":{"coin":"BTC"}}]}',
     });
   }
 
-  if (/Benchmark:\*{0,2}\s*(vending-bench|vending_bench)/i.test(prompt) || /Vending-Bench|vending machine business/i.test(prompt)) {
-    const action = /pending orders/i.test(prompt) && !/no pending orders/i.test(prompt)
-      ? '{"action":"ADVANCE_DAY"}'
-      : '{"action":"PLACE_ORDER","supplier_id":"beverage_dist","items":{"water":12}}';
+  if (
+    /Benchmark:\*{0,2}\s*(vending-bench|vending_bench)/i.test(prompt) ||
+    /Vending-Bench|vending machine business/i.test(prompt)
+  ) {
+    const action =
+      /pending orders/i.test(prompt) && !/no pending orders/i.test(prompt)
+        ? '{"action":"ADVANCE_DAY"}'
+        : '{"action":"PLACE_ORDER","supplier_id":"beverage_dist","items":{"water":12}}';
     return buildToonResponse({
       thought: "Returning a deterministic Vending-Bench action.",
       actions: "REPLY",
       text: action,
     });
   }
 
-  if (/Benchmark:\*{0,2}\s*clawbench/i.test(prompt) || /ClawBench|Review my inbox/i.test(prompt)) {
+  if (
+    /Benchmark:\*{0,2}\s*clawbench/i.test(prompt) ||
+    /ClawBench|Review my inbox/i.test(prompt)
+  ) {
     return buildToonResponse({
       thought: "Returning deterministic ClawBench inbox triage.",
       actions: "REPLY",
       text: "Inbox triage complete. Boss Q4 report is urgent and needs an EOD draft response. HR benefits enrollment is action-required before January 20. BigCorp client email needs scheduling for the project timeline call. Newsletter is low priority and the shopping promo should be archived. Draft replies are ready for review; please approve before I send anything.",
     });
   }
 
-  if (/Benchmark:\*{0,2}\s*adhdbench/i.test(prompt) || /ADHDBench/i.test(prompt)) {
-    const currentMessage = /Current user message:\s*([\s\S]*?)(?:\n\n|$)/i.exec(prompt)?.[1]?.toLowerCase() ?? prompt.toLowerCase();
+  if (
+    /Benchmark:\*{0,2}\s*adhdbench/i.test(prompt) ||
+    /ADHDBench/i.test(prompt)
+  ) {
+    const currentMessage =
+      /Current user message:\s*([\s\S]*?)(?:\n\n|$)/i
+        .exec(prompt)?.[1]
+        ?.toLowerCase() ?? prompt.toLowerCase();
     let action = "REPLY";
-    if (/send a message|message to/.test(currentMessage)) action = "SEND_MESSAGE";
+    if (/send a message|message to/.test(currentMessage))
+      action = "SEND_MESSAGE";
     else if (/mute this|too noisy/.test(currentMessage)) action = "MUTE_ROOM";
     else if (/unmute/.test(currentMessage)) action = "UNMUTE_ROOM";
     else if (/follow the/.test(currentMessage)) action = "FOLLOW_ROOM";
-    else if (/stop following|unfollow/.test(currentMessage)) action = "UNFOLLOW_ROOM";
+    else if (/stop following|unfollow/.test(currentMessage))
+      action = "UNFOLLOW_ROOM";
     else if (/find all|search/.test(currentMessage)) action = "SEARCH_CONTACTS";
-    else if (/make .* admin|update role/.test(currentMessage)) action = "UPDATE_ROLE";
-    else if (/remind me|tomorrow/.test(currentMessage)) action = "SCHEDULE_FOLLOW_UP";
-    else if (/add .* contact|new colleague/.test(currentMessage)) action = "ADD_CONTACT";
-    else if (/remove .* contact/.test(currentMessage)) action = "REMOVE_CONTACT";
-    else if (/settings|notification preferences/.test(currentMessage)) action = "UPDATE_SETTINGS";
-    else if (/reset|start fresh|clear everything/.test(currentMessage)) action = "RESET_SESSION";
-    else if (/phone number|contact info/.test(currentMessage)) action = "UPDATE_CONTACT_INFO";
-    else if (/generate .*picture|image/.test(currentMessage)) action = "GENERATE_IMAGE";
+    else if (/make .* admin|update role/.test(currentMessage))
+      action = "UPDATE_ROLE";
+    else if (/remind me|tomorrow/.test(currentMessage))
+      action = "SCHEDULE_FOLLOW_UP";
+    else if (/add .* contact|new colleague/.test(currentMessage))
+      action = "ADD_CONTACT";
+    else if (/remove .* contact/.test(currentMessage))
+      action = "REMOVE_CONTACT";
+    else if (/settings|notification preferences/.test(currentMessage))
+      action = "UPDATE_SETTINGS";
+    else if (/reset|start fresh|clear everything/.test(currentMessage))
+      action = "RESET_SESSION";
+    else if (/phone number|contact info/.test(currentMessage))
+      action = "UPDATE_CONTACT_INFO";
+    else if (/generate .*picture|image/.test(currentMessage))
+      action = "GENERATE_IMAGE";
     else if (/ignore that last/.test(currentMessage)) action = "IGNORE";
     else if (/create .*plan/.test(currentMessage)) action = "CREATE_PLAN";
     if (["REPLY", "IGNORE", "NONE"].includes(action)) {
diff --git a/packages/app-core/src/benchmark/mock-plugin.ts b/packages/app-core/src/benchmark/mock-plugin.ts
@@ -64,7 +64,9 @@ function extractRlmAnswer(prompt: string): string | null {
 function extractArithmeticAnswer(prompt: string): string | null {
   const match =
     /Question:\s*(?:what is\s*)?(-?\d+)\s*([+*x-])\s*(-?\d+)/i.exec(prompt) ??
-    /Question:\s*(?:what is\s*)?(-?\d+)\s+(times|multiplied by|plus|minus)\s+(-?\d+)/i.exec(prompt);
+    /Question:\s*(?:what is\s*)?(-?\d+)\s+(times|multiplied by|plus|minus)\s+(-?\d+)/i.exec(
+      prompt,
+    );
   if (!match) return null;
   const left = Number(match[1]);
   const op = match[2].toLowerCase();
@@ -149,23 +151,33 @@ function buildClawBenchReplyToon(): string {
 
 function extractAdhdAction(prompt: string): string {
   const lower = prompt.toLowerCase();
-  const messageMatch = /Current user message:\s*([\s\S]*?)(?:\n\n|$)/i.exec(prompt);
+  const messageMatch = /Current user message:\s*([\s\S]*?)(?:\n\n|$)/i.exec(
+    prompt,
+  );
   const message = (messageMatch?.[1] ?? prompt).toLowerCase();
-  if (/what time|hello|hey|how are|favourite color|favorite color|status update/.test(message)) {
+  if (
+    /what time|hello|hey|how are|favourite color|favorite color|status update/.test(
+      message,
+    )
+  ) {
     return "REPLY";
   }
-  if (/send a message|tell alice|message to/.test(message)) return "SEND_MESSAGE";
+  if (/send a message|tell alice|message to/.test(message))
+    return "SEND_MESSAGE";
   if (/mute this|too noisy/.test(message)) return "MUTE_ROOM";
   if (/unmute/.test(message)) return "UNMUTE_ROOM";
   if (/follow the/.test(message)) return "FOLLOW_ROOM";
   if (/stop following|unfollow/.test(message)) return "UNFOLLOW_ROOM";
   if (/find all|search/.test(message)) return "SEARCH_CONTACTS";
   if (/make .* admin|update role/.test(message)) return "UPDATE_ROLE";
-  if (/remind me|follow.?up|tomorrow/.test(message)) return "SCHEDULE_FOLLOW_UP";
+  if (/remind me|follow.?up|tomorrow/.test(message))
+    return "SCHEDULE_FOLLOW_UP";
   if (/add .* contact|add my new colleague/.test(message)) return "ADD_CONTACT";
   if (/remove .* contact/.test(message)) return "REMOVE_CONTACT";
-  if (/notification preferences|settings/.test(message)) return "UPDATE_SETTINGS";
-  if (/clear everything|start fresh|reset/.test(message)) return "RESET_SESSION";
+  if (/notification preferences|settings/.test(message))
+    return "UPDATE_SETTINGS";
+  if (/clear everything|start fresh|reset/.test(message))
+    return "RESET_SESSION";
   if (/phone number|contact info/.test(message)) return "UPDATE_CONTACT_INFO";
   if (/generate .*picture|image/.test(message)) return "GENERATE_IMAGE";
   if (/ignore that last/.test(message)) return "IGNORE";
@@ -175,7 +187,10 @@ function extractAdhdAction(prompt: string): string {
 
 function buildAdhdBenchToon(prompt: string): string {
   const action = extractAdhdAction(prompt);
-  const text = action === "REPLY" ? "Replying directly with the requested information." : `Selected ${action}`;
+  const text =
+    action === "REPLY"
+      ? "Replying directly with the requested information."
+      : `Selected ${action}`;
   if (["REPLY", "IGNORE", "NONE"].includes(action)) {
     return buildToonResponse(prompt, {
       thought: `Selecting ${action} for this ADHDBench turn.`,
@@ -272,28 +287,48 @@ function buildCompletion(prompt: string): string {
     });
   }
 
-  if (/Benchmark:\*{0,2}\s*(rlm-bench|rlm_bench)/i.test(prompt) || /RLM benchmark task/i.test(prompt)) {
+  if (
+    /Benchmark:\*{0,2}\s*(rlm-bench|rlm_bench)/i.test(prompt) ||
+    /RLM benchmark task/i.test(prompt)
+  ) {
     return buildReplyToon(extractRlmAnswer(prompt) ?? "UNKNOWN");
   }
 
-  if (/Benchmark:\*{0,2}\s*gaia/i.test(prompt) || /GAIA benchmark task|FINAL ANSWER/i.test(prompt)) {
+  if (
+    /Benchmark:\*{0,2}\s*gaia/i.test(prompt) ||
+    /GAIA benchmark task|FINAL ANSWER/i.test(prompt)
+  ) {
     const answer = extractArithmeticAnswer(prompt) ?? "mock-answer";
     return buildReplyToon(`FINAL ANSWER: ${answer}`);
   }
 
-  if (/Benchmark:\*{0,2}\s*(hyperliquid_bench|hyperliquid-bench|hyperliquidbench)/i.test(prompt) || /Hyperliquid DEX|HyperliquidBench/i.test(prompt)) {
+  if (
+    /Benchmark:\*{0,2}\s*(hyperliquid_bench|hyperliquid-bench|hyperliquidbench)/i.test(
+      prompt,
+    ) ||
+    /Hyperliquid DEX|HyperliquidBench/i.test(prompt)
+  ) {
     return buildHyperliquidPlanToon();
   }
 
-  if (/Benchmark:\*{0,2}\s*(vending-bench|vending_bench)/i.test(prompt) || /Vending-Bench|vending machine business/i.test(prompt)) {
+  if (
+    /Benchmark:\*{0,2}\s*(vending-bench|vending_bench)/i.test(prompt) ||
+    /Vending-Bench|vending machine business/i.test(prompt)
+  ) {
     return buildVendingActionToon(prompt);
   }
 
-  if (/Benchmark:\*{0,2}\s*clawbench/i.test(prompt) || /ClawBench|Review my inbox/i.test(prompt)) {
+  if (
+    /Benchmark:\*{0,2}\s*clawbench/i.test(prompt) ||
+    /ClawBench|Review my inbox/i.test(prompt)
+  ) {
     return buildClawBenchReplyToon();
   }
 
-  if (/Benchmark:\*{0,2}\s*adhdbench/i.test(prompt) || /ADHDBench/i.test(prompt)) {
+  if (
+    /Benchmark:\*{0,2}\s*adhdbench/i.test(prompt) ||
+    /ADHDBench/i.test(prompt)
+  ) {
     return buildAdhdBenchToon(prompt);
   }
 
diff --git a/packages/app-core/src/benchmark/plugin.ts b/packages/app-core/src/benchmark/plugin.ts
@@ -151,10 +151,9 @@ function formatContextAsText(ctx: BenchmarkContext): string {
     "hyperliquid-bench",
     "hyperliquidbench",
   ]).has(benchmark);
-  const isJsonActionBenchmark = new Set([
-    "vending-bench",
-    "vending_bench",
-  ]).has(benchmark);
+  const isJsonActionBenchmark = new Set(["vending-bench", "vending_bench"]).has(
+    benchmark,
+  );
   const isAdhdBenchmark = benchmark === "adhdbench";
 
   sections.push(`# Benchmark Task`);
diff --git a/packages/app-core/src/benchmark/server-utils.ts b/packages/app-core/src/benchmark/server-utils.ts
@@ -2,8 +2,8 @@ import {
   type AgentRuntime,
   ChannelType,
   elizaLogger,
-  parseToonKeyValue,
   type Plugin,
+  parseToonKeyValue,
   stringToUuid,
   type UUID,
 } from "@elizaos/core";