Skip to content

Commit f125df4

Browse files
committed
Automatically skip approvals for benchmark
1 parent ad5f423 commit f125df4

File tree

1 file changed

+23
-2
lines changed

1 file changed

+23
-2
lines changed

src/benchmark.ts

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,15 @@ import { showProgressModal } from "./utils.ts";
1010
const BENCHMARK_PAGE = "🧪 AI Benchmark";
1111
const TEST_PAGE = `${BENCHMARK_PAGE}/Test Page`;
1212
const TEST_TIMEOUT_MS = 30000; // 30 seconds per test
13+
const BENCHMARK_ALLOWED_TOOLS = [
14+
"read_note",
15+
"list_pages",
16+
"get_page_info",
17+
"navigate",
18+
"update_note",
19+
"search_replace",
20+
"create_note",
21+
];
1322

1423
function withTimeout<T>(promise: Promise<T>, ms: number, operation: string): Promise<T> {
1524
return new Promise((resolve, reject) => {
@@ -366,7 +375,10 @@ async function runExecutionTest(
366375
await test.setup();
367376
}
368377

369-
const luaTools = await discoverTools();
378+
const allLuaTools = await discoverTools();
379+
const luaTools = new Map(
380+
[...allLuaTools].filter(([name]) => BENCHMARK_ALLOWED_TOOLS.includes(name)),
381+
);
370382
const tools = convertToOpenAITools(luaTools);
371383

372384
const result = await runAgenticChat({
@@ -548,9 +560,14 @@ function generateReport(tests: BenchmarkTest[], results: ModelResults[]): string
548560
let cachedBenchmarkResults: string | null = null;
549561

550562
export async function runBenchmark(): Promise<string> {
551-
// Force config reload to pick up skipToolApproval changes
552563
await initializeOpenAI(false);
553564

565+
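// Temporarily enable skipToolApproval for the benchmark run; restored in the finally block below. NOTE(review): this override is applied before the "No models selected" check — confirm that early-exit path is also covered by the try/finally.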
566+
const originalSkipApproval = aiSettings?.chat?.skipToolApproval ?? false;
567+
if (aiSettings?.chat) {
568+
aiSettings.chat.skipToolApproval = true;
569+
}
570+
554571
const models = await selectModelsForBenchmark();
555572
if (models.length === 0) {
556573
await editor.flashNotification("No models selected", "error");
@@ -593,6 +610,10 @@ export async function runBenchmark(): Promise<string> {
593610

594611
return report;
595612
} finally {
613+
// Restore original skipToolApproval setting
614+
if (aiSettings?.chat) {
615+
aiSettings.chat.skipToolApproval = originalSkipApproval;
616+
}
596617
await editor.hidePanel("modal");
597618
}
598619
}

0 commit comments

Comments
 (0)