@@ -10,6 +10,15 @@ import { showProgressModal } from "./utils.ts";
1010const BENCHMARK_PAGE = "🧪 AI Benchmark" ; // base page name; TEST_PAGE below is built from it
1111const TEST_PAGE = `${ BENCHMARK_PAGE } /Test Page` ; // test page name, derived from BENCHMARK_PAGE
1212const TEST_TIMEOUT_MS = 30000 ; // 30 seconds per test
13+ const BENCHMARK_ALLOWED_TOOLS = [ // tool names kept when runExecutionTest filters the discovered tools; every other tool is dropped
14+ "read_note" ,
15+ "list_pages" ,
16+ "get_page_info" ,
17+ "navigate" ,
18+ "update_note" ,
19+ "search_replace" ,
20+ "create_note" ,
21+ ] ;
1322
1423function withTimeout < T > ( promise : Promise < T > , ms : number , operation : string ) : Promise < T > {
1524 return new Promise ( ( resolve , reject ) => {
@@ -366,7 +375,10 @@ async function runExecutionTest(
366375 await test . setup ( ) ;
367376 }
368377
369- const luaTools = await discoverTools ( ) ;
378+ const allLuaTools = await discoverTools ( ) ;
379+ const luaTools = new Map (
380+ [ ...allLuaTools ] . filter ( ( [ name ] ) => BENCHMARK_ALLOWED_TOOLS . includes ( name ) ) ,
381+ ) ;
370382 const tools = convertToOpenAITools ( luaTools ) ;
371383
372384 const result = await runAgenticChat ( {
@@ -548,9 +560,14 @@ function generateReport(tests: BenchmarkTest[], results: ModelResults[]): string
548560let cachedBenchmarkResults : string | null = null ;
549561
550562export async function runBenchmark ( ) : Promise < string > { // resolves to the generated report (see `return report` below)
551- // Force config reload to pick up skipToolApproval changes
552563 await initializeOpenAI ( false ) ;
553564
565+ // Temporarily enable skipToolApproval for the benchmark; the saved value is written back in the finally block below
566+ const originalSkipApproval = aiSettings ?. chat ?. skipToolApproval ?? false ; // defaults to false when the setting is unset
567+ if ( aiSettings ?. chat ) {
568+ aiSettings . chat . skipToolApproval = true ;
569+ }
570+
554571 const models = await selectModelsForBenchmark ( ) ;
555572 if ( models . length === 0 ) { // NOTE(review): skipToolApproval was already forced to true above; if this branch exits before the try (not visible in this hunk), the finally restore never runs — confirm
556573 await editor . flashNotification ( "No models selected" , "error" ) ;
@@ -593,6 +610,10 @@ export async function runBenchmark(): Promise<string> {
593610
594611 return report ;
595612 } finally {
613+ // Restore original skipToolApproval setting (runs on both the normal return and on a thrown error inside the try)
614+ if ( aiSettings ?. chat ) {
615+ aiSettings . chat . skipToolApproval = originalSkipApproval ;
616+ }
596617 await editor . hidePanel ( "modal" ) ;
597618 }
598619}
0 commit comments