diff --git a/aider/website/_data/polyglot_leaderboard.yml b/aider/website/_data/polyglot_leaderboard.yml index 66e1c84ddb7..ff63555f8b2 100644 --- a/aider/website/_data/polyglot_leaderboard.yml +++ b/aider/website/_data/polyglot_leaderboard.yml @@ -75,7 +75,7 @@ versions: 0.71.2.dev seconds_per_case: 21.4 total_cost: 14.4063 - + - dirname: 2024-12-30-20-57-12--gpt-4o-2024-11-20-ex-as-sys test_cases: 225 model: gpt-4o-2024-11-20 @@ -101,7 +101,7 @@ versions: 0.70.1.dev seconds_per_case: 12.1 total_cost: 6.7351 - + - dirname: 2024-12-30-20-44-54--gpt4o-ex-as-sys-clean-prompt test_cases: 225 model: gpt-4o-2024-08-06 @@ -127,7 +127,7 @@ versions: 0.70.1.dev seconds_per_case: 16.0 total_cost: 7.0286 - + - dirname: 2024-12-21-19-23-03--polyglot-o1-hard-diff test_cases: 224 model: o1-2024-12-17 (high) @@ -283,7 +283,7 @@ versions: 0.69.2.dev seconds_per_case: 45.5 total_cost: 0.0000 - + - dirname: 2024-12-22-20-08-13--gemini-2.0-flash-exp-polyglot-whole test_cases: 225 model: gemini-2.0-flash-exp @@ -309,7 +309,7 @@ versions: 0.69.2.dev seconds_per_case: 12.2 total_cost: 0.0000 - + - dirname: 2024-12-23-01-11-56--yi-test test_cases: 225 model: yi-lightning @@ -988,7 +988,7 @@ versions: 0.81.3.dev seconds_per_case: 79.1 total_cost: 0.7346 - + - dirname: 2025-04-10-19-02-44--oalpha-diff-exsys test_cases: 225 model: Optimus Alpha @@ -1015,6 +1015,34 @@ seconds_per_case: 18.4 total_cost: 0.0000 +- dirname: 2025-04-14-05-17-19--gemini-2.5-pro-preview-architect-sonnet-3.7-editor-32k-thinking + test_cases: 225 + model: Gemini 2.5 Pro Preview + claude-3-7-sonnet-20250219 (32k thinking tokens) + edit_format: architect + commit_hash: 0c383df + editor_model: anthropic/claude-3-7-sonnet-20250219 + editor_edit_format: editor-diff + pass_rate_1: 39.6 + pass_rate_2: 75.1 + pass_num_1: 89 + pass_num_2: 169 + percent_cases_well_formed: 100.0 + error_outputs: 7 + num_malformed_responses: 0 + num_with_malformed_responses: 0 + user_asks: 124 + lazy_comments: 12 + syntax_errors: 0 + indentation_errors: 0 + exhausted_context_windows: 0 + test_timeouts: 1 + total_tests: 225 + command: aider --model gemini/gemini-2.5-pro-preview-03-25 --editor-model anthropic/claude-3-7-sonnet-20250219 --thinking-tokens 32k --architect + date: 2025-04-14 + versions: 0.81.4.dev + seconds_per_case: 96.7 + total_cost: 30.1766 + - dirname: 2025-04-14-21-05-54--gpt41-diff-exuser test_cases: 225 model: gpt-4.1 @@ -1143,4 +1171,4 @@ date: 2025-04-16 versions: 0.82.1.dev seconds_per_case: 176.5 - total_cost: 19.6399 \ No newline at end of file + total_cost: 19.6399