Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 35 additions & 7 deletions aider/website/_data/polyglot_leaderboard.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
versions: 0.71.2.dev
seconds_per_case: 21.4
total_cost: 14.4063

- dirname: 2024-12-30-20-57-12--gpt-4o-2024-11-20-ex-as-sys
test_cases: 225
model: gpt-4o-2024-11-20
Expand All @@ -101,7 +101,7 @@
versions: 0.70.1.dev
seconds_per_case: 12.1
total_cost: 6.7351

- dirname: 2024-12-30-20-44-54--gpt4o-ex-as-sys-clean-prompt
test_cases: 225
model: gpt-4o-2024-08-06
Expand All @@ -127,7 +127,7 @@
versions: 0.70.1.dev
seconds_per_case: 16.0
total_cost: 7.0286

- dirname: 2024-12-21-19-23-03--polyglot-o1-hard-diff
test_cases: 224
model: o1-2024-12-17 (high)
Expand Down Expand Up @@ -283,7 +283,7 @@
versions: 0.69.2.dev
seconds_per_case: 45.5
total_cost: 0.0000

- dirname: 2024-12-22-20-08-13--gemini-2.0-flash-exp-polyglot-whole
test_cases: 225
model: gemini-2.0-flash-exp
Expand All @@ -309,7 +309,7 @@
versions: 0.69.2.dev
seconds_per_case: 12.2
total_cost: 0.0000

- dirname: 2024-12-23-01-11-56--yi-test
test_cases: 225
model: yi-lightning
Expand Down Expand Up @@ -988,7 +988,7 @@
versions: 0.81.3.dev
seconds_per_case: 79.1
total_cost: 0.7346

- dirname: 2025-04-10-19-02-44--oalpha-diff-exsys
test_cases: 225
model: Optimus Alpha
Expand All @@ -1015,6 +1015,34 @@
seconds_per_case: 18.4
total_cost: 0.0000

- dirname: 2025-04-14-05-17-19--gemini-2.5-pro-preview-architect-sonnet-3.7-editor-32k-thinking
test_cases: 225
model: Gemini 2.5 Pro Preview + claude-3-7-sonnet-20250219 (32k thinking tokens)
edit_format: architect
commit_hash: 0c383df
editor_model: anthropic/claude-3-7-sonnet-20250219
editor_edit_format: editor-diff
pass_rate_1: 39.6
pass_rate_2: 75.1
pass_num_1: 89
pass_num_2: 169
percent_cases_well_formed: 100.0
error_outputs: 7
num_malformed_responses: 0
num_with_malformed_responses: 0
user_asks: 124
lazy_comments: 12
syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 0
test_timeouts: 1
total_tests: 225
command: aider --model gemini/gemini-2.5-pro-preview-03-25 --editor-model anthropic/claude-3-7-sonnet-20250219 --thinking-tokens 32k --architect
date: 2025-04-14
versions: 0.81.4.dev
seconds_per_case: 96.7
total_cost: 30.1766

- dirname: 2025-04-14-21-05-54--gpt41-diff-exuser
test_cases: 225
model: gpt-4.1
Expand Down Expand Up @@ -1143,4 +1171,4 @@
date: 2025-04-16
versions: 0.82.1.dev
seconds_per_case: 176.5
total_cost: 19.6399
total_cost: 19.6399