@@ -1,10 +1,9 @@
,win_rate,standard_error,n_wins,n_wins_base,n_draws,n_total,discrete_win_rate,mode,avg_length,length_controlled_winrate,lc_standard_error
- Shopee-SlimMoA-v1,75.6142865980535,1.27062740591947,621,184,0,805,77.1428571428572,community,1994,77.4515432873834,0.430175221492396
- blendaxai-gm-l6-vo31,69.11033492869565,1.3280735654354865,562,242,1,805,69.87577639751554,community,1809,76.91981221023656,0.5725365663132986
+ Shopee-SlimMoA-v1,75.61428659805350,1.2706274059194700,621,184,0,805,77.14285714285720,community,1994,77.4515432873834,0.43017522149239600
+ blendaxai-gm-l6-vo31,69.11033492869565,1.3280735654354863,562,242,1,805,69.87577639751554,community,1809,76.91981221023656,0.5725365663132986
gemma-2-9b-it-WPO-HB,77.82503168985093,1.2355857177790277,640,163,2,805,79.62732919254658,community,2285,76.72506842726064,0.4242603928637889
blendaxai-gm-l3-v35,73.41035740244067,1.254951147343878,607,196,2,805,75.527950310559,community,2186,73.37270365010379,0.6163911450738288
gemma-2-9b-it-SimPO,65.86422561532919,1.423459922555078,540,264,1,805,67.14285714285714,community,1833,72.3508446939842,0.5167873784867067
- model_hf_model_args_pretrained=mlfoundations-dev__gemma-simpo-reproduction,67.35102937013792,1.4210070002869848,557,247,1,805,69.25465838509317,community,1950,71.18995900084634,0.5756949353655318
openpipe-moa-gpt-4-turbo-v1,63.15493451236265,1.422980098799326,515,283,7,805,64.40993788819875,community,1856,68.37866250336802,0.7309418614587613
gemma-2-9b-it-DPO,65.35922380122982,1.402802336467638,536,268,1,805,66.64596273291924,community,2016,67.6620382198043,0.6605613085864308
Together-MoA,59.8688062333292,1.434305604543079,490,314,1,805,60.93167701863354,community,1825,65.37996976852163,0.7392392836781445
@@ -23,7 +22,7 @@ gpt4_1106_preview_verbose,64.30360147101865,1.3348590089025316,525,268,12,805,65
gpt-4o-mini-2024-07-18,44.65413862507926,1.4572395578449813,350,451,4,805,43.72670807453416,minimal,1861,50.727144855901976,0.8284734951761676
Storm-7B,50.26886905528583,1.4728176780737183,397,408,0,805,49.31677018633541,community,2045,50.45110959343775,
gpt4_1106_preview,50.0,0.0,0,0,805,805,50.0,minimal,2049,50.0,
- REBEL-Llama-3-8B-Instruct-Armo,48.43655307668638,1.480341435123528,394,410,1,805,49.00621118012423,community,1965,49.31429353685712,0.7061879308002301
+ REBEL-Llama-3-8B-Instruct-Armo,48.43655307668638,1.480341435123528,394,410,1,805,49.006211180124225,community,1965,49.314293536857114,0.7061879308002301
Infinity-Instruct-7M-Gen-Llama3_1-70B,37.46327383827497,1.4734130373862548,299,501,5,805,37.453416149068325,community,1654,46.10043331712677,0.822439983375277
Llama-3-Instruct-8B-SimPO-ExPO,40.63285400856655,1.4439449942168028,325,479,1,805,40.43478260869565,community,1765,45.78021783946177,
Llama-3-Instruct-8B-SimPO,40.52977498461182,1.422574464675002,319,485,1,805,39.68944099378882,community,1825,44.65131348921881,0.8800655791760451
@@ -209,5 +208,4 @@ oasst-sft-pythia-12b,1.790114083180124,0.3985580883049341,13,790,2,805,1.7391304
guanaco-13b,3.469596859739131,0.5518606725700214,22,780,3,805,2.919254658385093,verified,1774,3.003787329611614,
guanaco-7b,2.880002266173913,0.5202924149314048,21,783,1,805,2.670807453416149,verified,1364,2.871116813131697,
Qwen1.5-1.8B-Chat,3.70555681579365,0.5811750995496215,27,774,3,804,3.544776119402985,verified,2673,2.588498849185137,
- baichuan-13b-chat,1.9921455615279504,0.4176985079331233,14,790,1,805,1.8012422360248446,community,1727,2.062170253598568,
- model_hf_model_args_pretrained=mlfoundations-dev__gemma-oh-preferences,0.005260368511326853,0.0018774672393365112,0,805,0,805,0.0,community,196,0.010252829751292214,0.0007495965900756891
+ baichuan-13b-chat,1.9921455615279504,0.4176985079331233,14,790,1,805,1.8012422360248446,community,1727,2.062170253598568,