-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path04_eval_1.4B_minipile_density_table.txt
143 lines (143 loc) · 18.4 KB
/
04_eval_1.4B_minipile_density_table.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
| Tasks |Version|Filter|n-shot| Metric | | Value | | Stderr |
|------------------------------------------------------------|------:|------|-----:|----------|---|-----------:|---|----------:|
|arc_challenge | 1|none | 0|acc |↑ | 0.1852|± | 0.0114|
| | |none | 0|acc_norm |↑ | 0.2287|± | 0.0123|
|arc_easy | 1|none | 0|acc |↑ | 0.2698|± | 0.0091|
| | |none | 0|acc_norm |↑ | 0.2660|± | 0.0091|
|blimp | 2|none | |acc |↑ | 0.5422|± | 0.0017|
| - blimp_adjunct_island | 1|none | 0|acc |↑ | 0.6420|± | 0.0152|
| - blimp_anaphor_gender_agreement | 1|none | 0|acc |↑ | 0.1920|± | 0.0125|
| - blimp_anaphor_number_agreement | 1|none | 0|acc |↑ | 0.3580|± | 0.0152|
| - blimp_animate_subject_passive | 1|none | 0|acc |↑ | 0.6420|± | 0.0152|
| - blimp_animate_subject_trans | 1|none | 0|acc |↑ | 0.7850|± | 0.0130|
| - blimp_causative | 1|none | 0|acc |↑ | 0.5060|± | 0.0158|
| - blimp_complex_NP_island | 1|none | 0|acc |↑ | 0.4650|± | 0.0158|
| - blimp_coordinate_structure_constraint_complex_left_branch| 1|none | 0|acc |↑ | 0.1240|± | 0.0104|
| - blimp_coordinate_structure_constraint_object_extraction | 1|none | 0|acc |↑ | 0.5090|± | 0.0158|
| - blimp_determiner_noun_agreement_1 | 1|none | 0|acc |↑ | 0.5160|± | 0.0158|
| - blimp_determiner_noun_agreement_2 | 1|none | 0|acc |↑ | 0.4930|± | 0.0158|
| - blimp_determiner_noun_agreement_irregular_1 | 1|none | 0|acc |↑ | 0.4930|± | 0.0158|
| - blimp_determiner_noun_agreement_irregular_2 | 1|none | 0|acc |↑ | 0.5600|± | 0.0157|
| - blimp_determiner_noun_agreement_with_adj_2 | 1|none | 0|acc |↑ | 0.4840|± | 0.0158|
| - blimp_determiner_noun_agreement_with_adj_irregular_1 | 1|none | 0|acc |↑ | 0.4360|± | 0.0157|
| - blimp_determiner_noun_agreement_with_adj_irregular_2 | 1|none | 0|acc |↑ | 0.5290|± | 0.0158|
| - blimp_determiner_noun_agreement_with_adjective_1 | 1|none | 0|acc |↑ | 0.4930|± | 0.0158|
| - blimp_distractor_agreement_relational_noun | 1|none | 0|acc |↑ | 0.3050|± | 0.0146|
| - blimp_distractor_agreement_relative_clause | 1|none | 0|acc |↑ | 0.2850|± | 0.0143|
| - blimp_drop_argument | 1|none | 0|acc |↑ | 0.7070|± | 0.0144|
| - blimp_ellipsis_n_bar_1 | 1|none | 0|acc |↑ | 0.1900|± | 0.0124|
| - blimp_ellipsis_n_bar_2 | 1|none | 0|acc |↑ | 0.3560|± | 0.0151|
| - blimp_existential_there_object_raising | 1|none | 0|acc |↑ | 0.8150|± | 0.0123|
| - blimp_existential_there_quantifiers_1 | 1|none | 0|acc |↑ | 0.8150|± | 0.0123|
| - blimp_existential_there_quantifiers_2 | 1|none | 0|acc |↑ | 0.1330|± | 0.0107|
| - blimp_existential_there_subject_raising | 1|none | 0|acc |↑ | 0.5000|± | 0.0158|
| - blimp_expletive_it_object_raising | 1|none | 0|acc |↑ | 0.7030|± | 0.0145|
| - blimp_inchoative | 1|none | 0|acc |↑ | 0.4020|± | 0.0155|
| - blimp_intransitive | 1|none | 0|acc |↑ | 0.5670|± | 0.0157|
| - blimp_irregular_past_participle_adjectives | 1|none | 0|acc |↑ | 0.7250|± | 0.0141|
| - blimp_irregular_past_participle_verbs | 1|none | 0|acc |↑ | 0.7430|± | 0.0138|
| - blimp_irregular_plural_subject_verb_agreement_1 | 1|none | 0|acc |↑ | 0.5940|± | 0.0155|
| - blimp_irregular_plural_subject_verb_agreement_2 | 1|none | 0|acc |↑ | 0.5480|± | 0.0157|
| - blimp_left_branch_island_echo_question | 1|none | 0|acc |↑ | 0.7680|± | 0.0134|
| - blimp_left_branch_island_simple_question | 1|none | 0|acc |↑ | 0.1660|± | 0.0118|
| - blimp_matrix_question_npi_licensor_present | 1|none | 0|acc |↑ | 0.0050|± | 0.0022|
| - blimp_npi_present_1 | 1|none | 0|acc |↑ | 0.5210|± | 0.0158|
| - blimp_npi_present_2 | 1|none | 0|acc |↑ | 0.4720|± | 0.0158|
| - blimp_only_npi_licensor_present | 1|none | 0|acc |↑ | 0.2670|± | 0.0140|
| - blimp_only_npi_scope | 1|none | 0|acc |↑ | 0.2180|± | 0.0131|
| - blimp_passive_1 | 1|none | 0|acc |↑ | 0.7440|± | 0.0138|
| - blimp_passive_2 | 1|none | 0|acc |↑ | 0.6090|± | 0.0154|
| - blimp_principle_A_c_command | 1|none | 0|acc |↑ | 0.6530|± | 0.0151|
| - blimp_principle_A_case_1 | 1|none | 0|acc |↑ | 1.0000|± | 0.0000|
| - blimp_principle_A_case_2 | 1|none | 0|acc |↑ | 0.5410|± | 0.0158|
| - blimp_principle_A_domain_1 | 1|none | 0|acc |↑ | 0.9980|± | 0.0014|
| - blimp_principle_A_domain_2 | 1|none | 0|acc |↑ | 0.5880|± | 0.0156|
| - blimp_principle_A_domain_3 | 1|none | 0|acc |↑ | 0.5190|± | 0.0158|
| - blimp_principle_A_reconstruction | 1|none | 0|acc |↑ | 0.3860|± | 0.0154|
| - blimp_regular_plural_subject_verb_agreement_1 | 1|none | 0|acc |↑ | 0.6800|± | 0.0148|
| - blimp_regular_plural_subject_verb_agreement_2 | 1|none | 0|acc |↑ | 0.5800|± | 0.0156|
| - blimp_sentential_negation_npi_licensor_present | 1|none | 0|acc |↑ | 1.0000|± | 0.0000|
| - blimp_sentential_negation_npi_scope | 1|none | 0|acc |↑ | 0.6460|± | 0.0151|
| - blimp_sentential_subject_island | 1|none | 0|acc |↑ | 0.6100|± | 0.0154|
| - blimp_superlative_quantifiers_1 | 1|none | 0|acc |↑ | 0.5440|± | 0.0158|
| - blimp_superlative_quantifiers_2 | 1|none | 0|acc |↑ | 0.2560|± | 0.0138|
| - blimp_tough_vs_raising_1 | 1|none | 0|acc |↑ | 0.3170|± | 0.0147|
| - blimp_tough_vs_raising_2 | 1|none | 0|acc |↑ | 0.7070|± | 0.0144|
| - blimp_transitive | 1|none | 0|acc |↑ | 0.6760|± | 0.0148|
| - blimp_wh_island | 1|none | 0|acc |↑ | 0.6910|± | 0.0146|
| - blimp_wh_questions_object_gap | 1|none | 0|acc |↑ | 0.6260|± | 0.0153|
| - blimp_wh_questions_subject_gap | 1|none | 0|acc |↑ | 0.9710|± | 0.0053|
| - blimp_wh_questions_subject_gap_long_distance | 1|none | 0|acc |↑ | 0.9530|± | 0.0067|
| - blimp_wh_vs_that_no_gap | 1|none | 0|acc |↑ | 0.9990|± | 0.0010|
| - blimp_wh_vs_that_no_gap_long_distance | 1|none | 0|acc |↑ | 0.9980|± | 0.0014|
| - blimp_wh_vs_that_with_gap | 1|none | 0|acc |↑ | 0.0010|± | 0.0010|
| - blimp_wh_vs_that_with_gap_long_distance | 1|none | 0|acc |↑ | 0.0010|± | 0.0010|
|hellaswag | 1|none | 0|acc |↑ | 0.2589|± | 0.0044|
| | |none | 0|acc_norm |↑ | 0.2551|± | 0.0044|
|lambada_openai | 1|none | 0|acc |↑ | 0.0000|± | 0.0000|
| | |none | 0|perplexity|↓ |1420846.8323|± |106563.1327|
|lambada_standard | 1|none | 0|acc |↑ | 0.0000|± | 0.0000|
| | |none | 0|perplexity|↓ |7916035.3527|± |664805.9178|
|mmlu | 2|none | |acc |↑ | 0.2295|± | 0.0035|
| - humanities | 2|none | |acc |↑ | 0.2421|± | 0.0062|
| - formal_logic | 1|none | 0|acc |↑ | 0.2857|± | 0.0404|
| - high_school_european_history | 1|none | 0|acc |↑ | 0.2182|± | 0.0323|
| - high_school_us_history | 1|none | 0|acc |↑ | 0.2500|± | 0.0304|
| - high_school_world_history | 1|none | 0|acc |↑ | 0.2700|± | 0.0289|
| - international_law | 1|none | 0|acc |↑ | 0.2397|± | 0.0390|
| - jurisprudence | 1|none | 0|acc |↑ | 0.2593|± | 0.0424|
| - logical_fallacies | 1|none | 0|acc |↑ | 0.2209|± | 0.0326|
| - moral_disputes | 1|none | 0|acc |↑ | 0.2486|± | 0.0233|
| - moral_scenarios | 1|none | 0|acc |↑ | 0.2380|± | 0.0142|
| - philosophy | 1|none | 0|acc |↑ | 0.1865|± | 0.0221|
| - prehistory | 1|none | 0|acc |↑ | 0.2160|± | 0.0229|
| - professional_law | 1|none | 0|acc |↑ | 0.2458|± | 0.0110|
| - world_religions | 1|none | 0|acc |↑ | 0.3216|± | 0.0358|
| - other | 2|none | |acc |↑ | 0.2398|± | 0.0076|
| - business_ethics | 1|none | 0|acc |↑ | 0.3000|± | 0.0461|
| - clinical_knowledge | 1|none | 0|acc |↑ | 0.2151|± | 0.0253|
| - college_medicine | 1|none | 0|acc |↑ | 0.2081|± | 0.0310|
| - global_facts | 1|none | 0|acc |↑ | 0.1800|± | 0.0386|
| - human_aging | 1|none | 0|acc |↑ | 0.3139|± | 0.0311|
| - management | 1|none | 0|acc |↑ | 0.1748|± | 0.0376|
| - marketing | 1|none | 0|acc |↑ | 0.2906|± | 0.0297|
| - medical_genetics | 1|none | 0|acc |↑ | 0.3000|± | 0.0461|
| - miscellaneous | 1|none | 0|acc |↑ | 0.2375|± | 0.0152|
| - nutrition | 1|none | 0|acc |↑ | 0.2255|± | 0.0239|
| - professional_accounting | 1|none | 0|acc |↑ | 0.2340|± | 0.0253|
| - professional_medicine | 1|none | 0|acc |↑ | 0.1838|± | 0.0235|
| - virology | 1|none | 0|acc |↑ | 0.2831|± | 0.0351|
| - social sciences | 2|none | |acc |↑ | 0.2171|± | 0.0074|
| - econometrics | 1|none | 0|acc |↑ | 0.2368|± | 0.0400|
| - high_school_geography | 1|none | 0|acc |↑ | 0.1768|± | 0.0272|
| - high_school_government_and_politics | 1|none | 0|acc |↑ | 0.1969|± | 0.0287|
| - high_school_macroeconomics | 1|none | 0|acc |↑ | 0.2026|± | 0.0204|
| - high_school_microeconomics | 1|none | 0|acc |↑ | 0.2101|± | 0.0265|
| - high_school_psychology | 1|none | 0|acc |↑ | 0.1927|± | 0.0169|
| - human_sexuality | 1|none | 0|acc |↑ | 0.2595|± | 0.0384|
| - professional_psychology | 1|none | 0|acc |↑ | 0.2500|± | 0.0175|
| - public_relations | 1|none | 0|acc |↑ | 0.2182|± | 0.0396|
| - security_studies | 1|none | 0|acc |↑ | 0.1878|± | 0.0250|
| - sociology | 1|none | 0|acc |↑ | 0.2438|± | 0.0304|
| - us_foreign_policy | 1|none | 0|acc |↑ | 0.2800|± | 0.0451|
| - stem | 2|none | |acc |↑ | 0.2125|± | 0.0073|
| - abstract_algebra | 1|none | 0|acc |↑ | 0.2200|± | 0.0416|
| - anatomy | 1|none | 0|acc |↑ | 0.1852|± | 0.0336|
| - astronomy | 1|none | 0|acc |↑ | 0.1776|± | 0.0311|
| - college_biology | 1|none | 0|acc |↑ | 0.2569|± | 0.0365|
| - college_chemistry | 1|none | 0|acc |↑ | 0.2000|± | 0.0402|
| - college_computer_science | 1|none | 0|acc |↑ | 0.2600|± | 0.0441|
| - college_mathematics | 1|none | 0|acc |↑ | 0.2100|± | 0.0409|
| - college_physics | 1|none | 0|acc |↑ | 0.2157|± | 0.0409|
| - computer_security | 1|none | 0|acc |↑ | 0.2800|± | 0.0451|
| - conceptual_physics | 1|none | 0|acc |↑ | 0.2638|± | 0.0288|
| - electrical_engineering | 1|none | 0|acc |↑ | 0.2414|± | 0.0357|
| - elementary_mathematics | 1|none | 0|acc |↑ | 0.2090|± | 0.0209|
| - high_school_biology | 1|none | 0|acc |↑ | 0.1774|± | 0.0217|
| - high_school_chemistry | 1|none | 0|acc |↑ | 0.1527|± | 0.0253|
| - high_school_computer_science | 1|none | 0|acc |↑ | 0.2500|± | 0.0435|
| - high_school_mathematics | 1|none | 0|acc |↑ | 0.2111|± | 0.0249|
| - high_school_physics | 1|none | 0|acc |↑ | 0.1987|± | 0.0326|
| - high_school_statistics | 1|none | 0|acc |↑ | 0.1528|± | 0.0245|
| - machine_learning | 1|none | 0|acc |↑ | 0.3125|± | 0.0440|
|winogrande | 1|none | 0|acc |↑ | 0.5043|± | 0.0141|