Skip to content

Commit a533e19

Browse files
committed
update
1 parent f1c4abc commit a533e19

1 file changed

Lines changed: 88 additions & 81 deletions

File tree

autotest/all/chat_obj_fullbench_v2.py

Lines changed: 88 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -12,155 +12,162 @@
1212
from opencompass.configs.chatml_datasets.C_MHChem.C_MHChem_gen import \
1313
datasets as C_MHChem_chatml # noqa: F401, E501
1414
from opencompass.configs.chatml_datasets.CPsyExam.CPsyExam_gen import \
15-
datasets as CPsyExam_chatml # noqa: F401
15+
datasets as CPsyExam_chatml # noqa: F401, E501
1616
from opencompass.configs.chatml_datasets.CS_Bench.CS_Bench_gen import \
17-
datasets as CS_Bench_chatml # noqa: F401
17+
datasets as CS_Bench_chatml # noqa: F401, E501
1818
from opencompass.configs.chatml_datasets.HMMT2025.HMMT2025_repeat32_gen import \
19-
datasets as HMMT2025_chatml # noqa: E501, F401
19+
datasets as HMMT2025_chatml # noqa: F401, E501
2020
from opencompass.configs.chatml_datasets.IMO_Bench_AnswerBench.IMO_Bench_AnswerBench_gen import \
21-
datasets as IMO_Bench_AnswerBench_chatml # noqa: E501, F401
21+
datasets as IMO_Bench_AnswerBench_chatml # noqa: F401, E501
22+
# ChatML Datasets
2223
from opencompass.configs.chatml_datasets.MaScQA.MaScQA_gen import \
23-
datasets as MaScQA_chatml # noqa: F401
24+
datasets as MaScQA_chatml # noqa: F401, E501
2425
from opencompass.configs.chatml_datasets.UGD_hard.UGD_hard_repeat8_gen import \
25-
datasets as UGD_hard_chatml # noqa: E501, F401
26+
datasets as UGD_hard_chatml # noqa: F401, E501
2627
from opencompass.configs.chatml_datasets.UGPhysics.UGPhysics_gen import \
27-
datasets as UGPhysics_chatml # noqa: F401
28-
# Math
28+
datasets as UGPhysics_chatml # noqa: F401, E501
29+
# Math Calculation
2930
from opencompass.configs.datasets.aime2024.aime2024_cascade_eval_rawprompt_gen_2f2c96 import \
30-
aime2024_datasets # noqa: E501
31+
aime2024_datasets # noqa: F401, E501
3132
from opencompass.configs.datasets.aime2025.aime2025_cascade_eval_rawprompt_gen_2f2c96 import \
32-
aime2025_datasets # noqa: E501
33-
# CompassAcademic (same modules as chat_objective)
34-
from opencompass.configs.datasets.aime2025.aime2025_llmjudge_academic import \
33+
aime2025_datasets # noqa: F401, E501
34+
# CompassAcademic Extended
35+
from opencompass.configs.datasets.aime2025.aime2025_llmjudge_academic_rawprompt import \
3536
aime2025_datasets as \
36-
CompassAcademic_aime2025_datasets # noqa: E501, F401
37+
CompassAcademic_aime2025_datasets # noqa: F401, E501
3738
from opencompass.configs.datasets.aime2026.aime2026_cascade_eval_rawprompt_gen_0970dd import \
38-
aime2026_datasets # noqa: E501
39+
aime2026_datasets # noqa: F401, E501
3940
from opencompass.configs.datasets.atlas.atlas_val_rawprompt_gen_277bee import \
40-
atlas_datasets # noqa: E501, F401
41-
# General reasoning
41+
atlas_datasets # noqa: F401, E501
42+
# General Reasoning
4243
from opencompass.configs.datasets.bbeh.bbeh_llmjudge_rawprompt_gen_36b5f4 import \
43-
bbeh_datasets # noqa: E501, F401
44+
bbeh_datasets # noqa: F401, E501
4445
from opencompass.configs.datasets.bigcodebench.bigcodebench_hard_complete_rawprompt_gen_95140b import \
45-
bigcodebench_hard_complete_datasets # noqa: E501, F401
46+
bigcodebench_hard_complete_datasets # noqa: F401, E501
4647
from opencompass.configs.datasets.bigcodebench.bigcodebench_hard_instruct_rawprompt_gen_5cbb9f import \
47-
bigcodebench_hard_instruct_datasets # noqa: E501, F401
48+
bigcodebench_hard_instruct_datasets # noqa: F401, E501
4849
from opencompass.configs.datasets.biodata.biodata_task_rawprompt_gen import \
49-
biodata_task_datasets # noqa: E501, F401
50+
biodata_task_datasets # noqa: F401, E501
5051
from opencompass.configs.datasets.CARDBiomedBench.CARDBiomedBench_llmjudge_rawprompt_gen_b4d90c import \
51-
cardbiomedbench_datasets # noqa: E501, F401
52+
cardbiomedbench_datasets # noqa: F401, E501
5253
from opencompass.configs.datasets.chem_exam.competition_rawprompt_gen import \
53-
chem_competition_instruct_datasets # noqa: E501, F401
54+
chem_competition_instruct_datasets # noqa: F401, E501
5455
from opencompass.configs.datasets.chem_exam.gaokao_rawprompt_gen import \
55-
chem_gaokao_instruct_datasets # noqa: F401
56+
chem_gaokao_instruct_datasets # noqa: F401, E501
5657
from opencompass.configs.datasets.ChemBench.ChemBench_llmjudge_rawprompt_gen_fa3fc4 import \
57-
chembench_datasets # noqa: E501, F401
58+
chembench_datasets # noqa: F401, E501
5859
from opencompass.configs.datasets.ClimaQA.ClimaQA_Gold_llm_judge_rawprompt_gen_b3080f import \
59-
climaqa_datasets # noqa: E501, F401
60+
climaqa_datasets # noqa: F401, E501
6061
from opencompass.configs.datasets.cmmlu.cmmlu_llmjudge_rawprompt_gen_9f9c31 import \
61-
cmmlu_datasets # noqa: E501, F401
62+
cmmlu_datasets # noqa: F401, E501
6263
from opencompass.configs.datasets.CMPhysBench.cmphysbench_rawprompt_gen import \
63-
cmphysbench_datasets # noqa: E501, F401
64+
cmphysbench_datasets # noqa: F401, E501
6465
from opencompass.configs.datasets.Earth_Silver.Earth_Silver_llmjudge_rawprompt_gen_a84bc6 import \
65-
earth_silver_mcq_datasets # noqa: E501, F401
66-
from opencompass.configs.datasets.gpqa.gpqa_cascade_eval_academic import \
67-
gpqa_datasets as CompassAcademic_gpqa_datasets # noqa: F401
66+
earth_silver_mcq_datasets # noqa: F401, E501
67+
from opencompass.configs.datasets.gpqa.gpqa_cascade_eval_academic_rawprompt import \
68+
gpqa_datasets as CompassAcademic_gpqa_datasets # noqa: F401, E501
6869
from opencompass.configs.datasets.gpqa.gpqa_cascade_eval_rawprompt_gen_706039 import \
69-
gpqa_datasets # noqa: E501, F401
70-
from opencompass.configs.datasets.HLE.hle_llmverify_academic import \
71-
hle_datasets as CompassAcademic_hle_datasets # noqa: F401
70+
gpqa_datasets # noqa: F401, E501
71+
from opencompass.configs.datasets.HLE.hle_llmverify_academic_rawprompt import \
72+
hle_datasets as CompassAcademic_hle_datasets # noqa: F401, E501
7273
from opencompass.configs.datasets.HLE.hle_llmverify_rawprompt_gen_0970dd import \
73-
hle_datasets # noqa: E501, F401
74+
hle_datasets # noqa: F401, E501
7475
from opencompass.configs.datasets.hmmt2026.hmmt2026_cascade_eval_rawprompt_gen_0970dd import \
75-
hmmt2026_datasets # noqa: E501
76+
hmmt2026_datasets # noqa: F401, E501
7677
# Coding
7778
from opencompass.configs.datasets.humaneval.humaneval_openai_sample_evals_rawprompt_gen_6ce2ca import \
78-
humaneval_datasets # noqa: E501, F401
79+
humaneval_datasets # noqa: F401, E501
7980
from opencompass.configs.datasets.IFBench.IFBench_rawprompt_gen import \
80-
ifbench_datasets # noqa: F401
81-
# Instruct following
81+
ifbench_datasets # noqa: F401, E501
8282
from opencompass.configs.datasets.IFEval.IFEval_rawprompt_gen_e7f781 import \
83-
ifeval_datasets # noqa: E501, F401
83+
ifeval_datasets # noqa: F401, E501
8484
from opencompass.configs.datasets.kcle.kcle_llm_judge_rawprompt_gen_16e383 import \
85-
kcle_datasets as kcle_fix_datasets # noqa: E501, F401
85+
kcle_datasets as kcle_fix_datasets # noqa: F401, E501
8686
from opencompass.configs.datasets.korbench.korbench_single_0shot_cascade_eval_rawprompt_gen_c048da import \
87-
korbench_0shot_single_datasets # noqa: E501
87+
korbench_0shot_single_datasets # noqa: F401, E501
8888
from opencompass.configs.datasets.livecodebench.livecodebench_rawprompt_gen_c09673 import \
89-
LCBCodeGeneration_dataset # noqa: E501
90-
from opencompass.configs.datasets.livecodebench.livecodebench_v6_academic import \
89+
LCBCodeGeneration_dataset # noqa: F401, E501
90+
from opencompass.configs.datasets.livecodebench.livecodebench_v6_academic_rawprompt import \
9191
LCBCodeGeneration_dataset as \
92-
CompassAcademic_LCBCodeGeneration_dataset # noqa: E501, F401
92+
CompassAcademic_LCBCodeGeneration_dataset # noqa: F401, E501
9393
from opencompass.configs.datasets.livecodebench_pro.livecodebench_pro_rawprompt_gen import \
94-
lcb_pro_datasets # noqa: E501, F401
94+
lcb_pro_datasets # noqa: F401, E501
9595
from opencompass.configs.datasets.livemathbench.livemathbench_hard_custom_cascade_eval_rawprompt_gen_e1ce64 import \
96-
livemathbench_datasets # noqa: E501
96+
livemathbench_datasets # noqa: F401, E501
9797
from opencompass.configs.datasets.matbench.matbench_llm_judge_rawprompt_gen_c987b6 import \
98-
matbench_datasets # noqa: E501, F401
98+
matbench_datasets # noqa: F401, E501
9999
from opencompass.configs.datasets.math.math_500_cascade_eval_rawprompt_gen_0970dd import \
100-
math_datasets # noqa: E501
100+
math_datasets # noqa: F401, E501
101101
from opencompass.configs.datasets.mbpp.sanitized_mbpp_mdblock_0shot_nocot_rawprompt_gen_30c1e5 import \
102-
sanitized_mbpp_datasets # noqa: E501, F401
102+
sanitized_mbpp_datasets # noqa: F401, E501
103103
from opencompass.configs.datasets.medmcqa.medmcqa_llmjudge_rawprompt_gen_015178 import \
104-
medmcqa_datasets # noqa: E501, F401
104+
medmcqa_datasets # noqa: F401, E501
105105
from opencompass.configs.datasets.MedXpertQA.MedXpertQA_llmjudge_rawprompt_gen import \
106-
medxpertqa_datasets # noqa: E501, F401
106+
medxpertqa_datasets # noqa: F401, E501
107107
from opencompass.configs.datasets.mmlu.mmlu_llmjudge_rawprompt_gen_af67f0 import \
108-
mmlu_datasets # noqa: E501, F401
108+
mmlu_datasets # noqa: F401, E501
109109
# Knowledge
110110
from opencompass.configs.datasets.mmlu_pro.mmlu_pro_0shot_nocot_genericllmeval_rawprompt_gen_0321fb import \
111-
mmlu_pro_datasets # noqa: E501, F401
111+
mmlu_pro_datasets # noqa: F401, E501
112112
from opencompass.configs.datasets.molculariq.molculariq_rawprompt_gen import \
113-
moleculariq_datasets # noqa: E501, F401
113+
moleculariq_datasets # noqa: F401, E501
114114
from opencompass.configs.datasets.MolInstructions_chem.mol_instructions_chem_rawprompt_gen import \
115-
mol_gen_selfies_datasets # noqa: E501, F401
115+
mol_gen_selfies_datasets # noqa: F401, E501
116116
from opencompass.configs.datasets.OlymMATH.olymmath_llmverify_rawprompt_gen_9d3a8e import \
117-
olymmath_datasets # noqa: E501, F401
117+
olymmath_datasets # noqa: F401, E501
118118
from opencompass.configs.datasets.OlympiadBench.OlympiadBench_0shot_llmverify_rawprompt_gen_d3e9e4 import \
119-
olympiadbench_datasets # noqa: E501, F401
119+
olympiadbench_datasets # noqa: F401, E501
120120
from opencompass.configs.datasets.openswi.openswi_rawprompt_gen import \
121-
openswi_datasets # noqa: F401
121+
openswi_datasets # noqa: F401, E501
122122
from opencompass.configs.datasets.PHYBench.phybench_rawprompt_gen import \
123-
phybench_datasets # noqa: F401
123+
phybench_datasets # noqa: F401, E501
124124
from opencompass.configs.datasets.PHYSICS.PHYSICS_llm_judge_rawprompt_gen_56ebc8 import \
125-
physics_datasets # noqa: E501, F401
125+
physics_datasets # noqa: F401, E501
126126
from opencompass.configs.datasets.ProteinLMBench.ProteinLMBench_llmjudge_rawprompt_gen_9627a6 import \
127-
proteinlmbench_datasets # noqa: E501, F401
127+
proteinlmbench_datasets # noqa: F401, E501
128128
from opencompass.configs.datasets.R_Bench.rbench_llmjudge_rawprompt_gen_c24221 import \
129-
RBench_datasets # noqa: E501, F401
129+
RBench_datasets # noqa: F401, E501
130130
from opencompass.configs.datasets.SimpleQA.simpleqa_verified_rawprompt_gen import \
131-
simpleqa_verified_datasets # noqa: E501, F401
131+
simpleqa_verified_datasets # noqa: F401, E501
132132
# AI4S
133+
from opencompass.configs.datasets.SmolInstruct.smolinstruct_0shot_instruct_rawprompt_gen import \
134+
mini_smolinstruct_datasets_0shot_instruct as \
135+
mini_smolinstruct_datasets # noqa: F401, E501
133136
from opencompass.configs.datasets.SmolInstruct.smolinstruct_0shot_instruct_rawprompt_gen import \
134137
smolinstruct_datasets_0shot_instruct as \
135-
smolinstruct_datasets # noqa: E501, F401
138+
smolinstruct_datasets # noqa: F401, E501
136139
from opencompass.configs.datasets.srbench.srbench_rawprompt_gen import \
137-
srbench_datasets # noqa: F401
140+
srbench_datasets # noqa: F401, E501
138141
from opencompass.configs.datasets.supergpqa.supergpqa_cascade_rawprompt_gen_ca8345 import \
139-
supergpqa_datasets # noqa: E501, F401
140-
# Summary groups (aligned with chat_objective for eval / summarizer)
142+
supergpqa_datasets # noqa: F401, E501
143+
# Summary Groups
141144
from opencompass.configs.summarizers.groups.bbeh import \
142-
bbeh_summary_groups # noqa: F401
145+
bbeh_summary_groups # noqa: F401, E501
143146
from opencompass.configs.summarizers.groups.biodata import \
144-
biodata_summary_groups # noqa: F401
147+
biodata_summary_groups # noqa: F401, E501
145148
from opencompass.configs.summarizers.groups.cmmlu import \
146-
cmmlu_summary_groups # noqa: F401
149+
cmmlu_summary_groups # noqa: F401, E501
147150
from opencompass.configs.summarizers.groups.korbench import \
148-
korbench_summary_groups # noqa: F401
151+
korbench_summary_groups # noqa: F401, E501
152+
from opencompass.configs.summarizers.groups.matbench import \
153+
matbench_summary_groups # noqa: F401, E501
149154
from opencompass.configs.summarizers.groups.mmlu import \
150-
mmlu_summary_groups # noqa: F401
155+
mmlu_summary_groups # noqa: F401, E501
151156
from opencompass.configs.summarizers.groups.mmlu_pro import \
152-
mmlu_pro_summary_groups # noqa: F401
153-
from opencompass.configs.summarizers.groups.OlympiadBench import ( # noqa: E501, F401
154-
OlympiadBench_summary_groups, OlympiadBenchMath_summary_groups,
155-
OlympiadBenchPhysics_summary_groups)
157+
mmlu_pro_summary_groups # noqa: F401, E501
158+
from opencompass.configs.summarizers.groups.mol_instruct import \
159+
mol_instruct_summary_groups # noqa: F401, E501
160+
from opencompass.configs.summarizers.groups.OlympiadBench import \
161+
OlympiadBenchPhysics_summary_groups # noqa: F401, E501
162+
from opencompass.configs.summarizers.groups.OlympiadBench import ( # noqa: F401, E501
163+
OlympiadBench_summary_groups, OlympiadBenchMath_summary_groups)
156164
from opencompass.configs.summarizers.groups.PHYSICS import \
157-
physics_summary_groups # noqa: F401
165+
physics_summary_groups # noqa: F401, E501
166+
from opencompass.configs.summarizers.groups.smolinstruct import ( # noqa: F401, E501
167+
smolinstruct_mini_summary_groups, smolinstruct_summary_groups)
158168
from opencompass.configs.summarizers.groups.supergpqa import \
159-
supergpqa_summary_groups # noqa: F401
169+
supergpqa_summary_groups # noqa: F401, E501
160170

161-
# LiveCodeBench v5 / v6 (deepcopy so base LCB config is not mutated).
162-
# Avoid `import copy`: mmengine cfg.dump() serializes top-level names; a
163-
# `copy` module binding becomes invalid Python in the dumped config.
164171
# Build the v6 entry as a deep copy so that later changes to it do not mutate
# the imported LCBCodeGeneration_dataset config.
# The `copy` module is reached through __import__ rather than a top-level
# `import copy`: mmengine's cfg.dump() serializes top-level names, and a
# `copy` module binding becomes invalid Python in the dumped config.
LCBCodeGeneration_v6_datasets = __import__('copy').deepcopy(
165172
LCBCodeGeneration_dataset)
166173
# Give the deep-copied entry its own 'abbr' value; because the object is a
# deep copy, the imported LCBCodeGeneration_dataset is left unchanged.
LCBCodeGeneration_v6_datasets['abbr'] = 'lcb_code_generation_v6'

0 commit comments

Comments
 (0)