Skip to content

Commit bd90684

Browse files
committed
sync main
1 parent 94eee2b commit bd90684

File tree

251 files changed

+15582
-4679
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

251 files changed

+15582
-4679
lines changed

.github/scripts/action_tools.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,5 +265,32 @@ def generate_csv_from_profile_result(file_path: str, out_path: str):
265265
writer.writerows(data_csv)
266266

267267

268+
def generate_output_for_evaluation(result_dir: str):
    """Transpose the latest evaluation summary CSV and publish it.

    The newest summary CSV under ``result_dir`` is located with
    ``find_csv_files``, loaded with pandas and transposed. The first four
    rows of the transposed table keep their original order; every row after
    them is sorted by its index label. The result is written to
    ``transposed_output.csv`` in the current working directory (index
    written, header row omitted) and then handed to ``add_summary``.

    Args:
        result_dir: Directory searched recursively for summary CSV files.
    """
    output_csv = 'transposed_output.csv'

    # Locate and load the most recent result.
    summary_csv = find_csv_files(result_dir)
    transposed = pd.read_csv(summary_csv).T

    # Keep the four leading rows in place and sort only the remainder.
    leading_rows = transposed.iloc[:4]
    remaining_rows = transposed.iloc[4:].sort_index()
    combined = pd.concat([leading_rows, remaining_rows])
    combined.to_csv(output_csv, header=False, index=True)

    # Output to the GitHub Actions summary.
    add_summary(output_csv)
280+
281+
282+
def find_csv_files(directory):
    """Return the most recently changed ``summary*.csv`` under ``directory``.

    The directory tree is walked recursively. A file is a candidate when its
    name starts with ``summary`` and ends with ``.csv``. Candidates are
    ranked by ``os.path.getctime`` and the one with the largest timestamp is
    returned; when timestamps tie, the candidate found last during the walk
    wins.

    Args:
        directory: Root directory to search.

    Returns:
        The path of the newest candidate, built by joining the walk root
        with the file name.

    Raises:
        FileNotFoundError: If no matching file exists under ``directory``.
    """
    csv_files = [
        os.path.join(root, file)
        for root, _dirs, files in os.walk(directory)
        for file in files
        if file.startswith('summary') and file.endswith('.csv')
    ]

    # Fail with an actionable message instead of an opaque IndexError when
    # the evaluation produced no summary file at all.
    if not csv_files:
        raise FileNotFoundError(
            f"no 'summary*.csv' file found under {directory!r}")

    # sorted() is stable, so taking the last element preserves the
    # "latest, and last-discovered on ties" choice.
    return sorted(csv_files, key=os.path.getctime)[-1]
293+
294+
268295
if __name__ == '__main__':
269296
fire.Fire()
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
from copy import deepcopy
2+
3+
from mmengine.config import read_base
4+
5+
with read_base():
6+
# choose a list of datasets
7+
from opencompass.configs.datasets.gpqa.gpqa_openai_simple_evals_gen_5aeece import gpqa_datasets # noqa: F401, E501
8+
from opencompass.configs.datasets.gsm8k.gsm8k_gen_17d0dc import gsm8k_datasets # noqa: F401, E501
9+
from opencompass.configs.datasets.race.race_ppl import race_datasets # noqa: F401, E501
10+
from opencompass.configs.datasets.winogrande.winogrande_5shot_ll_252f01 import \
11+
winogrande_datasets # noqa: F401, E501
12+
# read hf models - chat models
13+
from opencompass.configs.models.chatglm.lmdeploy_glm4_9b import models as lmdeploy_glm4_9b_model # noqa: F401, E501
14+
from opencompass.configs.models.deepseek.lmdeploy_deepseek_7b_base import \
15+
models as lmdeploy_deepseek_7b_base_model # noqa: F401, E501
16+
from opencompass.configs.models.deepseek.lmdeploy_deepseek_67b_base import \
17+
models as lmdeploy_deepseek_67b_base_model # noqa: F401, E501
18+
from opencompass.configs.models.deepseek.lmdeploy_deepseek_v2 import lmdeploy_deepseek_v2_model # noqa: F401, E501
19+
from opencompass.configs.models.gemma.lmdeploy_gemma_9b import models as pytorch_gemma_9b_model # noqa: F401, E501
20+
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_1_8b import \
21+
models as lmdeploy_internlm2_1_8b_model # noqa: F401, E501
22+
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b import \
23+
models as lmdeploy_internlm2_5_7b_model # noqa: F401, E501
24+
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_20b import \
25+
models as lmdeploy_internlm2_20b_model # noqa: F401, E501
26+
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_base_7b import \
27+
models as lmdeploy_internlm2_base_7b_model # noqa: F401, E501
28+
from opencompass.configs.models.hf_llama.lmdeploy_llama3_1_8b import \
29+
models as lmdeploy_llama3_1_8b_model # noqa: F401, E501
30+
from opencompass.configs.models.hf_llama.lmdeploy_llama3_8b import \
31+
models as lmdeploy_llama3_8b_model # noqa: F401, E501
32+
from opencompass.configs.models.hf_llama.lmdeploy_llama3_70b import \
33+
models as lmdeploy_llama3_70b_model # noqa: F401, E501
34+
from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_1_5b import \
35+
models as lmdeploy_qwen2_5_1_5b_model # noqa: F401, E501
36+
from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b import \
37+
models as lmdeploy_qwen2_5_7b_model # noqa: F401, E501
38+
from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_32b import \
39+
models as lmdeploy_qwen2_5_32b_model # noqa: F401, E501
40+
from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_72b import \
41+
models as lmdeploy_qwen2_5_72b_model # noqa: F401, E501
42+
from opencompass.configs.models.qwen.lmdeploy_qwen2_1_5b import \
43+
models as lmdeploy_qwen2_1_5b_model # noqa: F401, E501
44+
from opencompass.configs.models.qwen.lmdeploy_qwen2_7b import models as lmdeploy_qwen2_7b_model # noqa: F401, E501
45+
from opencompass.configs.models.yi.lmdeploy_yi_1_5_9b import models as lmdeploy_yi_1_5_9b_model # noqa: F401, E501
46+
47+
from .volc import infer as volc_infer # noqa: F401, E501
48+
49+
# Keep only the entry at index 1 of the RACE dataset list.
# NOTE(review): presumably the 'race-high' split reported by the summarizer
# below -- confirm against the race_ppl config.
race_datasets = [race_datasets[1]]

# Flatten every module-level ``*_datasets`` list into a single list.
datasets = [
    dataset
    for name, group in locals().items() if name.endswith('_datasets')
    for dataset in group
]

# Independent copies of the lmdeploy model configs; the ``pytorch_`` prefix
# makes the loops below retarget them to the 'pytorch' backend.
pytorch_glm4_9b_model = deepcopy(lmdeploy_glm4_9b_model)
pytorch_deepseek_7b_base_model = deepcopy(lmdeploy_deepseek_7b_base_model)
pytorch_deepseek_67b_base_model = deepcopy(lmdeploy_deepseek_67b_base_model)
pytorch_deepseek_v2_model = deepcopy(lmdeploy_deepseek_v2_model)
pytorch_internlm2_5_7b_model = deepcopy(lmdeploy_internlm2_5_7b_model)
pytorch_internlm2_20b_model = deepcopy(lmdeploy_internlm2_20b_model)
pytorch_internlm2_base_7b_model = deepcopy(lmdeploy_internlm2_base_7b_model)
pytorch_llama3_1_8b_model = deepcopy(lmdeploy_llama3_1_8b_model)
pytorch_llama3_70b_model = deepcopy(lmdeploy_llama3_70b_model)
pytorch_qwen2_5_1_5b_model = deepcopy(lmdeploy_qwen2_5_1_5b_model)
pytorch_qwen2_5_72b_model = deepcopy(lmdeploy_qwen2_5_72b_model)
pytorch_qwen2_7b_model = deepcopy(lmdeploy_qwen2_7b_model)
pytorch_yi_1_5_9b_model = deepcopy(lmdeploy_yi_1_5_9b_model)

# Independent copies that keep the ``lmdeploy_`` prefix and gain a
# ``_native`` suffix; the last loop below switches their communicator.
lmdeploy_glm4_9b_model_native = deepcopy(lmdeploy_glm4_9b_model)
lmdeploy_deepseek_7b_base_model_native = deepcopy(lmdeploy_deepseek_7b_base_model)
lmdeploy_deepseek_67b_base_model_native = deepcopy(lmdeploy_deepseek_67b_base_model)
lmdeploy_deepseek_v2_model_native = deepcopy(lmdeploy_deepseek_v2_model)
lmdeploy_internlm2_5_7b_model_native = deepcopy(lmdeploy_internlm2_5_7b_model)
lmdeploy_internlm2_20b_model_native = deepcopy(lmdeploy_internlm2_20b_model)
lmdeploy_internlm2_base_7b_model_native = deepcopy(lmdeploy_internlm2_base_7b_model)
lmdeploy_llama3_1_8b_model_native = deepcopy(lmdeploy_llama3_1_8b_model)
lmdeploy_llama3_70b_model_native = deepcopy(lmdeploy_llama3_70b_model)
lmdeploy_qwen2_5_1_5b_model_native = deepcopy(lmdeploy_qwen2_5_1_5b_model)
lmdeploy_qwen2_5_72b_model_native = deepcopy(lmdeploy_qwen2_5_72b_model)
lmdeploy_qwen2_7b_model_native = deepcopy(lmdeploy_qwen2_7b_model)
lmdeploy_yi_1_5_9b_model_native = deepcopy(lmdeploy_yi_1_5_9b_model)

# Each loop first snapshots the matching lists into a new list: ``model`` and
# ``m`` are module-level names, so iterating ``locals()`` directly while
# binding them would modify the namespace being iterated.

# Settings shared by every model list, whatever its backend.
for model in [cfgs for name, cfgs in locals().items() if name.startswith(('lmdeploy_', 'pytorch_'))]:
    for m in model:
        m['engine_config']['max_batch_size'] = 512
        m['gen_config']['do_sample'] = False
        m['batch_size'] = 5000

# Every ``lmdeploy_*`` list (the ``*_native`` copies included) uses turbomind.
for model in [cfgs for name, cfgs in locals().items() if name.startswith('lmdeploy_')]:
    for m in model:
        m['backend'] = 'turbomind'

# ``pytorch_*`` lists: rename the abbreviation and switch the backend.
for model in [cfgs for name, cfgs in locals().items() if name.startswith('pytorch_')]:
    for m in model:
        renamed = m['abbr'].replace('turbomind', 'pytorch')
        m['abbr'] = renamed.replace('lmdeploy', 'pytorch')
        m['backend'] = 'pytorch'

# ``*_native`` lists: tag the abbreviation and select the native communicator.
for model in [cfgs for name, cfgs in locals().items() if name.endswith('_native')]:
    for m in model:
        m['abbr'] += '_native'
        m['engine_config']['communicator'] = 'native'

# NOTE(review): aggregation into a single ``models`` list is left disabled;
# presumably the caller selects the model lists explicitly -- confirm.
# models = sum([v for k, v in locals().items() if k.startswith('lmdeploy_') or k.startswith('pytorch_')], [])
# models = sorted(models, key=lambda x: x['run_cfg']['num_gpus'])

# Metrics listed in the summary, plus any ``*_summary_groups`` lists found in
# the module namespace.
summarizer = {
    'dataset_abbrs': [
        ['gsm8k', 'accuracy'],
        ['GPQA_diamond', 'accuracy'],
        ['race-high', 'accuracy'],
        ['winogrande', 'accuracy'],
    ],
    'summary_groups': sum(
        [groups for name, groups in locals().items() if name.endswith('_summary_groups')],
        [],
    ),
}
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
from copy import deepcopy
2+
3+
from mmengine.config import read_base
4+
5+
with read_base():
6+
# choose a list of datasets
7+
from opencompass.configs.datasets.gpqa.gpqa_openai_simple_evals_gen_5aeece import gpqa_datasets # noqa: F401, E501
8+
from opencompass.configs.datasets.IFEval.IFEval_gen_353ae7 import ifeval_datasets # noqa: F401, E501
9+
from opencompass.configs.datasets.math.math_0shot_gen_11c4b5 import math_datasets # noqa: F401, E501
10+
# read hf models - chat models
11+
from opencompass.configs.models.chatglm.lmdeploy_glm4_9b_chat import \
12+
models as lmdeploy_glm4_9b_chat_model # noqa: F401, E501
13+
from opencompass.configs.models.deepseek.lmdeploy_deepseek_r1_distill_qwen_32b import \
14+
models as lmdeploy_deepseek_r1_distill_qwen_32b_model # noqa: F401, E501
15+
from opencompass.configs.models.deepseek.lmdeploy_deepseek_v2_5_1210 import \
16+
models as lmdeploy_deepseek_v2_5_1210_model # noqa: F401, E501
17+
from opencompass.configs.models.deepseek.lmdeploy_deepseek_v2_lite import \
18+
models as lmdeploy_deepseek_v2_lite_model # noqa: F401, E501
19+
from opencompass.configs.models.gemma.lmdeploy_gemma_9b_it import \
20+
models as pytorch_gemma_9b_it_model # noqa: F401, E501
21+
from opencompass.configs.models.gemma.lmdeploy_gemma_27b_it import \
22+
models as pytorch_gemma_27b_it_model # noqa: F401, E501
23+
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat import \
24+
models as lmdeploy_internlm2_5_7b_chat_model # noqa: F401, E501
25+
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_20b_chat import \
26+
models as lmdeploy_internlm2_5_20b_chat_model # noqa: F401, E501
27+
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_chat_1_8b import \
28+
models as lmdeploy_internlm2_chat_1_8b_model # noqa: F401, E501
29+
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_chat_1_8b_sft import \
30+
models as lmdeploy_internlm2_chat_1_8b_sft_model # noqa: F401, E501
31+
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_chat_7b import \
32+
models as lmdeploy_internlm2_chat_7b_model # noqa: F401, E501
33+
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_chat_7b_sft import \
34+
models as lmdeploy_internlm2_chat_7b_sft_model # noqa: F401, E501
35+
from opencompass.configs.models.hf_internlm.lmdeploy_internlm3_8b_instruct import \
36+
models as lmdeploy_internlm3_8b_instruct_model # noqa: F401, E501
37+
from opencompass.configs.models.hf_llama.lmdeploy_llama2_7b_chat import \
38+
models as lmdeploy_llama2_7b_chat_model # noqa: F401, E501
39+
from opencompass.configs.models.hf_llama.lmdeploy_llama3_1_8b_instruct import \
40+
models as lmdeploy_llama3_1_8b_instruct_model # noqa: F401, E501
41+
from opencompass.configs.models.hf_llama.lmdeploy_llama3_2_3b_instruct import \
42+
models as lmdeploy_llama3_2_3b_instruct_model # noqa: F401, E501
43+
from opencompass.configs.models.hf_llama.lmdeploy_llama3_3_70b_instruct import \
44+
models as lmdeploy_llama3_3_70b_instruct_model # noqa: F401, E501
45+
from opencompass.configs.models.hf_llama.lmdeploy_llama3_8b_instruct import \
46+
models as lmdeploy_llama3_8b_instruct_model # noqa: F401, E501
47+
from opencompass.configs.models.mistral.lmdeploy_mistral_large_instruct_2411 import \
48+
models as lmdeploy_mistral_large_instruct_2411_model # noqa: F401, E501
49+
from opencompass.configs.models.mistral.lmdeploy_mistral_nemo_instruct_2407 import \
50+
models as lmdeploy_mistral_nemo_instruct_2407_model # noqa: F401, E501
51+
from opencompass.configs.models.mistral.lmdeploy_mistral_small_instruct_2409 import \
52+
models as lmdeploy_mistral_small_instruct_2409_model # noqa: F401, E501
53+
from opencompass.configs.models.nvidia.lmdeploy_nemotron_70b_instruct_hf import \
54+
models as lmdeploy_nemotron_70b_instruct_hf_model # noqa: F401, E501
55+
from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_0_5b_instruct import \
56+
models as lmdeploy_qwen2_5_0_5b_instruct_model # noqa: F401, E501
57+
from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_3b_instruct import \
58+
models as lmdeploy_qwen2_5_3b_instruct_model # noqa: F401, E501
59+
from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_14b_instruct import \
60+
models as lmdeploy_qwen2_5_14b_instruct_model # noqa: F401, E501
61+
from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_72b_instruct import \
62+
models as lmdeploy_qwen2_5_72b_instruct_model # noqa: F401, E501
63+
from opencompass.configs.models.qwen.lmdeploy_qwen2_1_5b_instruct import \
64+
models as lmdeploy_qwen2_1_5b_instruct_model # noqa: F401, E501
65+
from opencompass.configs.models.qwen.lmdeploy_qwen2_7b_instruct import \
66+
models as lmdeploy_qwen2_7b_instruct_model # noqa: F401, E501
67+
from opencompass.configs.models.yi.lmdeploy_yi_1_5_6b_chat import \
68+
models as lmdeploy_yi_1_5_6b_chat_model # noqa: F401, E501
69+
from opencompass.configs.models.yi.lmdeploy_yi_1_5_9b_chat import \
70+
models as lmdeploy_yi_1_5_9b_chat_model # noqa: F401, E501
71+
from opencompass.configs.models.yi.lmdeploy_yi_1_5_34b_chat import \
72+
models as lmdeploy_yi_1_5_34b_chat_model # noqa: F401, E501
73+
74+
from .volc import infer as volc_infer # noqa: F401, E501
75+
76+
# Flatten every module-level ``*_datasets`` list into a single list.
datasets = [
    dataset
    for name, group in locals().items() if name.endswith('_datasets')
    for dataset in group
]

# Independent copies of the lmdeploy model configs; the ``pytorch_`` prefix
# makes the loops below retarget them to the 'pytorch' backend.
pytorch_glm4_9b_chat_model = deepcopy(lmdeploy_glm4_9b_chat_model)
pytorch_deepseek_v2_lite_model = deepcopy(lmdeploy_deepseek_v2_lite_model)
pytorch_deepseek_v2_5_1210_model = deepcopy(lmdeploy_deepseek_v2_5_1210_model)
pytorch_internlm3_8b_instruct_model = deepcopy(lmdeploy_internlm3_8b_instruct_model)
pytorch_internlm2_5_7b_chat_model = deepcopy(lmdeploy_internlm2_5_7b_chat_model)
pytorch_internlm2_5_20b_chat_model = deepcopy(lmdeploy_internlm2_5_20b_chat_model)
pytorch_llama3_2_3b_instruct_model = deepcopy(lmdeploy_llama3_2_3b_instruct_model)
pytorch_llama3_3_70b_instruct_model = deepcopy(lmdeploy_llama3_3_70b_instruct_model)
pytorch_mistral_nemo_instruct_2407_model = deepcopy(lmdeploy_mistral_nemo_instruct_2407_model)
pytorch_mistral_small_instruct_2409_model = deepcopy(lmdeploy_mistral_small_instruct_2409_model)
pytorch_qwen2_5_72b_instruct_model = deepcopy(lmdeploy_qwen2_5_72b_instruct_model)
pytorch_qwen2_7b_instruct_model = deepcopy(lmdeploy_qwen2_7b_instruct_model)
pytorch_yi_1_5_34b_chat_model = deepcopy(lmdeploy_yi_1_5_34b_chat_model)

# Independent copies that keep the ``lmdeploy_`` prefix and gain a
# ``_native`` suffix; the last loop below switches their communicator.
lmdeploy_glm4_9b_chat_model_native = deepcopy(lmdeploy_glm4_9b_chat_model)
lmdeploy_deepseek_r1_distill_qwen_32b_model_native = deepcopy(lmdeploy_deepseek_r1_distill_qwen_32b_model)
lmdeploy_deepseek_v2_lite_model_native = deepcopy(lmdeploy_deepseek_v2_lite_model)
lmdeploy_deepseek_v2_5_1210_model_native = deepcopy(lmdeploy_deepseek_v2_5_1210_model)
lmdeploy_internlm3_8b_instruct_model_native = deepcopy(lmdeploy_internlm3_8b_instruct_model)
lmdeploy_internlm2_5_7b_chat_model_native = deepcopy(lmdeploy_internlm2_5_7b_chat_model)
lmdeploy_internlm2_5_20b_chat_model_native = deepcopy(lmdeploy_internlm2_5_20b_chat_model)
lmdeploy_llama3_1_8b_instruct_model_native = deepcopy(lmdeploy_llama3_1_8b_instruct_model)
lmdeploy_llama3_2_3b_instruct_model_native = deepcopy(lmdeploy_llama3_2_3b_instruct_model)
lmdeploy_llama3_8b_instruct_model_native = deepcopy(lmdeploy_llama3_8b_instruct_model)
lmdeploy_llama3_3_70b_instruct_model_native = deepcopy(lmdeploy_llama3_3_70b_instruct_model)
lmdeploy_mistral_large_instruct_2411_model_native = deepcopy(lmdeploy_mistral_large_instruct_2411_model)
lmdeploy_mistral_nemo_instruct_2407_model_native = deepcopy(lmdeploy_mistral_nemo_instruct_2407_model)
lmdeploy_mistral_small_instruct_2409_model_native = deepcopy(lmdeploy_mistral_small_instruct_2409_model)
lmdeploy_nemotron_70b_instruct_hf_model_native = deepcopy(lmdeploy_nemotron_70b_instruct_hf_model)
lmdeploy_qwen2_5_0_5b_instruct_model_native = deepcopy(lmdeploy_qwen2_5_0_5b_instruct_model)
lmdeploy_qwen2_5_14b_instruct_model_native = deepcopy(lmdeploy_qwen2_5_14b_instruct_model)
lmdeploy_qwen2_5_72b_instruct_model_native = deepcopy(lmdeploy_qwen2_5_72b_instruct_model)
lmdeploy_qwen2_7b_instruct_model_native = deepcopy(lmdeploy_qwen2_7b_instruct_model)
lmdeploy_yi_1_5_6b_chat_model_native = deepcopy(lmdeploy_yi_1_5_6b_chat_model)
lmdeploy_yi_1_5_34b_chat_model_native = deepcopy(lmdeploy_yi_1_5_34b_chat_model)

# Each loop first snapshots the matching lists into a new list: ``model`` and
# ``m`` are module-level names, so iterating ``locals()`` directly while
# binding them would modify the namespace being iterated.

# Settings shared by every model list, whatever its backend.
for model in [cfgs for name, cfgs in locals().items() if name.startswith(('lmdeploy_', 'pytorch_'))]:
    for m in model:
        m['engine_config']['max_batch_size'] = 512
        m['gen_config']['do_sample'] = False
        m['batch_size'] = 5000

# Every ``lmdeploy_*`` list (the ``*_native`` copies included) uses turbomind.
for model in [cfgs for name, cfgs in locals().items() if name.startswith('lmdeploy_')]:
    for m in model:
        m['backend'] = 'turbomind'

# ``pytorch_*`` lists: rename the abbreviation and switch the backend.
for model in [cfgs for name, cfgs in locals().items() if name.startswith('pytorch_')]:
    for m in model:
        renamed = m['abbr'].replace('turbomind', 'pytorch')
        m['abbr'] = renamed.replace('lmdeploy', 'pytorch')
        m['backend'] = 'pytorch'

# ``*_native`` lists: tag the abbreviation and select the native communicator.
for model in [cfgs for name, cfgs in locals().items() if name.endswith('_native')]:
    for m in model:
        m['abbr'] += '_native'
        m['engine_config']['communicator'] = 'native'

# NOTE(review): aggregation into a single ``models`` list is left disabled;
# presumably the caller selects the model lists explicitly -- confirm.
# models = sum([v for k, v in locals().items() if k.startswith('lmdeploy_') or k.startswith('pytorch_')], [])
# models = sorted(models, key=lambda x: x['run_cfg']['num_gpus'])

# Metrics listed in the summary, plus any ``*_summary_groups`` lists found in
# the module namespace.
summarizer = {
    'dataset_abbrs': [
        ['GPQA_diamond', 'accuracy'],
        ['math', 'accuracy'],
        ['IFEval', 'Prompt-level-strict-accuracy'],
    ],
    'summary_groups': sum(
        [groups for name, groups in locals().items() if name.endswith('_summary_groups')],
        [],
    ),
}

0 commit comments

Comments
 (0)