Skip to content

Commit 5dee848

Browse files
authored
feat: add HiPhO benchmark task (#1186)
1 parent 0c3821d commit 5dee848

16 files changed

Lines changed: 417 additions & 0 deletions
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
# Shared defaults for the HiPhO benchmark tasks. Presumably this is the
# `_default_template_yaml` that the per-split task files pull in via `include:`
# (the file path header is not visible in this view -- confirm).
#
# Nesting below is restored from the flattened diff view; keys and values are
# unchanged.
dataset_path: HY-Wan/HiPhO
dataset_kwargs:
  # NOTE(review): presumably forwarded to the dataset loader; `false` would
  # then mean no auth token is sent -- confirm against the loader.
  token: false
output_type: generate_until

# `!function` values name callables in the task's `utils` module; they are
# resolved by the consuming framework's YAML loader, not by plain YAML.
doc_to_visual: !function utils.hipho_doc_to_visual
doc_to_text: !function utils.hipho_doc_to_text
doc_to_target: !function utils.hipho_doc_to_target

generation_kwargs:
  max_new_tokens: 1024
  temperature: 0
  top_p: 1.0
  do_sample: false

process_results: !function utils.hipho_process_results
metric_list:
  - metric: hipho_score
    aggregation: !function utils.hipho_aggregate_results
    higher_is_better: true

metadata:
  version: 0.0

lmms_eval_specific_kwargs:
  default:
    # Empty strings: no extra text is configured around the prompt here.
    pre_prompt: ""
    post_prompt: ""

lmms_eval/tasks/hipho/hipho.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
# Group definition: bundles the per-competition HiPhO tasks under the single
# group name `hipho`. Each entry must match the `task:` name declared in the
# corresponding per-split task file.
group: hipho
task:
  - hipho_apho_2025
  - hipho_eupho_2024
  - hipho_eupho_2025
  - hipho_f_ma_2024
  - hipho_f_ma_2025
  - hipho_ipho_2024
  - hipho_ipho_2025
  - hipho_nbpho_2024
  - hipho_nbpho_2025
  - hipho_panmechanics_2024
  - hipho_panmechanics_2025
  - hipho_panpho_2024
  - hipho_panpho_2025
metadata:
  version: 0.0
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
# HiPhO task evaluated on the `APhO_2025` test split. `include` names the
# shared template file; how it is merged is defined by the consuming loader.
include: _default_template_yaml
task: hipho_apho_2025
test_split: APhO_2025
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
# HiPhO task evaluated on the `EuPhO_2024` test split. `include` names the
# shared template file; how it is merged is defined by the consuming loader.
include: _default_template_yaml
task: hipho_eupho_2024
test_split: EuPhO_2024
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
# HiPhO task evaluated on the `EuPhO_2025` test split. `include` names the
# shared template file; how it is merged is defined by the consuming loader.
include: _default_template_yaml
task: hipho_eupho_2025
test_split: EuPhO_2025
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
# HiPhO task evaluated on the `F_MA_2024` test split. `include` names the
# shared template file; how it is merged is defined by the consuming loader.
include: _default_template_yaml
task: hipho_f_ma_2024
test_split: F_MA_2024
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
# HiPhO task evaluated on the `F_MA_2025` test split. `include` names the
# shared template file; how it is merged is defined by the consuming loader.
include: _default_template_yaml
task: hipho_f_ma_2025
test_split: F_MA_2025
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
# HiPhO task evaluated on the `IPhO_2024` test split. `include` names the
# shared template file; how it is merged is defined by the consuming loader.
include: _default_template_yaml
task: hipho_ipho_2024
test_split: IPhO_2024
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
# HiPhO task evaluated on the `IPhO_2025` test split. `include` names the
# shared template file; how it is merged is defined by the consuming loader.
include: _default_template_yaml
task: hipho_ipho_2025
test_split: IPhO_2025
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
# HiPhO task evaluated on the `NBPhO_2024` test split. `include` names the
# shared template file; how it is merged is defined by the consuming loader.
include: _default_template_yaml
task: hipho_nbpho_2024
test_split: NBPhO_2024

0 commit comments

Comments
 (0)