# labbench2_vlm.yaml
# Resources-server definition for the labbench2 VLM benchmarks.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped; confirm the hierarchy against the consuming schema.
labbench2_vlm:
  resources_servers:
    labbench2_vlm:
      entrypoint: app.py
      domain: knowledge
      verified: false
      description: "labbench2 VLM benchmarks: scientific figure/table QA (figqa2, tableqa2), protocol troubleshooting (protocolqa2), LLM-as-judge"
      value: Measure scientific reasoning on figures, tables, and lab protocols

      # Reference to the model server used as the judge.
      judge_model_server:
        type: responses_api_models
        name: judge_model

      # Request parameters for judge calls; `input` is left as an empty list
      # here (presumably filled in per request -- confirm).
      judge_responses_create_params:
        input: []
        max_output_tokens: 1024

      # Prompt templates for the judge: a general one and a separate one for
      # protocol questions.
      judge_prompt_template_fpath: prompt_templates/judge.txt
      judge_prompt_template_protocol_fpath: prompt_templates/judge_protocol.txt

      # Verdict labels. Quoted because the values start with `[`, which YAML
      # would otherwise read as a flow sequence.
      judge_equal_label: "[[A=B]]"
      judge_not_equal_label: "[[A!=B]]"

      judge_endpoint_max_concurrency: 64
# Agent definition that pairs the labbench2_vlm resources server with the
# policy model, followed by the datasets it runs on.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped; confirm the hierarchy against the consuming schema.
labbench2_vlm_simple_agent:
  responses_api_agents:
    labbench2_vlm_agent:
      entrypoint: app.py

      # Reference to the resources server this agent talks to.
      resources_server:
        type: resources_servers
        name: labbench2_vlm

      # Reference to the model server being evaluated.
      model_server:
        type: responses_api_models
        name: policy_model

      # NOTE(review): the four media settings below are assumed to be
      # agent-level options (siblings of model_server, not children of it) --
      # confirm. `dpi` presumably controls rendering resolution -- confirm.
      media_base_dir: resources_servers/labbench2_vlm/data
      dpi: 170
      media_mode: image
      strip_images_from_output: true

      # Five validation splits plus one example split. `version` is quoted so
      # it can never be re-typed by a parser or formatter.
      datasets:
        - name: figqa2_img
          type: validation
          jsonl_fpath: resources_servers/labbench2_vlm/data/figqa2_img_validation.jsonl
          num_repeats: 1
          gitlab_identifier:
            dataset_name: labbench2_vlm
            version: "0.0.1"
            artifact_fpath: figqa2_img_validation.jsonl
          license: Creative Commons Attribution-ShareAlike 4.0 International

        - name: figqa2_pdf
          type: validation
          jsonl_fpath: resources_servers/labbench2_vlm/data/figqa2_pdf_validation.jsonl
          num_repeats: 1
          gitlab_identifier:
            dataset_name: labbench2_vlm
            version: "0.0.1"
            artifact_fpath: figqa2_pdf_validation.jsonl
          license: Creative Commons Attribution-ShareAlike 4.0 International

        - name: tableqa2_img
          type: validation
          jsonl_fpath: resources_servers/labbench2_vlm/data/tableqa2_img_validation.jsonl
          num_repeats: 1
          gitlab_identifier:
            dataset_name: labbench2_vlm
            version: "0.0.1"
            artifact_fpath: tableqa2_img_validation.jsonl
          license: Creative Commons Attribution-ShareAlike 4.0 International

        - name: tableqa2_pdf
          type: validation
          jsonl_fpath: resources_servers/labbench2_vlm/data/tableqa2_pdf_validation.jsonl
          num_repeats: 1
          gitlab_identifier:
            dataset_name: labbench2_vlm
            version: "0.0.1"
            artifact_fpath: tableqa2_pdf_validation.jsonl
          license: Creative Commons Attribution-ShareAlike 4.0 International

        - name: protocolqa2
          type: validation
          jsonl_fpath: resources_servers/labbench2_vlm/data/protocolqa2_validation.jsonl
          num_repeats: 1
          gitlab_identifier:
            dataset_name: labbench2_vlm
            version: "0.0.1"
            artifact_fpath: protocolqa2_validation.jsonl
          license: Creative Commons Attribution-ShareAlike 4.0 International

        # Example split: the only entry without a gitlab_identifier.
        - name: example
          type: example
          jsonl_fpath: resources_servers/labbench2_vlm/data/example.jsonl
          num_repeats: 1
          license: Creative Commons Attribution-ShareAlike 4.0 International