Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions lmms_eval/tasks/simplevqa/_default_template_simplevqa_yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Shared template for the SimpleVQA task; included by simplevqa.yaml.
# Dataset identifier and loading options.
dataset_path: m-a-p/SimpleVQA
dataset_kwargs:
token: false
output_type: generate_until
# Callables resolved from the task's utils module via the !function tag.
doc_to_visual: !function utils.simplevqa_doc_to_visual
doc_to_text: !function utils.simplevqa_doc_to_text
# Name of the document field holding the reference answer
# (utils.simplevqa_process_results reads the same "answer" field).
doc_to_target: "answer"
process_results: !function utils.simplevqa_process_results
# Decoding settings: sampling disabled, a single beam, at most 32 new tokens.
generation_kwargs:
max_new_tokens: 32
temperature: 0
top_p: 1.0
num_beams: 1
do_sample: false
# exact_match is the key returned by utils.simplevqa_process_results;
# per-document values are aggregated with a mean.
metric_list:
- metric: exact_match
aggregation: mean
higher_is_better: true
# Prompt prefix/suffix that utils.simplevqa_doc_to_text places around the question.
# NOTE(review): the qwen3_vl entry presumably overrides `default` for that
# model -- confirm against the task loader.
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: "\nAnswer the question using a short phrase."
qwen3_vl:
pre_prompt: "Question: "
post_prompt: "\nAnswer with a short phrase only."
metadata:
- version: 0.0
3 changes: 3 additions & 0 deletions lmms_eval/tasks/simplevqa/simplevqa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Concrete SimpleVQA task entry: names the task and its evaluation split, and
# pulls every other setting from the shared template file named in `include`.
task: simplevqa
test_split: test
include: _default_template_simplevqa_yaml
35 changes: 35 additions & 0 deletions lmms_eval/tasks/simplevqa/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import base64
import io

from PIL import Image

from lmms_eval.tasks._task_utils.vqa_eval_metric import EvalAIAnswerProcessor

EVAL_AI_PROCESSOR = EvalAIAnswerProcessor()


def simplevqa_doc_to_visual(doc):
    """Return the document's image as a one-element list of RGB PIL images.

    ``doc["image"]`` is used as-is when it is already a PIL image. Any other
    value is handed to ``base64.b64decode`` and the decoded bytes are opened
    with PIL. In both cases the image is converted to RGB before returning.
    """
    picture = doc["image"]
    if not isinstance(picture, Image.Image):
        # Not a PIL image: treat the value as base64-encoded image data.
        raw_bytes = base64.b64decode(picture)
        picture = Image.open(io.BytesIO(raw_bytes))
    return [picture.convert("RGB")]


def simplevqa_doc_to_text(doc, lmms_eval_specific_kwargs=None):
    """Build the text prompt for a document.

    The question (``doc["question"]`` with surrounding whitespace stripped) is
    wrapped with the ``pre_prompt`` and ``post_prompt`` entries of
    ``lmms_eval_specific_kwargs``; either entry defaults to an empty string,
    and passing ``None`` is the same as passing an empty mapping.
    """
    options = {} if lmms_eval_specific_kwargs is None else lmms_eval_specific_kwargs
    prefix = options.get("pre_prompt", "")
    suffix = options.get("post_prompt", "")
    stripped_question = doc["question"].strip()
    return "{}{}{}".format(prefix, stripped_question, suffix)


def simplevqa_process_results(doc, result):
    """Score one model output against the document's reference answer.

    ``result`` must contain exactly one entry. That entry and ``doc["answer"]``
    are both passed through ``EVAL_AI_PROCESSOR`` and compared for equality.

    Returns a dict with the single key ``"exact_match"`` mapped to ``1.0``
    when the processed strings are equal and ``0.0`` otherwise.
    """
    assert len(result) == 1, f"The result should be a list of length 1, but got {len(result)}."
    # Apply the same processor to both sides so the comparison is like-for-like.
    normalized_prediction = EVAL_AI_PROCESSOR(result[0])
    normalized_reference = EVAL_AI_PROCESSOR(doc["answer"])
    is_match = normalized_prediction == normalized_reference
    return {"exact_match": float(is_match)}