Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/current_tasks.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ python -m lmms_eval --tasks list_with_num
- [SeedBench 2](https://github.com/AILab-CVC/SEED-Bench) (seedbench_2)
- [SeedBench 2 Plus](https://huggingface.co/datasets/AILab-CVC/SEED-Bench-2-plus) (seedbench_2_plus)
- [VibeEval](https://github.com/reka-ai/reka-vibe-eval) (vibe_eval)
- [VisuLogic](https://visulogic-benchmark.github.io/VisuLogic/) (visulogic)
- [VizWizVQA](https://vizwiz.org/tasks-and-datasets/vqa/) (vizwiz_vqa)
- VizWizVQA Validation (vizwiz_vqa_val)
- VizWizVQA Test (vizwiz_vqa_test)
Expand Down
22 changes: 22 additions & 0 deletions lmms_eval/tasks/visulogic/_default_template_yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Shared settings for the VisuLogic task; visulogic.yaml pulls this file in via `include`.
output_type: generate_until
# Hooks implemented in utils.py: image loading, prompt construction and gold-answer lookup.
doc_to_visual: !function utils.visulogic_doc_to_visual
doc_to_text: !function utils.visulogic_doc_to_text
doc_to_target: !function utils.visulogic_doc_to_target
# Sampling is disabled (temperature 0) and replies are capped at 64 new tokens.
generation_kwargs:
max_new_tokens: 64
temperature: 0
do_sample: false
# process_results emits a per-sample `visulogic_acc` score, which the aggregation hook averages.
process_results: !function utils.visulogic_process_results
metric_list:
- metric: visulogic_acc
aggregation: !function utils.visulogic_aggregate_acc
higher_is_better: true
# pre_prompt / post_prompt are placed around the question by utils.visulogic_doc_to_text.
# NOTE(review): the keys below (default, qwen_vl) look like per-model prompt variants - confirm against the harness.
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: "\nAnswer with the option letter only."
qwen_vl:
pre_prompt: ""
post_prompt: " Answer:"
metadata:
- version: 0.0
88 changes: 88 additions & 0 deletions lmms_eval/tasks/visulogic/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import io
import re
import zipfile
from pathlib import Path

from huggingface_hub import snapshot_download
from PIL import Image

# Hugging Face Hub dataset repository that holds the benchmark files.
DATASET_REPO_ID = "VisuLogic/VisuLogic"
# Letters accepted by the short-reply fallback in _extract_option_letter.
OPTION_LETTERS = {"A", "B", "C", "D"}

# NOTE(review): snapshot_download runs at import time, so merely importing this module
# presumably triggers a download (or cache check) of the whole dataset repo - confirm this is intended.
_DATASET_DIR = Path(snapshot_download(repo_id=DATASET_REPO_ID, repo_type="dataset", local_dir_use_symlinks=False))
# Zip archive inside the snapshot; _get_images_archive opens it and image paths are read as its members.
_IMAGES_ARCHIVE_PATH = _DATASET_DIR / "images.zip"
# Regexes tried in this order by _extract_option_letter; each captures one letter A-D, case-insensitively.
_ANSWER_PATTERNS = [
# Tagged answer, e.g. "<answer>B</answer>" or "<answer>(B)</answer>".
re.compile(r"<answer>\s*\(?([A-D])\)?\s*</answer>", re.IGNORECASE | re.DOTALL),
# LaTeX-style boxed answer, e.g. "\boxed{B}".
re.compile(r"\\boxed\{\s*([A-D])\s*\}", re.IGNORECASE),
# Phrases such as "answer is B", "answer: (B)" or "answer - B".
re.compile(r"answer\s*(?:is|:|-)\s*\(?([A-D])\)?\b", re.IGNORECASE),
# Phrases such as "option B".
re.compile(r"option\s*([A-D])\b", re.IGNORECASE),
# Any parenthesised letter, e.g. "(B)".
re.compile(r"\(([A-D])\)", re.IGNORECASE),
]
# Cached ZipFile handle; stays None until _get_images_archive() opens the archive.
_IMAGES_ARCHIVE = None


def _get_images_archive() -> zipfile.ZipFile:
    """Return the shared read-only handle to the images archive.

    The archive at ``_IMAGES_ARCHIVE_PATH`` is opened on the first call and the
    handle is kept in the module-level ``_IMAGES_ARCHIVE`` so that later calls
    reuse it instead of reopening the file.
    """
    global _IMAGES_ARCHIVE
    if _IMAGES_ARCHIVE is not None:
        return _IMAGES_ARCHIVE
    _IMAGES_ARCHIVE = zipfile.ZipFile(_IMAGES_ARCHIVE_PATH, "r")
    return _IMAGES_ARCHIVE


def _extract_option_letter(text: str) -> str:
    """Pull the chosen option letter out of a free-form model reply.

    Args:
        text: Raw model output; it is converted with ``str`` and stripped first.

    Returns:
        The upper-cased option letter, or ``""`` when none can be identified.
    """
    response = str(text).strip()
    if not response:
        return ""

    # Patterns are tried in list order; the first one with any hit decides,
    # and within that pattern the last occurrence in the reply is used.
    for answer_regex in _ANSWER_PATTERNS:
        found = answer_regex.findall(response)
        if found:
            return found[-1].upper()

    # Fallback for very short replies such as "B" or "b.": take the first character.
    if len(response) > 3:
        return ""
    leading = response.upper()[0]
    return leading if leading in OPTION_LETTERS else ""


def visulogic_doc_to_visual(doc):
    """Load the image referenced by a dataset record from the images archive.

    Args:
        doc: Dataset record; its ``image_path`` value names a member of the
            images archive.

    Returns:
        A one-element list holding the image converted to RGB, or an empty
        list when the record has no image path.

    Raises:
        FileNotFoundError: If the archive has no member with that name.
    """
    # A missing or null path means "no image" rather than the literal string "None".
    image_path = str(doc.get("image_path") or "").strip()
    # Remove only literal "./" and "/" prefixes. ``lstrip("./")`` treats its argument
    # as a character set and would also eat the leading dot of names like ".cache/1.png".
    while image_path.startswith(("./", "/")):
        image_path = image_path[2:] if image_path.startswith("./") else image_path[1:]
    if not image_path:
        return []

    archive = _get_images_archive()
    try:
        with archive.open(image_path) as image_file:
            image_bytes = image_file.read()
    except KeyError as error:
        raise FileNotFoundError(f"Image not found in {DATASET_REPO_ID} archive: {image_path}") from error

    # Decode from an in-memory copy so the image does not depend on the archive member staying open.
    return [Image.open(io.BytesIO(image_bytes)).convert("RGB")]


def visulogic_doc_to_text(doc, lmms_eval_specific_kwargs=None):
    """Build the text prompt for one dataset record.

    Args:
        doc: Dataset record; its ``question`` value is stringified and stripped.
        lmms_eval_specific_kwargs: Optional mapping that may carry ``pre_prompt``
            and ``post_prompt`` strings; each defaults to ``""`` when absent.

    Returns:
        The pre-prompt, question and post-prompt concatenated with no separator.
    """
    prompt_cfg = lmms_eval_specific_kwargs if lmms_eval_specific_kwargs else {}
    prefix = prompt_cfg.get("pre_prompt", "")
    suffix = prompt_cfg.get("post_prompt", "")
    body = str(doc.get("question", "")).strip()
    return "{}{}{}".format(prefix, body, suffix)


def visulogic_doc_to_target(doc):
    """Return the gold answer for a record as a single upper-case character.

    The ``label`` value is stringified, stripped and upper-cased, and only its
    first character is kept; a missing label yields ``""``.
    """
    label_text = str(doc.get("label", ""))
    canonical = label_text.strip().upper()
    return canonical[:1]


def visulogic_process_results(doc, results):
    """Score one model reply against the record's gold option letter.

    Args:
        doc: Dataset record; the gold letter is taken from it via
            ``visulogic_doc_to_target``.
        results: Sequence of model outputs; only the first entry is scored.
            An empty sequence is treated as an empty reply.

    Returns:
        ``{"visulogic_acc": 1.0}`` when a letter was extracted from the reply
        and it equals the gold letter, otherwise ``{"visulogic_acc": 0.0}``.
    """
    prediction = str(results[0]).strip() if results else ""
    predicted_letter = _extract_option_letter(prediction)
    target = visulogic_doc_to_target(doc)
    # An unparseable reply yields "", which must never count as a hit, not even
    # against a record whose label is missing (target "" as well).
    is_correct = bool(predicted_letter) and predicted_letter == target
    return {"visulogic_acc": 1.0 if is_correct else 0.0}


def visulogic_aggregate_acc(items):
    """Average the per-sample scores into a single accuracy value.

    Args:
        items: Collection of per-sample scores, each convertible with ``float``.

    Returns:
        The arithmetic mean of the scores, or ``0.0`` when ``items`` is empty.
    """
    if items:
        return sum(map(float, items)) / len(items)
    return 0.0
6 changes: 6 additions & 0 deletions lmms_eval/tasks/visulogic/visulogic.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Task definition for VisuLogic: names the dataset and data file, then layers on the shared template.
dataset_path: VisuLogic/VisuLogic
dataset_kwargs:
data_files: data.jsonl
task: visulogic
# The split named `train` is evaluated as the test split.
# NOTE(review): presumably `train` is the only split produced when loading a bare data file - confirm.
test_split: train
# Remaining settings (hooks, generation kwargs, metrics, prompts) come from this template file.
include: _default_template_yaml