|
| 1 | +import re |
| 2 | +from typing import Any |
| 3 | + |
| 4 | + |
def mathkangaroo_doc_to_visual(doc):
    """Return the document's image as a one-element list in RGB mode.

    Args:
        doc: Mapping for one dataset record; its ``"image"`` entry is read.

    Returns:
        ``[image.convert("RGB")]`` when the entry is not ``None`` and exposes
        a ``convert`` attribute, otherwise an empty list.
    """
    picture = doc.get("image")
    # Guard clause: anything that is missing or cannot be converted yields no visuals.
    if picture is None or not hasattr(picture, "convert"):
        return []
    return [picture.convert("RGB")]
| 10 | + |
| 11 | + |
def mathkangaroo_doc_to_text(doc, lmms_eval_specific_kwargs=None):
    """Build the text prompt for one question.

    Args:
        doc: Mapping for one dataset record; its ``"question"`` entry is read.
        lmms_eval_specific_kwargs: Optional mapping that may carry
            ``"pre_prompt"`` and ``"post_prompt"`` overrides. ``None`` or an
            empty mapping selects the defaults.

    Returns:
        ``pre_prompt + question + post_prompt``, where the question is
        stringified and stripped of surrounding whitespace.
    """
    kwargs = lmms_eval_specific_kwargs or {}
    pre_prompt = kwargs.get("pre_prompt", "")
    post_prompt = kwargs.get("post_prompt", "\nAnswer with the option letter (A, B, C, D, or E) only.")

    question = doc.get("question")
    # A key that is present but None must not leak the literal text "None"
    # into the prompt; treat it the same as a missing question.
    question = "" if question is None else str(question).strip()
    return f"{pre_prompt}{question}{post_prompt}"
| 18 | + |
| 19 | + |
def _normalize_targets(answer: Any) -> set[str]:
    """Turn a ground-truth value into the set of accepted option letters.

    Args:
        answer: Raw ground-truth value; it is stringified and upper-cased.

    Returns:
        A set of letters drawn from ``A``-``E``. ``None`` gives an empty set.
        Compact keys such as ``"AC"`` or ``"a, d"`` contribute every letter.
        When the value contains letters outside ``A``-``E`` it is treated as
        free text and only stand-alone option letters are kept.
    """
    if answer is None:
        return set()

    text = str(answer).strip().upper()
    if re.search(r"[F-Z]", text):
        # Free text such as "Answer: B" or "None": letters embedded in words
        # ("A"/"E" in ANSWER, "E" in NONE) are not answer options, so keep
        # only letters that are not adjacent to another letter.
        return set(re.findall(r"(?<![A-Z])[A-E](?![A-Z])", text))
    # Only option letters and separators are present: every letter counts.
    return set(re.findall(r"[A-E]", text))
| 24 | + |
| 25 | + |
def _extract_prediction(response: str) -> str:
    """Extract the predicted option letter (A-E) from a model response.

    Strategies are tried in order and the first hit wins:

    1. An explicit cue: "final answer", "answer" or "option", optionally
       followed by "is" or ":", then a letter that may be parenthesised.
    2. Scanning lines from the end, a line that is only a letter, optionally
       wrapped in parentheses and/or followed by "." or ")".
    3. The last stand-alone upper-case option letter anywhere in the text.
    4. The last stand-alone option letter of any case.

    Args:
        response: Raw model output; falsy values are treated as no answer.

    Returns:
        The upper-case letter, or ``""`` when nothing could be extracted.
    """
    if not response:
        return ""

    text = str(response).strip()

    # The trailing negative lookahead stops the case-insensitive cue from
    # taking the first letter of the following word as the answer
    # ("answer depends" -> D, "answered" -> E).
    cue_match = re.search(
        r"(?i)\b(?:final\s+answer|answer|option)\s*(?:is|:)?\s*\(?([A-E])\)?(?![A-Za-z0-9_])",
        text,
    )
    if cue_match:
        return cue_match.group(1).upper()

    for raw_line in reversed(text.splitlines()):
        line = raw_line.strip().upper()
        if not line:
            continue
        line_match = re.fullmatch(r"\(?([A-E])\)?[\.)]?", line)
        if line_match:
            return line_match.group(1)

    # Prefer letters the model actually wrote in upper case, so that the
    # English article "a" does not outrank a real option letter.
    uppercase_hits = re.findall(r"\b([A-E])\b", text)
    if uppercase_hits:
        return uppercase_hits[-1]

    # Last resort: accept a stand-alone letter regardless of case.
    any_case_hits = re.findall(r"\b([A-E])\b", text.upper())
    return any_case_hits[-1] if any_case_hits else ""
| 47 | + |
| 48 | + |
def mathkangaroo_process_results(doc, results):
    """Score one model response against the document's ground truth.

    Args:
        doc: Mapping for one dataset record; its ``"ground_truth"`` entry is read.
        results: Sequence of model outputs; only the first element is used,
            and an empty sequence is scored as an empty response.

    Returns:
        ``{"mathkangaroo_accuracy": 1.0}`` when a letter was extracted and it
        is among the accepted targets, otherwise
        ``{"mathkangaroo_accuracy": 0.0}``.
    """
    raw_response = results[0] if results else ""
    predicted = _extract_prediction(raw_response)
    accepted = _normalize_targets(doc.get("ground_truth"))
    # An empty prediction never counts as correct.
    is_correct = bool(predicted) and predicted in accepted
    return {"mathkangaroo_accuracy": 1.0 if is_correct else 0.0}
0 commit comments