# Source: lmms-eval/lmms_eval/api/reasoning.py at 364d6b25457aa16a7e7cabe809779550e00105bb (EvolvingLMMs-Lab/lmms-eval, GitHub)
import re
from typing import List, Optional, Union


def strip_reasoning_tags(text: str, tag_pairs: List[List[str]]) -> str:
    """Remove reasoning tag blocks from model output.

    For each tag pair, every ``start_tag ... end_tag`` block is cut out of the
    text (tags included). If afterwards only a closing tag remains (no opening
    tag left), everything up to and including the last closing tag is dropped.

    Args:
        text: Raw model output string
        tag_pairs: List of [start_tag, end_tag] pairs,
                   e.g. [["<think>", "</think>"], ["<reasoning>", "</reasoning>"]]

    Returns:
        Cleaned text with reasoning blocks removed and surrounding whitespace
        stripped.

    Raises:
        ValueError: If a pair has an empty end tag. An empty end tag matches
            at every position, so it can never delimit a block.
    """
    result = text
    for start_tag, end_tag in tag_pairs:
        if not end_tag:
            # An empty closing tag would match right at the opening tag and
            # remove nothing, so the removal loop could never make progress.
            raise ValueError(f"Reasoning end tag must be a non-empty string (start tag: {start_tag!r})")

        while True:
            start = result.find(start_tag)
            if start == -1:
                break
            # Look for the closing tag only *after* the opening tag, so that
            # identical or overlapping tags (e.g. "|" ... "|") are not matched
            # against the opening tag itself.
            end = result.find(end_tag, start + len(start_tag))
            if end == -1:
                # Unclosed block: leave the remaining text untouched.
                break
            result = result[:start] + result[end + len(end_tag) :]

        # Some chat templates prefill the opening reasoning tag in the prompt,
        # so the model completion may contain only the closing tag plus the
        # final answer. In that case, keep the suffix after the final closing
        # tag so downstream scorers see the answer instead of the reasoning.
        if end_tag in result and start_tag not in result:
            result = result.rsplit(end_tag, 1)[-1]
    return result.strip()


def parse_reasoning_tags_config(cli_value: Optional[str] = None, task_value: Optional[object] = None) -> Optional[List[List[str]]]:
    """Resolve reasoning_tags from CLI + task config.

    Priority: task_value > cli_value (task_value wins whenever it is not None).
    None, False, an empty string, or "none" (any case) = disabled.

    Args:
        cli_value: Value from the command line; either a disabling sentinel
            or a JSON string encoding the tag pairs.
        task_value: Value from the task config; a JSON string, an already
            parsed list of pairs, or a disabling sentinel.

    Returns:
        A list of [start_tag, end_tag] pairs, or None when stripping is
        disabled. A single flat pair such as ["<think>", "</think>"] is
        accepted and wrapped into a one-element list.

    Raises:
        json.JSONDecodeError: If a string value is not valid JSON.
        ValueError: If the resolved value is not a list of two-string pairs.
    """
    import json

    effective = task_value if task_value is not None else cli_value
    if effective is None or effective is False:
        return None

    if isinstance(effective, str):
        candidate = effective.strip()
        # Accept "None"/"NONE"/"" as well; otherwise these would be handed to
        # the JSON parser and fail with an unhelpful decode error.
        if not candidate or candidate.lower() == "none":
            return None
        effective = json.loads(candidate)
        # JSON "null" / "false" also mean "disabled".
        if effective is None or effective is False:
            return None

    if not isinstance(effective, (list, tuple)):
        raise ValueError(f"reasoning_tags must be a list of [start_tag, end_tag] pairs, got {effective!r}")

    # Convenience: a single flat pair is treated as one tag pair.
    if len(effective) == 2 and all(isinstance(tag, str) for tag in effective):
        effective = [effective]

    pairs: List[List[str]] = []
    for pair in effective:
        is_pair = isinstance(pair, (list, tuple)) and len(pair) == 2 and all(isinstance(tag, str) for tag in pair)
        if not is_pair:
            raise ValueError(f"Each reasoning_tags entry must be a [start_tag, end_tag] pair of strings, got {pair!r}")
        pairs.append(list(pair))
    return pairs