EvolvingLMMs-Lab · Luodian · Mar 7, 2026 · Mar 7, 2026
diff --git a/lmms_eval/api/reasoning.py b/lmms_eval/api/reasoning.py
@@ -22,6 +22,12 @@ def strip_reasoning_tags(text: str, tag_pairs: List[List[str]]) -> str:
                 result = result[:start] + result[end + len(end_tag) :]
             else:
                 break
+        # Some chat templates prefill the opening reasoning tag in the prompt,
+        # so the model completion may contain only the closing tag plus the
+        # final answer. In that case, keep the suffix after the final closing
+        # tag so downstream scorers see the answer instead of the reasoning.
+        if end_tag in result and start_tag not in result:
+            result = result.rsplit(end_tag, 1)[-1]
     return result.strip()
 
 

diff --git a/test/eval/test_reasoning.py b/test/eval/test_reasoning.py
@@ -0,0 +1,13 @@
+from lmms_eval.api.reasoning import strip_reasoning_tags
+
+
+def test_strip_reasoning_tags_removes_paired_block():
+    """A complete <think>...</think> block is removed, leaving only the answer."""
+    completion = "<think>\nreasoning\n</think>\n\nYes"
+    assert strip_reasoning_tags(completion, [["<think>", "</think>"]]) == "Yes"
+
+
+def test_strip_reasoning_tags_handles_prompt_prefilled_opening_tag():
+    """With only a closing tag present, the text after it is kept as the answer."""
+    completion = "reasoning from completion only\n</think>\n\nNo"
+    assert strip_reasoning_tags(completion, [["<think>", "</think>"]]) == "No"