fix: Update Qwen3 parser tests to match fork reasoning behavior

janhilgard · claude · janhilgard · commit e12a1b77eca0 · 2026-03-22T00:32:41.000+01:00
The tests expected reasoning=None for no-tag and start-tag-only outputs,
but the fork's Qwen3 parser intentionally treats these as reasoning:
enable_thinking=True injects <think> into the prompt, so output without
</think> is reasoning that hit max_tokens before the closing tag.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/tests/test_reasoning_parser.py b/tests/test_reasoning_parser.py
@@ -100,19 +100,22 @@ def test_extract_multiline_reasoning(self, parser):
         assert "Step 3" in reasoning
         assert content == "Result: 42"
 
-    def test_no_tags_returns_content_only(self, parser):
-        """Qwen3 requires both tags - no tags means pure content."""
+    def test_no_tags_returns_reasoning(self, parser):
+        """Qwen3 with enable_thinking=True: no tags means reasoning hit max_tokens."""
         output = "Just a regular response without thinking."
         reasoning, content = parser.extract_reasoning(output)
-        assert reasoning is None
-        assert content == output
+        # enable_thinking=True injects <think> in prompt, so output without
+        # tags = reasoning that hit max_tokens before </think>
+        assert reasoning == output
+        assert content is None
 
-    def test_only_start_tag_no_reasoning(self, parser):
-        """Qwen3 requires both tags - missing end tag means no reasoning."""
+    def test_only_start_tag_incomplete_reasoning(self, parser):
+        """Qwen3: only <think> without </think> = incomplete reasoning (still generating)."""
         output = "<think>Started thinking but never finished"
         reasoning, content = parser.extract_reasoning(output)
-        assert reasoning is None
-        assert content == output
+        # Only start tag = reasoning in progress, no content yet
+        assert reasoning == "Started thinking but never finished"
+        assert content is None
 
     def test_only_end_tag_implicit_mode(self, parser):
         """Qwen3 supports implicit mode - when <think> is in prompt, only </think> in output."""
@@ -651,12 +654,11 @@ def test_qwen3_implicit_mode_support(self, parser):
         assert content == "more text"
 
         # Only start tag - no </think> means model is still generating
-        # Qwen3 requires </think> to extract reasoning (treats as pure content until then)
         output2 = "<think>incomplete reasoning"
         reasoning, content = parser.extract_reasoning(output2)
-        # No </think> = no reasoning extraction, entire output is content
-        assert reasoning is None
-        assert content == output2
+        # Only <think> without </think> = reasoning in progress, no content yet
+        assert reasoning == "incomplete reasoning"
+        assert content is None
 
     def test_qwen3_empty_think_tags(self, parser):
         """Test empty think tags."""