[NemotronH] Small fix to reasoning parser (#36635)

roikoren755 · khluu · commit 95c0f928cdee · 2026-03-11T02:51:18.000-07:00
Signed-off-by: Roi Koren <roik@nvidia.com> (cherry picked from commit e661b9e)
diff --git a/tests/reasoning/test_nemotron_v3_reasoning_parser.py b/tests/reasoning/test_nemotron_v3_reasoning_parser.py
@@ -128,6 +128,28 @@ def test_nemotron_v3_without_thinking_returns_content(
     assert content == "This is plain content"
 
 
+def test_nemotron_v3_force_nonempty_content_returns_content(
+    tokenizer: FakeNemotronTokenizer,
+):  # force_nonempty_content=True: unclosed <think> output must land in content
+    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
+    parser = parser_cls(tokenizer)
+    request = ChatCompletionRequest(
+        model="test-model",
+        messages=[],
+        chat_template_kwargs={"force_nonempty_content": True},  # flag under test
+    )
+
+    reasoning, content = run_reasoning_extraction(
+        parser,
+        ["<think>This is plain content"],  # opening tag only, never closed
+        request=request,
+        streaming=False,  # non-streaming mode only
+    )
+
+    assert reasoning is None  # nothing may be reported as reasoning
+    assert content == "This is plain content"  # text comes back without <think>
+
+
 def test_nemotron_v3_with_thinking_keeps_truncated_reasoning(
     tokenizer: FakeNemotronTokenizer,
 ):
diff --git a/vllm/reasoning/nemotron_v3_reasoning_parser.py b/vllm/reasoning/nemotron_v3_reasoning_parser.py
@@ -24,7 +24,10 @@ def extract_reasoning(
 
         if (
             chat_template_kwargs
-            and chat_template_kwargs.get("enable_thinking") is False
+            and (
+                chat_template_kwargs.get("enable_thinking") is False
+                or chat_template_kwargs.get("force_nonempty_content") is True
+            )
             and final_content is None
         ):
             reasoning_content, final_content = final_content, reasoning_content