refactor(test): fix tool parser tests and add logprob regression test

Lawhy · Lawhy · commit b7fdc18ce0d5 · 2026-02-11T00:52:45.000-08:00
diff --git a/tests/integration/test_sglang_integration.py b/tests/integration/test_sglang_integration.py
@@ -77,16 +77,12 @@ async def test_tool_call_generation(self, model, calculator_tool):
         system_prompt = "You are a calculator. Use the calculator tool for all math."
 
         events = []
-        async for event in model.stream(
-            messages, tool_specs=[calculator_tool], system_prompt=system_prompt
-        ):
+        async for event in model.stream(messages, tool_specs=[calculator_tool], system_prompt=system_prompt):
             events.append(event)
 
         # Check for tool use events
         tool_starts = [e for e in events if "contentBlockStart" in e]
-        tool_use_starts = [
-            e for e in tool_starts if "toolUse" in e["contentBlockStart"].get("start", {})
-        ]
+        tool_use_starts = [e for e in tool_starts if "toolUse" in e["contentBlockStart"].get("start", {})]
 
         # Model should have called calculator tool
         if tool_use_starts:
@@ -101,9 +97,7 @@ async def test_multi_turn_with_tool_result(self, model, calculator_tool):
 
         # First generation
         events = []
-        async for event in model.stream(
-            messages, tool_specs=[calculator_tool], system_prompt=system_prompt
-        ):
+        async for event in model.stream(messages, tool_specs=[calculator_tool], system_prompt=system_prompt):
             events.append(event)
 
         # Add assistant response and tool result
@@ -130,9 +124,7 @@ async def test_multi_turn_with_tool_result(self, model, calculator_tool):
 
         # Second generation: model should respond after receiving tool result
         events = []
-        async for event in model.stream(
-            messages, tool_specs=[calculator_tool], system_prompt=system_prompt
-        ):
+        async for event in model.stream(messages, tool_specs=[calculator_tool], system_prompt=system_prompt):
             events.append(event)
 
         # Should have generated a response (content deltas or tool calls)
@@ -164,6 +156,22 @@ async def test_token_count_consistency(self, model):
         assert total_tokens == len(model.token_manager.loss_mask)
         assert total_tokens == len(model.token_manager.logprobs)
 
+    async def test_logprobs_no_none_when_return_logprob_enabled(self, model):
+        """Logprobs should never contain None when return_logprob=True (regression test for v0.2.0)."""
+        # Precondition: return_logprob must be enabled (a missing config key is treated as True)
+        assert model.config.get("return_logprob", True) is True
+
+        messages = [{"role": "user", "content": [{"text": "Say hello"}]}]
+        async for _ in model.stream(messages):
+            pass
+
+        logprobs = model.token_manager.logprobs
+        assert len(logprobs) > 0, "Should have logprobs after generation"
+        assert all(lp is not None for lp in logprobs), (
+            f"Logprobs should never contain None when return_logprob=True. "
+            f"Found {logprobs.count(None)} None values out of {len(logprobs)} total."
+        )
+
     async def test_incremental_tokenization(self, model):
         """Subsequent calls only tokenize new messages."""
         # First turn
diff --git a/tests/unit/test_tool_parser.py b/tests/unit/test_tool_parser.py
@@ -278,8 +278,8 @@ def test_parse_with_whitespace(self, parser):
     # --- Custom Tokens ---
 
     def test_custom_tokens(self):
-        """Use custom tool_call_tokens."""
-        parser = HermesToolParser(tool_call_tokens=("<function>", "</function>"))
+        """Use custom tool tokens."""
+        parser = HermesToolParser(tool_start_token="<function>", tool_end_token="</function>")
         text = '<function>{"name": "custom", "arguments": {}}</function>'
         results = parser.parse(text)
 
@@ -288,7 +288,7 @@ def test_custom_tokens(self):
 
     def test_custom_tokens_ignore_default(self):
         """Custom tokens ignore default format."""
-        parser = HermesToolParser(tool_call_tokens=("<function>", "</function>"))
+        parser = HermesToolParser(tool_start_token="<function>", tool_end_token="</function>")
         # Default format should not be parsed
         text = '<tool_call>{"name": "ignored", "arguments": {}}</tool_call>'
         results = parser.parse(text)
@@ -436,25 +436,9 @@ def test_no_think_blocks(self, parser):
         assert len(results) == 1
         assert results[0].name == "tool"
 
-    def test_disable_think_block_exclusion(self):
-        """Setting think_tokens=None disables exclusion."""
-        parser = HermesToolParser(think_tokens=None)
-        text = """
-        <think>
-        <tool_call>{"name": "inside_think", "arguments": {}}</tool_call>
-        </think>
-        <tool_call>{"name": "outside_think", "arguments": {}}</tool_call>
-        """
-        results = parser.parse(text)
-
-        # Both should be parsed when exclusion is disabled
-        assert len(results) == 2
-        assert results[0].name == "inside_think"
-        assert results[1].name == "outside_think"
-
     def test_custom_think_tokens(self):
         """Custom think tokens work correctly."""
-        parser = HermesToolParser(think_tokens=("<reasoning>", "</reasoning>"))
+        parser = HermesToolParser(think_start_token="<reasoning>", think_end_token="</reasoning>")
         text = """
         <reasoning>
         <tool_call>{"name": "draft", "arguments": {}}</tool_call>
@@ -468,7 +452,7 @@ def test_custom_think_tokens(self):
 
     def test_custom_think_tokens_ignore_default(self):
         """Custom think tokens don't exclude default <think> blocks."""
-        parser = HermesToolParser(think_tokens=("<reasoning>", "</reasoning>"))
+        parser = HermesToolParser(think_start_token="<reasoning>", think_end_token="</reasoning>")
         text = """
         <think>
         <tool_call>{"name": "in_think", "arguments": {}}</tool_call>
@@ -663,8 +647,8 @@ def test_parse_compact_format(self, parser):
     # --- Custom Tokens ---
 
     def test_custom_tokens(self):
-        """Use custom tool_call_tokens."""
-        parser = QwenXMLToolParser(tool_call_tokens=("<call>", "</call>"))
+        """Use custom tool tokens."""
+        parser = QwenXMLToolParser(tool_start_token="<call>", tool_end_token="</call>")
         text = """<call>
 <function=custom>
 <parameter=x>1</parameter>
@@ -677,7 +661,7 @@ def test_custom_tokens(self):
 
     def test_custom_tokens_ignore_default(self):
         """Custom tokens ignore default format."""
-        parser = QwenXMLToolParser(tool_call_tokens=("<call>", "</call>"))
+        parser = QwenXMLToolParser(tool_start_token="<call>", tool_end_token="</call>")
         text = """<tool_call>
 <function=ignored>
 <parameter=x>1</parameter>
@@ -713,27 +697,25 @@ def test_exclude_tool_calls_inside_think_block(self, parser):
         assert results[0].name == "actual_tool"
         assert results[0].input == {"y": "2"}
 
-    def test_disable_think_block_exclusion(self):
-        """Setting think_tokens=None disables exclusion."""
-        parser = QwenXMLToolParser(think_tokens=None)
+    def test_custom_think_tokens(self):
+        """Custom think tokens work correctly."""
+        parser = QwenXMLToolParser(think_start_token="<reasoning>", think_end_token="</reasoning>")
         text = """
-<think>
+<reasoning>
 <tool_call>
-<function=inside_think>
+<function=inside_reasoning>
 </function>
 </tool_call>
-</think>
+</reasoning>
 <tool_call>
-<function=outside_think>
+<function=outside_reasoning>
 </function>
 </tool_call>
 """
         results = parser.parse(text)
 
-        # Both should be parsed when exclusion is disabled
-        assert len(results) == 2
-        assert results[0].name == "inside_think"
-        assert results[1].name == "outside_think"
+        assert len(results) == 1
+        assert results[0].name == "outside_reasoning"
 
     # --- Edge Cases ---
 
@@ -1011,8 +993,8 @@ def test_parse_compact_format(self, parser):
     # --- Custom Tokens ---
 
     def test_custom_tokens(self):
-        """Use custom tool_call_tokens."""
-        parser = GLMToolParser(tool_call_tokens=("<call>", "</call>"))
+        """Use custom tool tokens."""
+        parser = GLMToolParser(tool_start_token="<call>", tool_end_token="</call>")
         text = """<call>custom
 <arg_key>x</arg_key>
 <arg_value>1</arg_value>
@@ -1025,7 +1007,7 @@ def test_custom_tokens(self):
 
     def test_custom_tokens_ignore_default(self):
         """Custom tokens ignore default format."""
-        parser = GLMToolParser(tool_call_tokens=("<call>", "</call>"))
+        parser = GLMToolParser(tool_start_token="<call>", tool_end_token="</call>")
         text = """<tool_call>ignored
 <arg_key>x</arg_key>
 <arg_value>1</arg_value>
@@ -1058,23 +1040,21 @@ def test_exclude_tool_calls_inside_think_block(self, parser):
         assert results[0].name == "actual_tool"
         assert results[0].input == {"y": 2}  # JSON-decoded as integer
 
-    def test_disable_think_block_exclusion(self):
-        """Setting think_tokens=None disables exclusion."""
-        parser = GLMToolParser(think_tokens=None)
+    def test_custom_think_tokens(self):
+        """Custom think tokens work correctly."""
+        parser = GLMToolParser(think_start_token="<reasoning>", think_end_token="</reasoning>")
         text = """
-<think>
-<tool_call>inside_think
+<reasoning>
+<tool_call>inside_reasoning
 </tool_call>
-</think>
-<tool_call>outside_think
+</reasoning>
+<tool_call>outside_reasoning
 </tool_call>
 """
         results = parser.parse(text)
 
-        # Both should be parsed when exclusion is disabled
-        assert len(results) == 2
-        assert results[0].name == "inside_think"
-        assert results[1].name == "outside_think"
+        assert len(results) == 1
+        assert results[0].name == "outside_reasoning"
 
     # --- Edge Cases ---
 
@@ -1184,8 +1164,8 @@ def test_get_parser_with_kwargs(self):
         """Get parser with custom arguments."""
         from strands_sglang.tool_parsers import get_tool_parser
 
-        parser = get_tool_parser("hermes", think_tokens=None)
-        assert parser.think_tokens is None
+        parser = get_tool_parser("hermes", think_start_token="<reasoning>")
+        assert parser.think_start_token == "<reasoning>"
 
     def test_unknown_parser_raises(self):
         """Unknown parser name raises KeyError."""