Skip to content

Commit 5c867fd

Browse files
committed
test: strengthen assertions across 3 more test files (batch 2)
test_run_agent.py (2 weak → 0, +13 assertions): - Session ID validated against actual YYYYMMDD_HHMMSS_hex format - API failure verifies error message propagation - Invalid JSON args verifies empty dict fallback + message structure - Context compression verifies final_response + completed flag - Invalid tool name retry verifies api_calls count - Invalid response verifies completed/failed/error structure test_model_tools.py (3 weak → 0): - Unknown tool error includes tool name in message - Exception returns dict with 'error' key + non-empty message - get_all_tool_names verifies both web_search AND terminal present test_approval.py (1 weak → 0, assert ratio 1.1 → 2.2): - Dangerous commands verify description content (delete, shell, drop, etc.) - Safe commands explicitly assert key AND desc are None - Pre/post condition checks for state management
1 parent a44e041 commit 5c867fd

File tree

3 files changed

+157
-50
lines changed

3 files changed

+157
-50
lines changed

tests/test_model_tools.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,16 @@ def test_agent_loop_tool_returns_error(self):
2727
def test_unknown_tool_returns_error(self):
2828
result = json.loads(handle_function_call("totally_fake_tool_xyz", {}))
2929
assert "error" in result
30+
assert "totally_fake_tool_xyz" in result["error"]
3031

3132
def test_exception_returns_json_error(self):
3233
# Even if something goes wrong, should return valid JSON
3334
result = handle_function_call("web_search", None) # None args may cause issues
3435
parsed = json.loads(result)
3536
assert isinstance(parsed, dict)
37+
assert "error" in parsed
38+
assert len(parsed["error"]) > 0
39+
assert "error" in parsed["error"].lower() or "failed" in parsed["error"].lower()
3640

3741

3842
# =========================================================================
@@ -82,7 +86,8 @@ def test_get_all_tool_names_returns_list(self):
8286
assert isinstance(names, list)
8387
assert len(names) > 0
8488
# Should contain well-known tools
85-
assert "web_search" in names or "terminal" in names
89+
assert "web_search" in names
90+
assert "terminal" in names
8691

8792
def test_get_toolset_for_tool(self):
8893
result = get_toolset_for_tool("web_search")

tests/test_run_agent.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,8 @@ def test_extra_newlines_cleaned(self):
213213
result = AIAgent._clean_session_content(text)
214214
# Should not have excessive newlines around think block
215215
assert "\n\n\n" not in result
216+
# Content after think block must be preserved
217+
assert "after" in result
216218

217219

218220
class TestGetMessagesUpToLastAssistant:
@@ -361,7 +363,7 @@ def test_valid_tool_names_populated(self):
361363
assert a.valid_tool_names == {"web_search", "terminal"}
362364

363365
def test_session_id_auto_generated(self):
364-
"""Session ID should be auto-generated when not provided."""
366+
"""Session ID should be auto-generated in YYYYMMDD_HHMMSS_<hex6> format."""
365367
with (
366368
patch("run_agent.get_tool_definitions", return_value=[]),
367369
patch("run_agent.check_toolset_requirements", return_value={}),
@@ -373,8 +375,10 @@ def test_session_id_auto_generated(self):
373375
skip_context_files=True,
374376
skip_memory=True,
375377
)
376-
assert a.session_id is not None
377-
assert len(a.session_id) > 0
378+
# Format: YYYYMMDD_HHMMSS_<6 hex chars>
379+
assert re.match(r"^\d{8}_\d{6}_[0-9a-f]{6}$", a.session_id), (
380+
f"session_id doesn't match expected format: {a.session_id}"
381+
)
378382

379383

380384
class TestInterrupt:
@@ -621,9 +625,13 @@ def test_invalid_json_args_defaults_empty(self, agent):
621625
tc = _mock_tool_call(name="web_search", arguments="not valid json", call_id="c1")
622626
mock_msg = _mock_assistant_msg(content="", tool_calls=[tc])
623627
messages = []
624-
with patch("run_agent.handle_function_call", return_value="ok"):
628+
with patch("run_agent.handle_function_call", return_value="ok") as mock_hfc:
625629
agent._execute_tool_calls(mock_msg, messages, "task-1")
630+
# Invalid JSON args should fall back to empty dict
631+
mock_hfc.assert_called_once_with("web_search", {}, "task-1")
626632
assert len(messages) == 1
633+
assert messages[0]["role"] == "tool"
634+
assert messages[0]["tool_call_id"] == "c1"
627635

628636
def test_result_truncation_over_100k(self, agent):
629637
tc = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
@@ -644,14 +652,18 @@ def test_returns_summary(self, agent):
644652
agent._cached_system_prompt = "You are helpful."
645653
messages = [{"role": "user", "content": "do stuff"}]
646654
result = agent._handle_max_iterations(messages, 60)
655+
assert isinstance(result, str)
656+
assert len(result) > 0
647657
assert "summary" in result.lower()
648658

649659
def test_api_failure_returns_error(self, agent):
650660
agent.client.chat.completions.create.side_effect = Exception("API down")
651661
agent._cached_system_prompt = "You are helpful."
652662
messages = [{"role": "user", "content": "do stuff"}]
653663
result = agent._handle_max_iterations(messages, 60)
654-
assert "Error" in result or "error" in result
664+
assert isinstance(result, str)
665+
assert "error" in result.lower()
666+
assert "API down" in result
655667

656668

657669
class TestRunConversation:
@@ -729,6 +741,8 @@ def test_invalid_tool_name_retry(self, agent):
729741
):
730742
result = agent.run_conversation("do something")
731743
assert result["final_response"] == "Got it"
744+
assert result["completed"] is True
745+
assert result["api_calls"] == 2
732746

733747
def test_empty_content_retry_and_fallback(self, agent):
734748
"""Empty content (only think block) retries, then falls back to partial."""
@@ -776,6 +790,8 @@ def test_context_compression_triggered(self, agent):
776790
)
777791
result = agent.run_conversation("search something")
778792
mock_compress.assert_called_once()
793+
assert result["final_response"] == "All done"
794+
assert result["completed"] is True
779795

780796

781797
class TestRetryExhaustion:
@@ -825,7 +841,10 @@ def test_invalid_response_returns_error_not_crash(self, agent):
825841
patch("run_agent.time", self._make_fast_time_mock()),
826842
):
827843
result = agent.run_conversation("hello")
828-
assert result.get("failed") is True or result.get("completed") is False
844+
assert result.get("completed") is False, f"Expected completed=False, got: {result}"
845+
assert result.get("failed") is True
846+
assert "error" in result
847+
assert "Invalid API response" in result["error"]
829848

830849
def test_api_error_raises_after_retries(self, agent):
831850
"""Exhausted retries on API errors must raise, not fall through."""

0 commit comments

Comments
 (0)