@@ -213,6 +213,8 @@ def test_extra_newlines_cleaned(self):
213213 result = AIAgent ._clean_session_content (text )
214214 # Should not have excessive newlines around think block
215215 assert "\n \n \n " not in result
216+ # Content after think block must be preserved
217+ assert "after" in result
216218
217219
218220class TestGetMessagesUpToLastAssistant :
@@ -361,7 +363,7 @@ def test_valid_tool_names_populated(self):
361363 assert a .valid_tool_names == {"web_search" , "terminal" }
362364
363365 def test_session_id_auto_generated (self ):
364- """Session ID should be auto-generated when not provided ."""
366+ """Session ID should be auto-generated in YYYYMMDD_HHMMSS_<hex6> format ."""
365367 with (
366368 patch ("run_agent.get_tool_definitions" , return_value = []),
367369 patch ("run_agent.check_toolset_requirements" , return_value = {}),
@@ -373,8 +375,10 @@ def test_session_id_auto_generated(self):
373375 skip_context_files = True ,
374376 skip_memory = True ,
375377 )
376- assert a .session_id is not None
377- assert len (a .session_id ) > 0
378+ # Format: YYYYMMDD_HHMMSS_<6 hex chars>
379+ assert re .match (r"^\d{8}_\d{6}_[0-9a-f]{6}$" , a .session_id ), (
380+ f"session_id doesn't match expected format: { a .session_id } "
381+ )
378382
379383
380384class TestInterrupt :
@@ -621,9 +625,13 @@ def test_invalid_json_args_defaults_empty(self, agent):
621625 tc = _mock_tool_call (name = "web_search" , arguments = "not valid json" , call_id = "c1" )
622626 mock_msg = _mock_assistant_msg (content = "" , tool_calls = [tc ])
623627 messages = []
624- with patch ("run_agent.handle_function_call" , return_value = "ok" ):
628+ with patch ("run_agent.handle_function_call" , return_value = "ok" ) as mock_hfc :
625629 agent ._execute_tool_calls (mock_msg , messages , "task-1" )
630+ # Invalid JSON args should fall back to empty dict
631+ mock_hfc .assert_called_once_with ("web_search" , {}, "task-1" )
626632 assert len (messages ) == 1
633+ assert messages [0 ]["role" ] == "tool"
634+ assert messages [0 ]["tool_call_id" ] == "c1"
627635
628636 def test_result_truncation_over_100k (self , agent ):
629637 tc = _mock_tool_call (name = "web_search" , arguments = '{}' , call_id = "c1" )
@@ -644,14 +652,18 @@ def test_returns_summary(self, agent):
644652 agent ._cached_system_prompt = "You are helpful."
645653 messages = [{"role" : "user" , "content" : "do stuff" }]
646654 result = agent ._handle_max_iterations (messages , 60 )
655+ assert isinstance (result , str )
656+ assert len (result ) > 0
647657 assert "summary" in result .lower ()
648658
649659 def test_api_failure_returns_error (self , agent ):
650660 agent .client .chat .completions .create .side_effect = Exception ("API down" )
651661 agent ._cached_system_prompt = "You are helpful."
652662 messages = [{"role" : "user" , "content" : "do stuff" }]
653663 result = agent ._handle_max_iterations (messages , 60 )
654- assert "Error" in result or "error" in result
664+ assert isinstance (result , str )
665+ assert "error" in result .lower ()
666+ assert "API down" in result
655667
656668
657669class TestRunConversation :
@@ -729,6 +741,8 @@ def test_invalid_tool_name_retry(self, agent):
729741 ):
730742 result = agent .run_conversation ("do something" )
731743 assert result ["final_response" ] == "Got it"
744+ assert result ["completed" ] is True
745+ assert result ["api_calls" ] == 2
732746
733747 def test_empty_content_retry_and_fallback (self , agent ):
734748 """Empty content (only think block) retries, then falls back to partial."""
@@ -776,6 +790,8 @@ def test_context_compression_triggered(self, agent):
776790 )
777791 result = agent .run_conversation ("search something" )
778792 mock_compress .assert_called_once ()
793+ assert result ["final_response" ] == "All done"
794+ assert result ["completed" ] is True
779795
780796
781797class TestRetryExhaustion :
@@ -825,7 +841,10 @@ def test_invalid_response_returns_error_not_crash(self, agent):
825841 patch ("run_agent.time" , self ._make_fast_time_mock ()),
826842 ):
827843 result = agent .run_conversation ("hello" )
828- assert result .get ("failed" ) is True or result .get ("completed" ) is False
844+ assert result .get ("completed" ) is False , f"Expected completed=False, got: { result } "
845+ assert result .get ("failed" ) is True
846+ assert "error" in result
847+ assert "Invalid API response" in result ["error" ]
829848
830849 def test_api_error_raises_after_retries (self , agent ):
831850 """Exhausted retries on API errors must raise, not fall through."""
0 commit comments