From e0b84c26a13caf3612f25f4a06f403f83fa3e4f4 Mon Sep 17 00:00:00 2001 From: Chase Xu <80196056+Chase-Xuu@users.noreply.github.com> Date: Mon, 9 Feb 2026 23:04:49 -0600 Subject: [PATCH 1/2] fix(agents): preserve both assistant and function messages during snapshot clean for Gemini API Fixes #3759 When `enable_snapshot_clean` is enabled, the `_clean_snapshot_in_memory` method previously only recreated the FUNCTION message (tool result) but removed both the ASSISTANT message (tool call request) and FUNCTION message from memory. This breaks Gemini API which requires both messages to exist together - the tool call request must be present alongside the tool call result. Without the ASSISTANT message, Gemini returns the error: `GenerateContentRequest.contents[0].parts[0].function_response.name: Name cannot be empty` Changes: - Added `args` and `extra_content` fields to `_ToolOutputHistoryEntry` to preserve tool call context for later reconstruction - Modified `_register_tool_output_for_cache` to accept and store args/extra_content - Modified `_clean_snapshot_in_memory` to recreate BOTH the ASSISTANT message (with tool_calls) and the FUNCTION message (with result) when cleaning snapshots - Updated `_record_tool_calling` to pass args and extra_content to the cache registration function This ensures proper tool call message reconstruction that is compatible with Gemini and other APIs that require paired tool call request/response messages. --- camel/agents/chat_agent.py | 97 +++++++++++++++++++++++++++++--------- 1 file changed, 75 insertions(+), 22 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index c6337408d7..8c707e3915 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -169,6 +169,8 @@ class _ToolOutputHistoryEntry: result_text: str record_uuids: List[str] record_timestamps: List[float] + args: Optional[Dict[str, Any]] = None + extra_content: Optional[Dict[str, Any]] = None cached: bool = False @@ -1445,6 +1447,8 @@ def _register_tool_output_for_cache( tool_call_id: str, result_text: str, records: List[MemoryRecord], + args: Optional[Dict[str, Any]] = None, + extra_content: Optional[Dict[str, Any]] = None, ) -> None: if not records: return @@ -1455,6 +1459,8 @@ def _register_tool_output_for_cache( result_text=result_text, record_uuids=[str(record.uuid) for record in records], record_timestamps=[record.timestamp for record in records], + args=args, + extra_content=extra_content, ) self._tool_output_history.append(entry) self._process_tool_output_cache() @@ -1480,22 +1486,6 @@ def _clean_snapshot_in_memory( if '- ' in result_text and '[ref=' in result_text: cleaned_result = self._clean_snapshot_content(result_text) - # Update the message in memory storage - timestamp = ( - entry.record_timestamps[0] - if entry.record_timestamps - else time.time_ns() / 1_000_000_000 - ) - cleaned_message = FunctionCallingMessage( - role_name=self.role_name, - role_type=self.role_type, - meta_dict={}, - content="", - func_name=entry.tool_name, - result=cleaned_result, - tool_call_id=entry.tool_call_id, - ) - chat_history_block = getattr( self.memory, "_chat_history_block", None ) @@ -1504,18 +1494,74 @@ def _clean_snapshot_in_memory( return existing_records = storage.load() + + # Remove records by UUID updated_records = [ record for record in existing_records if record["uuid"] not in entry.record_uuids ] - new_record = MemoryRecord( - message=cleaned_message, + + # Recreate both assistant and function messages + # For Gemini API, the tool call request needs to exist along with + # the tool call result. + assist_args = entry.args if entry.args is not None else {} + + # Use timestamps from entry, ensuring proper ordering + if len(entry.record_timestamps) >= 2: + assist_timestamp = entry.record_timestamps[0] + func_timestamp = entry.record_timestamps[1] + elif entry.record_timestamps: + # If only one timestamp, use it for function and ensure + # assist comes before + func_timestamp = entry.record_timestamps[0] + assist_timestamp = func_timestamp - 1e-6 + else: + # No timestamps, use current time with proper ordering + assist_timestamp = time.time_ns() / 1_000_000_000 + func_timestamp = assist_timestamp + 1e-6 + + # Recreate assistant message (tool call request) + cleaned_assist_message = FunctionCallingMessage( + role_name=self.role_name, + role_type=self.role_type, + meta_dict={}, + content="", + func_name=entry.tool_name, + args=assist_args, + tool_call_id=entry.tool_call_id, + extra_content=entry.extra_content, + ) + + # Recreate function message (tool result) + cleaned_func_message = FunctionCallingMessage( + role_name=self.role_name, + role_type=self.role_type, + meta_dict={}, + content="", + func_name=entry.tool_name, + result=cleaned_result, + tool_call_id=entry.tool_call_id, + extra_content=entry.extra_content, + ) + + # Create new records for both assistant and function messages + new_assist_record = MemoryRecord( + message=cleaned_assist_message, + role_at_backend=OpenAIBackendRole.ASSISTANT, + timestamp=assist_timestamp, + agent_id=self.agent_id, + ) + new_func_record = MemoryRecord( + message=cleaned_func_message, role_at_backend=OpenAIBackendRole.FUNCTION, - timestamp=timestamp, + timestamp=func_timestamp, agent_id=self.agent_id, ) - updated_records.append(new_record.to_dict()) + + # Add both records back + updated_records.append(new_assist_record.to_dict()) + updated_records.append(new_func_record.to_dict()) updated_records.sort(key=lambda record: record["timestamp"]) storage.clear() storage.save(updated_records) @@ -1527,8 +1573,11 @@ def _clean_snapshot_in_memory( ) entry.cached = True - entry.record_uuids = [str(new_record.uuid)] - entry.record_timestamps = [timestamp] + entry.record_uuids = [ + str(new_assist_record.uuid), + str(new_func_record.uuid), + ] + entry.record_timestamps = [assist_timestamp, func_timestamp] def add_external_tool( self, tool: Union[FunctionTool, Callable, Dict[str, Any]] @@ -4079,6 +4128,8 @@ def _record_tool_calling( ) # Register tool output for snapshot cleaning if enabled + # Include args and extra_content so both the assistant message + # (tool call) and function message (tool result) can be recreated if self._enable_snapshot_clean and not mask_output and func_records: serialized_result = self._serialize_tool_result(result_for_memory) self._register_tool_output_for_cache( @@ -4086,6 +4137,8 @@ def _record_tool_calling( tool_call_id, serialized_result, cast(List[MemoryRecord], func_records), + args=args, + extra_content=extra_content, ) if isinstance(result, ToolResult) and result.images: From 3eef5800c8879aaf2ffb43ec75a9816b627ce713 Mon Sep 17 00:00:00 2001 From: Chase Xu <80196056+Chase-Xuu@users.noreply.github.com> Date: Tue, 10 Feb 2026 04:37:34 -0600 Subject: [PATCH 2/2] fix: simplify snapshot cleaning to only update function result message Address reviewer feedback: - Remove unnecessary recreation of assistant message (already recorded separately by _record_assistant_tool_calls_from_requests) - Fixes format mismatch issue (FunctionCallingMessage vs BaseMessage) - Fixes multi-tool-call support (assistant message preserves all tool calls) - Add test for snapshot cleaning functionality --- camel/agents/chat_agent.py | 78 +++--------- test/agents/test_chat_agent.py | 225 ++++++++++++++++++++++----------- 2 files changed, 168 insertions(+), 135 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index 8c707e3915..4cfca38096 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -169,8 +169,6 @@ class _ToolOutputHistoryEntry: result_text: str record_uuids: List[str] record_timestamps: List[float] - args: Optional[Dict[str, Any]] = None - extra_content: Optional[Dict[str, Any]] = None cached: bool = False @@ -1447,8 +1445,6 @@ def _register_tool_output_for_cache( tool_call_id: str, result_text: str, records: List[MemoryRecord], - args: Optional[Dict[str, Any]] = None, - extra_content: Optional[Dict[str, Any]] = None, ) -> None: if not records: return @@ -1459,8 +1455,6 @@ def _register_tool_output_for_cache( result_text=result_text, record_uuids=[str(record.uuid) for record in records], record_timestamps=[record.timestamp for record in records], - args=args, - extra_content=extra_content, ) self._tool_output_history.append(entry) self._process_tool_output_cache() @@ -1502,39 +1496,16 @@ def _clean_snapshot_in_memory( if record["uuid"] not in entry.record_uuids ] - # Recreate both assistant and function messages - # For Gemini API, the tool call request needs to exist along with - # the tool call result. - assist_args = entry.args if entry.args is not None else {} - - # Use timestamps from entry, ensuring proper ordering - if len(entry.record_timestamps) >= 2: - assist_timestamp = entry.record_timestamps[0] - func_timestamp = entry.record_timestamps[1] - elif entry.record_timestamps: - # If only one timestamp, use it for function and ensure - # assist comes before - func_timestamp = entry.record_timestamps[0] - assist_timestamp = func_timestamp - 1e-6 - else: - # No timestamps, use current time with proper ordering - assist_timestamp = time.time_ns() / 1_000_000_000 - func_timestamp = assist_timestamp + 1e-6 - - # Recreate assistant message (tool call request) - cleaned_assist_message = FunctionCallingMessage( - role_name=self.role_name, - role_type=self.role_type, - meta_dict={}, - content="", - func_name=entry.tool_name, - args=assist_args, - tool_call_id=entry.tool_call_id, - extra_content=entry.extra_content, + # Recreate only the function result message with cleaned content. + # The assistant message with tool calls is already recorded + # separately by _record_assistant_tool_calls_from_requests and + # should not be modified here. + timestamp = ( + entry.record_timestamps[0] + if entry.record_timestamps + else time.time_ns() / 1_000_000_000 ) - - # Recreate function message (tool result) - cleaned_func_message = FunctionCallingMessage( + cleaned_message = FunctionCallingMessage( role_name=self.role_name, role_type=self.role_type, meta_dict={}, @@ -1542,26 +1513,14 @@ def _clean_snapshot_in_memory( func_name=entry.tool_name, result=cleaned_result, tool_call_id=entry.tool_call_id, - extra_content=entry.extra_content, - ) - - # Create new records for both assistant and function messages - new_assist_record = MemoryRecord( - message=cleaned_assist_message, - role_at_backend=OpenAIBackendRole.ASSISTANT, - timestamp=assist_timestamp, - agent_id=self.agent_id, ) - new_func_record = MemoryRecord( - message=cleaned_func_message, + new_record = MemoryRecord( + message=cleaned_message, role_at_backend=OpenAIBackendRole.FUNCTION, - timestamp=func_timestamp, + timestamp=timestamp, agent_id=self.agent_id, ) - - # Add both records back - updated_records.append(new_assist_record.to_dict()) - updated_records.append(new_func_record.to_dict()) + updated_records.append(new_record.to_dict()) updated_records.sort(key=lambda record: record["timestamp"]) storage.clear() storage.save(updated_records) @@ -1573,11 +1532,8 @@ def _clean_snapshot_in_memory( ) entry.cached = True - entry.record_uuids = [ - str(new_assist_record.uuid), - str(new_func_record.uuid), - ] - entry.record_timestamps = [assist_timestamp, func_timestamp] + entry.record_uuids = [str(new_record.uuid)] + entry.record_timestamps = [timestamp] def add_external_tool( self, tool: Union[FunctionTool, Callable, Dict[str, Any]] @@ -4128,8 +4084,6 @@ def _record_tool_calling( ) # Register tool output for snapshot cleaning if enabled - # Include args and extra_content so both the assistant message - # (tool call) and function message (tool result) can be recreated if self._enable_snapshot_clean and not mask_output and func_records: serialized_result = self._serialize_tool_result(result_for_memory) self._register_tool_output_for_cache( @@ -4137,8 +4091,6 @@ def _record_tool_calling( tool_call_id, serialized_result, cast(List[MemoryRecord], func_records), - args=args, - extra_content=extra_content, ) if isinstance(result, ToolResult) and result.images: diff --git a/test/agents/test_chat_agent.py b/test/agents/test_chat_agent.py index bd654452c8..9699714693 100644 --- a/test/agents/test_chat_agent.py +++ b/test/agents/test_chat_agent.py @@ -129,20 +129,20 @@ def test_chat_agent(model, step_call_count=3): for i in range(step_call_count): for user_msg in [user_msg_bm, user_msg_str]: response = assistant.step(user_msg) - assert isinstance( - response.msgs, list - ), f"Error in round {i + 1}" + assert isinstance(response.msgs, list), ( + f"Error in round {i + 1}" + ) assert len(response.msgs) > 0, f"Error in round {i + 1}" - assert isinstance( - response.terminated, bool - ), f"Error in round {i + 1}" + assert isinstance(response.terminated, bool), ( + f"Error in round {i + 1}" + ) assert response.terminated is False, f"Error in round {i + 1}" - assert isinstance( - response.info, dict - ), f"Error in round {i + 1}" - assert ( - response.info['id'] is not None - ), f"Error in round {i + 1}" + assert isinstance(response.info, dict), ( + f"Error in round {i + 1}" + ) + assert response.info['id'] is not None, ( + f"Error in round {i + 1}" + ) @pytest.mark.model_backend @@ -370,9 +370,9 @@ def test_chat_agent_step_with_external_tools(step_call_count=3): external_tool_call_requests = response.info[ "external_tool_call_requests" ] - assert ( - external_tool_call_requests[0].tool_name == "math_subtract" - ), f"Error in calling round {i + 1}" + assert external_tool_call_requests[0].tool_name == "math_subtract", ( + f"Error in calling round {i + 1}" + ) @pytest.mark.model_backend @@ -514,9 +514,9 @@ async def mock_arun(*args, **kwargs): external_tool_call_requests = response.info[ "external_tool_call_requests" ] - assert ( - external_tool_call_requests[0].tool_name == "math_subtract" - ), f"Error in calling round {i + 1}" + assert external_tool_call_requests[0].tool_name == "math_subtract", ( + f"Error in calling round {i + 1}" + ) @pytest.mark.model_backend @@ -657,18 +657,18 @@ def test_chat_agent_multiple_return_messages(n, step_call_count=3): ) for i in range(step_call_count): - assert ( - assistant_with_sys_msg_response.msgs is not None - ), f"Error in calling round {i + 1}" - assert ( - len(assistant_with_sys_msg_response.msgs) == n - ), f"Error in calling round {i + 1}" - assert ( - assistant_without_sys_msg_response.msgs is not None - ), f"Error in calling round {i + 1}" - assert ( - len(assistant_without_sys_msg_response.msgs) == n - ), f"Error in calling round {i + 1}" + assert assistant_with_sys_msg_response.msgs is not None, ( + f"Error in calling round {i + 1}" + ) + assert len(assistant_with_sys_msg_response.msgs) == n, ( + f"Error in calling round {i + 1}" + ) + assert assistant_without_sys_msg_response.msgs is not None, ( + f"Error in calling round {i + 1}" + ) + assert len(assistant_without_sys_msg_response.msgs) == n, ( + f"Error in calling round {i + 1}" + ) @pytest.mark.model_backend @@ -753,12 +753,12 @@ def test_chat_agent_stream_output(step_call_count=3): assert len(msg.content) > 0, f"Error in calling round {i + 1}" stream_usage = stream_assistant_response.info["usage"] - assert ( - stream_usage["completion_tokens"] > 0 - ), f"Error in calling round {i + 1}" - assert ( - stream_usage["prompt_tokens"] > 0 - ), f"Error in calling round {i + 1}" + assert stream_usage["completion_tokens"] > 0, ( + f"Error in calling round {i + 1}" + ) + assert stream_usage["prompt_tokens"] > 0, ( + f"Error in calling round {i + 1}" + ) assert ( stream_usage["total_tokens"] == stream_usage["completion_tokens"] @@ -1039,12 +1039,12 @@ def test_tool_calling_sync(step_call_count=3): ] assert len(tool_calls) > 0, f"Error in calling round {i + 1}" - assert str(tool_calls[0]).startswith( - "Tool Execution" - ), f"Error in calling round {i + 1}" - assert ( - tool_calls[0].tool_name == "math_multiply" - ), f"Error in calling round {i + 1}" + assert str(tool_calls[0]).startswith("Tool Execution"), ( + f"Error in calling round {i + 1}" + ) + assert tool_calls[0].tool_name == "math_multiply", ( + f"Error in calling round {i + 1}" + ) assert tool_calls[0].args == { "a": 2, "b": 8, @@ -1165,9 +1165,9 @@ async def test_tool_calling_math_async(step_call_count=3): tool_calls = agent_response.info['tool_calls'] - assert ( - tool_calls[0].tool_name == "math_multiply" - ), f"Error in calling round {i + 1}" + assert tool_calls[0].tool_name == "math_multiply", ( + f"Error in calling round {i + 1}" + ) assert tool_calls[0].args == { "a": 2, "b": 8, @@ -1254,16 +1254,16 @@ def mock_run_tool_calling_async(*args, **kwargs): tool_calls = agent_response.info['tool_calls'] assert tool_calls, f"Error in calling round {i + 1}" - assert str(tool_calls[0]).startswith( - "Tool Execution" - ), f"Error in calling round {i + 1}" + assert str(tool_calls[0]).startswith("Tool Execution"), ( + f"Error in calling round {i + 1}" + ) - assert ( - tool_calls[0].tool_name == "async_sleep" - ), f"Error in calling round {i + 1}" - assert tool_calls[0].args == { - 'second': 1 - }, f"Error in calling round {i + 1}" + assert tool_calls[0].tool_name == "async_sleep", ( + f"Error in calling round {i + 1}" + ) + assert tool_calls[0].args == {'second': 1}, ( + f"Error in calling round {i + 1}" + ) assert tool_calls[0].result == 1, f"Error in calling round {i + 1}" @@ -1294,9 +1294,9 @@ def test_response_words_termination(step_call_count=3): assert agent.terminated, f"Error in calling round {i + 1}" assert agent_response.terminated, f"Error in calling round {i + 1}" - assert ( - "goodbye" in agent_response.info['termination_reasons'][0] - ), f"Error in calling round {i + 1}" + assert "goodbye" in agent_response.info['termination_reasons'][0], ( + f"Error in calling round {i + 1}" + ) def test_chat_agent_vision(step_call_count=3): @@ -1362,9 +1362,9 @@ def test_chat_agent_vision(step_call_count=3): for i in range(step_call_count): agent_response = agent.step(user_msg) - assert ( - agent_response.msgs[0].content == "Yes." - ), f"Error in calling round {i + 1}" + assert agent_response.msgs[0].content == "Yes.", ( + f"Error in calling round {i + 1}" + ) @pytest.mark.model_backend @@ -1534,9 +1534,9 @@ async def test_chat_agent_async_stream_with_async_generator(): # Create an async generator that wraps the chunks # This simulates what GeminiModel does with _wrap_async_stream_with_ # thought_preservation - async def mock_async_generator() -> ( - AsyncGenerator[ChatCompletionChunk, None] - ): + async def mock_async_generator() -> AsyncGenerator[ + ChatCompletionChunk, None + ]: for chunk in chunks: yield chunk @@ -1563,12 +1563,12 @@ async def mock_async_generator() -> ( # Verify final response contains the accumulated content final_response = responses[-1] - assert ( - final_response.msg is not None - ), "Final response should have a message" - assert ( - "Hello" in final_response.msg.content - ), "Final content should contain 'Hello'" + assert final_response.msg is not None, ( + "Final response should have a message" + ) + assert "Hello" in final_response.msg.content, ( + "Final content should contain 'Hello'" + ) @pytest.mark.model_backend @@ -1718,9 +1718,9 @@ def test_add(a: int, b: int) -> int: call_count = 0 - async def mock_async_generator() -> ( - AsyncGenerator[ChatCompletionChunk, None] - ): + async def mock_async_generator() -> AsyncGenerator[ + ChatCompletionChunk, None + ]: nonlocal call_count if call_count == 0: call_count += 1 @@ -1837,3 +1837,84 @@ class MathResult(BaseModel): assert len(responses) > 1, "Should receive multiple streaming chunks" assert responses[-1].msg.parsed.answer == 6 assert responses[-1].msg.parsed.explanation + + +def test_clean_snapshot_in_memory(): + """Test that snapshot content is properly cleaned in memory. + + This tests the _clean_snapshot_in_memory functionality which removes + stale snapshot markers and references from tool output messages stored + in memory. The cleaning preserves the assistant message (tool call request) + and only updates the function result message. + """ + from unittest.mock import MagicMock, patch + + from camel.agents.chat_agent import _ToolOutputHistoryEntry + from camel.messages import FunctionCallingMessage + + # Create a mock model to avoid API calls + mock_model = MagicMock() + mock_model.model_type = ModelType.DEFAULT + mock_model.model_config_dict = {} + mock_model.token_counter = None + mock_model.model_platform_name = "openai" + + with patch.object(ChatAgent, '_init_model', return_value=mock_model): + agent = ChatAgent( + system_message="Test agent", + model=mock_model, + ) + + # Manually enable snapshot cleaning + agent._enable_snapshot_clean = True + agent._tool_output_history = [] + + # Create a mock memory storage + mock_storage = MagicMock() + mock_chat_history_block = MagicMock() + mock_chat_history_block.storage = mock_storage + + agent.memory._chat_history_block = mock_chat_history_block + + # Create a test entry with snapshot markers + test_uuid = "test-uuid-123" + test_timestamp = 1234567890.0 + entry = _ToolOutputHistoryEntry( + tool_name="test_tool", + tool_call_id="call_123", + result_text="- Item 1 [ref=abc]\n- Item 2 [ref=def]\n", + record_uuids=[test_uuid], + record_timestamps=[test_timestamp], + cached=False, + ) + agent._tool_output_history.append(entry) + + # Mock the storage to return the existing record + mock_storage.load.return_value = [ + { + "uuid": test_uuid, + "timestamp": test_timestamp, + "message": {"content": "- Item 1 [ref=abc]\n- Item 2 [ref=def]\n"}, + } + ] + + # Call the clean function + agent._clean_snapshot_in_memory(entry) + + # Verify storage was updated + assert mock_storage.clear.called + assert mock_storage.save.called + + # Get the saved records + saved_records = mock_storage.save.call_args[0][0] + + # Should have one record (the cleaned function result) + assert len(saved_records) == 1 + + # The record should be a function message with cleaned content + saved_record = saved_records[0] + assert saved_record["role_at_backend"] == OpenAIBackendRole.FUNCTION.value + + # Verify entry was marked as cached + assert entry.cached is True + assert len(entry.record_uuids) == 1 # Single function result record