update

fengju0213 · fengju0213 · commit f1519459a759 · 2026-02-11T21:45:41.000+08:00
diff --git a/PR_DESCRIPTION.md b/PR_DESCRIPTION.md
@@ -0,0 +1,29 @@
+# Fix: prevent orphan `tool` messages when `enable_snapshot_clean` is on
+
+## Summary
+This PR fixes a state-leak issue in `ChatAgent` that can produce invalid message history (orphan `tool` messages) across agent reuse scenarios.
+
+## Root Cause
+- `ChatAgent` keeps snapshot-clean metadata in `_tool_output_history`.
+- `reset()` previously did not clear this cache.
+- When an agent instance is reused (e.g., worker pool), stale entries from a previous task can still be processed.
+- `_clean_snapshot_in_memory()` could rewrite a cleaned `FUNCTION/tool` record even if the original referenced records no longer existed in current memory.
+- This may inject orphan `tool` messages (without preceding `assistant.tool_calls`), which can trigger strict backend validation errors (e.g., Azure/LiteLLM 400).
+
+## Fix Approach
+1. Clear snapshot-clean cache on reset:
+   - `ChatAgent.reset()` now clears `_tool_output_history`.
+2. Add safe-guard before rewriting cleaned tool output:
+   - In `_clean_snapshot_in_memory()`, only rewrite when referenced record UUIDs still exist in storage.
+   - If no referenced records are found, skip rewrite and mark the entry as cached.
+
+## Why this is safe
+- Normal snapshot-clean flow is unchanged when records exist.
+- Only stale/cross-task cache entries are blocked from writing new `tool` records.
+- This prevents invalid cross-conversation contamination without affecting valid tool-call chains.
+
+## Tests
+Added unit tests in `test/agents/test_chat_agent.py`:
+- `test_chat_agent_reset_clears_tool_output_history`
+- `test_clean_snapshot_in_memory_skips_missing_records`
+
diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py
@@ -635,6 +635,9 @@ def reset(self):
         r"""Resets the :obj:`ChatAgent` to its initial state."""
         self.terminated = False
         self.init_messages()
+        # Snapshot-clean cache is per-conversation state and must not survive
+        # agent reuse (e.g. pooled workers across different tasks).
+        self._tool_output_history.clear()
         for terminator in self.response_terminators:
             terminator.reset()
 
@@ -1504,10 +1507,29 @@ def _clean_snapshot_in_memory(
                 return
 
             existing_records = storage.load()
+            existing_record_uuids = {
+                str(record.get("uuid"))
+                for record in existing_records
+                if record.get("uuid") is not None
+            }
+            matched_uuids = [
+                record_uuid
+                for record_uuid in entry.record_uuids
+                if record_uuid in existing_record_uuids
+            ]
+
+            if not matched_uuids:
+                # Record no longer exists in current memory (e.g. after reset).
+                # Skip rewriting to avoid injecting orphan tool messages.
+                entry.cached = True
+                entry.record_uuids = []
+                entry.record_timestamps = []
+                return
+
             updated_records = [
                 record
                 for record in existing_records
-                if record["uuid"] not in entry.record_uuids
+                if str(record.get("uuid")) not in matched_uuids
             ]
             new_record = MemoryRecord(
                 message=cleaned_message,
diff --git a/test/agents/test_chat_agent.py b/test/agents/test_chat_agent.py
@@ -30,11 +30,16 @@
 from pydantic import BaseModel, Field
 
 from camel.agents import ChatAgent
-from camel.agents.chat_agent import StreamContentAccumulator, ToolCallingRecord
+from camel.agents.chat_agent import (
+    StreamContentAccumulator,
+    ToolCallingRecord,
+    _ToolOutputHistoryEntry,
+)
 from camel.configs import ChatGPTConfig
 from camel.generators import SystemMessageGenerator
 from camel.memories import MemoryRecord
 from camel.messages import BaseMessage
+from camel.models import BaseModelBackend
 from camel.models import AnthropicModel, ModelFactory, OpenAIModel
 from camel.terminators import ResponseWordsTerminator
 from camel.toolkits import (
@@ -90,6 +95,18 @@
 )
 
 
+class DummyModel(BaseModelBackend):
+    @property
+    def token_counter(self):
+        return self._token_counter
+
+    def _run(self, messages, response_format=None, tools=None):
+        raise NotImplementedError
+
+    async def _arun(self, messages, response_format=None, tools=None):
+        raise NotImplementedError
+
+
 @parametrize
 def test_chat_agent(model, step_call_count=3):
     model = model
@@ -145,6 +162,55 @@ def test_chat_agent(model, step_call_count=3):
                 ), f"Error in round {i + 1}"
 
 
+def test_chat_agent_reset_clears_tool_output_history():
+    model = DummyModel(ModelType.GPT_4O_MINI)
+    assistant = ChatAgent(
+        system_message="You are a helpful assistant.",
+        model=model,
+        enable_snapshot_clean=True,
+    )
+    assistant._tool_output_history = [
+        _ToolOutputHistoryEntry(
+            tool_name="tool_a",
+            tool_call_id="call_a",
+            result_text="old",
+            record_uuids=["uuid_a"],
+            record_timestamps=[1.0],
+        )
+    ]
+
+    assistant.reset()
+
+    assert assistant._tool_output_history == []
+
+
+def test_clean_snapshot_in_memory_skips_missing_records():
+    model = DummyModel(ModelType.GPT_4O_MINI)
+    assistant = ChatAgent(
+        system_message="You are a helpful assistant.",
+        model=model,
+        enable_snapshot_clean=True,
+    )
+    entry = _ToolOutputHistoryEntry(
+        tool_name="browser_get_page_snapshot",
+        tool_call_id="call_missing",
+        result_text="- button [ref=1]",
+        record_uuids=["missing_uuid"],
+        record_timestamps=[123.0],
+    )
+
+    chat_history_block = getattr(assistant.memory, "_chat_history_block", None)
+    storage = getattr(chat_history_block, "storage", None)
+    assert storage is not None
+    records_before = storage.load()
+
+    assistant._clean_snapshot_in_memory(entry)
+
+    records_after = storage.load()
+    assert records_after == records_before
+    assert entry.cached is True
+
+
 @pytest.mark.model_backend
 def test_chat_agent_stored_messages():
     system_msg = BaseMessage(