29 changes: 29 additions & 0 deletions PR_DESCRIPTION.md
@@ -0,0 +1,29 @@
# Fix: prevent orphan `tool` messages when `enable_snapshot_clean` is on

## Summary
This PR fixes a state-leak issue in `ChatAgent` that can produce invalid message history (orphan `tool` messages) across agent reuse scenarios.

## Root Cause
- `ChatAgent` keeps snapshot-clean metadata in `_tool_output_history`.
- `reset()` previously did not clear this cache.
- When an agent instance is reused (e.g., worker pool), stale entries from a previous task can still be processed.
- `_clean_snapshot_in_memory()` could rewrite a cleaned `FUNCTION/tool` record even if the original referenced records no longer existed in current memory.
- This can inject orphan `tool` messages (ones without a preceding `assistant` message carrying `tool_calls`), which trigger strict backend validation errors (e.g., Azure/LiteLLM 400); see the sketch below.
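
For context, here is an illustrative sketch of why an orphan `tool` message is rejected. The dicts follow the OpenAI-style chat format that strict backends validate; the ids and contents are made up for illustration and are not taken from this repo:

```python
# Valid ordering: the `tool` message answers a tool call issued by the
# immediately preceding assistant message.
valid_history = [
    {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {
                "id": "call_1",
                "type": "function",
                "function": {"name": "browser_get_page_snapshot", "arguments": "{}"},
            }
        ],
    },
    {"role": "tool", "tool_call_id": "call_1", "content": "- button [ref=1]"},
]

# Orphan ordering: no assistant message with a matching `tool_calls` entry
# precedes the `tool` message, so strict backends (e.g. Azure/LiteLLM)
# reject the request with a 400 validation error.
orphan_history = [
    {"role": "user", "content": "next task"},
    {"role": "tool", "tool_call_id": "call_1", "content": "- button [ref=1]"},
]
```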

## Fix Approach
1. Clear the snapshot-clean cache on reset:
- `ChatAgent.reset()` now clears `_tool_output_history` (reuse scenario sketched after this list).
2. Add a safeguard before rewriting cleaned tool output:
- In `_clean_snapshot_in_memory()`, only rewrite when referenced record UUIDs still exist in storage.
- If no referenced records are found, skip rewrite and mark the entry as cached.
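
The reuse scenario motivating fix 1 can be sketched as follows. This is not code from the PR: the worker-pool loop is hypothetical and `make_model()` is a placeholder for whatever configured model backend you use.

```python
from camel.agents import ChatAgent

# Hypothetical helper: stands in for any configured model backend.
model = make_model()

agent = ChatAgent(
    system_message="You are a helpful assistant.",
    model=model,
    enable_snapshot_clean=True,
)

# The same agent instance is reused across unrelated tasks (e.g. a worker pool).
for task in ("task A", "task B"):
    agent.reset()  # with this PR, also clears _tool_output_history
    assert agent._tool_output_history == []
    agent.step(task)  # stale snapshot-clean entries from the previous task
                      # can no longer rewrite tool records into this conversation
```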

## Why this is safe
- Normal snapshot-clean flow is unchanged when records exist.
- Only stale/cross-task cache entries are blocked from writing new `tool` records.
- This prevents invalid cross-conversation contamination without affecting valid tool-call chains.

## Tests
Added unit tests in `test/agents/test_chat_agent.py`:
- `test_chat_agent_reset_clears_tool_output_history`
- `test_clean_snapshot_in_memory_skips_missing_records`

24 changes: 23 additions & 1 deletion camel/agents/chat_agent.py
@@ -635,6 +635,9 @@ def reset(self):
r"""Resets the :obj:`ChatAgent` to its initial state."""
self.terminated = False
self.init_messages()
# Snapshot-clean cache is per-conversation state and must not survive
# agent reuse (e.g. pooled workers across different tasks).
self._tool_output_history.clear()
for terminator in self.response_terminators:
terminator.reset()

@@ -1504,10 +1507,29 @@ def _clean_snapshot_in_memory(
return

existing_records = storage.load()
existing_record_uuids = {
str(record.get("uuid"))
for record in existing_records
if record.get("uuid") is not None
}
matched_uuids = [
record_uuid
for record_uuid in entry.record_uuids
if record_uuid in existing_record_uuids
]

if not matched_uuids:
# Record no longer exists in current memory (e.g. after reset).
# Skip rewriting to avoid injecting orphan tool messages.
entry.cached = True
entry.record_uuids = []
entry.record_timestamps = []
return

updated_records = [
record
for record in existing_records
if record["uuid"] not in entry.record_uuids
if str(record.get("uuid")) not in matched_uuids
]
new_record = MemoryRecord(
message=cleaned_message,
74 changes: 72 additions & 2 deletions test/agents/test_chat_agent.py
@@ -30,12 +30,21 @@
from pydantic import BaseModel, Field

from camel.agents import ChatAgent
from camel.agents.chat_agent import StreamContentAccumulator, ToolCallingRecord
from camel.agents.chat_agent import (
StreamContentAccumulator,
ToolCallingRecord,
_ToolOutputHistoryEntry,
)
from camel.configs import ChatGPTConfig
from camel.generators import SystemMessageGenerator
from camel.memories import MemoryRecord
from camel.messages import BaseMessage
from camel.models import AnthropicModel, ModelFactory, OpenAIModel
from camel.models import (
AnthropicModel,
BaseModelBackend,
ModelFactory,
OpenAIModel,
)
from camel.terminators import ResponseWordsTerminator
from camel.toolkits import (
FunctionTool,
@@ -90,6 +99,18 @@
)


class DummyModel(BaseModelBackend):
@property
def token_counter(self):
return self._token_counter

def _run(self, messages, response_format=None, tools=None):
raise NotImplementedError

async def _arun(self, messages, response_format=None, tools=None):
raise NotImplementedError


@parametrize
def test_chat_agent(model, step_call_count=3):
model = model
@@ -145,6 +166,55 @@ def test_chat_agent(model, step_call_count=3):
), f"Error in round {i + 1}"


def test_chat_agent_reset_clears_tool_output_history():
model = DummyModel(ModelType.GPT_4O_MINI)
assistant = ChatAgent(
system_message="You are a helpful assistant.",
model=model,
enable_snapshot_clean=True,
)
assistant._tool_output_history = [
_ToolOutputHistoryEntry(
tool_name="tool_a",
tool_call_id="call_a",
result_text="old",
record_uuids=["uuid_a"],
record_timestamps=[1.0],
)
]

assistant.reset()

assert assistant._tool_output_history == []


def test_clean_snapshot_in_memory_skips_missing_records():
model = DummyModel(ModelType.GPT_4O_MINI)
assistant = ChatAgent(
system_message="You are a helpful assistant.",
model=model,
enable_snapshot_clean=True,
)
entry = _ToolOutputHistoryEntry(
tool_name="browser_get_page_snapshot",
tool_call_id="call_missing",
result_text="- button [ref=1]",
record_uuids=["missing_uuid"],
record_timestamps=[123.0],
)

chat_history_block = getattr(assistant.memory, "_chat_history_block", None)
storage = getattr(chat_history_block, "storage", None)
assert storage is not None
records_before = storage.load()

assistant._clean_snapshot_in_memory(entry)

records_after = storage.load()
assert records_after == records_before
assert entry.cached is True


@pytest.mark.model_backend
def test_chat_agent_stored_messages():
system_msg = BaseMessage(