Skip to content

Commit 902175a

Browse files
committed
fix(#2914): prevent state.db from replaying edited/undone messages to agent context
1 parent cf003ae commit 902175a

4 files changed

Lines changed: 424 additions & 10 deletions

File tree

api/models.py

Lines changed: 28 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -605,16 +605,20 @@ def path(self):
605605
return SESSION_DIR / f'{self.session_id}.json'
606606

607607
def _maybe_clear_truncation_watermark(self) -> None:
608-
watermark = _message_timestamp_as_float({"timestamp": self.truncation_watermark})
609-
if watermark is None:
610-
return
611-
max_message_timestamp = None
612-
for msg in self.messages or []:
613-
timestamp = _message_timestamp_as_float(msg)
614-
if timestamp is not None:
615-
max_message_timestamp = timestamp if max_message_timestamp is None else max(max_message_timestamp, timestamp)
616-
if max_message_timestamp is not None and max_message_timestamp > watermark:
617-
self.truncation_watermark = None
608+
# Do NOT auto-clear truncation_watermark here. Once the user has
609+
# truncated this session (Edit / Regenerate / undo / retry), the
610+
# watermark must stay active to prevent state.db from replaying the
611+
# rows the user deliberately removed. Auto-clearing on every save()
612+
# after the agent appends a newer message defeated the watermark
613+
# entirely — the next reload would merge the full state.db tail
614+
# back in (#2914).
615+
#
616+
# The watermark is cleared explicitly when the sidecar itself grows
617+
# beyond it (new messages appended with timestamp > watermark), which
618+
# means the user has continued the conversation past the truncation
619+
# point. That check is done in _append_message / save paths that
620+
# know a genuine new turn was added, not in this blanket save() hook.
621+
pass
618622

619623
def save(self, touch_updated_at: bool = True, skip_index: bool = False) -> None:
620624
if not is_safe_session_id(self.session_id):
@@ -3798,6 +3802,20 @@ def merge_session_messages_append_only(
37983802
and key not in seen_message_keys
37993803
):
38003804
continue
3805+
# When a truncation watermark is active, state.db may contain original
3806+
# messages that were replaced by Edit (old content with old timestamp).
3807+
# The timestamp-based filter above catches messages AFTER the watermark,
3808+
# but messages BEFORE it (like the original pre-edit content) slip through.
3809+
# If a state.db message's content is not present in the sidecar and its
3810+
# timestamp is before the watermark, it's a replaced/stale row — skip it.
3811+
if (
3812+
watermark_timestamp is not None
3813+
and timestamp is not None
3814+
and timestamp < watermark_timestamp
3815+
and key not in seen_message_keys
3816+
and _session_message_content_key(msg) not in seen_content_keys
3817+
):
3818+
continue
38013819
if max_sidecar_timestamp is not None and timestamp is not None and timestamp <= max_sidecar_timestamp:
38023820
if key in seen_message_keys:
38033821
continue

api/routes.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5730,13 +5730,28 @@ def handle_post(handler, parsed) -> bool:
57305730
return bad(handler, "Session not found", 404)
57315731
keep = int(body["keep_count"])
57325732
with _get_session_agent_lock(body["session_id"]):
5733+
old_msg_count = len(s.messages or [])
5734+
old_ctx_count = len(getattr(s, 'context_messages', None) or [])
57335735
s.messages = s.messages[:keep]
5736+
# Truncate context_messages in sync with messages so the agent's
5737+
# model-facing context doesn't retain rows the user removed via
5738+
# Edit / Regenerate. Without this, context_messages still contains
5739+
# the full pre-truncation history and the agent sees "deleted"
5740+
# turns on the next turn (#2914).
5741+
if isinstance(getattr(s, 'context_messages', None), list):
5742+
s.context_messages = s.context_messages[:keep]
57345743
try:
57355744
from api.session_ops import _truncation_watermark_for
57365745
s.truncation_watermark = _truncation_watermark_for(s.messages)
57375746
except Exception:
57385747
s.truncation_watermark = 0.0
57395748
s.save()
5749+
logger.info(
5750+
"truncate %s: messages %d→%d, context_messages %d→%d, watermark=%.2f",
5751+
body["session_id"], old_msg_count, len(s.messages or []),
5752+
old_ctx_count, len(getattr(s, 'context_messages', None) or []),
5753+
s.truncation_watermark or 0,
5754+
)
57405755
return j(
57415756
handler, {"ok": True, "session": s.compact() | {"messages": s.messages}}
57425757
)

api/streaming.py

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,7 @@
4545
from api.usage import prompt_cache_hit_percent
4646
from api.models import (
4747
_is_empty_partial_activity_message,
48+
_message_timestamp_as_float,
4849
get_state_db_session_messages,
4950
reconciled_state_db_messages_for_session,
5051
)
@@ -2618,6 +2619,32 @@ def _restore_display_reasoning_metadata(previous_messages, updated_messages):
26182619
return updated_messages
26192620

26202621

2622+
def _clamp_context_to_watermark(session, messages: list) -> list:
    """Drop context messages timestamped after the truncation watermark (#2914).

    When a user edits, regenerates, or undoes messages, the agent's result
    may still carry turns the user deliberately removed. This helper keeps
    only the messages whose timestamp is at or before the session's active
    ``truncation_watermark`` so they are not fed back to the agent.

    Args:
        session: Object that may expose ``truncation_watermark`` and
            ``session_id`` attributes; both are read defensively.
        messages: Candidate model-facing messages. An empty or ``None``
            value is returned unchanged.

    Returns:
        ``messages`` itself when there is nothing to filter (no messages, no
        watermark, or a watermark that cannot be converted to a timestamp);
        otherwise a new list containing only the retained messages, in their
        original order.
    """
    # Nothing to clamp; this also avoids iterating a None payload.
    if not messages:
        return messages

    watermark = getattr(session, 'truncation_watermark', None)
    if watermark is None:
        return messages

    # Run the watermark through the same helper used for message timestamps
    # so both sides of the comparison below are produced the same way.
    watermark_ts = _message_timestamp_as_float({'timestamp': watermark})
    if watermark_ts is None:
        return messages

    # NOTE(review): any message with a timestamp strictly greater than the
    # watermark is dropped, including ones produced after the truncation.
    # Presumably messages from a genuinely new turn either carry no timestamp
    # here or the watermark has been advanced by then — confirm against the
    # callers, otherwise fresh turns would be removed from the context too.
    kept = []
    for message in messages:
        message_ts = _message_timestamp_as_float(message)
        # Messages without a usable timestamp cannot be ordered against the
        # watermark, so they are retained.
        if message_ts is None or message_ts <= watermark_ts:
            kept.append(message)

    if len(kept) != len(messages):
        logger.info(
            "clamping context_messages: %d → %d (watermark=%.2f, session=%s)",
            len(messages), len(kept), watermark_ts,
            getattr(session, 'session_id', None),
        )
    return kept
2646+
2647+
26212648
def _session_context_messages(session):
26222649
"""Return model-facing history without assuming it matches the UI transcript."""
26232650
context_messages = getattr(session, 'context_messages', None)
@@ -5148,6 +5175,7 @@ def _periodic_checkpoint():
51485175
_previous_context_messages,
51495176
_next_context_messages,
51505177
)
5178+
_next_context_messages = _clamp_context_to_watermark(s, _next_context_messages)
51515179
s.context_messages = _deduplicate_context_messages(_next_context_messages)
51525180
s.messages = _merge_display_messages_after_agent_result(
51535181
_previous_messages,
@@ -5295,6 +5323,7 @@ def _periodic_checkpoint():
52955323
_previous_context_messages,
52965324
_next_context_messages,
52975325
)
5326+
_next_context_messages = _clamp_context_to_watermark(s, _next_context_messages)
52985327
s.context_messages = _deduplicate_context_messages(_next_context_messages)
52995328
s.messages = _merge_display_messages_after_agent_result(
53005329
_previous_messages,
@@ -6172,6 +6201,7 @@ def _periodic_checkpoint():
61726201
_previous_context_messages,
61736202
_next_context_messages,
61746203
)
6204+
_next_context_messages = _clamp_context_to_watermark(s, _next_context_messages)
61756205
s.context_messages = _deduplicate_context_messages(_next_context_messages)
61766206
s.messages = _merge_display_messages_after_agent_result(
61776207
_previous_messages,

0 commit comments

Comments
 (0)