Skip to content

Commit fe2dee7

Browse files
fix(widget+voice): carry, run & persist chat session-state across CHAT↔VOICE
Three related correctness fixes so a widget CHAT↔VOICE flip keeps the session's reducer-built state (cart_id, checkout_id, client-pushed facts) instead of silently dropping it. Before this, voice had ZERO state-reducer / tool_arg_injection code, so a template's state config worked in chat and silently no-op'd on the actual voice call.

1. CARRY (CHAT→VOICE seed). The widget voice resume seed carried prior_history + start_node but NOT agent_session_state. Read it into the seed (widget/handlers.py) and merge into the voice runtime's template_vars (only_if_missing) so chat-built identifiers thread through voice's existing {placeholder} resolution. The voice agent now holds self.agent_state, seeded from the resume.

2. RUN (SessionStatePolicy on voice). inject_tool_args runs before each voice tool and apply_state_reducers after it, reading/writing bot.agent_state — the same pure engines chat runs in _cycle_loop — at BOTH voice tool seams: global functions (_make_global_wrapper) and MCP tools (the mcp loader's _state_wrap_mcp_handler, applied outside the approval gate). Gated by a new handles_state_externally flag: chat sets it (wrapper skips, chat does it itself — no double-application); voice doesn't. The two voice-state helpers live in session_state.py next to the pure engines so both seams share them. No-op for templates with no state config.

3. PERSIST (VOICE→CHAT drain). end_conversation writes the voice-accumulated agent_state back to the chat_session, so a later chat turn sees what voice changed — completing the round-trip.

The principled completion of the agent_state seed-fix: voice now READS and UPDATES the shared state via the chat engines, for global functions AND MCP tools, with no new state machinery.

Tests: tests/test_voice_session_state.py (global-function hook + gating) and tests/test_mcp_approval.py (MCP state wrap). pyrefly 0 errors; full suite 452 passed.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 58d3fbc commit fe2dee7

9 files changed

Lines changed: 417 additions & 3 deletions

File tree

app/ai/voice/agents/breeze_buddy/agent/__init__.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,13 @@ def __init__(
189189
# at start_node with prior_history pre-loaded into LLM context.
190190
self._widget_resume_seed: Optional[Dict[str, Any]] = None
191191

192+
# Reducer-built session state (cart_id/checkout_id/client facts),
193+
# the voice counterpart of ChatAgent.agent_state. Seeded from a
194+
# widget-resume (below), accumulated via state_reducers during the
195+
# call by the global-function wrapper, and drained back to the
196+
# chat_session on end_conversation. Empty {} for fresh/telephony calls.
197+
self.agent_state: Dict[str, Any] = {}
198+
192199
# HITL approval channel — daily mode only, set alongside
193200
# _rtvi_processor. None on telephony bots (no approval surface) and
194201
# when RTVI is unavailable; the gate in template/approval.py treats
@@ -332,11 +339,20 @@ async def _setup_daily_transport(self, runner_args: RunnerArguments) -> None:
332339
"start_node": meta.get("start_node"),
333340
"prior_history": list(meta.get("prior_history") or []),
334341
"seed_message_count": int(meta.get("seed_message_count", 0) or 0),
342+
# Reducer-built chat state (cart_id/checkout_id/...) carried
343+
# across the CHAT->VOICE flip; merged into template_vars below.
344+
"agent_state": dict(meta.get("agent_state") or {}),
335345
}
346+
# Seed the live agent_state from the chat session so voice tool
347+
# calls inject/update it (tool_arg_injection + state_reducers via
348+
# the global-function wrapper) and the final state drains back on
349+
# end_conversation.
350+
self.agent_state = dict(self._widget_resume_seed["agent_state"])
336351
logger.info(
337352
f"Widget voice resume: chat_session={widget_session_id} "
338353
f"start_node={self._widget_resume_seed['start_node']!r} "
339-
f"prior_msgs={len(self._widget_resume_seed['prior_history'])}"
354+
f"prior_msgs={len(self._widget_resume_seed['prior_history'])} "
355+
f"agent_state_keys={sorted(self.agent_state)}"
340356
)
341357

342358
logger.info(
@@ -361,6 +377,22 @@ async def _setup_daily_transport(self, runner_args: RunnerArguments) -> None:
361377
logger.error(f"Failed to load template config for Daily mode: {e}")
362378
raise
363379

380+
# Widget resume: thread the chat session's accumulated agent_state
381+
# (cart_id/checkout_id/client-pushed facts) into template_vars so
382+
# {placeholder} resolution in the resumed voice flow uses the
383+
# chat-built identifiers instead of losing them on the flip.
384+
# only_if_missing — never clobber an explicitly-rendered call var.
385+
if self._widget_resume_seed:
386+
resumed_state = self._widget_resume_seed.get("agent_state") or {}
387+
merged_keys = [k for k in resumed_state if k not in self.template_vars]
388+
for k in merged_keys:
389+
self.template_vars[k] = resumed_state[k]
390+
if merged_keys:
391+
logger.info(
392+
f"Widget voice resume: merged {len(merged_keys)} agent_state "
393+
f"var(s) into template_vars: {sorted(merged_keys)}"
394+
)
395+
364396
# Synthesize and cache the initial greeting in Redis so it can be
365397
# played out on client-connect. Idempotent: if the dispatch worker
366398
# pre-synthesized the audio at dispatch time (outbound), this is a

app/ai/voice/agents/breeze_buddy/chat/agent.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,11 @@ def __init__(
174174
# global-function adapters receive bot_instance=self and would
175175
# otherwise double-gate.
176176
self.handles_approval_externally = True
177+
# Chat runs inject_tool_args / apply_state_reducers itself inside
178+
# _cycle_loop; this flag tells the shared global-function wrapper NOT
179+
# to re-apply them (voice has no such loop, so it lets the wrapper do
180+
# it). Prevents double-application of the SessionStatePolicy.
181+
self.handles_state_externally = True
177182
# function name -> ApprovalConfig for every gated global function.
178183
self._approval_map = build_approval_map(self.template.flow or {})
179184

app/ai/voice/agents/breeze_buddy/handlers/internal/end_conversation.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from app.core.logger.context import clear_log_context
2020
from app.database.accessor.breeze_buddy.chat_session import (
2121
drain_voice_into_chat_session,
22+
upsert_agent_session_state,
2223
)
2324

2425
callback_map = {
@@ -149,6 +150,15 @@ async def end_conversation(context: TemplateContext, args, transition_to=None):
149150
new_messages=new_messages,
150151
final_node=final_node,
151152
)
153+
# Persist voice-accumulated agent_state back to the chat
154+
# session so a later chat turn sees cart_id/etc. that voice
155+
# updated (mirror of the seed carried in on /voice/connect).
156+
voice_state = getattr(context.bot, "agent_state", None)
157+
if voice_state:
158+
await upsert_agent_session_state(
159+
chat_session_id=str(widget_session_id),
160+
data=voice_state,
161+
)
152162
except Exception as drain_err:
153163
logger.error(
154164
f"widget drain: failed for chat_session "

app/ai/voice/agents/breeze_buddy/mcp/__init__.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@
2828
# others; the latent handlers<->template cycle the note below describes is
2929
# pre-existing and unchanged by this import.)
3030
from app.ai.voice.agents.breeze_buddy.template.approval import gate_call
31+
from app.ai.voice.agents.breeze_buddy.template.session_state import (
32+
_inject_voice_state,
33+
_reduce_voice_state,
34+
)
3135

3236
# Template types FIRST — fully loads the `template` package (whose
3337
# __init__ eagerly pulls in http_handler / http_requester / hooks /
@@ -98,6 +102,33 @@ async def execute() -> Any:
98102
return gated_handler
99103

100104

105+
def _state_wrap_mcp_handler(handler: Any, bot_instance: Any, tool_name: str) -> Any:
    """Apply the voice SessionStatePolicy to an MCP tool handler.

    The MCP counterpart of ``_make_global_wrapper``'s state hook: inject
    state-driven args before the call and lift identifiers off the result
    after it, so a template's ``tool_arg_injection`` / ``state_reducers`` reach
    MCP tools on voice (chat already runs them in its ``_cycle_loop``).

    Applied OUTSIDE the approval gate, so a denied call's bare
    ``{status, reason}`` simply finds no matching reducer paths (no-op),
    while an approved call reduces its real result.

    Args:
        handler: Async callable invoked as ``handler(args, flow_manager)``.
        bot_instance: The owning bot, duck-typed. ``None`` disables the wrap.
        tool_name: The REGISTERED name the LLM calls — the name that
            injection/reducer rules target.

    Returns:
        ``handler`` itself when ``bot_instance`` is ``None``; otherwise an
        async wrapper with the same ``(args, flow_manager)`` call shape.
    """
    if bot_instance is None:
        return handler

    async def state_handler(args: Dict[str, Any], flow_manager: Any) -> Any:
        # The flag is read on every call rather than once at wrap time, so a
        # bot that manages state itself (chat) is passed straight through.
        if getattr(bot_instance, "handles_state_externally", False):
            return await handler(args, flow_manager)
        injected = _inject_voice_state(bot_instance, tool_name, args)
        result = await handler(injected, flow_manager)
        # Reduce only after the handler has returned; if it raised, the
        # exception propagates and the state is left untouched.
        _reduce_voice_state(bot_instance, tool_name, result)
        return result

    return state_handler
130+
131+
101132
def _deep_merge_defaults(
102133
args: Dict[str, Any], defaults: Dict[str, Any]
103134
) -> Dict[str, Any]:
@@ -641,6 +672,7 @@ async def _load_server_tools(
641672
tool_name,
642673
tool_approval,
643674
)
675+
handler = _state_wrap_mcp_handler(handler, bot_instance, tool_name)
644676
schema_kwargs: Dict[str, Any] = {}
645677
if tool_approval is not None:
646678
schema_kwargs["timeout_secs"] = _mcp_approval_timeout_secs(
@@ -698,6 +730,7 @@ async def _load_server_tools(
698730
tool_name,
699731
tool_approval,
700732
)
733+
handler = _state_wrap_mcp_handler(handler, bot_instance, tool_name)
701734
schema_kwargs: Dict[str, Any] = {}
702735
if tool_approval is not None:
703736
schema_kwargs["timeout_secs"] = _mcp_approval_timeout_secs(tool_approval)

app/ai/voice/agents/breeze_buddy/template/global_function.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222
from app.ai.voice.agents.breeze_buddy.template.func_action_handlers import (
2323
execute_func_post_actions,
2424
)
25+
from app.ai.voice.agents.breeze_buddy.template.session_state import (
26+
_inject_voice_state,
27+
_reduce_voice_state,
28+
)
2529
from app.ai.voice.agents.breeze_buddy.template.types import (
2630
BaseGlobalFunction,
2731
GlobalBuiltinFunction,
@@ -94,17 +98,32 @@ def _make_global_wrapper(
9498
"""
9599

96100
async def wrapper_handler(llm_args, flow_manager):
101+
# SessionStatePolicy (voice): inject state-driven args before the
102+
# handler runs and lift identifiers off the result after it — the
103+
# same reducers / tool_arg_injection chat applies. Chat sets
104+
# handles_state_externally and does this in its own _cycle_loop, so
105+
# skip here to avoid double-application.
106+
manages_state = not getattr(bot_instance, "handles_state_externally", False)
107+
effective_args = (
108+
_inject_voice_state(bot_instance, func.name, llm_args)
109+
if manages_state
110+
else llm_args
111+
)
112+
97113
async def execute() -> Any:
98114
await _run_filler_and_music(bot_instance, func)
99115
try:
100116
result = await wrapped_handler(
101-
llm_args,
117+
effective_args,
102118
function_config=func,
103119
)
104120
finally:
105121
# Always stop music even if handler errors
106122
await _stop_music(bot_instance, func)
107123

124+
if manages_state:
125+
_reduce_voice_state(bot_instance, func.name, result)
126+
108127
if func.func_post_actions and bot_instance:
109128
asyncio.create_task(
110129
execute_func_post_actions(
@@ -116,7 +135,7 @@ async def execute() -> Any:
116135
)
117136
return result
118137

119-
return await gate_global_function(bot_instance, func, llm_args, execute)
138+
return await gate_global_function(bot_instance, func, effective_args, execute)
120139

121140
return wrapper_handler
122141

app/ai/voice/agents/breeze_buddy/template/session_state.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,74 @@ def _is_tool_success(tool_result: Any) -> bool:
329329
return True
330330

331331

332+
# ---------------------------------------------------------------------------
333+
# Voice SessionStatePolicy — bot-aware application of the engines above.
334+
#
335+
# Chat runs inject_tool_args / apply_state_reducers itself inside
336+
# ChatAgent._cycle_loop. Voice has no such loop, so these helpers are applied
337+
# at the voice tool seams — global functions (template/global_function.py's
338+
# _make_global_wrapper) and MCP tools (mcp/__init__.py's loader). bot_instance
339+
# is duck-typed via getattr so this stays import-light (no agent import).
340+
# ---------------------------------------------------------------------------
341+
342+
343+
def _voice_state_session_id(bot_instance: Any) -> str:
    """Best-effort session id for inject_tool_args' eval context (voice path).

    Prefers the widget chat_session id (widget-resume), falls back to the
    lead id / call_sid. Used only for the ``{session_id}`` eval var + the
    idempotency-hash discriminator — never a correctness key on voice.

    Args:
        bot_instance: The voice bot, duck-typed via ``getattr`` so a missing
            attribute just moves on to the next fallback.

    Returns:
        The first truthy candidate as a string, or ``""`` when none is set.
    """
    seed = getattr(bot_instance, "_widget_resume_seed", None)
    if isinstance(seed, dict) and seed.get("widget_session_id"):
        return str(seed["widget_session_id"])
    lead = getattr(bot_instance, "lead", None)
    if lead is not None and getattr(lead, "id", None):
        return str(lead.id)
    # ``or ""`` keeps a None call_sid from being rendered as the text "None".
    return str(getattr(bot_instance, "call_sid", "") or "")
357+
358+
359+
def _inject_voice_state(
    bot_instance: Any, tool_name: str, llm_args: Dict[str, Any]
) -> Dict[str, Any]:
    """Voice SessionStatePolicy (read): fill state-driven tool args.

    Mirrors what ``ChatAgent._cycle_loop`` does for chat — so a template's
    ``tool_arg_injection`` rules (e.g. thread ``cart_id`` from session state
    into ``update_cart``) work on voice too. Applied to both voice global
    functions and voice MCP tools.

    Args:
        bot_instance: The voice bot, duck-typed; ``configurations`` and
            ``agent_state`` are read with ``getattr``.
        tool_name: Name of the tool about to run.
        llm_args: The arguments the LLM supplied for the call.

    Returns:
        ``llm_args`` itself when no injection rules are declared; otherwise
        whatever ``inject_tool_args`` returns.
    """
    configs = getattr(bot_instance, "configurations", None)
    injections = getattr(configs, "tool_arg_injection", None) if configs else None
    if not injections:
        return llm_args
    # A missing or empty agent_state is passed to the engine as {}.
    state = getattr(bot_instance, "agent_state", None) or {}
    return inject_tool_args(
        tool_name=tool_name,
        args=llm_args,
        state_data=state,
        chat_session_id=_voice_state_session_id(bot_instance),
        injections=injections,
    )
381+
382+
383+
def _reduce_voice_state(bot_instance: Any, tool_name: str, result: Any) -> None:
    """Voice SessionStatePolicy (write): lift identifiers off a tool result
    into ``bot_instance.agent_state`` via the template's ``state_reducers``.

    No-op when no reducers are declared or the bot carries no agent_state.

    Args:
        bot_instance: The voice bot, duck-typed; ``configurations`` and
            ``agent_state`` are read with ``getattr``.
        tool_name: Name of the tool that produced ``result``.
        result: The tool's return value, handed to the reducers as-is.
    """
    configs = getattr(bot_instance, "configurations", None)
    reducers = getattr(configs, "state_reducers", None) if configs else None
    if not reducers:
        return
    state = getattr(bot_instance, "agent_state", None)
    # Only a missing/None state skips the reduce; an empty {} still runs.
    if state is None:
        return
    # Rebind the attribute to whatever the engine returns.
    bot_instance.agent_state = apply_state_reducers(
        state_data=state, tool_name=tool_name, tool_result=result, reducers=reducers
    )
398+
399+
332400
__all__ = [
333401
"apply_state_reducers",
334402
"inject_tool_args",

app/api/routers/breeze_buddy/widget/handlers.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,16 @@ async def voice_connect_handler(
600600
"widget_config_id": cfg.id,
601601
"source": "widget",
602602
}
603+
# Carry the chat session's accumulated agent_state (reducer-built
604+
# identifiers like cart_id / checkout_id, plus client-pushed facts)
605+
# into the voice seed so they survive the CHAT->VOICE flip. Without
606+
# this, a voice continuation of a chat that already built a cart
607+
# loses the cart_id and would silently act on a fresh cart.
608+
agent_state_row = await get_agent_session_state(session_id)
609+
agent_state_data: Dict[str, Any] = (
610+
dict(agent_state_row.data) if agent_state_row else {}
611+
)
612+
603613
# ``meta_data`` carries the voice-runtime seed (read by the
604614
# agent's _setup_*_transport). The widget_session_id back-link
605615
# is what the end_conversation drain uses to find the
@@ -611,6 +621,7 @@ async def voice_connect_handler(
611621
"start_node": session.current_node,
612622
"prior_history": prior_history,
613623
"seed_message_count": seed_message_count,
624+
"agent_state": agent_state_data,
614625
}
615626
template_name = getattr(template, "name", None) or "widget"
616627

0 commit comments

Comments
 (0)