Skip to content

Commit f3c3f0c

Browse files
committed
fix: address voice bridge review feedback
1 parent cdce270 commit f3c3f0c

7 files changed

Lines changed: 160 additions & 104 deletions

File tree

app/agent_server.py

Lines changed: 24 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,14 @@
5050
from utils.config_manager import get_config_manager
5151
from utils.tokenize import truncate_to_tokens as _tt
5252
from main_logic.agent_event_bus import AgentServerEventBridge
53+
from plugin.server.application.plugins.voice_contracts import (
54+
VOICE_TRANSCRIPT_EVENT_ID,
55+
VOICE_TRANSCRIPT_EVENT_TYPE,
56+
arbitrate_voice_transcript_results,
57+
voice_transcript_custom_event_args,
58+
voice_transcript_noop,
59+
voice_transcript_request_has_text,
60+
)
5361
try:
5462
from brain.computer_use import ComputerUseAdapter
5563
from brain.browser_use_adapter import BrowserUseAdapter
@@ -1651,83 +1659,11 @@ async def _emit_agent_status_update(lanlan_name: Optional[str] = None) -> None:
16511659
pass
16521660

16531661

1654-
VOICE_TRANSCRIPT_CUSTOM_EVENT_TYPE = "voice_transcript"
1655-
VOICE_TRANSCRIPT_CUSTOM_EVENT_ID = "handle_transcript"
16561662
VOICE_TRANSCRIPT_CUSTOM_EVENT_TIMEOUT_SECONDS = 1.0
16571663

16581664

1659-
def _voice_bridge_noop(reason: str, **extra: object) -> Dict[str, Any]:
1660-
return {
1661-
"action": "noop",
1662-
"reason": str(reason or "noop"),
1663-
**extra,
1664-
}
1665-
1666-
1667-
def _voice_transcript_request_has_text(event: Mapping[str, object] | None) -> bool:
1668-
if not isinstance(event, Mapping):
1669-
return False
1670-
return bool(str(event.get("transcript") or "").strip())
1671-
1672-
1673-
def _voice_transcript_custom_event_args(event: Mapping[str, object]) -> Dict[str, object]:
1674-
metadata = event.get("metadata")
1675-
return {
1676-
"transcript": str(event.get("transcript") or "").strip(),
1677-
"lanlan_name": str(event.get("lanlan_name") or ""),
1678-
"metadata": dict(metadata) if isinstance(metadata, Mapping) else {},
1679-
}
1680-
1681-
16821665
def _voice_bridge_action_from_dispatch_results(dispatch_results: object) -> Dict[str, Any]:
1683-
if not isinstance(dispatch_results, list) or not dispatch_results:
1684-
return _voice_bridge_noop("no_subscribers")
1685-
1686-
from plugin.server.application.plugins.voice_contracts import (
1687-
arbitrate_voice_transcript_results,
1688-
)
1689-
1690-
arbitration_items: list[dict[str, object]] = []
1691-
failure_count = 0
1692-
for item in dispatch_results:
1693-
if not isinstance(item, Mapping):
1694-
continue
1695-
if not bool(item.get("success")):
1696-
failure_count += 1
1697-
continue
1698-
result = item.get("result")
1699-
if not isinstance(result, Mapping):
1700-
continue
1701-
action = str(result.get("action") or "").strip()
1702-
if not action:
1703-
continue
1704-
payload: Dict[str, Any] = dict(result)
1705-
payload["action"] = action
1706-
plugin_id = str(item.get("plugin_id") or "").strip()
1707-
if plugin_id:
1708-
payload.setdefault("source_plugin", plugin_id)
1709-
source_event_id = str(item.get("event_id") or "").strip()
1710-
if source_event_id:
1711-
payload.setdefault("source_event_id", source_event_id)
1712-
arbitration_items.append(
1713-
{
1714-
"plugin_id": payload.get("source_plugin") or plugin_id,
1715-
"event_id": payload.get("source_event_id") or source_event_id,
1716-
"success": True,
1717-
"result": payload,
1718-
}
1719-
)
1720-
1721-
if not arbitration_items:
1722-
return _voice_bridge_noop("no_handler_result", failures=failure_count)
1723-
payload = arbitrate_voice_transcript_results(arbitration_items)
1724-
if failure_count:
1725-
try:
1726-
existing_failures = int(payload.get("failures") or 0)
1727-
except (TypeError, ValueError):
1728-
existing_failures = 0
1729-
payload["failures"] = existing_failures + failure_count
1730-
return payload
1666+
return arbitrate_voice_transcript_results(dispatch_results)
17311667

17321668

17331669
async def _dispatch_voice_transcript_custom_event(
@@ -1736,9 +1672,9 @@ async def _dispatch_voice_transcript_custom_event(
17361672
from plugin.server.application.plugins.dispatch_service import PluginDispatchService
17371673

17381674
dispatch_results = await PluginDispatchService().trigger_custom_event_subscribers(
1739-
event_type=VOICE_TRANSCRIPT_CUSTOM_EVENT_TYPE,
1740-
event_id=VOICE_TRANSCRIPT_CUSTOM_EVENT_ID,
1741-
args=_voice_transcript_custom_event_args(event),
1675+
event_type=VOICE_TRANSCRIPT_EVENT_TYPE,
1676+
event_id=VOICE_TRANSCRIPT_EVENT_ID,
1677+
args=voice_transcript_custom_event_args(event),
17421678
timeout=VOICE_TRANSCRIPT_CUSTOM_EVENT_TIMEOUT_SECONDS,
17431679
)
17441680
return _voice_bridge_action_from_dispatch_results(dispatch_results)
@@ -1747,24 +1683,19 @@ async def _dispatch_voice_transcript_custom_event(
17471683
async def _handle_voice_transcript_request(event: Dict[str, Any]) -> None:
17481684
event_id = str((event or {}).get("event_id") or "")
17491685
lanlan_name = (event or {}).get("lanlan_name")
1750-
result: Dict[str, Any] = _voice_bridge_noop("unavailable")
1686+
result: Dict[str, Any] = voice_transcript_noop("unavailable")
17511687

17521688
try:
1753-
if not _voice_transcript_request_has_text(event):
1754-
result = _voice_bridge_noop("empty_transcript")
1689+
if not voice_transcript_request_has_text(event):
1690+
result = voice_transcript_noop("empty_transcript")
17551691
elif not Modules.analyzer_enabled:
1756-
result = _voice_bridge_noop("agent_disabled")
1692+
result = voice_transcript_noop("agent_disabled")
17571693
elif not Modules.agent_flags.get("user_plugin_enabled", False):
1758-
result = _voice_bridge_noop("user_plugin_disabled")
1694+
result = voice_transcript_noop("user_plugin_disabled")
1695+
elif not bool(Modules.plugin_lifecycle_started):
1696+
result = voice_transcript_noop("plugin_warming_up")
17591697
else:
1760-
lifecycle_ready = bool(Modules.plugin_lifecycle_started)
1761-
if not lifecycle_ready:
1762-
lifecycle_ready = await _ensure_plugin_lifecycle_started()
1763-
1764-
if not lifecycle_ready:
1765-
result = _voice_bridge_noop("plugin_lifecycle_start_failed")
1766-
else:
1767-
result = await _dispatch_voice_transcript_custom_event(event)
1698+
result = await _dispatch_voice_transcript_custom_event(event)
17681699
except asyncio.CancelledError:
17691700
raise
17701701
except Exception as exc:
@@ -1774,11 +1705,10 @@ async def _handle_voice_transcript_request(event: Dict[str, Any]) -> None:
17741705
lanlan_name,
17751706
exc,
17761707
)
1777-
result = {
1778-
"action": "noop",
1779-
"reason": "dispatch_failed",
1780-
"error_type": type(exc).__name__,
1781-
}
1708+
result = voice_transcript_noop(
1709+
"dispatch_failed",
1710+
error_type=type(exc).__name__,
1711+
)
17821712

17831713
await _emit_main_event(
17841714
"voice_bridge_result",

main_logic/agent_event_bus.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@ def _zmq_addr(env_key: str, default_port: int) -> str:
4747
_ack_waiters_lock = threading.Lock()
4848
_voice_bridge_waiters: dict[str, asyncio.Future] = {}
4949
_voice_bridge_request_seen_waiters: dict[str, asyncio.Future] = {}
50+
# Voice bridge waiter state is protected by _voice_bridge_waiters_lock.
51+
# queued means a result has been enqueued on the main loop but resolution has
52+
# not started; resolving means notify_voice_bridge_result is actively resolving
53+
# it. Normal flow is queued -> resolving -> removed.
5054
_voice_bridge_waiters_queued: set[str] = set()
5155
_voice_bridge_waiters_resolving: set[str] = set()
5256
_voice_bridge_waiters_lock = threading.Lock()

main_logic/core.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2009,16 +2009,21 @@ def _session_changed() -> bool:
20092009
action = str(result.get("action") or "").strip()
20102010
if action == "cancel_response":
20112011
cancel_response = getattr(session_snapshot, "cancel_response", None)
2012-
if callable(cancel_response):
2013-
try:
2014-
if _session_changed():
2015-
return ""
2016-
await cancel_response()
2017-
logger.debug("[%s] voice bridge cancelled current response", self.lanlan_name)
2018-
except asyncio.CancelledError:
2019-
raise
2020-
except Exception as exc:
2021-
logger.debug("[%s] voice bridge cancel skipped/failed: %s", self.lanlan_name, exc)
2012+
if not callable(cancel_response):
2013+
logger.debug("[%s] voice bridge cancel skipped: session has no cancel_response", self.lanlan_name)
2014+
return ""
2015+
try:
2016+
if _session_changed():
2017+
return ""
2018+
await cancel_response()
2019+
if _session_changed():
2020+
return ""
2021+
logger.debug("[%s] voice bridge cancelled current response", self.lanlan_name)
2022+
except asyncio.CancelledError:
2023+
raise
2024+
except Exception as exc:
2025+
logger.debug("[%s] voice bridge cancel skipped/failed: %s", self.lanlan_name, exc)
2026+
return ""
20222027
return action
20232028

20242029
if action == "prime_context":

plugin/plugins/study_companion/voice_contracts.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
VOICE_TRANSCRIPT_EVENT_TYPE,
1717
arbitrate_voice_transcript_results,
1818
voice_transcript_cancel_response,
19+
voice_transcript_custom_event_args,
1920
voice_transcript_noop,
2021
voice_transcript_prime_context,
22+
voice_transcript_request_has_text,
2123
)

plugin/server/application/plugins/voice_contracts.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,25 @@ def voice_transcript_prime_context(
9090
return payload
9191

9292

93+
def voice_transcript_request_has_text(event: Mapping[str, object] | None) -> bool:
94+
"""Return whether a loosely-typed voice transcript request has usable text."""
95+
if not isinstance(event, Mapping):
96+
return False
97+
return bool(str(event.get("transcript") or "").strip())
98+
99+
100+
def voice_transcript_custom_event_args(
101+
event: Mapping[str, object],
102+
) -> dict[str, object]:
103+
"""Build canonical custom-event args for a voice transcript request."""
104+
metadata = event.get("metadata")
105+
return {
106+
"transcript": str(event.get("transcript") or "").strip(),
107+
"lanlan_name": str(event.get("lanlan_name") or ""),
108+
"metadata": dict(metadata) if isinstance(metadata, Mapping) else {},
109+
}
110+
111+
93112
# ---------------------------------------------------------------------------
94113
# Arbitration helpers
95114
# ---------------------------------------------------------------------------
@@ -217,6 +236,8 @@ def arbitrate_voice_transcript_results(dispatch_results: object) -> dict[str, An
217236
"VOICE_TRANSCRIPT_EVENT_TYPE",
218237
"arbitrate_voice_transcript_results",
219238
"voice_transcript_cancel_response",
239+
"voice_transcript_custom_event_args",
220240
"voice_transcript_noop",
221241
"voice_transcript_prime_context",
242+
"voice_transcript_request_has_text",
222243
]

plugin/tests/unit/plugins/test_study_companion_voice_contracts.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
from plugin.plugins.study_companion.voice_contracts import (
66
arbitrate_voice_transcript_results,
7+
voice_transcript_custom_event_args,
8+
voice_transcript_request_has_text,
79
)
810

911
pytestmark = pytest.mark.plugin_unit
@@ -104,3 +106,19 @@ def test_voice_transcript_arbitration_all_noop_continues_ordinary_flow() -> None
104106
"handlers": 1,
105107
"failures": 1,
106108
}
109+
110+
111+
def test_voice_transcript_request_helpers_normalize_event_args() -> None:
112+
event = {
113+
"transcript": " Yui help ",
114+
"lanlan_name": "Lan",
115+
"metadata": {"request_id": "req-1"},
116+
}
117+
118+
assert voice_transcript_request_has_text(event) is True
119+
assert voice_transcript_request_has_text({"transcript": " "}) is False
120+
assert voice_transcript_custom_event_args(event) == {
121+
"transcript": "Yui help",
122+
"lanlan_name": "Lan",
123+
"metadata": {"request_id": "req-1"},
124+
}

tests/unit/test_core_game_route_memory_contract.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,82 @@ async def fake_publish(lanlan_name, transcript, *, metadata):
389389
assert mgr.sync_message_queue.messages == []
390390

391391

392+
@pytest.mark.unit
393+
@pytest.mark.asyncio
394+
async def test_voice_transcript_bridge_cancel_without_session_handler_uses_ordinary_flow(
395+
monkeypatch,
396+
):
397+
async def fake_publish(*_args, **_kwargs):
398+
return {"action": "cancel_response", "reason": "ocr_overlap"}
399+
400+
mgr = _make_transcript_manager()
401+
mgr.session = object()
402+
monkeypatch.setattr(
403+
core_module,
404+
"publish_voice_transcript_request_reliably",
405+
fake_publish,
406+
)
407+
408+
await core_module.LLMSessionManager.handle_input_transcript(
409+
mgr,
410+
"screen echo",
411+
is_voice_source=True,
412+
)
413+
414+
assert mgr._activity_tracker.voice_rms_count == 1
415+
assert mgr._activity_tracker.user_messages == ["screen echo"]
416+
assert mgr._session_turn_count == 1
417+
mgr._publish_user_utterance_to_plugin_bus.assert_called_once_with(
418+
"screen echo",
419+
is_voice_source=True,
420+
)
421+
assert mgr.sync_message_queue.messages == [
422+
{
423+
"type": "user",
424+
"data": {"input_type": "transcript", "data": "screen echo"},
425+
}
426+
]
427+
428+
429+
@pytest.mark.unit
430+
@pytest.mark.asyncio
431+
async def test_voice_transcript_bridge_cancel_failure_uses_ordinary_flow(monkeypatch):
432+
class _FailingCancelSession:
433+
async def cancel_response(self):
434+
raise RuntimeError("cancel failed")
435+
436+
async def fake_publish(*_args, **_kwargs):
437+
return {"action": "cancel_response", "reason": "ocr_overlap"}
438+
439+
mgr = _make_transcript_manager()
440+
mgr.session = _FailingCancelSession()
441+
monkeypatch.setattr(
442+
core_module,
443+
"publish_voice_transcript_request_reliably",
444+
fake_publish,
445+
)
446+
447+
await core_module.LLMSessionManager.handle_input_transcript(
448+
mgr,
449+
"screen echo",
450+
is_voice_source=True,
451+
)
452+
453+
assert mgr._activity_tracker.voice_rms_count == 1
454+
assert mgr._activity_tracker.user_messages == ["screen echo"]
455+
assert mgr._session_turn_count == 1
456+
mgr._publish_user_utterance_to_plugin_bus.assert_called_once_with(
457+
"screen echo",
458+
is_voice_source=True,
459+
)
460+
assert mgr.sync_message_queue.messages == [
461+
{
462+
"type": "user",
463+
"data": {"input_type": "transcript", "data": "screen echo"},
464+
}
465+
]
466+
467+
392468
@pytest.mark.unit
393469
@pytest.mark.asyncio
394470
async def test_voice_transcript_bridge_prime_context_consumes_ordinary_flow(monkeypatch):

0 commit comments

Comments
 (0)