Skip to content

Commit ae90cf6

Browse files
Merge pull request nesquena#4106 from nesquena/stage-4016b
Release MY (v0.51.386): voice mode survives dropped speechSynthesis onend (nesquena#3983)
2 parents 1ae5679 + 8b5c8e3 commit ae90cf6

3 files changed

Lines changed: 146 additions & 2 deletions

File tree

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,12 @@
33

44
## [Unreleased]
55

6+
## [v0.51.386] — 2026-06-13 — Release MY (voice mode survives a dropped speechSynthesis onend, #3983)
7+
8+
### Fixed
9+
10+
- **Hands-free voice mode no longer dead-ends after the first browser-TTS reply (#3983).** Chromium intermittently drops the `speechSynthesis` utterance's `onend` event, which left voice mode stuck "speaking" and never re-armed listening. A watchdog now forces a return to listening if `onend` never fires, with the recovery handles cleared on normal completion and on deactivation. The fix is scoped to the browser `speechSynthesis` path — the Edge `Audio` branch (which has a reliable `onended`) is untouched. (#3983)
11+
612
## [v0.51.385] — 2026-06-13 — Release MX (profile-cookie env var aligned to HERMES_WEBUI_ prefix, #803)
713

814
### Changed

static/boot.js

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -853,8 +853,48 @@ window._micPendingSend=window._micPendingSend||false;
853853
// a different session's last assistant reply if the user navigated away
854854
// between send and stream completion. (Opus pre-release advisor.)
855855
let _voiceModeThinkingSid=null;
856+
let _browserTtsKeepAlive=null;
857+
let _browserTtsWatchdog=null;
858+
let _browserTtsSuppressNextErrorRearm=false;
856859
const SILENCE_MS=1800; // auto-send after 1.8s silence
857860

861+
// Stop both browser-TTS recovery timers, if set, and reset their handles
// to null: the keep-alive interval and the watchdog timeout.
function _clearBrowserTtsRecovery(){
  const keepAliveId=_browserTtsKeepAlive;
  if(keepAliveId){
    _browserTtsKeepAlive=null;
    clearInterval(keepAliveId);
  }
  const watchdogId=_browserTtsWatchdog;
  if(watchdogId){
    _browserTtsWatchdog=null;
    clearTimeout(watchdogId);
  }
}
871+
872+
// Arm the recovery timers for one browser speechSynthesis utterance.
//   clean - text about to be spoken; only its length is used, to size the watchdog.
//   rate  - utterance rate; anything non-finite or <= 0 is treated as 1.
// Any previously armed timers are cleared first, and the
// "suppress next error re-arm" flag is reset.
function _armBrowserTtsRecovery(clean, rate){
  _clearBrowserTtsRecovery();
  _browserTtsSuppressNextErrorRearm=false;

  let safeRate=1;
  if(Number.isFinite(rate)&&rate>0) safeRate=rate;

  // Budget: 12 characters per second (scaled by rate) plus 10s of slack,
  // with a 4s floor.
  const charCount=String(clean||'').length;
  const budgetMs=Math.round((charCount/(12*safeRate))*1000)+10000;
  const watchdogMs=Math.max(4000,budgetMs);

  // True once voice mode is off or has already left the 'speaking' state.
  const notSpeaking=()=>(!_voiceModeActive||_voiceModeState!=='speaking');

  // Chromium can drop utter.onend on later turns, so force a recovery path:
  // if we are still 'speaking' when the budget runs out, cancel synthesis
  // and go back to listening.
  _browserTtsWatchdog=setTimeout(()=>{
    if(notSpeaking()) return;
    // Raised before cancel() so the utterance's onerror handler can skip
    // its own re-arm.
    _browserTtsSuppressNextErrorRearm=true;
    try{ speechSynthesis.cancel(); }catch(_){}
    _clearBrowserTtsRecovery();
    _startListening();
  },watchdogMs);

  // Every 10s while synthesis reports it is speaking, issue pause()+resume().
  // NOTE(review): presumably this keeps long utterances from stalling in
  // Chromium - confirm. The timers tear themselves down once we are no
  // longer in the 'speaking' state.
  _browserTtsKeepAlive=setInterval(()=>{
    if(notSpeaking()){
      _clearBrowserTtsRecovery();
      return;
    }
    if(speechSynthesis.speaking){
      try{
        speechSynthesis.pause();
        speechSynthesis.resume();
      }catch(_){}
    }
  },10000);
}
897+
858898
function _setState(state){
859899
_voiceModeState=state;
860900
indicator.className='voice-mode-indicator '+state;
@@ -867,6 +907,7 @@ window._micPendingSend=window._micPendingSend||false;
867907

868908
function _startListening(){
869909
if(!_voiceModeActive) return;
910+
_clearBrowserTtsRecovery();
870911
_setState('listening');
871912

872913
_recognition=new SpeechRecognition();
@@ -1057,14 +1098,27 @@ window._micPendingSend=window._micPendingSend||false;
10571098
if(!isNaN(savedPitch)) utter.pitch=Math.min(2,Math.max(0,savedPitch));
10581099

10591100
utter.onend=()=>{
1101+
_browserTtsSuppressNextErrorRearm=false;
1102+
_clearBrowserTtsRecovery();
10601103
// After speaking, go back to listening
1061-
if(_voiceModeActive) setTimeout(()=>_startListening(),500);
1104+
if(_voiceModeActive&&_voiceModeState==='speaking') setTimeout(()=>_startListening(),500);
10621105
};
10631106
utter.onerror=()=>{
1107+
_clearBrowserTtsRecovery();
1108+
if(_browserTtsSuppressNextErrorRearm){
1109+
_browserTtsSuppressNextErrorRearm=false;
1110+
return;
1111+
}
10641112
if(_voiceModeActive) setTimeout(()=>_startListening(),1000);
10651113
};
10661114

1067-
speechSynthesis.speak(utter);
1115+
_armBrowserTtsRecovery(clean, utter.rate);
1116+
try{
1117+
speechSynthesis.speak(utter);
1118+
}catch(_){
1119+
_clearBrowserTtsRecovery();
1120+
if(_voiceModeActive) setTimeout(()=>_startListening(),1000);
1121+
}
10681122
}
10691123

10701124
// Hook into response completion — observe when the agent finishes
@@ -1121,10 +1175,12 @@ window._micPendingSend=window._micPendingSend||false;
11211175
_voiceModeActive=false;
11221176
_voiceModeState='idle';
11231177
_voiceModeThinkingSid=null;
1178+
_browserTtsSuppressNextErrorRearm=false;
11241179
modeBtn.classList.remove('active');
11251180
_setButtonTooltip(modeBtn, t('voice_mode_toggle'));
11261181
bar.style.display='none';
11271182
clearTimeout(_silenceTimer);
1183+
_clearBrowserTtsRecovery();
11281184
try{ if(_recognition) _recognition.abort(); }catch(_){}
11291185
_recognition=null;
11301186
if(typeof stopTTS==='function') stopTTS();
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
from pathlib import Path
2+
import re
3+
4+
5+
REPO = Path(__file__).resolve().parents[1]
6+
7+
8+
def _extract_function(src: str, name: str) -> str:
9+
anchor = f"function {name}("
10+
start = src.find(anchor)
11+
assert start != -1, f"{name}() must exist"
12+
body_start = src.find("{", start)
13+
assert body_start != -1, f"{name}() must have a body"
14+
depth = 1
15+
idx = body_start + 1
16+
while depth and idx < len(src):
17+
if src[idx] == "{":
18+
depth += 1
19+
elif src[idx] == "}":
20+
depth -= 1
21+
idx += 1
22+
assert depth == 0, f"{name}() body must balance braces"
23+
return src[start:idx]
24+
25+
26+
def test_boot_js_declares_browser_tts_recovery_helpers():
    """boot.js must declare the recovery state variables and both helpers."""
    boot_js = REPO.joinpath("static", "boot.js").read_text(encoding="utf-8")
    required_declarations = (
        "let _browserTtsKeepAlive=null;",
        "let _browserTtsWatchdog=null;",
        "let _browserTtsSuppressNextErrorRearm=false;",
        "function _clearBrowserTtsRecovery()",
        "function _armBrowserTtsRecovery(clean, rate)",
    )
    for declaration in required_declarations:
        assert declaration in boot_js
33+
34+
35+
def test_browser_tts_watchdog_rearms_listening_if_onend_drops():
    """_armBrowserTtsRecovery() must contain the watchdog and keep-alive code."""
    boot_js = REPO.joinpath("static", "boot.js").read_text(encoding="utf-8")
    arm_body = _extract_function(boot_js, "_armBrowserTtsRecovery")
    required_snippets = (
        # Watchdog: guarded on the speaking state, cancels and re-listens.
        "_browserTtsWatchdog=setTimeout",
        "_voiceModeState!=='speaking'",
        "_browserTtsSuppressNextErrorRearm=true;",
        "speechSynthesis.cancel()",
        "_startListening();",
        # Keep-alive: periodic pause/resume.
        "_browserTtsKeepAlive=setInterval",
        "speechSynthesis.pause();",
        "speechSynthesis.resume();",
    )
    for snippet in required_snippets:
        assert snippet in arm_body
46+
47+
48+
def test_browser_tts_callbacks_and_deactivate_clear_recovery_handles():
    """_speakResponse() callbacks and _deactivate() must clear recovery state."""
    boot_js = REPO.joinpath("static", "boot.js").read_text(encoding="utf-8")

    speak_body = _extract_function(boot_js, "_speakResponse")
    for snippet in (
        "const utter=new SpeechSynthesisUtterance(clean);",
        "utter.onend=()=>{",
        "utter.onerror=()=>{",
    ):
        assert snippet in speak_body

    clear_calls = speak_body.count("_clearBrowserTtsRecovery();")
    assert clear_calls >= 2, (
        "Both browser TTS completion callbacks must clear watchdog/keep-alive handles."
    )

    for snippet in (
        "_browserTtsSuppressNextErrorRearm=false;",
        "_voiceModeActive&&_voiceModeState==='speaking'",
        "if(_browserTtsSuppressNextErrorRearm){",
        "_armBrowserTtsRecovery(clean, utter.rate);",
    ):
        assert snippet in speak_body

    deactivate_body = _extract_function(boot_js, "_deactivate")
    assert "_clearBrowserTtsRecovery();" in deactivate_body, (
        "_deactivate() must clear browser TTS watchdog/keep-alive handles."
    )
    assert "_browserTtsSuppressNextErrorRearm=false;" in deactivate_body
67+
68+
69+
def test_edge_audio_branch_stays_separate():
    """The Edge Audio branch must exist and must not call the arm helper."""
    boot_js = REPO.joinpath("static", "boot.js").read_text(encoding="utf-8")

    # Capture everything between the engine check and the branch's closing
    # "return; }" pair.
    edge_branch = re.compile(
        r'if\(engine==="edge"\)\{(.*?)\n\s+return;\n\s+\}',
        re.DOTALL,
    )
    edge_match = edge_branch.search(boot_js)
    assert edge_match, "Edge audio branch must exist"

    edge_body = edge_match.group(1)
    for snippet in ("const audio = new Audio(url);", "audio.onended = () => {"):
        assert snippet in edge_body
    assert "_armBrowserTtsRecovery" not in edge_body, (
        "The browser speechSynthesis workaround must not be injected into the Edge audio branch."
    )

0 commit comments

Comments
 (0)