@@ -2919,24 +2919,34 @@ <h1><span class="brand-simp">Simpatico</span><span class="brand-hr">HR</span></h
29192919 // ══════════════════════════════════════════════════════════════
29202920 // START SESSION
29212921 // ══════════════════════════════════════════════════════════════
2922- async function startSession ( ) {
2923- // ── CRITICAL: Warm up TTS engine on user gesture ──
2924- // Chrome's autoplay policy requires the FIRST speechSynthesis.speak()
2925- // to happen during a user gesture callback. Since we do async work
2926- // (API fetch) before speaking, the gesture context expires and Chrome
2927- // silently drops all subsequent speak() calls. This empty utterance
2928- // "unlocks" the TTS engine while we still have gesture context.
2922+ // Tracks whether the TTS warmup utterance has finished playing (set in its onend handler)
2923+ let _ttsUnlocked = false ;
2924+
2925+ // Unlock TTS with a real utterance (empty strings are silently ignored by Chrome)
2926+ function unlockTTS ( ) {
29292927 try {
2930- const warmup = new SpeechSynthesisUtterance ( '' ) ;
2931- warmup . volume = 0 ;
2932- warmup . rate = 10 ; // fastest possible
2928+ state . voice . synth . cancel ( ) ; // clear any stale queue
2929+ const warmup = new SpeechSynthesisUtterance ( '.' ) ;
2930+ warmup . volume = 0.01 ; // near-silent but non-zero (volume 0 may not init audio pipeline)
2931+ warmup . rate = 10 ; // fastest possible
2932+ warmup . pitch = 0.01 ;
29332933 if ( state . voice . preferredVoice ) warmup . voice = state . voice . preferredVoice ;
29342934 warmup . lang = state . interviewLanguage || 'en-IN' ;
2935+ warmup . onend = ( ) => { _ttsUnlocked = true ; console . log ( '[TTS] ✓ Engine unlocked via warmup' ) ; } ;
2936+ warmup . onerror = ( ) => { console . warn ( '[TTS] Warmup utterance error — will retry' ) ; } ;
29352937 state . voice . synth . speak ( warmup ) ;
2936- console . log ( '[TTS] Engine warmed up on user gesture ' ) ;
2938+ console . log ( '[TTS] Warmup utterance dispatched (text=".", vol=0.01) ' ) ;
29372939 } catch ( e ) {
29382940 console . warn ( '[TTS] Warmup failed:' , e ) ;
29392941 }
2942+ }
2943+
2944+ async function startSession ( ) {
2945+ // ── CRITICAL: Warm up TTS engine on user gesture ──
2946+ // Chrome's autoplay policy requires the FIRST speechSynthesis.speak()
2947+ // to happen during a user gesture callback. Must use NON-EMPTY text
2948+ // (empty strings are silently ignored) with non-zero volume.
2949+ unlockTTS ( ) ;
29402950
29412951 // Request screen share
29422952 if ( ! state . media . screen ) {
@@ -2963,6 +2973,12 @@ <h1><span class="brand-simp">Simpatico</span><span class="brand-hr">HR</span></h
29632973 }
29642974 }
29652975
2976+ // Re-unlock TTS after getDisplayMedia (the system dialog breaks gesture context)
2977+ if ( ! _ttsUnlocked ) {
2978+ console . log ( '[TTS] Re-unlocking after screen share dialog...' ) ;
2979+ unlockTTS ( ) ;
2980+ }
2981+
29662982 document . getElementById ( 'setupOverlay' ) . style . display = 'none' ;
29672983 document . getElementById ( 'interviewApp' ) . style . display = 'flex' ;
29682984 document . getElementById ( 'selfVid' ) . srcObject = state . media . stream ;
@@ -2987,8 +3003,13 @@ <h1><span class="brand-simp">Simpatico</span><span class="brand-hr">HR</span></h
29873003 setOrbState ( 'thinking' ) ;
29883004 const introQuestion = await engine . generateNextQuestion ( null , null ) ;
29893005
2990- // Small delay to let warmup utterance finish naturally
2991- await sleep ( 200 ) ;
3006+ // Wait for TTS warmup to actually complete (onend sets _ttsUnlocked)
3007+ // The '.' utterance at rate=10 finishes in ~50ms; we poll for up to 2s as a safety net (the full 2s elapses if onend never fires)
3008+ const ttsWaitStart = Date . now ( ) ;
3009+ while ( ! _ttsUnlocked && ( Date . now ( ) - ttsWaitStart ) < 2000 ) {
3010+ await sleep ( 50 ) ;
3011+ }
3012+ console . log ( '[TTS] Warmup wait done, unlocked=' , _ttsUnlocked , 'waited=' , Date . now ( ) - ttsWaitStart , 'ms' ) ;
29923013
29933014 await aiSpeak ( introQuestion ) ;
29943015 }
@@ -3034,11 +3055,11 @@ <h1><span class="brand-simp">Simpatico</span><span class="brand-hr">HR</span></h
30343055 const listenVizEl = document . getElementById ( 'listenViz' ) ;
30353056 if ( listenVizEl ) listenVizEl . classList . remove ( 'active' ) ;
30363057
3037- // Only cancel if something is actually playing (avoids Chrome bug
3038- // where cancel() right before speak() kills the engine)
3039- if ( state . voice . synth . speaking || state . voice . synth . pending ) {
3040- state . voice . synth . cancel ( ) ;
3041- }
3058+ // ── DO NOT call synth.cancel() here ──
3059+ // Chrome bug: cancel() right before speak() kills the internal audio
3060+ // pipeline, causing all subsequent speak() calls to produce no sound.
3061+ // The queue is already clean because unlockTTS() (called from startSession)
3062+ // cancels stale entries before dispatching the warmup, and each chunk completes naturally via onend/onerror.
30423063
30433064 // If no voices are available, try reloading them
30443065 if ( ! state . voice . preferredVoice ) {
@@ -3148,19 +3169,54 @@ <h1><span class="brand-simp">Simpatico</span><span class="brand-hr">HR</span></h
31483169 } ;
31493170
31503171 try {
3172+ u . onstart = ( ) => {
3173+ console . log ( '[TTS] ▶ Chunk' , idx , 'started playing, text:' , chunkText . substring ( 0 , 40 ) + '...' ) ;
3174+ } ;
31513175 state . voice . synth . speak ( u ) ;
31523176 startTTSKeepAlive ( ) ;
31533177
3154- // Extra safety: if synth is not speaking after 500ms, it failed silently
3178+ // Extra safety: if synth is not speaking after 800ms, try re-speaking
31553179 setTimeout ( ( ) => {
31563180 if ( ! resolved && ! state . voice . synth . speaking && ! state . voice . synth . pending ) {
3157- console . warn ( '[TTS] Speech did not start, skipping chunk' , idx ) ;
3158- resolved = true ;
3159- clearTimeout ( watchdog ) ;
3160- idx ++ ;
3161- speakNext ( ) ;
3181+ console . warn ( '[TTS] Speech did not start after 800ms, chunk' , idx ,
3182+ 'synth.paused=' , state . voice . synth . paused ,
3183+ 'voices=' , state . voice . synth . getVoices ( ) . length ,
3184+ 'voice=' , state . voice . preferredVoice ?. name || 'NONE' ) ;
3185+ // Try one more time — sometimes Chrome needs a retry
3186+ try {
3187+ const retry = new SpeechSynthesisUtterance ( chunkText ) ;
3188+ retry . lang = u . lang ;
3189+ if ( state . voice . preferredVoice ) retry . voice = state . voice . preferredVoice ;
3190+ retry . rate = u . rate ;
3191+ retry . pitch = u . pitch ;
3192+ retry . volume = u . volume ;
3193+ retry . onend = ( ) => {
3194+ if ( ! resolved && state . voice . isSpeaking ) {
3195+ resolved = true ;
3196+ clearTimeout ( watchdog ) ;
3197+ idx ++ ;
3198+ speakNext ( ) ;
3199+ }
3200+ } ;
3201+ retry . onerror = ( ) => {
3202+ if ( ! resolved && state . voice . isSpeaking ) {
3203+ resolved = true ;
3204+ clearTimeout ( watchdog ) ;
3205+ idx ++ ;
3206+ speakNext ( ) ;
3207+ }
3208+ } ;
3209+ state . voice . synth . speak ( retry ) ;
3210+ console . log ( '[TTS] Retry speak for chunk' , idx ) ;
3211+ } catch ( retryErr ) {
3212+ console . warn ( '[TTS] Retry also failed:' , retryErr ) ;
3213+ resolved = true ;
3214+ clearTimeout ( watchdog ) ;
3215+ idx ++ ;
3216+ speakNext ( ) ;
3217+ }
31623218 }
3163- } , 500 ) ;
3219+ } , 800 ) ;
31643220 } catch ( ttsErr ) {
31653221 console . error ( '[TTS] speak() threw:' , ttsErr ) ;
31663222 resolved = true ;
0 commit comments