Skip to content

Commit f4c3d0b

Browse files
committed
fix(ui): preserve playback after audio unlock
1 parent e297fca commit f4c3d0b

4 files changed

Lines changed: 159 additions & 12 deletions

File tree

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
// @vitest-environment jsdom
2+
3+
import { act, cleanup, renderHook } from "@testing-library/react";
4+
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
5+
import { useVoiceChat } from "../../hooks/useVoiceChat";
6+
import type { VoiceChatState } from "../../voice/voice-chat-types";
7+
import { useChatVoiceController } from "./chat-view-hooks";
8+
9+
// Stub the API client: getConfig and updateConfig are mocks that resolve to an
// empty object.
vi.mock("../../api/client", () => ({
10+
client: {
11+
getConfig: vi.fn(async () => ({})),
12+
updateConfig: vi.fn(async () => ({})),
13+
},
14+
}));
15+
16+
// Stub continuous chat: the mocked hook reports it as disabled with mode "off",
// and its setters are inert mocks.
vi.mock("../../hooks/useContinuousChat", () => ({
17+
DEFAULT_VOICE_CONTINUOUS_MODE: "off",
18+
useContinuousChat: vi.fn(() => ({
19+
enabled: false,
20+
setEnabled: vi.fn(),
21+
mode: "off",
22+
setMode: vi.fn(),
23+
})),
24+
}));
25+
26+
// Stub provider presets: both ASR and TTS default to "local-inference".
vi.mock("../../hooks/useDefaultProviderPresets", () => ({
27+
useDefaultProviderPresets: vi.fn(() => ({
28+
defaults: { asr: "local-inference", tts: "local-inference" },
29+
})),
30+
}));
31+
32+
// Stub document visibility: the mocked hook always returns "visible".
vi.mock("../../hooks/useDocumentVisibility", () => ({
33+
useDocumentVisibility: vi.fn(() => "visible"),
34+
}));
35+
36+
// Stub useTimeout: the mocked hook hands back the global setTimeout unchanged.
vi.mock("../../hooks/useTimeout", () => ({
37+
useTimeout: vi.fn(() => ({ setTimeout: globalThis.setTimeout })),
38+
}));
39+
40+
// Stub useVoiceChat with a bare mock; it has no implementation here. The
// describe block's beforeEach installs one that returns the current fixture.
vi.mock("../../hooks/useVoiceChat", () => ({
41+
useVoiceChat: vi.fn(),
42+
}));
43+
44+
// Stub the companion scene status: the mocked hook reports avatarReady as true.
vi.mock("../companion/injected", () => ({
45+
useCompanionSceneStatus: vi.fn(() => ({ avatarReady: true })),
46+
}));
47+
48+
// Typed handle on the mocked useVoiceChat so tests can call mockImplementation.
const useVoiceChatMock = vi.mocked(useVoiceChat);
49+
50+
/**
 * Builds a VoiceChatState fixture for the mocked useVoiceChat hook.
 *
 * The defaults describe an idle state: not listening, not speaking, capture
 * mode "idle", and voiceUnlockedGeneration 0. Every callback is a fresh
 * vi.fn() created on each call.
 *
 * @param overrides Fields that replace the defaults; they are spread last, so
 *   any key present in `overrides` wins.
 * @returns The fixture object with the overrides applied.
 */
function makeVoiceState(
51+
overrides: Partial<VoiceChatState> = {},
52+
): VoiceChatState {
53+
return {
54+
assistantTtsQuality: "enhanced",
55+
captureMode: "idle",
56+
interimTranscript: "",
57+
isListening: false,
58+
isSpeaking: false,
59+
mouthOpen: 0,
60+
queueAssistantSpeech: vi.fn(),
61+
speak: vi.fn(),
62+
startListening: vi.fn(async () => {}),
63+
stopListening: vi.fn(async () => {}),
64+
stopSpeaking: vi.fn(),
65+
supported: true,
66+
toggleListening: vi.fn(),
67+
usingAudioAnalysis: false,
68+
voiceUnlockedGeneration: 0,
69+
// Spread last so caller-supplied fields replace the defaults above.
...overrides,
70+
};
71+
}
72+
73+
// Options object passed to useChatVoiceController by every test in this file.
// It is a single module-level object, so the vi.fn() handlers below are the
// same instances across tests.
const baseOptions = {
74+
activeConversationId: "conversation-1",
75+
agentVoiceMuted: false,
76+
chatFirstTokenReceived: false,
77+
chatInput: "",
78+
chatSending: false,
79+
conversationMessages: [],
80+
elizaCloudConnected: false,
81+
elizaCloudHasPersistedKey: false,
82+
elizaCloudVoiceProxyAvailable: false,
83+
handleChatEdit: vi.fn(async () => true),
84+
handleChatSend: vi.fn(async () => {}),
85+
isComposerLocked: false,
86+
isGameModal: false,
87+
setState: vi.fn(),
88+
uiLanguage: "en",
89+
};
90+
91+
// Tests for how useChatVoiceController reacts to a change in
// voiceUnlockedGeneration and how it forwards manual "Play message" requests.
describe("useChatVoiceController voice playback unlock", () => {
92+
// State object returned by the mocked useVoiceChat; tests reassign it to
// simulate the voice hook producing a new state on the next render.
let voiceState: VoiceChatState;
93+
94+
beforeEach(() => {
95+
voiceState = makeVoiceState();
96+
// The implementation reads `voiceState` at call time, so reassigning the
// variable and rerendering feeds the new state to the hook under test.
useVoiceChatMock.mockImplementation(() => voiceState);
97+
});
98+
99+
afterEach(() => {
100+
cleanup();
101+
vi.clearAllMocks();
102+
});
103+
104+
it("does not cancel speech queued by the same user gesture that unlocks audio", () => {
105+
const { rerender } = renderHook(() => useChatVoiceController(baseOptions));
106+
// Keep the stopSpeaking mock from the first render and reuse it in the next
// state, so a call made during either render is seen by the same mock.
const stopSpeaking = vi.mocked(voiceState.stopSpeaking);
107+
108+
// Simulate the audio unlock: voiceUnlockedGeneration goes from 0 to 1.
voiceState = makeVoiceState({
109+
stopSpeaking,
110+
voiceUnlockedGeneration: 1,
111+
});
112+
113+
act(() => {
114+
rerender();
115+
});
116+
117+
expect(stopSpeaking).not.toHaveBeenCalled();
118+
});
119+
120+
it("passes message telemetry through manual Play message speech", () => {
121+
const { result } = renderHook(() => useChatVoiceController(baseOptions));
122+
123+
act(() => {
124+
result.current.handleSpeakMessage("message-1", "hello from Milady");
125+
});
126+
127+
// speak must receive the text plus the message id as telemetry.
expect(voiceState.speak).toHaveBeenCalledWith("hello from Milady", {
128+
telemetry: { messageId: "message-1" },
129+
});
130+
});
131+
});

packages/ui/src/components/pages/chat-view-hooks.tsx

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,13 @@ export function useChatVoiceController(options: {
414414

415415
const handleVoicePlaybackStart = useCallback(
416416
(event: VoicePlaybackStartEvent) => {
417+
if (event.messageId) {
418+
rememberCompanionSpeech(
419+
activeConversationId,
420+
event.messageId,
421+
event.text,
422+
);
423+
}
417424
ttsDebug("chat:playback-start", {
418425
provider: event.provider,
419426
segment: event.segment,
@@ -465,7 +472,7 @@ export function useChatVoiceController(options: {
465472
: (prev?.assistantStreamToVoiceStartMs ?? null),
466473
}));
467474
},
468-
[],
475+
[activeConversationId],
469476
);
470477

471478
const cloudVoiceAvailable = useMemo(() => {
@@ -511,8 +518,9 @@ export function useChatVoiceController(options: {
511518
voiceUnlockedGeneration,
512519
} = voice;
513520

514-
// After the user gesture unlocks audio, clear progressive TTS dedupe state so
515-
// auto-speak can queue the greeting again (ElevenLabs was likely skipped once).
521+
// After the user gesture unlocks audio, clear only the progressive TTS dedupe
522+
// state so auto-speak can retry. Do not stop speaking here: this effect runs
523+
// from the same click that may have just queued Play Greeting / Play Message.
516524
const prevVoiceUnlockGenRef = useRef<number | null>(null);
517525
useLayoutEffect(() => {
518526
if (prevVoiceUnlockGenRef.current === null) {
@@ -521,8 +529,8 @@ export function useChatVoiceController(options: {
521529
}
522530
if (prevVoiceUnlockGenRef.current === voiceUnlockedGeneration) return;
523531
prevVoiceUnlockGenRef.current = voiceUnlockedGeneration;
524-
stopSpeaking();
525-
}, [voiceUnlockedGeneration, stopSpeaking]);
532+
companionBootstrapAutoSpeakRef.current = null;
533+
}, [voiceUnlockedGeneration]);
526534

527535
const beginVoiceCapture = useCallback(
528536
(mode: Exclude<VoiceCaptureMode, "idle"> = "compose") => {
@@ -557,10 +565,9 @@ export function useChatVoiceController(options: {
557565
(messageId: string, text: string) => {
558566
if (!text.trim()) return;
559567
suppressedAssistantSpeechRef.current = { messageId, text };
560-
rememberCompanionSpeech(activeConversationId, messageId, text);
561-
speak(text);
568+
speak(text, { telemetry: { messageId } });
562569
},
563-
[activeConversationId, speak],
570+
[speak],
564571
);
565572

566573
const handleEditMessage = useCallback(
@@ -743,7 +750,6 @@ export function useChatVoiceController(options: {
743750
replace: replacePlayback,
744751
telemetry,
745752
});
746-
rememberCompanionSpeech(activeConversationId, messageId, text);
747753
suppressedAssistantSpeechRef.current = null;
748754
companionBootstrapAutoSpeakRef.current = {
749755
tick,

packages/ui/src/hooks/useVoiceChat.ts

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1831,7 +1831,13 @@ export function useVoiceChat(options: VoiceChatOptions): VoiceChatState {
18311831
// ── Public speak APIs ─────────────────────────────────────────────
18321832

18331833
const speak = useCallback(
1834-
(text: string, speakOptions?: { append?: boolean }) => {
1834+
(
1835+
text: string,
1836+
speakOptions?: {
1837+
append?: boolean;
1838+
telemetry?: VoiceAssistantSpeechTelemetry;
1839+
},
1840+
) => {
18351841
if (assistantTtsDebounceRef.current != null) {
18361842
clearTimeout(assistantTtsDebounceRef.current);
18371843
assistantTtsDebounceRef.current = null;
@@ -1841,6 +1847,7 @@ export function useVoiceChat(options: VoiceChatOptions): VoiceChatState {
18411847
text,
18421848
append: Boolean(speakOptions?.append),
18431849
segment: "full",
1850+
telemetry: speakOptions?.telemetry,
18441851
});
18451852
},
18461853
[enqueueSpeech],
@@ -1923,7 +1930,7 @@ export function useVoiceChat(options: VoiceChatOptions): VoiceChatState {
19231930
latestSpeakable: "",
19241931
finalQueued: false,
19251932
replacePlaybackOnFirstClip: queueOptions?.replace !== false,
1926-
telemetry: queueOptions?.telemetry,
1933+
telemetry: { messageId, ...queueOptions?.telemetry },
19271934
};
19281935
} else if (queueOptions?.telemetry) {
19291936
current.telemetry = {

packages/ui/src/voice/voice-chat-types.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,10 @@ export interface VoiceChatState {
238238
/** End voice capture and optionally submit the transcript */
239239
stopListening: (options?: { submit?: boolean }) => Promise<void>;
240240
/** Speak text aloud with mouth animation */
241-
speak: (text: string, options?: { append?: boolean }) => void;
241+
speak: (
242+
text: string,
243+
options?: { append?: boolean; telemetry?: VoiceAssistantSpeechTelemetry },
244+
) => void;
242245
/** Progressively speak an assistant message while it streams */
243246
queueAssistantSpeech: (
244247
messageId: string,

0 commit comments

Comments
 (0)