From 09f8fede70ae67d8cac4dc77de8899c283811f1d Mon Sep 17 00:00:00 2001
From: Kawano
Date: Mon, 9 Mar 2026 06:09:06 +0900
Subject: [PATCH 1/7] feat: add Text-to-Speech support via Web Speech API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add browser-native TTS integration that reads Claude's responses aloud:
- useSpeechOutput hook: Web Speech API wrapper with voice selection,
rate/pitch control, language filtering, and streaming-aware chunking
- TtsContext: React context provider for app-wide TTS state
- QuickSettingsTtsSection: Full TTS configuration panel with voice
preview, rate/pitch sliders, and language filter
- ChatComposer: Toggle button with lucide-react icons (Volume2/VolumeX/
StopCircle) and graceful degradation when no voices available
- QuickSettingsContent: TTS section with voices=0 guard
All settings persist to localStorage. Falls back to navigator.language
instead of hardcoded locale. No external dependencies required — uses
the browser's built-in speechSynthesis API.
Co-Authored-By: Claude Opus 4.6
---
src/components/chat/view/ChatInterface.tsx | 5 +-
.../chat/view/subcomponents/ChatComposer.tsx | 26 ++
.../view/QuickSettingsContent.tsx | 30 ++
.../view/QuickSettingsTtsSection.tsx | 188 +++++++++++++
src/contexts/TtsContext.tsx | 49 ++++
src/hooks/useSpeechOutput.ts | 265 ++++++++++++++++++
6 files changed, 561 insertions(+), 2 deletions(-)
create mode 100644 src/components/quick-settings-panel/view/QuickSettingsTtsSection.tsx
create mode 100644 src/contexts/TtsContext.tsx
create mode 100644 src/hooks/useSpeechOutput.ts
diff --git a/src/components/chat/view/ChatInterface.tsx b/src/components/chat/view/ChatInterface.tsx
index 90c1921d7..9e1db3976 100644
--- a/src/components/chat/view/ChatInterface.tsx
+++ b/src/components/chat/view/ChatInterface.tsx
@@ -7,6 +7,7 @@ import { useChatProviderState } from '../hooks/useChatProviderState';
import { useChatSessionState } from '../hooks/useChatSessionState';
import { useChatRealtimeHandlers } from '../hooks/useChatRealtimeHandlers';
import { useChatComposerState } from '../hooks/useChatComposerState';
+import { TtsProvider, useTts } from '../../../contexts/TtsContext';
import ChatMessagesPane from './subcomponents/ChatMessagesPane';
import ChatComposer from './subcomponents/ChatComposer';
@@ -272,7 +273,7 @@ function ChatInterface({
}
return (
- <>
+
- >
+
);
}
diff --git a/src/components/chat/view/subcomponents/ChatComposer.tsx b/src/components/chat/view/subcomponents/ChatComposer.tsx
index 35bf7548b..0da6d6a36 100644
--- a/src/components/chat/view/subcomponents/ChatComposer.tsx
+++ b/src/components/chat/view/subcomponents/ChatComposer.tsx
@@ -11,7 +11,9 @@ import type {
SetStateAction,
TouchEvent,
} from 'react';
+import { Volume2, VolumeX, StopCircle } from 'lucide-react';
import MicButton from '../../../mic-button/view/MicButton';
+import { useTts } from '../../../../contexts/TtsContext';
import type { PendingPermissionRequest, PermissionMode, Provider } from '../../types/types';
import CommandMenu from './CommandMenu';
import ClaudeStatus from './ClaudeStatus';
@@ -150,6 +152,7 @@ export default function ChatComposer({
sendByCtrlEnter,
onTranscript,
}: ChatComposerProps) {
+ const tts = useTts();
const { t } = useTranslation('chat');
const textareaRect = textareaRef.current?.getBoundingClientRect();
const commandMenuPosition = {
@@ -326,6 +329,29 @@ export default function ChatComposer({
+ {tts && tts.availableVoices.length > 0 && (
+
+ )}
+
+ );
+}
type QuickSettingsContentProps = {
isDarkMode: boolean;
@@ -76,6 +104,8 @@ export default function QuickSettingsContent({
+
+
);
diff --git a/src/components/quick-settings-panel/view/QuickSettingsTtsSection.tsx b/src/components/quick-settings-panel/view/QuickSettingsTtsSection.tsx
new file mode 100644
index 000000000..aa70dd49f
--- /dev/null
+++ b/src/components/quick-settings-panel/view/QuickSettingsTtsSection.tsx
@@ -0,0 +1,188 @@
+import { Volume2, VolumeX, Play } from 'lucide-react';
+import type { VoiceInfo } from '../../../hooks/useSpeechOutput';
+import { SETTING_ROW_CLASS } from '../constants';
+import QuickSettingsSection from './QuickSettingsSection';
+
// Props for the TTS configuration panel in the quick-settings drawer.
type QuickSettingsTtsSectionProps = {
  // Master on/off switch state and its toggle callback.
  enabled: boolean;
  onToggle: () => void;
  // Speech rate multiplier (slider range 0.5x–3.0x; 1.0 = normal speed).
  rate: number;
  onRateChange: (rate: number) => void;
  // Voice pitch (1.0 = default).
  pitch: number;
  onPitchChange: (pitch: number) => void;
  // URI of the selected system voice; '' means pick a voice automatically.
  voiceURI: string;
  onVoiceChange: (voiceURI: string) => void;
  // Language tag used to filter the voice list; '' means show all voices.
  lang: string;
  onLangChange: (lang: string) => void;
  // Voices matching the current language filter.
  filteredVoices: VoiceInfo[];
  // All language tags present across the full system voice list.
  availableLanguages: string[];
  // Speak a short sample with the current settings.
  onTestVoice: () => void;
  // Whether speech is currently playing, and the callback to stop it.
  isSpeaking: boolean;
  onStop: () => void;
};
+
+export default function QuickSettingsTtsSection({
+ enabled,
+ onToggle,
+ rate,
+ onRateChange,
+ pitch,
+ onPitchChange,
+ voiceURI,
+ onVoiceChange,
+ lang,
+ onLangChange,
+ filteredVoices,
+ availableLanguages,
+ onTestVoice,
+ isSpeaking,
+ onStop,
+}: QuickSettingsTtsSectionProps) {
+ return (
+
+ {/* Enable/Disable toggle */}
+
+
+ {enabled ? (
+
+ ) : (
+
+ )}
+ TTS Enabled
+
+
+
+
+ {enabled && (
+ <>
+ {/* Language filter */}
+
+
+
+
+
+ {/* Voice selection */}
+
+
+
+
+
+ {/* Rate slider */}
+
+
+
+
+ {rate.toFixed(1)}x
+
+
+
onRateChange(parseFloat(e.target.value))}
+ className="w-full accent-blue-600"
+ />
+
+ 0.5x
+ 1.0x
+ 2.0x
+ 3.0x
+
+
+
+ {/* Pitch slider */}
+
+
+
+
+ {pitch.toFixed(1)}
+
+
+
onPitchChange(parseFloat(e.target.value))}
+ className="w-full accent-blue-600"
+ />
+
+ Low
+ Normal
+ High
+
+
+
+ {/* Test / Stop button */}
+
+
+
+ >
+ )}
+
+ );
+}
diff --git a/src/contexts/TtsContext.tsx b/src/contexts/TtsContext.tsx
new file mode 100644
index 000000000..aea9960a9
--- /dev/null
+++ b/src/contexts/TtsContext.tsx
@@ -0,0 +1,49 @@
+import { createContext, useContext, type ReactNode } from 'react';
+import { useSpeechOutput } from '../hooks/useSpeechOutput';
+import type { VoiceInfo } from '../hooks/useSpeechOutput';
+
// Shape of the value exposed through TtsContext: persisted TTS settings,
// playback state/controls, and voice lists derived from the Web Speech API.
type TtsContextValue = {
  enabled: boolean;              // master TTS on/off switch
  toggle: () => void;
  rate: number;                  // speech rate multiplier
  setRate: (rate: number) => void;
  pitch: number;                 // voice pitch
  setPitch: (pitch: number) => void;
  voiceURI: string;              // selected voice URI ('' = automatic)
  setVoiceURI: (uri: string) => void;
  lang: string;                  // language filter for the voice list
  setLang: (lang: string) => void;
  isSpeaking: boolean;           // whether speech is currently playing
  speak: (text: string) => void;
  stop: () => void;
  testVoice: () => void;         // speak a short preview sample
  availableVoices: VoiceInfo[];  // every system voice
  filteredVoices: VoiceInfo[];   // voices matching `lang`
  availableLanguages: string[];  // unique language tags across all voices
};
+
+const TtsContext = createContext(null);
+
// Minimal view of a chat message needed for TTS; any extra fields pass
// through the index signature untouched.
// NOTE(review): duplicated in src/hooks/useSpeechOutput.ts — consider moving
// to a shared types module.
type ChatMessage = {
  type: string;                  // message role tag, e.g. 'assistant'
  content?: string;              // text to be spoken once finalized
  isStreaming?: boolean;         // true while the message is still arriving
  isToolUse?: boolean;           // tool-use messages are never spoken
  isInteractivePrompt?: boolean; // interactive prompts are never spoken
  [key: string]: unknown;
};
+
+export function TtsProvider({
+ chatMessages,
+ children,
+}: {
+ chatMessages: ChatMessage[];
+ children: ReactNode;
+}) {
+ const tts = useSpeechOutput(chatMessages);
+ return {children};
+}
+
+export function useTts(): TtsContextValue | null {
+ return useContext(TtsContext);
+}
diff --git a/src/hooks/useSpeechOutput.ts b/src/hooks/useSpeechOutput.ts
new file mode 100644
index 000000000..f402b6c35
--- /dev/null
+++ b/src/hooks/useSpeechOutput.ts
@@ -0,0 +1,265 @@
+import { useCallback, useEffect, useRef, useState } from 'react';
+
+type ChatMessage = {
+ type: string;
+ content?: string;
+ isStreaming?: boolean;
+ isToolUse?: boolean;
+ isInteractivePrompt?: boolean;
+ [key: string]: unknown;
+};
+
+export type VoiceInfo = {
+ name: string;
+ lang: string;
+ localService: boolean;
+ voiceURI: string;
+};
+
+const STORAGE_KEY = 'tts_enabled';
+const RATE_STORAGE_KEY = 'tts_rate';
+const PITCH_STORAGE_KEY = 'tts_pitch';
+const VOICE_STORAGE_KEY = 'tts_voice_uri';
+const LANG_STORAGE_KEY = 'tts_lang';
+
+/**
+ * Strip markdown formatting for cleaner TTS output.
+ */
+function stripMarkdown(text: string): string {
+ return text
+ .replace(/```[\s\S]*?```/g, '')
+ .replace(/`[^`]+`/g, '')
+ .replace(/\*\*(.+?)\*\*/g, '$1')
+ .replace(/\*(.+?)\*/g, '$1')
+ .replace(/__(.+?)__/g, '$1')
+ .replace(/_(.+?)_/g, '$1')
+ .replace(/~~(.+?)~~/g, '$1')
+ .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
+ .replace(/^#{1,6}\s+/gm, '')
+ .replace(/^[-*+]\s+/gm, '')
+ .replace(/^\d+\.\s+/gm, '')
+ .replace(/^>\s+/gm, '')
+ .replace(/\n{2,}/g, '\n')
+ .trim();
+}
+
+function readStorage(key: string, fallback: string): string {
+ try {
+ return localStorage.getItem(key) ?? fallback;
+ } catch {
+ return fallback;
+ }
+}
+
+function readStorageFloat(key: string, fallback: number): number {
+ try {
+ const v = localStorage.getItem(key);
+ return v ? parseFloat(v) : fallback;
+ } catch {
+ return fallback;
+ }
+}
+
+/**
+ * Hook that speaks finalized assistant messages using the Web Speech API.
+ *
+ * Features:
+ * - Voice selection from available system voices
+ * - Adjustable rate and pitch
+ * - Language filter for voice list
+ * - All settings persisted in localStorage
+ */
+export function useSpeechOutput(chatMessages: ChatMessage[]) {
+ const [enabled, setEnabled] = useState(() => readStorage(STORAGE_KEY, 'false') === 'true');
+ const [rate, setRate] = useState(() => readStorageFloat(RATE_STORAGE_KEY, 1.2));
+ const [pitch, setPitch] = useState(() => readStorageFloat(PITCH_STORAGE_KEY, 1.0));
+ const [voiceURI, setVoiceURI] = useState(() => readStorage(VOICE_STORAGE_KEY, ''));
+ const [lang, setLang] = useState(() => {
+ const stored = readStorage(LANG_STORAGE_KEY, '');
+ return stored || (typeof navigator !== 'undefined' ? navigator.language : 'ja-JP');
+ });
+ const [isSpeaking, setIsSpeaking] = useState(false);
+ const [availableVoices, setAvailableVoices] = useState([]);
+
+ const lastSpokenIndexRef = useRef(-1);
+ const lastStreamingContentRef = useRef(null);
+
+ // Load available voices
+ useEffect(() => {
+ if (typeof window === 'undefined' || !window.speechSynthesis) return;
+
+ const loadVoices = () => {
+ const voices = window.speechSynthesis.getVoices();
+ setAvailableVoices(
+ voices.map((v) => ({
+ name: v.name,
+ lang: v.lang,
+ localService: v.localService,
+ voiceURI: v.voiceURI,
+ })),
+ );
+ };
+
+ loadVoices();
+ window.speechSynthesis.onvoiceschanged = loadVoices;
+ return () => {
+ window.speechSynthesis.onvoiceschanged = null;
+ };
+ }, []);
+
+ // Persist settings
+ useEffect(() => {
+ try { localStorage.setItem(STORAGE_KEY, String(enabled)); } catch { /* noop */ }
+ }, [enabled]);
+ useEffect(() => {
+ try { localStorage.setItem(RATE_STORAGE_KEY, String(rate)); } catch { /* noop */ }
+ }, [rate]);
+ useEffect(() => {
+ try { localStorage.setItem(PITCH_STORAGE_KEY, String(pitch)); } catch { /* noop */ }
+ }, [pitch]);
+ useEffect(() => {
+ try { localStorage.setItem(VOICE_STORAGE_KEY, voiceURI); } catch { /* noop */ }
+ }, [voiceURI]);
+ useEffect(() => {
+ try { localStorage.setItem(LANG_STORAGE_KEY, lang); } catch { /* noop */ }
+ }, [lang]);
+
+ // Monitor speechSynthesis state
+ useEffect(() => {
+ if (typeof window === 'undefined' || !window.speechSynthesis) return;
+ const interval = setInterval(() => {
+ setIsSpeaking(window.speechSynthesis.speaking);
+ }, 200);
+ return () => clearInterval(interval);
+ }, []);
+
+ // Get voices filtered by current language
+ const filteredVoices = availableVoices.filter((v) => {
+ if (lang === '') return true;
+ const langPrefix = lang.split('-')[0];
+ return v.lang.startsWith(langPrefix);
+ });
+
+ // Get unique language list from all voices
+ const availableLanguages = Array.from(
+ new Set(availableVoices.map((v) => v.lang)),
+ ).sort();
+
+ const speak = useCallback(
+ (text: string) => {
+ if (!text || typeof window === 'undefined' || !window.speechSynthesis) return;
+
+ const cleaned = stripMarkdown(text);
+ if (!cleaned) return;
+
+ window.speechSynthesis.cancel();
+
+ const utterance = new SpeechSynthesisUtterance(cleaned);
+ utterance.lang = lang || (typeof navigator !== 'undefined' ? navigator.language : 'ja-JP');
+ utterance.rate = rate;
+ utterance.pitch = pitch;
+
+ // Find selected voice, or fall back to first matching voice
+ const voices = window.speechSynthesis.getVoices();
+ if (voiceURI) {
+ const selected = voices.find((v) => v.voiceURI === voiceURI);
+ if (selected) utterance.voice = selected;
+ } else {
+ const fallbackLang = lang || (typeof navigator !== 'undefined' ? navigator.language : 'ja-JP');
+ const langPrefix = fallbackLang.split('-')[0];
+ const fallback = voices.find((v) => v.lang.startsWith(langPrefix));
+ if (fallback) utterance.voice = fallback;
+ }
+
+ utterance.onend = () => setIsSpeaking(false);
+ utterance.onerror = () => setIsSpeaking(false);
+
+ setIsSpeaking(true);
+ window.speechSynthesis.speak(utterance);
+ },
+ [lang, rate, pitch, voiceURI],
+ );
+
+ const stop = useCallback(() => {
+ if (typeof window !== 'undefined' && window.speechSynthesis) {
+ window.speechSynthesis.cancel();
+ setIsSpeaking(false);
+ }
+ }, []);
+
+ const toggle = useCallback(() => {
+ setEnabled((prev) => {
+ const next = !prev;
+ if (!next && typeof window !== 'undefined' && window.speechSynthesis) {
+ window.speechSynthesis.cancel();
+ setIsSpeaking(false);
+ }
+ return next;
+ });
+ }, []);
+
+ // Test current voice settings
+ const testVoice = useCallback(() => {
+ speak('テスト音声です。Hello, this is a test.');
+ }, [speak]);
+
+ // Watch for finalized assistant messages
+ useEffect(() => {
+ if (!enabled || chatMessages.length === 0) {
+ return;
+ }
+
+ const lastIndex = chatMessages.length - 1;
+ const lastMsg = chatMessages[lastIndex];
+
+ if (
+ !lastMsg ||
+ lastMsg.type !== 'assistant' ||
+ lastMsg.isToolUse ||
+ lastMsg.isInteractivePrompt ||
+ !lastMsg.content
+ ) {
+ lastStreamingContentRef.current = null;
+ return;
+ }
+
+ if (lastMsg.isStreaming) {
+ lastStreamingContentRef.current = lastMsg.content;
+ return;
+ }
+
+ if (lastIndex > lastSpokenIndexRef.current) {
+ lastSpokenIndexRef.current = lastIndex;
+ lastStreamingContentRef.current = null;
+ speak(lastMsg.content);
+ }
+ }, [chatMessages, enabled, speak]);
+
+ // Reset spoken index when messages are cleared (new session)
+ useEffect(() => {
+ if (chatMessages.length === 0) {
+ lastSpokenIndexRef.current = -1;
+ lastStreamingContentRef.current = null;
+ }
+ }, [chatMessages.length]);
+
+ return {
+ enabled,
+ toggle,
+ rate,
+ setRate,
+ pitch,
+ setPitch,
+ voiceURI,
+ setVoiceURI,
+ lang,
+ setLang,
+ isSpeaking,
+ speak,
+ stop,
+ testVoice,
+ availableVoices,
+ filteredVoices,
+ availableLanguages,
+ };
+}
From 81d32658b7dfd1fe68cca80f7472086f9e1975bb Mon Sep 17 00:00:00 2001
From: Kawano
Date: Mon, 9 Mar 2026 06:20:59 +0900
Subject: [PATCH 2/7] =?UTF-8?q?fix:=20address=20CodeRabbit=20review=20?=
=?UTF-8?q?=E2=80=94=20i18n,=20a11y,=20unmount=20cleanup?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
- Add aria-label to TTS toggle button in ChatComposer (a11y)
- Move all hardcoded TTS UI strings to i18n (en/ja/ko/zh-CN)
- Add htmlFor/id pairs to all TTS form controls (a11y)
- Add aria-labelledby to TTS enabled switch (a11y)
- Cancel active speech on TtsProvider unmount (cleanup)
Co-Authored-By: Claude Opus 4.6
---
.../chat/view/subcomponents/ChatComposer.tsx | 4 +-
.../view/QuickSettingsTtsSection.tsx | 46 ++++++++++++-------
src/hooks/useSpeechOutput.ts | 9 ++++
src/i18n/locales/en/settings.json | 21 +++++++++
src/i18n/locales/ja/settings.json | 21 +++++++++
src/i18n/locales/ko/settings.json | 21 +++++++++
src/i18n/locales/zh-CN/settings.json | 21 +++++++++
7 files changed, 126 insertions(+), 17 deletions(-)
diff --git a/src/components/chat/view/subcomponents/ChatComposer.tsx b/src/components/chat/view/subcomponents/ChatComposer.tsx
index 0da6d6a36..1a3c52c2e 100644
--- a/src/components/chat/view/subcomponents/ChatComposer.tsx
+++ b/src/components/chat/view/subcomponents/ChatComposer.tsx
@@ -154,6 +154,7 @@ export default function ChatComposer({
}: ChatComposerProps) {
const tts = useTts();
const { t } = useTranslation('chat');
+ const { t: tSettings } = useTranslation('settings');
const textareaRect = textareaRef.current?.getBoundingClientRect();
const commandMenuPosition = {
top: textareaRect ? Math.max(16, textareaRect.top - 316) : 0,
@@ -340,7 +341,8 @@ export default function ChatComposer({
: 'text-primary hover:bg-accent/60'
: 'text-muted-foreground hover:bg-accent/60'
}`}
- title={tts.isSpeaking ? 'Stop speaking' : tts.enabled ? 'TTS ON (click to disable)' : 'TTS OFF (click to enable)'}
+ aria-label={tts.isSpeaking ? tSettings('quickSettings.tts.button.stopSpeaking') : tts.enabled ? tSettings('quickSettings.tts.button.ttsOn') : tSettings('quickSettings.tts.button.ttsOff')}
+ title={tts.isSpeaking ? tSettings('quickSettings.tts.button.stopSpeaking') : tts.enabled ? tSettings('quickSettings.tts.button.ttsOn') : tSettings('quickSettings.tts.button.ttsOff')}
>
{tts.isSpeaking ? (
diff --git a/src/components/quick-settings-panel/view/QuickSettingsTtsSection.tsx b/src/components/quick-settings-panel/view/QuickSettingsTtsSection.tsx
index aa70dd49f..3625dad77 100644
--- a/src/components/quick-settings-panel/view/QuickSettingsTtsSection.tsx
+++ b/src/components/quick-settings-panel/view/QuickSettingsTtsSection.tsx
@@ -1,3 +1,4 @@
+import { useTranslation } from 'react-i18next';
import { Volume2, VolumeX, Play } from 'lucide-react';
import type { VoiceInfo } from '../../../hooks/useSpeechOutput';
import { SETTING_ROW_CLASS } from '../constants';
@@ -38,22 +39,25 @@ export default function QuickSettingsTtsSection({
isSpeaking,
onStop,
}: QuickSettingsTtsSectionProps) {
+ const { t } = useTranslation('settings');
+
return (
-
+
{/* Enable/Disable toggle */}
-
+
{enabled ? (
) : (
)}
- TTS Enabled
+ {t('quickSettings.tts.enabled')}