diff --git a/src/components/chat/view/ChatInterface.tsx b/src/components/chat/view/ChatInterface.tsx
index 90c1921d7..9e1db3976 100644
--- a/src/components/chat/view/ChatInterface.tsx
+++ b/src/components/chat/view/ChatInterface.tsx
@@ -7,6 +7,7 @@ import { useChatProviderState } from '../hooks/useChatProviderState';
import { useChatSessionState } from '../hooks/useChatSessionState';
import { useChatRealtimeHandlers } from '../hooks/useChatRealtimeHandlers';
import { useChatComposerState } from '../hooks/useChatComposerState';
+import { TtsProvider, useTts } from '../../../contexts/TtsContext';
import ChatMessagesPane from './subcomponents/ChatMessagesPane';
import ChatComposer from './subcomponents/ChatComposer';
@@ -272,7 +273,7 @@ function ChatInterface({
}
return (
- <>
+
- >
+
);
}
diff --git a/src/components/chat/view/subcomponents/ChatComposer.tsx b/src/components/chat/view/subcomponents/ChatComposer.tsx
index 35bf7548b..1a3c52c2e 100644
--- a/src/components/chat/view/subcomponents/ChatComposer.tsx
+++ b/src/components/chat/view/subcomponents/ChatComposer.tsx
@@ -11,7 +11,9 @@ import type {
SetStateAction,
TouchEvent,
} from 'react';
+import { Volume2, VolumeX, StopCircle } from 'lucide-react';
import MicButton from '../../../mic-button/view/MicButton';
+import { useTts } from '../../../../contexts/TtsContext';
import type { PendingPermissionRequest, PermissionMode, Provider } from '../../types/types';
import CommandMenu from './CommandMenu';
import ClaudeStatus from './ClaudeStatus';
@@ -150,7 +152,9 @@ export default function ChatComposer({
sendByCtrlEnter,
onTranscript,
}: ChatComposerProps) {
+ const tts = useTts();
const { t } = useTranslation('chat');
+ const { t: tSettings } = useTranslation('settings');
const textareaRect = textareaRef.current?.getBoundingClientRect();
const commandMenuPosition = {
top: textareaRect ? Math.max(16, textareaRect.top - 316) : 0,
@@ -326,6 +330,30 @@ export default function ChatComposer({
+ {tts && tts.availableVoices.length > 0 && (
+
+ )}
+
+ );
+}
type QuickSettingsContentProps = {
isDarkMode: boolean;
@@ -76,6 +104,8 @@ export default function QuickSettingsContent({
+
+
);
diff --git a/src/components/quick-settings-panel/view/QuickSettingsTtsSection.tsx b/src/components/quick-settings-panel/view/QuickSettingsTtsSection.tsx
new file mode 100644
index 000000000..3625dad77
--- /dev/null
+++ b/src/components/quick-settings-panel/view/QuickSettingsTtsSection.tsx
@@ -0,0 +1,202 @@
+import { useTranslation } from 'react-i18next';
+import { Volume2, VolumeX, Play } from 'lucide-react';
+import type { VoiceInfo } from '../../../hooks/useSpeechOutput';
+import { SETTING_ROW_CLASS } from '../constants';
+import QuickSettingsSection from './QuickSettingsSection';
+
+type QuickSettingsTtsSectionProps = { // props accepted by QuickSettingsTtsSection
+ enabled: boolean; // picks which icon branch renders and gates the language/voice/rate/pitch/test rows
+ onToggle: () => void;
+ rate: number; // shown next to the rate slider as rate.toFixed(1) followed by "x"
+ onRateChange: (rate: number) => void; // called with parseFloat(e.target.value)
+ pitch: number; // shown next to the pitch slider as pitch.toFixed(1)
+ onPitchChange: (pitch: number) => void; // called with parseFloat(e.target.value)
+ voiceURI: string; // presumably the selected voice's URI; its use is not visible in this patch
+ onVoiceChange: (voiceURI: string) => void;
+ lang: string; // presumably the selected language filter; its use is not visible in this patch
+ onLangChange: (lang: string) => void;
+ filteredVoices: VoiceInfo[];
+ availableLanguages: string[];
+ onTestVoice: () => void;
+ isSpeaking: boolean;
+ onStop: () => void;
+};
+
+export default function QuickSettingsTtsSection({ // TTS section of the quick-settings panel; element markup is not visible in this patch
+ enabled,
+ onToggle,
+ rate,
+ onRateChange,
+ pitch,
+ onPitchChange,
+ voiceURI,
+ onVoiceChange,
+ lang,
+ onLangChange,
+ filteredVoices,
+ availableLanguages,
+ onTestVoice,
+ isSpeaking,
+ onStop,
+}: QuickSettingsTtsSectionProps) {
+ const { t } = useTranslation('settings'); // labels below are looked up under quickSettings.tts.* in the 'settings' namespace
+
+ return (
+
+ {/* Enable/Disable toggle: the icon branch is chosen by `enabled` */}
+
+
+ {enabled ? (
+
+ ) : (
+
+ )}
+ {t('quickSettings.tts.enabled')}
+
+
+
+
+ {enabled && (
+ <>
+ {/* Language filter */}
+
+
+
+
+
+ {/* Voice selection */}
+
+
+
+
+
+ {/* Rate slider: current value is shown as rate.toFixed(1) + "x"; changes go to onRateChange as a parsed float */}
+
+
+
+
+ {rate.toFixed(1)}x
+
+
+
onRateChange(parseFloat(e.target.value))}
+ className="w-full accent-blue-600"
+ />
+
+ 0.5x
+ 1.0x
+ 2.0x
+ 3.0x
+
+
+
+ {/* Pitch slider: current value is shown as pitch.toFixed(1); changes go to onPitchChange as a parsed float */}
+
+
+
+
+ {pitch.toFixed(1)}
+
+
+
onPitchChange(parseFloat(e.target.value))}
+ className="w-full accent-blue-600"
+ />
+
+ {t('quickSettings.tts.pitchLow')}
+ {t('quickSettings.tts.pitchNormal')}
+ {t('quickSettings.tts.pitchHigh')}
+
+
+
+ {/* Test / Stop button */}
+
+
+
+ >
+ )}
+
+ );
+}
diff --git a/src/contexts/TtsContext.tsx b/src/contexts/TtsContext.tsx
new file mode 100644
index 000000000..aea9960a9
--- /dev/null
+++ b/src/contexts/TtsContext.tsx
@@ -0,0 +1,49 @@
+import { createContext, useContext, type ReactNode } from 'react';
+import { useSpeechOutput } from '../hooks/useSpeechOutput';
+import type { VoiceInfo } from '../hooks/useSpeechOutput';
+
+type TtsContextValue = { // same field names as the object returned by useSpeechOutput
+ enabled: boolean;
+ toggle: () => void;
+ rate: number;
+ setRate: (rate: number) => void;
+ pitch: number;
+ setPitch: (pitch: number) => void;
+ voiceURI: string; // '' means no explicit voice is selected
+ setVoiceURI: (uri: string) => void;
+ lang: string; // '' means no language filter
+ setLang: (lang: string) => void;
+ isSpeaking: boolean;
+ speak: (text: string) => void;
+ stop: () => void;
+ testVoice: () => void;
+ availableVoices: VoiceInfo[]; // every voice reported by the browser
+ filteredVoices: VoiceInfo[]; // availableVoices narrowed by `lang`
+ availableLanguages: string[]; // sorted unique `lang` values of availableVoices
+};
+
+/**
+ * Context carrying the TTS state and controls.
+ *
+ * The default value is `null`, which is what `useTts` yields when no
+ * `TtsProvider` is mounted above the caller. The explicit type argument is
+ * required: without it the context is inferred as `Context<null>` and no
+ * real value can be provided.
+ */
+const TtsContext = createContext<TtsContextValue | null>(null);
+
+/** Minimal chat message shape accepted by `TtsProvider` and handed to `useSpeechOutput`. */
+type ChatMessage = {
+  type: string;
+  content?: string;
+  isStreaming?: boolean;
+  isToolUse?: boolean;
+  isInteractivePrompt?: boolean;
+  [key: string]: unknown;
+};
+
+/**
+ * Runs `useSpeechOutput` over the given messages and publishes the result
+ * through `TtsContext` so descendants can read it with `useTts`.
+ *
+ * @param chatMessages Messages forwarded unchanged to `useSpeechOutput`.
+ * @param children Subtree that receives the context value.
+ */
+export function TtsProvider({
+  chatMessages,
+  children,
+}: {
+  chatMessages: ChatMessage[];
+  children: ReactNode;
+}) {
+  const tts = useSpeechOutput(chatMessages);
+  // Returning the bare `{children}` object would neither render the subtree
+  // nor expose `tts`; the provider element does both.
+  return <TtsContext.Provider value={tts}>{children}</TtsContext.Provider>;
+}
+
+/**
+ * Read the current TTS context value.
+ *
+ * @returns The value supplied through `TtsContext`, or `null` (the context
+ *   default) when nothing has been provided above the caller.
+ */
+export function useTts(): TtsContextValue | null {
+  const ttsValue = useContext(TtsContext);
+  return ttsValue;
+}
diff --git a/src/hooks/useSpeechOutput.ts b/src/hooks/useSpeechOutput.ts
new file mode 100644
index 000000000..a45d646f6
--- /dev/null
+++ b/src/hooks/useSpeechOutput.ts
@@ -0,0 +1,285 @@
+import { useCallback, useEffect, useRef, useState } from 'react';
+
+type ChatMessage = { // minimal message shape read by useSpeechOutput
+ type: string; // only messages whose type is 'assistant' are spoken
+ content?: string; // text handed to speak(); a message without content is skipped
+ isStreaming?: boolean; // while truthy the message is not spoken yet
+ isToolUse?: boolean; // truthy: the message is never spoken
+ isInteractivePrompt?: boolean; // truthy: the message is never spoken
+ [key: string]: unknown; // other fields are allowed; `isThinking` is read through this signature
+};
+
+export type VoiceInfo = { // plain-object copy of the fields taken from each speechSynthesis.getVoices() entry
+ name: string; // copied from the voice's `name`
+ lang: string; // copied from the voice's `lang`; used for the language filter and language list
+ localService: boolean; // copied from the voice's `localService`
+ voiceURI: string; // copied from the voice's `voiceURI`; compared against the selected voiceURI when speaking
+};
+
+const STORAGE_KEY = 'tts_enabled'; // localStorage key for the enabled flag, written as String(enabled)
+const RATE_STORAGE_KEY = 'tts_rate'; // localStorage key for the speech rate, written as String(rate)
+const PITCH_STORAGE_KEY = 'tts_pitch'; // localStorage key for the speech pitch, written as String(pitch)
+const VOICE_STORAGE_KEY = 'tts_voice_uri'; // localStorage key for the selected voice URI
+const LANG_STORAGE_KEY = 'tts_lang'; // localStorage key for the selected language
+
+/**
+ * Strip markdown formatting for cleaner TTS output.
+ *
+ * Fenced code blocks and inline code spans are removed entirely. Heading,
+ * bullet, numbered-list and blockquote markers are dropped. Links and images
+ * are reduced to their label. Emphasis and strikethrough markers are removed
+ * while the emphasised text is kept. Runs of blank lines collapse to a single
+ * newline and the result is trimmed.
+ *
+ * @param text Raw markdown text.
+ * @returns Plain text suitable for speech synthesis (may be empty).
+ */
+function stripMarkdown(text: string): string {
+  return (
+    text
+      // Code is removed first so markdown-looking characters inside it are never processed.
+      .replace(/```[\s\S]*?```/g, '')
+      .replace(/`[^`]+`/g, '')
+      // Block-level markers go before emphasis: otherwise the "*" of a
+      // "* item" bullet is paired with a later "*" and treated as italics.
+      .replace(/^#{1,6}\s+/gm, '')
+      .replace(/^[-*+]\s+/gm, '')
+      .replace(/^\d+\.\s+/gm, '')
+      .replace(/^>\s+/gm, '')
+      // Links and images keep only their label; the optional "!" covers image syntax.
+      .replace(/!?\[([^\]]+)\]\([^)]+\)/g, '$1')
+      .replace(/\*\*(.+?)\*\*/g, '$1')
+      .replace(/\*(.+?)\*/g, '$1')
+      .replace(/__(.+?)__/g, '$1')
+      // Single-underscore emphasis only counts when the underscores are not
+      // inside a word, so identifiers such as my_var_name are left intact.
+      .replace(/(^|\W)_(.+?)_(?!\w)/g, '$1$2')
+      .replace(/~~(.+?)~~/g, '$1')
+      .replace(/\n{2,}/g, '\n')
+      .trim()
+  );
+}
+
+/**
+ * Look up a string in localStorage.
+ *
+ * @param key Storage key to read.
+ * @param fallback Value returned when the key is absent or storage access throws.
+ * @returns The stored string (an empty string is returned as-is), or `fallback`.
+ */
+function readStorage(key: string, fallback: string): string {
+  let stored: string | null;
+  try {
+    stored = localStorage.getItem(key);
+  } catch {
+    // Storage can be unavailable or blocked; treat that like a missing key.
+    return fallback;
+  }
+  return stored == null ? fallback : stored;
+}
+
+/**
+ * Read a finite number from localStorage.
+ *
+ * Non-finite results are rejected along with unparsable ones: a stored
+ * "Infinity" parses successfully, but assigning a non-finite number to
+ * `SpeechSynthesisUtterance.rate` or `.pitch` throws.
+ *
+ * @param key Storage key to read.
+ * @param fallback Value returned when the key is absent or empty, the stored
+ *   text is not a finite number, or storage access throws.
+ * @returns The parsed finite number, or `fallback`.
+ */
+function readStorageFloat(key: string, fallback: number): number {
+  try {
+    const raw = localStorage.getItem(key);
+    if (!raw) return fallback;
+    const parsed = Number.parseFloat(raw);
+    return Number.isFinite(parsed) ? parsed : fallback;
+  } catch {
+    // Storage can be unavailable or blocked; treat that like a missing key.
+    return fallback;
+  }
+}
+
+/** Language used when none is selected: the browser locale, or 'ja-JP' where `navigator` is unavailable. */
+function defaultSpeechLang(): string {
+  return typeof navigator !== 'undefined' ? navigator.language : 'ja-JP';
+}
+
+/** Lower-cased primary subtag of a language tag ('en-US' and 'en_US' both give 'en'). */
+function primaryLanguageSubtag(tag: string): string {
+  const [primary = ''] = tag.split(/[-_]/);
+  return primary.toLowerCase();
+}
+
+/**
+ * Hook that speaks finalized assistant messages using the Web Speech API.
+ *
+ * Features:
+ * - Voice selection from available system voices
+ * - Adjustable rate and pitch
+ * - Language filter for voice list
+ * - All settings persisted in localStorage
+ *
+ * A message is spoken when it is the last entry of `chatMessages`, has type
+ * 'assistant', has content, is not a tool-use / thinking / interactive-prompt
+ * message, is no longer streaming, and sits at an index above the last one
+ * already spoken.
+ *
+ * @param chatMessages Current message list; only its last entry and its length are read.
+ * @returns The current settings with their setters, the speaking state, the
+ *   `speak` / `stop` / `testVoice` / `toggle` controls, and the voice lists.
+ */
+export function useSpeechOutput(chatMessages: ChatMessage[]) {
+  const [enabled, setEnabled] = useState(() => readStorage(STORAGE_KEY, 'false') === 'true');
+  const [rate, setRate] = useState(() => readStorageFloat(RATE_STORAGE_KEY, 1.2));
+  const [pitch, setPitch] = useState(() => readStorageFloat(PITCH_STORAGE_KEY, 1.0));
+  const [voiceURI, setVoiceURI] = useState(() => readStorage(VOICE_STORAGE_KEY, ''));
+  // The default is passed as the fallback (instead of `stored || default`) so
+  // that a persisted '' ("all languages") survives a reload.
+  const [lang, setLang] = useState(() => readStorage(LANG_STORAGE_KEY, defaultSpeechLang()));
+  const [isSpeaking, setIsSpeaking] = useState(false);
+  // Explicit element type: a bare `[]` would be inferred as `never[]`.
+  const [availableVoices, setAvailableVoices] = useState<VoiceInfo[]>([]);
+
+  // Seed to current tail so we don't replay historical messages on mount
+  const lastSpokenIndexRef = useRef(chatMessages.length - 1);
+  const chatMessagesLengthRef = useRef(chatMessages.length);
+  chatMessagesLengthRef.current = chatMessages.length;
+  // Mirror of `enabled` so `toggle` can stay stable without a state updater.
+  const enabledRef = useRef(enabled);
+  enabledRef.current = enabled;
+  // The utterance most recently handed to the synthesizer; lets end/error
+  // events from an already-cancelled utterance be ignored.
+  const activeUtteranceRef = useRef<SpeechSynthesisUtterance | null>(null);
+
+  // Load available voices
+  useEffect(() => {
+    if (typeof window === 'undefined' || !window.speechSynthesis) return;
+
+    const loadVoices = () => {
+      const voices = window.speechSynthesis.getVoices();
+      setAvailableVoices(
+        voices.map((v) => ({
+          name: v.name,
+          lang: v.lang,
+          localService: v.localService,
+          voiceURI: v.voiceURI,
+        })),
+      );
+    };
+
+    // Read once immediately, then again whenever the browser reports a change.
+    loadVoices();
+    window.speechSynthesis.onvoiceschanged = loadVoices;
+    return () => {
+      window.speechSynthesis.onvoiceschanged = null;
+    };
+  }, []);
+
+  // Persist settings (best effort: storage failures are ignored on purpose)
+  useEffect(() => {
+    try { localStorage.setItem(STORAGE_KEY, String(enabled)); } catch { /* noop */ }
+  }, [enabled]);
+  useEffect(() => {
+    try { localStorage.setItem(RATE_STORAGE_KEY, String(rate)); } catch { /* noop */ }
+  }, [rate]);
+  useEffect(() => {
+    try { localStorage.setItem(PITCH_STORAGE_KEY, String(pitch)); } catch { /* noop */ }
+  }, [pitch]);
+  useEffect(() => {
+    try { localStorage.setItem(VOICE_STORAGE_KEY, voiceURI); } catch { /* noop */ }
+  }, [voiceURI]);
+  useEffect(() => {
+    try { localStorage.setItem(LANG_STORAGE_KEY, lang); } catch { /* noop */ }
+  }, [lang]);
+
+  // Monitor speechSynthesis state (only when TTS is active)
+  useEffect(() => {
+    if (typeof window === 'undefined' || !window.speechSynthesis) return;
+    if (!enabled && !isSpeaking) return;
+    const interval = setInterval(() => {
+      setIsSpeaking(window.speechSynthesis.speaking);
+    }, 200);
+    return () => clearInterval(interval);
+  }, [enabled, isSpeaking]);
+
+  // Voices for the current language. Primary subtags are compared for
+  // equality; a plain prefix test would let 'fi' also match 'fil-PH'.
+  const selectedSubtag = primaryLanguageSubtag(lang);
+  const filteredVoices =
+    lang === ''
+      ? availableVoices
+      : availableVoices.filter((v) => primaryLanguageSubtag(v.lang) === selectedSubtag);
+
+  // Get unique language list from all voices
+  const availableLanguages = Array.from(
+    new Set(availableVoices.map((v) => v.lang)),
+  ).sort();
+
+  const speak = useCallback(
+    (text: string) => {
+      if (!text || typeof window === 'undefined' || !window.speechSynthesis) return;
+
+      const cleaned = stripMarkdown(text);
+      if (!cleaned) return;
+
+      const synth = window.speechSynthesis;
+      // Anything still queued or playing is replaced by the new text.
+      synth.cancel();
+
+      const effectiveLang = lang || defaultSpeechLang();
+      const utterance = new SpeechSynthesisUtterance(cleaned);
+      utterance.lang = effectiveLang;
+      utterance.rate = rate;
+      utterance.pitch = pitch;
+
+      // Use the selected voice when it still exists; otherwise (none selected,
+      // or the stored one is gone) fall back to the first voice for the language.
+      const voices = synth.getVoices();
+      const wantedSubtag = primaryLanguageSubtag(effectiveLang);
+      const preferred = voiceURI ? voices.find((v) => v.voiceURI === voiceURI) : undefined;
+      const voice =
+        preferred ?? voices.find((v) => primaryLanguageSubtag(v.lang) === wantedSubtag);
+      if (voice) utterance.voice = voice;
+
+      // Only the utterance that is still current may clear the speaking flag.
+      const finish = () => {
+        if (activeUtteranceRef.current === utterance) {
+          activeUtteranceRef.current = null;
+          setIsSpeaking(false);
+        }
+      };
+      utterance.onend = finish;
+      utterance.onerror = finish;
+
+      activeUtteranceRef.current = utterance;
+      setIsSpeaking(true);
+      synth.speak(utterance);
+    },
+    [lang, rate, pitch, voiceURI],
+  );
+
+  const stop = useCallback(() => {
+    if (typeof window !== 'undefined' && window.speechSynthesis) {
+      window.speechSynthesis.cancel();
+      activeUtteranceRef.current = null;
+      setIsSpeaking(false);
+    }
+  }, []);
+
+  const toggle = useCallback(() => {
+    // Side effects run here in the event handler, not inside a state updater:
+    // updaters must be pure and may be invoked more than once.
+    const next = !enabledRef.current;
+    // Updated right away so two toggles before the next render cancel out.
+    enabledRef.current = next;
+    if (next) {
+      // Seed so we only speak messages appended after enabling
+      lastSpokenIndexRef.current = chatMessagesLengthRef.current - 1;
+    } else if (typeof window !== 'undefined' && window.speechSynthesis) {
+      window.speechSynthesis.cancel();
+      activeUtteranceRef.current = null;
+      setIsSpeaking(false);
+    }
+    setEnabled(next);
+  }, []);
+
+  // Cancel active speech when provider unmounts
+  useEffect(() => {
+    return () => {
+      if (typeof window !== 'undefined' && window.speechSynthesis) {
+        window.speechSynthesis.cancel();
+      }
+    };
+  }, []);
+
+  // Test current voice settings
+  const testVoice = useCallback(() => {
+    speak('テスト音声です。Hello, this is a test.');
+  }, [speak]);
+
+  // Watch for finalized assistant messages
+  useEffect(() => {
+    if (!enabled || chatMessages.length === 0) {
+      return;
+    }
+
+    const lastIndex = chatMessages.length - 1;
+    const lastMsg = chatMessages[lastIndex];
+
+    // `isThinking` is not a declared field; it is read through the index signature.
+    if (
+      !lastMsg ||
+      lastMsg.type !== 'assistant' ||
+      lastMsg.isToolUse ||
+      lastMsg.isThinking ||
+      lastMsg.isInteractivePrompt ||
+      !lastMsg.content
+    ) {
+      return;
+    }
+
+    // Still streaming: wait until the message is final.
+    if (lastMsg.isStreaming) {
+      return;
+    }
+
+    if (lastIndex > lastSpokenIndexRef.current) {
+      lastSpokenIndexRef.current = lastIndex;
+      speak(lastMsg.content);
+    }
+  }, [chatMessages, enabled, speak]);
+
+  // Reset spoken index when messages are cleared (new session)
+  useEffect(() => {
+    if (chatMessages.length === 0) {
+      lastSpokenIndexRef.current = -1;
+    }
+  }, [chatMessages.length]);
+
+  return {
+    enabled,
+    toggle,
+    rate,
+    setRate,
+    pitch,
+    setPitch,
+    voiceURI,
+    setVoiceURI,
+    lang,
+    setLang,
+    isSpeaking,
+    speak,
+    stop,
+    testVoice,
+    availableVoices,
+    filteredVoices,
+    availableLanguages,
+  };
+}
diff --git a/src/i18n/locales/en/settings.json b/src/i18n/locales/en/settings.json
index 2c6a99e1d..73fb1ec12 100644
--- a/src/i18n/locales/en/settings.json
+++ b/src/i18n/locales/en/settings.json
@@ -72,6 +72,27 @@
"draggingStatus": "Dragging...",
"toggleAndMove": "Click to toggle, drag to move"
},
+ "tts": {
+ "sectionTitle": "Text-to-Speech",
+ "enabled": "TTS Enabled",
+ "language": "Language",
+ "allLanguages": "All Languages",
+ "voice": "Voice ({{count}} available)",
+ "voiceAuto": "Auto (first match)",
+ "speed": "Speed",
+ "pitch": "Pitch",
+ "pitchLow": "Low",
+ "pitchNormal": "Normal",
+ "pitchHigh": "High",
+ "testVoice": "Test Voice",
+ "stop": "Stop",
+ "network": "Network",
+ "button": {
+ "stopSpeaking": "Stop speaking",
+ "ttsOn": "Text-to-Speech enabled (click to disable)",
+ "ttsOff": "Text-to-Speech disabled (click to enable)"
+ }
+ },
"whisper": {
"modes": {
"default": "Default Mode",
diff --git a/src/i18n/locales/ja/settings.json b/src/i18n/locales/ja/settings.json
index 4fd82ec8f..e890348fd 100644
--- a/src/i18n/locales/ja/settings.json
+++ b/src/i18n/locales/ja/settings.json
@@ -72,6 +72,27 @@
"draggingStatus": "ドラッグ中...",
"toggleAndMove": "クリックで切替、ドラッグで移動"
},
+ "tts": {
+ "sectionTitle": "テキスト読み上げ",
+ "enabled": "TTS有効",
+ "language": "言語",
+ "allLanguages": "すべての言語",
+ "voice": "音声({{count}}件利用可能)",
+ "voiceAuto": "自動(最初に一致)",
+ "speed": "速度",
+ "pitch": "ピッチ",
+ "pitchLow": "低",
+ "pitchNormal": "標準",
+ "pitchHigh": "高",
+ "testVoice": "音声テスト",
+ "stop": "停止",
+ "network": "ネットワーク",
+ "button": {
+ "stopSpeaking": "読み上げを停止",
+ "ttsOn": "テキスト読み上げ有効(クリックで無効化)",
+ "ttsOff": "テキスト読み上げ無効(クリックで有効化)"
+ }
+ },
"whisper": {
"modes": {
"default": "標準モード",
diff --git a/src/i18n/locales/ko/settings.json b/src/i18n/locales/ko/settings.json
index f452291f9..9687d1d99 100644
--- a/src/i18n/locales/ko/settings.json
+++ b/src/i18n/locales/ko/settings.json
@@ -72,6 +72,27 @@
"draggingStatus": "드래그 중...",
"toggleAndMove": "클릭하여 토글, 드래그하여 이동"
},
+ "tts": {
+ "sectionTitle": "텍스트 음성 변환",
+ "enabled": "TTS 활성화",
+ "language": "언어",
+ "allLanguages": "모든 언어",
+ "voice": "음성 ({{count}}개 사용 가능)",
+ "voiceAuto": "자동 (첫 번째 일치)",
+ "speed": "속도",
+ "pitch": "피치",
+ "pitchLow": "낮음",
+ "pitchNormal": "보통",
+ "pitchHigh": "높음",
+ "testVoice": "음성 테스트",
+ "stop": "중지",
+ "network": "네트워크",
+ "button": {
+ "stopSpeaking": "읽기 중지",
+ "ttsOn": "텍스트 음성 변환 활성화 (클릭하여 비활성화)",
+ "ttsOff": "텍스트 음성 변환 비활성화 (클릭하여 활성화)"
+ }
+ },
"whisper": {
"modes": {
"default": "기본 모드",
diff --git a/src/i18n/locales/zh-CN/settings.json b/src/i18n/locales/zh-CN/settings.json
index cdfb54979..6acd8c1fa 100644
--- a/src/i18n/locales/zh-CN/settings.json
+++ b/src/i18n/locales/zh-CN/settings.json
@@ -72,6 +72,27 @@
"draggingStatus": "正在拖拽...",
"toggleAndMove": "点击切换,拖拽移动"
},
+ "tts": {
+ "sectionTitle": "文字转语音",
+ "enabled": "TTS 已启用",
+ "language": "语言",
+ "allLanguages": "所有语言",
+ "voice": "语音({{count}}个可用)",
+ "voiceAuto": "自动(第一个匹配)",
+ "speed": "速度",
+ "pitch": "音调",
+ "pitchLow": "低",
+ "pitchNormal": "正常",
+ "pitchHigh": "高",
+ "testVoice": "测试语音",
+ "stop": "停止",
+ "network": "网络",
+ "button": {
+ "stopSpeaking": "停止朗读",
+ "ttsOn": "文字转语音已启用(点击禁用)",
+ "ttsOff": "文字转语音已禁用(点击启用)"
+ }
+ },
"whisper": {
"modes": {
"default": "默认模式",