diff --git a/src/components/chat/view/ChatInterface.tsx b/src/components/chat/view/ChatInterface.tsx index 90c1921d7..9e1db3976 100644 --- a/src/components/chat/view/ChatInterface.tsx +++ b/src/components/chat/view/ChatInterface.tsx @@ -7,6 +7,7 @@ import { useChatProviderState } from '../hooks/useChatProviderState'; import { useChatSessionState } from '../hooks/useChatSessionState'; import { useChatRealtimeHandlers } from '../hooks/useChatRealtimeHandlers'; import { useChatComposerState } from '../hooks/useChatComposerState'; +import { TtsProvider, useTts } from '../../../contexts/TtsContext'; import ChatMessagesPane from './subcomponents/ChatMessagesPane'; import ChatComposer from './subcomponents/ChatComposer'; @@ -272,7 +273,7 @@ function ChatInterface({ } return ( - <> +
- + ); } diff --git a/src/components/chat/view/subcomponents/ChatComposer.tsx b/src/components/chat/view/subcomponents/ChatComposer.tsx index 35bf7548b..1a3c52c2e 100644 --- a/src/components/chat/view/subcomponents/ChatComposer.tsx +++ b/src/components/chat/view/subcomponents/ChatComposer.tsx @@ -11,7 +11,9 @@ import type { SetStateAction, TouchEvent, } from 'react'; +import { Volume2, VolumeX, StopCircle } from 'lucide-react'; import MicButton from '../../../mic-button/view/MicButton'; +import { useTts } from '../../../../contexts/TtsContext'; import type { PendingPermissionRequest, PermissionMode, Provider } from '../../types/types'; import CommandMenu from './CommandMenu'; import ClaudeStatus from './ClaudeStatus'; @@ -150,7 +152,9 @@ export default function ChatComposer({ sendByCtrlEnter, onTranscript, }: ChatComposerProps) { + const tts = useTts(); const { t } = useTranslation('chat'); + const { t: tSettings } = useTranslation('settings'); const textareaRect = textareaRef.current?.getBoundingClientRect(); const commandMenuPosition = { top: textareaRect ? Math.max(16, textareaRect.top - 316) : 0, @@ -326,6 +330,30 @@ export default function ChatComposer({
+ {tts && tts.availableVoices.length > 0 && ( + + )} + + + + {enabled && ( + <> + {/* Language filter */} +
+ + +
+ + {/* Voice selection */} +
+ + +
+ + {/* Rate slider */} +
+
+ + + {rate.toFixed(1)}x + +
+ onRateChange(parseFloat(e.target.value))} + className="w-full accent-blue-600" + /> +
+ 0.5x + 1.0x + 2.0x + 3.0x +
+
+ + {/* Pitch slider */} +
+
+ + + {pitch.toFixed(1)} + +
+ onPitchChange(parseFloat(e.target.value))} + className="w-full accent-blue-600" + /> +
+ {t('quickSettings.tts.pitchLow')} + {t('quickSettings.tts.pitchNormal')} + {t('quickSettings.tts.pitchHigh')} +
+
+ + {/* Test / Stop button */} +
+ +
+ + )} + + ); +} diff --git a/src/contexts/TtsContext.tsx b/src/contexts/TtsContext.tsx new file mode 100644 index 000000000..aea9960a9 --- /dev/null +++ b/src/contexts/TtsContext.tsx @@ -0,0 +1,49 @@ +import { createContext, useContext, type ReactNode } from 'react'; +import { useSpeechOutput } from '../hooks/useSpeechOutput'; +import type { VoiceInfo } from '../hooks/useSpeechOutput'; + +/** + * Value shared through TtsContext. Its field names match the object that + * useSpeechOutput returns. + */ +type TtsContextValue = { + enabled: boolean; + toggle: () => void; + rate: number; + setRate: (rate: number) => void; + pitch: number; + setPitch: (pitch: number) => void; + voiceURI: string; + setVoiceURI: (uri: string) => void; + lang: string; + setLang: (lang: string) => void; + isSpeaking: boolean; + speak: (text: string) => void; + stop: () => void; + testVoice: () => void; + availableVoices: VoiceInfo[]; + filteredVoices: VoiceInfo[]; + availableLanguages: string[]; +}; + +/** + * Context object, created with null as its default value. + * NOTE(review): no type argument is visible on createContext in this text; + * confirm the real file types it as TtsContextValue or null. + */ +const TtsContext = createContext(null); + +/** + * Message shape accepted by TtsProvider. + * NOTE(review): the same ChatMessage type is declared again in + * src/hooks/useSpeechOutput.ts. + */ +type ChatMessage = { + type: string; + content?: string; + isStreaming?: boolean; + isToolUse?: boolean; + isInteractivePrompt?: boolean; + [key: string]: unknown; +}; + +/** + * Calls useSpeechOutput with chatMessages; the result is presumably meant to + * be supplied to descendants through TtsContext. + * + * @param chatMessages Messages passed straight to useSpeechOutput. + * @param children Nodes rendered inside the provider. + * + * NOTE(review): as this text reads, the return statement yields only + * {children} and never uses tts or TtsContext, so the provider element + * appears to be missing here; confirm against the real file. + */ +export function TtsProvider({ + chatMessages, + children, +}: { + chatMessages: ChatMessage[]; + children: ReactNode; +}) { + const tts = useSpeechOutput(chatMessages); + return {children}; +} + +/** + * Read the current TtsContext value with useContext. + * + * @returns The context value; the declared return type includes null, so + * callers must handle that case. + */ +export function useTts(): TtsContextValue | null { + return useContext(TtsContext); +} diff --git a/src/hooks/useSpeechOutput.ts b/src/hooks/useSpeechOutput.ts new file mode 100644 index 000000000..a45d646f6 --- /dev/null +++ b/src/hooks/useSpeechOutput.ts @@ -0,0 +1,285 @@ +import { useCallback, useEffect, useRef, useState } from 'react'; + +/** + * Chat message fields that the hook in this file reads; the index signature + * admits any other key with type unknown. + */ +type ChatMessage = { + type: string; + content?: string; + isStreaming?: boolean; + isToolUse?: boolean; + isInteractivePrompt?: boolean; + [key: string]: unknown; +}; + +/** Plain-object copy of the voice fields that the hook keeps in state. */ +export type VoiceInfo = { + name: string; + lang: string; + localService: boolean; + voiceURI: string; +}; + +/* localStorage keys, one per persisted setting. */ +const STORAGE_KEY = 'tts_enabled'; +const RATE_STORAGE_KEY = 'tts_rate'; +const 
PITCH_STORAGE_KEY = 'tts_pitch'; +const VOICE_STORAGE_KEY = 'tts_voice_uri'; +const LANG_STORAGE_KEY = 'tts_lang'; + +/** + * Strip markdown formatting for cleaner TTS output. + * + * Fenced code blocks and inline code spans are removed together with their + * contents. Bold, italic, strikethrough and link markup is replaced by its + * inner text. Leading heading, list and blockquote markers are dropped, runs + * of newlines collapse to a single newline, and the result is trimmed. + * + * NOTE(review): the single-underscore rule is not word-boundary aware, so it + * also removes the underscores from identifiers such as snake_case_name, and + * the single-asterisk rule pairs up bare asterisks on the same line. Image + * syntax keeps its leading exclamation mark because only the link part is + * rewritten. + * + * @param text Raw message text, possibly containing markdown. + * @returns The text with the markup listed above removed. + */ +function stripMarkdown(text: string): string { + return text + .replace(/```[\s\S]*?```/g, '') + .replace(/`[^`]+`/g, '') + .replace(/\*\*(.+?)\*\*/g, '$1') + .replace(/\*(.+?)\*/g, '$1') + .replace(/__(.+?)__/g, '$1') + .replace(/_(.+?)_/g, '$1') + .replace(/~~(.+?)~~/g, '$1') + .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1') + .replace(/^#{1,6}\s+/gm, '') + .replace(/^[-*+]\s+/gm, '') + .replace(/^\d+\.\s+/gm, '') + .replace(/^>\s+/gm, '') + .replace(/\n{2,}/g, '\n') + .trim(); +} + +/** + * Read a string setting from localStorage. + * + * @param key Storage key to read. + * @param fallback Value returned when the key is absent or when reading + * from localStorage throws. + * @returns The stored string, or fallback. + */ +function readStorage(key: string, fallback: string): string { + try { + return localStorage.getItem(key) ?? fallback; + } catch { + return fallback; + } +} + +/** + * Read a numeric setting from localStorage using parseFloat. + * + * @param key Storage key to read. + * @param fallback Value returned when the key is absent or empty, when the + * stored text parses to NaN, or when reading from localStorage throws. + * @returns The parsed number, or fallback. + * + * NOTE(review): there is no range check, so a stored value such as Infinity + * or a negative number is returned unchanged. + */ +function readStorageFloat(key: string, fallback: number): number { + try { + const v = localStorage.getItem(key); + if (!v) return fallback; + const parsed = parseFloat(v); + return Number.isNaN(parsed) ? fallback : parsed; + } catch { + return fallback; + } +} + +/** + * Hook that speaks finalized assistant messages using the Web Speech API. + * + * Features: + * - Voice selection from available system voices + * - Adjustable rate and pitch + * - Language filter for voice list + * - All settings persisted in localStorage + * + * @param chatMessages Current message list; only its last element and its + * length are inspected. + * @returns The settings and their setters, the speak, stop, toggle and + * testVoice callbacks, isSpeaking, and the voice and language lists. + */ +export function useSpeechOutput(chatMessages: ChatMessage[]) { + /* Each setting is initialised lazily from localStorage: enabled defaults to false, rate to 1.2, pitch to 1.0 and voiceURI to an empty string. */ + const [enabled, setEnabled] = useState(() => readStorage(STORAGE_KEY, 'false') === 'true'); + const [rate, setRate] = useState(() => readStorageFloat(RATE_STORAGE_KEY, 1.2)); + const [pitch, setPitch] = useState(() => readStorageFloat(PITCH_STORAGE_KEY, 1.0)); + const [voiceURI, setVoiceURI] = useState(() => readStorage(VOICE_STORAGE_KEY, '')); + /* NOTE(review): an empty lang, which the voice filter treats as all languages, is persisted as an empty string, but this initialiser replaces an empty stored value with navigator.language (or ja-JP where navigator is undefined), so that choice does not survive a reload. */ + const [lang, setLang] = useState(() => { + const stored = readStorage(LANG_STORAGE_KEY, ''); + return stored || (typeof navigator !== 'undefined' ? 
navigator.language : 'ja-JP'); + }); + const [isSpeaking, setIsSpeaking] = useState(false); + const [availableVoices, setAvailableVoices] = useState([]); + + // Seed to current tail so we don't replay historical messages on mount + const lastSpokenIndexRef = useRef(chatMessages.length - 1); + /* NOTE(review): lastStreamingContentRef is written in several places in this hook but never read. */ + const lastStreamingContentRef = useRef(null); + /* Refreshed on every render so that toggle, which is memoised with an empty dependency list, can read the latest length. */ + const chatMessagesLengthRef = useRef(chatMessages.length); + chatMessagesLengthRef.current = chatMessages.length; + + // Load available voices + /* Runs once on mount. Copies each voice into a plain VoiceInfo object, then reloads whenever onvoiceschanged fires. NOTE(review): assigning the onvoiceschanged property replaces any handler set elsewhere, and the cleanup resets it to null. */ + useEffect(() => { + if (typeof window === 'undefined' || !window.speechSynthesis) return; + + const loadVoices = () => { + const voices = window.speechSynthesis.getVoices(); + setAvailableVoices( + voices.map((v) => ({ + name: v.name, + lang: v.lang, + localService: v.localService, + voiceURI: v.voiceURI, + })), + ); + }; + + loadVoices(); + window.speechSynthesis.onvoiceschanged = loadVoices; + return () => { + window.speechSynthesis.onvoiceschanged = null; + }; + }, []); + + // Persist settings + /* One effect per setting, each writing its value when it changes; a failing setItem is deliberately ignored. */ + useEffect(() => { + try { localStorage.setItem(STORAGE_KEY, String(enabled)); } catch { /* noop */ } + }, [enabled]); + useEffect(() => { + try { localStorage.setItem(RATE_STORAGE_KEY, String(rate)); } catch { /* noop */ } + }, [rate]); + useEffect(() => { + try { localStorage.setItem(PITCH_STORAGE_KEY, String(pitch)); } catch { /* noop */ } + }, [pitch]); + useEffect(() => { + try { localStorage.setItem(VOICE_STORAGE_KEY, voiceURI); } catch { /* noop */ } + }, [voiceURI]); + useEffect(() => { + try { localStorage.setItem(LANG_STORAGE_KEY, lang); } catch { /* noop */ } + }, [lang]); + + // Monitor speechSynthesis state (only when TTS is active) + /* Copies speechSynthesis.speaking into isSpeaking every 200 ms; no interval is created while enabled and isSpeaking are both false. */ + useEffect(() => { + if (typeof window === 'undefined' || !window.speechSynthesis) return; + if (!enabled && !isSpeaking) return; + const interval = setInterval(() => { + setIsSpeaking(window.speechSynthesis.speaking); + }, 200); + return () => clearInterval(interval); + }, [enabled, isSpeaking]); + + // Get voices filtered by current language + const 
filteredVoices = availableVoices.filter((v) => { + /* An empty lang disables filtering. NOTE(review): matching on the bare prefix with startsWith means a lang of fi-FI also matches voices tagged fil-PH. */ + if (lang === '') return true; + const langPrefix = lang.split('-')[0]; + return v.lang.startsWith(langPrefix); + }); + + // Get unique language list from all voices + const availableLanguages = Array.from( + new Set(availableVoices.map((v) => v.lang)), + ).sort(); + + /* Speak the given text: strips markdown, cancels whatever is currently queued, then queues one utterance using the current lang, rate and pitch. Does nothing when text is empty, when nothing is left after stripping, or when speechSynthesis is unavailable. */ + const speak = useCallback( + (text: string) => { + if (!text || typeof window === 'undefined' || !window.speechSynthesis) return; + + const cleaned = stripMarkdown(text); + if (!cleaned) return; + + window.speechSynthesis.cancel(); + + const utterance = new SpeechSynthesisUtterance(cleaned); + utterance.lang = lang || (typeof navigator !== 'undefined' ? navigator.language : 'ja-JP'); + utterance.rate = rate; + utterance.pitch = pitch; + + // Find selected voice, or fall back to first matching voice + /* NOTE(review): the language fallback below only runs when voiceURI is empty; a voiceURI that matches no installed voice leaves utterance.voice unset. */ + const voices = window.speechSynthesis.getVoices(); + if (voiceURI) { + const selected = voices.find((v) => v.voiceURI === voiceURI); + if (selected) utterance.voice = selected; + } else { + const fallbackLang = lang || (typeof navigator !== 'undefined' ? 
navigator.language : 'ja-JP'); + const langPrefix = fallbackLang.split('-')[0]; + const fallback = voices.find((v) => v.lang.startsWith(langPrefix)); + if (fallback) utterance.voice = fallback; + } + + utterance.onend = () => setIsSpeaking(false); + utterance.onerror = () => setIsSpeaking(false); + + setIsSpeaking(true); + window.speechSynthesis.speak(utterance); + }, + [lang, rate, pitch, voiceURI], + ); + + /* Cancel any queued or active speech and clear isSpeaking. */ + const stop = useCallback(() => { + if (typeof window !== 'undefined' && window.speechSynthesis) { + window.speechSynthesis.cancel(); + setIsSpeaking(false); + } + }, []); + + /* Flip enabled. Turning it on re-seeds lastSpokenIndexRef to the current tail; turning it off cancels speech. NOTE(review): these side effects run inside the setEnabled updater, which React expects to be pure and may invoke twice in Strict Mode. */ + const toggle = useCallback(() => { + setEnabled((prev) => { + const next = !prev; + if (next) { + // Seed so we only speak messages appended after enabling + lastSpokenIndexRef.current = chatMessagesLengthRef.current - 1; + lastStreamingContentRef.current = null; + } else if (typeof window !== 'undefined' && window.speechSynthesis) { + window.speechSynthesis.cancel(); + setIsSpeaking(false); + } + return next; + }); + }, []); + + // Cancel active speech when provider unmounts + useEffect(() => { + return () => { + if (typeof window !== 'undefined' && window.speechSynthesis) { + window.speechSynthesis.cancel(); + } + }; + }, []); + + // Test current voice settings + /* NOTE(review): the sample sentence is a fixed Japanese plus English string, independent of the selected lang and of the i18n resources. */ + const testVoice = useCallback(() => { + speak('テスト音声です。Hello, this is a test.'); + }, [speak]); + + // Watch for finalized assistant messages + /* Looks only at the last message and speaks it when it is a non-streaming assistant message with content whose index is greater than lastSpokenIndexRef. isThinking is not declared on ChatMessage, so it is reached through the index signature as unknown. */ + useEffect(() => { + if (!enabled || chatMessages.length === 0) { + return; + } + + const lastIndex = chatMessages.length - 1; + const lastMsg = chatMessages[lastIndex]; + + if ( + !lastMsg || + lastMsg.type !== 'assistant' || + lastMsg.isToolUse || + lastMsg.isThinking || + lastMsg.isInteractivePrompt || + !lastMsg.content + ) { + lastStreamingContentRef.current = null; + return; + } + + if (lastMsg.isStreaming) { + lastStreamingContentRef.current = lastMsg.content; + return; + } + + if (lastIndex > lastSpokenIndexRef.current) { + lastSpokenIndexRef.current = lastIndex; + 
lastStreamingContentRef.current = null; + speak(lastMsg.content); + } + }, [chatMessages, enabled, speak]); + + /* speak is a dependency of the effect above, so a change to lang, rate, pitch or voiceURI re-runs it; the index comparison keeps an already spoken message from being repeated. */ + // Reset spoken index when messages are cleared (new session) + /* NOTE(review): the index is reset only when the list becomes empty; if a caller swaps in a different, longer non-empty list directly, its last assistant message would be treated as new. Confirm how sessions are switched. */ + useEffect(() => { + if (chatMessages.length === 0) { + lastSpokenIndexRef.current = -1; + lastStreamingContentRef.current = null; + } + }, [chatMessages.length]); + + return { + enabled, + toggle, + rate, + setRate, + pitch, + setPitch, + voiceURI, + setVoiceURI, + lang, + setLang, + isSpeaking, + speak, + stop, + testVoice, + availableVoices, + filteredVoices, + availableLanguages, + }; +} diff --git a/src/i18n/locales/en/settings.json b/src/i18n/locales/en/settings.json index 2c6a99e1d..73fb1ec12 100644 --- a/src/i18n/locales/en/settings.json +++ b/src/i18n/locales/en/settings.json @@ -72,6 +72,27 @@ "draggingStatus": "Dragging...", "toggleAndMove": "Click to toggle, drag to move" }, + "tts": { + "sectionTitle": "Text-to-Speech", + "enabled": "TTS Enabled", + "language": "Language", + "allLanguages": "All Languages", + "voice": "Voice ({{count}} available)", + "voiceAuto": "Auto (first match)", + "speed": "Speed", + "pitch": "Pitch", + "pitchLow": "Low", + "pitchNormal": "Normal", + "pitchHigh": "High", + "testVoice": "Test Voice", + "stop": "Stop", + "network": "Network", + "button": { + "stopSpeaking": "Stop speaking", + "ttsOn": "Text-to-Speech enabled (click to disable)", + "ttsOff": "Text-to-Speech disabled (click to enable)" + } + }, "whisper": { "modes": { "default": "Default Mode", diff --git a/src/i18n/locales/ja/settings.json b/src/i18n/locales/ja/settings.json index 4fd82ec8f..e890348fd 100644 --- a/src/i18n/locales/ja/settings.json +++ b/src/i18n/locales/ja/settings.json @@ -72,6 +72,27 @@ "draggingStatus": "ドラッグ中...", "toggleAndMove": "クリックで切替、ドラッグで移動" }, + "tts": { + "sectionTitle": "テキスト読み上げ", + "enabled": "TTS有効", + "language": "言語", + "allLanguages": "すべての言語", + "voice": "音声({{count}}件利用可能)", + "voiceAuto": "自動(最初に一致)", + "speed": "速度", + "pitch": "ピッチ", + "pitchLow": "低", 
+ "pitchNormal": "標準", + "pitchHigh": "高", + "testVoice": "音声テスト", + "stop": "停止", + "network": "ネットワーク", + "button": { + "stopSpeaking": "読み上げを停止", + "ttsOn": "テキスト読み上げ有効(クリックで無効化)", + "ttsOff": "テキスト読み上げ無効(クリックで有効化)" + } + }, "whisper": { "modes": { "default": "標準モード", diff --git a/src/i18n/locales/ko/settings.json b/src/i18n/locales/ko/settings.json index f452291f9..9687d1d99 100644 --- a/src/i18n/locales/ko/settings.json +++ b/src/i18n/locales/ko/settings.json @@ -72,6 +72,27 @@ "draggingStatus": "드래그 중...", "toggleAndMove": "클릭하여 토글, 드래그하여 이동" }, + "tts": { + "sectionTitle": "텍스트 음성 변환", + "enabled": "TTS 활성화", + "language": "언어", + "allLanguages": "모든 언어", + "voice": "음성 ({{count}}개 사용 가능)", + "voiceAuto": "자동 (첫 번째 일치)", + "speed": "속도", + "pitch": "피치", + "pitchLow": "낮음", + "pitchNormal": "보통", + "pitchHigh": "높음", + "testVoice": "음성 테스트", + "stop": "중지", + "network": "네트워크", + "button": { + "stopSpeaking": "읽기 중지", + "ttsOn": "텍스트 음성 변환 활성화 (클릭하여 비활성화)", + "ttsOff": "텍스트 음성 변환 비활성화 (클릭하여 활성화)" + } + }, "whisper": { "modes": { "default": "기본 모드", diff --git a/src/i18n/locales/zh-CN/settings.json b/src/i18n/locales/zh-CN/settings.json index cdfb54979..6acd8c1fa 100644 --- a/src/i18n/locales/zh-CN/settings.json +++ b/src/i18n/locales/zh-CN/settings.json @@ -72,6 +72,27 @@ "draggingStatus": "正在拖拽...", "toggleAndMove": "点击切换,拖拽移动" }, + "tts": { + "sectionTitle": "文字转语音", + "enabled": "TTS 已启用", + "language": "语言", + "allLanguages": "所有语言", + "voice": "语音({{count}}个可用)", + "voiceAuto": "自动(第一个匹配)", + "speed": "速度", + "pitch": "音调", + "pitchLow": "低", + "pitchNormal": "正常", + "pitchHigh": "高", + "testVoice": "测试语音", + "stop": "停止", + "network": "网络", + "button": { + "stopSpeaking": "停止朗读", + "ttsOn": "文字转语音已启用(点击禁用)", + "ttsOff": "文字转语音已禁用(点击启用)" + } + }, "whisper": { "modes": { "default": "默认模式",