From 09f8fede70ae67d8cac4dc77de8899c283811f1d Mon Sep 17 00:00:00 2001
From: Kawano
Date: Mon, 9 Mar 2026 06:09:06 +0900
Subject: [PATCH 1/7] feat: add Text-to-Speech support via Web Speech API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add browser-native TTS integration that reads Claude's responses aloud:
- useSpeechOutput hook: Web Speech API wrapper with voice selection,
rate/pitch control, language filtering, and streaming-aware chunking
- TtsContext: React context provider for app-wide TTS state
- QuickSettingsTtsSection: Full TTS configuration panel with voice
preview, rate/pitch sliders, and language filter
- ChatComposer: Toggle button with lucide-react icons (Volume2/VolumeX/
StopCircle) and graceful degradation when no voices available
- QuickSettingsContent: TTS section with voices=0 guard
All settings persist to localStorage. Falls back to navigator.language
instead of hardcoded locale. No external dependencies required — uses
the browser's built-in speechSynthesis API.
Co-Authored-By: Claude Opus 4.6
---
src/components/chat/view/ChatInterface.tsx | 5 +-
.../chat/view/subcomponents/ChatComposer.tsx | 26 ++
.../view/QuickSettingsContent.tsx | 30 ++
.../view/QuickSettingsTtsSection.tsx | 188 +++++++++++++
src/contexts/TtsContext.tsx | 49 ++++
src/hooks/useSpeechOutput.ts | 265 ++++++++++++++++++
6 files changed, 561 insertions(+), 2 deletions(-)
create mode 100644 src/components/quick-settings-panel/view/QuickSettingsTtsSection.tsx
create mode 100644 src/contexts/TtsContext.tsx
create mode 100644 src/hooks/useSpeechOutput.ts
diff --git a/src/components/chat/view/ChatInterface.tsx b/src/components/chat/view/ChatInterface.tsx
index 90c1921d7..9e1db3976 100644
--- a/src/components/chat/view/ChatInterface.tsx
+++ b/src/components/chat/view/ChatInterface.tsx
@@ -7,6 +7,7 @@ import { useChatProviderState } from '../hooks/useChatProviderState';
import { useChatSessionState } from '../hooks/useChatSessionState';
import { useChatRealtimeHandlers } from '../hooks/useChatRealtimeHandlers';
import { useChatComposerState } from '../hooks/useChatComposerState';
+import { TtsProvider, useTts } from '../../../contexts/TtsContext';
import ChatMessagesPane from './subcomponents/ChatMessagesPane';
import ChatComposer from './subcomponents/ChatComposer';
@@ -272,7 +273,7 @@ function ChatInterface({
}
return (
- <>
+
- >
+
);
}
diff --git a/src/components/chat/view/subcomponents/ChatComposer.tsx b/src/components/chat/view/subcomponents/ChatComposer.tsx
index 35bf7548b..0da6d6a36 100644
--- a/src/components/chat/view/subcomponents/ChatComposer.tsx
+++ b/src/components/chat/view/subcomponents/ChatComposer.tsx
@@ -11,7 +11,9 @@ import type {
SetStateAction,
TouchEvent,
} from 'react';
+import { Volume2, VolumeX, StopCircle } from 'lucide-react';
import MicButton from '../../../mic-button/view/MicButton';
+import { useTts } from '../../../../contexts/TtsContext';
import type { PendingPermissionRequest, PermissionMode, Provider } from '../../types/types';
import CommandMenu from './CommandMenu';
import ClaudeStatus from './ClaudeStatus';
@@ -150,6 +152,7 @@ export default function ChatComposer({
sendByCtrlEnter,
onTranscript,
}: ChatComposerProps) {
+ const tts = useTts();
const { t } = useTranslation('chat');
const textareaRect = textareaRef.current?.getBoundingClientRect();
const commandMenuPosition = {
@@ -326,6 +329,29 @@ export default function ChatComposer({
+ {tts && tts.availableVoices.length > 0 && (
+
+ )}
+
+ );
+}
type QuickSettingsContentProps = {
isDarkMode: boolean;
@@ -76,6 +104,8 @@ export default function QuickSettingsContent({
+
+
);
diff --git a/src/components/quick-settings-panel/view/QuickSettingsTtsSection.tsx b/src/components/quick-settings-panel/view/QuickSettingsTtsSection.tsx
new file mode 100644
index 000000000..aa70dd49f
--- /dev/null
+++ b/src/components/quick-settings-panel/view/QuickSettingsTtsSection.tsx
@@ -0,0 +1,188 @@
+import { Volume2, VolumeX, Play } from 'lucide-react';
+import type { VoiceInfo } from '../../../hooks/useSpeechOutput';
+import { SETTING_ROW_CLASS } from '../constants';
+import QuickSettingsSection from './QuickSettingsSection';
+
// Props for the TTS configuration panel in the quick-settings drawer.
type QuickSettingsTtsSectionProps = {
  // Master on/off switch state and its toggle callback.
  enabled: boolean;
  onToggle: () => void;
  // Speech rate multiplier (slider range 0.5x–3.0x; 1.0 = normal speed).
  rate: number;
  onRateChange: (rate: number) => void;
  // Voice pitch (1.0 = default).
  pitch: number;
  onPitchChange: (pitch: number) => void;
  // URI of the selected system voice; '' means pick a voice automatically.
  voiceURI: string;
  onVoiceChange: (voiceURI: string) => void;
  // Language tag used to filter the voice list; '' means show all voices.
  lang: string;
  onLangChange: (lang: string) => void;
  // Voices matching the current language filter.
  filteredVoices: VoiceInfo[];
  // All language tags present across the full system voice list.
  availableLanguages: string[];
  // Speak a short sample with the current settings.
  onTestVoice: () => void;
  // Whether speech is currently playing, and the callback to stop it.
  isSpeaking: boolean;
  onStop: () => void;
};
+
+export default function QuickSettingsTtsSection({
+ enabled,
+ onToggle,
+ rate,
+ onRateChange,
+ pitch,
+ onPitchChange,
+ voiceURI,
+ onVoiceChange,
+ lang,
+ onLangChange,
+ filteredVoices,
+ availableLanguages,
+ onTestVoice,
+ isSpeaking,
+ onStop,
+}: QuickSettingsTtsSectionProps) {
+ return (
+
+ {/* Enable/Disable toggle */}
+
+
+ {enabled ? (
+
+ ) : (
+
+ )}
+ TTS Enabled
+
+
+
+
+ {enabled && (
+ <>
+ {/* Language filter */}
+
+
+
+
+
+ {/* Voice selection */}
+
+
+
+
+
+ {/* Rate slider */}
+
+
+
+
+ {rate.toFixed(1)}x
+
+
+
onRateChange(parseFloat(e.target.value))}
+ className="w-full accent-blue-600"
+ />
+
+ 0.5x
+ 1.0x
+ 2.0x
+ 3.0x
+
+
+
+ {/* Pitch slider */}
+
+
+
+
+ {pitch.toFixed(1)}
+
+
+
onPitchChange(parseFloat(e.target.value))}
+ className="w-full accent-blue-600"
+ />
+
+ Low
+ Normal
+ High
+
+
+
+ {/* Test / Stop button */}
+
+
+
+ >
+ )}
+
+ );
+}
diff --git a/src/contexts/TtsContext.tsx b/src/contexts/TtsContext.tsx
new file mode 100644
index 000000000..aea9960a9
--- /dev/null
+++ b/src/contexts/TtsContext.tsx
@@ -0,0 +1,49 @@
+import { createContext, useContext, type ReactNode } from 'react';
+import { useSpeechOutput } from '../hooks/useSpeechOutput';
+import type { VoiceInfo } from '../hooks/useSpeechOutput';
+
// Shape of the value exposed through TtsContext: persisted TTS settings,
// playback state/controls, and voice lists derived from the Web Speech API.
type TtsContextValue = {
  enabled: boolean;              // master TTS on/off switch
  toggle: () => void;
  rate: number;                  // speech rate multiplier
  setRate: (rate: number) => void;
  pitch: number;                 // voice pitch
  setPitch: (pitch: number) => void;
  voiceURI: string;              // selected voice URI ('' = automatic)
  setVoiceURI: (uri: string) => void;
  lang: string;                  // language filter for the voice list
  setLang: (lang: string) => void;
  isSpeaking: boolean;           // whether speech is currently playing
  speak: (text: string) => void;
  stop: () => void;
  testVoice: () => void;         // speak a short preview sample
  availableVoices: VoiceInfo[];  // every system voice
  filteredVoices: VoiceInfo[];   // voices matching `lang`
  availableLanguages: string[];  // unique language tags across all voices
};
+
+const TtsContext = createContext(null);
+
// Minimal view of a chat message needed for TTS; any extra fields pass
// through the index signature untouched.
// NOTE(review): duplicated in src/hooks/useSpeechOutput.ts — consider moving
// to a shared types module.
type ChatMessage = {
  type: string;                  // message role tag, e.g. 'assistant'
  content?: string;              // text to be spoken once finalized
  isStreaming?: boolean;         // true while the message is still arriving
  isToolUse?: boolean;           // tool-use messages are never spoken
  isInteractivePrompt?: boolean; // interactive prompts are never spoken
  [key: string]: unknown;
};
+
+export function TtsProvider({
+ chatMessages,
+ children,
+}: {
+ chatMessages: ChatMessage[];
+ children: ReactNode;
+}) {
+ const tts = useSpeechOutput(chatMessages);
+ return {children};
+}
+
+export function useTts(): TtsContextValue | null {
+ return useContext(TtsContext);
+}
diff --git a/src/hooks/useSpeechOutput.ts b/src/hooks/useSpeechOutput.ts
new file mode 100644
index 000000000..f402b6c35
--- /dev/null
+++ b/src/hooks/useSpeechOutput.ts
@@ -0,0 +1,265 @@
+import { useCallback, useEffect, useRef, useState } from 'react';
+
+type ChatMessage = {
+ type: string;
+ content?: string;
+ isStreaming?: boolean;
+ isToolUse?: boolean;
+ isInteractivePrompt?: boolean;
+ [key: string]: unknown;
+};
+
+export type VoiceInfo = {
+ name: string;
+ lang: string;
+ localService: boolean;
+ voiceURI: string;
+};
+
+const STORAGE_KEY = 'tts_enabled';
+const RATE_STORAGE_KEY = 'tts_rate';
+const PITCH_STORAGE_KEY = 'tts_pitch';
+const VOICE_STORAGE_KEY = 'tts_voice_uri';
+const LANG_STORAGE_KEY = 'tts_lang';
+
+/**
+ * Strip markdown formatting for cleaner TTS output.
+ */
+function stripMarkdown(text: string): string {
+ return text
+ .replace(/```[\s\S]*?```/g, '')
+ .replace(/`[^`]+`/g, '')
+ .replace(/\*\*(.+?)\*\*/g, '$1')
+ .replace(/\*(.+?)\*/g, '$1')
+ .replace(/__(.+?)__/g, '$1')
+ .replace(/_(.+?)_/g, '$1')
+ .replace(/~~(.+?)~~/g, '$1')
+ .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
+ .replace(/^#{1,6}\s+/gm, '')
+ .replace(/^[-*+]\s+/gm, '')
+ .replace(/^\d+\.\s+/gm, '')
+ .replace(/^>\s+/gm, '')
+ .replace(/\n{2,}/g, '\n')
+ .trim();
+}
+
+function readStorage(key: string, fallback: string): string {
+ try {
+ return localStorage.getItem(key) ?? fallback;
+ } catch {
+ return fallback;
+ }
+}
+
+function readStorageFloat(key: string, fallback: number): number {
+ try {
+ const v = localStorage.getItem(key);
+ return v ? parseFloat(v) : fallback;
+ } catch {
+ return fallback;
+ }
+}
+
+/**
+ * Hook that speaks finalized assistant messages using the Web Speech API.
+ *
+ * Features:
+ * - Voice selection from available system voices
+ * - Adjustable rate and pitch
+ * - Language filter for voice list
+ * - All settings persisted in localStorage
+ */
+export function useSpeechOutput(chatMessages: ChatMessage[]) {
+ const [enabled, setEnabled] = useState(() => readStorage(STORAGE_KEY, 'false') === 'true');
+ const [rate, setRate] = useState(() => readStorageFloat(RATE_STORAGE_KEY, 1.2));
+ const [pitch, setPitch] = useState(() => readStorageFloat(PITCH_STORAGE_KEY, 1.0));
+ const [voiceURI, setVoiceURI] = useState(() => readStorage(VOICE_STORAGE_KEY, ''));
+ const [lang, setLang] = useState(() => {
+ const stored = readStorage(LANG_STORAGE_KEY, '');
+ return stored || (typeof navigator !== 'undefined' ? navigator.language : 'ja-JP');
+ });
+ const [isSpeaking, setIsSpeaking] = useState(false);
+ const [availableVoices, setAvailableVoices] = useState([]);
+
+ const lastSpokenIndexRef = useRef(-1);
+ const lastStreamingContentRef = useRef(null);
+
+ // Load available voices
+ useEffect(() => {
+ if (typeof window === 'undefined' || !window.speechSynthesis) return;
+
+ const loadVoices = () => {
+ const voices = window.speechSynthesis.getVoices();
+ setAvailableVoices(
+ voices.map((v) => ({
+ name: v.name,
+ lang: v.lang,
+ localService: v.localService,
+ voiceURI: v.voiceURI,
+ })),
+ );
+ };
+
+ loadVoices();
+ window.speechSynthesis.onvoiceschanged = loadVoices;
+ return () => {
+ window.speechSynthesis.onvoiceschanged = null;
+ };
+ }, []);
+
+ // Persist settings
+ useEffect(() => {
+ try { localStorage.setItem(STORAGE_KEY, String(enabled)); } catch { /* noop */ }
+ }, [enabled]);
+ useEffect(() => {
+ try { localStorage.setItem(RATE_STORAGE_KEY, String(rate)); } catch { /* noop */ }
+ }, [rate]);
+ useEffect(() => {
+ try { localStorage.setItem(PITCH_STORAGE_KEY, String(pitch)); } catch { /* noop */ }
+ }, [pitch]);
+ useEffect(() => {
+ try { localStorage.setItem(VOICE_STORAGE_KEY, voiceURI); } catch { /* noop */ }
+ }, [voiceURI]);
+ useEffect(() => {
+ try { localStorage.setItem(LANG_STORAGE_KEY, lang); } catch { /* noop */ }
+ }, [lang]);
+
+ // Monitor speechSynthesis state
+ useEffect(() => {
+ if (typeof window === 'undefined' || !window.speechSynthesis) return;
+ const interval = setInterval(() => {
+ setIsSpeaking(window.speechSynthesis.speaking);
+ }, 200);
+ return () => clearInterval(interval);
+ }, []);
+
+ // Get voices filtered by current language
+ const filteredVoices = availableVoices.filter((v) => {
+ if (lang === '') return true;
+ const langPrefix = lang.split('-')[0];
+ return v.lang.startsWith(langPrefix);
+ });
+
+ // Get unique language list from all voices
+ const availableLanguages = Array.from(
+ new Set(availableVoices.map((v) => v.lang)),
+ ).sort();
+
+ const speak = useCallback(
+ (text: string) => {
+ if (!text || typeof window === 'undefined' || !window.speechSynthesis) return;
+
+ const cleaned = stripMarkdown(text);
+ if (!cleaned) return;
+
+ window.speechSynthesis.cancel();
+
+ const utterance = new SpeechSynthesisUtterance(cleaned);
+ utterance.lang = lang || (typeof navigator !== 'undefined' ? navigator.language : 'ja-JP');
+ utterance.rate = rate;
+ utterance.pitch = pitch;
+
+ // Find selected voice, or fall back to first matching voice
+ const voices = window.speechSynthesis.getVoices();
+ if (voiceURI) {
+ const selected = voices.find((v) => v.voiceURI === voiceURI);
+ if (selected) utterance.voice = selected;
+ } else {
+ const fallbackLang = lang || (typeof navigator !== 'undefined' ? navigator.language : 'ja-JP');
+ const langPrefix = fallbackLang.split('-')[0];
+ const fallback = voices.find((v) => v.lang.startsWith(langPrefix));
+ if (fallback) utterance.voice = fallback;
+ }
+
+ utterance.onend = () => setIsSpeaking(false);
+ utterance.onerror = () => setIsSpeaking(false);
+
+ setIsSpeaking(true);
+ window.speechSynthesis.speak(utterance);
+ },
+ [lang, rate, pitch, voiceURI],
+ );
+
+ const stop = useCallback(() => {
+ if (typeof window !== 'undefined' && window.speechSynthesis) {
+ window.speechSynthesis.cancel();
+ setIsSpeaking(false);
+ }
+ }, []);
+
+ const toggle = useCallback(() => {
+ setEnabled((prev) => {
+ const next = !prev;
+ if (!next && typeof window !== 'undefined' && window.speechSynthesis) {
+ window.speechSynthesis.cancel();
+ setIsSpeaking(false);
+ }
+ return next;
+ });
+ }, []);
+
+ // Test current voice settings
+ const testVoice = useCallback(() => {
+ speak('テスト音声です。Hello, this is a test.');
+ }, [speak]);
+
+ // Watch for finalized assistant messages
+ useEffect(() => {
+ if (!enabled || chatMessages.length === 0) {
+ return;
+ }
+
+ const lastIndex = chatMessages.length - 1;
+ const lastMsg = chatMessages[lastIndex];
+
+ if (
+ !lastMsg ||
+ lastMsg.type !== 'assistant' ||
+ lastMsg.isToolUse ||
+ lastMsg.isInteractivePrompt ||
+ !lastMsg.content
+ ) {
+ lastStreamingContentRef.current = null;
+ return;
+ }
+
+ if (lastMsg.isStreaming) {
+ lastStreamingContentRef.current = lastMsg.content;
+ return;
+ }
+
+ if (lastIndex > lastSpokenIndexRef.current) {
+ lastSpokenIndexRef.current = lastIndex;
+ lastStreamingContentRef.current = null;
+ speak(lastMsg.content);
+ }
+ }, [chatMessages, enabled, speak]);
+
+ // Reset spoken index when messages are cleared (new session)
+ useEffect(() => {
+ if (chatMessages.length === 0) {
+ lastSpokenIndexRef.current = -1;
+ lastStreamingContentRef.current = null;
+ }
+ }, [chatMessages.length]);
+
+ return {
+ enabled,
+ toggle,
+ rate,
+ setRate,
+ pitch,
+ setPitch,
+ voiceURI,
+ setVoiceURI,
+ lang,
+ setLang,
+ isSpeaking,
+ speak,
+ stop,
+ testVoice,
+ availableVoices,
+ filteredVoices,
+ availableLanguages,
+ };
+}
From 81d32658b7dfd1fe68cca80f7472086f9e1975bb Mon Sep 17 00:00:00 2001
From: Kawano
Date: Mon, 9 Mar 2026 06:20:59 +0900
Subject: [PATCH 2/7] =?UTF-8?q?fix:=20address=20CodeRabbit=20review=20?=
=?UTF-8?q?=E2=80=94=20i18n,=20a11y,=20unmount=20cleanup?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
- Add aria-label to TTS toggle button in ChatComposer (a11y)
- Move all hardcoded TTS UI strings to i18n (en/ja/ko/zh-CN)
- Add htmlFor/id pairs to all TTS form controls (a11y)
- Add aria-labelledby to TTS enabled switch (a11y)
- Cancel active speech on TtsProvider unmount (cleanup)
Co-Authored-By: Claude Opus 4.6
---
.../chat/view/subcomponents/ChatComposer.tsx | 4 +-
.../view/QuickSettingsTtsSection.tsx | 46 ++++++++++++-------
src/hooks/useSpeechOutput.ts | 9 ++++
src/i18n/locales/en/settings.json | 21 +++++++++
src/i18n/locales/ja/settings.json | 21 +++++++++
src/i18n/locales/ko/settings.json | 21 +++++++++
src/i18n/locales/zh-CN/settings.json | 21 +++++++++
7 files changed, 126 insertions(+), 17 deletions(-)
diff --git a/src/components/chat/view/subcomponents/ChatComposer.tsx b/src/components/chat/view/subcomponents/ChatComposer.tsx
index 0da6d6a36..1a3c52c2e 100644
--- a/src/components/chat/view/subcomponents/ChatComposer.tsx
+++ b/src/components/chat/view/subcomponents/ChatComposer.tsx
@@ -154,6 +154,7 @@ export default function ChatComposer({
}: ChatComposerProps) {
const tts = useTts();
const { t } = useTranslation('chat');
+ const { t: tSettings } = useTranslation('settings');
const textareaRect = textareaRef.current?.getBoundingClientRect();
const commandMenuPosition = {
top: textareaRect ? Math.max(16, textareaRect.top - 316) : 0,
@@ -340,7 +341,8 @@ export default function ChatComposer({
: 'text-primary hover:bg-accent/60'
: 'text-muted-foreground hover:bg-accent/60'
}`}
- title={tts.isSpeaking ? 'Stop speaking' : tts.enabled ? 'TTS ON (click to disable)' : 'TTS OFF (click to enable)'}
+ aria-label={tts.isSpeaking ? tSettings('quickSettings.tts.button.stopSpeaking') : tts.enabled ? tSettings('quickSettings.tts.button.ttsOn') : tSettings('quickSettings.tts.button.ttsOff')}
+ title={tts.isSpeaking ? tSettings('quickSettings.tts.button.stopSpeaking') : tts.enabled ? tSettings('quickSettings.tts.button.ttsOn') : tSettings('quickSettings.tts.button.ttsOff')}
>
{tts.isSpeaking ? (
diff --git a/src/components/quick-settings-panel/view/QuickSettingsTtsSection.tsx b/src/components/quick-settings-panel/view/QuickSettingsTtsSection.tsx
index aa70dd49f..3625dad77 100644
--- a/src/components/quick-settings-panel/view/QuickSettingsTtsSection.tsx
+++ b/src/components/quick-settings-panel/view/QuickSettingsTtsSection.tsx
@@ -1,3 +1,4 @@
+import { useTranslation } from 'react-i18next';
import { Volume2, VolumeX, Play } from 'lucide-react';
import type { VoiceInfo } from '../../../hooks/useSpeechOutput';
import { SETTING_ROW_CLASS } from '../constants';
@@ -38,22 +39,25 @@ export default function QuickSettingsTtsSection({
isSpeaking,
onStop,
}: QuickSettingsTtsSectionProps) {
+ const { t } = useTranslation('settings');
+
return (
-
+
{/* Enable/Disable toggle */}
-
+
{enabled ? (
) : (
)}
- TTS Enabled
+ {t('quickSettings.tts.enabled')}