diff --git a/apps/api/src/routes/books.test.ts b/apps/api/src/routes/books.test.ts
index 32105f90..b6bc070e 100644
--- a/apps/api/src/routes/books.test.ts
+++ b/apps/api/src/routes/books.test.ts
@@ -529,8 +529,8 @@ describe("POST /books/:label/stages/run", () => {
         "X-Gemini-API-Key": "gm-test",
       },
       body: JSON.stringify({
-        fromStage: "text-and-speech",
-        toStage: "text-and-speech",
+        fromStage: "translation",
+        toStage: "speech",
       }),
     })
 
diff --git a/apps/api/src/routes/pages.test.ts b/apps/api/src/routes/pages.test.ts
index 740bfdd6..42a3f395 100644
--- a/apps/api/src/routes/pages.test.ts
+++ b/apps/api/src/routes/pages.test.ts
@@ -667,7 +667,7 @@ describe("Page routes", () => {
     }
   }
 
-  /** Assert that all caption + text-and-speech node data and step_runs were cleared. */
+  /** Assert that all caption + translation + speech node data and step_runs were cleared. */
   function expectAllDownstreamCleared(dir: string, bookLabel: string) {
     const s = createBookStorage(bookLabel, dir)
     try {
@@ -687,7 +687,7 @@ describe("Page routes", () => {
     }
   }
 
-  /** Assert that text-and-speech (but NOT image-captioning) node data and step_runs were cleared. */
+  /** Assert that translation + speech (but NOT image-captioning) node data and step_runs were cleared. */
   function expectTextAndSpeechCleared(dir: string, bookLabel: string) {
     const s = createBookStorage(bookLabel, dir)
     try {
@@ -709,7 +709,7 @@ describe("Page routes", () => {
   }
 
   describe("PUT /api/books/:label/pages/:pageId/sectioning clears downstream", () => {
-    it("clears caption + text-and-speech data on sectioning save", async () => {
+    it("clears caption + translation + speech data on sectioning save", async () => {
       seedDownstreamData(tmpDir, label)
 
       const data = {
@@ -746,7 +746,7 @@ describe("Page routes", () => {
   })
 
   describe("PUT /api/books/:label/pages/:pageId/rendering clears downstream", () => {
-    it("clears caption + text-and-speech data on rendering save", async () => {
+    it("clears caption + translation + speech data on rendering save", async () => {
       seedDownstreamData(tmpDir, label)
 
       const data = {
@@ -773,7 +773,7 @@ describe("Page routes", () => {
   })
 
   describe("POST clone clears downstream", () => {
-    it("clears caption + text-and-speech data on section clone", async () => {
+    it("clears caption + translation + speech data on section clone", async () => {
       seedDownstreamData(tmpDir, label)
 
       const res = await app.request(
@@ -787,7 +787,7 @@ describe("Page routes", () => {
   })
 
   describe("POST delete clears downstream", () => {
-    it("clears caption + text-and-speech data on section delete", async () => {
+    it("clears caption + translation + speech data on section delete", async () => {
       // Need at least 2 sections so delete is valid
       const s = createBookStorage(label, tmpDir)
       try {
@@ -837,7 +837,7 @@ describe("Page routes", () => {
   })
 
   describe("POST crop (images) clears downstream", () => {
-    it("clears caption + text-and-speech data on image crop", async () => {
+    it("clears caption + translation + speech data on image crop", async () => {
       seedDownstreamData(tmpDir, label)
 
       // Minimal valid PNG (1x1 pixel)
@@ -865,7 +865,7 @@ describe("Page routes", () => {
     })
   })
 
-  describe("PUT image-captioning clears text-and-speech downstream", () => {
+  describe("PUT image-captioning clears translation + speech downstream", () => {
     it("clears text-catalog/translations/TTS but keeps image-captioning", async () => {
       seedDownstreamData(tmpDir, label)
 
diff --git a/apps/api/src/routes/pages.ts b/apps/api/src/routes/pages.ts
index b6b76bf3..1e29cae4 100644
--- a/apps/api/src/routes/pages.ts
+++ b/apps/api/src/routes/pages.ts
@@ -326,7 +326,7 @@ async function executeAiImageGeneration(params: AiImageGenParams): Promise<{
   }
 }
 
-/** Clear caption + downstream text-and-speech data when images change. */
+/** Clear caption + downstream translation + speech data when images change. */
 function clearCaptionData(storage: Storage): void {
   storage.clearNodesByType(["image-captioning", "text-catalog", "text-catalog-translation", "tts"])
   storage.clearStepRuns(["image-captioning", "text-catalog", "catalog-translation", "tts"])
diff --git a/apps/api/src/routes/stages.ts b/apps/api/src/routes/stages.ts
index e2bdc40e..f6fc2d9b 100644
--- a/apps/api/src/routes/stages.ts
+++ b/apps/api/src/routes/stages.ts
@@ -135,6 +135,43 @@ export function createStageRoutes(
     return c.json({ status: result.status, label, fromStage, toStage })
   })
 
+  // DELETE /books/:label/stages/:stageName — Clear a stage's data and step runs
+  app.delete("/books/:label/stages/:stageName", (c) => {
+    const { label, stageName } = c.req.param()
+    // Validate the label before opening storage; a parse failure becomes an HTTP 400.
+    let safeLabel: string
+    try {
+      safeLabel = parseBookLabel(label)
+    } catch (err) {
+      const message = err instanceof Error ? err.message : String(err)
+      throw new HTTPException(400, { message })
+    }
+    // Reject stage names that the StageName schema does not accept (HTTP 400).
+    const parsed = StageName.safeParse(stageName)
+    if (!parsed.success) {
+      throw new HTTPException(400, { message: `Invalid stage name: ${stageName}` })
+    }
+    // From here: clear the node types from getStageClearNodes(stage), then every step run of the stages in getStageClearOrder(stage).
+    const stage = parsed.data
+    const storage = createBookStorage(safeLabel, booksDir)
+    try {
+      const nodes = getStageClearNodes(stage)
+      if (nodes.length > 0) {
+        storage.clearNodesByType(nodes)
+      }
+      const stagesToClear = getStageClearOrder(stage)
+      const stepsToClear = PIPELINE
+        .filter((s) => stagesToClear.includes(s.name))
+        .flatMap((s) => s.steps.map((step) => step.name))
+      storage.clearStepRuns(stepsToClear)
+    } finally {
+      storage.close()
+    }
+    // Reached only when clearing did not throw; storage is already closed here.
+    console.log(`[stages] ${label}: cleared stage ${stage}`)
+    return c.json({ ok: true, stage })
+  })
+
   // GET /books/:label/step-status — Unified stage + step status
   // DB step_runs is the single source of truth for step/stage state.
   // Only "queued" comes from the in-memory run queue.
diff --git a/apps/api/src/services/stage-runner.test.ts b/apps/api/src/services/stage-runner.test.ts
index f54b0a9a..36a660e4 100644
--- a/apps/api/src/services/stage-runner.test.ts
+++ b/apps/api/src/services/stage-runner.test.ts
@@ -177,6 +177,12 @@ function seedTextAndSpeechBook(booksDir: string, label: string): void {
         },
       ],
     })
+
+    // Pre-seed text catalog so the speech stage can read it
+    storage.putNodeData("text-catalog", "book", {
+      entries: [{ id: "pg001_t001", text: "Hello world" }],
+      generatedAt: new Date().toISOString(),
+    })
   } finally {
     storage.close()
   }
@@ -351,7 +357,7 @@ describe("createStageRunner storyboard render-only", () => {
   })
 })
 
-describe("createStageRunner text-and-speech Gemini partial failures", () => {
+describe("createStageRunner speech Gemini partial failures", () => {
   let tmpDir = ""
 
   beforeEach(() => {
@@ -402,8 +408,8 @@ speech:
         geminiApiKey: "gm-test",
         promptsDir,
         configPath,
-        fromStage: "text-and-speech",
-        toStage: "text-and-speech",
+        fromStage: "speech",
+        toStage: "speech",
       },
       { emit: (event) => events.push(event) }
     )
@@ -472,8 +478,8 @@ speech:
         geminiApiKey: "gm-test",
         promptsDir,
         configPath,
-        fromStage: "text-and-speech",
-        toStage: "text-and-speech",
+        fromStage: "speech",
+        toStage: "speech",
       },
       { emit: (event) => events.push(event) }
     )
diff --git a/apps/api/src/services/stage-runner.ts b/apps/api/src/services/stage-runner.ts
index 431485b8..38f515bb 100644
--- a/apps/api/src/services/stage-runner.ts
+++ b/apps/api/src/services/stage-runner.ts
@@ -169,7 +169,8 @@ const STAGE_RUNNERS: Record<StageName, RunFn> = {
   "captions": runCaptionsStep,
   "glossary": runGlossaryStep,
   "toc": runTocStep,
-  "text-and-speech": runTextAndSpeechStep,
+  "translation": runTranslationStep,
+  "speech": runSpeechStep,
   "package": async () => { /* packaging handled separately */ },
 }
 
@@ -1350,7 +1351,7 @@ async function runTocStep(
-// Text & Speech stage (text catalog + catalog translation + TTS)
+// Translation and Speech stages (text catalog + catalog translation, then TTS)
 // ---------------------------------------------------------------------------
 
-async function runTextAndSpeechStep(
+async function runTranslationStep(
   label: string,
   options: StageRunOptions,
   progress: StageRunProgress
@@ -1365,15 +1366,11 @@ async function runTextAndSpeechStep(
   try {
     const config = loadBookConfig(label, booksDir, configPath)
     const cacheDir = path.join(path.resolve(booksDir), label, ".cache")
-    const bookDir = path.join(path.resolve(booksDir), label)
     const bookPromptsDir = path.join(path.resolve(booksDir), label, "prompts")
     const promptEngine = createPromptEngine([bookPromptsDir, promptsDir])
     const rateLimiter = config.rate_limit
       ? createRateLimiter(config.rate_limit.requests_per_minute)
       : undefined
-    const configDir = configPath
-      ? path.join(path.dirname(configPath), "config")
-      : path.resolve(process.cwd(), "config")
 
     // Get book language from metadata
     const metadataRow = storage.getLatestNodeData("metadata", "book")
@@ -1514,8 +1511,61 @@ async function runTextAndSpeechStep(
       progress.emit({ type: "step-complete", step: "catalog-translation" })
       console.log(`[stage-run] ${label}: catalog translation complete`)
     }
+  } finally {
+    storage.close()
+    if (previousKey !== undefined) {
+      process.env.OPENAI_API_KEY = previousKey
+    } else {
+      delete process.env.OPENAI_API_KEY
+    }
+  }
+}
+
+async function runSpeechStep(
+  label: string,
+  options: StageRunOptions,
+  progress: StageRunProgress
+): Promise<void> {
+  const { booksDir, apiKey, configPath } = options
+
+  const previousKey = process.env.OPENAI_API_KEY
+  process.env.OPENAI_API_KEY = apiKey
+
+  const storage = createBookStorage(label, booksDir)
+
+  try {
+    const config = loadBookConfig(label, booksDir, configPath)
+    const cacheDir = path.join(path.resolve(booksDir), label, ".cache")
+    const bookDir = path.join(path.resolve(booksDir), label)
+    const configDir = configPath
+      ? path.join(path.dirname(configPath), "config")
+      : path.resolve(process.cwd(), "config")
+
+    // Get book language from metadata
+    const metadataRow = storage.getLatestNodeData("metadata", "book")
+    const metadata = metadataRow?.data as { language_code?: string | null } | null
+    const language = normalizeLocale(config.editing_language ?? metadata?.language_code ?? "en")
+
+    const effectiveConcurrency = config.concurrency ?? 32
+
+    // Output languages default to editing language if not set
+    const outputLanguages = Array.from(
+      new Set(
+        (config.output_languages && config.output_languages.length > 0
+          ? config.output_languages
+          : [language]).map((code) => normalizeLocale(code))
+      )
+    )
+
+    // Load text catalog from storage (produced by the translation stage)
+    const catalogRow = storage.getLatestNodeData("text-catalog", "book")
+    if (!catalogRow) {
+      progress.emit({ type: "step-skip", step: "tts" })
+      console.log(`[stage-run] ${label}: TTS skipped (no text catalog)`)
+      return
+    }
+    const catalog = catalogRow.data as TextCatalogOutput
 
-    // ── Step 3: Generate TTS ────────────────────────────────────────
     if (catalog.entries.length === 0) {
       progress.emit({ type: "step-skip", step: "tts" })
       console.log(`[stage-run] ${label}: TTS skipped (empty catalog)`)
@@ -1805,18 +1855,18 @@ async function runTextAndSpeechStep(
     }
 
     if (geminiFailedItems.length > 0) {
-      const summary = `${geminiFailedItems.length} Gemini TTS item(s) failed. Missing Gemini audio can be generated one by one from the Text & Speech view.`
+      const summary = `${geminiFailedItems.length} Gemini TTS item(s) failed. Missing Gemini audio can be generated one by one from the Speech view.`
       progress.emit({
         type: "step-error",
         step: "tts",
         error: summary,
       })
-      console.log(`[stage-run] ${label}: text & speech completed with Gemini TTS gaps`)
+      console.log(`[stage-run] ${label}: speech completed with Gemini TTS gaps`)
       return
     }
 
     progress.emit({ type: "step-complete", step: "tts" })
-    console.log(`[stage-run] ${label}: text & speech complete`)
+    console.log(`[stage-run] ${label}: speech complete`)
   } finally {
     storage.close()
     if (previousKey !== undefined) {
diff --git a/apps/studio/src/api/client.ts b/apps/studio/src/api/client.ts
index 6bc16659..7427ceb5 100644
--- a/apps/studio/src/api/client.ts
+++ b/apps/studio/src/api/client.ts
@@ -475,6 +475,9 @@ export const api = {
   deleteBook: (label: string) =>
     request<{ ok: boolean }>(`/books/${label}`, { method: "DELETE" }),
 
+  clearStage: (label: string, stageName: string) =>
+    request<{ ok: boolean; stage: string }>(`/books/${label}/stages/${stageName}`, { method: "DELETE" }),
+
   runStages: (
     label: string,
     apiKey: string,
diff --git a/apps/studio/src/components/LanguagePicker.tsx b/apps/studio/src/components/LanguagePicker.tsx
index 81823a3d..9ea7ff00 100644
--- a/apps/studio/src/components/LanguagePicker.tsx
+++ b/apps/studio/src/components/LanguagePicker.tsx
@@ -25,12 +25,14 @@ export function LanguagePicker({
   multiple,
   label,
   hint,
+  bookLanguage,
 }: {
   selected: string | Set<string>
   onSelect: (code: string) => void
   multiple?: boolean
   label: string
   hint?: string
+  bookLanguage?: string | null
 }) {
   const { t } = useLingui()
   const [search, setSearch] = useState("")
@@ -66,7 +68,8 @@ export function LanguagePicker({
   // Build dropdown items based on phase
   const items: DropdownItem[] = useMemo(() => {
     if (lockedLang) {
-      // Phase 2: show base language first, then suggested countries, then all others
+      // Phase 2: show base language first, then suggested countries.
+      // Other countries only appear when the user is searching.
       const q = search.toLowerCase()
       const { suggested, all } = getCountriesForLanguage(lockedLang.code)
       const result: DropdownItem[] = [
@@ -83,7 +86,10 @@ export function LanguagePicker({
         }
       }
       for (const c of suggested) addCountry(c)
-      for (const c of all) addCountry(c)
+      // Only search all countries when the user has typed something
+      if (q) {
+        for (const c of all) addCountry(c)
+      }
       return result
     }
     // Phase 1: show languages
@@ -145,12 +151,16 @@ export function LanguagePicker({
 
   const lockLanguage = useCallback(
     (lang: Language) => {
-      // Always enter phase 2 — any language can be paired with any country
+      // Only enter phase 2 if the language has defined regional variants
+      if (!lang.countries || lang.countries.length === 0) {
+        commit(lang.code)
+        return
+      }
       setLockedLang(lang)
       setSearch("")
       setHighlighted(0)
     },
-    []
+    [commit]
   )
 
   const clearSelection = useCallback(
@@ -257,22 +267,28 @@ export function LanguagePicker({
       {/* Selected badges for multi-select */}
       {multiple && selectedSet && selectedSet.size > 0 && (
         <div className="flex flex-wrap gap-1">
-          {Array.from(selectedSet).map((code) => (
-            <Badge
-              key={code}
-              variant="secondary"
-              className="gap-1 pr-1 text-xs font-normal"
-            >
-              {getDisplayName(code) || code}
-              <button
-                type="button"
-                onClick={() => onSelect(code)}
-                className="ml-0.5 rounded-full p-0.5 hover:bg-muted-foreground/20"
+          {Array.from(selectedSet).map((code) => {
+            const isBookLang = bookLanguage != null && normalizeLocale(code) === normalizeLocale(bookLanguage)
+            return (
+              <Badge
+                key={code}
+                variant="secondary"
+                className="gap-1 pr-1 text-xs font-normal"
               >
-                <X className="h-3 w-3" />
-              </button>
-            </Badge>
-          ))}
+                {getDisplayName(code) || code}
+                {isBookLang && (
+                  <span className="text-[10px] text-muted-foreground font-normal">({t`book language`})</span>
+                )}
+                <button
+                  type="button"
+                  onClick={() => onSelect(code)}
+                  className="ml-0.5 rounded-full p-0.5 hover:bg-muted-foreground/20"
+                >
+                  <X className="h-3 w-3" />
+                </button>
+              </Badge>
+            )
+          })}
         </div>
       )}
 
diff --git a/apps/studio/src/components/pipeline/components/StageRunCard.tsx b/apps/studio/src/components/pipeline/components/StageRunCard.tsx
index cc433fcb..16fba7aa 100644
--- a/apps/studio/src/components/pipeline/components/StageRunCard.tsx
+++ b/apps/studio/src/components/pipeline/components/StageRunCard.tsx
@@ -30,6 +30,10 @@ export const STAGE_SUB_STEPS: Record<StageName, StageSubStep[]> = Object.fromEnt
 
 interface StageRunCardProps {
   stageSlug: string
+  /** Additional pipeline stage slugs whose sub-steps should be shown in this card. */
+  additionalStageSlugs?: string[]
+  /** Override the internal error check (useful when combining multiple stages). */
+  overrideHasError?: boolean
   isRunning: boolean
   completed?: boolean
   showRunButton?: boolean
@@ -50,6 +54,8 @@ const HOVER_BG_BY_COLOR: Record<string, string> = {
 
 export function StageRunCard({
   stageSlug,
+  additionalStageSlugs,
+  overrideHasError,
   isRunning,
   completed,
   showRunButton = true,
@@ -60,11 +66,14 @@ export function StageRunCard({
   const stage = STAGES.find((s) => s.slug === stageSlug) ?? STAGES[0]
   const { stageState, stepState, stepProgress, stepError, error } = useBookRun()
   const stageStatus = stageState(stageSlug)
-  const subSteps = STAGE_SUB_STEPS[stageSlug as StageName] ?? []
+  const subSteps = [
+    ...(STAGE_SUB_STEPS[stageSlug as StageName] ?? []),
+    ...(additionalStageSlugs?.flatMap((s) => STAGE_SUB_STEPS[s as StageName] ?? []) ?? []),
+  ]
   const Icon = stage.icon
   const color = stage.color
   const borderColor = stage.borderDark
-  const hasError = stageStatus === "error"
+  const hasError = overrideHasError ?? stageStatus === "error"
   const isCompleted = completed ?? (stageStatus === "done")
   const hasSubSteps = subSteps.length > 0
   const hoverColorClass = HOVER_BG_BY_COLOR[color] ?? "hover:bg-gray-600"
diff --git a/apps/studio/src/components/pipeline/components/StageSidebar.tsx b/apps/studio/src/components/pipeline/components/StageSidebar.tsx
index ffdaa573..5dc68b32 100644
--- a/apps/studio/src/components/pipeline/components/StageSidebar.tsx
+++ b/apps/studio/src/components/pipeline/components/StageSidebar.tsx
@@ -96,7 +96,7 @@ function getSettingsTabs(
     captions: [
       { key: "general", label: i18n._(SETTINGS_TAB_MESSAGE["caption-prompt"]) },
     ],
-    "text-and-speech": [
+    translation: [
       { key: "general", label: i18n._(SETTINGS_TAB_MESSAGE.languages) },
       { key: "prompt", label: i18n._(SETTINGS_TAB_MESSAGE["translation-prompt"]) },
       { key: "speech", label: i18n._(SETTINGS_TAB_MESSAGE.speech) },
@@ -159,7 +159,8 @@ export function StageSidebar({
   const storyboardDone = stageState("storyboard") === "done"
   const validationCompleted = Boolean(accessibilityAssessment?.assessment)
 
-  const stageItems = STAGES.map((step, index) => {
+  const visibleStages = STAGES.filter((s) => s.slug !== "speech")
+  const stageItems = visibleStages.map((step, index) => {
     const isActive = step.slug === activeStep
     const Icon = step.icon
     const settingsTabs = getSettingsTabs(step.slug, i18n)
@@ -182,7 +183,7 @@ export function StageSidebar({
     return (
       <div key={step.slug} className="relative">
         {/* Connector line */}
-        {index < STAGES.length - 1 && (
+        {index < visibleStages.length - 1 && (
           <div className="absolute left-[24px] top-[36px] bottom-[-10px] w-0.5 bg-border z-10" />
         )}
 
diff --git a/apps/studio/src/components/pipeline/components/StepViewRouter.tsx b/apps/studio/src/components/pipeline/components/StepViewRouter.tsx
index f9168d90..2f548b18 100644
--- a/apps/studio/src/components/pipeline/components/StepViewRouter.tsx
+++ b/apps/studio/src/components/pipeline/components/StepViewRouter.tsx
@@ -9,7 +9,8 @@ import {
   CaptionsView,
   GlossaryView,
   TocView,
-  TranslationsView,
+  TranslationStageView,
+  SpeechView,
   PreviewView,
   ValidationView,
   ExportView,
@@ -53,7 +54,8 @@ const VIEW_MAP: Record<string, ViewEntry> = {
   captions: { component: CaptionsView },
   glossary: { component: GlossaryView },
   toc: { component: TocView },
-  "text-and-speech": { component: TranslationsView, fullHeight: true },
+  translation: { component: TranslationStageView, fullHeight: true },
+  speech: { component: SpeechView, fullHeight: true },
   validation: { component: ValidationView, fullHeight: true },
   preview: { component: PreviewView, fullHeight: true },
   export: { component: ExportView, fullHeight: true },
diff --git a/apps/studio/src/components/pipeline/pipeline-i18n.ts b/apps/studio/src/components/pipeline/pipeline-i18n.ts
index df5c2847..be778479 100644
--- a/apps/studio/src/components/pipeline/pipeline-i18n.ts
+++ b/apps/studio/src/components/pipeline/pipeline-i18n.ts
@@ -14,7 +14,8 @@ export const STAGE_LABEL_MESSAGES: Record<string, MessageDescriptor> = {
   captions: msg`Captions`,
   glossary: msg`Glossary`,
   toc: msg`Table of Contents`,
-  "text-and-speech": msg`Text & Speech`,
+  translation: msg`Text & Speech`,
+  speech: msg`Speech`,
   validation: msg`Validation`,
   preview: msg`Preview`,
   export: msg`Export`,
@@ -28,7 +29,8 @@ export const STAGE_RUNNING_LABEL_MESSAGES: Record<string, MessageDescriptor> = {
   captions: msg`Captioning Images...`,
   glossary: msg`Generating Glossary...`,
   toc: msg`Generating TOC...`,
-  "text-and-speech": msg`Generating Text & Speech...`,
+  translation: msg`Translating...`,
+  speech: msg`Generating Speech...`,
   validation: msg`Running Validation...`,
   preview: msg`Building Preview...`,
   export: msg`Exporting...`,
@@ -41,7 +43,8 @@ export const STAGE_DESCRIPTION_MESSAGES: Record<string, MessageDescriptor> = {
   captions: msg`Create descriptive captions for images to improve accessibility.`,
   glossary: msg`Build a glossary of key terms and definitions found in the text.`,
   toc: msg`Generate and customize the table of contents for the book navigation.`,
-  "text-and-speech": msg`Translate the book content and generate audio narration.`,
+  translation: msg`Translate the book content and generate audio narration.`,
+  speech: msg`Generate audio narration for the book content.`,
   validation: msg`Run whole-book validation checks and configure accessibility assessment settings.`,
   preview: msg`Package and preview the final ADT web application.`,
 }
diff --git a/apps/studio/src/components/pipeline/settings-routing.test.ts b/apps/studio/src/components/pipeline/settings-routing.test.ts
index d904d32e..e3ae5e22 100644
--- a/apps/studio/src/components/pipeline/settings-routing.test.ts
+++ b/apps/studio/src/components/pipeline/settings-routing.test.ts
@@ -9,7 +9,8 @@ describe("settings-routing", () => {
 
   it("resolves known settings stages", () => {
     expect(resolveSettingsStageSlug("extract")).toBe("extract")
-    expect(resolveSettingsStageSlug("text-and-speech")).toBe("text-and-speech")
+    expect(resolveSettingsStageSlug("translation")).toBe("translation")
+    expect(resolveSettingsStageSlug("speech")).toBe("speech")
   })
 
   it("returns null for stages without settings views", () => {
diff --git a/apps/studio/src/components/pipeline/settings-routing.ts b/apps/studio/src/components/pipeline/settings-routing.ts
index 05a70f3b..c5d827a2 100644
--- a/apps/studio/src/components/pipeline/settings-routing.ts
+++ b/apps/studio/src/components/pipeline/settings-routing.ts
@@ -5,7 +5,8 @@ export const SETTINGS_STAGE_SLUGS = [
   "glossary",
   "toc",
   "captions",
-  "text-and-speech",
+  "translation",
+  "speech",
   "validation",
 ] as const
 
diff --git a/apps/studio/src/components/pipeline/stage-config.test.ts b/apps/studio/src/components/pipeline/stage-config.test.ts
index dd585544..5ad632f3 100644
--- a/apps/studio/src/components/pipeline/stage-config.test.ts
+++ b/apps/studio/src/components/pipeline/stage-config.test.ts
@@ -20,7 +20,8 @@ describe("stage-config", () => {
       "captions",
       "glossary",
       "toc",
-      "text-and-speech",
+      "translation",
+      "speech",
       "preview",
     ])
   })
@@ -34,7 +35,8 @@ describe("stage-config", () => {
       "captions",
       "glossary",
       "toc",
-      "text-and-speech",
+      "translation",
+      "speech",
       "validation",
       "preview",
       "export",
diff --git a/apps/studio/src/components/pipeline/stage-config.ts b/apps/studio/src/components/pipeline/stage-config.ts
index 55828344..ea3866b1 100644
--- a/apps/studio/src/components/pipeline/stage-config.ts
+++ b/apps/studio/src/components/pipeline/stage-config.ts
@@ -7,6 +7,7 @@ import {
   BookOpen,
   List,
   Languages,
+  Volume2,
   Eye,
   ShieldCheck,
   FileDown,
@@ -21,7 +22,8 @@ export const STAGES = [
   { slug: "captions", label: "Captions", runningLabel: "Captioning Images", icon: Image, color: "bg-teal-600", hex: "#0d9488", textColor: "text-teal-600", bgLight: "bg-teal-50", borderColor: "border-teal-200", borderDark: "border-teal-600" },
   { slug: "glossary", label: "Glossary", runningLabel: "Generating Glossary", icon: BookOpen, color: "bg-lime-600", hex: "#65a30d", textColor: "text-lime-600", bgLight: "bg-lime-50", borderColor: "border-lime-200", borderDark: "border-lime-600" },
   { slug: "toc", label: "Table of Contents", runningLabel: "Generating TOC", icon: List, color: "bg-amber-600", hex: "#d97706", textColor: "text-amber-600", bgLight: "bg-amber-50", borderColor: "border-amber-200", borderDark: "border-amber-600" },
-  { slug: "text-and-speech", label: "Text & Speech", runningLabel: "Generating Text & Speech", icon: Languages, color: "bg-pink-600", hex: "#db2777", textColor: "text-pink-600", bgLight: "bg-pink-50", borderColor: "border-pink-200", borderDark: "border-pink-600" },
+  { slug: "translation", label: "Text & Speech", runningLabel: "Translating", icon: Languages, color: "bg-pink-600", hex: "#db2777", textColor: "text-pink-600", bgLight: "bg-pink-50", borderColor: "border-pink-200", borderDark: "border-pink-600" },
+  { slug: "speech", label: "Speech", runningLabel: "Generating Speech", icon: Volume2, color: "bg-rose-600", hex: "#e11d48", textColor: "text-rose-600", bgLight: "bg-rose-50", borderColor: "border-rose-200", borderDark: "border-rose-600" },
   { slug: "validation", label: "Validation", runningLabel: "Running Validation", icon: ShieldCheck, color: "bg-emerald-600", hex: "#059669", textColor: "text-emerald-600", bgLight: "bg-emerald-50", borderColor: "border-emerald-200", borderDark: "border-emerald-600" },
   { slug: "preview", label: "Preview", runningLabel: "Building Preview", icon: Eye, color: "bg-gray-600", hex: "#4b5563", textColor: "text-gray-600", bgLight: "bg-gray-50", borderColor: "border-gray-200", borderDark: "border-gray-600" },
   { slug: "export", label: "Export", runningLabel: "Exporting", icon: FileDown, color: "bg-indigo-700", hex: "#4338ca", textColor: "text-indigo-700", bgLight: "bg-indigo-50", borderColor: "border-indigo-200", borderDark: "border-indigo-700" },
@@ -52,7 +54,8 @@ export const STAGE_DESCRIPTIONS: Record<NonBookStageSlug, string> = {
   captions: "Create descriptive captions for images to improve accessibility.",
   glossary: "Build a glossary of key terms and definitions found in the text.",
   toc: "Generate and customize the table of contents for the book navigation.",
-  "text-and-speech": "Translate the book content and generate audio narration.",
+  translation: "Translate the book content and generate audio narration.",
+  speech: "Generate audio narration for the book content.",
   validation: "Run whole-book validation checks and configure accessibility assessment settings.",
   preview: "Package and preview the final ADT web application.",
   export: "Export packaged ADTs and related artifacts for delivery.",
@@ -63,7 +66,7 @@ export const STAGES_WITH_PAGES = new Set<StageSlug>([
   "storyboard",
   "quizzes",
   "captions",
-  "text-and-speech",
+  "translation",
 ])
 
 const STAGE_SLUG_SET = new Set<StageSlug>(STAGES.map((stage) => stage.slug))
diff --git a/apps/studio/src/components/pipeline/stages/BookView.tsx b/apps/studio/src/components/pipeline/stages/BookView.tsx
index 20b3f8ea..b479cdd7 100644
--- a/apps/studio/src/components/pipeline/stages/BookView.tsx
+++ b/apps/studio/src/components/pipeline/stages/BookView.tsx
@@ -13,7 +13,8 @@ interface ViewProps {
 }
 
 export function BookView({ bookLabel }: ViewProps) {
-  const overviewSteps = getBookOverviewStages()
+  // Filter out "speech" — it's combined with "translation" into one "Text & Speech" card
+  const overviewSteps = getBookOverviewStages().filter((s) => s.slug !== "speech")
   const { stageState, queueRun } = useBookRun()
   const { apiKey, hasApiKey, azureKey, azureRegion, geminiKey } = useApiKey()
   const { data: accessibilityAssessment } = useAccessibilityAssessment(bookLabel)
@@ -25,9 +26,12 @@ export function BookView({ bookLabel }: ViewProps) {
     const state = stageState(stage.slug)
     if (state === "running" || state === "queued") return
 
+    // "Text & Speech" card runs translation → speech
+    const toStage = stage.slug === "translation" ? "speech" : stage.slug
+
     queueRun({
       fromStage: stage.slug,
-      toStage: stage.slug,
+      toStage,
       apiKey,
       providerCredentials: {
         azure: { key: azureKey, region: azureRegion },
@@ -40,10 +44,29 @@ export function BookView({ bookLabel }: ViewProps) {
     <div className="flex max-w-xl flex-col items-start">
       {overviewSteps.map((step, index) => {
         const isLast = index === overviewSteps.length - 1
-        const state = step.slug === "validation" && validationCompleted ? "done" : stageState(step.slug)
+
+        // For "translation", combine with speech stage state
+        const isTextAndSpeech = step.slug === "translation"
+        const translationState = stageState("translation")
+        const speechState = stageState("speech")
+
+        // Combined card state: running/queued wins, then error. Once translation is done the
+        // card follows the speech stage, so a speech stage that has not finished is not shown as done.
+        const state = step.slug === "validation" && validationCompleted
+          ? "done"
+          : isTextAndSpeech
+            ? (translationState === "running" || translationState === "queued" || speechState === "running" || speechState === "queued")
+              ? "running"
+              : (translationState === "error" || speechState === "error")
+                ? "error"
+                : (translationState === "done" ? speechState : translationState)
+            : stageState(step.slug)
         const isRunning = step.slug !== "validation" && (state === "running" || state === "queued")
         const stageCompleted = state === "done"
         const showRunButton = isPipelineStage(step) && step.slug !== "preview"
+        const hasError = isTextAndSpeech
+          ? translationState === "error" || speechState === "error"
+          : undefined
 
         return (
           <div key={step.slug} className="w-full">
@@ -54,6 +77,8 @@ export function BookView({ bookLabel }: ViewProps) {
             >
               <StageRunCard
                 stageSlug={step.slug}
+                additionalStageSlugs={isTextAndSpeech ? ["speech"] : undefined}
+                overrideHasError={hasError}
                 isRunning={isRunning}
                 completed={stageCompleted}
                 showRunButton={showRunButton}
diff --git a/apps/studio/src/components/pipeline/stages/PreviewValidationCard.tsx b/apps/studio/src/components/pipeline/stages/PreviewValidationCard.tsx
index dced7cf3..483590e0 100644
--- a/apps/studio/src/components/pipeline/stages/PreviewValidationCard.tsx
+++ b/apps/studio/src/components/pipeline/stages/PreviewValidationCard.tsx
@@ -369,11 +369,12 @@ export function PreviewValidationCard({
   const sessionLanguage = activeSession?.session.language?.trim() || null
   const glossaryAvailable = (glossary.data?.items.length ?? 0) > 0
   const glossaryPending = stageState("glossary") === "done" && glossary.isLoading
-  const textAndSpeechStageDone = stageState("text-and-speech") === "done"
+  const speechStageDone = stageState("speech") === "done"
+  const translationStageDone = stageState("translation") === "done"
   const ttsAvailable = hasLanguageEntries(tts.data?.languages, sessionLanguage)
-  const ttsPending = textAndSpeechStageDone && tts.isLoading
+  const ttsPending = speechStageDone && tts.isLoading
   const translationAvailable = hasLanguageEntries(textCatalog.data?.translations, sessionLanguage)
-  const translationPending = textAndSpeechStageDone && textCatalog.isLoading
+  const translationPending = translationStageDone && textCatalog.isLoading
   const easyReadAvailable = false
 
   const resolvedResults = useMemo(() => {
@@ -387,7 +388,8 @@ export function PreviewValidationCard({
             explicitStatus: draftResults[criterion.id]?.status,
             glossaryAvailable,
             glossaryPending,
-            textAndSpeechStageDone,
+            speechStageDone,
+            translationStageDone,
             ttsAvailable,
             ttsPending,
             sessionLanguage,
@@ -408,7 +410,8 @@ export function PreviewValidationCard({
     glossaryAvailable,
     glossaryPending,
     sessionLanguage,
-    textAndSpeechStageDone,
+    speechStageDone,
+    translationStageDone,
     currentPage.hasActivity,
     currentPage.hasImages,
     currentPage.signLanguageEnabled,
diff --git a/apps/studio/src/components/pipeline/stages/index.ts b/apps/studio/src/components/pipeline/stages/index.ts
index 65c42e65..4a6e8b62 100644
--- a/apps/studio/src/components/pipeline/stages/index.ts
+++ b/apps/studio/src/components/pipeline/stages/index.ts
@@ -6,6 +6,8 @@ export { CaptionsView } from "./captions/CaptionsView"
 export { GlossaryView } from "./glossary/GlossaryView"
 export { TocView } from "./toc/TocView"
 export { TranslationsView } from "./translations/TranslationsView"
+export { SpeechView } from "./speech/SpeechView"
+export { TranslationStageView } from "./translations/TranslationStageView"
 export { PreviewView } from "./PreviewView"
 export { ValidationView } from "./ValidationView"
 export { ExportView } from "./ExportView"
diff --git a/apps/studio/src/components/pipeline/stages/speech/SpeechSettings.tsx b/apps/studio/src/components/pipeline/stages/speech/SpeechSettings.tsx
new file mode 100644
index 00000000..fd3a0f72
--- /dev/null
+++ b/apps/studio/src/components/pipeline/stages/speech/SpeechSettings.tsx
@@ -0,0 +1,274 @@
+import { useState, useEffect } from "react"
+import { createPortal } from "react-dom"
+import { Save } from "lucide-react"
+import { Button } from "@/components/ui/button"
+import { Input } from "@/components/ui/input"
+import { Label } from "@/components/ui/label"
+import { useBookConfig, useUpdateBookConfig } from "@/hooks/use-book-config"
+import { useActiveConfig } from "@/hooks/use-debug"
+import { SpeechPromptsEditor } from "../translations/components/SpeechPromptsEditor"
+import { VoiceMappingsEditor } from "../translations/components/VoiceMappingsEditor"
+import { useLingui } from "@lingui/react/macro"
+
+export function SpeechSettings({ bookLabel, headerTarget, tab = "general" }: { bookLabel: string; headerTarget?: HTMLDivElement | null; tab?: string }) {
+  const { t } = useLingui()
+  const { data: bookConfigData } = useBookConfig(bookLabel)
+  const { data: activeConfigData } = useActiveConfig(bookLabel)
+  const updateConfig = useUpdateBookConfig()
+
+  // Speech settings
+  const [speechModel, setSpeechModel] = useState("")
+  const [format, setFormat] = useState("")
+  const [defaultProvider, setDefaultProvider] = useState("openai")
+  const [openaiModel, setOpenaiModel] = useState("")
+  const [openaiLanguages, setOpenaiLanguages] = useState("")
+  const [azureModel, setAzureModel] = useState("")
+  const [azureLanguages, setAzureLanguages] = useState("")
+  const [geminiModel, setGeminiModel] = useState("")
+  const [geminiLanguages, setGeminiLanguages] = useState("")
+  const [bitRate, setBitRate] = useState("")
+  const [sampleRate, setSampleRate] = useState("")
+
+  const [dirty, setDirty] = useState<Record<string, boolean>>({})
+  const markDirty = (field: string) => setDirty((prev) => ({ ...prev, [field]: true }))
+
+  useEffect(() => {
+    // Seed the form from the merged config; falsy values leave the current state untouched.
+    if (!activeConfigData) return
+    const { speech } = activeConfigData.merged as Record<string, unknown>
+    if (!speech || typeof speech !== "object") return
+    const cfg = speech as Record<string, unknown>
+    if (cfg.model) setSpeechModel(String(cfg.model))
+    if (cfg.format) setFormat(String(cfg.format))
+    if (cfg.default_provider) setDefaultProvider(String(cfg.default_provider))
+    if (cfg.bit_rate) setBitRate(String(cfg.bit_rate))
+    if (cfg.sample_rate) setSampleRate(String(cfg.sample_rate))
+    if (!cfg.providers || typeof cfg.providers !== "object") return
+    const byName = cfg.providers as Record<string, Record<string, unknown>>
+    // Every provider exposes the same two fields, so one loop covers all three.
+    const targets = [
+      { name: "openai", setModel: setOpenaiModel, setLanguages: setOpenaiLanguages },
+      { name: "azure", setModel: setAzureModel, setLanguages: setAzureLanguages },
+      { name: "gemini", setModel: setGeminiModel, setLanguages: setGeminiLanguages },
+    ]
+    for (const { name, setModel, setLanguages } of targets) {
+      const provider = byName[name]
+      if (!provider) continue
+      if (provider.model) setModel(String(provider.model))
+      const { languages } = provider
+      if (Array.isArray(languages)) setLanguages((languages as string[]).join(", "))
+    }
+  }, [activeConfigData])
+
+  // Write a section when it was edited in this form or the book config already overrides it.
+  const shouldWrite = (field: string) =>
+    dirty[field] || (bookConfigData?.config && field in bookConfigData.config)
+
+  /**
+   * Builds the config payload to save: the existing book overrides plus, when
+   * applicable, a `speech` section rebuilt from the form state. Blank fields
+   * are written as `undefined`.
+   */
+  const buildOverrides = () => {
+    const overrides: Record<string, unknown> = {}
+    if (bookConfigData?.config) Object.assign(overrides, bookConfigData.config)
+    if (!shouldWrite("speech")) return overrides
+
+    const existing = (bookConfigData?.config?.speech ?? {}) as Record<string, unknown>
+    const providers: Record<string, unknown> = {}
+    const formProviders = [
+      { name: "openai", model: openaiModel, languages: openaiLanguages },
+      { name: "azure", model: azureModel, languages: azureLanguages },
+      { name: "gemini", model: geminiModel, languages: geminiLanguages },
+    ]
+    for (const { name, model, languages } of formProviders) {
+      const langs = languages.split(",").map((s) => s.trim()).filter(Boolean)
+      // A provider with neither a model nor any language is left out entirely.
+      if (!model.trim() && langs.length === 0) continue
+      providers[name] = {
+        model: model.trim() || undefined,
+        languages: langs.length > 0 ? langs : undefined,
+      }
+    }
+    // Free-text input: keep only a finite number so NaN (serialized as null) is never saved.
+    const parsedSampleRate = Number(sampleRate.trim())
+    overrides.speech = {
+      ...existing,
+      model: speechModel.trim() || undefined,
+      format: format.trim() || undefined,
+      default_provider: defaultProvider || undefined,
+      providers: Object.keys(providers).length > 0 ? providers : undefined,
+      bit_rate: bitRate.trim() || undefined,
+      sample_rate: sampleRate.trim() && Number.isFinite(parsedSampleRate) ? parsedSampleRate : undefined,
+    }
+    return overrides
+  }
+
+  /**
+   * Submits the payload from buildOverrides() through the updateConfig
+   * mutation. Dirty flags are reset only once the mutation reports success,
+   * so after a failed save the edited sections still count as dirty.
+   */
+  const saveOnly = async () => {
+    updateConfig.mutate(
+      { label: bookLabel, config: buildOverrides() },
+      { onSuccess: () => setDirty({}) }
+    )
+  }
+
+  return (
+    <div className="p-4 max-w-2xl space-y-6">
+      {tab === "general" && (
+        <div className="space-y-6">
+          {/* Provider Routing */}
+          <div className="space-y-3">
+            <h3 className="text-xs font-semibold uppercase tracking-wide text-muted-foreground">{t`Provider Routing`}</h3>
+            <div className="space-y-1.5">
+              <Label className="text-xs">{t`Default Provider`}</Label>
+              <select
+                value={defaultProvider}
+                onChange={(e) => { setDefaultProvider(e.target.value); markDirty("speech") }}
+                className="flex h-8 w-48 rounded-md border border-input bg-background px-3 py-1 text-xs shadow-sm"
+              >
+                <option value="openai">{t`OpenAI`}</option>
+                <option value="azure">{t`Azure`}</option>
+                <option value="gemini">{t`Gemini`}</option>
+              </select>
+              <p className="text-xs text-muted-foreground">{t`Provider used for languages not assigned to a specific provider.`}</p>
+            </div>
+          </div>
+
+          {/* OpenAI Provider */}
+          <div className="space-y-3 rounded-md border p-3">
+            <h3 className="text-xs font-semibold">{t`OpenAI`}</h3>
+            <div className="space-y-1.5">
+              <Label className="text-xs">{t`Model`}</Label>
+              <Input
+                value={openaiModel}
+                onChange={(e) => { setOpenaiModel(e.target.value); markDirty("speech") }}
+                placeholder={t`e.g. gpt-4o-mini-tts`}
+                className="w-72 h-8 text-xs"
+              />
+            </div>
+            <div className="space-y-1.5">
+              <Label className="text-xs">{t`Languages`}</Label>
+              <Input
+                value={openaiLanguages}
+                onChange={(e) => { setOpenaiLanguages(e.target.value); markDirty("speech") }}
+                placeholder={t`e.g. en, fr`}
+                className="w-72 h-8 text-xs"
+              />
+              <p className="text-xs text-muted-foreground">{t`Comma-separated language codes routed to OpenAI.`}</p>
+            </div>
+          </div>
+
+          {/* Azure Provider */}
+          <div className="space-y-3 rounded-md border p-3">
+            <h3 className="text-xs font-semibold">{t`Azure Speech`}</h3>
+            <div className="space-y-1.5">
+              <Label className="text-xs">{t`Model`}</Label>
+              <Input
+                value={azureModel}
+                onChange={(e) => { setAzureModel(e.target.value); markDirty("speech") }}
+                placeholder={t`e.g. azure-tts`}
+                className="w-72 h-8 text-xs"
+              />
+            </div>
+            <div className="space-y-1.5">
+              <Label className="text-xs">{t`Languages`}</Label>
+              <Input
+                value={azureLanguages}
+                onChange={(e) => { setAzureLanguages(e.target.value); markDirty("speech") }}
+                placeholder={t`e.g. es, ta, si, sw`}
+                className="w-72 h-8 text-xs"
+              />
+              <p className="text-xs text-muted-foreground">{t`Comma-separated language codes routed to Azure.`}</p>
+            </div>
+          </div>
+
+          {/* Gemini Provider */}
+          <div className="space-y-3 rounded-md border p-3">
+            <h3 className="text-xs font-semibold">{t`Gemini`}</h3>
+            <div className="space-y-1.5">
+              <Label className="text-xs">{t`Model`}</Label>
+              <Input
+                value={geminiModel}
+                onChange={(e) => { setGeminiModel(e.target.value); markDirty("speech") }}
+                placeholder={t`e.g. gemini-2.5-pro-preview-tts`}
+                className="w-72 h-8 text-xs"
+              />
+            </div>
+            <div className="space-y-1.5">
+              <Label className="text-xs">{t`Languages`}</Label>
+              <Input
+                value={geminiLanguages}
+                onChange={(e) => { setGeminiLanguages(e.target.value); markDirty("speech") }}
+                placeholder={t`e.g. en, hi, ta`}
+                className="w-72 h-8 text-xs"
+              />
+              <p className="text-xs text-muted-foreground">{t`Comma-separated language codes routed to Gemini.`}</p>
+            </div>
+          </div>
+
+          {/* Audio Settings */}
+          <div className="space-y-3">
+            <h3 className="text-xs font-semibold uppercase tracking-wide text-muted-foreground">{t`Audio Settings`}</h3>
+            <div className="flex gap-4">
+              <div className="space-y-1.5">
+                <Label className="text-xs">{t`Format`}</Label>
+                <Input
+                  value={format}
+                  onChange={(e) => { setFormat(e.target.value); markDirty("speech") }}
+                  placeholder={t`mp3`}
+                  className="w-32 h-8 text-xs"
+                />
+              </div>
+              <div className="space-y-1.5">
+                <Label className="text-xs">{t`Bit Rate`}</Label>
+                <Input
+                  value={bitRate}
+                  onChange={(e) => { setBitRate(e.target.value); markDirty("speech") }}
+                  placeholder={t`64k`}
+                  className="w-32 h-8 text-xs"
+                />
+              </div>
+              <div className="space-y-1.5">
+                <Label className="text-xs">{t`Sample Rate`}</Label>
+                <Input
+                  value={sampleRate}
+                  onChange={(e) => { setSampleRate(e.target.value); markDirty("speech") }}
+                  placeholder={t`24000`}
+                  className="w-32 h-8 text-xs"
+                />
+              </div>
+            </div>
+            <p className="text-xs text-muted-foreground">
+              {t`Gemini TTS outputs WAV audio in this integration; other providers continue using the configured format.`}
+            </p>
+          </div>
+        </div>
+      )}
+
+      {tab === "speech-prompts" && (
+        <SpeechPromptsEditor bookLabel={bookLabel} headerTarget={headerTarget} />
+      )}
+
+      {tab === "voices" && (
+        <VoiceMappingsEditor bookLabel={bookLabel} headerTarget={headerTarget} />
+      )}
+
+      {headerTarget && tab === "general" && createPortal(
+        <Button
+          size="sm"
+          className="h-7 px-2.5 text-xs bg-black/15 text-white hover:bg-black/25"
+          onClick={saveOnly}
+          disabled={updateConfig.isPending}
+        >
+          <Save className="mr-1.5 h-3.5 w-3.5" />
+          {t`Save`}
+        </Button>,
+        headerTarget
+      )}
+    </div>
+  )
+}
diff --git a/apps/studio/src/components/pipeline/stages/speech/SpeechView.tsx b/apps/studio/src/components/pipeline/stages/speech/SpeechView.tsx
new file mode 100644
index 00000000..d9010689
--- /dev/null
+++ b/apps/studio/src/components/pipeline/stages/speech/SpeechView.tsx
@@ -0,0 +1,1052 @@
+import { useState, useEffect, useRef, useCallback, useMemo } from "react"
+import { Volume2, Languages, Loader2, Play, Pause, WandSparkles, RefreshCw, MoreVertical, Clock, Trash2, Settings } from "lucide-react"
+import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query"
+import { Link } from "@tanstack/react-router"
+import { api, getAudioUrl, BASE_URL } from "@/api/client"
+import type { TextCatalogEntry } from "@/api/client"
+import { useActiveConfig } from "@/hooks/use-debug"
+import { useBook } from "@/hooks/use-books"
+import { useStepHeader } from "../../components/StepViewRouter"
+import { useBookRun } from "@/hooks/use-book-run"
+import { useApiKey } from "@/hooks/use-api-key"
+import { useVirtualizer } from "@tanstack/react-virtual"
+import { cn } from "@/lib/utils"
+import { normalizeLocale } from "@/lib/languages"
+import { languageUsesSpeechProvider } from "@/lib/speech-routing"
+import { Alert, AlertDescription } from "@/components/ui/alert"
+import { Button } from "@/components/ui/button"
+import { Card, CardHeader, CardTitle, CardContent } from "@/components/ui/card"
+import { resolveTranslationLanguageState } from "../translations/lib/translations-view-state"
+import { msg } from "@lingui/core/macro"
+import { useLingui } from "@lingui/react/macro"
+
+/** Matches ids containing `_im` followed by three digits. */
+const IMAGE_ID_RE = /_im\d{3}/
+/** True when `id` contains the marker matched by IMAGE_ID_RE. */
+const isImageEntry = (id: string): boolean => IMAGE_ID_RE.test(id)
+
+/** Matches ids containing the `_ans_` marker. */
+const ANSWER_ID_RE = /_ans_/
+/** True when `id` contains the marker matched by ANSWER_ID_RE. */
+const isAnswerEntry = (id: string): boolean => ANSWER_ID_RE.test(id)
+
+type CatalogFilter = "all" | "text" | "captions" | "activities" | "answers" | "glossary" | "quizzes"
+
+function getEntryType(id: string): CatalogFilter {
+  // First matching rule wins: id prefixes are checked before the infix markers.
+  if (id.startsWith("gl")) return "glossary"
+  if (id.startsWith("qz")) return "quizzes"
+  if (isAnswerEntry(id)) return "answers"
+  if (isImageEntry(id)) return "captions"
+  return /_ac\d{3}/.test(id) ? "activities" : "text"
+}
+
+const langNames = new Intl.DisplayNames(["en"], { type: "language" })
+function displayLang(code: string): string {
+  try { return langNames.of(code) ?? code } catch { return code } // fall back to the raw code when `of` yields nothing or throws
+}
+
+/**
+ * Read-only summary of the book language and the output languages. With no
+ * explicit output languages the book language is shown as the only output;
+ * when neither is known, "Not detected" is rendered instead.
+ */
+function LanguageSummary({ bookLanguage, outputLanguages }: { bookLanguage: string | null; outputLanguages: string[] }) {
+  const { t } = useLingui()
+  // Fall back to the book language when no output languages are configured.
+  const shownOutputs = outputLanguages.length > 0 ? outputLanguages : bookLanguage ? [bookLanguage] : []
+
+  return (
+    <>
+      <div>
+        <div className="text-[10px] font-medium text-muted-foreground uppercase tracking-wider mb-1">{t`Book Language`}</div>
+        <p className="text-sm">
+          {bookLanguage
+            ? <>{displayLang(bookLanguage)} <span className="text-muted-foreground text-xs">({bookLanguage})</span></>
+            : <span className="text-muted-foreground italic">{t`Not detected`}</span>}
+        </p>
+      </div>
+      <div>
+        <div className="text-[10px] font-medium text-muted-foreground uppercase tracking-wider mb-1.5">{t`Output Languages`}</div>
+        {shownOutputs.length === 0 ? (
+          <p className="text-xs text-muted-foreground italic">{t`Not detected`}</p>
+        ) : (
+          <div className="flex flex-wrap gap-1.5">
+            {shownOutputs.map((lang) => (
+              <span key={lang} className="text-xs bg-muted rounded-full px-2.5 py-0.5 font-medium">
+                {displayLang(lang)} <span className="text-muted-foreground font-normal">({lang})</span>
+              </span>
+            ))}
+          </div>
+        )}
+      </div>
+    </>
+  )
+}
+
+export function SpeechView({ bookLabel, selectedPageId, onSelectPage }: { bookLabel: string; selectedPageId?: string; onSelectPage?: (pageId: string | null) => void }) {
+  const { t, i18n } = useLingui()
+  const { setExtra } = useStepHeader()
+  const { data: activeConfigData } = useActiveConfig(bookLabel)
+  const { data: book, isLoading: isBookLoading } = useBook(bookLabel)
+  const queryClient = useQueryClient()
+  const { stageState, queueRun, error: runError } = useBookRun()
+  const { apiKey, hasApiKey, azureKey, azureRegion, geminiKey } = useApiKey()
+  // Translation state (for initial cards & control panel)
+  const translationState = stageState("translation")
+  const translationDone = translationState === "done"
+  const isTranslationRunning = translationState === "running" || translationState === "queued"
+
+  // Speech state
+  const speechState = stageState("speech")
+  const speechDone = speechState === "done"
+  const hasStageError = speechState === "error"
+  const isRunning = speechState === "running" || speechState === "queued"
+
+  const handleRunTranslations = useCallback(() => {
+    if (!hasApiKey || isTranslationRunning) return
+    queueRun({
+      fromStage: "translation",
+      toStage: "translation",
+      apiKey,
+      providerCredentials: {
+        azure: { key: azureKey, region: azureRegion },
+        geminiApiKey: geminiKey,
+      },
+    })
+  }, [hasApiKey, isTranslationRunning, apiKey, azureKey, azureRegion, geminiKey, queueRun])
+
+  const handleRunSpeech = useCallback(() => {
+    if (!hasApiKey || isRunning) return
+    queueRun({
+      fromStage: "speech",
+      toStage: "speech",
+      apiKey,
+      providerCredentials: {
+        azure: { key: azureKey, region: azureRegion },
+        geminiApiKey: geminiKey,
+      },
+    })
+  }, [hasApiKey, isRunning, apiKey, azureKey, azureRegion, geminiKey, queueRun])
+
+  const handleRunTranslationAndSpeech = useCallback(() => {
+    if (!hasApiKey || isTranslationRunning) return
+    queueRun({
+      fromStage: "translation",
+      toStage: "speech",
+      apiKey,
+      providerCredentials: {
+        azure: { key: azureKey, region: azureRegion },
+        geminiApiKey: geminiKey,
+      },
+    })
+  }, [hasApiKey, isTranslationRunning, apiKey, azureKey, azureRegion, geminiKey, queueRun])
+
+  const { data: catalog, isLoading } = useQuery({
+    queryKey: ["books", bookLabel, "text-catalog"],
+    queryFn: () => api.getTextCatalog(bookLabel),
+    enabled: !!bookLabel,
+  })
+
+  const { data: ttsData } = useQuery({
+    queryKey: ["books", bookLabel, "tts"],
+    queryFn: () => api.getTTS(bookLabel),
+    enabled: !!bookLabel,
+  })
+
+  const merged = activeConfigData?.merged as Record<string, unknown> | undefined
+  const speechConfig = merged?.speech
+  const outputLanguages = Array.from(
+    new Set(((merged?.output_languages as string[] | undefined) ?? []).map((code) => normalizeLocale(code)))
+  )
+  const bookLanguage = book?.languageCode ?? book?.metadata?.language_code ?? null
+  const configuredEditingLanguage = merged?.editing_language as string | undefined
+
+  const hasExplicitOutputLanguages = outputLanguages.length > 0
+
+  const [selectedLang, setSelectedLang] = useState<string | null>(null)
+  const [generateErrorById, setGenerateErrorById] = useState<Record<string, string>>({})
+  const [catalogFilter, setCatalogFilter] = useState<CatalogFilter>("all")
+
+  useEffect(() => {
+    if (hasExplicitOutputLanguages && outputLanguages.length > 0 && !selectedLang) {
+      setSelectedLang(outputLanguages[0])
+    }
+  }, [outputLanguages.length, hasExplicitOutputLanguages])
+
+  const entries = catalog?.entries ?? []
+  const filteredByPage = selectedPageId
+    ? entries.filter((e) => e.id.startsWith(selectedPageId + "_"))
+    : entries
+  const displayEntries = catalogFilter === "all"
+    ? filteredByPage
+    : filteredByPage.filter((e) => getEntryType(e.id) === catalogFilter)
+
+  const typeCounts = useMemo(() => {
+    const counts: Record<CatalogFilter, number> = { all: 0, text: 0, captions: 0, activities: 0, answers: 0, glossary: 0, quizzes: 0 }
+    for (const e of filteredByPage) {
+      counts[getEntryType(e.id)]++
+      counts.all++
+    }
+    return counts
+  }, [filteredByPage])
+
+  const { editingLanguage, isSourceLang: isSelectedSourceLang } = resolveTranslationLanguageState({
+    selectedLang,
+    configuredEditingLanguage,
+    bookLanguage,
+    isBookLoading,
+  })
+  const isSourceLang = !hasExplicitOutputLanguages || isSelectedSourceLang
+
+  // Translation data (read-only in speech view)
+  const translationData = selectedLang ? catalog?.translations?.[selectedLang] : undefined
+  const translatedEntries = isSourceLang ? entries : (translationData?.entries ?? [])
+  const translatedMap = new Map(translatedEntries.map((e) => [e.id, e.text]))
+
+  const audioLang = selectedLang ??
+    (hasExplicitOutputLanguages ? (outputLanguages[0] ?? editingLanguage) : editingLanguage)
+  const currentLanguageUsesGemini =
+    !!audioLang && languageUsesSpeechProvider(audioLang, "gemini", speechConfig)
+  const geminiRoutedLanguages = (
+    outputLanguages.length > 0 ? outputLanguages : editingLanguage ? [editingLanguage] : []
+  ).filter((language, index, array) =>
+    languageUsesSpeechProvider(language, "gemini", speechConfig) && array.indexOf(language) === index
+  )
+  const allowGeminiPartialView =
+    hasStageError &&
+    geminiRoutedLanguages.length > 0
+  const showRunCard = (!speechDone || isRunning) && !allowGeminiPartialView
+
+  // Build audio lookup
+  const audioMap = new Map<string, { fileName: string; voice: string }>()
+  if (ttsData && audioLang && ttsData.languages[audioLang]) {
+    for (const e of ttsData.languages[audioLang].entries) {
+      audioMap.set(e.textId, { fileName: e.fileName, voice: e.voice })
+    }
+  }
+  const totalAudioFiles = ttsData
+    ? Object.values(ttsData.languages).reduce((sum, lang) => sum + lang.entries.length, 0)
+    : 0
+  const generatedAudioCount = displayEntries.filter((entry) => audioMap.has(entry.id)).length
+  const missingAudioCount = Math.max(displayEntries.length - generatedAudioCount, 0)
+
+  // Speech config
+  const speechCfg = speechConfig as { default_provider?: string; voice?: string; model?: string } | undefined
+  const defaultProvider = speechCfg?.default_provider ?? "openai"
+  const defaultVoice = speechCfg?.voice ?? "alloy"
+  const defaultModel = speechCfg?.model ?? (defaultProvider === "openai" ? "gpt-4o-mini-tts" : undefined)
+  const providerLabel = defaultProvider.charAt(0).toUpperCase() + defaultProvider.slice(1)
+
+  const scrollRef = useRef<HTMLDivElement>(null)
+  const virtualizer = useVirtualizer({
+    count: displayEntries.length,
+    getScrollElement: () => scrollRef.current,
+    estimateSize: () => 140,
+    overscan: 3,
+  })
+
+  useEffect(() => {
+    virtualizer.scrollToOffset(0)
+  }, [catalogFilter, selectedLang, selectedPageId])
+
+  const generateAudioMutation = useMutation({
+    mutationFn: async (variables: { textId: string; language: string }) => {
+      if (!geminiKey) {
+        throw new Error(i18n._(msg`Gemini API key is required to generate audio.`))
+      }
+      return api.generateGeminiTTSForItem(
+        bookLabel,
+        variables.textId,
+        variables.language,
+        {
+          geminiApiKey: geminiKey,
+          openaiApiKey: apiKey || undefined,
+          azure: azureKey && azureRegion
+            ? { key: azureKey, region: azureRegion }
+            : undefined,
+        }
+      )
+    },
+    onMutate: (variables) => {
+      setGenerateErrorById((prev) => {
+        if (!(variables.textId in prev)) return prev
+        const next = { ...prev }
+        delete next[variables.textId]
+        return next
+      })
+    },
+    onSuccess: async () => {
+      await Promise.all([
+        queryClient.invalidateQueries({ queryKey: ["books", bookLabel, "tts"] }),
+        queryClient.invalidateQueries({ queryKey: ["books", bookLabel, "step-status"] }),
+      ])
+    },
+    onError: (error, variables) => {
+      setGenerateErrorById((prev) => ({
+        ...prev,
+        [variables.textId]:
+          error instanceof Error ? error.message : String(error),
+      }))
+      queryClient.invalidateQueries({ queryKey: ["books", bookLabel, "step-status"] })
+    },
+  })
+
+  const handleGenerateAudio = useCallback(
+    (textId: string) => {
+      if (!audioLang || !currentLanguageUsesGemini) return
+      generateAudioMutation.mutate({ textId, language: audioLang })
+    },
+    [audioLang, currentLanguageUsesGemini, generateAudioMutation]
+  )
+
+  const clearSpeechMutation = useMutation({
+    mutationFn: () => api.clearStage(bookLabel, "speech"),
+    onSuccess: async () => {
+      await Promise.all([
+        queryClient.invalidateQueries({ queryKey: ["books", bookLabel, "tts"] }),
+        queryClient.invalidateQueries({ queryKey: ["books", bookLabel, "step-status"] }),
+      ])
+      setGenerateErrorById({})
+    },
+  })
+
+  useEffect(() => {
+    if (!catalog) return
+    setExtra(
+      <div className="flex items-center gap-1.5 ml-auto">
+        <span className="text-[10px] bg-white/20 rounded-full px-2 py-0.5">{t`${String(displayEntries.length)} texts`}</span>
+        {currentLanguageUsesGemini ? (
+          <span className="text-[10px] bg-white/20 rounded-full px-2 py-0.5">
+            {t`${String(generatedAudioCount)}/${String(displayEntries.length)} audio`}
+          </span>
+        ) : totalAudioFiles > 0 && (
+          <span className="text-[10px] bg-white/20 rounded-full px-2 py-0.5">{t`${String(totalAudioFiles)} audio`}</span>
+        )}
+        {currentLanguageUsesGemini && missingAudioCount > 0 && (
+          <span className="text-[10px] bg-amber-100 text-amber-900 rounded-full px-2 py-0.5">
+            {t`${missingAudioCount} missing`}
+          </span>
+        )}
+      </div>
+    )
+    return () => setExtra(null)
+  }, [catalog, t, displayEntries.length, totalAudioFiles, currentLanguageUsesGemini, generatedAudioCount, missingAudioCount])
+
+  if (!showRunCard && isLoading) {
+    return (
+      <div className="flex items-center justify-center py-12 text-muted-foreground">
+        <Loader2 className="w-4 h-4 animate-spin mr-2" />
+        <span className="text-sm">{t`Loading text catalog...`}</span>
+      </div>
+    )
+  }
+
+  // Show styled initial cards when speech hasn't been generated
+  if (showRunCard || !catalog || entries.length === 0) {
+    const resolvedBookLang = editingLanguage || bookLanguage
+    return (
+      <div className="p-4 space-y-4">
+        {/* Translation info card */}
+        <Card className="overflow-hidden max-w-xl shadow-none border-pink-600">
+          <CardHeader className="flex-row items-center gap-2.5 space-y-0 px-4 py-2 text-white bg-pink-600">
+            <div className="flex items-center justify-center w-6 h-6 rounded-full bg-white/20">
+              <Languages className="w-3 h-3" />
+            </div>
+            <CardTitle className="text-sm leading-normal tracking-normal">
+              {t`Translation`}
+              {translationDone && <span className="font-normal text-white/60 ml-1.5">({t`done`})</span>}
+            </CardTitle>
+          </CardHeader>
+          <CardContent className="px-5 py-4 space-y-3">
+            <LanguageSummary bookLanguage={resolvedBookLang} outputLanguages={outputLanguages} />
+            {!translationDone && (
+              <div className="flex items-center gap-3 pt-1">
+                <Link
+                  to="/books/$label/$step/settings"
+                  params={{ label: bookLabel, step: "translation" }}
+                  search={{ tab: "general" }}
+                  className="inline-flex items-center gap-1.5 text-xs font-medium text-pink-600 hover:text-pink-700 transition-colors"
+                >
+                  <Settings className="w-3 h-3" />
+                  {t`Add Translations`}
+                </Link>
+                <div className="flex-1" />
+                <Button
+                  size="sm"
+                  className="h-8 bg-pink-600 hover:bg-pink-700 text-white text-xs"
+                  onClick={handleRunTranslations}
+                  disabled={!hasApiKey || isTranslationRunning}
+                >
+                  {isTranslationRunning ? (
+                    <Loader2 className="mr-1 h-3 w-3 animate-spin" />
+                  ) : (
+                    <Play className="mr-1 h-3 w-3" />
+                  )}
+                  {translationState === "error" ? t`Retry Translation` : t`Run Translation`}
+                </Button>
+              </div>
+            )}
+          </CardContent>
+        </Card>
+
+        {/* Speech card */}
+        <Card className="overflow-hidden max-w-xl shadow-none border-rose-600">
+          <CardHeader className="flex-row items-center gap-2.5 space-y-0 px-4 py-2 text-white bg-rose-600">
+            <div className="flex items-center justify-center w-6 h-6 rounded-full bg-white/20">
+              <Volume2 className="w-3 h-3" />
+            </div>
+            <CardTitle className="text-sm leading-normal tracking-normal">{t`Speech`}</CardTitle>
+          </CardHeader>
+          <CardContent className="px-5 py-4 space-y-3">
+            <div>
+              <div className="text-[10px] font-medium text-muted-foreground uppercase tracking-wider mb-1.5">{t`Voice`}</div>
+              <p className="text-sm font-medium">{providerLabel} <span className="font-normal text-muted-foreground">·</span> {defaultVoice}
+                {defaultModel && <>{" "}<span className="font-normal text-muted-foreground">·</span> <span className="font-normal text-muted-foreground">{defaultModel}</span></>}
+              </p>
+            </div>
+            <div className="flex items-center gap-3 pt-1">
+              <Link
+                to="/books/$label/$step/settings"
+                params={{ label: bookLabel, step: "translation" }}
+                search={{ tab: "speech" }}
+                className="inline-flex items-center gap-1.5 text-xs font-medium text-rose-600 hover:text-rose-700 transition-colors"
+              >
+                <Settings className="w-3 h-3" />
+                {t`Choose Provider`}
+              </Link>
+              <div className="flex-1" />
+              <Button
+                size="sm"
+                className="h-8 bg-rose-600 hover:bg-rose-700 text-white text-xs"
+                onClick={handleRunTranslationAndSpeech}
+                disabled={!hasApiKey || isTranslationRunning || isRunning}
+              >
+                {isRunning ? (
+                  <Loader2 className="mr-1 h-3 w-3 animate-spin" />
+                ) : (
+                  <Play className="mr-1 h-3 w-3" />
+                )}
+                {speechState === "error" ? t`Retry Speech` : t`Run Speech`}
+              </Button>
+            </div>
+          </CardContent>
+        </Card>
+      </div>
+    )
+  }
+
+  const showAllButton = selectedPageId ? (
+    <div className="flex justify-center pt-2 pb-4">
+      <button
+        type="button"
+        onClick={() => onSelectPage?.(null)}
+        className="text-xs font-medium text-rose-600 hover:text-rose-700 hover:underline transition-colors"
+      >
+        {t`Show all speech entries`}
+      </button>
+    </div>
+  ) : null
+
+  return (
+    <div className="flex flex-col h-full">
+      <div className="shrink-0 px-4 pt-4 space-y-3">
+        {allowGeminiPartialView && runError && (
+          <Alert variant="destructive" className="rounded-md">
+            <AlertDescription className="text-xs whitespace-pre-wrap break-words">
+              {runError}
+            </AlertDescription>
+          </Alert>
+        )}
+
+        {/* Language tabs */}
+        {outputLanguages.length > 1 && (
+          <div className="flex gap-1.5">
+            {outputLanguages.map((lang) => (
+              <button
+                key={lang}
+                type="button"
+                onClick={() => setSelectedLang(lang)}
+                className={cn(
+                  "text-xs h-7 px-3 rounded-md font-medium transition-colors cursor-pointer",
+                  selectedLang === lang
+                    ? "bg-foreground text-background"
+                    : "bg-muted text-muted-foreground hover:bg-accent"
+                )}
+              >
+                {displayLang(lang)}
+                <span className={cn(
+                  "ml-1 text-[10px]",
+                  selectedLang === lang ? "opacity-60" : "opacity-50"
+                )}>
+                  ({lang})
+                </span>
+              </button>
+            ))}
+          </div>
+        )}
+
+        {/* Catalog type filters */}
+        {filteredByPage.length > 0 && (
+          <div className="flex flex-wrap gap-1">
+            {([
+              ["all", t`All`],
+              ["text", t`Text`],
+              ["captions", t`Captions`],
+              ["activities", t`Activities`],
+              ["answers", t`Answers`],
+              ["glossary", t`Glossary`],
+              ["quizzes", t`Quizzes`],
+            ] as const).map(([key, label]) => {
+              const count = typeCounts[key]
+              if (key !== "all" && count === 0) return null
+              return (
+                <button
+                  key={key}
+                  type="button"
+                  onClick={() => setCatalogFilter(key)}
+                  className={cn(
+                    "text-[11px] h-6 px-2.5 rounded-full font-medium transition-colors cursor-pointer",
+                    catalogFilter === key
+                      ? "bg-foreground text-background"
+                      : "bg-muted text-muted-foreground hover:bg-accent"
+                  )}
+                >
+                  {label}
+                  <span className={cn(
+                    "ml-1 text-[10px]",
+                    catalogFilter === key ? "opacity-60" : "opacity-50"
+                  )}>
+                    {count}
+                  </span>
+                </button>
+              )
+            })}
+          </div>
+        )}
+
+        {/* Translation & Speech control panels */}
+        <div className="grid grid-cols-2 gap-3">
+          {/* Translation panel */}
+          <div className="rounded-lg border border-pink-200 bg-pink-50/30 px-4 py-2.5 space-y-2">
+            <div className="flex items-center gap-2">
+              <Languages className="w-3.5 h-3.5 text-pink-500 shrink-0" />
+              <p className="flex-1 text-xs font-medium text-pink-900">{t`Translation`}</p>
+              {isTranslationRunning ? (
+                <Loader2 className="w-4 h-4 animate-spin text-pink-500 shrink-0" />
+              ) : (
+                <button
+                  type="button"
+                  onClick={handleRunTranslations}
+                  disabled={!hasApiKey}
+                  className="flex items-center justify-center w-6 h-6 rounded text-pink-600 hover:bg-pink-100 transition-colors cursor-pointer disabled:opacity-50"
+                  title={t`Rerun translation`}
+                >
+                  <RefreshCw className="h-3.5 w-3.5" />
+                </button>
+              )}
+            </div>
+            <div className="text-[11px] text-pink-800/70">
+              {bookLanguage && <span>{displayLang(bookLanguage)}</span>}
+              {outputLanguages.length > 0 && (
+                <span>
+                  {bookLanguage && <span className="text-pink-300"> → </span>}
+                  {outputLanguages.map((l) => displayLang(l)).join(", ")}
+                </span>
+              )}
+            </div>
+            <div className="flex items-center gap-3">
+              <Link
+                to="/books/$label/$step/settings"
+                params={{ label: bookLabel, step: "translation" }}
+                search={{ tab: "general" }}
+                className="inline-flex items-center gap-1 text-[10px] font-medium text-pink-600 hover:text-pink-700 transition-colors"
+              >
+                <Settings className="w-2.5 h-2.5" />
+                {t`Languages`}
+              </Link>
+              <Link
+                to="/books/$label/$step/settings"
+                params={{ label: bookLabel, step: "translation" }}
+                search={{ tab: "prompt" }}
+                className="inline-flex items-center gap-1 text-[10px] font-medium text-pink-600 hover:text-pink-700 transition-colors"
+              >
+                <Settings className="w-2.5 h-2.5" />
+                {t`Prompt`}
+              </Link>
+            </div>
+          </div>
+
+          {/* Speech panel */}
+          <div className="rounded-lg border border-rose-200 bg-rose-50/30 px-4 py-2.5 space-y-2">
+            <div className="flex items-center gap-2">
+              <Volume2 className="w-3.5 h-3.5 text-rose-500 shrink-0" />
+              <p className="flex-1 text-xs font-medium text-rose-900">
+                {t`Speech`}
+                <span className="font-normal text-rose-600 ml-1.5">
+                  {currentLanguageUsesGemini
+                    ? t`${String(generatedAudioCount)}/${String(displayEntries.length)}`
+                    : t`${String(totalAudioFiles)} files`}
+                </span>
+              </p>
+              {isRunning ? (
+                <Loader2 className="w-4 h-4 animate-spin text-rose-500 shrink-0" />
+              ) : (
+                <div className="flex items-center gap-1">
+                  <button
+                    type="button"
+                    onClick={handleRunSpeech}
+                    disabled={!hasApiKey}
+                    className="flex items-center justify-center w-6 h-6 rounded text-rose-600 hover:bg-rose-100 transition-colors cursor-pointer disabled:opacity-50"
+                    title={t`Regenerate all speech`}
+                  >
+                    <RefreshCw className="h-3.5 w-3.5" />
+                  </button>
+                  <button
+                    type="button"
+                    onClick={() => clearSpeechMutation.mutate()}
+                    disabled={clearSpeechMutation.isPending}
+                    className="flex items-center justify-center w-6 h-6 rounded text-rose-600 hover:bg-rose-100 transition-colors cursor-pointer disabled:opacity-50"
+                    title={t`Clear speech data`}
+                  >
+                    {clearSpeechMutation.isPending ? (
+                      <Loader2 className="h-3.5 w-3.5 animate-spin" />
+                    ) : (
+                      <Trash2 className="h-3.5 w-3.5" />
+                    )}
+                  </button>
+                </div>
+              )}
+            </div>
+            <div className="text-[11px] text-rose-800/70">
+              {providerLabel} <span className="text-rose-300">·</span> {defaultVoice}
+              {defaultModel && <>{" "}<span className="text-rose-300">·</span> {defaultModel}</>}
+            </div>
+            <Link
+              to="/books/$label/$step/settings"
+              params={{ label: bookLabel, step: "translation" }}
+              search={{ tab: "speech" }}
+              className="inline-flex items-center gap-1 text-[10px] font-medium text-rose-600 hover:text-rose-700 transition-colors"
+            >
+              <Settings className="w-2.5 h-2.5" />
+              {t`Choose Provider`}
+            </Link>
+          </div>
+        </div>
+      </div>
+
+      {/* Entries */}
+      {selectedPageId && displayEntries.length === 0 && entries.length > 0 ? (
+        <div className="flex flex-col items-center justify-center py-16 text-muted-foreground">
+          <div className="w-12 h-12 rounded-full bg-rose-50 flex items-center justify-center mb-3">
+            <Volume2 className="w-6 h-6 text-rose-300" />
+          </div>
+          <p className="text-sm font-medium">{t`No speech entries for this page`}</p>
+          <p className="text-xs mt-1">{t`This page has no text entries with audio`}</p>
+        </div>
+      ) : (
+        <div ref={scrollRef} className="flex-1 min-h-0 overflow-y-auto px-4 pb-4 pt-3">
+          <div style={{ height: virtualizer.getTotalSize(), width: "100%", position: "relative" }}>
+            {virtualizer.getVirtualItems().map((virtualRow) => {
+              const entry = displayEntries[virtualRow.index]
+              const audio = audioMap.get(entry.id)
+              const isImg = isImageEntry(entry.id)
+              const isAnswer = isAnswerEntry(entry.id)
+              const translated = translatedMap.get(entry.id)
+
+              return (
+                <div
+                  key={entry.id}
+                  data-index={virtualRow.index}
+                  ref={virtualizer.measureElement}
+                  style={{
+                    position: "absolute",
+                    top: 0,
+                    left: 0,
+                    width: "100%",
+                    transform: `translateY(${virtualRow.start}px)`,
+                  }}
+                >
+                  <div className="pb-2">
+                    <SpeechEntryCard
+                      entry={entry}
+                      translated={isSourceLang ? null : (translated ?? null)}
+                      editingLanguage={editingLanguage}
+                      selectedLang={isSourceLang ? null : (selectedLang ?? null)}
+                      audio={audio}
+                      audioLang={audioLang}
+                      bookLabel={bookLabel}
+                      isImg={isImg}
+                      isAnswer={isAnswer}
+                      canGenerate={currentLanguageUsesGemini && !isAnswer}
+                      hasGeminiKey={geminiKey.length > 0}
+                      onGenerate={handleGenerateAudio}
+                      isGenerating={
+                        generateAudioMutation.isPending &&
+                        generateAudioMutation.variables?.textId === entry.id &&
+                        generateAudioMutation.variables?.language === audioLang
+                      }
+                      errorMessage={generateErrorById[entry.id]}
+                    />
+                  </div>
+                </div>
+              )
+            })}
+          </div>
+          {showAllButton}
+        </div>
+      )}
+    </div>
+  )
+}
+
+/* ---------- Entry card ---------- */
+
+/**
+ * Card for a single text-catalog entry: id, source text, optional translation and,
+ * for non-answer entries, an audio area (waveform player, "Generate" button, or a
+ * "No audio" note). Answer entries get an amber background and no audio area.
+ */
+function SpeechEntryCard({
+  entry,
+  translated,
+  editingLanguage,
+  selectedLang,
+  audio,
+  audioLang,
+  bookLabel,
+  isImg,
+  isAnswer,
+  canGenerate,
+  hasGeminiKey,
+  onGenerate,
+  isGenerating,
+  errorMessage,
+}: {
+  entry: TextCatalogEntry
+  translated: string | null
+  editingLanguage: string
+  selectedLang: string | null
+  audio?: { fileName: string; voice: string }
+  audioLang: string | null
+  bookLabel: string
+  isImg: boolean
+  isAnswer: boolean
+  canGenerate: boolean
+  hasGeminiKey: boolean
+  onGenerate: (textId: string) => void
+  isGenerating: boolean
+  errorMessage?: string
+}) {
+  const { t } = useLingui()
+  // Audio counts as available only when both the file record and its language are set.
+  const playable = audio && audioLang ? { lang: audioLang, file: audio.fileName, voice: audio.voice } : null
+  const requestAudio = () => onGenerate(entry.id)
+
+  // Audio area for non-answer entries: player if audio exists, else Generate (when allowed), else a note.
+  let audioBody: React.ReactNode
+  if (isAnswer) {
+    audioBody = null
+  } else if (playable) {
+    audioBody = <WaveformPlayer audioUrl={getAudioUrl(bookLabel, playable.lang, playable.file)} />
+  } else if (canGenerate) {
+    audioBody = (
+      <div className="flex items-center gap-2">
+        <Button
+          type="button"
+          variant="outline"
+          size="sm"
+          className="h-7 px-2.5 text-[10px]"
+          disabled={isGenerating || !hasGeminiKey}
+          onClick={requestAudio}
+          title={hasGeminiKey ? t`Generate audio` : t`Set a Gemini API key to generate audio`}
+        >
+          {isGenerating ? <Loader2 className="mr-1 h-3 w-3 animate-spin" /> : <WandSparkles className="mr-1 h-3 w-3" />}
+          {t`Generate`}
+        </Button>
+        {errorMessage && (
+          <p className="text-[10px] leading-tight text-red-500 truncate flex-1">{errorMessage}</p>
+        )}
+      </div>
+    )
+  } else {
+    audioBody = (
+      <div className="h-6 flex items-center">
+        <span className="text-[10px] text-muted-foreground italic">{t`No audio`}</span>
+      </div>
+    )
+  }
+
+  return (
+    <div className={cn("rounded-lg border px-4 py-3 space-y-2.5", isAnswer ? "bg-amber-50/60" : "bg-card")}>
+      {/* Header: ID + badges + action menu */}
+      <div className="flex items-center gap-2">
+        <span className="text-[10px] text-muted-foreground font-mono truncate flex-1">
+          {entry.id}
+          {isAnswer && <span className="ml-1.5 text-[9px] font-medium text-amber-700 bg-amber-100 rounded px-1 py-0.5 font-sans">{t`Answer`}</span>}
+        </span>
+        {playable && (
+          <span className="text-[9px] text-muted-foreground bg-muted rounded px-1.5 py-0.5 shrink-0">{playable.voice}</span>
+        )}
+        {audio && canGenerate && (
+          <EntryMenu onRegenerate={requestAudio} isRegenerating={isGenerating} hasGeminiKey={hasGeminiKey} />
+        )}
+      </div>
+
+      {/* Text content */}
+      <div className="space-y-1">
+        {isImg && (
+          <img
+            src={`${BASE_URL}/books/${bookLabel}/images/${entry.id}`}
+            alt=""
+            className="w-20 h-14 rounded object-cover ring-1 ring-border"
+          />
+        )}
+        <div>
+          <span className="text-[10px] font-medium text-muted-foreground uppercase tracking-wider">
+            {displayLang(editingLanguage)}
+          </span>
+          <p className="text-sm leading-relaxed">{entry.text}</p>
+        </div>
+        {translated !== null && selectedLang && (
+          <div>
+            <span className="text-[10px] font-medium text-muted-foreground uppercase tracking-wider">{displayLang(selectedLang)}</span>
+            {/* An empty translation string is rendered as "Pending...". */}
+            <p className="text-sm leading-relaxed text-muted-foreground">{translated || <span className="italic">{t`Pending...`}</span>}</p>
+          </div>
+        )}
+      </div>
+
+      {/* Audio section (never rendered for answer entries) */}
+      {!isAnswer && (
+        <div className="space-y-1.5">
+          {audioBody}
+          {/* Timecode placeholder */}
+          {playable && (
+            <div className="flex items-center gap-1.5 pt-0.5">
+              <Clock className="w-3 h-3 text-muted-foreground/40" />
+              <span className="text-[10px] text-muted-foreground/40">{t`Word-level timecodes`}</span>
+            </div>
+          )}
+        </div>
+      )}
+    </div>
+  )
+}
+
+/* ---------- Entry action menu ---------- */
+
+/** Three-dot overflow menu for one speech entry; its single item calls `onRegenerate` and then closes the menu. */
+function EntryMenu({ onRegenerate, isRegenerating, hasGeminiKey }: { onRegenerate: () => void; isRegenerating: boolean; hasGeminiKey: boolean }) {
+  const { t } = useLingui()
+  const [open, setOpen] = useState(false)
+  const ref = useRef<HTMLDivElement>(null)
+
+  // While open, a mousedown anywhere outside the wrapper element closes the menu.
+  useEffect(() => {
+    if (!open) return
+    const closeOnOutsidePress = (event: MouseEvent) => {
+      if (ref.current?.contains(event.target as Node) === false) setOpen(false)
+    }
+    document.addEventListener("mousedown", closeOnOutsidePress)
+    return () => document.removeEventListener("mousedown", closeOnOutsidePress)
+  }, [open])
+
+  const regenerateAndClose = () => { onRegenerate(); setOpen(false) }
+  const itemIcon = isRegenerating ? <Loader2 className="w-3 h-3 animate-spin" /> : <RefreshCw className="w-3 h-3" />
+  return (
+    <div ref={ref} className="relative shrink-0">
+      <button
+        type="button"
+        onClick={() => setOpen(!open)}
+        className="flex items-center justify-center w-6 h-6 rounded text-muted-foreground hover:bg-muted transition-colors cursor-pointer"
+      >
+        <MoreVertical className="w-3.5 h-3.5" />
+      </button>
+      {open && (
+        <div className="absolute right-0 top-full mt-1 z-20 bg-popover border rounded shadow-md min-w-[140px] py-1">
+          <button
+            type="button"
+            disabled={isRegenerating || !hasGeminiKey}
+            onClick={regenerateAndClose}
+            className="w-full text-left px-3 py-1.5 text-xs hover:bg-accent transition-colors flex items-center gap-2 disabled:opacity-50"
+          >
+            {itemIcon}
+            {t`Regenerate`}
+          </button>
+        </div>
+      )}
+    </div>
+  )
+}
+
+/* ---------- Waveform player ---------- */
+
+let activePlayer: { stop: () => void } | null = null // the player currently playing (if any), so that starting another one can stop it first
+
+/** Formats a duration in seconds as `m:ss`; non-finite or negative input (e.g. a NaN/Infinity media duration) is rendered as `0:00`. */
+function formatTime(s: number): string {
+  const whole = Number.isFinite(s) && s > 0 ? Math.floor(s) : 0
+  return `${Math.floor(whole / 60)}:${(whole % 60).toString().padStart(2, "0")}`
+}
+
+/** Reduces channel 0 of `buffer` to `barCount` per-bucket peak amplitudes, each divided by the
+ *  largest peak (floored at 0.01 so silent audio does not divide by zero). */
+function computePeaks(buffer: AudioBuffer, barCount: number): number[] {
+  const samples = buffer.getChannelData(0)
+  const bucketSize = Math.max(1, Math.floor(samples.length / barCount))
+  const rawPeaks: number[] = []
+  for (let bar = 0; bar < barCount; bar++) {
+    let loudest = 0
+    for (const sample of samples.subarray(bar * bucketSize, (bar + 1) * bucketSize)) {
+      const amplitude = Math.abs(sample)
+      if (amplitude > loudest) loudest = amplitude
+    }
+    rawPeaks.push(loudest)
+  }
+  const ceiling = rawPeaks.reduce((top, peak) => (peak > top ? peak : top), 0.01)
+  return rawPeaks.map((peak) => peak / ceiling)
+}
+
+const BAR_COUNT = 60 // number of waveform bars computed and drawn per player
+
+/**
+ * Audio player with a bar waveform. The file at `audioUrl` is decoded once per URL to draw the
+ * bars; playback uses a lazily created `Audio` element, and starting it stops any other active player.
+ */
+function WaveformPlayer({ audioUrl }: { audioUrl: string }) {
+  const [playing, setPlaying] = useState(false)
+  const [progress, setProgress] = useState(0)
+  const [duration, setDuration] = useState(0)
+  const [peaks, setPeaks] = useState<number[] | null>(null)
+  const audioRef = useRef<HTMLAudioElement | null>(null)
+  const rafRef = useRef<number>(0)
+
+  // Mirrors the element's currentTime into state once per animation frame.
+  const tick = useCallback(() => {
+    if (audioRef.current) setProgress(audioRef.current.currentTime)
+    rafRef.current = requestAnimationFrame(tick)
+  }, [])
+
+  // Pause, rewind and reset the UI; this is what other players call through `activePlayer.stop`.
+  const stop = useCallback(() => {
+    if (audioRef.current) {
+      audioRef.current.pause()
+      audioRef.current.currentTime = 0
+    }
+    cancelAnimationFrame(rafRef.current)
+    setPlaying(false)
+    setProgress(0)
+  }, [])
+
+  // Per URL: reset state and rebuild the waveform. Cleanup (URL change or unmount) discards the old element.
+  useEffect(() => {
+    let cancelled = false
+    setPlaying(false)
+    setProgress(0)
+    setDuration(0)
+    setPeaks(null)
+    fetch(audioUrl)
+      .then((res) => res.arrayBuffer())
+      .then((encoded) => {
+        // Short-lived context per decode, closed afterwards so contexts do not pile up across cards.
+        const ctx = new AudioContext()
+        return ctx.decodeAudioData(encoded).finally(() => void ctx.close())
+      })
+      .then((decoded) => {
+        if (cancelled) return
+        setPeaks(computePeaks(decoded, BAR_COUNT))
+        setDuration(decoded.duration)
+      })
+      .catch(() => {
+        // The waveform is cosmetic: fall back to flat bars instead of spinning forever.
+        if (!cancelled) setPeaks(new Array<number>(BAR_COUNT).fill(0))
+      })
+    return () => {
+      cancelled = true
+      cancelAnimationFrame(rafRef.current)
+      audioRef.current?.pause()
+      audioRef.current = null
+      if (activePlayer?.stop === stop) activePlayer = null
+    }
+  }, [audioUrl, stop])
+
+  const toggle = () => {
+    let el = audioRef.current
+    if (!el) {
+      const created = new Audio(audioUrl)
+      created.addEventListener("loadedmetadata", () => {
+        // Ignore late events from a discarded element and non-finite durations.
+        if (audioRef.current === created && Number.isFinite(created.duration)) setDuration(created.duration)
+      })
+      created.addEventListener("ended", () => {
+        if (audioRef.current !== created) return
+        activePlayer = null
+        setPlaying(false)
+        setProgress(0)
+        cancelAnimationFrame(rafRef.current)
+      })
+      el = audioRef.current = created
+    }
+    if (playing) {
+      activePlayer = null
+      el.pause()
+      cancelAnimationFrame(rafRef.current)
+      setPlaying(false)
+      return
+    }
+    if (activePlayer) activePlayer.stop()
+    activePlayer = { stop }
+    setPlaying(true)
+    rafRef.current = requestAnimationFrame(tick)
+    el.play().catch((err: unknown) => {
+      // play() rejected: undo the optimistic state, unless we were paused or replaced in the meantime.
+      if (activePlayer?.stop !== stop || (err instanceof DOMException && err.name === "AbortError")) return
+      activePlayer = null
+      stop()
+    })
+  }
+
+  const seek = (e: React.MouseEvent<HTMLDivElement>) => {
+    if (!audioRef.current || !duration) return
+    const rect = e.currentTarget.getBoundingClientRect()
+    const pct = Math.max(0, Math.min(1, (e.clientX - rect.left) / rect.width))
+    audioRef.current.currentTime = pct * duration
+    setProgress(pct * duration)
+  }
+
+  const pct = duration > 0 ? (progress / duration) * 100 : 0
+  return (
+    <div className="flex items-center gap-2.5 bg-muted/40 rounded-lg px-3 py-2">
+      <button type="button" onClick={toggle} className={cn(
+        "shrink-0 flex items-center justify-center w-8 h-8 rounded-full transition-all cursor-pointer",
+        playing ? "bg-rose-500 text-white hover:bg-rose-600 scale-105" : "bg-rose-100 text-rose-600 hover:bg-rose-200"
+      )}>
+        {playing ? <Pause className="w-3.5 h-3.5" /> : <Play className="w-3.5 h-3.5 ml-0.5" />}
+      </button>
+      <div className="flex-1 min-w-0 space-y-1">
+        <div className="relative h-8 cursor-pointer rounded overflow-hidden" onClick={seek}>
+          {peaks ? (
+            <div className="flex items-end h-full gap-px">
+              {peaks.map((p, i) => (
+                <div
+                  key={i}
+                  className="flex-1 rounded-sm transition-colors"
+                  style={{ height: `${Math.max(8, p * 100)}%`, backgroundColor: ((i + 0.5) / BAR_COUNT) * 100 <= pct ? "rgb(244 63 94)" : "rgb(228 228 231)" }}
+                />
+              ))}
+            </div>
+          ) : (
+            <div className="h-full bg-muted rounded relative flex items-center justify-center">
+              <Loader2 className="w-3 h-3 animate-spin text-muted-foreground" />
+            </div>
+          )}
+        </div>
+        <div className="flex items-center justify-between px-0.5">
+          <span className="text-[9px] text-muted-foreground tabular-nums">{duration > 0 ? formatTime(progress) : "0:00"}</span>
+          <span className="text-[9px] text-muted-foreground tabular-nums">{duration > 0 ? formatTime(duration) : "—"}</span>
+        </div>
+      </div>
+    </div>
+  )
+}
diff --git a/apps/studio/src/components/pipeline/stages/translations/TranslationStageView.tsx b/apps/studio/src/components/pipeline/stages/translations/TranslationStageView.tsx
new file mode 100644
index 00000000..37a21433
--- /dev/null
+++ b/apps/studio/src/components/pipeline/stages/translations/TranslationStageView.tsx
@@ -0,0 +1,57 @@
+import { useState } from "react"
+import { Languages, Volume2 } from "lucide-react"
+import { TranslationsView } from "./TranslationsView"
+import { SpeechView } from "../speech/SpeechView"
+import { cn } from "@/lib/utils"
+import { useLingui } from "@lingui/react/macro"
+
+type TabId = "translation" | "speech" // identifiers of the two tabs in TranslationStageView
+
+/**
+ * Stage view with a two-tab bar: renders `TranslationsView` or `SpeechView`, forwarding the
+ * same `bookLabel`, `selectedPageId` and `onSelectPage` props. Starts on the translation tab.
+ */
+export function TranslationStageView({ bookLabel, selectedPageId, onSelectPage }: { bookLabel: string; selectedPageId?: string; onSelectPage?: (pageId: string | null) => void }) {
+  const { t } = useLingui()
+  const [activeTab, setActiveTab] = useState<TabId>("translation")
+
+  // Array order is the on-screen order of the tab buttons.
+  const tabs: { id: TabId; label: string; Icon: typeof Languages }[] = [
+    { id: "translation", label: t`Translation`, Icon: Languages },
+    { id: "speech", label: t`Speech`, Icon: Volume2 },
+  ]
+
+  return (
+    <div className="flex flex-col h-full">
+      {/* Tab bar */}
+      <div className="shrink-0 px-4 pt-3 flex gap-1">
+        {tabs.map(({ id, label, Icon }) => (
+          <button
+            key={id}
+            type="button"
+            onClick={() => setActiveTab(id)}
+            className={cn(
+              "flex items-center gap-1.5 text-xs h-8 px-3.5 rounded-t-md font-medium transition-colors cursor-pointer border border-b-0",
+              activeTab === id
+                ? "bg-background text-foreground border-border"
+                : "bg-transparent text-muted-foreground border-transparent hover:text-foreground hover:bg-muted/50"
+            )}
+          >
+            <Icon className="w-3.5 h-3.5" />
+            {label}
+          </button>
+        ))}
+      </div>
+      <div className="border-t border-border" />
+
+      {/* Tab content: only the active view is rendered */}
+      <div className="flex-1 min-h-0">
+        {activeTab === "translation" ? (
+          <TranslationsView bookLabel={bookLabel} selectedPageId={selectedPageId} onSelectPage={onSelectPage} />
+        ) : (
+          <SpeechView bookLabel={bookLabel} selectedPageId={selectedPageId} onSelectPage={onSelectPage} />
+        )}
+      </div>
+    </div>
+  )
+}
diff --git a/apps/studio/src/components/pipeline/stages/translations/TranslationsSettings.tsx b/apps/studio/src/components/pipeline/stages/translations/TranslationsSettings.tsx
index 778fa978..82f84eef 100644
--- a/apps/studio/src/components/pipeline/stages/translations/TranslationsSettings.tsx
+++ b/apps/studio/src/components/pipeline/stages/translations/TranslationsSettings.tsx
@@ -1,7 +1,7 @@
 import { useState, useEffect } from "react"
 import { createPortal } from "react-dom"
 import { useNavigate } from "@tanstack/react-router"
-import { Play } from "lucide-react"
+import { Play, Save } from "lucide-react"
 import { Button } from "@/components/ui/button"
 import {
   Dialog,
@@ -11,10 +11,9 @@ import {
   DialogHeader,
   DialogTitle,
 } from "@/components/ui/dialog"
-import { Input } from "@/components/ui/input"
-import { Label } from "@/components/ui/label"
 import { useBookConfig, useUpdateBookConfig } from "@/hooks/use-book-config"
 import { useActiveConfig } from "@/hooks/use-debug"
+import { useBook } from "@/hooks/use-books"
 import { useApiKey } from "@/hooks/use-api-key"
 import { api } from "@/api/client"
 import { PromptViewer } from "@/components/pipeline/components/PromptViewer"
@@ -22,14 +21,13 @@ import { LanguagePicker } from "@/components/LanguagePicker"
 import { useBookRun } from "@/hooks/use-book-run"
 import { useStepConfig } from "@/hooks/use-step-config"
 import { normalizeLocale } from "@/lib/languages"
-import { SpeechPromptsEditor } from "./components/SpeechPromptsEditor"
-import { VoiceMappingsEditor } from "./components/VoiceMappingsEditor"
 import { useLingui } from "@lingui/react/macro"
 
 export function TranslationsSettings({ bookLabel, headerTarget, tab = "general" }: { bookLabel: string; headerTarget?: HTMLDivElement | null; tab?: string }) {
   const { t } = useLingui()
   const { data: bookConfigData } = useBookConfig(bookLabel)
   const { data: activeConfigData } = useActiveConfig(bookLabel)
+  const { data: book } = useBook(bookLabel)
   const updateConfig = useUpdateBookConfig()
   const { apiKey, hasApiKey, azureKey, azureRegion, geminiKey } = useApiKey()
   const { queueRun } = useBookRun()
@@ -39,56 +37,23 @@ export function TranslationsSettings({ bookLabel, headerTarget, tab = "general"
   const [outputLanguages, setOutputLanguages] = useState<Set<string>>(new Set())
   const [promptDraft, setPromptDraft] = useState<string | null>(null)
 
-  // Speech settings
-  const [speechModel, setSpeechModel] = useState("")
-  const [format, setFormat] = useState("")
-  const [defaultProvider, setDefaultProvider] = useState("openai")
-  const [openaiModel, setOpenaiModel] = useState("")
-  const [openaiLanguages, setOpenaiLanguages] = useState("")
-  const [azureModel, setAzureModel] = useState("")
-  const [azureLanguages, setAzureLanguages] = useState("")
-  const [geminiModel, setGeminiModel] = useState("")
-  const [geminiLanguages, setGeminiLanguages] = useState("")
-  const [bitRate, setBitRate] = useState("")
-  const [sampleRate, setSampleRate] = useState("")
-
   const [dirty, setDirty] = useState<Record<string, boolean>>({})
   const markDirty = (field: string) => setDirty((prev) => ({ ...prev, [field]: true }))
 
   const merged = activeConfigData?.merged as Record<string, unknown> | undefined
   const translation = useStepConfig(merged, "translation", markDirty)
+  const bookLanguage = book?.languageCode ?? book?.metadata?.language_code ?? null
 
   useEffect(() => {
     if (!activeConfigData) return
     const m = activeConfigData.merged as Record<string, unknown>
-    if (Array.isArray(m.output_languages)) {
+    if (Array.isArray(m.output_languages) && m.output_languages.length > 0) {
       const normalized = (m.output_languages as string[]).map((code) => normalizeLocale(code))
       setOutputLanguages(new Set(normalized))
+    } else if (bookLanguage) {
+      setOutputLanguages(new Set([normalizeLocale(bookLanguage)]))
     }
-    if (m.speech && typeof m.speech === "object") {
-      const s = m.speech as Record<string, unknown>
-      if (s.model) setSpeechModel(String(s.model))
-      if (s.format) setFormat(String(s.format))
-      if (s.default_provider) setDefaultProvider(String(s.default_provider))
-      if (s.bit_rate) setBitRate(String(s.bit_rate))
-      if (s.sample_rate) setSampleRate(String(s.sample_rate))
-      if (s.providers && typeof s.providers === "object") {
-        const providers = s.providers as Record<string, Record<string, unknown>>
-        if (providers.openai) {
-          if (providers.openai.model) setOpenaiModel(String(providers.openai.model))
-          if (Array.isArray(providers.openai.languages)) setOpenaiLanguages((providers.openai.languages as string[]).join(", "))
-        }
-        if (providers.azure) {
-          if (providers.azure.model) setAzureModel(String(providers.azure.model))
-          if (Array.isArray(providers.azure.languages)) setAzureLanguages((providers.azure.languages as string[]).join(", "))
-        }
-        if (providers.gemini) {
-          if (providers.gemini.model) setGeminiModel(String(providers.gemini.model))
-          if (Array.isArray(providers.gemini.languages)) setGeminiLanguages((providers.gemini.languages as string[]).join(", "))
-        }
-      }
-    }
-  }, [activeConfigData])
+  }, [activeConfigData, bookLanguage])
 
   const shouldWrite = (field: string) =>
     dirty[field] || (bookConfigData?.config && field in bookConfigData.config)
@@ -103,41 +68,8 @@ export function TranslationsSettings({ bookLabel, headerTarget, tab = "general"
     }
     if (shouldWrite("output_languages")) {
       const normalized = Array.from(outputLanguages).map((code) => normalizeLocale(code))
-      overrides.output_languages = normalized.length > 0 ? normalized : undefined
-    }
-    if (shouldWrite("speech")) {
-      const existing = (bookConfigData?.config?.speech ?? {}) as Record<string, unknown>
-      const openaiLangs = openaiLanguages.split(",").map((s) => s.trim()).filter(Boolean)
-      const azureLangs = azureLanguages.split(",").map((s) => s.trim()).filter(Boolean)
-      const geminiLangs = geminiLanguages.split(",").map((s) => s.trim()).filter(Boolean)
-      const providers: Record<string, unknown> = {}
-      if (openaiModel.trim() || openaiLangs.length > 0) {
-        providers.openai = {
-          model: openaiModel.trim() || undefined,
-          languages: openaiLangs.length > 0 ? openaiLangs : undefined,
-        }
-      }
-      if (azureModel.trim() || azureLangs.length > 0) {
-        providers.azure = {
-          model: azureModel.trim() || undefined,
-          languages: azureLangs.length > 0 ? azureLangs : undefined,
-        }
-      }
-      if (geminiModel.trim() || geminiLangs.length > 0) {
-        providers.gemini = {
-          model: geminiModel.trim() || undefined,
-          languages: geminiLangs.length > 0 ? geminiLangs : undefined,
-        }
-      }
-      overrides.speech = {
-        ...existing,
-        model: speechModel.trim() || undefined,
-        format: format.trim() || undefined,
-        default_provider: defaultProvider || undefined,
-        providers: Object.keys(providers).length > 0 ? providers : undefined,
-        bit_rate: bitRate.trim() || undefined,
-        sample_rate: sampleRate.trim() ? Number(sampleRate.trim()) : undefined,
-      }
+      const isOnlyBookLang = bookLanguage && normalized.length === 1 && normalizeLocale(normalized[0]) === normalizeLocale(bookLanguage)
+      overrides.output_languages = normalized.length > 0 && !isOnlyBookLang ? normalized : undefined
     }
     return overrides
   }
@@ -153,6 +85,23 @@ export function TranslationsSettings({ bookLabel, headerTarget, tab = "general"
     markDirty("output_languages")
   }
 
+  // Save without rerunning: persists the edited prompt (if any) and the config overrides.
+  const saveOnly = async () => {
+    // The prompt goes through a separate api.updatePrompt call; wait for it before saving config.
+    if (promptDraft != null) {
+      await api.updatePrompt("translation", promptDraft, bookLabel)
+    }
+    const clearPendingEdits = () => {
+      // Once the config save succeeds, reset the dirty flags and discard the prompt draft.
+      setDirty({})
+      setPromptDraft(null)
+    }
+    updateConfig.mutate(
+      { label: bookLabel, config: buildOverrides() },
+      { onSuccess: clearPendingEdits }
+    )
+  }
+
   const confirmSaveAndRerun = async () => {
     const promptSaves: Promise<unknown>[] = []
     if (promptDraft != null) promptSaves.push(api.updatePrompt("translation", promptDraft, bookLabel))
@@ -167,15 +116,15 @@ export function TranslationsSettings({ bookLabel, headerTarget, tab = "general"
           setPromptDraft(null)
           setShowRerunDialog(false)
           queueRun({
-            fromStage: "text-and-speech",
-            toStage: "text-and-speech",
+            fromStage: "translation",
+            toStage: "translation",
             apiKey,
             providerCredentials: {
               azure: { key: azureKey, region: azureRegion },
               geminiApiKey: geminiKey,
             },
           })
-          navigate({ to: "/books/$label/$step", params: { label: bookLabel, step: "text-and-speech" } })
+          navigate({ to: "/books/$label/$step", params: { label: bookLabel, step: "translation" } })
         },
       }
     )
@@ -189,7 +138,7 @@ export function TranslationsSettings({ bookLabel, headerTarget, tab = "general"
           onSelect={toggleLanguage}
           multiple
           label={t`Output Languages`}
-          hint={t`Leave empty to output only in the book language.`}
+          bookLanguage={bookLanguage}
         />
       )}
 
@@ -208,164 +157,36 @@ export function TranslationsSettings({ bookLabel, headerTarget, tab = "general"
         />
       )}
 
-      {tab === "speech" && (
-        <div className="space-y-6">
-          {/* Provider Routing */}
-          <div className="space-y-3">
-            <h3 className="text-xs font-semibold uppercase tracking-wide text-muted-foreground">{t`Provider Routing`}</h3>
-            <div className="space-y-1.5">
-              <Label className="text-xs">{t`Default Provider`}</Label>
-              <select
-                value={defaultProvider}
-                onChange={(e) => { setDefaultProvider(e.target.value); markDirty("speech") }}
-                className="flex h-8 w-48 rounded-md border border-input bg-background px-3 py-1 text-xs shadow-sm"
-              >
-                <option value="openai">{t`OpenAI`}</option>
-                <option value="azure">{t`Azure`}</option>
-                <option value="gemini">{t`Gemini`}</option>
-              </select>
-              <p className="text-xs text-muted-foreground">{t`Provider used for languages not assigned to a specific provider.`}</p>
-            </div>
-          </div>
-
-          {/* OpenAI Provider */}
-          <div className="space-y-3 rounded-md border p-3">
-            <h3 className="text-xs font-semibold">{t`OpenAI`}</h3>
-            <div className="space-y-1.5">
-              <Label className="text-xs">{t`Model`}</Label>
-              <Input
-                value={openaiModel}
-                onChange={(e) => { setOpenaiModel(e.target.value); markDirty("speech") }}
-                placeholder={t`e.g. gpt-4o-mini-tts`}
-                className="w-72 h-8 text-xs"
-              />
-            </div>
-            <div className="space-y-1.5">
-              <Label className="text-xs">{t`Languages`}</Label>
-              <Input
-                value={openaiLanguages}
-                onChange={(e) => { setOpenaiLanguages(e.target.value); markDirty("speech") }}
-                placeholder={t`e.g. en, fr`}
-                className="w-72 h-8 text-xs"
-              />
-              <p className="text-xs text-muted-foreground">{t`Comma-separated language codes routed to OpenAI.`}</p>
-            </div>
-          </div>
-
-          {/* Azure Provider */}
-          <div className="space-y-3 rounded-md border p-3">
-            <h3 className="text-xs font-semibold">{t`Azure Speech`}</h3>
-            <div className="space-y-1.5">
-              <Label className="text-xs">{t`Model`}</Label>
-              <Input
-                value={azureModel}
-                onChange={(e) => { setAzureModel(e.target.value); markDirty("speech") }}
-                placeholder={t`e.g. azure-tts`}
-                className="w-72 h-8 text-xs"
-              />
-            </div>
-            <div className="space-y-1.5">
-              <Label className="text-xs">{t`Languages`}</Label>
-              <Input
-                value={azureLanguages}
-                onChange={(e) => { setAzureLanguages(e.target.value); markDirty("speech") }}
-                placeholder={t`e.g. es, ta, si, sw`}
-                className="w-72 h-8 text-xs"
-              />
-              <p className="text-xs text-muted-foreground">{t`Comma-separated language codes routed to Azure.`}</p>
-            </div>
-          </div>
-
-          {/* Gemini Provider */}
-          <div className="space-y-3 rounded-md border p-3">
-            <h3 className="text-xs font-semibold">{t`Gemini`}</h3>
-            <div className="space-y-1.5">
-              <Label className="text-xs">{t`Model`}</Label>
-              <Input
-                value={geminiModel}
-                onChange={(e) => { setGeminiModel(e.target.value); markDirty("speech") }}
-                placeholder={t`e.g. gemini-2.5-pro-preview-tts`}
-                className="w-72 h-8 text-xs"
-              />
-            </div>
-            <div className="space-y-1.5">
-              <Label className="text-xs">{t`Languages`}</Label>
-              <Input
-                value={geminiLanguages}
-                onChange={(e) => { setGeminiLanguages(e.target.value); markDirty("speech") }}
-                placeholder={t`e.g. en, hi, ta`}
-                className="w-72 h-8 text-xs"
-              />
-              <p className="text-xs text-muted-foreground">{t`Comma-separated language codes routed to Gemini.`}</p>
-            </div>
-          </div>
-
-          {/* Audio Settings */}
-          <div className="space-y-3">
-            <h3 className="text-xs font-semibold uppercase tracking-wide text-muted-foreground">{t`Audio Settings`}</h3>
-            <div className="flex gap-4">
-              <div className="space-y-1.5">
-                <Label className="text-xs">{t`Format`}</Label>
-                <Input
-                  value={format}
-                  onChange={(e) => { setFormat(e.target.value); markDirty("speech") }}
-                  placeholder={t`mp3`}
-                  className="w-32 h-8 text-xs"
-                />
-              </div>
-              <div className="space-y-1.5">
-                <Label className="text-xs">{t`Bit Rate`}</Label>
-                <Input
-                  value={bitRate}
-                  onChange={(e) => { setBitRate(e.target.value); markDirty("speech") }}
-                  placeholder={t`64k`}
-                  className="w-32 h-8 text-xs"
-                />
-              </div>
-              <div className="space-y-1.5">
-                <Label className="text-xs">{t`Sample Rate`}</Label>
-                <Input
-                  value={sampleRate}
-                  onChange={(e) => { setSampleRate(e.target.value); markDirty("speech") }}
-                  placeholder={t`24000`}
-                  className="w-32 h-8 text-xs"
-                />
-              </div>
-            </div>
-            <p className="text-xs text-muted-foreground">
-              {t`Gemini TTS outputs WAV audio in this integration; other providers continue using the configured format.`}
-            </p>
-          </div>
-        </div>
-      )}
-
-      {tab === "speech-prompts" && (
-        <SpeechPromptsEditor bookLabel={bookLabel} headerTarget={headerTarget} />
-      )}
-
-      {tab === "voices" && (
-        <VoiceMappingsEditor bookLabel={bookLabel} headerTarget={headerTarget} />
-      )}
-
-      {headerTarget && (tab === "general" || tab === "prompt" || tab === "speech") && createPortal(
-        <Button
-          size="sm"
-          className="h-7 px-2.5 text-xs bg-black/15 text-white hover:bg-black/25"
-          onClick={() => setShowRerunDialog(true)}
-          disabled={updateConfig.isPending || !hasApiKey}
-        >
-          <Play className="mr-1.5 h-3.5 w-3.5" />
-          {t`Save & Rerun`}
-        </Button>,
+      {headerTarget && (tab === "general" || tab === "prompt") && createPortal(
+        <div className="flex items-center gap-1.5">
+          <Button
+            size="sm"
+            className="h-7 px-2.5 text-xs bg-black/15 text-white hover:bg-black/25"
+            onClick={saveOnly}
+            disabled={updateConfig.isPending}
+          >
+            <Save className="mr-1.5 h-3.5 w-3.5" />
+            {t`Save`}
+          </Button>
+          <Button
+            size="sm"
+            className="h-7 px-2.5 text-xs bg-black/15 text-white hover:bg-black/25"
+            onClick={() => setShowRerunDialog(true)}
+            disabled={updateConfig.isPending || !hasApiKey}
+          >
+            <Play className="mr-1.5 h-3.5 w-3.5" />
+            {t`Save & Rerun`}
+          </Button>
+        </div>,
         headerTarget
       )}
 
       <Dialog open={showRerunDialog} onOpenChange={setShowRerunDialog}>
         <DialogContent>
           <DialogHeader>
-            <DialogTitle>{t`Save & Rerun Translations + Audio`}</DialogTitle>
+            <DialogTitle>{t`Save & Rerun Translations`}</DialogTitle>
             <DialogDescription>
-              {t`This will save your settings and re-run translations and audio generation, rebuilding the text catalog, translating to output languages, and generating speech.`}
+              {t`This will save your settings and re-run translations, rebuilding the text catalog and translating to output languages.`}
             </DialogDescription>
           </DialogHeader>
           <DialogFooter>
diff --git a/apps/studio/src/components/pipeline/stages/translations/TranslationsView.tsx b/apps/studio/src/components/pipeline/stages/translations/TranslationsView.tsx
index e57185d9..ce4113ab 100644
--- a/apps/studio/src/components/pipeline/stages/translations/TranslationsView.tsx
+++ b/apps/studio/src/components/pipeline/stages/translations/TranslationsView.tsx
@@ -1,7 +1,7 @@
-import { useState, useEffect, useRef, useCallback, useMemo } from "react"
-import { Check, ChevronDown, Languages, Loader2, Play, Pause, WandSparkles } from "lucide-react"
-import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query"
-import { api, getAudioUrl, BASE_URL } from "@/api/client"
+import { useState, useEffect, useRef, useCallback } from "react"
+import { Check, ChevronDown, Languages, Loader2 } from "lucide-react"
+import { useQuery, useQueryClient } from "@tanstack/react-query"
+import { api, BASE_URL } from "@/api/client"
 import type { TextCatalogEntry, VersionEntry } from "@/api/client"
 import { useActiveConfig } from "@/hooks/use-debug"
 import { useBook } from "@/hooks/use-books"
@@ -12,11 +12,7 @@ import { StageRunCard } from "../../components/StageRunCard"
 import { useVirtualizer } from "@tanstack/react-virtual"
 import { cn } from "@/lib/utils"
 import { normalizeLocale } from "@/lib/languages"
-import { languageUsesSpeechProvider } from "@/lib/speech-routing"
-import { Alert, AlertDescription } from "@/components/ui/alert"
-import { Button } from "@/components/ui/button"
 import { resolveTranslationLanguageState } from "./lib/translations-view-state"
-import { msg } from "@lingui/core/macro"
 import { useLingui } from "@lingui/react/macro"
 
 const IMAGE_ID_RE = /_im\d{3}/
@@ -159,18 +155,17 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: {
   const { data: activeConfigData } = useActiveConfig(bookLabel)
   const { data: book, isLoading: isBookLoading } = useBook(bookLabel)
   const queryClient = useQueryClient()
-  const { stageState, queueRun, error: runError } = useBookRun()
+  const { stageState, queueRun } = useBookRun()
   const { apiKey, hasApiKey, azureKey, azureRegion, geminiKey } = useApiKey()
-  const ttsState = stageState("text-and-speech")
-  const textAndSpeechDone = ttsState === "done"
-  const hasStageError = ttsState === "error"
-  const isRunning = ttsState === "running" || ttsState === "queued"
+  const translationState = stageState("translation")
+  const translationDone = translationState === "done"
+  const isRunning = translationState === "running" || translationState === "queued"
 
   const handleRunTranslations = useCallback(() => {
     if (!hasApiKey || isRunning) return
     queueRun({
-      fromStage: "text-and-speech",
-      toStage: "text-and-speech",
+      fromStage: "translation",
+      toStage: "translation",
       apiKey,
       providerCredentials: {
         azure: { key: azureKey, region: azureRegion },
@@ -185,14 +180,7 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: {
     enabled: !!bookLabel,
   })
 
-  const { data: ttsData } = useQuery({
-    queryKey: ["books", bookLabel, "tts"],
-    queryFn: () => api.getTTS(bookLabel),
-    enabled: !!bookLabel,
-  })
-
   const merged = activeConfigData?.merged as Record<string, unknown> | undefined
-  const speechConfig = merged?.speech
   const outputLanguages = Array.from(
     new Set(((merged?.output_languages as string[] | undefined) ?? []).map((code) => normalizeLocale(code)))
   )
@@ -224,29 +212,11 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: {
     isBookLoading,
   })
   const isSourceLang = !hasExplicitOutputLanguages || isSelectedSourceLang
-  const audioLang = selectedLang ??
-    (hasExplicitOutputLanguages ? (outputLanguages[0] ?? editingLanguage) : editingLanguage)
-  const currentLanguageUsesGemini =
-    !!audioLang && languageUsesSpeechProvider(audioLang, "gemini", speechConfig)
-  const geminiRoutedLanguages = (
-    outputLanguages.length > 0
-      ? outputLanguages
-      : editingLanguage
-        ? [editingLanguage]
-        : []
-  ).filter((language, index, array) =>
-    languageUsesSpeechProvider(language, "gemini", speechConfig) &&
-    array.indexOf(language) === index
-  )
-  const allowGeminiPartialView =
-    hasStageError &&
-    geminiRoutedLanguages.length > 0
-  const showRunCard = (!textAndSpeechDone || isRunning) && !allowGeminiPartialView
+  const showRunCard = !translationDone || isRunning
 
   // Pending state for edits (keyed by language)
   const [pendingEntries, setPendingEntries] = useState<TextCatalogEntry[] | null>(null)
   const [saving, setSaving] = useState(false)
-  const [generateErrorById, setGenerateErrorById] = useState<Record<string, string>>({})
 
   // Get translated entries for selected language
   const translationData = selectedLang ? catalog?.translations?.[selectedLang] : undefined
@@ -290,19 +260,6 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: {
     }
   }
 
-  // Build audio lookup — use selected language, or editing language when no output languages
-  const audioMap = new Map<string, { fileName: string; voice: string }>()
-  if (ttsData && audioLang && ttsData.languages[audioLang]) {
-    for (const e of ttsData.languages[audioLang].entries) {
-      audioMap.set(e.textId, { fileName: e.fileName, voice: e.voice })
-    }
-  }
-  const totalAudioFiles = ttsData
-    ? Object.values(ttsData.languages).reduce((sum, lang) => sum + lang.entries.length, 0)
-    : 0
-  const generatedAudioCount = displayEntries.filter((entry) => audioMap.has(entry.id)).length
-  const missingAudioCount = Math.max(displayEntries.length - generatedAudioCount, 0)
-
   const scrollRef = useRef<HTMLDivElement>(null)
   const virtualizer = useVirtualizer({
     count: displayEntries.length,
@@ -311,56 +268,6 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: {
     overscan: 5,
   })
 
-  const generateAudioMutation = useMutation({
-    mutationFn: async (variables: { textId: string; language: string }) => {
-      if (!geminiKey) {
-        throw new Error(i18n._(msg`Gemini API key is required to generate audio.`))
-      }
-      return api.generateGeminiTTSForItem(
-        bookLabel,
-        variables.textId,
-        variables.language,
-        {
-          geminiApiKey: geminiKey,
-          openaiApiKey: apiKey || undefined,
-          azure: azureKey && azureRegion
-            ? { key: azureKey, region: azureRegion }
-            : undefined,
-        }
-      )
-    },
-    onMutate: (variables) => {
-      setGenerateErrorById((prev) => {
-        if (!(variables.textId in prev)) return prev
-        const next = { ...prev }
-        delete next[variables.textId]
-        return next
-      })
-    },
-    onSuccess: async () => {
-      await Promise.all([
-        queryClient.invalidateQueries({ queryKey: ["books", bookLabel, "tts"] }),
-        queryClient.invalidateQueries({ queryKey: ["books", bookLabel, "step-status"] }),
-      ])
-    },
-    onError: (error, variables) => {
-      setGenerateErrorById((prev) => ({
-        ...prev,
-        [variables.textId]:
-          error instanceof Error ? error.message : String(error),
-      }))
-      queryClient.invalidateQueries({ queryKey: ["books", bookLabel, "step-status"] })
-    },
-  })
-
-  const handleGenerateAudio = useCallback(
-    (textId: string) => {
-      if (!audioLang || !currentLanguageUsesGemini) return
-      generateAudioMutation.mutate({ textId, language: audioLang })
-    },
-    [audioLang, currentLanguageUsesGemini, generateAudioMutation]
-  )
-
   useEffect(() => {
     if (!catalog) return
     setExtra(
@@ -369,18 +276,6 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: {
         {outputLanguages.length > 1 && (
           <span className="text-[10px] bg-white/20 rounded-full px-2 py-0.5">{t`${String(outputLanguages.length)} languages`}</span>
         )}
-        {currentLanguageUsesGemini ? (
-          <span className="text-[10px] bg-white/20 rounded-full px-2 py-0.5">
-            {t`${String(generatedAudioCount)}/${String(displayEntries.length)} audio`}
-          </span>
-        ) : totalAudioFiles > 0 && (
-          <span className="text-[10px] bg-white/20 rounded-full px-2 py-0.5">{t`${String(totalAudioFiles)} audio`}</span>
-        )}
-        {currentLanguageUsesGemini && missingAudioCount > 0 && (
-          <span className="text-[10px] bg-amber-100 text-amber-900 rounded-full px-2 py-0.5">
-            {t`${missingAudioCount} missing`}
-          </span>
-        )}
         {selectedLang && translationVersion != null && !isSourceLang && (
           <VersionPicker
             currentVersion={translationVersion}
@@ -399,7 +294,7 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: {
       </div>
     )
     return () => setExtra(null)
-  }, [catalog, t, displayEntries.length, outputLanguages.length, selectedLang, translationVersion, saving, dirty, bookLabel, isSourceLang, totalAudioFiles, selectedPageId, currentLanguageUsesGemini, generatedAudioCount, missingAudioCount])
+  }, [catalog, t, displayEntries.length, outputLanguages.length, selectedLang, translationVersion, saving, dirty, bookLabel, isSourceLang, selectedPageId])
 
   if (!showRunCard && isLoading) {
     return (
@@ -414,9 +309,9 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: {
     return (
       <div className="p-4">
         <StageRunCard
-          stageSlug="text-and-speech"
+          stageSlug="translation"
           isRunning={isRunning}
-          completed={textAndSpeechDone}
+          completed={translationDone}
           onRun={handleRunTranslations}
           disabled={!hasApiKey || isRunning}
         />
@@ -431,7 +326,7 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: {
         onClick={() => onSelectPage?.(null)}
         className="text-xs font-medium text-pink-600 hover:text-pink-700 hover:underline transition-colors"
       >
-        {t`Show all text & speech`}
+        {t`Show all translations`}
       </button>
     </div>
   ) : null
@@ -441,14 +336,6 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: {
     <div className="flex flex-col h-full">
       {/* Fixed header: alerts, language tabs, column headers */}
       <div className="shrink-0 px-4 pt-4 space-y-3">
-        {allowGeminiPartialView && runError && (
-          <Alert variant="destructive" className="rounded-md">
-            <AlertDescription className="text-xs whitespace-pre-wrap break-words">
-              {runError}
-            </AlertDescription>
-          </Alert>
-        )}
-
         {/* Language tabs — only when there are multiple output languages */}
         {outputLanguages.length > 1 && (
         <div className="flex gap-1.5">
@@ -506,7 +393,6 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: {
           {virtualizer.getVirtualItems().map((virtualRow) => {
             const entry = displayEntries[virtualRow.index]
             const translated = translatedMap.get(entry.id)
-            const audio = audioMap.get(entry.id)
             const isImg = isImageEntry(entry.id)
             const isAnswer = isAnswerEntry(entry.id)
 
@@ -540,21 +426,6 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: {
                         </span>
                         <p className="text-sm leading-relaxed mt-0.5">{entry.text}</p>
                       </div>
-                      {!isAnswer && <AudioAction
-                        audio={audio}
-                        audioLang={audioLang}
-                        bookLabel={bookLabel}
-                        textId={entry.id}
-                        canGenerate={currentLanguageUsesGemini}
-                        hasGeminiKey={geminiKey.length > 0}
-                        onGenerate={handleGenerateAudio}
-                        isGenerating={
-                          generateAudioMutation.isPending &&
-                          generateAudioMutation.variables?.textId === entry.id &&
-                          generateAudioMutation.variables?.language === audioLang
-                        }
-                        errorMessage={generateErrorById[entry.id]}
-                      />}
                     </div>
                   ) : (
                     <div className={cn("grid grid-cols-2 gap-3 px-3 py-2.5 rounded-md border", isAnswer ? "bg-amber-50/60" : "bg-card")}>
@@ -574,33 +445,16 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: {
                           <p className="text-sm leading-relaxed mt-0.5">{entry.text}</p>
                         </div>
                       </div>
-                      <div className="flex items-start gap-2">
-                        <div className="flex-1 min-w-0">
-                          <span className="text-[10px] text-muted-foreground">&nbsp;</span>
-                          <textarea
-                            value={translated ?? ""}
-                            onChange={(e) => updateEntry(entry.id, e.target.value)}
-                            placeholder={t`Pending...`}
-                            className="w-full text-sm leading-relaxed mt-0.5 resize-none rounded border border-transparent bg-transparent p-1.5 -ml-1.5 hover:border-border hover:bg-muted/30 focus:border-ring focus:bg-white focus:outline-none focus:ring-1 focus:ring-ring transition-colors placeholder:text-muted-foreground placeholder:italic"
-                            style={{ fieldSizing: "content" } as React.CSSProperties}
-                            rows={1}
-                          />
-                        </div>
-                        {!isAnswer && <AudioAction
-                          audio={audio}
-                          audioLang={audioLang}
-                          bookLabel={bookLabel}
-                          textId={entry.id}
-                          canGenerate={currentLanguageUsesGemini}
-                          hasGeminiKey={geminiKey.length > 0}
-                          onGenerate={handleGenerateAudio}
-                          isGenerating={
-                            generateAudioMutation.isPending &&
-                            generateAudioMutation.variables?.textId === entry.id &&
-                            generateAudioMutation.variables?.language === audioLang
-                          }
-                          errorMessage={generateErrorById[entry.id]}
-                        />}
+                      <div className="flex-1 min-w-0">
+                        <span className="text-[10px] text-muted-foreground">&nbsp;</span>
+                        <textarea
+                          value={translated ?? ""}
+                          onChange={(e) => updateEntry(entry.id, e.target.value)}
+                          placeholder={t`Pending...`}
+                          className="w-full text-sm leading-relaxed mt-0.5 resize-none rounded border border-transparent bg-transparent p-1.5 -ml-1.5 hover:border-border hover:bg-muted/30 focus:border-ring focus:bg-white focus:outline-none focus:ring-1 focus:ring-ring transition-colors placeholder:text-muted-foreground placeholder:italic"
+                          style={{ fieldSizing: "content" } as React.CSSProperties}
+                          rows={1}
+                        />
                       </div>
                     </div>
                   )}
@@ -616,111 +470,3 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: {
   )
 }
 
-function PlayButton({ audioUrl }: { audioUrl: string }) {
-  const [playing, setPlaying] = useState(false)
-  const audioRef = useRef<HTMLAudioElement | null>(null)
-
-  const toggle = () => {
-    if (!audioRef.current) {
-      audioRef.current = new Audio(audioUrl)
-      audioRef.current.addEventListener("ended", () => setPlaying(false))
-    }
-    if (playing) {
-      audioRef.current.pause()
-      audioRef.current.currentTime = 0
-      setPlaying(false)
-    } else {
-      audioRef.current.play()
-      setPlaying(true)
-    }
-  }
-
-  useEffect(() => {
-    return () => {
-      if (audioRef.current) {
-        audioRef.current.pause()
-        audioRef.current = null
-      }
-    }
-  }, [])
-
-  return (
-    <button
-      type="button"
-      onClick={toggle}
-      className={cn(
-        "shrink-0 flex items-center justify-center w-6 h-6 rounded-full transition-all mt-3 cursor-pointer",
-        playing ? "bg-pink-500 text-white hover:bg-pink-600 scale-110" : "bg-muted text-muted-foreground hover:bg-pink-100 hover:text-pink-600 hover:scale-110"
-      )}
-    >
-      {playing ? <Pause className="w-2.5 h-2.5" /> : <Play className="w-2.5 h-2.5 ml-0.5" />}
-    </button>
-  )
-}
-
-function AudioAction({
-  audio,
-  audioLang,
-  bookLabel,
-  textId,
-  canGenerate,
-  hasGeminiKey,
-  onGenerate,
-  isGenerating,
-  errorMessage,
-}: {
-  audio?: { fileName: string; voice: string }
-  audioLang: string | null
-  bookLabel: string
-  textId: string
-  canGenerate: boolean
-  hasGeminiKey: boolean
-  onGenerate: (textId: string) => void
-  isGenerating: boolean
-  errorMessage?: string
-}) {
-  const { t } = useLingui()
-
-  if (audio && audioLang) {
-    return (
-      <PlayButton
-        key={audioLang}
-        audioUrl={getAudioUrl(bookLabel, audioLang, audio.fileName)}
-      />
-    )
-  }
-
-  if (!canGenerate) {
-    return null
-  }
-
-  return (
-    <div className="flex flex-col items-end gap-1 shrink-0">
-      <Button
-        type="button"
-        variant="outline"
-        size="sm"
-        className="h-7 px-2 text-[10px]"
-        disabled={isGenerating || !hasGeminiKey}
-        onClick={() => onGenerate(textId)}
-        title={
-          hasGeminiKey
-            ? t`Generate missing Gemini audio`
-            : t`Set a Gemini API key to generate audio`
-        }
-      >
-        {isGenerating ? (
-          <Loader2 className="mr-1 h-3 w-3 animate-spin" />
-        ) : (
-          <WandSparkles className="mr-1 h-3 w-3" />
-        )}
-        {t`Generate`}
-      </Button>
-      {errorMessage && (
-        <p className="max-w-44 text-[10px] leading-tight text-red-500 text-right">
-          {errorMessage}
-        </p>
-      )}
-    </div>
-  )
-}
diff --git a/apps/studio/src/hooks/use-book-run.ts b/apps/studio/src/hooks/use-book-run.ts
index fd95a789..041b86d5 100644
--- a/apps/studio/src/hooks/use-book-run.ts
+++ b/apps/studio/src/hooks/use-book-run.ts
@@ -421,8 +421,10 @@ function invalidateStageData(qc: ReturnType<typeof useQueryClient>, label: strin
     case "glossary":
       qc.invalidateQueries({ queryKey: ["books", label, "glossary"] })
       break
-    case "text-and-speech":
+    case "translation":
       qc.invalidateQueries({ queryKey: ["books", label, "text-catalog"] })
+      break
+    case "speech":
       qc.invalidateQueries({ queryKey: ["books", label, "tts"] })
       break
   }
diff --git a/apps/studio/src/lib/languages.ts b/apps/studio/src/lib/languages.ts
index 6d88e8c3..e3b0b77a 100644
--- a/apps/studio/src/lib/languages.ts
+++ b/apps/studio/src/lib/languages.ts
@@ -9,25 +9,52 @@ export interface Language {
   countries?: Country[]
 }
 
+// Countries are only listed for languages with meaningful CLDR regional variants
+// (different orthography, vocabulary, or dialect). Languages spoken primarily in one
+// country or without significant written regional differences omit countries entirely —
+// the base language code (e.g. "dz", "fi") is sufficient.
 export const SUPPORTED_LANGUAGES: Language[] = [
+  { code: "af", name: "Afrikaans" },
+  { code: "sq", name: "Albanian" },
+  { code: "am", name: "Amharic" },
   { code: "ar", name: "Arabic", countries: [
     { code: "eg", name: "Egypt" },
     { code: "sa", name: "Saudi Arabia" },
     { code: "ma", name: "Morocco" },
+    { code: "ae", name: "United Arab Emirates" },
+    { code: "iq", name: "Iraq" },
+    { code: "jo", name: "Jordan" },
+    { code: "lb", name: "Lebanon" },
+    { code: "tn", name: "Tunisia" },
+    { code: "dz", name: "Algeria" },
+    { code: "sy", name: "Syria" },
   ]},
+  { code: "hy", name: "Armenian" },
+  { code: "az", name: "Azerbaijani" },
+  { code: "eu", name: "Basque" },
+  { code: "be", name: "Belarusian" },
   { code: "bn", name: "Bengali", countries: [
     { code: "bd", name: "Bangladesh" },
     { code: "in", name: "India" },
   ]},
+  { code: "bs", name: "Bosnian" },
+  { code: "bg", name: "Bulgarian" },
+  { code: "my", name: "Burmese" },
+  { code: "ca", name: "Catalan" },
   { code: "zh", name: "Chinese", countries: [
     { code: "cn", name: "China" },
     { code: "tw", name: "Taiwan" },
     { code: "hk", name: "Hong Kong" },
+    { code: "sg", name: "Singapore" },
   ]},
+  { code: "hr", name: "Croatian" },
+  { code: "cs", name: "Czech" },
+  { code: "da", name: "Danish" },
   { code: "nl", name: "Dutch", countries: [
     { code: "nl", name: "Netherlands" },
     { code: "be", name: "Belgium" },
   ]},
+  { code: "dz", name: "Dzongkha" },
   { code: "en", name: "English", countries: [
     { code: "us", name: "United States" },
     { code: "gb", name: "United Kingdom" },
@@ -35,55 +62,157 @@ export const SUPPORTED_LANGUAGES: Language[] = [
     { code: "ca", name: "Canada" },
     { code: "in", name: "India" },
     { code: "za", name: "South Africa" },
+    { code: "nz", name: "New Zealand" },
+    { code: "ie", name: "Ireland" },
+    { code: "sg", name: "Singapore" },
+    { code: "ph", name: "Philippines" },
+    { code: "ke", name: "Kenya" },
+    { code: "ng", name: "Nigeria" },
+    { code: "gh", name: "Ghana" },
   ]},
+  { code: "et", name: "Estonian" },
+  { code: "fi", name: "Finnish" },
   { code: "fr", name: "French", countries: [
     { code: "fr", name: "France" },
     { code: "ca", name: "Canada" },
     { code: "be", name: "Belgium" },
     { code: "ch", name: "Switzerland" },
+    { code: "sn", name: "Senegal" },
+    { code: "ci", name: "Côte d'Ivoire" },
+    { code: "cm", name: "Cameroon" },
+    { code: "cd", name: "Congo (DRC)" },
+    { code: "ht", name: "Haiti" },
+    { code: "mg", name: "Madagascar" },
   ]},
+  { code: "gl", name: "Galician" },
+  { code: "ka", name: "Georgian" },
   { code: "de", name: "German", countries: [
     { code: "de", name: "Germany" },
     { code: "at", name: "Austria" },
     { code: "ch", name: "Switzerland" },
   ]},
+  { code: "el", name: "Greek" },
+  { code: "gu", name: "Gujarati" },
+  { code: "ha", name: "Hausa", countries: [
+    { code: "ng", name: "Nigeria" },
+    { code: "ne", name: "Niger" },
+    { code: "gh", name: "Ghana" },
+  ]},
+  { code: "he", name: "Hebrew" },
   { code: "hi", name: "Hindi" },
+  { code: "hu", name: "Hungarian" },
+  { code: "is", name: "Icelandic" },
+  { code: "ig", name: "Igbo" },
   { code: "id", name: "Indonesian" },
-  { code: "it", name: "Italian" },
+  { code: "it", name: "Italian", countries: [
+    { code: "it", name: "Italy" },
+    { code: "ch", name: "Switzerland" },
+  ]},
   { code: "ja", name: "Japanese" },
+  { code: "jv", name: "Javanese" },
+  { code: "kn", name: "Kannada" },
+  { code: "kk", name: "Kazakh" },
+  { code: "km", name: "Khmer" },
+  { code: "rw", name: "Kinyarwanda" },
   { code: "ko", name: "Korean" },
+  { code: "ku", name: "Kurdish" },
+  { code: "lo", name: "Lao" },
+  { code: "lv", name: "Latvian" },
+  { code: "lt", name: "Lithuanian" },
+  { code: "mk", name: "Macedonian" },
+  { code: "ms", name: "Malay", countries: [
+    { code: "my", name: "Malaysia" },
+    { code: "sg", name: "Singapore" },
+    { code: "bn", name: "Brunei" },
+  ]},
+  { code: "ml", name: "Malayalam" },
+  { code: "mr", name: "Marathi" },
+  { code: "mn", name: "Mongolian" },
   { code: "ne", name: "Nepali", countries: [
     { code: "np", name: "Nepal" },
     { code: "in", name: "India" },
   ]},
+  { code: "no", name: "Norwegian" },
+  { code: "ps", name: "Pashto" },
+  { code: "fa", name: "Persian", countries: [
+    { code: "ir", name: "Iran" },
+    { code: "af", name: "Afghanistan" },
+  ]},
   { code: "pl", name: "Polish" },
   { code: "pt", name: "Portuguese", countries: [
     { code: "br", name: "Brazil" },
     { code: "pt", name: "Portugal" },
+    { code: "mz", name: "Mozambique" },
+    { code: "ao", name: "Angola" },
+  ]},
+  { code: "pa", name: "Punjabi", countries: [
+    { code: "in", name: "India" },
+    { code: "pk", name: "Pakistan" },
+  ]},
+  { code: "ro", name: "Romanian", countries: [
+    { code: "ro", name: "Romania" },
+    { code: "md", name: "Moldova" },
   ]},
   { code: "ru", name: "Russian" },
+  { code: "sr", name: "Serbian" },
   { code: "si", name: "Sinhala" },
+  { code: "sk", name: "Slovak" },
+  { code: "sl", name: "Slovenian" },
+  { code: "so", name: "Somali" },
   { code: "es", name: "Spanish", countries: [
     { code: "es", name: "Spain" },
     { code: "mx", name: "Mexico" },
     { code: "ar", name: "Argentina" },
     { code: "co", name: "Colombia" },
+    { code: "pe", name: "Peru" },
+    { code: "cl", name: "Chile" },
+    { code: "ve", name: "Venezuela" },
+    { code: "ec", name: "Ecuador" },
+    { code: "gt", name: "Guatemala" },
+    { code: "cu", name: "Cuba" },
+    { code: "do", name: "Dominican Republic" },
+    { code: "hn", name: "Honduras" },
+    { code: "uy", name: "Uruguay" },
+    { code: "py", name: "Paraguay" },
+    { code: "bo", name: "Bolivia" },
+    { code: "sv", name: "El Salvador" },
+    { code: "ni", name: "Nicaragua" },
+    { code: "cr", name: "Costa Rica" },
+    { code: "pa", name: "Panama" },
   ]},
   { code: "sw", name: "Swahili", countries: [
     { code: "ke", name: "Kenya" },
     { code: "tz", name: "Tanzania" },
+    { code: "ug", name: "Uganda" },
+    { code: "cd", name: "Congo (DRC)" },
+  ]},
+  { code: "sv", name: "Swedish", countries: [
+    { code: "se", name: "Sweden" },
+    { code: "fi", name: "Finland" },
   ]},
+  { code: "tl", name: "Tagalog" },
   { code: "ta", name: "Tamil", countries: [
     { code: "in", name: "India" },
     { code: "lk", name: "Sri Lanka" },
+    { code: "sg", name: "Singapore" },
+    { code: "my", name: "Malaysia" },
   ]},
+  { code: "te", name: "Telugu" },
   { code: "th", name: "Thai" },
+  { code: "ti", name: "Tigrinya", countries: [
+    { code: "er", name: "Eritrea" },
+    { code: "et", name: "Ethiopia" },
+  ]},
   { code: "tr", name: "Turkish" },
+  { code: "uk", name: "Ukrainian" },
   { code: "ur", name: "Urdu", countries: [
     { code: "pk", name: "Pakistan" },
     { code: "in", name: "India" },
   ]},
+  { code: "uz", name: "Uzbek" },
   { code: "vi", name: "Vietnamese" },
+  { code: "yo", name: "Yoruba" },
+  { code: "zu", name: "Zulu" },
 ]
 
 /** All countries — used as the full suggestion pool in phase 2 of the picker. */
@@ -91,6 +220,7 @@ export const ALL_COUNTRIES: Country[] = [
   { code: "af", name: "Afghanistan" },
   { code: "al", name: "Albania" },
   { code: "dz", name: "Algeria" },
+  { code: "ao", name: "Angola" },
   { code: "ar", name: "Argentina" },
   { code: "am", name: "Armenia" },
   { code: "au", name: "Australia" },
@@ -100,8 +230,10 @@ export const ALL_COUNTRIES: Country[] = [
   { code: "by", name: "Belarus" },
   { code: "be", name: "Belgium" },
   { code: "bj", name: "Benin" },
+  { code: "bt", name: "Bhutan" },
   { code: "bo", name: "Bolivia" },
   { code: "ba", name: "Bosnia and Herzegovina" },
   { code: "bw", name: "Botswana" },
   { code: "br", name: "Brazil" },
+  { code: "bn", name: "Brunei" },
   { code: "bg", name: "Bulgaria" },
@@ -121,6 +253,7 @@ export const ALL_COUNTRIES: Country[] = [
   { code: "ci", name: "Côte d'Ivoire" },
   { code: "hr", name: "Croatia" },
   { code: "cu", name: "Cuba" },
+  { code: "cy", name: "Cyprus" },
   { code: "cz", name: "Czech Republic" },
   { code: "dk", name: "Denmark" },
   { code: "do", name: "Dominican Republic" },
@@ -159,6 +292,7 @@ export const ALL_COUNTRIES: Country[] = [
   { code: "kp", name: "North Korea" },
   { code: "kr", name: "South Korea" },
+  { code: "xk", name: "Kosovo" },
   { code: "kw", name: "Kuwait" },
   { code: "kg", name: "Kyrgyzstan" },
   { code: "la", name: "Laos" },
   { code: "lv", name: "Latvia" },
diff --git a/apps/studio/src/lib/reviewer-validation-applicability.test.ts b/apps/studio/src/lib/reviewer-validation-applicability.test.ts
index 5d91bc76..5dcaef16 100644
--- a/apps/studio/src/lib/reviewer-validation-applicability.test.ts
+++ b/apps/studio/src/lib/reviewer-validation-applicability.test.ts
@@ -26,7 +26,8 @@ describe("resolveReviewerValidationCriterionStatus", () => {
         explicitStatus: "pass",
         glossaryAvailable: false,
         glossaryPending: false,
-        textAndSpeechStageDone: false,
+        speechStageDone: false,
+        translationStageDone: false,
         ttsAvailable: false,
         ttsPending: false,
         sessionLanguage: null,
@@ -46,7 +47,8 @@ describe("resolveReviewerValidationCriterionStatus", () => {
         sectionId: "visual-media-image-description",
         glossaryAvailable: true,
         glossaryPending: false,
-        textAndSpeechStageDone: true,
+        speechStageDone: true,
+        translationStageDone: true,
         ttsAvailable: true,
         ttsPending: false,
         sessionLanguage: "en",
@@ -70,7 +72,8 @@ describe("resolveReviewerValidationCriterionStatus", () => {
         sectionId: "interactivity",
         glossaryAvailable: true,
         glossaryPending: false,
-        textAndSpeechStageDone: true,
+        speechStageDone: true,
+        translationStageDone: true,
         ttsAvailable: true,
         ttsPending: false,
         sessionLanguage: "en",
@@ -94,7 +97,8 @@ describe("resolveReviewerValidationCriterionStatus", () => {
         sectionId: "sign-language",
         glossaryAvailable: true,
         glossaryPending: false,
-        textAndSpeechStageDone: true,
+        speechStageDone: true,
+        translationStageDone: true,
         ttsAvailable: true,
         ttsPending: false,
         sessionLanguage: "en",
@@ -118,7 +122,8 @@ describe("resolveReviewerValidationCriterionStatus", () => {
         sectionId: "translation",
         glossaryAvailable: true,
         glossaryPending: false,
-        textAndSpeechStageDone: true,
+        speechStageDone: true,
+        translationStageDone: true,
         ttsAvailable: true,
         ttsPending: false,
         sessionLanguage: null,
diff --git a/apps/studio/src/lib/reviewer-validation-applicability.ts b/apps/studio/src/lib/reviewer-validation-applicability.ts
index 9796be1f..bfb55e4c 100644
--- a/apps/studio/src/lib/reviewer-validation-applicability.ts
+++ b/apps/studio/src/lib/reviewer-validation-applicability.ts
@@ -12,7 +12,8 @@ export type ReviewerValidationApplicabilityContext = {
   explicitStatus?: ReviewerValidationStatus
   glossaryAvailable: boolean
   glossaryPending: boolean
-  textAndSpeechStageDone: boolean
+  speechStageDone: boolean
+  translationStageDone: boolean
   ttsAvailable: boolean
   ttsPending: boolean
   sessionLanguage: string | null
@@ -52,7 +53,8 @@ export function resolveReviewerValidationCriterionStatus({
   explicitStatus,
   glossaryAvailable,
   glossaryPending,
-  textAndSpeechStageDone,
+  speechStageDone,
+  translationStageDone,
   ttsAvailable,
   ttsPending,
   sessionLanguage,
@@ -117,11 +119,11 @@ export function resolveReviewerValidationCriterionStatus({
   }
 
   if (sectionId === "audio-voice-over") {
-    if (!textAndSpeechStageDone) {
+    if (!speechStageDone) {
       return {
         status: "not-applicable",
         isDerived: true,
-        reason: createReviewerValidationDefaultReason("text-and-speech-unavailable"),
+        reason: createReviewerValidationDefaultReason("speech-unavailable"),
       }
     }
     if (ttsPending) {
@@ -131,13 +133,13 @@ export function resolveReviewerValidationCriterionStatus({
       return {
         status: "not-applicable",
         isDerived: true,
-        reason: createReviewerValidationDefaultReason("text-and-speech-language-unavailable", { language: sessionLanguage }),
+        reason: createReviewerValidationDefaultReason("speech-language-unavailable", { language: sessionLanguage }),
       }
     }
   }
 
   if (sectionId === "translation") {
-    if (!textAndSpeechStageDone) {
+    if (!translationStageDone) {
       return {
         status: "not-applicable",
         isDerived: true,
diff --git a/apps/studio/src/lib/reviewer-validation-defaults.test.ts b/apps/studio/src/lib/reviewer-validation-defaults.test.ts
index 3c286205..b2c87c6f 100644
--- a/apps/studio/src/lib/reviewer-validation-defaults.test.ts
+++ b/apps/studio/src/lib/reviewer-validation-defaults.test.ts
@@ -33,7 +33,7 @@ describe("reviewer-validation-defaults", () => {
   it("supports language-aware default reasons", () => {
     expect(
       getReviewerValidationDefaultReasonMessage(
-        createReviewerValidationDefaultReason("text-and-speech-language-unavailable", { language: "sw" }),
+        createReviewerValidationDefaultReason("speech-language-unavailable", { language: "sw" }),
       ),
     ).toEqual({ id: "Defaulted to N/A because no Text & Speech audio is available for sw." })
     expect(
diff --git a/apps/studio/src/lib/reviewer-validation-defaults.ts b/apps/studio/src/lib/reviewer-validation-defaults.ts
index 92535f4f..53180c63 100644
--- a/apps/studio/src/lib/reviewer-validation-defaults.ts
+++ b/apps/studio/src/lib/reviewer-validation-defaults.ts
@@ -8,8 +8,8 @@ export type ReviewerValidationDefaultReasonKind =
   | "easy-read-unavailable"
   | "sign-language-unavailable"
   | "glossary-unavailable"
-  | "text-and-speech-unavailable"
-  | "text-and-speech-language-unavailable"
+  | "speech-unavailable"
+  | "speech-language-unavailable"
   | "translation-unavailable"
   | "translation-language-required"
   | "translation-language-unavailable"
@@ -43,9 +43,9 @@ export function getReviewerValidationDefaultReasonMessage(
       return msg`Defaulted to N/A because sign language is not enabled for this book.`
     case "glossary-unavailable":
       return msg`Defaulted to N/A because Glossary has not been generated for this book yet.`
-    case "text-and-speech-unavailable":
+    case "speech-unavailable":
       return msg`Defaulted to N/A because Text & Speech audio has not been generated yet.`
-    case "text-and-speech-language-unavailable":
+    case "speech-language-unavailable":
       return reason.language
         ? msg`Defaulted to N/A because no Text & Speech audio is available for ${reason.language}.`
         : msg`Defaulted to N/A because no Text & Speech audio is available yet.`
diff --git a/apps/studio/src/locales/en.po b/apps/studio/src/locales/en.po
index ea5b9f84..7dbaf7c1 100644
--- a/apps/studio/src/locales/en.po
+++ b/apps/studio/src/locales/en.po
@@ -76,6 +76,9 @@ msgstr "{0} {1} reviewed so far"
 msgid "{0} audio"
 msgstr "{0} audio"
 
+#~ msgid "{0} audio files generated"
+#~ msgstr "{0} audio files generated"
+
 #. placeholder {0}: activeProgress.criteriaPerPage
 msgid "{0} checks per page"
 msgstr "{0} checks per page"
@@ -84,6 +87,10 @@ msgstr "{0} checks per page"
 msgid "{0} entries"
 msgstr "{0} entries"
 
+#. placeholder {0}: String(totalAudioFiles)
+msgid "{0} files"
+msgstr "{0} files"
+
 #~ msgid "{0} findings"
 #~ msgstr "{0} findings"
 
@@ -119,6 +126,9 @@ msgstr "{0} languages"
 msgid "{0} of {1} assigned pages reviewed"
 msgstr "{0} of {1} assigned pages reviewed"
 
+#~ msgid "{0} of {1} audio files generated"
+#~ msgstr "{0} of {1} audio files generated"
+
 #. placeholder {0}: activeMetrics.pagesReviewed
 #. placeholder {1}: activeProgress.totalBookPages
 msgid "{0} of {1} book pages reviewed"
@@ -157,6 +167,11 @@ msgstr "{0} texts"
 #~ msgid "{0}. These checks need manual review."
 #~ msgstr "{0}. These checks need manual review."
 
+#. placeholder {0}: String(generatedAudioCount)
+#. placeholder {1}: String(displayEntries.length)
+msgid "{0}/{1}"
+msgstr "{0}/{1}"
+
 #. placeholder {0}: String(generatedAudioCount)
 #. placeholder {1}: String(displayEntries.length)
 msgid "{0}/{1} audio"
@@ -408,6 +423,9 @@ msgstr "Add reviewer guidance."
 msgid "Add section"
 msgstr "Add section"
 
+msgid "Add Translations"
+msgstr "Add Translations"
+
 msgid "Add your first book"
 msgstr "Add your first book"
 
@@ -457,6 +475,9 @@ msgstr "AI sees the current image and modifies it"
 msgid "AIza..."
 msgstr "AIza..."
 
+msgid "All"
+msgstr "All"
+
 msgid "All Categories"
 msgstr "All Categories"
 
@@ -615,6 +636,12 @@ msgstr "Book Author"
 msgid "Book data is outdated and must be rebuilt."
 msgstr "Book data is outdated and must be rebuilt."
 
+msgid "book language"
+msgstr "book language"
+
+msgid "Book Language"
+msgstr "Book Language"
+
 msgid "Book Metadata"
 msgstr "Book Metadata"
 
@@ -711,6 +738,9 @@ msgstr "Checklist item"
 msgid "Choose an existing image from this project"
 msgstr "Choose an existing image from this project"
 
+msgid "Choose Provider"
+msgstr "Choose Provider"
+
 msgid "Choose which per-page validation items reviewers must check, customize the wording and guidance, and add your own checklist items for this document."
 msgstr "Choose which per-page validation items reviewers must check, customize the wording and guidance, and add your own checklist items for this document."
 
@@ -726,6 +756,12 @@ msgstr "Clear all"
 msgid "Clear language"
 msgstr "Clear language"
 
+#~ msgid "Clear Speech"
+#~ msgstr "Clear Speech"
+
+msgid "Clear speech data"
+msgstr "Clear speech data"
+
 msgid "Click images to select"
 msgstr "Click images to select"
 
@@ -795,6 +831,9 @@ msgstr "Completion across the assigned page range for this reviewer session."
 msgid "Configure API keys for AI pipeline features."
 msgstr "Configure API keys for AI pipeline features."
 
+#~ msgid "Configure Voices"
+#~ msgstr "Configure Voices"
+
 msgid "Confirm merge"
 msgstr "Confirm merge"
 
@@ -1031,6 +1070,9 @@ msgstr "Discard"
 msgid "Display"
 msgstr "Display"
 
+msgid "done"
+msgstr "done"
+
 msgid "Drag to move boxes, drag edges/corners to resize"
 msgstr "Drag to move boxes, drag edges/corners to resize"
 
@@ -1345,14 +1387,26 @@ msgstr "Generate"
 msgid "Generate and customize the table of contents for the book navigation."
 msgstr "Generate and customize the table of contents for the book navigation."
 
+msgid "Generate audio"
+msgstr "Generate audio"
+
+#~ msgid "Generate audio narration for text entries."
+#~ msgstr "Generate audio narration for text entries."
+
+msgid "Generate audio narration for the book content."
+msgstr "Generate audio narration for the book content."
+
+#~ msgid "Generate audio narration for the translated book content."
+#~ msgstr "Generate audio narration for the translated book content."
+
 msgid "Generate comprehension quizzes and activities based on the book content."
 msgstr "Generate comprehension quizzes and activities based on the book content."
 
 msgid "Generate from pages"
 msgstr "Generate from pages"
 
-msgid "Generate missing Gemini audio"
-msgstr "Generate missing Gemini audio"
+#~ msgid "Generate missing Gemini audio"
+#~ msgstr "Generate missing Gemini audio"
 
 msgid "Generate new"
 msgstr "Generate new"
@@ -1378,8 +1432,11 @@ msgstr "Generating image..."
 msgid "Generating Quizzes..."
 msgstr "Generating Quizzes..."
 
-msgid "Generating Text & Speech..."
-msgstr "Generating Text & Speech..."
+msgid "Generating Speech..."
+msgstr "Generating Speech..."
+
+#~ msgid "Generating Text & Speech..."
+#~ msgstr "Generating Text & Speech..."
 
 msgid "Generating TOC..."
 msgstr "Generating TOC..."
@@ -1584,8 +1641,8 @@ msgstr "Languages"
 msgid "Last"
 msgstr "Last"
 
-msgid "Leave empty to output only in the book language."
-msgstr "Leave empty to output only in the book language."
+#~ msgid "Leave empty to output only in the book language."
+#~ msgstr "Leave empty to output only in the book language."
 
 msgid "Leave empty to process all pages."
 msgstr "Leave empty to process all pages."
@@ -1875,6 +1932,9 @@ msgstr "No accessibility findings were reported for this page."
 msgid "No assessment"
 msgstr "No assessment"
 
+msgid "No audio"
+msgstr "No audio"
+
 msgid "No captions for this page"
 msgstr "No captions for this page"
 
@@ -1959,6 +2019,9 @@ msgstr "No matching sections"
 msgid "No metadata yet — run the pipeline to extract book details"
 msgstr "No metadata yet — run the pipeline to extract book details"
 
+#~ msgid "No output languages configured"
+#~ msgstr "No output languages configured"
+
 msgid "No page linked"
 msgstr "No page linked"
 
@@ -1998,6 +2061,9 @@ msgstr "No sections on this page"
 msgid "No segmentation needed for this image"
 msgstr "No segmentation needed for this image"
 
+msgid "No speech entries for this page"
+msgstr "No speech entries for this page"
+
 msgid "No text extracted"
 msgstr "No text extracted"
 
@@ -2019,6 +2085,9 @@ msgstr "Nodes"
 msgid "None"
 msgstr "None"
 
+msgid "Not detected"
+msgstr "Not detected"
+
 msgid "Not reviewed"
 msgstr "Not reviewed"
 
@@ -2067,6 +2136,9 @@ msgstr "OpenAI API Key"
 msgid "OpenAI Voice"
 msgstr "OpenAI Voice"
 
+#~ msgid "optional"
+#~ msgstr "optional"
+
 msgid "Optional instructions for the LLM..."
 msgstr "Optional instructions for the LLM..."
 
@@ -2337,6 +2409,15 @@ msgstr "Refresh validation"
 msgid "Refreshing results for this packaged preview."
 msgstr "Refreshing results for this packaged preview."
 
+msgid "Regenerate"
+msgstr "Regenerate"
+
+msgid "Regenerate all speech"
+msgstr "Regenerate all speech"
+
+#~ msgid "Regenerate speech"
+#~ msgstr "Regenerate speech"
+
 msgid "Region"
 msgstr "Region"
 
@@ -2382,6 +2463,9 @@ msgstr "Require comment on failure"
 msgid "Require suggested modification"
 msgstr "Require suggested modification"
 
+msgid "Rerun translation"
+msgstr "Rerun translation"
+
 msgid "Reset to defaults"
 msgstr "Reset to defaults"
 
@@ -2416,6 +2500,12 @@ msgstr "Retry"
 msgid "Retry Export"
 msgstr "Retry Export"
 
+msgid "Retry Speech"
+msgstr "Retry Speech"
+
+msgid "Retry Translation"
+msgstr "Retry Translation"
+
 msgid "Review areas with the most items marked as needing changes in this session."
 msgstr "Review areas with the most items marked as needing changes in this session."
 
@@ -2461,12 +2551,24 @@ msgstr "Rewriting the story of this section..."
 msgid "Run {stageLabel}"
 msgstr "Run {stageLabel}"
 
+msgid "Run Speech"
+msgstr "Run Speech"
+
 msgid "Run the pipeline through at least the"
 msgstr "Run the pipeline through at least the"
 
 msgid "Run the pipeline through at least the <0>Storyboard</0> stage first."
 msgstr "Run the pipeline through at least the <0>Storyboard</0> stage first."
 
+msgid "Run Translation"
+msgstr "Run Translation"
+
+#~ msgid "Run Translation & Speech"
+#~ msgstr "Run Translation & Speech"
+
+#~ msgid "Run translation first to enable speech generation."
+#~ msgstr "Run translation first to enable speech generation."
+
 msgid "Run whole-book validation checks and configure accessibility assessment settings."
 msgstr "Run whole-book validation checks and configure accessibility assessment settings."
 
@@ -2500,14 +2602,20 @@ msgstr "Save & Rerun Glossary"
 msgid "Save & Rerun Quizzes"
 msgstr "Save & Rerun Quizzes"
 
+#~ msgid "Save & Rerun Speech Generation"
+#~ msgstr "Save & Rerun Speech Generation"
+
 msgid "Save & Rerun Storyboard"
 msgstr "Save & Rerun Storyboard"
 
 msgid "Save & Rerun TOC Generation"
 msgstr "Save & Rerun TOC Generation"
 
-msgid "Save & Rerun Translations + Audio"
-msgstr "Save & Rerun Translations + Audio"
+msgid "Save & Rerun Translations"
+msgstr "Save & Rerun Translations"
+
+#~ msgid "Save & Rerun Translations + Audio"
+#~ msgstr "Save & Rerun Translations + Audio"
 
 #~ msgid "Save changes before cloning"
 #~ msgstr "Save changes before cloning"
@@ -2712,8 +2820,14 @@ msgstr "Show all"
 msgid "Show all {0}"
 msgstr "Show all {0}"
 
-msgid "Show all text & speech"
-msgstr "Show all text & speech"
+msgid "Show all speech entries"
+msgstr "Show all speech entries"
+
+#~ msgid "Show all text & speech"
+#~ msgstr "Show all text & speech"
+
+msgid "Show all translations"
+msgstr "Show all translations"
 
 msgid "Show fewer"
 msgstr "Show fewer"
@@ -2748,6 +2862,9 @@ msgstr "Speech Generation"
 msgid "Speech Prompts"
 msgstr "Speech Prompts"
 
+#~ msgid "Speech Settings"
+#~ msgstr "Speech Settings"
+
 msgid "Spread Mode"
 msgstr "Spread Mode"
 
@@ -2968,6 +3085,9 @@ msgstr "This page has no captioned images"
 msgid "This page has no storyboard sections"
 msgstr "This page has no storyboard sections"
 
+msgid "This page has no text entries with audio"
+msgstr "This page has no text entries with audio"
+
 msgid "This page has no translatable text entries"
 msgstr "This page has no translatable text entries"
 
@@ -2998,6 +3118,9 @@ msgstr "This will save your settings and re-run image captioning."
 msgid "This will save your settings and re-run quiz generation."
 msgstr "This will save your settings and re-run quiz generation."
 
+#~ msgid "This will save your settings and re-run speech generation for all languages."
+#~ msgstr "This will save your settings and re-run speech generation for all languages."
+
 msgid "This will save your settings and re-run the extraction pipeline. Any manual edits to extracted text will be overwritten for affected pages."
 msgstr "This will save your settings and re-run the extraction pipeline. Any manual edits to extracted text will be overwritten for affected pages."
 
@@ -3007,8 +3130,11 @@ msgstr "This will save your settings and re-run the storyboard pipeline. Only re
 msgid "This will save your settings and re-run the storyboard pipeline. Sectioning and rendering will be regenerated for all pages."
 msgstr "This will save your settings and re-run the storyboard pipeline. Sectioning and rendering will be regenerated for all pages."
 
-msgid "This will save your settings and re-run translations and audio generation, rebuilding the text catalog, translating to output languages, and generating speech."
-msgstr "This will save your settings and re-run translations and audio generation, rebuilding the text catalog, translating to output languages, and generating speech."
+#~ msgid "This will save your settings and re-run translations and audio generation, rebuilding the text catalog, translating to output languages, and generating speech."
+#~ msgstr "This will save your settings and re-run translations and audio generation, rebuilding the text catalog, translating to output languages, and generating speech."
+
+msgid "This will save your settings and re-run translations, rebuilding the text catalog and translating to output languages."
+msgstr "This will save your settings and re-run translations, rebuilding the text catalog and translating to output languages."
 
 msgid "Time"
 msgstr "Time"
@@ -3037,6 +3163,12 @@ msgstr "Track reviewer progress and review findings captured from Preview."
 msgid "Translate the book content and generate audio narration."
 msgstr "Translate the book content and generate audio narration."
 
+#~ msgid "Translate the book content into target languages."
+#~ msgstr "Translate the book content into target languages."
+
+msgid "Translating..."
+msgstr "Translating..."
+
 msgid "Translation"
 msgstr "Translation"
 
@@ -3159,6 +3291,9 @@ msgstr "View HTML source"
 msgid "Visual & sensory cues"
 msgstr "Visual & sensory cues"
 
+msgid "Voice"
+msgstr "Voice"
+
 msgid "Voice Mappings"
 msgstr "Voice Mappings"
 
@@ -3201,6 +3336,9 @@ msgstr "When enabled, background colors from the styleguide are applied to the f
 msgid "Whole-book checks for packaged ADT output, plus reviewer findings captured from Preview."
 msgstr "Whole-book checks for packaged ADT output, plus reviewer findings captured from Preview."
 
+msgid "Word-level timecodes"
+msgstr "Word-level timecodes"
+
 msgid "Wraps 'Edit this image' requests. The AI receives the original image alongside this prompt. Supports user_prompt and style variables."
 msgstr "Wraps 'Edit this image' requests. The AI receives the original image alongside this prompt. Supports user_prompt and style variables."
 
diff --git a/apps/studio/src/locales/es.po b/apps/studio/src/locales/es.po
index c3a907a9..c171e5d5 100644
--- a/apps/studio/src/locales/es.po
+++ b/apps/studio/src/locales/es.po
@@ -76,6 +76,9 @@ msgstr "{0} {1} reviewed so far"
 msgid "{0} audio"
 msgstr "{0} audios"
 
+#~ msgid "{0} audio files generated"
+#~ msgstr "{0} archivos de audio generados"
+
 #. placeholder {0}: activeProgress.criteriaPerPage
 msgid "{0} checks per page"
 msgstr "{0} checks per page"
@@ -84,6 +87,10 @@ msgstr "{0} checks per page"
 msgid "{0} entries"
 msgstr "{0} entradas"
 
+#. placeholder {0}: String(totalAudioFiles)
+msgid "{0} files"
+msgstr "{0} archivos"
+
 #~ msgid "{0} findings"
 #~ msgstr "{0} findings"
 
@@ -119,6 +126,9 @@ msgstr "{0} idiomas"
 msgid "{0} of {1} assigned pages reviewed"
 msgstr "{0} of {1} assigned pages reviewed"
 
+#~ msgid "{0} of {1} audio files generated"
+#~ msgstr "{0} de {1} archivos de audio generados"
+
 #. placeholder {0}: activeMetrics.pagesReviewed
 #. placeholder {1}: activeProgress.totalBookPages
 msgid "{0} of {1} book pages reviewed"
@@ -157,6 +167,11 @@ msgstr "{0} textos"
 #~ msgid "{0}. These checks need manual review."
 #~ msgstr "{0}. Estas verificaciones necesitan revisión manual."
 
+#. placeholder {0}: String(generatedAudioCount)
+#. placeholder {1}: String(displayEntries.length)
+msgid "{0}/{1}"
+msgstr "{0}/{1}"
+
 #. placeholder {0}: String(generatedAudioCount)
 #. placeholder {1}: String(displayEntries.length)
 msgid "{0}/{1} audio"
@@ -408,6 +423,9 @@ msgstr "Add reviewer guidance."
 msgid "Add section"
 msgstr "Add section"
 
+msgid "Add Translations"
+msgstr "Agregar traducciones"
+
 msgid "Add your first book"
 msgstr "Agrega tu primer libro"
 
@@ -457,6 +475,9 @@ msgstr "La IA ve la imagen actual y la modifica"
 msgid "AIza..."
 msgstr "AIza..."
 
+msgid "All"
+msgstr "Todos"
+
 msgid "All Categories"
 msgstr "All Categories"
 
@@ -615,6 +636,12 @@ msgstr "Autor del libro"
 msgid "Book data is outdated and must be rebuilt."
 msgstr "Los datos del libro están desactualizados y deben reconstruirse."
 
+msgid "book language"
+msgstr "idioma del libro"
+
+msgid "Book Language"
+msgstr "Idioma del libro"
+
 msgid "Book Metadata"
 msgstr "Metadatos del libro"
 
@@ -711,6 +738,9 @@ msgstr "Checklist item"
 msgid "Choose an existing image from this project"
 msgstr "Elige una imagen existente de este proyecto"
 
+msgid "Choose Provider"
+msgstr "Elegir proveedor"
+
 msgid "Choose which per-page validation items reviewers must check, customize the wording and guidance, and add your own checklist items for this document."
 msgstr "Choose which per-page validation items reviewers must check, customize the wording and guidance, and add your own checklist items for this document."
 
@@ -726,6 +756,12 @@ msgstr "Clear all"
 msgid "Clear language"
 msgstr "Borrar idioma"
 
+#~ msgid "Clear Speech"
+#~ msgstr "Borrar voz"
+
+msgid "Clear speech data"
+msgstr "Borrar datos de voz"
+
 msgid "Click images to select"
 msgstr "Haz clic en las imágenes para seleccionar"
 
@@ -795,6 +831,9 @@ msgstr "Completion across the assigned page range for this reviewer session."
 msgid "Configure API keys for AI pipeline features."
 msgstr "Configura las claves de API para las funciones de IA."
 
+#~ msgid "Configure Voices"
+#~ msgstr "Configurar voces"
+
 msgid "Confirm merge"
 msgstr "Confirmar fusión"
 
@@ -1031,6 +1070,9 @@ msgstr "Descartar"
 msgid "Display"
 msgstr "Visualización"
 
+msgid "done"
+msgstr "listo"
+
 msgid "Drag to move boxes, drag edges/corners to resize"
 msgstr "Drag to move boxes, drag edges/corners to resize"
 
@@ -1345,14 +1387,26 @@ msgstr "Generar"
 msgid "Generate and customize the table of contents for the book navigation."
 msgstr "Genera y personaliza la tabla de contenidos para la navegación del libro."
 
+msgid "Generate audio"
+msgstr "Generar audio"
+
+#~ msgid "Generate audio narration for text entries."
+#~ msgstr "Generar narración de audio para las entradas de texto."
+
+msgid "Generate audio narration for the book content."
+msgstr "Genera narración de audio para el contenido del libro."
+
+#~ msgid "Generate audio narration for the translated book content."
+#~ msgstr "Generar narración de audio para el contenido traducido del libro."
+
 msgid "Generate comprehension quizzes and activities based on the book content."
 msgstr "Genera cuestionarios y actividades de comprensión basados en el contenido del libro."
 
 msgid "Generate from pages"
 msgstr "Generar desde páginas"
 
-msgid "Generate missing Gemini audio"
-msgstr "Generar audio de Gemini faltante"
+#~ msgid "Generate missing Gemini audio"
+#~ msgstr "Generar audio de Gemini faltante"
 
 msgid "Generate new"
 msgstr "Generar nueva"
@@ -1378,8 +1432,11 @@ msgstr "Generando imagen..."
 msgid "Generating Quizzes..."
 msgstr "Generando cuestionarios..."
 
-msgid "Generating Text & Speech..."
-msgstr "Generando texto y voz..."
+msgid "Generating Speech..."
+msgstr "Generando voz..."
+
+#~ msgid "Generating Text & Speech..."
+#~ msgstr "Generando texto y voz..."
 
 msgid "Generating TOC..."
 msgstr "Generando tabla de contenidos..."
@@ -1584,8 +1641,8 @@ msgstr "Idiomas"
 msgid "Last"
 msgstr "Última"
 
-msgid "Leave empty to output only in the book language."
-msgstr "Dejar vacío para generar solo en el idioma del libro."
+#~ msgid "Leave empty to output only in the book language."
+#~ msgstr "Dejar vacío para generar solo en el idioma del libro."
 
 msgid "Leave empty to process all pages."
 msgstr "Deja vacío para procesar todas las páginas."
@@ -1875,6 +1932,9 @@ msgstr "No accessibility findings were reported for this page."
 msgid "No assessment"
 msgstr "No assessment"
 
+msgid "No audio"
+msgstr "Sin audio"
+
 msgid "No captions for this page"
 msgstr "Sin leyendas para esta página"
 
@@ -1959,6 +2019,9 @@ msgstr "No hay secciones coincidentes"
 msgid "No metadata yet — run the pipeline to extract book details"
 msgstr "Aún no hay metadatos — ejecuta el pipeline para extraer los detalles del libro"
 
+#~ msgid "No output languages configured"
+#~ msgstr "No hay idiomas de salida configurados"
+
 msgid "No page linked"
 msgstr "No hay página vinculada"
 
@@ -1998,6 +2061,9 @@ msgstr "Sin secciones en esta página"
 msgid "No segmentation needed for this image"
 msgstr "No segmentation needed for this image"
 
+msgid "No speech entries for this page"
+msgstr "No hay entradas de voz para esta página"
+
 msgid "No text extracted"
 msgstr "No se extrajo texto"
 
@@ -2019,6 +2085,9 @@ msgstr "Nodes"
 msgid "None"
 msgstr "Ninguno"
 
+msgid "Not detected"
+msgstr "No detectado"
+
 msgid "Not reviewed"
 msgstr "Not reviewed"
 
@@ -2067,6 +2136,9 @@ msgstr "Clave de API de OpenAI"
 msgid "OpenAI Voice"
 msgstr "Voz OpenAI"
 
+#~ msgid "optional"
+#~ msgstr "opcional"
+
 msgid "Optional instructions for the LLM..."
 msgstr "Instrucciones opcionales para el LLM..."
 
@@ -2337,6 +2409,15 @@ msgstr "Refresh validation"
 msgid "Refreshing results for this packaged preview."
 msgstr "Refreshing results for this packaged preview."
 
+msgid "Regenerate"
+msgstr "Regenerar"
+
+msgid "Regenerate all speech"
+msgstr "Regenerar todo el audio"
+
+#~ msgid "Regenerate speech"
+#~ msgstr "Regenerar audio"
+
 msgid "Region"
 msgstr "Región"
 
@@ -2382,6 +2463,9 @@ msgstr "Require comment on failure"
 msgid "Require suggested modification"
 msgstr "Require suggested modification"
 
+msgid "Rerun translation"
+msgstr "Reejecutar traducción"
+
 msgid "Reset to defaults"
 msgstr "Reset to defaults"
 
@@ -2416,6 +2500,12 @@ msgstr "Reintentar"
 msgid "Retry Export"
 msgstr "Reintentar exportación"
 
+msgid "Retry Speech"
+msgstr "Reintentar voz"
+
+msgid "Retry Translation"
+msgstr "Reintentar traducción"
+
 msgid "Review areas with the most items marked as needing changes in this session."
 msgstr "Review areas with the most items marked as needing changes in this session."
 
@@ -2461,12 +2551,24 @@ msgstr "Reescribiendo la historia de esta sección..."
 msgid "Run {stageLabel}"
 msgstr "Ejecutar {0}"
 
+msgid "Run Speech"
+msgstr "Ejecutar voz"
+
 msgid "Run the pipeline through at least the"
 msgstr "Ejecuta el pipeline al menos hasta la etapa"
 
 msgid "Run the pipeline through at least the <0>Storyboard</0> stage first."
 msgstr "Run the pipeline through at least the <0>Storyboard</0> stage first."
 
+msgid "Run Translation"
+msgstr "Ejecutar traducción"
+
+#~ msgid "Run Translation & Speech"
+#~ msgstr "Ejecutar traducción y audio"
+
+#~ msgid "Run translation first to enable speech generation."
+#~ msgstr "Ejecuta la traducción primero para habilitar la generación de voz."
+
 msgid "Run whole-book validation checks and configure accessibility assessment settings."
 msgstr "Run whole-book validation checks and configure accessibility assessment settings."
 
@@ -2500,14 +2602,20 @@ msgstr "Guardar y reejecutar glosario"
 msgid "Save & Rerun Quizzes"
 msgstr "Guardar y reejecutar quizzes"
 
+#~ msgid "Save & Rerun Speech Generation"
+#~ msgstr "Guardar y regenerar voz"
+
 msgid "Save & Rerun Storyboard"
 msgstr "Guardar y reejecutar storyboard"
 
 msgid "Save & Rerun TOC Generation"
 msgstr "Guardar y volver a ejecutar la generación de la tabla de contenidos"
 
-msgid "Save & Rerun Translations + Audio"
-msgstr "Guardar y reejecutar traducciones + audio"
+msgid "Save & Rerun Translations"
+msgstr "Guardar y reejecutar traducciones"
+
+#~ msgid "Save & Rerun Translations + Audio"
+#~ msgstr "Guardar y reejecutar traducciones + audio"
 
 #~ msgid "Save changes before cloning"
 #~ msgstr "Guarda los cambios antes de clonar"
@@ -2712,8 +2820,14 @@ msgstr "Mostrar todas"
 msgid "Show all {0}"
 msgstr "Show all {0}"
 
-msgid "Show all text & speech"
-msgstr "Mostrar todos los textos y audios"
+msgid "Show all speech entries"
+msgstr "Mostrar todas las entradas de voz"
+
+#~ msgid "Show all text & speech"
+#~ msgstr "Mostrar todos los textos y audios"
+
+msgid "Show all translations"
+msgstr "Mostrar todas las traducciones"
 
 msgid "Show fewer"
 msgstr "Show fewer"
@@ -2748,6 +2862,9 @@ msgstr "Generación de voz"
 msgid "Speech Prompts"
 msgstr "Indicaciones de voz"
 
+#~ msgid "Speech Settings"
+#~ msgstr "Configuración de voz"
+
 msgid "Spread Mode"
 msgstr "Modo de páginas dobles"
 
@@ -2968,6 +3085,9 @@ msgstr "Esta página no tiene imágenes con leyendas"
 msgid "This page has no storyboard sections"
 msgstr "Esta página no tiene secciones de storyboard"
 
+msgid "This page has no text entries with audio"
+msgstr "Esta página no tiene entradas de texto con audio"
+
 msgid "This page has no translatable text entries"
 msgstr "Esta página no tiene entradas de texto traducibles"
 
@@ -2998,6 +3118,9 @@ msgstr "Esto guardará tus ajustes y reejecutará la generación de leyendas de
 msgid "This will save your settings and re-run quiz generation."
 msgstr "Esto guardará tus ajustes y reejecutará la generación de quizzes."
 
+#~ msgid "This will save your settings and re-run speech generation for all languages."
+#~ msgstr "Esto guardará la configuración y regenerará la voz para todos los idiomas."
+
 msgid "This will save your settings and re-run the extraction pipeline. Any manual edits to extracted text will be overwritten for affected pages."
 msgstr "Esto guardará tus ajustes y reejecutará el pipeline de extracción. Las ediciones manuales al texto extraído se sobrescribirán en las páginas afectadas."
 
@@ -3007,8 +3130,11 @@ msgstr "Esto guardará tus ajustes y reejecutará el pipeline del storyboard. So
 msgid "This will save your settings and re-run the storyboard pipeline. Sectioning and rendering will be regenerated for all pages."
 msgstr "Esto guardará tus ajustes y reejecutará el pipeline del storyboard. El seccionamiento y la renderización se regenerarán para todas las páginas."
 
-msgid "This will save your settings and re-run translations and audio generation, rebuilding the text catalog, translating to output languages, and generating speech."
-msgstr "Esto guardará tus ajustes y reejecutará traducciones y generación de audio, reconstruyendo el catálogo de textos, traduciendo a los idiomas de salida y generando voz."
+#~ msgid "This will save your settings and re-run translations and audio generation, rebuilding the text catalog, translating to output languages, and generating speech."
+#~ msgstr "Esto guardará tus ajustes y reejecutará traducciones y generación de audio, reconstruyendo el catálogo de textos, traduciendo a los idiomas de salida y generando voz."
+
+msgid "This will save your settings and re-run translations, rebuilding the text catalog and translating to output languages."
+msgstr "Esto guardará tus ajustes y reejecutará las traducciones, reconstruyendo el catálogo de textos y traduciendo a los idiomas de salida."
 
 msgid "Time"
 msgstr "Hora"
@@ -3037,6 +3163,12 @@ msgstr "Track reviewer progress and review findings captured from Preview."
 msgid "Translate the book content and generate audio narration."
 msgstr "Traduce el contenido del libro y genera narración de audio."
 
+#~ msgid "Translate the book content into target languages."
+#~ msgstr "Traducir el contenido del libro a los idiomas de destino."
+
+msgid "Translating..."
+msgstr "Traduciendo..."
+
 msgid "Translation"
 msgstr "Traducción"
 
@@ -3159,6 +3291,9 @@ msgstr "Ver código HTML"
 msgid "Visual & sensory cues"
 msgstr "Visual & sensory cues"
 
+msgid "Voice"
+msgstr "Voz"
+
 msgid "Voice Mappings"
 msgstr "Asignaciones de voz"
 
@@ -3203,6 +3338,9 @@ msgstr "Cuando está activado, los colores de fondo de la guía de estilo se apl
 msgid "Whole-book checks for packaged ADT output, plus reviewer findings captured from Preview."
 msgstr "Whole-book checks for packaged ADT output, plus reviewer findings captured from Preview."
 
+msgid "Word-level timecodes"
+msgstr "Códigos de tiempo por palabra"
+
 msgid "Wraps 'Edit this image' requests. The AI receives the original image alongside this prompt. Supports user_prompt and style variables."
 msgstr "Envuelve solicitudes de 'Editar esta imagen'. La IA recibe la imagen original junto con este prompt. Soporta variables user_prompt y style."
 
diff --git a/apps/studio/src/locales/pt-BR.po b/apps/studio/src/locales/pt-BR.po
index f286e984..713e016e 100644
--- a/apps/studio/src/locales/pt-BR.po
+++ b/apps/studio/src/locales/pt-BR.po
@@ -76,6 +76,9 @@ msgstr "{0} {1} reviewed so far"
 msgid "{0} audio"
 msgstr "{0} áudios"
 
+#~ msgid "{0} audio files generated"
+#~ msgstr "{0} arquivos de áudio gerados"
+
 #. placeholder {0}: activeProgress.criteriaPerPage
 msgid "{0} checks per page"
 msgstr "{0} checks per page"
@@ -84,6 +87,10 @@ msgstr "{0} checks per page"
 msgid "{0} entries"
 msgstr "{0} entradas"
 
+#. placeholder {0}: String(totalAudioFiles)
+msgid "{0} files"
+msgstr "{0} arquivos"
+
 #~ msgid "{0} findings"
 #~ msgstr "{0} findings"
 
@@ -119,6 +126,9 @@ msgstr "{0} idiomas"
 msgid "{0} of {1} assigned pages reviewed"
 msgstr "{0} of {1} assigned pages reviewed"
 
+#~ msgid "{0} of {1} audio files generated"
+#~ msgstr "{0} de {1} arquivos de áudio gerados"
+
 #. placeholder {0}: activeMetrics.pagesReviewed
 #. placeholder {1}: activeProgress.totalBookPages
 msgid "{0} of {1} book pages reviewed"
@@ -157,6 +167,11 @@ msgstr "{0} textos"
 #~ msgid "{0}. These checks need manual review."
 #~ msgstr "{0}. Essas verificações precisam de revisão manual."
 
+#. placeholder {0}: String(generatedAudioCount)
+#. placeholder {1}: String(displayEntries.length)
+msgid "{0}/{1}"
+msgstr "{0}/{1}"
+
 #. placeholder {0}: String(generatedAudioCount)
 #. placeholder {1}: String(displayEntries.length)
 msgid "{0}/{1} audio"
@@ -408,6 +423,9 @@ msgstr "Add reviewer guidance."
 msgid "Add section"
 msgstr "Add section"
 
+msgid "Add Translations"
+msgstr "Adicionar traduções"
+
 msgid "Add your first book"
 msgstr "Adicione seu primeiro livro"
 
@@ -457,6 +475,9 @@ msgstr "A IA vê a imagem atual e a modifica"
 msgid "AIza..."
 msgstr "AIza..."
 
+msgid "All"
+msgstr "Todos"
+
 msgid "All Categories"
 msgstr "All Categories"
 
@@ -615,6 +636,12 @@ msgstr "Autor"
 msgid "Book data is outdated and must be rebuilt."
 msgstr "Os dados do livro estão desatualizados e precisam ser reconstruídos."
 
+msgid "book language"
+msgstr "idioma do livro"
+
+msgid "Book Language"
+msgstr "Idioma do livro"
+
 msgid "Book Metadata"
 msgstr "Metadados do livro"
 
@@ -711,6 +738,9 @@ msgstr "Checklist item"
 msgid "Choose an existing image from this project"
 msgstr "Escolha uma imagem existente deste projeto"
 
+msgid "Choose Provider"
+msgstr "Escolher provedor"
+
 msgid "Choose which per-page validation items reviewers must check, customize the wording and guidance, and add your own checklist items for this document."
 msgstr "Choose which per-page validation items reviewers must check, customize the wording and guidance, and add your own checklist items for this document."
 
@@ -726,6 +756,12 @@ msgstr "Clear all"
 msgid "Clear language"
 msgstr "Limpar idioma"
 
+#~ msgid "Clear Speech"
+#~ msgstr "Limpar voz"
+
+msgid "Clear speech data"
+msgstr "Limpar dados de voz"
+
 msgid "Click images to select"
 msgstr "Clique nas imagens para selecionar"
 
@@ -795,6 +831,9 @@ msgstr "Completion across the assigned page range for this reviewer session."
 msgid "Configure API keys for AI pipeline features."
 msgstr "Configure as chaves de API para os recursos de IA."
 
+#~ msgid "Configure Voices"
+#~ msgstr "Configurar vozes"
+
 msgid "Confirm merge"
 msgstr "Confirmar mesclagem"
 
@@ -1031,6 +1070,9 @@ msgstr "Descartar"
 msgid "Display"
 msgstr "Exibição"
 
+msgid "done"
+msgstr "concluído"
+
 msgid "Drag to move boxes, drag edges/corners to resize"
 msgstr "Drag to move boxes, drag edges/corners to resize"
 
@@ -1345,14 +1387,26 @@ msgstr "Gerar"
 msgid "Generate and customize the table of contents for the book navigation."
 msgstr "Gere e personalize o sumário para a navegação do livro."
 
+msgid "Generate audio"
+msgstr "Gerar áudio"
+
+#~ msgid "Generate audio narration for text entries."
+#~ msgstr "Gerar narração de áudio para as entradas de texto."
+
+msgid "Generate audio narration for the book content."
+msgstr "Gerar narração de áudio para o conteúdo do livro."
+
+#~ msgid "Generate audio narration for the translated book content."
+#~ msgstr "Gerar narração em áudio para o conteúdo traduzido do livro."
+
 msgid "Generate comprehension quizzes and activities based on the book content."
 msgstr "Gera questionários e atividades de compreensão com base no conteúdo do livro."
 
 msgid "Generate from pages"
 msgstr "Gerar a partir das páginas"
 
-msgid "Generate missing Gemini audio"
-msgstr "Gerar áudio faltante do Gemini"
+#~ msgid "Generate missing Gemini audio"
+#~ msgstr "Gerar áudio faltante do Gemini"
 
 msgid "Generate new"
 msgstr "Gerar nova"
@@ -1378,8 +1432,11 @@ msgstr "Gerando imagem..."
 msgid "Generating Quizzes..."
 msgstr "Gerando questionários..."
 
-msgid "Generating Text & Speech..."
-msgstr "Gerando texto e fala..."
+msgid "Generating Speech..."
+msgstr "Gerando voz..."
+
+#~ msgid "Generating Text & Speech..."
+#~ msgstr "Gerando texto e fala..."
 
 msgid "Generating TOC..."
 msgstr "Gerando sumário..."
@@ -1584,8 +1641,8 @@ msgstr "Idiomas"
 msgid "Last"
 msgstr "Última"
 
-msgid "Leave empty to output only in the book language."
-msgstr "Deixe vazio para gerar apenas no idioma do livro."
+#~ msgid "Leave empty to output only in the book language."
+#~ msgstr "Deixe vazio para gerar apenas no idioma do livro."
 
 msgid "Leave empty to process all pages."
 msgstr "Deixe vazio para processar todas as páginas."
@@ -1875,6 +1932,9 @@ msgstr "No accessibility findings were reported for this page."
 msgid "No assessment"
 msgstr "No assessment"
 
+msgid "No audio"
+msgstr "Sem áudio"
+
 msgid "No captions for this page"
 msgstr "Sem legendas para esta página"
 
@@ -1959,6 +2019,9 @@ msgstr "Nenhuma seção correspondente"
 msgid "No metadata yet — run the pipeline to extract book details"
 msgstr "Ainda não há metadados — execute o pipeline para extrair os detalhes do livro"
 
+#~ msgid "No output languages configured"
+#~ msgstr "Nenhum idioma de saída configurado"
+
 msgid "No page linked"
 msgstr "Nenhuma página vinculada"
 
@@ -1998,6 +2061,9 @@ msgstr "Sem seções nesta página"
 msgid "No segmentation needed for this image"
 msgstr "No segmentation needed for this image"
 
+msgid "No speech entries for this page"
+msgstr "Nenhuma entrada de voz para esta página"
+
 msgid "No text extracted"
 msgstr "Nenhum texto extraído"
 
@@ -2019,6 +2085,9 @@ msgstr "Nodes"
 msgid "None"
 msgstr "Nenhum"
 
+msgid "Not detected"
+msgstr "Não detectado"
+
 msgid "Not reviewed"
 msgstr "Not reviewed"
 
@@ -2067,6 +2136,9 @@ msgstr "Chave de API da OpenAI"
 msgid "OpenAI Voice"
 msgstr "Voz OpenAI"
 
+#~ msgid "optional"
+#~ msgstr "opcional"
+
 msgid "Optional instructions for the LLM..."
 msgstr "Instruções opcionais para o LLM..."
 
@@ -2337,6 +2409,15 @@ msgstr "Refresh validation"
 msgid "Refreshing results for this packaged preview."
 msgstr "Refreshing results for this packaged preview."
 
+msgid "Regenerate"
+msgstr "Regenerar"
+
+msgid "Regenerate all speech"
+msgstr "Regenerar todo o áudio"
+
+#~ msgid "Regenerate speech"
+#~ msgstr "Regenerar áudio"
+
 msgid "Region"
 msgstr "Região"
 
@@ -2382,6 +2463,9 @@ msgstr "Require comment on failure"
 msgid "Require suggested modification"
 msgstr "Require suggested modification"
 
+msgid "Rerun translation"
+msgstr "Reexecutar tradução"
+
 msgid "Reset to defaults"
 msgstr "Reset to defaults"
 
@@ -2416,6 +2500,12 @@ msgstr "Tentar novamente"
 msgid "Retry Export"
 msgstr "Tentar exportar novamente"
 
+msgid "Retry Speech"
+msgstr "Tentar voz novamente"
+
+msgid "Retry Translation"
+msgstr "Tentar tradução novamente"
+
 msgid "Review areas with the most items marked as needing changes in this session."
 msgstr "Review areas with the most items marked as needing changes in this session."
 
@@ -2461,12 +2551,24 @@ msgstr "Reescrevendo a história desta seção..."
 msgid "Run {stageLabel}"
 msgstr "Executar {0}"
 
+msgid "Run Speech"
+msgstr "Executar voz"
+
 msgid "Run the pipeline through at least the"
 msgstr "Execute o pipeline pelo menos até a etapa"
 
 msgid "Run the pipeline through at least the <0>Storyboard</0> stage first."
 msgstr "Run the pipeline through at least the <0>Storyboard</0> stage first."
 
+msgid "Run Translation"
+msgstr "Executar tradução"
+
+#~ msgid "Run Translation & Speech"
+#~ msgstr "Executar tradução e áudio"
+
+#~ msgid "Run translation first to enable speech generation."
+#~ msgstr "Execute a tradução primeiro para habilitar a geração de voz."
+
 msgid "Run whole-book validation checks and configure accessibility assessment settings."
 msgstr "Run whole-book validation checks and configure accessibility assessment settings."
 
@@ -2500,14 +2602,20 @@ msgstr "Salvar e reexecutar glossário"
 msgid "Save & Rerun Quizzes"
 msgstr "Salvar e reexecutar quizzes"
 
+#~ msgid "Save & Rerun Speech Generation"
+#~ msgstr "Salvar e regerar voz"
+
 msgid "Save & Rerun Storyboard"
 msgstr "Salvar e reexecutar storyboard"
 
 msgid "Save & Rerun TOC Generation"
 msgstr "Salvar e reexecutar geração do sumário"
 
-msgid "Save & Rerun Translations + Audio"
-msgstr "Salvar e reexecutar traduções + áudio"
+msgid "Save & Rerun Translations"
+msgstr "Salvar e reexecutar traduções"
+
+#~ msgid "Save & Rerun Translations + Audio"
+#~ msgstr "Salvar e reexecutar traduções + áudio"
 
 #~ msgid "Save changes before cloning"
 #~ msgstr "Salve as alterações antes de clonar"
@@ -2712,8 +2820,14 @@ msgstr "Mostrar todas"
 msgid "Show all {0}"
 msgstr "Show all {0}"
 
-msgid "Show all text & speech"
-msgstr "Mostrar todos os textos e áudios"
+msgid "Show all speech entries"
+msgstr "Mostrar todas as entradas de voz"
+
+#~ msgid "Show all text & speech"
+#~ msgstr "Mostrar todos os textos e áudios"
+
+msgid "Show all translations"
+msgstr "Mostrar todas as traduções"
 
 msgid "Show fewer"
 msgstr "Show fewer"
@@ -2748,6 +2862,9 @@ msgstr "Geração de fala"
 msgid "Speech Prompts"
 msgstr "Prompts de fala"
 
+#~ msgid "Speech Settings"
+#~ msgstr "Configurações de voz"
+
 msgid "Spread Mode"
 msgstr "Modo de páginas duplas"
 
@@ -2968,6 +3085,9 @@ msgstr "Esta página não tem imagens legendadas"
 msgid "This page has no storyboard sections"
 msgstr "Esta página não tem seções de storyboard"
 
+msgid "This page has no text entries with audio"
+msgstr "Esta página não tem entradas de texto com áudio"
+
 msgid "This page has no translatable text entries"
 msgstr "Esta página não tem entradas de texto traduzíveis"
 
@@ -2998,6 +3118,9 @@ msgstr "Isso salvará suas configurações e reexecutará a legendagem de imagen
 msgid "This will save your settings and re-run quiz generation."
 msgstr "Isso salvará suas configurações e reexecutará a geração de quizzes."
 
+#~ msgid "This will save your settings and re-run speech generation for all languages."
+#~ msgstr "Isso salvará suas configurações e regerará a voz para todos os idiomas."
+
 msgid "This will save your settings and re-run the extraction pipeline. Any manual edits to extracted text will be overwritten for affected pages."
 msgstr "Isso salvará suas configurações e reexecutará o pipeline de extração. Edições manuais no texto extraído serão sobrescritas nas páginas afetadas."
 
@@ -3007,8 +3130,11 @@ msgstr "Isso salvará suas configurações e reexecutará o pipeline do storyboa
 msgid "This will save your settings and re-run the storyboard pipeline. Sectioning and rendering will be regenerated for all pages."
 msgstr "Isso salvará suas configurações e reexecutará o pipeline do storyboard. O secionamento e a renderização serão regenerados para todas as páginas."
 
-msgid "This will save your settings and re-run translations and audio generation, rebuilding the text catalog, translating to output languages, and generating speech."
-msgstr "Isso salvará suas configurações e reexecutará traduções e geração de áudio, reconstruindo o catálogo de textos, traduzindo para os idiomas de saída e gerando fala."
+#~ msgid "This will save your settings and re-run translations and audio generation, rebuilding the text catalog, translating to output languages, and generating speech."
+#~ msgstr "Isso salvará suas configurações e reexecutará traduções e geração de áudio, reconstruindo o catálogo de textos, traduzindo para os idiomas de saída e gerando fala."
+
+msgid "This will save your settings and re-run translations, rebuilding the text catalog and translating to output languages."
+msgstr "Isso salvará suas configurações e reexecutará as traduções, reconstruindo o catálogo de textos e traduzindo para os idiomas de saída."
 
 msgid "Time"
 msgstr "Hora"
@@ -3037,6 +3163,12 @@ msgstr "Track reviewer progress and review findings captured from Preview."
 msgid "Translate the book content and generate audio narration."
 msgstr "Traduza o conteúdo do livro e gera narração em áudio."
 
+#~ msgid "Translate the book content into target languages."
+#~ msgstr "Traduzir o conteúdo do livro para os idiomas de destino."
+
+msgid "Translating..."
+msgstr "Traduzindo..."
+
 msgid "Translation"
 msgstr "Tradução"
 
@@ -3159,6 +3291,9 @@ msgstr "Ver código HTML"
 msgid "Visual & sensory cues"
 msgstr "Visual & sensory cues"
 
+msgid "Voice"
+msgstr "Voz"
+
 msgid "Voice Mappings"
 msgstr "Mapeamentos de voz"
 
@@ -3203,6 +3338,9 @@ msgstr "Quando ativado, as cores de fundo do guia de estilo são aplicadas ao co
 msgid "Whole-book checks for packaged ADT output, plus reviewer findings captured from Preview."
 msgstr "Whole-book checks for packaged ADT output, plus reviewer findings captured from Preview."
 
+msgid "Word-level timecodes"
+msgstr "Códigos de tempo por palavra"
+
 msgid "Wraps 'Edit this image' requests. The AI receives the original image alongside this prompt. Supports user_prompt and style variables."
 msgstr "Envolve solicitações de 'Editar esta imagem'. A IA recebe a imagem original junto com este prompt. Suporta variáveis user_prompt e style."
 
diff --git a/apps/studio/src/routes/books.$label.$step.settings.tsx b/apps/studio/src/routes/books.$label.$step.settings.tsx
index b77b9818..674d6b28 100644
--- a/apps/studio/src/routes/books.$label.$step.settings.tsx
+++ b/apps/studio/src/routes/books.$label.$step.settings.tsx
@@ -9,6 +9,7 @@ import { GlossarySettings } from "@/components/pipeline/stages/glossary/Glossary
 import { TocSettings } from "@/components/pipeline/stages/toc/TocSettings"
 import { CaptionsSettings } from "@/components/pipeline/stages/captions/CaptionsSettings"
 import { TranslationsSettings } from "@/components/pipeline/stages/translations/TranslationsSettings"
+import { SpeechSettings } from "@/components/pipeline/stages/speech/SpeechSettings"
 import { ValidationSettings } from "@/components/pipeline/stages/ValidationSettings"
 import { getStageLabelI18n } from "@/components/pipeline/pipeline-i18n"
 import { cn } from "@/lib/utils"
@@ -89,8 +90,13 @@ export function StepSettingsPage() {
               return <TocSettings bookLabel={label} headerTarget={headerTarget} />
             case "captions":
               return <CaptionsSettings bookLabel={label} headerTarget={headerTarget} tab={tab} />
-            case "text-and-speech":
+            case "translation": {
+              const speechTabs: Record<string, string> = { speech: "general", "speech-prompts": "speech-prompts", voices: "voices" }
+              if (tab in speechTabs) {
+                return <SpeechSettings bookLabel={label} headerTarget={headerTarget} tab={speechTabs[tab]} />
+              }
               return <TranslationsSettings bookLabel={label} headerTarget={headerTarget} tab={tab} />
+            }
             case "validation":
               return <ValidationSettings bookLabel={label} headerTarget={headerTarget} tab={tab} />
             default:
diff --git a/apps/studio/src/routes/index.tsx b/apps/studio/src/routes/index.tsx
index fb731d3b..f80d22a0 100644
--- a/apps/studio/src/routes/index.tsx
+++ b/apps/studio/src/routes/index.tsx
@@ -32,8 +32,8 @@ export const Route = createFileRoute("/")({
   component: HomePage,
 })
 
-/** Pipeline stages shown in the sidebar (skip the "book" overview entry) */
-const PIPELINE_STEPS = getPipelineStages()
+/** Pipeline stages shown in the sidebar — skip "book" overview and "speech" (combined into "Text & Speech") */
+const PIPELINE_STEPS = getPipelineStages().filter((s) => s.slug !== "speech")
 
 function DetailRow({
   icon: Icon,
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index 7cba6c0e..598312ab 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -120,12 +120,14 @@ captions  ───────────────────────
 glossary  ─────────────────────────────────────────────────────────┘ (after: storyboard)
   (all three run in parallel)
                             │
-text-and-speech ────────────┘  (after: quizzes, captions, glossary)
+translation ────────────────┘  (after: quizzes, captions, glossary, toc)
   ├── text-catalog
-  ├── catalog-translation    (after: text-catalog)
-  └── tts                    (after: catalog-translation)
+  └── catalog-translation    (after: text-catalog)
                             │
-package ────────────────────┘  (after: text-and-speech)
+speech ─────────────────────┘  (after: translation)
+  └── tts
+                            │
+package ────────────────────┘  (after: speech)
   └── package-web
 ```
 
diff --git a/packages/pipeline/src/__tests__/dag.test.ts b/packages/pipeline/src/__tests__/dag.test.ts
index a5cb17b9..8aa059e1 100644
--- a/packages/pipeline/src/__tests__/dag.test.ts
+++ b/packages/pipeline/src/__tests__/dag.test.ts
@@ -337,8 +337,10 @@ describe("runPipelineDAG", () => {
     expect(completed).toContain("image-captioning")
     expect(completed).toContain("glossary")
 
-    // text-and-speech depends on all three → skipped because quizzes failed
-    expect(result.stages.statuses.get("text-and-speech")).toBe("skipped")
+    // translation depends on quizzes, captions, and glossary → skipped because quizzes failed
+    // speech depends on translation → also skipped
+    expect(result.stages.statuses.get("translation")).toBe("skipped")
+    expect(result.stages.statuses.get("speech")).toBe("skipped")
     expect(result.stages.statuses.get("package")).toBe("skipped")
   })
 
diff --git a/packages/types/src/__tests__/pipeline-effects.test.ts b/packages/types/src/__tests__/pipeline-effects.test.ts
index 08c16136..91383801 100644
--- a/packages/types/src/__tests__/pipeline-effects.test.ts
+++ b/packages/types/src/__tests__/pipeline-effects.test.ts
@@ -12,8 +12,8 @@ describe("pipeline effects", () => {
       "quiz-generation",
       "text-catalog",
       "catalog-translation",
-      "tts",
       "text-catalog-translation",
+      "tts",
       "package-web",
       "accessibility-assessment",
     ])
@@ -29,9 +29,15 @@ describe("pipeline effects", () => {
     ])
   })
 
-  it("derives stage-output cache resources from produced nodes", () => {
-    expect(getCacheResourcesForStageOutput("text-and-speech")).toEqual([
+  it("derives stage-output cache resources for translation stage", () => {
+    expect(getCacheResourcesForStageOutput("translation")).toEqual([
       "text-catalog",
+      "step-status",
+    ])
+  })
+
+  it("derives stage-output cache resources for speech stage", () => {
+    expect(getCacheResourcesForStageOutput("speech")).toEqual([
       "tts",
       "step-status",
     ])
diff --git a/packages/types/src/pipeline-effects.ts b/packages/types/src/pipeline-effects.ts
index fe6cf31f..185c8b75 100644
--- a/packages/types/src/pipeline-effects.ts
+++ b/packages/types/src/pipeline-effects.ts
@@ -22,7 +22,7 @@ export type PipelineCacheResource =
   | "debug"
 
 const EXTRA_STAGE_OUTPUT_NODES: Partial<Record<StageName, readonly PipelineNodeName[]>> = {
-  "text-and-speech": ["text-catalog-translation"],
+  "translation": ["text-catalog-translation"],
 }
 
 /** All node_data node names written by each stage. */
@@ -33,7 +33,8 @@ export const STAGE_OUTPUT_NODES: Record<StageName, readonly PipelineNodeName[]>
   "captions": [],
   "glossary": [],
   "toc": [],
-  "text-and-speech": [],
+  "translation": [],
+  "speech": [],
   "package": [],
 }
 
@@ -44,7 +45,8 @@ const STAGE_DIRECT_DEPENDENTS: Record<StageName, StageName[]> = {
   "captions": [],
   "glossary": [],
   "toc": [],
-  "text-and-speech": [],
+  "translation": [],
+  "speech": [],
   "package": [],
 }
 
diff --git a/packages/types/src/pipeline.ts b/packages/types/src/pipeline.ts
index e57f39a3..c55b1f43 100644
--- a/packages/types/src/pipeline.ts
+++ b/packages/types/src/pipeline.ts
@@ -35,7 +35,8 @@ export const StageName = z.enum([
   "captions",
   "glossary",
   "toc",
-  "text-and-speech",
+  "translation",
+  "speech",
   "package",
 ])
 export type StageName = z.infer<typeof StageName>
@@ -116,19 +117,26 @@ export const PIPELINE: StageDef[] = [
     ],
   },
   {
-    name: "text-and-speech",
-    label: "Text & Speech",
+    name: "translation",
+    label: "Translation",
     dependsOn: ["quizzes", "captions", "glossary", "toc"],
     steps: [
       { name: "text-catalog", label: "Text Catalog" },
       { name: "catalog-translation", label: "Catalog Translation", dependsOn: ["text-catalog"] },
-      { name: "tts", label: "Speech Generation", dependsOn: ["catalog-translation"] },
+    ],
+  },
+  {
+    name: "speech",
+    label: "Speech",
+    dependsOn: ["translation"],
+    steps: [
+      { name: "tts", label: "Speech Generation" },
     ],
   },
   {
     name: "package",
     label: "Package",
-    dependsOn: ["text-and-speech"],
+    dependsOn: ["speech"],
     steps: [
       { name: "package-web", label: "Web Package" },
       {