diff --git a/apps/api/src/routes/books.test.ts b/apps/api/src/routes/books.test.ts index 32105f90..b6bc070e 100644 --- a/apps/api/src/routes/books.test.ts +++ b/apps/api/src/routes/books.test.ts @@ -529,8 +529,8 @@ describe("POST /books/:label/stages/run", () => { "X-Gemini-API-Key": "gm-test", }, body: JSON.stringify({ - fromStage: "text-and-speech", - toStage: "text-and-speech", + fromStage: "translation", + toStage: "speech", }), }) diff --git a/apps/api/src/routes/pages.test.ts b/apps/api/src/routes/pages.test.ts index 740bfdd6..42a3f395 100644 --- a/apps/api/src/routes/pages.test.ts +++ b/apps/api/src/routes/pages.test.ts @@ -667,7 +667,7 @@ describe("Page routes", () => { } } - /** Assert that all caption + text-and-speech node data and step_runs were cleared. */ + /** Assert that all caption + translation + speech node data and step_runs were cleared. */ function expectAllDownstreamCleared(dir: string, bookLabel: string) { const s = createBookStorage(bookLabel, dir) try { @@ -687,7 +687,7 @@ describe("Page routes", () => { } } - /** Assert that text-and-speech (but NOT image-captioning) node data and step_runs were cleared. */ + /** Assert that translation + speech (but NOT image-captioning) node data and step_runs were cleared. */ function expectTextAndSpeechCleared(dir: string, bookLabel: string) { const s = createBookStorage(bookLabel, dir) try { @@ -709,7 +709,7 @@ describe("Page routes", () => { } describe("PUT /api/books/:label/pages/:pageId/sectioning clears downstream", () => { - it("clears caption + text-and-speech data on sectioning save", async () => { + it("clears caption + translation + speech data on sectioning save", async () => { seedDownstreamData(tmpDir, label) const data = { @@ -746,7 +746,7 @@ describe("Page routes", () => { }) describe("PUT /api/books/:label/pages/:pageId/rendering clears downstream", () => { - it("clears caption + text-and-speech data on rendering save", async () => { + it("clears caption + translation + speech data on rendering save", async () => { seedDownstreamData(tmpDir, label) const data = { @@ -773,7 +773,7 @@ describe("Page routes", () => { }) describe("POST clone clears downstream", () => { - it("clears caption + text-and-speech data on section clone", async () => { + it("clears caption + translation + speech data on section clone", async () => { seedDownstreamData(tmpDir, label) const res = await app.request( @@ -787,7 +787,7 @@ describe("Page routes", () => { }) describe("POST delete clears downstream", () => { - it("clears caption + text-and-speech data on section delete", async () => { + it("clears caption + translation + speech data on section delete", async () => { // Need at least 2 sections so delete is valid const s = createBookStorage(label, tmpDir) try { @@ -837,7 +837,7 @@ describe("Page routes", () => { }) describe("POST crop (images) clears downstream", () => { - it("clears caption + text-and-speech data on image crop", async () => { + it("clears caption + translation + speech data on image crop", async () => { seedDownstreamData(tmpDir, label) // Minimal valid PNG (1x1 pixel) @@ -865,7 +865,7 @@ describe("Page routes", () => { }) }) - describe("PUT image-captioning clears text-and-speech downstream", () => { + describe("PUT image-captioning clears translation + speech downstream", () => { it("clears text-catalog/translations/TTS but keeps image-captioning", async () => { seedDownstreamData(tmpDir, label) diff --git a/apps/api/src/routes/pages.ts b/apps/api/src/routes/pages.ts index b6b76bf3..1e29cae4 100644 --- a/apps/api/src/routes/pages.ts +++ b/apps/api/src/routes/pages.ts @@ -326,7 +326,7 @@ async function executeAiImageGeneration(params: AiImageGenParams): Promise<{ } } -/** Clear caption + downstream text-and-speech data when images change. */ +/** Clear caption + downstream translation + speech data when images change. */ function clearCaptionData(storage: Storage): void { storage.clearNodesByType(["image-captioning", "text-catalog", "text-catalog-translation", "tts"]) storage.clearStepRuns(["image-captioning", "text-catalog", "catalog-translation", "tts"]) diff --git a/apps/api/src/routes/stages.ts b/apps/api/src/routes/stages.ts index e2bdc40e..f6fc2d9b 100644 --- a/apps/api/src/routes/stages.ts +++ b/apps/api/src/routes/stages.ts @@ -135,6 +135,43 @@ export function createStageRoutes( return c.json({ status: result.status, label, fromStage, toStage }) }) + // DELETE /books/:label/stages/:stageName — Clear a stage's data and step runs + app.delete("/books/:label/stages/:stageName", (c) => { + const { label, stageName } = c.req.param() + + let safeLabel: string + try { + safeLabel = parseBookLabel(label) + } catch (err) { + const message = err instanceof Error ? err.message : String(err) + throw new HTTPException(400, { message }) + } + + const parsed = StageName.safeParse(stageName) + if (!parsed.success) { + throw new HTTPException(400, { message: `Invalid stage name: ${stageName}` }) + } + + const stage = parsed.data + const storage = createBookStorage(safeLabel, booksDir) + try { + const nodes = getStageClearNodes(stage) + if (nodes.length > 0) { + storage.clearNodesByType(nodes) + } + const stagesToClear = getStageClearOrder(stage) + const stepsToClear = PIPELINE + .filter((s) => stagesToClear.includes(s.name)) + .flatMap((s) => s.steps.map((step) => step.name)) + storage.clearStepRuns(stepsToClear) + } finally { + storage.close() + } + + console.log(`[stages] ${label}: cleared stage ${stage}`) + return c.json({ ok: true, stage }) + }) + // GET /books/:label/step-status — Unified stage + step status // DB step_runs is the single source of truth for step/stage state. // Only "queued" comes from the in-memory run queue. diff --git a/apps/api/src/services/stage-runner.test.ts b/apps/api/src/services/stage-runner.test.ts index f54b0a9a..36a660e4 100644 --- a/apps/api/src/services/stage-runner.test.ts +++ b/apps/api/src/services/stage-runner.test.ts @@ -177,6 +177,12 @@ function seedTextAndSpeechBook(booksDir: string, label: string): void { }, ], }) + + // Pre-seed text catalog so the speech stage can read it + storage.putNodeData("text-catalog", "book", { + entries: [{ id: "pg001_t001", text: "Hello world" }], + generatedAt: new Date().toISOString(), + }) } finally { storage.close() } @@ -351,7 +357,7 @@ describe("createStageRunner storyboard render-only", () => { }) }) -describe("createStageRunner text-and-speech Gemini partial failures", () => { +describe("createStageRunner speech Gemini partial failures", () => { let tmpDir = "" beforeEach(() => { @@ -402,8 +408,8 @@ speech: geminiApiKey: "gm-test", promptsDir, configPath, - fromStage: "text-and-speech", - toStage: "text-and-speech", + fromStage: "speech", + toStage: "speech", }, { emit: (event) => events.push(event) } ) @@ -472,8 +478,8 @@ speech: geminiApiKey: "gm-test", promptsDir, configPath, - fromStage: "text-and-speech", - toStage: "text-and-speech", + fromStage: "speech", + toStage: "speech", }, { emit: (event) => events.push(event) } ) diff --git a/apps/api/src/services/stage-runner.ts b/apps/api/src/services/stage-runner.ts index 431485b8..38f515bb 100644 --- a/apps/api/src/services/stage-runner.ts +++ b/apps/api/src/services/stage-runner.ts @@ -169,7 +169,8 @@ const STAGE_RUNNERS: Record = { "captions": runCaptionsStep, "glossary": runGlossaryStep, "toc": runTocStep, - "text-and-speech": runTextAndSpeechStep, + "translation": runTranslationStep, + "speech": runSpeechStep, "package": async () => { /* packaging handled separately */ }, } @@ -1350,7 +1351,7 @@ async function runTocStep( // Text & Speech stage (text catalog + catalog translation + TTS) // --------------------------------------------------------------------------- -async function runTextAndSpeechStep( +async function runTranslationStep( label: string, options: StageRunOptions, progress: StageRunProgress @@ -1365,15 +1366,11 @@ async function runTextAndSpeechStep( try { const config = loadBookConfig(label, booksDir, configPath) const cacheDir = path.join(path.resolve(booksDir), label, ".cache") - const bookDir = path.join(path.resolve(booksDir), label) const bookPromptsDir = path.join(path.resolve(booksDir), label, "prompts") const promptEngine = createPromptEngine([bookPromptsDir, promptsDir]) const rateLimiter = config.rate_limit ? createRateLimiter(config.rate_limit.requests_per_minute) : undefined - const configDir = configPath - ? path.join(path.dirname(configPath), "config") - : path.resolve(process.cwd(), "config") // Get book language from metadata const metadataRow = storage.getLatestNodeData("metadata", "book") @@ -1514,8 +1511,61 @@ async function runTextAndSpeechStep( progress.emit({ type: "step-complete", step: "catalog-translation" }) console.log(`[stage-run] ${label}: catalog translation complete`) } + } finally { + storage.close() + if (previousKey !== undefined) { + process.env.OPENAI_API_KEY = previousKey + } else { + delete process.env.OPENAI_API_KEY + } + } +} + +async function runSpeechStep( + label: string, + options: StageRunOptions, + progress: StageRunProgress +): Promise { + const { booksDir, apiKey, configPath } = options + + const previousKey = process.env.OPENAI_API_KEY + process.env.OPENAI_API_KEY = apiKey + + const storage = createBookStorage(label, booksDir) + + try { + const config = loadBookConfig(label, booksDir, configPath) + const cacheDir = path.join(path.resolve(booksDir), label, ".cache") + const bookDir = path.join(path.resolve(booksDir), label) + const configDir = configPath + ? path.join(path.dirname(configPath), "config") + : path.resolve(process.cwd(), "config") + + // Get book language from metadata + const metadataRow = storage.getLatestNodeData("metadata", "book") + const metadata = metadataRow?.data as { language_code?: string | null } | null + const language = normalizeLocale(config.editing_language ?? metadata?.language_code ?? "en") + + const effectiveConcurrency = config.concurrency ?? 32 + + // Output languages default to editing language if not set + const outputLanguages = Array.from( + new Set( + (config.output_languages && config.output_languages.length > 0 + ? config.output_languages + : [language]).map((code) => normalizeLocale(code)) + ) + ) + + // Load text catalog from storage (produced by the translation stage) + const catalogRow = storage.getLatestNodeData("text-catalog", "book") + if (!catalogRow) { + progress.emit({ type: "step-skip", step: "tts" }) + console.log(`[stage-run] ${label}: TTS skipped (no text catalog)`) + return + } + const catalog = catalogRow.data as TextCatalogOutput - // ── Step 3: Generate TTS ──────────────────────────────────────── if (catalog.entries.length === 0) { progress.emit({ type: "step-skip", step: "tts" }) console.log(`[stage-run] ${label}: TTS skipped (empty catalog)`) @@ -1805,18 +1855,18 @@ async function runTextAndSpeechStep( } if (geminiFailedItems.length > 0) { - const summary = `${geminiFailedItems.length} Gemini TTS item(s) failed. Missing Gemini audio can be generated one by one from the Text & Speech view.` + const summary = `${geminiFailedItems.length} Gemini TTS item(s) failed. Missing Gemini audio can be generated one by one from the Speech view.` progress.emit({ type: "step-error", step: "tts", error: summary, }) - console.log(`[stage-run] ${label}: text & speech completed with Gemini TTS gaps`) + console.log(`[stage-run] ${label}: speech completed with Gemini TTS gaps`) return } progress.emit({ type: "step-complete", step: "tts" }) - console.log(`[stage-run] ${label}: text & speech complete`) + console.log(`[stage-run] ${label}: speech complete`) } finally { storage.close() if (previousKey !== undefined) { diff --git a/apps/studio/src/api/client.ts b/apps/studio/src/api/client.ts index 6bc16659..7427ceb5 100644 --- a/apps/studio/src/api/client.ts +++ b/apps/studio/src/api/client.ts @@ -475,6 +475,9 @@ export const api = { deleteBook: (label: string) => request<{ ok: boolean }>(`/books/${label}`, { method: "DELETE" }), + clearStage: (label: string, stageName: string) => + request<{ ok: boolean; stage: string }>(`/books/${label}/stages/${stageName}`, { method: "DELETE" }), + runStages: ( label: string, apiKey: string, diff --git a/apps/studio/src/components/LanguagePicker.tsx b/apps/studio/src/components/LanguagePicker.tsx index 81823a3d..9ea7ff00 100644 --- a/apps/studio/src/components/LanguagePicker.tsx +++ b/apps/studio/src/components/LanguagePicker.tsx @@ -25,12 +25,14 @@ export function LanguagePicker({ multiple, label, hint, + bookLanguage, }: { selected: string | Set onSelect: (code: string) => void multiple?: boolean label: string hint?: string + bookLanguage?: string | null }) { const { t } = useLingui() const [search, setSearch] = useState("") @@ -66,7 +68,8 @@ export function LanguagePicker({ // Build dropdown items based on phase const items: DropdownItem[] = useMemo(() => { if (lockedLang) { - // Phase 2: show base language first, then suggested countries, then all others + // Phase 2: show base language first, then suggested countries. + // Other countries only appear when the user is searching. const q = search.toLowerCase() const { suggested, all } = getCountriesForLanguage(lockedLang.code) const result: DropdownItem[] = [ @@ -83,7 +86,10 @@ export function LanguagePicker({ } } for (const c of suggested) addCountry(c) - for (const c of all) addCountry(c) + // Only search all countries when the user has typed something + if (q) { + for (const c of all) addCountry(c) + } return result } // Phase 1: show languages @@ -145,12 +151,16 @@ export function LanguagePicker({ const lockLanguage = useCallback( (lang: Language) => { - // Always enter phase 2 — any language can be paired with any country + // Only enter phase 2 if the language has defined regional variants + if (!lang.countries || lang.countries.length === 0) { + commit(lang.code) + return + } setLockedLang(lang) setSearch("") setHighlighted(0) }, - [] + [commit] ) const clearSelection = useCallback( @@ -257,22 +267,28 @@ export function LanguagePicker({ {/* Selected badges for multi-select */} {multiple && selectedSet && selectedSet.size > 0 && (
- {Array.from(selectedSet).map((code) => ( - - {getDisplayName(code) || code} - - - ))} + {getDisplayName(code) || code} + {isBookLang && ( + ({t`book language`}) + )} + + + ) + })}
)} diff --git a/apps/studio/src/components/pipeline/components/StageRunCard.tsx b/apps/studio/src/components/pipeline/components/StageRunCard.tsx index cc433fcb..16fba7aa 100644 --- a/apps/studio/src/components/pipeline/components/StageRunCard.tsx +++ b/apps/studio/src/components/pipeline/components/StageRunCard.tsx @@ -30,6 +30,10 @@ export const STAGE_SUB_STEPS: Record = Object.fromEnt interface StageRunCardProps { stageSlug: string + /** Additional pipeline stage slugs whose sub-steps should be shown in this card. */ + additionalStageSlugs?: string[] + /** Override the internal error check (useful when combining multiple stages). */ + overrideHasError?: boolean isRunning: boolean completed?: boolean showRunButton?: boolean @@ -50,6 +54,8 @@ const HOVER_BG_BY_COLOR: Record = { export function StageRunCard({ stageSlug, + additionalStageSlugs, + overrideHasError, isRunning, completed, showRunButton = true, @@ -60,11 +66,14 @@ export function StageRunCard({ const stage = STAGES.find((s) => s.slug === stageSlug) ?? STAGES[0] const { stageState, stepState, stepProgress, stepError, error } = useBookRun() const stageStatus = stageState(stageSlug) - const subSteps = STAGE_SUB_STEPS[stageSlug as StageName] ?? [] + const subSteps = [ + ...(STAGE_SUB_STEPS[stageSlug as StageName] ?? []), + ...(additionalStageSlugs?.flatMap((s) => STAGE_SUB_STEPS[s as StageName] ?? []) ?? []), + ] const Icon = stage.icon const color = stage.color const borderColor = stage.borderDark - const hasError = stageStatus === "error" + const hasError = overrideHasError ?? stageStatus === "error" const isCompleted = completed ?? (stageStatus === "done") const hasSubSteps = subSteps.length > 0 const hoverColorClass = HOVER_BG_BY_COLOR[color] ?? "hover:bg-gray-600" diff --git a/apps/studio/src/components/pipeline/components/StageSidebar.tsx b/apps/studio/src/components/pipeline/components/StageSidebar.tsx index ffdaa573..5dc68b32 100644 --- a/apps/studio/src/components/pipeline/components/StageSidebar.tsx +++ b/apps/studio/src/components/pipeline/components/StageSidebar.tsx @@ -96,7 +96,7 @@ function getSettingsTabs( captions: [ { key: "general", label: i18n._(SETTINGS_TAB_MESSAGE["caption-prompt"]) }, ], - "text-and-speech": [ + translation: [ { key: "general", label: i18n._(SETTINGS_TAB_MESSAGE.languages) }, { key: "prompt", label: i18n._(SETTINGS_TAB_MESSAGE["translation-prompt"]) }, { key: "speech", label: i18n._(SETTINGS_TAB_MESSAGE.speech) }, @@ -159,7 +159,8 @@ export function StageSidebar({ const storyboardDone = stageState("storyboard") === "done" const validationCompleted = Boolean(accessibilityAssessment?.assessment) - const stageItems = STAGES.map((step, index) => { + const visibleStages = STAGES.filter((s) => s.slug !== "speech") + const stageItems = visibleStages.map((step, index) => { const isActive = step.slug === activeStep const Icon = step.icon const settingsTabs = getSettingsTabs(step.slug, i18n) @@ -182,7 +183,7 @@ export function StageSidebar({ return (
{/* Connector line */} - {index < STAGES.length - 1 && ( + {index < visibleStages.length - 1 && (
)} diff --git a/apps/studio/src/components/pipeline/components/StepViewRouter.tsx b/apps/studio/src/components/pipeline/components/StepViewRouter.tsx index f9168d90..2f548b18 100644 --- a/apps/studio/src/components/pipeline/components/StepViewRouter.tsx +++ b/apps/studio/src/components/pipeline/components/StepViewRouter.tsx @@ -9,7 +9,8 @@ import { CaptionsView, GlossaryView, TocView, - TranslationsView, + TranslationStageView, + SpeechView, PreviewView, ValidationView, ExportView, @@ -53,7 +54,8 @@ const VIEW_MAP: Record = { captions: { component: CaptionsView }, glossary: { component: GlossaryView }, toc: { component: TocView }, - "text-and-speech": { component: TranslationsView, fullHeight: true }, + translation: { component: TranslationStageView, fullHeight: true }, + speech: { component: SpeechView, fullHeight: true }, validation: { component: ValidationView, fullHeight: true }, preview: { component: PreviewView, fullHeight: true }, export: { component: ExportView, fullHeight: true }, diff --git a/apps/studio/src/components/pipeline/pipeline-i18n.ts b/apps/studio/src/components/pipeline/pipeline-i18n.ts index df5c2847..be778479 100644 --- a/apps/studio/src/components/pipeline/pipeline-i18n.ts +++ b/apps/studio/src/components/pipeline/pipeline-i18n.ts @@ -14,7 +14,8 @@ export const STAGE_LABEL_MESSAGES: Record = { captions: msg`Captions`, glossary: msg`Glossary`, toc: msg`Table of Contents`, - "text-and-speech": msg`Text & Speech`, + translation: msg`Text & Speech`, + speech: msg`Speech`, validation: msg`Validation`, preview: msg`Preview`, export: msg`Export`, @@ -28,7 +29,8 @@ export const STAGE_RUNNING_LABEL_MESSAGES: Record = { captions: msg`Captioning Images...`, glossary: msg`Generating Glossary...`, toc: msg`Generating TOC...`, - "text-and-speech": msg`Generating Text & Speech...`, + translation: msg`Translating...`, + speech: msg`Generating Speech...`, validation: msg`Running Validation...`, preview: msg`Building Preview...`, export: msg`Exporting...`, @@ -41,7 +43,8 @@ export const STAGE_DESCRIPTION_MESSAGES: Record = { captions: msg`Create descriptive captions for images to improve accessibility.`, glossary: msg`Build a glossary of key terms and definitions found in the text.`, toc: msg`Generate and customize the table of contents for the book navigation.`, - "text-and-speech": msg`Translate the book content and generate audio narration.`, + translation: msg`Translate the book content and generate audio narration.`, + speech: msg`Generate audio narration for the book content.`, validation: msg`Run whole-book validation checks and configure accessibility assessment settings.`, preview: msg`Package and preview the final ADT web application.`, } diff --git a/apps/studio/src/components/pipeline/settings-routing.test.ts b/apps/studio/src/components/pipeline/settings-routing.test.ts index d904d32e..e3ae5e22 100644 --- a/apps/studio/src/components/pipeline/settings-routing.test.ts +++ b/apps/studio/src/components/pipeline/settings-routing.test.ts @@ -9,7 +9,8 @@ describe("settings-routing", () => { it("resolves known settings stages", () => { expect(resolveSettingsStageSlug("extract")).toBe("extract") - expect(resolveSettingsStageSlug("text-and-speech")).toBe("text-and-speech") + expect(resolveSettingsStageSlug("translation")).toBe("translation") + expect(resolveSettingsStageSlug("speech")).toBe("speech") }) it("returns null for stages without settings views", () => { diff --git a/apps/studio/src/components/pipeline/settings-routing.ts b/apps/studio/src/components/pipeline/settings-routing.ts index 05a70f3b..c5d827a2 100644 --- a/apps/studio/src/components/pipeline/settings-routing.ts +++ b/apps/studio/src/components/pipeline/settings-routing.ts @@ -5,7 +5,8 @@ export const SETTINGS_STAGE_SLUGS = [ "glossary", "toc", "captions", - "text-and-speech", + "translation", + "speech", "validation", ] as const diff --git a/apps/studio/src/components/pipeline/stage-config.test.ts b/apps/studio/src/components/pipeline/stage-config.test.ts index dd585544..5ad632f3 100644 --- a/apps/studio/src/components/pipeline/stage-config.test.ts +++ b/apps/studio/src/components/pipeline/stage-config.test.ts @@ -20,7 +20,8 @@ describe("stage-config", () => { "captions", "glossary", "toc", - "text-and-speech", + "translation", + "speech", "preview", ]) }) @@ -34,7 +35,8 @@ describe("stage-config", () => { "captions", "glossary", "toc", - "text-and-speech", + "translation", + "speech", "validation", "preview", "export", diff --git a/apps/studio/src/components/pipeline/stage-config.ts b/apps/studio/src/components/pipeline/stage-config.ts index 55828344..ea3866b1 100644 --- a/apps/studio/src/components/pipeline/stage-config.ts +++ b/apps/studio/src/components/pipeline/stage-config.ts @@ -7,6 +7,7 @@ import { BookOpen, List, Languages, + Volume2, Eye, ShieldCheck, FileDown, @@ -21,7 +22,8 @@ export const STAGES = [ { slug: "captions", label: "Captions", runningLabel: "Captioning Images", icon: Image, color: "bg-teal-600", hex: "#0d9488", textColor: "text-teal-600", bgLight: "bg-teal-50", borderColor: "border-teal-200", borderDark: "border-teal-600" }, { slug: "glossary", label: "Glossary", runningLabel: "Generating Glossary", icon: BookOpen, color: "bg-lime-600", hex: "#65a30d", textColor: "text-lime-600", bgLight: "bg-lime-50", borderColor: "border-lime-200", borderDark: "border-lime-600" }, { slug: "toc", label: "Table of Contents", runningLabel: "Generating TOC", icon: List, color: "bg-amber-600", hex: "#d97706", textColor: "text-amber-600", bgLight: "bg-amber-50", borderColor: "border-amber-200", borderDark: "border-amber-600" }, - { slug: "text-and-speech", label: "Text & Speech", runningLabel: "Generating Text & Speech", icon: Languages, color: "bg-pink-600", hex: "#db2777", textColor: "text-pink-600", bgLight: "bg-pink-50", borderColor: "border-pink-200", borderDark: "border-pink-600" }, + { slug: "translation", label: "Text & Speech", runningLabel: "Translating", icon: Languages, color: "bg-pink-600", hex: "#db2777", textColor: "text-pink-600", bgLight: "bg-pink-50", borderColor: "border-pink-200", borderDark: "border-pink-600" }, + { slug: "speech", label: "Speech", runningLabel: "Generating Speech", icon: Volume2, color: "bg-rose-600", hex: "#e11d48", textColor: "text-rose-600", bgLight: "bg-rose-50", borderColor: "border-rose-200", borderDark: "border-rose-600" }, { slug: "validation", label: "Validation", runningLabel: "Running Validation", icon: ShieldCheck, color: "bg-emerald-600", hex: "#059669", textColor: "text-emerald-600", bgLight: "bg-emerald-50", borderColor: "border-emerald-200", borderDark: "border-emerald-600" }, { slug: "preview", label: "Preview", runningLabel: "Building Preview", icon: Eye, color: "bg-gray-600", hex: "#4b5563", textColor: "text-gray-600", bgLight: "bg-gray-50", borderColor: "border-gray-200", borderDark: "border-gray-600" }, { slug: "export", label: "Export", runningLabel: "Exporting", icon: FileDown, color: "bg-indigo-700", hex: "#4338ca", textColor: "text-indigo-700", bgLight: "bg-indigo-50", borderColor: "border-indigo-200", borderDark: "border-indigo-700" }, @@ -52,7 +54,8 @@ export const STAGE_DESCRIPTIONS: Record = { captions: "Create descriptive captions for images to improve accessibility.", glossary: "Build a glossary of key terms and definitions found in the text.", toc: "Generate and customize the table of contents for the book navigation.", - "text-and-speech": "Translate the book content and generate audio narration.", + translation: "Translate the book content and generate audio narration.", + speech: "Generate audio narration for the book content.", validation: "Run whole-book validation checks and configure accessibility assessment settings.", preview: "Package and preview the final ADT web application.", export: "Export packaged ADTs and related artifacts for delivery.", @@ -63,7 +66,7 @@ export const STAGES_WITH_PAGES = new Set([ "storyboard", "quizzes", "captions", - "text-and-speech", + "translation", ]) const STAGE_SLUG_SET = new Set(STAGES.map((stage) => stage.slug)) diff --git a/apps/studio/src/components/pipeline/stages/BookView.tsx b/apps/studio/src/components/pipeline/stages/BookView.tsx index 20b3f8ea..b479cdd7 100644 --- a/apps/studio/src/components/pipeline/stages/BookView.tsx +++ b/apps/studio/src/components/pipeline/stages/BookView.tsx @@ -13,7 +13,8 @@ interface ViewProps { } export function BookView({ bookLabel }: ViewProps) { - const overviewSteps = getBookOverviewStages() + // Filter out "speech" — it's combined with "translation" into one "Text & Speech" card + const overviewSteps = getBookOverviewStages().filter((s) => s.slug !== "speech") const { stageState, queueRun } = useBookRun() const { apiKey, hasApiKey, azureKey, azureRegion, geminiKey } = useApiKey() const { data: accessibilityAssessment } = useAccessibilityAssessment(bookLabel) @@ -25,9 +26,12 @@ export function BookView({ bookLabel }: ViewProps) { const state = stageState(stage.slug) if (state === "running" || state === "queued") return + // "Text & Speech" card runs translation → speech + const toStage = stage.slug === "translation" ? "speech" : stage.slug + queueRun({ fromStage: stage.slug, - toStage: stage.slug, + toStage, apiKey, providerCredentials: { azure: { key: azureKey, region: azureRegion }, @@ -40,10 +44,29 @@ export function BookView({ bookLabel }: ViewProps) {
{overviewSteps.map((step, index) => { const isLast = index === overviewSteps.length - 1 - const state = step.slug === "validation" && validationCompleted ? "done" : stageState(step.slug) + + // For "translation", combine with speech stage state + const isTextAndSpeech = step.slug === "translation" + const translationState = stageState("translation") + const speechState = stageState("speech") + + const state = step.slug === "validation" && validationCompleted + ? "done" + : isTextAndSpeech + ? (translationState === "running" || translationState === "queued" || speechState === "running" || speechState === "queued") + ? "running" + : (translationState === "done" && speechState === "done") + ? "done" + : (translationState === "error" || speechState === "error") + ? "error" + : translationState + : stageState(step.slug) const isRunning = step.slug !== "validation" && (state === "running" || state === "queued") const stageCompleted = state === "done" const showRunButton = isPipelineStage(step) && step.slug !== "preview" + const hasError = isTextAndSpeech + ? translationState === "error" || speechState === "error" + : undefined return (
@@ -54,6 +77,8 @@ export function BookView({ bookLabel }: ViewProps) { > 0 const glossaryPending = stageState("glossary") === "done" && glossary.isLoading - const textAndSpeechStageDone = stageState("text-and-speech") === "done" + const speechStageDone = stageState("speech") === "done" + const translationStageDone = stageState("translation") === "done" const ttsAvailable = hasLanguageEntries(tts.data?.languages, sessionLanguage) - const ttsPending = textAndSpeechStageDone && tts.isLoading + const ttsPending = speechStageDone && tts.isLoading const translationAvailable = hasLanguageEntries(textCatalog.data?.translations, sessionLanguage) - const translationPending = textAndSpeechStageDone && textCatalog.isLoading + const translationPending = translationStageDone && textCatalog.isLoading const easyReadAvailable = false const resolvedResults = useMemo(() => { @@ -387,7 +388,8 @@ export function PreviewValidationCard({ explicitStatus: draftResults[criterion.id]?.status, glossaryAvailable, glossaryPending, - textAndSpeechStageDone, + speechStageDone, + translationStageDone, ttsAvailable, ttsPending, sessionLanguage, @@ -408,7 +410,8 @@ export function PreviewValidationCard({ glossaryAvailable, glossaryPending, sessionLanguage, - textAndSpeechStageDone, + speechStageDone, + translationStageDone, currentPage.hasActivity, currentPage.hasImages, currentPage.signLanguageEnabled, diff --git a/apps/studio/src/components/pipeline/stages/index.ts b/apps/studio/src/components/pipeline/stages/index.ts index 65c42e65..4a6e8b62 100644 --- a/apps/studio/src/components/pipeline/stages/index.ts +++ b/apps/studio/src/components/pipeline/stages/index.ts @@ -6,6 +6,8 @@ export { CaptionsView } from "./captions/CaptionsView" export { GlossaryView } from "./glossary/GlossaryView" export { TocView } from "./toc/TocView" export { TranslationsView } from "./translations/TranslationsView" +export { SpeechView } from "./speech/SpeechView" +export { TranslationStageView } from "./translations/TranslationStageView" export { PreviewView } from "./PreviewView" export { ValidationView } from "./ValidationView" export { ExportView } from "./ExportView" diff --git a/apps/studio/src/components/pipeline/stages/speech/SpeechSettings.tsx b/apps/studio/src/components/pipeline/stages/speech/SpeechSettings.tsx new file mode 100644 index 00000000..fd3a0f72 --- /dev/null +++ b/apps/studio/src/components/pipeline/stages/speech/SpeechSettings.tsx @@ -0,0 +1,274 @@ +import { useState, useEffect } from "react" +import { createPortal } from "react-dom" +import { Save } from "lucide-react" +import { Button } from "@/components/ui/button" +import { Input } from "@/components/ui/input" +import { Label } from "@/components/ui/label" +import { useBookConfig, useUpdateBookConfig } from "@/hooks/use-book-config" +import { useActiveConfig } from "@/hooks/use-debug" +import { SpeechPromptsEditor } from "../translations/components/SpeechPromptsEditor" +import { VoiceMappingsEditor } from "../translations/components/VoiceMappingsEditor" +import { useLingui } from "@lingui/react/macro" + +export function SpeechSettings({ bookLabel, headerTarget, tab = "general" }: { bookLabel: string; headerTarget?: HTMLDivElement | null; tab?: string }) { + const { t } = useLingui() + const { data: bookConfigData } = useBookConfig(bookLabel) + const { data: activeConfigData } = useActiveConfig(bookLabel) + const updateConfig = useUpdateBookConfig() + + // Speech settings + const [speechModel, setSpeechModel] = useState("") + const [format, setFormat] = useState("") + const [defaultProvider, setDefaultProvider] = useState("openai") + const [openaiModel, setOpenaiModel] = useState("") + const [openaiLanguages, setOpenaiLanguages] = useState("") + const [azureModel, setAzureModel] = useState("") + const [azureLanguages, setAzureLanguages] = useState("") + const [geminiModel, setGeminiModel] = useState("") + const [geminiLanguages, setGeminiLanguages] = useState("") + const [bitRate, setBitRate] = useState("") + const [sampleRate, setSampleRate] = useState("") + + const [dirty, setDirty] = useState>({}) + const markDirty = (field: string) => setDirty((prev) => ({ ...prev, [field]: true })) + + useEffect(() => { + if (!activeConfigData) return + const m = activeConfigData.merged as Record + if (m.speech && typeof m.speech === "object") { + const s = m.speech as Record + if (s.model) setSpeechModel(String(s.model)) + if (s.format) setFormat(String(s.format)) + if (s.default_provider) setDefaultProvider(String(s.default_provider)) + if (s.bit_rate) setBitRate(String(s.bit_rate)) + if (s.sample_rate) setSampleRate(String(s.sample_rate)) + if (s.providers && typeof s.providers === "object") { + const providers = s.providers as Record> + if (providers.openai) { + if (providers.openai.model) setOpenaiModel(String(providers.openai.model)) + if (Array.isArray(providers.openai.languages)) setOpenaiLanguages((providers.openai.languages as string[]).join(", ")) + } + if (providers.azure) { + if (providers.azure.model) setAzureModel(String(providers.azure.model)) + if (Array.isArray(providers.azure.languages)) setAzureLanguages((providers.azure.languages as string[]).join(", ")) + } + if (providers.gemini) { + if (providers.gemini.model) setGeminiModel(String(providers.gemini.model)) + if (Array.isArray(providers.gemini.languages)) setGeminiLanguages((providers.gemini.languages as string[]).join(", ")) + } + } + } + }, [activeConfigData]) + + const shouldWrite = (field: string) => + dirty[field] || (bookConfigData?.config && field in bookConfigData.config) + + const buildOverrides = () => { + const overrides: Record = {} + if (bookConfigData?.config) Object.assign(overrides, bookConfigData.config) + + if (shouldWrite("speech")) { + const existing = (bookConfigData?.config?.speech ?? {}) as Record + const openaiLangs = openaiLanguages.split(",").map((s) => s.trim()).filter(Boolean) + const azureLangs = azureLanguages.split(",").map((s) => s.trim()).filter(Boolean) + const geminiLangs = geminiLanguages.split(",").map((s) => s.trim()).filter(Boolean) + const providers: Record = {} + if (openaiModel.trim() || openaiLangs.length > 0) { + providers.openai = { + model: openaiModel.trim() || undefined, + languages: openaiLangs.length > 0 ? openaiLangs : undefined, + } + } + if (azureModel.trim() || azureLangs.length > 0) { + providers.azure = { + model: azureModel.trim() || undefined, + languages: azureLangs.length > 0 ? azureLangs : undefined, + } + } + if (geminiModel.trim() || geminiLangs.length > 0) { + providers.gemini = { + model: geminiModel.trim() || undefined, + languages: geminiLangs.length > 0 ? geminiLangs : undefined, + } + } + overrides.speech = { + ...existing, + model: speechModel.trim() || undefined, + format: format.trim() || undefined, + default_provider: defaultProvider || undefined, + providers: Object.keys(providers).length > 0 ? providers : undefined, + bit_rate: bitRate.trim() || undefined, + sample_rate: sampleRate.trim() ? Number(sampleRate.trim()) : undefined, + } + } + return overrides + } + + const saveOnly = async () => { + const overrides = buildOverrides() + updateConfig.mutate( + { label: bookLabel, config: overrides }, + { + onSuccess: () => { + setDirty({}) + }, + } + ) + } + + return ( +
+ {tab === "general" && ( +
+ {/* Provider Routing */} +
+

{t`Provider Routing`}

+
+ + +

{t`Provider used for languages not assigned to a specific provider.`}

+
+
+ + {/* OpenAI Provider */} +
+

{t`OpenAI`}

+
+ + { setOpenaiModel(e.target.value); markDirty("speech") }} + placeholder={t`e.g. gpt-4o-mini-tts`} + className="w-72 h-8 text-xs" + /> +
+
+ + { setOpenaiLanguages(e.target.value); markDirty("speech") }} + placeholder={t`e.g. en, fr`} + className="w-72 h-8 text-xs" + /> +

{t`Comma-separated language codes routed to OpenAI.`}

+
+
+ + {/* Azure Provider */} +
+

{t`Azure Speech`}

+
+ + { setAzureModel(e.target.value); markDirty("speech") }} + placeholder={t`e.g. azure-tts`} + className="w-72 h-8 text-xs" + /> +
+
+ + { setAzureLanguages(e.target.value); markDirty("speech") }} + placeholder={t`e.g. es, ta, si, sw`} + className="w-72 h-8 text-xs" + /> +

{t`Comma-separated language codes routed to Azure.`}

+
+
+ + {/* Gemini Provider */} +
+

{t`Gemini`}

+
+ + { setGeminiModel(e.target.value); markDirty("speech") }} + placeholder={t`e.g. gemini-2.5-pro-preview-tts`} + className="w-72 h-8 text-xs" + /> +
+
+ + { setGeminiLanguages(e.target.value); markDirty("speech") }} + placeholder={t`e.g. en, hi, ta`} + className="w-72 h-8 text-xs" + /> +

{t`Comma-separated language codes routed to Gemini.`}

+
+
+ + {/* Audio Settings */} +
+

{t`Audio Settings`}

+
+
+ + { setFormat(e.target.value); markDirty("speech") }} + placeholder={t`mp3`} + className="w-32 h-8 text-xs" + /> +
+
+ + { setBitRate(e.target.value); markDirty("speech") }} + placeholder={t`64k`} + className="w-32 h-8 text-xs" + /> +
+
+ + { setSampleRate(e.target.value); markDirty("speech") }} + placeholder={t`24000`} + className="w-32 h-8 text-xs" + /> +
+
+

+ {t`Gemini TTS outputs WAV audio in this integration; other providers continue using the configured format.`} +

+
+
+ )} + + {tab === "speech-prompts" && ( + + )} + + {tab === "voices" && ( + + )} + + {headerTarget && tab === "general" && createPortal( + , + headerTarget + )} +
+ ) +} diff --git a/apps/studio/src/components/pipeline/stages/speech/SpeechView.tsx b/apps/studio/src/components/pipeline/stages/speech/SpeechView.tsx new file mode 100644 index 00000000..d9010689 --- /dev/null +++ b/apps/studio/src/components/pipeline/stages/speech/SpeechView.tsx @@ -0,0 +1,1052 @@ +import { useState, useEffect, useRef, useCallback, useMemo } from "react" +import { Volume2, Languages, Loader2, Play, Pause, WandSparkles, RefreshCw, MoreVertical, Clock, Trash2, Settings } from "lucide-react" +import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query" +import { Link } from "@tanstack/react-router" +import { api, getAudioUrl, BASE_URL } from "@/api/client" +import type { TextCatalogEntry } from "@/api/client" +import { useActiveConfig } from "@/hooks/use-debug" +import { useBook } from "@/hooks/use-books" +import { useStepHeader } from "../../components/StepViewRouter" +import { useBookRun } from "@/hooks/use-book-run" +import { useApiKey } from "@/hooks/use-api-key" +import { useVirtualizer } from "@tanstack/react-virtual" +import { cn } from "@/lib/utils" +import { normalizeLocale } from "@/lib/languages" +import { languageUsesSpeechProvider } from "@/lib/speech-routing" +import { Alert, AlertDescription } from "@/components/ui/alert" +import { Button } from "@/components/ui/button" +import { Card, CardHeader, CardTitle, CardContent } from "@/components/ui/card" +import { resolveTranslationLanguageState } from "../translations/lib/translations-view-state" +import { msg } from "@lingui/core/macro" +import { useLingui } from "@lingui/react/macro" + +const IMAGE_ID_RE = /_im\d{3}/ +function isImageEntry(id: string): boolean { + return IMAGE_ID_RE.test(id) +} + +const ANSWER_ID_RE = /_ans_/ +function isAnswerEntry(id: string): boolean { + return ANSWER_ID_RE.test(id) +} + +type CatalogFilter = "all" | "text" | "captions" | "activities" | "answers" | "glossary" | "quizzes" + +function getEntryType(id: string): CatalogFilter { + if (id.startsWith("gl")) return "glossary" + if (id.startsWith("qz")) return "quizzes" + if (ANSWER_ID_RE.test(id)) return "answers" + if (IMAGE_ID_RE.test(id)) return "captions" + if (/_ac\d{3}/.test(id)) return "activities" + return "text" +} + +const langNames = new Intl.DisplayNames(["en"], { type: "language" }) +function displayLang(code: string): string { + try { return langNames.of(code) ?? code } catch { return code } +} + +function LanguageSummary({ bookLanguage, outputLanguages }: { bookLanguage: string | null; outputLanguages: string[] }) { + const { t } = useLingui() + return ( + <> +
+
{t`Book Language`}
+

+ {bookLanguage ? ( + <>{displayLang(bookLanguage)} ({bookLanguage}) + ) : ( + {t`Not detected`} + )} +

+
+
+
{t`Output Languages`}
+ {outputLanguages.length > 0 ? ( +
+ {outputLanguages.map((lang) => ( + + {displayLang(lang)} ({lang}) + + ))} +
+ ) : bookLanguage ? ( +
+ + {displayLang(bookLanguage)} ({bookLanguage}) + +
+ ) : ( +

{t`Not detected`}

+ )} +
+ + ) +} + +export function SpeechView({ bookLabel, selectedPageId, onSelectPage }: { bookLabel: string; selectedPageId?: string; onSelectPage?: (pageId: string | null) => void }) { + const { t, i18n } = useLingui() + const { setExtra } = useStepHeader() + const { data: activeConfigData } = useActiveConfig(bookLabel) + const { data: book, isLoading: isBookLoading } = useBook(bookLabel) + const queryClient = useQueryClient() + const { stageState, queueRun, error: runError } = useBookRun() + const { apiKey, hasApiKey, azureKey, azureRegion, geminiKey } = useApiKey() + // Translation state (for initial cards & control panel) + const translationState = stageState("translation") + const translationDone = translationState === "done" + const isTranslationRunning = translationState === "running" || translationState === "queued" + + // Speech state + const speechState = stageState("speech") + const speechDone = speechState === "done" + const hasStageError = speechState === "error" + const isRunning = speechState === "running" || speechState === "queued" + + const handleRunTranslations = useCallback(() => { + if (!hasApiKey || isTranslationRunning) return + queueRun({ + fromStage: "translation", + toStage: "translation", + apiKey, + providerCredentials: { + azure: { key: azureKey, region: azureRegion }, + geminiApiKey: geminiKey, + }, + }) + }, [hasApiKey, isTranslationRunning, apiKey, azureKey, azureRegion, geminiKey, queueRun]) + + const handleRunSpeech = useCallback(() => { + if (!hasApiKey || isRunning) return + queueRun({ + fromStage: "speech", + toStage: "speech", + apiKey, + providerCredentials: { + azure: { key: azureKey, region: azureRegion }, + geminiApiKey: geminiKey, + }, + }) + }, [hasApiKey, isRunning, apiKey, azureKey, azureRegion, geminiKey, queueRun]) + + const handleRunTranslationAndSpeech = useCallback(() => { + if (!hasApiKey || isTranslationRunning) return + queueRun({ + fromStage: "translation", + toStage: "speech", + apiKey, + providerCredentials: { + azure: { key: azureKey, region: azureRegion }, + geminiApiKey: geminiKey, + }, + }) + }, [hasApiKey, isTranslationRunning, apiKey, azureKey, azureRegion, geminiKey, queueRun]) + + const { data: catalog, isLoading } = useQuery({ + queryKey: ["books", bookLabel, "text-catalog"], + queryFn: () => api.getTextCatalog(bookLabel), + enabled: !!bookLabel, + }) + + const { data: ttsData } = useQuery({ + queryKey: ["books", bookLabel, "tts"], + queryFn: () => api.getTTS(bookLabel), + enabled: !!bookLabel, + }) + + const merged = activeConfigData?.merged as Record | undefined + const speechConfig = merged?.speech + const outputLanguages = Array.from( + new Set(((merged?.output_languages as string[] | undefined) ?? []).map((code) => normalizeLocale(code))) + ) + const bookLanguage = book?.languageCode ?? book?.metadata?.language_code ?? null + const configuredEditingLanguage = merged?.editing_language as string | undefined + + const hasExplicitOutputLanguages = outputLanguages.length > 0 + + const [selectedLang, setSelectedLang] = useState(null) + const [generateErrorById, setGenerateErrorById] = useState>({}) + const [catalogFilter, setCatalogFilter] = useState("all") + + useEffect(() => { + if (hasExplicitOutputLanguages && outputLanguages.length > 0 && !selectedLang) { + setSelectedLang(outputLanguages[0]) + } + }, [outputLanguages.length, hasExplicitOutputLanguages]) + + const entries = catalog?.entries ?? [] + const filteredByPage = selectedPageId + ? entries.filter((e) => e.id.startsWith(selectedPageId + "_")) + : entries + const displayEntries = catalogFilter === "all" + ? filteredByPage + : filteredByPage.filter((e) => getEntryType(e.id) === catalogFilter) + + const typeCounts = useMemo(() => { + const counts: Record = { all: 0, text: 0, captions: 0, activities: 0, answers: 0, glossary: 0, quizzes: 0 } + for (const e of filteredByPage) { + counts[getEntryType(e.id)]++ + counts.all++ + } + return counts + }, [filteredByPage]) + + const { editingLanguage, isSourceLang: isSelectedSourceLang } = resolveTranslationLanguageState({ + selectedLang, + configuredEditingLanguage, + bookLanguage, + isBookLoading, + }) + const isSourceLang = !hasExplicitOutputLanguages || isSelectedSourceLang + + // Translation data (read-only in speech view) + const translationData = selectedLang ? catalog?.translations?.[selectedLang] : undefined + const translatedEntries = isSourceLang ? entries : (translationData?.entries ?? []) + const translatedMap = new Map(translatedEntries.map((e) => [e.id, e.text])) + + const audioLang = selectedLang ?? + (hasExplicitOutputLanguages ? (outputLanguages[0] ?? editingLanguage) : editingLanguage) + const currentLanguageUsesGemini = + !!audioLang && languageUsesSpeechProvider(audioLang, "gemini", speechConfig) + const geminiRoutedLanguages = ( + outputLanguages.length > 0 ? outputLanguages : editingLanguage ? [editingLanguage] : [] + ).filter((language, index, array) => + languageUsesSpeechProvider(language, "gemini", speechConfig) && array.indexOf(language) === index + ) + const allowGeminiPartialView = + hasStageError && + geminiRoutedLanguages.length > 0 + const showRunCard = (!speechDone || isRunning) && !allowGeminiPartialView + + // Build audio lookup + const audioMap = new Map() + if (ttsData && audioLang && ttsData.languages[audioLang]) { + for (const e of ttsData.languages[audioLang].entries) { + audioMap.set(e.textId, { fileName: e.fileName, voice: e.voice }) + } + } + const totalAudioFiles = ttsData + ? Object.values(ttsData.languages).reduce((sum, lang) => sum + lang.entries.length, 0) + : 0 + const generatedAudioCount = displayEntries.filter((entry) => audioMap.has(entry.id)).length + const missingAudioCount = Math.max(displayEntries.length - generatedAudioCount, 0) + + // Speech config + const speechCfg = speechConfig as { default_provider?: string; voice?: string; model?: string } | undefined + const defaultProvider = speechCfg?.default_provider ?? "openai" + const defaultVoice = speechCfg?.voice ?? "alloy" + const defaultModel = speechCfg?.model ?? (defaultProvider === "openai" ? "gpt-4o-mini-tts" : undefined) + const providerLabel = defaultProvider.charAt(0).toUpperCase() + defaultProvider.slice(1) + + const scrollRef = useRef(null) + const virtualizer = useVirtualizer({ + count: displayEntries.length, + getScrollElement: () => scrollRef.current, + estimateSize: () => 140, + overscan: 3, + }) + + useEffect(() => { + virtualizer.scrollToOffset(0) + }, [catalogFilter, selectedLang, selectedPageId]) + + const generateAudioMutation = useMutation({ + mutationFn: async (variables: { textId: string; language: string }) => { + if (!geminiKey) { + throw new Error(i18n._(msg`Gemini API key is required to generate audio.`)) + } + return api.generateGeminiTTSForItem( + bookLabel, + variables.textId, + variables.language, + { + geminiApiKey: geminiKey, + openaiApiKey: apiKey || undefined, + azure: azureKey && azureRegion + ? { key: azureKey, region: azureRegion } + : undefined, + } + ) + }, + onMutate: (variables) => { + setGenerateErrorById((prev) => { + if (!(variables.textId in prev)) return prev + const next = { ...prev } + delete next[variables.textId] + return next + }) + }, + onSuccess: async () => { + await Promise.all([ + queryClient.invalidateQueries({ queryKey: ["books", bookLabel, "tts"] }), + queryClient.invalidateQueries({ queryKey: ["books", bookLabel, "step-status"] }), + ]) + }, + onError: (error, variables) => { + setGenerateErrorById((prev) => ({ + ...prev, + [variables.textId]: + error instanceof Error ? error.message : String(error), + })) + queryClient.invalidateQueries({ queryKey: ["books", bookLabel, "step-status"] }) + }, + }) + + const handleGenerateAudio = useCallback( + (textId: string) => { + if (!audioLang || !currentLanguageUsesGemini) return + generateAudioMutation.mutate({ textId, language: audioLang }) + }, + [audioLang, currentLanguageUsesGemini, generateAudioMutation] + ) + + const clearSpeechMutation = useMutation({ + mutationFn: () => api.clearStage(bookLabel, "speech"), + onSuccess: async () => { + await Promise.all([ + queryClient.invalidateQueries({ queryKey: ["books", bookLabel, "tts"] }), + queryClient.invalidateQueries({ queryKey: ["books", bookLabel, "step-status"] }), + ]) + setGenerateErrorById({}) + }, + }) + + useEffect(() => { + if (!catalog) return + setExtra( +
+ {t`${String(displayEntries.length)} texts`} + {currentLanguageUsesGemini ? ( + + {t`${String(generatedAudioCount)}/${String(displayEntries.length)} audio`} + + ) : totalAudioFiles > 0 && ( + {t`${String(totalAudioFiles)} audio`} + )} + {currentLanguageUsesGemini && missingAudioCount > 0 && ( + + {t`${missingAudioCount} missing`} + + )} +
+ ) + return () => setExtra(null) + }, [catalog, t, displayEntries.length, totalAudioFiles, currentLanguageUsesGemini, generatedAudioCount, missingAudioCount]) + + if (!showRunCard && isLoading) { + return ( +
+ + {t`Loading text catalog...`} +
+ ) + } + + // Show styled initial cards when speech hasn't been generated + if (showRunCard || !catalog || entries.length === 0) { + const resolvedBookLang = editingLanguage || bookLanguage + return ( +
+ {/* Translation info card */} + + +
+ +
+ + {t`Translation`} + {translationDone && ({t`done`})} + +
+ + + {!translationDone && ( +
+ + + {t`Add Translations`} + +
+ +
+ )} + + + + {/* Speech card */} + + +
+ +
+ {t`Speech`} +
+ +
+
{t`Voice`}
+

{providerLabel} · {defaultVoice} + {defaultModel && <>{" "}· {defaultModel}} +

+
+
+ + + {t`Choose Provider`} + +
+ +
+ + +
+ ) + } + + const showAllButton = selectedPageId ? ( +
+ +
+ ) : null + + return ( +
+
+ {allowGeminiPartialView && runError && ( + + + {runError} + + + )} + + {/* Language tabs */} + {outputLanguages.length > 1 && ( +
+ {outputLanguages.map((lang) => ( + + ))} +
+ )} + + {/* Catalog type filters */} + {filteredByPage.length > 0 && ( +
+ {([ + ["all", t`All`], + ["text", t`Text`], + ["captions", t`Captions`], + ["activities", t`Activities`], + ["answers", t`Answers`], + ["glossary", t`Glossary`], + ["quizzes", t`Quizzes`], + ] as const).map(([key, label]) => { + const count = typeCounts[key] + if (key !== "all" && count === 0) return null + return ( + + ) + })} +
+ )} + + {/* Translation & Speech control panels */} +
+ {/* Translation panel */} +
+
+ +

{t`Translation`}

+ {isTranslationRunning ? ( + + ) : ( + + )} +
+
+ {bookLanguage && {displayLang(bookLanguage)}} + {outputLanguages.length > 0 && ( + + {bookLanguage && } + {outputLanguages.map((l) => displayLang(l)).join(", ")} + + )} +
+
+ + + {t`Languages`} + + + + {t`Prompt`} + +
+
+ + {/* Speech panel */} +
+
+ +

+ {t`Speech`} + + {currentLanguageUsesGemini + ? t`${String(generatedAudioCount)}/${String(displayEntries.length)}` + : t`${String(totalAudioFiles)} files`} + +

+ {isRunning ? ( + + ) : ( +
+ + +
+ )} +
+
+ {providerLabel} · {defaultVoice} + {defaultModel && <>{" "}· {defaultModel}} +
+ + + {t`Choose Provider`} + +
+
+
+ + {/* Entries */} + {selectedPageId && displayEntries.length === 0 && entries.length > 0 ? ( +
+
+ +
+

{t`No speech entries for this page`}

+

{t`This page has no text entries with audio`}

+
+ ) : ( +
+
+ {virtualizer.getVirtualItems().map((virtualRow) => { + const entry = displayEntries[virtualRow.index] + const audio = audioMap.get(entry.id) + const isImg = isImageEntry(entry.id) + const isAnswer = isAnswerEntry(entry.id) + const translated = translatedMap.get(entry.id) + + return ( +
+
+ 0} + onGenerate={handleGenerateAudio} + isGenerating={ + generateAudioMutation.isPending && + generateAudioMutation.variables?.textId === entry.id && + generateAudioMutation.variables?.language === audioLang + } + errorMessage={generateErrorById[entry.id]} + /> +
+
+ ) + })} +
+ {showAllButton} +
+ )} +
+ ) +} + +/* ---------- Entry card ---------- */ + +function SpeechEntryCard({ + entry, + translated, + editingLanguage, + selectedLang, + audio, + audioLang, + bookLabel, + isImg, + isAnswer, + canGenerate, + hasGeminiKey, + onGenerate, + isGenerating, + errorMessage, +}: { + entry: TextCatalogEntry + translated: string | null + editingLanguage: string + selectedLang: string | null + audio?: { fileName: string; voice: string } + audioLang: string | null + bookLabel: string + isImg: boolean + isAnswer: boolean + canGenerate: boolean + hasGeminiKey: boolean + onGenerate: (textId: string) => void + isGenerating: boolean + errorMessage?: string +}) { + const { t } = useLingui() + + return ( +
+ {/* Header: ID + badges + action menu */} +
+ + {entry.id} + {isAnswer && {t`Answer`}} + + {audio && audioLang && ( + + {audio.voice} + + )} + {audio && canGenerate && ( + onGenerate(entry.id)} + isRegenerating={isGenerating} + hasGeminiKey={hasGeminiKey} + /> + )} +
+ + {/* Text content */} +
+ {isImg && ( + + )} +
+ + {displayLang(editingLanguage)} + +

{entry.text}

+
+ {translated !== null && selectedLang && ( +
+ + {displayLang(selectedLang)} + +

{translated || {t`Pending...`}}

+
+ )} +
+ + {/* Audio section */} + {!isAnswer && ( +
+ {audio && audioLang ? ( + + ) : canGenerate ? ( +
+ + {errorMessage && ( +

+ {errorMessage} +

+ )} +
+ ) : ( +
+ {t`No audio`} +
+ )} + + {/* Timecode placeholder */} + {audio && audioLang && ( +
+ + {t`Word-level timecodes`} +
+ )} +
+ )} +
+ ) +} + +/* ---------- Entry action menu ---------- */ + +function EntryMenu({ onRegenerate, isRegenerating, hasGeminiKey }: { onRegenerate: () => void; isRegenerating: boolean; hasGeminiKey: boolean }) { + const { t } = useLingui() + const [open, setOpen] = useState(false) + const ref = useRef(null) + + useEffect(() => { + if (!open) return + const handleClick = (e: MouseEvent) => { + if (ref.current && !ref.current.contains(e.target as Node)) setOpen(false) + } + document.addEventListener("mousedown", handleClick) + return () => document.removeEventListener("mousedown", handleClick) + }, [open]) + + return ( +
+ + {open && ( +
+ +
+ )} +
+ ) +} + +/* ---------- Waveform player ---------- */ + +let activePlayer: { stop: () => void } | null = null + +function formatTime(s: number): string { + const m = Math.floor(s / 60) + const sec = Math.floor(s % 60) + return `${m}:${sec.toString().padStart(2, "0")}` +} + +function computePeaks(buffer: AudioBuffer, barCount: number): number[] { + const data = buffer.getChannelData(0) + const step = Math.max(1, Math.floor(data.length / barCount)) + const peaks: number[] = [] + for (let i = 0; i < barCount; i++) { + let max = 0 + const start = i * step + const end = Math.min(start + step, data.length) + for (let j = start; j < end; j++) { + const v = Math.abs(data[j]) + if (v > max) max = v + } + peaks.push(max) + } + const maxPeak = Math.max(...peaks, 0.01) + return peaks.map((p) => p / maxPeak) +} + +const BAR_COUNT = 60 + +function WaveformPlayer({ audioUrl }: { audioUrl: string }) { + const [playing, setPlaying] = useState(false) + const [progress, setProgress] = useState(0) + const [duration, setDuration] = useState(0) + const [peaks, setPeaks] = useState(null) + const audioRef = useRef(null) + const rafRef = useRef(0) + const fetchedRef = useRef(false) + + // Fetch waveform data eagerly + useEffect(() => { + if (fetchedRef.current) return + fetchedRef.current = true + fetch(audioUrl) + .then((r) => r.arrayBuffer()) + .then((buf) => new AudioContext().decodeAudioData(buf)) + .then((decoded) => { + setPeaks(computePeaks(decoded, BAR_COUNT)) + setDuration(decoded.duration) + }) + .catch(() => {}) + }, [audioUrl]) + + const tick = useCallback(() => { + if (audioRef.current) { + setProgress(audioRef.current.currentTime) + } + rafRef.current = requestAnimationFrame(tick) + }, []) + + const stop = useCallback(() => { + if (audioRef.current) { + audioRef.current.pause() + audioRef.current.currentTime = 0 + } + cancelAnimationFrame(rafRef.current) + setPlaying(false) + setProgress(0) + }, []) + + const toggle = () => { + if (!audioRef.current) { + audioRef.current = new Audio(audioUrl) + audioRef.current.addEventListener("loadedmetadata", () => { + setDuration(audioRef.current!.duration) + }) + audioRef.current.addEventListener("ended", () => { + activePlayer = null + setPlaying(false) + setProgress(0) + cancelAnimationFrame(rafRef.current) + }) + } + if (playing) { + activePlayer = null + audioRef.current.pause() + cancelAnimationFrame(rafRef.current) + setPlaying(false) + } else { + if (activePlayer) activePlayer.stop() + activePlayer = { stop } + audioRef.current.play() + setPlaying(true) + rafRef.current = requestAnimationFrame(tick) + } + } + + const seek = (e: React.MouseEvent) => { + if (!audioRef.current || !duration) return + const rect = e.currentTarget.getBoundingClientRect() + const pct = Math.max(0, Math.min(1, (e.clientX - rect.left) / rect.width)) + audioRef.current.currentTime = pct * duration + setProgress(pct * duration) + } + + useEffect(() => { + return () => { + cancelAnimationFrame(rafRef.current) + if (audioRef.current) { + audioRef.current.pause() + audioRef.current = null + } + if (activePlayer?.stop === stop) activePlayer = null + } + }, [stop]) + + const pct = duration > 0 ? (progress / duration) * 100 : 0 + + return ( +
+ + +
+
+ {peaks ? ( +
+ {peaks.map((p, i) => { + const barPct = ((i + 0.5) / BAR_COUNT) * 100 + return ( +
+ ) + })} +
+ ) : ( +
+ +
+ )} +
+
+ + {duration > 0 ? formatTime(progress) : "0:00"} + + + {duration > 0 ? formatTime(duration) : "—"} + +
+
+
+ ) +} diff --git a/apps/studio/src/components/pipeline/stages/translations/TranslationStageView.tsx b/apps/studio/src/components/pipeline/stages/translations/TranslationStageView.tsx new file mode 100644 index 00000000..37a21433 --- /dev/null +++ b/apps/studio/src/components/pipeline/stages/translations/TranslationStageView.tsx @@ -0,0 +1,57 @@ +import { useState } from "react" +import { Languages, Volume2 } from "lucide-react" +import { TranslationsView } from "./TranslationsView" +import { SpeechView } from "../speech/SpeechView" +import { cn } from "@/lib/utils" +import { useLingui } from "@lingui/react/macro" + +type TabId = "translation" | "speech" + +export function TranslationStageView({ bookLabel, selectedPageId, onSelectPage }: { bookLabel: string; selectedPageId?: string; onSelectPage?: (pageId: string | null) => void }) { + const { t } = useLingui() + const [activeTab, setActiveTab] = useState("translation") + + return ( +
+ {/* Tab bar */} +
+ + +
+
+ + {/* Tab content */} +
+ {activeTab === "translation" ? ( + + ) : ( + + )} +
+
+ ) +} diff --git a/apps/studio/src/components/pipeline/stages/translations/TranslationsSettings.tsx b/apps/studio/src/components/pipeline/stages/translations/TranslationsSettings.tsx index 778fa978..82f84eef 100644 --- a/apps/studio/src/components/pipeline/stages/translations/TranslationsSettings.tsx +++ b/apps/studio/src/components/pipeline/stages/translations/TranslationsSettings.tsx @@ -1,7 +1,7 @@ import { useState, useEffect } from "react" import { createPortal } from "react-dom" import { useNavigate } from "@tanstack/react-router" -import { Play } from "lucide-react" +import { Play, Save } from "lucide-react" import { Button } from "@/components/ui/button" import { Dialog, @@ -11,10 +11,9 @@ import { DialogHeader, DialogTitle, } from "@/components/ui/dialog" -import { Input } from "@/components/ui/input" -import { Label } from "@/components/ui/label" import { useBookConfig, useUpdateBookConfig } from "@/hooks/use-book-config" import { useActiveConfig } from "@/hooks/use-debug" +import { useBook } from "@/hooks/use-books" import { useApiKey } from "@/hooks/use-api-key" import { api } from "@/api/client" import { PromptViewer } from "@/components/pipeline/components/PromptViewer" @@ -22,14 +21,13 @@ import { LanguagePicker } from "@/components/LanguagePicker" import { useBookRun } from "@/hooks/use-book-run" import { useStepConfig } from "@/hooks/use-step-config" import { normalizeLocale } from "@/lib/languages" -import { SpeechPromptsEditor } from "./components/SpeechPromptsEditor" -import { VoiceMappingsEditor } from "./components/VoiceMappingsEditor" import { useLingui } from "@lingui/react/macro" export function TranslationsSettings({ bookLabel, headerTarget, tab = "general" }: { bookLabel: string; headerTarget?: HTMLDivElement | null; tab?: string }) { const { t } = useLingui() const { data: bookConfigData } = useBookConfig(bookLabel) const { data: activeConfigData } = useActiveConfig(bookLabel) + const { data: book } = useBook(bookLabel) const updateConfig = useUpdateBookConfig() const { apiKey, hasApiKey, azureKey, azureRegion, geminiKey } = useApiKey() const { queueRun } = useBookRun() @@ -39,56 +37,23 @@ export function TranslationsSettings({ bookLabel, headerTarget, tab = "general" const [outputLanguages, setOutputLanguages] = useState>(new Set()) const [promptDraft, setPromptDraft] = useState(null) - // Speech settings - const [speechModel, setSpeechModel] = useState("") - const [format, setFormat] = useState("") - const [defaultProvider, setDefaultProvider] = useState("openai") - const [openaiModel, setOpenaiModel] = useState("") - const [openaiLanguages, setOpenaiLanguages] = useState("") - const [azureModel, setAzureModel] = useState("") - const [azureLanguages, setAzureLanguages] = useState("") - const [geminiModel, setGeminiModel] = useState("") - const [geminiLanguages, setGeminiLanguages] = useState("") - const [bitRate, setBitRate] = useState("") - const [sampleRate, setSampleRate] = useState("") - const [dirty, setDirty] = useState>({}) const markDirty = (field: string) => setDirty((prev) => ({ ...prev, [field]: true })) const merged = activeConfigData?.merged as Record | undefined const translation = useStepConfig(merged, "translation", markDirty) + const bookLanguage = book?.languageCode ?? book?.metadata?.language_code ?? null useEffect(() => { if (!activeConfigData) return const m = activeConfigData.merged as Record - if (Array.isArray(m.output_languages)) { + if (Array.isArray(m.output_languages) && m.output_languages.length > 0) { const normalized = (m.output_languages as string[]).map((code) => normalizeLocale(code)) setOutputLanguages(new Set(normalized)) + } else if (bookLanguage) { + setOutputLanguages(new Set([normalizeLocale(bookLanguage)])) } - if (m.speech && typeof m.speech === "object") { - const s = m.speech as Record - if (s.model) setSpeechModel(String(s.model)) - if (s.format) setFormat(String(s.format)) - if (s.default_provider) setDefaultProvider(String(s.default_provider)) - if (s.bit_rate) setBitRate(String(s.bit_rate)) - if (s.sample_rate) setSampleRate(String(s.sample_rate)) - if (s.providers && typeof s.providers === "object") { - const providers = s.providers as Record> - if (providers.openai) { - if (providers.openai.model) setOpenaiModel(String(providers.openai.model)) - if (Array.isArray(providers.openai.languages)) setOpenaiLanguages((providers.openai.languages as string[]).join(", ")) - } - if (providers.azure) { - if (providers.azure.model) setAzureModel(String(providers.azure.model)) - if (Array.isArray(providers.azure.languages)) setAzureLanguages((providers.azure.languages as string[]).join(", ")) - } - if (providers.gemini) { - if (providers.gemini.model) setGeminiModel(String(providers.gemini.model)) - if (Array.isArray(providers.gemini.languages)) setGeminiLanguages((providers.gemini.languages as string[]).join(", ")) - } - } - } - }, [activeConfigData]) + }, [activeConfigData, bookLanguage]) const shouldWrite = (field: string) => dirty[field] || (bookConfigData?.config && field in bookConfigData.config) @@ -103,41 +68,8 @@ export function TranslationsSettings({ bookLabel, headerTarget, tab = "general" } if (shouldWrite("output_languages")) { const normalized = Array.from(outputLanguages).map((code) => normalizeLocale(code)) - overrides.output_languages = normalized.length > 0 ? normalized : undefined - } - if (shouldWrite("speech")) { - const existing = (bookConfigData?.config?.speech ?? {}) as Record - const openaiLangs = openaiLanguages.split(",").map((s) => s.trim()).filter(Boolean) - const azureLangs = azureLanguages.split(",").map((s) => s.trim()).filter(Boolean) - const geminiLangs = geminiLanguages.split(",").map((s) => s.trim()).filter(Boolean) - const providers: Record = {} - if (openaiModel.trim() || openaiLangs.length > 0) { - providers.openai = { - model: openaiModel.trim() || undefined, - languages: openaiLangs.length > 0 ? openaiLangs : undefined, - } - } - if (azureModel.trim() || azureLangs.length > 0) { - providers.azure = { - model: azureModel.trim() || undefined, - languages: azureLangs.length > 0 ? azureLangs : undefined, - } - } - if (geminiModel.trim() || geminiLangs.length > 0) { - providers.gemini = { - model: geminiModel.trim() || undefined, - languages: geminiLangs.length > 0 ? geminiLangs : undefined, - } - } - overrides.speech = { - ...existing, - model: speechModel.trim() || undefined, - format: format.trim() || undefined, - default_provider: defaultProvider || undefined, - providers: Object.keys(providers).length > 0 ? providers : undefined, - bit_rate: bitRate.trim() || undefined, - sample_rate: sampleRate.trim() ? Number(sampleRate.trim()) : undefined, - } + const isOnlyBookLang = bookLanguage && normalized.length === 1 && normalizeLocale(normalized[0]) === normalizeLocale(bookLanguage) + overrides.output_languages = normalized.length > 0 && !isOnlyBookLang ? normalized : undefined } return overrides } @@ -153,6 +85,23 @@ export function TranslationsSettings({ bookLabel, headerTarget, tab = "general" markDirty("output_languages") } + const saveOnly = async () => { + const promptSaves: Promise[] = [] + if (promptDraft != null) promptSaves.push(api.updatePrompt("translation", promptDraft, bookLabel)) + if (promptSaves.length > 0) await Promise.all(promptSaves) + + const overrides = buildOverrides() + updateConfig.mutate( + { label: bookLabel, config: overrides }, + { + onSuccess: () => { + setDirty({}) + setPromptDraft(null) + }, + } + ) + } + const confirmSaveAndRerun = async () => { const promptSaves: Promise[] = [] if (promptDraft != null) promptSaves.push(api.updatePrompt("translation", promptDraft, bookLabel)) @@ -167,15 +116,15 @@ export function TranslationsSettings({ bookLabel, headerTarget, tab = "general" setPromptDraft(null) setShowRerunDialog(false) queueRun({ - fromStage: "text-and-speech", - toStage: "text-and-speech", + fromStage: "translation", + toStage: "translation", apiKey, providerCredentials: { azure: { key: azureKey, region: azureRegion }, geminiApiKey: geminiKey, }, }) - navigate({ to: "/books/$label/$step", params: { label: bookLabel, step: "text-and-speech" } }) + navigate({ to: "/books/$label/$step", params: { label: bookLabel, step: "translation" } }) }, } ) @@ -189,7 +138,7 @@ export function TranslationsSettings({ bookLabel, headerTarget, tab = "general" onSelect={toggleLanguage} multiple label={t`Output Languages`} - hint={t`Leave empty to output only in the book language.`} + bookLanguage={bookLanguage} /> )} @@ -208,164 +157,36 @@ export function TranslationsSettings({ bookLabel, headerTarget, tab = "general" /> )} - {tab === "speech" && ( -
- {/* Provider Routing */} -
-

{t`Provider Routing`}

-
- - -

{t`Provider used for languages not assigned to a specific provider.`}

-
-
- - {/* OpenAI Provider */} -
-

{t`OpenAI`}

-
- - { setOpenaiModel(e.target.value); markDirty("speech") }} - placeholder={t`e.g. gpt-4o-mini-tts`} - className="w-72 h-8 text-xs" - /> -
-
- - { setOpenaiLanguages(e.target.value); markDirty("speech") }} - placeholder={t`e.g. en, fr`} - className="w-72 h-8 text-xs" - /> -

{t`Comma-separated language codes routed to OpenAI.`}

-
-
- - {/* Azure Provider */} -
-

{t`Azure Speech`}

-
- - { setAzureModel(e.target.value); markDirty("speech") }} - placeholder={t`e.g. azure-tts`} - className="w-72 h-8 text-xs" - /> -
-
- - { setAzureLanguages(e.target.value); markDirty("speech") }} - placeholder={t`e.g. es, ta, si, sw`} - className="w-72 h-8 text-xs" - /> -

{t`Comma-separated language codes routed to Azure.`}

-
-
- - {/* Gemini Provider */} -
-

{t`Gemini`}

-
- - { setGeminiModel(e.target.value); markDirty("speech") }} - placeholder={t`e.g. gemini-2.5-pro-preview-tts`} - className="w-72 h-8 text-xs" - /> -
-
- - { setGeminiLanguages(e.target.value); markDirty("speech") }} - placeholder={t`e.g. en, hi, ta`} - className="w-72 h-8 text-xs" - /> -

{t`Comma-separated language codes routed to Gemini.`}

-
-
- - {/* Audio Settings */} -
-

{t`Audio Settings`}

-
-
- - { setFormat(e.target.value); markDirty("speech") }} - placeholder={t`mp3`} - className="w-32 h-8 text-xs" - /> -
-
- - { setBitRate(e.target.value); markDirty("speech") }} - placeholder={t`64k`} - className="w-32 h-8 text-xs" - /> -
-
- - { setSampleRate(e.target.value); markDirty("speech") }} - placeholder={t`24000`} - className="w-32 h-8 text-xs" - /> -
-
-

- {t`Gemini TTS outputs WAV audio in this integration; other providers continue using the configured format.`} -

-
-
- )} - - {tab === "speech-prompts" && ( - - )} - - {tab === "voices" && ( - - )} - - {headerTarget && (tab === "general" || tab === "prompt" || tab === "speech") && createPortal( - , + {headerTarget && (tab === "general" || tab === "prompt") && createPortal( +
+ + +
, headerTarget )} - {t`Save & Rerun Translations + Audio`} + {t`Save & Rerun Translations`} - {t`This will save your settings and re-run translations and audio generation, rebuilding the text catalog, translating to output languages, and generating speech.`} + {t`This will save your settings and re-run translations, rebuilding the text catalog and translating to output languages.`} diff --git a/apps/studio/src/components/pipeline/stages/translations/TranslationsView.tsx b/apps/studio/src/components/pipeline/stages/translations/TranslationsView.tsx index e57185d9..ce4113ab 100644 --- a/apps/studio/src/components/pipeline/stages/translations/TranslationsView.tsx +++ b/apps/studio/src/components/pipeline/stages/translations/TranslationsView.tsx @@ -1,7 +1,7 @@ -import { useState, useEffect, useRef, useCallback, useMemo } from "react" -import { Check, ChevronDown, Languages, Loader2, Play, Pause, WandSparkles } from "lucide-react" -import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query" -import { api, getAudioUrl, BASE_URL } from "@/api/client" +import { useState, useEffect, useRef, useCallback } from "react" +import { Check, ChevronDown, Languages, Loader2 } from "lucide-react" +import { useQuery, useQueryClient } from "@tanstack/react-query" +import { api, BASE_URL } from "@/api/client" import type { TextCatalogEntry, VersionEntry } from "@/api/client" import { useActiveConfig } from "@/hooks/use-debug" import { useBook } from "@/hooks/use-books" @@ -12,11 +12,7 @@ import { StageRunCard } from "../../components/StageRunCard" import { useVirtualizer } from "@tanstack/react-virtual" import { cn } from "@/lib/utils" import { normalizeLocale } from "@/lib/languages" -import { languageUsesSpeechProvider } from "@/lib/speech-routing" -import { Alert, AlertDescription } from "@/components/ui/alert" -import { Button } from "@/components/ui/button" import { resolveTranslationLanguageState } from "./lib/translations-view-state" -import { msg } from "@lingui/core/macro" import { useLingui } from "@lingui/react/macro" const IMAGE_ID_RE = /_im\d{3}/ @@ -159,18 +155,17 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: { const { data: activeConfigData } = useActiveConfig(bookLabel) const { data: book, isLoading: isBookLoading } = useBook(bookLabel) const queryClient = useQueryClient() - const { stageState, queueRun, error: runError } = useBookRun() + const { stageState, queueRun } = useBookRun() const { apiKey, hasApiKey, azureKey, azureRegion, geminiKey } = useApiKey() - const ttsState = stageState("text-and-speech") - const textAndSpeechDone = ttsState === "done" - const hasStageError = ttsState === "error" - const isRunning = ttsState === "running" || ttsState === "queued" + const translationState = stageState("translation") + const translationDone = translationState === "done" + const isRunning = translationState === "running" || translationState === "queued" const handleRunTranslations = useCallback(() => { if (!hasApiKey || isRunning) return queueRun({ - fromStage: "text-and-speech", - toStage: "text-and-speech", + fromStage: "translation", + toStage: "translation", apiKey, providerCredentials: { azure: { key: azureKey, region: azureRegion }, @@ -185,14 +180,7 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: { enabled: !!bookLabel, }) - const { data: ttsData } = useQuery({ - queryKey: ["books", bookLabel, "tts"], - queryFn: () => api.getTTS(bookLabel), - enabled: !!bookLabel, - }) - const merged = activeConfigData?.merged as Record | undefined - const speechConfig = merged?.speech const outputLanguages = Array.from( new Set(((merged?.output_languages as string[] | undefined) ?? []).map((code) => normalizeLocale(code))) ) @@ -224,29 +212,11 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: { isBookLoading, }) const isSourceLang = !hasExplicitOutputLanguages || isSelectedSourceLang - const audioLang = selectedLang ?? - (hasExplicitOutputLanguages ? (outputLanguages[0] ?? editingLanguage) : editingLanguage) - const currentLanguageUsesGemini = - !!audioLang && languageUsesSpeechProvider(audioLang, "gemini", speechConfig) - const geminiRoutedLanguages = ( - outputLanguages.length > 0 - ? outputLanguages - : editingLanguage - ? [editingLanguage] - : [] - ).filter((language, index, array) => - languageUsesSpeechProvider(language, "gemini", speechConfig) && - array.indexOf(language) === index - ) - const allowGeminiPartialView = - hasStageError && - geminiRoutedLanguages.length > 0 - const showRunCard = (!textAndSpeechDone || isRunning) && !allowGeminiPartialView + const showRunCard = !translationDone || isRunning // Pending state for edits (keyed by language) const [pendingEntries, setPendingEntries] = useState(null) const [saving, setSaving] = useState(false) - const [generateErrorById, setGenerateErrorById] = useState>({}) // Get translated entries for selected language const translationData = selectedLang ? catalog?.translations?.[selectedLang] : undefined @@ -290,19 +260,6 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: { } } - // Build audio lookup — use selected language, or editing language when no output languages - const audioMap = new Map() - if (ttsData && audioLang && ttsData.languages[audioLang]) { - for (const e of ttsData.languages[audioLang].entries) { - audioMap.set(e.textId, { fileName: e.fileName, voice: e.voice }) - } - } - const totalAudioFiles = ttsData - ? Object.values(ttsData.languages).reduce((sum, lang) => sum + lang.entries.length, 0) - : 0 - const generatedAudioCount = displayEntries.filter((entry) => audioMap.has(entry.id)).length - const missingAudioCount = Math.max(displayEntries.length - generatedAudioCount, 0) - const scrollRef = useRef(null) const virtualizer = useVirtualizer({ count: displayEntries.length, @@ -311,56 +268,6 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: { overscan: 5, }) - const generateAudioMutation = useMutation({ - mutationFn: async (variables: { textId: string; language: string }) => { - if (!geminiKey) { - throw new Error(i18n._(msg`Gemini API key is required to generate audio.`)) - } - return api.generateGeminiTTSForItem( - bookLabel, - variables.textId, - variables.language, - { - geminiApiKey: geminiKey, - openaiApiKey: apiKey || undefined, - azure: azureKey && azureRegion - ? { key: azureKey, region: azureRegion } - : undefined, - } - ) - }, - onMutate: (variables) => { - setGenerateErrorById((prev) => { - if (!(variables.textId in prev)) return prev - const next = { ...prev } - delete next[variables.textId] - return next - }) - }, - onSuccess: async () => { - await Promise.all([ - queryClient.invalidateQueries({ queryKey: ["books", bookLabel, "tts"] }), - queryClient.invalidateQueries({ queryKey: ["books", bookLabel, "step-status"] }), - ]) - }, - onError: (error, variables) => { - setGenerateErrorById((prev) => ({ - ...prev, - [variables.textId]: - error instanceof Error ? error.message : String(error), - })) - queryClient.invalidateQueries({ queryKey: ["books", bookLabel, "step-status"] }) - }, - }) - - const handleGenerateAudio = useCallback( - (textId: string) => { - if (!audioLang || !currentLanguageUsesGemini) return - generateAudioMutation.mutate({ textId, language: audioLang }) - }, - [audioLang, currentLanguageUsesGemini, generateAudioMutation] - ) - useEffect(() => { if (!catalog) return setExtra( @@ -369,18 +276,6 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: { {outputLanguages.length > 1 && ( {t`${String(outputLanguages.length)} languages`} )} - {currentLanguageUsesGemini ? ( - - {t`${String(generatedAudioCount)}/${String(displayEntries.length)} audio`} - - ) : totalAudioFiles > 0 && ( - {t`${String(totalAudioFiles)} audio`} - )} - {currentLanguageUsesGemini && missingAudioCount > 0 && ( - - {t`${missingAudioCount} missing`} - - )} {selectedLang && translationVersion != null && !isSourceLang && ( ) return () => setExtra(null) - }, [catalog, t, displayEntries.length, outputLanguages.length, selectedLang, translationVersion, saving, dirty, bookLabel, isSourceLang, totalAudioFiles, selectedPageId, currentLanguageUsesGemini, generatedAudioCount, missingAudioCount]) + }, [catalog, t, displayEntries.length, outputLanguages.length, selectedLang, translationVersion, saving, dirty, bookLabel, isSourceLang, selectedPageId]) if (!showRunCard && isLoading) { return ( @@ -414,9 +309,9 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: { return (
@@ -431,7 +326,7 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: { onClick={() => onSelectPage?.(null)} className="text-xs font-medium text-pink-600 hover:text-pink-700 hover:underline transition-colors" > - {t`Show all text & speech`} + {t`Show all translations`}
) : null @@ -441,14 +336,6 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: {
{/* Fixed header: alerts, language tabs, column headers */}
- {allowGeminiPartialView && runError && ( - - - {runError} - - - )} - {/* Language tabs — only when there are multiple output languages */} {outputLanguages.length > 1 && (
@@ -506,7 +393,6 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: { {virtualizer.getVirtualItems().map((virtualRow) => { const entry = displayEntries[virtualRow.index] const translated = translatedMap.get(entry.id) - const audio = audioMap.get(entry.id) const isImg = isImageEntry(entry.id) const isAnswer = isAnswerEntry(entry.id) @@ -540,21 +426,6 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: {

{entry.text}

- {!isAnswer && 0} - onGenerate={handleGenerateAudio} - isGenerating={ - generateAudioMutation.isPending && - generateAudioMutation.variables?.textId === entry.id && - generateAudioMutation.variables?.language === audioLang - } - errorMessage={generateErrorById[entry.id]} - />}
) : (
@@ -574,33 +445,16 @@ export function TranslationsView({ bookLabel, selectedPageId, onSelectPage }: {

{entry.text}

-
-
-   -