Skip to content

Commit f9055bd

Browse files
Split Translate and Speech into separate stages with TTS word timestamps (#268)
* Split Translate and Speech into separate pipeline stages with TTS timestamps and word highlighting

  Separate the monolithic Translate stage into distinct Translate and Speech stages with independent settings, DAG dependencies, and UI views. Add Whisper-based word timestamp generation with inline playback highlighting, editable multi-column timecode tables, and background task queue for batch transcription. Improve language picker to show only linguistically relevant countries, add visual separation to stage run cards, and pass source text as Whisper prompt for better accuracy.

* Add confirmation dialogs for delete speech and generate timestamps actions

* Fix timestamp race condition and show language name in generate dialog

  Write timestamps incrementally during batch transcription instead of accumulating in a stale snapshot, preventing concurrent user edits from being silently overwritten. Show human-readable language name (e.g. "English") instead of locale code in the generate timestamps confirmation.

* Add Class-suffix pattern to ESLint ignoreNames for CSS class variables

  Fixes CI lint failure where `inputClass` (a Tailwind class string variable) was flagged as an unlocalized string.

* Fix tts-timestamps data not cleared when deleting TTS or editing pages

  tts-timestamps node data was not included in clearNodesByType calls alongside tts, leaving stale word-timestamp data after speech deletion or upstream page/caption edits.

---------

Co-authored-by: Nic Pottier <nicpottier@gmail.com>
1 parent 39044ec commit f9055bd

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

47 files changed

+2695
-543
lines changed

apps/api/src/app.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ app.route("/api", createQuizRoutes(booksDir))
9393
app.route("/api", createPackageRoutes(booksDir, webAssetsDir, configPath, taskService))
9494
app.route("/api", createPromptRoutes(promptsDir, booksDir))
9595
app.route("/api", createTextCatalogRoutes(booksDir))
96-
app.route("/api", createTTSRoutes(booksDir, configPath))
96+
app.route("/api", createTTSRoutes(booksDir, configPath, taskService))
9797
app.route(
9898
"/api",
9999
createStageRoutes(stageService, eventBus, booksDir, promptsDir, webAssetsDir, configPath)

apps/api/src/routes/books.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -529,8 +529,8 @@ describe("POST /books/:label/stages/run", () => {
529529
"X-Gemini-API-Key": "gm-test",
530530
},
531531
body: JSON.stringify({
532-
fromStage: "text-and-speech",
533-
toStage: "text-and-speech",
532+
fromStage: "translate",
533+
toStage: "speech",
534534
}),
535535
})
536536

apps/api/src/routes/pages.test.ts

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -669,7 +669,7 @@ describe("Page routes", () => {
669669
}
670670
}
671671

672-
/** Assert that all caption + text-and-speech node data and step_runs were cleared. */
672+
/** Assert that all caption + translate/speech node data and step_runs were cleared. */
673673
function expectAllDownstreamCleared(dir: string, bookLabel: string) {
674674
const s = createBookStorage(bookLabel, dir)
675675
try {
@@ -689,7 +689,7 @@ describe("Page routes", () => {
689689
}
690690
}
691691

692-
/** Assert that text-and-speech (but NOT image-captioning) node data and step_runs were cleared. */
692+
/** Assert that translate/speech (but NOT image-captioning) node data and step_runs were cleared. */
693693
function expectTextAndSpeechCleared(dir: string, bookLabel: string) {
694694
const s = createBookStorage(bookLabel, dir)
695695
try {
@@ -711,7 +711,7 @@ describe("Page routes", () => {
711711
}
712712

713713
describe("PUT /api/books/:label/pages/:pageId/sectioning clears downstream", () => {
714-
it("clears caption + text-and-speech data on sectioning save", async () => {
714+
it("clears caption + translate/speech data on sectioning save", async () => {
715715
seedDownstreamData(tmpDir, label)
716716

717717
const data = {
@@ -748,7 +748,7 @@ describe("Page routes", () => {
748748
})
749749

750750
describe("PUT /api/books/:label/pages/:pageId/rendering clears downstream", () => {
751-
it("clears caption + text-and-speech data on rendering save", async () => {
751+
it("clears caption + translate/speech data on rendering save", async () => {
752752
seedDownstreamData(tmpDir, label)
753753

754754
const data = {
@@ -775,7 +775,7 @@ describe("Page routes", () => {
775775
})
776776

777777
describe("POST clone clears downstream", () => {
778-
it("clears caption + text-and-speech data on section clone", async () => {
778+
it("clears caption + translate/speech data on section clone", async () => {
779779
seedDownstreamData(tmpDir, label)
780780

781781
const res = await app.request(
@@ -789,7 +789,7 @@ describe("Page routes", () => {
789789
})
790790

791791
describe("POST delete clears downstream", () => {
792-
it("clears caption + text-and-speech data on section delete", async () => {
792+
it("clears caption + translate/speech data on section delete", async () => {
793793
// Need at least 2 sections so delete is valid
794794
const s = createBookStorage(label, tmpDir)
795795
try {
@@ -839,7 +839,7 @@ describe("Page routes", () => {
839839
})
840840

841841
describe("POST crop (images) clears downstream", () => {
842-
it("clears caption + text-and-speech data on image crop", async () => {
842+
it("clears caption + translate/speech data on image crop", async () => {
843843
seedDownstreamData(tmpDir, label)
844844

845845
// Minimal valid PNG (1x1 pixel)
@@ -867,7 +867,7 @@ describe("Page routes", () => {
867867
})
868868
})
869869

870-
describe("PUT image-captioning clears text-and-speech downstream", () => {
870+
describe("PUT image-captioning clears translate/speech downstream", () => {
871871
it("clears text-catalog/translations/TTS but keeps image-captioning", async () => {
872872
seedDownstreamData(tmpDir, label)
873873

apps/api/src/routes/pages.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -327,9 +327,9 @@ async function executeAiImageGeneration(params: AiImageGenParams): Promise<{
327327
}
328328
}
329329

330-
/** Clear caption + downstream text-and-speech data when images change. */
330+
/** Clear caption + downstream translate/speech data when images change. */
331331
function clearCaptionData(storage: Storage): void {
332-
storage.clearNodesByType(["image-captioning", "text-catalog", "text-catalog-translation", "tts"])
332+
storage.clearNodesByType(["image-captioning", "text-catalog", "text-catalog-translation", "tts", "tts-timestamps"])
333333
storage.clearStepRuns(["image-captioning", "text-catalog", "catalog-translation", "tts"])
334334
}
335335

@@ -736,7 +736,7 @@ export function createPageRoutes(
736736

737737
const version = storage.putNodeData("image-captioning", pageId, parsed.data)
738738
// Caption change cascades to text-catalog/translations/TTS
739-
storage.clearNodesByType(["text-catalog", "text-catalog-translation", "tts"])
739+
storage.clearNodesByType(["text-catalog", "text-catalog-translation", "tts", "tts-timestamps"])
740740
storage.clearStepRuns(["text-catalog", "catalog-translation", "tts"])
741741
return c.json({ version })
742742
} finally {

apps/api/src/routes/tts.test.ts

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,3 +344,57 @@ describe("POST /books/:label/tts/generate-one", () => {
344344
)
345345
})
346346
})
347+
348+
describe("DELETE /books/:label/tts", () => {
349+
beforeEach(() => {
350+
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "adt-tts-route-"))
351+
configPath = path.join(tmpDir, "config.yaml")
352+
writeConfig()
353+
})
354+
355+
afterEach(() => {
356+
fs.rmSync(tmpDir, { recursive: true, force: true })
357+
tmpDir = ""
358+
configPath = ""
359+
})
360+
361+
it("clears both tts and tts-timestamps data", async () => {
362+
const label = "delete-tts"
363+
seedBook(label)
364+
365+
// Seed TTS and tts-timestamps data
366+
const storage = createBookStorage(label, tmpDir)
367+
try {
368+
storage.putNodeData("tts", "en", {
369+
entries: [{ textId: "pg001_t001", fileName: "pg001_t001.wav" }],
370+
generatedAt: new Date().toISOString(),
371+
})
372+
storage.putNodeData("tts-timestamps", "en", {
373+
entries: {
374+
pg001_t001: {
375+
textId: "pg001_t001",
376+
language: "en",
377+
words: [{ word: "Hello", start: 0, end: 0.5 }],
378+
duration: 0.5,
379+
},
380+
},
381+
generatedAt: new Date().toISOString(),
382+
})
383+
} finally {
384+
storage.close()
385+
}
386+
387+
const app = createTTSRoutes(tmpDir, configPath)
388+
const res = await app.request(`/books/${label}/tts`, { method: "DELETE" })
389+
390+
expect(res.status).toBe(200)
391+
392+
const after = createBookStorage(label, tmpDir)
393+
try {
394+
expect(after.getLatestNodeData("tts", "en")).toBeNull()
395+
expect(after.getLatestNodeData("tts-timestamps", "en")).toBeNull()
396+
} finally {
397+
after.close()
398+
}
399+
})
400+
})

0 commit comments

Comments
 (0)