Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 36 additions & 5 deletions packages/sdk/schemas/text-to-speech.ts
Original file line number Diff line number Diff line change
@@ -1,26 +1,55 @@
import { z } from "zod";
import { modelSrcInputSchema } from "./model-src-utils";

// TTS supported languages based on available models
export const TTS_LANGUAGES = [
/**
 * Language codes accepted by the Chatterbox multilingual engine (18 in total).
 *
 * Each TTS engine supports a different set of languages, so the language enum
 * is validated per engine rather than against one shared list.
 */
export const TTS_CHATTERBOX_LANGUAGES = [
  // English, Spanish, French, German, Italian, Portuguese
  "en", "es", "fr", "de", "it", "pt",
  // Dutch, Polish, Turkish, Swedish, Danish, Finnish
  "nl", "pl", "tr", "sv", "da", "fi",
  // Norwegian, Greek, Malay, Swahili, Arabic, Korean
  "no", "el", "ms", "sw", "ar", "ko",
] as const;

const ttsLanguageSchema = z.enum(TTS_LANGUAGES);
/**
 * Language codes accepted by the Supertonic engine.
 *
 * Every code listed here also appears in the Chatterbox list, i.e. this is a
 * subset of the Chatterbox set.
 */
export const TTS_SUPERTONIC_LANGUAGES = [
  // English, Spanish, French, Portuguese, Korean
  "en", "es", "fr", "pt", "ko",
] as const;

// Union of all TTS-supported languages across engines. Kept for backwards
// compatibility; prefer the engine-specific lists when validating a config.
// NOTE: this is a copy of the Chatterbox list, not a computed union. It covers
// every engine only while each Supertonic code is also present in the
// Chatterbox list; revisit this line if an engine gains a language that
// Chatterbox does not have.
export const TTS_LANGUAGES = [...TTS_CHATTERBOX_LANGUAGES] as const;

// Per-engine zod enums built from the language lists above; each one is used as
// the `language` field of the matching engine's runtime config schema.
const ttsChatterboxLanguageSchema = z.enum(TTS_CHATTERBOX_LANGUAGES);
const ttsSupertonicLanguageSchema = z.enum(TTS_SUPERTONIC_LANGUAGES);

// Runtime config for the Chatterbox engine: `ttsEngine` must be the literal
// "chatterbox"; `voice` and `useGPU` are optional.
// NOTE(review): `language` is listed twice below (first with ttsLanguageSchema,
// then with ttsChatterboxLanguageSchema). This looks like the removed and added
// lines of a rendered diff shown together; the Chatterbox-specific schema
// appears to be the intended one — confirm against the merged file.
export const ttsChatterboxRuntimeConfigSchema = z.object({
ttsEngine: z.literal("chatterbox"),
language: ttsLanguageSchema,
language: ttsChatterboxLanguageSchema,
voice: z.string().optional(),
useGPU: z.boolean().optional(),
});

export const ttsSupertonicRuntimeConfigSchema = z.object({
ttsEngine: z.literal("supertonic"),
language: ttsLanguageSchema,
language: ttsSupertonicLanguageSchema,
voice: z.string().optional(),
ttsSpeed: z.number().optional(),
ttsNumInferenceSteps: z.number().optional(),
Expand Down Expand Up @@ -141,6 +170,8 @@ export const textToSpeechStreamResponseSchema = z.object({
});

// Language-code unions, each derived from its const array via indexed access so
// the type stays in sync with the list it is built from.
export type TtsLanguage = (typeof TTS_LANGUAGES)[number];
export type TtsChatterboxLanguage = (typeof TTS_CHATTERBOX_LANGUAGES)[number];
export type TtsSupertonicLanguage = (typeof TTS_SUPERTONIC_LANGUAGES)[number];
// Load-config types inferred from their zod schemas (the schemas themselves are
// defined outside the lines visible here).
export type TtsChatterboxLoadConfig = z.infer<typeof ttsChatterboxLoadConfigSchema>;
export type TtsSupertonicLoadConfig = z.infer<typeof ttsSupertonicLoadConfigSchema>;
export type TtsLoadConfig = z.infer<typeof ttsLoadConfigSchema>;
Expand Down
51 changes: 51 additions & 0 deletions packages/sdk/test/unit/tts-schemas.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ import {
ttsResponseSchema,
textToSpeechStreamResponseSchema,
ttsConfigSchema,
ttsChatterboxRuntimeConfigSchema,
ttsSupertonicRuntimeConfigSchema,
TTS_CHATTERBOX_LANGUAGES,
TTS_SUPERTONIC_LANGUAGES,
LEGACY_TTS_ONNX_MODEL_CONFIG_FIELDS,
} from "@/schemas/text-to-speech";

Expand All @@ -26,6 +29,54 @@ test("ttsConfigSchema: accepts GGML supertonic load config", (t) => {
t.is(r.success, true);
});

// Pins both the size and the exact ordered contents of the Chatterbox language
// list, so an accidental addition, removal, or reorder fails this test.
test("TTS_CHATTERBOX_LANGUAGES: exposes all 18 supported languages", (t) => {
  const expectedCodes = [
    "en", "es", "fr", "de", "it", "pt",
    "nl", "pl", "tr", "sv", "da", "fi",
    "no", "el", "ms", "sw", "ar", "ko",
  ];
  t.is(TTS_CHATTERBOX_LANGUAGES.length, 18);
  t.alike(Array.from(TTS_CHATTERBOX_LANGUAGES), expectedCodes);
});

// Every code in the Chatterbox list must parse as a valid Chatterbox runtime
// config when paired with the matching engine literal.
test("ttsChatterboxRuntimeConfigSchema: accepts all 18 chatterbox languages", (t) => {
  TTS_CHATTERBOX_LANGUAGES.forEach((language) => {
    const candidate = { ttsEngine: "chatterbox", language };
    const outcome = ttsChatterboxRuntimeConfigSchema.safeParse(candidate);
    t.is(outcome.success, true, `chatterbox should accept ${language}`);
  });
});

// Pins the Supertonic language list and checks that each of its codes parses
// as a valid Supertonic runtime config.
test("ttsSupertonicRuntimeConfigSchema: only accepts its language subset", (t) => {
  const expectedCodes = ["en", "es", "fr", "pt", "ko"];
  t.alike(Array.from(TTS_SUPERTONIC_LANGUAGES), expectedCodes);

  TTS_SUPERTONIC_LANGUAGES.forEach((language) => {
    const candidate = { ttsEngine: "supertonic", language };
    const outcome = ttsSupertonicRuntimeConfigSchema.safeParse(candidate);
    t.is(outcome.success, true, `supertonic should accept ${language}`);
  });
});

// The Supertonic runtime schema must reject every language that only
// Chatterbox supports. The set is derived from the two exported lists instead
// of hard-coding a single sample, so the check keeps covering the whole
// difference if either list changes.
test("ttsSupertonicRuntimeConfigSchema: rejects chatterbox-only languages", (t) => {
  const supertonicCodes = new Set<string>(TTS_SUPERTONIC_LANGUAGES);
  const chatterboxOnly = TTS_CHATTERBOX_LANGUAGES.filter(
    (code) => !supertonicCodes.has(code),
  );

  // Guard against a vacuous pass: 'de' is supported by chatterbox but not
  // supertonic, so it must be part of the derived set.
  t.is(chatterboxOnly.includes("de"), true, "'de' must be chatterbox-only");

  for (const language of chatterboxOnly) {
    const r = ttsSupertonicRuntimeConfigSchema.safeParse({
      ttsEngine: "supertonic",
      language,
    });
    t.is(r.success, false, `supertonic must reject '${language}'`);
  }
});

// A Chatterbox load config using 'tr' (a code absent from the Supertonic list)
// must be accepted by the top-level config schema.
test("ttsConfigSchema: accepts a chatterbox-only language for chatterbox", (t) => {
  const loadConfig = {
    ttsEngine: "chatterbox",
    language: "tr",
    s3genModelSrc: "s3:///example/s3gen.gguf",
  };
  const outcome = ttsConfigSchema.safeParse(loadConfig);
  t.is(outcome.success, true, "chatterbox load config accepts 'tr'");
});

test("ttsSupertonicRuntimeConfigSchema: strips removed ttsSupertonicMultilingual", (t) => {
const r = ttsSupertonicRuntimeConfigSchema.safeParse({
ttsEngine: "supertonic",
Expand Down
Loading