diff --git a/modules/tool/packages/minmax/children/tts/config.ts b/modules/tool/packages/minmax/children/tts/config.ts
new file mode 100644
index 00000000..4dc4b824
--- /dev/null
+++ b/modules/tool/packages/minmax/children/tts/config.ts
@@ -0,0 +1,159 @@
+import { defineTool } from '@tool/type';
+import { FlowNodeInputTypeEnum, WorkflowIOValueTypeEnum } from '@tool/type/fastgpt';
+
+/**
+ * Definition of the MiniMax text-to-speech tool.
+ *
+ * Declares the localized display name and description plus one version (0.1.0) whose
+ * inputs are text, model, voice, speed, volume, pitch, emotion and English normalization,
+ * and whose only output is `audioUrl`.
+ */
+export default defineTool({
+  name: {
+    'zh-CN': 'MiniMax 文本转语音',
+    en: 'MiniMax Text-to-Speech'
+  },
+  description: {
+    'zh-CN': '使用MiniMax平台将文本转换为高质量语音',
+    en: 'Convert text to high-quality speech using MiniMax platform'
+  },
+  toolDescription:
+    'Convert text to speech using MiniMax TTS API. Supports multiple voice settings and audio formats.',
+  versionList: [
+    {
+      value: '0.1.0',
+      description: 'Default version',
+      inputs: [
+        // Text content to convert; required, typed in or taken from a reference.
+        {
+          key: 'text',
+          label: '文本内容',
+          renderTypeList: [FlowNodeInputTypeEnum.input, FlowNodeInputTypeEnum.reference],
+          valueType: WorkflowIOValueTypeEnum.string,
+          required: true
+        },
+        // Model name; defaults to speech-2.5-hd-preview.
+        {
+          key: 'model',
+          label: '模型',
+          renderTypeList: [FlowNodeInputTypeEnum.select, FlowNodeInputTypeEnum.reference],
+          valueType: WorkflowIOValueTypeEnum.string,
+          required: true,
+          defaultValue: 'speech-2.5-hd-preview',
+          list: [
+            { label: 'speech-2.5-hd-preview', value: 'speech-2.5-hd-preview' },
+            { label: 'speech-2.5-turbo-preview', value: 'speech-2.5-turbo-preview' },
+            { label: 'speech-02-hd', value: 'speech-02-hd' },
+            { label: 'speech-02-turbo', value: 'speech-02-turbo' },
+            { label: 'speech-01-hd', value: 'speech-01-hd' },
+            { label: 'speech-01-turbo', value: 'speech-01-turbo' }
+          ]
+        },
+        // Voice preset; defaults to male-qn-qingse.
+        {
+          key: 'voice_id',
+          label: '音色',
+          renderTypeList: [FlowNodeInputTypeEnum.select, FlowNodeInputTypeEnum.reference],
+          valueType: WorkflowIOValueTypeEnum.string,
+          required: true,
+          defaultValue: 'male-qn-qingse',
+          list: [
+            {
+              label: '青涩青年音色',
+              value: 'male-qn-qingse'
+            },
+            {
+              label: '精英青年音色',
+              value: 'male-qn-jingying'
+            },
+            {
+              label: '少女音色',
+              value: 'female-shaonv'
+            },
+            {
+              label: '成熟女性音色',
+              value: 'female-chengshu'
+            }
+          ]
+        },
+        // Speech rate; declared range 0.5 to 2, step 0.1, default 1.
+        {
+          key: 'speed',
+          label: '语速',
+          renderTypeList: [FlowNodeInputTypeEnum.numberInput, FlowNodeInputTypeEnum.reference],
+          valueType: WorkflowIOValueTypeEnum.number,
+          required: true,
+          description: '语速,范围为 0.5-2, 值越大语速越快',
+          min: 0.5,
+          max: 2,
+          step: 0.1,
+          defaultValue: 1
+        },
+        // Volume; declared range 0.1 to 10, step 0.1, default 1.
+        {
+          key: 'vol',
+          label: '音量',
+          renderTypeList: [FlowNodeInputTypeEnum.numberInput, FlowNodeInputTypeEnum.reference],
+          valueType: WorkflowIOValueTypeEnum.number,
+          required: true,
+          description: '音量,范围为 0.1-10, 值越大音量越大',
+          min: 0.1,
+          max: 10,
+          step: 0.1,
+          defaultValue: 1
+        },
+        // Pitch; declared range -12 to 12, step 1, default 0.
+        {
+          key: 'pitch',
+          label: '语调',
+          renderTypeList: [FlowNodeInputTypeEnum.numberInput, FlowNodeInputTypeEnum.reference],
+          valueType: WorkflowIOValueTypeEnum.number,
+          required: true,
+          description: '语调,范围为 -12-12, 值越大语调越高',
+          min: -12,
+          max: 12,
+          step: 1,
+          defaultValue: 0
+        },
+        // Emotion; defaults to 'auto'.
+        {
+          key: 'emotion',
+          label: '情绪',
+          renderTypeList: [FlowNodeInputTypeEnum.select, FlowNodeInputTypeEnum.reference],
+          valueType: WorkflowIOValueTypeEnum.string,
+          required: true,
+          defaultValue: 'auto',
+          list: [
+            { label: '自动(推荐)', value: 'auto' },
+            { label: '高兴 happy', value: 'happy' },
+            { label: '悲伤 sad', value: 'sad' },
+            { label: '愤怒 angry', value: 'angry' },
+            { label: '害怕 fearful', value: 'fearful' },
+            { label: '厌恶 disgusted', value: 'disgusted' },
+            { label: '惊讶 surprised', value: 'surprised' },
+            { label: '中性 calm', value: 'calm' }
+          ]
+        },
+        // English text normalization; rendered only as a switch, off by default.
+        {
+          key: 'english_normalization',
+          label: '英文规范化',
+          renderTypeList: [FlowNodeInputTypeEnum.switch],
+          valueType: WorkflowIOValueTypeEnum.boolean,
+          description: '支持英语文本规范化,开启后可提升数字阅读场景的性能,但会略微增加延迟',
+          required: true,
+          defaultValue: false
+        }
+      ],
+      outputs: [
+        // Link to the synthesized audio file.
+        {
+          valueType: WorkflowIOValueTypeEnum.string,
+          key: 'audioUrl',
+          label: '音频链接',
+          description: '语音合成后的音频文件链接'
+        }
+      ]
+    }
+  ]
+});
diff --git a/modules/tool/packages/minmax/children/tts/index.ts b/modules/tool/packages/minmax/children/tts/index.ts new
file mode 100644
index 00000000..d698ed48
--- /dev/null
+++ b/modules/tool/packages/minmax/children/tts/index.ts
@@ -0,0 +1,11 @@
+import config from './config';
+import { InputType, OutputType, tool as toolCb } from './src';
+import { exportTool } from '@tool/utils/tool';
+
+// Bundle the TTS callback, its input/output schemas and the tool config into one export.
+export default exportTool({
+  toolCb,
+  InputType,
+  OutputType,
+  config
+});
diff --git a/modules/tool/packages/minmax/children/tts/src/index.ts b/modules/tool/packages/minmax/children/tts/src/index.ts
new file mode 100644
index 00000000..ad98581f
--- /dev/null
+++ b/modules/tool/packages/minmax/children/tts/src/index.ts
@@ -0,0 +1,150 @@
+import { z } from 'zod';
+import { POST, GET } from '@tool/utils/request';
+import { uploadFile } from '@tool/utils/uploadFile';
+import { delay } from '@tool/utils/delay';
+
+// Runtime schema for the tool input: the API key plus the synthesis parameters.
+export const InputType = z.object({
+  apiKey: z.string(),
+  text: z.string().min(1),
+  model: z.string().min(1),
+  voice_id: z.string(),
+  speed: z.number(),
+  vol: z.number(),
+  pitch: z.number(),
+  emotion: z.string(),
+  english_normalization: z.boolean()
+});
+
+// Runtime schema for the tool output: a single link to the generated audio.
+export const OutputType = z.object({
+  audioUrl: z.string()
+});
+
+const MINIMAX_BASE_URL = 'https://api.minimaxi.com/v1';
+// Polling budget for the async task: at most 180 status queries, 1000 ms apart.
+const MAX_POLL_ATTEMPTS = 180;
+const POLL_INTERVAL_MS = 1000;
+
+/**
+ * Synthesize `text` through the MiniMax async TTS endpoints and return a link to the audio.
+ *
+ * Flow: create a task via `t2a_async_v2`, poll `query/t2a_async_query_v2` until the task
+ * reports a terminal status, download the result via `files/retrieve_content`, then pass
+ * the bytes to `uploadFile` and return its `accessUrl`.
+ *
+ * @param input - Values matching `InputType` (destructured in the signature).
+ * @returns `{ audioUrl }`, where `audioUrl` is the `accessUrl` returned by `uploadFile`.
+ * @throws Error whose message starts with `TTS failed:`; the original error is kept as `cause`.
+ */
+export async function tool({
+  apiKey,
+  text,
+  model,
+  voice_id,
+  speed,
+  vol,
+  pitch,
+  emotion,
+  english_normalization
+}: z.infer<typeof InputType>): Promise<z.infer<typeof OutputType>> {
+  const headers = {
+    Authorization: `Bearer ${apiKey}`,
+    'Content-Type': 'application/json'
+  };
+  // Advanced settings that are not exposed to the user yet; sent with every request.
+  // NOTE(review): 'spacious_echo' is sent as the sound effect on every request; confirm intent.
+  const defaultSetting = {
+    pronunciation_dict: {
+      tone: []
+    },
+    audio_setting: {
+      audio_sample_rate: 32000,
+      bitrate: 128000,
+      format: 'mp3',
+      channel: 2
+    },
+    voice_modify: {
+      pitch: 0,
+      intensity: 0,
+      timbre: 0,
+      sound_effects: 'spacious_echo'
+    }
+  };
+
+  try {
+    // Create the TTS task.
+    const { data: taskData } = await POST(
+      `${MINIMAX_BASE_URL}/t2a_async_v2`,
+      {
+        model,
+        text,
+        language_boost: 'auto',
+        voice_setting: {
+          voice_id,
+          speed,
+          vol,
+          pitch,
+          emotion,
+          english_normalization
+        },
+        ...defaultSetting
+      },
+      {
+        headers
+      }
+    );
+
+    const task_id = taskData?.task_id;
+    // Fail fast when no task was created; otherwise the loop below would keep querying
+    // with an undefined id until the whole polling budget is spent.
+    if (task_id === undefined || task_id === null || task_id === '') {
+      // NOTE(review): assumes error responses carry `base_resp.status_msg`; confirm.
+      const reason = taskData?.base_resp?.status_msg ?? 'no task_id in response';
+      throw new Error(`TTS task creation failed: ${reason}`);
+    }
+
+    // Poll until the task reaches a terminal status; the file can only be
+    // downloaded once the status is Success.
+    const pollTaskStatus = async () => {
+      for (let attempt = 0; attempt < MAX_POLL_ATTEMPTS; attempt++) {
+        const { data: statusData } = await GET(`${MINIMAX_BASE_URL}/query/t2a_async_query_v2`, {
+          params: { task_id },
+          headers
+        });
+        const status = statusData.status;
+        if (status === 'Success') {
+          return statusData.file_id;
+        }
+        if (status === 'Failed') {
+          throw new Error('TTS task failed');
+        }
+        // NOTE(review): 'Expired' is assumed to be a terminal status of this API; confirm.
+        if (status === 'Expired') {
+          throw new Error('TTS task expired');
+        }
+        await delay(POLL_INTERVAL_MS);
+      }
+      throw new Error('TTS task timeout');
+    };
+    const file_id = await pollTaskStatus();
+
+    // Download the synthesized audio.
+    const { data: fileBuffer } = await GET(`${MINIMAX_BASE_URL}/files/retrieve_content`, {
+      params: { file_id },
+      headers,
+      responseType: 'arrayBuffer'
+    });
+
+    const { accessUrl: audioUrl } = await uploadFile({
+      buffer: Buffer.from(fileBuffer),
+      defaultFilename: 'minimax_tts.mp3'
+    });
+
+    return { audioUrl };
+  } catch (error) {
+    // Wrap in a real Error (never a bare string) and keep the original as `cause`.
+    const reason = error instanceof Error ? error.message : String(error);
+    throw new Error(`TTS failed: ${reason}`, { cause: error });
+  }
+}
diff --git a/modules/tool/packages/minmax/children/tts/test/index.test.ts b/modules/tool/packages/minmax/children/tts/test/index.test.ts
new file mode 100644
index 00000000..b70e289f
--- /dev/null
+++ b/modules/tool/packages/minmax/children/tts/test/index.test.ts
@@ -0,0 +1,9 @@
+import { expect, test } from 'vitest';
+import tool from '..';
+
+// Smoke test: the exported tool exposes name, description and cb.
+test(async () => {
+  expect(tool.name).toBeDefined();
+  expect(tool.description).toBeDefined();
+  expect(tool.cb).toBeDefined();
+});
diff --git a/modules/tool/packages/minmax/config.ts b/modules/tool/packages/minmax/config.ts
new file mode 100644
index 00000000..373b03a4
--- /dev/null
+++ b/modules/tool/packages/minmax/config.ts
@@ -0,0 +1,27 @@
+import { defineToolSet } from '@tool/type';
+import { ToolTypeEnum } from '@tool/type/tool';
+
+// Tool set metadata for MiniMax: display names, course link and the `apiKey` secret input.
+export default defineToolSet({
+  name: {
+    'zh-CN': 'MiniMax 工具集',
+    en: 'MiniMax Tool Set'
+  },
+  courseUrl: 'https://platform.minimaxi.com/document/quick_start',
+  type: ToolTypeEnum.tools,
+  description: {
+    'zh-CN': 'MiniMax 工具集, 包含文本转语音、语音转文本、语音合成、语音识别等功能',
+    en: 'MiniMax tool set, including text-to-speech, speech-to-text, speech synthesis, speech recognition, and more'
+  },
+  toolDescription:
+    'MiniMax tool set, including text-to-speech, speech-to-text, speech synthesis, speech recognition, and more',
+  secretInputConfig: [
+    {
+      key: 'apiKey',
+      label: 'API Key',
+      description: '可以在 MiniMax 官网获取',
+      required: true,
+      inputType: 'secret'
+    }
+  ]
+});
diff --git a/modules/tool/packages/minmax/index.ts b/modules/tool/packages/minmax/index.ts
new file mode 100644
index 00000000..22bccae7
--- /dev/null
+++ b/modules/tool/packages/minmax/index.ts
@@ -0,0 +1,8 @@
+// You should not modify this file, if you need to modify the tool set configuration, please modify the config.ts file
+
+import config from './config';
+import { exportToolSet } from '@tool/utils/tool';
+
+export default exportToolSet({
+  config
+});
diff --git a/modules/tool/packages/minmax/logo.png b/modules/tool/packages/minmax/logo.png
new file mode 100644
index 00000000..19ed817f
Binary files /dev/null and b/modules/tool/packages/minmax/logo.png differ
diff --git a/modules/tool/packages/minmax/package.json b/modules/tool/packages/minmax/package.json
new file mode 100644
index 00000000..16b92c27
--- /dev/null
+++ b/modules/tool/packages/minmax/package.json
@@ -0,0 +1,17 @@
+{
+  "name": "@fastgpt-plugins/tool-minmax",
+  "module": "index.ts",
+  "type": "module",
+  "scripts": {
+    "build": "bun ../../../../scripts/build.ts"
+  },
+  "devDependencies": {
+    "@types/bun": "latest"
+  },
+  "peerDependencies": {
+    "typescript": "^5.0.0"
+  },
+  "dependencies": {
+    "zod": "^3.24.2"
+  }
+}