labring · c121914yu · Oct 11, 2025 · Oct 10, 2025 · Oct 11, 2025 · Oct 11, 2025
diff --git a/modules/tool/packages/minmaxTTS/config.ts b/modules/tool/packages/minmaxTTS/config.ts
@@ -0,0 +1,151 @@
+import { defineTool } from '@tool/type';
+import { FlowNodeInputTypeEnum, WorkflowIOValueTypeEnum } from '@tool/type/fastgpt';
+import { ToolTypeEnum } from '@tool/type/tool';
+
+/**
+ * Manifest for the MiniMax text-to-speech tool.
+ *
+ * Declares the secret the tool needs (the MiniMax API key), the inputs shown in
+ * the workflow editor, and the single output (`audioUrl`).
+ *
+ * The brand is spelled "MiniMax" in user-facing strings, matching the platform
+ * URL below; input keys and option values are untouched.
+ */
+export default defineTool({
+  name: {
+    'zh-CN': 'MiniMax 文本转语音',
+    en: 'MiniMax Text-to-Speech'
+  },
+  courseUrl: 'https://platform.minimaxi.com/document/quick_start',
+  type: ToolTypeEnum.tools,
+  description: {
+    'zh-CN': '使用 MiniMax 平台将文本转换为高质量语音',
+    en: 'Convert text to high-quality speech using MiniMax platform'
+  },
+  toolDescription:
+    'Convert text to speech using MiniMax TTS API. Supports multiple voice settings and audio formats.',
+  secretInputConfig: [
+    {
+      key: 'apiKey',
+      label: 'API Key',
+      description: '可以在 MiniMax 官网获取',
+      required: true,
+      inputType: 'secret'
+    }
+  ],
+  versionList: [
+    {
+      value: '0.1.0',
+      description: 'Default version',
+      inputs: [
+        // Text to synthesize; the only input without a default value.
+        {
+          key: 'text',
+          label: '文本内容',
+          renderTypeList: [FlowNodeInputTypeEnum.input, FlowNodeInputTypeEnum.reference],
+          valueType: WorkflowIOValueTypeEnum.string,
+          required: true
+        },
+        // Speech model; labels intentionally equal the model identifiers.
+        {
+          key: 'model',
+          label: '模型',
+          renderTypeList: [FlowNodeInputTypeEnum.select, FlowNodeInputTypeEnum.reference],
+          valueType: WorkflowIOValueTypeEnum.string,
+          required: true,
+          defaultValue: 'speech-2.5-hd-preview',
+          list: [
+            { label: 'speech-2.5-hd-preview', value: 'speech-2.5-hd-preview' },
+            { label: 'speech-2.5-turbo-preview', value: 'speech-2.5-turbo-preview' },
+            { label: 'speech-02-hd', value: 'speech-02-hd' },
+            { label: 'speech-02-turbo', value: 'speech-02-turbo' },
+            { label: 'speech-01-hd', value: 'speech-01-hd' },
+            { label: 'speech-01-turbo', value: 'speech-01-turbo' }
+          ]
+        },
+        // Voice (timbre) preset.
+        {
+          key: 'voice_id',
+          label: '音色',
+          renderTypeList: [FlowNodeInputTypeEnum.select, FlowNodeInputTypeEnum.reference],
+          valueType: WorkflowIOValueTypeEnum.string,
+          required: true,
+          defaultValue: 'male-qn-qingse',
+          list: [
+            {
+              label: '青涩青年音色',
+              value: 'male-qn-qingse'
+            },
+            {
+              label: '精英青年音色',
+              value: 'male-qn-jingying'
+            },
+            {
+              label: '少女音色',
+              value: 'female-shaonv'
+            },
+            {
+              label: '成熟女性音色',
+              value: 'female-chengshu'
+            }
+          ]
+        },
+        // Speaking speed, 0.5 - 2 in steps of 0.1.
+        {
+          key: 'speed',
+          label: '语速',
+          renderTypeList: [FlowNodeInputTypeEnum.numberInput, FlowNodeInputTypeEnum.reference],
+          valueType: WorkflowIOValueTypeEnum.number,
+          required: false,
+          min: 0.5,
+          max: 2,
+          step: 0.1,
+          defaultValue: 1
+        },
+        // Volume, 0.1 - 10 in steps of 0.1.
+        {
+          key: 'vol',
+          label: '音量',
+          renderTypeList: [FlowNodeInputTypeEnum.numberInput, FlowNodeInputTypeEnum.reference],
+          valueType: WorkflowIOValueTypeEnum.number,
+          required: false,
+          min: 0.1,
+          max: 10,
+          step: 0.1,
+          defaultValue: 1
+        },
+        // Pitch shift, -12 - 12 in whole steps.
+        {
+          key: 'pitch',
+          label: '语调',
+          renderTypeList: [FlowNodeInputTypeEnum.numberInput, FlowNodeInputTypeEnum.reference],
+          valueType: WorkflowIOValueTypeEnum.number,
+          required: false,
+          min: -12,
+          max: 12,
+          step: 1,
+          defaultValue: 0
+        },
+        // Emotion preset; 'auto' is this tool's own "no explicit emotion" choice.
+        {
+          key: 'emotion',
+          label: '情绪',
+          renderTypeList: [FlowNodeInputTypeEnum.select, FlowNodeInputTypeEnum.reference],
+          valueType: WorkflowIOValueTypeEnum.string,
+          required: false,
+          defaultValue: 'auto',
+          list: [
+            { label: '自动(推荐)', value: 'auto' },
+            { label: '高兴 happy', value: 'happy' },
+            { label: '悲伤 sad', value: 'sad' },
+            { label: '愤怒 angry', value: 'angry' },
+            { label: '害怕 fearful', value: 'fearful' },
+            { label: '厌恶 disgusted', value: 'disgusted' },
+            { label: '惊讶 surprised', value: 'surprised' },
+            { label: '中性 calm', value: 'calm' }
+          ]
+        },
+        // English text normalization toggle (switch only, cannot be a reference).
+        {
+          key: 'english_normalization',
+          label: '英文规范化',
+          renderTypeList: [FlowNodeInputTypeEnum.switch],
+          valueType: WorkflowIOValueTypeEnum.boolean,
+          required: false,
+          defaultValue: false
+        }
+      ],
+      outputs: [
+        {
+          valueType: WorkflowIOValueTypeEnum.string,
+          key: 'audioUrl',
+          label: '音频链接',
+          description: '语音合成后的音频文件链接'
+        }
+      ]
+    }
+  ]
+});
diff --git a/modules/tool/packages/minmaxTTS/index.ts b/modules/tool/packages/minmaxTTS/index.ts
@@ -0,0 +1,10 @@
+import config from './config';
+import { InputType, OutputType, tool as toolCb } from './src';
+import { exportTool } from '@tool/utils/tool';
+
+// Package entry point: combine the handler, its zod input/output schemas and
+// the manifest into the object the plugin host loads.
+const minmaxTTSTool = exportTool({
+  toolCb,
+  InputType,
+  OutputType,
+  config
+});
+
+export default minmaxTTSTool;
diff --git a/modules/tool/packages/minmaxTTS/logo.png b/modules/tool/packages/minmaxTTS/logo.png
diff --git a/modules/tool/packages/minmaxTTS/package.json b/modules/tool/packages/minmaxTTS/package.json
@@ -0,0 +1,17 @@
+{
+  "name": "@fastgpt-plugins/tool-minmax-tts",
+  "module": "index.ts",
+  "type": "module",
+  "scripts": {
+    "build": "bun ../../../../scripts/build.ts"
+  },
+  "devDependencies": {
+    "@types/bun": "latest"
+  },
+  "peerDependencies": {
+    "typescript": "^5.0.0"
+  },
+  "dependencies": {
+    "zod": "^3.24.2"
+  }
+}
diff --git a/modules/tool/packages/minmaxTTS/src/index.ts b/modules/tool/packages/minmaxTTS/src/index.ts
@@ -0,0 +1,117 @@
+import { z } from 'zod';
+import { POST, GET } from '@tool/utils/request';
+import { uploadFile } from '@tool/utils/uploadFile';
+
+/**
+ * Voice options under the same flat keys that config.ts declares as tool
+ * inputs. All are optional because most of them are `required: false` in the
+ * manifest; missing values fall back to VOICE_DEFAULTS.
+ */
+const voiceFields = {
+  voice_id: z.string().optional(),
+  speed: z.number().optional(),
+  vol: z.number().optional(),
+  pitch: z.number().optional(),
+  emotion: z.string().optional(),
+  english_normalization: z.boolean().optional()
+};
+
+/**
+ * Input schema of the tool.
+ *
+ * Accepts the flat keys declared in config.ts (`voice_id`, `speed`, `vol`,
+ * `pitch`, `emotion`, `english_normalization`). The previous nested
+ * `voice_setting` object (with its `en_normalization` flag) is still accepted
+ * so existing direct callers keep working; flat keys win when both are given.
+ */
+export const InputType = z.object({
+  apiKey: z.string(),
+  text: z.string().nonempty(),
+  model: z.string().nonempty(),
+  ...voiceFields,
+  voice_setting: z
+    .object({
+      ...voiceFields,
+      en_normalization: z.boolean().optional()
+    })
+    .optional()
+});
+
+export const OutputType = z.object({
+  audioUrl: z.string()
+});
+
+const MINIMAX_BASE_URL = 'https://api.minimaxi.com/v1';
+
+// Fallbacks mirroring the `defaultValue`s declared in config.ts.
+const VOICE_DEFAULTS = {
+  voice_id: 'male-qn-qingse',
+  speed: 1,
+  vol: 1,
+  pitch: 0,
+  emotion: 'auto',
+  english_normalization: false
+};
+
+// Polling budget: one status query per second, for at most three minutes.
+const POLL_INTERVAL_MS = 1000;
+const MAX_POLL_ATTEMPTS = 180;
+
+/** Turns any thrown value into readable text (avoids "[object Object]"). */
+function describeError(error: unknown): string {
+  if (error instanceof Error) return error.message;
+  if (typeof error === 'string') return error;
+  try {
+    return JSON.stringify(error);
+  } catch {
+    return String(error);
+  }
+}
+
+/**
+ * Polls the async TTS task until it reaches a terminal state.
+ *
+ * @returns the `file_id` reported by a successful task
+ * @throws when the task fails, expires, succeeds without a file, or does not
+ *   finish within MAX_POLL_ATTEMPTS polls
+ */
+async function waitForFileId(
+  task_id: string | number,
+  headers: Record<string, string>
+): Promise<string | number> {
+  for (let attempt = 0; attempt < MAX_POLL_ATTEMPTS; attempt++) {
+    const { data: statusData } = await GET(`${MINIMAX_BASE_URL}/query/t2a_async_query_v2`, {
+      params: { task_id },
+      headers
+    });
+    const status = statusData?.status;
+    if (status === 'Success') {
+      if (statusData.file_id === undefined || statusData.file_id === null) {
+        throw new Error('TTS task succeeded but returned no file_id');
+      }
+      return statusData.file_id;
+    }
+    if (status === 'Failed') {
+      throw new Error('TTS task failed');
+    }
+    // NOTE(review): 'Expired' is assumed to be a terminal status of the query
+    // endpoint - confirm against the MiniMax async TTS documentation.
+    if (status === 'Expired') {
+      throw new Error('TTS task expired');
+    }
+    await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
+  }
+  throw new Error('TTS task timeout');
+}
+
+/**
+ * Converts text to speech with the MiniMax async TTS API and returns a link to
+ * the resulting audio.
+ *
+ * Flow: create an async task, poll it until it finishes, download the produced
+ * file, then re-upload it through `uploadFile` and return its access URL.
+ *
+ * @param apiKey MiniMax API key, sent as a Bearer token
+ * @param text text to synthesize
+ * @param model MiniMax speech model identifier
+ * @param voice_setting legacy nested voice options; flat keys take precedence
+ * @returns `{ audioUrl }` pointing at the uploaded mp3
+ * @throws Error prefixed with "TTS failed: " for any failure along the way
+ */
+export async function tool({
+  apiKey,
+  text,
+  model,
+  voice_setting,
+  ...flatVoice
+}: z.infer<typeof InputType>): Promise<z.infer<typeof OutputType>> {
+  // Resolution order for every option: flat input -> legacy nested -> default.
+  const voice_id = flatVoice.voice_id ?? voice_setting?.voice_id ?? VOICE_DEFAULTS.voice_id;
+  const speed = flatVoice.speed ?? voice_setting?.speed ?? VOICE_DEFAULTS.speed;
+  const vol = flatVoice.vol ?? voice_setting?.vol ?? VOICE_DEFAULTS.vol;
+  const pitch = flatVoice.pitch ?? voice_setting?.pitch ?? VOICE_DEFAULTS.pitch;
+  const emotion = flatVoice.emotion ?? voice_setting?.emotion ?? VOICE_DEFAULTS.emotion;
+  const english_normalization =
+    flatVoice.english_normalization ??
+    voice_setting?.english_normalization ??
+    voice_setting?.en_normalization ??
+    VOICE_DEFAULTS.english_normalization;
+
+  const headers = {
+    Authorization: `Bearer ${apiKey}`,
+    'Content-Type': 'application/json'
+  };
+  // these params are advanced settings, now not allow user to customize
+  const defaultSetting = {
+    pronunciation_dict: {
+      tone: []
+    },
+    audio_setting: {
+      audio_sample_rate: 32000,
+      bitrate: 128000,
+      format: 'mp3',
+      channel: 2
+    },
+    // NOTE(review): 'spacious_echo' adds an echo effect to every request while
+    // the other modifiers are neutral (0) - confirm this is intentional.
+    voice_modify: {
+      pitch: 0,
+      intensity: 0,
+      timbre: 0,
+      sound_effects: 'spacious_echo'
+    }
+  };
+
+  try {
+    // create tts task
+    const { data: taskData } = await POST(
+      `${MINIMAX_BASE_URL}/t2a_async_v2`,
+      {
+        model,
+        text,
+        language_boost: 'auto',
+        voice_setting: {
+          voice_id,
+          speed,
+          vol,
+          pitch,
+          // 'auto' is this tool's own placeholder for "let the model decide";
+          // leave the field out instead of sending the placeholder.
+          ...(emotion === 'auto' ? {} : { emotion }),
+          // NOTE(review): sent as `english_normalization`, the same name as
+          // the config key; previously `en_normalization` - confirm the field
+          // name against the MiniMax API reference.
+          english_normalization
+        },
+        ...defaultSetting
+      },
+      {
+        headers
+      }
+    );
+
+    // Without a task id there is nothing to poll; fail fast instead of
+    // spending the whole polling budget on a task that was never created.
+    const task_id = taskData?.task_id;
+    if (task_id === undefined || task_id === null || task_id === '') {
+      // `base_resp.status_msg` is presumably where MiniMax reports the reason;
+      // optional chaining keeps this harmless if the field is absent.
+      const reason = taskData?.base_resp?.status_msg ?? 'no task_id in response';
+      throw new Error(`TTS task was not created: ${reason}`);
+    }
+
+    // file can be downloaded when task status is success
+    const file_id = await waitForFileId(task_id, headers);
+
+    // retrieve file content
+    const { data: fileBuffer } = await GET(`${MINIMAX_BASE_URL}/files/retrieve_content`, {
+      params: { file_id },
+      headers,
+      responseType: 'arrayBuffer'
+    });
+
+    const { accessUrl: audioUrl } = await uploadFile({
+      buffer: Buffer.from(fileBuffer),
+      defaultFilename: 'minimax_tts.mp3'
+    });
+
+    return { audioUrl };
+  } catch (error) {
+    throw new Error(`TTS failed: ${describeError(error)}`);
+  }
+}
diff --git a/modules/tool/packages/minmaxTTS/test/index.test.ts b/modules/tool/packages/minmaxTTS/test/index.test.ts
@@ -0,0 +1,8 @@
+import { expect, test } from 'vitest';
+import tool from '..';
+
+// Smoke test: the package entry must expose the manifest fields and a callback.
+// vitest's `test` takes the test name first; passing only a callback puts it in
+// the name position, so no test body is registered and the assertions never run.
+test('minmaxTTS tool exposes name, description and cb', async () => {
+  expect(tool.name).toBeDefined();
+  expect(tool.description).toBeDefined();
+  expect(tool.cb).toBeDefined();
+});