99from abc import ABC
1010from io import BytesIO
1111from pathlib import Path
12- from typing import Dict , Optional
12+ from typing import Any , Dict , Optional
1313from uuid import uuid4
1414
1515from app .core .config import settings
1616from app .log import logger
17+ from app .utils .http import RequestUtils
1718
1819
1920class AgentCapabilityProvider (ABC ):
@@ -411,6 +412,160 @@ def _normalize_tts_model(self) -> str:
411412 return model
412413
413414
class MiniMaxAudioProvider(OpenAIChatAudioProvider):
    """MiniMax audio provider; speech synthesis uses the official T2A HTTP API."""

    # Provider key used for registration and in log messages.
    name = "minimax"
    DISPLAY_NAME = "MiniMax"
    # Fallback API root used when no usable base URL is configured.
    DEFAULT_BASE_URL = "https://api.minimaxi.com/v1"
    DEFAULT_STT_MODEL = "MiniMax-M2.7"
    DEFAULT_TTS_MODEL = "speech-2.8-turbo"
    DEFAULT_VOICE = "Chinese (Mandarin)_Lyrical_Voice"
    # NOTE(review): consumed by the parent class; presumably makes audio input
    # be sent as a data URL - confirm against OpenAIChatAudioProvider.
    AUDIO_INPUT_DATA_URL = True
    # TTS model names accepted as-is; anything else falls back to the default.
    SUPPORTED_TTS_MODELS = frozenset(
        {
            "speech-2.8-hd",
            "speech-2.8-turbo",
            "speech-2.6-hd",
            "speech-2.6-turbo",
            "speech-02-hd",
            "speech-02-turbo",
            "speech-01-hd",
            "speech-01-turbo",
        }
    )

    def _build_client(self, api_key: str, base_url: Optional[str]):
        """Build the OpenAI-compatible MiniMax client.

        The base URL is normalized first, so a mistakenly configured
        Anthropic-style endpoint (``.../anthropic/v1``) still works.

        :param api_key: MiniMax API key.
        :param base_url: configured base URL, or None/empty for the default.
        :return: an ``openai.OpenAI`` client with ``max_retries=3``.
        """
        # Imported lazily so the module loads without the openai package.
        from openai import OpenAI

        return OpenAI(
            api_key=api_key,
            base_url=self._normalize_api_base_url(base_url),
            max_retries=3,
        )

    @classmethod
    def _normalize_api_base_url(cls, base_url: Optional[str]) -> str:
        """Normalize the MiniMax API base URL so OpenAI/T2A paths can be appended.

        Rules, applied in order:

        * surrounding whitespace and trailing slashes are removed;
        * a trailing ``/t2a_v2`` (a pasted full T2A endpoint) is dropped;
        * a missing, blank or slash-only value falls back to
          ``DEFAULT_BASE_URL``;
        * a trailing ``/anthropic/v1`` or ``/openai/v1`` is replaced by ``/v1``;
        * ``/v1`` is appended when the URL does not already end with it.

        :param base_url: configured base URL; may be None, empty or blank.
        :return: base URL ending in ``/v1`` without a trailing slash.
        """
        t2a_suffix = "/t2a_v2"
        normalized = (base_url or "").strip().rstrip("/")
        if normalized.endswith(t2a_suffix):
            # Trim again so ".../v1//t2a_v2" does not leave a dangling slash.
            normalized = normalized[: -len(t2a_suffix)].rstrip("/")
        if not normalized:
            # Fall back only after cleaning: a whitespace-only setting is truthy
            # and would otherwise be turned into the bogus URL "/v1".
            normalized = cls.DEFAULT_BASE_URL.strip().rstrip("/")
        for suffix in ("/anthropic/v1", "/openai/v1"):
            if normalized.endswith(suffix):
                return normalized[: -len(suffix)] + "/v1"
        if not normalized.endswith("/v1"):
            normalized = f"{normalized}/v1"
        return normalized

    @classmethod
    def _build_t2a_url(cls, base_url: Optional[str]) -> str:
        """Return the URL of MiniMax's synchronous T2A endpoint.

        :param base_url: configured base URL, or None/empty for the default.
        :return: normalized base URL followed by ``/t2a_v2``.
        """
        return f"{cls._normalize_api_base_url(base_url)}/t2a_v2"

    def _normalize_stt_model(self) -> str:
        """Return the transcription model, replacing non-MiniMax defaults.

        An empty ``settings.AUDIO_INPUT_MODEL`` or one starting with ``gpt-`` /
        ``mimo-`` (case-insensitive) is replaced by ``DEFAULT_STT_MODEL``.
        """
        model = (settings.AUDIO_INPUT_MODEL or "").strip()
        if not model or model.lower().startswith(("gpt-", "mimo-")):
            return self.DEFAULT_STT_MODEL
        return model

    def _normalize_tts_model(self) -> str:
        """Return the T2A model, falling back when the setting is unsupported.

        ``settings.AUDIO_OUTPUT_MODEL`` is compared in lowercase against
        ``SUPPORTED_TTS_MODELS``; any other value yields ``DEFAULT_TTS_MODEL``.
        """
        model = (settings.AUDIO_OUTPUT_MODEL or "").strip().lower()
        if model in self.SUPPORTED_TTS_MODELS:
            return model
        return self.DEFAULT_TTS_MODEL

    def _normalize_voice_id(self) -> str:
        """Return the voice id, replacing other providers' default voices.

        An empty ``settings.AUDIO_OUTPUT_VOICE``, ``alloy`` or ``mimo_default``
        is replaced by ``DEFAULT_VOICE``.
        """
        voice_id = (settings.AUDIO_OUTPUT_VOICE or "").strip()
        if not voice_id or voice_id in {"alloy", "mimo_default"}:
            return self.DEFAULT_VOICE
        return voice_id

    @staticmethod
    def _decode_audio_payload(audio_data: str) -> bytes:
        """Decode the audio string returned by MiniMax T2A.

        The payload is tried as hex first (the format requested by
        ``synthesize_speech``); if it is not valid hex it is decoded as base64.

        :param audio_data: encoded audio text from the response.
        :return: raw audio bytes.
        """
        # Drop every whitespace character so wrapped payloads still parse.
        normalized = "".join((audio_data or "").split())
        try:
            return bytes.fromhex(normalized)
        except ValueError:
            return base64.b64decode(audio_data)

    @staticmethod
    def _extract_minimax_error(data: Dict[str, Any]) -> Optional[str]:
        """Extract the error carried in a MiniMax ``base_resp`` object.

        :param data: decoded JSON body of a MiniMax response.
        :return: ``"<status_code>: <status_msg>"`` for a failure, or None when
            ``base_resp.status_code`` is missing, ``0`` or ``"0"``.
        """
        base_resp = data.get("base_resp") or {}
        status_code = base_resp.get("status_code")
        if status_code in (None, 0, "0"):
            return None
        status_msg = base_resp.get("status_msg") or "unknown error"
        return f"{status_code}: {status_msg}"

    def synthesize_speech(self, text: str) -> Optional[Path]:
        """Synthesize ``text`` into an audio file through the MiniMax T2A API.

        Every failure (missing API key, transport error, HTTP error status,
        MiniMax business error, missing audio) is logged and reported as None
        instead of being raised.

        :param text: text to speak; empty text returns None without a request.
        :return: path of the written ``.opus`` file under
            ``settings.TEMP_PATH / "voice"``, or None on failure.
        """
        if not text:
            return None

        try:
            api_key, base_url = self._output_credentials()
            if not api_key:
                raise ValueError("音频输出 provider 未配置 API Key")
            response = RequestUtils(
                headers={
                    "Authorization": f"Bearer {api_key}",
                    "Content-Type": "application/json",
                    "Accept": "application/json",
                },
                proxies=settings.PROXY or {},
                timeout=60,
            ).post_res(
                url=self._build_t2a_url(base_url),
                json={
                    "model": self._normalize_tts_model(),
                    "text": text,
                    "stream": False,
                    "language_boost": "auto",
                    "output_format": "hex",
                    "voice_setting": {
                        "voice_id": self._normalize_voice_id(),
                        "speed": 1,
                        "vol": 1,
                        "pitch": 0,
                    },
                    "audio_setting": {
                        "sample_rate": 32000,
                        "bitrate": 128000,
                        "format": "opus",
                        "channel": 1,
                    },
                },
            )
            # Compare with None explicitly. NOTE(review): post_res presumably
            # returns a requests.Response or None; a Response is falsy for
            # 4xx/5xx, so a plain truthiness test reports HTTP errors as
            # "no response" and never reaches the status-code check below.
            if response is None:
                raise ValueError("MiniMax T2A 请求无响应")
            if response.status_code >= 400:
                raise ValueError(f"MiniMax T2A HTTP {response.status_code}")

            result = response.json()
            minimax_error = self._extract_minimax_error(result)
            if minimax_error:
                raise ValueError(f"MiniMax T2A 返回错误: {minimax_error}")

            audio_data = ((result.get("data") or {}).get("audio") or "").strip()
            if not audio_data:
                raise ValueError("MiniMax T2A 响应中没有音频数据")

            voice_dir = settings.TEMP_PATH / "voice"
            voice_dir.mkdir(parents=True, exist_ok=True)
            # A random file name keeps separate syntheses from overwriting.
            output_path = voice_dir / f"{uuid4().hex}.opus"
            output_path.write_bytes(self._decode_audio_payload(audio_data))
            return output_path
        except Exception as err:
            # Best-effort boundary: callers receive None rather than an exception.
            logger.error(f"音频输出合成失败: provider={self.name}, error={err}")
            return None
567+
568+
414569class AgentCapabilityManager :
415570 """Agent 能力统一入口。"""
416571
@@ -420,6 +575,7 @@ class AgentCapabilityManager:
420575 OpenAIAudioProvider .name : OpenAIAudioProvider (),
421576 OpenAIChatAudioProvider .name : OpenAIChatAudioProvider (),
422577 MiMoAudioProvider .name : MiMoAudioProvider (),
578+ MiniMaxAudioProvider .name : MiniMaxAudioProvider (),
423579 }
424580
425581 @classmethod
0 commit comments