@@ -1098,6 +1098,110 @@ def gemini_tts_worker(request_queue, response_queue, audio_api_key, voice_id):
10981098 text_buffer .append (tts_text )
10991099
11001100
def openai_tts_worker(request_queue, response_queue, audio_api_key, voice_id):
    """
    OpenAI TTS worker (used for the default voice).

    Uses the OpenAI TTS API with the ``gpt-4o-mini-tts`` model. OpenAI TTS
    does not support streaming *input*, only streaming *output*, so the text
    of an utterance is accumulated and sent in one request, while the audio
    is received and forwarded chunk by chunk.

    Args:
        request_queue: Multiprocess request queue yielding
            ``(speech_id, text)`` tuples. A ``speech_id`` of ``None`` marks
            the end of the current utterance and triggers synthesis of the
            accumulated text.
        response_queue: Multiprocess response queue. Receives one
            ``("__ready__", bool)`` readiness signal, then the resampled
            audio payloads.
        audio_api_key: API key passed to the OpenAI client.
        voice_id: Voice ID; falls back to ``"marin"`` when falsy
            (supported: marin, alloy, ash, ballad, coral, echo, fable, onyx,
            nova, sage, shimmer).
    """
    import asyncio

    try:
        from openai import AsyncOpenAI
    except ImportError:
        logger.error("❌ 无法导入 openai 库,OpenAI TTS 不可用")
        response_queue.put(("__ready__", False))
        # Keep consuming (and discarding) requests until reading from the
        # queue fails, then exit.
        while True:
            try:
                _sid, _text = request_queue.get()
            except Exception:
                break
        return

    # Fall back to the default voice "marin".
    if not voice_id:
        voice_id = "marin"

    source_rate = 24000  # OpenAI "pcm" output: 24000 Hz, 16-bit, mono
    target_rate = 48000  # rate of the audio pushed to response_queue
    sample_width = 2  # bytes per int16 sample

    async def stream_speech(client, text):
        """Synthesize *text* and forward each resampled audio chunk."""
        pending = b""  # odd trailing byte carried over to the next chunk
        async with client.audio.speech.with_streaming_response.create(
            model="gpt-4o-mini-tts",
            voice=voice_id,
            input=text,
            response_format="pcm",
        ) as response:
            async for chunk in response.iter_bytes(chunk_size=4096):
                if not chunk:
                    continue
                data = pending + chunk if pending else chunk
                # np.frombuffer raises ValueError when the buffer size is not
                # a multiple of the element size, so only decode whole
                # samples and keep the remainder for the next iteration.
                usable = len(data) - (len(data) % sample_width)
                pending = data[usable:]
                if not usable:
                    continue
                audio_array = np.frombuffer(data[:usable], dtype=np.int16)
                # _resample_audio is defined elsewhere in this file;
                # presumably it returns the resampled PCM as bytes.
                response_queue.put(
                    _resample_audio(audio_array, source_rate, target_rate)
                )

    async def async_worker():
        """Main loop of the asynchronous TTS worker."""
        current_speech_id = None
        text_buffer = []  # text fragments accumulated for the current utterance

        client = AsyncOpenAI(api_key=audio_api_key)

        # OpenAI TTS is plain HTTP: there is no persistent connection to set
        # up, so readiness is signalled immediately.
        logger.info("OpenAI TTS 已就绪,发送就绪信号")
        response_queue.put(("__ready__", True))

        try:
            loop = asyncio.get_running_loop()

            while True:
                try:
                    # The queue's get() blocks, so run it in the default
                    # executor to keep the event loop free.
                    sid, tts_text = await loop.run_in_executor(
                        None, request_queue.get
                    )
                except Exception:
                    # Any failure reading the queue ends the worker.
                    break

                if sid is None:
                    # End-of-utterance marker: synthesize what was buffered.
                    if text_buffer and current_speech_id is not None:
                        full_text = "".join(text_buffer)
                        if full_text.strip():
                            try:
                                await stream_speech(client, full_text)
                            except Exception as e:
                                logger.error(f"OpenAI TTS 合成失败: {e}")

                    # Reset state for the next utterance.
                    text_buffer = []
                    current_speech_id = None
                    continue

                # A new speech ID discards any text buffered for the old one.
                if current_speech_id != sid:
                    current_speech_id = sid
                    text_buffer = []

                # Accumulate only; nothing is sent until the end marker.
                if tts_text and tts_text.strip():
                    text_buffer.append(tts_text)

        except Exception as e:
            logger.error(f"OpenAI TTS Worker错误: {e}")

    # Run the async worker on its own event loop.
    try:
        asyncio.run(async_worker())
    except Exception as e:
        logger.error(f"OpenAI TTS Worker启动失败: {e}")
1203+
1204+
11011205def dummy_tts_worker (request_queue , response_queue , audio_api_key , voice_id ):
11021206 """
11031207 空的TTS worker(用于不支持TTS的core_api)
@@ -1162,6 +1266,8 @@ def get_tts_worker(core_api_type='qwen', has_custom_voice=False):
11621266 return cogtts_tts_worker
11631267 elif core_api_type == 'gemini' :
11641268 return gemini_tts_worker
1269+ elif core_api_type == 'openai' :
1270+ return openai_tts_worker
11651271 else :
11661272 logger .error (f"{ core_api_type } 不支持原生TTS,请使用自定义语音" )
11671273 return dummy_tts_worker
0 commit comments