Skip to content

Commit 1da9ea8

Browse files
committed
feat: 添加 OpenAI TTS worker 支持,允许使用 OpenAI 的 TTS API 进行音频合成
- 实现了 openai_tts_worker 函数,支持异步处理文本到语音的转换。
- 更新 get_tts_worker 函数以支持 OpenAI TTS 作为可选的 TTS 处理器。
- 在 settings.py 中移除不必要的前端渲染字段,以优化配置管理。
- 在 CSS 中添加 tooltip 的可见性控制,改善用户界面体验。
1 parent 90ad2f5 commit 1da9ea8

3 files changed

Lines changed: 113 additions & 1 deletion

File tree

main_logic/tts_client.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1098,6 +1098,110 @@ def gemini_tts_worker(request_queue, response_queue, audio_api_key, voice_id):
10981098
text_buffer.append(tts_text)
10991099

11001100

1101+
def openai_tts_worker(request_queue, response_queue, audio_api_key, voice_id):
    """
    OpenAI TTS worker (used for the default voice).

    Uses the OpenAI TTS API (gpt-4o-mini-tts). Per the original author's note,
    OpenAI TTS does not accept streaming input, only streaming output, so text
    is accumulated per speech id and sent in a single request once the
    terminator arrives; the audio is then received and forwarded as a stream.

    Protocol, as implemented below:
      * ``("__ready__", True)`` is put on ``response_queue`` once the client
        has been created; ``("__ready__", False)`` is put if the ``openai``
        package cannot be imported or the client cannot be constructed.
      * Each request is a ``(speech_id, text)`` tuple. A ``speech_id`` that
        differs from the current one discards any text buffered so far.
      * A request whose ``speech_id`` is ``None`` triggers synthesis of the
        buffered text, after which the buffer is reset.
      * Audio is put on ``response_queue`` as the value returned by
        ``_resample_audio(samples, 24000, 48000)`` for each received chunk.

    Args:
        request_queue: Multi-process request queue yielding (speech_id, text) tuples.
        response_queue: Multi-process response queue for audio data (also
            carries the ready signal).
        audio_api_key: API key passed to the OpenAI client.
        voice_id: Voice id; falls back to "marin" when falsy (the original
            docstring lists: marin, alloy, ash, ballad, coral, echo, fable,
            onyx, nova, sage, shimmer).
    """
    import asyncio

    def drain_requests():
        """Discard incoming requests until reading from the queue raises."""
        while True:
            try:
                # The tuple unpack is kept from the original: an item that is
                # not a 2-tuple also ends the loop.
                _sid, _text = request_queue.get()
            except Exception:
                break

    try:
        from openai import AsyncOpenAI
    except ImportError:
        logger.error("❌ 无法导入 openai 库,OpenAI TTS 不可用")
        response_queue.put(("__ready__", False))
        drain_requests()
        return

    # Fall back to the default voice "marin".
    if not voice_id:
        voice_id = "marin"

    # Build the client up front so a construction failure (for example a
    # missing API key) is reported through the same ("__ready__", False)
    # signal as the ImportError path instead of leaving the parent without
    # any ready message.
    try:
        client = AsyncOpenAI(api_key=audio_api_key)
    except Exception as e:
        logger.error(f"OpenAI TTS 客户端初始化失败: {e}")
        response_queue.put(("__ready__", False))
        drain_requests()
        return

    async def synthesize(full_text):
        """Request speech for ``full_text`` and forward resampled audio chunks."""
        # Bytes that do not yet form a whole 16-bit sample; np.frombuffer
        # raises ValueError on a buffer whose size is not a multiple of 2.
        leftover = b""
        # PCM format noted by the original author: 24000Hz, 16-bit, mono.
        async with client.audio.speech.with_streaming_response.create(
            model="gpt-4o-mini-tts",
            voice=voice_id,
            input=full_text,
            response_format="pcm",
        ) as response:
            # Receive the audio as a stream.
            async for chunk in response.iter_bytes(chunk_size=4096):
                if not chunk:
                    continue
                data = leftover + chunk
                usable = len(data) - (len(data) % 2)
                leftover = data[usable:]
                if not usable:
                    continue
                audio_array = np.frombuffer(data[:usable], dtype=np.int16)
                # Resample from 24000Hz to 48000Hz.
                resampled_bytes = _resample_audio(audio_array, 24000, 48000)
                response_queue.put(resampled_bytes)

    async def async_worker():
        """Main loop of the async TTS worker."""
        current_speech_id = None
        text_buffer = []  # accumulated text for the current speech id

        # OpenAI TTS is HTTP based, so there is no persistent connection to
        # establish; signal readiness right away.
        logger.info("OpenAI TTS 已就绪,发送就绪信号")
        response_queue.put(("__ready__", True))

        try:
            loop = asyncio.get_running_loop()

            while True:
                try:
                    # Queue.get blocks, so run it in the default executor.
                    sid, tts_text = await loop.run_in_executor(None, request_queue.get)
                except Exception as e:
                    # Log before stopping so the worker does not end silently.
                    logger.error(f"OpenAI TTS 请求队列读取失败: {e}")
                    break

                if sid is None:
                    # Terminator received: synthesize the accumulated text.
                    if text_buffer and current_speech_id is not None:
                        full_text = "".join(text_buffer)
                        if full_text.strip():
                            try:
                                await synthesize(full_text)
                            except Exception as e:
                                logger.error(f"OpenAI TTS 合成失败: {e}")

                    # Reset the buffer for the next utterance.
                    text_buffer = []
                    current_speech_id = None
                    continue

                # A new speech id discards the old buffer and starts over.
                if current_speech_id != sid:
                    current_speech_id = sid
                    text_buffer = []

                # Accumulate text; nothing is sent until the terminator.
                if tts_text and tts_text.strip():
                    text_buffer.append(tts_text)

        except Exception as e:
            logger.error(f"OpenAI TTS Worker错误: {e}")
        finally:
            # Release the client's underlying HTTP resources.
            await client.close()

    # Run the async worker.
    try:
        asyncio.run(async_worker())
    except Exception as e:
        logger.error(f"OpenAI TTS Worker启动失败: {e}")
1203+
1204+
11011205
def dummy_tts_worker(request_queue, response_queue, audio_api_key, voice_id):
11021206
"""
11031207
空的TTS worker(用于不支持TTS的core_api)
@@ -1162,6 +1266,8 @@ def get_tts_worker(core_api_type='qwen', has_custom_voice=False):
11621266
return cogtts_tts_worker
11631267
elif core_api_type == 'gemini':
11641268
return gemini_tts_worker
1269+
elif core_api_type == 'openai':
1270+
return openai_tts_worker
11651271
else:
11661272
logger.error(f"{core_api_type}不支持原生TTS,请使用自定义语音")
11671273
return dummy_tts_worker

memory/settings.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@ def load_settings(self):
3737
self.lanlan_basic_config[i].pop('system_prompt', None)
3838
self.lanlan_basic_config[i].pop('live2d', None)
3939
self.lanlan_basic_config[i].pop('voice_id', None)
40+
# 前端渲染字段 - 仅用于模型显示,不应该出现在 prompt 中
41+
self.lanlan_basic_config[i].pop('model_type', None)
42+
self.lanlan_basic_config[i].pop('vrm', None)
43+
self.lanlan_basic_config[i].pop('vrm_animation', None)
44+
self.lanlan_basic_config[i].pop('lighting', None)
4045
# 工坊保留字段 - 由工坊系统管理,不应该被记忆系统读取
4146
for workshop_field in ['原始数据', '文件路径', '创意工坊物品ID',
4247
'description', 'tags', 'name',

static/css/index.css

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -466,12 +466,13 @@ box-shadow: 0 4px 12px rgba(68, 183, 254, 0.4);
466466
z-index: 22;
467467
pointer-events: none;
468468
animation: slideInBounce 0.6s cubic-bezier(0.68, -0.55, 0.265, 1.55);
469-
transition: opacity 0.3s ease, transform 0.3s ease;
469+
transition: opacity 0.3s ease, transform 0.3s ease, visibility 0s linear 0.3s;
470470
}
471471

472472
#chat-tooltip.hidden {
473473
opacity: 0;
474474
transform: translateX(-20px);
475+
visibility: hidden;
475476
pointer-events: none;
476477
}
477478

0 commit comments

Comments
 (0)