Skip to content

Commit d4af83a

Browse files
committed
Resolved the popping-sound issue. Added a hint for the permanently free GLM models.
1 parent 1f092f0 commit d4af83a

2 files changed

Lines changed: 16 additions & 5 deletions

File tree

config/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,8 @@ def get_character_data():
9494
AUDIO_API_KEY = OPENROUTER_API_KEY = ASSIST_API_KEY_OPENAI
9595
elif core_cfg['assistApi'] == 'glm':
9696
OPENROUTER_URL = "https://open.bigmodel.cn/api/paas/v4"
97-
SUMMARY_MODEL = "glm-4-air-250414"
98-
CORRECTION_MODEL = "glm-z1-air"
97+
SUMMARY_MODEL = "glm-4-air-250414" # glm-4-flash-250414 <-永久免费模型
98+
CORRECTION_MODEL = "glm-z1-air" # glm-z1-flash <-永久免费模型
9999
AUDIO_API_KEY = OPENROUTER_API_KEY = ASSIST_API_KEY_GLM
100100
else:
101101
logger.error("💥 Unknown assistApi: " + core_cfg['assistApi'])

main_helper/core.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -753,15 +753,26 @@ def speech_synthesis_worker(request_queue, response_queue, audio_api_key, voice_
753753
class Callback(ResultCallback):
754754
def __init__(self, response_queue):
755755
self.response_queue = response_queue
756+
self.cache = np.zeros(0).astype(np.float32)
756757
def on_open(self): pass
757-
def on_complete(self): pass
758+
def on_complete(self):
759+
if len(self.cache)>0:
760+
data = (resample(self.cache, orig_sr=24000, target_sr=48000)*32768.).clip(-32768, 32767).astype(np.int16).tobytes()
761+
self.response_queue.put(data)
762+
self.cache = np.zeros(0).astype(np.float32)
758763
def on_error(self, message: str): print(f"TTS Error: {message}")
759764
def on_close(self): pass
760765
def on_event(self, message): pass
761766
def on_data(self, data: bytes) -> None:
762767
audio = np.frombuffer(data, dtype=np.int16).astype(np.float32) / 32768.0
763-
data = (resample(audio, orig_sr=24000, target_sr=48000)*32767.).clip(-32767, 32766).astype(np.int16).tobytes()
764-
self.response_queue.put(data)
768+
self.cache = np.concatenate([self.cache, audio])
769+
if len(self.cache)>=8000:
770+
data = self.cache[:8000]
771+
data = (resample(data, orig_sr=24000, target_sr=48000)*32768.).clip(-32768, 32767).astype(np.int16).tobytes()
772+
self.response_queue.put(data)
773+
self.cache = self.cache[8000:]
774+
775+
765776
callback = Callback(response_queue)
766777
current_speech_id = None
767778
synthesizer = None

0 commit comments

Comments
 (0)