Skip to content

windows上无法运行cosyvoice #979

Open
@JV-X

Description

@JV-X

Describe the bug
我尝试在 Win11 上运行 CosyVoice，并写了一些代码尝试进行 TTS，结果出现 `ModuleNotFoundError: No module named 'ttsfrd'` 错误。根据文档，这个库应该是可以不安装的：文档说 ttsfrd 不可用的时候会使用 WeTextProcessing，但我在代码里没有找到任何关于 WeTextProcessing 的代码。怎样才能在 Win11 上运行 CosyVoice？

To Reproduce
运行下列脚本

import re

import torch
import os
from http import HTTPStatus
from typing import List, Optional, Tuple, Dict
from uuid import uuid4
import sys
sys.path.insert(1, "../cosyvoice")
sys.path.insert(1, "../sensevoice")
sys.path.insert(1, "../cosyvoice/third_party/AcademiCodec")
sys.path.insert(1, "../cosyvoice/third_party/Matcha-TTS")
sys.path.insert(1, "../")
from cosyvoice.cli.cosyvoice import CosyVoice
from cosyvoice.utils.file_utils import load_wav
import librosa
import torchaudio

# Sample rates in Hz: prompt WAVs are loaded at prompt_sr; target_sr is the rate
# reported for (and used when saving) the synthesized audio.
prompt_sr, target_sr = 16000, 22050
# Build the model wrapper from a relative path string.
# NOTE(review): presumably a local pretrained-model directory resolved against the
# current working directory — confirm.
cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-Instruct')
prompt_text = '希望你以后能够做的比我还好呦。' # transcript (text) of the recorded prompt audio
# Peak-amplitude ceiling: postprocess() rescales audio whose absolute peak exceeds this.
max_val = 0.8

def postprocess(speech, top_db=60, hop_length=220, win_length=440):
    """Trim, peak-limit, and pad a prompt waveform.

    Steps, in order:
      1. ``librosa.effects.trim`` removes leading/trailing audio below
         ``top_db`` dB, analysed with ``win_length``-sample frames and a
         ``hop_length``-sample hop.
      2. If the absolute peak exceeds the module-level ``max_val``, the
         waveform is rescaled so its peak equals ``max_val``.
      3. ``int(target_sr * 0.2)`` zero samples are appended along dim 1.

    Args:
        speech: Waveform tensor. Assumed to be shaped ``(1, num_samples)``
            because it is concatenated with a ``(1, k)`` zero tensor on
            dim 1 — TODO confirm against ``load_wav``.
        top_db: Threshold forwarded to ``librosa.effects.trim``.
        hop_length: Hop size forwarded to ``librosa.effects.trim``.
        win_length: Forwarded to ``librosa.effects.trim`` as ``frame_length``.

    Returns:
        The processed waveform with the zero tail appended.
    """
    trimmed, _ = librosa.effects.trim(
        speech,
        top_db=top_db,
        frame_length=win_length,
        hop_length=hop_length,
    )

    # Only attenuate: waveforms already within the ceiling are left untouched.
    peak = trimmed.abs().max()
    if peak > max_val:
        trimmed = trimmed / peak * max_val

    silent_tail = torch.zeros(1, int(target_sr * 0.2))
    return torch.concat([trimmed, silent_tail], dim=1)



# Prompt audio per speaker name. Each entry is built at import time: the WAV is
# loaded with load_wav(..., prompt_sr) and then passed through postprocess().
DOCTOR_TIMBRES_DICT = {
    "毛医生": postprocess(load_wav('../3.wav', prompt_sr)),
    "向医生": postprocess(load_wav('../2.wav', prompt_sr)),
}
# Module-level accumulator: text_to_speech_v2() appends every synthesized segment
# here and never clears it, so it grows across calls.
tts_speeches = []

def text_to_speech_v2(text, doctor):
    """Synthesize ``text`` using the prompt voice registered for ``doctor``.

    If ``text`` matches ``生成风格:<style>;播报内容:<content>``, the model
    input becomes ``<style><endofprompt><content>``; otherwise ``text`` is
    used unchanged and "No match found" is printed.

    Note: the synthesized segment is appended to the module-level
    ``tts_speeches`` list, and the returned waveform is the concatenation of
    *every* segment in that list — so repeated calls return all earlier
    utterances followed by the new one.

    Args:
        text: Raw request text, optionally carrying the style/content markup.
        doctor: Key into ``DOCTOR_TIMBRES_DICT`` selecting the prompt audio.

    Returns:
        A ``(target_sr, waveform)`` tuple.

    Raises:
        KeyError: If ``doctor`` is not a key of ``DOCTOR_TIMBRES_DICT``.
    """
    parsed = re.search(r"生成风格:\s*([^;]+);播报内容:\s*(.+)", text)
    if parsed is None:
        print("No match found")
        tts_text = text
    else:
        style, content = (parsed.group(idx).strip() for idx in (1, 2))
        print(f"生成风格: {style}")
        print(f"播报内容: {content}")
        tts_text = f"{style}<endofprompt>{content}"

    model_input = cosyvoice.frontend.frontend_zero_shot(
        tts_text, prompt_text, DOCTOR_TIMBRES_DICT[doctor]
    )
    segment = cosyvoice.model.inference(**model_input)['tts_speech']
    tts_speeches.append(segment)
    # Join on dim 1 — the same axis postprocess() pads along.
    return target_sr, torch.concat(tts_speeches, dim=1)

# Demo driver: synthesize one utterance with the "向医生" prompt voice and save it.
file_name = "2025213.wav"
print(f'file path is:{file_name}')
t = "啊啊啊我好烦啊,头又疼肚子又痒脑壳又痛,脑壳发昏脚又伤,谁来救救可怜的我啊,我请他喝肥宅快乐水啊啊啊啊啊,嘤嘤嘤"
sample_rate, speech_data = text_to_speech_v2(t, "向医生")
# Write the synthesized audio to disk at the sample rate returned above.
torchaudio.save(file_name, speech_data, sample_rate)

Expected behavior
正常生成语音

Screenshots

Image

Desktop (please complete the following information):

  • OS: Windows 11

Additional context
error log:

PS D:\repo\funaudiollm-app-2025214\FunAudioLLM-APP>  d:; cd 'd:\repo\funaudiollm-app-2025214\FunAudioLLM-APP'; & 'c:\Users\xjv11\.conda\envs\funaudiollm_app\python.exe' 'c:\Users\xjv11\.vscode\extensions\ms-python.debugpy-2025.0.1-win32-x64\bundled\libs\debugpy\launcher' '51322' '--' 'D:\repo\funaudiollm-app-2025214\FunAudioLLM-APP\cosyvoice\demo.py' 
c:\Users\xjv11\.conda\envs\funaudiollm_app\Lib\site-packages\onnxruntime\capi\onnxruntime_validation.py:26: UserWarning: Unsupported Windows version (11). ONNX Runtime supports Windows 10 and above, only.
  warnings.warn(
Traceback (most recent call last):
  File "c:\Users\xjv11\.conda\envs\funaudiollm_app\Lib\runpy.py", line 198, in _run_module_as_main
    return _run_code(code, main_globals, None,
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\xjv11\.conda\envs\funaudiollm_app\Lib\runpy.py", line 88, in _run_code
    exec(code, run_globals)
  File "c:\Users\xjv11\.vscode\extensions\ms-python.debugpy-2025.0.1-win32-x64\bundled\libs\debugpy\launcher/../..\debugpy\__main__.py", line 71, in <module>
    cli.main()
  File "c:\Users\xjv11\.vscode\extensions\ms-python.debugpy-2025.0.1-win32-x64\bundled\libs\debugpy\launcher/../..\debugpy/..\debugpy\server\cli.py", line 501, in main
    run()
  File "c:\Users\xjv11\.vscode\extensions\ms-python.debugpy-2025.0.1-win32-x64\bundled\libs\debugpy\launcher/../..\debugpy/..\debugpy\server\cli.py", line 351, in run_file
    runpy.run_path(target, run_name="__main__")
  File "c:\Users\xjv11\.vscode\extensions\ms-python.debugpy-2025.0.1-win32-x64\bundled\libs\debugpy\_vendored\pydevd\_pydevd_bundle\pydevd_runpy.py", line 310, in run_path
    return _run_module_code(code, init_globals, run_name, pkg_name=pkg_name, script_name=fname)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\xjv11\.vscode\extensions\ms-python.debugpy-2025.0.1-win32-x64\bundled\libs\debugpy\_vendored\pydevd\_pydevd_bundle\pydevd_runpy.py", line 127, in _run_module_code
    _run_code(code, mod_globals, init_globals, mod_name, mod_spec, pkg_name, script_name)
  File "c:\Users\xjv11\.vscode\extensions\ms-python.debugpy-2025.0.1-win32-x64\bundled\libs\debugpy\_vendored\pydevd\_pydevd_bundle\pydevd_runpy.py", line 118, in _run_code
    exec(code, run_globals)
  File "D:\repo\funaudiollm-app-2025214\FunAudioLLM-APP\cosyvoice\demo.py", line 14, in <module>
    from cosyvoice.cli.cosyvoice import CosyVoice
  File "D:\repo\funaudiollm-app-2025214\FunAudioLLM-APP\cosyvoice\cosyvoice\cli\cosyvoice.py", line 18, in <module>
    from cosyvoice.cli.frontend import CosyVoiceFrontEnd
  File "D:\repo\funaudiollm-app-2025214\FunAudioLLM-APP\cosyvoice\cosyvoice\cli\frontend.py", line 24, in <module>
    import ttsfrd
ModuleNotFoundError: No module named 'ttsfrd'

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions