I followed the provided instructions.
I converted the demo_part3 notebook into a regular Python script (tts.py) to test the code:
# Import necessary libraries
import os
import torch
from openvoice import se_extractor
from openvoice.api import ToneColorConverter
from melo.api import TTS

# Constants
ckpt_converter = 'checkpoints_v2/converter'
device = "cuda:0" if torch.cuda.is_available() else "cpu"
output_dir = 'outputs_v2'

# Create the output directory if it does not exist
os.makedirs(output_dir, exist_ok=True)

# Initialize the tone color converter
tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')

# Extract the tone color embedding of the target speaker
reference_speaker = 'resources/example_reference.mp3'
target_se, audio_name = se_extractor.get_se(reference_speaker, tone_color_converter, vad=False)

# Texts for the various languages
texts = {
    'EN_NEWEST': "Did you ever hear a folk tale about a giant turtle?",
    'EN': "Did you ever hear a folk tale about a giant turtle?",
    'ES': "El resplandor del sol acaricia las olas, pintando el cielo con una paleta deslumbrante.",
    'FR': "La lueur dorée du soleil caresse les vagues, peignant le ciel d'une palette éblouissante.",
    'ZH': "在这次vacation中,我们计划去Paris欣赏埃菲尔铁塔和卢浮宫的美景。",
    'JP': "彼は毎朝ジョギングをして体を健康に保っています。",
    'KR': "안녕하세요! 오늘은 날씨가 정말 좋네요.",
}

# Path for the temporary source audio file
src_path = f'{output_dir}/tmp.wav'
speed = 1.0  # speed is adjustable

print("Processing TTS...")

# Process each language and text
for language, text in texts.items():
    print(f"Processing {language}...")
    model = TTS(language=language, device=device)
    speaker_ids = model.hps.data.spk2id
    for speaker_key in speaker_ids.keys():
        speaker_id = speaker_ids[speaker_key]
        speaker_key = speaker_key.lower().replace('_', '-')
        # Load the source speaker embedding for this base speaker
        source_se = torch.load(f'checkpoints_v2/base_speakers/ses/{speaker_key}.pth', map_location=device)
        # Generate speech and save it to the temporary file
        model.tts_to_file(text, speaker_id, src_path, speed=speed)
        save_path = f'{output_dir}/output_v2_{speaker_key}.wav'
        # Convert the tone color to the target speaker
        encode_message = "@MyShell"
        tone_color_converter.convert(
            audio_src_path=src_path,
            src_se=source_se,
            tgt_se=target_se,
            output_path=save_path,
            message=encode_message)

print("TTS processing complete. Check the outputs in:", output_dir)
When I run it, I get:
(openvoice) ╭─arthur at aquarelle in ~/dev/ai/OpenVoice on main✘✘✘ 24-05-07 - 23:45:23
╰─(openvoice) ⠠⠵ python tts.py on main|…8
/home/arthur/.anaconda3/envs/openvoice/lib/python3.9/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.
warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.")
Loaded checkpoint 'checkpoints_v2/converter/checkpoint.pth'
missing/unexpected keys: [] []
OpenVoice version: v2
Could not load library libcudnn_cnn_infer.so.8. Error: /lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8: undefined symbol: _ZN5cudnn14cublasSaxpy_v2EP13cublasContextiPKfS3_iPfi, version libcudnn_ops_infer.so.8
Please make sure libcudnn_cnn_infer.so.8 is in your library path!
[1] 2253274 IOT instruction (core dumped) python tts.py
(openvoice) ╭─arthur at aquarelle in ~/dev/ai/OpenVoice on main✘✘✘ 24-05-07 - 23:45:35
╰─(openvoice) ⠠⠵
What am I doing wrong?
This is Ubuntu 23.04, and when I ran into this error I tried:
sudo apt install libcudnn9-static-cuda-12
sudo apt install libcudnn8 libcudnn8-dev
but it didn't help.
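In case it helps narrow this down, here is a minimal check, independent of OpenVoice, that may reproduce the load failure in isolation. This is just a diagnostic sketch; the library name is copied from the error message above:

import ctypes
import ctypes.util

# Show where (if anywhere) the dynamic loader resolves the cuDNN sublibrary
print(ctypes.util.find_library("cudnn_cnn_infer"))

# Loading it directly may raise the same undefined-symbol error if the
# system cuDNN does not match the CUDA libraries it gets bound against
ctypes.CDLL("libcudnn_cnn_infer.so.8")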
I have CUDA and everything else installed; I run dozens of different CUDA/PyTorch/AI-related projects on this machine, including most of the TTS projects available on GitHub.
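For completeness, a quick way to sanity-check what the environment reports (standard PyTorch calls, nothing OpenVoice-specific):

import torch

# Compare what PyTorch was built against with what it can use at runtime
print("torch:", torch.__version__)
print("CUDA build version:", torch.version.cuda)
print("CUDA available:", torch.cuda.is_available())
print("cuDNN version:", torch.backends.cudnn.version())
print("cuDNN available:", torch.backends.cudnn.is_available())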
Any help is very welcome.
Thank you.