|
58 | 58 | from sdialog.audio.utils import logger |
59 | 59 | from sdialog.audio.dialog import AudioDialog |
60 | 60 | from sdialog.audio.processing import AudioProcessor |
61 | | -from sdialog.audio.tts import BaseTTS, Qwen3TTS |
62 | 61 | from sdialog.audio.jsalt import MedicalRoomGenerator, RoomRole |
63 | 62 | from sdialog.audio.room import Room, RoomPosition, DirectivityType |
| 63 | +from sdialog.audio.tts import BaseTTS, Qwen3TTS, Qwen3TTSVoiceClone |
64 | 64 | from sdialog.audio.voice_database import Voice, BaseVoiceDatabase, HuggingfaceVoiceDatabase |
65 | 65 | from sdialog.audio import generate_utterances_audios, generate_audio_room_accoustic |
66 | 66 | from sdialog.audio.impulse_response_database import ImpulseResponseDatabase, RecordingDevice |
@@ -359,13 +359,12 @@ def __init__( |
359 | 359 |
|
360 | 360 | self.tts_engine = tts_engine |
361 | 361 | if self.tts_engine is None: |
362 | | - logger.warning("No TTS provided, using Qwen3-TTS as the default TTS model: Qwen/Qwen3-TTS-12Hz-1.7B-Base") |
363 | | - self.tts_engine = Qwen3TTS() |
| 362 | + logger.warning("No TTS provided, using voice cloning Qwen3-TTS as the default TTS model (Qwen3-TTS-12Hz-1.7B-Base)") |
| 363 | + self.tts_engine = Qwen3TTSVoiceClone() |
364 | 364 |
|
365 | 365 | self.voice_database = voice_database |
366 | 366 | if self.voice_database is None and isinstance(self.tts_engine, BaseTTS): |
367 | | - logger.warning("No voice database provided, make sure the TTS engine supports voice design or voice " |
368 | | - "cloning if you want to use the voice assignment features of the audio pipeline.") |
| 367 | + logger.warning("No voice database provided, using default voice database for the TTS engine.") |
369 | 368 | # TODO: default voice database SHOULD be part of the TTS engine! |
370 | 369 | # since each engine supports a predefined voice database we should get the default as: |
371 | 370 | # self.voice_database = self.tts_engine.voice_database |
|
0 commit comments