|
29 | 29 |
|
30 | 30 | class F5TTSCreate: |
31 | 31 | voice_reg = re.compile(r"\{([^\}]+)\}") |
32 | | - model_types = ["F5", "F5-JP", "F5-FR", "E2"] |
| 32 | + model_types = ["F5", "F5-HI", "F5-JP", "F5-FR", "E2"] |
33 | 33 | vocoder_types = ["vocos", "bigvgan"] |
34 | 34 | tooltip_seed = "Seed. -1 = random" |
35 | 35 |
|
@@ -87,6 +87,7 @@ def load_voice(ref_audio, ref_text): |
87 | 87 | def get_model_funcs(self): |
88 | 88 | return { |
89 | 89 | "F5": self.load_f5_model, |
| 90 | + "F5-HI": self.load_f5_model_hi, |
90 | 91 | "F5-JP": self.load_f5_model_jp, |
91 | 92 | "F5-FR": self.load_f5_model_fr, |
92 | 93 | "E2": self.load_e2_model, |
@@ -170,13 +171,29 @@ def cached_path(self, url): |
170 | 171 | return None |
171 | 172 | return str(cached_path(url)) # noqa E501 |
172 | 173 |
|
173 | | - def load_f5_model_url(self, url, vocoder_name, vocab_url=None): |
174 | | - vocoder = self.load_vocoder(vocoder_name) |
175 | | - model_cls = DiT |
| 174 | + def load_f5_model_hi(self, vocoder): |
176 | 175 | model_cfg = dict( |
177 | | - dim=1024, depth=22, heads=16, |
| 176 | + dim=768, depth=18, heads=12, |
178 | 177 | ff_mult=2, text_dim=512, conv_layers=4 |
179 | 178 | ) |
| 179 | + return self.load_f5_model_url( |
| 180 | + "hf://SPRINGLab/F5-Hindi-24KHz/model_2500000.safetensors", |
| 181 | + "vocos", |
| 182 | + "hf://SPRINGLab/F5-Hindi-24KHz/vocab.txt", |
| 183 | + model_cfg=model_cfg, |
| 184 | + ) |
| 185 | + |
| 186 | + def load_f5_model_url( |
| 187 | + self, url, vocoder_name, vocab_url=None, model_cfg=None |
| 188 | + ): |
| 189 | + vocoder = self.load_vocoder(vocoder_name) |
| 190 | + model_cls = DiT |
| 191 | + if model_cfg is None: |
| 192 | + model_cfg = dict( |
| 193 | + dim=1024, depth=22, heads=16, |
| 194 | + ff_mult=2, text_dim=512, conv_layers=4 |
| 195 | + ) |
| 196 | + |
180 | 197 | ckpt_file = str(self.cached_path(url)) # noqa E501 |
181 | 198 |
|
182 | 199 | if vocab_url is None: |
|
0 commit comments