FunAudioLLM · KrugerCoder · Sep 6, 2024 · Sep 6, 2024 · Sep 6, 2024 · Sep 6, 2024
diff --git a/README.md b/README.md
@@ -146,7 +146,7 @@ Please see the demo website for details.
 
 ``` python
 # change iic/CosyVoice-300M-SFT for sft inference, or iic/CosyVoice-300M-Instruct for instruct inference
-python3 webui.py --port 50000 --model_dir pretrained_models/CosyVoice-300M
+python3 webui.py --port 50000 --model_dir pretrained_models/CosyVoice-300M --locale zh
 ```
 
 **Advanced Usage**

diff --git a/locales/en.json b/locales/en.json
@@ -0,0 +1,60 @@
+{
+    "en" : {
+        "inference_mode_list": {
+            "pretrained_voice": "Pretrained Voice", 
+            "3s_fast_replication": "3s Fast Replication", 
+            "crosslingual": "Cross-lingual Replication", 
+            "natural_language_control": "Natural Language Control"
+        },
+        "instruct_dict": {
+            "pretrained_voice": "1. Select pretrained voice\n2. Click the generate audio button", 
+            "3s_fast_replication": "1. Select prompt audio file or record prompt audio (not exceeding 30s). If both are provided, prompt audio file will be prioritized.\n2. Enter prompt text\n3. Click the generate audio button", 
+            "crosslingual": "1. Select prompt audio file or record prompt audio (not exceeding 30s). If both are provided, prompt audio file will be prioritized.\n2. Click the generate audio button", 
+            "natural_language_control": "1. Select pretrained voice\n2. Enter instruct text\n3. Click the generate audio button"
+        },
+        "boolean": {
+            "true": "Yes",
+            "false": "No"
+        },
+        "warnings": {
+            "nlp_model_warn": "You are using the natural language control mode, {} model does not support this mode, please use the iic/CosyVoice-300M-Instruct model",
+            "instruct_text": "You are using the natural language control mode, please enter instruct text",
+            "no_crosslingual_support": "You are using the cross-lingual replication mode, {} model does not support this mode, please use the iic/CosyVoice-300M model",
+            "crosslingual_instruct_ignored": "You are using the cross-lingual replication mode, instruct text will be ignored",
+            "crosslingual_prompt_audio_required": "You are using the cross-lingual replication mode, please provide prompt audio",
+            "prompt_audio_empty": "Prompt audio is empty, did you forget to input prompt audio?",
+            "sample_rate_error": "Prompt audio sample rate {} is lower than {}",
+            "prompt_text_empty": "Prompt text is empty, did you forget to input prompt text?",
+            "instruct_text_empty": "You are using the 3s fast replication mode, pretrained voice/instruct text will be ignored!"
+        },
+        "info": {
+            "prompt_wav": "You are using the natural language control mode, prompt audio/prompt text will be ignored",
+            "crosslingual_language_reminder": "You are using the cross-lingual replication mode, please ensure that the synthesis text and prompt text are in different languages",
+            "pretrained_voice_warning": "You are using the pretrained voice mode, prompt text/prompt audio/instruct text will be ignored!"
+        },
+        "markdown": {
+            "code_reference": "### Code repository [CosyVoice](https://github.com/FunAudioLLM/CosyVoice) Pretrained models [CosyVoice-300M](https://www.modelscope.cn/models/iic/CosyVoice-300M) [CosyVoice-300M-Instruct](https://www.modelscope.cn/models/iic/CosyVoice-300M-Instruct) [CosyVoice-300M-SFT](https://www.modelscope.cn/models/iic/CosyVoice-300M-SFT)",
+            "output_text_prompt": "#### Please enter the text to be synthesized, select the inference mode, and follow the prompts to proceed"
+        },
+        "input_label": {
+            "enter_synthesis_text" : "Enter synthesis text",
+            "speed_adjustment": "Speed adjustment",
+            "select_inference_mode_radio": "Select inference mode",
+            "instruction_text": "Instructions",
+            "sft_dropdown": "Select pretrained voice",
+            "stream": "Enable streaming",
+            "seed_number": "Random seed for inference",
+            "prompt_wav_upload": "Select prompt audio file, with a minimum sample rate of 16kHz",
+            "prompt_wav_record": "Record prompt audio file",
+            "prompt_text": "Enter prompt text",
+            "instruct_text": "Enter instruct text",
+            "generate_button": "Generate Audio",
+            "audio_output": "Synthesized Audio"
+        },
+        "placeholders": {
+            "enter_synthesis_text": "I am a state-of-the-art generative speech model developed by the CosyVoice team, providing comfortable and natural speech synthesis capabilities.",
+            "prompt_text": "Please enter the prompt text. It should match the content of the prompt audio; automatic recognition is not supported at the moment...",
+            "instruct_text": "Please enter instruct text."
+        }
+    }
+}
diff --git a/locales/zh.json b/locales/zh.json
@@ -0,0 +1,61 @@
+{
+    "zh": {
+        "inference_mode_list": {
+            "pretrained_voice": "预训练音色", 
+            "3s_fast_replication": "3s极速复刻", 
+            "crosslingual": "跨语种复刻", 
+            "natural_language_control": "自然语言控制"
+        },
+        "instruct_dict": {
+            "pretrained_voice": "1. 选择预训练音色\n2. 点击生成音频按钮", 
+            "3s_fast_replication": "1. 选择prompt音频文件，或录入prompt音频，注意不超过30s，若同时提供，优先选择prompt音频文件\n2. 输入prompt文本\n3. 点击生成音频按钮", 
+            "crosslingual": "1. 选择prompt音频文件，或录入prompt音频，注意不超过30s，若同时提供，优先选择prompt音频文件\n2. 点击生成音频按钮", 
+            "natural_language_control": "1. 选择预训练音色\n2. 输入instruct文本\n3. 点击生成音频按钮"
+        },
+        "boolean": {
+            "true": "是",
+            "false": "否"
+        },
+        "warnings": {
+            "nlp_model_warn": "您正在使用自然语言控制模式, {}模型不支持此模式, 请使用iic/CosyVoice-300M-Instruct模型",
+            "instruct_text": "您正在使用自然语言控制模式, 请输入instruct文本",
+            "no_crosslingual_support": "您正在使用跨语种复刻模式, {}模型不支持此模式, 请使用iic/CosyVoice-300M模型",
+            "crosslingual_instruct_ignored": "您正在使用跨语种复刻模式, instruct文本会被忽略",
+            "crosslingual_prompt_audio_required": "您正在使用跨语种复刻模式, 请提供prompt音频",
+            "prompt_audio_empty": "prompt音频为空，您是否忘记输入prompt音频？",
+            "sample_rate_error": "prompt音频采样率{}低于{}",
+            "prompt_text_empty": "prompt文本为空，您是否忘记输入prompt文本？",
+            "instruct_text_empty": "您正在使用3s极速复刻模式，预训练音色/instruct文本会被忽略！"
+        },
+        "info": {
+            "prompt_wav": "您正在使用自然语言控制模式, prompt音频/prompt文本会被忽略",
+            "crosslingual_language_reminder": "您正在使用跨语种复刻模式, 请确保合成文本和prompt文本为不同语言",
+            "pretrained_voice_warning": "您正在使用预训练音色模式，prompt文本/prompt音频/instruct文本会被忽略！"
+        },
+        "markdown": {
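+            "code_reference": "### 代码库 [CosyVoice](https://github.com/FunAudioLLM/CosyVoice) 预训练模型 [CosyVoice-300M](https://www.modelscope.cn/models/iic/CosyVoice-300M) [CosyVoice-300M-Instruct](https://www.modelscope.cn/models/iic/CosyVoice-300M-Instruct) [CosyVoice-300M-SFT](https://www.modelscope.cn/models/iic/CosyVoice-300M-SFT)",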
+            "output_text_prompt": "#### 请输入需要合成的文本，选择推理模式，并按照提示步骤进行操作"
+        },
+        "input_label": {
+            "enter_synthesis_text" : "输入合成文本",
+            "speed_adjustment": "语速调节",
+            "select_inference_mode_radio": "选择推理模式",
+            "instruction_text": "操作步骤",
+            "sft_dropdown": "选择预训练音色",
+            "stream": "是否流式推理",
+            "seed_number": "随机推理种子",
+            "prompt_wav_upload": "选择prompt音频文件，注意采样率不低于16kHz",
+            "prompt_wav_record": "录制prompt音频文件",
+            "prompt_text": "输入prompt文本",
+            "instruct_text": "输入instruct文本",
+            "generate_button": "生成音频",
+            "audio_output": "合成音频"
+        },
+        "placeholders": {
+            "enter_synthesis_text": "我是通义实验室语音团队全新推出的生成式语音大模型，提供舒适自然的语音合成能力。",
+            "prompt_text": "请输入prompt文本，需与prompt音频内容一致，暂时不支持自动识别...",
+            "instruct_text": "请输入instruct文本."
+        }
+    }
+
+}
diff --git a/requirements.txt b/requirements.txt
@@ -31,3 +31,4 @@ wget==3.2
 fastapi==0.111.0
 fastapi-cli==0.0.4
 WeTextProcessing==1.0.3
+python-i18n==0.3.9