This repository was archived by the owner on Jan 17, 2024. It is now read-only.
forked from ming024/FastSpeech2
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtts_server.py
More file actions
97 lines (84 loc) · 3.46 KB
/
tts_server.py
File metadata and controls
97 lines (84 loc) · 3.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
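"""Expose an AI model as a remotely callable HTTP API via APIWrapper (here: FastSpeech2 text-to-speech; the original wording referred to a ChatGLM example)."""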
import argparse
import io
import re
from string import punctuation
from types import SimpleNamespace

import numpy as np
import torch
import yaml
from flask import request, send_file
from g2p_en import G2p
from pypinyin import pinyin, Style
from torch.utils.data import DataLoader

from APIWrapper import APIWrapper
from dataset import TextDataset
from synthesize import preprocess_mandarin, synthesize
from text import text_to_sequence
from utils.model import get_model, get_vocoder
from utils.tools import to_device, synth_samples
if __name__ == "__main__":
    api_app = APIWrapper()  # Application object that routes are registered on.

    # Fixed synthesis settings, mirroring the command-line arguments of
    # synthesize.py. This dict is a read-only template: each request derives
    # its own namespace from it instead of mutating or rebinding it.
    args = {
        'restore_step': 600000,
        'mode': 'single',
        'source': None,
        'text': None,
        'speaker_id': 44,
        'preprocess_config': 'config/AISHELL3/preprocess.yaml',
        'model_config': 'config/AISHELL3/model.yaml',
        'train_config': 'config/AISHELL3/train.yaml',
        'pitch_control': 1.0,
        'energy_control': 1.0,
        'duration_control': 1.0
    }
    device = "cpu"
    # File the handler reads back after synthesize() has run.
    output_wav = 'output/result/AISHELL3/output.wav'

    def _load_yaml(path):
        """Parse one YAML config file and close it deterministically."""
        with open(path, "r", encoding="utf-8") as stream:
            return yaml.load(stream, Loader=yaml.FullLoader)

    # Configs, model and vocoder do not depend on the request, so they are
    # loaded once at startup rather than on every call to the route.
    preprocess_config = _load_yaml(args['preprocess_config'])
    model_config = _load_yaml(args['model_config'])
    train_config = _load_yaml(args['train_config'])
    configs = (preprocess_config, model_config, train_config)
    # NOTE(review): assumes get_model only needs the checkpoint-related
    # fields (e.g. restore_step) and not the request text -- confirm.
    model = get_model(SimpleNamespace(**args), configs, device, train=False)
    vocoder = get_vocoder(model_config, device)

    @api_app.addRoute('/tts', methods=['POST'])  # Text-to-speech endpoint.
    def tts():
        """
        Convert the text in the request's JSON body to audio and send it back.

        Expects a JSON object with a non-empty string field "text".

        Returns:
            The synthesized audio as an "audio/wav" response, or a
            (dict, 400) pair when the text is missing, empty or not a string.
        """
        # NOTE(review): the original read a "secret" request value here but
        # never checked it, so this handler performs no authentication itself.
        payload = request.get_json(silent=True)
        text = payload.get("text", None) if isinstance(payload, dict) else None
        if not isinstance(text, str) or not text.strip():
            return {"time": api_app.getISOTime(), "content": "The text is empty."}, 400

        # Per-request copy of the settings. Rebinding the shared `args` to a
        # namespace (as the original did) broke every request after the first.
        job = SimpleNamespace(**{**args, "text": text})

        # Single-sentence path adapted from synthesize.py. The "batch" mode is
        # not reachable here because `mode` is fixed to 'single' and a text is
        # always supplied.
        ids = raw_texts = [job.text[:100]]
        speakers = np.array([job.speaker_id])
        texts = np.array([preprocess_mandarin(job.text, preprocess_config)])
        text_lens = np.array([len(texts[0])])
        batchs = [(ids, raw_texts, speakers, texts, text_lens, max(text_lens))]
        control_values = job.pitch_control, job.energy_control, job.duration_control
        synthesize(model, job.restore_step, configs, vocoder, batchs, control_values)

        # Snapshot the file into memory so the handle is closed right away and
        # the response no longer depends on a path the next request rewrites.
        # NOTE(review): the output path is shared, so overlapping requests may
        # still race between synthesize() and this read -- confirm deployment.
        with open(output_wav, 'rb') as wav_file:
            audio = io.BytesIO(wav_file.read())
        return send_file(audio, mimetype="audio/wav")

    api_app.run()