-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpreprocessing.py
60 lines (40 loc) · 1.58 KB
/
preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import contextlib
import os
import subprocess
import wave
from pathlib import Path

import jaconv
def set_sample_rate(name):
    """Convert every MP3 of a dataset to a mono, 22050 Hz WAV file.

    Each ``tts_dataset/<name>/mp3s/<stem>.mp3`` is handed to ``ffmpeg`` and
    written to ``tts_dataset/<name>/wavs/<stem>.wav``.

    ``ffmpeg`` is invoked directly with an argument list rather than through
    a ``cmd.exe`` loop, so the function works on any platform that has
    ``ffmpeg`` on ``PATH``. The process working directory is left untouched,
    so relative paths used after this call still resolve.

    Args:
        name: Dataset directory name under ``tts_dataset/``.

    Raises:
        FileNotFoundError: If the ``mp3s`` directory does not exist, or if
            the ``ffmpeg`` executable cannot be found.
    """
    dataset_dir = Path('tts_dataset') / name
    mp3_dir = dataset_dir / 'mp3s'
    if not mp3_dir.is_dir():
        raise FileNotFoundError(f'MP3 directory not found: {mp3_dir}')

    # ffmpeg does not create missing output directories itself.
    wav_dir = dataset_dir / 'wavs'
    wav_dir.mkdir(exist_ok=True)

    for mp3_path in sorted(mp3_dir.glob('*.mp3')):
        wav_path = wav_dir / f'{mp3_path.stem}.wav'
        # check=False keeps the run best-effort: one file that fails to
        # convert does not abort the remaining conversions.
        subprocess.run(
            [
                'ffmpeg',
                '-i', str(mp3_path),
                '-ac', '1',
                '-ar', '22050',
                str(wav_path),
            ],
            check=False,
        )
def text_replacing(text):
    """Normalise one transcript string.

    Surrounding whitespace is trimmed, the text is lower-cased, every
    occurrence of the characters ``♪ 『 』 ( ) /`` is removed, and the result
    is passed through ``jaconv.alphabet2kata``.

    Args:
        text: Raw transcript string.

    Returns:
        The normalised transcript.
    """
    # Single-pass deletion table for the unwanted symbols.
    unwanted = str.maketrans('', '', '♪『』()/')
    cleaned = text.strip().lower().translate(unwanted)
    # jaconv.normalize(text) is currently disabled.
    return jaconv.alphabet2kata(cleaned)
def determining_dataset(name):
    """Write a filtered copy of a dataset's metadata file.

    Reads ``tts_dataset/<name>/metadata.txt`` (one ``path|transcript`` entry
    per line), measures the duration of the WAV file each entry refers to,
    normalises the transcript with ``text_replacing`` and writes the entries
    that pass the filter to ``tts_dataset/<name>/metadata2.txt``.

    An entry is kept when its audio lasts strictly between 2 and 10 seconds
    and its normalised transcript is 10 to 40 characters long (inclusive).
    Blank lines, including the one produced by a trailing newline, are
    skipped.

    Args:
        name: Dataset directory name under ``tts_dataset/``.

    Raises:
        ValueError: If a non-blank line does not contain exactly one ``|``.
        OSError: If a metadata or WAV file cannot be opened.
        wave.Error: If a referenced file is not a readable WAV file.
    """
    # Both files are managed by ``with`` so the output is flushed and closed
    # even when a malformed line or unreadable WAV aborts the run.
    with open(f'tts_dataset/{name}/metadata.txt', 'r', encoding='utf-8') as source:
        with open(f'tts_dataset/{name}/metadata2.txt', 'w', encoding='utf-8') as filtered:
            for raw_line in source:
                line = raw_line.rstrip('\n')
                if not line.strip():
                    continue

                fname, transcript = line.split('|')

                # NOTE(review): the first three characters of the stored path
                # are dropped before opening the file - presumably a "../"
                # prefix; confirm against the contents of metadata.txt.
                with wave.open(fname[3:], 'r') as wav_file:
                    frames = wav_file.getnframes()
                    rate = wav_file.getframerate()
                duration = frames / float(rate)

                transcript = text_replacing(transcript)
                if 2.0 < duration < 10.0 and 10 <= len(transcript) <= 40:
                    # The original (un-trimmed) path is what gets written.
                    filtered.write(f'{fname}|{transcript}\n')
if __name__ == '__main__':
    import sys
    import json

    # The speaker is selected by its index in the "code" list of
    # character_code.json, given as the first command-line argument.
    if len(sys.argv) < 2:
        sys.exit(f'usage: {sys.argv[0]} SPEAKER_ID')

    # Context manager so the configuration file handle is closed promptly.
    with open('character_code.json', 'r', encoding='utf-8') as config_file:
        char_json = json.load(config_file)['code'][int(sys.argv[1])]  # Speaker ID

    name = char_json['name']
    character = char_json['character']
    char_code = char_json['char_code']

    print(name, character, char_code)

    # Pipeline steps; both are currently disabled.
    # set_sample_rate(name)
    # determining_dataset(name)