forked from RuABraun/fst-util
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcreate_lfst.py
More file actions
57 lines (50 loc) · 1.63 KB
/
create_lfst.py
File metadata and controls
57 lines (50 loc) · 1.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import plac
from wrappedfst import WrappedFst
from loguru import logger
def readsym(f):
    """Load a symbol table file into a bidirectional lookup dict.

    Each non-blank line of the file must hold exactly two
    whitespace-separated fields: a symbol followed by its integer id.
    Blank lines are ignored.

    Args:
        f: Path of the symbol-table file.

    Returns:
        A dict that maps every symbol (str) to its id (int) and every
        id (int) back to its symbol (str).

    Raises:
        ValueError: If a non-blank line does not have exactly two fields,
            or its second field is not an integer.
    """
    dct = {}
    # Use a context manager so the file handle is closed as soon as the
    # table has been read, instead of whenever it gets garbage-collected.
    with open(f) as fh:
        for line in fh:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            sym, idx = fields
            idx = int(idx)
            dct[sym] = idx
            dct[idx] = sym
    return dct
def main(inlex, isym_f, osym_f, outf):
    """Build a lexicon FST from a pronunciation lexicon and write it out.

    Every accepted lexicon entry becomes one chain of arcs from a shared
    start state to a shared end state. The input label of each arc is the
    id of the phone with a position suffix appended: ``_B`` for the first
    phone, ``_I`` for middle phones and ``_E`` for the last phone. The
    word's id from the output symbol table is placed on the first arc;
    all other arcs carry output label 0.

    Entries with one phone or fewer, and entries whose phone sequence has
    already been used by an earlier entry, are skipped. Blank lines are
    ignored. The list of skipped words is printed at the end.

    Args:
        inlex: Path of the lexicon; each line is ``word phone1 phone2 ...``.
        isym_f: Path of the input (phone) symbol table, read by ``readsym``.
        osym_f: Path of the output (word) symbol table, read by ``readsym``.
        outf: Destination handed to ``fst.write``.

    Raises:
        KeyError: If a word is missing from the output symbol table or a
            suffixed phone is missing from the input symbol table.
    """
    logger.info('Not including words with phone count <= 1 or duplicate prons!')
    isym = readsym(isym_f)
    osym = readsym(osym_f)

    fst = WrappedFst()
    start = fst.add_state()
    end = fst.add_state()

    seen_prons = set()
    words_skipped = []
    with open(inlex) as fh:
        for line in fh:
            fields = line.split()
            if not fields:
                # A blank line has no word to unpack; ignore it.
                continue
            word, *phones = fields

            # Single-phone (or phone-less) entries are left out; there is
            # no begin/end pair to build a path from.
            if len(phones) <= 1:
                words_skipped.append(word)
                continue

            # Only the first word seen for a given pronunciation is kept.
            pron = tuple(phones)
            if pron in seen_prons:
                words_skipped.append(word)
                continue
            seen_prons.add(pron)

            # At least two phones are guaranteed here, so this always binds.
            first, *middle, last = phones

            # Look the word up before allocating a state so that an unknown
            # word fails without touching the FST for this entry.
            word_id = osym[word]
            # NOTE(review): add_arc arguments appear to be (source,
            # destination, input label, output label, weight) -- confirm
            # against WrappedFst.
            state = fst.add_state()
            fst.add_arc(start, state, isym[first + '_B'], word_id, 0.)

            for phone in middle:
                next_state = fst.add_state()
                fst.add_arc(state, next_state, isym[phone + '_I'], 0, 0.)
                state = next_state

            # The last phone goes straight to the shared end state.
            fst.add_arc(state, end, isym[last + '_E'], 0, 0.)

    fst.set_start(start)
    fst.set_final(end)
    fst.write(outf)
    print(words_skipped)
# Only launch the command-line interface when this file is executed as a
# script, so that importing the module has no side effects.
if __name__ == '__main__':
    plac.call(main)