forked from RuABraun/fst-util
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcreate_grammar.py
More file actions
69 lines (60 loc) · 1.81 KB
/
create_grammar.py
File metadata and controls
69 lines (60 loc) · 1.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import plac
import itertools
from wrappedfst import WrappedFst
def updatesym(symtab, label):
    """Return the integer id of ``label`` in ``symtab``, registering it if new.

    A label that is not yet a key of ``symtab`` is inserted with the current
    number of entries as its id, so ids grow in insertion order. ``symtab``
    is modified in place.
    """
    try:
        return symtab[label]
    except KeyError:
        # First time this label is seen: the next free id is the table size.
        fresh_id = len(symtab)
        symtab[label] = fresh_id
        return fresh_id
def main(inf, symtab_f, outf):
    """Compile a paragraph-style grammar file into an FST and a symbol table.

    ``inf`` is read line by line. Every non-blank line is a label, and the
    labels of one "paragraph" (a run of lines with no blank line between
    them) become alternative arcs between the same pair of states. One or
    more blank lines end the paragraph; the arcs of the next paragraph then
    leave the state the previous paragraph led to. For example::

        START

        a
        b

        x
        y

        END

    describes ``START``, followed by ``a`` or ``b``, followed by ``x`` or
    ``y``, followed by ``END``.

    A line of the form ``prefix(opt1;opt2)suffix`` is expanded, inside the
    current paragraph, into a chain of three steps: one arc labelled
    ``prefix``, one arc per ``;``-separated option, and one arc labelled
    ``suffix``.

    Leading, trailing and repeated blank lines are ignored, so they can
    neither disconnect the graph nor move the final state onto a state that
    has no incoming arcs.

    Args:
        inf: path of the grammar description to read.
        symtab_f: path the symbol table is written to, one
            ``label<TAB>id`` line per symbol.
        outf: path handed to ``fst.write`` for the resulting FST.
    """
    # NOTE: the parameters are deliberately left unannotated; plac reads
    # function annotations as command-line argument specifications.
    symtab = {'<eps>': 0}
    fst = WrappedFst()
    state = fst.add_state()
    fst.set_start(state)
    nstate = fst.add_state()

    seen_arc = False         # True once at least one arc has been added
    advance_pending = False  # a paragraph break is waiting for the next label

    with open(inf) as grammar:
        for line in grammar:
            label = line.strip()
            if not label:
                # Defer the state change until another label actually shows
                # up: blank lines before the first label, at the end of the
                # file, or repeated in a row must not create arc-less states.
                advance_pending = seen_arc
                continue
            if advance_pending:
                state = nstate
                nstate = fst.add_state()
                advance_pending = False

            # Only treat the line as "prefix(options)suffix" when a ')'
            # follows the first '('; otherwise it is an ordinary label.
            si = label.find('(')
            ei = label.find(')', si + 1) if si != -1 else -1
            if ei != -1:
                prefixline = label[:si]
                suffixline = label[ei + 1:]
                opts = label[si + 1:ei].split(';')
                # NOTE(review): an empty prefix or suffix (e.g. "(a;b)") is
                # registered as an empty-string symbol, as before -- confirm
                # that consumers of the symbol table can cope with that.
                tstate = fst.add_state()
                n = updatesym(symtab, prefixline)
                fst.add_arc(state, tstate, n, n, 0.)
                estate = fst.add_state()
                for opt in opts:
                    n = updatesym(symtab, opt)
                    fst.add_arc(tstate, estate, n, n, 0.)
                n = updatesym(symtab, suffixline)
                fst.add_arc(estate, nstate, n, n, 0.)
            else:
                n = updatesym(symtab, label)
                fst.add_arc(state, nstate, n, n, 0.)
            seen_arc = True

    fst.set_final(nstate)
    fst.determinize()
    fst.minimize()

    with open(symtab_f, 'w') as table:
        table.writelines(f'{sym}\t{idx}\n' for sym, idx in symtab.items())
    fst.write(outf)
if __name__ == '__main__':
    # Run the command-line interface only when executed as a script, so that
    # importing this module does not trigger argument parsing.
    plac.call(main)