1
1
// latbin/lattice-compose.cc
2
2
3
3
// Copyright 2009-2011 Microsoft Corporation; Saarland University
4
+ // 2022 Brno University of Technology
4
5
5
6
// See ../../COPYING for clarification regarding multiple authors
6
7
//
17
18
// See the Apache 2 License for the specific language governing permissions and
18
19
// limitations under the License.
19
20
20
-
21
21
#include " base/kaldi-common.h"
22
22
#include " util/common-utils.h"
23
23
#include " fstext/fstext-lib.h"
@@ -34,27 +34,37 @@ int main(int argc, char *argv[]) {
34
34
using fst::StdArc;
35
35
36
36
const char *usage =
37
- " Composes lattices (in transducer form, as type Lattice). Depending\n "
38
- " on the command-line arguments, either composes lattices with lattices,\n "
39
- " or lattices with FSTs (rspecifiers are assumed to be lattices, and\n "
40
- " rxfilenames are assumed to be FSTs, which have their weights interpreted\n "
41
- " as \" graph weights\" when converted into the Lattice format.\n "
37
+ " Composes lattices (in transducer form, as type Lattice).\n "
38
+ " Depending on the command-line arguments, either composes\n "
39
+ " lattices with lattices, or lattices with a single FST or\n "
40
+ " multiple FSTs (whose weights are interpreted as \" graph weights\" ).\n "
42
41
" \n "
43
- " Usage: lattice-compose [options] lattice-rspecifier1 "
44
- " ( lattice-rspecifier2|fst-rxfilename2) lattice-wspecifier\n "
45
- " e.g.: lattice-compose ark:1.lats ark:2.lats ark:composed.lats \n "
46
- " or: lattice-compose ark:1.lats G.fst ark:composed.lats \n " ;
42
+ " Usage: lattice-compose [options] < lattice-rspecifier1> "
43
+ " < lattice-rspecifier2|fst-rxfilename2|fst-rspecifier2> < lattice-wspecifier> \n "
44
+ " If the 2nd arg is an rspecifier, it is interpreted by default as a table of \n "
45
+ " lattices, or as a table of FSTs if you specify --compose-with-fst=true. \n " ;
47
46
48
47
ParseOptions po (usage);
49
48
50
49
bool write_compact = true ;
51
50
int32 num_states_cache = 50000 ;
52
51
int32 phi_label = fst::kNoLabel ; // == -1
52
+ int32 rho_label = fst::kNoLabel ; // == -1
53
+ std::string compose_with_fst = " auto" ;
54
+
53
55
po.Register (" write-compact" , &write_compact, " If true, write in normal (compact) form." );
54
56
po.Register (" phi-label" , &phi_label, " If >0, the label on backoff arcs of the LM" );
57
+ po.Register (" rho-label" , &rho_label,
58
+ " If >0, the label to forward lat1 paths not present in biasing graph fst2 "
59
+ " (rho is input and output symbol on special arc in biasing graph fst2;"
60
+ " rho is like phi (matches rest), but rho label is rewritten to the"
61
+ " specific symbol from lat1)" );
55
62
po.Register (" num-states-cache" , &num_states_cache,
56
63
" Number of states we cache when mapping LM FST to lattice type. "
57
64
" More -> more memory but faster." );
65
+ po.Register (" compose-with-fst" , &compose_with_fst,
66
+ " (true|false|auto) For auto arg2 is: rspecifier=lats, rxfilename=fst "
67
+ " (old behavior), for true/false rspecifier is fst/lattice." );
58
68
po.Read (argc, argv);
59
69
60
70
if (po.NumArgs () != 3 ) {
@@ -63,14 +73,28 @@ int main(int argc, char *argv[]) {
63
73
}
64
74
65
75
KALDI_ASSERT (phi_label > 0 || phi_label == fst::kNoLabel ); // e.g. 0 not allowed.
76
+ KALDI_ASSERT (rho_label > 0 || rho_label == fst::kNoLabel ); // e.g. 0 not allowed.
77
+ if (phi_label > 0 && rho_label > 0 ) {
78
+ KALDI_ERR << " You cannot set both 'phi_label' and 'rho_label' at the same time." ;
79
+ }
80
+
81
+ { // convert 'compose_with_fst' to lowercase to support: true, True, TRUE
82
+ std::string& str (compose_with_fst);
83
+ std::transform (str.begin (), str.end (), str.begin (), (int (*)(int ))std::tolower); // lc
84
+ }
85
+ if (compose_with_fst != " auto" && compose_with_fst != " true" &&
86
+ compose_with_fst != " false" ) {
87
+ KALDI_ERR << " Unkown 'compose_with_fst' value : " << compose_with_fst
88
+ << " , values are (auto|true|false)" ;
89
+ }
66
90
67
91
std::string lats_rspecifier1 = po.GetArg (1 ),
68
92
arg2 = po.GetArg (2 ),
69
93
lats_wspecifier = po.GetArg (3 );
70
94
int32 n_done = 0 , n_fail = 0 ;
71
95
72
96
SequentialLatticeReader lattice_reader1 (lats_rspecifier1);
73
-
97
+
74
98
CompactLatticeWriter compact_lattice_writer;
75
99
LatticeWriter lattice_writer;
76
100
@@ -79,33 +103,48 @@ int main(int argc, char *argv[]) {
79
103
else
80
104
lattice_writer.Open (lats_wspecifier);
81
105
82
- if (ClassifyRspecifier (arg2, NULL , NULL ) == kNoRspecifier ) {
106
+ bool arg2_is_rxfilename = (ClassifyRspecifier (arg2, NULL , NULL ) == kNoRspecifier );
107
+
108
+ if (arg2_is_rxfilename && (compose_with_fst == " auto" || compose_with_fst == " true" )) {
109
+ /* *
110
+ * arg2 is rxfilename that contains a single fst
111
+ * - compose arg1 lattices with single fst in arg2
112
+ */
83
113
std::string fst_rxfilename = arg2;
84
- VectorFst<StdArc> *fst2 = fst::ReadFstKaldi (fst_rxfilename);
85
- // mapped_fst2 is fst2 interpreted using the LatticeWeight semiring,
86
- // with all the cost on the first member of the pair (since we're
87
- // assuming it's a graph weight).
114
+ VectorFst<StdArc>* fst2 = fst::ReadFstKaldi (fst_rxfilename);
115
+
116
+ // Make sure fst2 is sorted on ilabel
88
117
if (fst2->Properties (fst::kILabelSorted , true ) == 0 ) {
89
- // Make sure fst2 is sorted on ilabel.
90
118
fst::ILabelCompare<StdArc> ilabel_comp;
91
119
ArcSort (fst2, ilabel_comp);
92
120
}
121
+
93
122
if (phi_label > 0 )
94
123
PropagateFinal (phi_label, fst2);
95
124
125
+ // mapped_fst2 is fst2 interpreted using the LatticeWeight semiring,
126
+ // with all the cost on the first member of the pair (since we're
127
+ // assuming it's a graph weight).
96
128
fst::CacheOptions cache_opts (true , num_states_cache);
97
129
fst::MapFstOptions mapfst_opts (cache_opts);
98
130
fst::StdToLatticeMapper<BaseFloat> mapper;
99
131
fst::MapFst<StdArc, LatticeArc, fst::StdToLatticeMapper<BaseFloat> >
100
132
mapped_fst2 (*fst2, mapper, mapfst_opts);
133
+
101
134
for (; !lattice_reader1.Done (); lattice_reader1.Next ()) {
102
135
std::string key = lattice_reader1.Key ();
103
136
KALDI_VLOG (1 ) << " Processing lattice for key " << key;
104
137
Lattice lat1 = lattice_reader1.Value ();
105
138
ArcSort (&lat1, fst::OLabelCompare<LatticeArc>());
139
+
106
140
Lattice composed_lat;
107
- if (phi_label > 0 ) PhiCompose (lat1, mapped_fst2, phi_label, &composed_lat);
108
- else Compose (lat1, mapped_fst2, &composed_lat);
141
+ if (phi_label > 0 ) {
142
+ PhiCompose (lat1, mapped_fst2, phi_label, &composed_lat);
143
+ } else if (rho_label > 0 ) {
144
+ RhoCompose (lat1, mapped_fst2, rho_label, &composed_lat);
145
+ } else {
146
+ Compose (lat1, mapped_fst2, &composed_lat);
147
+ }
109
148
if (composed_lat.Start () == fst::kNoStateId ) {
110
149
KALDI_WARN << " Empty lattice for utterance " << key << " (incompatible LM?)" ;
111
150
n_fail++;
@@ -121,13 +160,27 @@ int main(int argc, char *argv[]) {
121
160
}
122
161
}
123
162
delete fst2;
124
- } else {
163
+
164
+ } else if (arg2_is_rxfilename && compose_with_fst == " false" ) {
165
+ /* *
166
+ * arg2 is rxfilename that contains a single lattice
167
+ * - would it make sense to do this? Not implementing...
168
+ */
169
+ KALDI_ERR << " Unimplemented..." ;
170
+
171
+ } else if (!arg2_is_rxfilename &&
172
+ (compose_with_fst == " auto" || compose_with_fst == " false" )) {
173
+ /* *
174
+ * arg2 is rspecifier that contains a table of lattices
175
+ * - composing arg1 lattices with arg2 lattices
176
+ */
125
177
std::string lats_rspecifier2 = arg2;
126
178
// This is the case similar to lattice-interp.cc, where we
127
179
// read in another set of lattices and compose them. But in this
128
180
// case we don't do any projection; we assume that the user has already
129
181
// done this (e.g. with lattice-project).
130
182
RandomAccessLatticeReader lattice_reader2 (lats_rspecifier2);
183
+
131
184
for (; !lattice_reader1.Done (); lattice_reader1.Next ()) {
132
185
std::string key = lattice_reader1.Key ();
133
186
KALDI_VLOG (1 ) << " Processing lattice for key " << key;
@@ -139,6 +192,7 @@ int main(int argc, char *argv[]) {
139
192
n_fail++;
140
193
continue ;
141
194
}
195
+
142
196
Lattice lat2 = lattice_reader2.Value (key);
143
197
// Make sure that either lat2 is ilabel sorted
144
198
// or lat1 is olabel sorted, to ensure that
@@ -150,27 +204,103 @@ int main(int argc, char *argv[]) {
150
204
fst::ArcSort (&lat2, ilabel_comp);
151
205
}
152
206
153
- Lattice lat_out;
207
+ Lattice composed_lat;
208
+ // Btw, can the lat2 lattice contin phi/rho symbols ?
154
209
if (phi_label > 0 ) {
155
210
PropagateFinal (phi_label, &lat2);
156
- PhiCompose (lat1, lat2, phi_label, &lat_out);
211
+ PhiCompose (lat1, lat2, phi_label, &composed_lat);
212
+ } else if (rho_label > 0 ) {
213
+ RhoCompose (lat1, lat2, rho_label, &composed_lat);
214
+ } else {
215
+ Compose (lat1, lat2, &composed_lat);
216
+ }
217
+ if (composed_lat.Start () == fst::kNoStateId ) {
218
+ KALDI_WARN << " Empty lattice for utterance " << key << " (incompatible LM?)" ;
219
+ n_fail++;
220
+ } else {
221
+ if (write_compact) {
222
+ CompactLattice clat;
223
+ ConvertLattice (composed_lat, &clat);
224
+ compact_lattice_writer.Write (key, clat);
225
+ } else {
226
+ lattice_writer.Write (key, composed_lat);
227
+ }
228
+ n_done++;
229
+ }
230
+ }
231
+
232
+ } else if (!arg2_is_rxfilename && compose_with_fst == " true" ) {
233
+ /* *
234
+ * arg2 is rspecifier that contains a table of fsts
235
+ * - composing arg1 lattices with arg2 fsts
236
+ */
237
+ std::string fst_rspecifier2 = arg2;
238
+ RandomAccessTableReader<fst::VectorFstHolder> fst_reader2 (fst_rspecifier2);
239
+
240
+ for (; !lattice_reader1.Done (); lattice_reader1.Next ()) {
241
+ std::string key = lattice_reader1.Key ();
242
+ KALDI_VLOG (1 ) << " Processing lattice for key " << key;
243
+ Lattice lat1 = lattice_reader1.Value ();
244
+ lattice_reader1.FreeCurrent ();
245
+
246
+ if (!fst_reader2.HasKey (key)) {
247
+ KALDI_WARN << " Not producing output for utterance " << key
248
+ << " because not present in second table." ;
249
+ n_fail++;
250
+ continue ;
251
+ }
252
+
253
+ VectorFst<StdArc> fst2 = fst_reader2.Value (key);
254
+ // Make sure fst2 is sorted on ilabel
255
+ if (fst2.Properties (fst::kILabelSorted , true ) == 0 ) {
256
+ fst::ILabelCompare<StdArc> ilabel_comp;
257
+ fst::ArcSort (&fst2, ilabel_comp);
258
+ }
259
+
260
+ // for composing with LM-fsts, it makes all fst2 states final
261
+ if (phi_label > 0 )
262
+ PropagateFinal (phi_label, &fst2);
263
+
264
+ // mapped_fst2 is fst2 interpreted using the LatticeWeight semiring,
265
+ // with all the cost on the first member of the pair (since we're
266
+ // assuming it's a graph weight).
267
+ fst::CacheOptions cache_opts (true , num_states_cache);
268
+ fst::MapFstOptions mapfst_opts (cache_opts);
269
+ fst::StdToLatticeMapper<BaseFloat> mapper;
270
+ fst::MapFst<StdArc, LatticeArc, fst::StdToLatticeMapper<BaseFloat> >
271
+ mapped_fst2 (fst2, mapper, mapfst_opts);
272
+
273
+ // sort lat1 on olabel.
274
+ ArcSort (&lat1, fst::OLabelCompare<LatticeArc>());
275
+
276
+ Lattice composed_lat;
277
+ if (phi_label > 0 ) {
278
+ PhiCompose (lat1, mapped_fst2, phi_label, &composed_lat);
279
+ } else if (rho_label > 0 ) {
280
+ RhoCompose (lat1, mapped_fst2, rho_label, &composed_lat);
157
281
} else {
158
- Compose (lat1, lat2 , &lat_out );
282
+ Compose (lat1, mapped_fst2 , &composed_lat );
159
283
}
160
- if (lat_out.Start () == fst::kNoStateId ) {
284
+
285
+ if (composed_lat.Start () == fst::kNoStateId ) {
161
286
KALDI_WARN << " Empty lattice for utterance " << key << " (incompatible LM?)" ;
162
287
n_fail++;
163
288
} else {
164
289
if (write_compact) {
165
- CompactLattice clat_out ;
166
- ConvertLattice (lat_out , &clat_out );
167
- compact_lattice_writer.Write (key, clat_out );
290
+ CompactLattice clat ;
291
+ ConvertLattice (composed_lat , &clat );
292
+ compact_lattice_writer.Write (key, clat );
168
293
} else {
169
- lattice_writer.Write (key, lat_out );
294
+ lattice_writer.Write (key, composed_lat );
170
295
}
171
296
n_done++;
172
297
}
173
298
}
299
+ } else {
300
+ /* *
301
+ * none of the 'if-else-if' applied...
302
+ */
303
+ KALDI_ERR << " You should never reach here..." ;
174
304
}
175
305
176
306
KALDI_LOG << " Done " << n_done << " lattices; failed for "
0 commit comments