|
31 | 31 | #include "base/timer.h"
|
32 | 32 |
|
33 | 33 | #include <fst/compose.h>
|
| 34 | +#include <fst/rmepsilon.h> |
34 | 35 | #include <memory>
|
35 | 36 |
|
36 | 37 |
|
@@ -154,106 +155,119 @@ int main(int argc, char *argv[]) {
|
154 | 155 |
|
155 | 156 | RandomAccessTableReader<fst::VectorFstHolder> boosting_fst_reader(boosting_fst_rspecifier);
|
156 | 157 |
|
157 |
| - // HCLG FST is just one FST, not a table of FSTs. |
158 |
| - auto hclg_fst = std::unique_ptr<VectorFst<StdArc>>(fst::ReadFstKaldi(hclg_fst_rxfilename)); |
| 158 | + // 'hclg_fst' is a single FST. |
| 159 | + VectorFst<StdArc> hclg_fst; |
| 160 | + { |
| 161 | + auto hclg_fst_tmp = std::unique_ptr<Fst<StdArc>>(fst::ReadFstKaldiGeneric(hclg_fst_rxfilename)); |
| 162 | + hclg_fst = VectorFst<StdArc>(*hclg_fst_tmp); // Fst -> VectorFst, as it has to be MutableFst... |
| 163 | + // 'hclg_fst_tmp' is deleted by 'going out of scope' ... |
| 164 | + } |
159 | 165 |
|
160 | 166 | // make sure hclg is sorted on olabel
|
161 |
| - if (hclg_fst->Properties(fst::kOLabelSorted, true) == 0) { |
| 167 | + if (hclg_fst.Properties(fst::kOLabelSorted, true) == 0) { |
162 | 168 | fst::OLabelCompare<StdArc> olabel_comp;
|
163 |
| - fst::ArcSort(hclg_fst.get(), olabel_comp); |
| 169 | + fst::ArcSort(&hclg_fst, olabel_comp); |
164 | 170 | }
|
165 | 171 |
|
166 | 172 | timer.Reset();
|
167 | 173 |
|
168 |
| - { |
169 |
| - |
170 |
| - for (; !feature_reader.Done(); feature_reader.Next()) { |
171 |
| - std::string utt = feature_reader.Key(); |
172 |
| - const Matrix<BaseFloat> &features (feature_reader.Value()); |
173 |
| - if (features.NumRows() == 0) { |
174 |
| - KALDI_WARN << "Zero-length utterance: " << utt; |
| 174 | + //// MAIN LOOP //// |
| 175 | + for (; !feature_reader.Done(); feature_reader.Next()) { |
| 176 | + std::string utt = feature_reader.Key(); |
| 177 | + const Matrix<BaseFloat> &features (feature_reader.Value()); |
| 178 | + if (features.NumRows() == 0) { |
| 179 | + KALDI_WARN << "Zero-length utterance: " << utt; |
| 180 | + num_fail++; |
| 181 | + continue; |
| 182 | + } |
| 183 | + const Matrix<BaseFloat> *online_ivectors = NULL; |
| 184 | + const Vector<BaseFloat> *ivector = NULL; |
| 185 | + if (!ivector_rspecifier.empty()) { |
| 186 | + if (!ivector_reader.HasKey(utt)) { |
| 187 | + KALDI_WARN << "No iVector available for utterance " << utt; |
175 | 188 | num_fail++;
|
176 | 189 | continue;
|
| 190 | + } else { |
| 191 | + ivector = &ivector_reader.Value(utt); |
177 | 192 | }
|
178 |
| - const Matrix<BaseFloat> *online_ivectors = NULL; |
179 |
| - const Vector<BaseFloat> *ivector = NULL; |
180 |
| - if (!ivector_rspecifier.empty()) { |
181 |
| - if (!ivector_reader.HasKey(utt)) { |
182 |
| - KALDI_WARN << "No iVector available for utterance " << utt; |
183 |
| - num_fail++; |
184 |
| - continue; |
185 |
| - } else { |
186 |
| - ivector = &ivector_reader.Value(utt); |
187 |
| - } |
188 |
| - } |
189 |
| - if (!online_ivector_rspecifier.empty()) { |
190 |
| - if (!online_ivector_reader.HasKey(utt)) { |
191 |
| - KALDI_WARN << "No online iVector available for utterance " << utt; |
192 |
| - num_fail++; |
193 |
| - continue; |
194 |
| - } else { |
195 |
| - online_ivectors = &online_ivector_reader.Value(utt); |
196 |
| - } |
197 |
| - } |
198 |
| - |
199 |
| - // get the boosting graph, |
200 |
| - VectorFst<StdArc> boosting_fst; |
201 |
| - if (!boosting_fst_reader.HasKey(utt)) { |
202 |
| - KALDI_WARN << "No boosting fst for utterance " << utt; |
| 193 | + } |
| 194 | + if (!online_ivector_rspecifier.empty()) { |
| 195 | + if (!online_ivector_reader.HasKey(utt)) { |
| 196 | + KALDI_WARN << "No online iVector available for utterance " << utt; |
203 | 197 | num_fail++;
|
204 | 198 | continue;
|
205 | 199 | } else {
|
206 |
| - boosting_fst = boosting_fst_reader.Value(utt); // copy, |
| 200 | + online_ivectors = &online_ivector_reader.Value(utt); |
207 | 201 | }
|
| 202 | + } |
208 | 203 |
|
209 |
| - timer_compose.Reset(); |
210 |
| - |
211 |
| - // make sure boosting graph is sorted on ilabel, |
212 |
| - if (boosting_fst.Properties(fst::kILabelSorted, true) == 0) { |
213 |
| - fst::ILabelCompare<StdArc> ilabel_comp; |
214 |
| - fst::ArcSort(&boosting_fst, ilabel_comp); |
215 |
| - } |
| 204 | + // get the boosting graph, |
| 205 | + VectorFst<StdArc> boosting_fst; |
| 206 | + if (!boosting_fst_reader.HasKey(utt)) { |
| 207 | + KALDI_WARN << "No boosting fst for utterance " << utt; |
| 208 | + num_fail++; |
| 209 | + continue; |
| 210 | + } else { |
| 211 | + boosting_fst = boosting_fst_reader.Value(utt); // copy, |
| 212 | + } |
216 | 213 |
|
217 |
| - // TODO: should we call rmepsilon on boosting_fst ? |
| 214 | + timer_compose.Reset(); |
218 | 215 |
|
219 |
| - // run composition (measure time), |
220 |
| - VectorFst<StdArc> decode_fst; |
221 |
| - fst::Compose(*hclg_fst, boosting_fst, &decode_fst); |
| 216 | + // RmEpsilon saved 30% of composition runtime... |
| 217 | + // - Note: we are loading 2-state graphs with eps back-link to the initial state. |
| 218 | + if (boosting_fst.Properties(fst::kIEpsilons, true) != 0) { |
| 219 | + fst::RmEpsilon(&boosting_fst); |
| 220 | + } |
222 | 221 |
|
223 |
| - // TODO: should we sort the 'decode_fst' by isymbols ? |
224 |
| - // (we don't do it, as it would take time. |
225 |
| - // not sure it decoding would be faster if |
226 |
| - // decode_fst was sorted by isymbols) |
| 222 | + // make sure boosting graph is sorted on ilabel, |
| 223 | + if (boosting_fst.Properties(fst::kILabelSorted, true) == 0) { |
| 224 | + fst::ILabelCompare<StdArc> ilabel_comp; |
| 225 | + fst::ArcSort(&boosting_fst, ilabel_comp); |
| 226 | + } |
227 | 227 |
|
228 |
| - // Check that composed graph is non-empty, |
229 |
| - if (decode_fst.Start() == fst::kNoStateId) { |
230 |
| - KALDI_WARN << "Empty 'decode_fst' HCLG for utterance " |
231 |
| - << utt << " (bad boosting graph?)"; |
232 |
| - num_fail++; |
233 |
| - continue; |
234 |
| - } |
| 228 | + // run composition, |
| 229 | + VectorFst<StdArc> decode_fst; |
| 230 | + fst::Compose(hclg_fst, boosting_fst, &decode_fst); |
235 | 231 |
|
236 |
| - elapsed_compose += timer_compose.Elapsed(); |
237 |
| - |
238 |
| - DecodableAmNnetSimple nnet_decodable( |
239 |
| - decodable_opts, trans_model, am_nnet, |
240 |
| - features, ivector, online_ivectors, |
241 |
| - online_ivector_period, &compiler); |
242 |
| - |
243 |
| - LatticeFasterDecoder decoder(decode_fst, config); |
244 |
| - |
245 |
| - double like; |
246 |
| - if (DecodeUtteranceLatticeFaster( |
247 |
| - decoder, nnet_decodable, trans_model, word_syms.get(), utt, |
248 |
| - decodable_opts.acoustic_scale, determinize, allow_partial, |
249 |
| - &alignment_writer, &words_writer, &compact_lattice_writer, |
250 |
| - &lattice_writer, |
251 |
| - &like)) { |
252 |
| - tot_like += like; |
253 |
| - frame_count += nnet_decodable.NumFramesReady(); |
254 |
| - num_success++; |
255 |
| - } else num_fail++; |
| 232 | + // check that composed graph is non-empty, |
| 233 | + if (decode_fst.Start() == fst::kNoStateId) { |
| 234 | + KALDI_WARN << "Empty 'decode_fst' HCLG for utterance " |
| 235 | + << utt << " (bad boosting graph?)"; |
| 236 | + num_fail++; |
| 237 | + continue; |
256 | 238 | }
|
| 239 | + |
| 240 | + elapsed_compose += timer_compose.Elapsed(); |
| 241 | + |
| 242 | + DecodableAmNnetSimple nnet_decodable( |
| 243 | + decodable_opts, trans_model, am_nnet, |
| 244 | + features, ivector, online_ivectors, |
| 245 | + online_ivector_period, &compiler); |
| 246 | + |
| 247 | + // Note: decode_fst is VectorFst, not ConstFst. |
| 248 | + // |
| 249 | + // OpenFst docs say that more specific iterators |
| 250 | + // are faster than generic iterators. And in HCLG |
| 251 | + // is usually loaded for decoding as ConstFst. |
| 252 | + // |
| 253 | + // auto decode_fst_ = ConstFst<StdArc>(decode_fst); |
| 254 | + // |
| 255 | + // In this way, I tried to cast VectorFst to ConstFst, |
| 256 | + // but this made the decoding 20% slower. |
| 257 | + // |
| 258 | + LatticeFasterDecoder decoder(decode_fst, config); |
| 259 | + |
| 260 | + double like; |
| 261 | + if (DecodeUtteranceLatticeFaster( |
| 262 | + decoder, nnet_decodable, trans_model, word_syms.get(), utt, |
| 263 | + decodable_opts.acoustic_scale, determinize, allow_partial, |
| 264 | + &alignment_writer, &words_writer, &compact_lattice_writer, |
| 265 | + &lattice_writer, |
| 266 | + &like)) { |
| 267 | + tot_like += like; |
| 268 | + frame_count += nnet_decodable.NumFramesReady(); |
| 269 | + num_success++; |
| 270 | + } else num_fail++; |
257 | 271 | }
|
258 | 272 | }
|
259 | 273 |
|
|
0 commit comments