Skip to content

Commit d87ec23

Browse files
committed
adding script for on-the-fly boosting of HCLG graph
1 parent ecb0ce7 commit d87ec23

File tree

2 files changed

+189
-0
lines changed

2 files changed

+189
-0
lines changed
+181
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
#!/usr/bin/env bash
2+
3+
# Copyright 2021 Brno University of Technology (Author: Karel Vesely).
4+
# Copyright 2012-2015 Johns Hopkins University (Author: Daniel Povey).
5+
# Apache 2.0.
6+
7+
# This script does decoding with a neural-net.
8+
# It calls 'nnet3-latgen-faster-compose', which does on-the-fly boosting
9+
# of HCLG graph by composing it with per-utterance boosting graphs (pre-existing).
10+
11+
# Begin configuration section.
12+
stage=1
13+
nj=4 # number of decoding jobs.
14+
acwt=0.1 # Just a default value, used for adaptation and beam-pruning.
15+
post_decode_acwt=1.0 # can be used in 'chain' systems to scale acoustics by 10 so the
16+
# regular scoring script works.
17+
cmd=run.pl
18+
beam=15.0
19+
frames_per_chunk=50
20+
max_active=7000
21+
min_active=200
22+
ivector_scale=1.0
23+
lattice_beam=8.0 # Beam we use in lattice generation.
24+
iter=final
25+
num_threads=1 # Currently has no effect: decoding is always submitted with '--num-threads 1' (see 'queue_opt' below).
26+
use_gpu=false # Currently has no effect: the code that would act on it (the
27+
# 'thread_string' / 'queue_opt' setup further below) is commented out,
28+
# and decoding always runs 'nnet3-latgen-faster-compose' with
29+
# '--num-threads 1' passed to $cmd.
30+
scoring_opts=
31+
skip_diagnostics=false
32+
skip_scoring=false
33+
extra_left_context=0
34+
extra_right_context=0
35+
extra_left_context_initial=-1
36+
extra_right_context_final=-1
37+
online_ivector_dir=
38+
minimize=false
39+
40+
boosting_graphs=
41+
# End configuration section.
42+
43+
echo "$0 $@" # Print the command line for logging
44+
45+
[ -f ./path.sh ] && . ./path.sh; # source the path.
46+
. utils/parse_options.sh || exit 1;
47+
48+
if [ $# -ne 3 ]; then
49+
# Usage text. '--boosting-graphs' is listed explicitly because the script exits
# with an error when it is not set. '\$dir' is escaped so the example prints
# literally: 'dir' is not assigned yet at this point and would expand to "".
echo "Usage: $0 --boosting-graphs <boosting-graphs> [options] <graph-dir> <data-dir> <decode-dir>"
50+
echo "e.g.: $0 --nj 8 --boosting-graphs <boosting-graphs> \\"
51+
echo "--online-ivector-dir exp/nnet2_online/ivectors_test_eval92 \\"
52+
echo " exp/tri4b/graph_bg data/test_eval92_hires \$dir/decode_bg_eval92"
53+
echo "main options (for others, see top of script file)"
54+
echo " --config <config-file> # config containing options"
55+
echo " --nj <nj> # number of parallel jobs"
56+
echo " --cmd <cmd> # Command to run in parallel with"
57+
echo " --beam <beam> # Decoding beam; default 15.0"
58+
echo " --iter <iter> # Iteration of model to decode; default is final."
59+
echo " --scoring-opts <string> # options to local/score.sh"
60+
echo " --num-threads <n> # number of threads to use, default 1."
61+
echo " --use-gpu <true|false> # default: false. If true, we recommend"
62+
echo " # to use large --num-threads as the graph"
63+
echo " # search becomes the limiting factor."
64+
exit 1;
65+
fi
66+
67+
graphdir=$1
68+
data=$2
69+
dir=$3
70+
srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
71+
model=$srcdir/$iter.mdl
72+
73+
# The boosting graphs are mandatory for this script; name the command-line
# option in the message so the user knows what to pass.
[ -z "$boosting_graphs" ] && echo "$0: Error, --boosting-graphs has to be set!" && exit 1
74+
75+
extra_files=
76+
if [ ! -z "$online_ivector_dir" ]; then
77+
steps/nnet2/check_ivectors_compatible.sh $srcdir $online_ivector_dir || exit 1
78+
extra_files="$online_ivector_dir/ivector_online.scp $online_ivector_dir/ivector_period"
79+
fi
80+
81+
utils/lang/check_phones_compatible.sh {$srcdir,$graphdir}/phones.txt || exit 1
82+
83+
for f in $graphdir/HCLG.fst $data/feats.scp $model $extra_files; do
84+
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
85+
done
86+
87+
sdata=$data/split$nj;
88+
if [ -f $srcdir/cmvn_opts ]; then
89+
cmvn_opts=`cat $srcdir/cmvn_opts`
90+
else
91+
cmvn_opts="--norm-means=false --norm-vars=false"
92+
fi
93+
94+
#thread_string=
95+
#if $use_gpu; then
96+
# if [ $num_threads -eq 1 ]; then
97+
# echo "$0: **Warning: we recommend to use --num-threads > 1 for GPU-based decoding."
98+
# fi
99+
# thread_string="-batch --num-threads=$num_threads"
100+
# queue_opt="--num-threads $num_threads --gpu 1"
101+
#elif [ $num_threads -gt 1 ]; then
102+
# thread_string="-parallel --num-threads=$num_threads"
103+
# queue_opt="--num-threads $num_threads"
104+
#fi
105+
queue_opt="--num-threads 1" # 1 thread, we do on-the-fly boosting, the binary has no multi-threading...
106+
107+
mkdir -p $dir/log
108+
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
109+
echo $nj > $dir/num_jobs
110+
111+
## Set up features.
112+
if [ -f $srcdir/online_cmvn ]; then online_cmvn=true
113+
else online_cmvn=false; fi
114+
115+
if ! $online_cmvn; then
116+
echo "$0: feature type is raw"
117+
feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |"
118+
else
119+
echo "$0: feature type is raw (apply-cmvn-online)"
120+
feats="ark,s,cs:apply-cmvn-online $cmvn_opts --spk2utt=ark:$sdata/JOB/spk2utt $srcdir/global_cmvn.stats scp:$sdata/JOB/feats.scp ark:- |"
121+
fi
122+
123+
if [ ! -z "$online_ivector_dir" ]; then
124+
ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1;
125+
ivector_opts="--online-ivectors=scp:$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period"
126+
fi
127+
128+
if [ "$post_decode_acwt" == 1.0 ]; then
129+
lat_wspecifier="ark:|gzip -c >$dir/lat.JOB.gz"
130+
else
131+
lat_wspecifier="ark:|lattice-scale --acoustic-scale=$post_decode_acwt ark:- ark:- | gzip -c >$dir/lat.JOB.gz"
132+
fi
133+
134+
frame_subsampling_opt=
135+
if [ -f $srcdir/frame_subsampling_factor ]; then
136+
# e.g. for 'chain' systems
137+
frame_subsampling_opt="--frame-subsampling-factor=$(cat $srcdir/frame_subsampling_factor)"
138+
elif [ -f $srcdir/init/info.txt ]; then
139+
frame_subsampling_factor=$(awk '/^frame_subsampling_factor/ {print $2}' <$srcdir/init/info.txt)
140+
# Quoted so that '[' always sees exactly one operand, even when the awk
# lookup above printed nothing (no matching line in info.txt).
if [ -n "$frame_subsampling_factor" ]; then
141+
frame_subsampling_opt="--frame-subsampling-factor=$frame_subsampling_factor"
142+
fi
143+
fi
144+
145+
if [ $stage -le 1 ]; then
146+
$cmd $queue_opt JOB=1:$nj $dir/log/decode.JOB.log \
147+
nnet3-latgen-faster-compose $ivector_opts $frame_subsampling_opt \
148+
--frames-per-chunk=$frames_per_chunk \
149+
--extra-left-context=$extra_left_context \
150+
--extra-right-context=$extra_right_context \
151+
--extra-left-context-initial=$extra_left_context_initial \
152+
--extra-right-context-final=$extra_right_context_final \
153+
--minimize=$minimize --max-active=$max_active --min-active=$min_active --beam=$beam \
154+
--lattice-beam=$lattice_beam --acoustic-scale=$acwt --allow-partial=true \
155+
--word-symbol-table=$graphdir/words.txt "$model" \
156+
$graphdir/HCLG.fst "$boosting_graphs" "$feats" "$lat_wspecifier" || exit 1;
157+
fi
158+
159+
160+
if [ $stage -le 2 ]; then
161+
if ! $skip_diagnostics ; then
162+
# Pass '--iter' to the lattice analysis only when an iteration name is set;
# the variable is quoted so an empty value is tested reliably.
[ -n "$iter" ] && iter_opt="--iter $iter"
163+
steps/diagnostic/analyze_lats.sh --cmd "$cmd" $iter_opt $graphdir $dir
164+
fi
165+
fi
166+
167+
168+
# The output of this script is the files "lat.*.gz"-- we'll rescore this at
169+
# different acoustic scales to get the final output.
170+
if [ $stage -le 3 ]; then
171+
if ! $skip_scoring ; then
172+
[ ! -x local/score.sh ] && \
173+
echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
174+
echo "score best paths"
175+
[ "$iter" != "final" ] && iter_opt="--iter $iter"
176+
local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir
177+
echo "score confidence and timing with sclite"
178+
fi
179+
fi
180+
echo "Decoding done."
181+
exit 0;

src/nnet3bin/nnet3-latgen-faster-compose.cc

+8
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,14 @@ int main(int argc, char *argv[]) {
225225
// not sure if decoding would be faster if
226226
// decode_fst was sorted by isymbols)
227227

228+
// Check that the composed graph is non-empty; skip the utterance otherwise.
229+
if (decode_fst.Start() == fst::kNoStateId) {
230+
KALDI_WARN << "Empty 'decode_fst' HCLG for utterance "
231+
<< utt << " (bad boosting graph?)";
232+
num_fail++;
233+
continue;
234+
}
235+
228236
elapsed_compose += timer_compose.Elapsed();
229237

230238
DecodableAmNnetSimple nnet_decodable(

0 commit comments

Comments
 (0)