#!/usr/bin/env bash

# Copyright 2021 Brno University of Technology (Author: Karel Vesely).
# Copyright 2012-2015 Johns Hopkins University (Author: Daniel Povey).
# Apache 2.0.

# This script does decoding with a neural-net.
# It calls 'nnet3-latgen-faster-compose', which does on-the-fly boosting
# of the HCLG graph by composing it with pre-existing per-utterance boosting graphs.

# Begin configuration section.
# Defaults for the options of this script (see the usage message below for
# the main ones; the remaining ones are only listed here).
stage=1                 # first stage to run: 1 = decode, 2 = lattice diagnostics, 3 = scoring.
nj=4                    # number of decoding jobs.
acwt=0.1                # Just a default value, used for adaptation and beam-pruning.
post_decode_acwt=1.0    # can be used in 'chain' systems to scale acoustics by 10 so the
                        # regular scoring script works.
cmd=run.pl
beam=15.0
frames_per_chunk=50
max_active=7000
min_active=200
ivector_scale=1.0       # not referenced anywhere else in this script.
lattice_beam=8.0        # Beam we use in lattice generation.
iter=final              # the model that gets decoded is <model-dir>/$iter.mdl
num_threads=1           # accepted, but has no effect: the multi-threaded code path
                        # is commented out below and jobs always get 1 thread.
use_gpu=false           # accepted, but has no effect: the GPU (batch) code path
                        # is commented out below.
scoring_opts=           # extra options handed to local/score.sh
skip_diagnostics=false  # if true, stage 2 (analyze_lats.sh) is skipped.
skip_scoring=false      # if true, stage 3 (local/score.sh) is skipped.
extra_left_context=0
extra_right_context=0
extra_left_context_initial=-1
extra_right_context_final=-1
online_ivector_dir=     # if set, must contain ivector_online.scp and ivector_period.
minimize=false

boosting_graphs=        # required; handed to nnet3-latgen-faster-compose right after HCLG.fst.
# End configuration section.

# Print the command line for logging.
# "$*" (not "$@") because the arguments are embedded in a single string.
echo "$0 $*"

# Source the environment set-up if the working directory provides one.
if [ -f ./path.sh ]; then
  . ./path.sh
fi

# Let parse_options.sh process the command-line options; give up if it fails.
. utils/parse_options.sh || exit 1

# Exactly three positional arguments are expected at this point; anything
# else prints the usage message and aborts.
if [ $# -ne 3 ]; then
  echo "Usage: $0 [options] <graph-dir> <data-dir> <decode-dir>"
  echo "e.g.: $0 --nj 8 \\"
  echo "--boosting-graphs <boosting-graphs> \\"
  echo "--online-ivector-dir exp/nnet2_online/ivectors_test_eval92 \\"
  # '\$dir' is escaped on purpose: 'dir' is not assigned yet, so an unescaped
  # expansion would print an empty string in the example.
  echo " exp/tri4b/graph_bg data/test_eval92_hires \$dir/decode_bg_eval92"
  echo "main options (for others, see top of script file)"
  echo "  --config <config-file>                   # config containing options"
  echo "  --nj <nj>                                # number of parallel jobs"
  echo "  --cmd <cmd>                              # Command to run in parallel with"
  echo "  --beam <beam>                            # Decoding beam; default 15.0"
  echo "  --iter <iter>                            # Iteration of model to decode; default is final."
  echo "  --scoring-opts <string>                  # options to local/score.sh"
  echo "  --boosting-graphs <boosting-graphs>      # per-utterance boosting graphs; required."
  echo "  --num-threads <n>                        # accepted but ignored, decoding always"
  echo "                                           # runs with 1 thread per job."
  echo "  --use-gpu <true|false>                   # accepted but ignored, the GPU code path"
  echo "                                           # is disabled in this script."
  exit 1
fi

# Positional arguments.
graphdir=$1                # graph directory (HCLG.fst, words.txt, phones.txt are read from it).
data=$2                    # data directory (feats.scp is read from it).
dir=$3                     # decoding directory (log/, num_jobs and lat.*.gz are written to it).
srcdir=$(dirname "$dir")   # Assume model directory one level up from decoding directory.
model=$srcdir/$iter.mdl

# The boosting graphs are mandatory for this decoder, refuse to run without them.
if [ -z "$boosting_graphs" ]; then
  echo "Error, \$boosting_graphs have to be set !"
  exit 1
fi

# Files that have to exist before decoding can start. An array is used so
# that paths are never subject to word-splitting or globbing.
required_files=("$graphdir/HCLG.fst" "$data/feats.scp" "$model")

if [ -n "$online_ivector_dir" ]; then
  # Online i-vectors were requested: check them against the model directory
  # and add their files to the list of required inputs.
  steps/nnet2/check_ivectors_compatible.sh "$srcdir" "$online_ivector_dir" || exit 1
  required_files+=("$online_ivector_dir/ivector_online.scp" "$online_ivector_dir/ivector_period")
fi

# The phone tables of the model directory and of the graph directory are compared.
utils/lang/check_phones_compatible.sh "$srcdir/phones.txt" "$graphdir/phones.txt" || exit 1

for f in "${required_files[@]}"; do
  if [ ! -f "$f" ]; then
    echo "$0: no such file $f"
    exit 1
  fi
done

# Directory with the per-job split of the data (one sub-directory per job).
sdata=$data/split$nj

# CMVN options come from the model directory when it provides them;
# otherwise neither means nor variances are normalized.
if [ -f "$srcdir/cmvn_opts" ]; then
  cmvn_opts=$(cat "$srcdir/cmvn_opts")
else
  cmvn_opts="--norm-means=false --norm-vars=false"
fi

# Disabled: selection of the multi-threaded / GPU decoder variants.
# Kept for reference only.
#thread_string=
#if $use_gpu; then
#  if [ $num_threads -eq 1 ]; then
#    echo "$0: **Warning: we recommend to use --num-threads > 1 for GPU-based decoding."
#  fi
#  thread_string="-batch --num-threads=$num_threads"
#  queue_opt="--num-threads $num_threads --gpu 1"
#elif [ $num_threads -gt 1 ]; then
#  thread_string="-parallel --num-threads=$num_threads"
#  queue_opt="--num-threads $num_threads"
#fi

# --use-gpu and --num-threads are still accepted as options, but nothing
# below reads them. Tell the user instead of silently ignoring the request.
if [ "${use_gpu:-false}" = true ] || [ "${num_threads:-1}" != 1 ]; then
  echo "$0: warning: --use-gpu and --num-threads are ignored, decoding uses 1 thread per job." >&2
fi

queue_opt="--num-threads 1" # 1 thread, we do on-the-fly boosting, the binary has no multi-threading...

mkdir -p "$dir/log"

# Split the data into $nj parts, unless a split directory already exists
# and feats.scp is older than it.
if ! [[ -d "$sdata" && "$data/feats.scp" -ot "$sdata" ]]; then
  split_data.sh "$data" "$nj" || exit 1
fi

# Record the number of jobs next to the lattices.
echo "$nj" > "$dir/num_jobs"

## Set up features.
# 'JOB' inside the strings is a literal placeholder; it is part of the
# per-job command line that is launched through $cmd with JOB=1:$nj.
# The presence of $srcdir/online_cmvn selects between online and
# per-speaker (utt2spk + cmvn.scp) CMVN.
if [ -f "$srcdir/online_cmvn" ]; then
  online_cmvn=true
  echo "$0: feature type is raw (apply-cmvn-online)"
  feats="ark,s,cs:apply-cmvn-online $cmvn_opts --spk2utt=ark:$sdata/JOB/spk2utt $srcdir/global_cmvn.stats scp:$sdata/JOB/feats.scp ark:- |"
else
  online_cmvn=false
  echo "$0: feature type is raw"
  feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |"
fi

# Options for online i-vectors. Start from an empty value so that nothing
# inherited from the environment can end up on the decoder command line.
ivector_opts=
if [ -n "$online_ivector_dir" ]; then
  ivector_period=$(cat "$online_ivector_dir/ivector_period") || exit 1
  ivector_opts="--online-ivectors=scp:$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period"
fi

# Where the lattices are written. With the default post-decode scale
# (the literal string "1.0") they are gzipped directly; any other value adds
# a lattice-scale step that rescales the acoustic scores first.
case "$post_decode_acwt" in
  1.0)
    lat_wspecifier="ark:|gzip -c >$dir/lat.JOB.gz"
    ;;
  *)
    lat_wspecifier="ark:|lattice-scale --acoustic-scale=$post_decode_acwt ark:- ark:- | gzip -c >$dir/lat.JOB.gz"
    ;;
esac

# Frame-subsampling factor of the model, if the model directory declares one.
# Two locations are tried, in this order:
#   1. $srcdir/frame_subsampling_factor   (e.g. for 'chain' systems)
#   2. the 'frame_subsampling_factor' entry of $srcdir/init/info.txt
frame_subsampling_opt=
if [ -f "$srcdir/frame_subsampling_factor" ]; then
  frame_subsampling_opt="--frame-subsampling-factor=$(cat "$srcdir/frame_subsampling_factor")"
elif [ -f "$srcdir/init/info.txt" ]; then
  frame_subsampling_factor=$(awk '/^frame_subsampling_factor/ {print $2}' <"$srcdir/init/info.txt")
  # Quoted on purpose: an empty or multi-word value must not break the test.
  if [ -n "$frame_subsampling_factor" ]; then
    frame_subsampling_opt="--frame-subsampling-factor=$frame_subsampling_factor"
  fi
fi

# Stage 1: decoding. One nnet3-latgen-faster-compose job per data split is
# started through $cmd; each job logs to $dir/log/decode.JOB.log and writes
# its lattices to the location given by $lat_wspecifier.
if [ "$stage" -le 1 ]; then
  # $cmd, $queue_opt, $ivector_opts and $frame_subsampling_opt stay unquoted
  # on purpose: each may hold several words (or be empty) and has to be
  # split into separate arguments.
  $cmd $queue_opt JOB=1:"$nj" "$dir/log/decode.JOB.log" \
    nnet3-latgen-faster-compose $ivector_opts $frame_subsampling_opt \
    --frames-per-chunk="$frames_per_chunk" \
    --extra-left-context="$extra_left_context" \
    --extra-right-context="$extra_right_context" \
    --extra-left-context-initial="$extra_left_context_initial" \
    --extra-right-context-final="$extra_right_context_final" \
    --minimize="$minimize" --max-active="$max_active" --min-active="$min_active" --beam="$beam" \
    --lattice-beam="$lattice_beam" --acoustic-scale="$acwt" --allow-partial=true \
    --word-symbol-table="$graphdir/words.txt" "$model" \
    "$graphdir/HCLG.fst" "$boosting_graphs" "$feats" "$lat_wspecifier" || exit 1
fi


# Stage 2: lattice diagnostics (skipped with --skip-diagnostics true).
# A failure of analyze_lats.sh does not abort the script.
if [ "$stage" -le 2 ]; then
  if ! $skip_diagnostics; then
    # Only pass --iter when an iteration name is set; start from an empty
    # value so that nothing is inherited from the environment.
    iter_opt=
    if [ -n "$iter" ]; then
      iter_opt="--iter $iter"
    fi
    # $iter_opt is unquoted on purpose: it is either empty or two words.
    steps/diagnostic/analyze_lats.sh --cmd "$cmd" $iter_opt "$graphdir" "$dir"
  fi
fi


# The output of this script is the files "lat.*.gz"-- we'll rescore this at
# different acoustic scales to get the final output.
# Stage 3: scoring with local/score.sh (skipped with --skip-scoring true).
if [ "$stage" -le 3 ]; then
  if ! $skip_scoring; then
    # A missing or non-executable scoring script is treated as an error.
    if [ ! -x local/score.sh ]; then
      echo "Not scoring because local/score.sh does not exist or not executable."
      exit 1
    fi
    echo "score best paths"
    # $scoring_opts is unquoted on purpose: it may hold several options.
    local/score.sh $scoring_opts --cmd "$cmd" "$data" "$graphdir" "$dir"
    echo "score confidence and timing with sclite"
  fi
fi
echo "Decoding done."
exit 0