Commit 459f5ee

add RESULTS for kaldi pybind LF-MMI pipeline with PyTorch.

1 parent aed291b

File tree

8 files changed: +725 −16 lines

egs/aishell/s10/RESULTS

+45
@@ -0,0 +1,45 @@
+# If you execute `run.sh`, then you should get similar results as follows:
+
+# Results for kaldi pybind LF-MMI training with PyTorch
+## head exp/chain/decode_res/*/scoring_kaldi/best_* > RESULTS
+#
+==> exp/chain/decode_res/dev/scoring_kaldi/best_cer <==
+%WER 8.22 [ 16888 / 205341, 774 ins, 1007 del, 15107 sub ] exp/chain/decode_res/dev/cer_10_1.0
+
+==> exp/chain/decode_res/dev/scoring_kaldi/best_wer <==
+%WER 16.66 [ 21278 / 127698, 1690 ins, 3543 del, 16045 sub ] exp/chain/decode_res/dev/wer_11_0.5
+
+==> exp/chain/decode_res/test/scoring_kaldi/best_cer <==
+%WER 9.98 [ 10454 / 104765, 693 ins, 802 del, 8959 sub ] exp/chain/decode_res/test/cer_11_1.0
+
+==> exp/chain/decode_res/test/scoring_kaldi/best_wer <==
+%WER 18.89 [ 12170 / 64428, 1112 ins, 1950 del, 9108 sub ] exp/chain/decode_res/test/wer_12_0.5
+
+# Results for kaldi nnet3 LF-MMI training
+## head exp/chain_nnet3/tdnn_1b/decode_*/scoring_kaldi/best_*
+#
+==> exp/chain_nnet3/tdnn_1b/decode_dev/scoring_kaldi/best_cer <==
+%WER 7.06 [ 14494 / 205341, 466 ins, 726 del, 13302 sub ] exp/chain_nnet3/tdnn_1b/decode_dev/cer_10_0.5
+
+==> exp/chain_nnet3/tdnn_1b/decode_dev/scoring_kaldi/best_wer <==
+%WER 15.11 [ 19296 / 127698, 1800 ins, 2778 del, 14718 sub ] exp/chain_nnet3/tdnn_1b/decode_dev/wer_11_0.0
+
+==> exp/chain_nnet3/tdnn_1b/decode_test/scoring_kaldi/best_cer <==
+%WER 8.63 [ 9041 / 104765, 367 ins, 668 del, 8006 sub ] exp/chain_nnet3/tdnn_1b/decode_test/cer_11_1.0
+
+==> exp/chain_nnet3/tdnn_1b/decode_test/scoring_kaldi/best_wer <==
+%WER 17.40 [ 11210 / 64428, 1059 ins, 1654 del, 8497 sub ] exp/chain_nnet3/tdnn_1b/decode_test/wer_11_0.5
+
+# kaldi pybind LF-MMI training currently uses batchnorm to replace the LDA layer,
+# since it is not easy to get lda.mat without constructing an nnet3 network.
+
+# Training time comparison between kaldi pybind with PyTorch and nnet3
+# on a single GPU is as follows:
+#
+# training time for 6 epochs:
+# - kaldi pybind with PyTorch: about 55 minutes
+# - kaldi nnet3: about 4 hours 37 minutes
+
+# You can find the training logs in the following files:
+# - kaldi pybind with PyTorch: ./kaldi-pybind-with-pytorch-training-log.txt
+# - kaldi nnet3: ./nnet3-training-log.txt
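(Editor's note on the batchnorm-for-LDA remark above: a minimal PyTorch sketch of the idea. The class and layer names, and the dimensions, are illustrative assumptions, not the recipe's actual model code.)

import torch
import torch.nn as nn

class InputBlock(nn.Module):
    """Sketch: batchnorm whitens the input instead of a precomputed lda.mat."""

    def __init__(self, feat_dim, hidden_dim=625):
        super().__init__()
        # A fixed-affine (LDA) layer would need lda.mat produced by nnet3;
        # an input batchnorm learns a per-dimension normalization instead.
        self.batchnorm = nn.BatchNorm1d(num_features=feat_dim, affine=False)
        self.tdnn1 = nn.Conv1d(feat_dim, hidden_dim, kernel_size=1)

    def forward(self, x):
        # x: (batch, feat_dim, time), e.g. fbank+pitch features
        return self.tdnn1(self.batchnorm(x))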

egs/aishell/s10/chain/train.py

+5-1
@@ -1,11 +1,15 @@
 #!/usr/bin/env python3
 
-# Copyright 2019 Mobvoi AI Lab, Beijing, China (author: Fangjun Kuang)
+# Copyright 2019-2020 Mobvoi AI Lab, Beijing, China (author: Fangjun Kuang)
 # Apache 2.0
 
 import logging
 import os
 import sys
+import warnings
+
+# disable warnings when loading tensorboard
+warnings.simplefilter(action='ignore', category=FutureWarning)
 
 import torch
 import torch.optim as optim
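(Editor's note: the placement of the new lines matters, because a warnings filter only affects warnings raised after it is installed. A small sketch of the pattern; the tensorboard import shown is an assumption about what train.py loads later, and any module that emits FutureWarning at import time behaves the same way.)

import warnings

# Install the filter first; FutureWarnings raised while tensorboard's
# dependencies load are then silenced.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Assumed import for illustration.
from torch.utils.tensorboard import SummaryWriter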

egs/aishell/s10/cmd.sh

+1
@@ -13,3 +13,4 @@
 export train_cmd="run.pl"
 export decode_cmd="run.pl"
 export mkgraph_cmd="run.pl"
+export cuda_cmd="run.pl"

egs/aishell/s10/kaldi-pybind-with-pytorch-training-log.txt

+225
Large diffs are not rendered by default.

egs/aishell/s10/local/run_chain.sh

+17-11
@@ -9,14 +9,14 @@ stage=0
 
 # GPU device id to use (count from 0).
 # you can also set `CUDA_VISIBLE_DEVICES` and set `device_id=0`
-device_id=7
+device_id=0
 
 nj=10
 
 lang=data/lang_chain # output lang dir
-ali_dir=exp/tri3a_ali # input alignment dir
-lat_dir=exp/tri3a_lats # input lat dir
-treedir=exp/chain/tri3_tree # output tree dir
+ali_dir=exp/tri5a_ali # input alignment dir
+lat_dir=exp/tri5a_lats # input lat dir
+treedir=exp/chain/tri5_tree # output tree dir
 
 # You should know how to calculate your model's left/right context **manually**
 model_left_context=12
@@ -27,8 +27,8 @@ frames_per_eg=150,110,90
 frames_per_iter=1500000
 minibatch_size=128
 
-num_epochs=10
-lr=2e-3
+num_epochs=6
+lr=1e-3
 
 hidden_dim=625
 kernel_size_list="1, 3, 3, 3, 3, 3" # comma separated list
@@ -48,11 +48,17 @@ save_nn_output_as_compressed=false
 if [[ $stage -le 0 ]]; then
   for datadir in train dev test; do
     dst_dir=data/fbank_pitch/$datadir
-    utils/copy_data_dir.sh data/$datadir $dst_dir
-    echo "making fbank-pitch features for LF-MMI training"
-    steps/make_fbank_pitch.sh --cmd $train_cmd --nj $nj $dst_dir || exit 1
-    steps/compute_cmvn_stats.sh $dst_dir || exit 1
-    utils/fix_data_dir.sh $dst_dir
+    if [[ ! -f $dst_dir/feats.scp ]]; then
+      utils/copy_data_dir.sh data/$datadir $dst_dir
+      echo "making fbank-pitch features for LF-MMI training"
+      steps/make_fbank_pitch.sh --cmd $train_cmd --nj $nj $dst_dir || exit 1
+      steps/compute_cmvn_stats.sh $dst_dir || exit 1
+      utils/fix_data_dir.sh $dst_dir
+    else
+      echo "$dst_dir/feats.scp already exists."
+      echo "kaldi (local/run_tdnn_1b.sh) LF-MMI may have generated it."
+      echo "skip $dst_dir"
+    fi
   done
 fi
 
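(Editor's note: run_chain.sh asks you to compute the model's left/right context by hand from the kernel sizes. A sketch of that arithmetic in Python; the dilation values below are assumptions chosen so the result reproduces the recipe's model_left_context=12, so substitute your model's real ones.)

# Left/right context of a stack of symmetric 1-d convolutions:
# each layer adds (kernel_size - 1) // 2 * dilation frames on each side.
def context(kernel_sizes, dilations):
    left = sum((k - 1) // 2 * d for k, d in zip(kernel_sizes, dilations))
    return left, left  # symmetric kernels: right context equals left context

# kernel sizes from run_chain.sh; dilations are illustrative assumptions
print(context([1, 3, 3, 3, 3, 3], [1, 1, 2, 3, 3, 3]))  # -> (12, 12)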

egs/aishell/s10/local/run_tdnn_1b.sh

+179
@@ -0,0 +1,179 @@
+#!/bin/bash
+
+# This script is based on run_tdnn_7h.sh in swbd chain recipe.
+
+set -e
+
+nj=10
+# configs for 'chain'
+affix=
+stage=0
+train_stage=-10
+get_egs_stage=-10
+dir=exp/chain_nnet3/tdnn_1b
+decode_iter=
+
+# training options
+num_epochs=6
+initial_effective_lrate=0.001
+final_effective_lrate=0.0001
+max_param_change=2.0
+final_layer_normalize_target=0.5
+num_jobs_initial=2
+num_jobs_final=12
+minibatch_size=128
+frames_per_eg=150,110,90
+remove_egs=true
+common_egs_dir=
+xent_regularize=0.1
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+  echo "This script is intended to be used with GPUs"
+  echo "but you have not compiled Kaldi with CUDA"
+  echo "If you want to use GPUs (and have them), go to src/,"
+  echo "and configure and make on a machine where "nvcc" is installed."
+  exit 1
+fi
+
+dir=${dir}${affix:+_$affix}
+train_set=train
+ali_dir=exp/tri5a_ali
+lat_dir=exp/tri5a_lats
+treedir=exp/chain_nnet3/tri5_tree
+lang=data/lang_chain_nnet3
+
+
+if [[ $stage -le 0 ]]; then
+  for datadir in train dev test; do
+    dst_dir=data/fbank_pitch/$datadir
+    if [[ ! -f $dst_dir/feats.scp ]]; then
+      utils/copy_data_dir.sh data/$datadir $dst_dir
+      echo "making fbank-pitch features for LF-MMI training"
+      steps/make_fbank_pitch.sh --cmd $train_cmd --nj $nj $dst_dir || exit 1
+      steps/compute_cmvn_stats.sh $dst_dir || exit 1
+      utils/fix_data_dir.sh $dst_dir
+    else
+      echo "$dst_dir/feats.scp already exists."
+      echo "kaldi pybind (local/run_chain.sh) LF-MMI may have generated it."
+      echo "skip $dst_dir"
+    fi
+  done
+fi
+
+if [[ $stage -le 1 ]]; then
+  # Create a version of the lang/ directory that has one state per phone in the
+  # topo file. [note, it really has two states.. the first one is only repeated
+  # once, the second one has zero or more repeats.]
+  rm -rf $lang
+  cp -r data/lang $lang
+  silphonelist=$(cat $lang/phones/silence.csl) || exit 1
+  nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1
+  # Use our special topology... note that later on may have to tune this
+  # topology.
+  steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo
+fi
+
+if [[ $stage -le 2 ]]; then
+  # Build a tree using our new topology. This is the critically different
+  # step compared with other recipes.
+  steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
+    --context-opts "--context-width=2 --central-position=1" \
+    --cmd $train_cmd 5000 data/train $lang $ali_dir $treedir
+fi
+
+if [[ $stage -le 3 ]]; then
+  echo "creating neural net configs using the xconfig parser"
+
+  num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}')
+  learning_rate_factor=$(echo "print(0.5/$xent_regularize)" | python3)
+  feat_dim=$(feat-to-dim scp:data/fbank_pitch/train/feats.scp -)
+
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=$feat_dim name=input
+
+  # please note that it is important to have input layer with the name=input
+  # as the layer immediately preceding the fixed-affine-layer to enable
+  # the use of short notation for the descriptor
+  fixed-affine-layer name=lda input=Append(-1,0,1) affine-transform-file=$dir/configs/lda.mat
+
+  # the first splicing is moved before the lda layer, so no splicing here
+  relu-batchnorm-layer name=tdnn1 dim=625
+  relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=625
+  relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=625
+  relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=625
+  relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=625
+  relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=625
+
+  ## adding the layers for chain branch
+  relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=625 target-rms=0.5
+  output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5
+
+  # adding the layers for xent branch
+  # This block prints the configs for a separate output that will be
+  # trained with a cross-entropy objective in the 'chain' models... this
+  # has the effect of regularizing the hidden parts of the model. we use
+  # 0.5 / args.xent_regularize as the learning rate factor- the factor of
+  # 0.5 / args.xent_regularize is suitable as it means the xent
+  # final-layer learns at a rate independent of the regularization
+  # constant; and the 0.5 was tuned so as to make the relative progress
+  # similar in the xent and regular final layers.
+  relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=625 target-rms=0.5
+  output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
+
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+if [[ $stage -le 4 ]]; then
+  steps/nnet3/chain/train.py --stage $train_stage \
+    --cmd $cuda_cmd \
+    --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
+    --chain.xent-regularize $xent_regularize \
+    --chain.leaky-hmm-coefficient 0.1 \
+    --chain.l2-regularize 0.00005 \
+    --chain.apply-deriv-weights false \
+    --chain.lm-opts="--num-extra-lm-states=2000" \
+    --egs.dir "$common_egs_dir" \
+    --egs.stage $get_egs_stage \
+    --egs.opts "--frames-overlap-per-eg 0" \
+    --egs.chunk-width $frames_per_eg \
+    --trainer.num-chunk-per-minibatch $minibatch_size \
+    --trainer.frames-per-iter 1500000 \
+    --trainer.num-epochs $num_epochs \
+    --trainer.optimization.num-jobs-initial $num_jobs_initial \
+    --trainer.optimization.num-jobs-final $num_jobs_final \
+    --trainer.optimization.initial-effective-lrate $initial_effective_lrate \
+    --trainer.optimization.final-effective-lrate $final_effective_lrate \
+    --trainer.max-param-change $max_param_change \
+    --cleanup.remove-egs $remove_egs \
+    --cleanup.preserve-model-interval=1 \
+    --feat-dir data/fbank_pitch/train \
+    --tree-dir $treedir \
+    --use-gpu "wait" \
+    --lat-dir $lat_dir \
+    --dir $dir || exit 1
+fi
+
+if [[ $stage -le 5 ]]; then
+  # Note: it might appear that this $lang directory is mismatched, and it is as
+  # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
+  # the lang directory.
+  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph
+fi
+
+graph_dir=$dir/graph
+if [[ $stage -le 6 ]]; then
+  for test_set in dev test; do
+    steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
+      --nj $nj --cmd $decode_cmd \
+      $graph_dir data/fbank_pitch/${test_set} $dir/decode_${test_set} || exit 1
+  done
+fi
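(Editor's note on the stage 4 options: --trainer.optimization.initial-effective-lrate and --trainer.optimization.final-effective-lrate define a schedule that, as I understand nnet3's training library, interpolates the effective learning rate geometrically across training iterations. A sketch under that assumption, ignoring the scaling by the current number of parallel jobs:)

import math

def effective_lrate(initial, final, cur_iter, num_iters):
    # Geometric interpolation from initial to final over num_iters.
    return initial * math.exp(cur_iter * math.log(final / initial) / num_iters)

# With this script's defaults (1e-3 -> 1e-4), halfway through training the
# effective learning rate is about 3.16e-4, i.e. the decay is not linear.
for it in (0, 50, 100):
    print(f"iter {it:3d}: {effective_lrate(1e-3, 1e-4, it, 100):.2e}")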
