-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathrun_dnn_semisup.sh
executable file
·205 lines (167 loc) · 7.81 KB
/
run_dnn_semisup.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
#!/bin/bash -e
# Semi-supervised DNN training recipe (Kaldi nnet1):
# decode unlabeled target-language audio with a seed GMM and seed DNN,
# turn the decodes into training targets (best-path one-hot posteriors or
# lattice soft-counts) plus per-frame confidence weights, then retrain a
# DNN on labeled data (duplicated num_copies times) + unlabeled data.

. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
           ## This relates to the queue.
. ./path.sh ## Source the tools/utils (import the queue.pl)

# Fail fast: abort on command errors, failures inside pipelines, and
# references to unset variables. (Declared once; the original repeated
# this group a second time after the config section — redundant.)
set -e
set -o pipefail
set -u

# Set the location of the SBS speech
SBS_CORPUS=/export/ws15-pt-data/data/audio
LANG="SW" # Target language

feats_nj=40
train_nj=20
decode_nj=5
stage=-100 # resume training with --stage=N

# Semi-supervised training options
num_copies=2 # Make this many copies of supervised data
threshold=0.7 # If provided, use frame thresholding -- keep only frames whose
              # best path posterior is above this value.
use_soft_counts=true # Use soft-counts as targets for unlabeled data

# Decode Config:
acwt=0.2
parallel_opts="--num-threads 6" # passed unquoted on purpose so it word-splits

gmmdir=exp/tri3b # SAT GMM to get fMLLR transforms for unlabeled data
data_fmllr=data-fmllr-tri3b # Directory to store fMLLR feats of unlabeled data
# NOTE(review): graph_dir embeds $LANG *before* parse_options.sh runs, so
# overriding --lang on the command line does not update graph_dir — pass
# --graph-dir explicitly too if you override the language. (Preserved as-is.)
graph_dir=exp/tri3b/$LANG/graph_text_G # Graph directory used while decoding unlabeled data and dev data
feature_transform=exp/dnn4_pretrain-dbn/final.feature_transform # DBN feature transform used with the seed DBN
dbn=exp/dnn4_pretrain-dbn/6.dbn # Seed DBN used to initialize semi-supervised DNN
dnndir=exp/dnn4_pretrain-dbn_dnn # Seed DNN used for decoding the unlabeled data
dir=exp/dnn5_pretrain-dbn_dnn_semisup # Directory to store final DNN
# End of config.

# Allow any of the variables above to be overridden as --name value.
. utils/parse_options.sh

L=$LANG
# Stage -4: build a Kaldi data directory for the unlabeled (unsupervised)
# audio of the target language, using the corpus-to-language map.
if [ "$stage" -le -4 ]; then
  local/sbs_gen_data_dir.sh \
    --corpus-dir="$SBS_CORPUS" \
    --lang-map=conf/lang_codes.txt \
    "$LANG" || exit 1
fi
# Stage -3: extract MFCCs for the unlabeled data, then keep a 4000-utterance
# subset (~8 hrs) so languages with different amounts of unlabeled data
# contribute a uniform amount, and compute CMVN stats on that subset.
if [ "$stage" -le -3 ]; then
  mfccdir=mfcc/$L
  steps/make_mfcc.sh --nj "$feats_nj" --cmd "$train_cmd" \
    data/$L/unsup exp/$L/make_mfcc/unsup "$mfccdir" || exit 1
  utils/subset_data_dir.sh data/$L/unsup 4000 data/$L/unsup_4k || exit 1
  steps/compute_cmvn_stats.sh data/$L/unsup_4k exp/$L/make_mfcc/unsup_4k "$mfccdir" || exit 1
fi
# Decode unlabeled data using GMM
# graph_affix = everything after "graph" in $graph_dir (e.g. "_text_G");
# it tags all decode/posterior directories below so different graphs don't
# collide.
graph_affix=${graph_dir#*graph}
if [ $stage -le -2 ]; then
# SAT decode of the 4k unlabeled subset. Besides the lattices, this leaves
# the per-speaker fMLLR transforms that stage -1 reuses via --transform-dir.
# $parallel_opts is intentionally unquoted so "--num-threads 6" splits into
# two arguments.
steps/decode_fmllr.sh $parallel_opts --nj $train_nj --cmd "$decode_cmd" \
--skip-scoring true --acwt $acwt \
$graph_dir data/$L/unsup_4k $gmmdir/decode${graph_affix}_unsup_4k_$L || exit 1
fi
# Stage -1: dump fMLLR-adapted features for the unlabeled subset, reusing
# the transforms estimated during the GMM decode in stage -2.
if [ "$stage" -le -1 ]; then
  featdir=$data_fmllr/unsup_4k_$L
  trans_dir=$gmmdir/decode${graph_affix}_unsup_4k_$L
  steps/nnet/make_fmllr_feats.sh --nj "$feats_nj" --cmd "$train_cmd" \
    --transform-dir "$trans_dir" \
    "$featdir" data/$L/unsup_4k "$gmmdir" "$featdir/log" "$featdir/data" || exit 1
fi
# Output of the seed-DNN decode of the unlabeled data, and the best-path
# directory derived from it; both are referenced by stages 1-3 below.
decode_dir=$dnndir/decode${graph_affix}_unsup_4k_$L
best_path_dir=$dnndir/best_path${graph_affix}_unsup_4k_$L

# Stage 0: decode the unlabeled fMLLR features with the seed DNN.
# $parallel_opts stays unquoted so its option/value pair word-splits.
if [ "$stage" -le 0 ]; then
  steps/nnet/decode.sh $parallel_opts --nj "$train_nj" --cmd "$decode_cmd" \
    --skip-scoring true --acwt "$acwt" \
    $graph_dir "$data_fmllr/unsup_4k_$L" "$decode_dir" || exit 1
fi
# Directory for all posterior/frame-weight archives. ${threshold:+_$threshold}
# appends "_<threshold>" only when $threshold is non-empty, so thresholded
# and unthresholded runs get distinct directories.
postdir=$dnndir/post${graph_affix}_semisup_4k${threshold:+_$threshold}

# Stage 1: from the stage-0 DNN lattices, extract the best path and the
# per-frame best-path posteriors, used later as confidence weights for the
# unlabeled data. (Dropped a redundant L=$LANG reassignment and reused
# $best_path_dir, which is bound to this exact path above.)
if [ $stage -le 1 ]; then
  local/best_path_weights.sh --acwt $acwt data/$L/unsup_4k $graph_dir \
    $decode_dir $best_path_dir || exit 1
fi
# Converted alignments of the labeled (DT) data from all seen languages into
# one-hot posteriors and frame weights
if [ $stage -le 2 ]; then
# One parallel job per alignment archive of the GMM system.
nj=$(cat $gmmdir/num_jobs)
# Convert transition-id alignments to pdf-id alignments, then to one-hot
# posteriors. The "\|" keeps the pipe inside the command string handed to
# the queue tool, so both binaries run in the same queued job.
$train_cmd JOB=1:$nj $postdir/get_train_post.JOB.log \
ali-to-pdf $gmmdir/final.mdl "ark:gunzip -c $gmmdir/ali.JOB.gz |" ark:- \| \
ali-to-post ark:- ark,scp:$postdir/train_post.JOB.ark,$postdir/train_post.JOB.scp || exit 1
# Merge the per-job scp lists into one.
for n in `seq $nj`; do
cat $postdir/train_post.$n.scp
done > $postdir/train_post.scp
# Labeled frames are fully trusted: for each aligned utterance emit a
# weight of 1 per frame. The awk turns each text alignment line
# "<utt> <id> <id> ..." into the vector "<utt> [ 1 1 ... 1 ]" (one 1 per
# alignment entry), which copy-vector writes as a Kaldi vector archive.
for n in `seq $nj`; do
copy-int-vector "ark:gunzip -c $gmmdir/ali.$n.gz |" ark,t:-
done | \
awk '{printf $1" ["; for (i=2; i<=NF; i++) { printf " "1; }; print " ]";}' | \
copy-vector ark,t:- ark,scp:$postdir/train_frame_weights.ark,$postdir/train_frame_weights.scp || exit 1
fi
# Stage 3: build training targets and per-frame confidence weights for the
# unlabeled data, either as hard best-path labels or as lattice soft-counts.
if [ $stage -le 3 ]; then
nj=$(cat $best_path_dir/num_jobs)
# $use_soft_counts holds "true" or "false" and is executed as the shell
# builtin of that name (standard Kaldi boolean-option idiom).
if ! $use_soft_counts; then
# Hard targets: one-hot posteriors from the best-path alignments.
$train_cmd JOB=1:$nj $postdir/get_unsup_post.JOB.log \
ali-to-pdf $gmmdir/final.mdl "ark:gunzip -c $best_path_dir/ali.JOB.gz |" ark:- \| \
ali-to-post ark:- ark,scp:$postdir/unsup_post.JOB.ark,$postdir/unsup_post.JOB.scp || exit 1
else
# Get soft-counts for unlabeled data of target language
# (per-frame pdf posteriors computed over the whole decode lattice).
$train_cmd JOB=1:$nj $postdir/get_unsup_soft_post.JOB.log \
lattice-to-post --acoustic-scale=$acwt "ark:gunzip -c $decode_dir/lat.JOB.gz |" ark:- \| \
post-to-pdf-post $gmmdir/final.mdl ark:- \
ark,scp:$postdir/unsup_post.JOB.ark,$postdir/unsup_post.JOB.scp || exit 1
fi
# Merge the per-job scp lists into one.
for n in `seq $nj`; do
cat $postdir/unsup_post.$n.scp
done > $postdir/unsup_post.scp
# With a threshold, frame weights are binarized via thresh-vector
# (presumably values below --threshold map to --lower-cap=0.0 and the rest
# to --upper-cap=1.0 — confirm against the local tool); without one, the
# best-path posteriors are copied through unchanged. $copy_command is left
# unquoted below so the command name and its options word-split.
copy_command=copy-vector
if [ ! -z "$threshold" ]; then
copy_command="thresh-vector --threshold=$threshold --lower-cap=0.0 --upper-cap=1.0"
fi
# Apply binary threshold on the frame weights
$train_cmd JOB=1:$nj $postdir/copy_frame_weights.JOB.log \
$copy_command "ark:gunzip -c $best_path_dir/weights.JOB.gz |" \
ark,scp:$postdir/unsup_frame_weights.JOB.ark,$postdir/unsup_frame_weights.JOB.scp || exit 1
for n in `seq $nj`; do
cat $postdir/unsup_frame_weights.$n.scp
done > $postdir/unsup_frame_weights.scp
fi
# Make copies of labeled data to give higher weight to the labeled data
# relative to the unlabeled data
if [ $stage -le 4 ]; then
  # Prefix every utterance id with "0-", "1-", ... so the same labeled
  # utterance appears num_copies times in the combined target/weight lists.
  awk -v num_copies=$num_copies \
    '{for (i=0; i<num_copies; i++) { print i"-"$1" "$2 } }' \
    $postdir/train_post.scp > $postdir/train_post_${num_copies}x.scp
  awk -v num_copies=$num_copies \
    '{for (i=0; i<num_copies; i++) { print i"-"$1" "$2 } }' \
    $postdir/train_frame_weights.scp > $postdir/train_frame_weights_${num_copies}x.scp
  # Duplicate the data directory itself under matching utt/spk prefixes.
  # $((...)) and $(...) replace the deprecated $[...] arithmetic and
  # backtick substitution of the original.
  copied_data_dirs=
  for i in $(seq 0 $((num_copies - 1))); do
    utils/copy_data_dir.sh --utt-prefix ${i}- --spk-prefix ${i}- $data_fmllr/train_tr90 \
      $data_fmllr/train_tr90_$i || exit 1
    copied_data_dirs="$copied_data_dirs $data_fmllr/train_tr90_$i"
  done
  # $copied_data_dirs is intentionally unquoted: it word-splits into one
  # argument per copied directory.
  utils/combine_data.sh $data_fmllr/train_tr90_${num_copies}x $copied_data_dirs || exit 1
fi
# Train a DNN starting from the DBN using the labeled data from seen languages
# and unlabeled data from target language
if [ $stage -le 5 ]; then
# Training set = unlabeled 4k subset + num_copies copies of the labeled data.
utils/combine_data.sh $dir/data_semisup_4k_${num_copies}x $data_fmllr/unsup_4k_$L $data_fmllr/train_tr90_${num_copies}x || exit 1
# Cross-validation set gets the "0-" prefix so its utterance ids match the
# prefixed entries in the posterior/weight scp lists.
utils/copy_data_dir.sh --utt-prefix 0- --spk-prefix 0- $data_fmllr/train_cv10 \
$data_fmllr/train_cv10_0 || exit 1
# Merge unlabeled + duplicated-labeled targets/weights, sorted by utt id
# (first field) as Kaldi scp consumers expect.
sort -k1,1 $postdir/unsup_post.scp $postdir/train_post_${num_copies}x.scp > $dir/all_post.scp
sort -k1,1 $postdir/unsup_frame_weights.scp $postdir/train_frame_weights_${num_copies}x.scp > $dir/all_frame_weights.scp
# Output layer size = number of pdfs in the GMM model (last field of the
# "pdfs" line of hmm-info).
num_tgt=$(hmm-info --print-args=false $gmmdir/final.mdl | grep pdfs | awk '{ print $NF }')
# --hid-layers 0: add no new hidden layers on top of the seed DBN.
# Targets come from --labels/--frame-weights scp files, so the alignment-dir
# positional arguments are passed as the placeholders "dummy dummy"
# (presumably ignored by train.sh in this mode — confirm against its usage).
$cuda_cmd $dir/log/train.log \
steps/nnet/train.sh --feature-transform $feature_transform --dbn $dbn \
--hid-layers 0 --learn-rate 0.008 --num-tgt $num_tgt \
--labels scp:$dir/all_post.scp --frame-weights scp:$dir/all_frame_weights.scp \
$dir/data_semisup_4k_${num_copies}x $data_fmllr/train_cv10_0 \
data/$L/lang dummy dummy $dir || exit 1;
fi
# Get posteriors for the trained semi-supervised DNN
if [ $stage -le 6 ]; then
# Estimate the output-class priors used to convert DNN posteriors to
# pseudo-likelihoods at decode time.
steps/nnet/make_priors.sh --cmd "$train_cmd" --nj $train_nj $data_fmllr/train $dir || exit 1
# NOTE(review): this copies the *seed* DNN's final.mdl into $dir —
# presumably to supply the transition model that the scp-labels training
# above does not produce; confirm it is not clobbering a model written by
# train.sh.
cp $dnndir/final.mdl $dir
fi
# Stage 7: decode the dev set of each target language with the final
# semi-supervised DNN, reusing the existing HCLG graph.
if [ "$stage" -le 7 ]; then
  for lang in $L; do
    dev_decode_dir=$dir/decode${graph_affix}_dev_$lang
    steps/nnet/decode.sh $parallel_opts --nj "$decode_nj" --cmd "$decode_cmd" \
      --acwt "$acwt" \
      $graph_dir "$data_fmllr/dev_$lang" "$dev_decode_dir" || exit 1
  done
fi