Skip to content

Commit d1b2e4e

Browse files
committed
Merge branch 'master' of github.com:kaldi-asr/kaldi
2 parents 71b8e43 + b0fc09d commit d1b2e4e

File tree

114 files changed

+3458
-1911
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

114 files changed

+3458
-1911
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ GSYMS
8383
/tools/ATLAS/
8484
/tools/atlas3.8.3.tar.gz
8585
/tools/irstlm/
86+
/tools/mitlm/
8687
/tools/openfst
8788
/tools/openfst-1.3.2.tar.gz
8889
/tools/openfst-1.3.2/

egs/babel/s5d/conf/lang/404-georgian.FLP.official.conf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,8 @@ unsup_data_list=./conf/lists/404-georgian/untranscribed-training.list
7575
unsup_nj=32
7676

7777

78-
lexicon_file=
79-
lexiconFlags="--romanized --oov <unk>"
78+
lexicon_file=/export/corpora/LDC/LDC2016S12/IARPA_BABEL_OP3_404/conversational/reference_materials/lexicon.txt
79+
lexiconFlags=" --romanized --oov <unk>"
8080

8181

8282

egs/babel/s5d/local/make_L_align.sh

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,18 +34,24 @@ tmpdir=$1
3434
dir=$2
3535
outdir=$3
3636

37+
# Verify that all required input files exist before doing any work.
# Bug fix: the original used `echo "..." exit 1` (so `exit 1` was printed,
# not executed) and closed the `for` loop with `fi` instead of `done`.
for f in $dir/phones/optional_silence.txt $dir/phones.txt $dir/words.txt ; do
  [ ! -f "$f" ] && echo "$0: The file $f must exist!" && exit 1
done
40+
3741
silphone=`cat $dir/phones/optional_silence.txt` || exit 1;
3842

43+
if [ ! -f $tmpdir/lexicon.txt ] && [ ! -f $tmpdir/lexiconp.txt ] ; then
44+
echo "$0: At least one of the files $tmpdir/lexicon.txt or $tmpdir/lexiconp.txt must exist" >&2
45+
exit 1
46+
fi
47+
3948
# Create lexicon with alignment info
4049
if [ -f $tmpdir/lexicon.txt ] ; then
4150
cat $tmpdir/lexicon.txt | \
4251
awk '{printf("%s #1 ", $1); for (n=2; n <= NF; n++) { printf("%s ", $n); } print "#2"; }'
43-
elif [ -f $tmpdir/lexiconp.txt ] ; then
52+
else
4453
cat $tmpdir/lexiconp.txt | \
4554
awk '{printf("%s #1 ", $1); for (n=3; n <= NF; n++) { printf("%s ", $n); } print "#2"; }'
46-
else
47-
echo "Neither $tmpdir/lexicon.txt nor $tmpdir/lexiconp.txt does not exist"
48-
exit 1
4955
fi | utils/make_lexicon_fst.pl - 0.5 $silphone | \
5056
fstcompile --isymbols=$dir/phones.txt --osymbols=$dir/words.txt \
5157
--keep_isymbols=false --keep_osymbols=false | \

egs/fisher_callhome_spanish/s5/local/merge_lexicons.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
#!/usr/bin/env python
21
# Copyright 2014 Gaurav Kumar. Apache 2.0
2+
# 2018 Saikiran Valluri, GoVivace inc., Avaaya
3+
#!/usr/bin/env python
34
# -*- coding: utf-8 -*-
45
#
56
# Merges unique words from Spanish Fisher, Gigaword and the LDC spanish lexicon
6-
77
from __future__ import print_function
8-
import sys
8+
import sys, re
99
import json
1010
import codecs
1111
import operator
@@ -17,6 +17,7 @@
1717
uw_gigaword = tmpdir + "/es_wordlist.json"
1818
uw_LDC = ldc_lexicon + "/callhome_spanish_lexicon_970908/preferences"
1919

20+
filtered_letters = re.compile(u'[¡¥ª°º¿àçèëìîôö0123456789]')
2021
merged_lexicon = []
2122
# All three lexicons are in different formats
2223
# First add the data from lexicon_fisher (A) into the dictionary
@@ -55,7 +56,8 @@
5556
ltuples = sorted(merged_lexicon)
5657

5758
for item in ltuples:
58-
lf.write(item + "\n")
59+
if not item==u'ñ' and not re.search(filtered_letters, item):
60+
lf.write(item + "\n")
5961

6062
lf.close()
6163

egs/gale_arabic/s5b/RESULTS

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,7 @@
22
# This file is generated using local/split_wer.sh $galeData //galeData is a local folder to keep intermediate gale data
33
# look at the end of run.sh in the same folder
44
##
5-
##### RESULTS generated by amali at 2017-01-01-08-05-59
6-
75
Report Results WER:
8-
%WER 9.50 [ 2124 / 22363, 160 ins, 275 del, 1689 sub ] exp/chain_cleaned/tdnn_lstm1a_sp_bi/decode/wer_report_9
9-
%WER 10.72 [ 2398 / 22363, 163 ins, 313 del, 1922 sub ] exp/chain_cleaned/tdnn1b_sp_bi/decode/wer_report_9
10-
%WER 12.04 [ 2693 / 22363, 226 ins, 271 del, 2196 sub ] exp/nnet3_cleaned/lstm_ld5_sp/decode/wer_report_9
11-
%WER 12.29 [ 2749 / 22363, 273 ins, 266 del, 2210 sub ] exp/nnet3_cleaned/tdnn_sp/decode/wer_report_10
126
%WER 17.82 [ 3986 / 22363, 315 ins, 618 del, 3053 sub ] exp/sgmm_5a_mmi_b0.1/decode/wer_report_12
137
%WER 18.15 [ 4059 / 22363, 335 ins, 589 del, 3135 sub ] exp/sgmm_5a_mmi_b0.1/decode4/wer_report_11
148
%WER 18.42 [ 4119 / 22363, 346 ins, 590 del, 3183 sub ] exp/sgmm_5a_mmi_b0.1/decode3/wer_report_11
@@ -27,10 +21,6 @@ Report Results WER:
2721
%WER 25.66 [ 5738 / 22363, 478 ins, 838 del, 4422 sub ] exp/tri2a/decode/wer_report_14
2822
%WER 26.38 [ 5900 / 22363, 435 ins, 929 del, 4536 sub ] exp/tri1/decode/wer_report_15
2923
Conversational Results WER:
30-
%WER 21.59 [ 10213 / 47305, 944 ins, 3092 del, 6177 sub ] exp/chain_cleaned/tdnn_lstm1a_sp_bi/decode/wer_conversational_9
31-
%WER 24.77 [ 11716 / 47305, 1098 ins, 3579 del, 7039 sub ] exp/chain_cleaned/tdnn1b_sp_bi/decode/wer_conversational_9
32-
%WER 26.78 [ 12670 / 47305, 1741 ins, 2434 del, 8495 sub ] exp/nnet3_cleaned/lstm_ld5_sp/decode/wer_conversational_9
33-
%WER 27.55 [ 13032 / 47305, 1800 ins, 2666 del, 8566 sub ] exp/nnet3_cleaned/tdnn_sp/decode/wer_conversational_11
3424
%WER 34.10 [ 16133 / 47305, 1903 ins, 3245 del, 10985 sub ] exp/sgmm_5a_mmi_b0.1/decode/wer_conversational_11
3525
%WER 34.81 [ 16466 / 47305, 2077 ins, 3037 del, 11352 sub ] exp/sgmm_5a_mmi_b0.1/decode4/wer_conversational_10
3626
%WER 35.19 [ 16648 / 47305, 1933 ins, 3264 del, 11451 sub ] exp/sgmm_5a_mmi_b0.1/decode3/wer_conversational_11
@@ -49,10 +39,6 @@ Conversational Results WER:
4939
%WER 45.92 [ 21724 / 47305, 1995 ins, 5213 del, 14516 sub ] exp/tri2a/decode/wer_conversational_14
5040
%WER 46.86 [ 22166 / 47305, 2212 ins, 4819 del, 15135 sub ] exp/tri1/decode/wer_conversational_13
5141
Combined Results for Reports and Conversational WER:
52-
%WER 17.64 [ 12286 / 69668, 1310 ins, 2807 del, 8169 sub ] exp/chain_cleaned/tdnn_lstm1a_sp_bi/decode/wer_8
53-
%WER 20.26 [ 14114 / 69668, 1261 ins, 3892 del, 8961 sub ] exp/chain_cleaned/tdnn1b_sp_bi/decode/wer_9
54-
%WER 22.05 [ 15363 / 69668, 1967 ins, 2705 del, 10691 sub ] exp/nnet3_cleaned/lstm_ld5_sp/decode/wer_9
55-
%WER 22.66 [ 15786 / 69668, 2047 ins, 2955 del, 10784 sub ] exp/nnet3_cleaned/tdnn_sp/decode/wer_11
5642
%WER 28.89 [ 20127 / 69668, 2244 ins, 3829 del, 14054 sub ] exp/sgmm_5a_mmi_b0.1/decode/wer_11
5743
%WER 29.48 [ 20541 / 69668, 2243 ins, 3860 del, 14438 sub ] exp/sgmm_5a_mmi_b0.1/decode4/wer_11
5844
%WER 29.81 [ 20767 / 69668, 2279 ins, 3854 del, 14634 sub ] exp/sgmm_5a_mmi_b0.1/decode3/wer_11
@@ -65,8 +51,30 @@ Combined Results for Reports and Conversational WER:
6551
%WER 32.36 [ 22542 / 69668, 2156 ins, 4184 del, 16202 sub ] exp/tri2b_mmi/decode_it4/wer_11
6652
%WER 32.50 [ 22640 / 69668, 2393 ins, 3956 del, 16291 sub ] exp/tri2b_mmi/decode_it3/wer_11
6753
%WER 32.79 [ 22847 / 69668, 2407 ins, 4760 del, 15680 sub ] exp/tri2b_mpe/decode_it3/wer_13
54+
# WER with train_sat_basis
55+
%WER 33.35 [ 23233 / 69668, 2385 ins, 5274 del, 15574 sub ] exp/tri3b/decode/wer_16_0.5
56+
# WER with train_sat
6857
%WER 33.61 [ 23413 / 69668, 2817 ins, 4577 del, 16019 sub ] exp/tri3b/decode/wer_17
6958
%WER 35.73 [ 24894 / 69668, 2630 ins, 4944 del, 17320 sub ] exp/tri3b/decode.si/wer_15
7059
%WER 36.17 [ 25196 / 69668, 2429 ins, 5393 del, 17374 sub ] exp/tri2b/decode/wer_16
7160
%WER 39.42 [ 27462 / 69668, 2473 ins, 6051 del, 18938 sub ] exp/tri2a/decode/wer_14
7261
%WER 40.35 [ 28113 / 69668, 2713 ins, 5635 del, 19765 sub ] exp/tri1/decode/wer_13
62+
63+
64+
# Effect of GMM seed model (tri2b instead of tri3b). Using tri3b give a slightly better result
65+
# as compared to using tri2b as seed.
66+
%WER 16.66 [ 11610 / 69668, 1233 ins, 2747 del, 7630 sub ] exp/chain/tdnn_1a_3b_sp/decode_test/wer_10_0.0
67+
%WER 16.71 [ 11642 / 69668, 1145 ins, 2908 del, 7589 sub ] exp/chain/tdnn_1a_2b_sp/decode_test/wer_9_0.0
68+
69+
# Effect of Tree-size (3500, 4500, 7000, 11000)
70+
%WER 16.66 [ 11610 / 69668, 1233 ins, 2747 del, 7630 sub ] exp/chain/tdnn_1a_3500_sp/decode_test/wer_10_0.0
71+
%WER 16.59 [ 11557 / 69668, 1234 ins, 2646 del, 7677 sub ] exp/chain/tdnn_1a_4500_sp/decode_test/wer_10_0.0
72+
%WER 16.47 [ 11474 / 69668, 1421 ins, 2207 del, 7846 sub ] exp/chain/tdnn_1a_7000_sp/decode_test/wer_9_0.0
73+
%WER 16.62 [ 11580 / 69668, 1164 ins, 2789 del, 7627 sub ] exp/chain/tdnn_1a_11000_sp/decode_test/wer_10_0.0
74+
75+
# Effect of l2-regularization on the output with tree-size=7000. l2 on the output (0.005,0.002)
76+
%WER 16.54 [ 11522 / 69668, 1123 ins, 2739 del, 7660 sub ] exp/chain/tdnn_1a_7000_005_sp/decode_test/wer_9_0.5
77+
%WER 16.47 [ 11474 / 69668, 1421 ins, 2207 del, 7846 sub ] exp/chain/tdnn_1a_7000_002_sp/decode_test/wer_9_0.0
78+
79+
#current best 'chain' models (see local/chain/tuning/run_tdnn_1a.sh)
80+
%WER 16.47 [ 11474 / 69668, 1421 ins, 2207 del, 7846 sub ] exp/chain/tdnn_1a_sp/decode_test/wer_9_0.0

egs/gale_arabic/s5b/cmd.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,6 @@
1010
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
1111
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
1212

13-
export train_cmd="queue.pl --mem 2G"
14-
export decode_cmd="queue.pl --mem 4G"
15-
export mkgraph_cmd="queue.pl --mem 8G"
13+
export train_cmd="retry.pl queue.pl --mem 2G"
14+
export decode_cmd="retry.pl queue.pl --mem 4G"
15+
export mkgraph_cmd="retry.pl queue.pl --mem 8G"
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#!/bin/bash

# This script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}

# Example output:
# ./local/chain/compare_wer.sh exp/chain/cnn1a
# System                cnn1a
# WER                    0.61
# CER                    0.15
# Final train prob    -0.0377
# Final valid prob    -0.0380
# Final train prob (xent)    -0.0830
# Final valid prob (xent)    -0.0838

if [ $# == 0 ]; then
  echo "Usage: $0: <dir1> [<dir2> ... ]"
  echo "e.g.: $0 exp/chain/cnn{1a,1b}"
  exit 1
fi

echo "# $0 $*"
used_epochs=false

echo -n "# System               "
for x in "$@"; do printf "% 10s" " $(basename "$x")"; done
echo

echo -n "# WER                  "
for x in "$@"; do
  # best_wer looks like: "%WER 9.50 [ ... ] exp/.../wer_..."; field 2 is the WER.
  wer=$(awk '{print $2}' "$x/decode_test/scoring_kaldi/best_wer")
  printf "% 10s" "$wer"
done
echo

echo -n "# CER                  "
for x in "$@"; do
  cer=$(awk '{print $2}' "$x/decode_test/scoring_kaldi/best_cer")
  printf "% 10s" "$cer"
done
echo

if $used_epochs; then
  # The diagnostics aren't comparable between regular and discriminatively
  # trained systems, so stop after the WER/CER lines.
  exit 0
fi

echo -n "# Final train prob     "
for x in "$@"; do
  prob=$(grep Overall "$x/log/compute_prob_train.final.log" | grep -v xent | awk '{printf("%.4f", $8)}')
  printf "% 10s" "$prob"
done
echo

echo -n "# Final valid prob     "
for x in "$@"; do
  prob=$(grep Overall "$x/log/compute_prob_valid.final.log" | grep -v xent | awk '{printf("%.4f", $8)}')
  printf "% 10s" "$prob"
done
echo

echo -n "# Final train prob (xent) "
for x in "$@"; do
  prob=$(grep Overall "$x/log/compute_prob_train.final.log" | grep -w xent | awk '{printf("%.4f", $8)}')
  printf "% 10s" "$prob"
done
echo

echo -n "# Final valid prob (xent) "
for x in "$@"; do
  prob=$(grep Overall "$x/log/compute_prob_valid.final.log" | grep -w xent | awk '{printf("%.4f", $8)}')
  printf "% 10s" "$prob"
done
echo
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#!/bin/bash

# This script has common stages shared across librispeech chain recipes.
# It generates a new topology in a new lang directory, gets the alignments as
# lattices, and builds a tree for the new topology.
set -e

stage=11

# Input directory names. These options are actually compulsory, and they have
# been named for convenience.
gmm_dir=
ali_dir=
lores_train_data_dir=

num_leaves=6000

# Output directory names. They are also compulsory.
lang=
lat_dir=
tree_dir=
# End configuration section.
echo "$0 $@"  # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

[ -z "$lang" ] && echo "Set --lang, this specifies the new lang directory which will have the new topology" && exit 1;
[ -z "$lat_dir" ] && echo "Set --lat-dir, this specifies the experiment directory to store lattice" && exit 1;
[ -z "$tree_dir" ] && echo "Set --tree-dir, this specifies the directory to store new tree " && exit 1;

for f in $gmm_dir/final.mdl $ali_dir/ali.1.gz $lores_train_data_dir/feats.scp; do
  [ ! -f "$f" ] && echo "$0: expected file $f to exist" && exit 1
done

if [ $stage -le 11 ]; then
  echo "$0: creating lang directory with one state per phone."
  # Create a version of the lang/ directory that has one state per phone in the
  # topo file. [note, it really has two states.. the first one is only repeated
  # once, the second one has zero or more repeats.]
  if [ -d "$lang" ]; then
    if [ "$lang/L.fst" -nt data/lang/L.fst ]; then
      echo "$0: $lang already exists, not overwriting it; continuing"
    else
      echo "$0: $lang already exists and seems to be older than data/lang..."
      echo " ... not sure what to do. Exiting."
      exit 1;
    fi
  else
    cp -r data/lang "$lang"
    silphonelist=$(cat "$lang/phones/silence.csl") || exit 1;
    nonsilphonelist=$(cat "$lang/phones/nonsilence.csl") || exit 1;
    # Use our special topology... note that later on may have to tune this
    # topology.
    steps/nnet3/chain/gen_topo.py "$nonsilphonelist" "$silphonelist" >"$lang/topo"
  fi
fi

if [ $stage -le 12 ]; then
  # Get the alignments as lattices (gives the chain training more freedom).
  # Use the same num-jobs as the alignments.
  nj=$(cat "${ali_dir}/num_jobs") || exit 1;
  steps/align_fmllr_lats.sh --nj "$nj" --cmd "$train_cmd" "${lores_train_data_dir}" \
    "$lang" "$gmm_dir" "$lat_dir"
  rm $lat_dir/fsts.*.gz # save space
fi

if [ $stage -le 13 ]; then
  # Build a tree using our new topology. We know we have alignments for the
  # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
  # those.
  if [ -f "$tree_dir/final.mdl" ]; then
    echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
    exit 1;
  fi
  steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
    --context-opts "--context-width=2 --central-position=1" \
    --cmd "$train_cmd" "$num_leaves" "${lores_train_data_dir}" "$lang" "$ali_dir" "$tree_dir"
fi

exit 0;

0 commit comments

Comments
 (0)