kaldi-asr · johnjosephmorgan · Nov 1, 2018 · Nov 2, 2018 · Nov 2, 2018 · Nov 2, 2018
diff --git a/egs/yaounde_fr/s5/README.txt b/egs/yaounde_fr/s5/README.txt
@@ -0,0 +1,15 @@
+Recipe for the African Accented French Corpus
+
+This recipe follows the pattern of the mini_librispeech recipe.
+It is built using the African Accented French Corpus available from the Open Speech and Language Resources repository.
+
+Information about the corpus is at:
+
+http://www.openslr.org/57
+
+This recipe uses about 11 hours of speech from the corpus for training, about 1.5 hours for development and about 20 minutes for testing.
+Most of the speakers are from Cameroon. 
+However, there are also recordings of speakers from Chad, Congo, and Gabon.
+
+All of the data resources required to run this recipe are freely available on the web.
+In addition to the speech data, the CMUSphinx French lexicon is used as a pronouncing dictionary and the French OpenSubtitles text corpus is used to build language models.
diff --git a/egs/yaounde_fr/s5/RESULTS b/egs/yaounde_fr/s5/RESULTS
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+# Print the WER line chosen by utils/best_wer.sh for each decoding directory.
+# Usage: ./RESULTS [pattern]
+# With an argument, only decode directories whose path contains that string
+# are reported; with no argument every decode directory is reported.
+# Everything after 'exit 0' is a record of past results and is never run.
+
+# decode directories directly below a model directory: exp/<model>/decode*
+for x in exp/*/decode*/; do
+  [ -d "$x" ] && [[ $x =~ "$1" ]] && grep WER "$x"/wer_* | utils/best_wer.sh
+done
+
+# decode directories of the chain models: exp/chain/<model>/decode*
+for x in exp/chain/*/decode*; do
+  [ -d "$x" ] && [[ $x =~ "$1" ]] && grep WER "$x"/wer_* | utils/best_wer.sh
+done
+exit 0
+
+| model | dev tgsmall | test tgsmall | devtest tgsmall | dev tgmed | test tgmed | devtest tgmed | dev tglarge | test tglarge | devtest tglarge |
+| mono | 54.58 | 25.67 | 7.86 | 76.18 | 69.64 | 52.09 | | | |
+| tri1 | 32.06 | 19.14 | 3.79 | 31.98 | 27.16 | 10.06 | 29.60 | 22.28 | 5.99 |
+| tri2b | 33.02 | 19.01 | 3.85 | 33.26 | 26.36 | 9.91 | 31.48 | 21.27 | 5.53 |
+| tri3b | 26.91 | 18.85 | 3.49 | 25.90 | 24.51 | 8.51 | 23.83 | 20.01 | 4.37 |
+| chain tdnn-f | 24.02 | 17.20 | 1.96 | 22.30 | 33.66 | 16.17 | 20.14 | 18.69 | 3.33 | 
+| chain tdnn-f online | 24.21 | 17.23 | 1.96 | 22.26 | 33.72 | 16.14 | 19.10 | 32.07 | 14.74 |
+
+| model | dev tgsmall | test tgsmall | devtest tgsmall | dev tgmed | test tgmed | devtest tgmed | dev tglarge | test tglarge | devtest tglarge |
+| mono | 54.44 | 22.47 | 4.83 | 61.56 | 35.45 | 18.43 | 58.69  | 30.52 | 12.66 |
+| tri1 | 33.88 | 18.69 | 2.93 | 37.04 | 26.79 | 11.51 | 34.73 | 22.05 | 5.23 |
+| tri3b | 26.28 | 17.62 | 2.63 | 27.31 | 23.62 | 10.29 | 24.80 | 19.14 | 4.10 |
+| tri3b enhanced lexicon | 26.04 | 17.81 | 2.60 | 27.07 | 23.21 | 8.71 | 24.07 | 19.59 | 3.79 |
+| chain tdnn-f | 30.91 | 17.26 | 1.59 | 41.78 | 33.62 | 18.62 |  31.22 | 18.66  | 3.36 |
+| chain tdnn-f online | 30.41 | 17.26 | 1.62 | 41.31 | 33.62 | 18.62 | 38.67 | 31.26 | 15.87 |
diff --git a/egs/yaounde_fr/s5/cmd.sh b/egs/yaounde_fr/s5/cmd.sh
@@ -0,0 +1,15 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances of 'queue.pl' to 'run.pl' (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine).  queue.pl works with GridEngine (qsub).  slurm.pl works
+# with slurm.  Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration.  Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export train_cmd="queue.pl --mem 2G"    # --mem: memory requested from the queue
+export decode_cmd="queue.pl --mem 4G"   # --mem: memory requested from the queue
+export mkgraph_cmd="queue.pl --mem 8G"  # --mem: memory requested from the queue
diff --git a/egs/yaounde_fr/s5/conf/decode.config b/egs/yaounde_fr/s5/conf/decode.config
@@ -0,0 +1 @@
+# empty config, just use the defaults.
diff --git a/egs/yaounde_fr/s5/conf/mfcc.conf b/egs/yaounde_fr/s5/conf/mfcc.conf
@@ -0,0 +1 @@
+--use-energy=false   # only non-default option.
diff --git a/egs/yaounde_fr/s5/conf/mfcc_hires.conf b/egs/yaounde_fr/s5/conf/mfcc_hires.conf
@@ -0,0 +1,10 @@
+# config for high-resolution MFCC features, intended for neural network training
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why 
+# we prefer this method.
+--use-energy=false   # use average of log energy, not energy.
+--num-mel-bins=40     # similar to Google's setup.
+--num-ceps=40     # there is no dimensionality reduction.
+--low-freq=20     # low cutoff frequency for mel bins... this is high-bandwidth data, so
+                  # there might be some information at the low end.
+--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
diff --git a/egs/yaounde_fr/s5/conf/online_cmvn.conf b/egs/yaounde_fr/s5/conf/online_cmvn.conf
@@ -0,0 +1 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
diff --git a/egs/yaounde_fr/s5/local/aafr_download.sh b/egs/yaounde_fr/s5/local/aafr_download.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Copyright 2018 John Morgan
+# Apache 2.0.
+
+speech=$1
+
+# where to put the downloaded speech corpus
+download_dir=$(pwd)
+data_dir=$download_dir/African_Accented_French
+
+# download the corpus from openslr
+if [ ! -f $download_dir/aafr.tar.gz ]; then
+    wget -O $download_dir/aafr.tar.gz $speech
+else
+  echo "$0: The corpus $speech was already downloaded."
+fi
+
+if [ ! -d $download_dir/African_Accented_French ]; then
+  (
+    cd $download_dir
+    tar -xzf aafr.tar.gz
+  )
+else
+  echo "$0: The corpus was already unzipped."
+fi
diff --git a/egs/yaounde_fr/s5/local/ca16_conv/make_lists.pl b/egs/yaounde_fr/s5/local/ca16_conv/make_lists.pl
@@ -0,0 +1,105 @@
+#!/usr/bin/env perl
+
+# Copyright 2018 John Morgan
+# Apache 2.0.
+
+# make_lists.pl - write lists for acoustic model training
+# writes files under data/local/tmp/ca16conv_train/lists
+# This script associates a .wav file with a transcript.
+#
+# Input:
+#   <DATA_SRC_DIR>/transcripts/train/ca16_conv/transcripts.txt
+#   data/local/tmp/ca16conv_train/wav_list.txt (one .wav path per line)
+# Output, all under data/local/tmp/ca16conv_train/lists:
+#   text, wav.scp and utt2spk
+
+use strict;
+use warnings;
+use Carp;
+
+BEGIN {
+    @ARGV == 1 or croak "USAGE: $0 <DATA_SRC_DIR>
+Example:
+$0 African_Accented_French
+";
+}
+
+use File::Spec;
+use File::Copy;
+use File::Basename;
+
+my ($d) = @ARGV;
+
+# initialize variables
+my $tmpdir = "data/local/tmp/ca16conv_train";
+my $transcripts = "$d/transcripts/train/ca16_conv/transcripts.txt";
+# input wav file list
+my $wav_list = "$tmpdir/wav_list.txt";
+# output temporary wav.scp file
+my $wav_scp = "$tmpdir/lists/wav.scp";
+# output temporary utt2spk file
+my $utt2spk = "$tmpdir/lists/utt2spk";
+# output temporary text file
+my $text = "$tmpdir/lists/text";
+# transcripts keyed by utterance id
+my %transcript = ();
+# done setting variables
+
+# utt_fields - split a base name on underscores into at most five fields and
+# return the third field and the fifth (which keeps any further underscores).
+# Returns an empty list when the name has fewer than five fields, so that
+# callers can skip it instead of building an id from undefined values.
+sub utt_fields {
+  my ($name) = @_;
+  my ( undef, undef, $s, undef, $i ) = split /\_/, $name, 5;
+  return unless defined $i;
+  return ( $s, $i );
+}
+
+# list form of system: no shell is involved, and a failure stops the script here
+system( 'mkdir', '-p', "$tmpdir/lists" ) == 0
+  or croak "problem creating $tmpdir/lists";
+
+open my $TRANS, '<', $transcripts or croak "problem with $transcripts $!";
+# store transcripts in hash
+while ( my $line = <$TRANS> ) {
+  chomp $line;
+  my ( $j, $sent ) = split /\s/, $line, 2;
+  # skip empty lines
+  next unless defined $j;
+  # an id with no text after it keeps an empty transcript
+  $sent = '' unless defined $sent;
+  my ( undef, undef, $file ) = File::Spec->splitpath($j);
+  my ( $s, $i ) = utt_fields( basename( $file, '.tdf' ) );
+  next unless defined $i;
+  my $bn = 'gabonconv_' . $s . '_' . $i;
+  # put a space between a word character and a dash that follows it
+  # NOTE(review): the file is read as bytes; if it is UTF-8, \w may fail to
+  # match an accented letter - confirm whether a decoding layer is wanted
+  $sent =~ s/(\w)(\p{dash_punctuation}+?)/$1 $2/g;
+  $transcript{$bn} = $sent;
+}
+close $TRANS;
+
+open my $WAVLIST, '<', $wav_list or croak "problem with $wav_list $!";
+# the outputs are only written, so plain '>' is enough
+open my $WAVSCP, '>', $wav_scp or croak "problem with $wav_scp $!";
+open my $UTT2SPK, '>', $utt2spk or croak "problem with $utt2spk $!";
+open my $TEXT, '>', $text or croak "problem with $text $!";
+
+while ( my $line = <$WAVLIST> ) {
+  chomp $line;
+  my ( $s, $i ) = utt_fields( basename( $line, ".wav" ) );
+  next unless defined $i;
+  my $bn = 'gabonconv_' . $s . '_' . $i;
+  # only work with utterances in transcript file
+  next unless exists $transcript{$bn};
+  print {$TEXT} "$bn $transcript{$bn}\n";
+  print {$WAVSCP} "$bn sox $line -t .wav - |\n";
+  print {$UTT2SPK} "$bn gabonconv_${s}\n";
+}
+close $WAVLIST;
+# buffered write errors only show up at close
+close $TEXT or croak "problem closing $text $!";
+close $WAVSCP or croak "problem closing $wav_scp $!";
+close $UTT2SPK or croak "problem closing $utt2spk $!";
diff --git a/egs/yaounde_fr/s5/local/ca16_conv/prepare_data.sh b/egs/yaounde_fr/s5/local/ca16_conv/prepare_data.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+# Copyright 2018 John Morgan
+# Apache 2.0.
+
+# set variables
+datadir=$1
+speech_datadir=$datadir/speech/train/ca16
+tmpdir=data/local/tmp/ca16conv_train
+# end  setting variables
+
+mkdir -p $tmpdir
+find $speech_datadir -type f -name "*.wav" | grep  conv > $tmpdir/wav_list.txt
+local/ca16_conv/make_lists.pl $datadir
+utils/utt2spk_to_spk2utt.pl $tmpdir/lists/
+utils/fix_data_dir.sh $tmpdir/lists
diff --git a/egs/yaounde_fr/s5/local/ca16_read_devtest/make_lists.pl b/egs/yaounde_fr/s5/local/ca16_read_devtest/make_lists.pl
@@ -0,0 +1,102 @@
+#!/usr/bin/env perl
+
+# Copyright 2018 John Morgan
+# Apache 2.0.
+
+# make_lists.pl - write lists for acoustic model training
+# writes files under data/local/tmp/ca16read_devtest/lists
+#
+# Input:
+#   <DATA_DIR>/transcripts/devtest/ca16_read/conditioned.txt
+#   data/local/tmp/ca16read_devtest/wav_list.txt (one .wav path per line)
+# Output, all under data/local/tmp/ca16read_devtest/lists:
+#   text, wav.scp and utt2spk
+
+use strict;
+use warnings;
+use Carp;
+
+BEGIN {
+    @ARGV == 1 or croak "USAGE: $0 <DATA_DIR>
+Example:
+$0 African_Accented_French";
+}
+
+use File::Spec;
+use File::Copy;
+use File::Basename;
+
+my ($d) = @ARGV;
+
+# initialize variables
+my $tmpdir = "data/local/tmp/ca16read_devtest";
+my $prompts = "$d/transcripts/devtest/ca16_read/conditioned.txt";
+# input wav file list
+my $wav_list = "$tmpdir/wav_list.txt";
+# output temporary wav.scp file
+my $wav_scp = "$tmpdir/lists/wav.scp";
+# output temporary utt2spk file
+my $utt2spk = "$tmpdir/lists/utt2spk";
+# output temporary text file
+my $text = "$tmpdir/lists/text";
+# prompts keyed by utterance id
+my %prompt = ();
+# done setting variables
+
+# utt_fields - split a name on underscores into at most five fields and
+# return the third field and the fifth (which keeps any further underscores).
+# Returns an empty list when the name has fewer than five fields, so that
+# callers can skip it instead of building an id from undefined values.
+sub utt_fields {
+  my ($name) = @_;
+  my ( undef, undef, $s, undef, $i ) = split /\_/, $name, 5;
+  return unless defined $i;
+  return ( $s, $i );
+}
+
+# list form of system: no shell is involved, and a failure stops the script here
+system( 'mkdir', '-p', "$tmpdir/lists" ) == 0
+  or croak "problem creating $tmpdir/lists";
+
+open my $PROMPTS, '<', $prompts or croak "problem with $prompts $!";
+# store prompts in hash
+while ( my $line = <$PROMPTS> ) {
+  chomp $line;
+  my ( $j, $sent ) = split /\s/, $line, 2;
+  # skip empty lines
+  next unless defined $j;
+  # an id with no text after it keeps an empty prompt
+  $sent = '' unless defined $sent;
+  my ( $s, $i ) = utt_fields($j);
+  next unless defined $i;
+  my $bn = 'gabonread_' . $s . '_' . $i;
+  # put a space between a word character and a dash that follows it
+  # NOTE(review): the file is read as bytes; if it is UTF-8, \w may fail to
+  # match an accented letter - confirm whether a decoding layer is wanted
+  $sent =~ s/(\w)(\p{dash_punctuation}+?)/$1 $2/g;
+  $prompt{$bn} = $sent;
+}
+close $PROMPTS;
+
+open my $WAVLIST, '<', $wav_list or croak "problem with $wav_list $!";
+# the outputs are only written, so plain '>' is enough
+open my $WAVSCP, '>', $wav_scp or croak "problem with $wav_scp $!";
+open my $UTT2SPK, '>', $utt2spk or croak "problem with $utt2spk $!";
+open my $TEXT, '>', $text or croak "problem with $text $!";
+
+while ( my $line = <$WAVLIST> ) {
+  chomp $line;
+  my ( $s, $i ) = utt_fields( basename( $line, ".wav" ) );
+  next unless defined $i;
+  my $bn = 'gabonread_' . $s . '_' . $i;
+  # only work with utterances in transcript file
+  next unless exists $prompt{$bn};
+  print {$TEXT} "$bn $prompt{$bn}\n";
+  print {$WAVSCP} "$bn sox $line -t .wav - |\n";
+  print {$UTT2SPK} "$bn gabonread_${s}\n";
+}
+close $WAVLIST;
+# buffered write errors only show up at close
+close $TEXT or croak "problem closing $text $!";
+close $WAVSCP or croak "problem closing $wav_scp $!";
+close $UTT2SPK or croak "problem closing $utt2spk $!";
diff --git a/egs/yaounde_fr/s5/local/ca16_read_devtest/prepare_data.sh b/egs/yaounde_fr/s5/local/ca16_read_devtest/prepare_data.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+# Copyright 2018 John Morgan
+# Apache 2.0.
+
+# Prepare the ca16 read devtest set.
+# Takes the corpus directory as its only argument and copies spk2utt, text,
+# utt2spk and wav.scp into data/devtest.
+
+if [ $# -ne 1 ]; then
+  printf 'usage: %s <CORPUS_DIRECTORY>\nexample:\n%s African_Accented_French\n' "$0" "$0"
+  exit 1
+fi
+
+corpus=$1
+wav_root=$corpus/speech/devtest/ca16
+work=data/local/tmp/ca16read_devtest
+lists=$work/lists
+
+mkdir -p $work
+# list the .wav files whose path contains "read"
+find $wav_root -type f -name "*.wav" | grep read > $work/wav_list.txt
+# build the lists from the .wav list and the corpus transcripts
+local/ca16_read_devtest/make_lists.pl $corpus
+utils/fix_data_dir.sh $lists
+mkdir -p data/devtest
+for f in spk2utt text utt2spk wav.scp; do
+  cp $lists/$f data/devtest/
+done
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh