1+ #include < dsplib/mfcc.h>
2+
3+ #include < dsplib/window.h>
4+ #include < dsplib/array.h>
5+ #include < dsplib/fft.h>
6+ #include < dsplib/utils.h>
7+ #include < dsplib/math.h>
8+
9+ namespace dsplib {
10+
11+ namespace {
12+
13+ arr_int _freq2bin (const arr_real& freqs, int nfft, int fs) {
14+ arr_int bins (freqs.size ());
15+ for (int i = 0 ; i < freqs.size (); ++i) {
16+ bins[i] = std::round ((freqs[i] / fs) * nfft);
17+ }
18+ return bins;
19+ }
20+
21+ // TODO: add to utils
22+ arr_real _linspace (real_t start, real_t end, int count) {
23+ arr_real out (count);
24+ const real_t step = (count == 1 ) ? 0 : (end - start) / real_t (count - 1 );
25+ for (int i = 0 ; i < count; ++i) {
26+ out[i] = start + (step * i);
27+ }
28+ return out;
29+ }
30+
31+ std::vector<arr_real> _gen_dcttable (int nmels, int nbands) {
32+ // The DCT-II unitary algorithm
33+ const real_t A = std::sqrt (1.0 / nbands);
34+ const real_t B = std::sqrt (2.0 / nbands);
35+ const real_t piv = pi / nbands;
36+ std::vector<arr_real> mat (nmels, ones (nbands) * A);
37+ for (int k = 0 ; k < nbands; ++k) {
38+ for (int n = 1 ; n < nmels; ++n) {
39+ mat[n][k] = B * std::cos (piv * n * (k + 0.5 ));
40+ }
41+ }
42+ return mat;
43+ }
44+
45+ } // namespace
46+
47+ arr_real MFCCExtractor::design_band_edges (int min_freq, int max_freq, int nbands) {
48+ const auto min_mel = hz2mel (real_t (min_freq));
49+ const auto max_mel = hz2mel (real_t (max_freq));
50+ const auto mels = _linspace (min_mel, max_mel, nbands + 2 );
51+ const auto freqs = mel2hz (mels);
52+ return freqs;
53+ }
54+
55+ std::vector<arr_real> MFCCExtractor::design_filterbank (int fs, int min_freq, int max_freq, int nbands, int nfft) {
56+ const auto band_edges = design_band_edges (min_freq, max_freq, nbands);
57+ const auto bins = _freq2bin (band_edges, nfft, fs) + 1 ; // matlab-style indexies
58+ const int nb = band_edges.size ();
59+ const auto band_width = *band_edges.slice (2 , nb) - *band_edges.slice (0 , nb - 2 );
60+ const auto weight_bands = band_width / 2 ;
61+
62+ std::vector<arr_real> filterbank;
63+ for (int en = 0 ; en < nbands; ++en) {
64+ arr_real flt (nfft);
65+
66+ // rising side of triangle
67+ for (int i = bins[en]; i <= bins[en + 1 ]; ++i) {
68+ flt[i - 1 ] = real_t (i - bins[en]) / (bins[en + 1 ] - bins[en]);
69+ }
70+
71+ // falling side of triangle
72+ for (int i = bins[en + 1 ] + 1 ; i <= bins[en + 2 ]; ++i) {
73+ flt[i - 1 ] = 1.0 - real_t (i - bins[en + 1 ]) / (bins[en + 2 ] - bins[en + 1 ]);
74+ }
75+
76+ // normalization
77+ flt /= weight_bands (en);
78+
79+ filterbank.push_back (flt);
80+ }
81+
82+ return filterbank;
83+ }
84+
85+ MFCCExtractor::MFCCExtractor (int fs, int nfft, int overlap, int nmels, int nbands, const arr_real& win)
86+ : winlen_{win.size ()}
87+ , win_{win}
88+ , hop_size_{winlen_ - overlap}
89+ , nmels_{nmels}
90+ , nfft_{nfft}
91+ , fbank_{design_filterbank (fs, 0 , fs / 2 , nbands, nfft)}
92+ , dctmat_{_gen_dcttable (nmels, nbands)}
93+ , buf_(winlen_) {
94+ }
95+
96+ arr_real MFCCExtractor::process (const arr_real& x) {
97+ DSPLIB_ASSERT (x.size () == hop_size_, " input size must be equal hop_size" );
98+
99+ buf_.slice (0 , winlen_ - hop_size_) = buf_.slice (hop_size_, winlen_);
100+ buf_.slice (winlen_ - hop_size_, winlen_) = x;
101+ const arr_real xq = abs2 (fft (buf_ * win_, nfft_));
102+
103+ // filterbank
104+ // TODO: use only nfft/2+1 points for real signal
105+ // TODO: store and multiply only small non-zero part of filterbank - (fftpos, fbank)
106+ arr_real z (fbank_.size ());
107+ for (int i = 0 ; i < fbank_.size (); ++i) {
108+ z[i] = dot (fbank_[i], xq);
109+ }
110+
111+ // cepstral
112+ arr_real y (nmels_);
113+ for (int i = 0 ; i < nmels_; ++i) {
114+ y[i] = dot (z, dctmat_[i]);
115+ }
116+
117+ return y;
118+ }
119+
120+ } // namespace dsplib
0 commit comments