Skip to content

Commit 56fe8e5

Browse files
committed
Add MFCCExtractor class
1 parent 7c5ef44 commit 56fe8e5

File tree

3 files changed

+160
-0
lines changed

3 files changed

+160
-0
lines changed

include/dsplib.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <dsplib/spectrum.h>
2626
#include <dsplib/stft.h>
2727
#include <dsplib/assert.h>
28+
#include <dsplib/mfcc.h>
2829

2930
#include <dsplib/audio/noise-gate.h>
3031
#include <dsplib/audio/compressor.h>

include/dsplib/mfcc.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#pragma once
2+
3+
#include <dsplib/array.h>
4+
5+
namespace dsplib {
6+
7+
template<typename T>
8+
T hz2mel(const T& freq) noexcept {
9+
return 2595 * log10(1 + freq / 700);
10+
}
11+
12+
template<typename T>
13+
T mel2hz(const T& mel) noexcept {
14+
return 700 * (power(10, (mel / 2595)) - 1);
15+
}
16+
17+
class MFCCExtractor
18+
{
19+
public:
20+
explicit MFCCExtractor(int fs, int nfft, int overlap, int nmels, int nbands, const arr_real& win);
21+
22+
arr_real process(const arr_real& x);
23+
24+
static arr_real design_band_edges(int min_freq, int max_freq, int nbands);
25+
26+
static std::vector<arr_real> design_filterbank(int fs, int min_freq, int max_freq, int nbands, int nfft);
27+
28+
private:
29+
const int winlen_;
30+
const arr_real win_;
31+
const int hop_size_;
32+
const int nmels_;
33+
const int nfft_;
34+
const std::vector<arr_real> fbank_;
35+
const std::vector<arr_real> dctmat_;
36+
arr_real buf_;
37+
};
38+
39+
} // namespace dsplib

lib/mfcc.cpp

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
#include <dsplib/mfcc.h>
2+
3+
#include <dsplib/window.h>
4+
#include <dsplib/array.h>
5+
#include <dsplib/fft.h>
6+
#include <dsplib/utils.h>
7+
#include <dsplib/math.h>
8+
9+
namespace dsplib {
10+
11+
namespace {
12+
13+
arr_int _freq2bin(const arr_real& freqs, int nfft, int fs) {
14+
arr_int bins(freqs.size());
15+
for (int i = 0; i < freqs.size(); ++i) {
16+
bins[i] = std::round((freqs[i] / fs) * nfft);
17+
}
18+
return bins;
19+
}
20+
21+
//TODO: add to utils
22+
arr_real _linspace(real_t start, real_t end, int count) {
23+
arr_real out(count);
24+
const real_t step = (count == 1) ? 0 : (end - start) / real_t(count - 1);
25+
for (int i = 0; i < count; ++i) {
26+
out[i] = start + (step * i);
27+
}
28+
return out;
29+
}
30+
31+
std::vector<arr_real> _gen_dcttable(int nmels, int nbands) {
32+
// The DCT-II unitary algorithm
33+
const real_t A = std::sqrt(1.0 / nbands);
34+
const real_t B = std::sqrt(2.0 / nbands);
35+
const real_t piv = pi / nbands;
36+
std::vector<arr_real> mat(nmels, ones(nbands) * A);
37+
for (int k = 0; k < nbands; ++k) {
38+
for (int n = 1; n < nmels; ++n) {
39+
mat[n][k] = B * std::cos(piv * n * (k + 0.5));
40+
}
41+
}
42+
return mat;
43+
}
44+
45+
} // namespace
46+
47+
arr_real MFCCExtractor::design_band_edges(int min_freq, int max_freq, int nbands) {
48+
const auto min_mel = hz2mel(real_t(min_freq));
49+
const auto max_mel = hz2mel(real_t(max_freq));
50+
const auto mels = _linspace(min_mel, max_mel, nbands + 2);
51+
const auto freqs = mel2hz(mels);
52+
return freqs;
53+
}
54+
55+
std::vector<arr_real> MFCCExtractor::design_filterbank(int fs, int min_freq, int max_freq, int nbands, int nfft) {
56+
const auto band_edges = design_band_edges(min_freq, max_freq, nbands);
57+
const auto bins = _freq2bin(band_edges, nfft, fs) + 1; //matlab-style indexies
58+
const int nb = band_edges.size();
59+
const auto band_width = *band_edges.slice(2, nb) - *band_edges.slice(0, nb - 2);
60+
const auto weight_bands = band_width / 2;
61+
62+
std::vector<arr_real> filterbank;
63+
for (int en = 0; en < nbands; ++en) {
64+
arr_real flt(nfft);
65+
66+
// rising side of triangle
67+
for (int i = bins[en]; i <= bins[en + 1]; ++i) {
68+
flt[i - 1] = real_t(i - bins[en]) / (bins[en + 1] - bins[en]);
69+
}
70+
71+
// falling side of triangle
72+
for (int i = bins[en + 1] + 1; i <= bins[en + 2]; ++i) {
73+
flt[i - 1] = 1.0 - real_t(i - bins[en + 1]) / (bins[en + 2] - bins[en + 1]);
74+
}
75+
76+
//normalization
77+
flt /= weight_bands(en);
78+
79+
filterbank.push_back(flt);
80+
}
81+
82+
return filterbank;
83+
}
84+
85+
MFCCExtractor::MFCCExtractor(int fs, int nfft, int overlap, int nmels, int nbands, const arr_real& win)
86+
: winlen_{win.size()}
87+
, win_{win}
88+
, hop_size_{winlen_ - overlap}
89+
, nmels_{nmels}
90+
, nfft_{nfft}
91+
, fbank_{design_filterbank(fs, 0, fs / 2, nbands, nfft)}
92+
, dctmat_{_gen_dcttable(nmels, nbands)}
93+
, buf_(winlen_) {
94+
}
95+
96+
arr_real MFCCExtractor::process(const arr_real& x) {
97+
DSPLIB_ASSERT(x.size() == hop_size_, "input size must be equal hop_size");
98+
99+
buf_.slice(0, winlen_ - hop_size_) = buf_.slice(hop_size_, winlen_);
100+
buf_.slice(winlen_ - hop_size_, winlen_) = x;
101+
const arr_real xq = abs2(fft(buf_ * win_, nfft_));
102+
103+
//filterbank
104+
//TODO: use only nfft/2+1 points for real signal
105+
//TODO: store and multiply only small non-zero part of filterbank - (fftpos, fbank)
106+
arr_real z(fbank_.size());
107+
for (int i = 0; i < fbank_.size(); ++i) {
108+
z[i] = dot(fbank_[i], xq);
109+
}
110+
111+
//cepstral
112+
arr_real y(nmels_);
113+
for (int i = 0; i < nmels_; ++i) {
114+
y[i] = dot(z, dctmat_[i]);
115+
}
116+
117+
return y;
118+
}
119+
120+
} // namespace dsplib

0 commit comments

Comments
 (0)