-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathdemo_mfcc.cpp
229 lines (205 loc) · 9.06 KB
/
demo_mfcc.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
//
// Created by toson on 19-7-17.
//
#include "utils/mfcc.hpp"
#include "utils/sas_util.h"
#include "fstream"
#if 0 //没有使用加载wav的程序,而是直接加载保存好的二进制数据
typedef struct wave_tag {
char ChunkID[4]; // "RIFF"标志
unsigned int ChunkSize; // 文件长度(WAVE文件的大小, 不含前8个字节)
char Format[4]; // "WAVE"标志
char SubChunk1ID[4]; // "fmt "标志
unsigned int SubChunk1Size; // 过渡字节(不定)
unsigned short int AudioFormat; // 格式类别(10H为PCM格式的声音数据)
unsigned short int NumChannels; // 通道数(单声道为1, 双声道为2)
unsigned int SampleRate; // 采样率(每秒样本数), 表示每个通道的播放速度
unsigned int ByteRate; // 波形音频数据传输速率, 其值为:通道数*每秒数据位数*每样本的数据位数/8
unsigned short int BlockAlign; // 每样本的数据位数(按字节算), 其值为:通道数*每样本的数据位值/8
unsigned short int BitsPerSample; // 每样本的数据位数, 表示每个声道中各个样本的数据位数.
char SubChunk2ID[4]; // 数据标记"data"
unsigned int SubChunk2Size; // 语音数据的长度
} waveft;
int resampleData(const int16_t *sourceData, int32_t sampleRate, uint32_t srcSize, int16_t *destinationData,
int32_t newSampleRate, uint32_t dstSize = 0) {
if (sampleRate == newSampleRate) {
memcpy(destinationData, sourceData, srcSize * sizeof(int16_t));
return 0;
}
uint32_t last_pos = srcSize - 1;
if (dstSize != (uint32_t) (srcSize * ((float) newSampleRate / sampleRate))) {
return -1;
}
int s = 0;
for (uint32_t idx = 0; idx < dstSize; idx++) {
float index = ((float) idx * sampleRate) / (newSampleRate);
uint32_t p1 = (uint32_t) index;
float coef = index - p1;
uint32_t p2 = (p1 == last_pos) ? last_pos : p1 + 1;
destinationData[idx] = (int16_t) ((1.0f - coef) * sourceData[p1] + coef * sourceData[p2]);
}
return 0;
}
class Wav {
private:
char *buffer8;
short *buffer16;
float *buffer32;
public:
unsigned char *srcdata; //data中储存音频的有效数据
unsigned char *resampled_data; //重采样过后的数据
waveft waveformatex;
int length_buffer;
int length_wav;//音频有效数据的长度(采样点数)
int thislabel;
Wav(const char *path, unsigned int nSamplesPerSec) {
//创建对象,并初始化对象的变量值
memset(&waveformatex, 0, sizeof(waveformatex));
length_buffer = 0;
length_wav = 0;
buffer8 = new char[length_buffer];
buffer16 = new short[length_buffer];
buffer32 = new float[length_buffer];
srcdata = new unsigned char[length_wav];
resampled_data = new unsigned char[length_wav];
thislabel = Load(path, nSamplesPerSec);
}
void WavToBuffer() {
switch (waveformatex.BitsPerSample) {
case 8:
buffer8 = (char *) realloc(buffer8, sizeof(char) * (length_buffer = length_wav));
for (int i = 0; i < length_buffer; i++) {
buffer8[i] = (char) resampled_data[i];
}
break;
case 16:
buffer16 = (short *) realloc(buffer16, sizeof(short) * (length_buffer = length_wav / 2));
for (int i = 0; i < length_buffer; i++) {
buffer16[i] = (short) ((resampled_data[2 * i + 1] << 8) | resampled_data[2 * i]);
}
break;
case 32:
buffer32 = (float *) realloc(buffer32, sizeof(float) * (length_buffer = length_wav / 4));
for (int i = 0; i < length_buffer; i++) {
buffer32[i] = *(float *) &resampled_data[4 * i];
}
break;
}
}
int Load(const char *path, unsigned int nSamplesPerSec) {
FILE *file = fopen(path, "rb");
if (!file) {
fprintf(stderr, "[Load] [%s not found]\n", path);
return -1;
}
int chunk = 0;
fread(&waveformatex, sizeof(struct wave_tag), 1, file);
std::cout << "wav file length: " << waveformatex.ChunkSize << " bytes" << std::endl;
//std::cout << "waveformatex.SubChunk1Size: " << waveformatex.SubChunk1Size << " bytes" << std::endl;
//std::cout << "waveformatex.NumChannels: " << waveformatex.NumChannels << std::endl;
std::cout << "nSamplesPerSec: " << waveformatex.SampleRate << " Hz" << std::endl;
//std::cout << "waveformatex.BitsPerSample: " << waveformatex.BitsPerSample << " bits" << std::endl;
fread(&chunk, 2, 1, file);
while (chunk != 0x6164 && fread(&chunk, 2, 1, file) != EOF);
fread(&chunk, 2, 1, file);
if (chunk != 0x6174) { //检索“data”字符: 0x64617461
return -1;
}
fread(&length_wav, sizeof(int), 1, file);
srcdata = (unsigned char *) realloc(srcdata, length_wav);//申请length_wav大小的空间,空间的首地址为data
std::cout << "length_buffer = length_wav: " << length_wav << " bytes" << std::endl;
fread(srcdata, length_wav, 1, file);//将音频的有效数据存储到data中。
fclose(file);
//重采样
if (nSamplesPerSec != waveformatex.SampleRate) {
uint32_t dstSize = (uint32_t) (length_wav * ((float) nSamplesPerSec / waveformatex.SampleRate));
resampled_data = (unsigned char *) realloc(resampled_data, dstSize);
if (!resampleData((int16_t *) srcdata, waveformatex.SampleRate, length_wav / 2,
(int16_t *) resampled_data, nSamplesPerSec, dstSize / 2)) {
waveformatex.SampleRate = nSamplesPerSec;
length_wav = dstSize;
}
} else {
resampled_data = srcdata;
}
if (waveformatex.BitsPerSample != 32 || waveformatex.NumChannels != 1 ||
waveformatex.SampleRate != nSamplesPerSec)
return 1;
else
return 0;
}
double Get_Buffer(int index) {
if (0 <= index && index < length_buffer) {
switch (waveformatex.BitsPerSample) {
case 8:
return (buffer8[index] + 0.5) / 127.5;
case 16:
return (buffer16[index] + 0.5) / 32767.5;
case 32:
return buffer32[index];
}
}
return 0;
}
};
#endif
int main() {
// // 读取文件
// Wav wav("raw.wav", nSamplesPerSec);
// wav.WavToBuffer();
// if (wav.thislabel == 1) {
// std::cout << "wav.thislabel == 1" << std::endl;
// return -1;
// }
// std::cout << "SampleRate: " << wav.waveformatex.SampleRate << " Hz" << std::endl;
//我读取的wav数据和Python(librosa)读出来的不一样,深入检查未排查到原因。
// 于是我将python里读取的wav数据保存了下来(张尚龙python代码读取的wav数据)
// 现在载入该数据:
unsigned long ifile_length = 0;
std::vector<uint8_t> ifile_data; //wav数据
std::fstream ifile("/home/toson/CLionProjects/MFCC/y.bin", std::ios::in);//使用python导出的wav数据
if (ifile) {
ifile.seekg(0, std::ios::end); // go to the end
ifile_length = (unsigned long) ifile.tellg(); // report location (this is the length)
ifile.seekg(0, std::ios::beg); // go back to the beginning
ifile_data.resize(ifile_length, 0);
ifile.read((char *) ifile_data.data(), ifile_length);
ifile.close();
} else {
printf("It is no file: y.bin\n");
return -1;
}
auto stime = NowTime<milliseconds>();
// 运算mel特征向量 //11ms
//auto mel = log_mel(ifile_data, 16000);
auto mel = pcen(ifile_data, 16000);
auto etime = NowTime<milliseconds>();
std::cout << "all cost: " << etime - stime << std::endl;
std::cout << "[" << mel.rows << ", " << mel.cols << "]" << std::endl;
// cv::print(mel);
// 首次运算会多使用5ms,生成mel滤波器用。
//auto mel2 = log_mel(ifile_data, 16000);
auto mel2 = pcen(ifile_data, 16000);
auto etime2 = NowTime<milliseconds>();
std::cout << "all2 cost: " << etime2 - etime << std::endl;
cv::print(mel2);
// for (int j = 0; j < mel2.rows; j++) {
// for (int i = 0; i < mel2.cols; i++) {
// std::cout << "[ " << j << " , " << i << " ]: " << mel2(j, i) << std::endl;
// }
// }
// std::cout << std::endl;
//保存为文件
// std::fstream ofile("out_mel.csv", std::ios::out);
// if (ofile) {
// for (int i = 0; i < mel.shape().rows; i++) {
// for (int j = 0; j < mel.shape().cols; j++) {
// ofile << mel(i, j) << " ";
// }
// ofile << std::endl;
// }
// ofile.close();
// }
// std::cout << R"(Save as "out_mel.csv")" << std::endl;
return 0;
}