2020
2121namespace nemo_mel {
2222
23- // ─── Slaney mel scale constants ─────────────────────────────────────────────
23+ // Slaney mel scale constants
2424
2525static constexpr float kMinLogHz = 1000 .0f ; // Hz; HzToMel is linear below this frequency and logarithmic at or above it
2626static constexpr float kMinLogMel = 15 .0f ; // 1000 / (200/3): mel value at kMinLogHz, where the two regions meet
2727static constexpr float kLinScale = 200 .0f / 3 .0f ; // Hz per mel (linear region)
2828static constexpr float kLogStep = 0 .06875177742094912f ; // log(6.4) / 27: natural-log increment per mel in the logarithmic region
2929
30- // ─── Mel scale conversions ──────────────────────────────────────────────────
31-
3230float HzToMel (float hz) {
3331 if (hz < kMinLogHz ) return hz / kLinScale ;
3432 return kMinLogMel + std::log (hz / kMinLogHz ) / kLogStep ;
@@ -39,8 +37,6 @@ float MelToHz(float mel) {
3937 return kMinLogHz * std::exp ((mel - kMinLogMel ) * kLogStep );
4038}
4139
42- // ─── Filterbank creation ────────────────────────────────────────────────────
43-
4440std::vector<std::vector<float >> CreateMelFilterbank (int num_mels, int fft_size, int sample_rate) {
4541 int num_bins = fft_size / 2 + 1 ;
4642 float mel_low = HzToMel (0 .0f );
@@ -82,8 +78,6 @@ std::vector<std::vector<float>> CreateMelFilterbank(int num_mels, int fft_size,
8278 return filterbank;
8379}
8480
85- // ─── Single-frame DFT ──────────────────────────────────────────────────────
86-
8781void ComputeSTFTFrame (const float * frame, const float * window, int frame_len,
8882 int fft_size, std::vector<float >& magnitudes) {
8983 int num_bins = fft_size / 2 + 1 ;
@@ -107,8 +101,7 @@ void ComputeSTFTFrame(const float* frame, const float* window, int frame_len,
107101 }
108102}
109103
110- // ─── Batch (offline) log-mel extraction ─────────────────────────────────────
111-
104+ // BATCH LOG-MEL EXTRACTION
112105std::vector<float > NemoComputeLogMelBatch (const float * audio, size_t num_samples,
113106 const NemoMelConfig& cfg, int & out_num_frames) {
114107 // Lazily-initialized statics are fine for batch mode (same config per process).
@@ -163,8 +156,7 @@ std::vector<float> NemoComputeLogMelBatch(const float* audio, size_t num_samples
163156 return mel_spec;
164157}
165158
166- // ─── Streaming log-mel extraction ───────────────────────────────────────────
167-
159+ // STREAMING LOG-MEL EXTRACTION
168160NemoStreamingMelExtractor::NemoStreamingMelExtractor (const NemoMelConfig& cfg)
169161 : cfg_(cfg) {
170162 mel_filters_ = CreateMelFilterbank (cfg_.num_mels , cfg_.fft_size , cfg_.sample_rate );
@@ -192,7 +184,7 @@ std::pair<std::vector<float>, int> NemoStreamingMelExtractor::Process(
192184
193185 // Left-only center pad for streaming: prepend overlap from previous chunk.
194186 // For the first chunk this is zeros (matching center=True left edge).
195- int pad = cfg_.fft_size / 2 ; // 256 samples
187+ int pad = cfg_.fft_size / 2 ;
196188 std::vector<float > padded (pad + num_samples);
197189 std::memcpy (padded.data (), audio_overlap_.data (), pad * sizeof (float ));
198190 std::memcpy (padded.data () + pad, preemphasized.data (), num_samples * sizeof (float ));
0 commit comments