Skip to content

Commit 73c8baa

Browse files
committed
Clean code a bit
1 parent 9e85702 commit 73c8baa

File tree

4 files changed

+8
-28
lines changed

4 files changed

+8
-28
lines changed

CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -778,7 +778,6 @@ target_link_libraries(ocos_operators PRIVATE ${ocos_libraries})
778778

779779
file(GLOB _TARGET_LIB_SRC "shared/lib/*.cc")
780780

781-
# NeMo mel spectrogram is standalone (no ORT/C API deps) — always compile when audio is enabled
782781
if(OCOS_ENABLE_AUDIO)
783782
file(GLOB nemo_mel_SRC "shared/api/nemo_mel_*")
784783
list(APPEND _TARGET_LIB_SRC ${nemo_mel_SRC})

shared/api/nemo_mel_spectrogram.cc

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,13 @@
2020

2121
namespace nemo_mel {
2222

23-
// ─── Slaney mel scale constants ─────────────────────────────────────────────
23+
// Slaney mel scale constants
2424

2525
static constexpr float kMinLogHz = 1000.0f;
2626
static constexpr float kMinLogMel = 15.0f; // 1000 / (200/3)
2727
static constexpr float kLinScale = 200.0f / 3.0f; // Hz per mel (linear region)
2828
static constexpr float kLogStep = 0.06875177742094912f; // log(6.4) / 27
2929

30-
// ─── Mel scale conversions ──────────────────────────────────────────────────
31-
3230
float HzToMel(float hz) {
3331
if (hz < kMinLogHz) return hz / kLinScale;
3432
return kMinLogMel + std::log(hz / kMinLogHz) / kLogStep;
@@ -39,8 +37,6 @@ float MelToHz(float mel) {
3937
return kMinLogHz * std::exp((mel - kMinLogMel) * kLogStep);
4038
}
4139

42-
// ─── Filterbank creation ────────────────────────────────────────────────────
43-
4440
std::vector<std::vector<float>> CreateMelFilterbank(int num_mels, int fft_size, int sample_rate) {
4541
int num_bins = fft_size / 2 + 1;
4642
float mel_low = HzToMel(0.0f);
@@ -82,8 +78,6 @@ std::vector<std::vector<float>> CreateMelFilterbank(int num_mels, int fft_size,
8278
return filterbank;
8379
}
8480

85-
// ─── Single-frame DFT ──────────────────────────────────────────────────────
86-
8781
void ComputeSTFTFrame(const float* frame, const float* window, int frame_len,
8882
int fft_size, std::vector<float>& magnitudes) {
8983
int num_bins = fft_size / 2 + 1;
@@ -107,8 +101,7 @@ void ComputeSTFTFrame(const float* frame, const float* window, int frame_len,
107101
}
108102
}
109103

110-
// ─── Batch (offline) log-mel extraction ─────────────────────────────────────
111-
104+
// BATCH LOG-MEL EXTRACTION
112105
std::vector<float> NemoComputeLogMelBatch(const float* audio, size_t num_samples,
113106
const NemoMelConfig& cfg, int& out_num_frames) {
114107
// Lazily-initialized statics are fine for batch mode (same config per process).
@@ -163,8 +156,7 @@ std::vector<float> NemoComputeLogMelBatch(const float* audio, size_t num_samples
163156
return mel_spec;
164157
}
165158

166-
// ─── Streaming log-mel extraction ───────────────────────────────────────────
167-
159+
// STREAMING LOG-MEL EXTRACTION
168160
NemoStreamingMelExtractor::NemoStreamingMelExtractor(const NemoMelConfig& cfg)
169161
: cfg_(cfg) {
170162
mel_filters_ = CreateMelFilterbank(cfg_.num_mels, cfg_.fft_size, cfg_.sample_rate);
@@ -192,7 +184,7 @@ std::pair<std::vector<float>, int> NemoStreamingMelExtractor::Process(
192184

193185
// Left-only center pad for streaming: prepend overlap from previous chunk.
194186
// For the first chunk this is zeros (matching center=True left edge).
195-
int pad = cfg_.fft_size / 2; // 256 samples
187+
int pad = cfg_.fft_size / 2;
196188
std::vector<float> padded(pad + num_samples);
197189
std::memcpy(padded.data(), audio_overlap_.data(), pad * sizeof(float));
198190
std::memcpy(padded.data() + pad, preemphasized.data(), num_samples * sizeof(float));

shared/api/nemo_mel_spectrogram.h

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22
// Licensed under the MIT License.
33
//
44
// NeMo-compatible log-mel spectrogram extraction (Slaney scale, matching librosa/NeMo).
5-
// No ONNX Runtime or other framework dependencies — pure C++ with standard library only.
6-
// Designed to be portable across repos.
75

86
#pragma once
97

@@ -13,8 +11,6 @@
1311

1412
namespace nemo_mel {
1513

16-
// ─── Configuration ──────────────────────────────────────────────────────────
17-
1814
struct NemoMelConfig {
1915
int num_mels;
2016
int fft_size;
@@ -25,19 +21,15 @@ struct NemoMelConfig {
2521
float log_eps;
2622
};
2723

28-
// ─── Mel scale conversions (Slaney) ────────────────────────────────────────
24+
// Mel scale conversions (Slaney)
2925

3026
float HzToMel(float hz);
3127
float MelToHz(float mel);
3228

33-
// ─── Filterbank creation ────────────────────────────────────────────────────
34-
3529
/// Build a triangular mel filterbank with Slaney normalization (matches librosa).
3630
/// Returns shape [num_mels][num_bins] where num_bins = fft_size/2 + 1.
3731
std::vector<std::vector<float>> CreateMelFilterbank(int num_mels, int fft_size, int sample_rate);
3832

39-
// ─── Single-frame DFT ──────────────────────────────────────────────────────
40-
4133
/// Compute |DFT|^2 (power spectrum) for a single windowed frame.
4234
/// frame: pointer to fft_size samples (or win_length samples with window applied).
4335
/// window: pointer to the window coefficients (frame_len elements).
@@ -47,8 +39,8 @@ std::vector<std::vector<float>> CreateMelFilterbank(int num_mels, int fft_size,
4739
void ComputeSTFTFrame(const float* frame, const float* window, int frame_len,
4840
int fft_size, std::vector<float>& magnitudes);
4941

50-
// ─── Batch (offline) log-mel extraction ─────────────────────────────────────
5142

43+
// BATCH LOG-MEL EXTRACTION
5244
/// Compute NeMo-compatible log-mel spectrogram for a complete audio buffer.
5345
/// Applies pre-emphasis, center-pads both sides (fft_size/2 zeros), computes STFT
5446
/// with a periodic Hann window, applies mel filterbank, and takes log(mel + eps).
@@ -58,8 +50,7 @@ void ComputeSTFTFrame(const float* frame, const float* window, int frame_len,
5850
std::vector<float> NemoComputeLogMelBatch(const float* audio, size_t num_samples,
5951
const NemoMelConfig& cfg, int& out_num_frames);
6052

61-
// ─── Streaming log-mel extraction ───────────────────────────────────────────
62-
53+
// STREAMING LOG-MEL EXTRACTION
6354
/// Stateful streaming NeMo-compatible mel extractor that maintains overlap and
6455
/// pre-emphasis state across successive audio chunks.
6556
///

test/static_test/test_nemo_mel.cc

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@
1111

1212
using namespace nemo_mel;
1313

14-
// ─── Helpers ────────────────────────────────────────────────────────────────
15-
1614
static NemoMelConfig MakeTestConfig() {
1715
NemoMelConfig cfg;
1816
cfg.num_mels = 128;
@@ -37,7 +35,7 @@ static std::vector<float> SineWave(float freq_hz, float duration_sec,
3735
return wav;
3836
}
3937

40-
// ─── Mel scale conversions ──────────────────────────────────────────────────
38+
// Mel scale conversions (Slaney)
4139

4240
TEST(NemoMelTest, HzToMelLinearRegion) {
4341
// Below 1000 Hz the Slaney scale is linear: mel = 3 * hz / 200

0 commit comments

Comments
 (0)