Skip to content

Commit 81d3358

Browse files
authored
Flac output improvements (#34)
* Use a simple cast instead of byte-level manipulation
* Add a mode to always output left-justified 32-bit samples to make downstream processing easier for 24-bits-per-sample audio
1 parent 2e8abb7 commit 81d3358

File tree

2 files changed

+107
-98
lines changed

2 files changed

+107
-98
lines changed

include/flac_decoder.h

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,8 @@ class FLACDecoder {
223223
/// @return FLAC_DECODER_SUCCESS on success
224224
/// FLAC_DECODER_NO_MORE_FRAMES when end of stream reached
225225
/// Error code on failure
226-
FLACDecoderResult decode_frame(const uint8_t *buffer, size_t buffer_length, uint8_t *output_buffer, uint32_t *num_samples);
226+
FLACDecoderResult decode_frame(const uint8_t *buffer, size_t buffer_length, uint8_t *output_buffer,
227+
uint32_t *num_samples);
227228

228229
// ========================================
229230
// Stream Information Getters
@@ -236,7 +237,13 @@ class FLACDecoder {
236237
uint64_t get_num_samples() const { return this->num_samples_; }
237238

238239
/// Get number of bytes per sample in output (e.g., 2 for 16-bit, 3 for 24-bit)
239-
uint32_t get_output_bytes_per_sample() const { return (this->sample_depth_ + 7) / 8; }
240+
/// Returns 4 when 32-bit output mode is enabled
241+
uint32_t get_output_bytes_per_sample() const {
242+
if (this->output_32bit_samples_) {
243+
return 4;
244+
}
245+
return (this->sample_depth_ + 7) / 8;
246+
}
240247

241248
/// Get sample rate in Hz (e.g., 44100, 48000)
242249
uint32_t get_sample_rate() const { return this->sample_rate_; }
@@ -329,6 +336,19 @@ class FLACDecoder {
329336
/// Get current CRC checking state
330337
bool get_crc_check_enabled() const { return this->enable_crc_check_; }
331338

339+
/// @brief Enable or disable 32-bit sample output mode
340+
///
341+
/// When enabled, all samples are output as 32-bit values regardless of the
342+
/// original bit depth. Samples are left-justified (MSB-aligned), so 24-bit
343+
/// audio is shifted left by 8, 16-bit by 16, etc. This simplifies downstream
344+
/// processing on embedded devices by avoiding 3-byte packed samples.
345+
///
346+
/// @param enabled true to enable 32-bit output, false for native packing (default)
347+
void set_output_32bit_samples(bool enabled) { this->output_32bit_samples_ = enabled; }
348+
349+
/// Get current 32-bit sample output state
350+
bool get_output_32bit_samples() const { return this->output_32bit_samples_; }
351+
332352
private:
333353
// ========================================
334354
// Frame Decoding
@@ -406,6 +426,15 @@ class FLACDecoder {
406426
void write_samples_general(uint8_t *output_buffer, uint32_t block_size, uint32_t bytes_per_sample,
407427
uint32_t shift_amount, uint32_t sample_depth);
408428

429+
/// @brief Write decoded samples to output buffer using 32-bit stereo fast path
430+
void write_samples_32bit_stereo(uint8_t *output_buffer, uint32_t block_size, uint32_t shift_amount);
431+
432+
/// @brief Write decoded samples to output buffer using 32-bit mono fast path
433+
void write_samples_32bit_mono(uint8_t *output_buffer, uint32_t block_size, uint32_t shift_amount);
434+
435+
/// @brief Write decoded samples to output buffer using 32-bit general path (>2 channels)
436+
void write_samples_32bit_general(uint8_t *output_buffer, uint32_t block_size, uint32_t shift_amount);
437+
409438
// ========================================
410439
// Input Buffer State
411440
// ========================================
@@ -446,8 +475,9 @@ class FLACDecoder {
446475
// ========================================
447476
// Decoder State Flags
448477
// ========================================
449-
bool out_of_data_ = false; // Flag indicating end of input data reached
450-
bool enable_crc_check_ = true; // Flag to enable/disable CRC validation
478+
bool out_of_data_ = false; // Flag indicating end of input data reached
479+
bool enable_crc_check_ = true; // Flag to enable/disable CRC validation
480+
bool output_32bit_samples_ = false; // Output all samples as 32-bit
451481

452482
// ========================================
453483
// Header Parsing State (for streaming)

src/decode/flac/flac_decoder.cpp

Lines changed: 73 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -241,22 +241,36 @@ FLACDecoderResult FLACDecoder::decode_frame(const uint8_t *buffer, size_t buffer
241241
}
242242
}
243243

244-
uint32_t bytes_per_sample = (this->curr_frame_sample_depth_ + 7) / 8;
245-
uint32_t shift_amount = 0;
246-
if (this->curr_frame_sample_depth_ % 8 != 0) {
247-
shift_amount = 8 - (this->curr_frame_sample_depth_ % 8);
248-
}
249-
250244
// Write decoded samples to output buffer using optimized fast paths
251-
if (this->curr_frame_sample_depth_ == 16 && shift_amount == 0 && this->num_channels_ == 2) {
252-
this->write_samples_16bit_stereo(output_buffer, this->curr_frame_block_size_);
253-
} else if (this->curr_frame_sample_depth_ == 16 && shift_amount == 0 && this->num_channels_ == 1) {
254-
this->write_samples_16bit_mono(output_buffer, this->curr_frame_block_size_);
255-
} else if (this->curr_frame_sample_depth_ == 24 && shift_amount == 0 && this->num_channels_ == 2) {
256-
this->write_samples_24bit_stereo(output_buffer, this->curr_frame_block_size_);
245+
if (this->output_32bit_samples_) {
246+
// 32-bit output mode: all samples output as 4 bytes, left-justified (MSB-aligned)
247+
uint32_t shift_amount = 32 - this->curr_frame_sample_depth_;
248+
249+
if (this->num_channels_ == 2) {
250+
this->write_samples_32bit_stereo(output_buffer, this->curr_frame_block_size_, shift_amount);
251+
} else if (this->num_channels_ == 1) {
252+
this->write_samples_32bit_mono(output_buffer, this->curr_frame_block_size_, shift_amount);
253+
} else {
254+
this->write_samples_32bit_general(output_buffer, this->curr_frame_block_size_, shift_amount);
255+
}
257256
} else {
258-
this->write_samples_general(output_buffer, this->curr_frame_block_size_, bytes_per_sample, shift_amount,
259-
this->curr_frame_sample_depth_);
257+
// Native output mode: pack to nearest byte boundary
258+
uint32_t bytes_per_sample = (this->curr_frame_sample_depth_ + 7) / 8;
259+
uint32_t shift_amount = 0;
260+
if (this->curr_frame_sample_depth_ % 8 != 0) {
261+
shift_amount = 8 - (this->curr_frame_sample_depth_ % 8);
262+
}
263+
264+
if (this->curr_frame_sample_depth_ == 16 && shift_amount == 0 && this->num_channels_ == 2) {
265+
this->write_samples_16bit_stereo(output_buffer, this->curr_frame_block_size_);
266+
} else if (this->curr_frame_sample_depth_ == 16 && shift_amount == 0 && this->num_channels_ == 1) {
267+
this->write_samples_16bit_mono(output_buffer, this->curr_frame_block_size_);
268+
} else if (this->curr_frame_sample_depth_ == 24 && shift_amount == 0 && this->num_channels_ == 2) {
269+
this->write_samples_24bit_stereo(output_buffer, this->curr_frame_block_size_);
270+
} else {
271+
this->write_samples_general(output_buffer, this->curr_frame_block_size_, bytes_per_sample, shift_amount,
272+
this->curr_frame_sample_depth_);
273+
}
260274
}
261275

262276
this->reset_bit_buffer();
@@ -265,90 +279,22 @@ FLACDecoderResult FLACDecoder::decode_frame(const uint8_t *buffer, size_t buffer
265279

266280
FLAC_OPTIMIZE_O3
267281
void FLACDecoder::write_samples_16bit_stereo(uint8_t *output_buffer, uint32_t block_size) {
268-
// 16-bit stereo fast path with 4-sample unrolling
269-
std::size_t output_index = 0;
270-
uint32_t i = 0;
271-
const uint32_t unroll_limit = block_size & ~3U; // Round down to multiple of 4
282+
// 16-bit stereo fast path
283+
int16_t *output_samples = reinterpret_cast<int16_t *>(output_buffer);
272284

273-
// Process 4 samples at a time
274-
for (; i < unroll_limit; i += 4) {
275-
// Sample 0 - Left and Right channels
276-
int32_t sample_0_l = this->block_samples_[i];
277-
int32_t sample_0_r = this->block_samples_[block_size + i];
278-
// Sample 1 - Left and Right channels
279-
int32_t sample_1_l = this->block_samples_[i + 1];
280-
int32_t sample_1_r = this->block_samples_[block_size + i + 1];
281-
// Sample 2 - Left and Right channels
282-
int32_t sample_2_l = this->block_samples_[i + 2];
283-
int32_t sample_2_r = this->block_samples_[block_size + i + 2];
284-
// Sample 3 - Left and Right channels
285-
int32_t sample_3_l = this->block_samples_[i + 3];
286-
int32_t sample_3_r = this->block_samples_[block_size + i + 3];
287-
288-
// Direct 16-bit writes (little-endian)
289-
output_buffer[output_index++] = sample_0_l & 0xFF;
290-
output_buffer[output_index++] = (sample_0_l >> 8) & 0xFF;
291-
output_buffer[output_index++] = sample_0_r & 0xFF;
292-
output_buffer[output_index++] = (sample_0_r >> 8) & 0xFF;
293-
294-
output_buffer[output_index++] = sample_1_l & 0xFF;
295-
output_buffer[output_index++] = (sample_1_l >> 8) & 0xFF;
296-
output_buffer[output_index++] = sample_1_r & 0xFF;
297-
output_buffer[output_index++] = (sample_1_r >> 8) & 0xFF;
298-
299-
output_buffer[output_index++] = sample_2_l & 0xFF;
300-
output_buffer[output_index++] = (sample_2_l >> 8) & 0xFF;
301-
output_buffer[output_index++] = sample_2_r & 0xFF;
302-
output_buffer[output_index++] = (sample_2_r >> 8) & 0xFF;
303-
304-
output_buffer[output_index++] = sample_3_l & 0xFF;
305-
output_buffer[output_index++] = (sample_3_l >> 8) & 0xFF;
306-
output_buffer[output_index++] = sample_3_r & 0xFF;
307-
output_buffer[output_index++] = (sample_3_r >> 8) & 0xFF;
308-
}
309-
310-
// Handle remaining samples
311-
for (; i < block_size; i++) {
312-
int32_t sample_l = this->block_samples_[i];
313-
int32_t sample_r = this->block_samples_[block_size + i];
314-
315-
output_buffer[output_index++] = sample_l & 0xFF;
316-
output_buffer[output_index++] = (sample_l >> 8) & 0xFF;
317-
output_buffer[output_index++] = sample_r & 0xFF;
318-
output_buffer[output_index++] = (sample_r >> 8) & 0xFF;
285+
for (uint32_t i = 0; i < block_size; ++i) {
286+
output_samples[2 * i] = this->block_samples_[i];
287+
output_samples[2 * i + 1] = this->block_samples_[block_size + i];
319288
}
320289
}
321290

322291
FLAC_OPTIMIZE_O3
323292
void FLACDecoder::write_samples_16bit_mono(uint8_t *output_buffer, uint32_t block_size) {
324-
// 16-bit mono fast path with 4-sample unrolling
325-
std::size_t output_index = 0;
326-
uint32_t i = 0;
327-
const uint32_t unroll_limit = block_size & ~3U; // Round down to multiple of 4
328-
329-
// Process 4 samples at a time
330-
for (; i < unroll_limit; i += 4) {
331-
int32_t sample_0 = this->block_samples_[i];
332-
int32_t sample_1 = this->block_samples_[i + 1];
333-
int32_t sample_2 = this->block_samples_[i + 2];
334-
int32_t sample_3 = this->block_samples_[i + 3];
335-
336-
// Direct 16-bit writes (little-endian)
337-
output_buffer[output_index++] = sample_0 & 0xFF;
338-
output_buffer[output_index++] = (sample_0 >> 8) & 0xFF;
339-
output_buffer[output_index++] = sample_1 & 0xFF;
340-
output_buffer[output_index++] = (sample_1 >> 8) & 0xFF;
341-
output_buffer[output_index++] = sample_2 & 0xFF;
342-
output_buffer[output_index++] = (sample_2 >> 8) & 0xFF;
343-
output_buffer[output_index++] = sample_3 & 0xFF;
344-
output_buffer[output_index++] = (sample_3 >> 8) & 0xFF;
345-
}
293+
// 16-bit mono fast path
294+
int16_t *output_samples = reinterpret_cast<int16_t *>(output_buffer);
346295

347-
// Handle remaining samples
348-
for (; i < block_size; i++) {
349-
int32_t sample = this->block_samples_[i];
350-
output_buffer[output_index++] = sample & 0xFF;
351-
output_buffer[output_index++] = (sample >> 8) & 0xFF;
296+
for (uint32_t i = 0; i < block_size; ++i) {
297+
output_samples[i] = this->block_samples_[i];
352298
}
353299
}
354300

@@ -448,6 +394,40 @@ void FLACDecoder::write_samples_general(uint8_t *output_buffer, uint32_t block_s
448394
}
449395
}
450396

397+
FLAC_OPTIMIZE_O3
398+
void FLACDecoder::write_samples_32bit_stereo(uint8_t *output_buffer, uint32_t block_size, uint32_t shift_amount) {
399+
int32_t *output_samples = reinterpret_cast<int32_t *>(output_buffer);
400+
const int32_t *left = this->block_samples_;
401+
const int32_t *right = this->block_samples_ + block_size;
402+
403+
for (uint32_t i = 0; i < block_size; ++i) {
404+
output_samples[i * 2] = left[i] << shift_amount;
405+
output_samples[i * 2 + 1] = right[i] << shift_amount;
406+
}
407+
}
408+
409+
FLAC_OPTIMIZE_O3
410+
void FLACDecoder::write_samples_32bit_mono(uint8_t *output_buffer, uint32_t block_size, uint32_t shift_amount) {
411+
int32_t *output_samples = reinterpret_cast<int32_t *>(output_buffer);
412+
const int32_t *samples = this->block_samples_;
413+
414+
for (uint32_t i = 0; i < block_size; ++i) {
415+
output_samples[i] = samples[i] << shift_amount;
416+
}
417+
}
418+
419+
FLAC_OPTIMIZE_O3
420+
void FLACDecoder::write_samples_32bit_general(uint8_t *output_buffer, uint32_t block_size, uint32_t shift_amount) {
421+
int32_t *output_samples = reinterpret_cast<int32_t *>(output_buffer);
422+
uint32_t output_index = 0;
423+
424+
for (uint32_t i = 0; i < block_size; ++i) {
425+
for (uint32_t ch = 0; ch < this->num_channels_; ++ch) {
426+
output_samples[output_index++] = this->block_samples_[ch * block_size + i] << shift_amount;
427+
}
428+
}
429+
}
430+
451431
FLACDecoderResult FLACDecoder::find_frame_sync(uint8_t &sync_byte_0, uint8_t &sync_byte_1) {
452432
this->frame_start_index_ = 0;
453433

@@ -627,9 +607,8 @@ FLACDecoderResult FLACDecoder::decode_frame_header() {
627607
frame_sample_rate = this->sample_rate_;
628608
} else {
629609
// Standard sample rate codes (1-11)
630-
static const uint32_t sample_rate_table[] = {
631-
88200, 176400, 192000, 8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000
632-
};
610+
static const uint32_t sample_rate_table[] = {88200, 176400, 192000, 8000, 16000, 22050,
611+
24000, 32000, 44100, 48000, 96000};
633612
if (sample_rate_code >= 1 && sample_rate_code <= 11) {
634613
frame_sample_rate = sample_rate_table[sample_rate_code - 1];
635614
} else {

0 commit comments

Comments
 (0)