Skip to content

Commit 19dae8d

Browse files
ivaniushkov authored and nordicjm committed
applications: nrf_audio: refactor pscm_deinterleave()
Refactored the pscm_deinterleave() function to use uint16_t/uint32_t types instead of copying byte-by-byte. This reduced the typical execution time from 75 us to 23 us when using two 16-bit channels of 960 bytes each (the default values in the USB BIS source). The timings were measured with pin toggling during transmission of a sine wave. This saves around 1% of CPU time in the USB BIS source case. Signed-off-by: Ivan Iushkov <ivan.iushkov@nordicsemi.no>
1 parent 1b143ef commit 19dae8d

3 files changed

Lines changed: 68 additions & 33 deletions

File tree

include/pcm_stream_channel_modifier.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -131,15 +131,16 @@ int pscm_interleave(void const *const input, size_t input_size, uint8_t channel,
131131

132132
/**
133133
* @brief De-interleave a channel from a buffer of N channels of PCM
134-
* @note: The de-interleaver can not be executed inplace (i.e. input != output)
134+
* @note: The de-interleaver can not be executed inplace (i.e. input != output)
135135
*
136-
* @param[in] input Pointer to the multi channel input buffer.
137-
* @param[in] input_size Number of bytes in input.
136+
* @param[in] input Pointer to the multi channel input buffer.
137+
* Should be 4-byte aligned.
138+
* @param[in] input_size Number of bytes in input.
138139
* @param[in] input_channels Number of channels in the input buffer.
139-
* @param[in] channel Channel to de-interleave.
140+
* @param[in] channel Channel to de-interleave.
140141
* @param[in] pcm_bit_depth Bit depth of PCM samples (8, 16, 24, or 32).
141-
* @param[out] output Pointer to the channel output.
142-
* @param[in] output_size Number of bytes in output. Must be at least
142+
* @param[out] output Pointer to the channel output. Should be 4-byte aligned.
143+
* @param[in] output_size Number of bytes in output. Must be at least
143144
* (input_size / input_channels).
144145
*
145146
* @return 0 if successful, error value

lib/pcm_stream_channel_modifier/pcm_stream_channel_modifier.c

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -262,14 +262,12 @@ int pscm_interleave(void const *const input, size_t input_size, uint8_t channel,
262262
int pscm_deinterleave(void const *const input, size_t input_size, uint8_t input_channels,
263263
uint8_t channel, uint8_t pcm_bit_depth, void *output, size_t output_size)
264264
{
265-
uint8_t bytes_per_sample;
266-
size_t step;
267-
uint8_t *pointer_input;
268-
uint8_t *pointer_output;
265+
size_t bytes_to_copy;
269266

270267
if (input == NULL || output == NULL || input_size == 0 || channel >= input_channels ||
271268
pcm_bit_depth == 0 || pcm_bit_depth % 8 || output_size == 0 ||
272-
pcm_bit_depth > PSCM_MAX_CARRIER_BIT_DEPTH || input_channels == 0) {
269+
pcm_bit_depth > PSCM_MAX_CARRIER_BIT_DEPTH || input_channels == 0 ||
270+
!IS_ALIGNED(input, 4) || !IS_ALIGNED(output, 4)) {
273271
return -EINVAL;
274272
}
275273

@@ -278,17 +276,41 @@ int pscm_deinterleave(void const *const input, size_t input_size, uint8_t input_
278276
return -EINVAL;
279277
}
280278

281-
bytes_per_sample = pcm_bit_depth / 8;
282-
step = bytes_per_sample * (input_channels - 1);
283-
pointer_input = (uint8_t *)input + (bytes_per_sample * channel);
284-
pointer_output = (uint8_t *)output;
279+
uint8_t bytes_per_sample = pcm_bit_depth / 8;
280+
/*
281+
* Use types corresponding to pcm_bit_depth to make iterating over an array faster
282+
* when pcm_bit_depth is 16 or 32. Use uint8_t in the 8- and 24-bit cases.
283+
*/
284+
bytes_to_copy = input_size / input_channels;
285+
if (bytes_per_sample == sizeof(uint16_t)) {
286+
uint16_t *output_16 = (uint16_t *)output;
287+
const uint16_t *input_16 = (const uint16_t *)input + channel;
288+
uint16_t *output_16_end = output_16 + (bytes_to_copy / sizeof(uint16_t));
285289

286-
for (size_t i = 0; i < input_size; i += (step + bytes_per_sample)) {
287-
for (size_t j = 0; j < bytes_per_sample; j++) {
288-
*pointer_output++ = *pointer_input++;
290+
while (output_16 < output_16_end) {
291+
*output_16++ = *input_16;
292+
input_16 += input_channels;
289293
}
294+
} else if (bytes_per_sample == sizeof(uint32_t)) {
295+
uint32_t *output_32 = (uint32_t *)output;
296+
const uint32_t *input_32 = (const uint32_t *)input + channel;
297+
uint32_t *output_32_end = output_32 + (bytes_to_copy / sizeof(uint32_t));
290298

291-
pointer_input += step;
299+
while (output_32 < output_32_end) {
300+
*output_32++ = *input_32;
301+
input_32 += input_channels;
302+
}
303+
} else {
304+
uint8_t *output_8 = (uint8_t *)output;
305+
const uint8_t *input_8 = (const uint8_t *)input + (channel * bytes_per_sample);
306+
size_t step = bytes_per_sample * (input_channels - 1);
307+
308+
for (size_t i = 0; i < bytes_to_copy; i += bytes_per_sample) {
309+
for (uint8_t j = 0; j < bytes_per_sample; j++) {
310+
*output_8++ = *input_8++;
311+
}
312+
input_8 += step;
313+
}
292314
}
293315

294316
return 0;

tests/lib/pcm_stream_channel_modifier/src/main.c

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,11 @@ uint8_t __aligned(4) unpadded_surround_left[] = {37, 38, 39, 40, 41, 42, 43, 44,
4545
uint8_t __aligned(4) unpadded_surround_right[] = {49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60};
4646
uint8_t stereo_split[] = {1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18,
4747
7, 19, 8, 20, 9, 21, 10, 22, 11, 23, 12, 24};
48-
uint8_t multi_split[] = {1, 13, 25, 37, 49, 2, 14, 26, 38, 50, 3, 15, 27, 39, 51,
49-
4, 16, 28, 40, 52, 5, 17, 29, 41, 53, 6, 18, 30, 42, 54,
50-
7, 19, 31, 43, 55, 8, 20, 32, 44, 56, 9, 21, 33, 45, 57,
51-
10, 22, 34, 46, 58, 11, 23, 35, 47, 59, 12, 24, 36, 48, 60};
48+
uint8_t __aligned(4) multi_split[] = {
49+
1, 13, 25, 37, 49, 2, 14, 26, 38, 50, 3, 15, 27, 39, 51,
50+
4, 16, 28, 40, 52, 5, 17, 29, 41, 53, 6, 18, 30, 42, 54,
51+
7, 19, 31, 43, 55, 8, 20, 32, 44, 56, 9, 21, 33, 45, 57,
52+
10, 22, 34, 46, 58, 11, 23, 35, 47, 59, 12, 24, 36, 48, 60};
5253

5354
/* Result arrays */
5455
uint8_t left_zero_padded_8[] = {1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0,
@@ -57,17 +58,17 @@ uint8_t right_zero_padded_8[] = {0, 13, 0, 14, 0, 15, 0, 16, 0, 17, 0, 18,
5758
0, 19, 0, 20, 0, 21, 0, 22, 0, 23, 0, 24};
5859
uint8_t copy_padded_8[] = {1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
5960
7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12};
60-
uint8_t combine_8_ch2[] = {1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18,
61-
7, 19, 8, 20, 9, 21, 10, 22, 11, 23, 12, 24};
61+
uint8_t __aligned(4) combine_8_ch2[] = {1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18,
62+
7, 19, 8, 20, 9, 21, 10, 22, 11, 23, 12, 24};
6263

6364
uint8_t left_zero_padded_16[] = {1, 2, 0, 0, 3, 4, 0, 0, 5, 6, 0, 0,
6465
7, 8, 0, 0, 9, 10, 0, 0, 11, 12, 0, 0};
6566
uint8_t right_zero_padded_16[] = {0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 5, 6,
6667
0, 0, 7, 8, 0, 0, 9, 10, 0, 0, 11, 12};
6768
uint8_t copy_padded_16[] = {1, 2, 1, 2, 3, 4, 3, 4, 5, 6, 5, 6,
6869
7, 8, 7, 8, 9, 10, 9, 10, 11, 12, 11, 12};
69-
uint8_t combine_16[] = {1, 2, 13, 14, 3, 4, 15, 16, 5, 6, 17, 18,
70-
7, 8, 19, 20, 9, 10, 21, 22, 11, 12, 23, 24};
70+
uint8_t __aligned(4) combine_16[] = {1, 2, 13, 14, 3, 4, 15, 16, 5, 6, 17, 18,
71+
7, 8, 19, 20, 9, 10, 21, 22, 11, 12, 23, 24};
7172
uint8_t stereo_split_left_16[] = {1, 13, 3, 15, 5, 17, 7, 19, 9, 21, 11, 23};
7273
uint8_t stereo_split_right_16[] = {2, 14, 4, 16, 6, 18, 8, 20, 10, 22, 12, 24};
7374

@@ -77,8 +78,8 @@ uint8_t right_zero_padded_24[] = {0, 0, 0, 1, 2, 3, 0, 0, 0, 4, 5, 6,
7778
0, 0, 0, 7, 8, 9, 0, 0, 0, 10, 11, 12};
7879
uint8_t copy_padded_24[] = {1, 2, 3, 1, 2, 3, 4, 5, 6, 4, 5, 6,
7980
7, 8, 9, 7, 8, 9, 10, 11, 12, 10, 11, 12};
80-
uint8_t combine_24[] = {1, 2, 3, 13, 14, 15, 4, 5, 6, 16, 17, 18,
81-
7, 8, 9, 19, 20, 21, 10, 11, 12, 22, 23, 24};
81+
uint8_t __aligned(4) combine_24[] = {1, 2, 3, 13, 14, 15, 4, 5, 6, 16, 17, 18,
82+
7, 8, 9, 19, 20, 21, 10, 11, 12, 22, 23, 24};
8283
uint8_t stereo_split_left_24[] = {1, 13, 2, 4, 16, 5, 7, 19, 8, 10, 22, 11};
8384
uint8_t stereo_split_right_24[] = {14, 3, 15, 17, 6, 18, 20, 9, 21, 23, 12, 24};
8485

@@ -88,8 +89,8 @@ uint8_t right_zero_padded_32[] = {0, 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0,
8889
5, 6, 7, 8, 0, 0, 0, 0, 9, 10, 11, 12};
8990
uint8_t copy_padded_32[] = {1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8,
9091
5, 6, 7, 8, 9, 10, 11, 12, 9, 10, 11, 12};
91-
uint8_t combine_32[] = {1, 2, 3, 4, 13, 14, 15, 16, 5, 6, 7, 8,
92-
17, 18, 19, 20, 9, 10, 11, 12, 21, 22, 23, 24};
92+
uint8_t __aligned(4) combine_32[] = {1, 2, 3, 4, 13, 14, 15, 16, 5, 6, 7, 8,
93+
17, 18, 19, 20, 9, 10, 11, 12, 21, 22, 23, 24};
9394
uint8_t stereo_split_left_32[] = {1, 13, 2, 14, 5, 17, 6, 18, 9, 21, 10, 22};
9495
uint8_t stereo_split_right_32[] = {3, 15, 4, 16, 7, 19, 8, 20, 11, 23, 12, 24};
9596

@@ -160,7 +161,7 @@ ZTEST(suite_pscm_int, test_pscm_interleave_api_parameters)
160161
ZTEST(suite_pscm_deint, test_pscm_deinterleave_api_parameters)
161162
{
162163
int ret;
163-
uint8_t input[2], output[2];
164+
uint8_t __aligned(4) input[2], output[2];
164165
size_t input_size = sizeof(input);
165166
size_t output_size = sizeof(output);
166167
size_t test_output_size = 0;
@@ -205,6 +206,17 @@ ZTEST(suite_pscm_deint, test_pscm_deinterleave_api_parameters)
205206
ret = pscm_deinterleave(input, input_size, input_channels, channel, pcm_bit_depth, &output,
206207
test_output_size);
207208
zassert_equal(ret, -EINVAL, "Failed de-interleave for output size too small: ret %d", ret);
209+
210+
uint8_t *misaligned_input = (uint8_t *)input + 1;
211+
uint8_t *misaligned_output = (uint8_t *)output + 1;
212+
213+
ret = pscm_deinterleave(misaligned_input, input_size, input_channels, channel,
214+
pcm_bit_depth, output, output_size);
215+
zassert_equal(ret, -EINVAL, "Failed de-interleave for misaligned input: ret %d", ret);
216+
217+
ret = pscm_deinterleave(input, input_size, input_channels, channel, pcm_bit_depth,
218+
misaligned_output, output_size);
219+
zassert_equal(ret, -EINVAL, "Failed de-interleave for misaligned output: ret %d", ret);
208220
}
209221

210222
int interleave_test(void const *const input, size_t input_size, uint8_t pcm_bit_depth,
@@ -233,7 +245,7 @@ int deinterleave_test(void const *const input, size_t input_size, uint8_t pcm_bi
233245
size_t test_result_size)
234246
{
235247
int ret;
236-
uint8_t output[TEST_PCM_DEINT_SIZE];
248+
uint8_t __aligned(4) output[TEST_PCM_DEINT_SIZE];
237249
size_t output_size = sizeof(output);
238250

239251
ret = pscm_deinterleave(input, input_size, input_channels, channel, pcm_bit_depth, &output,

0 commit comments

Comments
 (0)