Skip to content

Commit 5f9e8ff

Browse files
committed
New, fresh and beautiful latency calculation that also takes parallel processing into account
1 parent bf82938 commit 5f9e8ff

4 files changed

Lines changed: 164 additions & 40 deletions

File tree

include/anira/scheduler/SessionElement.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,9 +274,10 @@ class ANIRA_API SessionElement {
274274
* @param host_buffer_size Host audio buffer size
275275
* @param host_sample_rate Host audio sample rate
276276
* @param wait_time Expected wait time for inference completion
277+
* @param postprocess_output_size Size of the model's postprocessed output in samples
277278
* @return Additional inference-caused latency in samples
278279
*/
279-
int calculate_inference_caused_latency(float max_possible_inferences, float host_buffer_size, float host_sample_rate, float wait_time) const;
280+
int calculate_inference_caused_latency(float max_possible_inferences, float host_buffer_size, float host_sample_rate, float wait_time, size_t postprocess_output_size) const;
280281

281282
/**
282283
* @brief Calculates expected wait time for inference completion

src/scheduler/SessionElement.cpp

Lines changed: 73 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,13 @@ void SessionElement::prepare(const HostConfig& host_config, std::vector<long> cu
7171
// If the host config allows smaller buffers, we need to adjust the latency and number of structs
7272
if (host_config.m_allow_smaller_buffers) {
7373
HostConfig adjusted_config = host_config;
74+
HostConfig min_config = host_config;
7475

7576
// Find the greatest relative buffersize and count down from there
7677
float greatest_buffer_size = 0;
7778
size_t greatest_buffer_size_index = 0;
7879
bool greatest_buffer_size_is_input = true;
80+
float buffer_size_ratio = 1.f;
7981

8082
for (size_t i = 0; i < m_inference_config.get_tensor_input_shape().size(); ++i) {
8183
if (m_inference_config.get_preprocess_input_size()[i] > 0) {
@@ -95,32 +97,47 @@ void SessionElement::prepare(const HostConfig& host_config, std::vector<long> cu
9597
}
9698
}
9799

100+
// Calculate the minimum buffer size based on the greatest buffer size
101+
if (greatest_buffer_size_is_input) {
102+
buffer_size_ratio = 1.f / static_cast<float>(m_inference_config.get_preprocess_input_size()[greatest_buffer_size_index]);
103+
} else {
104+
buffer_size_ratio = 1.f / static_cast<float>(m_inference_config.get_postprocess_output_size()[greatest_buffer_size_index]);
105+
}
106+
min_config.m_buffer_size = buffer_size_ratio * static_cast<float>(m_inference_config.get_preprocess_input_size()[host_config.m_tensor_index]);
107+
98108
while (--greatest_buffer_size > 0) {
99-
float greatest_buffer_size_ratio;
109+
float buffer_size_ratio;
100110
if (greatest_buffer_size_is_input) {
101-
greatest_buffer_size_ratio = greatest_buffer_size / m_inference_config.get_preprocess_input_size()[greatest_buffer_size_index];
111+
buffer_size_ratio = greatest_buffer_size / static_cast<float>(m_inference_config.get_preprocess_input_size()[greatest_buffer_size_index]);
102112
} else {
103-
greatest_buffer_size_ratio = greatest_buffer_size / m_inference_config.get_postprocess_output_size()[greatest_buffer_size_index];
113+
buffer_size_ratio = greatest_buffer_size / static_cast<float>(m_inference_config.get_postprocess_output_size()[greatest_buffer_size_index]);
104114
}
105-
adjusted_config.m_buffer_size = greatest_buffer_size_ratio * m_inference_config.get_preprocess_input_size()[host_config.m_tensor_index];
115+
adjusted_config.m_buffer_size = buffer_size_ratio * static_cast<float>(m_inference_config.get_preprocess_input_size()[host_config.m_tensor_index]);
116+
106117
std::vector<float> adjusted_latency;
107118
for (size_t i = 0; i < m_inference_config.get_tensor_output_shape().size(); ++i) {
108119
if (m_inference_config.get_postprocess_output_size()[i] > 0) {
109-
float adjusted_buffer_size = adjusted_config.get_relative_buffer_size(m_inference_config, i, false);
110-
float adjusted_sample_rate = adjusted_config.get_relative_sample_rate(m_inference_config, i, false);
111120
float max_buffer_size = host_config.get_relative_buffer_size(m_inference_config, i, false);
112-
float max_sample_rate = host_config.get_relative_sample_rate(m_inference_config, i, false);
113-
114-
int buffer_adaptation = calculate_buffer_adaptation(adjusted_buffer_size, m_inference_config.get_postprocess_output_size()[i]);
115-
int buffer_adaptation_full_buffer = std::min(static_cast<int>(std::ceil(max_buffer_size)), (int) m_inference_config.get_postprocess_output_size()[i]);
116-
buffer_adaptation = std::max(buffer_adaptation_full_buffer, buffer_adaptation);
117-
118-
float wait_time = calculate_wait_time(adjusted_buffer_size, adjusted_sample_rate);
119-
int inference_caused_latency_full_buffer = calculate_inference_caused_latency(max_num_inferences(adjusted_config), max_buffer_size, max_sample_rate, wait_time);
120-
// Inference caused latency of only full buffers and one buffer of adjusted sample rate
121-
inference_caused_latency_full_buffer = std::max(0, inference_caused_latency_full_buffer - (static_cast<int>(std::ceil(max_buffer_size - adjusted_buffer_size))));
122-
int inference_caused_latency = calculate_inference_caused_latency(max_num_inferences(adjusted_config), adjusted_buffer_size, adjusted_sample_rate, wait_time);
123-
inference_caused_latency = std::max(inference_caused_latency, inference_caused_latency_full_buffer);
121+
float adjusted_buffer_size = adjusted_config.get_relative_buffer_size(m_inference_config, i, false);
122+
float min_buffer_size = min_config.get_relative_buffer_size(m_inference_config, i, false);
123+
float sample_rate = adjusted_config.get_relative_sample_rate(m_inference_config, i, false);
124+
125+
// When allowing smaller buffer sizes, the buffer adaptation is always the post-process output size minus one
126+
// Because we could have buffers of size one only and this is the maximum adaptation possible
127+
int buffer_adaptation = std::max(static_cast<int>(m_inference_config.get_postprocess_output_size()[i]) - 1, 0);
128+
129+
float max_wait_time = calculate_wait_time(max_buffer_size, sample_rate);
130+
float adjusted_wait_time = calculate_wait_time(adjusted_buffer_size, sample_rate);
131+
float min_wait_time = calculate_wait_time(min_buffer_size, sample_rate);
132+
133+
float max_possible_inferences = std::max(max_num_inferences(adjusted_config), max_num_inferences(host_config));
134+
135+
int inference_caused_latency_max_buffer = calculate_inference_caused_latency(max_possible_inferences, max_buffer_size, sample_rate, max_wait_time, m_inference_config.get_postprocess_output_size()[i]);
136+
int inference_caused_latency_min_buffer = calculate_inference_caused_latency(1, min_buffer_size, sample_rate, min_wait_time, m_inference_config.get_postprocess_output_size()[i]);
137+
int inference_caused_latency_adjusted_buffer = calculate_inference_caused_latency(max_num_inferences(adjusted_config), adjusted_buffer_size, sample_rate, adjusted_wait_time, m_inference_config.get_postprocess_output_size()[i]);
138+
139+
int inference_caused_latency = std::max({inference_caused_latency_max_buffer, inference_caused_latency_adjusted_buffer, inference_caused_latency_min_buffer});
140+
124141
adjusted_latency.push_back(inference_caused_latency + buffer_adaptation);
125142
}
126143
}
@@ -249,7 +266,7 @@ std::vector<float> SessionElement::calculate_latency(const HostConfig& host_conf
249266
// Calculate the different parts of the latency
250267
int buffer_adaptation = calculate_buffer_adaptation(host_output_size, m_inference_config.get_postprocess_output_size()[i]);
251268
float wait_time = calculate_wait_time(host_output_size, sample_rate);
252-
int inference_caused_latency = calculate_inference_caused_latency(max_possible_inferences, host_output_size, sample_rate, wait_time);
269+
int inference_caused_latency = calculate_inference_caused_latency(max_possible_inferences, host_output_size, sample_rate, wait_time, m_inference_config.get_postprocess_output_size()[i]);
253270
// Add it all together
254271
result_float.push_back(buffer_adaptation + inference_caused_latency);
255272
}
@@ -291,12 +308,45 @@ int SessionElement::calculate_buffer_adaptation(float host_buffer_size, int post
291308
return res;
292309
}
293310

294-
int SessionElement::calculate_inference_caused_latency(float max_possible_inferences, float host_buffer_size, float host_sample_rate, float wait_time) const {
311+
int SessionElement::calculate_inference_caused_latency(float max_possible_inferences, float host_buffer_size, float host_sample_rate, float wait_time, size_t postprocess_output_size) const {
295312
// Calculate the host buffer time in ms
296313
float host_buffer_time = host_buffer_size * 1000.f / host_sample_rate;
297-
float total_inference_time_after_wait = (max_possible_inferences * m_inference_config.m_max_inference_time) - wait_time;
298-
float num_buffers_for_max_inferences = std::ceil(total_inference_time_after_wait / host_buffer_time);
299-
return std::ceil(num_buffers_for_max_inferences * host_buffer_size);
314+
float inference_time_left = 0.f;
315+
int host_buffer_size_int = static_cast<int>(std::floor(host_buffer_size));
316+
float host_buffer_time_int = host_buffer_size_int * 1000.f / host_sample_rate;
317+
int inference_caused_latency = 0;
318+
319+
unsigned int max_possible_inferences_parallel = static_cast<unsigned int>(std::ceil((max_possible_inferences) / static_cast<float>(m_inference_config.m_num_parallel_processors)));
320+
int already_inferred = 0;
321+
float wait_time_left = wait_time;
322+
for (unsigned int i = 0; i < max_possible_inferences_parallel; ++i) {
323+
inference_time_left += m_inference_config.m_max_inference_time;
324+
while (inference_time_left >= host_buffer_time_int && host_buffer_size_int > 0) {
325+
inference_caused_latency += host_buffer_size_int;
326+
inference_time_left -= host_buffer_time_int;
327+
wait_time_left += host_buffer_time_int;
328+
}
329+
}
330+
331+
while (inference_time_left > 0) {
332+
if (wait_time_left >= m_inference_config.m_max_inference_time) {
333+
inference_time_left -= m_inference_config.m_max_inference_time;
334+
already_inferred += m_inference_config.m_num_parallel_processors;
335+
wait_time_left -= m_inference_config.m_max_inference_time;
336+
} else {
337+
inference_caused_latency += host_buffer_size_int;
338+
if (host_buffer_time_int > 0) {
339+
inference_time_left -= host_buffer_time_int;
340+
} else {
341+
inference_caused_latency += 1;
342+
break;
343+
}
344+
}
345+
}
346+
347+
inference_caused_latency -= already_inferred * postprocess_output_size;
348+
349+
return inference_caused_latency;
300350
}
301351

302352
float SessionElement::calculate_wait_time(float host_buffer_size, float host_sample_rate) const {

test/scheduler/test_InferenceManager.cpp

Lines changed: 77 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -221,14 +221,87 @@ INSTANTIATE_TEST_SUITE_P(
221221
),
222222
{ 4095 }
223223
},
224+
InferenceManagerTestParams {
225+
HostConfig(2048, 48000, true),
226+
InferenceConfig(
227+
std::vector<ModelData>{ModelData("placeholder", anira::InferenceBackend::CUSTOM)},
228+
std::vector<TensorShape>{TensorShape({{1, 1024}}, {{360}})},
229+
ProcessingSpec({1}, {2}, {160}, {1}),
230+
3.f,
231+
0,
232+
false,
233+
0.5f,
234+
2
235+
),
236+
{ 1 }
237+
},
238+
InferenceManagerTestParams {
239+
HostConfig(2048, 48000, true),
240+
InferenceConfig(
241+
std::vector<ModelData>{ModelData("placeholder", anira::InferenceBackend::CUSTOM)},
242+
std::vector<TensorShape>{TensorShape({{1, 1024}}, {{360}})},
243+
ProcessingSpec({1}, {2}, {160}, {1}),
244+
5.f,
245+
0,
246+
false,
247+
0.5f,
248+
2
249+
),
250+
{ 6 }
251+
},
252+
InferenceManagerTestParams {
253+
HostConfig(2048, 48000, true),
254+
InferenceConfig(
255+
std::vector<ModelData>{ModelData("placeholder", anira::InferenceBackend::CUSTOM)},
256+
std::vector<TensorShape>{TensorShape({{1, 1024}}, {{360}})},
257+
ProcessingSpec({1}, {2}, {160}, {1}),
258+
4.9f,
259+
0,
260+
false,
261+
0.5f,
262+
1
263+
),
264+
{ 8 }
265+
},
266+
InferenceManagerTestParams {
267+
HostConfig(2048, 48000, true),
268+
InferenceConfig(
269+
std::vector<ModelData>{ModelData("placeholder", anira::InferenceBackend::CUSTOM)},
270+
std::vector<TensorShape>{TensorShape({{1, 1024}}, {{360}})},
271+
ProcessingSpec({1}, {2}, {160}, {1}),
272+
4.f,
273+
0,
274+
false,
275+
0.f
276+
),
277+
{ 12 }
278+
},
224279
InferenceManagerTestParams {
225280
HostConfig(1, 48000.0/2048, true),
226281
InferenceConfig(
227282
std::vector<ModelData>{ModelData("placeholder", anira::InferenceBackend::CUSTOM)},
228283
std::vector<TensorShape>{TensorShape({{1, 1, 1}}, {{1, 1, 2048}})},
229284
40.f
230285
),
231-
{ 5886 }
286+
{ 5885 }
287+
},
288+
InferenceManagerTestParams {
289+
HostConfig(2048, 48000.0, true),
290+
InferenceConfig(
291+
std::vector<ModelData>{ModelData("placeholder", anira::InferenceBackend::CUSTOM)},
292+
std::vector<TensorShape>{TensorShape({{1, 1, 2048}}, {{1, 1, 1}})},
293+
40.f
294+
),
295+
{ 1 }
296+
},
297+
InferenceManagerTestParams {
298+
HostConfig(2048, 48000.0, false),
299+
InferenceConfig(
300+
std::vector<ModelData>{ModelData("placeholder", anira::InferenceBackend::CUSTOM)},
301+
std::vector<TensorShape>{TensorShape({{1, 1, 2048}}, {{1, 1, 1}})},
302+
39.f
303+
),
304+
{ 1 }
232305
},
233306
InferenceManagerTestParams {
234307
HostConfig(1, 48000.0/2048),
@@ -247,7 +320,7 @@ INSTANTIATE_TEST_SUITE_P(
247320
ProcessingSpec({1}, {4}),
248321
20.f
249322
),
250-
{ 2 }
323+
{ 1 }
251324
},
252325
InferenceManagerTestParams {
253326
HostConfig(1./256., 48000./2048.),
@@ -356,7 +429,7 @@ INSTANTIATE_TEST_SUITE_P(
356429
ProcessingSpec({4, 2}, {1, 1}),
357430
50.f
358431
),
359-
{ 18944, 7104 }
432+
{ 6656, 2496 }
360433
},
361434
InferenceManagerTestParams {
362435
HostConfig(1500, 44100./8., true, 1),
@@ -366,7 +439,7 @@ INSTANTIATE_TEST_SUITE_P(
366439
ProcessingSpec({4, 2, 1}, {1, 1}),
367440
50.f
368441
),
369-
{ 18944, 7104 }
442+
{ 6656, 2496 }
370443
},
371444
InferenceManagerTestParams {
372445
HostConfig(256., 48000./8, false, 1),

test/scheduler/test_SessionElement.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -169,10 +169,10 @@ INSTANTIATE_TEST_SUITE_P(
169169
false,
170170
0.5f
171171
),
172-
{3967},
172+
{3966},
173173
2,
174174
{4096}, // Expected send buffer sizes
175-
{8063} // Expected receive buffer sizes
175+
{8062} // Expected receive buffer sizes
176176
},
177177
SessionElementTestParams {
178178
HostConfig(2048, 48000, true),
@@ -184,10 +184,10 @@ INSTANTIATE_TEST_SUITE_P(
184184
false,
185185
0.5f
186186
),
187-
{3007},
187+
{3006},
188188
2,
189189
{4096}, // Expected send buffer sizes
190-
{7103} // Expected receive buffer sizes
190+
{7102} // Expected receive buffer sizes
191191
},
192192
SessionElementTestParams {
193193
HostConfig(2048, 48000, true),
@@ -297,10 +297,10 @@ INSTANTIATE_TEST_SUITE_P(
297297
ProcessingSpec({16, 2}, {1, 3}),
298298
50.f
299299
),
300-
{12288, 768},
300+
{8192, 512},
301301
9,
302302
{3, 848}, // Expected send buffer sizes
303-
{30720, 1920} // Expected receive buffer sizes
303+
{26624, 1664} // Expected receive buffer sizes
304304
},
305305
// Non-power-of-two buffer size tests
306306
SessionElementTestParams {
@@ -351,10 +351,10 @@ INSTANTIATE_TEST_SUITE_P(
351351
false,
352352
0.5f
353353
),
354-
{1407},
354+
{1406},
355355
6,
356356
{6}, // Expected send buffer sizes
357-
{7551} // Expected receive buffer sizes
357+
{7550} // Expected receive buffer sizes
358358
},
359359
SessionElementTestParams {
360360
HostConfig(2048, 48000, true),
@@ -390,10 +390,10 @@ INSTANTIATE_TEST_SUITE_P(
390390
std::vector<TensorShape>{TensorShape({{1, 1, 1024}}, {{1, 1, 1024}})},
391391
20.f
392392
),
393-
{8192},
393+
{4096},
394394
12,
395395
{4096}, // Expected send buffer sizes
396-
{20480} // Expected receive buffer sizes
396+
{16384} // Expected receive buffer sizes
397397
},
398398
// Test with very short inference times
399399
SessionElementTestParams {
@@ -416,10 +416,10 @@ INSTANTIATE_TEST_SUITE_P(
416416
std::vector<TensorShape>{TensorShape({{1, 1, 256}}, {{1, 1, 256}})},
417417
100.f
418418
),
419-
{9728},
419+
{5120},
420420
40,
421421
{512}, // Expected send buffer sizes
422-
{19968} // Expected receive buffer sizes
422+
{15360} // Expected receive buffer sizes
423423
}
424424
),
425425
build_test_name

0 commit comments

Comments
 (0)