Skip to content

Commit f1eee93

Browse files
authored
Reset pipeline cache usage statistics on each generate call (openvinotoolkit#1961)
Otherwise the average and max cache usage values persist for the entire lifetime of the pipeline, which is inflexible when we want to inspect the dynamics of cache utilization for each individual `generate` call.
1 parent ddfc3be commit f1eee93

File tree

3 files changed

+10
-2
lines changed

3 files changed

+10
-2
lines changed

src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,12 @@ struct PipelineMetrics {
4242
float cache_usage = 0.0;
4343

4444
/**
45-
* Max KV cache usage during the lifetime of the pipeline in %
45+
* Max KV cache usage during the last .generate() call in %
4646
*/
4747
float max_cache_usage = 0.0;
4848

4949
/**
50-
* Running average of the KV cache usage during the lifetime of the pipeline, with max window size of 1000 steps
50+
* Running average of the KV cache usage during the last .generate() call, with max window size of 1000 internal model inferences
5151
*/
5252
float avg_cache_usage = 0.0;
5353

src/cpp/src/continuous_batching_impl.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,7 @@ std::vector<EncodedGenerationResult>
367367
ContinuousBatchingPipeline::ContinuousBatchingImpl::generate(const std::vector<ov::Tensor>& input_ids,
368368
const std::vector<GenerationConfig>& sampling_params,
369369
const StreamerVariant& streamer) {
370+
_reset_cache_usage_statistics();
370371
ManualTimer generate_timer("generate()");
371372
generate_timer.start();
372373

@@ -511,6 +512,12 @@ float ContinuousBatchingPipeline::ContinuousBatchingImpl::_get_current_running_a
511512
return std::accumulate(m_previous_step_cache_usages.begin(), m_previous_step_cache_usages.end(), 0.0) / m_previous_step_cache_usages.size();
512513
}
513514

515+
// Resets the cache usage statistics tracked by this pipeline implementation:
// empties m_previous_step_cache_usages and sets both max_cache_usage and
// avg_cache_usage in m_pipeline_metrics back to 0.
// NOTE(review): after this call m_previous_step_cache_usages is empty, and the
// running-average helper divides by its size() -- confirm the average is not
// queried before a new step usage has been registered.
void ContinuousBatchingPipeline::ContinuousBatchingImpl::_reset_cache_usage_statistics() {
516+
m_previous_step_cache_usages.clear();
517+
m_pipeline_metrics.max_cache_usage = 0.0;
518+
m_pipeline_metrics.avg_cache_usage = 0.0;
519+
}
520+
514521
void ContinuousBatchingPipeline::ContinuousBatchingImpl::drop_requests() {
515522
for (const std::shared_ptr<ov::genai::SequenceGroup> request : m_requests) {
516523
for (const auto& sequence: request->get_sequences()) {

src/cpp/src/continuous_batching_impl.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ class ContinuousBatchingPipeline::ContinuousBatchingImpl : public ContinuousBatc
9292
void _maybe_evict_cache_blocks(const SchedulerConfig& sched_config);
9393

9494
void _register_step_cache_usage(float step_cache_usage);
95+
void _reset_cache_usage_statistics();
9596
float _get_current_running_average_cache_usage() const;
9697
void _compute_cache_rotation_data(const std::vector<SequenceGroup::Ptr>& sequence_groups, const Scheduler::Output& scheduler_output);
9798

0 commit comments

Comments
 (0)