Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 43 additions & 5 deletions src/llm/language_model/continuous_batching/llm_executor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,15 @@

namespace ovms {
struct LLMExecutor {
bool isDynamicKVCache;
// For logging purposes we could have more information about graph and node here
std::mutex mutex;
std::condition_variable cv;
std::shared_ptr<ov::genai::ContinuousBatchingPipeline> pipe = nullptr;

// Takes shared ownership of the continuous-batching pipeline.
// isDynamicKVCacheSet: true when the KV cache grows on demand (cache_size == 0),
// which changes how cache usage is reported in printMetrics(). Defaults to
// false so existing call sites keep their previous behavior.
LLMExecutor(std::shared_ptr<ov::genai::ContinuousBatchingPipeline> pipe, bool isDynamicKVCacheSet = false) {
    this->pipe = std::move(pipe);
    this->isDynamicKVCache = isDynamicKVCacheSet;
}

bool hasRequests() {
Expand All @@ -59,12 +61,48 @@ struct LLMExecutor {
cv.notify_one();
}

// Builds a human-readable KV-cache status string for logging.
// A dynamic cache has no fixed capacity, so only its current size is
// meaningful; a fixed cache additionally reports utilization percentage
// (one decimal place), e.g. "42.0% of 1.0 GB".
std::string formatCacheInfo(float cacheUsage, size_t cacheBytes, bool isCacheDynamic) {
    if (isCacheDynamic) {
        return formatBytes(cacheBytes);
    }
    std::ostringstream text;
    text << std::fixed << std::setprecision(1) << cacheUsage << "% of " << formatBytes(cacheBytes);
    return text.str();
}

// Renders a byte count with the largest fitting binary unit and one
// decimal place (e.g. "1.5 KB", "3.0 GB"); counts below 1 KiB are
// printed as plain integers ("512 B").
std::string formatBytes(size_t bytes) {
    struct Unit {
        double size;
        const char* suffix;
    };
    static const Unit kUnits[] = {
        {1024.0 * 1024.0 * 1024.0 * 1024.0, " TB"},
        {1024.0 * 1024.0 * 1024.0, " GB"},
        {1024.0 * 1024.0, " MB"},
        {1024.0, " KB"},
    };

    std::ostringstream out;
    out << std::fixed << std::setprecision(1);
    for (const auto& unit : kUnits) {
        if (bytes >= unit.size) {
            out << (bytes / unit.size) << unit.suffix;
            return out.str();
        }
    }
    out << bytes << " B";
    return out.str();
}

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
// Logs current pipeline load (total and scheduled requests) plus KV-cache
// status; the cache string format depends on whether the cache is dynamic
// (see formatCacheInfo). The diff residue that duplicated this log call
// with the old "{:.1f}%" format is removed — one log line per invocation.
void printMetrics() {
    ov::genai::PipelineMetrics metrics = pipe->get_metrics();
    SPDLOG_LOGGER_INFO(llm_executor_logger, "All requests: {}; Scheduled requests: {}; Cache usage {};",
        metrics.requests, metrics.scheduled_requests, formatCacheInfo(metrics.cache_usage, metrics.kv_cache_usage_in_bytes, this->isDynamicKVCache));
}
};
#pragma GCC diagnostic pop
Expand Down Expand Up @@ -98,8 +136,8 @@ class LLMExecutorWrapper {
}

public:
// Constructs the executor (forwarding the dynamic-KV-cache flag, default
// false for backward compatibility with existing call sites) and starts
// the worker thread that drains pipeline requests.
LLMExecutorWrapper(std::shared_ptr<ov::genai::ContinuousBatchingPipeline> pipe, bool isDynamicKVCache = false) :
    llmExecutor(std::move(pipe), isDynamicKVCache) {
    llmExecutorThread = std::thread(LLMExecutorWrapper::run, &llmExecutor, &finishExecutorThread);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ Status ContinuousBatchingServableInitializer::initialize(std::shared_ptr<GenAiSe
}
properties->maxModelLength = parseMaxModelLength(parsedModelsPath);

properties->llmExecutorWrapper = std::make_shared<LLMExecutorWrapper>(properties->pipeline);
properties->llmExecutorWrapper = std::make_shared<LLMExecutorWrapper>(properties->pipeline, properties->schedulerConfig.cache_size == 0);

return StatusCode::OK;
}
Expand Down