Updated logic for determining whether the PA backend is explicitly required (openvinotoolkit#1976)

ilya-lavrenov · web-flow · commit 7ea260c4e737 · 2025-03-26T16:20:20.000+04:00
If the user passes `LLMPipeline(model_path, device, ATTENTION_BACKEND=PA)`,
then it should throw if the PA backend cannot be used (either the PA op
is not available or the model cannot be converted to the PA representation).
diff --git a/src/cpp/src/llm_pipeline.cpp b/src/cpp/src/llm_pipeline.cpp
@@ -45,7 +45,10 @@ SchedulerConfig get_latency_oriented_scheduler_config() {
 }
 
 bool explicitly_requires_paged_attention(const ov::AnyMap& properties) {
-    if (properties.find(ov::genai::scheduler_config.name()) != properties.end()) {
+    auto attention_backend_it = properties.find("ATTENTION_BACKEND");
+
+    if (properties.find(ov::genai::scheduler_config.name()) != properties.end() ||
+        (attention_backend_it != properties.end() && attention_backend_it->second.as<std::string>() == PA_BACKEND)) {
         if (is_paged_attention_available()) {
             return true;
         } else {