Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 4 additions & 9 deletions src/cpp/src/llm/pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,6 @@

namespace ov {

-// forward declaration, taken from OpenVINO Dev API
-bool with_cpu_sve();
-
namespace genai {

namespace {
Expand All @@ -28,10 +25,8 @@ const std::string PA_BACKEND = "PA";
const std::string SDPA_BACKEND = "SDPA";

inline bool is_paged_attention_available() {
-#ifdef OPENVINO_ARCH_X86_64
+#if defined(OPENVINO_ARCH_X86_64) || defined(OPENVINO_ARCH_ARM64)
return true;
-#elif defined OPENVINO_ARCH_ARM64
-return with_cpu_sve();
#else
return false;
#endif
Expand All @@ -52,21 +47,21 @@ bool explicitly_requires_paged_attention(const ov::AnyMap& properties) {
if (is_paged_attention_available()) {
return true;
} else {
-OPENVINO_THROW("Continuous batching backend requires PagedAttention operation support, which is available on x86_64 or ARM64 with SVE platforms only");
+OPENVINO_THROW("Continuous batching backend requires PagedAttention operation support, which is available on x86_64 or ARM64 platforms only");
}
}
if (properties.find(utils::DRAFT_MODEL_ARG_NAME) != properties.end()) {
if (is_paged_attention_available()) {
return true;
} else {
-OPENVINO_THROW("Speculative decoding requires PagedAttention operation support, which is available on x86_64 or ARM64 with SVE platforms only");
+OPENVINO_THROW("Speculative decoding requires PagedAttention operation support, which is available on x86_64 or ARM64 platforms only");
}
}
if (properties.find(ov::genai::prompt_lookup.name()) != properties.end()) {
if (is_paged_attention_available()) {
return true;
} else {
-OPENVINO_THROW("Prompt lookup decoding requires PagedAttention operation support, which is available on x86_64 or ARM64 with SVE platforms only");
+OPENVINO_THROW("Prompt lookup decoding requires PagedAttention operation support, which is available on x86_64 or ARM64 platforms only");
}
}
return false;
Expand Down