Skip to content

Commit 78d52cb

Browse files
committed
Removed the forced conversion of KV cache precision for PagedAttention (PA)
1 parent 1902410 commit 78d52cb

File tree

1 file changed

+0
-6
lines changed

1 file changed

+0
-6
lines changed

src/common/transformations/src/transformations/common_optimizations/convert_pagedattn_inputs.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -74,16 +74,10 @@ ov::pass::ConvertPagedAttnInputs::ConvertPagedAttnInputs(const KVCacheConfig& co
7474
const auto pa_op = m.get_match_root();
7575
auto key_cache = ov::as_type_ptr<ov::op::v0::Parameter>(pa_op->get_input_node_shared_ptr(3));
7676
auto value_cache = ov::as_type_ptr<ov::op::v0::Parameter>(pa_op->get_input_node_shared_ptr(4));
77-
#if defined(OPENVINO_ARCH_ARM64) && !defined(__APPLE__)
78-
auto format_cache_precision = [](ov::element::Type cache_precision, ov::element::Type infer_precision) {
79-
return ov::element::u8;
80-
};
81-
#else
8277
auto format_cache_precision = [](ov::element::Type cache_precision, ov::element::Type infer_precision) {
8378
return cache_precision == ov::element::f16 && infer_precision == ov::element::bf16 ? infer_precision
8479
: cache_precision;
8580
};
86-
#endif
8781
auto init_cache_shape = [&](const size_t head_nums,
8882
const size_t head_size,
8983
const size_t block_size,

0 commit comments

Comments
 (0)