3232# include " nodes/kernels/aarch64/brgemm_kernel.hpp"
3333# include " nodes/kernels/aarch64/sve_utils.hpp"
3434# include " nodes/kernels/kai/kleidi_kernel.hpp"
35- using namespace ov ::intel_cpu::sve_utils;
3635#endif
3736
3837namespace ov ::Extensions::Cpu::XARCH {
@@ -2593,9 +2592,9 @@ struct MHAHelper {
25932592 PlainTensor f32_cvt;
25942593 if (q_is_xf16) {
25952594 f32_cvt.resize <float >({size_t {rnd_up (cur_kv_len, _block_size)}});
2596- cvt_copy (f32_cvt.ptr <float >(0 ),
2597- reinterpret_cast <DATA_TYPE*>(score),
2598- rnd_up (cur_kv_len, _block_size));
2595+ sve_utils:: cvt_copy (f32_cvt.ptr <float >(0 ),
2596+ reinterpret_cast <DATA_TYPE*>(score),
2597+ rnd_up (cur_kv_len, _block_size));
25992598 soft_in = f32_cvt.ptr <float >(0 );
26002599 }
26012600 if (_sliding_window) {
@@ -2641,9 +2640,9 @@ struct MHAHelper {
26412640 alibi_slope);
26422641 }
26432642 if (score_output) {
2644- cvt_copy (score_output + h * rnd_up (cur_kv_len, 16 ),
2645- reinterpret_cast <DATA_TYPE*>(score),
2646- cur_kv_len);
2643+ sve_utils:: cvt_copy (score_output + h * rnd_up (cur_kv_len, 16 ),
2644+ reinterpret_cast <DATA_TYPE*>(score),
2645+ cur_kv_len);
26472646 }
26482647 }
26492648
@@ -3164,7 +3163,8 @@ struct MHA {
31643163 v_ptr,
31653164 _helper._block_size ,
31663165 _helper.SV ,
3167- _helper._value_group_size );
3166+ _helper._value_group_size ,
3167+ _helper._quant_value_bychannel );
31683168# else
31693169 pack_32NxK<DATA_TYPE, VALUE_PREC>(
31703170 _helper._wv_scratch_b .template ptr <DATA_TYPE>(batch_in_reorder, kv_block, hk),
@@ -3176,6 +3176,7 @@ struct MHA {
31763176 _helper.SV ,
31773177 _helper._value_group_size ,
31783178 _helper._quant_value_bychannel );
3179+ # endif
31793180 } else {
31803181 // need to decompress
31813182 if (!q_cache_is_same) {
@@ -3929,9 +3930,11 @@ std::shared_ptr<PagedAttentionExecutor> make_pa_executor(ov::element::Type data_
39293930 }
39303931 if (data_type == ov::element::f16 ) {
39313932 if (key_cache_type == ov::element::u8 && value_cache_type == ov::element::u8 ) {
3932- executor = std::make_shared<AttentionExecutor<ov::float16, uint8_t , ov::element::u8 >>(key_group_size,
3933- value_group_size,
3934- quant_key_bychannel);
3933+ executor = std::make_shared<AttentionExecutor<ov::float16, ov::element::u8 , ov::element::u8 >>(
3934+ key_group_size,
3935+ value_group_size,
3936+ quant_key_bychannel,
3937+ quant_value_bychannel);
39353938 } else {
39363939 OPENVINO_THROW (" make_pa_executor: key_cache_type and value_cache_type of u8 is only support" );
39373940 }
0 commit comments