Update the ARM FP16 quantization scale in quant_u8: use max(0.05, |min| / 65504) instead of a fixed 0.05

ashwins990 · ashwins990 · commit 8491d6221e1a · 2026-02-10T13:11:09.000+05:30
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_quant_kernel.hpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_quant_kernel.hpp
@@ -190,7 +190,7 @@ void quant_u8(const T* src, uint8_t* dst, size_t n, float& scale, float& zp) {
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
         // For FP16 in ARM we use FP16 accumulator
         if constexpr (std::is_same_v<T, ov::float16>) {
-            scale = 0.05f;
+            scale = std::max(0.05f, std::abs(min) / 65504.0f);
         }
 #endif
     }

Original file line number	Diff line number	Diff line change
`@@ -190,7 +190,7 @@ void quant_u8(const T* src, uint8_t* dst, size_t n, float& scale, float& zp) {`
`190`	`190`	`#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC`
`191`	`191`	`// For FP16 in ARM we use FP16 accumulator`
`192`	`192`	`if constexpr (std::is_same_v<T, ov::float16>) {`
`193`		`- scale = 0.05f;`
	`193`	`+ scale = std::max(0.05f, std::abs(min) / 65504.0f);`
`194`	`194`	`}`
`195`	`195`	`#endif`
`196`	`196`	`}`