
Commit ae73099

extend ov::with_cpu_x86_bfloat16() to cover avx2_vnni_2 and introduce ov::with_cpu_x86_float16()

1 parent: 2d9f533

13 files changed: +39 -17 lines changed
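In short: ov::with_cpu_x86_bfloat16() now also reports true on AVX2_VNNI_2-only CPUs, and the new ov::with_cpu_x86_float16() covers both AVX512_FP16 and AVX2_VNNI_2. A minimal sketch of the resulting semantics, using only the public functions this commit touches (the standalone driver is illustrative, not part of the commit):

#include <iostream>

#include "openvino/runtime/system_conf.hpp"

int main() {
    // After this commit:
    //   with_cpu_x86_bfloat16() == AVX512_BF16 support || AVX2_VNNI_2 support
    //   with_cpu_x86_float16()  == AVX512_FP16 support || AVX2_VNNI_2 support (new)
    std::cout << "bf16 capable: " << ov::with_cpu_x86_bfloat16() << '\n'
              << "fp16 capable: " << ov::with_cpu_x86_float16() << '\n'
              << "avx2_vnni_2:  " << ov::with_cpu_x86_avx2_vnni_2() << '\n';
    return 0;
}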

src/inference/dev_api/openvino/runtime/system_conf.hpp

Lines changed: 8 additions & 1 deletion
@@ -142,10 +142,17 @@ OPENVINO_RUNTIME_API bool with_cpu_x86_avx512_core_vnni();
 /**
  * @brief Checks whether CPU supports BFloat16 capability
  * @ingroup ov_dev_api_system_conf
- * @return `True` if tAVX512_BF16 instructions are available, `false` otherwise
+ * @return `True` if tAVX512_BF16 or AVX2_VNNI_2 instructions are available, `false` otherwise
  */
 OPENVINO_RUNTIME_API bool with_cpu_x86_bfloat16();
 
+/**
+ * @brief Checks whether CPU supports Float16 capability
+ * @ingroup ov_dev_api_system_conf
+ * @return `True` if tAVX512_FP16 or AVX2_VNNI_2 instructions are available, `false` otherwise
+ */
+OPENVINO_RUNTIME_API bool with_cpu_x86_float16();
+
 /**
  * @brief Checks whether CPU supports fp16 capability
  * @ingroup ov_dev_api_system_conf

src/inference/src/system_conf.cpp

Lines changed: 8 additions & 1 deletion
@@ -93,13 +93,17 @@ bool with_cpu_x86_avx512_core_vnni() {
 }
 
 bool with_cpu_x86_bfloat16() {
-    return get_cpu_info().has(Xbyak::util::Cpu::tAVX512_BF16);
+    return get_cpu_info().has(Xbyak::util::Cpu::tAVX512_BF16) || with_cpu_x86_avx2_vnni_2();
 }
 
 bool with_cpu_x86_avx512_core_fp16() {
     return get_cpu_info().has(Xbyak::util::Cpu::tAVX512_FP16);
 }
 
+bool with_cpu_x86_float16() {
+    return with_cpu_x86_avx512_core_fp16() || with_cpu_x86_avx2_vnni_2();
+}
+
 bool with_cpu_x86_avx512_core_amx_int8() {
     return get_cpu_info().has(Xbyak::util::Cpu::tAMX_INT8);
 }
@@ -156,6 +160,9 @@ bool with_cpu_x86_bfloat16() {
 bool with_cpu_x86_avx512_core_fp16() {
     return false;
 }
+bool with_cpu_x86_float16() {
+    return false;
+}
 bool with_cpu_x86_avx512_core_amx_int8() {
     return false;
 }
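Because the extended check now also fires on AVX2_VNNI_2-only machines, every plugin call site below that needs the AVX512-class bf16 kernels pairs it with !ov::with_cpu_x86_avx2_vnni_2(). A hypothetical helper (not part of this commit) that names the recurring pattern:

#include "openvino/runtime/system_conf.hpp"

// Hypothetical helper, not in the commit: true only when bf16 support comes
// from AVX512_BF16 (or better), not merely from avx2_vnni_2.
static inline bool has_native_bf16_kernels() {
    return ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2();
}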

src/plugins/intel_cpu/src/nodes/paged_attn.cpp

Lines changed: 1 addition & 1 deletion
@@ -256,7 +256,7 @@ bool PagedAttention::isSupportedOperation(const std::shared_ptr<const ov::Node>&
 ov::element::Type PagedAttention::getRuntimePrecision() const {
     auto rtPrecision = getOriginalInputPrecisionAtPort(0);
     // bf16 should be enabled only when platform supports
-    if (rtPrecision == ov::element::bf16 && ov::with_cpu_x86_bfloat16()) {
+    if (rtPrecision == ov::element::bf16 && ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) {
         rtPrecision = ov::element::bf16;
     } else if (rtPrecision == ov::element::f16 && ov::with_cpu_x86_avx512_core_fp16()) {
         rtPrecision = ov::element::f16;

src/plugins/intel_cpu/src/nodes/scaled_attn.cpp

Lines changed: 2 additions & 2 deletions
@@ -1200,7 +1200,7 @@ void ScaledDotProductAttention::createPrimitive() {
     std::shared_ptr<Executor> executor = nullptr;
 #ifdef OPENVINO_ARCH_X86_64
     if (rtPrecision == ov::element::bf16) {
-        if (ov::with_cpu_x86_bfloat16()) {
+        if (ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) {
             executor = std::make_shared<AttentionExecutor<KT_ONEDNN, ov::bfloat16>>(context);
         } else {
             executor = std::make_shared<AttentionExecutor<KT_REF, ov::bfloat16>>(context);
@@ -1903,7 +1903,7 @@ ov::element::Type ScaledDotProductAttention::getKVCachePrecision() {
 ov::element::Type ScaledDotProductAttention::getRuntimePrecision() const {
     auto rtPrecision = getOriginalInputPrecisionAtPort(0);
     // bf16 should be enabled only when platform supports
-    if (rtPrecision == ov::element::bf16 && (ov::with_cpu_x86_bfloat16() || with_cpu_x86_avx2_vnni_2())) {
+    if (rtPrecision == ov::element::bf16 && ov::with_cpu_x86_bfloat16()) {
         rtPrecision = ov::element::bf16;
     } else if (rtPrecision == ov::element::f16 && ov::intel_cpu::hasHardwareSupport(ov::element::f16)) {
         rtPrecision = ov::element::f16;
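A practical consequence of the two ScaledDotProductAttention hunks above: on an AVX2_VNNI_2-only CPU the runtime precision can still resolve to bf16, but createPrimitive() now picks the reference executor instead of the oneDNN one. A distilled sketch of that branch (the Kernel enum is a stand-in for the KT_ONEDNN/KT_REF template tags in the diff):

#include "openvino/runtime/system_conf.hpp"

enum class Kernel { OneDNN, Reference };

// Stand-in for the bf16 branch in ScaledDotProductAttention::createPrimitive():
// oneDNN bf16 kernels are used only when bf16 support does not come solely
// from avx2_vnni_2.
Kernel pick_bf16_sdpa_kernel() {
    const bool onednn_bf16_ok = ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2();
    return onednn_bf16_ok ? Kernel::OneDNN : Kernel::Reference;
}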

src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp

Lines changed: 2 additions & 1 deletion
@@ -156,7 +156,8 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigStreamsNum) {
 #if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)
 const auto expected_precision_for_performance_mode = ov::intel_cpu::hasHardwareSupport(ov::element::f16) ? ov::element::f16 : ov::element::f32;
 #else
-const auto expected_precision_for_performance_mode = ov::with_cpu_x86_bfloat16() ? ov::element::bf16 : ov::element::f32;
+const auto expected_precision_for_performance_mode =
+    (ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) ? ov::element::bf16 : ov::element::f32;
 #endif
 
 TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigHintInferencePrecision) {

src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/random_uniform.cpp

Lines changed: 1 addition & 1 deletion
@@ -79,7 +79,7 @@ void RandomUniformLayerTestCPU::SetUp() {
             updateSelectedType(getPrimitiveType(), ElementType::f32, configuration);
         }
     } else if (output_prc == ElementType::bf16) {
-        if (ov::with_cpu_x86_bfloat16()) {
+        if (ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) {
             updateSelectedType(getPrimitiveType(), ElementType::bf16, configuration);
         } else {
             updateSelectedType("ref_any", ElementType::bf16, configuration);

src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/group_convolution.cpp

Lines changed: 1 addition & 1 deletion
@@ -275,7 +275,7 @@ std::vector<groupConvLayerCPUTestParamsSet> filterParamsSetForDevice(
         auto additionalConfig = std::get<configIndex>(param);
         if (additionalConfig.count(ov::hint::inference_precision.name()) &&
             ov::element::bf16 == additionalConfig[ov::hint::inference_precision.name()].as<ov::element::Type>() &&
-            !ov::with_cpu_x86_bfloat16()) {
+            (!ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx2_vnni_2())) {
             continue;
         }
         resParamsSet.push_back(param);

src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp

Lines changed: 8 additions & 2 deletions
@@ -572,7 +572,7 @@ std::vector<std::string> disabledTestPatterns() {
     // TODO: Issue 92895
     // on platforms which do not support AMX, we are disabling I8 input tests
     retVector.emplace_back(R"(smoke_LPT/FakeQuantizeWithNotOptimalTransformation.CompareWithRefImpl.*CPU.*i8.*)");
-    if (!ov::with_cpu_x86_avx512_core_amx_bf16() && !ov::with_cpu_x86_bfloat16()) {
+    if ((!ov::with_cpu_x86_avx512_core_amx_bf16() && !ov::with_cpu_x86_bfloat16()) || ov::with_cpu_x86_avx2_vnni_2()) {
        // ignored for not supported bf16 platforms
        retVector.emplace_back(R"(.*smoke_Snippets_EnforcePrecision_bf16.*)");
        retVector.emplace_back(R"(.*smoke_Snippets_MHAWOTransposeEnforceBF16.*)");
@@ -631,7 +631,7 @@ std::vector<std::string> disabledTestPatterns() {
         retVector.emplace_back(R"(.*smoke_Deconv_(2|3)D_NSPC_INT8_AMX/DeconvolutionLayerCPUTest.*)");
     }
 
-    if (ov::with_cpu_x86_avx512_core_fp16() || ov::with_cpu_x86_avx2_vnni_2()) {
+    if (ov::with_cpu_x86_float16()) {
        // Issue: 143852
        retVector.emplace_back(R"(smoke_ConvertRangeSubgraphCPUTest/ConvertRangeSubgraphCPUTest\.CompareWithRefs.*Prc=f16.*)");
        retVector.emplace_back(R"((smoke|nightly)_FC_3D_FP16/.*_Fused=Multiply\(PerChannel\).*)");
@@ -648,6 +648,12 @@ std::vector<std::string> disabledTestPatterns() {
         retVector.emplace_back(
             R"(smoke_GroupConv_.*D_Gemm_BF16/GroupConvolutionLayerCPUTest.CompareWithRefs.*primitive=jit_gemm.*)");
         retVector.emplace_back(R"(smoke_.*MatMulLayerCPUTest.*INFERENCE_PRECISION_HINT=bf16.*_primitive=jit_gemm.*)");
+        // Issue: 163147
+        retVector.emplace_back(
+            R"(smoke_CompareWithRefs_4D.*[Ff]using.*EltwiseLayerCPUTest\.CompareWithRefs.*INFERENCE_PRECISION_HINT=f16.*enforceSnippets=1.*)");
+        // Issue: 163144
+        retVector.emplace_back(
+            R"(smoke_ScaledAttn_CPU/ScaledAttnLayerCPUTest.CompareWithRefs/netPRC=bf16.*_TS=\(2\.8\.16\.32\)_\(2\.8\.16\.32\)_\(2\.8\.16\.32\)_\(1\.8\.48\.32\)_\(1\.8\.48\.32\)_\(1\.8\.48\.32\)_\(16\.48\)_\(16\.1\)_\(1\.48\).*)");
     }
 
     return retVector;

src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/fully_connected.cpp

Lines changed: 2 additions & 1 deletion
@@ -31,7 +31,8 @@ static inline std::vector<std::vector<element::Type>> precisions(bool only_fp32
         auto quant = quantized_precisions();
         std::copy(quant.begin(), quant.end(), std::back_inserter(prc));
         // In Snippets MatMul BF16 is supported only on bf16/AMX platforms
-        if (ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16()) {
+        if ((ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) ||
+            ov::with_cpu_x86_avx512_core_amx_bf16()) {
             prc.emplace_back(std::vector<element::Type>{element::bf16, element::bf16});
         }
     }

src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp

Lines changed: 2 additions & 1 deletion
@@ -179,7 +179,8 @@ static inline std::vector<std::vector<element::Type>> precisions(bool only_fp32
         prc.emplace_back(std::vector<element::Type>{element::u8, element::i8});
     }
     // In Snippets MatMul BF16 is supported only on bf16/AMX platforms
-    if (ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16()) {
+    if ((ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) ||
+        ov::with_cpu_x86_avx512_core_amx_bf16()) {
         prc.emplace_back(std::vector<element::Type>{element::bf16, element::bf16});
     }
 }
