Skip to content

Commit dea6afe

Browse files
committed
Extend cpu functional testing on LNL+ systems
1 parent fd58592 commit dea6afe

File tree

6 files changed

+46
-13
lines changed

6 files changed

+46
-13
lines changed

src/inference/dev_api/openvino/runtime/system_conf.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,13 @@ OPENVINO_RUNTIME_API bool with_cpu_x86_avx2();
111111
*/
112112
OPENVINO_RUNTIME_API bool with_cpu_x86_avx2_vnni();
113113

114+
/**
115+
* @brief Checks whether CPU supports AVX2_VNNI_2 capability
116+
* @ingroup ov_dev_api_system_conf
117+
* @return `True` if AVX2_VNNI_2 instructions are available, `false` otherwise
118+
*/
119+
OPENVINO_RUNTIME_API bool with_cpu_x86_avx2_vnni_2();
120+
114121
/**
115122
* @brief Checks whether CPU supports AVX 512 capability
116123
* @ingroup ov_dev_api_system_conf

src/inference/src/system_conf.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ bool with_cpu_x86_avx2_vnni() {
7575
return get_cpu_info().has(Xbyak::util::Cpu::tAVX2 | Xbyak::util::Cpu::tAVX_VNNI);
7676
}
7777

78+
bool with_cpu_x86_avx2_vnni_2() {
79+
return with_cpu_x86_avx2_vnni() && get_cpu_info().has(Xbyak::util::Cpu::tAVX_VNNI_INT8) &&
80+
get_cpu_info().has(Xbyak::util::Cpu::tAVX_NE_CONVERT);
81+
}
82+
7883
bool with_cpu_x86_avx512f() {
7984
return get_cpu_info().has(Xbyak::util::Cpu::tAVX512F);
8085
}
@@ -133,6 +138,9 @@ bool with_cpu_x86_avx2() {
133138
bool with_cpu_x86_avx2_vnni() {
134139
return false;
135140
}
141+
bool with_cpu_x86_avx2_vnni_2() {
142+
return false;
143+
}
136144
bool with_cpu_x86_avx512f() {
137145
return false;
138146
}

src/plugins/intel_cpu/src/nodes/scaled_attn.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1902,7 +1902,7 @@ ov::element::Type ScaledDotProductAttention::getKVCachePrecision() {
19021902
ov::element::Type ScaledDotProductAttention::getRuntimePrecision() const {
19031903
auto rtPrecision = getOriginalInputPrecisionAtPort(0);
19041904
// bf16 should be enabled only when the platform supports it
1905-
if (rtPrecision == ov::element::bf16 && ov::with_cpu_x86_bfloat16()) {
1905+
if (rtPrecision == ov::element::bf16 && (ov::with_cpu_x86_bfloat16() || with_cpu_x86_avx2_vnni_2())) {
19061906
rtPrecision = ov::element::bf16;
19071907
} else if (rtPrecision == ov::element::f16 && ov::intel_cpu::hasHardwareSupport(ov::element::f16)) {
19081908
rtPrecision = ov::element::f16;

src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -484,13 +484,17 @@ std::vector<std::string> disabledTestPatterns() {
484484
retVector.emplace_back(R"(.*smoke_static/ConvertFqRnnToQuantizedRnn.CompareWithRefs/Type=GRUSequence.*2.5.10.*2.1.4.*2.1.4.*)");
485485
#endif
486486
if (!ov::with_cpu_x86_avx512_core()) {
487+
// Issue: MFDNN-12818
488+
retVector.emplace_back(
489+
R"(.*smoke_LPT/RecurrentCellTransformation.CompareWithRefImpl/f32_\[1,1,3\]_CPU_f32FQ_X_level=256_.*_FQ_W_level=255.*)");
490+
retVector.emplace_back(
491+
R"(.*smoke_static/ConvertFqRnnToQuantizedRnn.CompareWithRefs/Type=GRUSequence.*2.5.10.*2.1.4.*2.1.4.*)");
492+
}
493+
if (!ov::intel_cpu::hasHardwareSupport(ov::element::bf16)) {
487494
// on platforms which do not support bfloat16, we are disabling bf16 tests since there are no bf16 primitives,
488495
// tests are useless on such platforms
489496
retVector.emplace_back(R"(.*(BF|bf)16.*)");
490497
retVector.emplace_back(R"(.*bfloat16.*)");
491-
// Issue: MFDNN-12818
492-
retVector.emplace_back(R"(.*smoke_LPT/RecurrentCellTransformation.CompareWithRefImpl/f32_\[1,1,3\]_CPU_f32FQ_X_level=256_.*_FQ_W_level=255.*)");
493-
retVector.emplace_back(R"(.*smoke_static/ConvertFqRnnToQuantizedRnn.CompareWithRefs/Type=GRUSequence.*2.5.10.*2.1.4.*2.1.4.*)");
494498
}
495499
if (!ov::with_cpu_x86_avx2()) {
496500
// MatMul in Snippets uses BRGEMM that is supported only on AVX2 (and newer) platforms
@@ -499,7 +503,7 @@ std::vector<std::string> disabledTestPatterns() {
499503
retVector.emplace_back(R"(.*Snippets.*(MatMul|Matmul).*)");
500504
}
501505
#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
502-
if (!ov::with_cpu_x86_avx512_core_fp16()) {
506+
if (!ov::intel_cpu::hasHardwareSupport(ov::element::f16)) {
503507
// Skip fp16 tests for platforms that don't support fp16 precision
504508
retVector.emplace_back(R"(.*INFERENCE_PRECISION_HINT=(F|f)16.*)");
505509
retVector.emplace_back(R"(.*ConcatMultiQuerySDPTest.*f16.*)");
@@ -637,7 +641,7 @@ std::vector<std::string> disabledTestPatterns() {
637641
retVector.emplace_back(R"(.*smoke_Deconv_(2|3)D_NSPC_INT8_AMX/DeconvolutionLayerCPUTest.*)");
638642
}
639643

640-
if (ov::with_cpu_x86_avx512_core_fp16()) {
644+
if (ov::with_cpu_x86_avx512_core_fp16() || ov::with_cpu_x86_avx2_vnni_2()) {
641645
// Issue: 130473
642646
retVector.emplace_back(R"(smoke_CompareWithRefs_4D.*/EltwiseLayerCPUTest.*Sub_secondary.*INFERENCE_PRECISION_HINT=f16.*FakeQuantize.*enforceSnippets=1.*)");
643647
retVector.emplace_back(R"(smoke_Reduce.*/ReduceCPULayerTest.*axes=\((0.1|1)\).*Prod_KeepDims.*INFERENCE_PRECISION_HINT=f16.*)");
@@ -651,5 +655,11 @@ std::vector<std::string> disabledTestPatterns() {
651655
retVector.emplace_back(R"(smoke_Conv_Sum_Broadcast_FP16/ConvSumInPlaceTest.*Relu\.Multiply\(PerChannel\)\.Add\(PerChannel\).*)");
652656
}
653657

658+
if (ov::with_cpu_x86_avx2_vnni_2()) {
659+
// jit_gemm_BF16 kernels are not supported for conv, inner_product, matmul on avx2_vnni_2 platforms
660+
retVector.emplace_back(R"(smoke_Conv_.*D_GEMM_BF16.*)");
661+
retVector.emplace_back(R"(smoke_.*MatMulLayerCPUTest.*INFERENCE_PRECISION_HINT=bf16.*_primitive=jit_gemm.*)");
662+
}
663+
654664
return retVector;
655665
}

src/plugins/intel_cpu/tests/functional/utils/cpu_test_utils.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -472,11 +472,11 @@ CPUTestsBase::deduce_expected_precision(const ov::element::Type& opPrecision,
472472
if (it != configuration.end()) {
473473
auto inferencePrecisionConfig = it->second.as<ov::element::Type>();
474474
inferencePrecisionSetExplicitly = true;
475-
// TODO also need to check (dnnl::impl::cpu::x64::avx2_vnni_2)
476-
if ((inferencePrecisionConfig == ov::element::bf16 && ov::with_cpu_x86_avx512_core())
477-
|| (inferencePrecisionConfig == ov::element::f16 && ov::with_cpu_x86_avx512_core_fp16())
478-
|| (inferencePrecisionConfig == ov::element::f32)
479-
|| (inferencePrecisionConfig == ov::element::undefined)) {
475+
if ((inferencePrecisionConfig == ov::element::bf16 &&
476+
(ov::with_cpu_x86_avx512_core() || ov::with_cpu_x86_avx2_vnni_2())) ||
477+
(inferencePrecisionConfig == ov::element::f16 &&
478+
(ov::with_cpu_x86_avx512_core_fp16() || ov::with_cpu_x86_avx2_vnni_2())) ||
479+
(inferencePrecisionConfig == ov::element::f32) || (inferencePrecisionConfig == ov::element::undefined)) {
480480
inferencePrecision = inferencePrecisionConfig;
481481
}
482482
}
@@ -496,7 +496,8 @@ CPUTestsBase::deduce_expected_precision(const ov::element::Type& opPrecision,
496496
ov::element::Type deducedType = opPrecision;
497497
// enforceInferPrecision stage
498498
if (inferencePrecision == ov::element::bf16) {
499-
deducedType = ov::with_cpu_x86_avx512_core() ? ov::element::bf16 : ov::element::f32;
499+
deducedType =
500+
(ov::with_cpu_x86_avx512_core() || ov::with_cpu_x86_avx2_vnni_2()) ? ov::element::bf16 : ov::element::f32;
500501
}
501502

502503
// ngraph transform pipeline stage
@@ -506,7 +507,8 @@ CPUTestsBase::deduce_expected_precision(const ov::element::Type& opPrecision,
506507
}
507508
}
508509
if (deducedType == ov::element::bf16) {
509-
deducedType = ov::with_cpu_x86_avx512_core() ? ov::element::bf16 : ov::element::f32;
510+
deducedType =
511+
(ov::with_cpu_x86_avx512_core() || ov::with_cpu_x86_avx2_vnni_2()) ? ov::element::bf16 : ov::element::f32;
510512
} else if (deducedType == ov::element::f16) {
511513
if (inferencePrecision != ov::element::f16 && inferencePrecision != ov::element::undefined) {
512514
deducedType = ov::element::f32;

src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,12 @@ void SubgraphBaseTest::compile_model() {
307307
}
308308
try {
309309
inference_precision = core->get_property(targetDevice, ov::hint::inference_precision);
310+
// For avx2_vnni_2 platforms, use the explicitly set inference_precision hint so that a higher threshold can be generated during the
311+
// following calculate_thresholds stage
312+
if (ov::with_cpu_x86_avx2_vnni_2() &&
313+
configuration.find(ov::hint::inference_precision.name()) != configuration.end()) {
314+
inference_precision = configuration.at(ov::hint::inference_precision.name()).as<ov::element::Type>();
315+
}
310316
} catch (std::exception& e) {
311317
std::cout << "[ WARNING ] Impossible to get Inference Precision with exception: " << e.what() << std::endl;
312318
}

0 commit comments

Comments
 (0)