
Commit 2b477c1

[CPU] extend bf16 fp16 cpu functional tests on LNL+ systems (#29168)
### Details:
- *extend bf16 fp16 cpu functional tests on LNL+ systems*

### Tickets:
- *CVS-161636*
1 parent 8c7c31a commit 2b477c1
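In short, the commit treats AVX2_VNNI_2-capable CPUs (Lunar Lake and newer) as platforms with native bf16/f16 support, both in the ScaledDotProductAttention node and across the functional-test utilities. A minimal standalone sketch of the expectation the tests now encode is shown below; the helper name `expected_runtime_precision` and its boolean parameters are illustrative only and not part of the plugin API.

#include "openvino/core/type/element_type.hpp"

// Hypothetical helper mirroring the precision deduction the tests expect:
// bf16 survives when either AVX512-BF16 or AVX2_VNNI_2 (LNL+) is available,
// otherwise the plugin is expected to fall back to f32.
inline ov::element::Type expected_runtime_precision(ov::element::Type requested,
                                                    bool has_avx512_core,
                                                    bool has_avx2_vnni_2) {
    if (requested == ov::element::bf16) {
        return (has_avx512_core || has_avx2_vnni_2) ? ov::element::bf16 : ov::element::f32;
    }
    return requested;
}

The same rule appears twice in CPUTestsBase::deduce_expected_precision in the cpu_test_utils.cpp diff below, once for the enforced inference precision and once after the transformation pipeline stage.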

File tree

16 files changed: +112 / -26 lines changed

src/plugins/intel_cpu/src/nodes/scaled_attn.cpp

+12-5
@@ -1238,10 +1238,17 @@ void ScaledDotProductAttention::createPrimitive() {
     std::shared_ptr<Executor> executor = nullptr;
 #ifdef OPENVINO_ARCH_X86_64
     if (rtPrecision == ov::element::bf16) {
-        executor = std::make_shared<AttentionExecutor<KT_ONEDNN, ov::bfloat16>>(context,
-                                                                                m_key_quant_param.groupSize,
-                                                                                m_value_quant_param.groupSize,
-                                                                                m_key_quant_param.isByChannel);
+        if (ov::with_cpu_x86_bfloat16()) {
+            executor = std::make_shared<AttentionExecutor<KT_ONEDNN, ov::bfloat16>>(context,
+                                                                                    m_key_quant_param.groupSize,
+                                                                                    m_value_quant_param.groupSize,
+                                                                                    m_key_quant_param.isByChannel);
+        } else {
+            executor = std::make_shared<AttentionExecutor<KT_REF, ov::bfloat16>>(context,
+                                                                                 m_key_quant_param.groupSize,
+                                                                                 m_value_quant_param.groupSize,
+                                                                                 m_key_quant_param.isByChannel);
+        }
     } else if (rtPrecision == ov::element::f16) {
         if (with_cpu_x86_avx512_core_fp16()) {
             executor = std::make_shared<AttentionExecutor<KT_ONEDNN, ov::float16>>(context,

@@ -2075,7 +2082,7 @@ const ScaledDotProductAttention::SDPAQuantParam& ScaledDotProductAttention::getV
 ov::element::Type ScaledDotProductAttention::getRuntimePrecision() const {
     auto rtPrecision = getOriginalInputPrecisionAtPort(0);
     // bf16 should be enabled only when platform supports
-    if (rtPrecision == ov::element::bf16 && ov::with_cpu_x86_bfloat16()) {
+    if (rtPrecision == ov::element::bf16 && (ov::with_cpu_x86_bfloat16() || mayiuse(cpu_isa_t::avx2_vnni_2))) {
         rtPrecision = ov::element::bf16;
     } else if (rtPrecision == ov::element::f16 && ov::intel_cpu::hasHardwareSupport(ov::element::f16)) {
         rtPrecision = ov::element::f16;
src/plugins/intel_cpu/src/nodes/topk.cpp

+2-3
@@ -18,6 +18,7 @@
 #include "onednn/dnnl.h"
 #include "openvino/core/parallel.hpp"
 #include "openvino/op/topk.hpp"
+#include "utils/cpu_utils.hpp"
 #include "utils/ngraph_utils.hpp"

 using namespace dnnl;

@@ -1985,11 +1986,9 @@ void TopK::initSupportedPrimitiveDescriptors() {
                                                  ov::element::u8};

     ov::element::Type dataPrecision = getOriginalOutputPrecisionAtPort(TOPK_DATA);
-    if (dataPrecision == ov::element::bf16 && !mayiuse(avx512_core)) {
-        THROW_CPU_NODE_ERR("gets incorrect isa for BF16! AVX512 must be supported!");
-    }
     bool precisionSupported = std::find(std::begin(supportedPrecision), std::end(supportedPrecision), dataPrecision) !=
                               std::end(supportedPrecision);
+    precisionSupported = (dataPrecision == ov::element::bf16 && !mayiuse(avx512_core)) ? false : precisionSupported;
     if (!precisionSupported) {
         if (dataPrecision.is_real()) {
             dataPrecision = ov::element::f32;
src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/conversion.cpp

+6
@@ -145,6 +145,12 @@ void ConvertCPULayerTest::SetUp() {
     auto conversion = std::make_shared<ov::op::v0::Convert>(params.front(), outPrc);

     function = makeNgraphFunction(inPrc, params, conversion, "ConversionCPU");
+
+    // issue 161636
+    if (special_value == ov::test::SpecialValue::none && outPrc == ov::element::f8e4m3) {
+        abs_threshold = 0.0078125f;
+        rel_threshold = 1e-2f;
+    }
 }

 void ConvertCPULayerTest::generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) {
src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp

+9
@@ -158,6 +158,15 @@ void EltwiseLayerCPUTest::SetUp() {
     std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
     std::tie(postOpMgrPtr, fusedOps) = fusingParams;

+    // issue 163147
+    if (ElementType::f16 == netType && enforceSnippets) {
+        auto fusedOpsNames = postOpMgrPtr ? postOpMgrPtr->getFusedOpsNames() : "";
+        if (fusedOpsNames.find("PerChannel") != std::string::npos) {
+            rel_threshold = 0.01f;
+            abs_threshold = 0.0078125f;
+        }
+    }
+
     shapes.resize(2);
     switch (opType) {
         case ov::test::utils::OpType::SCALAR: {
src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/scaled_attn.cpp

+7-2
@@ -59,7 +59,12 @@ void ScaledAttnLayerCPUTest::SetUp() {
     }

     if (inType == ElementType::bf16) {
-        rel_threshold = 2e-2f;
+        // Issue: 163144
+        if (with_cpu_x86_avx2_vnni_2()) {
+            rel_threshold = 0.2f;
+        } else {
+            rel_threshold = 2e-2f;
+        }
     }
     selectedType = makeSelectedTypeStr(selectedType, inType);
     init_input_shapes(inputShapes);

@@ -133,7 +138,7 @@ TEST_P(ScaledAttnLayerCPUTest, CompareWithRefs) {
     bool has_scale;
     std::string targetDevice;
     std::tie(inType, inputShapes, is_causal, has_attn, has_scale, targetDevice, cpuParams) = this->GetParam();
-    if (inType == ElementType::bf16 && !ov::with_cpu_x86_bfloat16())
+    if (inType == ElementType::bf16 && !ov::with_cpu_x86_bfloat16() && !with_cpu_x86_avx2_vnni_2())
         GTEST_SKIP();
     run();
     CheckPluginRelatedResults(compiledModel, "ScaledAttn");
src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/matmul.cpp

+1-1
@@ -157,7 +157,7 @@ std::vector<ov::AnyMap> filterAdditionalConfig_Brgemm() {
 #else
     std::vector<ov::AnyMap> additionalConfig = {{}};
 #endif
-    if (with_cpu_x86_bfloat16()) {
+    if (with_cpu_x86_bfloat16() || with_cpu_x86_avx2_vnni_2()) {
         additionalConfig.push_back({ov::hint::inference_precision(ov::element::bf16)});
     }
src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/normalize.cpp

+7-1
@@ -64,7 +64,13 @@ class NormalizeL2LayerCPUTest : public testing::WithParamInterface<NormalizeL2La
         if (selectedType.empty()) {
             selectedType = getPrimitiveType();
         }
-        selectedType = makeSelectedTypeStr("unknown", inType);
+
+        if (!with_cpu_x86_avx512_core() && inType == ElementType::bf16) {
+            selectedType = makeSelectedTypeStr("unknown", ElementType::f32);
+        } else {
+            selectedType = makeSelectedTypeStr("unknown", inType);
+        }
+
         targetDevice = ov::test::utils::DEVICE_CPU;
         init_input_shapes({shapes});
src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/roi_pooling.cpp

+6-1
@@ -194,7 +194,12 @@ class ROIPoolingCPULayerTest : public testing::WithParamInterface<ROIPoolingCPUT
             selectedType = getPrimitiveType();
         }
         selectedType.push_back('_');
-        selectedType += netPrecision.to_string();
+
+        if (!with_cpu_x86_avx512_core() && netPrecision == ElementType::bf16) {
+            selectedType += ov::element::f32.to_string();
+        } else {
+            selectedType += netPrecision.to_string();
+        }

         if (netPrecision == ov::element::bf16) {
             rel_threshold = 1e-2;
src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/topk.cpp

+5-1
@@ -102,7 +102,11 @@ class TopKLayerCPUTest : public testing::WithParamInterface<TopKLayerCPUTestPara
         inPrc = outPrc = netPrecision;
         configuration.insert(additionalConfig.begin(), additionalConfig.end());

-        selectedType = getPrimitiveType() + "_" + ov::element::Type(netPrecision).get_type_name();
+        if (!ov::with_cpu_x86_avx512_core() && netPrecision == ElementType::bf16) {
+            selectedType = makeSelectedTypeStr(getPrimitiveType(), ElementType::f32);
+        } else {
+            selectedType = makeSelectedTypeStr(getPrimitiveType(), netPrecision);
+        }

         staticShape = inputShape.first.rank() == 0;
         if (staticShape) {
src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/interaction.cpp

+3-1
@@ -149,7 +149,7 @@ class IntertactionCPUTest : public testing::WithParamInterface<InteractionLayerC
         ElementType inType;
         InputShape inputShape;
         std::tie(inType, inputShape) = this->GetParam();
-        bool with_bf16 = ov::with_cpu_x86_bfloat16();
+        bool with_bf16 = ov::with_cpu_x86_bfloat16() || with_cpu_x86_avx2_vnni_2();
         if (with_bf16 && (inType == ov::element::bf16 || inType == ov::element::i32)) {
             selectedType = makeSelectedTypeStr("ref_any", ov::element::bf16);
         } else {

@@ -177,6 +177,8 @@ TEST_P(IntertactionCPUTest_FP16, CompareWithRefs) {
         GTEST_SKIP() << "Skipping test, platform don't support precision f16";
     }
     configuration.insert({ov::hint::inference_precision.name(), ov::element::f16});
+    rel_threshold = 0.01;
+    abs_threshold = 0.0078125;

     run();
     CheckNumberOfNodesWithType(compiledModel, "Interaction", 1);
src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp

+13-4
@@ -9,10 +9,11 @@
 #if defined(OPENVINO_ARCH_RISCV64)
 # include "nodes/kernels/riscv64/cpu_isa_traits.hpp"
 #endif
-
 #include <string>
 #include <vector>

+#include "utils/cpu_test_utils.hpp"
+
 std::vector<std::string> disabledTestPatterns() {
     std::vector<std::string> retVector{
         // TODO: Issue 31841

@@ -491,7 +492,7 @@ std::vector<std::string> disabledTestPatterns() {
     retVector.emplace_back(R"(.*smoke_RDFT_CPU_1D/RDFTTestCPU.CompareWithRefs/prec=f32_IS0=\[\]_TS0=\(\(126\)\)_constAxes=true_axes=\(\(0\)\)_isInverse=false.*)");
     retVector.emplace_back(R"(.*smoke_RDFT_CPU_2D/RDFTTestCPU.CompareWithRefs/prec=f32_IS0=\[\]_TS0=\(\(16.38\)\)_constAxes=true_axes=\(\(0.1\)\)_isInverse=false.*)");
 #endif
-    if (!ov::with_cpu_x86_avx512_core()) {
+    if (!ov::intel_cpu::hasHardwareSupport(ov::element::bf16)) {
         // on platforms which do not support bfloat16, we are disabling bf16 tests since there are no bf16 primitives,
         // tests are useless on such platforms
         retVector.emplace_back(R"(.*(BF|bf)16.*)");

@@ -504,7 +505,7 @@ std::vector<std::string> disabledTestPatterns() {
         retVector.emplace_back(R"(.*Snippets.*MHA.*)");
         retVector.emplace_back(R"(.*Snippets.*(MatMul|Matmul).*)");
     }
-    if (!ov::with_cpu_x86_avx512_core_fp16()) {
+    if (!ov::intel_cpu::hasHardwareSupport(ov::element::f16)) {
         // Skip fp16 tests for paltforms that don't support fp16 precision
         retVector.emplace_back(R"(.*INFERENCE_PRECISION_HINT=(F|f)16.*)");
         retVector.emplace_back(R"(.*ConcatMultiQuerySDPTest.*f16.*)");

@@ -638,7 +639,7 @@ std::vector<std::string> disabledTestPatterns() {
         retVector.emplace_back(R"(.*smoke_Deconv_(2|3)D_NSPC_INT8_AMX/DeconvolutionLayerCPUTest.*)");
     }

-    if (ov::with_cpu_x86_avx512_core_fp16()) {
+    if (ov::with_cpu_x86_avx512_core_fp16() || CPUTestUtils::with_cpu_x86_avx2_vnni_2()) {
         // Issue: 143852
         retVector.emplace_back(R"(smoke_ConvertRangeSubgraphCPUTest/ConvertRangeSubgraphCPUTest\.CompareWithRefs.*Prc=f16.*)");
         retVector.emplace_back(R"((smoke|nightly)_FC_3D_FP16/.*_Fused=Multiply\(PerChannel\).*)");

@@ -649,5 +650,13 @@ std::vector<std::string> disabledTestPatterns() {
         retVector.emplace_back(R"(smoke_Conv_Sum_Broadcast_FP16/ConvSumInPlaceTest.*Relu\.Multiply\(PerChannel\)\.Add\(PerChannel\).*)");
     }

+    if (CPUTestUtils::with_cpu_x86_avx2_vnni_2()) {
+        // jit_gemm_BF16 kernels are not supported for conv,inner_product,matmul on avx2_vnni_2 platforms
+        retVector.emplace_back(R"(smoke_Conv_.*D_GEMM_BF16.*)");
+        retVector.emplace_back(
+            R"(smoke_GroupConv_.*D_Gemm_BF16/GroupConvolutionLayerCPUTest.CompareWithRefs.*primitive=jit_gemm.*)");
+        retVector.emplace_back(R"(smoke_.*MatMulLayerCPUTest.*INFERENCE_PRECISION_HINT=bf16.*_primitive=jit_gemm.*)");
+    }
+
     return retVector;
 }
src/plugins/intel_cpu/tests/functional/utils/cpu_test_utils.cpp

+27-5
@@ -12,6 +12,9 @@
 #include "transformations/rt_info/primitives_priority_attribute.hpp"
 #include "utils/general_utils.h"
 #include "utils/rt_info/memory_formats_attribute.hpp"
+#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
+# include <xbyak/xbyak_util.h>
+#endif

 namespace CPUTestUtils {
 const char* CPUTestsBase::any_type = "any_type";

@@ -45,6 +48,22 @@ const char* CPUTestsBase::cpu_fmt2str(cpu_memory_format_t v) {
     return "undef";
 }

+#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
+static Xbyak::util::Cpu& get_cpu_info() {
+    static Xbyak::util::Cpu cpu;
+    return cpu;
+}
+bool with_cpu_x86_avx2_vnni_2() {
+    return get_cpu_info().has(Xbyak::util::Cpu::tAVX2 | Xbyak::util::Cpu::tAVX_VNNI) &&
+           get_cpu_info().has(Xbyak::util::Cpu::tAVX_VNNI_INT8) &&
+           get_cpu_info().has(Xbyak::util::Cpu::tAVX_NE_CONVERT);
+}
+#else // OPENVINO_ARCH_X86 || OPENVINO_ARCH_X86_64
+bool with_cpu_x86_avx2_vnni_2() {
+    return false;
+}
+#endif // OPENVINO_ARCH_X86 || OPENVINO_ARCH_X86_64
+
 cpu_memory_format_t CPUTestsBase::cpu_str2fmt(const char* str) {
 #define CASE(_fmt) \
     do { \

@@ -472,9 +491,10 @@ CPUTestsBase::deduce_expected_precision(const ov::element::Type& opPrecision,
     if (it != configuration.end()) {
         auto inferencePrecisionConfig = it->second.as<ov::element::Type>();
         inferencePrecisionSetExplicitly = true;
-        // TODO also need to check (dnnl::impl::cpu::x64::avx2_vnni_2)
-        if ((inferencePrecisionConfig == ov::element::bf16 && ov::with_cpu_x86_avx512_core()) ||
-            (inferencePrecisionConfig == ov::element::f16 && ov::with_cpu_x86_avx512_core_fp16()) ||
+        if ((inferencePrecisionConfig == ov::element::bf16 &&
+             (ov::with_cpu_x86_avx512_core() || with_cpu_x86_avx2_vnni_2())) ||
+            (inferencePrecisionConfig == ov::element::f16 &&
+             (ov::with_cpu_x86_avx512_core_fp16() || with_cpu_x86_avx2_vnni_2())) ||
             (inferencePrecisionConfig == ov::element::f32) || (inferencePrecisionConfig == ov::element::dynamic)) {
             inferencePrecision = inferencePrecisionConfig;
         }

@@ -495,7 +515,8 @@ CPUTestsBase::deduce_expected_precision(const ov::element::Type& opPrecision,
     ov::element::Type deducedType = opPrecision;
     // enforceInferPrecision stage
     if (inferencePrecision == ov::element::bf16) {
-        deducedType = ov::with_cpu_x86_avx512_core() ? ov::element::bf16 : ov::element::f32;
+        deducedType =
+            (ov::with_cpu_x86_avx512_core() || with_cpu_x86_avx2_vnni_2()) ? ov::element::bf16 : ov::element::f32;
     }

     // ngraph transform pipeline stage

@@ -505,7 +526,8 @@ CPUTestsBase::deduce_expected_precision(const ov::element::Type& opPrecision,
         }
     }
     if (deducedType == ov::element::bf16) {
-        deducedType = ov::with_cpu_x86_avx512_core() ? ov::element::bf16 : ov::element::f32;
+        deducedType =
+            (ov::with_cpu_x86_avx512_core() || with_cpu_x86_avx2_vnni_2()) ? ov::element::bf16 : ov::element::f32;
     } else if (deducedType == ov::element::f16) {
         if (inferencePrecision != ov::element::f16 && inferencePrecision != ov::element::dynamic) {
             deducedType = ov::element::f32;

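For context, a minimal sketch of how a functional test could consume the new CPUTestUtils::with_cpu_x86_avx2_vnni_2() helper, in the same spirit as the guards added in scaled_attn.cpp and interaction.cpp above; the test suite and test name here are hypothetical:

#include <gtest/gtest.h>

#include "openvino/runtime/system_conf.hpp"  // declares ov::with_cpu_x86_bfloat16()
#include "utils/cpu_test_utils.hpp"          // declares CPUTestUtils::with_cpu_x86_avx2_vnni_2()

// Hypothetical guard: run bf16 coverage only when the platform reports either
// AVX512-BF16 or the AVX2_VNNI_2 feature set (LNL+), otherwise skip.
TEST(HypotheticalBF16Smoke, SkipsWithoutNativeBF16Support) {
    if (!ov::with_cpu_x86_bfloat16() && !CPUTestUtils::with_cpu_x86_avx2_vnni_2()) {
        GTEST_SKIP() << "bf16 is not natively supported on this platform";
    }
    // ... build the bf16 model, infer, and compare against the f32 reference ...
}

This mirrors the skip condition added to ScaledAttnLayerCPUTest::CompareWithRefs earlier in this commit.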
src/plugins/intel_cpu/tests/functional/utils/cpu_test_utils.hpp

+1-1
@@ -113,7 +113,7 @@ inline std::string nodeType2str(nodeType nt) {
         return "GroupConvolutionBackpropData";
     throw std::runtime_error("Undefined node type to convert to string!");
 }
-
+bool with_cpu_x86_avx2_vnni_2();
 class CPUTestsBase {
 public:
     typedef std::map<std::string, ov::Any> CPUInfo;
src/tests/functional/plugin/shared/include/snippets/mha.hpp

+1
@@ -58,6 +58,7 @@ class MHA : public testing::WithParamInterface<ov::test::snippets::MHAParams>,
 protected:
     std::shared_ptr<SnippetsFunctionBase> get_subgraph() const override;
     void init_params(std::vector<InputShape>& input_shapes, ov::element::Type& prc, ov::AnyMap& additional_config) override;
+    void init_thresholds() override;

     bool m_with_mul = false;
 };
src/tests/functional/plugin/shared/src/snippets/mha.cpp

+11
@@ -153,6 +153,17 @@ std::shared_ptr<SnippetsFunctionBase> MHA::get_subgraph() const {
     return std::make_shared<ov::test::snippets::MHAFunction>(inputDynamicShapes, m_input_types, m_with_mul, is_with_reshape);
 }

+void MHA::init_thresholds() {
+    MHABase::init_thresholds();
+    auto precision_hint = configuration.count(ov::hint::inference_precision.name())
+                              ? configuration.at(ov::hint::inference_precision.name())
+                              : ov::element::f32;
+    if (m_input_types.size() > 1 && m_input_types[0] == ov::element::bf16 && precision_hint == ov::element::f32) {
+        rel_threshold = 0.01f;
+        abs_threshold = 0.0078125f;
+    }
+}
+
 void MHASelect::generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) {
     inputs.clear();
     auto model_inputs = function->inputs();
src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp

+1-1
@@ -306,7 +306,7 @@ void SubgraphBaseTest::compile_model() {
         std::cout << "[ PLUGIN ] `SubgraphBaseTest::compile_model()` is finished successfully. Duration is " << duration.count() << "s" << std::endl;
     }
     try {
-        inference_precision = core->get_property(targetDevice, ov::hint::inference_precision);
+        inference_precision = compiledModel.get_property(ov::hint::inference_precision);
     } catch (std::exception& e) {
         std::cout << "[ WARNING ] Impossible to get Inference Precision with exception: " << e.what() << std::endl;
     }