Skip to content

Commit 68f629f

Browse files
committed
Add bf16 saturation handling for PowerStatic parameters in Eltwise::init() to fix the nomic-ai model accuracy issue
1 parent 27b1ab5 commit 68f629f

File tree

2 files changed

+110
-28
lines changed

2 files changed

+110
-28
lines changed

src/plugins/intel_cpu/src/nodes/eltwise.cpp

Lines changed: 20 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -546,23 +546,34 @@ bool Eltwise::isWithBroadcast() {
546546
}
547547

548548
void Eltwise::init() {
549-
// Bf16 saturation handling for gamma parameter when input precision is bf16 to make sure it stays within the valid
550-
// range for bfloat16.
551-
if (m_attrs.data.algo == Algorithm::EltwisePowerStatic && getOriginalInputPrecisionAtPort(0) == ov::element::bf16) {
549+
// Bf16 saturation handling for PowerStatic parameters
550+
// to make sure they stay within the valid range for bfloat16.
551+
if (m_attrs.data.algo == Algorithm::EltwisePowerStatic) {
552552
const float lowest = static_cast<float>(std::numeric_limits<ov::bfloat16>::lowest());
553553
const float max = static_cast<float>(std::numeric_limits<ov::bfloat16>::max());
554-
auto& gamma = m_attrs.data.gamma;
555554

556-
if (gamma < lowest) {
557-
gamma = lowest;
555+
// Clamp alpha parameter
556+
if (m_attrs.data.alpha < lowest) {
557+
m_attrs.data.alpha = lowest;
558+
} else if (m_attrs.data.alpha > max) {
559+
m_attrs.data.alpha = max;
558560
}
559561

560-
if (gamma > max) {
561-
gamma = max;
562+
// Clamp beta parameter
563+
if (m_attrs.data.beta < lowest) {
564+
m_attrs.data.beta = lowest;
565+
} else if (m_attrs.data.beta > max) {
566+
m_attrs.data.beta = max;
567+
}
568+
569+
// Clamp gamma parameter
570+
if (m_attrs.data.gamma < lowest) {
571+
m_attrs.data.gamma = lowest;
572+
} else if (m_attrs.data.gamma > max) {
573+
m_attrs.data.gamma = max;
562574
}
563575
}
564576
}
565-
566577
void Eltwise::getSupportedDescriptors() {
567578
CPU_NODE_ASSERT(!getParentEdges().empty(), "Incorrect number of input edges");
568579
CPU_NODE_ASSERT(!getChildEdges().empty(), "Incorrect number of output edges");

src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/powerstatic_bf16_saturation.cpp

Lines changed: 90 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,10 @@
66
#include "internal_properties.hpp"
77
#include "openvino/op/add.hpp"
88
#include "openvino/op/concat.hpp"
9+
#include "openvino/op/convert.hpp"
910
#include "openvino/op/matmul.hpp"
1011
#include "openvino/op/multiply.hpp"
12+
#include "openvino/op/subtract.hpp"
1113
#include "shared_test_classes/base/ov_subgraph.hpp"
1214
#include "utils/fusing_test_utils.hpp"
1315

@@ -29,6 +31,9 @@ namespace test {
2931
|
3032
|
3133
result
34+
35+
This pattern creates a PowerStatic node with gamma parameter = -3.40282e+38,
36+
which exceeds bf16 range and requires saturation handling.
3237
*/
3338

3439
class PowerStaticBF16Saturation : virtual public SubgraphBaseTest, public CpuTestWithFusing {
@@ -46,18 +51,17 @@ class PowerStaticBF16Saturation : virtual public SubgraphBaseTest, public CpuTes
4651
parameters.push_back(param0);
4752
parameters.push_back(param1);
4853

49-
const auto constMatmul0 = std::make_shared<ov::op::v0::Constant>(ov::element::f32,
50-
ov::Shape{1, 1},
51-
std::vector<double>({3.40282e+38}));
52-
const auto constMatmul1 = std::make_shared<ov::op::v0::Constant>(ov::element::f32,
53-
ov::Shape{1, 1},
54-
std::vector<double>({3.40282e+38}));
55-
const auto matMul0 = std::make_shared<ov::op::v0::MatMul>(parameters[0], constMatmul0, false, false);
56-
const auto matMul1 = std::make_shared<ov::op::v0::MatMul>(parameters[1], constMatmul1, false, false);
54+
// Constants that will create large values exceeding bf16 range
55+
const auto const_large_positive = std::make_shared<ov::op::v0::Constant>(ov::element::f32,
56+
ov::Shape{1, 1},
57+
std::vector<float>({3.40282e+38f}));
58+
const auto matMul0 = std::make_shared<ov::op::v0::MatMul>(parameters[0], const_large_positive, false, false);
59+
const auto matMul1 = std::make_shared<ov::op::v0::MatMul>(parameters[1], const_large_positive, false, false);
5760
const auto concat = std::make_shared<ov::op::v0::Concat>(OutputVector{matMul0, matMul1}, 0);
5861

59-
const auto constAdd = std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, -3.40282e+38);
60-
const auto add = std::make_shared<ov::op::v1::Add>(concat, constAdd);
62+
const auto const_large_negative =
63+
std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, -3.40282e+38f);
64+
const auto add = std::make_shared<ov::op::v1::Add>(concat, const_large_negative);
6165

6266
function = makeNgraphFunction(ElementType::f32, parameters, add, "PowerStaticBF16Saturation");
6367
configuration.insert({ov::hint::inference_precision(ov::element::bf16)});
@@ -71,21 +75,88 @@ class PowerStaticBF16Saturation : virtual public SubgraphBaseTest, public CpuTes
7175
in_data.start_from = 1;
7276
in_data.range = 1;
7377
in_data.resolution = 1;
74-
auto tensor0 = ov::test::utils::create_and_fill_tensor(modelInputs[0].get_element_type(),
75-
targetInputStaticShapes[0],
76-
in_data);
77-
auto tensor1 = ov::test::utils::create_and_fill_tensor(modelInputs[1].get_element_type(),
78-
targetInputStaticShapes[1],
79-
in_data);
80-
81-
inputs.insert({modelInputs[0].get_node_shared_ptr(), tensor0});
82-
inputs.insert({modelInputs[1].get_node_shared_ptr(), tensor1});
78+
79+
for (size_t i = 0; i < modelInputs.size(); ++i) {
80+
auto tensor = ov::test::utils::create_and_fill_tensor(modelInputs[i].get_element_type(),
81+
targetInputStaticShapes[i],
82+
in_data);
83+
inputs.insert({modelInputs[i].get_node_shared_ptr(), tensor});
84+
}
8385
}
8486
};
8587

8688
TEST_F(PowerStaticBF16Saturation, CompareWithRefs) {
8789
run();
8890
}
8991

92+
/* Bf16 saturation handling for the PowerStatic beta parameter when input precision is bf16
93+
94+
Param
95+
|
96+
multiply(1.0)
97+
|
98+
subtract(1.0 - multiply_result)
99+
|
100+
multiply(-3.40282e+38)
101+
|
102+
result
103+
104+
This pattern gets fused into a PowerStatic node with beta parameter = -3.40282e+38,
105+
which exceeds bf16 range and requires saturation handling.
106+
*/
107+
108+
class PowerStaticBetaBF16Saturation : virtual public SubgraphBaseTest, public CpuTestWithFusing {
109+
protected:
110+
void SetUp() override {
111+
abs_threshold = 0;
112+
targetDevice = ov::test::utils::DEVICE_CPU;
113+
InputShape input = {{-1, -1}, {{1, 1}}};
114+
115+
init_input_shapes({input});
116+
ov::ParameterVector parameters;
117+
auto param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{-1, -1});
118+
parameters.push_back(param);
119+
120+
// Constants that create a PowerStatic node with beta parameter exceeding bf16 range
121+
const auto const_one =
122+
std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{1, 1}, std::vector<float>{1.0f});
123+
124+
const auto const_large_negative =
125+
std::make_shared<ov::op::v0::Constant>(ov::element::f32,
126+
ov::Shape{1, 1},
127+
std::vector<float>{-3.40282346638528859812e+38f});
128+
129+
// Create computation pattern:
130+
// Step 1: input * 1.0 = input
131+
// Step 2: 1.0 - input
132+
// Step 3: (1.0 - input) * (-3.40282e+38)
133+
// This pattern will be fused into PowerStatic node with beta = -3.40282e+38, exceeding bf16 range
134+
auto multiply_step = std::make_shared<ov::op::v1::Multiply>(param, const_one);
135+
auto subtract_step = std::make_shared<ov::op::v1::Subtract>(const_one, multiply_step);
136+
auto final_multiply = std::make_shared<ov::op::v1::Multiply>(subtract_step, const_large_negative);
137+
138+
function = makeNgraphFunction(ElementType::f32, parameters, final_multiply, "PowerStaticBetaBF16Saturation");
139+
configuration.insert({ov::hint::inference_precision(ov::element::bf16)});
140+
configuration.insert(ov::intel_cpu::snippets_mode(ov::intel_cpu::SnippetsMode::DISABLE));
141+
}
142+
void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
143+
inputs.clear();
144+
const auto& modelInputs = function->inputs();
145+
ov::test::utils::InputGenerateData in_data;
146+
in_data.start_from = 1;
147+
in_data.range = 1;
148+
in_data.resolution = 1;
149+
150+
auto tensor = ov::test::utils::create_and_fill_tensor(modelInputs[0].get_element_type(),
151+
targetInputStaticShapes[0],
152+
in_data);
153+
inputs.insert({modelInputs[0].get_node_shared_ptr(), tensor});
154+
}
155+
};
156+
157+
TEST_F(PowerStaticBetaBF16Saturation, CompareWithRefs) {
158+
run();
159+
}
160+
90161
} // namespace test
91162
} // namespace ov

0 commit comments

Comments
 (0)