Commit 35aa1ab (1 parent: 27b1ab5)

Bf16 saturation handling for PowerStatic parameters in Eltwise::init() to fix nomic-ai model accuracy issue
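
Why saturation is needed: bfloat16 keeps float32's 8-bit exponent but only 7 mantissa bits, so its largest finite value (about 3.3895e+38) sits just below the float32 constant 3.40282e+38 that appears in the affected patterns. A minimal standalone sketch of that boundary check, assuming only OpenVINO's public ov::bfloat16 header (this program is illustrative and not part of the commit):

#include <cstdio>
#include <limits>

#include "openvino/core/type/bfloat16.hpp"

int main() {
    // Largest finite bf16 value (~3.3895e+38) vs. the FLT_MAX-like constant
    // (3.40282e+38) used by the PowerStatic patterns this commit fixes.
    const float bf16_max = static_cast<float>(std::numeric_limits<ov::bfloat16>::max());
    const float big = 3.40282e+38f;
    std::printf("bf16 max = %g, constant = %g, out of range: %s\n",
                bf16_max, big, big > bf16_max ? "yes" : "no");
    return 0;
}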

File tree: 2 files changed (+126, -26 lines)


src/plugins/intel_cpu/src/nodes/eltwise.cpp (21 additions, 7 deletions)
@@ -546,23 +546,37 @@ bool Eltwise::isWithBroadcast() {
 }
 
 void Eltwise::init() {
-    // Bf16 saturation handling for gamma parameter when input precision is bf16 to make sure it stays within the valid
-    // range for bfloat16.
+    // Bf16 saturation handling for PowerStatic parameters
+    // to make sure they stay within the valid range for bfloat16.
     if (m_attrs.data.algo == Algorithm::EltwisePowerStatic && getOriginalInputPrecisionAtPort(0) == ov::element::bf16) {
         const float lowest = static_cast<float>(std::numeric_limits<ov::bfloat16>::lowest());
         const float max = static_cast<float>(std::numeric_limits<ov::bfloat16>::max());
-        auto& gamma = m_attrs.data.gamma;
 
-        if (gamma < lowest) {
-            gamma = lowest;
+        // Clamp alpha parameter
+        auto& alpha = m_attrs.data.alpha;
+        if (alpha < lowest) {
+            alpha = lowest;
+        } else if (alpha > max) {
+            alpha = max;
+        }
+
+        // Clamp beta parameter
+        auto& beta = m_attrs.data.beta;
+        if (beta < lowest) {
+            beta = lowest;
+        } else if (beta > max) {
+            beta = max;
         }
 
-        if (gamma > max) {
+        // Clamp gamma parameter
+        auto& gamma = m_attrs.data.gamma;
+        if (gamma < lowest) {
+            gamma = lowest;
+        } else if (gamma > max) {
             gamma = max;
         }
     }
 }
-
 void Eltwise::getSupportedDescriptors() {
     CPU_NODE_ASSERT(!getParentEdges().empty(), "Incorrect number of input edges");
     CPU_NODE_ASSERT(!getChildEdges().empty(), "Incorrect number of output edges");
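
The three clamp blocks added to init() are structurally identical, so an equivalent formulation would route each scalar through a single helper. A sketch under that assumption (saturate_to_bf16 is an illustrative name, not a function introduced by the commit):

#include <algorithm>
#include <limits>

#include "openvino/core/type/bfloat16.hpp"

// Equivalent to each if/else-if chain above: saturate a PowerStatic scalar
// into the finite bf16 range [lowest, max].
static float saturate_to_bf16(float value) {
    const float lowest = static_cast<float>(std::numeric_limits<ov::bfloat16>::lowest());
    const float max = static_cast<float>(std::numeric_limits<ov::bfloat16>::max());
    return std::clamp(value, lowest, max);
}

With this helper, init() would reduce to three assignments such as alpha = saturate_to_bf16(alpha); the commit instead keeps explicit branches, which matches the surrounding code style.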

src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/powerstatic_bf16_saturation.cpp (105 additions, 19 deletions)
@@ -6,8 +6,10 @@
 #include "internal_properties.hpp"
 #include "openvino/op/add.hpp"
 #include "openvino/op/concat.hpp"
+#include "openvino/op/convert.hpp"
 #include "openvino/op/matmul.hpp"
 #include "openvino/op/multiply.hpp"
+#include "openvino/op/subtract.hpp"
 #include "shared_test_classes/base/ov_subgraph.hpp"
 #include "utils/fusing_test_utils.hpp"
 
@@ -29,6 +31,9 @@ namespace test {
          |
          |
        result
+
+   This pattern creates a PowerStatic node with gamma parameter = -3.40282e+38,
+   which exceeds bf16 range and requires saturation handling.
 */
 
 class PowerStaticBF16Saturation : virtual public SubgraphBaseTest, public CpuTestWithFusing {
@@ -46,18 +51,17 @@ class PowerStaticBF16Saturation : virtual public SubgraphBaseTest, public CpuTes
         parameters.push_back(param0);
         parameters.push_back(param1);
 
-        const auto constMatmul0 = std::make_shared<ov::op::v0::Constant>(ov::element::f32,
-                                                                         ov::Shape{1, 1},
-                                                                         std::vector<double>({3.40282e+38}));
-        const auto constMatmul1 = std::make_shared<ov::op::v0::Constant>(ov::element::f32,
-                                                                         ov::Shape{1, 1},
-                                                                         std::vector<double>({3.40282e+38}));
-        const auto matMul0 = std::make_shared<ov::op::v0::MatMul>(parameters[0], constMatmul0, false, false);
-        const auto matMul1 = std::make_shared<ov::op::v0::MatMul>(parameters[1], constMatmul1, false, false);
+        // Constants that will create large values exceeding bf16 range
+        const auto const_large_positive = std::make_shared<ov::op::v0::Constant>(ov::element::f32,
+                                                                                 ov::Shape{1, 1},
+                                                                                 std::vector<float>({3.40282e+38f}));
+        const auto matMul0 = std::make_shared<ov::op::v0::MatMul>(parameters[0], const_large_positive, false, false);
+        const auto matMul1 = std::make_shared<ov::op::v0::MatMul>(parameters[1], const_large_positive, false, false);
         const auto concat = std::make_shared<ov::op::v0::Concat>(OutputVector{matMul0, matMul1}, 0);
 
-        const auto constAdd = std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, -3.40282e+38);
-        const auto add = std::make_shared<ov::op::v1::Add>(concat, constAdd);
+        const auto const_large_negative =
+            std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, -3.40282e+38f);
+        const auto add = std::make_shared<ov::op::v1::Add>(concat, const_large_negative);
 
         function = makeNgraphFunction(ElementType::f32, parameters, add, "PowerStaticBF16Saturation");
         configuration.insert({ov::hint::inference_precision(ov::element::bf16)});
@@ -71,21 +75,103 @@
         in_data.start_from = 1;
         in_data.range = 1;
         in_data.resolution = 1;
-        auto tensor0 = ov::test::utils::create_and_fill_tensor(modelInputs[0].get_element_type(),
-                                                               targetInputStaticShapes[0],
-                                                               in_data);
-        auto tensor1 = ov::test::utils::create_and_fill_tensor(modelInputs[1].get_element_type(),
-                                                               targetInputStaticShapes[1],
-                                                               in_data);
-
-        inputs.insert({modelInputs[0].get_node_shared_ptr(), tensor0});
-        inputs.insert({modelInputs[1].get_node_shared_ptr(), tensor1});
+
+        for (size_t i = 0; i < modelInputs.size(); ++i) {
+            auto tensor = ov::test::utils::create_and_fill_tensor(modelInputs[i].get_element_type(),
+                                                                  targetInputStaticShapes[i],
+                                                                  in_data);
+            inputs.insert({modelInputs[i].get_node_shared_ptr(), tensor});
+        }
     }
 };
 
 TEST_F(PowerStaticBF16Saturation, CompareWithRefs) {
     run();
 }
 
+/* Bf16 saturation handling for the PowerStatic beta parameter when input precision is bf16
+
+   Param0     Param1
+     |          |
+   matmul0    matmul1
+      \          /
+       \        /
+        concat
+          |
+     multiply(1.0)
+          |
+   subtract(1.0 - multiply_result)
+          |
+   multiply(-3.40282e+38)
+          |
+        result
+
+   This pattern gets fused into a PowerStatic node with beta parameter = -3.40282e+38,
+   which exceeds bf16 range and requires saturation handling.
+   The matmul operations ensure the input precision is bf16 through inference_precision setting.
+*/
+
+class PowerStaticBetaBF16Saturation : virtual public SubgraphBaseTest, public CpuTestWithFusing {
+protected:
+    void SetUp() override {
+        abs_threshold = 0;
+        targetDevice = ov::test::utils::DEVICE_CPU;
+        InputShape input0 = {{-1, -1}, {{1, 1}}};
+        InputShape input1 = {{-1, -1}, {{1, 1}}};
+
+        init_input_shapes({input0, input1});
+        ov::ParameterVector parameters;
+        auto param0 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{-1, -1});
+        auto param1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{-1, -1});
+        parameters.push_back(param0);
+        parameters.push_back(param1);
+
+        // Constants that create a PowerStatic node with beta parameter exceeding bf16 range
+        const auto const_one =
+            std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{1, 1}, std::vector<float>{1.0f});
+
+        const auto const_large_negative =
+            std::make_shared<ov::op::v0::Constant>(ov::element::f32,
+                                                   ov::Shape{1, 1},
+                                                   std::vector<float>{-3.40282346638528859812e+38f});
+
+        const auto matMul0 = std::make_shared<ov::op::v0::MatMul>(parameters[0], const_one, false, false);
+        const auto matMul1 = std::make_shared<ov::op::v0::MatMul>(parameters[1], const_one, false, false);
+        const auto concat = std::make_shared<ov::op::v0::Concat>(OutputVector{matMul0, matMul1}, 0);
+
+        // Create computation pattern:
+        // Step 1: concat_result * 1.0 = concat_result (bf16 after precision conversion)
+        // Step 2: 1.0 - multiply_result
+        // Step 3: (1.0 - multiply_result) * (-3.40282e+38)
+        // This pattern will be fused into PowerStatic node with beta = -3.40282e+38, exceeding bf16 range
+        auto multiply_step = std::make_shared<ov::op::v1::Multiply>(concat, const_one);
+        auto subtract_step = std::make_shared<ov::op::v1::Subtract>(const_one, multiply_step);
+        auto final_multiply = std::make_shared<ov::op::v1::Multiply>(subtract_step, const_large_negative);
+
+        function = makeNgraphFunction(ElementType::f32, parameters, final_multiply, "PowerStaticBetaBF16Saturation");
+        configuration.insert({ov::hint::inference_precision(ov::element::bf16)});
+        configuration.insert(ov::intel_cpu::snippets_mode(ov::intel_cpu::SnippetsMode::DISABLE));
+    }
+    void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
+        inputs.clear();
+        const auto& modelInputs = function->inputs();
+        ov::test::utils::InputGenerateData in_data;
+        in_data.start_from = 1;
+        in_data.range = 1;
+        in_data.resolution = 1;
+
+        for (size_t i = 0; i < modelInputs.size(); ++i) {
+            auto tensor = ov::test::utils::create_and_fill_tensor(modelInputs[i].get_element_type(),
+                                                                  targetInputStaticShapes[i],
+                                                                  in_data);
+            inputs.insert({modelInputs[i].get_node_shared_ptr(), tensor});
+        }
+    }
+};
+
+TEST_F(PowerStaticBetaBF16Saturation, CompareWithRefs) {
+    run();
+}
+
 } // namespace test
 } // namespace ov
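
Both subgraph tests validate the saturation end to end, through bf16 graph compilation and eltwise fusing. The underlying invariant can also be stated as a plain unit check; a hypothetical gtest sketch of that check (the test name and the idea of a separate unit test are assumptions, not part of the commit):

#include <algorithm>
#include <limits>

#include <gtest/gtest.h>

#include "openvino/core/type/bfloat16.hpp"

// Out-of-range scalars must saturate to the bf16 limits; in-range values
// must pass through unchanged.
TEST(PowerStaticBF16SaturationUnit, ClampMatchesBf16Limits) {
    const float lowest = static_cast<float>(std::numeric_limits<ov::bfloat16>::lowest());
    const float max = static_cast<float>(std::numeric_limits<ov::bfloat16>::max());
    EXPECT_EQ(std::clamp(-3.40282e+38f, lowest, max), lowest);
    EXPECT_EQ(std::clamp(3.40282e+38f, lowest, max), max);
    EXPECT_EQ(std::clamp(1.0f, lowest, max), 1.0f);
}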
