Skip to content

Commit c4472b7

Browse files
committed
used to verify ovms pipeline
1 parent f3c838d commit c4472b7

File tree

2 files changed

+37
-14
lines changed

2 files changed

+37
-14
lines changed

src/plugins/intel_cpu/src/nodes/eltwise.cpp

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
#include <cassert>
1212
#include <cmath>
1313
#include <cstddef>
14-
#include <limits>
1514
#include <map>
1615
#include <memory>
1716
#include <oneapi/dnnl/dnnl.hpp>
@@ -46,7 +45,6 @@
4645
#include "openvino/core/node.hpp"
4746
#include "openvino/core/shape.hpp"
4847
#include "openvino/core/type.hpp"
49-
#include "openvino/core/type/bfloat16.hpp"
5048
#include "openvino/core/type/element_type.hpp"
5149
#include "openvino/op/abs.hpp"
5250
#include "openvino/op/add.hpp"
@@ -546,20 +544,25 @@ bool Eltwise::isWithBroadcast() {
546544
}
547545

548546
void Eltwise::init() {
549-
// Bf16 saturation handling for gamma parameter when input precision is bf16 to make sure it stays within the valid
550-
// range for bfloat16.
547+
// Bf16 saturation handling for PowerStatic parameters
548+
// to make sure they stay within the valid range for bfloat16.
551549
if (m_attrs.data.algo == Algorithm::EltwisePowerStatic && getOriginalInputPrecisionAtPort(0) == ov::element::bf16) {
552-
const float lowest = static_cast<float>(std::numeric_limits<ov::bfloat16>::lowest());
553-
const float max = static_cast<float>(std::numeric_limits<ov::bfloat16>::max());
554-
auto& gamma = m_attrs.data.gamma;
555-
556-
if (gamma < lowest) {
557-
gamma = lowest;
558-
}
550+
// Use the actual float values corresponding to bfloat16 limits
551+
// 0xFF7F = -65504.0F (lowest), 0x7F7F = 65504.0F (max)
552+
static constexpr float bf16_lowest = -65504.0F;
553+
static constexpr float bf16_max = 65504.0F;
554+
555+
// Helper lambda to clamp parameter values within bf16 range
556+
auto clampBf16Parameter = [&](auto& param) {
557+
if (std::isfinite(param)) {
558+
param = std::clamp(static_cast<float>(param), bf16_lowest, bf16_max);
559+
}
560+
};
559561

560-
if (gamma > max) {
561-
gamma = max;
562-
}
562+
// Clamp all PowerStatic parameters
563+
clampBf16Parameter(m_attrs.data.alpha);
564+
clampBf16Parameter(m_attrs.data.beta);
565+
clampBf16Parameter(m_attrs.data.gamma);
563566
}
564567
}
565568

src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/mlp_fusion.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,26 @@ ov::intel_cpu::MLPFusionPass::MLPFusionPass() {
122122
matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](ov::pass::pattern::Matcher& m) {
123123
const auto& pattern_map = m.get_pattern_value_map();
124124
auto root = m.get_match_root();
125+
126+
// Verify VariadicSplit output[1] connects to Multiply (up branch) in combined mode
127+
if (pattern_map.count(gate_up_proj_split)) {
128+
auto mlp_gated_up_node = pattern_map.at(mlp_gated_up).get_node_shared_ptr();
129+
auto input0 = mlp_gated_up_node->input_value(0);
130+
auto input1 = mlp_gated_up_node->input_value(1);
131+
132+
bool found_valid_up_connection = false;
133+
134+
if (input0.get_node() == pattern_map.at(gate_up_proj_split).get_node() && input0.get_index() == 1) {
135+
found_valid_up_connection = true;
136+
}
137+
if (input1.get_node() == pattern_map.at(gate_up_proj_split).get_node() && input1.get_index() == 1) {
138+
found_valid_up_connection = true;
139+
}
140+
141+
if (!found_valid_up_connection) {
142+
return false;
143+
}
144+
}
125145
auto src = pattern_map.at(input);
126146
if (!src.get_element_type().is_real()) {
127147
// FakeQuantize, should skip fusion

0 commit comments

Comments
 (0)