Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,28 @@ void jit_uni_eltwise_generic<isa>::load_vector(Vmm vmm_src,
if (src_prc == dst_prc) {
if (broadcast) {
load_scalar(xmm_src, op, src_prc, dst_prc);
uni_vbroadcastss(vmm_src, xmm_src);
switch (src_prc.size()) {
case 1:
if (isa == x64::sse41) {
punpcklbw(xmm_src, xmm_src);
punpcklbw(xmm_src, xmm_src);
pshufd(xmm_src, xmm_src, 0);
} else {
vpbroadcastb(vmm_src, xmm_src);
}
break;
case 2:
if (isa == x64::sse41) {
punpcklwd(xmm_src, xmm_src);
pshufd(xmm_src, xmm_src, 0);
} else {
vpbroadcastw(vmm_src, xmm_src);
}
break;
Comment on lines +562 to +569
Copy link

Copilot AI Mar 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[MEDIUM] load_vector() adds a 2-byte broadcast path (case 2), but this code calls load_scalar() first, and load_scalar() currently throws for src_prc == dst_prc with src_prc.size() == 2 (it only supports sizes 1 and 4 in that branch). As a result, the new 2-byte broadcast logic is effectively unreachable and any future attempt to broadcast u16/i16 without type conversion will still fail at runtime. Either add 2-byte support to load_scalar() for the src_prc == dst_prc case (load 16 bits and clear upper bits) or remove the case 2 handling here to avoid implying support that isn't actually implemented.

Suggested change
case 2:
if (isa == x64::sse41) {
punpcklwd(xmm_src, xmm_src);
pshufd(xmm_src, xmm_src, 0);
} else {
vpbroadcastw(vmm_src, xmm_src);
}
break;

Copilot uses AI. Check for mistakes.
default:
uni_vbroadcastss(vmm_src, xmm_src);
break;
}
} else {
uni_vmovups(vmm_src, op);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,11 @@ static const std::vector<std::vector<InputShape>> bitwise_in_shapes_4D = {
},
};

// Input-shape sets for the 2D bitwise broadcast tests. Each entry lists two inputs.
// In both entries the first input's concrete shapes include {1, 1}, while the second
// input only ever has the full-size shape ({1, 64} or {32, 256}).
// NOTE(review): presumably each InputShape is {partial shape, {concrete shapes...}} with -1
// marking a dynamic dimension, so the {1, 1} case exercises single-element broadcast of the
// first input - confirm against the InputShape definition.
static const std::vector<std::vector<InputShape>> bitwise_in_shapes_2D_broadcast = {
{{{1, -1}, {{1, 64}, {1, 1}}}, {{1, 64}, {{1, 64}}}},
{{{-1, -1}, {{32, 256}, {1, 1}}}, {{32, 256}, {{32, 256}}}},
};

static const std::vector<CPUSpecificParams>& bitwiseCpuParams() {
static const std::vector<CPUSpecificParams> params = {CPUSpecificParams({nhwc, nhwc}, {nhwc}, {}, {}),
CPUSpecificParams({nchw, nchw}, {nchw}, {}, {})};
Expand Down Expand Up @@ -316,6 +321,27 @@ const auto params_4D_bitwise = ::testing::Combine(

// Instantiates EltwiseLayerCPUTest over params_4D_bitwise; test names come from getTestCaseName.
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_4D_Bitwise, EltwiseLayerCPUTest, params_4D_bitwise, EltwiseLayerCPUTest::getTestCaseName);

// Parameter generator for the 2D bitwise broadcast tests.
// The inner Combine is the cross product of:
//   - the shape sets in bitwise_in_shapes_2D_broadcast,
//   - BITWISE_AND / BITWISE_OR / BITWISE_XOR,
//   - every value returned by secondaryInputTypes(),
//   - OpType::VECTOR only,
//   - i8 and u8 element types (1-byte types only),
//   - two Type_t::dynamic values (NOTE(review): presumably "unspecified" input/output
//     precisions - confirm against the test parameter tuple),
//   - DEVICE_CPU and an empty ov::AnyMap.
// The outer Combine adds an empty CPUSpecificParams, emptyFusingSpec and a `false` flag.
// NOTE(review): the meaning of the trailing `false` is defined by the EltwiseLayerCPUTest
// parameter tuple, which is not visible here - confirm.
const auto params_2D_bitwise_broadcast =
::testing::Combine(::testing::Combine(::testing::ValuesIn(bitwise_in_shapes_2D_broadcast),
::testing::ValuesIn({ov::test::utils::EltwiseTypes::BITWISE_AND,
ov::test::utils::EltwiseTypes::BITWISE_OR,
ov::test::utils::EltwiseTypes::BITWISE_XOR}),
::testing::ValuesIn(secondaryInputTypes()),
::testing::ValuesIn({ov::test::utils::OpType::VECTOR}),
::testing::ValuesIn({ov::element::Type_t::i8, ov::element::Type_t::u8}),
::testing::Values(ov::element::Type_t::dynamic),
::testing::Values(ov::element::Type_t::dynamic),
::testing::Values(ov::test::utils::DEVICE_CPU),
::testing::Values(ov::AnyMap())),
::testing::Values(CPUSpecificParams({}, {}, {}, {})),
::testing::Values(emptyFusingSpec),
::testing::Values(false));

// Instantiates EltwiseLayerCPUTest over params_2D_bitwise_broadcast (i8/u8 bitwise ops on
// the 2D broadcast shapes); test names come from getTestCaseName.
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_2D_Bitwise_i8u8_Broadcast,
EltwiseLayerCPUTest,
params_2D_bitwise_broadcast,
EltwiseLayerCPUTest::getTestCaseName);

const auto params_4D_bitwise_i32 = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(bitwise_in_shapes_4D),
Expand Down
Loading