Commit b88cade

fix crash of int8 models during inference on the GNR platform with fp16 inference_precision
1 parent bee1952 commit b88cade

File tree

1 file changed (+6 −0)


src/plugins/intel_cpu/src/nodes/conv.cpp

Lines changed: 6 additions & 0 deletions
@@ -400,6 +400,12 @@ std::tuple<VecMemoryDescs, MemoryDescPtr> Convolution::initMemoryDescriptors(ov:
             srcDescs.push_back(MemoryDescUtils::makeEmptyDesc());
             continue;
         }
+        // int8 convolution with f16 bias is not supported in oneDNN
+        if (i == BIAS && m_attrs.withBias && canBeExecutedInInt8() && srcTypes[i] == ov::element::f16) {
+            auto srcDesc = creatorsMap.at(LayoutType::ncsp)->createSharedDesc(ov::element::f32, getInputShapeAtPort(i));
+            srcDescs.push_back(srcDesc);
+            continue;
+        }
         auto srcDesc = creatorsMap.at(LayoutType::ncsp)->createSharedDesc(srcTypes[i], getInputShapeAtPort(i));
         srcDescs.push_back(srcDesc);
     }
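
For context, here is a minimal, self-contained sketch of the fallback pattern this change applies: when a convolution executes in int8, a requested f16 bias precision is widened to f32, since oneDNN does not support f16 bias for int8 convolution. The enum, helper name, and flag below are hypothetical simplifications for illustration, not the actual OpenVINO API.

// Hypothetical, simplified sketch of the bias-precision fallback.
#include <iostream>

enum class Precision { f16, f32, i8 };

// Hypothetical helper: decide which bias precision is actually used.
Precision chooseBiasPrecision(Precision requested, bool executesInInt8) {
    // oneDNN rejects f16 bias for int8 convolution, so widen to f32.
    if (executesInInt8 && requested == Precision::f16) {
        return Precision::f32;
    }
    return requested;
}

int main() {
    // An int8 model run with fp16 inference_precision: bias widens to f32.
    Precision p = chooseBiasPrecision(Precision::f16, /*executesInInt8=*/true);
    std::cout << (p == Precision::f32 ? "f32" : "f16") << '\n';  // prints "f32"
}

In the actual fix above, this decision happens while building the memory descriptors: the BIAS input gets an f32 descriptor instead of the f16 one derived from the requested inference precision, so the oneDNN primitive is created with a supported configuration rather than crashing at inference time.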
