Skip to content

[CPU][LPT] Simplify LPT pass pipeline description #30392

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -799,54 +799,12 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
void Transformations::runLptPasses(const std::vector<ov::element::Type>& defaultPrecisions) {
using namespace ov::pass::low_precision;
ov::pass::Manager lptManager("CPU:LPT");

#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)
auto quantizationRestrictions = std::vector<QuantizationGranularityRestriction>();
auto supportedPrecisions = std::vector<PrecisionsRestriction>({
PrecisionsRestriction::create<ov::opset1::MatMul>({{{0, 1}, {ov::element::i8}}}),
});

auto quantizationRestrictions = std::vector<QuantizationGranularityRestriction>();

CPU_REGISTER_PASS_COMMON(lptManager,
LowPrecision,
supportedPrecisions,
quantizationRestrictions,
LayerTransformation::Params(true, ov::element::f32, defaultPrecisions));
CPU_DISABLE_PASS_COMMON(lptManager, AvgPoolTransformation);
CPU_DISABLE_PASS_COMMON(lptManager, ConvolutionTransformation);
CPU_DISABLE_PASS_COMMON(lptManager, ConvolutionBackpropDataTransformation);
CPU_DISABLE_PASS_COMMON(lptManager, InterpolateTransformation);
CPU_DISABLE_PASS_COMMON(lptManager, GroupConvolutionTransformation);
CPU_DISABLE_PASS_COMMON(lptManager, MaxPoolTransformation);
CPU_DISABLE_PASS_COMMON(lptManager, MVNTransformation);
CPU_DISABLE_PASS_COMMON(lptManager, NormalizeL2Transformation);
CPU_DISABLE_PASS_COMMON(lptManager, RecurrentCellTransformation);
CPU_DISABLE_PASS_COMMON(lptManager, ReduceMaxTransformation);
CPU_DISABLE_PASS_COMMON(lptManager, ReduceMeanTransformation);
CPU_DISABLE_PASS_COMMON(lptManager, ReduceMinTransformation);
CPU_DISABLE_PASS_COMMON(lptManager, ReduceSumTransformation);
CPU_DISABLE_PASS_COMMON(lptManager, MultiplyToGroupConvolutionTransformation);

CPU_SET_CALLBACK_COMMON(
lptManager,
[](const_node_ptr& node) -> bool {
return ov::marked_as_bias(node);
},
AddTransformation);

// Enable MatMulTransformation against FC nodes only
// int8 MatMul is disabled because acl_lowp_matmul_t supports 2D case only
// most models have 3D/4D cases, so fallback to jit_gemm_i8 gives worse perf than gemm_acl_f16
// oneDNN ticket #2696
CPU_SET_CALLBACK_COMMON(
lptManager,
[&](const_node_ptr& node) -> bool {
if (NetworkHelper::isConstantPath(node->get_input_node_shared_ptr(1)) &&
one_of(node->input_value(1).get_partial_shape().rank().get_length(), 2, 3)) {
return false;
}
return true;
},
MatMulTransformation);
#else
// Only enable conv/group conv signed input on AMX and avx2_vnni_2 platform.
std::vector<ov::element::Type> input0LowPrecisionList;
Expand All @@ -857,6 +815,9 @@ void Transformations::runLptPasses(const std::vector<ov::element::Type>& default
input0LowPrecisionList = {ov::element::u8};
}

auto quantizationRestrictions = std::vector<QuantizationGranularityRestriction>(
{QuantizationGranularityRestriction::create<ov::opset1::Convolution>({0}),
QuantizationGranularityRestriction::create<ov::opset1::ConvolutionBackpropData>({0})});
auto supportedPrecisions = std::vector<PrecisionsRestriction>({
PrecisionsRestriction::create<ov::opset1::Convolution>({
{{0}, input0LowPrecisionList},
Expand All @@ -880,30 +841,53 @@ void Transformations::runLptPasses(const std::vector<ov::element::Type>& default
PrecisionsRestriction::create<ov::opset5::LSTMSequence>({{{0, 1}, {ov::element::u8}}}),
PrecisionsRestriction::create<ov::opset6::GRUSequence>({{{0, 1}, {ov::element::u8}}}),
});

auto quantizationRestrictions = std::vector<QuantizationGranularityRestriction>(
{QuantizationGranularityRestriction::create<ov::opset1::Convolution>({0}),
QuantizationGranularityRestriction::create<ov::opset1::ConvolutionBackpropData>({0})});

#endif
CPU_REGISTER_PASS_COMMON(lptManager,
LowPrecision,
supportedPrecisions,
quantizationRestrictions,
LayerTransformation::Params(true, ov::element::f32, defaultPrecisions));

CPU_SET_CALLBACK_COMMON(
lptManager,
[](const_node_ptr& node) -> bool {
return ov::marked_as_bias(node);
},
AddTransformation);
CPU_DISABLE_PASS_COMMON(lptManager, MultiplyToGroupConvolutionTransformation);

CPU_DISABLE_PASS_ARM(lptManager, AvgPoolTransformation);
CPU_DISABLE_PASS_ARM(lptManager, ConvolutionTransformation);
CPU_DISABLE_PASS_ARM(lptManager, ConvolutionBackpropDataTransformation);
CPU_DISABLE_PASS_ARM(lptManager, InterpolateTransformation);
CPU_DISABLE_PASS_ARM(lptManager, GroupConvolutionTransformation);
CPU_DISABLE_PASS_ARM(lptManager, MaxPoolTransformation);
CPU_DISABLE_PASS_ARM(lptManager, MVNTransformation);
CPU_DISABLE_PASS_ARM(lptManager, NormalizeL2Transformation);
CPU_DISABLE_PASS_ARM(lptManager, RecurrentCellTransformation);
CPU_DISABLE_PASS_ARM(lptManager, ReduceMaxTransformation);
CPU_DISABLE_PASS_ARM(lptManager, ReduceMeanTransformation);
CPU_DISABLE_PASS_ARM(lptManager, ReduceMinTransformation);
CPU_DISABLE_PASS_ARM(lptManager, ReduceSumTransformation);

// Enable MatMulTransformation against FC nodes only
// int8 MatMul is disabled because acl_lowp_matmul_t supports 2D case only
// most models have 3D/4D cases, so fallback to jit_gemm_i8 gives worse perf than gemm_acl_f16
// oneDNN ticket #2696
CPU_SET_CALLBACK_ARM(
lptManager,
[&](const_node_ptr& node) -> bool {
return !(NetworkHelper::isConstantPath(node->get_input_node_shared_ptr(1)) &&
one_of(node->input_value(1).get_partial_shape().rank().get_length(), 2, 3));
},
MatMulTransformation);

CPU_SET_CALLBACK_X64(
lptManager,
[&defaultPrecisions](const_node_ptr& node) -> bool {
return LayerTransformation::isAsymmetricQuantization(node, defaultPrecisions) ||
WeightableLayerTransformation::isAsymmetricOnWeights(node, defaultPrecisions);
},
ConvolutionBackpropDataTransformation);
CPU_SET_CALLBACK_COMMON(
lptManager,
[](const_node_ptr& node) -> bool {
return ov::marked_as_bias(node);
},
AddTransformation);

CPU_SET_CALLBACK_X64(
lptManager,
Expand Down Expand Up @@ -934,8 +918,6 @@ void Transformations::runLptPasses(const std::vector<ov::element::Type>& default
},
FuseConvertTransformation);

CPU_DISABLE_PASS_COMMON(lptManager, MultiplyToGroupConvolutionTransformation);
#endif
lptManager.run_passes(model);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, ConcatWithNeighborsGraphTransformation,
::testing::ValuesIn(precisions),
::testing::ValuesIn(shapes),
::testing::Values(ov::test::utils::DEVICE_CPU),
::testing::ValuesIn(trasformationParamValues)),
::testing::ValuesIn(trasformationParamValues),
::testing::Values("convolution_addition_original"),
::testing::Values("u8")),
ConcatWithNeighborsGraphTransformation::getTestCaseName);
} // namespace
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, ConcatWithNeighborsGraphTransformation,
::testing::ValuesIn(precisions),
::testing::Values(ov::PartialShape({ 1, 3, 16, 16 })),
::testing::Values(ov::test::utils::DEVICE_GPU),
::testing::ValuesIn(trasformationParamValues)),
::testing::ValuesIn(trasformationParamValues),
::testing::Values("convolution_addition_original"),
::testing::Values("u8")),
ConcatWithNeighborsGraphTransformation::getTestCaseName);
} // namespace
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ typedef std::tuple<
ov::element::Type,
ov::PartialShape,
std::string,
ov::pass::low_precision::LayerTransformation::Params> ConcatNeighboringGraphTransformationParams;
ov::pass::low_precision::LayerTransformation::Params,
std::string,
std::string> ConcatNeighboringGraphTransformationParams;

class ConcatWithNeighborsGraphTransformation :
public testing::WithParamInterface<ConcatNeighboringGraphTransformationParams>,
Expand All @@ -25,6 +27,8 @@ class ConcatWithNeighborsGraphTransformation :

protected:
void SetUp() override;
void run() override;
std::string expectedKernelName, expectedRuntimePrecision;
};

} // namespace LayerTestsDefinitions
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,21 @@ std::string ConcatWithNeighborsGraphTransformation::getTestCaseName(const testin
ov::PartialShape inputShapes;
std::string targetDevice;
ov::pass::low_precision::LayerTransformation::Params params;
std::tie(precision, inputShapes, targetDevice, params) = obj.param;
std::string expectedKernelName, expectedRuntimePrecision;
std::tie(precision, inputShapes, targetDevice, params, expectedKernelName, expectedRuntimePrecision) = obj.param;

return get_test_case_name_by_params(precision, inputShapes, targetDevice, params);
}


void ConcatWithNeighborsGraphTransformation::SetUp() {
SKIP_IF_CURRENT_TEST_IS_DISABLED()

ov::element::Type ngPrecision;
ov::PartialShape inputShape;
ov::pass::low_precision::LayerTransformation::Params params;
std::tie(ngPrecision, inputShape, targetDevice, params) = this->GetParam();
std::string expectedKernelName, expectedRuntimePrecision;
std::tie(ngPrecision, inputShape, targetDevice, params, expectedKernelName, expectedRuntimePrecision) = this->GetParam();

init_input_shapes({ inputShape, inputShape, inputShape });

Expand All @@ -40,7 +44,18 @@ void ConcatWithNeighborsGraphTransformation::SetUp() {
{ 256ul, ov::Shape({}), {0.f}, {2.55f}, {0.f}, {2.55f / 2.f} },
{ 256ul, ov::Shape({}), {0.f}, {2.55f}, {0.f}, {2.55f / 3.f} },
"concat",
"");
"convolution");
}

void ConcatWithNeighborsGraphTransformation::run() {
LayerTransformation::run();

const auto params = std::get<3>(GetParam());
const auto expectedKernelName = std::get<4>(GetParam());
const auto expectedRuntimePrecision = std::get<5>(GetParam());
const auto actualType = get_runtime_precision(expectedKernelName);

EXPECT_EQ(actualType, expectedRuntimePrecision);
}

TEST_P(ConcatWithNeighborsGraphTransformation, CompareWithRefImpl) {
Expand Down
4 changes: 2 additions & 2 deletions src/tests/ov_helpers/ov_lpt_models/src/concat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,8 +235,8 @@ std::shared_ptr<ov::Model> ConcatFunction::getOriginalWithNeighbors(
concat1,
std::make_shared<ov::opset1::Multiply>(
std::make_shared<ov::opset1::Convert>(ov::opset1::Constant::create(ov::element::i8, convShape, {1}),
ov::element::f32),
ov::opset1::Constant::create(ov::element::f32, Shape{}, {1})),
precision),
ov::opset1::Constant::create(precision, Shape{}, {1})),
ov::Strides{1, 1},
ov::CoordinateDiff{0, 0},
ov::CoordinateDiff{0, 0},
Expand Down
Loading