|
| 1 | +// Copyright (C) 2022-2023 Intel Corporation |
| 2 | +// SPDX-License-Identifier: Apache-2.0 |
| 3 | +// |
| 4 | + |
| 5 | +#include "shared_test_classes/single_layer/mat_mul.hpp" |
| 6 | +#include "shared_test_classes/base/ov_subgraph.hpp" |
| 7 | +#include "ie_precision.hpp" |
| 8 | +#include "test_utils/fusing_test_utils.hpp" |
| 9 | +#include "ngraph_functions/builders.hpp" |
| 10 | +#include <string> |
| 11 | +#include <ov_ops/type_relaxed.hpp> |
| 12 | +#include "shared_test_classes/base/utils/generate_inputs.hpp" |
| 13 | +#include "cpu/cpu_config.hpp" |
| 14 | + |
| 15 | +using namespace ngraph; |
| 16 | +using namespace InferenceEngine; |
| 17 | +using namespace CPUTestUtils; |
| 18 | +using namespace ov::test; |
| 19 | + |
| 20 | +namespace CPULayerTestsDefinitions { |
| 21 | + |
| 22 | +struct ShapeRelatedParams { |
| 23 | + std::vector<InputShape> inputShapes; |
| 24 | + std::pair<bool, bool> transpose; |
| 25 | +}; |
| 26 | + |
| 27 | +typedef std::tuple< |
| 28 | + ShapeRelatedParams, |
| 29 | + ElementType, // Input precision |
| 30 | + ElementType, // Weights precision |
| 31 | + ElementType, // Output precision |
| 32 | + fusingSpecificParams, |
| 33 | + CPUSpecificParams, |
| 34 | + std::map<std::string, std::string>, // Additional config |
| 35 | + float // Weights sparse rate |
| 36 | +> MatMulSparseParamSet; |
| 37 | + |
| 38 | +class MatMulSparseCPUTest : public testing::WithParamInterface<MatMulSparseParamSet>, |
| 39 | + virtual public SubgraphBaseTest, public CpuTestWithFusing { |
| 40 | +public: |
| 41 | + static std::string getTestCaseName(const testing::TestParamInfo<MatMulSparseParamSet>& obj) { |
| 42 | + ShapeRelatedParams shapeRelatedParams; |
| 43 | + ElementType inType, weiType, outType; |
| 44 | + fusingSpecificParams fusingParams; |
| 45 | + CPUSpecificParams cpuParams; |
| 46 | + std::map<std::string, std::string> additionalConfig; |
| 47 | + float weiSparseRate; |
| 48 | + std::tie(shapeRelatedParams, inType, weiType, outType, fusingParams, cpuParams, additionalConfig, |
| 49 | + weiSparseRate) = obj.param; |
| 50 | + |
| 51 | + std::ostringstream result; |
| 52 | + result << "IS="; |
| 53 | + for (const auto& shape : shapeRelatedParams.inputShapes) { |
| 54 | + result << CommonTestUtils::partialShape2str({shape.first}) << "_"; |
| 55 | + } |
| 56 | + result << "TS="; |
| 57 | + for (const auto& shape : shapeRelatedParams.inputShapes) { |
| 58 | + result << "("; |
| 59 | + if (!shape.second.empty()) { |
| 60 | + auto itr = shape.second.begin(); |
| 61 | + do { |
| 62 | + result << CommonTestUtils::vec2str(*itr); |
| 63 | + } while (++itr != shape.second.end() && result << "_"); |
| 64 | + } |
| 65 | + result << ")_"; |
| 66 | + } |
| 67 | + result << "transpose_a=" << shapeRelatedParams.transpose.first << "_"; |
| 68 | + result << "transpose_b=" << shapeRelatedParams.transpose.second << "_"; |
| 69 | + result << "inType=" << inType << "_"; |
| 70 | + result << "weiType=" << weiType << "_"; |
| 71 | + result << "outType=" << outType << "_"; |
| 72 | + result << CpuTestWithFusing::getTestCaseName(fusingParams); |
| 73 | + result << CPUTestsBase::getTestCaseName(cpuParams); |
| 74 | + |
| 75 | + if (!additionalConfig.empty()) { |
| 76 | + result << "_PluginConf"; |
| 77 | + for (auto& item : additionalConfig) { |
| 78 | + result << "_" << item.first << "=" << item.second; |
| 79 | + } |
| 80 | + } |
| 81 | + result << "_weiSparseRate=" << weiSparseRate; |
| 82 | + |
| 83 | + return result.str(); |
| 84 | + } |
| 85 | + |
| 86 | +protected: |
| 87 | + std::string cpuNodeType; |
| 88 | + |
| 89 | + template<typename T> |
| 90 | + void transpose(T& shape) { |
| 91 | + IE_ASSERT(shape.size() > 1); |
| 92 | + std::swap(*(shape.end() - 1), *(shape.end() - 2)); |
| 93 | + } |
| 94 | + |
| 95 | + std::vector<int8_t> inline generateSparseVector(size_t vec_len, |
| 96 | + float sparseRate = 0.0f, |
| 97 | + int8_t upTo = 10, |
| 98 | + int8_t startFrom = 1, |
| 99 | + int32_t seed = 1) { |
| 100 | + std::vector<int8_t> res(vec_len); |
| 101 | + std::mt19937 gen(seed); |
| 102 | + std::uniform_int_distribution<long> dist(static_cast<long>(startFrom), static_cast<long>(upTo)); |
| 103 | + |
| 104 | + std::mt19937 gen_f(123); |
| 105 | + std::uniform_real_distribution<float> dist_f(0.f, 1.f); |
| 106 | + |
| 107 | + int countZero = 0; |
| 108 | + |
| 109 | + res[0] = startFrom; |
| 110 | + res[vec_len - 1] = upTo; |
| 111 | + for (size_t i = 1; i < vec_len - 1; i++) { |
| 112 | + if (dist_f(gen_f) > sparseRate) { |
| 113 | + res[i] = static_cast<int8_t>(dist(gen)); |
| 114 | + } else { |
| 115 | + res[i] = 0; |
| 116 | + countZero++; |
| 117 | + } |
| 118 | + } |
| 119 | + |
| 120 | + std::cout << "Sparse rate = " << countZero * 100 / vec_len << "%" << std::endl; |
| 121 | + |
| 122 | + return res; |
| 123 | + } |
| 124 | + |
| 125 | + std::shared_ptr<Node> makeMatMulRelaxed(const Output<Node>& A, |
| 126 | + const ov::PartialShape& inShapeB, |
| 127 | + ElementType weiType, |
| 128 | + bool transpose_a, |
| 129 | + bool transpose_b, |
| 130 | + const std::vector<int8_t>& weiData) { |
| 131 | + using namespace ngraph; |
| 132 | + auto inputParamsFP32 = builder::makeDynamicParams(element::f32, {A.get_partial_shape()}); |
| 133 | + auto matrixBFP32 = builder::makeDynamicInputLayer(element::f32, helpers::InputLayerType::CONSTANT, inShapeB); |
| 134 | + |
| 135 | + auto matMulRelaxed = std::make_shared<op::TypeRelaxed<opset3::MatMul>>( |
| 136 | + *as_type_ptr<opset3::MatMul>(builder::makeMatMul(inputParamsFP32[0], matrixBFP32, transpose_a, transpose_b)), |
| 137 | + element::f32); |
| 138 | + |
| 139 | + auto matrixB = ngraph::builder::makeConstant<int8_t>(weiType, inShapeB.get_shape(), weiData); |
| 140 | + |
| 141 | + auto matMul = matMulRelaxed->copy_with_new_inputs({A, matrixB}); |
| 142 | + |
| 143 | + return matMul; |
| 144 | + } |
| 145 | + |
| 146 | + void SetUp() override { |
| 147 | + abs_threshold = 0.5f; |
| 148 | + using ngraph::pass::ConvertPrecision; |
| 149 | + |
| 150 | + ShapeRelatedParams shapeRelatedParams; |
| 151 | + ElementType inType, weiType, outType; |
| 152 | + fusingSpecificParams fusingParams; |
| 153 | + CPUSpecificParams cpuParams; |
| 154 | + std::map<std::string, std::string> additionalConfig; |
| 155 | + float weiSparseRate; |
| 156 | + |
| 157 | + std::tie(shapeRelatedParams, inType, weiType, outType, fusingParams, cpuParams, additionalConfig, |
| 158 | + weiSparseRate) = this->GetParam(); |
| 159 | + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; |
| 160 | + |
| 161 | + configuration.insert(additionalConfig.begin(), additionalConfig.end()); |
| 162 | + |
| 163 | + init_input_shapes(shapeRelatedParams.inputShapes); |
| 164 | + |
| 165 | + bool transpA = shapeRelatedParams.transpose.first; |
| 166 | + bool transpB = shapeRelatedParams.transpose.second; |
| 167 | + |
| 168 | + if (transpA) { |
| 169 | + transpose(inputDynamicShapes[0]); |
| 170 | + for (auto& shapes : targetStaticShapes) { |
| 171 | + transpose(shapes[0]); |
| 172 | + } |
| 173 | + } |
| 174 | + if (transpB) { |
| 175 | + transpose(inputDynamicShapes[1]); |
| 176 | + for (auto& shapes : targetStaticShapes) { |
| 177 | + transpose(shapes[1]); |
| 178 | + } |
| 179 | + } |
| 180 | + |
| 181 | + const auto& inShapeA = inputDynamicShapes[0]; |
| 182 | + const auto& inShapeB = inputDynamicShapes[1]; |
| 183 | + |
| 184 | + std::tie(postOpMgrPtr, fusedOps) = fusingParams; |
| 185 | + |
| 186 | + configuration.insert(additionalConfig.begin(), additionalConfig.end()); |
| 187 | + |
| 188 | + cpuNodeType = "FullyConnected"; |
| 189 | + selectedType = makeSelectedTypeStr(selectedType, element::i8); |
| 190 | + |
| 191 | + auto params = builder::makeDynamicParams(inType, {inShapeA}); |
| 192 | + auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes<opset1::Parameter>(params)); |
| 193 | + |
| 194 | + auto matrixB = builder::makeDynamicInputLayer(element::f32, helpers::InputLayerType::CONSTANT, inShapeB); |
| 195 | + |
| 196 | + auto weiData = generateSparseVector(ngraph::shape_size(inShapeB.get_shape()), weiSparseRate); |
| 197 | + auto matMul = makeMatMulRelaxed(paramOuts[0], inShapeB, weiType, transpA, transpB, weiData); |
| 198 | + |
| 199 | + function = makeNgraphFunction(element::f32, params, matMul, cpuNodeType); |
| 200 | + |
| 201 | + checkFusingPosition = false; |
| 202 | + |
| 203 | + targetDevice = CommonTestUtils::DEVICE_CPU; |
| 204 | + |
| 205 | + functionRefs = ov::clone_model(*function); |
| 206 | + ngraph::pass::ConvertPrecision<ngraph::element::Type_t::i8, ngraph::element::Type_t::f32>().run_on_function(functionRefs); |
| 207 | + ngraph::pass::ConvertPrecision<ngraph::element::Type_t::u8, ngraph::element::Type_t::f32>().run_on_function(functionRefs); |
| 208 | + functionRefs->validate_nodes_and_infer_types(); |
| 209 | + } |
| 210 | +}; |
| 211 | + |
| 212 | +TEST_P(MatMulSparseCPUTest, CompareWithRefs) { |
| 213 | + SKIP_IF_CURRENT_TEST_IS_DISABLED() |
| 214 | + |
| 215 | + run(); |
| 216 | + CheckPluginRelatedResults(compiledModel, cpuNodeType); |
| 217 | +} |
| 218 | + |
| 219 | +namespace { |
| 220 | + |
| 221 | +/* ============= Common params ============= */ |
| 222 | + |
| 223 | +std::vector<CPUSpecificParams> filterSpecificParams(bool sparseExpected) { |
| 224 | + std::vector<CPUSpecificParams> specificParams; |
| 225 | + if (with_cpu_x86_avx512_core_amx()) { |
| 226 | + if (sparseExpected) { |
| 227 | + specificParams.push_back(CPUSpecificParams{{}, {}, {"brgemm_avx512_amx"}, "brgemm_avx512_amx_sparse"}); |
| 228 | + } else { |
| 229 | + specificParams.push_back(CPUSpecificParams{{}, {}, {"brgemm_avx512_amx"}, "brgemm_avx512_amx"}); |
| 230 | + } |
| 231 | + } |
| 232 | + |
| 233 | + return specificParams; |
| 234 | +} |
| 235 | + |
| 236 | +/* ============= FullyConnected ============= */ |
| 237 | +namespace fullyConnected { |
| 238 | + |
| 239 | +// cpu (sparse) configs |
| 240 | +const std::map<std::string, std::string> emptyConfig = {}; |
| 241 | +const std::map<std::string, std::string> SparseRate50 = {{CPUConfigParams::KEY_CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE, "0.5"}}; |
| 242 | +const std::map<std::string, std::string> SparseRate80 = {{CPUConfigParams::KEY_CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE, "0.8"}}; |
| 243 | + |
| 244 | +const std::vector<ShapeRelatedParams> IS2D_sparse_smoke = { |
| 245 | + {static_shapes_to_test_representation({{64, 64}, {64, 64}}), {false, true}}, |
| 246 | + {static_shapes_to_test_representation({{71, 64}, {64, 64}}), {false, true}}, |
| 247 | + {static_shapes_to_test_representation({{3, 128}, {128, 64}}), {false, true}}, |
| 248 | + {static_shapes_to_test_representation({{71, 64}, {64, 128}}), {false, true}}, |
| 249 | + |
| 250 | + { |
| 251 | + { |
| 252 | + {{-1, -1}, {{20, 64}, {20, 64}}}, |
| 253 | + {{64, 128}, {{64, 128}, {64, 128}}} |
| 254 | + }, |
| 255 | + {false, true} |
| 256 | + }, |
| 257 | + |
| 258 | + { |
| 259 | + { |
| 260 | + {{{0, 100}, {0, 64}}, {{20, 64}, {14, 64}, {20, 64}, {14, 64}}}, |
| 261 | + {{64, 128}, {{64, 128}, {64, 128}, {64, 128}, {64, 128}}} |
| 262 | + }, |
| 263 | + {false, true} |
| 264 | + }, |
| 265 | +}; |
| 266 | + |
| 267 | +const auto testParams2D_i8_smoke = ::testing::Combine(::testing::ValuesIn(IS2D_sparse_smoke), |
| 268 | + ::testing::Values(ElementType::i8, ElementType::u8), |
| 269 | + ::testing::Values(ElementType::i8), |
| 270 | + ::testing::Values(ElementType::f32), |
| 271 | + ::testing::Values(emptyFusingSpec), |
| 272 | + ::testing::ValuesIn(filterSpecificParams(false)), |
| 273 | + ::testing::Values(emptyConfig, SparseRate80), |
| 274 | + ::testing::Values(0.7)); |
| 275 | + |
| 276 | +INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_I8, MatMulSparseCPUTest, testParams2D_i8_smoke, |
| 277 | + MatMulSparseCPUTest::getTestCaseName); |
| 278 | + |
| 279 | +const auto testParams2D_i8_sparse_smoke = ::testing::Combine(::testing::ValuesIn(IS2D_sparse_smoke), |
| 280 | + ::testing::Values(ElementType::i8, ElementType::u8), |
| 281 | + ::testing::Values(ElementType::i8), |
| 282 | + ::testing::Values(ElementType::f32), |
| 283 | + ::testing::Values(emptyFusingSpec), |
| 284 | + ::testing::ValuesIn(filterSpecificParams(true)), |
| 285 | + ::testing::Values(SparseRate50), |
| 286 | + ::testing::Values(0.7)); |
| 287 | + |
| 288 | +INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_I8_sparse, MatMulSparseCPUTest, testParams2D_i8_sparse_smoke, |
| 289 | + MatMulSparseCPUTest::getTestCaseName); |
| 290 | + |
| 291 | +const std::vector<ShapeRelatedParams> IS3D_sparse_smoke = { |
| 292 | + {static_shapes_to_test_representation({{1, 64, 64}, {64, 64}}), {false, true}}, |
| 293 | + {static_shapes_to_test_representation({{3, 71, 64}, {64, 64}}), {false, true}}, |
| 294 | + {static_shapes_to_test_representation({{3, 5, 128}, {128, 64}}), {false, true}}, |
| 295 | + {static_shapes_to_test_representation({{1, 71, 64}, {64, 128}}), {false, true}}, |
| 296 | + |
| 297 | + { |
| 298 | + { |
| 299 | + {{-1, -1, 64}, {{1, 5, 64}, {1, 10, 64}, {1, 5, 64}, {1, 10, 64}}}, |
| 300 | + {{64, 128}, {{64, 128}, {64, 128}}} |
| 301 | + }, |
| 302 | + {false, true} |
| 303 | + }, |
| 304 | + |
| 305 | + // todo: [av] investigate "Primitive descriptor was not found" error for this case |
| 306 | + // { |
| 307 | + // { |
| 308 | + // {{{0, 60}, {0, 60}, {0, 64}}}, {{1, 3, 64}, {1, 7, 64}}}, |
| 309 | + // {{64, 64}, {{64, 64}, {64, 64}}} |
| 310 | + // }, |
| 311 | + // {false, true} |
| 312 | + // }, |
| 313 | +}; |
| 314 | + |
| 315 | +const auto testParams3D_i8_smoke = ::testing::Combine(::testing::ValuesIn(IS3D_sparse_smoke), |
| 316 | + ::testing::Values(ElementType::i8, ElementType::u8), |
| 317 | + ::testing::Values(ElementType::i8), |
| 318 | + ::testing::Values(ElementType::f32), |
| 319 | + ::testing::Values(emptyFusingSpec), |
| 320 | + ::testing::ValuesIn(filterSpecificParams(false)), |
| 321 | + ::testing::Values(emptyConfig, SparseRate80), |
| 322 | + ::testing::Values(0.7)); |
| 323 | + |
| 324 | +INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_I8, MatMulSparseCPUTest, testParams3D_i8_smoke, |
| 325 | + MatMulSparseCPUTest::getTestCaseName); |
| 326 | + |
| 327 | +const auto testParams3D_i8_sparse_smoke = ::testing::Combine(::testing::ValuesIn(IS3D_sparse_smoke), |
| 328 | + ::testing::Values(ElementType::i8, ElementType::u8), |
| 329 | + ::testing::Values(ElementType::i8), |
| 330 | + ::testing::Values(ElementType::f32), |
| 331 | + ::testing::Values(emptyFusingSpec), |
| 332 | + ::testing::ValuesIn(filterSpecificParams(true)), |
| 333 | + ::testing::Values(SparseRate50), |
| 334 | + ::testing::Values(0.7)); |
| 335 | + |
| 336 | +INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_I8_sparse, MatMulSparseCPUTest, testParams3D_i8_sparse_smoke, |
| 337 | + MatMulSparseCPUTest::getTestCaseName); |
| 338 | + |
| 339 | +} // namespace fullyConnected |
| 340 | + |
| 341 | +} // namespace |
| 342 | + |
| 343 | +} // namespace CPULayerTestsDefinitions |
0 commit comments