Skip to content

Commit 6343650

Browse files
authored
[GPU] Fix SmolVLA inference fail (openvinotoolkit#33125)
SmolVLA has a VariadicSplit op immediately before a FullyConnected. FullyConnected cannot accept an input that has no default output, and VariadicSplit produces two outputs. Prevent the fc_convert_fusion transformation from running when the input node has multiple outputs. Additionally, add u8 type support to cum_sum. CVS-174293
1 parent b2220e9 commit 6343650

5 files changed

Lines changed: 61 additions & 8 deletions

File tree

src/plugins/intel_gpu/src/graph/impls/ocl/cum_sum.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ namespace detail {
9292

9393
attach_cum_sum_impl::attach_cum_sum_impl() {
9494
implementation_map<cum_sum>::add(impl_types::ocl, shape_types::any, typed_primitive_impl_ocl<cum_sum>::create<cum_sum_impl>, {
95+
std::make_tuple(data_types::u8, format::bfyx),
96+
std::make_tuple(data_types::u8, format::bfzyx),
97+
std::make_tuple(data_types::u8, format::bfwzyx),
9598
std::make_tuple(data_types::i32, format::bfyx),
9699
std::make_tuple(data_types::i32, format::bfzyx),
97100
std::make_tuple(data_types::i32, format::bfwzyx),

src/plugins/intel_gpu/src/kernel_selector/kernels/cum_sum/cum_sum_kernel_ref.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,13 @@
1010
namespace kernel_selector {
1111
ParamsKey CumSumKernelRef::GetSupportedKey() const {
1212
ParamsKey k;
13+
14+
k.EnableInputDataType(Datatype::UINT8);
1315
k.EnableInputDataType(Datatype::F16);
1416
k.EnableInputDataType(Datatype::F32);
1517
k.EnableInputDataType(Datatype::INT32);
1618
k.EnableInputDataType(Datatype::INT64);
19+
k.EnableOutputDataType(Datatype::UINT8);
1720
k.EnableOutputDataType(Datatype::F16);
1821
k.EnableOutputDataType(Datatype::F32);
1922
k.EnableOutputDataType(Datatype::INT32);

src/plugins/intel_gpu/src/plugin/transformations/fc_convert_fusion.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ FullyConnectedConvertFusion::FullyConnectedConvertFusion() {
3131
ov::matcher_pass_callback callback = [=](Matcher& m) {
3232
const auto& pattern_map = m.get_pattern_value_map();
3333

34-
const auto& m_data = pattern_map.at(data).get_node_shared_ptr();
3534
const auto& m_weights = pattern_map.at(weights).get_node_shared_ptr();
3635
const auto& m_bias = pattern_map.at(bias).get_node_shared_ptr();
3736
const auto& m_convert = pattern_map.at(convert).get_node_shared_ptr();
@@ -42,17 +41,18 @@ FullyConnectedConvertFusion::FullyConnectedConvertFusion() {
4241
auto it = pattern_map.find(fully_connected);
4342
if (it != pattern_map.end()) {
4443
m_fc = it->second.get_node_shared_ptr();
45-
new_fc = std::make_shared<op::FullyConnected>(m_data, m_weights, m_bias, output_type);
44+
new_fc = std::make_shared<op::FullyConnected>(m_fc->input_value(0), m_weights, m_bias, output_type);
4645
} else {
4746
m_fc = pattern_map.at(fully_connected_compressed).get_node_shared_ptr();
47+
4848
if (m_fc->input_values().size() == 4)
49-
new_fc = std::make_shared<op::FullyConnectedCompressed>(m_data,
49+
new_fc = std::make_shared<op::FullyConnectedCompressed>(m_fc->input_value(0),
5050
m_weights,
5151
m_bias,
5252
m_fc->input_value(3),
5353
output_type);
5454
else
55-
new_fc = std::make_shared<op::FullyConnectedCompressed>(m_data,
55+
new_fc = std::make_shared<op::FullyConnectedCompressed>(m_fc->input_value(0),
5656
m_weights,
5757
m_bias,
5858
m_fc->input_value(3),

src/plugins/intel_gpu/tests/unit/test_cases/cum_sum_gpu_test.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,8 @@ class cum_sum_gpu : public ::testing::TestWithParam<cum_sum_params> {
158158
return data_types::i32;
159159
else if (std::is_same<input_type, int64_t>::value)
160160
return data_types::i64;
161+
else if (std::is_same<input_type, unsigned char>::value)
162+
return data_types::u8;
161163
else
162164
throw std::runtime_error("Unsupported cum sum data type in cum_sum_gpu_test.cpp");
163165
}
@@ -215,11 +217,13 @@ class cum_sum_gpu_fp16 : public ::cum_sum_gpu<cum_sum_test_params, ov::float16,
215217
class cum_sum_gpu_fp32 : public ::cum_sum_gpu<cum_sum_test_params, float, float> {};
216218
class cum_sum_gpu_int32 : public ::cum_sum_gpu<cum_sum_test_params, int32_t, int32_t> {};
217219
class cum_sum_gpu_int64 : public ::cum_sum_gpu<cum_sum_test_params, int64_t, int64_t> {};
220+
class cum_sum_gpu_uint8 : public ::cum_sum_gpu<cum_sum_test_params, unsigned char, unsigned char> {};
218221

219222
TEST_P(cum_sum_gpu_fp16, basic) { auto p = GetParam(); execute(p); }
220223
TEST_P(cum_sum_gpu_fp32, basic) { auto p = GetParam(); execute(p); }
221224
TEST_P(cum_sum_gpu_int32, basic) { auto p = GetParam(); execute(p); }
222225
TEST_P(cum_sum_gpu_int64, basic) { auto p = GetParam(); execute(p); }
226+
TEST_P(cum_sum_gpu_uint8, basic) { auto p = GetParam(); execute(p); }
223227

224228
namespace {
225229
std::vector<std::vector<int>> axes = {
@@ -237,31 +241,37 @@ INSTANTIATE_TEST_SUITE_P(axis_0, cum_sum_gpu_fp16, ::testing::Combine(CASE_CUM_S
237241
INSTANTIATE_TEST_SUITE_P(axis_0, cum_sum_gpu_fp32, ::testing::Combine(CASE_CUM_SUM_AXIS_0));
238242
INSTANTIATE_TEST_SUITE_P(axis_0, cum_sum_gpu_int32, ::testing::Combine(CASE_CUM_SUM_AXIS_0));
239243
INSTANTIATE_TEST_SUITE_P(axis_0, cum_sum_gpu_int64, ::testing::Combine(CASE_CUM_SUM_AXIS_0));
244+
INSTANTIATE_TEST_SUITE_P(axis_0, cum_sum_gpu_uint8, ::testing::Combine(CASE_CUM_SUM_AXIS_0));
240245

241246
INSTANTIATE_TEST_SUITE_P(axis_1, cum_sum_gpu_fp16, ::testing::Combine(CASE_CUM_SUM_AXIS_1));
242247
INSTANTIATE_TEST_SUITE_P(axis_1, cum_sum_gpu_fp32, ::testing::Combine(CASE_CUM_SUM_AXIS_1));
243248
INSTANTIATE_TEST_SUITE_P(axis_1, cum_sum_gpu_int32, ::testing::Combine(CASE_CUM_SUM_AXIS_1));
244249
INSTANTIATE_TEST_SUITE_P(axis_1, cum_sum_gpu_int64, ::testing::Combine(CASE_CUM_SUM_AXIS_1));
250+
INSTANTIATE_TEST_SUITE_P(axis_1, cum_sum_gpu_uint8, ::testing::Combine(CASE_CUM_SUM_AXIS_1));
245251

246252
INSTANTIATE_TEST_SUITE_P(axis_2, cum_sum_gpu_fp16, ::testing::Combine(CASE_CUM_SUM_AXIS_2));
247253
INSTANTIATE_TEST_SUITE_P(axis_2, cum_sum_gpu_fp32, ::testing::Combine(CASE_CUM_SUM_AXIS_2));
248254
INSTANTIATE_TEST_SUITE_P(axis_2, cum_sum_gpu_int32, ::testing::Combine(CASE_CUM_SUM_AXIS_2));
249255
INSTANTIATE_TEST_SUITE_P(axis_2, cum_sum_gpu_int64, ::testing::Combine(CASE_CUM_SUM_AXIS_2));
256+
INSTANTIATE_TEST_SUITE_P(axis_2, cum_sum_gpu_uint8, ::testing::Combine(CASE_CUM_SUM_AXIS_2));
250257

251258
INSTANTIATE_TEST_SUITE_P(axis_3, cum_sum_gpu_fp16, ::testing::Combine(CASE_CUM_SUM_AXIS_3));
252259
INSTANTIATE_TEST_SUITE_P(axis_3, cum_sum_gpu_fp32, ::testing::Combine(CASE_CUM_SUM_AXIS_3));
253260
INSTANTIATE_TEST_SUITE_P(axis_3, cum_sum_gpu_int32, ::testing::Combine(CASE_CUM_SUM_AXIS_3));
254261
INSTANTIATE_TEST_SUITE_P(axis_3, cum_sum_gpu_int64, ::testing::Combine(CASE_CUM_SUM_AXIS_3));
262+
INSTANTIATE_TEST_SUITE_P(axis_3, cum_sum_gpu_uint8, ::testing::Combine(CASE_CUM_SUM_AXIS_3));
255263

256264
INSTANTIATE_TEST_SUITE_P(axis_4, cum_sum_gpu_fp16, ::testing::Combine(CASE_CUM_SUM_AXIS_4));
257265
INSTANTIATE_TEST_SUITE_P(axis_4, cum_sum_gpu_fp32, ::testing::Combine(CASE_CUM_SUM_AXIS_4));
258266
INSTANTIATE_TEST_SUITE_P(axis_4, cum_sum_gpu_int32, ::testing::Combine(CASE_CUM_SUM_AXIS_4));
259267
INSTANTIATE_TEST_SUITE_P(axis_4, cum_sum_gpu_int64, ::testing::Combine(CASE_CUM_SUM_AXIS_4));
268+
INSTANTIATE_TEST_SUITE_P(axis_4, cum_sum_gpu_uint8, ::testing::Combine(CASE_CUM_SUM_AXIS_4));
260269

261270
INSTANTIATE_TEST_SUITE_P(axis_5, cum_sum_gpu_fp16, ::testing::Combine(CASE_CUM_SUM_AXIS_5));
262271
INSTANTIATE_TEST_SUITE_P(axis_5, cum_sum_gpu_fp32, ::testing::Combine(CASE_CUM_SUM_AXIS_5));
263272
INSTANTIATE_TEST_SUITE_P(axis_5, cum_sum_gpu_int32, ::testing::Combine(CASE_CUM_SUM_AXIS_5));
264273
INSTANTIATE_TEST_SUITE_P(axis_5, cum_sum_gpu_int64, ::testing::Combine(CASE_CUM_SUM_AXIS_5));
274+
INSTANTIATE_TEST_SUITE_P(axis_5, cum_sum_gpu_uint8, ::testing::Combine(CASE_CUM_SUM_AXIS_5));
265275

266276
INSTANTIATE_TEST_SUITE_P(export_import, cum_sum_gpu_int64,
267277
::testing::Combine(::testing::Values(5), ::testing::Values(5), ::testing::Values(5),

src/plugins/intel_gpu/tests/unit/transformations/fc_convert_fusion_test.cpp

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "intel_gpu/op/fully_connected.hpp"
2121
#include "intel_gpu/op/fully_connected_compressed.hpp"
2222
#include "intel_gpu/op/placeholder.hpp"
23+
#include "openvino/op/variadic_split.hpp"
2324

2425
using namespace testing;
2526
using namespace ov::intel_gpu;
@@ -28,7 +29,7 @@ TEST_F(TransformationTestsF, FullyConnectedConvertFusionTest1) {
2829
{
2930
auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{ -1, 16 });
3031
auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 });
31-
auto no_bias = std::make_shared<ov::intel_gpu::op::Placeholder>();
32+
auto no_bias = std::make_shared<ov::intel_gpu::op::Placeholder>();
3233
auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 });
3334
auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 });
3435
auto fc_compressed = std::make_shared<ov::intel_gpu::op::FullyConnectedCompressed>(input, weights_const, no_bias, scale_const, zp_const);
@@ -40,7 +41,7 @@ TEST_F(TransformationTestsF, FullyConnectedConvertFusionTest1) {
4041
{
4142
auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{ -1, 16 });
4243
auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 });
43-
auto no_bias = std::make_shared<ov::intel_gpu::op::Placeholder>();
44+
auto no_bias = std::make_shared<ov::intel_gpu::op::Placeholder>();
4445
auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 });
4546
auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 });
4647
auto fc_compressed = std::make_shared<ov::intel_gpu::op::FullyConnectedCompressed>(input, weights_const, no_bias, scale_const, zp_const, ov::element::f32);
@@ -53,7 +54,7 @@ TEST_F(TransformationTestsF, FullyConnectedConvertFusionTest2) {
5354
{
5455
auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::Shape{3, 2, 2});
5556
auto input2 = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{2, 2}, {1});
56-
auto no_bias = std::make_shared<ov::intel_gpu::op::Placeholder>();
57+
auto no_bias = std::make_shared<ov::intel_gpu::op::Placeholder>();
5758
auto matmul = std::make_shared<op::FullyConnected>(input1, input2, no_bias);
5859
auto convert = std::make_shared<ov::op::v0::Convert>(matmul, ov::element::f32);
5960

@@ -63,7 +64,43 @@ TEST_F(TransformationTestsF, FullyConnectedConvertFusionTest2) {
6364
{
6465
auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::Shape{3, 2, 2});
6566
auto input2 = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{2, 2}, {1});
66-
auto no_bias = std::make_shared<ov::intel_gpu::op::Placeholder>();
67+
auto no_bias = std::make_shared<ov::intel_gpu::op::Placeholder>();
68+
auto matmul = std::make_shared<op::FullyConnected>(input1, input2, no_bias, ov::element::f32);
69+
70+
model_ref = std::make_shared<ov::Model>(ov::OutputVector{matmul}, ov::ParameterVector{input1});
71+
}
72+
}
73+
74+
TEST_F(TransformationTestsF, FullyConnectedConvertFusionTest3) {
75+
{
76+
auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::Shape{1, 163, 960});
77+
auto axis_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {1});
78+
auto split_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{2}, {113, 50});
79+
auto split = std::make_shared<ov::op::v1::VariadicSplit>(input1, axis_const, split_const);
80+
auto input2 = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{720, 960}, {1});
81+
auto no_bias = std::make_shared<ov::intel_gpu::op::Placeholder>();
82+
83+
bool default_output_mismatch_exception = false;
84+
try {
85+
auto matmul = std::make_shared<op::FullyConnected>(split, input2, no_bias);
86+
} catch(std::exception& exc) {
87+
const std::string error = exc.what();
88+
default_output_mismatch_exception = error.find("Default output not supported") != std::string::npos;
89+
}
90+
91+
ASSERT_TRUE(default_output_mismatch_exception);
92+
93+
auto non_split_input = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::Shape{1, 50, 960});
94+
auto matmul = std::make_shared<op::FullyConnected>(non_split_input, input2, no_bias);
95+
auto convert = std::make_shared<ov::op::v0::Convert>(matmul, ov::element::f32);
96+
model = std::make_shared<ov::Model>(ov::OutputVector{convert}, ov::ParameterVector{non_split_input});
97+
manager.register_pass<FullyConnectedConvertFusion>();
98+
99+
}
100+
{
101+
auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::Shape{1, 50, 960});
102+
auto input2 = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{720, 960}, {1});
103+
auto no_bias = std::make_shared<ov::intel_gpu::op::Placeholder>();
67104
auto matmul = std::make_shared<op::FullyConnected>(input1, input2, no_bias, ov::element::f32);
68105

69106
model_ref = std::make_shared<ov::Model>(ov::OutputVector{matmul}, ov::ParameterVector{input1});

0 commit comments

Comments
 (0)