Skip to content

[GPU] Add unit test for OneDNN FC w/ int4 compressed weight which has the odd dimension #30359

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4877,6 +4877,94 @@ TEST(fully_connected_3d_onednn_gpu, compressed_int4_scale_static) {
ASSERT_NEAR(output_ptr_ref[i], output_ptr[i], 9.0) << "i = " << i << std::endl;
}
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could you attach this to an existing test case? I guess we can reuse most of the code.

// Verifies an OneDNN fully-connected primitive with int4-compressed weights
// where the input-feature dimension is odd (63), i.e. not a multiple of the
// u4 packing factor of 2. The OneDNN result is compared element-wise against
// the OCL reference kernel ("fully_connected_gpu_bfyx_ref").
TEST(fully_connected_2d_onednn_gpu, compressed_int4_odd_dim) {
    tests::random_generator rg(GET_SUITE_NAME);

    auto& engine = get_test_engine();
    // OneDNN paths require immad (systolic) support; skip on other devices.
    if (!engine.get_device_info().supports_immad)
        return;

    long int batch_num = 1;
    long int ifm_num = 63;               // odd on purpose — exercises unpadded int4 packing
    long int ofm_num = 32;
    long int scales_group_size = 63;     // one scale group spanning the whole ifm axis
    cldnn::layout input_dyn_layout({ -1, ifm_num}, data_types::f16, format::bfyx);
    auto input_mem = engine.allocate_memory({ { batch_num, ifm_num}, data_types::f16, format::bfyx });
    auto weights_mem = engine.allocate_memory({ {ofm_num, ifm_num}, data_types::u4, format::bfyx });
    auto scale_mem = engine.allocate_memory({ {ofm_num, ifm_num / scales_group_size}, data_types::f16, format::bfyx });

    auto input_data = rg.generate_random_1d<ov::float16>(batch_num * ifm_num, -2.0f, 2.0f);
    set_values(input_mem, input_data);

    // u4 weights are packed two per byte, hence the / 2 element count.
    auto weights_data = rg.generate_random_1d<uint8_t>(ofm_num * ifm_num / 2, 0, 10);
    set_values(weights_mem, weights_data);

    auto scale_data = rg.generate_random_1d<ov::float16>(ofm_num * ifm_num / scales_group_size, -4.0f, 4.0f);
    set_values(scale_mem, scale_data);

    // No zero-point and no bias; 2D input / 2D weights.
    auto fc_prim = fully_connected("fc_prim", input_info("input"), "weights", "", "scale", "", data_types::f16, 2, 2);

    // Runs the same graph through the OCL reference implementation to obtain
    // golden output values.
    auto get_ref_results = [&]() {
        topology topology(
            input_layout("input", input_mem->get_layout()),
            data("weights", weights_mem),
            data("scale", scale_mem),
            fc_prim
        );

        auto config = get_test_default_config(engine);
        ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl };
        config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} }));

        network network(engine, topology, config);
        network.set_input_data("input", input_mem);

        auto outputs = network.execute();
        OPENVINO_ASSERT(outputs.size() == 1);
        OPENVINO_ASSERT(outputs.begin()->first == "fc_prim");

        auto output_layout = outputs.begin()->second.get_layout();
        auto output_mem = outputs.begin()->second.get_memory();

        return engine.reinterpret_buffer(*output_mem, output_layout);
    };

    // Test graph: dynamic batch input, forced onto the OneDNN implementation.
    topology topology(
        input_layout("input", input_dyn_layout),
        data("weights", weights_mem),
        data("scale", scale_mem),
        fc_prim
    );

    auto config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));
    config.set_property(ov::intel_gpu::use_onednn(true));
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    ov::intel_gpu::ImplementationDesc fc_impl = { format::bfyx, "", impl_types::onednn };
    config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl} }));

    network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), false);

    network->set_input_data("input", input_mem);

    auto outputs = network->execute();
    ASSERT_EQ(outputs.size(), size_t(1));
    ASSERT_EQ(outputs.begin()->first, "fc_prim");

    auto output_mem = outputs.begin()->second.get_memory();
    cldnn::mem_lock<ov::float16> output_ptr(output_mem, get_test_stream());

    auto ref_output_mem = get_ref_results();
    cldnn::mem_lock<ov::float16> output_ptr_ref(ref_output_mem, get_test_stream());

    // Wide tolerance (9.0) matches the sibling compressed-int4 tests: values
    // can be large and f16 accumulation differs between implementations.
    for (size_t i = 0; i < output_ptr_ref.size(); i++) {
        ASSERT_NEAR(output_ptr_ref[i], output_ptr[i], 9.0) << "i = " << i << std::endl;
    }
}

#endif

TEST_F(fully_connected_gpu_tests, compressed_scale_zp_bias) {
Expand Down
Loading