Fix ReadValue variable memory by using fake alignment layout #30516

Open
wants to merge 8 commits into master
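For context, here is a minimal, self-contained sketch of the fake-alignment arithmetic this PR compensates for. The `align_to` helper and the 16/64 alignment bases mirror the new unit test below; `fake_aligned_batch` is a hypothetical helper for illustration only, not the production fake-alignment pass.

```cpp
#include <cstddef>
#include <iostream>

// Round x up to the nearest multiple of align (same arithmetic as the align_to() used in the test).
static size_t align_to(size_t x, size_t align) {
    return (x + align - 1) / align * align;
}

// Sketch of the fake-aligned batch the unit test expects for an FC output:
// 4-bit or 8-bit weights with a batch of at least 256 use a 64-element base, otherwise 16.
static size_t fake_aligned_batch(size_t batch, bool low_precision_weights) {
    const size_t base = (low_precision_weights && batch >= 256) ? 64 : 16;
    return align_to(batch, base);
}

int main() {
    const size_t batch = 3 * 590;  // input_b * input_f from the test below
    std::cout << "real batch:         " << batch << "\n";                              // 1770
    std::cout << "fake-aligned batch: " << fake_aligned_batch(batch, false) << "\n";   // 1776
    // The FC output memory is therefore larger than the read_value variable layout,
    // which is why read_value now copies only input_layout.bytes_count() bytes.
    return 0;
}
```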
3 changes: 2 additions & 1 deletion src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp
@@ -59,7 +59,8 @@ struct read_value_impl : public typed_primitive_impl<read_value> {

if (!variable.is_set()) {
if (instance.get_impl_params()->input_layouts.size() > 0) {
variable.get_memory()->copy_from(stream, instance.dep_memory(0), true);
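// Copy only the bytes described by the real input layout: the producer's allocation
// (e.g. a fake-aligned fully connected output) can be larger than the variable layout.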
const auto copy_size = instance.get_impl_params()->get_input_layout(0).bytes_count();
variable.get_memory()->copy_from(stream, instance.dep_memory(0), 0, 0, copy_size, true);
} else {
variable.get_memory()->fill(stream);
}
@@ -160,6 +160,13 @@ struct fully_connected_onednn : typed_primitive_onednn_impl<fully_connected> {
if (prim_input_size < 4) {
auto output_pshape = output_layout.get_partial_shape();
if (output_pshape.size() > prim_input_size) {
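// Squeeze out static dimensions of size 1 first, so that the resize() below only drops redundant dims.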
std::vector<ov::Dimension> new_dims;
for (const auto& dim : output_pshape) {
if (!dim.is_static() || dim.get_length() != 1)
new_dims.push_back(dim);
}
ov::PartialShape new_shape(new_dims);
output_pshape = new_shape;
output_pshape.resize(prim_input_size);
output_layout.set_partial_shape(output_pshape);
}
84 changes: 82 additions & 2 deletions src/plugins/intel_gpu/tests/unit/test_cases/variable.cpp
@@ -5,11 +5,14 @@
#include "intel_gpu/plugin/remote_context.hpp"
#include "intel_gpu/plugin/variable_state.hpp"
#include "intel_gpu/runtime/memory.hpp"
#include "random_generator.hpp"
#include "test_utils.h"

#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/eltwise.hpp>
#include <intel_gpu/primitives/assign.hpp>
#include <intel_gpu/primitives/data.hpp>
#include <intel_gpu/primitives/eltwise.hpp>
#include <intel_gpu/primitives/fully_connected.hpp>
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/read_value.hpp>

using namespace cldnn;
@@ -155,6 +158,79 @@ TEST(variable_test_common, exception_on_wrong_layout) {
test_exception_on_wrong_layout<float>(false);
}

template <typename T>
void test_variable_copy_from_fake_aligned_fc(bool is_caching_test) {
tests::random_generator rg;
auto& engine = get_test_engine();

const int32_t input_b = 3, input_f = 590, input_x = 768, weight_b = 768;
auto input_dyn_layout = layout{ ov::PartialShape{ ov::Dimension(), input_f, input_x }, data_types::f32, format::bfyx };
auto input_data = engine.allocate_memory(layout{ ov::PartialShape{ input_b, input_f, input_x }, data_types::f32, format::bfyx });
auto input_data_rnd = rg.generate_random_1d<float>(input_b * input_f * input_x, 0, 1);
set_values(input_data, input_data_rnd);

auto weights_data_layout = layout{ov::PartialShape{weight_b, input_x}, data_types::f32, format::os_iyx_osv32};
auto weights_data = engine.allocate_memory(weights_data_layout);
auto weights_data_rnd = rg.generate_random_1d<float>(weights_data_layout.count(), 0, 1);
set_values(weights_data, weights_data_rnd);

auto bias_data_layout = layout{ov::PartialShape{1, 1, weight_b}, data_types::f32, format::bfyx};
auto bias_data = engine.allocate_memory(bias_data_layout);
auto bias_data_rnd = rg.generate_random_1d<float>(bias_data_layout.count(), 0, 1);
set_values(bias_data, bias_data_rnd);

const layout variable_layout{{input_b, input_f, input_x}, data_types::f32, format::bfyx};

topology topology;
topology.add(input_layout("input", input_dyn_layout));
topology.add(data("weights", weights_data));
topology.add(data("bias", bias_data));
topology.add(fully_connected("fc", input_info("input"), "weights", "bias"));
topology.add(read_value{"read_value", { input_info("fc") }, "v0", { variable_layout }});
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);

auto context = std::make_shared<RemoteContextImpl>("GPU", std::vector<cldnn::device::ptr>{engine.get_device()});
auto variable = std::make_shared<VariableState>(VariableStateInfo{"v0", variable_layout}, context, network->get_shape_predictor());
network->set_variable("v0", variable);
network->set_input_data("input", input_data);

auto outputs = network->execute();

// fc
const auto fc_output_memory = network->get_output_memory("fc");
const auto fc_output_shape = fc_output_memory->get_layout().get_partial_shape().to_shape();
auto fc_output_batch_size = std::accumulate(fc_output_shape.begin(),
fc_output_shape.end() - 1,
size_t{1},
std::multiplies<size_t>());

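// Reproduce the fake-alignment base the test expects from the FC implementation:
// 4-bit or 8-bit weights with a batch of at least 256 align to 64, otherwise 16.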
auto weights_layout_dt = weights_data_layout.data_type;
auto is_4bit = weights_layout_dt == data_types::i4 || weights_layout_dt == data_types::u4;
auto is_8bit = weights_layout_dt == data_types::i8 || weights_layout_dt == data_types::u8;
auto is_extra_alignment_needed = input_b >= 256;
auto fake_align_base = (is_4bit || is_8bit) && is_extra_alignment_needed ? 64 : 16;

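// input_b * input_f = 3 * 590 = 1770, aligned up to a multiple of 16 -> 1776.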
const auto fc_output_batch_size_aligned = align_to(input_b * input_f, fake_align_base);
ASSERT_EQ(fc_output_batch_size, fc_output_batch_size_aligned);

// read_value
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "read_value");

auto output_memory = outputs.begin()->second.get_memory();
auto output_layout = output_memory->get_layout();
ASSERT_EQ(output_layout, variable_layout);
}

TEST(variable_test_common, variable_copy_from_fc) {
test_variable_copy_from_fake_aligned_fc<float>(false);
}

template <typename T>
void test_different_output_data_type(bool is_caching_test) {
auto& engine = get_test_engine();
@@ -268,6 +344,10 @@ TEST(variable_test_common, exception_on_wrong_layout_cached) {
test_exception_on_wrong_layout<float>(true);
}

TEST(variable_test_common, variable_copy_from_fc_cached) {
test_variable_copy_from_fake_aligned_fc<float>(true);
}

TEST(variable_test_common, different_output_data_type_cached) {
test_different_output_data_type<int16_t>(true);
}