Skip to content

[ET-VK] Using push constants for buffer to image prepack nodes. #11252

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,13 @@ layout(std430) buffer;

${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
${layout_declare_buffer(B, "r", "nchw_in", "int")}
${layout_declare_ubo(B, "ivec4", "sizes")}

$if USE_PUSH_CONST:
layout(push_constant) uniform restrict Block {
ivec4 sizes;
};
$else:
${layout_declare_ubo(B, "ivec4", "sizes")}

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ nchw_to_bitw8_image_nobitw8buffer:
parameter_names_with_default_values:
STORAGE: texture3d
DTYPE: int8
USE_PUSH_CONST: True
generate_variant_forall:
STORAGE:
- VALUE: texture2d
Expand All @@ -17,3 +18,5 @@ nchw_to_bitw8_image_nobitw8buffer:
- VALUE: uint8
shader_variants:
- NAME: nchw_to_bitw8_image_nobitw8buffer
- NAME: nchw_to_bitw8_image_nobitw8buffer_no_pc
USE_PUSH_CONST: False
14 changes: 11 additions & 3 deletions backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,17 @@ layout(std430) buffer;

${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)}
${layout_declare_tensor(1, "r", "nchw_in", DTYPE, STORAGE)}
${layout_declare_ubo(2, "ivec4", "out_sizes")}
${layout_declare_ubo(3, "ivec4", "out_strides")}
${layout_declare_ubo(4, "int", "numel")}

$if USE_PUSH_CONST:
layout(push_constant) uniform restrict Block {
ivec4 out_sizes;
ivec4 out_strides;
int numel;
};
$else:
${layout_declare_ubo(2, "ivec4", "out_sizes")}
${layout_declare_ubo(3, "ivec4", "out_strides")}
${layout_declare_ubo(4, "int", "numel")}

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

Expand Down
3 changes: 3 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ nchw_to_buffer:
parameter_names_with_default_values:
DTYPE: float
STORAGE: buffer
USE_PUSH_CONST: True
generate_variant_forall:
DTYPE:
- VALUE: half
Expand All @@ -16,3 +17,5 @@ nchw_to_buffer:
- VALUE: int8
shader_variants:
- NAME: nchw_to_buffer
- NAME: nchw_to_buffer_no_pc
USE_PUSH_CONST: False
14 changes: 11 additions & 3 deletions backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,17 @@ layout(std430) buffer;

${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
${layout_declare_buffer(B, "r", "buf_in", DTYPE)}
${layout_declare_ubo(B, "ivec4", "sizes")}
$if not FROM_STAGING:
${layout_declare_ubo(B, "ivec4", "buf_strides")}

$if USE_PUSH_CONST:
layout(push_constant) uniform restrict Block {
ivec4 sizes;
$if not FROM_STAGING:
ivec4 buf_strides;
};
$else:
${layout_declare_ubo(B, "ivec4", "sizes")}
$if not FROM_STAGING:
${layout_declare_ubo(B, "ivec4", "buf_strides")}

#include "indexing_utils.h"

Expand Down
9 changes: 9 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ nchw_to_image:
STORAGE: texture3d
DTYPE: float
FROM_STAGING: True
USE_PUSH_CONST: True
generate_variant_forall:
DTYPE:
- VALUE: half
Expand All @@ -21,3 +22,11 @@ nchw_to_image:
STORAGE: texture2d
- NAME: clone_buffer_to_image
FROM_STAGING: False
- NAME: nchw_to_image_no_pc_texture3d
USE_PUSH_CONST: False
- NAME: nchw_to_image_no_pc_texture2d
STORAGE: texture2d
USE_PUSH_CONST: False
- NAME: clone_buffer_to_image_no_pc
FROM_STAGING: False
USE_PUSH_CONST: False
4 changes: 2 additions & 2 deletions backends/vulkan/runtime/graph/ops/impl/Clone.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,9 @@ void add_buffer_to_image_node(
// Input and Outputs
{{image, vkapi::kWrite}, {buffer, vkapi::kRead}},
// Parameter Buffers
{graph.sizes_ubo(image), graph.strides_ubo(buffer)},
// Push Constants
{},
// Push Constants
{graph.sizes_pc_of(image), graph.strides_pc_of(buffer)},
// Specialization Constants
{graph.hashed_layout_of(image)},
// Resize Args
Expand Down
5 changes: 3 additions & 2 deletions backends/vulkan/runtime/graph/ops/impl/Convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,10 @@ ValueRef prepack_biases(
graph.create_local_wg_size(v),
vref,
v,
{t->sizes_ubo()},
{},
// Specialization constants
{t->hashed_layout()}));
{t->hashed_layout()},
{graph.sizes_pc_of(v)}));

return v;
}
Expand Down
33 changes: 17 additions & 16 deletions backends/vulkan/runtime/graph/ops/impl/Staging.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@ void add_staging_to_tensor_node(
vkapi::ShaderInfo shader = get_nchw_to_tensor_shader(
*graph.get_tensor(out_tensor), graph.int8_buffers_enabled());

vkapi::ParamsBindList ubos;
std::vector<PushConstantDataInfo> pcs;
if (graph.is_buffer_storage(out_tensor)) {
ubos.append(
{graph.sizes_ubo(out_tensor),
graph.strides_ubo(out_tensor),
graph.numel_ubo(out_tensor)});
pcs = {
graph.sizes_pc_of(out_tensor),
graph.strides_pc_of(out_tensor),
graph.numel_pc_of(out_tensor)};
} else {
ubos.append({graph.sizes_ubo(out_tensor)});
pcs = {graph.sizes_pc_of(out_tensor)};
}

graph.execute_nodes().emplace_back(new DispatchNode(
Expand All @@ -46,9 +46,9 @@ void add_staging_to_tensor_node(
// Input and Outputs
{{out_tensor, vkapi::kWrite}, {in_staging, vkapi::kRead}},
// Parameter Buffers
ubos,
// Push Constants
{},
// Push Constants
pcs,
// Specialization Constants
{graph.hashed_layout_of(out_tensor)},
// Resize Args
Expand Down Expand Up @@ -127,14 +127,14 @@ void add_prepack_standard_node(
vkapi::ShaderInfo shader = get_nchw_to_tensor_shader(
*graph.get_tensor(tensor), graph.int8_buffers_enabled());

vkapi::ParamsBindList ubos;
std::vector<PushConstantDataInfo> pcs;
if (graph.is_buffer_storage(tensor)) {
ubos.append(
{graph.sizes_ubo(tensor),
graph.strides_ubo(tensor),
graph.numel_ubo(tensor)});
pcs = {
graph.sizes_pc_of(tensor),
graph.strides_pc_of(tensor),
graph.numel_pc_of(tensor)};
} else {
ubos.append({graph.sizes_ubo(tensor)});
pcs = {graph.sizes_pc_of(tensor)};
}

int transpose_hw_spec = transpose_hw ? 1 : 0;
Expand All @@ -148,9 +148,10 @@ void add_prepack_standard_node(
tensor_data,
tensor,
// Parameter Buffers
ubos,
{},
// Specialization Constants
{graph.hashed_layout_of(tensor), transpose_hw_spec}));
{graph.hashed_layout_of(tensor), transpose_hw_spec},
pcs));
}

ValueRef prepack_standard(
Expand Down
12 changes: 11 additions & 1 deletion backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,25 +22,35 @@ bool is_bitw8(vkapi::ScalarType dtype) {

vkapi::ShaderInfo get_nchw_to_tensor_shader(
const api::vTensor& v_dst,
const bool int8_buffer_enabled) {
bool int8_buffer_enabled,
bool push_constant_variant) {
std::string kernel_name;
kernel_name.reserve(kShaderNameReserve);

if (is_bitw8(v_dst.dtype()) && v_dst.storage_type() != utils::kBuffer &&
!int8_buffer_enabled) {
kernel_name = "nchw_to_bitw8_image_nobitw8buffer";
if (!push_constant_variant) {
kernel_name += "_no_pc";
}
add_storage_type_suffix(kernel_name, v_dst);
add_dtype_suffix(kernel_name, v_dst);
return VK_KERNEL_FROM_STR(kernel_name);
}

if (v_dst.storage_type() == utils::kBuffer) {
kernel_name = "nchw_to_buffer";
if (!push_constant_variant) {
kernel_name += "_no_pc";
}
add_dtype_suffix(kernel_name, v_dst);
return VK_KERNEL_FROM_STR(kernel_name);
}

kernel_name = "nchw_to_image";
if (!push_constant_variant) {
kernel_name += "_no_pc";
}
add_storage_type_suffix(kernel_name, v_dst);
add_dtype_suffix(kernel_name, v_dst);

Expand Down
3 changes: 2 additions & 1 deletion backends/vulkan/runtime/graph/ops/utils/StagingUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ namespace vkcompute {

vkapi::ShaderInfo get_nchw_to_tensor_shader(
const api::vTensor& v_dst,
bool int8_buffer_enabled = true);
bool int8_buffer_enabled = true,
bool push_constant_variant = true);
vkapi::ShaderInfo get_tensor_to_nchw_shader(
const api::vTensor& v_src,
bool int8_buffer_enabled = true);
Expand Down
4 changes: 2 additions & 2 deletions backends/vulkan/test/utils/test_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ void record_nchw_to_buffer_op(
vkapi::PipelineBarrier pipeline_barrier{};

context->submit_compute_job(
get_nchw_to_tensor_shader(v_dst),
get_nchw_to_tensor_shader(v_dst, true, false),
pipeline_barrier,
{uint32_t(v_dst.numel()), 1, 1},
{64, 1, 1},
Expand Down Expand Up @@ -74,7 +74,7 @@ void record_nchw_to_image_op(

context->submit_compute_job(
get_nchw_to_tensor_shader(
v_dst, context->adapter_ptr()->has_full_int8_buffers_support()),
v_dst, context->adapter_ptr()->has_full_int8_buffers_support(), false),
pipeline_barrier,
v_dst.logical_limits(),
adaptive_work_group_size(v_dst.logical_limits()),
Expand Down
6 changes: 2 additions & 4 deletions backends/vulkan/test/vulkan_compute_api_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1600,8 +1600,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) {
/*shared_object_idx = */ 4);

// +2: t.sizes_ubo() for each staging shader
// +2: staging buffer for each input tensor
expected_vma_allocation_count += 4;
expected_vma_allocation_count += 2;
EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count);

ValueRef c = graph.add_tensor(
Expand All @@ -1621,8 +1620,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) {
/*shared_object_idx = */ 2);

// +1: t.sizes_ubo() uniform buffer for staging shader
// +1: staging buffer for the input tensor
expected_vma_allocation_count += 2;
expected_vma_allocation_count += 1;
EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count);

ValueRef e = graph.add_tensor(
Expand Down
Loading