From bbe5426e3b0a388ac55d5280abc305d88a30be7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Wed, 26 Nov 2025 13:00:34 +0000 Subject: [PATCH 01/41] Separate layouts into separate entities for input, weight, and output tensors. --- .../include/ck_tile/builder/conv_factory.hpp | 126 ++++-- .../builder/conv_signature_concepts.hpp | 71 +++- .../builder/include/ck_tile/builder/types.hpp | 361 ++++++++++++++---- .../test/conv/test_ckb_conv_fwd_1d_fp16.cpp | 9 +- ...est_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp | 39 ++ .../test/impl/conv_algorithm_types.hpp | 8 + .../test/impl/conv_signature_types.hpp | 23 +- .../test/utils/ckb_conv_test_configs.hpp | 3 + ...wd_xdl_scaleadd_scaleadd_relu_instance.hpp | 8 +- 9 files changed, 525 insertions(+), 123 deletions(-) create mode 100644 experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp diff --git a/experimental/builder/include/ck_tile/builder/conv_factory.hpp b/experimental/builder/include/ck_tile/builder/conv_factory.hpp index 6f8e50db15..c684d85bfa 100644 --- a/experimental/builder/include/ck_tile/builder/conv_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_factory.hpp @@ -61,21 +61,53 @@ namespace ck_tile::builder::factory_internal { +template +consteval auto get_input_layout_value(ConvInputLayout layout) { + if constexpr (SPATIAL_DIM == 1) return layout._1d; + else if constexpr (SPATIAL_DIM == 2) return layout._2d; + else if constexpr (SPATIAL_DIM == 3) return layout._3d; + else static_assert(false, "Unsupported spatial dimension"); +} + +template +consteval auto get_weight_layout_value(ConvWeightLayout layout) { + if constexpr (SPATIAL_DIM == 1) return layout._1d; + else if constexpr (SPATIAL_DIM == 2) return layout._2d; + else if constexpr (SPATIAL_DIM == 3) return layout._3d; + else static_assert(false, "Unsupported spatial dimension"); +} + +template +consteval auto get_output_layout_value(ConvOutputLayout layout) { + if constexpr (SPATIAL_DIM == 1) return layout._1d; + else if constexpr (SPATIAL_DIM == 2) return layout._2d; + else if constexpr (SPATIAL_DIM == 3) return layout._3d; + else static_assert(false, "Unsupported spatial dimension"); +} + // Type mappings from the builder FwdGroupConvLayout enum classes to the CK tensor data types. -template - requires(ConvSpatialDim && ValidConvLayoutForSpatialDim) +template + requires(ConvSpatialDim + && ValidConvInputLayoutForSpatialDim + && ValidConvWeightLayoutForSpatialDim + && ValidConvOutputLayoutForSpatialDim) struct ConvTensorLayouts { // This will trigger if a specialization for the given layout is not found. // We should always catch this in an earlier validation check. - using Layout = decltype(LayoutValue); - static_assert(sizeof(Layout) == 0, + using InputLayout = decltype(InputLayoutValue); + using WeightLayout = decltype(WeightLayoutValue); + using OutputLayout = decltype(OutputLayoutValue); + static_assert(sizeof(InputLayout) == 0 && sizeof(WeightLayout) == 0 && sizeof(OutputLayout) == 0, "Internal error. 
Unsupported layout for convolution factory."); }; // 1D Forward Convolution Layout Specializations template <> -struct ConvTensorLayouts +struct ConvTensorLayouts { using ALayout = ck::tensor_layout::convolution::NWGC; using BLayout = ck::tensor_layout::convolution::GKXC; @@ -84,7 +116,10 @@ struct ConvTensorLayouts -struct ConvTensorLayouts +struct ConvTensorLayouts { using ALayout = ck::tensor_layout::convolution::NGCW; using BLayout = ck::tensor_layout::convolution::GKXC; @@ -93,7 +128,10 @@ struct ConvTensorLayouts -struct ConvTensorLayouts +struct ConvTensorLayouts { using ALayout = ck::tensor_layout::convolution::GNWC; using BLayout = ck::tensor_layout::convolution::GKXC; @@ -102,7 +140,10 @@ struct ConvTensorLayouts -struct ConvTensorLayouts +struct ConvTensorLayouts { using ALayout = ck::tensor_layout::convolution::NGCW; using BLayout = ck::tensor_layout::convolution::GKCX; @@ -110,8 +151,12 @@ struct ConvTensorLayouts -struct ConvTensorLayouts +struct ConvTensorLayouts { using ALayout = ck::tensor_layout::convolution::NGCHW; using BLayout = ck::tensor_layout::convolution::GKYXC; @@ -120,7 +165,10 @@ struct ConvTensorLayouts -struct ConvTensorLayouts +struct ConvTensorLayouts { using ALayout = ck::tensor_layout::convolution::NHWGC; using BLayout = ck::tensor_layout::convolution::GKYXC; @@ -129,7 +177,10 @@ struct ConvTensorLayouts -struct ConvTensorLayouts +struct ConvTensorLayouts { using ALayout = ck::tensor_layout::convolution::GNHWC; using BLayout = ck::tensor_layout::convolution::GKYXC; @@ -138,7 +189,10 @@ struct ConvTensorLayouts -struct ConvTensorLayouts +struct ConvTensorLayouts { using ALayout = ck::tensor_layout::convolution::NGCHW; using BLayout = ck::tensor_layout::convolution::GKCYX; @@ -146,8 +200,12 @@ struct ConvTensorLayouts -struct ConvTensorLayouts +struct ConvTensorLayouts { using ALayout = ck::tensor_layout::convolution::NGCDHW; using BLayout = ck::tensor_layout::convolution::GKCZYX; @@ -156,7 +214,10 @@ struct ConvTensorLayouts -struct ConvTensorLayouts +struct ConvTensorLayouts { using ALayout = ck::tensor_layout::convolution::NDHWGC; using BLayout = ck::tensor_layout::convolution::GKZYXC; @@ -165,7 +226,10 @@ struct ConvTensorLayouts -struct ConvTensorLayouts +struct ConvTensorLayouts { using ALayout = ck::tensor_layout::convolution::GNDHWC; using BLayout = ck::tensor_layout::convolution::GKZYXC; @@ -173,26 +237,24 @@ struct ConvTensorLayouts +template +consteval auto GetTensorLayoutInternal() +{ + return factory_internal::ConvTensorLayouts{}; +} + +template consteval auto GetTensorLayout() { + constexpr auto INPUT_LAYOUT = Layout.input_layout; + constexpr auto WEIGHT_LAYOUT = Layout.weight_layout; + constexpr auto OUTPUT_LAYOUT = Layout.output_layout; - if constexpr(SPATIAL_DIM == 1) - { - return factory_internal::ConvTensorLayouts{}; - } - else if constexpr(SPATIAL_DIM == 2) - { - return factory_internal::ConvTensorLayouts{}; - } - else if constexpr(SPATIAL_DIM == 3) - { - return factory_internal::ConvTensorLayouts{}; - } - else - { - static_assert(false, "Unsupported spatial dimension for convolution layout."); - } + return GetTensorLayoutInternal(); } // Type mappings from builder convolution data type to CK tensor types. 
diff --git a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp index 05575590c4..110ffae3c8 100644 --- a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp @@ -28,20 +28,73 @@ namespace ck_tile::builder { template concept ConvSpatialDim = std::is_integral_v && (N == 1 || N == 2 || N == 3); -// Constraints for forward convolution layouts. -template -concept ValidConvLayoutForSpatialDim = - (SpatialDim == 1 && std::same_as) || - (SpatialDim == 2 && std::same_as) || - (SpatialDim == 3 && std::same_as); - // Constrains convolution data types to common floating-point types. template concept ConvDataType = (T == DataType::FP32) || (T == DataType::FP16) || (T == DataType::BF16) || (T == DataType::FP8) || (T == DataType::I8) || (T == DataType::U8); template -concept ConvLayout = std::same_as, GroupConvLayout>; +concept HasInputBiasLayout = requires(T t) { + { t.input_bias_layout }; +}; + +template +concept ConvertibleToArrayOfConvInputBiasLayout = + std::is_same_v>>; + +template +concept InputBiasLayoutWellDefinedIfProvided = requires(T t) { + requires !HasInputBiasLayout || requires { + { t.input_bias_layout } -> ConvertibleToArrayOfConvInputBiasLayout; + }; +}; + +template +concept HasOutputBiasLayout = requires(T t) { + { t.output_bias_layout }; +}; + +template +concept ConvertibleToArrayOfConvOutputBiasLayout = + std::is_same_v>>; + +template +concept OutputBiasLayoutWellDefinedIfProvided = requires(T t) { + requires !HasOutputBiasLayout || requires { + { t.output_bias_layout } -> ConvertibleToArrayOfConvOutputBiasLayout; + }; +}; + +template +concept ConvLayoutDescriptor = requires(T t) { + { t.input_layout } -> std::convertible_to; + { t.weight_layout } -> std::convertible_to; + { t.output_layout } -> std::convertible_to; + requires InputBiasLayoutWellDefinedIfProvided; + requires OutputBiasLayoutWellDefinedIfProvided; +}; + + +// Constraints for forward convolution input layouts. +template +concept ValidConvInputLayoutForSpatialDim = + (SpatialDim == 1 && std::same_as) || + (SpatialDim == 2 && std::same_as) || + (SpatialDim == 3 && std::same_as); + +// Constraints for forward convolution output layouts. +template +concept ValidConvOutputLayoutForSpatialDim = + (SpatialDim == 1 && std::same_as) || + (SpatialDim == 2 && std::same_as) || + (SpatialDim == 3 && std::same_as); + +// Constraints for forward convolution weight layouts. 
+template +concept ValidConvWeightLayoutForSpatialDim = + (SpatialDim == 1 && std::same_as) || + (SpatialDim == 2 && std::same_as) || + (SpatialDim == 3 && std::same_as); template concept HasElementwiseOp = requires(T t) { @@ -74,7 +127,7 @@ concept ConvolutionDirectionWellDefinedIfProvided = requires(T t) { template concept ConvSignatureDescriptor = requires(T t) { { t.spatial_dim } -> std::convertible_to; - { t.layout } -> ConvLayout; + { t.layout } -> ConvLayoutDescriptor; { t.data_type } -> std::convertible_to; requires ElementwiseOpWellDefinedIfProvided; requires ConvolutionDirectionWellDefinedIfProvided; diff --git a/experimental/builder/include/ck_tile/builder/types.hpp b/experimental/builder/include/ck_tile/builder/types.hpp index 1aeb71af10..6cb3b7c500 100644 --- a/experimental/builder/include/ck_tile/builder/types.hpp +++ b/experimental/builder/include/ck_tile/builder/types.hpp @@ -19,51 +19,136 @@ enum class DataType U8 }; -// Memory layouts for 1D convolution tensors. -// G: Group, N: Batch, K: Output Channel, C: Input Channel, W: Width -// Enum defines Input, Weight, and Output tensor layouts respectively. -enum class GroupConvLayout1D -{ - GNWC_GKXC_GNWK, - NWGC_GKXC_NWGK, - NGCW_GKXC_NGKW, - NGCW_GKCX_NGKW +enum class ConvInputBiasLayout +{ + GC, + G_C_strided +}; + +enum class ConvOutputBiasLayout +{ + GK, + G_K_strided +}; + +enum class ConvInputLayout1D +{ + GNCW, + GNWC, + NWGC, + NGCW, + G_NW_C_strided +}; + +enum class ConvInputLayout2D +{ + GNCHW, + GNHWC, + NHWGC, + NGCHW, + G_NHW_C_strided +}; + +enum class ConvInputLayout3D +{ + GNCDHW, + GNDHWC, + NDHWGC, + NGCDHW, + G_NDHW_C_strided +}; + +struct ConvInputLayout +{ + union + { + ConvInputLayout1D _1d; + ConvInputLayout2D _2d; + ConvInputLayout3D _3d; + }; + + constexpr ConvInputLayout(ConvInputLayout1D layout) : _1d(layout) {} + constexpr ConvInputLayout(ConvInputLayout2D layout) : _2d(layout) {} + constexpr ConvInputLayout(ConvInputLayout3D layout) : _3d(layout) {} +}; + +enum class ConvWeightLayout1D +{ + GKXC, + GKCX, + KXGC, + G_K_X_C_strided +}; + +enum class ConvWeightLayout2D +{ + GKYXC, + GKCYX, + KYXGC, + G_K_YX_C_strided +}; + +enum class ConvWeightLayout3D +{ + GKZYXC, + GKCZYX, + KZYXGC, + G_K_ZYX_C_strided +}; + +struct ConvWeightLayout +{ + union + { + ConvWeightLayout1D _1d; + ConvWeightLayout2D _2d; + ConvWeightLayout3D _3d; + }; + + constexpr ConvWeightLayout(ConvWeightLayout1D layout) : _1d(layout) {} + constexpr ConvWeightLayout(ConvWeightLayout2D layout) : _2d(layout) {} + constexpr ConvWeightLayout(ConvWeightLayout3D layout) : _3d(layout) {} +}; + +enum class ConvOutputLayout1D +{ + GNKW, + GNWK, + NWGK, + NGKW, + G_NW_K_strided }; -// Memory layouts for 2D convolution tensors. -// G: Group, N: Batch, K: Output Channel, C: Input Channel, Y: Height, X: Width, H: Height -// Enum defines Input, Weight, and Output tensor layouts respectively. -enum class GroupConvLayout2D +enum class ConvOutputLayout2D { - GNHWC_GKYXC_GNHWK, - NHWGC_GKYXC_NHWGK, - NGCHW_GKYXC_NGKHW, - NGCHW_GKCYX_NGKHW + GNKHW, + GNHWK, + NHWGK, + NGKHW, + G_NHW_K_strided }; -// Memory layouts for 3D convolution tensors. -// G: Group, N: Batch, K: Output Channel, C: Input Channel, Z: Depth, Y: Height, X: Width, D: Depth, -// H: Height Enum defines Input, Weight, and Output tensor layouts respectively. 
-enum class GroupConvLayout3D +enum class ConvOutputLayout3D { - GNDHWC_GKZYXC_GNDHWK, - NDHWGC_GKZYXC_NDHWGK, - NGCDHW_GKZYXC_NGKDHW, - NGCDHW_GKCZYX_NGKDHW, + GNKDHW, + GNDHWK, + NDHWGK, + NGKDHW, + G_NDHW_K_strided }; -struct GroupConvLayout +struct ConvOutputLayout { union { - GroupConvLayout1D _1d; - GroupConvLayout2D _2d; - GroupConvLayout3D _3d; + ConvOutputLayout1D _1d; + ConvOutputLayout2D _2d; + ConvOutputLayout3D _3d; }; - constexpr GroupConvLayout(GroupConvLayout1D layout) : _1d(layout) {} - constexpr GroupConvLayout(GroupConvLayout2D layout) : _2d(layout) {} - constexpr GroupConvLayout(GroupConvLayout3D layout) : _3d(layout) {} + constexpr ConvOutputLayout(ConvOutputLayout1D layout) : _1d(layout) {} + constexpr ConvOutputLayout(ConvOutputLayout2D layout) : _2d(layout) {} + constexpr ConvOutputLayout(ConvOutputLayout3D layout) : _3d(layout) {} }; // Direction of the convolution operation. @@ -83,7 +168,8 @@ enum class ElementwiseOperation BILINEAR, CLAMP, SCALE, - PASS_THROUGH + PASS_THROUGH, + SCALEADD_SCALEADD_RELU }; // Enums for pipeline versions & schedulers @@ -206,45 +292,6 @@ inline std::ostream& operator<<(std::ostream& os, ConvDirection dir) } } -inline std::ostream& operator<<(std::ostream& os, GroupConvLayout1D layout) -{ - using enum GroupConvLayout1D; - switch(layout) - { - case GNWC_GKXC_GNWK: return os << "GNWC_GKXC_GNWK"; - case NWGC_GKXC_NWGK: return os << "NWGC_GKXC_NWGK"; - case NGCW_GKXC_NGKW: return os << "NGCW_GKXC_NGKW"; - case NGCW_GKCX_NGKW: return os << "NGCW_GKCX_NGKW"; - default: return os << "Unknown"; - } -} - -inline std::ostream& operator<<(std::ostream& os, GroupConvLayout2D layout) -{ - using enum GroupConvLayout2D; - switch(layout) - { - case GNHWC_GKYXC_GNHWK: return os << "GNHWC_GKYXC_GNHWK"; - case NHWGC_GKYXC_NHWGK: return os << "NHWGC_GKYXC_NHWGK"; - case NGCHW_GKYXC_NGKHW: return os << "NGCHW_GKYXC_NGKHW"; - case NGCHW_GKCYX_NGKHW: return os << "NGCHW_GKCYX_NGKHW"; - default: return os << "Unknown"; - } -} - -inline std::ostream& operator<<(std::ostream& os, GroupConvLayout3D layout) -{ - using enum GroupConvLayout3D; - switch(layout) - { - case GNDHWC_GKZYXC_GNDHWK: return os << "GNDHWC_GKZYXC_GNDHWK"; - case NDHWGC_GKZYXC_NDHWGK: return os << "NDHWGC_GKZYXC_NDHWGK"; - case NGCDHW_GKZYXC_NGKDHW: return os << "NGCDHW_GKZYXC_NGKDHW"; - case NGCDHW_GKCZYX_NGKDHW: return os << "NGCDHW_GKCZYX_NGKDHW"; - default: return os << "Unknown"; - } -} - inline std::ostream& operator<<(std::ostream& os, ElementwiseOperation op) { using enum ElementwiseOperation; @@ -257,6 +304,7 @@ inline std::ostream& operator<<(std::ostream& os, ElementwiseOperation op) case CLAMP: return os << "CLAMP"; case SCALE: return os << "SCALE"; case PASS_THROUGH: return os << "PASS_THROUGH"; + case SCALEADD_SCALEADD_RELU: return os << "SCALEADD_SCALEADD_RELU"; default: return os << "Unknown"; } } @@ -375,10 +423,171 @@ inline std::ostream& operator<<(std::ostream& os, PipelineScheduler sched) } } -// ostream operator overload for std::variant of layout types -inline std::ostream& -operator<<(std::ostream& os, - const std::variant& layout) +inline std::ostream& operator<<(std::ostream& os, ConvInputLayout1D layout) +{ + using enum ConvInputLayout1D; + switch(layout) + { + case GNCW: return os << "GNCW"; + case GNWC: return os << "GNWC"; + case NWGC: return os << "NWGC"; + case NGCW: return os << "NGCW"; + case G_NW_C_strided: return os << "G_NW_C_strided"; + default: return os << "Unknown"; + } +} + +inline std::ostream& operator<<(std::ostream& os, ConvInputLayout2D 
layout) +{ + using enum ConvInputLayout2D; + switch(layout) + { + case GNCHW: return os << "GNCHW"; + case GNHWC: return os << "GNHWC"; + case NHWGC: return os << "NHWGC"; + case NGCHW: return os << "NGCHW"; + case G_NHW_C_strided: return os << "G_NHW_C_strided"; + default: return os << "Unknown"; + } +} + +inline std::ostream& operator<<(std::ostream& os, ConvInputLayout3D layout) +{ + using enum ConvInputLayout3D; + switch(layout) + { + case GNCDHW: return os << "GNCDHW"; + case GNDHWC: return os << "GNDHWC"; + case NDHWGC: return os << "NDHWGC"; + case NGCDHW: return os << "NGCDHW"; + case G_NDHW_C_strided: return os << "G_NDHW_C_strided"; + default: return os << "Unknown"; + } +} + +inline std::ostream& operator<<(std::ostream& os, ConvWeightLayout1D layout) +{ + using enum ConvWeightLayout1D; + switch(layout) + { + case GKXC: return os << "GKXC"; + case GKCX: return os << "GKCX"; + case KXGC: return os << "KXGC"; + case G_K_X_C_strided: return os << "G_K_X_C_strided"; + default: return os << "Unknown"; + } +} + +inline std::ostream& operator<<(std::ostream& os, ConvWeightLayout2D layout) +{ + using enum ConvWeightLayout2D; + switch(layout) + { + case GKYXC: return os << "GKYXC"; + case GKCYX: return os << "GKCYX"; + case KYXGC: return os << "KYXGC"; + case G_K_YX_C_strided: return os << "G_K_YX_C_strided"; + default: return os << "Unknown"; + } +} + +inline std::ostream& operator<<(std::ostream& os, ConvWeightLayout3D layout) +{ + using enum ConvWeightLayout3D; + switch(layout) + { + case GKZYXC: return os << "GKZYXC"; + case GKCZYX: return os << "GKCZYX"; + case KZYXGC: return os << "KZYXGC"; + case G_K_ZYX_C_strided: return os << "G_K_ZYX_C_strided"; + default: return os << "Unknown"; + } +} + +inline std::ostream& operator<<(std::ostream& os, ConvOutputLayout1D layout) +{ + using enum ConvOutputLayout1D; + switch(layout) + { + case GNKW: return os << "GNKW"; + case GNWK: return os << "GNWK"; + case NWGK: return os << "NWGK"; + case NGKW: return os << "NGKW"; + case G_NW_K_strided: return os << "G_NW_K_strided"; + default: return os << "Unknown"; + } +} + +inline std::ostream& operator<<(std::ostream& os, ConvOutputLayout2D layout) +{ + using enum ConvOutputLayout2D; + switch(layout) + { + case GNKHW: return os << "GNKHW"; + case GNHWK: return os << "GNHWK"; + case NHWGK: return os << "NHWGK"; + case NGKHW: return os << "NGKHW"; + case G_NHW_K_strided: return os << "G_NHW_K_strided"; + default: return os << "Unknown"; + } +} + +inline std::ostream& operator<<(std::ostream& os, ConvOutputLayout3D layout) +{ + using enum ConvOutputLayout3D; + switch(layout) + { + case GNKDHW: return os << "GNKDHW"; + case GNDHWK: return os << "GNDHWK"; + case NDHWGK: return os << "NDHWGK"; + case NGKDHW: return os << "NGKDHW"; + case G_NDHW_K_strided: return os << "G_NDHW_K_strided"; + default: return os << "Unknown"; + } +} + +inline std::ostream& operator<<(std::ostream& os, ConvInputBiasLayout layout) +{ + using enum ConvInputBiasLayout; + switch(layout) + { + case GC: return os << "GC"; + case G_C_strided: return os << "G_C_strided"; + default: return os << "Unknown"; + } +} + +inline std::ostream& operator<<(std::ostream& os, ConvOutputBiasLayout layout) +{ + using enum ConvOutputBiasLayout; + switch(layout) + { + case GK: return os << "GK"; + case G_K_strided: return os << "G_K_strided"; + default: return os << "Unknown"; + } +} + + +inline std::ostream& operator<<(std::ostream& os, const std::variant& layout) +{ + std::visit([&os](const auto& l) { os << l; }, layout); + return os; +} + +inline 
std::ostream& operator<<(std::ostream& os, const std::variant& layout) +{ + std::visit([&os](const auto& l) { os << l; }, layout); + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const std::variant& layout) { std::visit([&os](const auto& l) { os << l; }, layout); return os; diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp index 3315eb6f64..e3bacdb204 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp @@ -12,9 +12,16 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_Instance_1D_FP16_ChannelsFirst_scale) { + constexpr ConvLayout<> FwdConvLayout + { + .input_layout = ConvInputLayout1D::NWGC, + .weight_layout = ConvWeightLayout1D::GKXC, + .output_layout = ConvOutputLayout1D::NWGK + }; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 1, .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout1D::NWGC_GKXC_NWGK, + .layout = FwdConvLayout, .data_type = DataType::FP16, .elementwise_operation = ElementwiseOperation::PASS_THROUGH}; diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp new file mode 100644 index 0000000000..245c4e31fb --- /dev/null +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp @@ -0,0 +1,39 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "utils/ckb_conv_test_configs.hpp" +#include "utils/ckb_conv_test_utils.hpp" + +namespace { + +using namespace ck_tile::builder::test_utils; + +// TEST(FwdConvInstances, +// Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_2D_BF16_ChannelsLast) +// { +// constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, +// .direction = ConvDirection::FORWARD, +// .layout = GroupConvLayout2D::NHWGC_GKYXC_NHWGK, +// .data_type = DataType::BF16, +// .elementwise_operation = +// ElementwiseOperation::PASS_THROUGH}; + +// constexpr auto FwdConvAlgorithm = +// ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle{} +// .with_thread_block(FwdThreadBlock_64_64x32x32) +// .with_gemm_config(FwdGemmParams_Xdl_2x2_per_wave) +// .with_transfer(FwdTransfer_4x16x1) +// .with_specializations(ConvFwdSpecialization::DEFAULT, GemmSpecialization::MNKPadding) +// .with_prefetch_config(1, 1, PipelineScheduler::DEFAULT) +// .with_elementwise_ops({ElementwiseOperation::PASS_THROUGH, ElementwiseOperation::PASS_THROUGH, +// ElementwiseOperation::SCALEADD_SCALEADD_RELU}); + +// using Builder = ConvBuilder; +// run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", +// "256, 256, 256, 32", +// "Default", +// "BlkGemmPipelineScheduler: Intrawave", +// "BlkGemmPipelineVersion: v1"}); +// } + +} // namespace diff --git a/experimental/builder/test/impl/conv_algorithm_types.hpp b/experimental/builder/test/impl/conv_algorithm_types.hpp index 3331bf204f..082b0ccbbc 100644 --- a/experimental/builder/test/impl/conv_algorithm_types.hpp +++ b/experimental/builder/test/impl/conv_algorithm_types.hpp @@ -339,6 +339,14 @@ struct ConvAlgorithmTemplate : Components... 
result.transfer = t; return result; } + + template + constexpr auto with_elementwise_operation(const OP& op) const + { + auto result = *this; + result.element_op = op; + return result; + } }; // Algorithm types diff --git a/experimental/builder/test/impl/conv_signature_types.hpp b/experimental/builder/test/impl/conv_signature_types.hpp index f18abb1c8d..efd262b551 100644 --- a/experimental/builder/test/impl/conv_signature_types.hpp +++ b/experimental/builder/test/impl/conv_signature_types.hpp @@ -10,6 +10,28 @@ namespace ck_tile::builder::test { using namespace ck_tile::builder; +template +struct ConvInputBiasLayouts +{ + std::array input_bias_layout{InputBiasLayouts...}; +}; + +template +struct ConvOutputBiasLayouts +{ + std::array output_bias_layout{OutputBiasLayouts...}; +}; + +template +struct ConvLayout : BiasTensorLayouts... +{ + ConvInputLayout input_layout; + ConvWeightLayout weight_layout; + ConvOutputLayout output_layout; + +}; + +template struct ConvSignature { int spatial_dim; @@ -18,6 +40,5 @@ struct ConvSignature DataType data_type; ElementwiseOperation elementwise_operation; }; -static_assert(ConvSignatureDescriptor); } // namespace ck_tile::builder::test diff --git a/experimental/builder/test/utils/ckb_conv_test_configs.hpp b/experimental/builder/test/utils/ckb_conv_test_configs.hpp index 7384603854..da88b98051 100644 --- a/experimental/builder/test/utils/ckb_conv_test_configs.hpp +++ b/experimental/builder/test/utils/ckb_conv_test_configs.hpp @@ -178,6 +178,9 @@ constexpr GridwiseXdlGemm FwdGemmParams_Xdl_4x4_per_wave{ constexpr GridwiseXdlGemm FwdGemmParams_Xdl_4x2_per_wave{ .ak1 = 8, .bk1 = 8, .m_per_xdl = 32, .n_per_xdl = 32, .m_xdl_per_wave = 4, .n_xdl_per_wave = 2}; +constexpr GridwiseXdlGemm FwdGemmParams_Xdl_2x2_per_wave{ + .ak1 = 8, .bk1 = 8, .m_per_xdl = 32, .n_per_xdl = 32, .m_xdl_per_wave = 2, .n_xdl_per_wave = 2}; + constexpr GridwiseXdlGemm FwdGemmParams_Xdl_2x1_per_wave{ .ak1 = 8, .bk1 = 8, .m_per_xdl = 32, .n_per_xdl = 32, .m_xdl_per_wave = 2, .n_xdl_per_wave = 1}; diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp index defe8985a9..8051b29c19 100644 --- a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp +++ b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp @@ -45,10 +45,10 @@ template using device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_bf16_instances = std::tuple< // clang-format off - //########################################| NumDim| A| B| Ds| E| AData| BData| AccData| CShuffle| Ds| EData| A| B| CDE| ConvForward| GEMM| NumGemmK| Block| MPer| NPer| KPer| AK1| BK1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffle| CShuffle| CBlockTransferClusterLengths| CBlockTransfer| - //########################################| Spatial| Layout| Layout| Layout| Layout| Type| Type| Type| DataType| DataType| Type| Elementwise| Elementwise| Elementwise| Specialization| Specialization| Prefetch| Size| Block| Block| Block| | | XDL| XDL| Per| Per| ThreadCluster| 
ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MXdlPerWave| NXdlPerWave| _MBlock_MWaveMPerXdl| ScalarPerVector| - //########################################| | | | | | | | | | | | Operation| Operation| Operation| | | Stage| | | | | | | | | Wave| Wave| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| | Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerShuffle| PerShuffle| _NBlock_NWaveNPerXdl| _NWaveNPerXdl| - //########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | + //########################################| NumDim| A| B| Ds| E| AData| BData| AccData| CShuffle| Ds| EData| A| B| CDE| ConvForward| GEMM| NumGemmK| Block| MPer| NPer| KPer| AK1| BK1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffle| CShuffle| CBlockTransferClusterLengths| CBlockTransfer| + //########################################| Spatial| Layout| Layout| Layout| Layout| Type| Type| Type| DataType| DataType| Type| Elementwise| Elementwise| Elementwise| Specialization| Specialization| Prefetch| Size| Block| Block| Block| | | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MXdlPerWave| NXdlPerWave| _MBlock_MWaveMPerXdl| ScalarPerVector| + //########################################| | | | | | | | | | | | Operation| Operation| Operation| | | Stage| | | | | | | | | Wave| Wave| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| | Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerShuffle| PerShuffle| _NBlock_NWaveNPerXdl| _NWaveNPerXdl| + //########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | // generic instance DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle, BF16, PassThrough, PassThrough, ScaleAddScaleAddRelu, ConvSpec, GemmMNKPadding, 1, 64, 64, 64, 32, 8, 8, 32, 32, 2, 2, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 1, 8, 1, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 1, 8, 1, 1, 1, S<1, 16, 1, 4>, 1>, // instances for small conv.K and conv.C From 6d7fdf6065209d1711b1438c2fff13a5793f50a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Wed, 26 Nov 2025 14:07:04 +0000 Subject: [PATCH 02/41] Add test for handling bias tensor layouts. 
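
The bias (residual) tensors consumed by the elementwise operations do not
have to share the layout of the main input or output tensor: a bias can be
a rank-2 (G, C) / (G, K) tensor, or a tensor with a full input/output
layout. Rename the plain enums to InputBiasLayout/OutputBiasLayout and add
ConvInputBiasLayout/ConvOutputBiasLayout wrappers that accept either form
through implicit constexpr constructors, mirroring how ConvInputLayout and
ConvOutputLayout wrap their 1D/2D/3D enums. A rough sketch of what the
wrapper admits (variable names are illustrative only):

    // Reduced bias layout: one value per (G, K) slice.
    constexpr ConvOutputBiasLayout reduced{OutputBiasLayout::G_K_strided};
    // Full-tensor bias layout: same rank as the convolution output.
    constexpr ConvOutputBiasLayout full{ConvOutputLayout2D::NHWGK};

The ostream operators for the renamed enums are commented out until
printing is adapted to the union-based wrappers.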
--- .../builder/conv_signature_concepts.hpp | 4 +- .../builder/include/ck_tile/builder/types.hpp | 74 +++++++++++++------ ...est_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp | 63 +++++++++------- .../test/impl/conv_signature_types.hpp | 1 - 4 files changed, 89 insertions(+), 53 deletions(-) diff --git a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp index 110ffae3c8..2f9c4c8004 100644 --- a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp @@ -40,7 +40,7 @@ concept HasInputBiasLayout = requires(T t) { template concept ConvertibleToArrayOfConvInputBiasLayout = - std::is_same_v>>; + std::is_same_v, std::array>>>; template concept InputBiasLayoutWellDefinedIfProvided = requires(T t) { @@ -56,7 +56,7 @@ concept HasOutputBiasLayout = requires(T t) { template concept ConvertibleToArrayOfConvOutputBiasLayout = - std::is_same_v>>; + std::is_same_v, std::array>>>; template concept OutputBiasLayoutWellDefinedIfProvided = requires(T t) { diff --git a/experimental/builder/include/ck_tile/builder/types.hpp b/experimental/builder/include/ck_tile/builder/types.hpp index 6cb3b7c500..4eaad96647 100644 --- a/experimental/builder/include/ck_tile/builder/types.hpp +++ b/experimental/builder/include/ck_tile/builder/types.hpp @@ -19,13 +19,13 @@ enum class DataType U8 }; -enum class ConvInputBiasLayout +enum class InputBiasLayout { GC, G_C_strided }; -enum class ConvOutputBiasLayout +enum class OutputBiasLayout { GK, G_K_strided @@ -72,6 +72,20 @@ struct ConvInputLayout constexpr ConvInputLayout(ConvInputLayout3D layout) : _3d(layout) {} }; +struct ConvInputBiasLayout +{ + union { + InputBiasLayout _input_bias_layout; + ConvInputLayout _conv_input_layout; + }; + + constexpr ConvInputBiasLayout(InputBiasLayout layout) : _input_bias_layout(layout) {} + constexpr ConvInputBiasLayout(ConvInputLayout layout) : _conv_input_layout(layout) {} + constexpr ConvInputBiasLayout(ConvInputLayout1D layout) : _conv_input_layout(layout) {} + constexpr ConvInputBiasLayout(ConvInputLayout2D layout) : _conv_input_layout(layout) {} + constexpr ConvInputBiasLayout(ConvInputLayout3D layout) : _conv_input_layout(layout) {} +}; + enum class ConvWeightLayout1D { GKXC, @@ -151,6 +165,20 @@ struct ConvOutputLayout constexpr ConvOutputLayout(ConvOutputLayout3D layout) : _3d(layout) {} }; +struct ConvOutputBiasLayout +{ + union { + OutputBiasLayout _output_bias_layout; + ConvOutputLayout _conv_output_layout; + }; + + constexpr ConvOutputBiasLayout(OutputBiasLayout layout) : _output_bias_layout(layout) {} + constexpr ConvOutputBiasLayout(ConvOutputLayout layout) : _conv_output_layout(layout) {} + constexpr ConvOutputBiasLayout(ConvOutputLayout1D layout) : _conv_output_layout(layout) {} + constexpr ConvOutputBiasLayout(ConvOutputLayout2D layout) : _conv_output_layout(layout) {} + constexpr ConvOutputBiasLayout(ConvOutputLayout3D layout) : _conv_output_layout(layout) {} +}; + // Direction of the convolution operation. 
enum class ConvDirection { @@ -546,27 +574,27 @@ inline std::ostream& operator<<(std::ostream& os, ConvOutputLayout3D layout) } } -inline std::ostream& operator<<(std::ostream& os, ConvInputBiasLayout layout) -{ - using enum ConvInputBiasLayout; - switch(layout) - { - case GC: return os << "GC"; - case G_C_strided: return os << "G_C_strided"; - default: return os << "Unknown"; - } -} - -inline std::ostream& operator<<(std::ostream& os, ConvOutputBiasLayout layout) -{ - using enum ConvOutputBiasLayout; - switch(layout) - { - case GK: return os << "GK"; - case G_K_strided: return os << "G_K_strided"; - default: return os << "Unknown"; - } -} +// inline std::ostream& operator<<(std::ostream& os, ConvInputBiasLayout layout) +// { +// using enum ConvInputBiasLayout; +// switch(layout) +// { +// case GC: return os << "GC"; +// case G_C_strided: return os << "G_C_strided"; +// default: return os << "Unknown"; +// } +// } + +// inline std::ostream& operator<<(std::ostream& os, ConvOutputBiasLayout layout) +// { +// using enum ConvOutputBiasLayout; +// switch(layout) +// { +// case GK: return os << "GK"; +// case G_K_strided: return os << "G_K_strided"; +// default: return os << "Unknown"; +// } +// } inline std::ostream& operator<<(std::ostream& os, const std::variant; -// run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", -// "256, 256, 256, 32", -// "Default", -// "BlkGemmPipelineScheduler: Intrawave", -// "BlkGemmPipelineVersion: v1"}); -// } +TEST(FwdConvInstances, + Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_Instance_2D_BF16_scale_add_relu) +{ + constexpr auto G_K = OutputBiasLayout::G_K_strided; + constexpr auto NHWGK = ConvOutputLayout2D::NHWGK; + using FwdOutputBiasLayouts = ConvOutputBiasLayouts; + + constexpr ConvLayout FwdConvLayout + { + .input_layout = ConvInputLayout2D::NHWGC, + .weight_layout = ConvWeightLayout2D::GKYXC, + .output_layout = ConvOutputLayout2D::NHWGK + }; + + constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .layout = FwdConvLayout, + .data_type = DataType::BF16, + .elementwise_operation = + ElementwiseOperation::SCALEADD_SCALEADD_RELU}; + + constexpr auto FwdConvAlgorithm = + ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle{} + .with_thread_block(FwdThreadBlock_64_64x32x32) + .with_gemm_config(FwdGemmParams_Xdl_2x2_per_wave) + .with_transfer(FwdTransfer_4x16x1) + .with_specializations(ConvFwdSpecialization::DEFAULT, GemmSpecialization::MNKPadding) + .with_prefetch_config(1, 1, PipelineScheduler::DEFAULT); + + using Builder = ConvBuilder; + run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle", + "64,64,32,32", + "MNKPadding", + "Default"}); +} } // namespace diff --git a/experimental/builder/test/impl/conv_signature_types.hpp b/experimental/builder/test/impl/conv_signature_types.hpp index efd262b551..6241d2c9d0 100644 --- a/experimental/builder/test/impl/conv_signature_types.hpp +++ b/experimental/builder/test/impl/conv_signature_types.hpp @@ -28,7 +28,6 @@ struct ConvLayout : BiasTensorLayouts... ConvInputLayout input_layout; ConvWeightLayout weight_layout; ConvOutputLayout output_layout; - }; template From 626610f460ac616528d543e6101a4c550b3e1867 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Wed, 26 Nov 2025 14:07:20 +0000 Subject: [PATCH 03/41] Use instance string in builder tests. 
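
The run_test helper now matches the expected kernel-instance components
against GetInstanceString() instead of GetTypeString(), so the tests can
assert on the full instance configuration (layouts, elementwise operations,
tile sizes) that the following patches add checks for.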
--- experimental/builder/test/utils/ckb_conv_test_utils.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/builder/test/utils/ckb_conv_test_utils.hpp b/experimental/builder/test/utils/ckb_conv_test_utils.hpp index f3db734da8..508c621c2e 100644 --- a/experimental/builder/test/utils/ckb_conv_test_utils.hpp +++ b/experimental/builder/test/utils/ckb_conv_test_utils.hpp @@ -15,7 +15,7 @@ constexpr void run_test(const std::vector& kernel_instance_componen { auto instance = typename Builder::Instance{}; - const auto kernel_string = instance.GetTypeString(); + const auto kernel_string = instance.GetInstanceString(); std::cout << "Generated kernel: " << kernel_string << std::endl; EXPECT_GT(kernel_string.size(), 0); From edd180a8b6cd7eb1e6b9340bb0a5868c810a8d08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Wed, 26 Nov 2025 15:16:15 +0000 Subject: [PATCH 04/41] Add handling of output bias data types and layouts. --- .../include/ck_tile/builder/conv_factory.hpp | 102 +++++++++++++----- ...est_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp | 1 + 2 files changed, 76 insertions(+), 27 deletions(-) diff --git a/experimental/builder/include/ck_tile/builder/conv_factory.hpp b/experimental/builder/include/ck_tile/builder/conv_factory.hpp index c684d85bfa..8132e37fa5 100644 --- a/experimental/builder/include/ck_tile/builder/conv_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_factory.hpp @@ -85,7 +85,51 @@ consteval auto get_output_layout_value(ConvOutputLayout layout) { else static_assert(false, "Unsupported spatial dimension"); } -// Type mappings from the builder FwdGroupConvLayout enum classes to the CK tensor data types. +struct EmptyOutputBiasLayout +{ + using DsLayout = ck::Tuple<>; + using DsDataTypes = ck::Tuple<>; +}; + +// Type mappings from the builder ConvOutputBiasLayout enum classes to the CK tensor data types. +template + requires(ConvSpatialDim) +struct ConvOutputBiasTensorLayouts +{ + // This will trigger if a specialization for the given layout is not found. + // We should always catch this in an earlier validation check. + using OutputBiasLayout = decltype(OutputBiasLayoutValue); + static_assert(sizeof(OutputBiasLayout) == 0, + "Internal error. Unsupported layout for convolution factory."); +}; + +constexpr std::array NHWGK_G_K_STRIDED_LAYOUT = { + ConvOutputBiasLayout{ConvOutputLayout2D::NHWGK}, + ConvOutputBiasLayout{OutputBiasLayout::G_K_strided} +}; + +template<> +struct ConvOutputBiasTensorLayouts +{ + using DsLayout = ck::Tuple; + using DsDataTypes = ck::Tuple; +}; + +template +requires (HasOutputBiasLayout) +consteval auto GetOutputBiasTensorLayout() +{ + return factory_internal::ConvOutputBiasTensorLayouts{}; +} + +template +requires (!HasOutputBiasLayout) +consteval auto GetOutputBiasTensorLayout() +{ + return EmptyOutputBiasLayout{}; +} + +// Type mappings from the builder ConvLayout enum classes to the CK tensor data types. 
template requires(ConvSpatialDim && ValidConvInputLayoutForSpatialDim @@ -111,7 +155,6 @@ struct ConvTensorLayouts; using ELayout = ck::tensor_layout::convolution::NWGK; }; @@ -123,7 +166,6 @@ struct ConvTensorLayouts; using ELayout = ck::tensor_layout::convolution::NGKW; }; @@ -135,7 +177,6 @@ struct ConvTensorLayouts; using ELayout = ck::tensor_layout::convolution::GNWK; }; @@ -147,7 +188,6 @@ struct ConvTensorLayouts; using ELayout = ck::tensor_layout::convolution::NGKW; }; @@ -160,7 +200,6 @@ struct ConvTensorLayouts; using ELayout = ck::tensor_layout::convolution::NGKHW; }; @@ -172,7 +211,6 @@ struct ConvTensorLayouts; using ELayout = ck::tensor_layout::convolution::NHWGK; }; @@ -184,7 +222,6 @@ struct ConvTensorLayouts; using ELayout = ck::tensor_layout::convolution::GNHWK; }; @@ -196,7 +233,6 @@ struct ConvTensorLayouts; using ELayout = ck::tensor_layout::convolution::NGKHW; }; @@ -209,7 +245,6 @@ struct ConvTensorLayouts; using ELayout = ck::tensor_layout::convolution::NGKDHW; }; @@ -221,7 +256,6 @@ struct ConvTensorLayouts; using ELayout = ck::tensor_layout::convolution::NDHWGK; }; @@ -233,7 +267,6 @@ struct ConvTensorLayouts; using ELayout = ck::tensor_layout::convolution::GNDHWK; }; @@ -275,7 +308,6 @@ struct ConvTensorTypes using BDataType = ck::half_t; using BComputeType = ck::half_t; using CShuffleDataType = ck::half_t; - using DsDataTypes = ck::Tuple<>; using AccDataType = float; using EDataType = ck::half_t; }; @@ -288,7 +320,6 @@ struct ConvTensorTypes using BDataType = ck::bhalf_t; using BComputeType = ck::bhalf_t; using CShuffleDataType = ck::bhalf_t; - using DsDataTypes = ck::Tuple<>; using AccDataType = float; using EDataType = ck::bhalf_t; }; @@ -301,7 +332,6 @@ struct ConvTensorTypes using BDataType = float; using BComputeType = float; using CShuffleDataType = float; - using DsDataTypes = ck::Tuple<>; using AccDataType = float; using EDataType = float; }; @@ -314,7 +344,6 @@ struct ConvTensorTypes using BDataType = int8_t; using BComputeType = int8_t; using CShuffleDataType = int8_t; - using DsDataTypes = ck::Tuple<>; using AccDataType = int32_t; using EDataType = int8_t; }; @@ -327,7 +356,6 @@ struct ConvTensorTypes using BDataType = ck::f8_t; using BComputeType = ck::f8_t; using CShuffleDataType = ck::f8_t; - using DsDataTypes = ck::Tuple<>; using AccDataType = float; using EDataType = ck::f8_t; }; @@ -606,6 +634,10 @@ struct ConvFactory using Layouts = decltype(factory_internal::GetTensorLayout()); + using OutputBiasLayouts = decltype(factory_internal::GetOutputBiasTensorLayout()); + using Types = factory_internal::ConvTensorTypes; using Ops = factory_internal::ElementwiseOps()>; using AlgorithmType = decltype(ALGORITHM); @@ -647,13 +679,13 @@ struct ConvFactory SPATIAL_DIM, typename Layouts::ALayout, typename Layouts::BLayout, - typename Layouts::DsLayout, + typename OutputBiasLayouts::DsLayout, typename Layouts::ELayout, typename Types::ADataType, typename Types::BDataType, typename Types::AccDataType, typename Types::CShuffleDataType, - typename Types::DsDataTypes, + typename OutputBiasLayouts::DsDataTypes, typename Types::EDataType, typename Ops::AElementwiseOp, typename Ops::BElementwiseOp, @@ -708,6 +740,10 @@ struct ConvFactory using Layouts = decltype(factory_internal::GetTensorLayout()); + using OutputBiasLayouts = decltype(factory_internal::GetOutputBiasTensorLayout()); + using Types = factory_internal::ConvTensorTypes; using Ops = factory_internal::ElementwiseOps()>; using AlgorithmType = decltype(ALGORITHM); @@ -744,13 +780,13 @@ struct ConvFactory 
SPATIAL_DIM, typename Layouts::ALayout, typename Layouts::BLayout, - typename Layouts::DsLayout, + typename OutputBiasLayouts::DsLayout, typename Layouts::ELayout, typename Types::ADataType, typename Types::BDataType, typename Types::AccDataType, typename Types::CShuffleDataType, - typename Types::DsDataTypes, + typename OutputBiasLayouts::DsDataTypes, typename Types::EDataType, typename Ops::AElementwiseOp, typename Ops::BElementwiseOp, @@ -805,6 +841,10 @@ struct ConvFactory using Layouts = decltype(factory_internal::GetTensorLayout()); + using OutputBiasLayouts = decltype(factory_internal::GetOutputBiasTensorLayout()); + using Types = factory_internal::ConvTensorTypes; using Ops = factory_internal::ElementwiseOps()>; using AlgorithmType = decltype(ALGORITHM); @@ -843,13 +883,13 @@ struct ConvFactory SPATIAL_DIM, typename Layouts::ALayout, typename Layouts::BLayout, - typename Layouts::DsLayout, + typename OutputBiasLayouts::DsLayout, typename Layouts::ELayout, typename Types::ADataType, typename Types::BDataType, typename Types::AccDataType, typename Types::CShuffleDataType, - typename Types::DsDataTypes, + typename OutputBiasLayouts::DsDataTypes, typename Types::EDataType, typename Ops::AElementwiseOp, typename Ops::BElementwiseOp, @@ -901,6 +941,10 @@ struct ConvFactory using Layouts = decltype(factory_internal::GetTensorLayout()); + using OutputBiasLayouts = decltype(factory_internal::GetOutputBiasTensorLayout()); + using Types = factory_internal::ConvTensorTypes; using Ops = factory_internal::ElementwiseOps()>; using AlgorithmType = decltype(ALGORITHM); @@ -969,12 +1013,12 @@ struct ConvFactory SPATIAL_DIM, typename Types::ADataType, typename Types::BDataType, - typename Types::DsDataTypes, + typename OutputBiasLayouts::DsDataTypes, typename Types::EDataType, typename Types::AccDataType, typename Layouts::ALayout, typename Layouts::BLayout, - typename Layouts::DsLayout, + typename OutputBiasLayouts::DsLayout, typename Layouts::ELayout, typename Ops::AElementwiseOp, typename Ops::BElementwiseOp, @@ -1024,6 +1068,10 @@ struct ConvFactory using Layouts = decltype(factory_internal::GetTensorLayout()); + using OutputBiasLayouts = decltype(factory_internal::GetOutputBiasTensorLayout()); + using Types = factory_internal::ConvTensorTypes; using Ops = factory_internal::ElementwiseOps()>; using AlgorithmType = decltype(ALGORITHM); @@ -1062,13 +1110,13 @@ struct ConvFactory SPATIAL_DIM, typename Layouts::ALayout, typename Layouts::BLayout, - typename Layouts::DsLayout, + typename OutputBiasLayouts::DsLayout, typename Layouts::ELayout, typename Types::ADataType, typename Types::BDataType, typename Types::AccDataType, typename Types::CShuffleDataType, - typename Types::DsDataTypes, + typename OutputBiasLayouts::DsDataTypes, typename Types::EDataType, typename Ops::AElementwiseOp, typename Ops::BElementwiseOp, diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp index e6a9ad0549..48b7c522d8 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp @@ -40,6 +40,7 @@ TEST(FwdConvInstances, using Builder = ConvBuilder; run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle", + "NHWGC,GKYXC,NHWGK,G_K,NHWGK", // Check layouts "64,64,32,32", "MNKPadding", "Default"}); From 0bd50a577165f29ccc8b66c29b4af3204d00be98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> 
Date: Wed, 26 Nov 2025 15:49:44 +0000 Subject: [PATCH 05/41] Generalize handling of the elementwise ops. --- .../include/ck_tile/builder/conv_factory.hpp | 49 +++++++++++++++---- .../builder/conv_signature_concepts.hpp | 9 +++- .../ck_tile/builder/conv_signature_utils.hpp | 3 +- ...est_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp | 4 +- .../test/impl/conv_signature_types.hpp | 9 +++- 5 files changed, 60 insertions(+), 14 deletions(-) diff --git a/experimental/builder/include/ck_tile/builder/conv_factory.hpp b/experimental/builder/include/ck_tile/builder/conv_factory.hpp index 8132e37fa5..473ac662e0 100644 --- a/experimental/builder/include/ck_tile/builder/conv_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_factory.hpp @@ -360,17 +360,17 @@ struct ConvTensorTypes using EDataType = ck::f8_t; }; -template +template struct ElementwiseOps { - // This will trigger if a specialization for the given DataType is not found. + // This will trigger if a specialization for the given ElementwiseOps combination is not found. // We should always catch this in an earlier validation check. - static_assert(sizeof(UnsupportedEnumValue) == 0, + static_assert(sizeof(UnsupportedEnumValue) == 0, "Internal error. Unsupported elementwise operation for convolution factory."); }; template <> -struct ElementwiseOps +struct ElementwiseOps { using AElementwiseOp = ck::tensor_operation::element_wise::PassThrough; using BElementwiseOp = ck::tensor_operation::element_wise::PassThrough; @@ -378,13 +378,42 @@ struct ElementwiseOps }; template <> -struct ElementwiseOps +struct ElementwiseOps { using AElementwiseOp = ck::tensor_operation::element_wise::PassThrough; using BElementwiseOp = ck::tensor_operation::element_wise::PassThrough; using CDEElementwiseOp = ck::tensor_operation::element_wise::Scale; }; +template <> +struct ElementwiseOps +{ + using AElementwiseOp = ck::tensor_operation::element_wise::PassThrough; + using BElementwiseOp = ck::tensor_operation::element_wise::PassThrough; + using CDEElementwiseOp = ck::tensor_operation::element_wise::ScaleAddScaleAddRelu; +}; + +struct PassThroughOp +{ + using AElementwiseOp = ck::tensor_operation::element_wise::PassThrough; + using BElementwiseOp = ck::tensor_operation::element_wise::PassThrough; + using CDEElementwiseOp = ck::tensor_operation::element_wise::PassThrough; +}; + +template +requires (HasElementwiseOp) +constexpr auto GetElementwiseOp() +{ + return ElementwiseOps{}; +} + +template +requires (!HasElementwiseOp) +constexpr auto GetElementwiseOp() +{ + return PassThroughOp{}; +} + // The algorithm specializations for the convolution and GEMM. 
template requires( @@ -639,7 +668,7 @@ struct ConvFactory ConvDirection::FORWARD>()); using Types = factory_internal::ConvTensorTypes; - using Ops = factory_internal::ElementwiseOps()>; + using Ops = decltype(factory_internal::GetElementwiseOp()); using AlgorithmType = decltype(ALGORITHM); static_assert(ALGORITHM.transfer.a.lds_transfer.is_direct_load == @@ -745,7 +774,7 @@ struct ConvFactory ConvDirection::FORWARD>()); using Types = factory_internal::ConvTensorTypes; - using Ops = factory_internal::ElementwiseOps()>; + using Ops = decltype(factory_internal::GetElementwiseOp()); using AlgorithmType = decltype(ALGORITHM); static constexpr auto FWD_CONV_SPECIALIZATION = @@ -846,7 +875,7 @@ struct ConvFactory ConvDirection::FORWARD>()); using Types = factory_internal::ConvTensorTypes; - using Ops = factory_internal::ElementwiseOps()>; + using Ops = decltype(factory_internal::GetElementwiseOp()); using AlgorithmType = decltype(ALGORITHM); static constexpr auto FWD_CONV_SPECIALIZATION = @@ -946,7 +975,7 @@ struct ConvFactory ConvDirection::FORWARD>()); using Types = factory_internal::ConvTensorTypes; - using Ops = factory_internal::ElementwiseOps()>; + using Ops = decltype(factory_internal::GetElementwiseOp()); using AlgorithmType = decltype(ALGORITHM); static constexpr auto FWD_CONV_SPECIALIZATION = @@ -1073,7 +1102,7 @@ struct ConvFactory ConvDirection::FORWARD>()); using Types = factory_internal::ConvTensorTypes; - using Ops = factory_internal::ElementwiseOps()>; + using Ops = decltype(factory_internal::GetElementwiseOp()); using AlgorithmType = decltype(ALGORITHM); static constexpr auto BASE_ALGORITHM = ALGORITHM.base_algorithm; diff --git a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp index 2f9c4c8004..088ad50156 100644 --- a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp @@ -106,12 +106,19 @@ concept HasConvolutionDirection = requires(T t) { { t.direction }; }; +template +concept ElementwiseOperationDescriptor = requires(T t) { + { t.input_op } -> std::convertible_to; + { t.weight_op } -> std::convertible_to; + { t.output_op } -> std::convertible_to; +}; + // Note: it is not required to provide an ElementwiseOp, but if one is provided, check if well // defined template concept ElementwiseOpWellDefinedIfProvided = requires(T t) { requires !HasElementwiseOp || requires { - { t.elementwise_operation } -> std::convertible_to; + { t.elementwise_operation } -> ElementwiseOperationDescriptor; }; }; diff --git a/experimental/builder/include/ck_tile/builder/conv_signature_utils.hpp b/experimental/builder/include/ck_tile/builder/conv_signature_utils.hpp index 65a4b60588..7ceffa2c11 100644 --- a/experimental/builder/include/ck_tile/builder/conv_signature_utils.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_signature_utils.hpp @@ -20,9 +20,10 @@ template concept ProvidesConvolutionDirection = requires { Sig.direction; }; template +requires (HasElementwiseOp) constexpr auto get_elementwise_operation() { - if constexpr(ProvidesElementwiseOperation) + if constexpr(HasElementwiseOp) { return Sig.elementwise_operation; } diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp index 48b7c522d8..2d765d6abf 100644 --- 
a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp
+++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp
@@ -28,7 +28,8 @@ TEST(FwdConvInstances,
                                            .layout    = FwdConvLayout,
                                            .data_type = DataType::BF16,
                                            .elementwise_operation =
-                                               ElementwiseOperation::SCALEADD_SCALEADD_RELU};
+                                               {.output_op = ElementwiseOperation::SCALEADD_SCALEADD_RELU}
+    };
 
     constexpr auto FwdConvAlgorithm =
         ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle{}
@@ -41,6 +42,7 @@ TEST(FwdConvInstances,
     using Builder = ConvBuilder;
     run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle",
               "NHWGC,GKYXC,NHWGK,G_K,NHWGK", // Check layouts
+              "PassThrough,PassThrough,ScaleAddScaleAddRelu", // Check elementwise ops
               "64,64,32,32",
               "MNKPadding",
               "Default"});
diff --git a/experimental/builder/test/impl/conv_signature_types.hpp b/experimental/builder/test/impl/conv_signature_types.hpp
index 6241d2c9d0..cc777d0409 100644
--- a/experimental/builder/test/impl/conv_signature_types.hpp
+++ b/experimental/builder/test/impl/conv_signature_types.hpp
@@ -30,6 +30,13 @@ struct ConvLayout : BiasTensorLayouts...
     ConvOutputLayout output_layout;
 };
 
+struct ElementwiseOperations
+{
+    ElementwiseOperation input_op{ElementwiseOperation::PASS_THROUGH};
+    ElementwiseOperation weight_op{ElementwiseOperation::PASS_THROUGH};
+    ElementwiseOperation output_op{ElementwiseOperation::PASS_THROUGH};
+};
+
 template
 struct ConvSignature
 {
@@ -37,7 +44,7 @@ struct ConvSignature
     ConvDirection direction;
     GroupConvLayout layout;
     DataType data_type;
-    ElementwiseOperation elementwise_operation;
+    ElementwiseOperations elementwise_operation;
 };
 
 } // namespace ck_tile::builder::test

From c7c814f27d9806d218fb3f624e0e3ccae145dc19 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <>
Date: Wed, 26 Nov 2025 15:52:31 +0000
Subject: [PATCH 06/41] Fix the expected layout string in the scaleadd_relu
 builder test.

---
 .../test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp
index 2d765d6abf..026a6f69a0 100644
--- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp
+++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp
@@ -41,7 +41,7 @@ TEST(FwdConvInstances,
     using Builder = ConvBuilder;
     run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle",
-              "NHWGC,GKYXC,NHWGK,G_K,NHWGK", // Check layouts
+              "NHWGC,GKYXC,Tuple(NHWGK,G_K),NHWGK", // Check layouts
               "PassThrough,PassThrough,ScaleAddScaleAddRelu", // Check elementwise ops
               "64,64,32,32",
               "MNKPadding",
               "Default"});

From 809c8b4ce13407c1fbd0418ec96a48819e528fd7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <>
Date: Thu, 27 Nov 2025 08:04:27 +0000
Subject: [PATCH 07/41] Create builder for layouts.
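
Layouts can now be assembled step by step at compile time. The layout
wrappers gain an UndefinedLayout default state so that they are
default-constructible, and the bias-layout containers gain chainable
with_layout() members that return a new container type with the extra
layout appended. Roughly, with the template arguments written out as in
the updated test (G_K and NHWGK are the constexpr aliases defined there,
and the call order is illustrative):

    constexpr auto bias_layouts = ConvOutputBiasLayouts<>{}
                                      .with_layout<NHWGK>()
                                      .with_layout<G_K>();

Each with_layout() call yields a ConvOutputBiasLayouts instantiation that
carries one more ConvOutputBiasLayout value, so the final array of bias
layouts is a compile-time constant.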
--- .../builder/include/ck_tile/builder/types.hpp | 11 ++++ ...est_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp | 17 +++--- .../test/impl/conv_signature_types.hpp | 58 +++++++++++++++++++ 3 files changed, 78 insertions(+), 8 deletions(-) diff --git a/experimental/builder/include/ck_tile/builder/types.hpp b/experimental/builder/include/ck_tile/builder/types.hpp index 4eaad96647..611594f22b 100644 --- a/experimental/builder/include/ck_tile/builder/types.hpp +++ b/experimental/builder/include/ck_tile/builder/types.hpp @@ -58,6 +58,11 @@ enum class ConvInputLayout3D G_NDHW_C_strided }; +enum class UndefinedLayout +{ + None +}; + struct ConvInputLayout { union @@ -65,8 +70,10 @@ struct ConvInputLayout ConvInputLayout1D _1d; ConvInputLayout2D _2d; ConvInputLayout3D _3d; + UndefinedLayout _undefined; }; + constexpr ConvInputLayout() : _undefined(UndefinedLayout::None) {} constexpr ConvInputLayout(ConvInputLayout1D layout) : _1d(layout) {} constexpr ConvInputLayout(ConvInputLayout2D layout) : _2d(layout) {} constexpr ConvInputLayout(ConvInputLayout3D layout) : _3d(layout) {} @@ -117,8 +124,10 @@ struct ConvWeightLayout ConvWeightLayout1D _1d; ConvWeightLayout2D _2d; ConvWeightLayout3D _3d; + UndefinedLayout _undefined; }; + constexpr ConvWeightLayout() : _undefined(UndefinedLayout::None) {} constexpr ConvWeightLayout(ConvWeightLayout1D layout) : _1d(layout) {} constexpr ConvWeightLayout(ConvWeightLayout2D layout) : _2d(layout) {} constexpr ConvWeightLayout(ConvWeightLayout3D layout) : _3d(layout) {} @@ -158,8 +167,10 @@ struct ConvOutputLayout ConvOutputLayout1D _1d; ConvOutputLayout2D _2d; ConvOutputLayout3D _3d; + UndefinedLayout _undefined; }; + constexpr ConvOutputLayout() : _undefined(UndefinedLayout::None) {} constexpr ConvOutputLayout(ConvOutputLayout1D layout) : _1d(layout) {} constexpr ConvOutputLayout(ConvOutputLayout2D layout) : _2d(layout) {} constexpr ConvOutputLayout(ConvOutputLayout3D layout) : _3d(layout) {} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp index 026a6f69a0..7990f4f884 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp @@ -14,14 +14,15 @@ TEST(FwdConvInstances, { constexpr auto G_K = OutputBiasLayout::G_K_strided; constexpr auto NHWGK = ConvOutputLayout2D::NHWGK; - using FwdOutputBiasLayouts = ConvOutputBiasLayouts; - - constexpr ConvLayout FwdConvLayout - { - .input_layout = ConvInputLayout2D::NHWGC, - .weight_layout = ConvWeightLayout2D::GKYXC, - .output_layout = ConvOutputLayout2D::NHWGK - }; + constexpr auto FwdOutputBiasLayouts = ConvOutputBiasLayouts{} + .with_layout() + .with_layout(); + + constexpr auto FwdConvLayout = ConvLayout{} + .with_input_layout() + .with_weight_layout() + .with_output_layout() + .with_output_bias_layouts(FwdOutputBiasLayouts); constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, .direction = ConvDirection::FORWARD, diff --git a/experimental/builder/test/impl/conv_signature_types.hpp b/experimental/builder/test/impl/conv_signature_types.hpp index cc777d0409..00397765ec 100644 --- a/experimental/builder/test/impl/conv_signature_types.hpp +++ b/experimental/builder/test/impl/conv_signature_types.hpp @@ -14,12 +14,36 @@ template struct ConvInputBiasLayouts { std::array input_bias_layout{InputBiasLayouts...}; + + template + constexpr auto with_layout() const + { + return ConvInputBiasLayouts{}; + } + + 
template + constexpr auto with_layout() const + { + return ConvInputBiasLayouts{}; + } }; template struct ConvOutputBiasLayouts { std::array output_bias_layout{OutputBiasLayouts...}; + + template + constexpr auto with_layout() const + { + return ConvOutputBiasLayouts{}; + } + + template + constexpr auto with_layout() const + { + return ConvOutputBiasLayouts{}; + } }; template @@ -28,6 +52,40 @@ struct ConvLayout : BiasTensorLayouts... ConvInputLayout input_layout; ConvWeightLayout weight_layout; ConvOutputLayout output_layout; + + template + constexpr auto with_input_layout() const + { + auto result = *this; + result.input_layout = Layout; + return result; + } + + template + constexpr auto with_weight_layout() const + { + auto result = *this; + result.weight_layout = Layout; + return result; + } + + template + constexpr auto with_output_layout() const + { + auto result = *this; + result.output_layout = Layout; + return result; + } + + template + constexpr auto with_output_bias_layouts(const OutputBiasLayouts&) const + { + return ConvLayout{ + .input_layout = this->input_layout, + .weight_layout = this->weight_layout, + .output_layout = this->output_layout + }; + } }; struct ElementwiseOperations From f1cff54b44d7fa842c4186b8c6ebc26edc874eac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Thu, 27 Nov 2025 11:12:05 +0000 Subject: [PATCH 08/41] Layout builder improvements. --- ...est_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp | 18 ++--- .../test/impl/conv_signature_types.hpp | 73 +++---------------- 2 files changed, 21 insertions(+), 70 deletions(-) diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp index 7990f4f884..0c72633b74 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp @@ -14,15 +14,15 @@ TEST(FwdConvInstances, { constexpr auto G_K = OutputBiasLayout::G_K_strided; constexpr auto NHWGK = ConvOutputLayout2D::NHWGK; - constexpr auto FwdOutputBiasLayouts = ConvOutputBiasLayouts{} - .with_layout() - .with_layout(); - - constexpr auto FwdConvLayout = ConvLayout{} - .with_input_layout() - .with_weight_layout() - .with_output_layout() - .with_output_bias_layouts(FwdOutputBiasLayouts); + + constexpr auto FwdConvLayout = ConvLayout + { + .input_layout = ConvInputLayout2D::NHWGC, + .weight_layout = ConvWeightLayout2D::GKYXC, + .output_layout = ConvOutputLayout2D::NHWGK + } + .with_output_bias_layout() + .with_output_bias_layout(); constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, .direction = ConvDirection::FORWARD, diff --git a/experimental/builder/test/impl/conv_signature_types.hpp b/experimental/builder/test/impl/conv_signature_types.hpp index 00397765ec..28281eb483 100644 --- a/experimental/builder/test/impl/conv_signature_types.hpp +++ b/experimental/builder/test/impl/conv_signature_types.hpp @@ -10,77 +10,28 @@ namespace ck_tile::builder::test { using namespace ck_tile::builder; -template -struct ConvInputBiasLayouts -{ - std::array input_bias_layout{InputBiasLayouts...}; - - template - constexpr auto with_layout() const - { - return ConvInputBiasLayouts{}; - } - - template - constexpr auto with_layout() const - { - return ConvInputBiasLayouts{}; - } -}; - template -struct ConvOutputBiasLayouts -{ - std::array output_bias_layout{OutputBiasLayouts...}; - - template - constexpr auto with_layout() const - { - return 
ConvOutputBiasLayouts{}; - } - - template - constexpr auto with_layout() const - { - return ConvOutputBiasLayouts{}; - } -}; - -template -struct ConvLayout : BiasTensorLayouts... +struct ConvLayout { ConvInputLayout input_layout; ConvWeightLayout weight_layout; ConvOutputLayout output_layout; + std::array output_bias_layout{OutputBiasLayouts...}; - template - constexpr auto with_input_layout() const - { - auto result = *this; - result.input_layout = Layout; - return result; - } - - template - constexpr auto with_weight_layout() const + template + constexpr auto with_output_bias_layout() const { - auto result = *this; - result.weight_layout = Layout; - return result; + return ConvLayout{ + .input_layout = this->input_layout, + .weight_layout = this->weight_layout, + .output_layout = this->output_layout + }; } - template - constexpr auto with_output_layout() const - { - auto result = *this; - result.output_layout = Layout; - return result; - } - - template - constexpr auto with_output_bias_layouts(const OutputBiasLayouts&) const + template + constexpr auto with_output_bias_layout() const { - return ConvLayout{ + return ConvLayout{ .input_layout = this->input_layout, .weight_layout = this->weight_layout, .output_layout = this->output_layout From 3173d945b94944f1a9b910f2ab9c11f32f75990f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Thu, 27 Nov 2025 11:40:56 +0000 Subject: [PATCH 09/41] Improve layout builder. --- ...test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp | 7 +++---- .../builder/test/impl/conv_signature_types.hpp | 18 ++++-------------- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp index 0c72633b74..974c3926f5 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp @@ -13,16 +13,15 @@ TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_Instance_2D_BF16_scale_add_relu) { constexpr auto G_K = OutputBiasLayout::G_K_strided; - constexpr auto NHWGK = ConvOutputLayout2D::NHWGK; - + constexpr auto NHWGK = ConvOutputLayout2D::NHWGK; + constexpr auto FwdConvLayout = ConvLayout { .input_layout = ConvInputLayout2D::NHWGC, .weight_layout = ConvWeightLayout2D::GKYXC, .output_layout = ConvOutputLayout2D::NHWGK } - .with_output_bias_layout() - .with_output_bias_layout(); + .with_output_bias_layout(); constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, .direction = ConvDirection::FORWARD, diff --git a/experimental/builder/test/impl/conv_signature_types.hpp b/experimental/builder/test/impl/conv_signature_types.hpp index 28281eb483..e9bacf6eeb 100644 --- a/experimental/builder/test/impl/conv_signature_types.hpp +++ b/experimental/builder/test/impl/conv_signature_types.hpp @@ -10,28 +10,18 @@ namespace ck_tile::builder::test { using namespace ck_tile::builder; -template +template struct ConvLayout { ConvInputLayout input_layout; ConvWeightLayout weight_layout; ConvOutputLayout output_layout; - std::array output_bias_layout{OutputBiasLayouts...}; + std::array output_bias_layout{ConvOutputBiasLayout{OutputBiasLayouts}...}; - template + template constexpr auto with_output_bias_layout() const { - return ConvLayout{ - .input_layout = this->input_layout, - .weight_layout = this->weight_layout, - .output_layout = this->output_layout - }; - } - - template - constexpr auto 
with_output_bias_layout() const - { - return ConvLayout{ + return ConvLayout{ .input_layout = this->input_layout, .weight_layout = this->weight_layout, .output_layout = this->output_layout From d3588954e6cf693ec84b6db2b9ef109b01eec090 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Thu, 27 Nov 2025 12:06:39 +0000 Subject: [PATCH 10/41] Simplify bias layout handling. --- .../include/ck_tile/builder/conv_factory.hpp | 62 +++++++++---------- .../builder/conv_signature_concepts.hpp | 33 +++------- .../builder/include/ck_tile/builder/types.hpp | 36 +++-------- ...est_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp | 4 +- .../test/impl/conv_signature_types.hpp | 8 +-- 5 files changed, 54 insertions(+), 89 deletions(-) diff --git a/experimental/builder/include/ck_tile/builder/conv_factory.hpp b/experimental/builder/include/ck_tile/builder/conv_factory.hpp index 473ac662e0..7224abaab6 100644 --- a/experimental/builder/include/ck_tile/builder/conv_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_factory.hpp @@ -85,48 +85,48 @@ consteval auto get_output_layout_value(ConvOutputLayout layout) { else static_assert(false, "Unsupported spatial dimension"); } -struct EmptyOutputBiasLayout +struct EmptyBiasLayout { using DsLayout = ck::Tuple<>; using DsDataTypes = ck::Tuple<>; }; -// Type mappings from the builder ConvOutputBiasLayout enum classes to the CK tensor data types. -template +// Type mappings from the builder ConvBiasLayout enum classes to the CK tensor data types. +template requires(ConvSpatialDim) -struct ConvOutputBiasTensorLayouts +struct ConvBiasTensorLayouts { // This will trigger if a specialization for the given layout is not found. // We should always catch this in an earlier validation check. - using OutputBiasLayout = decltype(OutputBiasLayoutValue); - static_assert(sizeof(OutputBiasLayout) == 0, + using BiasLayoutType = decltype(BiasLayoutValue); + static_assert(sizeof(BiasLayoutType) == 0, "Internal error. Unsupported layout for convolution factory."); }; -constexpr std::array NHWGK_G_K_STRIDED_LAYOUT = { - ConvOutputBiasLayout{ConvOutputLayout2D::NHWGK}, - ConvOutputBiasLayout{OutputBiasLayout::G_K_strided} +constexpr std::array NHWGK_G_K_STRIDED_LAYOUT = { + ConvBiasLayout{ConvOutputLayout2D::NHWGK}, + ConvBiasLayout{BiasLayout::G_K_strided} }; template<> -struct ConvOutputBiasTensorLayouts +struct ConvBiasTensorLayouts { using DsLayout = ck::Tuple; using DsDataTypes = ck::Tuple; }; template -requires (HasOutputBiasLayout) -consteval auto GetOutputBiasTensorLayout() +requires (HasBiasLayout) +consteval auto GetBiasTensorLayout() { - return factory_internal::ConvOutputBiasTensorLayouts{}; + return factory_internal::ConvBiasTensorLayouts{}; } template -requires (!HasOutputBiasLayout) -consteval auto GetOutputBiasTensorLayout() +requires (!HasBiasLayout) +consteval auto GetBiasTensorLayout() { - return EmptyOutputBiasLayout{}; + return EmptyBiasLayout{}; } // Type mappings from the builder ConvLayout enum classes to the CK tensor data types. 
@@ -663,7 +663,7 @@ struct ConvFactory using Layouts = decltype(factory_internal::GetTensorLayout()); - using OutputBiasLayouts = decltype(factory_internal::GetOutputBiasTensorLayout()); @@ -708,13 +708,13 @@ struct ConvFactory SPATIAL_DIM, typename Layouts::ALayout, typename Layouts::BLayout, - typename OutputBiasLayouts::DsLayout, + typename BiasLayouts::DsLayout, typename Layouts::ELayout, typename Types::ADataType, typename Types::BDataType, typename Types::AccDataType, typename Types::CShuffleDataType, - typename OutputBiasLayouts::DsDataTypes, + typename BiasLayouts::DsDataTypes, typename Types::EDataType, typename Ops::AElementwiseOp, typename Ops::BElementwiseOp, @@ -769,7 +769,7 @@ struct ConvFactory using Layouts = decltype(factory_internal::GetTensorLayout()); - using OutputBiasLayouts = decltype(factory_internal::GetOutputBiasTensorLayout()); @@ -809,13 +809,13 @@ struct ConvFactory SPATIAL_DIM, typename Layouts::ALayout, typename Layouts::BLayout, - typename OutputBiasLayouts::DsLayout, + typename BiasLayouts::DsLayout, typename Layouts::ELayout, typename Types::ADataType, typename Types::BDataType, typename Types::AccDataType, typename Types::CShuffleDataType, - typename OutputBiasLayouts::DsDataTypes, + typename BiasLayouts::DsDataTypes, typename Types::EDataType, typename Ops::AElementwiseOp, typename Ops::BElementwiseOp, @@ -870,7 +870,7 @@ struct ConvFactory using Layouts = decltype(factory_internal::GetTensorLayout()); - using OutputBiasLayouts = decltype(factory_internal::GetOutputBiasTensorLayout()); @@ -912,13 +912,13 @@ struct ConvFactory SPATIAL_DIM, typename Layouts::ALayout, typename Layouts::BLayout, - typename OutputBiasLayouts::DsLayout, + typename BiasLayouts::DsLayout, typename Layouts::ELayout, typename Types::ADataType, typename Types::BDataType, typename Types::AccDataType, typename Types::CShuffleDataType, - typename OutputBiasLayouts::DsDataTypes, + typename BiasLayouts::DsDataTypes, typename Types::EDataType, typename Ops::AElementwiseOp, typename Ops::BElementwiseOp, @@ -970,7 +970,7 @@ struct ConvFactory using Layouts = decltype(factory_internal::GetTensorLayout()); - using OutputBiasLayouts = decltype(factory_internal::GetOutputBiasTensorLayout()); @@ -1042,12 +1042,12 @@ struct ConvFactory SPATIAL_DIM, typename Types::ADataType, typename Types::BDataType, - typename OutputBiasLayouts::DsDataTypes, + typename BiasLayouts::DsDataTypes, typename Types::EDataType, typename Types::AccDataType, typename Layouts::ALayout, typename Layouts::BLayout, - typename OutputBiasLayouts::DsLayout, + typename BiasLayouts::DsLayout, typename Layouts::ELayout, typename Ops::AElementwiseOp, typename Ops::BElementwiseOp, @@ -1097,7 +1097,7 @@ struct ConvFactory using Layouts = decltype(factory_internal::GetTensorLayout()); - using OutputBiasLayouts = decltype(factory_internal::GetOutputBiasTensorLayout()); @@ -1139,13 +1139,13 @@ struct ConvFactory SPATIAL_DIM, typename Layouts::ALayout, typename Layouts::BLayout, - typename OutputBiasLayouts::DsLayout, + typename BiasLayouts::DsLayout, typename Layouts::ELayout, typename Types::ADataType, typename Types::BDataType, typename Types::AccDataType, typename Types::CShuffleDataType, - typename OutputBiasLayouts::DsDataTypes, + typename BiasLayouts::DsDataTypes, typename Types::EDataType, typename Ops::AElementwiseOp, typename Ops::BElementwiseOp, diff --git a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp index 
088ad50156..f093f1d74a 100644 --- a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp @@ -34,34 +34,18 @@ concept ConvDataType = (T == DataType::FP32) || (T == DataType::FP16) || (T == D (T == DataType::FP8) || (T == DataType::I8) || (T == DataType::U8); template -concept HasInputBiasLayout = requires(T t) { - { t.input_bias_layout }; +concept HasBiasLayout = requires(T t) { + { t.bias_layout }; }; template -concept ConvertibleToArrayOfConvInputBiasLayout = - std::is_same_v, std::array>>>; +concept ConvertibleToArrayOfConvBiasLayout = + std::is_same_v, std::array>>>; template -concept InputBiasLayoutWellDefinedIfProvided = requires(T t) { - requires !HasInputBiasLayout || requires { - { t.input_bias_layout } -> ConvertibleToArrayOfConvInputBiasLayout; - }; -}; - -template -concept HasOutputBiasLayout = requires(T t) { - { t.output_bias_layout }; -}; - -template -concept ConvertibleToArrayOfConvOutputBiasLayout = - std::is_same_v, std::array>>>; - -template -concept OutputBiasLayoutWellDefinedIfProvided = requires(T t) { - requires !HasOutputBiasLayout || requires { - { t.output_bias_layout } -> ConvertibleToArrayOfConvOutputBiasLayout; +concept BiasLayoutWellDefinedIfProvided = requires(T t) { + requires !HasBiasLayout || requires { + { t.bias_layout } -> ConvertibleToArrayOfConvBiasLayout; }; }; @@ -70,8 +54,7 @@ concept ConvLayoutDescriptor = requires(T t) { { t.input_layout } -> std::convertible_to; { t.weight_layout } -> std::convertible_to; { t.output_layout } -> std::convertible_to; - requires InputBiasLayoutWellDefinedIfProvided; - requires OutputBiasLayoutWellDefinedIfProvided; + requires BiasLayoutWellDefinedIfProvided; }; diff --git a/experimental/builder/include/ck_tile/builder/types.hpp b/experimental/builder/include/ck_tile/builder/types.hpp index 611594f22b..182a3935d9 100644 --- a/experimental/builder/include/ck_tile/builder/types.hpp +++ b/experimental/builder/include/ck_tile/builder/types.hpp @@ -19,14 +19,10 @@ enum class DataType U8 }; -enum class InputBiasLayout +enum class BiasLayout { GC, - G_C_strided -}; - -enum class OutputBiasLayout -{ + G_C_strided, GK, G_K_strided }; @@ -79,20 +75,6 @@ struct ConvInputLayout constexpr ConvInputLayout(ConvInputLayout3D layout) : _3d(layout) {} }; -struct ConvInputBiasLayout -{ - union { - InputBiasLayout _input_bias_layout; - ConvInputLayout _conv_input_layout; - }; - - constexpr ConvInputBiasLayout(InputBiasLayout layout) : _input_bias_layout(layout) {} - constexpr ConvInputBiasLayout(ConvInputLayout layout) : _conv_input_layout(layout) {} - constexpr ConvInputBiasLayout(ConvInputLayout1D layout) : _conv_input_layout(layout) {} - constexpr ConvInputBiasLayout(ConvInputLayout2D layout) : _conv_input_layout(layout) {} - constexpr ConvInputBiasLayout(ConvInputLayout3D layout) : _conv_input_layout(layout) {} -}; - enum class ConvWeightLayout1D { GKXC, @@ -176,18 +158,18 @@ struct ConvOutputLayout constexpr ConvOutputLayout(ConvOutputLayout3D layout) : _3d(layout) {} }; -struct ConvOutputBiasLayout +struct ConvBiasLayout { union { - OutputBiasLayout _output_bias_layout; + BiasLayout _bias_layout; ConvOutputLayout _conv_output_layout; }; - constexpr ConvOutputBiasLayout(OutputBiasLayout layout) : _output_bias_layout(layout) {} - constexpr ConvOutputBiasLayout(ConvOutputLayout layout) : _conv_output_layout(layout) {} - constexpr ConvOutputBiasLayout(ConvOutputLayout1D layout) : _conv_output_layout(layout) {} - constexpr 
ConvOutputBiasLayout(ConvOutputLayout2D layout) : _conv_output_layout(layout) {} - constexpr ConvOutputBiasLayout(ConvOutputLayout3D layout) : _conv_output_layout(layout) {} + constexpr ConvBiasLayout(BiasLayout layout) : _bias_layout(layout) {} + constexpr ConvBiasLayout(ConvOutputLayout layout) : _conv_output_layout(layout) {} + constexpr ConvBiasLayout(ConvOutputLayout1D layout) : _conv_output_layout(layout) {} + constexpr ConvBiasLayout(ConvOutputLayout2D layout) : _conv_output_layout(layout) {} + constexpr ConvBiasLayout(ConvOutputLayout3D layout) : _conv_output_layout(layout) {} }; // Direction of the convolution operation. diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp index 974c3926f5..a331f284e4 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp @@ -12,7 +12,7 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_Instance_2D_BF16_scale_add_relu) { - constexpr auto G_K = OutputBiasLayout::G_K_strided; + constexpr auto G_K = BiasLayout::G_K_strided; constexpr auto NHWGK = ConvOutputLayout2D::NHWGK; constexpr auto FwdConvLayout = ConvLayout @@ -21,7 +21,7 @@ TEST(FwdConvInstances, .weight_layout = ConvWeightLayout2D::GKYXC, .output_layout = ConvOutputLayout2D::NHWGK } - .with_output_bias_layout(); + .with_bias_layout(); constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, .direction = ConvDirection::FORWARD, diff --git a/experimental/builder/test/impl/conv_signature_types.hpp b/experimental/builder/test/impl/conv_signature_types.hpp index e9bacf6eeb..903acaaae6 100644 --- a/experimental/builder/test/impl/conv_signature_types.hpp +++ b/experimental/builder/test/impl/conv_signature_types.hpp @@ -10,18 +10,18 @@ namespace ck_tile::builder::test { using namespace ck_tile::builder; -template +template struct ConvLayout { ConvInputLayout input_layout; ConvWeightLayout weight_layout; ConvOutputLayout output_layout; - std::array output_bias_layout{ConvOutputBiasLayout{OutputBiasLayouts}...}; + std::array bias_layout{ConvBiasLayout{BiasLayouts}...}; template - constexpr auto with_output_bias_layout() const + constexpr auto with_bias_layout() const { - return ConvLayout{ + return ConvLayout{ .input_layout = this->input_layout, .weight_layout = this->weight_layout, .output_layout = this->output_layout From 69bfe64c2fecfa2e94c58659e9cd53e644a47344 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Thu, 27 Nov 2025 12:38:40 +0000 Subject: [PATCH 11/41] Code clean-up. 
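Removes the unused get_elementwise_operation() helper and its ProvidesElementwiseOperation concept; callers now read the ops directly from the signature, where every field already defaults to PASS_THROUGH. A sketch of the defaulting this relies on, using the ElementwiseOperations struct introduced earlier in this series:

    constexpr ElementwiseOperations default_ops{};  // input/weight/output all PASS_THROUGH
    constexpr ElementwiseOperations fused_ops{
        .output_op = ElementwiseOperation::SCALEADD_SCALEADD_RELU};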
--- .../ck_tile/builder/conv_signature_utils.hpp | 17 ----------------- .../test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp | 1 - 2 files changed, 18 deletions(-) diff --git a/experimental/builder/include/ck_tile/builder/conv_signature_utils.hpp b/experimental/builder/include/ck_tile/builder/conv_signature_utils.hpp index 7ceffa2c11..bf1840caf4 100644 --- a/experimental/builder/include/ck_tile/builder/conv_signature_utils.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_signature_utils.hpp @@ -13,26 +13,9 @@ namespace ck_tile::builder { * constexpr helper functions for optional parameters **********************************************/ -template -concept ProvidesElementwiseOperation = requires { Sig.elementwiseOperation; }; - template concept ProvidesConvolutionDirection = requires { Sig.direction; }; -template -requires (HasElementwiseOp) -constexpr auto get_elementwise_operation() -{ - if constexpr(HasElementwiseOp) - { - return Sig.elementwise_operation; - } - else - { - return ElementwiseOperation::PASS_THROUGH; - } -} - template constexpr auto get_conv_direction() { diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp index a331f284e4..8d5f64d853 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp @@ -14,7 +14,6 @@ TEST(FwdConvInstances, { constexpr auto G_K = BiasLayout::G_K_strided; constexpr auto NHWGK = ConvOutputLayout2D::NHWGK; - constexpr auto FwdConvLayout = ConvLayout { .input_layout = ConvInputLayout2D::NHWGC, From aba3eeae02054e0af64d073ff96b04138cb1bd85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Thu, 27 Nov 2025 12:39:24 +0000 Subject: [PATCH 12/41] Move layout utils into separate file. --- .../include/ck_tile/builder/conv_factory.hpp | 206 +--------------- .../ck_tile/builder/conv_layout_utils.hpp | 219 ++++++++++++++++++ 2 files changed, 220 insertions(+), 205 deletions(-) create mode 100644 experimental/builder/include/ck_tile/builder/conv_layout_utils.hpp diff --git a/experimental/builder/include/ck_tile/builder/conv_factory.hpp b/experimental/builder/include/ck_tile/builder/conv_factory.hpp index 7224abaab6..5f79b774c5 100644 --- a/experimental/builder/include/ck_tile/builder/conv_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_factory.hpp @@ -58,6 +58,7 @@ #include "ck_tile/builder/versions.hpp" #include "ck_tile/builder/conv_signature_utils.hpp" +#include "ck_tile/builder/conv_layout_utils.hpp" namespace ck_tile::builder::factory_internal { @@ -85,211 +86,6 @@ consteval auto get_output_layout_value(ConvOutputLayout layout) { else static_assert(false, "Unsupported spatial dimension"); } -struct EmptyBiasLayout -{ - using DsLayout = ck::Tuple<>; - using DsDataTypes = ck::Tuple<>; -}; - -// Type mappings from the builder ConvBiasLayout enum classes to the CK tensor data types. -template - requires(ConvSpatialDim) -struct ConvBiasTensorLayouts -{ - // This will trigger if a specialization for the given layout is not found. - // We should always catch this in an earlier validation check. - using BiasLayoutType = decltype(BiasLayoutValue); - static_assert(sizeof(BiasLayoutType) == 0, - "Internal error. 
Unsupported layout for convolution factory."); -}; - -constexpr std::array NHWGK_G_K_STRIDED_LAYOUT = { - ConvBiasLayout{ConvOutputLayout2D::NHWGK}, - ConvBiasLayout{BiasLayout::G_K_strided} -}; - -template<> -struct ConvBiasTensorLayouts -{ - using DsLayout = ck::Tuple; - using DsDataTypes = ck::Tuple; -}; - -template -requires (HasBiasLayout) -consteval auto GetBiasTensorLayout() -{ - return factory_internal::ConvBiasTensorLayouts{}; -} - -template -requires (!HasBiasLayout) -consteval auto GetBiasTensorLayout() -{ - return EmptyBiasLayout{}; -} - -// Type mappings from the builder ConvLayout enum classes to the CK tensor data types. -template - requires(ConvSpatialDim - && ValidConvInputLayoutForSpatialDim - && ValidConvWeightLayoutForSpatialDim - && ValidConvOutputLayoutForSpatialDim) -struct ConvTensorLayouts -{ - // This will trigger if a specialization for the given layout is not found. - // We should always catch this in an earlier validation check. - using InputLayout = decltype(InputLayoutValue); - using WeightLayout = decltype(WeightLayoutValue); - using OutputLayout = decltype(OutputLayoutValue); - static_assert(sizeof(InputLayout) == 0 && sizeof(WeightLayout) == 0 && sizeof(OutputLayout) == 0, - "Internal error. Unsupported layout for convolution factory."); -}; - -// 1D Forward Convolution Layout Specializations -template <> -struct ConvTensorLayouts -{ - using ALayout = ck::tensor_layout::convolution::NWGC; - using BLayout = ck::tensor_layout::convolution::GKXC; - using ELayout = ck::tensor_layout::convolution::NWGK; -}; - -template <> -struct ConvTensorLayouts -{ - using ALayout = ck::tensor_layout::convolution::NGCW; - using BLayout = ck::tensor_layout::convolution::GKXC; - using ELayout = ck::tensor_layout::convolution::NGKW; -}; - -template <> -struct ConvTensorLayouts -{ - using ALayout = ck::tensor_layout::convolution::GNWC; - using BLayout = ck::tensor_layout::convolution::GKXC; - using ELayout = ck::tensor_layout::convolution::GNWK; -}; - -template <> -struct ConvTensorLayouts -{ - using ALayout = ck::tensor_layout::convolution::NGCW; - using BLayout = ck::tensor_layout::convolution::GKCX; - using ELayout = ck::tensor_layout::convolution::NGKW; -}; - -// 2D Forward Convolution Layout Specializations -template <> -struct ConvTensorLayouts -{ - using ALayout = ck::tensor_layout::convolution::NGCHW; - using BLayout = ck::tensor_layout::convolution::GKYXC; - using ELayout = ck::tensor_layout::convolution::NGKHW; -}; - -template <> -struct ConvTensorLayouts -{ - using ALayout = ck::tensor_layout::convolution::NHWGC; - using BLayout = ck::tensor_layout::convolution::GKYXC; - using ELayout = ck::tensor_layout::convolution::NHWGK; -}; - -template <> -struct ConvTensorLayouts -{ - using ALayout = ck::tensor_layout::convolution::GNHWC; - using BLayout = ck::tensor_layout::convolution::GKYXC; - using ELayout = ck::tensor_layout::convolution::GNHWK; -}; - -template <> -struct ConvTensorLayouts -{ - using ALayout = ck::tensor_layout::convolution::NGCHW; - using BLayout = ck::tensor_layout::convolution::GKCYX; - using ELayout = ck::tensor_layout::convolution::NGKHW; -}; - -// 3D Forward Convolution Layout Specializations -template <> -struct ConvTensorLayouts -{ - using ALayout = ck::tensor_layout::convolution::NGCDHW; - using BLayout = ck::tensor_layout::convolution::GKCZYX; - using ELayout = ck::tensor_layout::convolution::NGKDHW; -}; - -template <> -struct ConvTensorLayouts -{ - using ALayout = ck::tensor_layout::convolution::NDHWGC; - using BLayout = 
ck::tensor_layout::convolution::GKZYXC; - using ELayout = ck::tensor_layout::convolution::NDHWGK; -}; - -template <> -struct ConvTensorLayouts -{ - using ALayout = ck::tensor_layout::convolution::GNDHWC; - using BLayout = ck::tensor_layout::convolution::GKZYXC; - using ELayout = ck::tensor_layout::convolution::GNDHWK; -}; - -template -consteval auto GetTensorLayoutInternal() -{ - return factory_internal::ConvTensorLayouts{}; -} - -template -consteval auto GetTensorLayout() -{ - constexpr auto INPUT_LAYOUT = Layout.input_layout; - constexpr auto WEIGHT_LAYOUT = Layout.weight_layout; - constexpr auto OUTPUT_LAYOUT = Layout.output_layout; - - return GetTensorLayoutInternal(); -} - // Type mappings from builder convolution data type to CK tensor types. template struct ConvTensorTypes diff --git a/experimental/builder/include/ck_tile/builder/conv_layout_utils.hpp b/experimental/builder/include/ck_tile/builder/conv_layout_utils.hpp new file mode 100644 index 0000000000..c5d510e8be --- /dev/null +++ b/experimental/builder/include/ck_tile/builder/conv_layout_utils.hpp @@ -0,0 +1,219 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#pragma once + +// #include +// #include +#include "ck_tile/builder/conv_signature_concepts.hpp" +#include "ck_tile/builder/types.hpp" + +namespace ck_tile::builder::factory_internal +{ + +struct EmptyBiasLayout +{ + using DsLayout = ck::Tuple<>; + using DsDataTypes = ck::Tuple<>; +}; + +// Type mappings from the builder ConvBiasLayout enum classes to the CK tensor data types. +template + requires(ConvSpatialDim) +struct ConvBiasTensorLayouts +{ + // This will trigger if a specialization for the given layout is not found. + // We should always catch this in an earlier validation check. + using BiasLayoutType = decltype(BiasLayoutValue); + static_assert(sizeof(BiasLayoutType) == 0, + "Internal error. Unsupported layout for convolution factory."); +}; + +constexpr std::array NHWGK_G_K_STRIDED_LAYOUT = { + ConvBiasLayout{ConvOutputLayout2D::NHWGK}, + ConvBiasLayout{BiasLayout::G_K_strided} +}; + +template<> +struct ConvBiasTensorLayouts +{ + using DsLayout = ck::Tuple; + using DsDataTypes = ck::Tuple; +}; + +template +requires (HasBiasLayout) +consteval auto GetBiasTensorLayout() +{ + return factory_internal::ConvBiasTensorLayouts{}; +} + +template +requires (!HasBiasLayout) +consteval auto GetBiasTensorLayout() +{ + return EmptyBiasLayout{}; +} + +// Type mappings from the builder ConvLayout enum classes to the CK tensor data types. +template + requires(ConvSpatialDim + && ValidConvInputLayoutForSpatialDim + && ValidConvWeightLayoutForSpatialDim + && ValidConvOutputLayoutForSpatialDim) +struct ConvTensorLayouts +{ + // This will trigger if a specialization for the given layout is not found. + // We should always catch this in an earlier validation check. + using InputLayout = decltype(InputLayoutValue); + using WeightLayout = decltype(WeightLayoutValue); + using OutputLayout = decltype(OutputLayoutValue); + static_assert(sizeof(InputLayout) == 0 && sizeof(WeightLayout) == 0 && sizeof(OutputLayout) == 0, + "Internal error. 
Unsupported layout for convolution factory."); +}; + +// 1D Forward Convolution Layout Specializations +template <> +struct ConvTensorLayouts +{ + using ALayout = ck::tensor_layout::convolution::NWGC; + using BLayout = ck::tensor_layout::convolution::GKXC; + using ELayout = ck::tensor_layout::convolution::NWGK; +}; + +template <> +struct ConvTensorLayouts +{ + using ALayout = ck::tensor_layout::convolution::NGCW; + using BLayout = ck::tensor_layout::convolution::GKXC; + using ELayout = ck::tensor_layout::convolution::NGKW; +}; + +template <> +struct ConvTensorLayouts +{ + using ALayout = ck::tensor_layout::convolution::GNWC; + using BLayout = ck::tensor_layout::convolution::GKXC; + using ELayout = ck::tensor_layout::convolution::GNWK; +}; + +template <> +struct ConvTensorLayouts +{ + using ALayout = ck::tensor_layout::convolution::NGCW; + using BLayout = ck::tensor_layout::convolution::GKCX; + using ELayout = ck::tensor_layout::convolution::NGKW; +}; + +// 2D Forward Convolution Layout Specializations +template <> +struct ConvTensorLayouts +{ + using ALayout = ck::tensor_layout::convolution::NGCHW; + using BLayout = ck::tensor_layout::convolution::GKYXC; + using ELayout = ck::tensor_layout::convolution::NGKHW; +}; + +template <> +struct ConvTensorLayouts +{ + using ALayout = ck::tensor_layout::convolution::NHWGC; + using BLayout = ck::tensor_layout::convolution::GKYXC; + using ELayout = ck::tensor_layout::convolution::NHWGK; +}; + +template <> +struct ConvTensorLayouts +{ + using ALayout = ck::tensor_layout::convolution::GNHWC; + using BLayout = ck::tensor_layout::convolution::GKYXC; + using ELayout = ck::tensor_layout::convolution::GNHWK; +}; + +template <> +struct ConvTensorLayouts +{ + using ALayout = ck::tensor_layout::convolution::NGCHW; + using BLayout = ck::tensor_layout::convolution::GKCYX; + using ELayout = ck::tensor_layout::convolution::NGKHW; +}; + +// 3D Forward Convolution Layout Specializations +template <> +struct ConvTensorLayouts +{ + using ALayout = ck::tensor_layout::convolution::NGCDHW; + using BLayout = ck::tensor_layout::convolution::GKCZYX; + using ELayout = ck::tensor_layout::convolution::NGKDHW; +}; + +template <> +struct ConvTensorLayouts +{ + using ALayout = ck::tensor_layout::convolution::NDHWGC; + using BLayout = ck::tensor_layout::convolution::GKZYXC; + using ELayout = ck::tensor_layout::convolution::NDHWGK; +}; + +template <> +struct ConvTensorLayouts +{ + using ALayout = ck::tensor_layout::convolution::GNDHWC; + using BLayout = ck::tensor_layout::convolution::GKZYXC; + using ELayout = ck::tensor_layout::convolution::GNDHWK; +}; + +template +consteval auto GetTensorLayoutInternal() +{ + return factory_internal::ConvTensorLayouts{}; +} + +template +consteval auto GetTensorLayout() +{ + constexpr auto INPUT_LAYOUT = Layout.input_layout; + constexpr auto WEIGHT_LAYOUT = Layout.weight_layout; + constexpr auto OUTPUT_LAYOUT = Layout.output_layout; + + return GetTensorLayoutInternal(); +} + +} From f00ac4e98b33d0549ec927fb9e7020b30088db4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Thu, 27 Nov 2025 14:46:30 +0000 Subject: [PATCH 13/41] Remove hard-coded layout combinations. 
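Replaces the table of per-combination ConvTensorLayouts specializations with consteval mapping functions (GetCKInputLayout, GetCKWeightLayout, GetCKOutputLayout, GetCKBiasLayoutSingle) that switch on the enum value for each spatial dimension, so a new layout combination no longer needs a hand-written specialization. Illustrative check, assuming the parameter order <SPATIAL_DIM, Layout> used throughout this header:

    using A = decltype(GetCKInputLayout<2, ConvInputLayout{ConvInputLayout2D::NHWGC}>());
    static_assert(std::is_same_v<A, ck::tensor_layout::convolution::NHWGC>);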
--- .../ck_tile/builder/conv_layout_utils.hpp | 332 +++++++++--------- .../builder/include/ck_tile/builder/types.hpp | 2 +- 2 files changed, 170 insertions(+), 164 deletions(-) diff --git a/experimental/builder/include/ck_tile/builder/conv_layout_utils.hpp b/experimental/builder/include/ck_tile/builder/conv_layout_utils.hpp index c5d510e8be..8d5e360d84 100644 --- a/experimental/builder/include/ck_tile/builder/conv_layout_utils.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_layout_utils.hpp @@ -3,8 +3,6 @@ #pragma once -// #include -// #include #include "ck_tile/builder/conv_signature_concepts.hpp" #include "ck_tile/builder/types.hpp" @@ -17,28 +15,75 @@ struct EmptyBiasLayout using DsDataTypes = ck::Tuple<>; }; -// Type mappings from the builder ConvBiasLayout enum classes to the CK tensor data types. -template - requires(ConvSpatialDim) -struct ConvBiasTensorLayouts +template +consteval bool IsGenericBiasLayoutActive() { + return requires { typename std::integral_constant; }; +} + +template +consteval auto GetCKBiasLayoutSingle() +{ + if constexpr (IsGenericBiasLayoutActive()) + { + constexpr auto val = Layout._bias_layout; + if constexpr (val == BiasLayout::G_K_strided) + return ck::tensor_layout::convolution::G_K{}; + else if constexpr (val == BiasLayout::GC) + return ck::tensor_layout::convolution::GC{}; + else if constexpr (val == BiasLayout::G_C_strided) + return ck::tensor_layout::convolution::G_C{}; + else + static_assert(false, "Unsupported generic bias layout"); + } + else + { + constexpr auto out_layout = Layout._conv_output_layout; + + if constexpr (SPATIAL_DIM == 1) + { + constexpr auto val = out_layout._1d; + if constexpr (val == ConvOutputLayout1D::NWGK) return ck::tensor_layout::convolution::NWGK{}; + else if constexpr (val == ConvOutputLayout1D::NGKW) return ck::tensor_layout::convolution::NGKW{}; + else if constexpr (val == ConvOutputLayout1D::GNWK) return ck::tensor_layout::convolution::GNWK{}; + } + else if constexpr (SPATIAL_DIM == 2) + { + constexpr auto val = out_layout._2d; + if constexpr (val == ConvOutputLayout2D::NHWGK) return ck::tensor_layout::convolution::NHWGK{}; + else if constexpr (val == ConvOutputLayout2D::GNHWK) return ck::tensor_layout::convolution::GNHWK{}; + else if constexpr (val == ConvOutputLayout2D::NGKHW) return ck::tensor_layout::convolution::NGKHW{}; + } + else if constexpr (SPATIAL_DIM == 3) + { + constexpr auto val = out_layout._3d; + if constexpr (val == ConvOutputLayout3D::NDHWGK) return ck::tensor_layout::convolution::NDHWGK{}; + else if constexpr (val == ConvOutputLayout3D::GNDHWK) return ck::tensor_layout::convolution::GNDHWK{}; + else if constexpr (val == ConvOutputLayout3D::NGKDHW) return ck::tensor_layout::convolution::NGKDHW{}; + } + } +} + +template +consteval auto GetCKBiasLayoutTuple(std::index_sequence) { - // This will trigger if a specialization for the given layout is not found. - // We should always catch this in an earlier validation check. - using BiasLayoutType = decltype(BiasLayoutValue); - static_assert(sizeof(BiasLayoutType) == 0, - "Internal error. 
Unsupported layout for convolution factory."); -}; + return ck::Tuple())...>{}; +} -constexpr std::array NHWGK_G_K_STRIDED_LAYOUT = { - ConvBiasLayout{ConvOutputLayout2D::NHWGK}, - ConvBiasLayout{BiasLayout::G_K_strided} -}; +// TODO: Remove hardcoding of bhalf_t +template +consteval auto GetCKBiasTypesTuple(std::index_sequence) +{ + return ck::Tuple{}; +} -template<> -struct ConvBiasTensorLayouts +template + requires(ConvSpatialDim) +struct ConvBiasTensorLayouts { - using DsLayout = ck::Tuple; - using DsDataTypes = ck::Tuple; + static constexpr auto Size = BiasLayoutValue.size(); + + using DsLayout = decltype(GetCKBiasLayoutTuple(std::make_index_sequence{})); + using DsDataTypes = decltype(GetCKBiasTypesTuple(std::make_index_sequence{})); }; template @@ -55,7 +100,105 @@ consteval auto GetBiasTensorLayout() return EmptyBiasLayout{}; } -// Type mappings from the builder ConvLayout enum classes to the CK tensor data types. +template +consteval auto GetCKInputLayout() +{ + if constexpr(SPATIAL_DIM == 1) + { + constexpr auto val = Layout._1d; + if constexpr(val == ConvInputLayout1D::NWGC) + return ck::tensor_layout::convolution::NWGC{}; + else if constexpr(val == ConvInputLayout1D::NGCW) + return ck::tensor_layout::convolution::NGCW{}; + else if constexpr(val == ConvInputLayout1D::GNWC) + return ck::tensor_layout::convolution::GNWC{}; + } + else if constexpr(SPATIAL_DIM == 2) + { + constexpr auto val = Layout._2d; + if constexpr(val == ConvInputLayout2D::NGCHW) + return ck::tensor_layout::convolution::NGCHW{}; + else if constexpr(val == ConvInputLayout2D::NHWGC) + return ck::tensor_layout::convolution::NHWGC{}; + else if constexpr(val == ConvInputLayout2D::GNHWC) + return ck::tensor_layout::convolution::GNHWC{}; + } + else if constexpr(SPATIAL_DIM == 3) + { + constexpr auto val = Layout._3d; + if constexpr(val == ConvInputLayout3D::NGCDHW) + return ck::tensor_layout::convolution::NGCDHW{}; + else if constexpr(val == ConvInputLayout3D::NDHWGC) + return ck::tensor_layout::convolution::NDHWGC{}; + else if constexpr(val == ConvInputLayout3D::GNDHWC) + return ck::tensor_layout::convolution::GNDHWC{}; + } +} + +template +consteval auto GetCKWeightLayout() +{ + if constexpr(SPATIAL_DIM == 1) + { + constexpr auto val = Layout._1d; + if constexpr(val == ConvWeightLayout1D::GKXC) + return ck::tensor_layout::convolution::GKXC{}; + else if constexpr(val == ConvWeightLayout1D::GKCX) + return ck::tensor_layout::convolution::GKCX{}; + } + else if constexpr(SPATIAL_DIM == 2) + { + constexpr auto val = Layout._2d; + if constexpr(val == ConvWeightLayout2D::GKYXC) + return ck::tensor_layout::convolution::GKYXC{}; + else if constexpr(val == ConvWeightLayout2D::GKCYX) + return ck::tensor_layout::convolution::GKCYX{}; + } + else if constexpr(SPATIAL_DIM == 3) + { + constexpr auto val = Layout._3d; + if constexpr(val == ConvWeightLayout3D::GKCZYX) + return ck::tensor_layout::convolution::GKCZYX{}; + else if constexpr(val == ConvWeightLayout3D::GKZYXC) + return ck::tensor_layout::convolution::GKZYXC{}; + } +} + +template +consteval auto GetCKOutputLayout() +{ + if constexpr(SPATIAL_DIM == 1) + { + constexpr auto val = Layout._1d; + if constexpr(val == ConvOutputLayout1D::NWGK) + return ck::tensor_layout::convolution::NWGK{}; + else if constexpr(val == ConvOutputLayout1D::NGKW) + return ck::tensor_layout::convolution::NGKW{}; + else if constexpr(val == ConvOutputLayout1D::GNWK) + return ck::tensor_layout::convolution::GNWK{}; + } + else if constexpr(SPATIAL_DIM == 2) + { + constexpr auto val = Layout._2d; + if 
constexpr(val == ConvOutputLayout2D::NGKHW) + return ck::tensor_layout::convolution::NGKHW{}; + else if constexpr(val == ConvOutputLayout2D::NHWGK) + return ck::tensor_layout::convolution::NHWGK{}; + else if constexpr(val == ConvOutputLayout2D::GNHWK) + return ck::tensor_layout::convolution::GNHWK{}; + } + else if constexpr(SPATIAL_DIM == 3) + { + constexpr auto val = Layout._3d; + if constexpr(val == ConvOutputLayout3D::NGKDHW) + return ck::tensor_layout::convolution::NGKDHW{}; + else if constexpr(val == ConvOutputLayout3D::NDHWGK) + return ck::tensor_layout::convolution::NDHWGK{}; + else if constexpr(val == ConvOutputLayout3D::GNDHWK) + return ck::tensor_layout::convolution::GNDHWK{}; + } +} + template requires(ConvSpatialDim && ValidConvInputLayoutForSpatialDim @@ -63,149 +206,12 @@ template ) struct ConvTensorLayouts { - // This will trigger if a specialization for the given layout is not found. - // We should always catch this in an earlier validation check. - using InputLayout = decltype(InputLayoutValue); - using WeightLayout = decltype(WeightLayoutValue); - using OutputLayout = decltype(OutputLayoutValue); - static_assert(sizeof(InputLayout) == 0 && sizeof(WeightLayout) == 0 && sizeof(OutputLayout) == 0, - "Internal error. Unsupported layout for convolution factory."); -}; - -// 1D Forward Convolution Layout Specializations -template <> -struct ConvTensorLayouts -{ - using ALayout = ck::tensor_layout::convolution::NWGC; - using BLayout = ck::tensor_layout::convolution::GKXC; - using ELayout = ck::tensor_layout::convolution::NWGK; -}; - -template <> -struct ConvTensorLayouts -{ - using ALayout = ck::tensor_layout::convolution::NGCW; - using BLayout = ck::tensor_layout::convolution::GKXC; - using ELayout = ck::tensor_layout::convolution::NGKW; -}; - -template <> -struct ConvTensorLayouts -{ - using ALayout = ck::tensor_layout::convolution::GNWC; - using BLayout = ck::tensor_layout::convolution::GKXC; - using ELayout = ck::tensor_layout::convolution::GNWK; + static_assert(DIR == ConvDirection::FORWARD, "Only Forward convolution is supported."); + using ALayout = decltype(GetCKInputLayout()); + using BLayout = decltype(GetCKWeightLayout()); + using ELayout = decltype(GetCKOutputLayout()); }; -template <> -struct ConvTensorLayouts -{ - using ALayout = ck::tensor_layout::convolution::NGCW; - using BLayout = ck::tensor_layout::convolution::GKCX; - using ELayout = ck::tensor_layout::convolution::NGKW; -}; - -// 2D Forward Convolution Layout Specializations -template <> -struct ConvTensorLayouts -{ - using ALayout = ck::tensor_layout::convolution::NGCHW; - using BLayout = ck::tensor_layout::convolution::GKYXC; - using ELayout = ck::tensor_layout::convolution::NGKHW; -}; - -template <> -struct ConvTensorLayouts -{ - using ALayout = ck::tensor_layout::convolution::NHWGC; - using BLayout = ck::tensor_layout::convolution::GKYXC; - using ELayout = ck::tensor_layout::convolution::NHWGK; -}; - -template <> -struct ConvTensorLayouts -{ - using ALayout = ck::tensor_layout::convolution::GNHWC; - using BLayout = ck::tensor_layout::convolution::GKYXC; - using ELayout = ck::tensor_layout::convolution::GNHWK; -}; - -template <> -struct ConvTensorLayouts -{ - using ALayout = ck::tensor_layout::convolution::NGCHW; - using BLayout = ck::tensor_layout::convolution::GKCYX; - using ELayout = ck::tensor_layout::convolution::NGKHW; -}; - -// 3D Forward Convolution Layout Specializations -template <> -struct ConvTensorLayouts -{ - using ALayout = ck::tensor_layout::convolution::NGCDHW; - using BLayout = 
ck::tensor_layout::convolution::GKCZYX; - using ELayout = ck::tensor_layout::convolution::NGKDHW; -}; - -template <> -struct ConvTensorLayouts -{ - using ALayout = ck::tensor_layout::convolution::NDHWGC; - using BLayout = ck::tensor_layout::convolution::GKZYXC; - using ELayout = ck::tensor_layout::convolution::NDHWGK; -}; - -template <> -struct ConvTensorLayouts -{ - using ALayout = ck::tensor_layout::convolution::GNDHWC; - using BLayout = ck::tensor_layout::convolution::GKZYXC; - using ELayout = ck::tensor_layout::convolution::GNDHWK; -}; - -template -consteval auto GetTensorLayoutInternal() -{ - return factory_internal::ConvTensorLayouts{}; -} - template consteval auto GetTensorLayout() { @@ -213,7 +219,7 @@ consteval auto GetTensorLayout() constexpr auto WEIGHT_LAYOUT = Layout.weight_layout; constexpr auto OUTPUT_LAYOUT = Layout.output_layout; - return GetTensorLayoutInternal(); + return factory_internal::ConvTensorLayouts{}; } } diff --git a/experimental/builder/include/ck_tile/builder/types.hpp b/experimental/builder/include/ck_tile/builder/types.hpp index 182a3935d9..d821cbf96e 100644 --- a/experimental/builder/include/ck_tile/builder/types.hpp +++ b/experimental/builder/include/ck_tile/builder/types.hpp @@ -23,7 +23,7 @@ enum class BiasLayout { GC, G_C_strided, - GK, + //GK, G_K_strided }; From 74bc17a2816f9dcd7652c8dc1e55ec83bd22a82c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Thu, 27 Nov 2025 14:56:24 +0000 Subject: [PATCH 14/41] Small code clean-up. --- .../include/ck_tile/builder/conv_layout_utils.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/experimental/builder/include/ck_tile/builder/conv_layout_utils.hpp b/experimental/builder/include/ck_tile/builder/conv_layout_utils.hpp index 8d5e360d84..9f454379c3 100644 --- a/experimental/builder/include/ck_tile/builder/conv_layout_utils.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_layout_utils.hpp @@ -21,7 +21,7 @@ consteval bool IsGenericBiasLayoutActive() { } template -consteval auto GetCKBiasLayoutSingle() +consteval auto GetBiasLayoutValue() { if constexpr (IsGenericBiasLayoutActive()) { @@ -63,15 +63,15 @@ consteval auto GetCKBiasLayoutSingle() } } -template -consteval auto GetCKBiasLayoutTuple(std::index_sequence) +template +consteval auto GetBiasLayoutTuple(std::index_sequence) { - return ck::Tuple())...>{}; + return ck::Tuple())...>{}; } // TODO: Remove hardcoding of bhalf_t template -consteval auto GetCKBiasTypesTuple(std::index_sequence) +consteval auto GetBiasTypesTuple(std::index_sequence) { return ck::Tuple{}; } @@ -82,8 +82,8 @@ struct ConvBiasTensorLayouts { static constexpr auto Size = BiasLayoutValue.size(); - using DsLayout = decltype(GetCKBiasLayoutTuple(std::make_index_sequence{})); - using DsDataTypes = decltype(GetCKBiasTypesTuple(std::make_index_sequence{})); + using DsLayout = decltype(GetBiasLayoutTuple(std::make_index_sequence{})); + using DsDataTypes = decltype(GetBiasTypesTuple(std::make_index_sequence{})); }; template From 54c58f1619e3bbbf33b4a8bed839a9fcee717240 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Thu, 27 Nov 2025 15:00:10 +0000 Subject: [PATCH 15/41] Move data type utils into a separate file. 
--- .../ck_tile/builder/conv_data_type_utils.hpp | 81 +++++++++++++++++++ .../include/ck_tile/builder/conv_factory.hpp | 71 +--------------- 2 files changed, 82 insertions(+), 70 deletions(-) create mode 100644 experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp diff --git a/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp b/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp new file mode 100644 index 0000000000..c900fa5a6a --- /dev/null +++ b/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp @@ -0,0 +1,81 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#pragma once + +#include "ck_tile/builder/conv_signature_concepts.hpp" +#include "ck_tile/builder/types.hpp" + +namespace ck_tile::builder::factory_internal +{ + // Type mappings from builder convolution data type to CK tensor types. +template +struct ConvTensorTypes +{ + // This will trigger if a specialization for the given DataType is not found. + // We should always catch this in an earlier validation check. + static_assert(sizeof(UnsupportedEnumValue) == 0, + "Internal error. Unsupported data type for convolution factory."); +}; + +template <> +struct ConvTensorTypes +{ + using ADataType = ck::half_t; + using AComputeType = ck::half_t; + using BDataType = ck::half_t; + using BComputeType = ck::half_t; + using CShuffleDataType = ck::half_t; + using AccDataType = float; + using EDataType = ck::half_t; +}; + +template <> +struct ConvTensorTypes +{ + using ADataType = ck::bhalf_t; + using AComputeType = ck::bhalf_t; + using BDataType = ck::bhalf_t; + using BComputeType = ck::bhalf_t; + using CShuffleDataType = ck::bhalf_t; + using AccDataType = float; + using EDataType = ck::bhalf_t; +}; + +template <> +struct ConvTensorTypes +{ + using ADataType = float; + using AComputeType = float; + using BDataType = float; + using BComputeType = float; + using CShuffleDataType = float; + using AccDataType = float; + using EDataType = float; +}; + +template <> +struct ConvTensorTypes +{ + using ADataType = int8_t; + using AComputeType = int8_t; + using BDataType = int8_t; + using BComputeType = int8_t; + using CShuffleDataType = int8_t; + using AccDataType = int32_t; + using EDataType = int8_t; +}; + +template <> +struct ConvTensorTypes +{ + using ADataType = ck::f8_t; + using AComputeType = ck::f8_t; + using BDataType = ck::f8_t; + using BComputeType = ck::f8_t; + using CShuffleDataType = ck::f8_t; + using AccDataType = float; + using EDataType = ck::f8_t; +}; + +} diff --git a/experimental/builder/include/ck_tile/builder/conv_factory.hpp b/experimental/builder/include/ck_tile/builder/conv_factory.hpp index 5f79b774c5..965b0bdbc5 100644 --- a/experimental/builder/include/ck_tile/builder/conv_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_factory.hpp @@ -59,6 +59,7 @@ #include "ck_tile/builder/conv_signature_utils.hpp" #include "ck_tile/builder/conv_layout_utils.hpp" +#include "ck_tile/builder/conv_data_type_utils.hpp" namespace ck_tile::builder::factory_internal { @@ -86,76 +87,6 @@ consteval auto get_output_layout_value(ConvOutputLayout layout) { else static_assert(false, "Unsupported spatial dimension"); } -// Type mappings from builder convolution data type to CK tensor types. -template -struct ConvTensorTypes -{ - // This will trigger if a specialization for the given DataType is not found. - // We should always catch this in an earlier validation check. 
- static_assert(sizeof(UnsupportedEnumValue) == 0, - "Internal error. Unsupported data type for convolution factory."); -}; - -template <> -struct ConvTensorTypes -{ - using ADataType = ck::half_t; - using AComputeType = ck::half_t; - using BDataType = ck::half_t; - using BComputeType = ck::half_t; - using CShuffleDataType = ck::half_t; - using AccDataType = float; - using EDataType = ck::half_t; -}; - -template <> -struct ConvTensorTypes -{ - using ADataType = ck::bhalf_t; - using AComputeType = ck::bhalf_t; - using BDataType = ck::bhalf_t; - using BComputeType = ck::bhalf_t; - using CShuffleDataType = ck::bhalf_t; - using AccDataType = float; - using EDataType = ck::bhalf_t; -}; - -template <> -struct ConvTensorTypes -{ - using ADataType = float; - using AComputeType = float; - using BDataType = float; - using BComputeType = float; - using CShuffleDataType = float; - using AccDataType = float; - using EDataType = float; -}; - -template <> -struct ConvTensorTypes -{ - using ADataType = int8_t; - using AComputeType = int8_t; - using BDataType = int8_t; - using BComputeType = int8_t; - using CShuffleDataType = int8_t; - using AccDataType = int32_t; - using EDataType = int8_t; -}; - -template <> -struct ConvTensorTypes -{ - using ADataType = ck::f8_t; - using AComputeType = ck::f8_t; - using BDataType = ck::f8_t; - using BComputeType = ck::f8_t; - using CShuffleDataType = ck::f8_t; - using AccDataType = float; - using EDataType = ck::f8_t; -}; - template struct ElementwiseOps { From d6fc6c8dd9e7cebcaf8da4b00cb0c3661332286a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Fri, 28 Nov 2025 10:56:11 +0000 Subject: [PATCH 16/41] Add data types, layouts, and elementwise ops per conv tensor. --- .../ck_tile/builder/conv_data_type_utils.hpp | 274 ++++++++++++++---- .../builder/conv_elementwise_op_utils.hpp | 87 ++++++ .../include/ck_tile/builder/conv_factory.hpp | 139 +++------ .../ck_tile/builder/conv_layout_utils.hpp | 70 ++--- .../builder/conv_signature_concepts.hpp | 112 +++---- .../builder/include/ck_tile/builder/types.hpp | 45 ++- .../test/impl/conv_signature_types.hpp | 37 +-- 7 files changed, 493 insertions(+), 271 deletions(-) create mode 100644 experimental/builder/include/ck_tile/builder/conv_elementwise_op_utils.hpp diff --git a/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp b/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp index c900fa5a6a..42bbe53ad2 100644 --- a/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp @@ -8,74 +8,242 @@ namespace ck_tile::builder::factory_internal { - // Type mappings from builder convolution data type to CK tensor types. -template -struct ConvTensorTypes -{ - // This will trigger if a specialization for the given DataType is not found. - // We should always catch this in an earlier validation check. - static_assert(sizeof(UnsupportedEnumValue) == 0, - "Internal error. Unsupported data type for convolution factory."); + +// // Type mappings from builder convolution data type to CK tensor types. +// template +// struct ConvTensorTypes +// { +// // This will trigger if a specialization for the given DataType is not found. +// // We should always catch this in an earlier validation check. +// static_assert(sizeof(UnsupportedEnumValue) == 0, +// "Internal error. 
Unsupported data type for convolution factory."); +// }; + +// template <> +// struct ConvTensorTypes +// { +// using ADataType = ck::half_t; +// using AComputeType = ck::half_t; +// using BDataType = ck::half_t; +// using BComputeType = ck::half_t; +// using CShuffleDataType = ck::half_t; +// using DsDataType = ck::Tuple<>; +// using AccDataType = float; +// using EDataType = ck::half_t; +// }; + +// template <> +// struct ConvTensorTypes +// { +// using ADataType = ck::bhalf_t; +// using AComputeType = ck::bhalf_t; +// using BDataType = ck::bhalf_t; +// using BComputeType = ck::bhalf_t; +// using CShuffleDataType = ck::bhalf_t; +// using DsDataType = ck::Tuple<>; +// using AccDataType = float; +// using EDataType = ck::bhalf_t; +// }; + +// template <> +// struct ConvTensorTypes +// { +// using ADataType = float; +// using AComputeType = float; +// using BDataType = float; +// using BComputeType = float; +// using CShuffleDataType = float; +// using DsDataType = ck::Tuple<>; +// using AccDataType = float; +// using EDataType = float; +// }; + +// template <> +// struct ConvTensorTypes +// { +// using ADataType = int8_t; +// using AComputeType = int8_t; +// using BDataType = int8_t; +// using BComputeType = int8_t; +// using CShuffleDataType = int8_t; +// using DsDataType = ck::Tuple<>; +// using AccDataType = int32_t; +// using EDataType = int8_t; +// }; + +// template <> +// struct ConvTensorTypes +// { +// using ADataType = ck::f8_t; +// using AComputeType = ck::f8_t; +// using BDataType = ck::f8_t; +// using BComputeType = ck::f8_t; +// using CShuffleDataType = ck::f8_t; +// using DsDataType = ck::Tuple<>; +// using AccDataType = float; +// using EDataType = ck::f8_t; +// }; + +struct CK_half +{ + using type = ck::half_t; +}; + +struct CK_bhalf +{ + using type = ck::bhalf_t; }; -template <> -struct ConvTensorTypes +struct CK_float { - using ADataType = ck::half_t; - using AComputeType = ck::half_t; - using BDataType = ck::half_t; - using BComputeType = ck::half_t; - using CShuffleDataType = ck::half_t; - using AccDataType = float; - using EDataType = ck::half_t; + using type = float; }; -template <> -struct ConvTensorTypes +struct CK_int8 { - using ADataType = ck::bhalf_t; - using AComputeType = ck::bhalf_t; - using BDataType = ck::bhalf_t; - using BComputeType = ck::bhalf_t; - using CShuffleDataType = ck::bhalf_t; - using AccDataType = float; - using EDataType = ck::bhalf_t; + using type = int8_t; }; -template <> -struct ConvTensorTypes +struct CK_f8 { - using ADataType = float; - using AComputeType = float; - using BDataType = float; - using BComputeType = float; - using CShuffleDataType = float; - using AccDataType = float; - using EDataType = float; + using type = ck::f8_t; }; -template <> -struct ConvTensorTypes +struct CK_int32 { - using ADataType = int8_t; - using AComputeType = int8_t; - using BDataType = int8_t; - using BComputeType = int8_t; - using CShuffleDataType = int8_t; - using AccDataType = int32_t; - using EDataType = int8_t; + using type = int32_t; }; -template <> -struct ConvTensorTypes +struct CK_empty_tuple +{ + using type = ck::Tuple<>; +}; + +template +consteval auto ConvertDataTypeToCK() +{ + if constexpr (dt == DataType::FP16) + { + return CK_half{}; + } + else if constexpr (dt == DataType::BF16) + { + return CK_bhalf{}; + } + else if constexpr (dt == DataType::FP32) + { + return CK_float{}; + } + else if constexpr (dt == DataType::I8) + { + return CK_int8{}; + } + else if constexpr (dt == DataType::FP8) + { + return CK_f8{}; + } + else + { + 
static_assert(sizeof(UnsupportedEnumValue<dt>
) == 0, + "Internal error. Unsupported data type conversion to CK."); + } +} + +template +consteval auto GetTensorDataAndComputeTypes() +{ + constexpr auto data_type = Config.data_type; + constexpr auto compute_type = Config.compute_type; + + if constexpr (data_type == DataType::UNDEFINDED && compute_type == DataType::UNDEFINDED) + { + return std::make_pair(ConvertDataTypeToCK(), ConvertDataTypeToCK()); + } + else if constexpr (data_type == DataType::UNDEFINDED) + { + return std::make_pair(ConvertDataTypeToCK(), ConvertDataTypeToCK()); + } + else if constexpr (compute_type == DataType::UNDEFINDED) + { + return std::make_pair(ConvertDataTypeToCK(), ConvertDataTypeToCK()); + } + + return std::make_pair(ConvertDataTypeToCK(), ConvertDataTypeToCK()); +} + +template +consteval auto GetTensorAccumulationType() { - using ADataType = ck::f8_t; - using AComputeType = ck::f8_t; - using BDataType = ck::f8_t; - using BComputeType = ck::f8_t; - using CShuffleDataType = ck::f8_t; - using AccDataType = float; - using EDataType = ck::f8_t; + constexpr auto data_type = SignatureAccDataType; + if constexpr (data_type == DataType::UNDEFINDED) + { + return ConvertDataTypeToCK(); + } + return ConvertDataTypeToCK(); +} + +template +consteval auto GetAuxiliaryTensorDataTypeValue() +{ + constexpr auto data_type = Config.data_type; + if constexpr (data_type == DataType::UNDEFINDED) + { + return ConvertDataTypeToCK(); + } + return ConvertDataTypeToCK(); +} + +template +consteval auto GetAuxiliaryTensorDataTypeTuple(std::index_sequence) +{ + return ck::Tuple())...>{}; +} + +template +struct AuxiliaryTensorDataTypes +{ + static constexpr auto Size = AuxiliaryTensorConfigsValue.size(); + using type = decltype(GetAuxiliaryTensorDataTypeTuple(std::make_index_sequence{})); +}; + +// TODO: Currently only the ouput tensor can have auxiliary tensors (e.g., bias). +template +requires (HasElementwiseOpWithAuxiliaryOperands) +consteval auto GetAuxiliaryTensorDataTypes() +{ + return AuxiliaryTensorDataTypes{}; +} + +template +requires (!HasElementwiseOpWithAuxiliaryOperands) +consteval auto GetAuxiliaryTensorDataTypes() +{ + return CK_empty_tuple{}; +} + +template +struct FwdConvTensorDataTypes +{ + static constexpr auto input_types = + GetTensorDataAndComputeTypes(); + static constexpr auto weight_types = + GetTensorDataAndComputeTypes(); + static constexpr auto output_types = + GetTensorDataAndComputeTypes(); + + using ADataType = typename decltype(input_types.first)::type; + using AComputeType = typename decltype(input_types.second)::type; + using BDataType = typename decltype(weight_types.first)::type; + using BComputeType = typename decltype(weight_types.second)::type; + using AccDataType = typename decltype( + GetTensorAccumulationType())::type; + using EDataType = typename decltype(output_types.first)::type; + + // This is the "compute" type for output. + using CShuffleDataType = typename decltype(output_types.second)::type; + + // Data types for the auxiliary tensors (e.g., bias). + using DsDataType = typename decltype(GetAuxiliaryTensorDataTypes())::type; }; } diff --git a/experimental/builder/include/ck_tile/builder/conv_elementwise_op_utils.hpp b/experimental/builder/include/ck_tile/builder/conv_elementwise_op_utils.hpp new file mode 100644 index 0000000000..ab28c8f4c5 --- /dev/null +++ b/experimental/builder/include/ck_tile/builder/conv_elementwise_op_utils.hpp @@ -0,0 +1,87 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
+// SPDX-License-Identifier: MIT + +#pragma once + +#include "ck_tile/builder/conv_signature_concepts.hpp" +#include "ck_tile/builder/types.hpp" + +namespace ck_tile::builder::factory_internal +{ +struct CK_PassThroughOp +{ + using Op = ck::tensor_operation::element_wise::PassThrough; +}; + +struct CK_ScaleOp +{ + using Op = ck::tensor_operation::element_wise::Scale; +}; + +struct CK_ClampOp +{ + using Op = ck::tensor_operation::element_wise::Clamp; +}; + +struct CK_ScaleAddScaleAddReluOp +{ + using Op = ck::tensor_operation::element_wise::ScaleAddScaleAddRelu; +}; + +struct CK_BiasNormalizeInInferClampOp +{ + using Op = ck::tensor_operation::element_wise::BiasNormalizeInInferClamp; +}; + + +template +consteval auto GetElementwiseOp() +{ + if constexpr (HasTensorOp) + { + if constexpr (TensorDesc.operation.elementwise_operation == ElementwiseOperation::SCALE) + { + return CK_ScaleOp{}; + } + else if constexpr (TensorDesc.operation.elementwise_operation == ElementwiseOperation::SCALEADD_SCALEADD_RELU) + { + return CK_ScaleAddScaleAddReluOp{}; + } + else if constexpr (TensorDesc.operation.elementwise_operation == ElementwiseOperation::BIAS_BNORM_CLAMP) + { + return CK_BiasNormalizeInInferClampOp{}; + } + else if constexpr (TensorDesc.operation.elementwise_operation == ElementwiseOperation::CLAMP) + { + return CK_ClampOp{}; + } + else if constexpr (TensorDesc.operation.elementwise_operation == ElementwiseOperation::PASS_THROUGH) + { + return CK_PassThroughOp{}; + } + else + { + static_assert(false, "Unsupported elementwise operation!"); + } + } + return CK_PassThroughOp{}; +} + +template +struct ElementwiseOps +{ + static const auto input_op = GetElementwiseOp(); + static const auto weight_op = GetElementwiseOp(); + static const auto output_op = GetElementwiseOp(); + using AElementwiseOp = typename decltype(input_op)::Op; + using BElementwiseOp = typename decltype(weight_op)::Op; + using CDEElementwiseOp = typename decltype(output_op)::Op; +}; + +template +constexpr auto GetElementwiseOps() +{ + return ElementwiseOps{}; +} + +} diff --git a/experimental/builder/include/ck_tile/builder/conv_factory.hpp b/experimental/builder/include/ck_tile/builder/conv_factory.hpp index 965b0bdbc5..aa3078a0a7 100644 --- a/experimental/builder/include/ck_tile/builder/conv_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_factory.hpp @@ -60,6 +60,7 @@ #include "ck_tile/builder/conv_signature_utils.hpp" #include "ck_tile/builder/conv_layout_utils.hpp" #include "ck_tile/builder/conv_data_type_utils.hpp" +#include "ck_tile/builder/conv_elementwise_op_utils.hpp" namespace ck_tile::builder::factory_internal { @@ -87,60 +88,6 @@ consteval auto get_output_layout_value(ConvOutputLayout layout) { else static_assert(false, "Unsupported spatial dimension"); } -template -struct ElementwiseOps -{ - // This will trigger if a specialization for the given ElementwiseOps combination is not found. - // We should always catch this in an earlier validation check. - static_assert(sizeof(UnsupportedEnumValue) == 0, - "Internal error. 
Unsupported elementwise operation for convolution factory."); -}; - -template <> -struct ElementwiseOps -{ - using AElementwiseOp = ck::tensor_operation::element_wise::PassThrough; - using BElementwiseOp = ck::tensor_operation::element_wise::PassThrough; - using CDEElementwiseOp = ck::tensor_operation::element_wise::PassThrough; -}; - -template <> -struct ElementwiseOps -{ - using AElementwiseOp = ck::tensor_operation::element_wise::PassThrough; - using BElementwiseOp = ck::tensor_operation::element_wise::PassThrough; - using CDEElementwiseOp = ck::tensor_operation::element_wise::Scale; -}; - -template <> -struct ElementwiseOps -{ - using AElementwiseOp = ck::tensor_operation::element_wise::PassThrough; - using BElementwiseOp = ck::tensor_operation::element_wise::PassThrough; - using CDEElementwiseOp = ck::tensor_operation::element_wise::ScaleAddScaleAddRelu; -}; - -struct PassThroughOp -{ - using AElementwiseOp = ck::tensor_operation::element_wise::PassThrough; - using BElementwiseOp = ck::tensor_operation::element_wise::PassThrough; - using CDEElementwiseOp = ck::tensor_operation::element_wise::PassThrough; -}; - -template -requires (HasElementwiseOp) -constexpr auto GetElementwiseOp() -{ - return ElementwiseOps{}; -} - -template -requires (!HasElementwiseOp) -constexpr auto GetElementwiseOp() -{ - return PassThroughOp{}; -} - // The algorithm specializations for the convolution and GEMM. template requires( @@ -387,15 +334,15 @@ template { static constexpr size_t SPATIAL_DIM = SIGNATURE.spatial_dim; - using Layouts = decltype(factory_internal::GetTensorLayout()); - using BiasLayouts = decltype(factory_internal::GetBiasTensorLayout()); - using Types = factory_internal::ConvTensorTypes; - using Ops = decltype(factory_internal::GetElementwiseOp()); + using Types = factory_internal::FwdConvTensorDataTypes; + using Ops = decltype(factory_internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); static_assert(ALGORITHM.transfer.a.lds_transfer.is_direct_load == @@ -435,13 +382,13 @@ struct ConvFactory SPATIAL_DIM, typename Layouts::ALayout, typename Layouts::BLayout, - typename BiasLayouts::DsLayout, + typename AuxiliaryLayouts::DsLayout, typename Layouts::ELayout, typename Types::ADataType, typename Types::BDataType, typename Types::AccDataType, typename Types::CShuffleDataType, - typename BiasLayouts::DsDataTypes, + typename Types::DsDataTypes, typename Types::EDataType, typename Ops::AElementwiseOp, typename Ops::BElementwiseOp, @@ -493,15 +440,15 @@ template { static constexpr size_t SPATIAL_DIM = SIGNATURE.spatial_dim; - using Layouts = decltype(factory_internal::GetTensorLayout()); - using BiasLayouts = decltype(factory_internal::GetBiasTensorLayout()); - - using Types = factory_internal::ConvTensorTypes; - using Ops = decltype(factory_internal::GetElementwiseOp()); + using AuxiliaryLayouts = decltype(factory_internal::GetAuxiliaryTensorLayouts()); + + using Types = factory_internal::FwdConvTensorDataTypes; + using Ops = decltype(factory_internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); static constexpr auto FWD_CONV_SPECIALIZATION = @@ -536,13 +483,13 @@ struct ConvFactory SPATIAL_DIM, typename Layouts::ALayout, typename Layouts::BLayout, - typename BiasLayouts::DsLayout, + typename AuxiliaryLayouts::DsLayout, typename Layouts::ELayout, typename Types::ADataType, typename Types::BDataType, typename Types::AccDataType, typename Types::CShuffleDataType, - typename BiasLayouts::DsDataTypes, + typename Types::DsDataTypes, typename Types::EDataType, 
typename Ops::AElementwiseOp, typename Ops::BElementwiseOp, @@ -594,15 +541,15 @@ template { static constexpr size_t SPATIAL_DIM = SIGNATURE.spatial_dim; - using Layouts = decltype(factory_internal::GetTensorLayout()); - using BiasLayouts = decltype(factory_internal::GetBiasTensorLayout()); - - using Types = factory_internal::ConvTensorTypes; - using Ops = decltype(factory_internal::GetElementwiseOp()); + using AuxiliaryLayouts = decltype(factory_internal::GetAuxiliaryTensorLayouts()); + + using Types = factory_internal::FwdConvTensorDataTypes; + using Ops = decltype(factory_internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); static constexpr auto FWD_CONV_SPECIALIZATION = @@ -639,13 +586,13 @@ struct ConvFactory SPATIAL_DIM, typename Layouts::ALayout, typename Layouts::BLayout, - typename BiasLayouts::DsLayout, + typename AuxiliaryLayouts::DsLayout, typename Layouts::ELayout, typename Types::ADataType, typename Types::BDataType, typename Types::AccDataType, typename Types::CShuffleDataType, - typename BiasLayouts::DsDataTypes, + typename Types::DsDataTypes, typename Types::EDataType, typename Ops::AElementwiseOp, typename Ops::BElementwiseOp, @@ -694,15 +641,15 @@ template { static constexpr size_t SPATIAL_DIM = SIGNATURE.spatial_dim; - using Layouts = decltype(factory_internal::GetTensorLayout()); - using BiasLayouts = decltype(factory_internal::GetBiasTensorLayout()); - - using Types = factory_internal::ConvTensorTypes; - using Ops = decltype(factory_internal::GetElementwiseOp()); + using AuxiliaryLayouts = decltype(factory_internal::GetAuxiliaryTensorLayouts()); + + using Types = factory_internal::FwdConvTensorDataTypes; + using Ops = decltype(factory_internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); static constexpr auto FWD_CONV_SPECIALIZATION = @@ -769,12 +716,12 @@ struct ConvFactory SPATIAL_DIM, typename Types::ADataType, typename Types::BDataType, - typename BiasLayouts::DsDataTypes, + typename Types::DsDataTypes, typename Types::EDataType, typename Types::AccDataType, typename Layouts::ALayout, typename Layouts::BLayout, - typename BiasLayouts::DsLayout, + typename AuxiliaryLayouts::DsLayout, typename Layouts::ELayout, typename Ops::AElementwiseOp, typename Ops::BElementwiseOp, @@ -821,15 +768,15 @@ template { static constexpr size_t SPATIAL_DIM = SIGNATURE.spatial_dim; - using Layouts = decltype(factory_internal::GetTensorLayout()); - using BiasLayouts = decltype(factory_internal::GetBiasTensorLayout()); - - using Types = factory_internal::ConvTensorTypes; - using Ops = decltype(factory_internal::GetElementwiseOp()); + using AuxiliaryLayouts = decltype(factory_internal::GetAuxiliaryTensorLayouts()); + + using Types = factory_internal::FwdConvTensorDataTypes; + using Ops = decltype(factory_internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); static constexpr auto BASE_ALGORITHM = ALGORITHM.base_algorithm; @@ -866,13 +813,13 @@ struct ConvFactory SPATIAL_DIM, typename Layouts::ALayout, typename Layouts::BLayout, - typename BiasLayouts::DsLayout, + typename AuxiliaryLayouts::DsLayout, typename Layouts::ELayout, typename Types::ADataType, typename Types::BDataType, typename Types::AccDataType, typename Types::CShuffleDataType, - typename BiasLayouts::DsDataTypes, + typename Types::DsDataTypes, typename Types::EDataType, typename Ops::AElementwiseOp, typename Ops::BElementwiseOp, diff --git a/experimental/builder/include/ck_tile/builder/conv_layout_utils.hpp 
b/experimental/builder/include/ck_tile/builder/conv_layout_utils.hpp index 9f454379c3..78f692383f 100644 --- a/experimental/builder/include/ck_tile/builder/conv_layout_utils.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_layout_utils.hpp @@ -9,10 +9,9 @@ namespace ck_tile::builder::factory_internal { -struct EmptyBiasLayout +struct EmptyAuxiliaryTensorLayout { using DsLayout = ck::Tuple<>; - using DsDataTypes = ck::Tuple<>; }; template @@ -20,9 +19,10 @@ consteval bool IsGenericBiasLayoutActive() { return requires { typename std::integral_constant; }; } -template -consteval auto GetBiasLayoutValue() +template +consteval auto GetAuxiliaryTensorLayoutValue() { + constexpr auto Layout = Config.layout; if constexpr (IsGenericBiasLayoutActive()) { constexpr auto val = Layout._bias_layout; @@ -63,45 +63,39 @@ consteval auto GetBiasLayoutValue() } } -template -consteval auto GetBiasLayoutTuple(std::index_sequence) -{ - return ck::Tuple())...>{}; -} - -// TODO: Remove hardcoding of bhalf_t -template -consteval auto GetBiasTypesTuple(std::index_sequence) +template +consteval auto GetAuxiliaryTensorLayoutTuple(std::index_sequence) { - return ck::Tuple{}; + // TODO: Use std::tuple instead of ck::Tuple + return ck::Tuple())...>{}; } -template +template requires(ConvSpatialDim) -struct ConvBiasTensorLayouts +struct AuxiliaryTensorLayouts { - static constexpr auto Size = BiasLayoutValue.size(); - - using DsLayout = decltype(GetBiasLayoutTuple(std::make_index_sequence{})); - using DsDataTypes = decltype(GetBiasTypesTuple(std::make_index_sequence{})); + static constexpr auto Size = AuxiliaryTensorConfigsValue.size(); + using DsLayout = decltype(GetAuxiliaryTensorLayoutTuple(std::make_index_sequence{})); }; -template -requires (HasBiasLayout) -consteval auto GetBiasTensorLayout() +// TODO: Currently only the ouput tensor can have auxiliary tensors (e.g., bias). 
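+// Illustrative expansion (mirrors the 2D bf16 scale-add-relu test later in
+// this series, not a normative mapping): two auxiliary operand configs with
+// layouts NHWGK and G_K_strided produce roughly
+//   DsLayout = ck::Tuple<convolution::NHWGK, convolution::G_K>;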
+template +requires (HasElementwiseOpWithAuxiliaryOperands) +consteval auto GetAuxiliaryTensorLayouts() { - return factory_internal::ConvBiasTensorLayouts{}; + return AuxiliaryTensorLayouts< + Signature.output.operation.auxiliary_operand_configs, SPATIAL_DIM, DIR>{}; } -template -requires (!HasBiasLayout) -consteval auto GetBiasTensorLayout() +template +requires (!HasElementwiseOpWithAuxiliaryOperands) +consteval auto GetAuxiliaryTensorLayouts() { - return EmptyBiasLayout{}; + return EmptyAuxiliaryTensorLayout{}; } template -consteval auto GetCKInputLayout() +consteval auto GetInputLayout() { if constexpr(SPATIAL_DIM == 1) { @@ -136,7 +130,7 @@ consteval auto GetCKInputLayout() } template -consteval auto GetCKWeightLayout() +consteval auto GetWeightLayout() { if constexpr(SPATIAL_DIM == 1) { @@ -165,7 +159,7 @@ consteval auto GetCKWeightLayout() } template -consteval auto GetCKOutputLayout() +consteval auto GetOutputLayout() { if constexpr(SPATIAL_DIM == 1) { @@ -207,17 +201,17 @@ template ()); - using BLayout = decltype(GetCKWeightLayout()); - using ELayout = decltype(GetCKOutputLayout()); + using ALayout = decltype(GetInputLayout()); + using BLayout = decltype(GetWeightLayout()); + using ELayout = decltype(GetOutputLayout()); }; -template +template consteval auto GetTensorLayout() { - constexpr auto INPUT_LAYOUT = Layout.input_layout; - constexpr auto WEIGHT_LAYOUT = Layout.weight_layout; - constexpr auto OUTPUT_LAYOUT = Layout.output_layout; + constexpr auto INPUT_LAYOUT = Signature.input.config.layout._input_layout; + constexpr auto WEIGHT_LAYOUT = Signature.weight.config.layout._weight_layout; + constexpr auto OUTPUT_LAYOUT = Signature.output.config.layout._output_layout; return factory_internal::ConvTensorLayouts{}; } diff --git a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp index f093f1d74a..b94d81f14f 100644 --- a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp @@ -30,58 +30,41 @@ concept ConvSpatialDim = std::is_integral_v && (N == 1 || N == 2 || // Constrains convolution data types to common floating-point types. template -concept ConvDataType = (T == DataType::FP32) || (T == DataType::FP16) || (T == DataType::BF16) || +concept ValidConvDataType = (T == DataType::FP32) || (T == DataType::FP16) || (T == DataType::BF16) || (T == DataType::FP8) || (T == DataType::I8) || (T == DataType::U8); template -concept HasBiasLayout = requires(T t) { - { t.bias_layout }; +concept TensorConfigDescriptor = requires(T t) { + { t.layout } -> std::convertible_to; + // Only require that data type is defined. It might be set to undefined value, in which case the signature's data type is used. 
+ { t.data_type } -> std::convertible_to; +}; + +template +concept HasAuxiliaryOperandConfigs = requires(T t) { + { t.auxiliary_operand_configs }; }; template -concept ConvertibleToArrayOfConvBiasLayout = - std::is_same_v, std::array>>>; +concept ConvertibleToArrayOfTensorConfigs = + std::is_same_v, std::array>>>; template -concept BiasLayoutWellDefinedIfProvided = requires(T t) { - requires !HasBiasLayout || requires { - { t.bias_layout } -> ConvertibleToArrayOfConvBiasLayout; +concept AuxiliaryOperandConfigsWellDefinedIfProvided = requires(T t) { + requires !HasAuxiliaryOperandConfigs || requires { + { t.auxiliary_operand_configs } -> ConvertibleToArrayOfTensorConfigs; }; }; template -concept ConvLayoutDescriptor = requires(T t) { - { t.input_layout } -> std::convertible_to; - { t.weight_layout } -> std::convertible_to; - { t.output_layout } -> std::convertible_to; - requires BiasLayoutWellDefinedIfProvided; +concept TensorOperatorDescriptor = requires(T t) { + { t.elementwise_operation } -> std::convertible_to; + requires AuxiliaryOperandConfigsWellDefinedIfProvided; }; - -// Constraints for forward convolution input layouts. -template -concept ValidConvInputLayoutForSpatialDim = - (SpatialDim == 1 && std::same_as) || - (SpatialDim == 2 && std::same_as) || - (SpatialDim == 3 && std::same_as); - -// Constraints for forward convolution output layouts. -template -concept ValidConvOutputLayoutForSpatialDim = - (SpatialDim == 1 && std::same_as) || - (SpatialDim == 2 && std::same_as) || - (SpatialDim == 3 && std::same_as); - -// Constraints for forward convolution weight layouts. -template -concept ValidConvWeightLayoutForSpatialDim = - (SpatialDim == 1 && std::same_as) || - (SpatialDim == 2 && std::same_as) || - (SpatialDim == 3 && std::same_as); - template -concept HasElementwiseOp = requires(T t) { - { t.elementwise_operation }; +concept HasTensorOp = requires(T t) { + { t.operation }; }; template @@ -89,21 +72,10 @@ concept HasConvolutionDirection = requires(T t) { { t.direction }; }; -template -concept ElementwiseOperationDescriptor = requires(T t) { - { t.input_op } -> std::convertible_to; - { t.weight_op } -> std::convertible_to; - { t.output_op } -> std::convertible_to; -}; - // Note: it is not required to provide an ElementwiseOp, but if one is provided, check if well // defined template -concept ElementwiseOpWellDefinedIfProvided = requires(T t) { - requires !HasElementwiseOp || requires { - { t.elementwise_operation } -> ElementwiseOperationDescriptor; - }; -}; +concept ElementwiseOpWellDefinedIfProvided = requires { !HasTensorOp || TensorOperatorDescriptor;}; // Note: it is not required to provide a convolution, but if one is provided, check if well defined template @@ -113,13 +85,28 @@ concept ConvolutionDirectionWellDefinedIfProvided = requires(T t) { }; }; +// Concept for the convolution tensor +template +concept ConvTensorDescriptor = requires(T t) { + { t.type } -> std::convertible_to; + { t.config } -> TensorConfigDescriptor; + requires ElementwiseOpWellDefinedIfProvided; +}; + +template +concept HasElementwiseOpWithAuxiliaryOperands = requires(T t) { + requires HasTensorOp; + requires HasAuxiliaryOperandConfigs; +}; + // Concept for a type that defines a convolution's operational signature. 
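+// A minimal satisfying aggregate (illustrative only; the concrete types used
+// by the tests live in test/impl/conv_signature_types.hpp) looks like
+//   struct Sig { int spatial_dim; DataType data_type;
+//                InTensor input; WgtTensor weight; OutTensor output; };
+// where the three tensor members each model ConvTensorDescriptor.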
template concept ConvSignatureDescriptor = requires(T t) { { t.spatial_dim } -> std::convertible_to; - { t.layout } -> ConvLayoutDescriptor; { t.data_type } -> std::convertible_to; - requires ElementwiseOpWellDefinedIfProvided; + { t.input } -> ConvTensorDescriptor; + { t.weight } -> ConvTensorDescriptor; + { t.output } -> ConvTensorDescriptor; requires ConvolutionDirectionWellDefinedIfProvided; }; @@ -127,7 +114,7 @@ concept ConvSignatureDescriptor = requires(T t) { template concept ValidConvSignature = requires { requires ConvSpatialDim; - requires ConvDataType; + requires ValidConvDataType; }; // Predicate for forward convolution (default if direction is not included). @@ -143,4 +130,25 @@ concept ConvDirectionIsBackwardData = (Sig.direction == ConvDirection::BACKWARD_ template concept ConvDirectionIsBackwardWeight = (Sig.direction == ConvDirection::BACKWARD_WEIGHT); +// Constraints for forward convolution input layouts. +template +concept ValidConvInputLayoutForSpatialDim = + (SpatialDim == 1 && std::same_as) || + (SpatialDim == 2 && std::same_as) || + (SpatialDim == 3 && std::same_as); + +// Constraints for forward convolution output layouts. +template +concept ValidConvOutputLayoutForSpatialDim = + (SpatialDim == 1 && std::same_as) || + (SpatialDim == 2 && std::same_as) || + (SpatialDim == 3 && std::same_as); + +// Constraints for forward convolution weight layouts. +template +concept ValidConvWeightLayoutForSpatialDim = + (SpatialDim == 1 && std::same_as) || + (SpatialDim == 2 && std::same_as) || + (SpatialDim == 3 && std::same_as); + } // namespace ck_tile::builder diff --git a/experimental/builder/include/ck_tile/builder/types.hpp b/experimental/builder/include/ck_tile/builder/types.hpp index d821cbf96e..ab75eb3ff2 100644 --- a/experimental/builder/include/ck_tile/builder/types.hpp +++ b/experimental/builder/include/ck_tile/builder/types.hpp @@ -11,6 +11,7 @@ namespace ck_tile::builder { enum class DataType { + UNDEFINDED = 0, FP32, FP16, BF16, @@ -19,11 +20,19 @@ enum class DataType U8 }; +// TODO: This might be redundant. +enum class ConvolutionTensorType +{ + Input, + Weight, + Output, + Bias +}; + enum class BiasLayout { GC, G_C_strided, - //GK, G_K_strided }; @@ -172,6 +181,29 @@ struct ConvBiasLayout constexpr ConvBiasLayout(ConvOutputLayout3D layout) : _conv_output_layout(layout) {} }; +struct ConvLayout +{ + union { + ConvInputLayout _input_layout; + ConvWeightLayout _weight_layout; + ConvOutputLayout _output_layout; + ConvBiasLayout _bias_layout; + }; + + constexpr ConvLayout(ConvInputLayout layout) : _input_layout(layout) {} + constexpr ConvLayout(ConvWeightLayout layout) : _weight_layout(layout) {} + constexpr ConvLayout(ConvOutputLayout layout) : _output_layout(layout) {} + constexpr ConvLayout(ConvBiasLayout layout) : _bias_layout(layout) {} +}; + +struct TensorConfig +{ + ConvLayout layout; + // Optional data types, override the type defined in the signature if provided. + DataType data_type{DataType::UNDEFINDED}; + DataType compute_type{DataType::UNDEFINDED}; +}; + // Direction of the convolution operation. enum class ConvDirection { @@ -183,12 +215,9 @@ enum class ConvDirection // Fused element-wise operations. 
enum class ElementwiseOperation { - BIAS, - BIAS_CLAMP, BIAS_BNORM_CLAMP, - BILINEAR, - CLAMP, SCALE, + CLAMP, PASS_THROUGH, SCALEADD_SCALEADD_RELU }; @@ -297,6 +326,7 @@ inline std::ostream& operator<<(std::ostream& os, DataType dt) case FP8: return os << "FP8"; case I8: return os << "I8"; case U8: return os << "U8"; + case UNDEFINDED: return os << "UNDEFINDED"; default: return os << "Unknown"; } } @@ -318,13 +348,10 @@ inline std::ostream& operator<<(std::ostream& os, ElementwiseOperation op) using enum ElementwiseOperation; switch(op) { - case BIAS: return os << "BIAS"; - case BIAS_CLAMP: return os << "BIAS_CLAMP"; - case BIAS_BNORM_CLAMP: return os << "BIAS_BNORM_CLAMP"; - case BILINEAR: return os << "BILINEAR"; case CLAMP: return os << "CLAMP"; case SCALE: return os << "SCALE"; case PASS_THROUGH: return os << "PASS_THROUGH"; + case BIAS_BNORM_CLAMP: return os << "BIAS_BNORM_CLAMP"; case SCALEADD_SCALEADD_RELU: return os << "SCALEADD_SCALEADD_RELU"; default: return os << "Unknown"; } diff --git a/experimental/builder/test/impl/conv_signature_types.hpp b/experimental/builder/test/impl/conv_signature_types.hpp index 903acaaae6..bde0a639ac 100644 --- a/experimental/builder/test/impl/conv_signature_types.hpp +++ b/experimental/builder/test/impl/conv_signature_types.hpp @@ -10,40 +10,31 @@ namespace ck_tile::builder::test { using namespace ck_tile::builder; -template -struct ConvLayout +template +struct TensorOperation { - ConvInputLayout input_layout; - ConvWeightLayout weight_layout; - ConvOutputLayout output_layout; - std::array bias_layout{ConvBiasLayout{BiasLayouts}...}; - - template - constexpr auto with_bias_layout() const - { - return ConvLayout{ - .input_layout = this->input_layout, - .weight_layout = this->weight_layout, - .output_layout = this->output_layout - }; - } + ElementwiseOperation elementwise_operation{ElementwiseOperation::PASS_THROUGH}; + std::array auxiliary_operand_configs{Configs...}; }; -struct ElementwiseOperations +template +struct ConvolutionTensor { - ElementwiseOperation input_op{ElementwiseOperation::PASS_THROUGH}; - ElementwiseOperation weight_op{ElementwiseOperation::PASS_THROUGH}; - ElementwiseOperation output_op{ElementwiseOperation::PASS_THROUGH}; + ConvolutionTensorType type; + TensorConfig config; + Op operation; }; -template +template struct ConvSignature { int spatial_dim; ConvDirection direction; - GroupConvLayout layout; DataType data_type; - ElementwiseOperations elementwise_operation; + DataType accumulation_data_type; + InputTensor input; + WeightTensor weight; + OutputTensor output; }; } // namespace ck_tile::builder::test From 05a4067b7e8aadea95fb298e90edb938b4edafd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Fri, 28 Nov 2025 12:40:50 +0000 Subject: [PATCH 17/41] Builder bug fixes after refactoring. 
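
The refactoring left three consteval helpers (GetTensorDataAndComputeTypes,
GetTensorAccumulationType, GetAuxiliaryTensorDataTypeValue) with a trailing
return after an if-constexpr chain. Because the branches return different
wrapper types (CK_half, CK_bhalf, ...), auto return-type deduction sees two
candidate types whenever a taken branch returns something other than the
fallback, which fails to compile; moving the fallback into an else branch
discards it in every instantiation that already returned. A minimal sketch
of the pitfall (plain C++20, not the builder code):

    template <int N>
    consteval auto pick()
    {
        if constexpr (N == 1) { return 1; }  // deduces int
        return 2.0;  // still live when N == 1 -> conflicting deduction
    }

    template <int N>
    consteval auto pick_fixed()
    {
        if constexpr (N == 1) { return 1; }
        else { return 2.0; }  // discarded when N == 1
    }

ElementwiseOps' members also change from static const to static constexpr:
an in-class initializer on a static data member of non-integral type is only
valid when the member is constexpr (or inline), and the values are needed at
compile time anyway.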
--- .../ck_tile/builder/conv_data_type_utils.hpp | 99 +++---------------- .../builder/conv_elementwise_op_utils.hpp | 8 +- .../include/ck_tile/builder/conv_factory.hpp | 10 +- .../ck_tile/builder/conv_layout_utils.hpp | 6 +- .../builder/conv_signature_concepts.hpp | 1 - .../builder/include/ck_tile/builder/types.hpp | 35 +++---- ...est_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp | 45 +++++++-- .../test/impl/conv_signature_types.hpp | 14 ++- 8 files changed, 92 insertions(+), 126 deletions(-) diff --git a/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp b/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp index 42bbe53ad2..b0f41a3865 100644 --- a/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp @@ -8,82 +8,7 @@ namespace ck_tile::builder::factory_internal { - -// // Type mappings from builder convolution data type to CK tensor types. -// template -// struct ConvTensorTypes -// { -// // This will trigger if a specialization for the given DataType is not found. -// // We should always catch this in an earlier validation check. -// static_assert(sizeof(UnsupportedEnumValue) == 0, -// "Internal error. Unsupported data type for convolution factory."); -// }; - -// template <> -// struct ConvTensorTypes -// { -// using ADataType = ck::half_t; -// using AComputeType = ck::half_t; -// using BDataType = ck::half_t; -// using BComputeType = ck::half_t; -// using CShuffleDataType = ck::half_t; -// using DsDataType = ck::Tuple<>; -// using AccDataType = float; -// using EDataType = ck::half_t; -// }; - -// template <> -// struct ConvTensorTypes -// { -// using ADataType = ck::bhalf_t; -// using AComputeType = ck::bhalf_t; -// using BDataType = ck::bhalf_t; -// using BComputeType = ck::bhalf_t; -// using CShuffleDataType = ck::bhalf_t; -// using DsDataType = ck::Tuple<>; -// using AccDataType = float; -// using EDataType = ck::bhalf_t; -// }; - -// template <> -// struct ConvTensorTypes -// { -// using ADataType = float; -// using AComputeType = float; -// using BDataType = float; -// using BComputeType = float; -// using CShuffleDataType = float; -// using DsDataType = ck::Tuple<>; -// using AccDataType = float; -// using EDataType = float; -// }; - -// template <> -// struct ConvTensorTypes -// { -// using ADataType = int8_t; -// using AComputeType = int8_t; -// using BDataType = int8_t; -// using BComputeType = int8_t; -// using CShuffleDataType = int8_t; -// using DsDataType = ck::Tuple<>; -// using AccDataType = int32_t; -// using EDataType = int8_t; -// }; - -// template <> -// struct ConvTensorTypes -// { -// using ADataType = ck::f8_t; -// using AComputeType = ck::f8_t; -// using BDataType = ck::f8_t; -// using BComputeType = ck::f8_t; -// using CShuffleDataType = ck::f8_t; -// using DsDataType = ck::Tuple<>; -// using AccDataType = float; -// using EDataType = ck::f8_t; -// }; - + struct CK_half { using type = ck::half_t; @@ -167,8 +92,10 @@ consteval auto GetTensorDataAndComputeTypes() { return std::make_pair(ConvertDataTypeToCK(), ConvertDataTypeToCK()); } - - return std::make_pair(ConvertDataTypeToCK(), ConvertDataTypeToCK()); + else + { + return std::make_pair(ConvertDataTypeToCK(), ConvertDataTypeToCK()); + } } template @@ -179,7 +106,10 @@ consteval auto GetTensorAccumulationType() { return ConvertDataTypeToCK(); } - return ConvertDataTypeToCK(); + else + { + return ConvertDataTypeToCK(); + } } template @@ -190,7 +120,10 @@ consteval auto 
GetAuxiliaryTensorDataTypeValue() { return ConvertDataTypeToCK(); } - return ConvertDataTypeToCK(); + else + { + return ConvertDataTypeToCK(); + } } template @@ -225,11 +158,11 @@ template struct FwdConvTensorDataTypes { static constexpr auto input_types = - GetTensorDataAndComputeTypes(); + GetTensorDataAndComputeTypes(); static constexpr auto weight_types = - GetTensorDataAndComputeTypes(); + GetTensorDataAndComputeTypes(); static constexpr auto output_types = - GetTensorDataAndComputeTypes(); + GetTensorDataAndComputeTypes(); using ADataType = typename decltype(input_types.first)::type; using AComputeType = typename decltype(input_types.second)::type; diff --git a/experimental/builder/include/ck_tile/builder/conv_elementwise_op_utils.hpp b/experimental/builder/include/ck_tile/builder/conv_elementwise_op_utils.hpp index ab28c8f4c5..381bc208a1 100644 --- a/experimental/builder/include/ck_tile/builder/conv_elementwise_op_utils.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_elementwise_op_utils.hpp @@ -70,9 +70,9 @@ consteval auto GetElementwiseOp() template struct ElementwiseOps { - static const auto input_op = GetElementwiseOp(); - static const auto weight_op = GetElementwiseOp(); - static const auto output_op = GetElementwiseOp(); + static constexpr auto input_op = GetElementwiseOp(); + static constexpr auto weight_op = GetElementwiseOp(); + static constexpr auto output_op = GetElementwiseOp(); using AElementwiseOp = typename decltype(input_op)::Op; using BElementwiseOp = typename decltype(weight_op)::Op; using CDEElementwiseOp = typename decltype(output_op)::Op; @@ -81,7 +81,7 @@ struct ElementwiseOps template constexpr auto GetElementwiseOps() { - return ElementwiseOps{}; + return ElementwiseOps{}; } } diff --git a/experimental/builder/include/ck_tile/builder/conv_factory.hpp b/experimental/builder/include/ck_tile/builder/conv_factory.hpp index aa3078a0a7..ec138ae182 100644 --- a/experimental/builder/include/ck_tile/builder/conv_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_factory.hpp @@ -388,7 +388,7 @@ struct ConvFactory typename Types::BDataType, typename Types::AccDataType, typename Types::CShuffleDataType, - typename Types::DsDataTypes, + typename Types::DsDataType, typename Types::EDataType, typename Ops::AElementwiseOp, typename Ops::BElementwiseOp, @@ -489,7 +489,7 @@ struct ConvFactory typename Types::BDataType, typename Types::AccDataType, typename Types::CShuffleDataType, - typename Types::DsDataTypes, + typename Types::DsDataType, typename Types::EDataType, typename Ops::AElementwiseOp, typename Ops::BElementwiseOp, @@ -592,7 +592,7 @@ struct ConvFactory typename Types::BDataType, typename Types::AccDataType, typename Types::CShuffleDataType, - typename Types::DsDataTypes, + typename Types::DsDataType, typename Types::EDataType, typename Ops::AElementwiseOp, typename Ops::BElementwiseOp, @@ -716,7 +716,7 @@ struct ConvFactory SPATIAL_DIM, typename Types::ADataType, typename Types::BDataType, - typename Types::DsDataTypes, + typename Types::DsDataType, typename Types::EDataType, typename Types::AccDataType, typename Layouts::ALayout, @@ -819,7 +819,7 @@ struct ConvFactory typename Types::BDataType, typename Types::AccDataType, typename Types::CShuffleDataType, - typename Types::DsDataTypes, + typename Types::DsDataType, typename Types::EDataType, typename Ops::AElementwiseOp, typename Ops::BElementwiseOp, diff --git a/experimental/builder/include/ck_tile/builder/conv_layout_utils.hpp 
b/experimental/builder/include/ck_tile/builder/conv_layout_utils.hpp index 78f692383f..e8e03f949a 100644 --- a/experimental/builder/include/ck_tile/builder/conv_layout_utils.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_layout_utils.hpp @@ -16,7 +16,7 @@ struct EmptyAuxiliaryTensorLayout template consteval bool IsGenericBiasLayoutActive() { - return requires { typename std::integral_constant; }; + return requires { typename std::integral_constant; }; } template @@ -25,7 +25,7 @@ consteval auto GetAuxiliaryTensorLayoutValue() constexpr auto Layout = Config.layout; if constexpr (IsGenericBiasLayoutActive()) { - constexpr auto val = Layout._bias_layout; + constexpr auto val = Layout._aux_tensor_layout._bias_layout; if constexpr (val == BiasLayout::G_K_strided) return ck::tensor_layout::convolution::G_K{}; else if constexpr (val == BiasLayout::GC) @@ -37,7 +37,7 @@ consteval auto GetAuxiliaryTensorLayoutValue() } else { - constexpr auto out_layout = Layout._conv_output_layout; + constexpr auto out_layout = Layout._output_layout; if constexpr (SPATIAL_DIM == 1) { diff --git a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp index b94d81f14f..2df12a0b87 100644 --- a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp @@ -88,7 +88,6 @@ concept ConvolutionDirectionWellDefinedIfProvided = requires(T t) { // Concept for the convolution tensor template concept ConvTensorDescriptor = requires(T t) { - { t.type } -> std::convertible_to; { t.config } -> TensorConfigDescriptor; requires ElementwiseOpWellDefinedIfProvided; }; diff --git a/experimental/builder/include/ck_tile/builder/types.hpp b/experimental/builder/include/ck_tile/builder/types.hpp index ab75eb3ff2..ab9bf0ae90 100644 --- a/experimental/builder/include/ck_tile/builder/types.hpp +++ b/experimental/builder/include/ck_tile/builder/types.hpp @@ -20,15 +20,6 @@ enum class DataType U8 }; -// TODO: This might be redundant. 
-enum class ConvolutionTensorType -{ - Input, - Weight, - Output, - Bias -}; - enum class BiasLayout { GC, @@ -167,18 +158,18 @@ struct ConvOutputLayout constexpr ConvOutputLayout(ConvOutputLayout3D layout) : _3d(layout) {} }; -struct ConvBiasLayout +struct ConvAuxiliaryTensorLayout { union { BiasLayout _bias_layout; ConvOutputLayout _conv_output_layout; }; - constexpr ConvBiasLayout(BiasLayout layout) : _bias_layout(layout) {} - constexpr ConvBiasLayout(ConvOutputLayout layout) : _conv_output_layout(layout) {} - constexpr ConvBiasLayout(ConvOutputLayout1D layout) : _conv_output_layout(layout) {} - constexpr ConvBiasLayout(ConvOutputLayout2D layout) : _conv_output_layout(layout) {} - constexpr ConvBiasLayout(ConvOutputLayout3D layout) : _conv_output_layout(layout) {} + constexpr ConvAuxiliaryTensorLayout(BiasLayout layout) : _bias_layout(layout) {} + constexpr ConvAuxiliaryTensorLayout(ConvOutputLayout layout) : _conv_output_layout(layout) {} + constexpr ConvAuxiliaryTensorLayout(ConvOutputLayout1D layout) : _conv_output_layout(layout) {} + constexpr ConvAuxiliaryTensorLayout(ConvOutputLayout2D layout) : _conv_output_layout(layout) {} + constexpr ConvAuxiliaryTensorLayout(ConvOutputLayout3D layout) : _conv_output_layout(layout) {} }; struct ConvLayout @@ -187,15 +178,25 @@ struct ConvLayout ConvInputLayout _input_layout; ConvWeightLayout _weight_layout; ConvOutputLayout _output_layout; - ConvBiasLayout _bias_layout; + ConvAuxiliaryTensorLayout _aux_tensor_layout; }; constexpr ConvLayout(ConvInputLayout layout) : _input_layout(layout) {} + constexpr ConvLayout(ConvInputLayout1D layout) : _input_layout(layout) {} + constexpr ConvLayout(ConvInputLayout2D layout) : _input_layout(layout) {} + constexpr ConvLayout(ConvInputLayout3D layout) : _input_layout(layout) {} constexpr ConvLayout(ConvWeightLayout layout) : _weight_layout(layout) {} + constexpr ConvLayout(ConvWeightLayout1D layout) : _weight_layout(layout) {} + constexpr ConvLayout(ConvWeightLayout2D layout) : _weight_layout(layout) {} + constexpr ConvLayout(ConvWeightLayout3D layout) : _weight_layout(layout) {} constexpr ConvLayout(ConvOutputLayout layout) : _output_layout(layout) {} - constexpr ConvLayout(ConvBiasLayout layout) : _bias_layout(layout) {} + constexpr ConvLayout(ConvOutputLayout1D layout) : _output_layout(layout) {} + constexpr ConvLayout(ConvOutputLayout2D layout) : _output_layout(layout) {} + constexpr ConvLayout(ConvOutputLayout3D layout) : _output_layout(layout) {} + constexpr ConvLayout(BiasLayout layout) : _aux_tensor_layout(layout) {} }; +// TODO: Move to conv_signature_types.hpp struct TensorConfig { ConvLayout layout; diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp index 8d5f64d853..8d91114603 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp @@ -14,20 +14,45 @@ TEST(FwdConvInstances, { constexpr auto G_K = BiasLayout::G_K_strided; constexpr auto NHWGK = ConvOutputLayout2D::NHWGK; - constexpr auto FwdConvLayout = ConvLayout - { - .input_layout = ConvInputLayout2D::NHWGC, - .weight_layout = ConvWeightLayout2D::GKYXC, - .output_layout = ConvOutputLayout2D::NHWGK - } - .with_bias_layout(); + + // Input + constexpr auto inputConfig = TensorConfig{ + .layout = ConvInputLayout2D::NHWGC + }; + constexpr auto inputTensor = ConvolutionTensor{ + .config = inputConfig + }; + + // Weight + 
constexpr auto weight = TensorConfig{ + .layout = ConvWeightLayout2D::GKYXC + }; + constexpr auto weightTensor = ConvolutionTensor{ + .config = weight + }; + + // Output with elementwise ops + constexpr auto output = TensorConfig{ + .layout = ConvOutputLayout2D::NHWGK + }; + constexpr auto output_op = TensorOperation<> + { + .elementwise_operation = ElementwiseOperation::SCALEADD_SCALEADD_RELU + } + .with_auxiliary_operand_configs(); + + constexpr auto outputTensor = ConvolutionTensor{ + .config = output, + .operation = output_op + }; constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, .direction = ConvDirection::FORWARD, - .layout = FwdConvLayout, .data_type = DataType::BF16, - .elementwise_operation = - { .output_op = ElementwiseOperation::SCALEADD_SCALEADD_RELU} + .accumulation_data_type = DataType::FP32, + .input = inputTensor, + .weight = weightTensor, + .output = outputTensor }; constexpr auto FwdConvAlgorithm = diff --git a/experimental/builder/test/impl/conv_signature_types.hpp b/experimental/builder/test/impl/conv_signature_types.hpp index bde0a639ac..708f54aee9 100644 --- a/experimental/builder/test/impl/conv_signature_types.hpp +++ b/experimental/builder/test/impl/conv_signature_types.hpp @@ -15,14 +15,22 @@ struct TensorOperation { ElementwiseOperation elementwise_operation{ElementwiseOperation::PASS_THROUGH}; std::array auxiliary_operand_configs{Configs...}; + + // Add builder to add auxiliary tensor configs + template + constexpr auto with_auxiliary_operand_configs() const + { + return TensorOperation{ + .elementwise_operation = this->elementwise_operation + }; + } }; -template +template > struct ConvolutionTensor { - ConvolutionTensorType type; TensorConfig config; - Op operation; + Op operation{}; }; template From c25eb6569b7adf440ab727068679a5c9fbca4725 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Fri, 28 Nov 2025 13:15:18 +0000 Subject: [PATCH 18/41] Working baseline. 
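
Two fixes on top of the refactoring:

1. GetAuxiliaryTensorDataTypeTuple collected the CK_* wrapper structs instead
   of the data types they carry; unwrapping with ::type makes DsDataType
   e.g. ck::Tuple<ck::bhalf_t, ...> rather than ck::Tuple<CK_bhalf, ...>.

2. GetElementwiseOp used static_assert(false, ...) in its final else branch.
   Until C++23 (P2593) that is ill-formed even though the branch is
   discarded, because the condition is not value-dependent, and compilers
   reject it at definition time. The usual workaround ties the condition to
   a template argument. Sketch, assuming the helper is an empty template
   such as

       template <auto V> struct UnsupportedEnumValue {};

   so that

       static_assert(sizeof(UnsupportedEnumValue<op>) == 0, "...");

   is only evaluated (and then reliably fires) when the branch is actually
   instantiated.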
--- .../ck_tile/builder/conv_data_type_utils.hpp | 4 ++-- .../builder/conv_elementwise_op_utils.hpp | 20 +++++++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp b/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp index b0f41a3865..b35c8c94d9 100644 --- a/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp @@ -8,7 +8,7 @@ namespace ck_tile::builder::factory_internal { - + struct CK_half { using type = ck::half_t; @@ -129,7 +129,7 @@ consteval auto GetAuxiliaryTensorDataTypeValue() template consteval auto GetAuxiliaryTensorDataTypeTuple(std::index_sequence) { - return ck::Tuple())...>{}; + return ck::Tuple())::type...>{}; } template diff --git a/experimental/builder/include/ck_tile/builder/conv_elementwise_op_utils.hpp b/experimental/builder/include/ck_tile/builder/conv_elementwise_op_utils.hpp index 381bc208a1..95c777c6ea 100644 --- a/experimental/builder/include/ck_tile/builder/conv_elementwise_op_utils.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_elementwise_op_utils.hpp @@ -39,32 +39,36 @@ consteval auto GetElementwiseOp() { if constexpr (HasTensorOp) { - if constexpr (TensorDesc.operation.elementwise_operation == ElementwiseOperation::SCALE) + constexpr auto op = TensorDesc.operation.elementwise_operation; + if constexpr (op == ElementwiseOperation::SCALE) { return CK_ScaleOp{}; } - else if constexpr (TensorDesc.operation.elementwise_operation == ElementwiseOperation::SCALEADD_SCALEADD_RELU) + else if constexpr (op == ElementwiseOperation::SCALEADD_SCALEADD_RELU) { return CK_ScaleAddScaleAddReluOp{}; } - else if constexpr (TensorDesc.operation.elementwise_operation == ElementwiseOperation::BIAS_BNORM_CLAMP) + else if constexpr (op == ElementwiseOperation::BIAS_BNORM_CLAMP) { return CK_BiasNormalizeInInferClampOp{}; } - else if constexpr (TensorDesc.operation.elementwise_operation == ElementwiseOperation::CLAMP) + else if constexpr (op == ElementwiseOperation::CLAMP) { return CK_ClampOp{}; } - else if constexpr (TensorDesc.operation.elementwise_operation == ElementwiseOperation::PASS_THROUGH) + else if constexpr (op == ElementwiseOperation::PASS_THROUGH) { return CK_PassThroughOp{}; } else { - static_assert(false, "Unsupported elementwise operation!"); + static_assert(sizeof(UnsupportedEnumValue) == 0, "Unsupported elementwise operation!"); } } - return CK_PassThroughOp{}; + else + { + return CK_PassThroughOp{}; + } } template @@ -81,7 +85,7 @@ struct ElementwiseOps template constexpr auto GetElementwiseOps() { - return ElementwiseOps{}; + return ElementwiseOps{}; } } From a4252c48cef00ed36bdddbb78e27998134579870 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Fri, 28 Nov 2025 13:36:46 +0000 Subject: [PATCH 19/41] Make signature definition look nice in the test code. 
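
The test now builds the whole signature as one nested designated-initializer
expression instead of a chain of named temporaries. Two C++20 rules make the
inline form work: designators must follow member declaration order, and a
nested aggregate can be initialized in place. Self-contained illustration
(toy types, not the builder's):

    struct Cfg { int layout; int data_type = 0; };
    struct Tns { Cfg config; };
    struct Sig { int spatial_dim; Tns input; Tns output; };

    constexpr Sig s{
        .spatial_dim = 2,
        .input  = { .config = { .layout = 1 } },
        .output = { .config = { .layout = 2 } },
    };
    static_assert(s.input.config.layout == 1);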
--- ...est_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp | 57 +++++-------------- 1 file changed, 15 insertions(+), 42 deletions(-) diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp index 8d91114603..5ea91f0d48 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp @@ -12,48 +12,21 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_Instance_2D_BF16_scale_add_relu) { - constexpr auto G_K = BiasLayout::G_K_strided; - constexpr auto NHWGK = ConvOutputLayout2D::NHWGK; - - // Input - constexpr auto inputConfig = TensorConfig{ - .layout = ConvInputLayout2D::NHWGC - }; - constexpr auto inputTensor = ConvolutionTensor{ - .config = inputConfig - }; - - // Weight - constexpr auto weight = TensorConfig{ - .layout = ConvWeightLayout2D::GKYXC - }; - constexpr auto weightTensor = ConvolutionTensor{ - .config = weight - }; - - // Output with elementwise ops - constexpr auto output = TensorConfig{ - .layout = ConvOutputLayout2D::NHWGK - }; - constexpr auto output_op = TensorOperation<> - { - .elementwise_operation = ElementwiseOperation::SCALEADD_SCALEADD_RELU - } - .with_auxiliary_operand_configs(); - - constexpr auto outputTensor = ConvolutionTensor{ - .config = output, - .operation = output_op - }; - - constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .data_type = DataType::BF16, - .accumulation_data_type = DataType::FP32, - .input = inputTensor, - .weight = weightTensor, - .output = outputTensor - }; + constexpr ConvSignature FwdConvSignature + { + .spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::BF16, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor { .config = { .layout = ConvInputLayout2D::NHWGC } }, + .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout2D::GKYXC } }, + .output = ConvolutionTensor + { + .config = { .layout = ConvOutputLayout2D::NHWGK }, + .operation = TensorOperation<> { .elementwise_operation = ElementwiseOperation::SCALEADD_SCALEADD_RELU } + .with_auxiliary_operand_configs() + } + }; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle{} From 7ed339f36a74a09da3ab80e9563b9145f8b2020e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Fri, 28 Nov 2025 13:57:11 +0000 Subject: [PATCH 20/41] Move TensorConfig into test implementations. 
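
TensorConfig becomes a test-side type: the library never names it and only
requires that whatever the signature carries models TensorConfigDescriptor
(at minimum .layout and .data_type members of the right types). Any user
aggregate with those members works, e.g. (illustrative):

    struct MyConfig
    {
        ConvLayout layout;
        DataType data_type{DataType::UNDEFINDED};
        DataType compute_type{DataType::UNDEFINDED};
    };

The array-of-configs check also moves from a std::is_same comparison to a
trait with a partial specialization, since a bare concept cannot introduce
the deduced array length N:

    template <typename T>
    struct IsArrayOfTensorConfigDescriptors : std::false_type {};

    template <typename T, std::size_t N>
        requires TensorConfigDescriptor<T>
    struct IsArrayOfTensorConfigDescriptors<std::array<T, N>> : std::true_type {};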
--- .../include/ck_tile/builder/conv_data_type_utils.hpp | 2 +- .../ck_tile/builder/conv_signature_concepts.hpp | 11 ++++++++++- .../builder/include/ck_tile/builder/types.hpp | 9 --------- .../conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp | 2 +- .../builder/test/impl/conv_signature_types.hpp | 8 ++++++++ 5 files changed, 20 insertions(+), 12 deletions(-) diff --git a/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp b/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp index b35c8c94d9..6bc0e7ea1e 100644 --- a/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp @@ -74,7 +74,7 @@ consteval auto ConvertDataTypeToCK() } } -template +template consteval auto GetTensorDataAndComputeTypes() { constexpr auto data_type = Config.data_type; diff --git a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp index 2df12a0b87..b0f13ff58c 100644 --- a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp @@ -45,9 +45,18 @@ concept HasAuxiliaryOperandConfigs = requires(T t) { { t.auxiliary_operand_configs }; }; +namespace detail { + template + struct IsArrayOfTensorConfigDescriptors : std::false_type {}; + + template + requires TensorConfigDescriptor + struct IsArrayOfTensorConfigDescriptors> : std::true_type {}; +} + template concept ConvertibleToArrayOfTensorConfigs = - std::is_same_v, std::array>>>; + detail::IsArrayOfTensorConfigDescriptors>::value; template concept AuxiliaryOperandConfigsWellDefinedIfProvided = requires(T t) { diff --git a/experimental/builder/include/ck_tile/builder/types.hpp b/experimental/builder/include/ck_tile/builder/types.hpp index ab9bf0ae90..07fe306a4e 100644 --- a/experimental/builder/include/ck_tile/builder/types.hpp +++ b/experimental/builder/include/ck_tile/builder/types.hpp @@ -196,15 +196,6 @@ struct ConvLayout constexpr ConvLayout(BiasLayout layout) : _aux_tensor_layout(layout) {} }; -// TODO: Move to conv_signature_types.hpp -struct TensorConfig -{ - ConvLayout layout; - // Optional data types, override the type defined in the signature if provided. - DataType data_type{DataType::UNDEFINDED}; - DataType compute_type{DataType::UNDEFINDED}; -}; - // Direction of the convolution operation. 
enum class ConvDirection { diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp index 5ea91f0d48..9ce34681e4 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp @@ -19,7 +19,7 @@ TEST(FwdConvInstances, .data_type = DataType::BF16, .accumulation_data_type = DataType::FP32, .input = ConvolutionTensor { .config = { .layout = ConvInputLayout2D::NHWGC } }, - .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout2D::GKYXC } }, + .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout2D::GKYXC, .data_type = DataType::BF16 } }, // For demo purposes .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout2D::NHWGK }, diff --git a/experimental/builder/test/impl/conv_signature_types.hpp b/experimental/builder/test/impl/conv_signature_types.hpp index 708f54aee9..97b927b722 100644 --- a/experimental/builder/test/impl/conv_signature_types.hpp +++ b/experimental/builder/test/impl/conv_signature_types.hpp @@ -10,6 +10,14 @@ namespace ck_tile::builder::test { using namespace ck_tile::builder; +struct TensorConfig +{ + ConvLayout layout; + // Optional data types, override the type defined in the signature if provided. + DataType data_type{DataType::UNDEFINDED}; + DataType compute_type{DataType::UNDEFINDED}; +}; + template struct TensorOperation { From beac0e82fac4683c78504a1bcc1ab60274f43022 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Fri, 28 Nov 2025 14:49:53 +0000 Subject: [PATCH 21/41] Fix all fwd conv builder tests. --- .../ck_tile/builder/conv_data_type_utils.hpp | 4 ++ .../builder/include/ck_tile/builder/types.hpp | 2 + experimental/builder/test/CMakeLists.txt | 1 + .../test/conv/test_ckb_conv_fwd_1d_bf16.cpp | 27 ++++++--- .../test/conv/test_ckb_conv_fwd_1d_fp16.cpp | 32 ++++++----- .../test/conv/test_ckb_conv_fwd_1d_i8.cpp | 23 +++++--- .../test/conv/test_ckb_conv_fwd_2d_bf16.cpp | 53 ++++++++++++------ ...est_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp | 15 +++-- .../conv/test_ckb_conv_fwd_2d_dl_fp16.cpp | 50 ++++++++++++----- .../test/conv/test_ckb_conv_fwd_2d_fp16.cpp | 32 +++++++---- .../test/conv/test_ckb_conv_fwd_2d_fp32.cpp | 32 +++++++---- .../test/conv/test_ckb_conv_fwd_2d_fp8.cpp | 25 ++++++--- ...test_ckb_conv_fwd_2d_large_tensor_fp16.cpp | 56 +++++++++++++------ .../test/conv/test_ckb_conv_fwd_3d_bf16.cpp | 32 +++++++---- .../test/conv/test_ckb_conv_fwd_3d_fp16.cpp | 32 +++++++---- .../test/conv/test_ckb_conv_fwd_3d_fp32.cpp | 32 +++++++---- 16 files changed, 301 insertions(+), 147 deletions(-) diff --git a/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp b/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp index 6bc0e7ea1e..3c01833154 100644 --- a/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp @@ -59,6 +59,10 @@ consteval auto ConvertDataTypeToCK() { return CK_float{}; } + else if constexpr (dt == DataType::INT32) + { + return CK_int32{}; + } else if constexpr (dt == DataType::I8) { return CK_int8{}; diff --git a/experimental/builder/include/ck_tile/builder/types.hpp b/experimental/builder/include/ck_tile/builder/types.hpp index 07fe306a4e..51231e7c54 100644 --- a/experimental/builder/include/ck_tile/builder/types.hpp +++ 
b/experimental/builder/include/ck_tile/builder/types.hpp @@ -16,6 +16,7 @@ enum class DataType FP16, BF16, FP8, + INT32, I8, U8 }; @@ -316,6 +317,7 @@ inline std::ostream& operator<<(std::ostream& os, DataType dt) case FP32: return os << "FP32"; case BF16: return os << "BF16"; case FP8: return os << "FP8"; + case INT32: return os << "INT32"; case I8: return os << "I8"; case U8: return os << "U8"; case UNDEFINDED: return os << "UNDEFINDED"; diff --git a/experimental/builder/test/CMakeLists.txt b/experimental/builder/test/CMakeLists.txt index 6ea06e4575..898fffd8dc 100644 --- a/experimental/builder/test/CMakeLists.txt +++ b/experimental/builder/test/CMakeLists.txt @@ -37,6 +37,7 @@ add_ck_builder_test(test_ckb_get_instance_string # Testing the fwd convolution builder requires kernel compilation. # To enable parallel compilation, the individual tests are split into separate files. add_ck_builder_test(test_ckb_build_fwd_instances + conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp conv/test_ckb_conv_fwd_1d_fp16.cpp conv/test_ckb_conv_fwd_1d_bf16.cpp conv/test_ckb_conv_fwd_1d_i8.cpp diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_bf16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_bf16.cpp index 1cace0cf9a..0bfc03058f 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_bf16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_bf16.cpp @@ -13,11 +13,19 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_1D_BF16_ChannelsFirst_scale) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 1, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout1D::NGCW_GKXC_NGKW, - .data_type = DataType::BF16, - .elementwise_operation = ElementwiseOperation::SCALE}; + constexpr ConvSignature FwdConvSignature{ + .spatial_dim = 1, + .direction = ConvDirection::FORWARD, + .data_type = DataType::BF16, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor { .config = { .layout = ConvInputLayout1D::NGCW } }, + .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout1D::GKXC,} }, + .output = ConvolutionTensor + { + .config = { .layout = ConvOutputLayout1D::NGKW }, + .operation = TensorOperation<> { .elementwise_operation = ElementwiseOperation::SCALE } + } + }; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -30,10 +38,13 @@ TEST(FwdConvInstances, using Builder = ConvBuilder; run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", - "256, 256, 256, 32", + "256,256,256,32", + "NGCW,GKXC,EmptyTuple,NGKW", + "PassThrough,PassThrough,Scale", "Filter1x1Stride1Pad0", - "BlkGemmPipelineScheduler: Intrawave", - "BlkGemmPipelineVersion: v2"}); + "MNKPadding", + "Intrawave", + "v2"}); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp index e3bacdb204..46e771212d 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp @@ -10,22 +10,19 @@ using namespace ck_tile::builder::test_utils; // 1D FP16 (channels-last) with DEFAULT specialization TEST(FwdConvInstances, - Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_Instance_1D_FP16_ChannelsFirst_scale) + Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_Instance_1D_FP16_ChannelsFirst) { - constexpr ConvLayout<> FwdConvLayout - { - .input_layout = 
ConvInputLayout1D::NWGC, - .weight_layout = ConvWeightLayout1D::GKXC, - .output_layout = ConvOutputLayout1D::NWGK + constexpr ConvSignature FwdConvSignature + { + .spatial_dim = 1, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor { .config = { .layout = ConvInputLayout1D::NWGC } }, + .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout1D::GKXC } }, + .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout1D::NWGK } } }; - constexpr ConvSignature FwdConvSignature{.spatial_dim = 1, - .direction = ConvDirection::FORWARD, - .layout = FwdConvLayout, - .data_type = DataType::FP16, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; - constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle{} .with_thread_block(FwdThreadBlock_64_64x32x32) @@ -36,7 +33,14 @@ TEST(FwdConvInstances, using Builder = ConvBuilder; run_test( - {"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle", "64, 64, 32, 32", "Default"}); + { + "DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle", + "NWGC,GKXC,EmptyTuple,NWGK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding", + "64,64,32,32", + "Default" + }); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp index f6b18747b7..33a734d814 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp @@ -14,12 +14,16 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleD_Wmma_CShuffle_Instance_1D_FP32_ChannelsFirst_scale) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 1, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout1D::GNWC_GKXC_GNWK, - .data_type = DataType::I8, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature + { + .spatial_dim = 1, + .direction = ConvDirection::FORWARD, + .data_type = DataType::I8, + .accumulation_data_type = DataType::INT32, + .input = ConvolutionTensor { .config = { .layout = ConvInputLayout1D::GNWC } }, + .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout1D::GKXC } }, + .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout1D::GNWK } } + }; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleD_Wmma_CShuffle{} @@ -31,7 +35,12 @@ TEST(FwdConvInstances, using Builder = ConvBuilder; run_test( - {"DeviceGroupedConvFwdMultipleD_Wmma_CShuffle", "128, 64, 64, 64", "Default"}); + { + "DeviceGroupedConvFwdMultipleD_Wmma_CShuffle", + "128,64,64,64", + "GNWC,GKXC,EmptyTuple,GNWK", + "PassThrough,PassThrough,PassThrough", + "Default"}); } #endif diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp index e0dc3225fa..796849174f 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp @@ -12,12 +12,16 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_2D_BF16_ChannelsLast) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout2D::NHWGC_GKYXC_NHWGK, - .data_type = DataType::BF16, - .elementwise_operation = - 
ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature + { + .spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::BF16, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor { .config = { .layout = ConvInputLayout2D::NHWGC } }, + .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout2D::GKYXC } }, + .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout2D::NHWGK } } + }; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -29,22 +33,29 @@ TEST(FwdConvInstances, using Builder = ConvBuilder; run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", - "256, 256, 256, 32", + "256,256,256,32", "Default", - "BlkGemmPipelineScheduler: Intrawave", - "BlkGemmPipelineVersion: v1"}); + "NHWGC,GKYXC,EmptyTuple,NHWGK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding", + "Intrawave", + "v1"}); } // 2D BF16 NHWGC (channels-last) with Pipeline V5 and FILTER_3x3 TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_2D_BF16_NHWGC_Filter3x3) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout2D::NHWGC_GKYXC_NHWGK, - .data_type = DataType::BF16, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature + { + .spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::BF16, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor { .config = { .layout = ConvInputLayout2D::NHWGC } }, + .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout2D::GKYXC } }, + .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout2D::NHWGK } } + }; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -55,9 +66,15 @@ TEST(FwdConvInstances, .with_block_gemm(BlockGemmDesc_v5_intrawave); using Builder = ConvBuilder; - run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", - "Filter3x3", - "BlkGemmPipelineVersion: v5"}); + run_test( + { + "DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", + "Filter3x3", + "NHWGC,GKYXC,EmptyTuple,NHWGK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding", + "v5" + }); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp index 9ce34681e4..0a424ac1ac 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp @@ -37,12 +37,15 @@ TEST(FwdConvInstances, .with_prefetch_config(1, 1, PipelineScheduler::DEFAULT); using Builder = ConvBuilder; - run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle", - "NHWGC,GKYXC,Tuple(NHWGK,G_K),NHWGK", // Check layouts - "PassThrough,PassThrough,ScaleAddScaleAddRelu", // Check elementwise ops - "64,64,32,32", - "MNKPadding", - "Default"}); + run_test( + { + "DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle", + "NHWGC,GKYXC,Tuple(NHWGK,G_K),NHWGK", + "PassThrough,PassThrough,ScaleAddScaleAddRelu", + "64,64,32,32", + "MNKPadding", + "Default" + }); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp index 4c4d128717..dd4299c28f 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp 
+++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp @@ -10,12 +10,16 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK_Instance_2D_FP16_GNHWC) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout2D::GNHWC_GKYXC_GNHWK, - .data_type = DataType::FP16, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature + { + .spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor { .config = { .layout = ConvInputLayout2D::GNHWC } }, + .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout2D::GKYXC } }, + .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout2D::GNHWK } } + }; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK{} @@ -27,18 +31,29 @@ TEST(FwdConvInstances, Create_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK_Ins using Builder = ConvBuilder; run_test( - {"DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK", "256, 128, 128, 16", "Default"}); + { + "DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK", + "256,128,128,16", + "Default", + "MNKPadding", + "GNHWC,GKYXC,EmptyTuple,GNHWK", + "PassThrough,PassThrough,PassThrough" + }); } TEST(FwdConvInstances, Create_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK_Instance_2D_FP16_FILTER_1X1_PAD0) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout2D::GNHWC_GKYXC_GNHWK, - .data_type = DataType::FP16, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature + { + .spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor { .config = { .layout = ConvInputLayout2D::GNHWC } }, + .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout2D::GKYXC } }, + .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout2D::GNHWK } } + }; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK{} @@ -51,7 +66,14 @@ TEST(FwdConvInstances, using Builder = ConvBuilder; run_test( - {"DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK", "256, 128, 128, 16", "Filter1x1Pad0"}); + { + "DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK", + "256,128,128,16", + "Filter1x1Pad0", + "MNKPadding", + "GNHWC,GKYXC,EmptyTuple,GNHWK", + "PassThrough,PassThrough,PassThrough" + }); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp index 36b44ffb41..75886cfe92 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp @@ -11,12 +11,16 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_2D_FP16_GNHWC) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout2D::GNHWC_GKYXC_GNHWK, - .data_type = DataType::FP16, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature + { + .spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type 
= DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor { .config = { .layout = ConvInputLayout2D::GNHWC } }, + .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout2D::GKYXC } }, + .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout2D::GNHWK } } + }; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -28,11 +32,17 @@ TEST(FwdConvInstances, .with_block_gemm(BlockGemmDesc_v3_intrawave); using Builder = ConvBuilder; - run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", - "256, 256, 256, 32", - "Filter1x1Pad0", - "BlkGemmPipelineScheduler: Intrawave", - "BlkGemmPipelineVersion: v3"}); + run_test( + { + "DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", + "256,256,256,32", + "Filter1x1Pad0", + "Intrawave", + "v3", + "GNHWC,GKYXC,EmptyTuple,GNHWK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding" + }); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp index b2943d91b9..217268d16a 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp @@ -11,12 +11,16 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_2D_FP32_NGCHW_GKCYX) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout2D::NGCHW_GKCYX_NGKHW, - .data_type = DataType::FP32, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature + { + .spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP32, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor { .config = { .layout = ConvInputLayout2D::NGCHW } }, + .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout2D::GKCYX } }, + .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout2D::NGKHW } } + }; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -28,11 +32,17 @@ TEST(FwdConvInstances, .with_block_gemm(BlockGemmDesc_v4_intrawave); using Builder = ConvBuilder; - run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", - "256, 128, 128, 32", - "Filter1x1Stride1Pad0", - "BlkGemmPipelineScheduler: Intrawave", - "BlkGemmPipelineVersion: v4"}); + run_test( + { + "DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", + "256,128,128,32", + "Filter1x1Stride1Pad0", + "Intrawave", + "v4", + "NGCHW,GKCYX,EmptyTuple,NGKHW", + "PassThrough,PassThrough,PassThrough", + "MNKPadding" + }); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp index d24df998fd..b460f0f8f1 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp @@ -12,12 +12,16 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_Instance_2D_FP8_ChannelsLast) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout2D::NHWGC_GKYXC_NHWGK, - .data_type = DataType::FP8, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature + { + 
.spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP8, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor { .config = { .layout = ConvInputLayout2D::NHWGC } }, + .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout2D::GKYXC } }, + .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout2D::NHWGK } } + }; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle{} @@ -29,7 +33,14 @@ TEST(FwdConvInstances, using Builder = ConvBuilder; run_test( - {"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle", "256, 256, 128, 32", "Default"}); + { + "DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle", + "256,256,128,32", + "Default", + "NHWGC,GKYXC,EmptyTuple,NHWGK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding" + }); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp index be0ea3d0a5..b57594fa3e 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp @@ -11,12 +11,16 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor_Instance_2D_FP16_GNHWC) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout2D::GNHWC_GKYXC_GNHWK, - .data_type = DataType::FP16, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature + { + .spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor { .config = { .layout = ConvInputLayout2D::GNHWC } }, + .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout2D::GKYXC } }, + .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout2D::GNHWK } } + }; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor{ @@ -29,21 +33,31 @@ TEST(FwdConvInstances, .with_prefetch_config(1, 1, PipelineScheduler::DEFAULT)}; using Builder = ConvBuilder; - run_test({"DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor", - "256, 256, 128, 32", - "Default"}); + run_test( + { + "DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor", + "256,256,128,32", + "Default", + "GNHWC,GKYXC,EmptyTuple,GNHWK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding" + }); } TEST( FwdConvInstances, Create_DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor_Instance_2D_FP16_GNHWC_Filter1x1Pad0) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout2D::GNHWC_GKYXC_GNHWK, - .data_type = DataType::FP16, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature + { + .spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor { .config = { .layout = ConvInputLayout2D::GNHWC } }, + .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout2D::GKYXC } }, + .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout2D::GNHWK } } + }; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor{ @@ -56,9 
+70,15 @@ TEST( .with_prefetch_config(1, 1, PipelineScheduler::DEFAULT)}; using Builder = ConvBuilder; - run_test({"DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor", - "128, 128, 128, 32", - "Filter1x1Pad0"}); + run_test( + { + "DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor", + "128,128,128,32", + "Filter1x1Pad0", + "GNHWC,GKYXC,EmptyTuple,GNHWK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding" + }); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp index 0db89669f7..224762de05 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp @@ -12,12 +12,16 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_3D_BF16_GNDHWC) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 3, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout3D::GNDHWC_GKZYXC_GNDHWK, - .data_type = DataType::BF16, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature + { + .spatial_dim = 3, + .direction = ConvDirection::FORWARD, + .data_type = DataType::BF16, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor { .config = { .layout = ConvInputLayout3D::GNDHWC } }, + .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout3D::GKZYXC } }, + .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout3D::GNDHWK } } + }; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -28,11 +32,17 @@ TEST(FwdConvInstances, .with_block_gemm(BlockGemmDesc_v3_intrawave); using Builder = ConvBuilder; - run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", - "256, 256, 256, 32", - "Default", - "BlkGemmPipelineScheduler: Intrawave", - "BlkGemmPipelineVersion: v3"}); + run_test( + { + "DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", + "256,256,256,32", + "Default", + "Intrawave", + "v3", + "GNDHWC,GKZYXC,EmptyTuple,GNDHWK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding" + }); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp index 80e12f9572..d1f0a26cc7 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp @@ -12,12 +12,16 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_3D_FP16_NDHWGC_ChannelsLast) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 3, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout3D::NDHWGC_GKZYXC_NDHWGK, - .data_type = DataType::FP16, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature + { + .spatial_dim = 3, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor { .config = { .layout = ConvInputLayout3D::NDHWGC } }, + .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout3D::GKZYXC } }, + .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout3D::NDHWGK } } + }; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -29,11 +33,17 @@ 
TEST(FwdConvInstances, .with_block_gemm(BlockGemmDesc_v4_intrawave); using Builder = ConvBuilder; - run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", - "256, 128, 128, 32", - "Filter1x1Pad0", - "BlkGemmPipelineScheduler: Intrawave", - "BlkGemmPipelineVersion: v4"}); + run_test( + { + "DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", + "256,128,128,32", + "Filter1x1Pad0", + "Intrawave", + "v4", + "NDHWGC,GKZYXC,EmptyTuple,NDHWGK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding" + }); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp index bfddd6efcb..aa8560f2f4 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp @@ -12,12 +12,16 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_3D_FP32_ChannelsFirst) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 3, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout3D::NGCDHW_GKCZYX_NGKDHW, - .data_type = DataType::FP32, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature + { + .spatial_dim = 3, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP32, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor { .config = { .layout = ConvInputLayout3D::NGCDHW } }, + .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout3D::GKCZYX } }, + .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout3D::NGKDHW } } + }; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -29,11 +33,17 @@ TEST(FwdConvInstances, .with_block_gemm(BlockGemmDesc_v1_intrawave); using Builder = ConvBuilder; - run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", - "256, 256, 256, 32", - "Filter1x1Pad0", - "BlkGemmPipelineScheduler: Intrawave", - "BlkGemmPipelineVersion: v1"}); + run_test( + { + "DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", + "256,256,256,32", + "Filter1x1Pad0", + "Intrawave", + "v1", + "NGCDHW,GKCZYX,EmptyTuple,NGKDHW", + "PassThrough,PassThrough,PassThrough", + "MNKPadding" + }); } } // namespace From 05c46bad65726fb4e9ba03c8441ae43e3bc8da6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Mon, 1 Dec 2025 10:19:41 +0000 Subject: [PATCH 22/41] Fix conv traits and descriptors tests. 
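
The reflection helpers still assumed the old fused layout enums, so the
traits and description tests broke once the signature split layouts per
tensor. ConvTraits now reports one layout entry per tensor (input,
weight, output), and the description printer emits a separate line for
each instead of a single "Memory Layout" line.

A minimal sketch of reading the new traits; illustrative only, with
Instance standing in for any concrete CK kernel instance type accepted
by ConvTraits:

    // Sketch: layout[0..2] hold the input, weight, and output layouts.
    template <typename Instance>
    consteval auto input_layout_2d()
    {
        using Traits = ConvTraits<Instance>;
        static_assert(Traits::spatial_dim == 2);
        return Traits::layout[0]._input_layout._2d; // e.g. ConvInputLayout2D::NHWGC
    }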
--- .../builder/conv_signature_concepts.hpp | 4 +- .../builder/reflect/conv_description.hpp | 44 ++++++++++- .../ck_tile/builder/reflect/conv_traits.hpp | 78 +++++++++++++------ .../builder/include/ck_tile/builder/types.hpp | 66 ++++++++++------ .../builder/test/conv/test_conv_traits.cpp | 15 +++- .../builder/test/test_conv_description.cpp | 77 ++++++++++++++---- 6 files changed, 213 insertions(+), 71 deletions(-) diff --git a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp index b0f13ff58c..23ec72e1b8 100644 --- a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp @@ -84,7 +84,9 @@ concept HasConvolutionDirection = requires(T t) { // Note: it is not required to provide an ElementwiseOp, but if one is provided, check if well // defined template -concept ElementwiseOpWellDefinedIfProvided = requires { !HasTensorOp || TensorOperatorDescriptor;}; +concept ElementwiseOpWellDefinedIfProvided = !HasTensorOp || requires(T t) { + requires TensorOperatorDescriptor; +}; // Note: it is not required to provide a convolution, but if one is provided, check if well defined template diff --git a/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp b/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp index 08e506b614..eda93ff551 100644 --- a/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp +++ b/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp @@ -22,8 +22,12 @@ struct ConvSignatureInfo { int spatial_dim; builder::ConvDirection direction; - std::variant - layout; + std::variant + input_layout; + std::variant + weight_layout; + std::variant + output_layout; builder::DataType data_type; builder::ElementwiseOperation input_element_op; builder::ElementwiseOperation weight_element_op; @@ -69,7 +73,9 @@ struct ConvDescription f.writeLine(0, signature.spatial_dim, "D ", signature.direction, " Convolution Kernel"); f.writeLine(1, "Signature"); f.writeLine(2, "Tensor Type: ", signature.data_type); - f.writeLine(2, "Memory Layout: ", signature.layout); + f.writeLine(2, "Input Layout: ", signature.input_layout); + f.writeLine(2, "Weight Layout: ", signature.weight_layout); + f.writeLine(2, "Output Layout: ", signature.output_layout); f.writeLine(2, "Input elementwise operation: ", signature.input_element_op); f.writeLine(2, "Weights elementwise operation: ", signature.weight_element_op); f.writeLast(2, "Output elementwise operation: ", signature.output_element_op); @@ -235,10 +241,40 @@ ConvDescription Describe() { using Traits = ConvTraits; + // TODO: This is a temporary fix. We should refactor also the traits and descriptors to better reflect the conv signature. 
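+    // Index convention matches conv_layout() in conv_traits.hpp:
+    // layout[0] = input, layout[1] = weight, layout[2] = output.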
+ auto get_input_layout = []() -> decltype(ConvSignatureInfo::input_layout) { + if constexpr(Traits::spatial_dim == 1) + return Traits::layout[0]._input_layout._1d; + else if constexpr(Traits::spatial_dim == 2) + return Traits::layout[0]._input_layout._2d; + else + return Traits::layout[0]._input_layout._3d; + }; + + auto get_weight_layout = []() -> decltype(ConvSignatureInfo::weight_layout) { + if constexpr(Traits::spatial_dim == 1) + return Traits::layout[1]._weight_layout._1d; + else if constexpr(Traits::spatial_dim == 2) + return Traits::layout[1]._weight_layout._2d; + else + return Traits::layout[1]._weight_layout._3d; + }; + + auto get_output_layout = []() -> decltype(ConvSignatureInfo::output_layout) { + if constexpr(Traits::spatial_dim == 1) + return Traits::layout[2]._output_layout._1d; + else if constexpr(Traits::spatial_dim == 2) + return Traits::layout[2]._output_layout._2d; + else + return Traits::layout[2]._output_layout._3d; + }; + return ConvDescription{ .signature = ConvSignatureInfo{.spatial_dim = Traits::spatial_dim, .direction = Traits::direction, - .layout = Traits::layout, + .input_layout = get_input_layout(), + .weight_layout = get_weight_layout(), + .output_layout = get_output_layout(), .data_type = Traits::data_type, .input_element_op = Traits::input_element_op, .weight_element_op = Traits::weight_element_op, diff --git a/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp b/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp index 181f174c29..2999e59352 100644 --- a/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp +++ b/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp @@ -314,22 +314,34 @@ constexpr auto conv_layout() if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout1D::GNWC_GKXC_GNWK; + return std::array{ + builder::ConvInputLayout1D::GNWC, + builder::ConvWeightLayout1D::GKXC, + builder::ConvOutputLayout1D::GNWK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout1D::NWGC_GKXC_NWGK; + return std::array{ + builder::ConvInputLayout1D::NWGC, + builder::ConvWeightLayout1D::GKXC, + builder::ConvOutputLayout1D::NWGK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout1D::NGCW_GKXC_NGKW; + return std::array{ + builder::ConvInputLayout1D::NGCW, + builder::ConvWeightLayout1D::GKXC, + builder::ConvOutputLayout1D::NGKW}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout1D::NGCW_GKCX_NGKW; + return std::array{ + builder::ConvInputLayout1D::NGCW, + builder::ConvWeightLayout1D::GKCX, + builder::ConvOutputLayout1D::NGKW}; } } else if constexpr(InstTraits::kSpatialDim == 2) @@ -337,25 +349,37 @@ constexpr auto conv_layout() if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout2D::GNHWC_GKYXC_GNHWK; + return std::array{ + builder::ConvInputLayout2D::GNHWC, + builder::ConvWeightLayout2D::GKYXC, + builder::ConvOutputLayout2D::GNHWK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout2D::NHWGC_GKYXC_NHWGK; + return std::array{ + builder::ConvInputLayout2D::NHWGC, + builder::ConvWeightLayout2D::GKYXC, + builder::ConvOutputLayout2D::NHWGK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout2D::NGCHW_GKYXC_NGKHW; + return std::array{ + 
builder::ConvInputLayout2D::NGCHW, + builder::ConvWeightLayout2D::GKYXC, + builder::ConvOutputLayout2D::NGKHW}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout2D::NGCHW_GKCYX_NGKHW; + return std::array{ + builder::ConvInputLayout2D::NGCHW, + builder::ConvWeightLayout2D::GKCYX, + builder::ConvOutputLayout2D::NGKHW}; } } else if constexpr(InstTraits::kSpatialDim == 3) @@ -363,25 +387,37 @@ constexpr auto conv_layout() if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout3D::GNDHWC_GKZYXC_GNDHWK; + return std::array{ + builder::ConvInputLayout3D::GNDHWC, + builder::ConvWeightLayout3D::GKZYXC, + builder::ConvOutputLayout3D::GNDHWK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout3D::NDHWGC_GKZYXC_NDHWGK; + return std::array{ + builder::ConvInputLayout3D::NDHWGC, + builder::ConvWeightLayout3D::GKZYXC, + builder::ConvOutputLayout3D::NDHWGK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout3D::NGCDHW_GKZYXC_NGKDHW; + return std::array{ + builder::ConvInputLayout3D::NGCDHW, + builder::ConvWeightLayout3D::GKZYXC, + builder::ConvOutputLayout3D::NGKDHW}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout3D::NGCDHW_GKCZYX_NGKDHW; + return std::array{ + builder::ConvInputLayout3D::NGCDHW, + builder::ConvWeightLayout3D::GKCZYX, + builder::ConvOutputLayout3D::NGKDHW}; } } } @@ -433,22 +469,10 @@ template constexpr builder::ElementwiseOperation elementwise_op() { constexpr std::string_view name = detail::elementwise_op_name(); - if constexpr(detail::case_insensitive_equal(name, "Bias")) - { - return builder::ElementwiseOperation::BIAS; - } - else if constexpr(detail::case_insensitive_equal(name, "BiasClamp")) - { - return builder::ElementwiseOperation::BIAS_CLAMP; - } - else if constexpr(detail::case_insensitive_equal(name, "BiasBnormClamp")) + if constexpr(detail::case_insensitive_equal(name, "BiasBnormClamp")) { return builder::ElementwiseOperation::BIAS_BNORM_CLAMP; } - else if constexpr(detail::case_insensitive_equal(name, "Bilinear")) - { - return builder::ElementwiseOperation::BILINEAR; - } else if constexpr(detail::case_insensitive_equal(name, "Clamp")) { return builder::ElementwiseOperation::CLAMP; @@ -461,6 +485,10 @@ constexpr builder::ElementwiseOperation elementwise_op() { return builder::ElementwiseOperation::PASS_THROUGH; } + else if constexpr(detail::case_insensitive_equal(name, "ScaleAddScaleAddRelu")) + { + return builder::ElementwiseOperation::SCALEADD_SCALEADD_RELU; + } } /// @brief Derives a gemm padding from a kernel instance type. 
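
The elementwise_op() mapping above matches CK operator type names
case-insensitively against the builder enum. A standalone sketch of the
comparison it relies on (a simplified stand-in, not the project's
detail::case_insensitive_equal itself):

    #include <cstddef>
    #include <string_view>

    // Constexpr-friendly, ASCII-only case-insensitive comparison.
    constexpr bool case_insensitive_equal(std::string_view a, std::string_view b)
    {
        if(a.size() != b.size())
            return false;
        for(std::size_t i = 0; i < a.size(); ++i)
        {
            // Fold upper case to lower case by hand; std::tolower is not constexpr.
            const char ca = (a[i] >= 'A' && a[i] <= 'Z') ? char(a[i] - 'A' + 'a') : a[i];
            const char cb = (b[i] >= 'A' && b[i] <= 'Z') ? char(b[i] - 'A' + 'a') : b[i];
            if(ca != cb)
                return false;
        }
        return true;
    }

    static_assert(case_insensitive_equal("ScaleAddScaleAddRelu", "scaleaddscaleaddrelu"));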
diff --git a/experimental/builder/include/ck_tile/builder/types.hpp b/experimental/builder/include/ck_tile/builder/types.hpp index 51231e7c54..1970f86dd8 100644 --- a/experimental/builder/include/ck_tile/builder/types.hpp +++ b/experimental/builder/include/ck_tile/builder/types.hpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include namespace ck_tile::builder { @@ -74,6 +76,12 @@ struct ConvInputLayout constexpr ConvInputLayout(ConvInputLayout1D layout) : _1d(layout) {} constexpr ConvInputLayout(ConvInputLayout2D layout) : _2d(layout) {} constexpr ConvInputLayout(ConvInputLayout3D layout) : _3d(layout) {} + + friend constexpr bool operator==(const ConvInputLayout& lhs, const ConvInputLayout& rhs) + { + return std::bit_cast>(lhs) == + std::bit_cast>(rhs); + } }; enum class ConvWeightLayout1D @@ -114,6 +122,12 @@ struct ConvWeightLayout constexpr ConvWeightLayout(ConvWeightLayout1D layout) : _1d(layout) {} constexpr ConvWeightLayout(ConvWeightLayout2D layout) : _2d(layout) {} constexpr ConvWeightLayout(ConvWeightLayout3D layout) : _3d(layout) {} + + friend constexpr bool operator==(const ConvWeightLayout& lhs, const ConvWeightLayout& rhs) + { + return std::bit_cast>(lhs) == + std::bit_cast>(rhs); + } }; enum class ConvOutputLayout1D @@ -157,6 +171,12 @@ struct ConvOutputLayout constexpr ConvOutputLayout(ConvOutputLayout1D layout) : _1d(layout) {} constexpr ConvOutputLayout(ConvOutputLayout2D layout) : _2d(layout) {} constexpr ConvOutputLayout(ConvOutputLayout3D layout) : _3d(layout) {} + + friend constexpr bool operator==(const ConvOutputLayout& lhs, const ConvOutputLayout& rhs) + { + return std::bit_cast>(lhs) == + std::bit_cast>(rhs); + } }; struct ConvAuxiliaryTensorLayout @@ -171,6 +191,13 @@ struct ConvAuxiliaryTensorLayout constexpr ConvAuxiliaryTensorLayout(ConvOutputLayout1D layout) : _conv_output_layout(layout) {} constexpr ConvAuxiliaryTensorLayout(ConvOutputLayout2D layout) : _conv_output_layout(layout) {} constexpr ConvAuxiliaryTensorLayout(ConvOutputLayout3D layout) : _conv_output_layout(layout) {} + + friend constexpr bool operator==(const ConvAuxiliaryTensorLayout& lhs, + const ConvAuxiliaryTensorLayout& rhs) + { + return std::bit_cast>(lhs) == + std::bit_cast>(rhs); + } }; struct ConvLayout @@ -195,6 +222,12 @@ struct ConvLayout constexpr ConvLayout(ConvOutputLayout2D layout) : _output_layout(layout) {} constexpr ConvLayout(ConvOutputLayout3D layout) : _output_layout(layout) {} constexpr ConvLayout(BiasLayout layout) : _aux_tensor_layout(layout) {} + + friend constexpr bool operator==(const ConvLayout& lhs, const ConvLayout& rhs) + { + return std::bit_cast>(lhs) == + std::bit_cast>(rhs); + } }; // Direction of the convolution operation. 
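
The operator== overloads added above compare each wrapper's object
representation with std::bit_cast, so equality stays constexpr-friendly
without tracking which dimensionality member was set. A simplified,
self-contained illustration of the pattern (types and names invented
for the sketch; the real wrappers hold the 1D/2D/3D enums shown
earlier):

    #include <array>
    #include <bit>
    #include <cstdint>

    enum class Layout1D : std::uint8_t { NWGC = 1 };
    enum class Layout2D : std::uint8_t { NHWGC = 1 };

    // Stand-in for the layout wrappers: one member per dimensionality.
    struct Layout
    {
        Layout1D _1d{};
        Layout2D _2d{};
        constexpr Layout(Layout1D l) : _1d(l) {}
        constexpr Layout(Layout2D l) : _2d(l) {}
    };

    constexpr bool operator==(const Layout& lhs, const Layout& rhs)
    {
        // Comparing raw bytes avoids storing a discriminator.
        using Bytes = std::array<std::uint8_t, sizeof(Layout)>;
        return std::bit_cast<Bytes>(lhs) == std::bit_cast<Bytes>(rhs);
    }

    static_assert(Layout{Layout1D::NWGC} == Layout{Layout1D::NWGC});
    static_assert(!(Layout{Layout1D::NWGC} == Layout{Layout2D::NHWGC}));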
@@ -588,28 +621,17 @@ inline std::ostream& operator<<(std::ostream& os, ConvOutputLayout3D layout) } } -// inline std::ostream& operator<<(std::ostream& os, ConvInputBiasLayout layout) -// { -// using enum ConvInputBiasLayout; -// switch(layout) -// { -// case GC: return os << "GC"; -// case G_C_strided: return os << "G_C_strided"; -// default: return os << "Unknown"; -// } -// } - -// inline std::ostream& operator<<(std::ostream& os, ConvOutputBiasLayout layout) -// { -// using enum ConvOutputBiasLayout; -// switch(layout) -// { -// case GK: return os << "GK"; -// case G_K_strided: return os << "G_K_strided"; -// default: return os << "Unknown"; -// } -// } - +inline std::ostream& operator<<(std::ostream& os, BiasLayout layout) +{ + using enum BiasLayout; + switch(layout) + { + case GC: return os << "GC"; + case G_C_strided: return os << "G_C_strided"; + case G_K_strided: return os << "G_K_strided"; + default: return os << "Unknown"; + } +} inline std::ostream& operator<<(std::ostream& os, const std::variant); + +struct TensorConfig +{ + ckb::ConvLayout layout; + ckb::DataType data_type{ckb::DataType::UNDEFINDED}; + ckb::DataType compute_type{ckb::DataType::UNDEFINDED}; +}; + +struct ConvTensorSimple { + TensorConfig config; +}; + +struct ConvTensorWithOp { + TensorConfig config; + TensorOp operation{}; +}; + +struct ConvTensorWithInvalidOp { + TensorConfig config; + InvalidTensorOp operation{}; +}; + // Defines the signature of the convolution operation to be tested. // This includes dimensionality, direction, data layout, and data type. struct ConvSignature { - int spatial_dim = 2; - ckb::GroupConvLayout layout = ckb::GroupConvLayout2D::GNHWC_GKYXC_GNHWK; - ckb::DataType data_type = ckb::DataType::FP16; - // ckb::GroupConvDeviceOp device_operation = - // ckb::FwdGroupConvDeviceOperation::DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3; + int spatial_dim = 2; + ckb::DataType data_type = ckb::DataType::FP16; + ckb::DataType accumulation_data_type = ckb::DataType::FP32; + ConvTensorSimple input = {.config = {ckb::ConvInputLayout2D::GNHWC}}; + ConvTensorSimple weight = {.config = {ckb::ConvWeightLayout2D::GKYXC}}; + ConvTensorSimple output = {.config = {ckb::ConvOutputLayout2D::GNHWK}}; }; static_assert(ckb::ConvSignatureDescriptor); @@ -32,24 +65,34 @@ static_assert(ckb::ConvSignatureDescriptor); struct ConvSignatureWithOptionalParams { int spatial_dim = 2; - ckb::ConvDirection direction = ckb::ConvDirection::FORWARD; - ckb::GroupConvLayout layout = ckb::GroupConvLayout2D::GNHWC_GKYXC_GNHWK; ckb::DataType data_type = ckb::DataType::FP16; - ckb::ElementwiseOperation elementwise_operation = ckb::ElementwiseOperation::PASS_THROUGH; + ckb::DataType accumulation_data_type = ckb::DataType::FP32; + ckb::ConvDirection direction = ckb::ConvDirection::FORWARD; + ConvTensorWithOp input = + { + .config = {ckb::ConvInputLayout2D::GNHWC, ckb::DataType::FP16}, + }; + ConvTensorWithOp weight = + { + .config = {ckb::ConvWeightLayout2D::GKYXC, ckb::DataType::FP16} + }; + ConvTensorWithOp output = + { + .config = {ckb::ConvOutputLayout2D::GNHWK, ckb::DataType::FP16}, + .operation = {ckb::ElementwiseOperation::SCALE} + }; }; static_assert(ckb::ConvSignatureDescriptor); struct ConvSignatureWithInvalidOptionalParams { int spatial_dim = 2; - ckb::ConvDirection direction = ckb::ConvDirection::FORWARD; - ckb::GroupConvLayout layout = ckb::GroupConvLayout2D::GNHWC_GKYXC_GNHWK; - ckb::DataType data_type = ckb::DataType::FP16; - int elementwise_operation = 7; // this should fail - // ckb::GroupConvDeviceOp 
device_operation =
-    // ckb::FwdGroupConvDeviceOperation::DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3;
+    ckb::DataType data_type = ckb::DataType::FP16;
+    ckb::DataType accumulation_data_type = ckb::DataType::FP32;
+    ConvTensorWithInvalidOp input = {.config = {ckb::ConvInputLayout2D::GNHWC}};
+    ConvTensorWithInvalidOp weight = {.config = {ckb::ConvWeightLayout2D::GKYXC}};
+    ConvTensorWithInvalidOp output = {.config = {ckb::ConvOutputLayout2D::GNHWK}};
 };
-
 static_assert(!ckb::ConvSignatureDescriptor);
 
 struct DefaultAlgorithm
@@ -123,7 +166,9 @@ TEST(ConvDescriptionTest, DefaultInstanceHasDetailedDescription)
        "2D Forward Convolution Kernel\n"
        "├─ Signature\n"
        "│ ├─ Tensor Type: FP16\n"
-       "│ ├─ Memory Layout: GNHWC_GKYXC_GNHWK\n"
+       "│ ├─ Input Layout: GNHWC\n"
+       "│ ├─ Weight Layout: GKYXC\n"
+       "│ ├─ Output Layout: GNHWK\n"
        "│ ├─ Input elementwise operation: PASS_THROUGH\n"
        "│ ├─ Weights elementwise operation: PASS_THROUGH\n"
        "│ └─ Output elementwise operation: PASS_THROUGH\n"

From bab26ee910c1652bd9be73da06032d6f0d5c75fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <>
Date: Mon, 1 Dec 2025 11:37:48 +0000
Subject: [PATCH 23/41] Move factory assets under a separate directory.

---
 .../include/ck_tile/builder/conv_builder.hpp  |  2 +-
 .../ck_tile/builder/conv_signature_utils.hpp  | 31 -------------------
 .../builder/{ => factory}/conv_factory.hpp    |  7 ++---
 .../helpers}/conv_data_type_utils.hpp         |  0
 .../helpers}/conv_elementwise_op_utils.hpp    |  0
 .../helpers}/conv_layout_utils.hpp            |  0
 6 files changed, 4 insertions(+), 36 deletions(-)
 delete mode 100644 experimental/builder/include/ck_tile/builder/conv_signature_utils.hpp
 rename experimental/builder/include/ck_tile/builder/{ => factory}/conv_factory.hpp (99%)
 rename experimental/builder/include/ck_tile/builder/{ => factory/helpers}/conv_data_type_utils.hpp (100%)
 rename experimental/builder/include/ck_tile/builder/{ => factory/helpers}/conv_elementwise_op_utils.hpp (100%)
 rename experimental/builder/include/ck_tile/builder/{ => factory/helpers}/conv_layout_utils.hpp (100%)

diff --git a/experimental/builder/include/ck_tile/builder/conv_builder.hpp b/experimental/builder/include/ck_tile/builder/conv_builder.hpp
index bf63bc83f6..78f9d9b7c4 100644
--- a/experimental/builder/include/ck_tile/builder/conv_builder.hpp
+++ b/experimental/builder/include/ck_tile/builder/conv_builder.hpp
@@ -6,7 +6,7 @@
 #include
 #include
 
-#include "ck_tile/builder/conv_factory.hpp"
+#include "ck_tile/builder/factory/conv_factory.hpp"
 #include "ck_tile/builder/versions.hpp"
 
 namespace ck_tile::builder {
diff --git a/experimental/builder/include/ck_tile/builder/conv_signature_utils.hpp b/experimental/builder/include/ck_tile/builder/conv_signature_utils.hpp
deleted file mode 100644
index bf1840caf4..0000000000
--- a/experimental/builder/include/ck_tile/builder/conv_signature_utils.hpp
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
-// SPDX-License-Identifier: MIT - -#pragma once - -#include -#include - -#include "ck_tile/builder/types.hpp" - -namespace ck_tile::builder { -/********************************************** - * constexpr helper functions for optional parameters - **********************************************/ - -template -concept ProvidesConvolutionDirection = requires { Sig.direction; }; - -template -constexpr auto get_conv_direction() -{ - if constexpr(ProvidesConvolutionDirection) - { - return Sig.direction; - } - else - { - return ConvDirection::FORWARD; - } -} -} // namespace ck_tile::builder diff --git a/experimental/builder/include/ck_tile/builder/conv_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_factory.hpp similarity index 99% rename from experimental/builder/include/ck_tile/builder/conv_factory.hpp rename to experimental/builder/include/ck_tile/builder/factory/conv_factory.hpp index ec138ae182..a1fdb01336 100644 --- a/experimental/builder/include/ck_tile/builder/conv_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_factory.hpp @@ -57,10 +57,9 @@ #include "ck_tile/builder/types.hpp" #include "ck_tile/builder/versions.hpp" -#include "ck_tile/builder/conv_signature_utils.hpp" -#include "ck_tile/builder/conv_layout_utils.hpp" -#include "ck_tile/builder/conv_data_type_utils.hpp" -#include "ck_tile/builder/conv_elementwise_op_utils.hpp" +#include "ck_tile/builder/factory/helpers/conv_layout_utils.hpp" +#include "ck_tile/builder/factory/helpers/conv_data_type_utils.hpp" +#include "ck_tile/builder/factory/helpers/conv_elementwise_op_utils.hpp" namespace ck_tile::builder::factory_internal { diff --git a/experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_data_type_utils.hpp similarity index 100% rename from experimental/builder/include/ck_tile/builder/conv_data_type_utils.hpp rename to experimental/builder/include/ck_tile/builder/factory/helpers/conv_data_type_utils.hpp diff --git a/experimental/builder/include/ck_tile/builder/conv_elementwise_op_utils.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op_utils.hpp similarity index 100% rename from experimental/builder/include/ck_tile/builder/conv_elementwise_op_utils.hpp rename to experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op_utils.hpp diff --git a/experimental/builder/include/ck_tile/builder/conv_layout_utils.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_layout_utils.hpp similarity index 100% rename from experimental/builder/include/ck_tile/builder/conv_layout_utils.hpp rename to experimental/builder/include/ck_tile/builder/factory/helpers/conv_layout_utils.hpp From 8a9e22ca4e861ab042caf9589cd597aaaa192274 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Mon, 1 Dec 2025 12:30:37 +0000 Subject: [PATCH 24/41] Fix building conv traits. 
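
The previous commit moved the factory headers under factory/, but
conv_traits.hpp still pulled conv_factory.hpp from its old location and
no longer built. Given the rename in the previous commit, the one-line
include update below presumably reads:

    // before the move:
    #include "ck_tile/builder/conv_factory.hpp"
    // after the move:
    #include "ck_tile/builder/factory/conv_factory.hpp"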
--- .../builder/include/ck_tile/builder/reflect/conv_traits.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp b/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp index 3df343ee22..b89346fe58 100644 --- a/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp +++ b/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include #include From 979a8516d622257e9fbf83ff606f393b1c334277 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Mon, 1 Dec 2025 12:40:34 +0000 Subject: [PATCH 25/41] Fix clang-format. --- .../builder/conv_signature_concepts.hpp | 31 +++-- .../ck_tile/builder/factory/conv_factory.hpp | 116 ++++++++++-------- .../factory/helpers/conv_data_type_utils.hpp | 69 ++++++----- .../helpers/conv_elementwise_op_utils.hpp | 31 +++-- .../factory/helpers/conv_layout_utils.hpp | 94 ++++++++------ .../builder/reflect/conv_description.hpp | 11 +- .../ck_tile/builder/reflect/conv_traits.hpp | 84 ++++++------- .../builder/include/ck_tile/builder/types.hpp | 24 ++-- .../test/conv/test_ckb_conv_fwd_1d_bf16.cpp | 22 ++-- .../test/conv/test_ckb_conv_fwd_1d_fp16.cpp | 33 +++-- .../test/conv/test_ckb_conv_fwd_1d_i8.cpp | 30 ++--- .../test/conv/test_ckb_conv_fwd_2d_bf16.cpp | 51 ++++---- ...est_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp | 44 +++---- .../conv/test_ckb_conv_fwd_2d_dl_fp16.cpp | 66 +++++----- .../test/conv/test_ckb_conv_fwd_2d_fp16.cpp | 37 +++--- .../test/conv/test_ckb_conv_fwd_2d_fp32.cpp | 37 +++--- .../test/conv/test_ckb_conv_fwd_2d_fp8.cpp | 33 +++-- ...test_ckb_conv_fwd_2d_large_tensor_fp16.cpp | 66 +++++----- .../test/conv/test_ckb_conv_fwd_3d_bf16.cpp | 37 +++--- .../test/conv/test_ckb_conv_fwd_3d_fp16.cpp | 37 +++--- .../test/conv/test_ckb_conv_fwd_3d_fp32.cpp | 37 +++--- .../builder/test/conv/test_conv_traits.cpp | 24 ++-- .../test/impl/conv_algorithm_types.hpp | 2 +- .../test/impl/conv_signature_types.hpp | 5 +- .../builder/test/test_conv_description.cpp | 56 ++++----- 25 files changed, 520 insertions(+), 557 deletions(-) diff --git a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp index 23ec72e1b8..234cfe685d 100644 --- a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp @@ -30,13 +30,15 @@ concept ConvSpatialDim = std::is_integral_v && (N == 1 || N == 2 || // Constrains convolution data types to common floating-point types. template -concept ValidConvDataType = (T == DataType::FP32) || (T == DataType::FP16) || (T == DataType::BF16) || - (T == DataType::FP8) || (T == DataType::I8) || (T == DataType::U8); +concept ValidConvDataType = + (T == DataType::FP32) || (T == DataType::FP16) || (T == DataType::BF16) || + (T == DataType::FP8) || (T == DataType::I8) || (T == DataType::U8); template concept TensorConfigDescriptor = requires(T t) { { t.layout } -> std::convertible_to; - // Only require that data type is defined. It might be set to undefined value, in which case the signature's data type is used. + // Only require that data type is defined. It might be set to undefined value, in which case the + // signature's data type is used. 
{ t.data_type } -> std::convertible_to; }; @@ -46,16 +48,20 @@ concept HasAuxiliaryOperandConfigs = requires(T t) { }; namespace detail { - template - struct IsArrayOfTensorConfigDescriptors : std::false_type {}; +template +struct IsArrayOfTensorConfigDescriptors : std::false_type +{ +}; - template - requires TensorConfigDescriptor - struct IsArrayOfTensorConfigDescriptors> : std::true_type {}; -} +template + requires TensorConfigDescriptor +struct IsArrayOfTensorConfigDescriptors> : std::true_type +{ +}; +} // namespace detail template -concept ConvertibleToArrayOfTensorConfigs = +concept ConvertibleToArrayOfTensorConfigs = detail::IsArrayOfTensorConfigDescriptors>::value; template @@ -84,9 +90,8 @@ concept HasConvolutionDirection = requires(T t) { // Note: it is not required to provide an ElementwiseOp, but if one is provided, check if well // defined template -concept ElementwiseOpWellDefinedIfProvided = !HasTensorOp || requires(T t) { - requires TensorOperatorDescriptor; -}; +concept ElementwiseOpWellDefinedIfProvided = + !HasTensorOp || requires(T t) { requires TensorOperatorDescriptor; }; // Note: it is not required to provide a convolution, but if one is provided, check if well defined template diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_factory.hpp index a1fdb01336..45a5b2853f 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_factory.hpp @@ -63,28 +63,43 @@ namespace ck_tile::builder::factory_internal { -template -consteval auto get_input_layout_value(ConvInputLayout layout) { - if constexpr (SPATIAL_DIM == 1) return layout._1d; - else if constexpr (SPATIAL_DIM == 2) return layout._2d; - else if constexpr (SPATIAL_DIM == 3) return layout._3d; - else static_assert(false, "Unsupported spatial dimension"); +template +consteval auto get_input_layout_value(ConvInputLayout layout) +{ + if constexpr(SPATIAL_DIM == 1) + return layout._1d; + else if constexpr(SPATIAL_DIM == 2) + return layout._2d; + else if constexpr(SPATIAL_DIM == 3) + return layout._3d; + else + static_assert(false, "Unsupported spatial dimension"); } -template -consteval auto get_weight_layout_value(ConvWeightLayout layout) { - if constexpr (SPATIAL_DIM == 1) return layout._1d; - else if constexpr (SPATIAL_DIM == 2) return layout._2d; - else if constexpr (SPATIAL_DIM == 3) return layout._3d; - else static_assert(false, "Unsupported spatial dimension"); +template +consteval auto get_weight_layout_value(ConvWeightLayout layout) +{ + if constexpr(SPATIAL_DIM == 1) + return layout._1d; + else if constexpr(SPATIAL_DIM == 2) + return layout._2d; + else if constexpr(SPATIAL_DIM == 3) + return layout._3d; + else + static_assert(false, "Unsupported spatial dimension"); } -template -consteval auto get_output_layout_value(ConvOutputLayout layout) { - if constexpr (SPATIAL_DIM == 1) return layout._1d; - else if constexpr (SPATIAL_DIM == 2) return layout._2d; - else if constexpr (SPATIAL_DIM == 3) return layout._3d; - else static_assert(false, "Unsupported spatial dimension"); +template +consteval auto get_output_layout_value(ConvOutputLayout layout) +{ + if constexpr(SPATIAL_DIM == 1) + return layout._1d; + else if constexpr(SPATIAL_DIM == 2) + return layout._2d; + else if constexpr(SPATIAL_DIM == 3) + return layout._3d; + else + static_assert(false, "Unsupported spatial dimension"); } // The algorithm specializations for the convolution 
and GEMM. @@ -333,13 +348,12 @@ template { static constexpr size_t SPATIAL_DIM = SIGNATURE.spatial_dim; - using Layouts = decltype(factory_internal::GetTensorLayout()); - using AuxiliaryLayouts = decltype(factory_internal::GetAuxiliaryTensorLayouts()); - + using Layouts = decltype(factory_internal:: + GetTensorLayout()); + using AuxiliaryLayouts = + decltype(factory_internal:: + GetAuxiliaryTensorLayouts()); + using Types = factory_internal::FwdConvTensorDataTypes; using Ops = decltype(factory_internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); @@ -439,13 +453,12 @@ template { static constexpr size_t SPATIAL_DIM = SIGNATURE.spatial_dim; - using Layouts = decltype(factory_internal::GetTensorLayout()); - using AuxiliaryLayouts = decltype(factory_internal::GetAuxiliaryTensorLayouts()); - + using Layouts = decltype(factory_internal:: + GetTensorLayout()); + using AuxiliaryLayouts = + decltype(factory_internal:: + GetAuxiliaryTensorLayouts()); + using Types = factory_internal::FwdConvTensorDataTypes; using Ops = decltype(factory_internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); @@ -540,13 +553,12 @@ template { static constexpr size_t SPATIAL_DIM = SIGNATURE.spatial_dim; - using Layouts = decltype(factory_internal::GetTensorLayout()); - using AuxiliaryLayouts = decltype(factory_internal::GetAuxiliaryTensorLayouts()); - + using Layouts = decltype(factory_internal:: + GetTensorLayout()); + using AuxiliaryLayouts = + decltype(factory_internal:: + GetAuxiliaryTensorLayouts()); + using Types = factory_internal::FwdConvTensorDataTypes; using Ops = decltype(factory_internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); @@ -640,13 +652,12 @@ template { static constexpr size_t SPATIAL_DIM = SIGNATURE.spatial_dim; - using Layouts = decltype(factory_internal::GetTensorLayout()); - using AuxiliaryLayouts = decltype(factory_internal::GetAuxiliaryTensorLayouts()); - + using Layouts = decltype(factory_internal:: + GetTensorLayout()); + using AuxiliaryLayouts = + decltype(factory_internal:: + GetAuxiliaryTensorLayouts()); + using Types = factory_internal::FwdConvTensorDataTypes; using Ops = decltype(factory_internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); @@ -767,13 +778,12 @@ template { static constexpr size_t SPATIAL_DIM = SIGNATURE.spatial_dim; - using Layouts = decltype(factory_internal::GetTensorLayout()); - using AuxiliaryLayouts = decltype(factory_internal::GetAuxiliaryTensorLayouts()); - + using Layouts = decltype(factory_internal:: + GetTensorLayout()); + using AuxiliaryLayouts = + decltype(factory_internal:: + GetAuxiliaryTensorLayouts()); + using Types = factory_internal::FwdConvTensorDataTypes; using Ops = decltype(factory_internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); diff --git a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_data_type_utils.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_data_type_utils.hpp index 3c01833154..4a23894041 100644 --- a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_data_type_utils.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_data_type_utils.hpp @@ -6,8 +6,7 @@ #include "ck_tile/builder/conv_signature_concepts.hpp" #include "ck_tile/builder/types.hpp" -namespace ck_tile::builder::factory_internal -{ +namespace ck_tile::builder::factory_internal { struct CK_half { @@ -47,27 +46,27 @@ struct CK_empty_tuple template consteval auto ConvertDataTypeToCK() { - if 
constexpr (dt == DataType::FP16) + if constexpr(dt == DataType::FP16) { return CK_half{}; } - else if constexpr (dt == DataType::BF16) + else if constexpr(dt == DataType::BF16) { return CK_bhalf{}; } - else if constexpr (dt == DataType::FP32) + else if constexpr(dt == DataType::FP32) { return CK_float{}; } - else if constexpr (dt == DataType::INT32) + else if constexpr(dt == DataType::INT32) { return CK_int32{}; } - else if constexpr (dt == DataType::I8) + else if constexpr(dt == DataType::I8) { return CK_int8{}; } - else if constexpr (dt == DataType::FP8) + else if constexpr(dt == DataType::FP8) { return CK_f8{}; } @@ -81,24 +80,28 @@ consteval auto ConvertDataTypeToCK() template consteval auto GetTensorDataAndComputeTypes() { - constexpr auto data_type = Config.data_type; + constexpr auto data_type = Config.data_type; constexpr auto compute_type = Config.compute_type; - if constexpr (data_type == DataType::UNDEFINDED && compute_type == DataType::UNDEFINDED) + if constexpr(data_type == DataType::UNDEFINDED && compute_type == DataType::UNDEFINDED) { - return std::make_pair(ConvertDataTypeToCK(), ConvertDataTypeToCK()); + return std::make_pair(ConvertDataTypeToCK(), + ConvertDataTypeToCK()); } - else if constexpr (data_type == DataType::UNDEFINDED) + else if constexpr(data_type == DataType::UNDEFINDED) { - return std::make_pair(ConvertDataTypeToCK(), ConvertDataTypeToCK()); + return std::make_pair(ConvertDataTypeToCK(), + ConvertDataTypeToCK()); } - else if constexpr (compute_type == DataType::UNDEFINDED) + else if constexpr(compute_type == DataType::UNDEFINDED) { - return std::make_pair(ConvertDataTypeToCK(), ConvertDataTypeToCK()); + return std::make_pair(ConvertDataTypeToCK(), + ConvertDataTypeToCK()); } - else + else { - return std::make_pair(ConvertDataTypeToCK(), ConvertDataTypeToCK()); + return std::make_pair(ConvertDataTypeToCK(), + ConvertDataTypeToCK()); } } @@ -106,11 +109,11 @@ template consteval auto GetTensorAccumulationType() { constexpr auto data_type = SignatureAccDataType; - if constexpr (data_type == DataType::UNDEFINDED) + if constexpr(data_type == DataType::UNDEFINDED) { return ConvertDataTypeToCK(); } - else + else { return ConvertDataTypeToCK(); } @@ -120,7 +123,7 @@ template consteval auto GetAuxiliaryTensorDataTypeValue() { constexpr auto data_type = Config.data_type; - if constexpr (data_type == DataType::UNDEFINDED) + if constexpr(data_type == DataType::UNDEFINDED) { return ConvertDataTypeToCK(); } @@ -133,26 +136,31 @@ consteval auto GetAuxiliaryTensorDataTypeValue() template consteval auto GetAuxiliaryTensorDataTypeTuple(std::index_sequence) { - return ck::Tuple())::type...>{}; + return ck::Tuple< + typename decltype(GetAuxiliaryTensorDataTypeValue())::type...>{}; } template struct AuxiliaryTensorDataTypes { static constexpr auto Size = AuxiliaryTensorConfigsValue.size(); - using type = decltype(GetAuxiliaryTensorDataTypeTuple(std::make_index_sequence{})); + using type = + decltype(GetAuxiliaryTensorDataTypeTuple( + std::make_index_sequence{})); }; // TODO: Currently only the ouput tensor can have auxiliary tensors (e.g., bias). 
template -requires (HasElementwiseOpWithAuxiliaryOperands) + requires(HasElementwiseOpWithAuxiliaryOperands) consteval auto GetAuxiliaryTensorDataTypes() { - return AuxiliaryTensorDataTypes{}; + return AuxiliaryTensorDataTypes{}; } template -requires (!HasElementwiseOpWithAuxiliaryOperands) + requires(!HasElementwiseOpWithAuxiliaryOperands) consteval auto GetAuxiliaryTensorDataTypes() { return CK_empty_tuple{}; @@ -172,15 +180,16 @@ struct FwdConvTensorDataTypes using AComputeType = typename decltype(input_types.second)::type; using BDataType = typename decltype(weight_types.first)::type; using BComputeType = typename decltype(weight_types.second)::type; - using AccDataType = typename decltype( - GetTensorAccumulationType())::type; - using EDataType = typename decltype(output_types.first)::type; + using AccDataType = + typename decltype(GetTensorAccumulationType())::type; + using EDataType = typename decltype(output_types.first)::type; // This is the "compute" type for output. - using CShuffleDataType = typename decltype(output_types.second)::type; + using CShuffleDataType = typename decltype(output_types.second)::type; // Data types for the auxiliary tensors (e.g., bias). using DsDataType = typename decltype(GetAuxiliaryTensorDataTypes())::type; }; -} +} // namespace ck_tile::builder::factory_internal diff --git a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op_utils.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op_utils.hpp index 95c777c6ea..2c6a0d4ee1 100644 --- a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op_utils.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op_utils.hpp @@ -6,8 +6,7 @@ #include "ck_tile/builder/conv_signature_concepts.hpp" #include "ck_tile/builder/types.hpp" -namespace ck_tile::builder::factory_internal -{ +namespace ck_tile::builder::factory_internal { struct CK_PassThroughOp { using Op = ck::tensor_operation::element_wise::PassThrough; @@ -33,36 +32,36 @@ struct CK_BiasNormalizeInInferClampOp using Op = ck::tensor_operation::element_wise::BiasNormalizeInInferClamp; }; - template consteval auto GetElementwiseOp() { - if constexpr (HasTensorOp) + if constexpr(HasTensorOp) { constexpr auto op = TensorDesc.operation.elementwise_operation; - if constexpr (op == ElementwiseOperation::SCALE) + if constexpr(op == ElementwiseOperation::SCALE) { return CK_ScaleOp{}; } - else if constexpr (op == ElementwiseOperation::SCALEADD_SCALEADD_RELU) + else if constexpr(op == ElementwiseOperation::SCALEADD_SCALEADD_RELU) { return CK_ScaleAddScaleAddReluOp{}; } - else if constexpr (op == ElementwiseOperation::BIAS_BNORM_CLAMP) + else if constexpr(op == ElementwiseOperation::BIAS_BNORM_CLAMP) { return CK_BiasNormalizeInInferClampOp{}; } - else if constexpr (op == ElementwiseOperation::CLAMP) + else if constexpr(op == ElementwiseOperation::CLAMP) { return CK_ClampOp{}; } - else if constexpr (op == ElementwiseOperation::PASS_THROUGH) + else if constexpr(op == ElementwiseOperation::PASS_THROUGH) { return CK_PassThroughOp{}; } - else + else { - static_assert(sizeof(UnsupportedEnumValue) == 0, "Unsupported elementwise operation!"); + static_assert(sizeof(UnsupportedEnumValue) == 0, + "Unsupported elementwise operation!"); } } else @@ -74,12 +73,12 @@ consteval auto GetElementwiseOp() template struct ElementwiseOps { - static constexpr auto input_op = GetElementwiseOp(); + static constexpr auto input_op = GetElementwiseOp(); static constexpr auto 
weight_op = GetElementwiseOp(); static constexpr auto output_op = GetElementwiseOp(); - using AElementwiseOp = typename decltype(input_op)::Op; - using BElementwiseOp = typename decltype(weight_op)::Op; - using CDEElementwiseOp = typename decltype(output_op)::Op; + using AElementwiseOp = typename decltype(input_op)::Op; + using BElementwiseOp = typename decltype(weight_op)::Op; + using CDEElementwiseOp = typename decltype(output_op)::Op; }; template @@ -88,4 +87,4 @@ constexpr auto GetElementwiseOps() return ElementwiseOps{}; } -} +} // namespace ck_tile::builder::factory_internal diff --git a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_layout_utils.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_layout_utils.hpp index e8e03f949a..ceaa62bbcb 100644 --- a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_layout_utils.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_layout_utils.hpp @@ -6,31 +6,33 @@ #include "ck_tile/builder/conv_signature_concepts.hpp" #include "ck_tile/builder/types.hpp" -namespace ck_tile::builder::factory_internal -{ +namespace ck_tile::builder::factory_internal { -struct EmptyAuxiliaryTensorLayout +struct EmptyAuxiliaryTensorLayout { using DsLayout = ck::Tuple<>; }; template -consteval bool IsGenericBiasLayoutActive() { - return requires { typename std::integral_constant; }; +consteval bool IsGenericBiasLayoutActive() +{ + return requires { + typename std::integral_constant; + }; } template consteval auto GetAuxiliaryTensorLayoutValue() { constexpr auto Layout = Config.layout; - if constexpr (IsGenericBiasLayoutActive()) + if constexpr(IsGenericBiasLayoutActive()) { constexpr auto val = Layout._aux_tensor_layout._bias_layout; - if constexpr (val == BiasLayout::G_K_strided) + if constexpr(val == BiasLayout::G_K_strided) return ck::tensor_layout::convolution::G_K{}; - else if constexpr (val == BiasLayout::GC) + else if constexpr(val == BiasLayout::GC) return ck::tensor_layout::convolution::GC{}; - else if constexpr (val == BiasLayout::G_C_strided) + else if constexpr(val == BiasLayout::G_C_strided) return ck::tensor_layout::convolution::G_C{}; else static_assert(false, "Unsupported generic bias layout"); @@ -38,27 +40,36 @@ consteval auto GetAuxiliaryTensorLayoutValue() else { constexpr auto out_layout = Layout._output_layout; - - if constexpr (SPATIAL_DIM == 1) + + if constexpr(SPATIAL_DIM == 1) { constexpr auto val = out_layout._1d; - if constexpr (val == ConvOutputLayout1D::NWGK) return ck::tensor_layout::convolution::NWGK{}; - else if constexpr (val == ConvOutputLayout1D::NGKW) return ck::tensor_layout::convolution::NGKW{}; - else if constexpr (val == ConvOutputLayout1D::GNWK) return ck::tensor_layout::convolution::GNWK{}; + if constexpr(val == ConvOutputLayout1D::NWGK) + return ck::tensor_layout::convolution::NWGK{}; + else if constexpr(val == ConvOutputLayout1D::NGKW) + return ck::tensor_layout::convolution::NGKW{}; + else if constexpr(val == ConvOutputLayout1D::GNWK) + return ck::tensor_layout::convolution::GNWK{}; } - else if constexpr (SPATIAL_DIM == 2) + else if constexpr(SPATIAL_DIM == 2) { constexpr auto val = out_layout._2d; - if constexpr (val == ConvOutputLayout2D::NHWGK) return ck::tensor_layout::convolution::NHWGK{}; - else if constexpr (val == ConvOutputLayout2D::GNHWK) return ck::tensor_layout::convolution::GNHWK{}; - else if constexpr (val == ConvOutputLayout2D::NGKHW) return ck::tensor_layout::convolution::NGKHW{}; + if constexpr(val == ConvOutputLayout2D::NHWGK) 
+ return ck::tensor_layout::convolution::NHWGK{}; + else if constexpr(val == ConvOutputLayout2D::GNHWK) + return ck::tensor_layout::convolution::GNHWK{}; + else if constexpr(val == ConvOutputLayout2D::NGKHW) + return ck::tensor_layout::convolution::NGKHW{}; } - else if constexpr (SPATIAL_DIM == 3) + else if constexpr(SPATIAL_DIM == 3) { constexpr auto val = out_layout._3d; - if constexpr (val == ConvOutputLayout3D::NDHWGK) return ck::tensor_layout::convolution::NDHWGK{}; - else if constexpr (val == ConvOutputLayout3D::GNDHWK) return ck::tensor_layout::convolution::GNDHWK{}; - else if constexpr (val == ConvOutputLayout3D::NGKDHW) return ck::tensor_layout::convolution::NGKDHW{}; + if constexpr(val == ConvOutputLayout3D::NDHWGK) + return ck::tensor_layout::convolution::NDHWGK{}; + else if constexpr(val == ConvOutputLayout3D::GNDHWK) + return ck::tensor_layout::convolution::GNDHWK{}; + else if constexpr(val == ConvOutputLayout3D::NGKDHW) + return ck::tensor_layout::convolution::NGKDHW{}; } } } @@ -67,7 +78,8 @@ template ) { // TODO: Use std::tuple instead of ck::Tuple - return ck::Tuple())...>{}; + return ck::Tuple())...>{}; } template @@ -75,20 +87,23 @@ template (std::make_index_sequence{})); + using DsLayout = + decltype(GetAuxiliaryTensorLayoutTuple( + std::make_index_sequence{})); }; // TODO: Currently only the ouput tensor can have auxiliary tensors (e.g., bias). template -requires (HasElementwiseOpWithAuxiliaryOperands) + requires(HasElementwiseOpWithAuxiliaryOperands) consteval auto GetAuxiliaryTensorLayouts() { - return AuxiliaryTensorLayouts< - Signature.output.operation.auxiliary_operand_configs, SPATIAL_DIM, DIR>{}; + return AuxiliaryTensorLayouts{}; } template -requires (!HasElementwiseOpWithAuxiliaryOperands) + requires(!HasElementwiseOpWithAuxiliaryOperands) consteval auto GetAuxiliaryTensorLayouts() { return EmptyAuxiliaryTensorLayout{}; @@ -193,11 +208,15 @@ consteval auto GetOutputLayout() } } -template - requires(ConvSpatialDim - && ValidConvInputLayoutForSpatialDim - && ValidConvWeightLayoutForSpatialDim - && ValidConvOutputLayoutForSpatialDim) +template + requires(ConvSpatialDim && + ValidConvInputLayoutForSpatialDim && + ValidConvWeightLayoutForSpatialDim && + ValidConvOutputLayoutForSpatialDim) struct ConvTensorLayouts { static_assert(DIR == ConvDirection::FORWARD, "Only Forward convolution is supported."); @@ -209,11 +228,12 @@ struct ConvTensorLayouts template consteval auto GetTensorLayout() { - constexpr auto INPUT_LAYOUT = Signature.input.config.layout._input_layout; - constexpr auto WEIGHT_LAYOUT = Signature.weight.config.layout._weight_layout; + constexpr auto INPUT_LAYOUT = Signature.input.config.layout._input_layout; + constexpr auto WEIGHT_LAYOUT = Signature.weight.config.layout._weight_layout; constexpr auto OUTPUT_LAYOUT = Signature.output.config.layout._output_layout; - return factory_internal::ConvTensorLayouts{}; + return factory_internal:: + ConvTensorLayouts{}; } -} +} // namespace ck_tile::builder::factory_internal diff --git a/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp b/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp index 6d67a9d309..bec4975259 100644 --- a/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp +++ b/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp @@ -42,9 +42,13 @@ struct ConvSignatureInfo builder::ConvDirection direction; std::variant input_layout; - std::variant + std::variant weight_layout; - std::variant + std::variant 
output_layout; builder::DataType data_type; builder::ElementwiseOperation input_element_op; @@ -264,7 +268,8 @@ ConvDescription Describe() { using Traits = ConvTraits; - // TODO: This is a temporary fix. We should refactor also the traits and descriptors to better reflect the conv signature. + // TODO: This is a temporary fix. We should refactor also the traits and descriptors to better + // reflect the conv signature. auto get_input_layout = []() -> decltype(ConvSignatureInfo::input_layout) { if constexpr(Traits::spatial_dim == 1) return Traits::layout[0]._input_layout._1d; diff --git a/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp b/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp index b89346fe58..da5a35051e 100644 --- a/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp +++ b/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp @@ -314,34 +314,30 @@ constexpr auto conv_layout() if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{ - builder::ConvInputLayout1D::GNWC, - builder::ConvWeightLayout1D::GKXC, - builder::ConvOutputLayout1D::GNWK}; + return std::array{builder::ConvInputLayout1D::GNWC, + builder::ConvWeightLayout1D::GKXC, + builder::ConvOutputLayout1D::GNWK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{ - builder::ConvInputLayout1D::NWGC, - builder::ConvWeightLayout1D::GKXC, - builder::ConvOutputLayout1D::NWGK}; + return std::array{builder::ConvInputLayout1D::NWGC, + builder::ConvWeightLayout1D::GKXC, + builder::ConvOutputLayout1D::NWGK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{ - builder::ConvInputLayout1D::NGCW, - builder::ConvWeightLayout1D::GKXC, - builder::ConvOutputLayout1D::NGKW}; + return std::array{builder::ConvInputLayout1D::NGCW, + builder::ConvWeightLayout1D::GKXC, + builder::ConvOutputLayout1D::NGKW}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{ - builder::ConvInputLayout1D::NGCW, - builder::ConvWeightLayout1D::GKCX, - builder::ConvOutputLayout1D::NGKW}; + return std::array{builder::ConvInputLayout1D::NGCW, + builder::ConvWeightLayout1D::GKCX, + builder::ConvOutputLayout1D::NGKW}; } } else if constexpr(InstTraits::kSpatialDim == 2) @@ -349,37 +345,33 @@ constexpr auto conv_layout() if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{ - builder::ConvInputLayout2D::GNHWC, - builder::ConvWeightLayout2D::GKYXC, - builder::ConvOutputLayout2D::GNHWK}; + return std::array{builder::ConvInputLayout2D::GNHWC, + builder::ConvWeightLayout2D::GKYXC, + builder::ConvOutputLayout2D::GNHWK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{ - builder::ConvInputLayout2D::NHWGC, - builder::ConvWeightLayout2D::GKYXC, - builder::ConvOutputLayout2D::NHWGK}; + return std::array{builder::ConvInputLayout2D::NHWGC, + builder::ConvWeightLayout2D::GKYXC, + builder::ConvOutputLayout2D::NHWGK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{ - builder::ConvInputLayout2D::NGCHW, - builder::ConvWeightLayout2D::GKYXC, - builder::ConvOutputLayout2D::NGKHW}; + return std::array{builder::ConvInputLayout2D::NGCHW, + builder::ConvWeightLayout2D::GKYXC, + builder::ConvOutputLayout2D::NGKHW}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{ - 
builder::ConvInputLayout2D::NGCHW, - builder::ConvWeightLayout2D::GKCYX, - builder::ConvOutputLayout2D::NGKHW}; + return std::array{builder::ConvInputLayout2D::NGCHW, + builder::ConvWeightLayout2D::GKCYX, + builder::ConvOutputLayout2D::NGKHW}; } } else if constexpr(InstTraits::kSpatialDim == 3) @@ -387,37 +379,33 @@ constexpr auto conv_layout() if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{ - builder::ConvInputLayout3D::GNDHWC, - builder::ConvWeightLayout3D::GKZYXC, - builder::ConvOutputLayout3D::GNDHWK}; + return std::array{builder::ConvInputLayout3D::GNDHWC, + builder::ConvWeightLayout3D::GKZYXC, + builder::ConvOutputLayout3D::GNDHWK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{ - builder::ConvInputLayout3D::NDHWGC, - builder::ConvWeightLayout3D::GKZYXC, - builder::ConvOutputLayout3D::NDHWGK}; + return std::array{builder::ConvInputLayout3D::NDHWGC, + builder::ConvWeightLayout3D::GKZYXC, + builder::ConvOutputLayout3D::NDHWGK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{ - builder::ConvInputLayout3D::NGCDHW, - builder::ConvWeightLayout3D::GKZYXC, - builder::ConvOutputLayout3D::NGKDHW}; + return std::array{builder::ConvInputLayout3D::NGCDHW, + builder::ConvWeightLayout3D::GKZYXC, + builder::ConvOutputLayout3D::NGKDHW}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{ - builder::ConvInputLayout3D::NGCDHW, - builder::ConvWeightLayout3D::GKCZYX, - builder::ConvOutputLayout3D::NGKDHW}; + return std::array{builder::ConvInputLayout3D::NGCDHW, + builder::ConvWeightLayout3D::GKCZYX, + builder::ConvOutputLayout3D::NGKDHW}; } } } diff --git a/experimental/builder/include/ck_tile/builder/types.hpp b/experimental/builder/include/ck_tile/builder/types.hpp index 1970f86dd8..9e10388bb3 100644 --- a/experimental/builder/include/ck_tile/builder/types.hpp +++ b/experimental/builder/include/ck_tile/builder/types.hpp @@ -181,7 +181,8 @@ struct ConvOutputLayout struct ConvAuxiliaryTensorLayout { - union { + union + { BiasLayout _bias_layout; ConvOutputLayout _conv_output_layout; }; @@ -202,7 +203,8 @@ struct ConvAuxiliaryTensorLayout struct ConvLayout { - union { + union + { ConvInputLayout _input_layout; ConvWeightLayout _weight_layout; ConvOutputLayout _output_layout; @@ -633,25 +635,25 @@ inline std::ostream& operator<<(std::ostream& os, BiasLayout layout) } } -inline std::ostream& operator<<(std::ostream& os, const std::variant& layout) +inline std::ostream& +operator<<(std::ostream& os, + const std::variant& layout) { std::visit([&os](const auto& l) { os << l; }, layout); return os; } -inline std::ostream& operator<<(std::ostream& os, const std::variant& layout) +inline std::ostream& +operator<<(std::ostream& os, + const std::variant& layout) { std::visit([&os](const auto& l) { os << l; }, layout); return os; } -inline std::ostream& operator<<(std::ostream& os, const std::variant& layout) +inline std::ostream& +operator<<(std::ostream& os, + const std::variant& layout) { std::visit([&os](const auto& l) { os << l; }, layout); return os; diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_bf16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_bf16.cpp index 0bfc03058f..8c9cf5f9d7 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_bf16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_bf16.cpp @@ -14,18 +14,18 @@ TEST(FwdConvInstances, 
Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_1D_BF16_ChannelsFirst_scale) { constexpr ConvSignature FwdConvSignature{ - .spatial_dim = 1, - .direction = ConvDirection::FORWARD, - .data_type = DataType::BF16, + .spatial_dim = 1, + .direction = ConvDirection::FORWARD, + .data_type = DataType::BF16, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor { .config = { .layout = ConvInputLayout1D::NGCW } }, - .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout1D::GKXC,} }, - .output = ConvolutionTensor - { - .config = { .layout = ConvOutputLayout1D::NGKW }, - .operation = TensorOperation<> { .elementwise_operation = ElementwiseOperation::SCALE } - } - }; + .input = ConvolutionTensor{.config = {.layout = ConvInputLayout1D::NGCW}}, + .weight = ConvolutionTensor{.config = + { + .layout = ConvWeightLayout1D::GKXC, + }}, + .output = ConvolutionTensor{ + .config = {.layout = ConvOutputLayout1D::NGKW}, + .operation = TensorOperation<>{.elementwise_operation = ElementwiseOperation::SCALE}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp index 46e771212d..2aab68b219 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp @@ -12,16 +12,14 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_Instance_1D_FP16_ChannelsFirst) { - constexpr ConvSignature FwdConvSignature - { - .spatial_dim = 1, - .direction = ConvDirection::FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor { .config = { .layout = ConvInputLayout1D::NWGC } }, - .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout1D::GKXC } }, - .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout1D::NWGK } } - }; + constexpr ConvSignature FwdConvSignature{ + .spatial_dim = 1, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor{.config = {.layout = ConvInputLayout1D::NWGC}}, + .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout1D::GKXC}}, + .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout1D::NWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle{} @@ -32,15 +30,12 @@ TEST(FwdConvInstances, .with_prefetch_config(1, 2, PipelineScheduler::DEFAULT); using Builder = ConvBuilder; - run_test( - { - "DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle", - "NWGC,GKXC,EmptyTuple,NWGK", - "PassThrough,PassThrough,PassThrough", - "MNKPadding", - "64,64,32,32", - "Default" - }); + run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle", + "NWGC,GKXC,EmptyTuple,NWGK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding", + "64,64,32,32", + "Default"}); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp index 33a734d814..051c616cc6 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp @@ -14,16 +14,14 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, 
Create_DeviceGroupedConvFwdMultipleD_Wmma_CShuffle_Instance_1D_FP32_ChannelsFirst_scale) { - constexpr ConvSignature FwdConvSignature - { - .spatial_dim = 1, - .direction = ConvDirection::FORWARD, - .data_type = DataType::I8, - .accumulation_data_type = DataType::INT32, - .input = ConvolutionTensor { .config = { .layout = ConvInputLayout1D::GNWC } }, - .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout1D::GKXC } }, - .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout1D::GNWK } } - }; + constexpr ConvSignature FwdConvSignature{ + .spatial_dim = 1, + .direction = ConvDirection::FORWARD, + .data_type = DataType::I8, + .accumulation_data_type = DataType::INT32, + .input = ConvolutionTensor{.config = {.layout = ConvInputLayout1D::GNWC}}, + .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout1D::GKXC}}, + .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout1D::GNWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleD_Wmma_CShuffle{} @@ -34,13 +32,11 @@ TEST(FwdConvInstances, .with_prefetch_config(1, 0, PipelineScheduler::DEFAULT); using Builder = ConvBuilder; - run_test( - { - "DeviceGroupedConvFwdMultipleD_Wmma_CShuffle", - "128,64,64,64", - "GNWC,GKXC,EmptyTuple,GNWK", - "PassThrough,PassThrough,PassThrough", - "Default"}); + run_test({"DeviceGroupedConvFwdMultipleD_Wmma_CShuffle", + "128,64,64,64", + "GNWC,GKXC,EmptyTuple,GNWK", + "PassThrough,PassThrough,PassThrough", + "Default"}); } #endif diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp index 796849174f..06178fdae7 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp @@ -12,16 +12,14 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_2D_BF16_ChannelsLast) { - constexpr ConvSignature FwdConvSignature - { - .spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .data_type = DataType::BF16, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor { .config = { .layout = ConvInputLayout2D::NHWGC } }, - .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout2D::GKYXC } }, - .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout2D::NHWGK } } - }; + constexpr ConvSignature FwdConvSignature{ + .spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::BF16, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor{.config = {.layout = ConvInputLayout2D::NHWGC}}, + .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout2D::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout2D::NHWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -46,16 +44,14 @@ TEST(FwdConvInstances, TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_2D_BF16_NHWGC_Filter3x3) { - constexpr ConvSignature FwdConvSignature - { - .spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .data_type = DataType::BF16, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor { .config = { .layout = ConvInputLayout2D::NHWGC } }, - .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout2D::GKYXC } }, - .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout2D::NHWGK } } - }; + constexpr 
ConvSignature FwdConvSignature{ + .spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::BF16, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor{.config = {.layout = ConvInputLayout2D::NHWGC}}, + .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout2D::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout2D::NHWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -66,15 +62,12 @@ TEST(FwdConvInstances, .with_block_gemm(BlockGemmDesc_v5_intrawave); using Builder = ConvBuilder; - run_test( - { - "DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", - "Filter3x3", - "NHWGC,GKYXC,EmptyTuple,NHWGK", - "PassThrough,PassThrough,PassThrough", - "MNKPadding", - "v5" - }); + run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", + "Filter3x3", + "NHWGC,GKYXC,EmptyTuple,NHWGK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding", + "v5"}); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp index 0a424ac1ac..f2a9287b80 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp @@ -12,21 +12,20 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_Instance_2D_BF16_scale_add_relu) { - constexpr ConvSignature FwdConvSignature - { - .spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .data_type = DataType::BF16, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor { .config = { .layout = ConvInputLayout2D::NHWGC } }, - .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout2D::GKYXC, .data_type = DataType::BF16 } }, // For demo purposes - .output = ConvolutionTensor - { - .config = { .layout = ConvOutputLayout2D::NHWGK }, - .operation = TensorOperation<> { .elementwise_operation = ElementwiseOperation::SCALEADD_SCALEADD_RELU } - .with_auxiliary_operand_configs() - } - }; + constexpr ConvSignature FwdConvSignature{ + .spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::BF16, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor{.config = {.layout = ConvInputLayout2D::NHWGC}}, + .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout2D::GKYXC, + .data_type = DataType::BF16}}, // For demo purposes + .output = ConvolutionTensor{ + .config = {.layout = ConvOutputLayout2D::NHWGK}, + .operation = TensorOperation<>{.elementwise_operation = + ElementwiseOperation::SCALEADD_SCALEADD_RELU} + .with_auxiliary_operand_configs()}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle{} @@ -37,15 +36,12 @@ TEST(FwdConvInstances, .with_prefetch_config(1, 1, PipelineScheduler::DEFAULT); using Builder = ConvBuilder; - run_test( - { - "DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle", - "NHWGC,GKYXC,Tuple(NHWGK,G_K),NHWGK", - "PassThrough,PassThrough,ScaleAddScaleAddRelu", - "64,64,32,32", - "MNKPadding", - "Default" - }); + run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle", + "NHWGC,GKYXC,Tuple(NHWGK,G_K),NHWGK", + "PassThrough,PassThrough,ScaleAddScaleAddRelu", + "64,64,32,32", + "MNKPadding", + "Default"}); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp 
b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp index dd4299c28f..c29ea309b7 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp @@ -10,16 +10,14 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK_Instance_2D_FP16_GNHWC) { - constexpr ConvSignature FwdConvSignature - { - .spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor { .config = { .layout = ConvInputLayout2D::GNHWC } }, - .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout2D::GKYXC } }, - .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout2D::GNHWK } } - }; + constexpr ConvSignature FwdConvSignature{ + .spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor{.config = {.layout = ConvInputLayout2D::GNHWC}}, + .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout2D::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout2D::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK{} @@ -30,30 +28,25 @@ TEST(FwdConvInstances, Create_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK_Ins .with_dl_transfer(DlFwdTransfer); using Builder = ConvBuilder; - run_test( - { - "DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK", - "256,128,128,16", - "Default", - "MNKPadding", - "GNHWC,GKYXC,EmptyTuple,GNHWK", - "PassThrough,PassThrough,PassThrough" - }); + run_test({"DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK", + "256,128,128,16", + "Default", + "MNKPadding", + "GNHWC,GKYXC,EmptyTuple,GNHWK", + "PassThrough,PassThrough,PassThrough"}); } TEST(FwdConvInstances, Create_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK_Instance_2D_FP16_FILTER_1X1_PAD0) { - constexpr ConvSignature FwdConvSignature - { - .spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor { .config = { .layout = ConvInputLayout2D::GNHWC } }, - .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout2D::GKYXC } }, - .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout2D::GNHWK } } - }; + constexpr ConvSignature FwdConvSignature{ + .spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor{.config = {.layout = ConvInputLayout2D::GNHWC}}, + .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout2D::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout2D::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK{} @@ -65,15 +58,12 @@ TEST(FwdConvInstances, .with_dl_transfer(DlFwdTransfer); using Builder = ConvBuilder; - run_test( - { - "DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK", - "256,128,128,16", - "Filter1x1Pad0", - "MNKPadding", - "GNHWC,GKYXC,EmptyTuple,GNHWK", - "PassThrough,PassThrough,PassThrough" - }); + run_test({"DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK", + "256,128,128,16", + "Filter1x1Pad0", + "MNKPadding", + "GNHWC,GKYXC,EmptyTuple,GNHWK", + "PassThrough,PassThrough,PassThrough"}); } } // namespace diff --git 
a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp index 75886cfe92..f3c74f8f62 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp @@ -11,16 +11,14 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_2D_FP16_GNHWC) { - constexpr ConvSignature FwdConvSignature - { - .spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor { .config = { .layout = ConvInputLayout2D::GNHWC } }, - .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout2D::GKYXC } }, - .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout2D::GNHWK } } - }; + constexpr ConvSignature FwdConvSignature{ + .spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor{.config = {.layout = ConvInputLayout2D::GNHWC}}, + .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout2D::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout2D::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -32,17 +30,14 @@ TEST(FwdConvInstances, .with_block_gemm(BlockGemmDesc_v3_intrawave); using Builder = ConvBuilder; - run_test( - { - "DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", - "256,256,256,32", - "Filter1x1Pad0", - "Intrawave", - "v3", - "GNHWC,GKYXC,EmptyTuple,GNHWK", - "PassThrough,PassThrough,PassThrough", - "MNKPadding" - }); + run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", + "256,256,256,32", + "Filter1x1Pad0", + "Intrawave", + "v3", + "GNHWC,GKYXC,EmptyTuple,GNHWK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding"}); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp index 217268d16a..6f393dc265 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp @@ -11,16 +11,14 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_2D_FP32_NGCHW_GKCYX) { - constexpr ConvSignature FwdConvSignature - { - .spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .data_type = DataType::FP32, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor { .config = { .layout = ConvInputLayout2D::NGCHW } }, - .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout2D::GKCYX } }, - .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout2D::NGKHW } } - }; + constexpr ConvSignature FwdConvSignature{ + .spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP32, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor{.config = {.layout = ConvInputLayout2D::NGCHW}}, + .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout2D::GKCYX}}, + .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout2D::NGKHW}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -32,17 +30,14 @@ TEST(FwdConvInstances, .with_block_gemm(BlockGemmDesc_v4_intrawave); using Builder = ConvBuilder; - 
run_test( - { - "DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", - "256,128,128,32", - "Filter1x1Stride1Pad0", - "Intrawave", - "v4", - "NGCHW,GKCYX,EmptyTuple,NGKHW", - "PassThrough,PassThrough,PassThrough", - "MNKPadding" - }); + run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", + "256,128,128,32", + "Filter1x1Stride1Pad0", + "Intrawave", + "v4", + "NGCHW,GKCYX,EmptyTuple,NGKHW", + "PassThrough,PassThrough,PassThrough", + "MNKPadding"}); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp index b460f0f8f1..df13243cd7 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp @@ -12,16 +12,14 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_Instance_2D_FP8_ChannelsLast) { - constexpr ConvSignature FwdConvSignature - { - .spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .data_type = DataType::FP8, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor { .config = { .layout = ConvInputLayout2D::NHWGC } }, - .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout2D::GKYXC } }, - .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout2D::NHWGK } } - }; + constexpr ConvSignature FwdConvSignature{ + .spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP8, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor{.config = {.layout = ConvInputLayout2D::NHWGC}}, + .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout2D::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout2D::NHWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle{} @@ -32,15 +30,12 @@ TEST(FwdConvInstances, .with_prefetch_config(1, 1, PipelineScheduler::DEFAULT); using Builder = ConvBuilder; - run_test( - { - "DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle", - "256,256,128,32", - "Default", - "NHWGC,GKYXC,EmptyTuple,NHWGK", - "PassThrough,PassThrough,PassThrough", - "MNKPadding" - }); + run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle", + "256,256,128,32", + "Default", + "NHWGC,GKYXC,EmptyTuple,NHWGK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding"}); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp index b57594fa3e..227a2aa918 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp @@ -11,16 +11,14 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor_Instance_2D_FP16_GNHWC) { - constexpr ConvSignature FwdConvSignature - { - .spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor { .config = { .layout = ConvInputLayout2D::GNHWC } }, - .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout2D::GKYXC } }, - .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout2D::GNHWK } } - }; + constexpr ConvSignature FwdConvSignature{ + .spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + 
.accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor{.config = {.layout = ConvInputLayout2D::GNHWC}}, + .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout2D::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout2D::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor{ @@ -33,31 +31,26 @@ TEST(FwdConvInstances, .with_prefetch_config(1, 1, PipelineScheduler::DEFAULT)}; using Builder = ConvBuilder; - run_test( - { - "DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor", - "256,256,128,32", - "Default", - "GNHWC,GKYXC,EmptyTuple,GNHWK", - "PassThrough,PassThrough,PassThrough", - "MNKPadding" - }); + run_test({"DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor", + "256,256,128,32", + "Default", + "GNHWC,GKYXC,EmptyTuple,GNHWK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding"}); } TEST( FwdConvInstances, Create_DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor_Instance_2D_FP16_GNHWC_Filter1x1Pad0) { - constexpr ConvSignature FwdConvSignature - { - .spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor { .config = { .layout = ConvInputLayout2D::GNHWC } }, - .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout2D::GKYXC } }, - .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout2D::GNHWK } } - }; + constexpr ConvSignature FwdConvSignature{ + .spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor{.config = {.layout = ConvInputLayout2D::GNHWC}}, + .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout2D::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout2D::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor{ @@ -70,15 +63,12 @@ TEST( .with_prefetch_config(1, 1, PipelineScheduler::DEFAULT)}; using Builder = ConvBuilder; - run_test( - { - "DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor", - "128,128,128,32", - "Filter1x1Pad0", - "GNHWC,GKYXC,EmptyTuple,GNHWK", - "PassThrough,PassThrough,PassThrough", - "MNKPadding" - }); + run_test({"DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor", + "128,128,128,32", + "Filter1x1Pad0", + "GNHWC,GKYXC,EmptyTuple,GNHWK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding"}); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp index 224762de05..75f0a6eed3 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp @@ -12,16 +12,14 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_3D_BF16_GNDHWC) { - constexpr ConvSignature FwdConvSignature - { - .spatial_dim = 3, - .direction = ConvDirection::FORWARD, - .data_type = DataType::BF16, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor { .config = { .layout = ConvInputLayout3D::GNDHWC } }, - .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout3D::GKZYXC } }, - .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout3D::GNDHWK } } - }; + constexpr ConvSignature FwdConvSignature{ + .spatial_dim = 3, + .direction = 
ConvDirection::FORWARD, + .data_type = DataType::BF16, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor{.config = {.layout = ConvInputLayout3D::GNDHWC}}, + .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout3D::GKZYXC}}, + .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout3D::GNDHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -32,17 +30,14 @@ TEST(FwdConvInstances, .with_block_gemm(BlockGemmDesc_v3_intrawave); using Builder = ConvBuilder; - run_test( - { - "DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", - "256,256,256,32", - "Default", - "Intrawave", - "v3", - "GNDHWC,GKZYXC,EmptyTuple,GNDHWK", - "PassThrough,PassThrough,PassThrough", - "MNKPadding" - }); + run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", + "256,256,256,32", + "Default", + "Intrawave", + "v3", + "GNDHWC,GKZYXC,EmptyTuple,GNDHWK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding"}); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp index d1f0a26cc7..c8d891f91f 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp @@ -12,16 +12,14 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_3D_FP16_NDHWGC_ChannelsLast) { - constexpr ConvSignature FwdConvSignature - { - .spatial_dim = 3, - .direction = ConvDirection::FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor { .config = { .layout = ConvInputLayout3D::NDHWGC } }, - .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout3D::GKZYXC } }, - .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout3D::NDHWGK } } - }; + constexpr ConvSignature FwdConvSignature{ + .spatial_dim = 3, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor{.config = {.layout = ConvInputLayout3D::NDHWGC}}, + .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout3D::GKZYXC}}, + .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout3D::NDHWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -33,17 +31,14 @@ TEST(FwdConvInstances, .with_block_gemm(BlockGemmDesc_v4_intrawave); using Builder = ConvBuilder; - run_test( - { - "DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", - "256,128,128,32", - "Filter1x1Pad0", - "Intrawave", - "v4", - "NDHWGC,GKZYXC,EmptyTuple,NDHWGK", - "PassThrough,PassThrough,PassThrough", - "MNKPadding" - }); + run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", + "256,128,128,32", + "Filter1x1Pad0", + "Intrawave", + "v4", + "NDHWGC,GKZYXC,EmptyTuple,NDHWGK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding"}); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp index aa8560f2f4..8ffbbe6b46 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp @@ -12,16 +12,14 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_3D_FP32_ChannelsFirst) { - constexpr 
ConvSignature FwdConvSignature - { - .spatial_dim = 3, - .direction = ConvDirection::FORWARD, - .data_type = DataType::FP32, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor { .config = { .layout = ConvInputLayout3D::NGCDHW } }, - .weight = ConvolutionTensor { .config = { .layout = ConvWeightLayout3D::GKCZYX } }, - .output = ConvolutionTensor { .config = { .layout = ConvOutputLayout3D::NGKDHW } } - }; + constexpr ConvSignature FwdConvSignature{ + .spatial_dim = 3, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP32, + .accumulation_data_type = DataType::FP32, + .input = ConvolutionTensor{.config = {.layout = ConvInputLayout3D::NGCDHW}}, + .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout3D::GKCZYX}}, + .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout3D::NGKDHW}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -33,17 +31,14 @@ TEST(FwdConvInstances, .with_block_gemm(BlockGemmDesc_v1_intrawave); using Builder = ConvBuilder; - run_test( - { - "DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", - "256,256,256,32", - "Filter1x1Pad0", - "Intrawave", - "v1", - "NGCDHW,GKCZYX,EmptyTuple,NGKDHW", - "PassThrough,PassThrough,PassThrough", - "MNKPadding" - }); + run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", + "256,256,256,32", + "Filter1x1Pad0", + "Intrawave", + "v1", + "NGCDHW,GKCZYX,EmptyTuple,NGKDHW", + "PassThrough,PassThrough,PassThrough", + "MNKPadding"}); } } // namespace diff --git a/experimental/builder/test/conv/test_conv_traits.cpp b/experimental/builder/test/conv/test_conv_traits.cpp index b3870e52d7..d10a530aa9 100644 --- a/experimental/builder/test/conv/test_conv_traits.cpp +++ b/experimental/builder/test/conv/test_conv_traits.cpp @@ -85,10 +85,10 @@ TEST_F(ConvTraitsTest, ConvFwdTraitsExtraction) // Verify signature information EXPECT_EQ(Traits::spatial_dim, 2); EXPECT_EQ(Traits::direction, ck_tile::builder::ConvDirection::FORWARD); - EXPECT_THAT(Traits::layout, ::testing::ElementsAre( - ck_tile::builder::ConvInputLayout2D::GNHWC, - ck_tile::builder::ConvWeightLayout2D::GKYXC, - ck_tile::builder::ConvOutputLayout2D::GNHWK)); + EXPECT_THAT(Traits::layout, + ::testing::ElementsAre(ck_tile::builder::ConvInputLayout2D::GNHWC, + ck_tile::builder::ConvWeightLayout2D::GKYXC, + ck_tile::builder::ConvOutputLayout2D::GNHWK)); EXPECT_EQ(Traits::data_type, ck_tile::builder::DataType::FP16); EXPECT_EQ(Traits::input_element_op, ck_tile::builder::ElementwiseOperation::PASS_THROUGH); EXPECT_EQ(Traits::weight_element_op, ck_tile::builder::ElementwiseOperation::PASS_THROUGH); @@ -215,10 +215,10 @@ TEST_F(ConvTraitsTest, ConvFwdBaseTraitsExtraction) // Verify signature information EXPECT_EQ(Traits::spatial_dim, 2); EXPECT_EQ(Traits::direction, ck_tile::builder::ConvDirection::FORWARD); - EXPECT_THAT(Traits::layout, ::testing::ElementsAre( - ck_tile::builder::ConvInputLayout2D::GNHWC, - ck_tile::builder::ConvWeightLayout2D::GKYXC, - ck_tile::builder::ConvOutputLayout2D::GNHWK)); + EXPECT_THAT(Traits::layout, + ::testing::ElementsAre(ck_tile::builder::ConvInputLayout2D::GNHWC, + ck_tile::builder::ConvWeightLayout2D::GKYXC, + ck_tile::builder::ConvOutputLayout2D::GNHWK)); EXPECT_EQ(Traits::data_type, ck_tile::builder::DataType::FP16); EXPECT_EQ(Traits::input_element_op, ck_tile::builder::ElementwiseOperation::PASS_THROUGH); EXPECT_EQ(Traits::weight_element_op, ck_tile::builder::ElementwiseOperation::PASS_THROUGH); @@ -301,10 +301,10 @@ TEST_F(ConvTraitsTest, 
ConvFwdLargeTensorTraitsExtraction) // Verify signature information EXPECT_EQ(Traits::spatial_dim, 2); EXPECT_EQ(Traits::direction, ck_tile::builder::ConvDirection::FORWARD); - EXPECT_THAT(Traits::layout, ::testing::ElementsAre( - ck_tile::builder::ConvInputLayout2D::GNHWC, - ck_tile::builder::ConvWeightLayout2D::GKYXC, - ck_tile::builder::ConvOutputLayout2D::GNHWK)); + EXPECT_THAT(Traits::layout, + ::testing::ElementsAre(ck_tile::builder::ConvInputLayout2D::GNHWC, + ck_tile::builder::ConvWeightLayout2D::GKYXC, + ck_tile::builder::ConvOutputLayout2D::GNHWK)); EXPECT_EQ(Traits::data_type, ck_tile::builder::DataType::FP16); EXPECT_EQ(Traits::input_element_op, ck_tile::builder::ElementwiseOperation::PASS_THROUGH); EXPECT_EQ(Traits::weight_element_op, ck_tile::builder::ElementwiseOperation::PASS_THROUGH); diff --git a/experimental/builder/test/impl/conv_algorithm_types.hpp b/experimental/builder/test/impl/conv_algorithm_types.hpp index 082b0ccbbc..2656c4a130 100644 --- a/experimental/builder/test/impl/conv_algorithm_types.hpp +++ b/experimental/builder/test/impl/conv_algorithm_types.hpp @@ -343,7 +343,7 @@ struct ConvAlgorithmTemplate : Components... template constexpr auto with_elementwise_operation(const OP& op) const { - auto result = *this; + auto result = *this; result.element_op = op; return result; } diff --git a/experimental/builder/test/impl/conv_signature_types.hpp b/experimental/builder/test/impl/conv_signature_types.hpp index 97b927b722..3be65bcb6f 100644 --- a/experimental/builder/test/impl/conv_signature_types.hpp +++ b/experimental/builder/test/impl/conv_signature_types.hpp @@ -25,12 +25,11 @@ struct TensorOperation std::array auxiliary_operand_configs{Configs...}; // Add builder to add auxiliary tensor configs - template + template constexpr auto with_auxiliary_operand_configs() const { return TensorOperation{ - .elementwise_operation = this->elementwise_operation - }; + .elementwise_operation = this->elementwise_operation}; } }; diff --git a/experimental/builder/test/test_conv_description.cpp b/experimental/builder/test/test_conv_description.cpp index 43c61ec753..22cb131866 100644 --- a/experimental/builder/test/test_conv_description.cpp +++ b/experimental/builder/test/test_conv_description.cpp @@ -34,16 +34,19 @@ struct TensorConfig ckb::DataType compute_type{ckb::DataType::UNDEFINDED}; }; -struct ConvTensorSimple { +struct ConvTensorSimple +{ TensorConfig config; }; -struct ConvTensorWithOp { +struct ConvTensorWithOp +{ TensorConfig config; TensorOp operation{}; }; -struct ConvTensorWithInvalidOp { +struct ConvTensorWithInvalidOp +{ TensorConfig config; InvalidTensorOp operation{}; }; @@ -52,46 +55,39 @@ struct ConvTensorWithInvalidOp { // This includes dimensionality, direction, data layout, and data type. 
struct ConvSignature { - int spatial_dim = 2; - ckb::DataType data_type = ckb::DataType::FP16; + int spatial_dim = 2; + ckb::DataType data_type = ckb::DataType::FP16; ckb::DataType accumulation_data_type = ckb::DataType::FP32; - ConvTensorSimple input = {.config = {ckb::ConvInputLayout2D::GNHWC}}; - ConvTensorSimple weight = {.config = {ckb::ConvWeightLayout2D::GKYXC}}; - ConvTensorSimple output = {.config = {ckb::ConvOutputLayout2D::GNHWK}}; + ConvTensorSimple input = {.config = {ckb::ConvInputLayout2D::GNHWC}}; + ConvTensorSimple weight = {.config = {ckb::ConvWeightLayout2D::GKYXC}}; + ConvTensorSimple output = {.config = {ckb::ConvOutputLayout2D::GNHWK}}; }; static_assert(ckb::ConvSignatureDescriptor); // Compile time tests for concepts struct ConvSignatureWithOptionalParams { - int spatial_dim = 2; - ckb::DataType data_type = ckb::DataType::FP16; - ckb::DataType accumulation_data_type = ckb::DataType::FP32; - ckb::ConvDirection direction = ckb::ConvDirection::FORWARD; - ConvTensorWithOp input = - { - .config = {ckb::ConvInputLayout2D::GNHWC, ckb::DataType::FP16}, - }; - ConvTensorWithOp weight = - { - .config = {ckb::ConvWeightLayout2D::GKYXC, ckb::DataType::FP16} - }; - ConvTensorWithOp output = - { - .config = {ckb::ConvOutputLayout2D::GNHWK, ckb::DataType::FP16}, - .operation = {ckb::ElementwiseOperation::SCALE} - }; + int spatial_dim = 2; + ckb::DataType data_type = ckb::DataType::FP16; + ckb::DataType accumulation_data_type = ckb::DataType::FP32; + ckb::ConvDirection direction = ckb::ConvDirection::FORWARD; + ConvTensorWithOp input = { + .config = {ckb::ConvInputLayout2D::GNHWC, ckb::DataType::FP16}, + }; + ConvTensorWithOp weight = {.config = {ckb::ConvWeightLayout2D::GKYXC, ckb::DataType::FP16}}; + ConvTensorWithOp output = {.config = {ckb::ConvOutputLayout2D::GNHWK, ckb::DataType::FP16}, + .operation = {ckb::ElementwiseOperation::SCALE}}; }; static_assert(ckb::ConvSignatureDescriptor); struct ConvSignatureWithInvalidOptionalParams { - int spatial_dim = 2; - ckb::DataType data_type = ckb::DataType::FP16; + int spatial_dim = 2; + ckb::DataType data_type = ckb::DataType::FP16; ckb::DataType accumulation_data_type = ckb::DataType::FP32; - ConvTensorWithInvalidOp input = {.config = {ckb::ConvInputLayout2D::GNHWC}}; - ConvTensorWithInvalidOp weight = {.config = {ckb::ConvWeightLayout2D::GKYXC}}; - ConvTensorWithInvalidOp output = {.config = {ckb::ConvOutputLayout2D::GNHWK}}; + ConvTensorWithInvalidOp input = {.config = {ckb::ConvInputLayout2D::GNHWC}}; + ConvTensorWithInvalidOp weight = {.config = {ckb::ConvWeightLayout2D::GKYXC}}; + ConvTensorWithInvalidOp output = {.config = {ckb::ConvOutputLayout2D::GNHWK}}; }; static_assert(!ckb::ConvSignatureDescriptor); From e995fcf734ad6a16359d53802c3072d770ace454 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Mon, 1 Dec 2025 13:52:22 +0000 Subject: [PATCH 26/41] Add Readme doc to describe the design. 
--- .../builder/include/ck_tile/builder/README.md | 247 ++++++++++++++++++ 1 file changed, 247 insertions(+) create mode 100644 experimental/builder/include/ck_tile/builder/README.md diff --git a/experimental/builder/include/ck_tile/builder/README.md b/experimental/builder/include/ck_tile/builder/README.md new file mode 100644 index 0000000000..680edf09a9 --- /dev/null +++ b/experimental/builder/include/ck_tile/builder/README.md @@ -0,0 +1,247 @@ +# Composable Kernel Builder Design Documentation + +This directory contains the builder framework for Composable Kernel, which provides a compile-time, type-safe interface for constructing convolution operations with various configurations. + +## Table of Contents + +- [Convolution Signature Design](#convolution-signature-design) + - [Overview](#overview) + - [Architecture](#architecture) + - [Core Components](#core-components) + - [Concepts and Validation](#concepts-and-validation) + - [Examples](#examples) + - [Design Rationale](#design-rationale) + +--- + +## Convolution Signature Design + +### Overview + +The convolution signature system provides a **compile-time description** of grouped convolution operations. A signature is a collection of properties that fully characterize a convolution kernel's mathematical and operational behavior, enabling: + +- **Compile-time validation**: Ensures type safety and correctness before kernel instantiation +- **Kernel selection**: Matches user requirements to optimized implementations +- **Specialization**: Enables optimized code paths for specific configurations +- **Composability**: Supports building complex operations from simpler components + +The signature leverages modern C++20 features, particularly **concepts**, to provide expressive, self-documenting interfaces with compile-time guarantees. 
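+To make this concrete, the sketch below declares a 2D FP16 forward-convolution signature with designated initializers. It is condensed from the test cases under `experimental/builder/test/conv/` (see `test_ckb_conv_fwd_2d_fp16.cpp`); the algorithm descriptor (`FwdConvAlgorithm`), which carries tile-size, prefetch, and block-GEMM settings, is configured separately in those tests, and the exact template parameters of `ConvBuilder` are an assumption shown here for illustration.
+
+```cpp
+// Minimal sketch of a signature, condensed from the builder tests.
+// All fields are plain constexpr data evaluated at compile time.
+constexpr ConvSignature FwdConvSignature{
+    .spatial_dim            = 2,
+    .direction              = ConvDirection::FORWARD,
+    .data_type              = DataType::FP16,
+    .accumulation_data_type = DataType::FP32,
+    .input  = ConvolutionTensor{.config = {.layout = ConvInputLayout2D::GNHWC}},
+    .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout2D::GKYXC}},
+    .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout2D::GNHWK}}};
+
+// Assumed builder shape: the signature and an algorithm descriptor are passed
+// as template parameters, so an invalid combination is rejected at compile
+// time by the concepts described below, rather than at kernel launch.
+using Builder = ConvBuilder<FwdConvSignature, FwdConvAlgorithm>;
+```
+
+Because the signature is plain `constexpr` data, two kernels that differ only in, say, layout or accumulation type are distinct types, which is what makes the kernel-selection and specialization goals above enforceable at compile time.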
+ +### Architecture + +The signature system is organized into a hierarchical structure: + +``` +┌─────────────────────────────────────────────────────────┐ +│ ConvSignature │ +├─────────────────────────────────────────────────────────┤ +│ Properties: │ +│ • spatial_dim: int (1D, 2D, or 3D) │ +│ • direction: ConvDirection (Fwd/BwdData/BwdWeight) │ +│ • data_type: DataType (default data type) │ +│ • accumulation_data_type: DataType │ +│ • input: ConvTensor ──┐ │ +│ • weight: ConvTensor ──│ │ +│ • output: ConvTensor ──│ │ +└──────────────────────────────────┼──────────────────────┘ + │ + ▼ + ┌─────────────────────────────────────────┐ + │ ConvTensor │ + ├─────────────────────────────────────────┤ + │ ╔═════════════════════════════════════╗ │ + │ ║ TensorConfig (required) ║ │ + │ ╠═════════════════════════════════════╣ │ + │ ║ • layout: ConvLayout ║ │ + │ ║ • data_type: DataType (optional) ║ │ + │ ║ • compute_type: DataType (optional)║ │ + │ ╚═════════════════════════════════════╝ │ + │ │ + │ ┌─────────────────────────────────────┐ │ + │ │ TensorOperation (optional) │ │ + │ ├─────────────────────────────────────┤ │ + │ │ • elementwise_operation │ │ + │ │ • auxiliary_operand_configs[] │ │ + │ │ (each is also ConvTensor) ◄───────┼─┐ + │ └─────────────────────────────────────┘ │ │ + └─────────────────────────────────────────┘ │ + │ + Recursive ───────────────┘ +``` +Key Design Points: + - ConvSignature contains three ConvTensor instances (input, weight, output) + - All tensors share the same ConvTensor structure + - Each ConvTensor has: + - TensorConfig (required): Defines layout as well as optional data and compute type overrides + - TensorOperation (optional): Defines fused elementwise operations + - Auxiliary operands (e.g., bias) in TensorOperation also use the ConvTensor type + +### Core Components + +#### 1. Signature Level + +The top-level signature contains global properties that apply to the entire convolution operation: + +```cpp +template +concept ConvSignatureDescriptor = requires(T t) { + { t.spatial_dim } -> std::convertible_to; // 1, 2, or 3 + { t.data_type } -> std::convertible_to; // Default data type + { t.input } -> ConvTensorDescriptor; + { t.weight } -> ConvTensorDescriptor; + { t.output } -> ConvTensorDescriptor; + requires ConvolutionDirectionWellDefinedIfProvided; // Optional direction +}; +``` + +**Properties:** +- **`spatial_dim`**: Dimensionality of the convolution (1D, 2D, or 3D) +- **`direction`**: Operation type (optional, defaults to FORWARD) + - `FORWARD`: Standard forward convolution + - `BACKWARD_DATA`: Gradient computation w.r.t. input + - `BACKWARD_WEIGHT`: Gradient computation w.r.t. weights +- **`data_type`**: Default data type for all tensors (FP32, FP16, BF16, FP8, I8, U8) +- **`accumulation_data_type`**: Type used for internal accumulation + +#### 2. Tensor Level + +Each tensor (input, weight, output) has its own descriptor: + +```cpp +template +concept ConvTensorDescriptor = requires(T t) { + { t.config } -> TensorConfigDescriptor; + requires ElementwiseOpWellDefinedIfProvided; +}; +``` + +A tensor descriptor encapsulates: +- **Configuration**: Layout and data type information +- **Operation** (optional): Fused elementwise operations on this tensor + +#### 3. 
Tensor Configuration + +Describes the memory layout and data types: + +```cpp +template +concept TensorConfigDescriptor = requires(T t) { + { t.layout } -> std::convertible_to; + { t.data_type } -> std::convertible_to; // Optional override +}; +``` + +**Layout Types** (dimension-specific): +- **1D Convolution**: + - Input: `GNCW`, `GNWC`, `NWGC`, `NGCW`, `G_NW_C_strided` + - Weight: `GKXC`, `GKCX`, `KXGC`, `G_K_X_C_strided` + - Output: `GNKW`, `GNWK`, `NWGK`, `NGKW`, `G_NW_K_strided` + +- **2D Convolution**: + - Input: `GNCHW`, `GNHWC`, `NHWGC`, `NGCHW`, `G_NHW_C_strided` + - Weight: `GKYXC`, `GKCYX`, `KYXGC`, `G_K_YX_C_strided` + - Output: `GNKHW`, `GNHWK`, `NHWGK`, `NGKHW`, `G_NHW_K_strided` + +- **3D Convolution**: + - Input: `GNCDHW`, `GNDHWC`, `NDHWGC`, `NGCDHW`, `G_NDHW_C_strided` + - Weight: `GKZYXC`, `GKCZYX`, `KZYXGC`, `G_K_ZYX_C_strided` + - Output: `GNKDHW`, `GNDHWK`, `NDHWGK`, `NGKDHW`, `G_NDHW_K_strided` + +Where: +- `G` = Groups +- `N` = Batch size +- `C` = Input channels +- `K` = Output channels (filters) +- `W`, `H`, `D` = Width, Height, Depth (spatial dimensions) +- `X`, `Y`, `Z` = Filter dimensions + +#### 4. Tensor Operations + +Describes fused elementwise operations applied to a tensor: + +```cpp +template +concept TensorOperatorDescriptor = requires(T t) { + { t.elementwise_operation } -> std::convertible_to; + requires AuxiliaryOperandConfigsWellDefinedIfProvided; +}; +``` + +**Supported Operations:** +- `PASS_THROUGH`: No operation (identity) +- `SCALE`: Multiply by a scalar +- `CLAMP`: Clamp values to a range +- `BIAS_BNORM_CLAMP`: Bias addition + batch normalization + clamp +- `SCALEADD_SCALEADD_RELU`: Fused scale-add operations + ReLU activation + +**Auxiliary Operands:** +Some operations require additional tensor inputs (e.g., bias tensors, scaling factors). These are specified through `auxiliary_operand_configs`, which is an array of `TensorConfigDescriptor` objects describing the layout and data type of each auxiliary input. + +### Concepts and Validation + +The signature system uses C++20 concepts for compile-time validation at multiple levels: + +#### Constraint Concepts + +```cpp +// Spatial dimension must be 1, 2, or 3 +template +concept ConvSpatialDim = std::is_integral_v && (N == 1 || N == 2 || N == 3); + +// Valid data types for convolution +template +concept ValidConvDataType = + (T == DataType::FP32) || (T == DataType::FP16) || (T == DataType::BF16) || + (T == DataType::FP8) || (T == DataType::I8) || (T == DataType::U8); +``` + +#### Validation Concept + +```cpp +// Validates a complete signature +template +concept ValidConvSignature = requires { + requires ConvSpatialDim; + requires ValidConvDataType; +}; +``` + +#### Tensor Descriptors + +The layout/data type/elementwise operation are described per tensor. This multi-level hierarchy allows: +- **Flexibility**: Each tensor can have independent layout and data type +- **Reusability**: Common configurations can be shared across different signatures +- **Extensibility**: New properties can be added to specific levels without affecting others +- **Clarity**: Separates concerns (global properties vs. 
tensor-specific properties) + +#### Optional Signature Fields + +Several fields in the signature are optional: +- **`direction`**: Defaults to `FORWARD` if not specified, reducing boilerplate for the common case +- **Tensor `data_type`**: Falls back to signature's default, allowing mixed-precision with minimal specification +- **Tensor `operation`**: Defaults to `PASS_THROUGH`, supporting both fused and non-fused operations with the same interface + +This design follows the principle of "make the common case simple, the complex case possible." + +#### Union-Based Layout Representation + +The `ConvLayout` type uses unions to support dimension-agnostic code: + +```cpp +struct ConvLayout { + union { + ConvInputLayout _input_layout; + ConvWeightLayout _weight_layout; + ConvOutputLayout _output_layout; + ConvAuxiliaryTensorLayout _aux_tensor_layout; + }; + // ... constructors for each type +}; +``` + +This allows: +- Single type to represent all layout variants +- Type-safe construction through overloaded constructors +- Compile-time enforcement of valid combinations through concepts + +--- From a468fce74702d4c804da6e45cb7adcd17cadfa3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Mon, 1 Dec 2025 14:05:46 +0000 Subject: [PATCH 27/41] Add link to main Readme. Fix links in the builder design doc. --- experimental/builder/README.md | 4 ++++ experimental/builder/include/ck_tile/builder/README.md | 3 --- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/experimental/builder/README.md b/experimental/builder/README.md index 141a34b9f9..b5db2344a3 100644 --- a/experimental/builder/README.md +++ b/experimental/builder/README.md @@ -10,6 +10,10 @@ The builder provides a high-level, semantically-clear interface for constructing This project is a prototype for a more general builder pattern for all of composable_kernel (CK) and CKTile, but is currently limited to formalizing the interface between MIOpen and CK. +## Design descriptions + +- [CK Builder design description](include/ck_tile/builder/README.md) + ## Directory Structure - `include/ck_tile/builder/` diff --git a/experimental/builder/include/ck_tile/builder/README.md b/experimental/builder/include/ck_tile/builder/README.md index 680edf09a9..a0522a50d6 100644 --- a/experimental/builder/include/ck_tile/builder/README.md +++ b/experimental/builder/include/ck_tile/builder/README.md @@ -9,9 +9,6 @@ This directory contains the builder framework for Composable Kernel, which provi - [Architecture](#architecture) - [Core Components](#core-components) - [Concepts and Validation](#concepts-and-validation) - - [Examples](#examples) - - [Design Rationale](#design-rationale) - --- ## Convolution Signature Design From 95fbe2d23474b2286c0a7385459ad8b94cda5dad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Tue, 2 Dec 2025 08:18:36 +0000 Subject: [PATCH 28/41] Clean-up data type/layout/elementwise op conversions. 
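
The conversions now use a single trait-specialization pattern instead of
if-constexpr ladders: each mapping is one specialization, and the primary
template's static_assert rejects any enum value without a mapping. Roughly
(a sketch, with the trait shape as in this patch):

    // Looking up the CK type for a builder data type is one instantiation:
    using BF16 = DataTypeToCK<DataType::BF16>::type; // -> ck::bhalf_t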
---
 .../factory/helpers/conv_data_type_utils.hpp  |  66 ++-----
 .../helpers/conv_elementwise_op_utils.hpp     |  46 ++---
 .../factory/helpers/conv_layout_utils.hpp     | 180 +++++++-----------
 3 files changed, 94 insertions(+), 198 deletions(-)

diff --git a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_data_type_utils.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_data_type_utils.hpp
index 4a23894041..b5555fc467 100644
--- a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_data_type_utils.hpp
+++ b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_data_type_utils.hpp
@@ -8,35 +8,21 @@
 namespace ck_tile::builder::factory_internal {
 
-struct CK_half
+template <auto UnsupportedEnumValue>
+struct DataTypeToCK
 {
-    using type = ck::half_t;
+    // Catch unsupported data types at compile time
+    static_assert(sizeof(UnsupportedEnumValue) == 0,
+                  "Unsupported data type conversion to CK.");
 };
 
-struct CK_bhalf
-{
-    using type = ck::bhalf_t;
-};
-
-struct CK_float
-{
-    using type = float;
-};
-
-struct CK_int8
-{
-    using type = int8_t;
-};
+template <> struct DataTypeToCK<DataType::FP16> { using type = ck::half_t; };
+template <> struct DataTypeToCK<DataType::BF16> { using type = ck::bhalf_t; };
+template <> struct DataTypeToCK<DataType::FP32> { using type = float; };
+template <> struct DataTypeToCK<DataType::INT32> { using type = int32_t; };
+template <> struct DataTypeToCK<DataType::I8> { using type = int8_t; };
+template <> struct DataTypeToCK<DataType::FP8> { using type = ck::f8_t; };
 
-struct CK_f8
-{
-    using type = ck::f8_t;
-};
-
-struct CK_int32
-{
-    using type = int32_t;
-};
 
 struct CK_empty_tuple
 {
@@ -46,35 +32,7 @@ template <DataType dt>
 consteval auto ConvertDataTypeToCK()
 {
-    if constexpr(dt == DataType::FP16)
-    {
-        return CK_half{};
-    }
-    else if constexpr(dt == DataType::BF16)
-    {
-        return CK_bhalf{};
-    }
-    else if constexpr(dt == DataType::FP32)
-    {
-        return CK_float{};
-    }
-    else if constexpr(dt == DataType::INT32)
-    {
-        return CK_int32{};
-    }
-    else if constexpr(dt == DataType::I8)
-    {
-        return CK_int8{};
-    }
-    else if constexpr(dt == DataType::FP8)
-    {
-        return CK_f8{};
-    }
-    else
-    {
-        static_assert(sizeof(UnsupportedEnumValue<dt>) == 0,
-                      "Internal error. Unsupported data type conversion to CK.");
-    }
+    return DataTypeToCK<dt>{};
 }
 
 template
diff --git a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op_utils.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op_utils.hpp
index 2c6a0d4ee1..8f08e2d34c 100644
--- a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op_utils.hpp
+++ b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op_utils.hpp
@@ -7,27 +7,35 @@
 #include "ck_tile/builder/types.hpp"
 
 namespace ck_tile::builder::factory_internal {
-struct CK_PassThroughOp
+
+template <auto UnsupportedEnumValue>
+struct ElementwiseOpToCK
+{
+    static_assert(sizeof(UnsupportedEnumValue) == 0,
+                  "Unsupported elementwise operation conversion to CK.");
+};
+
+template <> struct ElementwiseOpToCK<ElementwiseOperation::PASS_THROUGH>
 {
     using Op = ck::tensor_operation::element_wise::PassThrough;
 };
-struct CK_ScaleOp
+template <> struct ElementwiseOpToCK<ElementwiseOperation::SCALE>
 {
     using Op = ck::tensor_operation::element_wise::Scale;
 };
-struct CK_ClampOp
+template <> struct ElementwiseOpToCK<ElementwiseOperation::CLAMP>
 {
     using Op = ck::tensor_operation::element_wise::Clamp;
 };
-struct CK_ScaleAddScaleAddReluOp
+template <> struct ElementwiseOpToCK<ElementwiseOperation::SCALEADD_SCALEADD_RELU>
 {
     using Op = ck::tensor_operation::element_wise::ScaleAddScaleAddRelu;
 };
-struct CK_BiasNormalizeInInferClampOp
+template <> struct ElementwiseOpToCK<ElementwiseOperation::BIAS_BNORM_CLAMP>
 {
     using Op = ck::tensor_operation::element_wise::BiasNormalizeInInferClamp;
 };
@@ -38,35 +46,11 @@ consteval auto GetElementwiseOp()
 {
     if constexpr(HasTensorOp<TensorDesc>)
     {
         constexpr auto op = TensorDesc.operation.elementwise_operation;
-        if constexpr(op == ElementwiseOperation::SCALE)
-        {
-            return CK_ScaleOp{};
-        }
-        else if constexpr(op == ElementwiseOperation::SCALEADD_SCALEADD_RELU)
-        {
-            return CK_ScaleAddScaleAddReluOp{};
-        }
-        else if constexpr(op == ElementwiseOperation::BIAS_BNORM_CLAMP)
-        {
-            return CK_BiasNormalizeInInferClampOp{};
-        }
-        else if constexpr(op == ElementwiseOperation::CLAMP)
-        {
-            return CK_ClampOp{};
-        }
-        else if constexpr(op == ElementwiseOperation::PASS_THROUGH)
-        {
-            return CK_PassThroughOp{};
-        }
-        else
-        {
-            static_assert(sizeof(UnsupportedEnumValue<op>) == 0,
-                          "Unsupported elementwise operation!");
-        }
+        return ElementwiseOpToCK<op>{};
     }
     else
     {
-        return CK_PassThroughOp{};
+        return ElementwiseOpToCK<ElementwiseOperation::PASS_THROUGH>{};
     }
 }
diff --git a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_layout_utils.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_layout_utils.hpp
index ceaa62bbcb..1116e98954 100644
--- a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_layout_utils.hpp
+++ b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_layout_utils.hpp
@@ -8,6 +8,60 @@
 namespace ck_tile::builder::factory_internal {
 
+template <auto UnsupportedEnumValue>
+struct LayoutToCK
+{
+    static_assert(sizeof(UnsupportedEnumValue) == 0,
+                  "Unsupported layout conversion to CK.");
+};
+
+// BiasLayout
+template <> struct LayoutToCK<BiasLayout::G_K_strided> { using type = ck::tensor_layout::convolution::G_K; };
+template <> struct LayoutToCK<BiasLayout::GC> { using type = ck::tensor_layout::convolution::GC; };
+template <> struct LayoutToCK<BiasLayout::G_C_strided> { using type = ck::tensor_layout::convolution::G_C; };
+
+// Input 1D
+template <> struct LayoutToCK<ConvInputLayout1D::NWGC> { using type = ck::tensor_layout::convolution::NWGC; };
+template <> struct LayoutToCK<ConvInputLayout1D::NGCW> { using type = ck::tensor_layout::convolution::NGCW; };
+template <> struct LayoutToCK<ConvInputLayout1D::GNWC> { using type = ck::tensor_layout::convolution::GNWC; };
+
+// Input 2D
+template <> struct LayoutToCK<ConvInputLayout2D::NGCHW> { using type = ck::tensor_layout::convolution::NGCHW; };
+template <> struct LayoutToCK<ConvInputLayout2D::NHWGC> { using type = ck::tensor_layout::convolution::NHWGC; };
+template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNHWC; }; + +// Input 3D +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NGCDHW; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NDHWGC; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNDHWC; }; + +// Weight 1D +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKXC; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKCX; }; + +// Weight 2D +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKYXC; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKCYX; }; + +// Weight 3D +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKCZYX; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKZYXC; }; + +// Output 1D +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NWGK; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NGKW; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNWK; }; + +// Output 2D +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NGKHW; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NHWGK; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNHWK; }; + +// Output 3D +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NGKDHW; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NDHWGK; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNDHWK; }; + struct EmptyAuxiliaryTensorLayout { using DsLayout = ck::Tuple<>; @@ -27,57 +81,23 @@ consteval auto GetAuxiliaryTensorLayoutValue() constexpr auto Layout = Config.layout; if constexpr(IsGenericBiasLayoutActive()) { - constexpr auto val = Layout._aux_tensor_layout._bias_layout; - if constexpr(val == BiasLayout::G_K_strided) - return ck::tensor_layout::convolution::G_K{}; - else if constexpr(val == BiasLayout::GC) - return ck::tensor_layout::convolution::GC{}; - else if constexpr(val == BiasLayout::G_C_strided) - return ck::tensor_layout::convolution::G_C{}; - else - static_assert(false, "Unsupported generic bias layout"); + return typename LayoutToCK::type{}; } else { constexpr auto out_layout = Layout._output_layout; - if constexpr(SPATIAL_DIM == 1) - { - constexpr auto val = out_layout._1d; - if constexpr(val == ConvOutputLayout1D::NWGK) - return ck::tensor_layout::convolution::NWGK{}; - else if constexpr(val == ConvOutputLayout1D::NGKW) - return ck::tensor_layout::convolution::NGKW{}; - else if constexpr(val == ConvOutputLayout1D::GNWK) - return ck::tensor_layout::convolution::GNWK{}; - } + return typename LayoutToCK::type{}; else if constexpr(SPATIAL_DIM == 2) - { - constexpr auto val = out_layout._2d; - if constexpr(val == ConvOutputLayout2D::NHWGK) - return ck::tensor_layout::convolution::NHWGK{}; - else if constexpr(val == ConvOutputLayout2D::GNHWK) - return ck::tensor_layout::convolution::GNHWK{}; - else if constexpr(val == ConvOutputLayout2D::NGKHW) - return ck::tensor_layout::convolution::NGKHW{}; - } + return typename LayoutToCK::type{}; else if constexpr(SPATIAL_DIM == 3) - { - constexpr auto val = out_layout._3d; - if constexpr(val == ConvOutputLayout3D::NDHWGK) - return ck::tensor_layout::convolution::NDHWGK{}; - else if 
constexpr(val == ConvOutputLayout3D::GNDHWK) - return ck::tensor_layout::convolution::GNDHWK{}; - else if constexpr(val == ConvOutputLayout3D::NGKDHW) - return ck::tensor_layout::convolution::NGKDHW{}; - } + return typename LayoutToCK::type{}; } } template consteval auto GetAuxiliaryTensorLayoutTuple(std::index_sequence) { - // TODO: Use std::tuple instead of ck::Tuple return ck::Tuple())...>{}; } @@ -113,99 +133,33 @@ template consteval auto GetInputLayout() { if constexpr(SPATIAL_DIM == 1) - { - constexpr auto val = Layout._1d; - if constexpr(val == ConvInputLayout1D::NWGC) - return ck::tensor_layout::convolution::NWGC{}; - else if constexpr(val == ConvInputLayout1D::NGCW) - return ck::tensor_layout::convolution::NGCW{}; - else if constexpr(val == ConvInputLayout1D::GNWC) - return ck::tensor_layout::convolution::GNWC{}; - } + return typename LayoutToCK::type{}; else if constexpr(SPATIAL_DIM == 2) - { - constexpr auto val = Layout._2d; - if constexpr(val == ConvInputLayout2D::NGCHW) - return ck::tensor_layout::convolution::NGCHW{}; - else if constexpr(val == ConvInputLayout2D::NHWGC) - return ck::tensor_layout::convolution::NHWGC{}; - else if constexpr(val == ConvInputLayout2D::GNHWC) - return ck::tensor_layout::convolution::GNHWC{}; - } + return typename LayoutToCK::type{}; else if constexpr(SPATIAL_DIM == 3) - { - constexpr auto val = Layout._3d; - if constexpr(val == ConvInputLayout3D::NGCDHW) - return ck::tensor_layout::convolution::NGCDHW{}; - else if constexpr(val == ConvInputLayout3D::NDHWGC) - return ck::tensor_layout::convolution::NDHWGC{}; - else if constexpr(val == ConvInputLayout3D::GNDHWC) - return ck::tensor_layout::convolution::GNDHWC{}; - } + return typename LayoutToCK::type{}; } template consteval auto GetWeightLayout() { if constexpr(SPATIAL_DIM == 1) - { - constexpr auto val = Layout._1d; - if constexpr(val == ConvWeightLayout1D::GKXC) - return ck::tensor_layout::convolution::GKXC{}; - else if constexpr(val == ConvWeightLayout1D::GKCX) - return ck::tensor_layout::convolution::GKCX{}; - } + return typename LayoutToCK::type{}; else if constexpr(SPATIAL_DIM == 2) - { - constexpr auto val = Layout._2d; - if constexpr(val == ConvWeightLayout2D::GKYXC) - return ck::tensor_layout::convolution::GKYXC{}; - else if constexpr(val == ConvWeightLayout2D::GKCYX) - return ck::tensor_layout::convolution::GKCYX{}; - } + return typename LayoutToCK::type{}; else if constexpr(SPATIAL_DIM == 3) - { - constexpr auto val = Layout._3d; - if constexpr(val == ConvWeightLayout3D::GKCZYX) - return ck::tensor_layout::convolution::GKCZYX{}; - else if constexpr(val == ConvWeightLayout3D::GKZYXC) - return ck::tensor_layout::convolution::GKZYXC{}; - } + return typename LayoutToCK::type{}; } template consteval auto GetOutputLayout() { if constexpr(SPATIAL_DIM == 1) - { - constexpr auto val = Layout._1d; - if constexpr(val == ConvOutputLayout1D::NWGK) - return ck::tensor_layout::convolution::NWGK{}; - else if constexpr(val == ConvOutputLayout1D::NGKW) - return ck::tensor_layout::convolution::NGKW{}; - else if constexpr(val == ConvOutputLayout1D::GNWK) - return ck::tensor_layout::convolution::GNWK{}; - } + return typename LayoutToCK::type{}; else if constexpr(SPATIAL_DIM == 2) - { - constexpr auto val = Layout._2d; - if constexpr(val == ConvOutputLayout2D::NGKHW) - return ck::tensor_layout::convolution::NGKHW{}; - else if constexpr(val == ConvOutputLayout2D::NHWGK) - return ck::tensor_layout::convolution::NHWGK{}; - else if constexpr(val == ConvOutputLayout2D::GNHWK) - return 
ck::tensor_layout::convolution::GNHWK{}; - } + return typename LayoutToCK::type{}; else if constexpr(SPATIAL_DIM == 3) - { - constexpr auto val = Layout._3d; - if constexpr(val == ConvOutputLayout3D::NGKDHW) - return ck::tensor_layout::convolution::NGKDHW{}; - else if constexpr(val == ConvOutputLayout3D::NDHWGK) - return ck::tensor_layout::convolution::NDHWGK{}; - else if constexpr(val == ConvOutputLayout3D::GNDHWK) - return ck::tensor_layout::convolution::GNDHWK{}; - } + return typename LayoutToCK::type{}; } template Date: Tue, 2 Dec 2025 09:58:01 +0000 Subject: [PATCH 29/41] Switch from dimension and tensor type specific layouts to a flat list of tensor layouts. --- .../builder/conv_signature_concepts.hpp | 73 ++++- .../ck_tile/builder/factory/conv_factory.hpp | 39 --- .../factory/helpers/conv_layout_utils.hpp | 149 +++------ .../builder/reflect/conv_description.hpp | 48 +-- .../ck_tile/builder/reflect/conv_traits.hpp | 77 ++--- .../builder/include/ck_tile/builder/types.hpp | 292 ++---------------- .../test/conv/test_ckb_conv_fwd_1d_bf16.cpp | 6 +- .../test/conv/test_ckb_conv_fwd_1d_fp16.cpp | 6 +- .../test/conv/test_ckb_conv_fwd_1d_i8.cpp | 6 +- .../test/conv/test_ckb_conv_fwd_2d_bf16.cpp | 12 +- ...est_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp | 10 +- .../conv/test_ckb_conv_fwd_2d_dl_fp16.cpp | 12 +- .../test/conv/test_ckb_conv_fwd_2d_fp16.cpp | 6 +- .../test/conv/test_ckb_conv_fwd_2d_fp32.cpp | 6 +- .../test/conv/test_ckb_conv_fwd_2d_fp8.cpp | 6 +- ...test_ckb_conv_fwd_2d_large_tensor_fp16.cpp | 12 +- .../test/conv/test_ckb_conv_fwd_3d_bf16.cpp | 6 +- .../test/conv/test_ckb_conv_fwd_3d_fp16.cpp | 6 +- .../test/conv/test_ckb_conv_fwd_3d_fp32.cpp | 6 +- .../builder/test/conv/test_conv_traits.cpp | 18 +- .../test/impl/conv_signature_types.hpp | 2 +- .../builder/test/test_conv_description.cpp | 20 +- 22 files changed, 249 insertions(+), 569 deletions(-) diff --git a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp index 234cfe685d..91da00ea6a 100644 --- a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp @@ -34,9 +34,56 @@ concept ValidConvDataType = (T == DataType::FP32) || (T == DataType::FP16) || (T == DataType::BF16) || (T == DataType::FP8) || (T == DataType::I8) || (T == DataType::U8); + +template +concept BiasTensorLayout = (L == TensorLayout::GC) || (L == TensorLayout::G_C_strided) || + (L == TensorLayout::G_K_strided); + +template +concept ConvInputLayout1D = (L == TensorLayout::GNCW) || (L == TensorLayout::GNWC) || + (L == TensorLayout::NWGC) || (L == TensorLayout::NGCW) || + (L == TensorLayout::G_NW_C_strided); + +template +concept ConvInputLayout2D = (L == TensorLayout::GNCHW) || (L == TensorLayout::GNHWC) || + (L == TensorLayout::NHWGC) || (L == TensorLayout::NGCHW) || + (L == TensorLayout::G_NHW_C_strided); + +template +concept ConvInputLayout3D = (L == TensorLayout::GNCDHW) || (L == TensorLayout::GNDHWC) || + (L == TensorLayout::NDHWGC) || (L == TensorLayout::NGCDHW) || + (L == TensorLayout::G_NDHW_C_strided); + +template +concept ConvWeightLayout1D = (L == TensorLayout::GKXC) || (L == TensorLayout::GKCX) || + (L == TensorLayout::KXGC) || (L == TensorLayout::G_K_X_C_strided); + +template +concept ConvWeightLayout2D = (L == TensorLayout::GKYXC) || (L == TensorLayout::GKCYX) || + (L == TensorLayout::KYXGC) || (L == TensorLayout::G_K_YX_C_strided); + +template +concept 
ConvWeightLayout3D = (L == TensorLayout::GKZYXC) || (L == TensorLayout::GKCZYX) || + (L == TensorLayout::KZYXGC) || (L == TensorLayout::G_K_ZYX_C_strided); + +template +concept ConvOutputLayout1D = (L == TensorLayout::GNKW) || (L == TensorLayout::GNWK) || + (L == TensorLayout::NWGK) || (L == TensorLayout::NGKW) || + (L == TensorLayout::G_NW_K_strided); + +template +concept ConvOutputLayout2D = (L == TensorLayout::GNKHW) || (L == TensorLayout::GNHWK) || + (L == TensorLayout::NHWGK) || (L == TensorLayout::NGKHW) || + (L == TensorLayout::G_NHW_K_strided); + +template +concept ConvOutputLayout3D = (L == TensorLayout::GNKDHW) || (L == TensorLayout::GNDHWK) || + (L == TensorLayout::NDHWGK) || (L == TensorLayout::NGKDHW) || + (L == TensorLayout::G_NDHW_K_strided); + template concept TensorConfigDescriptor = requires(T t) { - { t.layout } -> std::convertible_to; + { t.layout } -> std::convertible_to; // Only require that data type is defined. It might be set to undefined value, in which case the // signature's data type is used. { t.data_type } -> std::convertible_to; @@ -146,24 +193,24 @@ template concept ConvDirectionIsBackwardWeight = (Sig.direction == ConvDirection::BACKWARD_WEIGHT); // Constraints for forward convolution input layouts. -template +template concept ValidConvInputLayoutForSpatialDim = - (SpatialDim == 1 && std::same_as) || - (SpatialDim == 2 && std::same_as) || - (SpatialDim == 3 && std::same_as); + (SpatialDim == 1 && ConvInputLayout1D) || + (SpatialDim == 2 && ConvInputLayout2D) || + (SpatialDim == 3 && ConvInputLayout3D); // Constraints for forward convolution output layouts. -template +template concept ValidConvOutputLayoutForSpatialDim = - (SpatialDim == 1 && std::same_as) || - (SpatialDim == 2 && std::same_as) || - (SpatialDim == 3 && std::same_as); + (SpatialDim == 1 && ConvOutputLayout1D) || + (SpatialDim == 2 && ConvOutputLayout2D) || + (SpatialDim == 3 && ConvOutputLayout3D); // Constraints for forward convolution weight layouts. 
-template +template concept ValidConvWeightLayoutForSpatialDim = - (SpatialDim == 1 && std::same_as) || - (SpatialDim == 2 && std::same_as) || - (SpatialDim == 3 && std::same_as); + (SpatialDim == 1 && ConvWeightLayout1D) || + (SpatialDim == 2 && ConvWeightLayout2D) || + (SpatialDim == 3 && ConvWeightLayout3D); } // namespace ck_tile::builder diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_factory.hpp index 45a5b2853f..17fbc7cd4c 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_factory.hpp @@ -63,45 +63,6 @@ namespace ck_tile::builder::factory_internal { -template -consteval auto get_input_layout_value(ConvInputLayout layout) -{ - if constexpr(SPATIAL_DIM == 1) - return layout._1d; - else if constexpr(SPATIAL_DIM == 2) - return layout._2d; - else if constexpr(SPATIAL_DIM == 3) - return layout._3d; - else - static_assert(false, "Unsupported spatial dimension"); -} - -template -consteval auto get_weight_layout_value(ConvWeightLayout layout) -{ - if constexpr(SPATIAL_DIM == 1) - return layout._1d; - else if constexpr(SPATIAL_DIM == 2) - return layout._2d; - else if constexpr(SPATIAL_DIM == 3) - return layout._3d; - else - static_assert(false, "Unsupported spatial dimension"); -} - -template -consteval auto get_output_layout_value(ConvOutputLayout layout) -{ - if constexpr(SPATIAL_DIM == 1) - return layout._1d; - else if constexpr(SPATIAL_DIM == 2) - return layout._2d; - else if constexpr(SPATIAL_DIM == 3) - return layout._3d; - else - static_assert(false, "Unsupported spatial dimension"); -} - // The algorithm specializations for the convolution and GEMM. template requires( diff --git a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_layout_utils.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_layout_utils.hpp index 1116e98954..9433adb3dc 100644 --- a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_layout_utils.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_layout_utils.hpp @@ -8,7 +8,9 @@ namespace ck_tile::builder::factory_internal { -template +using namespace ck_tile::builder; + +template struct LayoutToCK { static_assert(sizeof(UnsupportedEnumValue) == 0, @@ -16,90 +18,68 @@ struct LayoutToCK }; // BiasLayout -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::G_K; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GC; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::G_C; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::G_K; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GC; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::G_C; }; // Input 1D -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NWGC; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NGCW; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNWC; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NWGC; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NGCW; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNWC; }; // Input 2D -template <> struct LayoutToCK { using type = 
ck::tensor_layout::convolution::NGCHW; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NHWGC; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNHWC; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NGCHW; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NHWGC; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNHWC; }; // Input 3D -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NGCDHW; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NDHWGC; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNDHWC; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NGCDHW; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NDHWGC; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNDHWC; }; // Weight 1D -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKXC; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKCX; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKXC; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKCX; }; // Weight 2D -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKYXC; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKCYX; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKYXC; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKCYX; }; // Weight 3D -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKCZYX; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKZYXC; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKCZYX; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKZYXC; }; // Output 1D -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NWGK; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NGKW; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNWK; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NWGK; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NGKW; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNWK; }; // Output 2D -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NGKHW; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NHWGK; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNHWK; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NGKHW; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NHWGK; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNHWK; }; // Output 3D -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NGKDHW; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NDHWGK; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNDHWK; }; +template <> struct LayoutToCK { using type = 
ck::tensor_layout::convolution::NGKDHW; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NDHWGK; }; +template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNDHWK; }; + +template +consteval auto TensorLayoutToCK() +{ + return typename LayoutToCK::type{}; +} struct EmptyAuxiliaryTensorLayout { using DsLayout = ck::Tuple<>; }; -template -consteval bool IsGenericBiasLayoutActive() -{ - return requires { - typename std::integral_constant; - }; -} - -template -consteval auto GetAuxiliaryTensorLayoutValue() -{ - constexpr auto Layout = Config.layout; - if constexpr(IsGenericBiasLayoutActive()) - { - return typename LayoutToCK::type{}; - } - else - { - constexpr auto out_layout = Layout._output_layout; - if constexpr(SPATIAL_DIM == 1) - return typename LayoutToCK::type{}; - else if constexpr(SPATIAL_DIM == 2) - return typename LayoutToCK::type{}; - else if constexpr(SPATIAL_DIM == 3) - return typename LayoutToCK::type{}; - } -} -template +template consteval auto GetAuxiliaryTensorLayoutTuple(std::index_sequence) { - return ck::Tuple())...>{}; + return ck::Tuple())...>{}; } template @@ -108,7 +88,7 @@ struct AuxiliaryTensorLayouts { static constexpr auto Size = AuxiliaryTensorConfigsValue.size(); using DsLayout = - decltype(GetAuxiliaryTensorLayoutTuple( + decltype(GetAuxiliaryTensorLayoutTuple( std::make_index_sequence{})); }; @@ -129,42 +109,9 @@ consteval auto GetAuxiliaryTensorLayouts() return EmptyAuxiliaryTensorLayout{}; } -template -consteval auto GetInputLayout() -{ - if constexpr(SPATIAL_DIM == 1) - return typename LayoutToCK::type{}; - else if constexpr(SPATIAL_DIM == 2) - return typename LayoutToCK::type{}; - else if constexpr(SPATIAL_DIM == 3) - return typename LayoutToCK::type{}; -} - -template -consteval auto GetWeightLayout() -{ - if constexpr(SPATIAL_DIM == 1) - return typename LayoutToCK::type{}; - else if constexpr(SPATIAL_DIM == 2) - return typename LayoutToCK::type{}; - else if constexpr(SPATIAL_DIM == 3) - return typename LayoutToCK::type{}; -} - -template -consteval auto GetOutputLayout() -{ - if constexpr(SPATIAL_DIM == 1) - return typename LayoutToCK::type{}; - else if constexpr(SPATIAL_DIM == 2) - return typename LayoutToCK::type{}; - else if constexpr(SPATIAL_DIM == 3) - return typename LayoutToCK::type{}; -} - -template requires(ConvSpatialDim && @@ -174,17 +121,17 @@ template ()); - using BLayout = decltype(GetWeightLayout()); - using ELayout = decltype(GetOutputLayout()); + using ALayout = decltype(TensorLayoutToCK()); + using BLayout = decltype(TensorLayoutToCK()); + using ELayout = decltype(TensorLayoutToCK()); }; template consteval auto GetTensorLayout() { - constexpr auto INPUT_LAYOUT = Signature.input.config.layout._input_layout; - constexpr auto WEIGHT_LAYOUT = Signature.weight.config.layout._weight_layout; - constexpr auto OUTPUT_LAYOUT = Signature.output.config.layout._output_layout; + constexpr auto INPUT_LAYOUT = Signature.input.config.layout; + constexpr auto WEIGHT_LAYOUT = Signature.weight.config.layout; + constexpr auto OUTPUT_LAYOUT = Signature.output.config.layout; return factory_internal:: ConvTensorLayouts{}; diff --git a/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp b/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp index bec4975259..54b35ff32c 100644 --- a/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp +++ b/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp @@ -40,16 +40,9 @@ struct 
ConvSignatureInfo { int spatial_dim; builder::ConvDirection direction; - std::variant - input_layout; - std::variant - weight_layout; - std::variant - output_layout; + builder::TensorLayout input_layout; + builder::TensorLayout weight_layout; + builder::TensorLayout output_layout; builder::DataType data_type; builder::ElementwiseOperation input_element_op; builder::ElementwiseOperation weight_element_op; @@ -268,41 +261,12 @@ ConvDescription Describe() { using Traits = ConvTraits; - // TODO: This is a temporary fix. We should refactor also the traits and descriptors to better - // reflect the conv signature. - auto get_input_layout = []() -> decltype(ConvSignatureInfo::input_layout) { - if constexpr(Traits::spatial_dim == 1) - return Traits::layout[0]._input_layout._1d; - else if constexpr(Traits::spatial_dim == 2) - return Traits::layout[0]._input_layout._2d; - else - return Traits::layout[0]._input_layout._3d; - }; - - auto get_weight_layout = []() -> decltype(ConvSignatureInfo::weight_layout) { - if constexpr(Traits::spatial_dim == 1) - return Traits::layout[1]._weight_layout._1d; - else if constexpr(Traits::spatial_dim == 2) - return Traits::layout[1]._weight_layout._2d; - else - return Traits::layout[1]._weight_layout._3d; - }; - - auto get_output_layout = []() -> decltype(ConvSignatureInfo::output_layout) { - if constexpr(Traits::spatial_dim == 1) - return Traits::layout[2]._output_layout._1d; - else if constexpr(Traits::spatial_dim == 2) - return Traits::layout[2]._output_layout._2d; - else - return Traits::layout[2]._output_layout._3d; - }; - return ConvDescription{ .signature = ConvSignatureInfo{.spatial_dim = Traits::spatial_dim, .direction = Traits::direction, - .input_layout = get_input_layout(), - .weight_layout = get_weight_layout(), - .output_layout = get_output_layout(), + .input_layout = Traits::layout[0], + .weight_layout = Traits::layout[1], + .output_layout = Traits::layout[2], .data_type = Traits::data_type, .input_element_op = Traits::input_element_op, .weight_element_op = Traits::weight_element_op, diff --git a/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp b/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp index da5a35051e..c5f42c30cb 100644 --- a/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp +++ b/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp @@ -298,7 +298,10 @@ constexpr auto conv_spec() /// @brief Derives the grouped convolution layout from a device kernel `Instance` type. /// @tparam Instance The device kernel instance type. -/// @return A `builder::GroupConvLayout{1D|2D|3D}` enum value corresponding to the tensor layouts. 
+/// @return An std::array corresponding to the tensor layouts: +/// index 0 -> Input layout +/// index 1 -> Weight layout +/// index 2 -> Output layout template constexpr auto conv_layout() { @@ -314,30 +317,30 @@ constexpr auto conv_layout() if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{builder::ConvInputLayout1D::GNWC, - builder::ConvWeightLayout1D::GKXC, - builder::ConvOutputLayout1D::GNWK}; + return std::array{builder::TensorLayout::GNWC, + builder::TensorLayout::GKXC, + builder::TensorLayout::GNWK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{builder::ConvInputLayout1D::NWGC, - builder::ConvWeightLayout1D::GKXC, - builder::ConvOutputLayout1D::NWGK}; + return std::array{builder::TensorLayout::NWGC, + builder::TensorLayout::GKXC, + builder::TensorLayout::NWGK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{builder::ConvInputLayout1D::NGCW, - builder::ConvWeightLayout1D::GKXC, - builder::ConvOutputLayout1D::NGKW}; + return std::array{builder::TensorLayout::NGCW, + builder::TensorLayout::GKXC, + builder::TensorLayout::NGKW}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{builder::ConvInputLayout1D::NGCW, - builder::ConvWeightLayout1D::GKCX, - builder::ConvOutputLayout1D::NGKW}; + return std::array{builder::TensorLayout::NGCW, + builder::TensorLayout::GKCX, + builder::TensorLayout::NGKW}; } } else if constexpr(InstTraits::kSpatialDim == 2) @@ -345,33 +348,33 @@ constexpr auto conv_layout() if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{builder::ConvInputLayout2D::GNHWC, - builder::ConvWeightLayout2D::GKYXC, - builder::ConvOutputLayout2D::GNHWK}; + return std::array{builder::TensorLayout::GNHWC, + builder::TensorLayout::GKYXC, + builder::TensorLayout::GNHWK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{builder::ConvInputLayout2D::NHWGC, - builder::ConvWeightLayout2D::GKYXC, - builder::ConvOutputLayout2D::NHWGK}; + return std::array{builder::TensorLayout::NHWGC, + builder::TensorLayout::GKYXC, + builder::TensorLayout::NHWGK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{builder::ConvInputLayout2D::NGCHW, - builder::ConvWeightLayout2D::GKYXC, - builder::ConvOutputLayout2D::NGKHW}; + return std::array{builder::TensorLayout::NGCHW, + builder::TensorLayout::GKYXC, + builder::TensorLayout::NGKHW}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{builder::ConvInputLayout2D::NGCHW, - builder::ConvWeightLayout2D::GKCYX, - builder::ConvOutputLayout2D::NGKHW}; + return std::array{builder::TensorLayout::NGCHW, + builder::TensorLayout::GKCYX, + builder::TensorLayout::NGKHW}; } } else if constexpr(InstTraits::kSpatialDim == 3) @@ -379,33 +382,33 @@ constexpr auto conv_layout() if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{builder::ConvInputLayout3D::GNDHWC, - builder::ConvWeightLayout3D::GKZYXC, - builder::ConvOutputLayout3D::GNDHWK}; + return std::array{builder::TensorLayout::GNDHWC, + builder::TensorLayout::GKZYXC, + builder::TensorLayout::GNDHWK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{builder::ConvInputLayout3D::NDHWGC, - builder::ConvWeightLayout3D::GKZYXC, - builder::ConvOutputLayout3D::NDHWGK}; + return 
std::array{builder::TensorLayout::NDHWGC, + builder::TensorLayout::GKZYXC, + builder::TensorLayout::NDHWGK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{builder::ConvInputLayout3D::NGCDHW, - builder::ConvWeightLayout3D::GKZYXC, - builder::ConvOutputLayout3D::NGKDHW}; + return std::array{builder::TensorLayout::NGCDHW, + builder::TensorLayout::GKZYXC, + builder::TensorLayout::NGKDHW}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return std::array{builder::ConvInputLayout3D::NGCDHW, - builder::ConvWeightLayout3D::GKCZYX, - builder::ConvOutputLayout3D::NGKDHW}; + return std::array{builder::TensorLayout::NGCDHW, + builder::TensorLayout::GKCZYX, + builder::TensorLayout::NGKDHW}; } } } diff --git a/experimental/builder/include/ck_tile/builder/types.hpp b/experimental/builder/include/ck_tile/builder/types.hpp index 9e10388bb3..d2195447d8 100644 --- a/experimental/builder/include/ck_tile/builder/types.hpp +++ b/experimental/builder/include/ck_tile/builder/types.hpp @@ -23,133 +23,69 @@ enum class DataType U8 }; -enum class BiasLayout +enum class TensorLayout { + UNDEFINED, + + // Bias tensors GC, G_C_strided, - G_K_strided -}; + G_K_strided, -enum class ConvInputLayout1D -{ + // 1D conv input tensor GNCW, GNWC, NWGC, NGCW, - G_NW_C_strided -}; + G_NW_C_strided, -enum class ConvInputLayout2D -{ + // 2D conv input tensor GNCHW, GNHWC, NHWGC, NGCHW, - G_NHW_C_strided -}; + G_NHW_C_strided, -enum class ConvInputLayout3D -{ + // 3D conv input tensor GNCDHW, GNDHWC, NDHWGC, NGCDHW, - G_NDHW_C_strided -}; - -enum class UndefinedLayout -{ - None -}; - -struct ConvInputLayout -{ - union - { - ConvInputLayout1D _1d; - ConvInputLayout2D _2d; - ConvInputLayout3D _3d; - UndefinedLayout _undefined; - }; - - constexpr ConvInputLayout() : _undefined(UndefinedLayout::None) {} - constexpr ConvInputLayout(ConvInputLayout1D layout) : _1d(layout) {} - constexpr ConvInputLayout(ConvInputLayout2D layout) : _2d(layout) {} - constexpr ConvInputLayout(ConvInputLayout3D layout) : _3d(layout) {} - - friend constexpr bool operator==(const ConvInputLayout& lhs, const ConvInputLayout& rhs) - { - return std::bit_cast>(lhs) == - std::bit_cast>(rhs); - } -}; + G_NDHW_C_strided, -enum class ConvWeightLayout1D -{ + // 1D conv weight tensor GKXC, GKCX, KXGC, - G_K_X_C_strided -}; + G_K_X_C_strided, -enum class ConvWeightLayout2D -{ + // 2D conv weight tensor GKYXC, GKCYX, KYXGC, - G_K_YX_C_strided -}; + G_K_YX_C_strided, -enum class ConvWeightLayout3D -{ + // 3D conv weight tensor GKZYXC, GKCZYX, KZYXGC, - G_K_ZYX_C_strided -}; - -struct ConvWeightLayout -{ - union - { - ConvWeightLayout1D _1d; - ConvWeightLayout2D _2d; - ConvWeightLayout3D _3d; - UndefinedLayout _undefined; - }; - - constexpr ConvWeightLayout() : _undefined(UndefinedLayout::None) {} - constexpr ConvWeightLayout(ConvWeightLayout1D layout) : _1d(layout) {} - constexpr ConvWeightLayout(ConvWeightLayout2D layout) : _2d(layout) {} - constexpr ConvWeightLayout(ConvWeightLayout3D layout) : _3d(layout) {} - - friend constexpr bool operator==(const ConvWeightLayout& lhs, const ConvWeightLayout& rhs) - { - return std::bit_cast>(lhs) == - std::bit_cast>(rhs); - } -}; + G_K_ZYX_C_strided, -enum class ConvOutputLayout1D -{ + // 1D conv output tensor GNKW, GNWK, NWGK, NGKW, - G_NW_K_strided -}; + G_NW_K_strided, -enum class ConvOutputLayout2D -{ + // 2D conv output tensor GNKHW, GNHWK, NHWGK, NGKHW, - G_NHW_K_strided -}; + G_NHW_K_strided, -enum class ConvOutputLayout3D -{ + // 3D conv output tensor 
GNKDHW, GNDHWK, NDHWGK, @@ -157,80 +93,6 @@ enum class ConvOutputLayout3D G_NDHW_K_strided }; -struct ConvOutputLayout -{ - union - { - ConvOutputLayout1D _1d; - ConvOutputLayout2D _2d; - ConvOutputLayout3D _3d; - UndefinedLayout _undefined; - }; - - constexpr ConvOutputLayout() : _undefined(UndefinedLayout::None) {} - constexpr ConvOutputLayout(ConvOutputLayout1D layout) : _1d(layout) {} - constexpr ConvOutputLayout(ConvOutputLayout2D layout) : _2d(layout) {} - constexpr ConvOutputLayout(ConvOutputLayout3D layout) : _3d(layout) {} - - friend constexpr bool operator==(const ConvOutputLayout& lhs, const ConvOutputLayout& rhs) - { - return std::bit_cast>(lhs) == - std::bit_cast>(rhs); - } -}; - -struct ConvAuxiliaryTensorLayout -{ - union - { - BiasLayout _bias_layout; - ConvOutputLayout _conv_output_layout; - }; - - constexpr ConvAuxiliaryTensorLayout(BiasLayout layout) : _bias_layout(layout) {} - constexpr ConvAuxiliaryTensorLayout(ConvOutputLayout layout) : _conv_output_layout(layout) {} - constexpr ConvAuxiliaryTensorLayout(ConvOutputLayout1D layout) : _conv_output_layout(layout) {} - constexpr ConvAuxiliaryTensorLayout(ConvOutputLayout2D layout) : _conv_output_layout(layout) {} - constexpr ConvAuxiliaryTensorLayout(ConvOutputLayout3D layout) : _conv_output_layout(layout) {} - - friend constexpr bool operator==(const ConvAuxiliaryTensorLayout& lhs, - const ConvAuxiliaryTensorLayout& rhs) - { - return std::bit_cast>(lhs) == - std::bit_cast>(rhs); - } -}; - -struct ConvLayout -{ - union - { - ConvInputLayout _input_layout; - ConvWeightLayout _weight_layout; - ConvOutputLayout _output_layout; - ConvAuxiliaryTensorLayout _aux_tensor_layout; - }; - - constexpr ConvLayout(ConvInputLayout layout) : _input_layout(layout) {} - constexpr ConvLayout(ConvInputLayout1D layout) : _input_layout(layout) {} - constexpr ConvLayout(ConvInputLayout2D layout) : _input_layout(layout) {} - constexpr ConvLayout(ConvInputLayout3D layout) : _input_layout(layout) {} - constexpr ConvLayout(ConvWeightLayout layout) : _weight_layout(layout) {} - constexpr ConvLayout(ConvWeightLayout1D layout) : _weight_layout(layout) {} - constexpr ConvLayout(ConvWeightLayout2D layout) : _weight_layout(layout) {} - constexpr ConvLayout(ConvWeightLayout3D layout) : _weight_layout(layout) {} - constexpr ConvLayout(ConvOutputLayout layout) : _output_layout(layout) {} - constexpr ConvLayout(ConvOutputLayout1D layout) : _output_layout(layout) {} - constexpr ConvLayout(ConvOutputLayout2D layout) : _output_layout(layout) {} - constexpr ConvLayout(ConvOutputLayout3D layout) : _output_layout(layout) {} - constexpr ConvLayout(BiasLayout layout) : _aux_tensor_layout(layout) {} - - friend constexpr bool operator==(const ConvLayout& lhs, const ConvLayout& rhs) - { - return std::bit_cast>(lhs) == - std::bit_cast>(rhs); - } -}; // Direction of the convolution operation. 
enum class ConvDirection @@ -500,9 +362,9 @@ inline std::ostream& operator<<(std::ostream& os, PipelineScheduler sched) } } -inline std::ostream& operator<<(std::ostream& os, ConvInputLayout1D layout) +inline std::ostream& operator<<(std::ostream& os, TensorLayout layout) { - using enum ConvInputLayout1D; + using enum TensorLayout; switch(layout) { case GNCW: return os << "GNCW"; @@ -510,155 +372,51 @@ inline std::ostream& operator<<(std::ostream& os, ConvInputLayout1D layout) case NWGC: return os << "NWGC"; case NGCW: return os << "NGCW"; case G_NW_C_strided: return os << "G_NW_C_strided"; - default: return os << "Unknown"; - } -} - -inline std::ostream& operator<<(std::ostream& os, ConvInputLayout2D layout) -{ - using enum ConvInputLayout2D; - switch(layout) - { case GNCHW: return os << "GNCHW"; case GNHWC: return os << "GNHWC"; case NHWGC: return os << "NHWGC"; case NGCHW: return os << "NGCHW"; case G_NHW_C_strided: return os << "G_NHW_C_strided"; - default: return os << "Unknown"; - } -} - -inline std::ostream& operator<<(std::ostream& os, ConvInputLayout3D layout) -{ - using enum ConvInputLayout3D; - switch(layout) - { case GNCDHW: return os << "GNCDHW"; case GNDHWC: return os << "GNDHWC"; case NDHWGC: return os << "NDHWGC"; case NGCDHW: return os << "NGCDHW"; case G_NDHW_C_strided: return os << "G_NDHW_C_strided"; - default: return os << "Unknown"; - } -} - -inline std::ostream& operator<<(std::ostream& os, ConvWeightLayout1D layout) -{ - using enum ConvWeightLayout1D; - switch(layout) - { case GKXC: return os << "GKXC"; case GKCX: return os << "GKCX"; case KXGC: return os << "KXGC"; case G_K_X_C_strided: return os << "G_K_X_C_strided"; - default: return os << "Unknown"; - } -} - -inline std::ostream& operator<<(std::ostream& os, ConvWeightLayout2D layout) -{ - using enum ConvWeightLayout2D; - switch(layout) - { case GKYXC: return os << "GKYXC"; case GKCYX: return os << "GKCYX"; case KYXGC: return os << "KYXGC"; case G_K_YX_C_strided: return os << "G_K_YX_C_strided"; - default: return os << "Unknown"; - } -} - -inline std::ostream& operator<<(std::ostream& os, ConvWeightLayout3D layout) -{ - using enum ConvWeightLayout3D; - switch(layout) - { case GKZYXC: return os << "GKZYXC"; case GKCZYX: return os << "GKCZYX"; case KZYXGC: return os << "KZYXGC"; case G_K_ZYX_C_strided: return os << "G_K_ZYX_C_strided"; - default: return os << "Unknown"; - } -} - -inline std::ostream& operator<<(std::ostream& os, ConvOutputLayout1D layout) -{ - using enum ConvOutputLayout1D; - switch(layout) - { case GNKW: return os << "GNKW"; case GNWK: return os << "GNWK"; case NWGK: return os << "NWGK"; case NGKW: return os << "NGKW"; case G_NW_K_strided: return os << "G_NW_K_strided"; - default: return os << "Unknown"; - } -} - -inline std::ostream& operator<<(std::ostream& os, ConvOutputLayout2D layout) -{ - using enum ConvOutputLayout2D; - switch(layout) - { case GNKHW: return os << "GNKHW"; case GNHWK: return os << "GNHWK"; case NHWGK: return os << "NHWGK"; case NGKHW: return os << "NGKHW"; case G_NHW_K_strided: return os << "G_NHW_K_strided"; - default: return os << "Unknown"; - } -} - -inline std::ostream& operator<<(std::ostream& os, ConvOutputLayout3D layout) -{ - using enum ConvOutputLayout3D; - switch(layout) - { case GNKDHW: return os << "GNKDHW"; case GNDHWK: return os << "GNDHWK"; case NDHWGK: return os << "NDHWGK"; case NGKDHW: return os << "NGKDHW"; case G_NDHW_K_strided: return os << "G_NDHW_K_strided"; - default: return os << "Unknown"; - } -} - -inline std::ostream& operator<<(std::ostream& os, 
BiasLayout layout) -{ - using enum BiasLayout; - switch(layout) - { case GC: return os << "GC"; case G_C_strided: return os << "G_C_strided"; case G_K_strided: return os << "G_K_strided"; + case UNDEFINED: return os << "UNDEFINED"; default: return os << "Unknown"; } } -inline std::ostream& -operator<<(std::ostream& os, - const std::variant& layout) -{ - std::visit([&os](const auto& l) { os << l; }, layout); - return os; -} - -inline std::ostream& -operator<<(std::ostream& os, - const std::variant& layout) -{ - std::visit([&os](const auto& l) { os << l; }, layout); - return os; -} - -inline std::ostream& -operator<<(std::ostream& os, - const std::variant& layout) -{ - std::visit([&os](const auto& l) { os << l; }, layout); - return os; -} - // ostream operator overload for std::variant of convolution specializations inline std::ostream& operator<<(std::ostream& os, const std::variant{.elementwise_operation = ElementwiseOperation::SCALE}}}; constexpr auto FwdConvAlgorithm = diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp index 2aab68b219..f759846849 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp @@ -17,9 +17,9 @@ TEST(FwdConvInstances, .direction = ConvDirection::FORWARD, .data_type = DataType::FP16, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = ConvInputLayout1D::NWGC}}, - .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout1D::GKXC}}, - .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout1D::NWGK}}}; + .input = ConvolutionTensor{.config = {.layout = TensorLayout::NWGC}}, + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKXC}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::NWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp index 051c616cc6..c042f2fa66 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp @@ -19,9 +19,9 @@ TEST(FwdConvInstances, .direction = ConvDirection::FORWARD, .data_type = DataType::I8, .accumulation_data_type = DataType::INT32, - .input = ConvolutionTensor{.config = {.layout = ConvInputLayout1D::GNWC}}, - .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout1D::GKXC}}, - .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout1D::GNWK}}}; + .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNWC}}, + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKXC}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleD_Wmma_CShuffle{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp index 06178fdae7..f6d9a675b6 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp @@ -17,9 +17,9 @@ TEST(FwdConvInstances, .direction = ConvDirection::FORWARD, .data_type = DataType::BF16, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = ConvInputLayout2D::NHWGC}}, - .weight = 
ConvolutionTensor{.config = {.layout = ConvWeightLayout2D::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout2D::NHWGK}}}; + .input = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGC}}, + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -49,9 +49,9 @@ TEST(FwdConvInstances, .direction = ConvDirection::FORWARD, .data_type = DataType::BF16, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = ConvInputLayout2D::NHWGC}}, - .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout2D::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout2D::NHWGK}}}; + .input = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGC}}, + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp index f2a9287b80..f8b66d3991 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp @@ -17,15 +17,15 @@ TEST(FwdConvInstances, .direction = ConvDirection::FORWARD, .data_type = DataType::BF16, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = ConvInputLayout2D::NHWGC}}, - .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout2D::GKYXC, + .input = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGC}}, + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC, .data_type = DataType::BF16}}, // For demo purposes .output = ConvolutionTensor{ - .config = {.layout = ConvOutputLayout2D::NHWGK}, + .config = {.layout = TensorLayout::NHWGK}, .operation = TensorOperation<>{.elementwise_operation = ElementwiseOperation::SCALEADD_SCALEADD_RELU} - .with_auxiliary_operand_configs()}}; + .with_auxiliary_operand_configs()}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp index c29ea309b7..d4ec658ec2 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp @@ -15,9 +15,9 @@ TEST(FwdConvInstances, Create_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK_Ins .direction = ConvDirection::FORWARD, .data_type = DataType::FP16, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = ConvInputLayout2D::GNHWC}}, - .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout2D::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout2D::GNHWK}}}; + .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWC}}, + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK{} @@ -44,9 +44,9 @@ 
TEST(FwdConvInstances, .direction = ConvDirection::FORWARD, .data_type = DataType::FP16, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = ConvInputLayout2D::GNHWC}}, - .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout2D::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout2D::GNHWK}}}; + .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWC}}, + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp index f3c74f8f62..a3009bb34f 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp @@ -16,9 +16,9 @@ TEST(FwdConvInstances, .direction = ConvDirection::FORWARD, .data_type = DataType::FP16, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = ConvInputLayout2D::GNHWC}}, - .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout2D::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout2D::GNHWK}}}; + .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWC}}, + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp index 6f393dc265..a08dc6c969 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp @@ -16,9 +16,9 @@ TEST(FwdConvInstances, .direction = ConvDirection::FORWARD, .data_type = DataType::FP32, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = ConvInputLayout2D::NGCHW}}, - .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout2D::GKCYX}}, - .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout2D::NGKHW}}}; + .input = ConvolutionTensor{.config = {.layout = TensorLayout::NGCHW}}, + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKCYX}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::NGKHW}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp index df13243cd7..06009afc5d 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp @@ -17,9 +17,9 @@ TEST(FwdConvInstances, .direction = ConvDirection::FORWARD, .data_type = DataType::FP8, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = ConvInputLayout2D::NHWGC}}, - .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout2D::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout2D::NHWGK}}}; + .input = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGC}}, + .weight = ConvolutionTensor{.config = {.layout = 
TensorLayout::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp index 227a2aa918..f818197330 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp @@ -16,9 +16,9 @@ TEST(FwdConvInstances, .direction = ConvDirection::FORWARD, .data_type = DataType::FP16, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = ConvInputLayout2D::GNHWC}}, - .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout2D::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout2D::GNHWK}}}; + .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWC}}, + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor{ @@ -48,9 +48,9 @@ TEST( .direction = ConvDirection::FORWARD, .data_type = DataType::FP16, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = ConvInputLayout2D::GNHWC}}, - .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout2D::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout2D::GNHWK}}}; + .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWC}}, + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor{ diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp index 75f0a6eed3..2461f13794 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp @@ -17,9 +17,9 @@ TEST(FwdConvInstances, .direction = ConvDirection::FORWARD, .data_type = DataType::BF16, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = ConvInputLayout3D::GNDHWC}}, - .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout3D::GKZYXC}}, - .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout3D::GNDHWK}}}; + .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNDHWC}}, + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKZYXC}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNDHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp index c8d891f91f..9a2461ea50 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp @@ -17,9 +17,9 @@ TEST(FwdConvInstances, .direction = ConvDirection::FORWARD, .data_type = DataType::FP16, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = ConvInputLayout3D::NDHWGC}}, - .weight = 
ConvolutionTensor{.config = {.layout = ConvWeightLayout3D::GKZYXC}}, - .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout3D::NDHWGK}}}; + .input = ConvolutionTensor{.config = {.layout = TensorLayout::NDHWGC}}, + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKZYXC}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::NDHWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp index 8ffbbe6b46..d5fe24d27c 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp @@ -17,9 +17,9 @@ TEST(FwdConvInstances, .direction = ConvDirection::FORWARD, .data_type = DataType::FP32, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = ConvInputLayout3D::NGCDHW}}, - .weight = ConvolutionTensor{.config = {.layout = ConvWeightLayout3D::GKCZYX}}, - .output = ConvolutionTensor{.config = {.layout = ConvOutputLayout3D::NGKDHW}}}; + .input = ConvolutionTensor{.config = {.layout = TensorLayout::NGCDHW}}, + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKCZYX}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::NGKDHW}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} diff --git a/experimental/builder/test/conv/test_conv_traits.cpp b/experimental/builder/test/conv/test_conv_traits.cpp index d10a530aa9..a6a7694703 100644 --- a/experimental/builder/test/conv/test_conv_traits.cpp +++ b/experimental/builder/test/conv/test_conv_traits.cpp @@ -86,9 +86,9 @@ TEST_F(ConvTraitsTest, ConvFwdTraitsExtraction) EXPECT_EQ(Traits::spatial_dim, 2); EXPECT_EQ(Traits::direction, ck_tile::builder::ConvDirection::FORWARD); EXPECT_THAT(Traits::layout, - ::testing::ElementsAre(ck_tile::builder::ConvInputLayout2D::GNHWC, - ck_tile::builder::ConvWeightLayout2D::GKYXC, - ck_tile::builder::ConvOutputLayout2D::GNHWK)); + ::testing::ElementsAre(ck_tile::builder::TensorLayout::GNHWC, + ck_tile::builder::TensorLayout::GKYXC, + ck_tile::builder::TensorLayout::GNHWK)); EXPECT_EQ(Traits::data_type, ck_tile::builder::DataType::FP16); EXPECT_EQ(Traits::input_element_op, ck_tile::builder::ElementwiseOperation::PASS_THROUGH); EXPECT_EQ(Traits::weight_element_op, ck_tile::builder::ElementwiseOperation::PASS_THROUGH); @@ -216,9 +216,9 @@ TEST_F(ConvTraitsTest, ConvFwdBaseTraitsExtraction) EXPECT_EQ(Traits::spatial_dim, 2); EXPECT_EQ(Traits::direction, ck_tile::builder::ConvDirection::FORWARD); EXPECT_THAT(Traits::layout, - ::testing::ElementsAre(ck_tile::builder::ConvInputLayout2D::GNHWC, - ck_tile::builder::ConvWeightLayout2D::GKYXC, - ck_tile::builder::ConvOutputLayout2D::GNHWK)); + ::testing::ElementsAre(ck_tile::builder::TensorLayout::GNHWC, + ck_tile::builder::TensorLayout::GKYXC, + ck_tile::builder::TensorLayout::GNHWK)); EXPECT_EQ(Traits::data_type, ck_tile::builder::DataType::FP16); EXPECT_EQ(Traits::input_element_op, ck_tile::builder::ElementwiseOperation::PASS_THROUGH); EXPECT_EQ(Traits::weight_element_op, ck_tile::builder::ElementwiseOperation::PASS_THROUGH); @@ -302,9 +302,9 @@ TEST_F(ConvTraitsTest, ConvFwdLargeTensorTraitsExtraction) EXPECT_EQ(Traits::spatial_dim, 2); EXPECT_EQ(Traits::direction, ck_tile::builder::ConvDirection::FORWARD); EXPECT_THAT(Traits::layout, - 
::testing::ElementsAre(ck_tile::builder::ConvInputLayout2D::GNHWC, - ck_tile::builder::ConvWeightLayout2D::GKYXC, - ck_tile::builder::ConvOutputLayout2D::GNHWK)); + ::testing::ElementsAre(ck_tile::builder::TensorLayout::GNHWC, + ck_tile::builder::TensorLayout::GKYXC, + ck_tile::builder::TensorLayout::GNHWK)); EXPECT_EQ(Traits::data_type, ck_tile::builder::DataType::FP16); EXPECT_EQ(Traits::input_element_op, ck_tile::builder::ElementwiseOperation::PASS_THROUGH); EXPECT_EQ(Traits::weight_element_op, ck_tile::builder::ElementwiseOperation::PASS_THROUGH); diff --git a/experimental/builder/test/impl/conv_signature_types.hpp b/experimental/builder/test/impl/conv_signature_types.hpp index 3be65bcb6f..45259d75d7 100644 --- a/experimental/builder/test/impl/conv_signature_types.hpp +++ b/experimental/builder/test/impl/conv_signature_types.hpp @@ -12,7 +12,7 @@ using namespace ck_tile::builder; struct TensorConfig { - ConvLayout layout; + TensorLayout layout; // Optional data types, override the type defined in the signature if provided. DataType data_type{DataType::UNDEFINDED}; DataType compute_type{DataType::UNDEFINDED}; diff --git a/experimental/builder/test/test_conv_description.cpp b/experimental/builder/test/test_conv_description.cpp index 22cb131866..b12f057275 100644 --- a/experimental/builder/test/test_conv_description.cpp +++ b/experimental/builder/test/test_conv_description.cpp @@ -29,7 +29,7 @@ static_assert(!ckb::TensorOperatorDescriptor); struct TensorConfig { - ckb::ConvLayout layout; + ckb::TensorLayout layout; ckb::DataType data_type{ckb::DataType::UNDEFINDED}; ckb::DataType compute_type{ckb::DataType::UNDEFINDED}; }; @@ -58,9 +58,9 @@ struct ConvSignature int spatial_dim = 2; ckb::DataType data_type = ckb::DataType::FP16; ckb::DataType accumulation_data_type = ckb::DataType::FP32; - ConvTensorSimple input = {.config = {ckb::ConvInputLayout2D::GNHWC}}; - ConvTensorSimple weight = {.config = {ckb::ConvWeightLayout2D::GKYXC}}; - ConvTensorSimple output = {.config = {ckb::ConvOutputLayout2D::GNHWK}}; + ConvTensorSimple input = {.config = {ckb::TensorLayout::GNHWC}}; + ConvTensorSimple weight = {.config = {ckb::TensorLayout::GKYXC}}; + ConvTensorSimple output = {.config = {ckb::TensorLayout::GNHWK}}; }; static_assert(ckb::ConvSignatureDescriptor); @@ -72,10 +72,10 @@ struct ConvSignatureWithOptionalParams ckb::DataType accumulation_data_type = ckb::DataType::FP32; ckb::ConvDirection direction = ckb::ConvDirection::FORWARD; ConvTensorWithOp input = { - .config = {ckb::ConvInputLayout2D::GNHWC, ckb::DataType::FP16}, + .config = {ckb::TensorLayout::GNHWC, ckb::DataType::FP16}, }; - ConvTensorWithOp weight = {.config = {ckb::ConvWeightLayout2D::GKYXC, ckb::DataType::FP16}}; - ConvTensorWithOp output = {.config = {ckb::ConvOutputLayout2D::GNHWK, ckb::DataType::FP16}, + ConvTensorWithOp weight = {.config = {ckb::TensorLayout::GKYXC, ckb::DataType::FP16}}; + ConvTensorWithOp output = {.config = {ckb::TensorLayout::GNHWK, ckb::DataType::FP16}, .operation = {ckb::ElementwiseOperation::SCALE}}; }; static_assert(ckb::ConvSignatureDescriptor); @@ -85,9 +85,9 @@ struct ConvSignatureWithInvalidOptionalParams int spatial_dim = 2; ckb::DataType data_type = ckb::DataType::FP16; ckb::DataType accumulation_data_type = ckb::DataType::FP32; - ConvTensorWithInvalidOp input = {.config = {ckb::ConvInputLayout2D::GNHWC}}; - ConvTensorWithInvalidOp weight = {.config = {ckb::ConvWeightLayout2D::GKYXC}}; - ConvTensorWithInvalidOp output = {.config = {ckb::ConvOutputLayout2D::GNHWK}}; + ConvTensorWithInvalidOp input 
= {.config = {ckb::TensorLayout::GNHWC}}; + ConvTensorWithInvalidOp weight = {.config = {ckb::TensorLayout::GKYXC}}; + ConvTensorWithInvalidOp output = {.config = {ckb::TensorLayout::GNHWK}}; }; static_assert(!ckb::ConvSignatureDescriptor); From c27ea3463732126bb269d26860ca7ef7cc1002c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Tue, 2 Dec 2025 10:18:11 +0000 Subject: [PATCH 30/41] Fix clang-formatting. --- .../builder/conv_signature_concepts.hpp | 50 +++-- .../factory/helpers/conv_data_type_utils.hpp | 40 +++- .../helpers/conv_elementwise_op_utils.hpp | 15 +- .../factory/helpers/conv_layout_utils.hpp | 171 ++++++++++++++---- .../ck_tile/builder/reflect/conv_traits.hpp | 52 +++--- .../builder/include/ck_tile/builder/types.hpp | 3 +- 6 files changed, 230 insertions(+), 101 deletions(-) diff --git a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp index 91da00ea6a..8dc92c6bef 100644 --- a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp +++ b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp @@ -34,25 +34,24 @@ concept ValidConvDataType = (T == DataType::FP32) || (T == DataType::FP16) || (T == DataType::BF16) || (T == DataType::FP8) || (T == DataType::I8) || (T == DataType::U8); - template -concept BiasTensorLayout = (L == TensorLayout::GC) || (L == TensorLayout::G_C_strided) || - (L == TensorLayout::G_K_strided); +concept BiasTensorLayout = + (L == TensorLayout::GC) || (L == TensorLayout::G_C_strided) || (L == TensorLayout::G_K_strided); template -concept ConvInputLayout1D = (L == TensorLayout::GNCW) || (L == TensorLayout::GNWC) || - (L == TensorLayout::NWGC) || (L == TensorLayout::NGCW) || - (L == TensorLayout::G_NW_C_strided); +concept ConvInputLayout1D = + (L == TensorLayout::GNCW) || (L == TensorLayout::GNWC) || (L == TensorLayout::NWGC) || + (L == TensorLayout::NGCW) || (L == TensorLayout::G_NW_C_strided); template -concept ConvInputLayout2D = (L == TensorLayout::GNCHW) || (L == TensorLayout::GNHWC) || - (L == TensorLayout::NHWGC) || (L == TensorLayout::NGCHW) || - (L == TensorLayout::G_NHW_C_strided); +concept ConvInputLayout2D = + (L == TensorLayout::GNCHW) || (L == TensorLayout::GNHWC) || (L == TensorLayout::NHWGC) || + (L == TensorLayout::NGCHW) || (L == TensorLayout::G_NHW_C_strided); template -concept ConvInputLayout3D = (L == TensorLayout::GNCDHW) || (L == TensorLayout::GNDHWC) || - (L == TensorLayout::NDHWGC) || (L == TensorLayout::NGCDHW) || - (L == TensorLayout::G_NDHW_C_strided); +concept ConvInputLayout3D = + (L == TensorLayout::GNCDHW) || (L == TensorLayout::GNDHWC) || (L == TensorLayout::NDHWGC) || + (L == TensorLayout::NGCDHW) || (L == TensorLayout::G_NDHW_C_strided); template concept ConvWeightLayout1D = (L == TensorLayout::GKXC) || (L == TensorLayout::GKCX) || @@ -67,19 +66,19 @@ concept ConvWeightLayout3D = (L == TensorLayout::GKZYXC) || (L == TensorLayout:: (L == TensorLayout::KZYXGC) || (L == TensorLayout::G_K_ZYX_C_strided); template -concept ConvOutputLayout1D = (L == TensorLayout::GNKW) || (L == TensorLayout::GNWK) || - (L == TensorLayout::NWGK) || (L == TensorLayout::NGKW) || - (L == TensorLayout::G_NW_K_strided); +concept ConvOutputLayout1D = + (L == TensorLayout::GNKW) || (L == TensorLayout::GNWK) || (L == TensorLayout::NWGK) || + (L == TensorLayout::NGKW) || (L == TensorLayout::G_NW_K_strided); template -concept ConvOutputLayout2D = (L == TensorLayout::GNKHW) || (L == 
TensorLayout::GNHWK) || - (L == TensorLayout::NHWGK) || (L == TensorLayout::NGKHW) || - (L == TensorLayout::G_NHW_K_strided); +concept ConvOutputLayout2D = + (L == TensorLayout::GNKHW) || (L == TensorLayout::GNHWK) || (L == TensorLayout::NHWGK) || + (L == TensorLayout::NGKHW) || (L == TensorLayout::G_NHW_K_strided); template -concept ConvOutputLayout3D = (L == TensorLayout::GNKDHW) || (L == TensorLayout::GNDHWK) || - (L == TensorLayout::NDHWGK) || (L == TensorLayout::NGKDHW) || - (L == TensorLayout::G_NDHW_K_strided); +concept ConvOutputLayout3D = + (L == TensorLayout::GNKDHW) || (L == TensorLayout::GNDHWK) || (L == TensorLayout::NDHWGK) || + (L == TensorLayout::NGKDHW) || (L == TensorLayout::G_NDHW_K_strided); template concept TensorConfigDescriptor = requires(T t) { @@ -195,22 +194,19 @@ concept ConvDirectionIsBackwardWeight = (Sig.direction == ConvDirection::BACKWAR // Constraints for forward convolution input layouts. template concept ValidConvInputLayoutForSpatialDim = - (SpatialDim == 1 && ConvInputLayout1D) || - (SpatialDim == 2 && ConvInputLayout2D) || + (SpatialDim == 1 && ConvInputLayout1D) || (SpatialDim == 2 && ConvInputLayout2D) || (SpatialDim == 3 && ConvInputLayout3D); // Constraints for forward convolution output layouts. template concept ValidConvOutputLayoutForSpatialDim = - (SpatialDim == 1 && ConvOutputLayout1D) || - (SpatialDim == 2 && ConvOutputLayout2D) || + (SpatialDim == 1 && ConvOutputLayout1D) || (SpatialDim == 2 && ConvOutputLayout2D) || (SpatialDim == 3 && ConvOutputLayout3D); // Constraints for forward convolution weight layouts. template concept ValidConvWeightLayoutForSpatialDim = - (SpatialDim == 1 && ConvWeightLayout1D) || - (SpatialDim == 2 && ConvWeightLayout2D) || + (SpatialDim == 1 && ConvWeightLayout1D) || (SpatialDim == 2 && ConvWeightLayout2D) || (SpatialDim == 3 && ConvWeightLayout3D); } // namespace ck_tile::builder diff --git a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_data_type_utils.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_data_type_utils.hpp index b5555fc467..cc220459ed 100644 --- a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_data_type_utils.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_data_type_utils.hpp @@ -12,17 +12,39 @@ template struct DataTypeToCK { // Catch unsupported data types at compile time - static_assert(sizeof(UnsupportedEnumValue
) == 0,
-                  "Unsupported data type conversion to CK.");
+    static_assert(sizeof(UnsupportedEnumValue
) == 0, "Unsupported data type conversion to CK."); }; -template <> struct DataTypeToCK { using type = ck::half_t; }; -template <> struct DataTypeToCK { using type = ck::bhalf_t; }; -template <> struct DataTypeToCK { using type = float; }; -template <> struct DataTypeToCK { using type = int32_t; }; -template <> struct DataTypeToCK { using type = int8_t; }; -template <> struct DataTypeToCK { using type = ck::f8_t; }; - +template <> +struct DataTypeToCK +{ + using type = ck::half_t; +}; +template <> +struct DataTypeToCK +{ + using type = ck::bhalf_t; +}; +template <> +struct DataTypeToCK +{ + using type = float; +}; +template <> +struct DataTypeToCK +{ + using type = int32_t; +}; +template <> +struct DataTypeToCK +{ + using type = int8_t; +}; +template <> +struct DataTypeToCK +{ + using type = ck::f8_t; +}; struct CK_empty_tuple { diff --git a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op_utils.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op_utils.hpp index 8f08e2d34c..c580a38611 100644 --- a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op_utils.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op_utils.hpp @@ -15,27 +15,32 @@ struct ElementwiseOpToCK "Unsupported elementwise operation conversion to CK."); }; -template <> struct ElementwiseOpToCK +template <> +struct ElementwiseOpToCK { using Op = ck::tensor_operation::element_wise::PassThrough; }; -template <> struct ElementwiseOpToCK +template <> +struct ElementwiseOpToCK { using Op = ck::tensor_operation::element_wise::Scale; }; -template <> struct ElementwiseOpToCK +template <> +struct ElementwiseOpToCK { using Op = ck::tensor_operation::element_wise::Clamp; }; -template <> struct ElementwiseOpToCK +template <> +struct ElementwiseOpToCK { using Op = ck::tensor_operation::element_wise::ScaleAddScaleAddRelu; }; -template <> struct ElementwiseOpToCK +template <> +struct ElementwiseOpToCK { using Op = ck::tensor_operation::element_wise::BiasNormalizeInInferClamp; }; diff --git a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_layout_utils.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_layout_utils.hpp index 9433adb3dc..12dc9792bd 100644 --- a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_layout_utils.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_layout_utils.hpp @@ -18,51 +18,159 @@ struct LayoutToCK }; // BiasLayout -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::G_K; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GC; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::G_C; }; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::G_K; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GC; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::G_C; +}; // Input 1D -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NWGC; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NGCW; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNWC; }; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NWGC; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NGCW; +}; +template <> +struct 
LayoutToCK +{ + using type = ck::tensor_layout::convolution::GNWC; +}; // Input 2D -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NGCHW; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NHWGC; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNHWC; }; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NGCHW; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NHWGC; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GNHWC; +}; // Input 3D -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NGCDHW; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NDHWGC; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNDHWC; }; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NGCDHW; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NDHWGC; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GNDHWC; +}; // Weight 1D -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKXC; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKCX; }; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GKXC; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GKCX; +}; // Weight 2D -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKYXC; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKCYX; }; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GKYXC; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GKCYX; +}; // Weight 3D -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKCZYX; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GKZYXC; }; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GKCZYX; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GKZYXC; +}; // Output 1D -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NWGK; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NGKW; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNWK; }; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NWGK; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NGKW; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GNWK; +}; // Output 2D -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NGKHW; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NHWGK; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNHWK; }; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NGKHW; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NHWGK; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GNHWK; +}; // Output 3D -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::NGKDHW; }; -template <> struct LayoutToCK { 
using type = ck::tensor_layout::convolution::NDHWGK; }; -template <> struct LayoutToCK { using type = ck::tensor_layout::convolution::GNDHWK; }; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NGKDHW; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NDHWGK; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GNDHWK; +}; template consteval auto TensorLayoutToCK() @@ -75,11 +183,11 @@ struct EmptyAuxiliaryTensorLayout using DsLayout = ck::Tuple<>; }; - template consteval auto GetAuxiliaryTensorLayoutTuple(std::index_sequence) { - return ck::Tuple())...>{}; + return ck::Tuple< + decltype(TensorLayoutToCK())...>{}; } template @@ -87,9 +195,8 @@ template ( - std::make_index_sequence{})); + using DsLayout = decltype(GetAuxiliaryTensorLayoutTuple( + std::make_index_sequence{})); }; // TODO: Currently only the ouput tensor can have auxiliary tensors (e.g., bias). diff --git a/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp b/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp index c5f42c30cb..9890b5a46f 100644 --- a/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp +++ b/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp @@ -298,10 +298,10 @@ constexpr auto conv_spec() /// @brief Derives the grouped convolution layout from a device kernel `Instance` type. /// @tparam Instance The device kernel instance type. -/// @return An std::array corresponding to the tensor layouts: +/// @return An std::array corresponding to the tensor layouts: /// index 0 -> Input layout /// index 1 -> Weight layout -/// index 2 -> Output layout +/// index 2 -> Output layout template constexpr auto conv_layout() { @@ -318,29 +318,29 @@ constexpr auto conv_layout() std::is_same_v) { return std::array{builder::TensorLayout::GNWC, - builder::TensorLayout::GKXC, - builder::TensorLayout::GNWK}; + builder::TensorLayout::GKXC, + builder::TensorLayout::GNWK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { return std::array{builder::TensorLayout::NWGC, - builder::TensorLayout::GKXC, - builder::TensorLayout::NWGK}; + builder::TensorLayout::GKXC, + builder::TensorLayout::NWGK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { return std::array{builder::TensorLayout::NGCW, - builder::TensorLayout::GKXC, - builder::TensorLayout::NGKW}; + builder::TensorLayout::GKXC, + builder::TensorLayout::NGKW}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { return std::array{builder::TensorLayout::NGCW, - builder::TensorLayout::GKCX, - builder::TensorLayout::NGKW}; + builder::TensorLayout::GKCX, + builder::TensorLayout::NGKW}; } } else if constexpr(InstTraits::kSpatialDim == 2) @@ -349,32 +349,32 @@ constexpr auto conv_layout() std::is_same_v) { return std::array{builder::TensorLayout::GNHWC, - builder::TensorLayout::GKYXC, - builder::TensorLayout::GNHWK}; + builder::TensorLayout::GKYXC, + builder::TensorLayout::GNHWK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { return std::array{builder::TensorLayout::NHWGC, - builder::TensorLayout::GKYXC, - builder::TensorLayout::NHWGK}; + builder::TensorLayout::GKYXC, + builder::TensorLayout::NHWGK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { return std::array{builder::TensorLayout::NGCHW, - builder::TensorLayout::GKYXC, - builder::TensorLayout::NGKHW}; + builder::TensorLayout::GKYXC, + 
builder::TensorLayout::NGKHW}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { return std::array{builder::TensorLayout::NGCHW, - builder::TensorLayout::GKCYX, - builder::TensorLayout::NGKHW}; + builder::TensorLayout::GKCYX, + builder::TensorLayout::NGKHW}; } } else if constexpr(InstTraits::kSpatialDim == 3) @@ -383,32 +383,32 @@ constexpr auto conv_layout() std::is_same_v) { return std::array{builder::TensorLayout::GNDHWC, - builder::TensorLayout::GKZYXC, - builder::TensorLayout::GNDHWK}; + builder::TensorLayout::GKZYXC, + builder::TensorLayout::GNDHWK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { return std::array{builder::TensorLayout::NDHWGC, - builder::TensorLayout::GKZYXC, - builder::TensorLayout::NDHWGK}; + builder::TensorLayout::GKZYXC, + builder::TensorLayout::NDHWGK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { return std::array{builder::TensorLayout::NGCDHW, - builder::TensorLayout::GKZYXC, - builder::TensorLayout::NGKDHW}; + builder::TensorLayout::GKZYXC, + builder::TensorLayout::NGKDHW}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { return std::array{builder::TensorLayout::NGCDHW, - builder::TensorLayout::GKCZYX, - builder::TensorLayout::NGKDHW}; + builder::TensorLayout::GKCZYX, + builder::TensorLayout::NGKDHW}; } } } diff --git a/experimental/builder/include/ck_tile/builder/types.hpp b/experimental/builder/include/ck_tile/builder/types.hpp index d2195447d8..565bb98528 100644 --- a/experimental/builder/include/ck_tile/builder/types.hpp +++ b/experimental/builder/include/ck_tile/builder/types.hpp @@ -32,7 +32,7 @@ enum class TensorLayout G_C_strided, G_K_strided, - // 1D conv input tensor + // 1D conv input tensor GNCW, GNWC, NWGC, @@ -93,7 +93,6 @@ enum class TensorLayout G_NDHW_K_strided }; - // Direction of the convolution operation. enum class ConvDirection { From ea080e662d3cc9edb7eecf95a667fd1d0fc78970 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Tue, 2 Dec 2025 11:05:34 +0000 Subject: [PATCH 31/41] Fix clang-format for test code. 
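
clang-format realigns the designated initializers in the test convolution
signatures; there is no functional change. A minimal sketch of the resulting
style, using hypothetical stand-in types rather than the real builder headers
(the real definitions live in ck_tile/builder/types.hpp and the test helpers):

    enum class TensorLayout { NWGC, GKXC, NWGK };
    struct TensorConfig { TensorLayout layout{}; };
    struct ConvolutionTensor { TensorConfig config{}; };
    struct ConvSignature {
        int spatial_dim{};
        ConvolutionTensor input, weight, output;
    };

    // After formatting, each nested initializer sits on its own line:
    constexpr ConvSignature Sig{
        .spatial_dim = 1,
        .input = ConvolutionTensor{.config = {.layout = TensorLayout::NWGC}},
        .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKXC}},
        .output = ConvolutionTensor{.config = {.layout = TensorLayout::NWGK}}};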
--- .../builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp | 2 +- .../builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp | 2 +- .../builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp | 8 ++++---- .../builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp | 8 ++++---- .../builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp | 4 ++-- .../builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp | 4 ++-- .../builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp | 4 ++-- .../test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp | 8 ++++---- .../builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp | 6 +++--- .../builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp | 6 +++--- .../builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp | 6 +++--- 11 files changed, 29 insertions(+), 29 deletions(-) diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp index f759846849..d87476ad03 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp @@ -19,7 +19,7 @@ TEST(FwdConvInstances, .accumulation_data_type = DataType::FP32, .input = ConvolutionTensor{.config = {.layout = TensorLayout::NWGC}}, .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::NWGK}}}; + .output = ConvolutionTensor{.config = {.layout = TensorLayout::NWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp index c042f2fa66..67b605154c 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp @@ -21,7 +21,7 @@ TEST(FwdConvInstances, .accumulation_data_type = DataType::INT32, .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNWC}}, .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNWK}}}; + .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleD_Wmma_CShuffle{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp index f6d9a675b6..7c2248bdb0 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp @@ -18,8 +18,8 @@ TEST(FwdConvInstances, .data_type = DataType::BF16, .accumulation_data_type = DataType::FP32, .input = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGC}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGK}}}; + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -50,8 +50,8 @@ TEST(FwdConvInstances, .data_type = DataType::BF16, .accumulation_data_type = DataType::FP32, .input = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGC}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGK}}}; + .weight = ConvolutionTensor{.config = {.layout = 
TensorLayout::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp index d4ec658ec2..2d1e65eefc 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp @@ -16,8 +16,8 @@ TEST(FwdConvInstances, Create_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK_Ins .data_type = DataType::FP16, .accumulation_data_type = DataType::FP32, .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWC}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWK}}}; + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK{} @@ -45,8 +45,8 @@ TEST(FwdConvInstances, .data_type = DataType::FP16, .accumulation_data_type = DataType::FP32, .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWC}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWK}}}; + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp index a3009bb34f..a43100c7b9 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp @@ -17,8 +17,8 @@ TEST(FwdConvInstances, .data_type = DataType::FP16, .accumulation_data_type = DataType::FP32, .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWC}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWK}}}; + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp index a08dc6c969..b17c12e5ff 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp @@ -17,8 +17,8 @@ TEST(FwdConvInstances, .data_type = DataType::FP32, .accumulation_data_type = DataType::FP32, .input = ConvolutionTensor{.config = {.layout = TensorLayout::NGCHW}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKCYX}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::NGKHW}}}; + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKCYX}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::NGKHW}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} diff --git 
a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp index 06009afc5d..74ef8bdf3a 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp @@ -18,8 +18,8 @@ TEST(FwdConvInstances, .data_type = DataType::FP8, .accumulation_data_type = DataType::FP32, .input = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGC}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGK}}}; + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp index f818197330..9afa5ae4a7 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp @@ -17,8 +17,8 @@ TEST(FwdConvInstances, .data_type = DataType::FP16, .accumulation_data_type = DataType::FP32, .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWC}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWK}}}; + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor{ @@ -49,8 +49,8 @@ TEST( .data_type = DataType::FP16, .accumulation_data_type = DataType::FP32, .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWC}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWK}}}; + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor{ diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp index 2461f13794..78b6b1b9d8 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp @@ -17,9 +17,9 @@ TEST(FwdConvInstances, .direction = ConvDirection::FORWARD, .data_type = DataType::BF16, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNDHWC}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKZYXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNDHWK}}}; + .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNDHWC}}, + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKZYXC}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNDHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp index 9a2461ea50..3f7549144b 
100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp @@ -17,9 +17,9 @@ TEST(FwdConvInstances, .direction = ConvDirection::FORWARD, .data_type = DataType::FP16, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = TensorLayout::NDHWGC}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKZYXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::NDHWGK}}}; + .input = ConvolutionTensor{.config = {.layout = TensorLayout::NDHWGC}}, + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKZYXC}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::NDHWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp index d5fe24d27c..5202699984 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp @@ -17,9 +17,9 @@ TEST(FwdConvInstances, .direction = ConvDirection::FORWARD, .data_type = DataType::FP32, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = TensorLayout::NGCDHW}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKCZYX}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::NGKDHW}}}; + .input = ConvolutionTensor{.config = {.layout = TensorLayout::NGCDHW}}, + .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKCZYX}}, + .output = ConvolutionTensor{.config = {.layout = TensorLayout::NGKDHW}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} From b82f502ebf36e79b346e40074e1d610ff0cea047 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Tue, 2 Dec 2025 11:48:11 +0000 Subject: [PATCH 32/41] Simplify fwd conv signature definitions in the test code. 
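
The nested initializers no longer repeat the ConvolutionTensor type name:
inside a designated initializer for an aggregate member, a plain braced list
initializes the same member identically. A self-contained sketch of the
equivalence, again with hypothetical stand-in types rather than the real
builder headers:

    enum class TensorLayout { GNWC, GKXC, GNWK };
    struct TensorConfig { TensorLayout layout{}; };
    struct ConvolutionTensor { TensorConfig config{}; };
    struct ConvSignature { ConvolutionTensor input, weight, output; };

    // The verbose form (before this patch) and the simplified form (after)
    // produce the same value.
    constexpr ConvSignature Verbose{
        .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNWC}}};
    constexpr ConvSignature Simple{
        .input = {.config = {.layout = TensorLayout::GNWC}}};
    static_assert(Verbose.input.config.layout == Simple.input.config.layout);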
--- .../test/conv/test_ckb_conv_fwd_1d_bf16.cpp | 12 +++----- .../test/conv/test_ckb_conv_fwd_1d_fp16.cpp | 15 +++++----- .../test/conv/test_ckb_conv_fwd_1d_i8.cpp | 15 +++++----- .../test/conv/test_ckb_conv_fwd_2d_bf16.cpp | 30 +++++++++---------- ...est_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp | 15 +++++----- .../conv/test_ckb_conv_fwd_2d_dl_fp16.cpp | 30 +++++++++---------- .../test/conv/test_ckb_conv_fwd_2d_fp16.cpp | 15 +++++----- .../test/conv/test_ckb_conv_fwd_2d_fp32.cpp | 15 +++++----- .../test/conv/test_ckb_conv_fwd_2d_fp8.cpp | 15 +++++----- ...test_ckb_conv_fwd_2d_large_tensor_fp16.cpp | 30 +++++++++---------- .../test/conv/test_ckb_conv_fwd_3d_bf16.cpp | 6 ++-- .../test/conv/test_ckb_conv_fwd_3d_fp16.cpp | 6 ++-- .../test/conv/test_ckb_conv_fwd_3d_fp32.cpp | 6 ++-- .../test/impl/conv_signature_types.hpp | 4 ++- 14 files changed, 100 insertions(+), 114 deletions(-) diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_bf16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_bf16.cpp index b01cd24a4e..937d17a1ff 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_bf16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_bf16.cpp @@ -18,14 +18,10 @@ TEST(FwdConvInstances, .direction = ConvDirection::FORWARD, .data_type = DataType::BF16, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = TensorLayout::NGCW}}, - .weight = ConvolutionTensor{.config = - { - .layout = TensorLayout::GKXC, - }}, - .output = ConvolutionTensor{ - .config = {.layout = TensorLayout::NGKW}, - .operation = TensorOperation<>{.elementwise_operation = ElementwiseOperation::SCALE}}}; + .input = {.config = {.layout = TensorLayout::NGCW}}, + .weight = {.config = {.layout = TensorLayout::GKXC}}, + .output = {.config = {.layout = TensorLayout::NGKW}, + .operation = {.elementwise_operation = ElementwiseOperation::SCALE}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp index d87476ad03..e8cd8fb136 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp @@ -12,14 +12,13 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_Instance_1D_FP16_ChannelsFirst) { - constexpr ConvSignature FwdConvSignature{ - .spatial_dim = 1, - .direction = ConvDirection::FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = TensorLayout::NWGC}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::NWGK}}}; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 1, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NWGC}}, + .weight = {.config = {.layout = TensorLayout::GKXC}}, + .output = {.config = {.layout = TensorLayout::NWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp index 67b605154c..014e221101 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp 
+++ b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp @@ -14,14 +14,13 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleD_Wmma_CShuffle_Instance_1D_FP32_ChannelsFirst_scale) { - constexpr ConvSignature FwdConvSignature{ - .spatial_dim = 1, - .direction = ConvDirection::FORWARD, - .data_type = DataType::I8, - .accumulation_data_type = DataType::INT32, - .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNWC}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNWK}}}; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 1, + .direction = ConvDirection::FORWARD, + .data_type = DataType::I8, + .accumulation_data_type = DataType::INT32, + .input = {.config = {.layout = TensorLayout::GNWC}}, + .weight = {.config = {.layout = TensorLayout::GKXC}}, + .output = {.config = {.layout = TensorLayout::GNWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleD_Wmma_CShuffle{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp index 7c2248bdb0..b98e28c45a 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp @@ -12,14 +12,13 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_2D_BF16_ChannelsLast) { - constexpr ConvSignature FwdConvSignature{ - .spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .data_type = DataType::BF16, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGC}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGK}}}; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::BF16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NHWGC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::NHWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -44,14 +43,13 @@ TEST(FwdConvInstances, TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_2D_BF16_NHWGC_Filter3x3) { - constexpr ConvSignature FwdConvSignature{ - .spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .data_type = DataType::BF16, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGC}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGK}}}; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::BF16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NHWGC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::NHWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp 
b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp index f8b66d3991..bc4a5e1047 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp @@ -17,15 +17,14 @@ TEST(FwdConvInstances, .direction = ConvDirection::FORWARD, .data_type = DataType::BF16, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGC}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC, - .data_type = DataType::BF16}}, // For demo purposes - .output = ConvolutionTensor{ - .config = {.layout = TensorLayout::NHWGK}, - .operation = TensorOperation<>{.elementwise_operation = - ElementwiseOperation::SCALEADD_SCALEADD_RELU} + .input = {.config = {.layout = TensorLayout::NHWGC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC, .data_type = DataType::BF16}}, + .output = ConvolutionTensor{ + .config = {.layout = TensorLayout::NHWGK}, + .operation = TensorOperation<>{.elementwise_operation = + ElementwiseOperation::SCALEADD_SCALEADD_RELU} .with_auxiliary_operand_configs()}}; + TensorLayout::G_K_strided>()}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp index 2d1e65eefc..7af1448403 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp @@ -10,14 +10,13 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK_Instance_2D_FP16_GNHWC) { - constexpr ConvSignature FwdConvSignature{ - .spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWC}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWK}}}; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNHWC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK{} @@ -39,14 +38,13 @@ TEST(FwdConvInstances, Create_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK_Ins TEST(FwdConvInstances, Create_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK_Instance_2D_FP16_FILTER_1X1_PAD0) { - constexpr ConvSignature FwdConvSignature{ - .spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWC}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWK}}}; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNHWC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = 
{.layout = TensorLayout::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp index a43100c7b9..7b522403d3 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp @@ -11,14 +11,13 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_2D_FP16_GNHWC) { - constexpr ConvSignature FwdConvSignature{ - .spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWC}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWK}}}; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNHWC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp index b17c12e5ff..615d098c7c 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp @@ -11,14 +11,13 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_2D_FP32_NGCHW_GKCYX) { - constexpr ConvSignature FwdConvSignature{ - .spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .data_type = DataType::FP32, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = TensorLayout::NGCHW}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKCYX}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::NGKHW}}}; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP32, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NGCHW}}, + .weight = {.config = {.layout = TensorLayout::GKCYX}}, + .output = {.config = {.layout = TensorLayout::NGKHW}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp index 74ef8bdf3a..4dd9e2beef 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp @@ -12,14 +12,13 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_Instance_2D_FP8_ChannelsLast) { - constexpr ConvSignature FwdConvSignature{ - .spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .data_type = DataType::FP8, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGC}}, - .weight = ConvolutionTensor{.config = {.layout 
= TensorLayout::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::NHWGK}}}; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP8, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NHWGC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::NHWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp index 9afa5ae4a7..8fe58dbe82 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp @@ -11,14 +11,13 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor_Instance_2D_FP16_GNHWC) { - constexpr ConvSignature FwdConvSignature{ - .spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWC}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWK}}}; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNHWC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor{ @@ -43,14 +42,13 @@ TEST( FwdConvInstances, Create_DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor_Instance_2D_FP16_GNHWC_Filter1x1Pad0) { - constexpr ConvSignature FwdConvSignature{ - .spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWC}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKYXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::GNHWK}}}; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNHWC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor{ diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp index 78b6b1b9d8..2df76ab3e0 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp @@ -17,9 +17,9 @@ TEST(FwdConvInstances, .direction = ConvDirection::FORWARD, .data_type = DataType::BF16, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = TensorLayout::GNDHWC}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKZYXC}}, - .output = 
ConvolutionTensor{.config = {.layout = TensorLayout::GNDHWK}}}; + .input = {.config = {.layout = TensorLayout::GNDHWC}}, + .weight = {.config = {.layout = TensorLayout::GKZYXC}}, + .output = {.config = {.layout = TensorLayout::GNDHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp index 3f7549144b..ad626d9a15 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp @@ -17,9 +17,9 @@ TEST(FwdConvInstances, .direction = ConvDirection::FORWARD, .data_type = DataType::FP16, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = TensorLayout::NDHWGC}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKZYXC}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::NDHWGK}}}; + .input = {.config = {.layout = TensorLayout::NDHWGC}}, + .weight = {.config = {.layout = TensorLayout::GKZYXC}}, + .output = {.config = {.layout = TensorLayout::NDHWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp index 5202699984..85974ace5d 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp @@ -17,9 +17,9 @@ TEST(FwdConvInstances, .direction = ConvDirection::FORWARD, .data_type = DataType::FP32, .accumulation_data_type = DataType::FP32, - .input = ConvolutionTensor{.config = {.layout = TensorLayout::NGCDHW}}, - .weight = ConvolutionTensor{.config = {.layout = TensorLayout::GKCZYX}}, - .output = ConvolutionTensor{.config = {.layout = TensorLayout::NGKDHW}}}; + .input = {.config = {.layout = TensorLayout::NGCDHW}}, + .weight = {.config = {.layout = TensorLayout::GKCZYX}}, + .output = {.config = {.layout = TensorLayout::NGKDHW}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} diff --git a/experimental/builder/test/impl/conv_signature_types.hpp b/experimental/builder/test/impl/conv_signature_types.hpp index 45259d75d7..ef87981c3d 100644 --- a/experimental/builder/test/impl/conv_signature_types.hpp +++ b/experimental/builder/test/impl/conv_signature_types.hpp @@ -40,7 +40,9 @@ struct ConvolutionTensor Op operation{}; }; -template +template , + typename WeightTensor = ConvolutionTensor<>, + typename OutputTensor = ConvolutionTensor<>> struct ConvSignature { int spatial_dim; From 915e6cae86af0c13b97c79c811ed4542e97e3264 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Tue, 2 Dec 2025 11:56:26 +0000 Subject: [PATCH 33/41] Remove accidental edits. --- experimental/builder/test/impl/conv_algorithm_types.hpp | 8 -------- ...ouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp | 8 ++++---- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/experimental/builder/test/impl/conv_algorithm_types.hpp b/experimental/builder/test/impl/conv_algorithm_types.hpp index 2656c4a130..3331bf204f 100644 --- a/experimental/builder/test/impl/conv_algorithm_types.hpp +++ b/experimental/builder/test/impl/conv_algorithm_types.hpp @@ -339,14 +339,6 @@ struct ConvAlgorithmTemplate : Components... 
result.transfer = t; return result; } - - template - constexpr auto with_elementwise_operation(const OP& op) const - { - auto result = *this; - result.element_op = op; - return result; - } }; // Algorithm types diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp index 8051b29c19..538a3ddcb9 100644 --- a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp +++ b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp @@ -45,10 +45,10 @@ template using device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_bf16_instances = std::tuple< // clang-format off - //########################################| NumDim| A| B| Ds| E| AData| BData| AccData| CShuffle| Ds| EData| A| B| CDE| ConvForward| GEMM| NumGemmK| Block| MPer| NPer| KPer| AK1| BK1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffle| CShuffle| CBlockTransferClusterLengths| CBlockTransfer| - //########################################| Spatial| Layout| Layout| Layout| Layout| Type| Type| Type| DataType| DataType| Type| Elementwise| Elementwise| Elementwise| Specialization| Specialization| Prefetch| Size| Block| Block| Block| | | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MXdlPerWave| NXdlPerWave| _MBlock_MWaveMPerXdl| ScalarPerVector| - //########################################| | | | | | | | | | | | Operation| Operation| Operation| | | Stage| | | | | | | | | Wave| Wave| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| | Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerShuffle| PerShuffle| _NBlock_NWaveNPerXdl| _NWaveNPerXdl| - //########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | + //########################################| NumDim| A| B| Ds| E| AData| BData| AccData| CShuffle| Ds| EData| A| B| CDE| ConvForward| GEMM| NumGemmK| Block| MPer| NPer| KPer| AK1| BK1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffle| CShuffle| CBlockTransferClusterLengths| CBlockTransfer| + //########################################| Spatial| Layout| Layout| Layout| Layout| Type| Type| Type| DataType| DataType| Type| Elementwise| Elementwise| Elementwise| Specialization| Specialization| Prefetch| Size| Block| Block| Block| | | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MXdlPerWave| NXdlPerWave| _MBlock_MWaveMPerXdl| ScalarPerVector| + //########################################| | | | | | | | | | | | Operation| Operation| Operation| | | Stage| | | | | | | | 
| Wave| Wave| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| | Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerShuffle| PerShuffle| _NBlock_NWaveNPerXdl| _NWaveNPerXdl| + //########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | // generic instance DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle, BF16, PassThrough, PassThrough, ScaleAddScaleAddRelu, ConvSpec, GemmMNKPadding, 1, 64, 64, 64, 32, 8, 8, 32, 32, 2, 2, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 1, 8, 1, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 1, 8, 1, 1, 1, S<1, 16, 1, 4>, 1>, // instances for small conv.K and conv.C From 41d1bfd181214db38082be0170825800808d1e3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Tue, 2 Dec 2025 11:57:40 +0000 Subject: [PATCH 34/41] Fix comment string. --- ...ice_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp index 538a3ddcb9..defe8985a9 100644 --- a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp +++ b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp @@ -45,7 +45,7 @@ template using device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_bf16_instances = std::tuple< // clang-format off - //########################################| NumDim| A| B| Ds| E| AData| BData| AccData| CShuffle| Ds| EData| A| B| CDE| ConvForward| GEMM| NumGemmK| Block| MPer| NPer| KPer| AK1| BK1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffle| CShuffle| CBlockTransferClusterLengths| CBlockTransfer| + //########################################| NumDim| A| B| Ds| E| AData| BData| AccData| CShuffle| Ds| EData| A| B| CDE| ConvForward| GEMM| NumGemmK| Block| MPer| NPer| KPer| AK1| BK1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffle| CShuffle| CBlockTransferClusterLengths| CBlockTransfer| //########################################| Spatial| Layout| Layout| Layout| Layout| Type| Type| Type| DataType| DataType| Type| Elementwise| Elementwise| Elementwise| Specialization| Specialization| Prefetch| Size| Block| Block| Block| | | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MXdlPerWave| NXdlPerWave| _MBlock_MWaveMPerXdl| ScalarPerVector| //########################################| | | | | | | | | | | | Operation| Operation| Operation| | | Stage| | | | | | | | | Wave| Wave| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| | Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerShuffle| PerShuffle| _NBlock_NWaveNPerXdl| 
_NWaveNPerXdl| //########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | From 91a10e452dcb50fbc8afd0cb4d39a20b7968eba9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Wed, 3 Dec 2025 10:41:01 +0000 Subject: [PATCH 35/41] Fix instance factory after rebase. --- .../ck_tile/builder/factory/conv_factory.hpp | 833 ------------------ .../builder/factory/conv_fwd_dl_factory.hpp | 13 +- .../factory/conv_fwd_large_tensor_factory.hpp | 13 +- .../builder/factory/conv_fwd_v3_factory.hpp | 12 +- .../builder/factory/conv_fwd_wmma_factory.hpp | 12 +- .../builder/factory/conv_fwd_xdl_factory.hpp | 12 +- .../factory/helpers/conv_data_type_utils.hpp | 175 ---- .../factory/helpers/conv_elementwise_op.hpp | 72 +- .../helpers/conv_elementwise_op_utils.hpp | 79 -- .../factory/helpers/conv_layout_utils.hpp | 247 ------ .../factory/helpers/conv_tensor_layout.hpp | 264 ++++-- .../factory/helpers/conv_tensor_type.hpp | 212 +++-- 12 files changed, 428 insertions(+), 1516 deletions(-) delete mode 100644 experimental/builder/include/ck_tile/builder/factory/conv_factory.hpp delete mode 100644 experimental/builder/include/ck_tile/builder/factory/helpers/conv_data_type_utils.hpp delete mode 100644 experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op_utils.hpp delete mode 100644 experimental/builder/include/ck_tile/builder/factory/helpers/conv_layout_utils.hpp diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_factory.hpp deleted file mode 100644 index 17fbc7cd4c..0000000000 --- a/experimental/builder/include/ck_tile/builder/factory/conv_factory.hpp +++ /dev/null @@ -1,833 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -// A factory for instantiating CK convolution kernels. -// -// This file translates a semantic description of a convolution operation -// (`ConvSignatureDescriptor` and `ConvAlgorithmDescriptor`) into specific, -// low-level template arguments required by the underlying CK device-level -// kernel implementations. This abstraction enables more complex build -// time logic and simplifies the kernel specification. -// -// Key Components: -// -// Template Metaprogram: -// - ConvFactory: The main factory, with specializations for different -// convolution directions (currently only forward). -// -// Template Metaprogram Helpers: -// - ConvTensorLayouts: Maps layout enums to CK layout types for different -// spatial dimensions (2D/3D) and directions. -// - ConvTensorTypes: Maps data type enums (FP16, BF16, FP32) to C++ types used by CK. -// - ConvPassThroughOps: Hard-coded pass-through element-wise operations. -// - ConvSpec: Encapsulates convolution and GEMM specialization enums. -// -// `constexpr` Helper Functions: -// - SetThreadBlockInfo: Determines thread block dimensions and tile sizes. -// - SetConvTuningInfo: Sets XDL and AK1/BK1 tuning parameters. -// - SetFwdConvBlockTransfer: Configures A/B tensor block transfer parameters. -// - SetCBlockTransfer: Configures C tensor block transfer parameters. -// - SetBlockGemmPipelineVersion: Maps pipeline version enum to CK types. -// -// The primary entry point is the `ConvFactory` struct, which is currently -// specialized for forward convolutions and produces instances of -// DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3. 
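The descriptor-to-template translation summarized above follows one recurring metaprogramming pattern: a primary class template over an enum value whose body is a deferred static_assert, plus one explicit specialization per supported enumerator. A minimal self-contained sketch of that pattern follows; the Fmt enum, FmtToCpp trait, and mapped types are illustrative stand-ins, not builder or CK names.

#include <type_traits>

// Illustrative stand-in for a builder enum (assumption, not a CK type).
enum class Fmt
{
    FP16,
    FP32
};

// Dependent-false helper so the primary template only errors when instantiated.
template <auto>
inline constexpr bool dependent_false_v = false;

// Primary template: selecting an unmapped enumerator is a compile-time error,
// mirroring the static_assert guards used by DataTypeToCK and ConvTensorLayouts.
template <Fmt F>
struct FmtToCpp
{
    static_assert(dependent_false_v<F>, "Unsupported format.");
};

// One explicit specialization per supported enumerator.
template <>
struct FmtToCpp<Fmt::FP32>
{
    using type = float;
};

// The mapping resolves entirely at compile time; FmtToCpp<Fmt::FP16> would
// fail to compile until a corresponding specialization is added.
static_assert(std::is_same_v<typename FmtToCpp<Fmt::FP32>::type, float>);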
- -#pragma once - -#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp" -#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp" -#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp" -#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle_v3.hpp" -// WORKAROUND: Macro namespace collision in upstream CK device operation headers. -// device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp (line 41) and -// device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp (line 51) both define -// GridwiseGemmTemplateParameters macro without #undef, causing redefinition errors. -// Use pragma push/pop to isolate the Large_Tensor header's macro scope. -#pragma push_macro("GridwiseGemmTemplateParameters") -#ifdef GridwiseGemmTemplateParameters -#undef GridwiseGemmTemplateParameters -#endif -#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp" -#pragma pop_macro("GridwiseGemmTemplateParameters") -#include "ck_tile/builder/conv_signature_concepts.hpp" -#include "ck_tile/builder/conv_algorithm_concepts.hpp" -#include "ck_tile/builder/conv_algorithm_limits.hpp" -#include "ck_tile/builder/builder_utils.hpp" -#include "ck_tile/builder/types.hpp" -#include "ck_tile/builder/versions.hpp" - -#include "ck_tile/builder/factory/helpers/conv_layout_utils.hpp" -#include "ck_tile/builder/factory/helpers/conv_data_type_utils.hpp" -#include "ck_tile/builder/factory/helpers/conv_elementwise_op_utils.hpp" - -namespace ck_tile::builder::factory_internal { - -// The algorithm specializations for the convolution and GEMM. -template - requires( - std::is_same_v) -struct ConvSpec -{ - CONV_ENUM conv_spec; - ck::tensor_operation::device::GemmSpecialization gemm_spec; -}; - -// Deduction guide for ConvSpec to simplify brace initialization. -template -ConvSpec(CONV_ENUM, GEMM_ENUM) -> ConvSpec; - -struct BlockGemmSpec -{ - ck::BlockGemmPipelineVersion pipeline_version; - ck::BlockGemmPipelineScheduler scheduler; -}; - -template -consteval BlockGemmSpec SetBlockGemm() -{ - constexpr auto& BG = ALGORITHM.block_gemm; - - ck::BlockGemmPipelineScheduler scheduler; - ck::BlockGemmPipelineVersion version; - - switch(BG.scheduler) - { - case PipelineScheduler::INTRAWAVE: scheduler = ck::BlockGemmPipelineScheduler::Intrawave; break; - case PipelineScheduler::INTERWAVE: scheduler = ck::BlockGemmPipelineScheduler::Interwave; break; - case PipelineScheduler::DEFAULT: throw "Block GEMM scheduler must be Intrawave or Interwave."; - default: throw "Unknown PipelineScheduler"; - } - - switch(BG.pipeline_version) - { - case PipelineVersion::V1: version = ck::BlockGemmPipelineVersion::v1; break; - case PipelineVersion::V2: version = ck::BlockGemmPipelineVersion::v2; break; - case PipelineVersion::V3: version = ck::BlockGemmPipelineVersion::v3; break; - case PipelineVersion::V4: version = ck::BlockGemmPipelineVersion::v4; break; - case PipelineVersion::V5: version = ck::BlockGemmPipelineVersion::v5; break; - case PipelineVersion::WEIGHT_ONLY: - throw "PipelineVersion::WEIGHT_ONLY is not supported for block GEMM."; - default: throw "Unknown PipelineVersion"; - } - - return BlockGemmSpec{.pipeline_version = version, .scheduler = scheduler}; -} - -// Block info for a convolution. 
-struct MNK -{ - size_t m{}; - size_t n{}; - size_t k{}; -}; -struct ConvBlock -{ - size_t block_size = 0; - MNK per_block = {}; -}; - -template -constexpr ConvBlock SetThreadBlockInfo() -{ - constexpr auto& TB = ALGORITHM.thread_block; - return ConvBlock{.block_size = TB.block_size, - .per_block = {.m = TB.tile_size.m, .n = TB.tile_size.n, .k = TB.tile_size.k}}; -} - -// Block transfer parameters for A or B tensor. -struct BlockTransfer -{ - ck::Array thread_cluster_dims = {0, 0, 0}; // k0, m, k1 - ck::Array thread_cluster_order = {0, 0, 0}; - ck::Array src_access_order = {0, 0, 0}; - size_t src_vector_dim = 0; - size_t src_scalar_per_vector = 0; - size_t lds_dst_scalar_per_vector = 0; - bool is_direct_load = false; - bool lds_padding = false; -}; - -template -constexpr BlockTransfer SetFwdConvBlockTransfer() -{ - constexpr auto& TCL = TRANSFER.block_transfer; - constexpr auto& TCO = TRANSFER.block_transfer_access_order; - constexpr auto& SAO = TRANSFER.src_access_order; - constexpr auto& LDS = TRANSFER.lds_transfer; - - BlockTransfer block_transfer{.thread_cluster_dims = {TCL.k0, TCL.m_n, TCL.k1}, - .thread_cluster_order = {TCO.order[0], TCO.order[1], TCO.order[2]}, - .src_access_order = {SAO.order[0], SAO.order[1], SAO.order[2]}, - .src_vector_dim = LDS.src_vector_dim, - .src_scalar_per_vector = LDS.src_scalar_per_vector, - .lds_dst_scalar_per_vector = LDS.lds_dst_scalar_per_vector, - .is_direct_load = LDS.is_direct_load, - .lds_padding = LDS.lds_padding}; - return block_transfer; -} - -// Block transfer parameters for C tensor. -struct CBlockTransfer -{ - size_t m_per_wave_per_shuffle = 0; - size_t n_per_wave_per_shuffle = 0; - ck::Array thread_cluster_dims = {0, 0, 0, 0}; - size_t scalar_per_vector = 0; -}; - -template -constexpr CBlockTransfer SetCBlockTransfer() -{ - constexpr auto& TCL = ALGORITHM.transfer.c.thread_cluster_dims; - constexpr auto& EPC = ALGORITHM.transfer.c.epilogue; - CBlockTransfer block_transfer{.m_per_wave_per_shuffle = EPC.m_per_wave_per_shuffle, - .n_per_wave_per_shuffle = EPC.n_per_wave_per_shuffle, - .thread_cluster_dims = - { - TCL.m_block, - TCL.m_wave_per_xdl, - TCL.n_block, - TCL.n_wave_per_xdl, - }, - .scalar_per_vector = EPC.scalar_per_vector}; - return block_transfer; -} - -template -consteval ck::LoopScheduler SetLoopScheduler() -{ - constexpr auto loop_scheduler = ALGORITHM.loop_scheduler; - using ck_loop_sched = ck::LoopScheduler; - switch(loop_scheduler) - { - case PipelineScheduler::DEFAULT: return ck_loop_sched::Default; - case PipelineScheduler::INTERWAVE: return ck_loop_sched::Interwave; - case PipelineScheduler::INTRAWAVE: throw "LoopScheduler must be either DEFAULT or INTERWAVE."; - default: throw "Unknown PipelineScheduler"; - } -} - -template -consteval ck::PipelineVersion SetGridwiseGemmPipelineVersion() -{ - constexpr auto pipeline_version = ALGORITHM.gridwise_gemm.pipeline_version; - using ck_pipeline = ck::PipelineVersion; - switch(pipeline_version) - { - case PipelineVersion::V1: return ck_pipeline::v1; - case PipelineVersion::V2: return ck_pipeline::v2; - case PipelineVersion::V3: throw "PipelineVersion::V3 is used only for stream-K."; - case PipelineVersion::V4: return ck_pipeline::v4; - case PipelineVersion::V5: throw "PipelineVersion::V5 cannot be used for gridwise GEMM."; - case PipelineVersion::WEIGHT_ONLY: return ck_pipeline::weight_only; - default: throw "Unknown GridwiseGemmPipelineVersion"; - } -} - -template -consteval ck::tensor_operation::device::GemmSpecialization SetGemmSpecialization() -{ - constexpr auto gemm_spec = 
ALGORITHM.gemm_specialization; - using ck_gemm_spec = ck::tensor_operation::device::GemmSpecialization; - - switch(gemm_spec) - { - case GemmSpecialization::Default: return ck_gemm_spec::Default; - case GemmSpecialization::MPadding: return ck_gemm_spec::MPadding; - case GemmSpecialization::NPadding: return ck_gemm_spec::NPadding; - case GemmSpecialization::KPadding: return ck_gemm_spec::KPadding; - case GemmSpecialization::MNPadding: return ck_gemm_spec::MNPadding; - case GemmSpecialization::MKPadding: return ck_gemm_spec::MKPadding; - case GemmSpecialization::NKPadding: return ck_gemm_spec::NKPadding; - case GemmSpecialization::MNKPadding: return ck_gemm_spec::MNKPadding; - case GemmSpecialization::OPadding: return ck_gemm_spec::OPadding; - case GemmSpecialization::MOPadding: return ck_gemm_spec::MOPadding; - case GemmSpecialization::NOPadding: return ck_gemm_spec::NOPadding; - case GemmSpecialization::KOPadding: return ck_gemm_spec::KOPadding; - case GemmSpecialization::MNOPadding: return ck_gemm_spec::MNOPadding; - case GemmSpecialization::MKOPadding: return ck_gemm_spec::MKOPadding; - case GemmSpecialization::NKOPadding: return ck_gemm_spec::NKOPadding; - case GemmSpecialization::MNKOPadding: return ck_gemm_spec::MNKOPadding; - default: throw "Unknown GemmSpecialization"; - } -} - -template -consteval ck::BlockGemmPipelineVersion SetBlockGemmPipelineVersion() -{ - constexpr auto version = ALGORITHM.pipeline_version; - using ck_pipeline = ck::BlockGemmPipelineVersion; - switch(version) - { - case PipelineVersion::V1: return ck_pipeline::v1; - case PipelineVersion::V2: return ck_pipeline::v2; - case PipelineVersion::V3: return ck_pipeline::v3; - case PipelineVersion::V4: return ck_pipeline::v4; - case PipelineVersion::V5: return ck_pipeline::v5; - default: throw "Unknown block GEMM PipelineVersion"; - } -} - -template -consteval ck::tensor_operation::device::ConvolutionForwardSpecialization SetFwdConvSpecialization() -{ - constexpr auto specialization = ALGORITHM.fwd_specialization; - using ck_conv_spec = ck::tensor_operation::device::ConvolutionForwardSpecialization; - switch(specialization) - { - case ConvFwdSpecialization::DEFAULT: return ck_conv_spec::Default; - case ConvFwdSpecialization::FILTER_1X1_PAD0: return ck_conv_spec::Filter1x1Pad0; - case ConvFwdSpecialization::FILTER_1X1_STRIDE1_PAD0: return ck_conv_spec::Filter1x1Stride1Pad0; - case ConvFwdSpecialization::FILTER_3x3: return ck_conv_spec::Filter3x3; - default: throw "Unknown ConvFwdSpecialization"; - } -} - -} // namespace ck_tile::builder::factory_internal - -namespace ck_tile::builder { - -// Primary template for the convolution factory. -template -struct ConvFactory -{ - // This will trigger if a specialization for the given convolution direction is not found. - // We should always catch this in an earlier validation check. - static_assert(false, "Unsupported device operation."); -}; - -// Factory specialization for DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3 instance -// of a grouped forward convolution kernel. 
-template - requires ConvDirectionIsForward && - DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3 -struct ConvFactory -{ - static constexpr size_t SPATIAL_DIM = SIGNATURE.spatial_dim; - using Layouts = decltype(factory_internal:: - GetTensorLayout()); - using AuxiliaryLayouts = - decltype(factory_internal:: - GetAuxiliaryTensorLayouts()); - - using Types = factory_internal::FwdConvTensorDataTypes; - using Ops = decltype(factory_internal::GetElementwiseOps()); - using AlgorithmType = decltype(ALGORITHM); - - static_assert(ALGORITHM.transfer.a.lds_transfer.is_direct_load == - ALGORITHM.transfer.b.lds_transfer.is_direct_load, - "A and B block transfers must both be direct load or not."); - - static constexpr bool IS_DIRECT_LOAD = ALGORITHM.transfer.a.lds_transfer.is_direct_load; - static constexpr auto FWD_CONV_SPECIALIZATION = - factory_internal::SetFwdConvSpecialization(); - static constexpr auto GEMM_SPECIALIZATION = - factory_internal::SetGemmSpecialization(); - static constexpr factory_internal::ConvSpec SPECIALIZATION{.conv_spec = FWD_CONV_SPECIALIZATION, - .gemm_spec = GEMM_SPECIALIZATION}; - - static constexpr auto BLOCK = factory_internal::SetThreadBlockInfo(); - static constexpr auto GRIDWISE_GEMM = ALGORITHM.gridwise_gemm; - static constexpr auto A_BLOCK_TRANSFER = - factory_internal::SetFwdConvBlockTransfer(); - static constexpr auto B_BLOCK_TRANSFER = - factory_internal::SetFwdConvBlockTransfer(); - static constexpr auto C_BLOCK_TRANSFER = - factory_internal::SetCBlockTransfer(); - static constexpr auto BLOCK_GEMM = factory_internal::SetBlockGemm(); - - // Check limits for the algorithm parameters. - // TODO: Add more limits checks as needed. - static_assert(InputVectorTransferLimits); - static_assert(InputVectorTransferLimits); - static_assert(OutputVectorTransferLimits); - static_assert(AccessOrderLimits); - static_assert(AccessOrderLimits); - static_assert(AccessOrderLimits); - static_assert(AccessOrderLimits); - - // The forward convolution kernel class instance. 
- using Instance = ck::tensor_operation::device::DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3< - SPATIAL_DIM, - typename Layouts::ALayout, - typename Layouts::BLayout, - typename AuxiliaryLayouts::DsLayout, - typename Layouts::ELayout, - typename Types::ADataType, - typename Types::BDataType, - typename Types::AccDataType, - typename Types::CShuffleDataType, - typename Types::DsDataType, - typename Types::EDataType, - typename Ops::AElementwiseOp, - typename Ops::BElementwiseOp, - typename Ops::CDEElementwiseOp, - SPECIALIZATION.conv_spec, - SPECIALIZATION.gemm_spec, - BLOCK.block_size, - BLOCK.per_block.m, - BLOCK.per_block.n, - BLOCK.per_block.k, - GRIDWISE_GEMM.ak1, - GRIDWISE_GEMM.bk1, - GRIDWISE_GEMM.m_per_xdl, - GRIDWISE_GEMM.n_per_xdl, - GRIDWISE_GEMM.m_xdl_per_wave, - GRIDWISE_GEMM.n_xdl_per_wave, - to_sequence_v, - to_sequence_v, - to_sequence_v, - A_BLOCK_TRANSFER.src_vector_dim, - A_BLOCK_TRANSFER.src_scalar_per_vector, - A_BLOCK_TRANSFER.lds_dst_scalar_per_vector, - static_cast(A_BLOCK_TRANSFER.lds_padding), - to_sequence_v, - to_sequence_v, - to_sequence_v, - B_BLOCK_TRANSFER.src_vector_dim, - B_BLOCK_TRANSFER.src_scalar_per_vector, - B_BLOCK_TRANSFER.lds_dst_scalar_per_vector, - static_cast(B_BLOCK_TRANSFER.lds_padding), - C_BLOCK_TRANSFER.m_per_wave_per_shuffle, - C_BLOCK_TRANSFER.n_per_wave_per_shuffle, - to_sequence_v, - C_BLOCK_TRANSFER.scalar_per_vector, - BLOCK_GEMM.scheduler, - BLOCK_GEMM.pipeline_version, - typename Types::AComputeType, - typename Types::BComputeType, - IS_DIRECT_LOAD>; -}; - -// Factory specialization for DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle instance -// of a grouped forward convolution kernel. -template - requires ConvDirectionIsForward && - DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle> -struct ConvFactory -{ - static constexpr size_t SPATIAL_DIM = SIGNATURE.spatial_dim; - using Layouts = decltype(factory_internal:: - GetTensorLayout()); - using AuxiliaryLayouts = - decltype(factory_internal:: - GetAuxiliaryTensorLayouts()); - - using Types = factory_internal::FwdConvTensorDataTypes; - using Ops = decltype(factory_internal::GetElementwiseOps()); - using AlgorithmType = decltype(ALGORITHM); - - static constexpr auto FWD_CONV_SPECIALIZATION = - factory_internal::SetFwdConvSpecialization(); - static constexpr auto GEMM_SPECIALIZATION = - factory_internal::SetGemmSpecialization(); - static constexpr factory_internal::ConvSpec SPECIALIZATION{.conv_spec = FWD_CONV_SPECIALIZATION, - .gemm_spec = GEMM_SPECIALIZATION}; - - static constexpr auto LOOP_SCHEDULER = factory_internal::SetLoopScheduler(); - static constexpr auto BLOCK = factory_internal::SetThreadBlockInfo(); - static constexpr auto GRIDWISE_GEMM = ALGORITHM.gridwise_gemm; - static constexpr auto A_BLOCK_TRANSFER = - factory_internal::SetFwdConvBlockTransfer(); - static constexpr auto B_BLOCK_TRANSFER = - factory_internal::SetFwdConvBlockTransfer(); - static constexpr auto C_BLOCK_TRANSFER = - factory_internal::SetCBlockTransfer(); - - // Check limits for the algorithm parameters. - // TODO: Add more limits checks as needed. - static_assert(InputVectorTransferLimits); - static_assert(InputVectorTransferLimits); - static_assert(OutputVectorTransferLimits); - static_assert(AccessOrderLimits); - static_assert(AccessOrderLimits); - static_assert(AccessOrderLimits); - static_assert(AccessOrderLimits); - - // The forward convolution kernel class instance. 
- using Instance = ck::tensor_operation::device::DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle< - SPATIAL_DIM, - typename Layouts::ALayout, - typename Layouts::BLayout, - typename AuxiliaryLayouts::DsLayout, - typename Layouts::ELayout, - typename Types::ADataType, - typename Types::BDataType, - typename Types::AccDataType, - typename Types::CShuffleDataType, - typename Types::DsDataType, - typename Types::EDataType, - typename Ops::AElementwiseOp, - typename Ops::BElementwiseOp, - typename Ops::CDEElementwiseOp, - SPECIALIZATION.conv_spec, - SPECIALIZATION.gemm_spec, - ALGORITHM.num_gemm_k_prefetch_stages, - BLOCK.block_size, - BLOCK.per_block.m, - BLOCK.per_block.n, - BLOCK.per_block.k, - GRIDWISE_GEMM.ak1, - GRIDWISE_GEMM.bk1, - GRIDWISE_GEMM.m_per_xdl, - GRIDWISE_GEMM.n_per_xdl, - GRIDWISE_GEMM.m_xdl_per_wave, - GRIDWISE_GEMM.n_xdl_per_wave, - to_sequence_v, - to_sequence_v, - to_sequence_v, - A_BLOCK_TRANSFER.src_vector_dim, - A_BLOCK_TRANSFER.src_scalar_per_vector, - A_BLOCK_TRANSFER.lds_dst_scalar_per_vector, - static_cast(A_BLOCK_TRANSFER.lds_padding), - to_sequence_v, - to_sequence_v, - to_sequence_v, - B_BLOCK_TRANSFER.src_vector_dim, - B_BLOCK_TRANSFER.src_scalar_per_vector, - B_BLOCK_TRANSFER.lds_dst_scalar_per_vector, - static_cast(B_BLOCK_TRANSFER.lds_padding), - C_BLOCK_TRANSFER.m_per_wave_per_shuffle, - C_BLOCK_TRANSFER.n_per_wave_per_shuffle, - to_sequence_v, - C_BLOCK_TRANSFER.scalar_per_vector, - typename Types::AComputeType, - typename Types::BComputeType, - LOOP_SCHEDULER, - ALGORITHM.num_groups_to_merge>; -}; - -// Factory specialization for DeviceGroupedConvFwdMultipleD_Wmma_CShuffle instance -// of a grouped forward convolution kernel. -template - requires ConvDirectionIsForward && - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle> -struct ConvFactory -{ - static constexpr size_t SPATIAL_DIM = SIGNATURE.spatial_dim; - using Layouts = decltype(factory_internal:: - GetTensorLayout()); - using AuxiliaryLayouts = - decltype(factory_internal:: - GetAuxiliaryTensorLayouts()); - - using Types = factory_internal::FwdConvTensorDataTypes; - using Ops = decltype(factory_internal::GetElementwiseOps()); - using AlgorithmType = decltype(ALGORITHM); - - static constexpr auto FWD_CONV_SPECIALIZATION = - factory_internal::SetFwdConvSpecialization(); - static constexpr auto GEMM_SPECIALIZATION = - factory_internal::SetGemmSpecialization(); - static constexpr factory_internal::ConvSpec SPECIALIZATION{.conv_spec = FWD_CONV_SPECIALIZATION, - .gemm_spec = GEMM_SPECIALIZATION}; - - static constexpr auto LOOP_SCHEDULER = factory_internal::SetLoopScheduler(); - static constexpr auto BLOCK = factory_internal::SetThreadBlockInfo(); - static constexpr auto GRIDWISE_GEMM = ALGORITHM.gridwise_gemm; - static constexpr auto GRIDWISE_GEMM_PIPELINE_VERSION = - factory_internal::SetGridwiseGemmPipelineVersion(); - static constexpr auto A_BLOCK_TRANSFER = - factory_internal::SetFwdConvBlockTransfer(); - static constexpr auto B_BLOCK_TRANSFER = - factory_internal::SetFwdConvBlockTransfer(); - static constexpr auto C_BLOCK_TRANSFER = - factory_internal::SetCBlockTransfer(); - - // Check limits for the algorithm parameters. - // TODO: Add more limits checks as needed. - static_assert(InputVectorTransferLimits); - static_assert(InputVectorTransferLimits); - static_assert(OutputVectorTransferLimits); - static_assert(AccessOrderLimits); - static_assert(AccessOrderLimits); - static_assert(AccessOrderLimits); - static_assert(AccessOrderLimits); - - // The forward convolution kernel class instance. 
- using Instance = ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Wmma_CShuffle< - SPATIAL_DIM, - typename Layouts::ALayout, - typename Layouts::BLayout, - typename AuxiliaryLayouts::DsLayout, - typename Layouts::ELayout, - typename Types::ADataType, - typename Types::BDataType, - typename Types::AccDataType, - typename Types::CShuffleDataType, - typename Types::DsDataType, - typename Types::EDataType, - typename Ops::AElementwiseOp, - typename Ops::BElementwiseOp, - typename Ops::CDEElementwiseOp, - SPECIALIZATION.conv_spec, - SPECIALIZATION.gemm_spec, - ALGORITHM.num_gemm_k_prefetch_stages, - BLOCK.block_size, - BLOCK.per_block.m, - BLOCK.per_block.n, - BLOCK.per_block.k, - GRIDWISE_GEMM.k1, - GRIDWISE_GEMM.m_per_wmma, - GRIDWISE_GEMM.n_per_wmma, - GRIDWISE_GEMM.m_wmma_per_wave, - GRIDWISE_GEMM.n_wmma_per_wave, - to_sequence_v, - to_sequence_v, - to_sequence_v, - A_BLOCK_TRANSFER.src_vector_dim, - A_BLOCK_TRANSFER.src_scalar_per_vector, - A_BLOCK_TRANSFER.lds_dst_scalar_per_vector, - static_cast(A_BLOCK_TRANSFER.lds_padding), - to_sequence_v, - to_sequence_v, - to_sequence_v, - B_BLOCK_TRANSFER.src_vector_dim, - B_BLOCK_TRANSFER.src_scalar_per_vector, - B_BLOCK_TRANSFER.lds_dst_scalar_per_vector, - static_cast(B_BLOCK_TRANSFER.lds_padding), - C_BLOCK_TRANSFER.m_per_wave_per_shuffle, - C_BLOCK_TRANSFER.n_per_wave_per_shuffle, - to_sequence_v, - C_BLOCK_TRANSFER.scalar_per_vector, - LOOP_SCHEDULER, - GRIDWISE_GEMM_PIPELINE_VERSION>; -}; - -// Factory specialization for DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK instance -// of a grouped forward convolution kernel using Direct Load (DL) approach. -template - requires ConvDirectionIsForward && DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK< - std::remove_const_t> -struct ConvFactory -{ - static constexpr size_t SPATIAL_DIM = SIGNATURE.spatial_dim; - using Layouts = decltype(factory_internal:: - GetTensorLayout()); - using AuxiliaryLayouts = - decltype(factory_internal:: - GetAuxiliaryTensorLayouts()); - - using Types = factory_internal::FwdConvTensorDataTypes; - using Ops = decltype(factory_internal::GetElementwiseOps()); - using AlgorithmType = decltype(ALGORITHM); - - static constexpr auto FWD_CONV_SPECIALIZATION = - factory_internal::SetFwdConvSpecialization(); - static constexpr auto GEMM_SPECIALIZATION = - factory_internal::SetGemmSpecialization(); - - static constexpr auto BLOCK = factory_internal::SetThreadBlockInfo(); - - // DL-specific parameters from algorithm descriptor - static constexpr auto DL_THREAD_CFG = ALGORITHM.thread_config; - static constexpr ck::index_t K0PerBlock = DL_THREAD_CFG.k0_per_block; - static constexpr ck::index_t K1 = DL_THREAD_CFG.k1; - static constexpr ck::index_t M1PerThread = DL_THREAD_CFG.m1_per_thread; - static constexpr ck::index_t N1PerThread = DL_THREAD_CFG.n1_per_thread; - static constexpr ck::index_t KPerThread = DL_THREAD_CFG.k_per_thread; - - // Thread cluster from descriptor - static constexpr auto DL_CLUSTER = ALGORITHM.thread_cluster; - using M1N1ThreadClusterM1Xs = to_sequence_v; - using M1N1ThreadClusterN1Xs = to_sequence_v; - - // A Block Transfer from descriptor - K0_M0_M1_K1 tensor format - static constexpr auto DL_A_TRANSFER = ALGORITHM.transfer.a.block_transfer; - using ABlockTransferThreadSliceLengths_K0_M0_M1_K1 = - to_sequence_v; - using ABlockTransferThreadClusterLengths_K0_M0_M1_K1 = - to_sequence_v; - using ABlockTransferThreadClusterArrangeOrder = - to_sequence_v; - using ABlockTransferSrcAccessOrder = to_sequence_v; - using 
ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 = - to_sequence_v; - using ABlockTransferSrcVectorTensorContiguousDimOrder = - to_sequence_v; - using ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 = - to_sequence_v; - - // B Block Transfer from descriptor - K0_N0_N1_K1 tensor format - static constexpr auto DL_B_TRANSFER = ALGORITHM.transfer.b.block_transfer; - using BBlockTransferThreadSliceLengths_K0_N0_N1_K1 = - to_sequence_v; - using BBlockTransferThreadClusterLengths_K0_N0_N1_K1 = - to_sequence_v; - using BBlockTransferThreadClusterArrangeOrder = - to_sequence_v; - using BBlockTransferSrcAccessOrder = to_sequence_v; - using BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 = - to_sequence_v; - using BBlockTransferSrcVectorTensorContiguousDimOrder = - to_sequence_v; - using BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 = - to_sequence_v; - - // C Thread Transfer from descriptor - static constexpr auto DL_C_TRANSFER = ALGORITHM.transfer.c.epilogue; - using CThreadTransferSrcDstAccessOrder = to_sequence_v; - static constexpr ck::index_t CThreadTransferSrcDstVectorDim = DL_C_TRANSFER.src_dst_vector_dim; - static constexpr ck::index_t CThreadTransferDstScalarPerVector = - DL_C_TRANSFER.dst_scalar_per_vector; - - // The DL forward convolution kernel class instance - using Instance = ck::tensor_operation::device::DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK< - SPATIAL_DIM, - typename Types::ADataType, - typename Types::BDataType, - typename Types::DsDataType, - typename Types::EDataType, - typename Types::AccDataType, - typename Layouts::ALayout, - typename Layouts::BLayout, - typename AuxiliaryLayouts::DsLayout, - typename Layouts::ELayout, - typename Ops::AElementwiseOp, - typename Ops::BElementwiseOp, - typename Ops::CDEElementwiseOp, - FWD_CONV_SPECIALIZATION, - GEMM_SPECIALIZATION, - BLOCK.block_size, - BLOCK.per_block.m, - BLOCK.per_block.n, - K0PerBlock, - K1, - M1PerThread, - N1PerThread, - KPerThread, - M1N1ThreadClusterM1Xs, - M1N1ThreadClusterN1Xs, - ABlockTransferThreadSliceLengths_K0_M0_M1_K1, - ABlockTransferThreadClusterLengths_K0_M0_M1_K1, - ABlockTransferThreadClusterArrangeOrder, - ABlockTransferSrcAccessOrder, - ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, - ABlockTransferSrcVectorTensorContiguousDimOrder, - ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, - BBlockTransferThreadSliceLengths_K0_N0_N1_K1, - BBlockTransferThreadClusterLengths_K0_N0_N1_K1, - BBlockTransferThreadClusterArrangeOrder, - BBlockTransferSrcAccessOrder, - BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, - BBlockTransferSrcVectorTensorContiguousDimOrder, - BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, - CThreadTransferSrcDstAccessOrder, - CThreadTransferSrcDstVectorDim, - CThreadTransferDstScalarPerVector>; -}; - -// Factory specialization for DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor instance -// of a grouped forward convolution kernel with large tensor support (N-splitting). 
-template - requires ConvDirectionIsForward && - DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor< - std::remove_const_t> -struct ConvFactory -{ - static constexpr size_t SPATIAL_DIM = SIGNATURE.spatial_dim; - using Layouts = decltype(factory_internal:: - GetTensorLayout()); - using AuxiliaryLayouts = - decltype(factory_internal:: - GetAuxiliaryTensorLayouts()); - - using Types = factory_internal::FwdConvTensorDataTypes; - using Ops = decltype(factory_internal::GetElementwiseOps()); - using AlgorithmType = decltype(ALGORITHM); - - static constexpr auto BASE_ALGORITHM = ALGORITHM.base_algorithm; - - static constexpr auto FWD_CONV_SPECIALIZATION = - factory_internal::SetFwdConvSpecialization(); - static constexpr auto GEMM_SPECIALIZATION = - factory_internal::SetGemmSpecialization(); - static constexpr factory_internal::ConvSpec SPECIALIZATION{.conv_spec = FWD_CONV_SPECIALIZATION, - .gemm_spec = GEMM_SPECIALIZATION}; - - static constexpr auto LOOP_SCHEDULER = factory_internal::SetLoopScheduler(); - static constexpr auto BLOCK = factory_internal::SetThreadBlockInfo(); - static constexpr auto GRIDWISE_GEMM = BASE_ALGORITHM.gridwise_gemm; - static constexpr auto A_BLOCK_TRANSFER = - factory_internal::SetFwdConvBlockTransfer(); - static constexpr auto B_BLOCK_TRANSFER = - factory_internal::SetFwdConvBlockTransfer(); - static constexpr auto C_BLOCK_TRANSFER = - factory_internal::SetCBlockTransfer(); - - // Check limits for the algorithm parameters. - static_assert(InputVectorTransferLimits); - static_assert(InputVectorTransferLimits); - static_assert(OutputVectorTransferLimits); - static_assert(AccessOrderLimits); - static_assert(AccessOrderLimits); - static_assert(AccessOrderLimits); - static_assert(AccessOrderLimits); - - // The forward convolution kernel class instance with large tensor support. 
- using Instance = - ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor< - SPATIAL_DIM, - typename Layouts::ALayout, - typename Layouts::BLayout, - typename AuxiliaryLayouts::DsLayout, - typename Layouts::ELayout, - typename Types::ADataType, - typename Types::BDataType, - typename Types::AccDataType, - typename Types::CShuffleDataType, - typename Types::DsDataType, - typename Types::EDataType, - typename Ops::AElementwiseOp, - typename Ops::BElementwiseOp, - typename Ops::CDEElementwiseOp, - SPECIALIZATION.conv_spec, - SPECIALIZATION.gemm_spec, - BASE_ALGORITHM.num_gemm_k_prefetch_stages, - BLOCK.block_size, - BLOCK.per_block.m, - BLOCK.per_block.n, - BLOCK.per_block.k, - GRIDWISE_GEMM.ak1, - GRIDWISE_GEMM.bk1, - GRIDWISE_GEMM.m_per_xdl, - GRIDWISE_GEMM.n_per_xdl, - GRIDWISE_GEMM.m_xdl_per_wave, - GRIDWISE_GEMM.n_xdl_per_wave, - to_sequence_v, - to_sequence_v, - to_sequence_v, - A_BLOCK_TRANSFER.src_vector_dim, - A_BLOCK_TRANSFER.src_scalar_per_vector, - A_BLOCK_TRANSFER.lds_dst_scalar_per_vector, - static_cast(A_BLOCK_TRANSFER.lds_padding), - to_sequence_v, - to_sequence_v, - to_sequence_v, - B_BLOCK_TRANSFER.src_vector_dim, - B_BLOCK_TRANSFER.src_scalar_per_vector, - B_BLOCK_TRANSFER.lds_dst_scalar_per_vector, - static_cast(B_BLOCK_TRANSFER.lds_padding), - C_BLOCK_TRANSFER.m_per_wave_per_shuffle, - C_BLOCK_TRANSFER.n_per_wave_per_shuffle, - to_sequence_v, - C_BLOCK_TRANSFER.scalar_per_vector, - typename Types::AComputeType, - typename Types::BComputeType, - LOOP_SCHEDULER>; -}; - -} // namespace ck_tile::builder diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_dl_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_dl_factory.hpp index dee918cc1f..19bfc026f1 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_dl_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_dl_factory.hpp @@ -7,7 +7,6 @@ #include "ck_tile/builder/conv_signature_concepts.hpp" #include "ck_tile/builder/conv_algorithm_concepts.hpp" #include "ck_tile/builder/builder_utils.hpp" -#include "ck_tile/builder/conv_signature_utils.hpp" #include "ck_tile/builder/factory/helpers/conv_tensor_layout.hpp" #include "ck_tile/builder/factory/helpers/conv_tensor_type.hpp" #include "ck_tile/builder/factory/helpers/conv_elementwise_op.hpp" @@ -25,11 +24,15 @@ template ()); - using Types = internal::ConvTensorTypes; - using Ops = internal::ElementwiseOps()>; + + using AuxiliaryLayouts = + decltype(internal::GetAuxiliaryTensorLayouts()); + + using Types = internal::FwdConvTensorDataTypes; + using Ops = decltype(internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); static constexpr auto FWD_CONV_SPECIALIZATION = internal::SetFwdConvSpecialization(); @@ -99,7 +102,7 @@ struct ConvFwdDlFactory typename Types::AccDataType, typename Layouts::ALayout, typename Layouts::BLayout, - typename Layouts::DsLayout, + typename AuxiliaryLayouts::DsLayout, typename Layouts::ELayout, typename Ops::AElementwiseOp, typename Ops::BElementwiseOp, diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_large_tensor_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_large_tensor_factory.hpp index 383ecbf8c9..5319a5a319 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_large_tensor_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_large_tensor_factory.hpp @@ -8,7 +8,6 @@ #include 
"ck_tile/builder/conv_algorithm_concepts.hpp" #include "ck_tile/builder/conv_algorithm_limits.hpp" #include "ck_tile/builder/builder_utils.hpp" -#include "ck_tile/builder/conv_signature_utils.hpp" #include "ck_tile/builder/factory/helpers/conv_tensor_layout.hpp" #include "ck_tile/builder/factory/helpers/conv_tensor_type.hpp" #include "ck_tile/builder/factory/helpers/conv_elementwise_op.hpp" @@ -27,11 +26,15 @@ template ()); - using Types = internal::ConvTensorTypes; - using Ops = internal::ElementwiseOps()>; + + using AuxiliaryLayouts = + decltype(internal::GetAuxiliaryTensorLayouts()); + + using Types = internal::FwdConvTensorDataTypes; + using Ops = decltype(internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); static constexpr auto BASE_ALGORITHM = ALGORITHM.base_algorithm; @@ -67,7 +70,7 @@ struct ConvFwdLargeTensorFactory SPATIAL_DIM, typename Layouts::ALayout, typename Layouts::BLayout, - typename Layouts::DsLayout, + typename AuxiliaryLayouts::DsLayout, typename Layouts::ELayout, typename Types::ADataType, typename Types::BDataType, diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_v3_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_v3_factory.hpp index 90d4abe3e7..1a60a3b5e9 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_v3_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_v3_factory.hpp @@ -8,7 +8,6 @@ #include "ck_tile/builder/conv_algorithm_concepts.hpp" #include "ck_tile/builder/conv_algorithm_limits.hpp" #include "ck_tile/builder/builder_utils.hpp" -#include "ck_tile/builder/conv_signature_utils.hpp" #include "ck_tile/builder/factory/helpers/conv_tensor_layout.hpp" #include "ck_tile/builder/factory/helpers/conv_tensor_type.hpp" #include "ck_tile/builder/factory/helpers/conv_elementwise_op.hpp" @@ -27,11 +26,14 @@ template ()); - using Types = internal::ConvTensorTypes; - using Ops = internal::ElementwiseOps()>; + using AuxiliaryLayouts = + decltype(internal::GetAuxiliaryTensorLayouts()); + + using Types = internal::FwdConvTensorDataTypes; + using Ops = decltype(internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); static_assert(ALGORITHM.transfer.a.lds_transfer.is_direct_load == @@ -68,7 +70,7 @@ struct ConvFwdXdlV3Factory SPATIAL_DIM, typename Layouts::ALayout, typename Layouts::BLayout, - typename Layouts::DsLayout, + typename AuxiliaryLayouts::DsLayout, typename Layouts::ELayout, typename Types::ADataType, typename Types::BDataType, diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_wmma_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_wmma_factory.hpp index e35b3f3d46..0b742377f8 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_wmma_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_wmma_factory.hpp @@ -8,7 +8,6 @@ #include "ck_tile/builder/conv_algorithm_concepts.hpp" #include "ck_tile/builder/conv_algorithm_limits.hpp" #include "ck_tile/builder/builder_utils.hpp" -#include "ck_tile/builder/conv_signature_utils.hpp" #include "ck_tile/builder/factory/helpers/conv_tensor_layout.hpp" #include "ck_tile/builder/factory/helpers/conv_tensor_type.hpp" #include "ck_tile/builder/factory/helpers/conv_elementwise_op.hpp" @@ -27,11 +26,14 @@ template ()); - using Types = internal::ConvTensorTypes; - using Ops = internal::ElementwiseOps()>; + using AuxiliaryLayouts = + decltype(internal::GetAuxiliaryTensorLayouts()); + + using Types 
= internal::FwdConvTensorDataTypes; + using Ops = decltype(internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); static constexpr auto FWD_CONV_SPECIALIZATION = internal::SetFwdConvSpecialization(); @@ -65,7 +67,7 @@ struct ConvFwdWmmaFactory SPATIAL_DIM, typename Layouts::ALayout, typename Layouts::BLayout, - typename Layouts::DsLayout, + typename AuxiliaryLayouts::DsLayout, typename Layouts::ELayout, typename Types::ADataType, typename Types::BDataType, diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_xdl_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_xdl_factory.hpp index fc5b32f799..262e3328d7 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_xdl_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_xdl_factory.hpp @@ -8,7 +8,6 @@ #include "ck_tile/builder/conv_algorithm_concepts.hpp" #include "ck_tile/builder/conv_algorithm_limits.hpp" #include "ck_tile/builder/builder_utils.hpp" -#include "ck_tile/builder/conv_signature_utils.hpp" #include "ck_tile/builder/factory/helpers/conv_tensor_layout.hpp" #include "ck_tile/builder/factory/helpers/conv_tensor_type.hpp" #include "ck_tile/builder/factory/helpers/conv_elementwise_op.hpp" @@ -27,11 +26,14 @@ template ()); - using Types = internal::ConvTensorTypes; - using Ops = internal::ElementwiseOps()>; + using AuxiliaryLayouts = + decltype(internal::GetAuxiliaryTensorLayouts()); + + using Types = internal::FwdConvTensorDataTypes; + using Ops = decltype(internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); static constexpr auto FWD_CONV_SPECIALIZATION = internal::SetFwdConvSpecialization(); @@ -63,7 +65,7 @@ struct ConvFwdXdlFactory SPATIAL_DIM, typename Layouts::ALayout, typename Layouts::BLayout, - typename Layouts::DsLayout, + typename AuxiliaryLayouts::DsLayout, typename Layouts::ELayout, typename Types::ADataType, typename Types::BDataType, diff --git a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_data_type_utils.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_data_type_utils.hpp deleted file mode 100644 index cc220459ed..0000000000 --- a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_data_type_utils.hpp +++ /dev/null @@ -1,175 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#pragma once - -#include "ck_tile/builder/conv_signature_concepts.hpp" -#include "ck_tile/builder/types.hpp" - -namespace ck_tile::builder::factory_internal { - -template -struct DataTypeToCK -{ - // Catch unsupported data types at compile time - static_assert(sizeof(UnsupportedEnumValue
) == 0, "Unsupported data type conversion to CK."); -}; - -template <> -struct DataTypeToCK -{ - using type = ck::half_t; -}; -template <> -struct DataTypeToCK -{ - using type = ck::bhalf_t; -}; -template <> -struct DataTypeToCK -{ - using type = float; -}; -template <> -struct DataTypeToCK -{ - using type = int32_t; -}; -template <> -struct DataTypeToCK -{ - using type = int8_t; -}; -template <> -struct DataTypeToCK -{ - using type = ck::f8_t; -}; - -struct CK_empty_tuple -{ - using type = ck::Tuple<>; -}; - -template -consteval auto ConvertDataTypeToCK() -{ - return DataTypeToCK
{}; -} - -template -consteval auto GetTensorDataAndComputeTypes() -{ - constexpr auto data_type = Config.data_type; - constexpr auto compute_type = Config.compute_type; - - if constexpr(data_type == DataType::UNDEFINDED && compute_type == DataType::UNDEFINDED) - { - return std::make_pair(ConvertDataTypeToCK(), - ConvertDataTypeToCK()); - } - else if constexpr(data_type == DataType::UNDEFINDED) - { - return std::make_pair(ConvertDataTypeToCK(), - ConvertDataTypeToCK()); - } - else if constexpr(compute_type == DataType::UNDEFINDED) - { - return std::make_pair(ConvertDataTypeToCK(), - ConvertDataTypeToCK()); - } - else - { - return std::make_pair(ConvertDataTypeToCK(), - ConvertDataTypeToCK()); - } -} - -template -consteval auto GetTensorAccumulationType() -{ - constexpr auto data_type = SignatureAccDataType; - if constexpr(data_type == DataType::UNDEFINDED) - { - return ConvertDataTypeToCK(); - } - else - { - return ConvertDataTypeToCK(); - } -} - -template -consteval auto GetAuxiliaryTensorDataTypeValue() -{ - constexpr auto data_type = Config.data_type; - if constexpr(data_type == DataType::UNDEFINDED) - { - return ConvertDataTypeToCK(); - } - else - { - return ConvertDataTypeToCK(); - } -} - -template -consteval auto GetAuxiliaryTensorDataTypeTuple(std::index_sequence) -{ - return ck::Tuple< - typename decltype(GetAuxiliaryTensorDataTypeValue())::type...>{}; -} - -template -struct AuxiliaryTensorDataTypes -{ - static constexpr auto Size = AuxiliaryTensorConfigsValue.size(); - using type = - decltype(GetAuxiliaryTensorDataTypeTuple( - std::make_index_sequence{})); -}; - -// TODO: Currently only the ouput tensor can have auxiliary tensors (e.g., bias). -template - requires(HasElementwiseOpWithAuxiliaryOperands) -consteval auto GetAuxiliaryTensorDataTypes() -{ - return AuxiliaryTensorDataTypes{}; -} - -template - requires(!HasElementwiseOpWithAuxiliaryOperands) -consteval auto GetAuxiliaryTensorDataTypes() -{ - return CK_empty_tuple{}; -} - -template -struct FwdConvTensorDataTypes -{ - static constexpr auto input_types = - GetTensorDataAndComputeTypes(); - static constexpr auto weight_types = - GetTensorDataAndComputeTypes(); - static constexpr auto output_types = - GetTensorDataAndComputeTypes(); - - using ADataType = typename decltype(input_types.first)::type; - using AComputeType = typename decltype(input_types.second)::type; - using BDataType = typename decltype(weight_types.first)::type; - using BComputeType = typename decltype(weight_types.second)::type; - using AccDataType = - typename decltype(GetTensorAccumulationType())::type; - using EDataType = typename decltype(output_types.first)::type; - - // This is the "compute" type for output. - using CShuffleDataType = typename decltype(output_types.second)::type; - - // Data types for the auxiliary tensors (e.g., bias). 
- using DsDataType = typename decltype(GetAuxiliaryTensorDataTypes())::type; -}; - -} // namespace ck_tile::builder::factory_internal diff --git a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op.hpp index 4a13f4e508..9137c3dc82 100644 --- a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op.hpp @@ -6,32 +6,76 @@ #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck_tile/builder/builder_utils.hpp" #include "ck_tile/builder/types.hpp" +#include "ck_tile/builder/conv_signature_concepts.hpp" namespace ck_tile::builder::factory::internal { -template -struct ElementwiseOps +template +struct ElementwiseOpToCK +{ + static_assert(sizeof(UnsupportedEnumValue) == 0, + "Unsupported elementwise operation conversion to CK."); +}; + +template <> +struct ElementwiseOpToCK { - // This will trigger if a specialization for the given DataType is not found. - // We should always catch this in an earlier validation check. - static_assert(sizeof(UnsupportedEnumValue) == 0, - "Internal error. Unsupported elementwise operation for convolution factory."); + using Op = ck::tensor_operation::element_wise::PassThrough; }; template <> -struct ElementwiseOps +struct ElementwiseOpToCK { - using AElementwiseOp = ck::tensor_operation::element_wise::PassThrough; - using BElementwiseOp = ck::tensor_operation::element_wise::PassThrough; - using CDEElementwiseOp = ck::tensor_operation::element_wise::PassThrough; + using Op = ck::tensor_operation::element_wise::Scale; }; template <> -struct ElementwiseOps +struct ElementwiseOpToCK { - using AElementwiseOp = ck::tensor_operation::element_wise::PassThrough; - using BElementwiseOp = ck::tensor_operation::element_wise::PassThrough; - using CDEElementwiseOp = ck::tensor_operation::element_wise::Scale; + using Op = ck::tensor_operation::element_wise::Clamp; }; +template <> +struct ElementwiseOpToCK +{ + using Op = ck::tensor_operation::element_wise::ScaleAddScaleAddRelu; +}; + +template <> +struct ElementwiseOpToCK +{ + using Op = ck::tensor_operation::element_wise::BiasNormalizeInInferClamp; +}; + +template +consteval auto GetElementwiseOp() +{ + if constexpr(HasTensorOp) + { + constexpr auto op = TensorDesc.operation.elementwise_operation; + return ElementwiseOpToCK{}; + } + else + { + return ElementwiseOpToCK{}; + } +} + +template +struct ElementwiseOps +{ + static constexpr auto input_op = GetElementwiseOp(); + static constexpr auto weight_op = GetElementwiseOp(); + static constexpr auto output_op = GetElementwiseOp(); + using AElementwiseOp = typename decltype(input_op)::Op; + using BElementwiseOp = typename decltype(weight_op)::Op; + using CDEElementwiseOp = typename decltype(output_op)::Op; +}; + +template +constexpr auto GetElementwiseOps() +{ + return ElementwiseOps{}; +} + } // namespace ck_tile::builder::factory::internal diff --git a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op_utils.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op_utils.hpp deleted file mode 100644 index c580a38611..0000000000 --- a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op_utils.hpp +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#pragma once - -#include "ck_tile/builder/conv_signature_concepts.hpp" -#include "ck_tile/builder/types.hpp" - -namespace ck_tile::builder::factory_internal { - -template -struct ElementwiseOpToCK -{ - static_assert(sizeof(UnsupportedEnumValue) == 0, - "Unsupported elementwise operation conversion to CK."); -}; - -template <> -struct ElementwiseOpToCK -{ - using Op = ck::tensor_operation::element_wise::PassThrough; -}; - -template <> -struct ElementwiseOpToCK -{ - using Op = ck::tensor_operation::element_wise::Scale; -}; - -template <> -struct ElementwiseOpToCK -{ - using Op = ck::tensor_operation::element_wise::Clamp; -}; - -template <> -struct ElementwiseOpToCK -{ - using Op = ck::tensor_operation::element_wise::ScaleAddScaleAddRelu; -}; - -template <> -struct ElementwiseOpToCK -{ - using Op = ck::tensor_operation::element_wise::BiasNormalizeInInferClamp; -}; - -template -consteval auto GetElementwiseOp() -{ - if constexpr(HasTensorOp) - { - constexpr auto op = TensorDesc.operation.elementwise_operation; - return ElementwiseOpToCK{}; - } - else - { - return ElementwiseOpToCK{}; - } -} - -template -struct ElementwiseOps -{ - static constexpr auto input_op = GetElementwiseOp(); - static constexpr auto weight_op = GetElementwiseOp(); - static constexpr auto output_op = GetElementwiseOp(); - using AElementwiseOp = typename decltype(input_op)::Op; - using BElementwiseOp = typename decltype(weight_op)::Op; - using CDEElementwiseOp = typename decltype(output_op)::Op; -}; - -template -constexpr auto GetElementwiseOps() -{ - return ElementwiseOps{}; -} - -} // namespace ck_tile::builder::factory_internal diff --git a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_layout_utils.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_layout_utils.hpp deleted file mode 100644 index 12dc9792bd..0000000000 --- a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_layout_utils.hpp +++ /dev/null @@ -1,247 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#pragma once - -#include "ck_tile/builder/conv_signature_concepts.hpp" -#include "ck_tile/builder/types.hpp" - -namespace ck_tile::builder::factory_internal { - -using namespace ck_tile::builder; - -template -struct LayoutToCK -{ - static_assert(sizeof(UnsupportedEnumValue) == 0, - "Unsupported layout conversion to CK."); -}; - -// BiasLayout -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::G_K; -}; -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::GC; -}; -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::G_C; -}; - -// Input 1D -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::NWGC; -}; -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::NGCW; -}; -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::GNWC; -}; - -// Input 2D -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::NGCHW; -}; -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::NHWGC; -}; -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::GNHWC; -}; - -// Input 3D -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::NGCDHW; -}; -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::NDHWGC; -}; -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::GNDHWC; -}; - -// Weight 1D -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::GKXC; -}; -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::GKCX; -}; - -// Weight 2D -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::GKYXC; -}; -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::GKCYX; -}; - -// Weight 3D -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::GKCZYX; -}; -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::GKZYXC; -}; - -// Output 1D -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::NWGK; -}; -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::NGKW; -}; -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::GNWK; -}; - -// Output 2D -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::NGKHW; -}; -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::NHWGK; -}; -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::GNHWK; -}; - -// Output 3D -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::NGKDHW; -}; -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::NDHWGK; -}; -template <> -struct LayoutToCK -{ - using type = ck::tensor_layout::convolution::GNDHWK; -}; - -template -consteval auto TensorLayoutToCK() -{ - return typename LayoutToCK::type{}; -} - -struct EmptyAuxiliaryTensorLayout -{ - using DsLayout = ck::Tuple<>; -}; - -template -consteval auto GetAuxiliaryTensorLayoutTuple(std::index_sequence) -{ - return ck::Tuple< - decltype(TensorLayoutToCK())...>{}; -} - -template - requires(ConvSpatialDim) -struct AuxiliaryTensorLayouts -{ - static constexpr auto Size = AuxiliaryTensorConfigsValue.size(); - using DsLayout = decltype(GetAuxiliaryTensorLayoutTuple( - 
std::make_index_sequence{})); -}; - -// TODO: Currently only the ouput tensor can have auxiliary tensors (e.g., bias). -template - requires(HasElementwiseOpWithAuxiliaryOperands) -consteval auto GetAuxiliaryTensorLayouts() -{ - return AuxiliaryTensorLayouts{}; -} - -template - requires(!HasElementwiseOpWithAuxiliaryOperands) -consteval auto GetAuxiliaryTensorLayouts() -{ - return EmptyAuxiliaryTensorLayout{}; -} - -template - requires(ConvSpatialDim && - ValidConvInputLayoutForSpatialDim && - ValidConvWeightLayoutForSpatialDim && - ValidConvOutputLayoutForSpatialDim) -struct ConvTensorLayouts -{ - static_assert(DIR == ConvDirection::FORWARD, "Only Forward convolution is supported."); - using ALayout = decltype(TensorLayoutToCK()); - using BLayout = decltype(TensorLayoutToCK()); - using ELayout = decltype(TensorLayoutToCK()); -}; - -template -consteval auto GetTensorLayout() -{ - constexpr auto INPUT_LAYOUT = Signature.input.config.layout; - constexpr auto WEIGHT_LAYOUT = Signature.weight.config.layout; - constexpr auto OUTPUT_LAYOUT = Signature.output.config.layout; - - return factory_internal:: - ConvTensorLayouts{}; -} - -} // namespace ck_tile::builder::factory_internal diff --git a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_layout.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_layout.hpp index b3effa782e..fdbe2508ee 100644 --- a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_layout.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_layout.hpp @@ -6,141 +6,241 @@ #include "ck/tensor_operation/gpu/device/tensor_layout.hpp" #include "ck/utility/tuple.hpp" #include "ck_tile/builder/conv_signature_concepts.hpp" +#include "ck_tile/builder/builder_utils.hpp" namespace ck_tile::builder::factory::internal { -// Type mappings from the builder FwdGroupConvLayout enum classes to the CK tensor data types. -template - requires(ConvSpatialDim && ValidConvLayoutForSpatialDim) -struct ConvTensorLayouts +template +struct LayoutToCK { - // This will trigger if a specialization for the given layout is not found. - // We should always catch this in an earlier validation check. - using Layout = decltype(LayoutValue); - static_assert(sizeof(Layout) == 0, - "Internal error. 
Unsupported layout for convolution factory."); + static_assert(sizeof(UnsupportedEnumValue) == 0, + "Unsupported layout conversion to CK."); }; -// 1D Forward Convolution Layout Specializations +// BiasLayout template <> -struct ConvTensorLayouts +struct LayoutToCK { - using ALayout = ck::tensor_layout::convolution::NWGC; - using BLayout = ck::tensor_layout::convolution::GKXC; - using DsLayout = ck::Tuple<>; - using ELayout = ck::tensor_layout::convolution::NWGK; + using type = ck::tensor_layout::convolution::G_K; }; - template <> -struct ConvTensorLayouts +struct LayoutToCK { - using ALayout = ck::tensor_layout::convolution::NGCW; - using BLayout = ck::tensor_layout::convolution::GKXC; - using DsLayout = ck::Tuple<>; - using ELayout = ck::tensor_layout::convolution::NGKW; + using type = ck::tensor_layout::convolution::GC; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::G_C; }; +// Input 1D template <> -struct ConvTensorLayouts +struct LayoutToCK { - using ALayout = ck::tensor_layout::convolution::GNWC; - using BLayout = ck::tensor_layout::convolution::GKXC; - using DsLayout = ck::Tuple<>; - using ELayout = ck::tensor_layout::convolution::GNWK; + using type = ck::tensor_layout::convolution::NWGC; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NGCW; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GNWC; }; +// Input 2D template <> -struct ConvTensorLayouts +struct LayoutToCK { - using ALayout = ck::tensor_layout::convolution::NGCW; - using BLayout = ck::tensor_layout::convolution::GKCX; - using DsLayout = ck::Tuple<>; - using ELayout = ck::tensor_layout::convolution::NGKW; + using type = ck::tensor_layout::convolution::NGCHW; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NHWGC; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GNHWC; }; +// Input 3D template <> -struct ConvTensorLayouts +struct LayoutToCK { - using ALayout = ck::tensor_layout::convolution::NGCHW; - using BLayout = ck::tensor_layout::convolution::GKYXC; - using DsLayout = ck::Tuple<>; - using ELayout = ck::tensor_layout::convolution::NGKHW; + using type = ck::tensor_layout::convolution::NGCDHW; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NDHWGC; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GNDHWC; }; +// Weight 1D template <> -struct ConvTensorLayouts +struct LayoutToCK { - using ALayout = ck::tensor_layout::convolution::NHWGC; - using BLayout = ck::tensor_layout::convolution::GKYXC; - using DsLayout = ck::Tuple<>; - using ELayout = ck::tensor_layout::convolution::NHWGK; + using type = ck::tensor_layout::convolution::GKXC; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GKCX; }; +// Weight 2D template <> -struct ConvTensorLayouts +struct LayoutToCK { - using ALayout = ck::tensor_layout::convolution::GNHWC; - using BLayout = ck::tensor_layout::convolution::GKYXC; - using DsLayout = ck::Tuple<>; - using ELayout = ck::tensor_layout::convolution::GNHWK; + using type = ck::tensor_layout::convolution::GKYXC; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GKCYX; }; +// Weight 3D template <> -struct ConvTensorLayouts +struct LayoutToCK { - using ALayout = ck::tensor_layout::convolution::NGCHW; - using BLayout = ck::tensor_layout::convolution::GKCYX; - using DsLayout = ck::Tuple<>; - using 
ELayout = ck::tensor_layout::convolution::NGKHW;
+    using type = ck::tensor_layout::convolution::GKCZYX;
+};
+template <>
+struct LayoutToCK<TensorLayout::GKZYXC>
+{
+    using type = ck::tensor_layout::convolution::GKZYXC;
 };

+// Output 1D
 template <>
-struct ConvTensorLayouts
+struct LayoutToCK<TensorLayout::NWGK>
 {
-    using ALayout = ck::tensor_layout::convolution::NGCDHW;
-    using BLayout = ck::tensor_layout::convolution::GKCZYX;
-    using DsLayout = ck::Tuple<>;
-    using ELayout = ck::tensor_layout::convolution::NGKDHW;
+    using type = ck::tensor_layout::convolution::NWGK;
+};
+template <>
+struct LayoutToCK<TensorLayout::NGKW>
+{
+    using type = ck::tensor_layout::convolution::NGKW;
+};
+template <>
+struct LayoutToCK<TensorLayout::GNWK>
+{
+    using type = ck::tensor_layout::convolution::GNWK;
 };

+// Output 2D
 template <>
-struct ConvTensorLayouts
+struct LayoutToCK<TensorLayout::NGKHW>
 {
-    using ALayout = ck::tensor_layout::convolution::NDHWGC;
-    using BLayout = ck::tensor_layout::convolution::GKZYXC;
-    using DsLayout = ck::Tuple<>;
-    using ELayout = ck::tensor_layout::convolution::NDHWGK;
+    using type = ck::tensor_layout::convolution::NGKHW;
+};
+template <>
+struct LayoutToCK<TensorLayout::NHWGK>
+{
+    using type = ck::tensor_layout::convolution::NHWGK;
+};
+template <>
+struct LayoutToCK<TensorLayout::GNHWK>
+{
+    using type = ck::tensor_layout::convolution::GNHWK;
 };

+// Output 3D
+template <>
+struct LayoutToCK<TensorLayout::NGKDHW>
+{
+    using type = ck::tensor_layout::convolution::NGKDHW;
+};
 template <>
-struct ConvTensorLayouts
+struct LayoutToCK<TensorLayout::NDHWGK>
+{
+    using type = ck::tensor_layout::convolution::NDHWGK;
+};
+template <>
+struct LayoutToCK<TensorLayout::GNDHWK>
+{
+    using type = ck::tensor_layout::convolution::GNDHWK;
+};
+
+template
+consteval auto TensorLayoutToCK()
+{
+    return typename LayoutToCK::type{};
+}
+
+struct EmptyAuxiliaryTensorLayout
 {
-    using ALayout = ck::tensor_layout::convolution::GNDHWC;
-    using BLayout = ck::tensor_layout::convolution::GKZYXC;
     using DsLayout = ck::Tuple<>;
-    using ELayout = ck::tensor_layout::convolution::GNDHWK;
 };

-template
+template
+consteval auto GetAuxiliaryTensorLayoutTuple(std::index_sequence)
+{
+    return ck::Tuple<
+        decltype(TensorLayoutToCK())...>{};
+}
+
+template
+    requires(ConvSpatialDim)
+struct AuxiliaryTensorLayouts
+{
+    static constexpr auto Size = AuxiliaryTensorConfigsValue.size();
+    using DsLayout = decltype(GetAuxiliaryTensorLayoutTuple(
+        std::make_index_sequence{}));
+};
+
+// TODO: Currently only the output tensor can have auxiliary tensors (e.g., bias).
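+//
+// Illustrative sketch only (helper parameter spellings are assumed here; they
+// are not fixed by the mappings above): for an output tensor whose elementwise
+// operation carries a single bias operand in a G_K-strided layout, the helpers
+// below collapse to a one-element layout tuple, roughly:
+//
+//   using Ds = decltype(GetAuxiliaryTensorLayouts<output_desc, /*SPATIAL_DIM=*/2>())::DsLayout;
+//   static_assert(std::is_same_v<Ds, ck::Tuple<ck::tensor_layout::convolution::G_K>>);
+//
+// where `output_desc` stands for a hypothetical constexpr tensor descriptor
+// with one auxiliary operand of layout TensorLayout::G_K_strided.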
+template + requires(HasElementwiseOpWithAuxiliaryOperands) +consteval auto GetAuxiliaryTensorLayouts() +{ + return AuxiliaryTensorLayouts{}; +} + +template + requires(!HasElementwiseOpWithAuxiliaryOperands) +consteval auto GetAuxiliaryTensorLayouts() +{ + return EmptyAuxiliaryTensorLayout{}; +} + +template + requires(ConvSpatialDim && + ValidConvInputLayoutForSpatialDim && + ValidConvWeightLayoutForSpatialDim && + ValidConvOutputLayoutForSpatialDim) +struct ConvTensorLayouts +{ + static_assert(DIR == ConvDirection::FORWARD, "Only Forward convolution is supported."); + using ALayout = decltype(TensorLayoutToCK()); + using BLayout = decltype(TensorLayoutToCK()); + using ELayout = decltype(TensorLayoutToCK()); +}; + +template consteval auto GetTensorLayout() { + constexpr auto INPUT_LAYOUT = Signature.input.config.layout; + constexpr auto WEIGHT_LAYOUT = Signature.weight.config.layout; + constexpr auto OUTPUT_LAYOUT = Signature.output.config.layout; - if constexpr(SPATIAL_DIM == 1) - { - return internal::ConvTensorLayouts{}; - } - else if constexpr(SPATIAL_DIM == 2) - { - return internal::ConvTensorLayouts{}; - } - else if constexpr(SPATIAL_DIM == 3) - { - return internal::ConvTensorLayouts{}; - } - else - { - static_assert(false, "Unsupported spatial dimension for convolution layout."); - } + return ConvTensorLayouts{}; } } // namespace ck_tile::builder::factory::internal diff --git a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_type.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_type.hpp index d8a8eb5da0..81de2140f2 100644 --- a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_type.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_type.hpp @@ -6,82 +6,172 @@ #include "ck/utility/data_type.hpp" #include "ck_tile/builder/types.hpp" #include "ck_tile/builder/builder_utils.hpp" +#include "ck_tile/builder/conv_signature_concepts.hpp" namespace ck_tile::builder::factory::internal { -// Type mappings from builder convolution data type to CK tensor types. -template -struct ConvTensorTypes +template +struct DataTypeToCK { - // This will trigger if a specialization for the given DataType is not found. - // We should always catch this in an earlier validation check. - static_assert(sizeof(UnsupportedEnumValue) == 0, - "Internal error. Unsupported data type for convolution factory."); + // Catch unsupported data types at compile time + static_assert(sizeof(UnsupportedEnumValue
) == 0, "Unsupported data type conversion to CK."); }; template <> -struct ConvTensorTypes -{ - using ADataType = ck::half_t; - using AComputeType = ck::half_t; - using BDataType = ck::half_t; - using BComputeType = ck::half_t; - using CShuffleDataType = ck::half_t; - using DsDataTypes = ck::Tuple<>; - using AccDataType = float; - using EDataType = ck::half_t; +struct DataTypeToCK +{ + using type = ck::half_t; }; - template <> -struct ConvTensorTypes -{ - using ADataType = ck::bhalf_t; - using AComputeType = ck::bhalf_t; - using BDataType = ck::bhalf_t; - using BComputeType = ck::bhalf_t; - using CShuffleDataType = ck::bhalf_t; - using DsDataTypes = ck::Tuple<>; - using AccDataType = float; - using EDataType = ck::bhalf_t; +struct DataTypeToCK +{ + using type = ck::bhalf_t; }; - template <> -struct ConvTensorTypes -{ - using ADataType = float; - using AComputeType = float; - using BDataType = float; - using BComputeType = float; - using CShuffleDataType = float; - using DsDataTypes = ck::Tuple<>; - using AccDataType = float; - using EDataType = float; +struct DataTypeToCK +{ + using type = float; }; - template <> -struct ConvTensorTypes -{ - using ADataType = int8_t; - using AComputeType = int8_t; - using BDataType = int8_t; - using BComputeType = int8_t; - using CShuffleDataType = int8_t; - using DsDataTypes = ck::Tuple<>; - using AccDataType = int32_t; - using EDataType = int8_t; +struct DataTypeToCK +{ + using type = int32_t; +}; +template <> +struct DataTypeToCK +{ + using type = int8_t; }; - template <> -struct ConvTensorTypes -{ - using ADataType = ck::f8_t; - using AComputeType = ck::f8_t; - using BDataType = ck::f8_t; - using BComputeType = ck::f8_t; - using CShuffleDataType = ck::f8_t; - using DsDataTypes = ck::Tuple<>; - using AccDataType = float; - using EDataType = ck::f8_t; +struct DataTypeToCK +{ + using type = ck::f8_t; +}; + +struct CK_empty_tuple +{ + using type = ck::Tuple<>; +}; + +template +consteval auto ConvertDataTypeToCK() +{ + return DataTypeToCK
{};
+}
+
+template
+consteval auto GetTensorDataAndComputeTypes()
+{
+    constexpr auto data_type = Config.data_type;
+    constexpr auto compute_type = Config.compute_type;
+
+    if constexpr(data_type == DataType::UNDEFINDED && compute_type == DataType::UNDEFINDED)
+    {
+        return std::make_pair(ConvertDataTypeToCK(),
+                              ConvertDataTypeToCK());
+    }
+    else if constexpr(data_type == DataType::UNDEFINDED)
+    {
+        return std::make_pair(ConvertDataTypeToCK(),
+                              ConvertDataTypeToCK());
+    }
+    else if constexpr(compute_type == DataType::UNDEFINDED)
+    {
+        return std::make_pair(ConvertDataTypeToCK(),
+                              ConvertDataTypeToCK());
+    }
+    else
+    {
+        return std::make_pair(ConvertDataTypeToCK(),
+                              ConvertDataTypeToCK());
+    }
+}
+
+template
+consteval auto GetTensorAccumulationType()
+{
+    constexpr auto data_type = SignatureAccDataType;
+    if constexpr(data_type == DataType::UNDEFINDED)
+    {
+        return ConvertDataTypeToCK();
+    }
+    else
+    {
+        return ConvertDataTypeToCK();
+    }
+}
+
+template
+consteval auto GetAuxiliaryTensorDataTypeValue()
+{
+    constexpr auto data_type = Config.data_type;
+    if constexpr(data_type == DataType::UNDEFINDED)
+    {
+        return ConvertDataTypeToCK();
+    }
+    else
+    {
+        return ConvertDataTypeToCK();
+    }
+}
+
+template
+consteval auto GetAuxiliaryTensorDataTypeTuple(std::index_sequence)
+{
+    return ck::Tuple<
+        typename decltype(GetAuxiliaryTensorDataTypeValue())::type...>{};
+}
+
+template
+struct AuxiliaryTensorDataTypes
+{
+    static constexpr auto Size = AuxiliaryTensorConfigsValue.size();
+    using type =
+        decltype(GetAuxiliaryTensorDataTypeTuple(
+            std::make_index_sequence{}));
+};
+
+// TODO: Currently only the output tensor can have auxiliary tensors (e.g., bias).
+template
+    requires(HasElementwiseOpWithAuxiliaryOperands)
+consteval auto GetAuxiliaryTensorDataTypes()
+{
+    return AuxiliaryTensorDataTypes{};
+}
+
+template
+    requires(!HasElementwiseOpWithAuxiliaryOperands)
+consteval auto GetAuxiliaryTensorDataTypes()
+{
+    return CK_empty_tuple{};
+}
+
+template
+struct FwdConvTensorDataTypes
+{
+    static constexpr auto input_types =
+        GetTensorDataAndComputeTypes();
+    static constexpr auto weight_types =
+        GetTensorDataAndComputeTypes();
+    static constexpr auto output_types =
+        GetTensorDataAndComputeTypes();
+
+    using ADataType = typename decltype(input_types.first)::type;
+    using AComputeType = typename decltype(input_types.second)::type;
+    using BDataType = typename decltype(weight_types.first)::type;
+    using BComputeType = typename decltype(weight_types.second)::type;
+    using AccDataType =
+        typename decltype(GetTensorAccumulationType())::type;
+    using EDataType = typename decltype(output_types.first)::type;
+
+    // This is the "compute" type for output.
+    using CShuffleDataType = typename decltype(output_types.second)::type;
+
+    // Data types for the auxiliary tensors (e.g., bias).
+    using DsDataTypes = typename decltype(GetAuxiliaryTensorDataTypes())::type;
 };

 } // namespace ck_tile::builder::factory::internal

From 5777d8b58a6aefbd083e8552835c77bb7a632e2a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <>
Date: Wed, 3 Dec 2025 12:12:30 +0000
Subject: [PATCH 36/41] Fix tests after rebase.
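
The preceding patches in this series replaced the monolithic ConvTensorTypes
and ElementwiseOps mappings with the per-enum DataTypeToCK and
ElementwiseOpToCK traits under ck_tile::builder::factory::internal, so the
unit tests still referred to symbols that no longer exist. Point the tests at
the new traits; schematically (exact template arguments follow the test names
below):

    using Op     = ElementwiseOpToCK<ElementwiseOperation::PASS_THROUGH>::Op;
    using CKType = DataTypeToCK<DataType::FP16>::type;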
--- .../builder/test/unit_conv_elementwise_op.cpp | 38 +++++++----- .../builder/test/unit_conv_tensor_layout.cpp | 58 +++++++++++++----- .../builder/test/unit_conv_tensor_type.cpp | 61 +++++-------------- 3 files changed, 83 insertions(+), 74 deletions(-) diff --git a/experimental/builder/test/unit_conv_elementwise_op.cpp b/experimental/builder/test/unit_conv_elementwise_op.cpp index 66593bf802..7f73a3c599 100644 --- a/experimental/builder/test/unit_conv_elementwise_op.cpp +++ b/experimental/builder/test/unit_conv_elementwise_op.cpp @@ -8,30 +8,38 @@ namespace { -using ::ck_tile::builder::factory::internal::ElementwiseOps; -using enum ::ck_tile::builder::ElementwiseOperation; +using ::ck_tile::builder::factory::internal::ElementwiseOpToCK; +using ::ck_tile::builder::ElementwiseOperation; TEST(ConvElementwiseOp, AssignsOpsForPassThrough) { - using Ops = ElementwiseOps; - - EXPECT_TRUE( - (std::is_same_v)); + using Op = ElementwiseOpToCK::Op; EXPECT_TRUE( - (std::is_same_v)); - EXPECT_TRUE( - (std::is_same_v)); + (std::is_same_v)); } TEST(ConvElementwiseOp, AssignsOpsForScale) { - using Ops = ElementwiseOps; + using Op = ElementwiseOpToCK::Op; + EXPECT_TRUE((std::is_same_v)); +} - EXPECT_TRUE( - (std::is_same_v)); - EXPECT_TRUE( - (std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); +TEST(ConvElementwiseOp, AssignsOpsForClamp) +{ + using Op = ElementwiseOpToCK::Op; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(ConvElementwiseOp, AssignsOpsForScaleAddScaleAddRelu) +{ + using Op = ElementwiseOpToCK::Op; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(ConvElementwiseOp, AssignsOpsForBiasNormClamp) +{ + using Op = ElementwiseOpToCK::Op; + EXPECT_TRUE((std::is_same_v)); } } // namespace diff --git a/experimental/builder/test/unit_conv_tensor_layout.cpp b/experimental/builder/test/unit_conv_tensor_layout.cpp index 6cdcc429dd..ee6d32a437 100644 --- a/experimental/builder/test/unit_conv_tensor_layout.cpp +++ b/experimental/builder/test/unit_conv_tensor_layout.cpp @@ -12,11 +12,13 @@ namespace { namespace ckb = ::ck_tile::builder; using ::ck_tile::builder::factory::internal::ConvTensorLayouts; using ::ck_tile::builder::factory::internal::GetTensorLayout; +using ::ck_tile::builder::TensorLayout; using enum ::ck_tile::builder::ConvDirection; TEST(ConvTensorLayout, AssignsLayoutsFor1D_NWGC_GKXC_NWGK) { - using TensorLayouts = ConvTensorLayouts; + using TensorLayouts = + ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); @@ -25,7 +27,8 @@ TEST(ConvTensorLayout, AssignsLayoutsFor1D_NWGC_GKXC_NWGK) TEST(ConvTensorLayout, AssignsLayoutsFor1D_NGCW_GKXC_NGKW) { - using TensorLayouts = ConvTensorLayouts; + using TensorLayouts = + ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); @@ -34,7 +37,8 @@ TEST(ConvTensorLayout, AssignsLayoutsFor1D_NGCW_GKXC_NGKW) TEST(ConvTensorLayout, AssignsLayoutsFor1D_GNWC_GKXC_GNWK) { - using TensorLayouts = ConvTensorLayouts; + using TensorLayouts = + ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); @@ -43,7 +47,8 @@ TEST(ConvTensorLayout, AssignsLayoutsFor1D_GNWC_GKXC_GNWK) TEST(ConvTensorLayout, AssignsLayoutsFor1D_NGCW_GKCX_NGKW) { - using TensorLayouts = ConvTensorLayouts; + using TensorLayouts = + ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); @@ -52,7 +57,11 @@ TEST(ConvTensorLayout, AssignsLayoutsFor1D_NGCW_GKCX_NGKW) TEST(ConvTensorLayout, AssignsLayoutsFor2D_NGCHW_GKYXC_NGKHW) { - using TensorLayouts = ConvTensorLayouts; + using TensorLayouts = 
ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); @@ -61,7 +70,11 @@ TEST(ConvTensorLayout, AssignsLayoutsFor2D_NGCHW_GKYXC_NGKHW) TEST(ConvTensorLayout, AssignsLayoutsFor2D_NHWGC_GKYXC_NHWGK) { - using TensorLayouts = ConvTensorLayouts; + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); @@ -70,7 +83,11 @@ TEST(ConvTensorLayout, AssignsLayoutsFor2D_NHWGC_GKYXC_NHWGK) TEST(ConvTensorLayout, AssignsLayoutsFor2D_GNHWC_GKYXC_GNHWK) { - using TensorLayouts = ConvTensorLayouts; + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); @@ -79,7 +96,11 @@ TEST(ConvTensorLayout, AssignsLayoutsFor2D_GNHWC_GKYXC_GNHWK) TEST(ConvTensorLayout, AssignsLayoutsFor2D_NGCHW_GKCYX_NGKHW) { - using TensorLayouts = ConvTensorLayouts; + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); @@ -88,8 +109,11 @@ TEST(ConvTensorLayout, AssignsLayoutsFor2D_NGCHW_GKCYX_NGKHW) TEST(ConvTensorLayout, AssignsLayoutsFor3D_NGCDHW_GKCZYX_NGKDHW) { - using TensorLayouts = - ConvTensorLayouts; + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); @@ -98,8 +122,11 @@ TEST(ConvTensorLayout, AssignsLayoutsFor3D_NGCDHW_GKCZYX_NGKDHW) TEST(ConvTensorLayout, AssignsLayoutsFor3D_NDHWGC_GKZYXC_NDHWGK) { - using TensorLayouts = - ConvTensorLayouts; + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); @@ -108,8 +135,11 @@ TEST(ConvTensorLayout, AssignsLayoutsFor3D_NDHWGC_GKZYXC_NDHWGK) TEST(ConvTensorLayout, AssignsLayoutsFor3D_GNDHWC_GKZYXC_GNDHWK) { - using TensorLayouts = - ConvTensorLayouts; + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); diff --git a/experimental/builder/test/unit_conv_tensor_type.cpp b/experimental/builder/test/unit_conv_tensor_type.cpp index 5aa82774da..c92b24626e 100644 --- a/experimental/builder/test/unit_conv_tensor_type.cpp +++ b/experimental/builder/test/unit_conv_tensor_type.cpp @@ -9,71 +9,42 @@ namespace { namespace ckb = ck_tile::builder; -using ck_tile::builder::factory::internal::ConvTensorTypes; +using ck_tile::builder::factory::internal::DataTypeToCK; TEST(ConvTensorType, AssignsTypesForFP16) { - using Types = ConvTensorTypes; - - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); + using CKType = DataTypeToCK::type; + EXPECT_TRUE((std::is_same_v)); } TEST(ConvTensorType, AssignsTypesForBF16) { - using Types = ConvTensorTypes; - - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); + using CKType = DataTypeToCK::type; + EXPECT_TRUE((std::is_same_v)); } TEST(ConvTensorType, AssignsTypesForFP32) { - using Types = ConvTensorTypes; + using CKType = DataTypeToCK::type; + EXPECT_TRUE((std::is_same_v)); +} - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); +TEST(ConvTensorType, AssignsTypesForINT32) +{ + using CKType = DataTypeToCK::type; + 
EXPECT_TRUE((std::is_same_v)); } TEST(ConvTensorType, AssignsTypesForI8) { - using Types = ConvTensorTypes; - - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); + using CKType = DataTypeToCK::type; + EXPECT_TRUE((std::is_same_v)); } TEST(ConvTensorType, AssignsTypesForFP8) { - using Types = ConvTensorTypes; - - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); + using CKType = DataTypeToCK::type; + EXPECT_TRUE((std::is_same_v)); } } // namespace From 3949dc308ab49cadaa8015a4118b5d4834a88126 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Wed, 3 Dec 2025 12:54:26 +0000 Subject: [PATCH 37/41] Unify layout handling. --- .../builder/factory/conv_fwd_dl_factory.hpp | 12 ++----- .../factory/conv_fwd_large_tensor_factory.hpp | 10 ++---- .../builder/factory/conv_fwd_v3_factory.hpp | 9 ++---- .../builder/factory/conv_fwd_wmma_factory.hpp | 9 ++---- .../builder/factory/conv_fwd_xdl_factory.hpp | 9 ++---- .../factory/helpers/conv_tensor_layout.hpp | 32 +++++++------------ 6 files changed, 22 insertions(+), 59 deletions(-) diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_dl_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_dl_factory.hpp index 19bfc026f1..d2d25d99a4 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_dl_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_dl_factory.hpp @@ -24,14 +24,8 @@ template ()); - - using AuxiliaryLayouts = - decltype(internal::GetAuxiliaryTensorLayouts()); - - using Types = internal::FwdConvTensorDataTypes; + using Layouts = internal::ConvTensorLayouts; + using Types = internal::FwdConvTensorDataTypes; using Ops = decltype(internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); @@ -102,7 +96,7 @@ struct ConvFwdDlFactory typename Types::AccDataType, typename Layouts::ALayout, typename Layouts::BLayout, - typename AuxiliaryLayouts::DsLayout, + typename Layouts::DsLayout, typename Layouts::ELayout, typename Ops::AElementwiseOp, typename Ops::BElementwiseOp, diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_large_tensor_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_large_tensor_factory.hpp index 5319a5a319..2aeae0ad80 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_large_tensor_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_large_tensor_factory.hpp @@ -26,13 +26,7 @@ template ()); - - using AuxiliaryLayouts = - decltype(internal::GetAuxiliaryTensorLayouts()); - + using Layouts = internal::ConvTensorLayouts; using Types = internal::FwdConvTensorDataTypes; using Ops = decltype(internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); @@ -70,7 +64,7 @@ struct ConvFwdLargeTensorFactory SPATIAL_DIM, typename Layouts::ALayout, typename Layouts::BLayout, - typename AuxiliaryLayouts::DsLayout, + typename Layouts::DsLayout, typename Layouts::ELayout, typename Types::ADataType, typename Types::BDataType, diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_v3_factory.hpp 
b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_v3_factory.hpp index 1a60a3b5e9..971490706b 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_v3_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_v3_factory.hpp @@ -26,12 +26,7 @@ template ()); - using AuxiliaryLayouts = - decltype(internal::GetAuxiliaryTensorLayouts()); - + using Layouts = internal::ConvTensorLayouts; using Types = internal::FwdConvTensorDataTypes; using Ops = decltype(internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); @@ -70,7 +65,7 @@ struct ConvFwdXdlV3Factory SPATIAL_DIM, typename Layouts::ALayout, typename Layouts::BLayout, - typename AuxiliaryLayouts::DsLayout, + typename Layouts::DsLayout, typename Layouts::ELayout, typename Types::ADataType, typename Types::BDataType, diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_wmma_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_wmma_factory.hpp index 0b742377f8..10ea19a317 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_wmma_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_wmma_factory.hpp @@ -26,12 +26,7 @@ template ()); - using AuxiliaryLayouts = - decltype(internal::GetAuxiliaryTensorLayouts()); - + using Layouts = internal::ConvTensorLayouts; using Types = internal::FwdConvTensorDataTypes; using Ops = decltype(internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); @@ -67,7 +62,7 @@ struct ConvFwdWmmaFactory SPATIAL_DIM, typename Layouts::ALayout, typename Layouts::BLayout, - typename AuxiliaryLayouts::DsLayout, + typename Layouts::DsLayout, typename Layouts::ELayout, typename Types::ADataType, typename Types::BDataType, diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_xdl_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_xdl_factory.hpp index 262e3328d7..5d36d6bfaf 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_xdl_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_xdl_factory.hpp @@ -26,12 +26,7 @@ template ()); - using AuxiliaryLayouts = - decltype(internal::GetAuxiliaryTensorLayouts()); - + using Layouts = internal::ConvTensorLayouts; using Types = internal::FwdConvTensorDataTypes; using Ops = decltype(internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); @@ -65,7 +60,7 @@ struct ConvFwdXdlFactory SPATIAL_DIM, typename Layouts::ALayout, typename Layouts::BLayout, - typename AuxiliaryLayouts::DsLayout, + typename Layouts::DsLayout, typename Layouts::ELayout, typename Types::ADataType, typename Types::BDataType, diff --git a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_layout.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_layout.hpp index fdbe2508ee..c7bf4d88e4 100644 --- a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_layout.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_layout.hpp @@ -17,7 +17,7 @@ struct LayoutToCK "Unsupported layout conversion to CK."); }; -// BiasLayout +// Bias layouts template <> struct LayoutToCK { @@ -180,7 +180,7 @@ consteval auto TensorLayoutToCK() struct EmptyAuxiliaryTensorLayout { - using DsLayout = ck::Tuple<>; + using type = ck::Tuple<>; }; template @@ -195,7 +195,7 @@ template ( + using type = decltype(GetAuxiliaryTensorLayoutTuple( std::make_index_sequence{})); }; 
@@ -216,31 +216,21 @@ consteval auto GetAuxiliaryTensorLayouts() return EmptyAuxiliaryTensorLayout{}; } -template requires(ConvSpatialDim && - ValidConvInputLayoutForSpatialDim && - ValidConvWeightLayoutForSpatialDim && - ValidConvOutputLayoutForSpatialDim) + ValidConvInputLayoutForSpatialDim && + ValidConvWeightLayoutForSpatialDim && + ValidConvOutputLayoutForSpatialDim) struct ConvTensorLayouts { static_assert(DIR == ConvDirection::FORWARD, "Only Forward convolution is supported."); - using ALayout = decltype(TensorLayoutToCK()); - using BLayout = decltype(TensorLayoutToCK()); - using ELayout = decltype(TensorLayoutToCK()); + using ALayout = decltype(TensorLayoutToCK()); + using BLayout = decltype(TensorLayoutToCK()); + using ELayout = decltype(TensorLayoutToCK()); + using DsLayout = decltype(GetAuxiliaryTensorLayouts())::type; }; -template -consteval auto GetTensorLayout() -{ - constexpr auto INPUT_LAYOUT = Signature.input.config.layout; - constexpr auto WEIGHT_LAYOUT = Signature.weight.config.layout; - constexpr auto OUTPUT_LAYOUT = Signature.output.config.layout; - - return ConvTensorLayouts{}; -} } // namespace ck_tile::builder::factory::internal From 3a9bac560a2523be5a3207e85cc4e0aa7c215a85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Wed, 3 Dec 2025 14:50:05 +0000 Subject: [PATCH 38/41] Add more conv layout unit tests. --- .../builder/test/unit_conv_tensor_layout.cpp | 465 ++++++++++++++++-- 1 file changed, 420 insertions(+), 45 deletions(-) diff --git a/experimental/builder/test/unit_conv_tensor_layout.cpp b/experimental/builder/test/unit_conv_tensor_layout.cpp index ee6d32a437..f5932f6c87 100644 --- a/experimental/builder/test/unit_conv_tensor_layout.cpp +++ b/experimental/builder/test/unit_conv_tensor_layout.cpp @@ -4,146 +4,521 @@ #include #include -// Include the helper file we're testing #include "ck_tile/builder/factory/helpers/conv_tensor_layout.hpp" +#include "impl/conv_signature_types.hpp" namespace { namespace ckb = ::ck_tile::builder; using ::ck_tile::builder::factory::internal::ConvTensorLayouts; -using ::ck_tile::builder::factory::internal::GetTensorLayout; using ::ck_tile::builder::TensorLayout; +using ::ck_tile::builder::DataType; +using ::ck_tile::builder::ElementwiseOperation; +using ::ck_tile::builder::factory::internal::LayoutToCK; +using ::ck_tile::builder::factory::internal::AuxiliaryTensorLayouts; + +using namespace ::ck_tile::builder::test; using enum ::ck_tile::builder::ConvDirection; TEST(ConvTensorLayout, AssignsLayoutsFor1D_NWGC_GKXC_NWGK) { - using TensorLayouts = - ConvTensorLayouts; + static constexpr auto sig = ConvSignature<>{ + .spatial_dim = 1, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NWGC}}, + .weight = {.config = {.layout = TensorLayout::GKXC}}, + .output = {.config = {.layout = TensorLayout::NWGK}} + }; + + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v>)); } TEST(ConvTensorLayout, AssignsLayoutsFor1D_NGCW_GKXC_NGKW) { - using TensorLayouts = - ConvTensorLayouts; + static constexpr auto sig = ConvSignature<>{ + .spatial_dim = 1, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NGCW}}, + .weight = {.config = {.layout = TensorLayout::GKXC}}, + .output = {.config = {.layout = TensorLayout::NGKW}} + }; + + 
using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v>)); } TEST(ConvTensorLayout, AssignsLayoutsFor1D_GNWC_GKXC_GNWK) { - using TensorLayouts = - ConvTensorLayouts; + static constexpr auto sig = ConvSignature<>{ + .spatial_dim = 1, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNWC}}, + .weight = {.config = {.layout = TensorLayout::GKXC}}, + .output = {.config = {.layout = TensorLayout::GNWK}} + }; + + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v>)); } TEST(ConvTensorLayout, AssignsLayoutsFor1D_NGCW_GKCX_NGKW) { - using TensorLayouts = - ConvTensorLayouts; + static constexpr auto sig = ConvSignature<>{ + .spatial_dim = 1, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NGCW}}, + .weight = {.config = {.layout = TensorLayout::GKCX}}, + .output = {.config = {.layout = TensorLayout::NGKW}} + }; + + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v>)); } TEST(ConvTensorLayout, AssignsLayoutsFor2D_NGCHW_GKYXC_NGKHW) { - using TensorLayouts = ConvTensorLayouts; + static constexpr auto sig = ConvSignature<>{ + .spatial_dim = 2, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NGCHW}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::NGKHW}} + }; + + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v>)); } TEST(ConvTensorLayout, AssignsLayoutsFor2D_NHWGC_GKYXC_NHWGK) { - using TensorLayouts = ConvTensorLayouts; + static constexpr auto sig = ConvSignature<>{ + .spatial_dim = 2, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NHWGC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::NHWGK}} + }; + + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v>)); } TEST(ConvTensorLayout, AssignsLayoutsFor2D_GNHWC_GKYXC_GNHWK) { - using TensorLayouts = ConvTensorLayouts; + static constexpr auto sig = ConvSignature<>{ + .spatial_dim = 2, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNHWC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::GNHWK}} + }; + + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v>)); } TEST(ConvTensorLayout, AssignsLayoutsFor2D_NGCHW_GKCYX_NGKHW) { - using TensorLayouts = ConvTensorLayouts; + static constexpr auto sig = ConvSignature<>{ + .spatial_dim = 2, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = 
{.config = {.layout = TensorLayout::NGCHW}}, + .weight = {.config = {.layout = TensorLayout::GKCYX}}, + .output = {.config = {.layout = TensorLayout::NGKHW}} + }; + + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v>)); } TEST(ConvTensorLayout, AssignsLayoutsFor3D_NGCDHW_GKCZYX_NGKDHW) { - using TensorLayouts = ConvTensorLayouts; + static constexpr auto sig = ConvSignature<>{ + .spatial_dim = 3, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NGCDHW}}, + .weight = {.config = {.layout = TensorLayout::GKCZYX}}, + .output = {.config = {.layout = TensorLayout::NGKDHW}} + }; + + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v>)); } TEST(ConvTensorLayout, AssignsLayoutsFor3D_NDHWGC_GKZYXC_NDHWGK) { - using TensorLayouts = ConvTensorLayouts; + static constexpr auto sig = ConvSignature<>{ + .spatial_dim = 3, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NDHWGC}}, + .weight = {.config = {.layout = TensorLayout::GKZYXC}}, + .output = {.config = {.layout = TensorLayout::NDHWGK}} + }; + + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v>)); } TEST(ConvTensorLayout, AssignsLayoutsFor3D_GNDHWC_GKZYXC_GNDHWK) { - using TensorLayouts = ConvTensorLayouts; + static constexpr auto sig = ConvSignature<>{ + .spatial_dim = 3, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNDHWC}}, + .weight = {.config = {.layout = TensorLayout::GKZYXC}}, + .output = {.config = {.layout = TensorLayout::GNDHWK}} + }; + + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v>)); +} + +TEST(AuxiliaryTensorLayout, AssignsLayoutForG_K_strided) +{ + using CKLayout = LayoutToCK::type; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(AuxiliaryTensorLayout, AssignsLayoutForGC) +{ + using CKLayout = LayoutToCK::type; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(AuxiliaryTensorLayout, AssignsLayoutForG_C_strided) +{ + using CKLayout = LayoutToCK::type; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(AuxiliaryTensorLayout, EmptyAuxiliaryTensorLayoutIsEmptyTuple) +{ + using ::ck_tile::builder::factory::internal::EmptyAuxiliaryTensorLayout; + using EmptyLayout = EmptyAuxiliaryTensorLayout::type; + EXPECT_TRUE((std::is_same_v>)); +} + +struct MockAuxiliaryTensorConfig +{ + TensorLayout layout; +}; + +TEST(AuxiliaryTensorLayoutIntegration, SingleBiasTensorWithG_K_Layout) +{ + static constexpr std::array aux_configs = { + MockAuxiliaryTensorConfig{.layout = TensorLayout::G_K_strided} + }; + + using AuxLayouts = AuxiliaryTensorLayouts; + + EXPECT_EQ(AuxLayouts::Size, 1); + using ExpectedType = ck::Tuple; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(AuxiliaryTensorLayoutIntegration, SingleBiasTensorWithGC_Layout) +{ + static constexpr std::array aux_configs = { + MockAuxiliaryTensorConfig{.layout = TensorLayout::GC} + }; + + using AuxLayouts = AuxiliaryTensorLayouts; + + EXPECT_EQ(AuxLayouts::Size, 1); + using 
ExpectedType = ck::Tuple; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(AuxiliaryTensorLayoutIntegration, SingleBiasTensorWithG_C_Layout) +{ + static constexpr std::array aux_configs = { + MockAuxiliaryTensorConfig{.layout = TensorLayout::G_C_strided} + }; + + using AuxLayouts = AuxiliaryTensorLayouts; + + EXPECT_EQ(AuxLayouts::Size, 1); + using ExpectedType = ck::Tuple; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(AuxiliaryTensorLayoutIntegration, TwoAuxiliaryTensors) +{ + static constexpr std::array aux_configs = { + MockAuxiliaryTensorConfig{.layout = TensorLayout::G_K_strided}, + MockAuxiliaryTensorConfig{.layout = TensorLayout::GC} + }; + + using AuxLayouts = AuxiliaryTensorLayouts; + + EXPECT_EQ(AuxLayouts::Size, 2); + using ExpectedType = ck::Tuple; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(AuxiliaryTensorLayoutIntegration, ThreeAuxiliaryTensors) +{ + static constexpr std::array aux_configs = { + MockAuxiliaryTensorConfig{.layout = TensorLayout::G_K_strided}, + MockAuxiliaryTensorConfig{.layout = TensorLayout::GC}, + MockAuxiliaryTensorConfig{.layout = TensorLayout::G_C_strided} + }; + + using AuxLayouts = AuxiliaryTensorLayouts; + + EXPECT_EQ(AuxLayouts::Size, 3); + using ExpectedType = ck::Tuple; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(AuxiliaryTensorLayoutIntegration, WorksWith1DConvolution) +{ + static constexpr std::array aux_configs = { + MockAuxiliaryTensorConfig{.layout = TensorLayout::G_K_strided} + }; + + using AuxLayouts = AuxiliaryTensorLayouts; + + EXPECT_EQ(AuxLayouts::Size, 1); + using ExpectedType = ck::Tuple; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(AuxiliaryTensorLayoutIntegration, WorksWith3DConvolution) +{ + static constexpr std::array aux_configs = { + MockAuxiliaryTensorConfig{.layout = TensorLayout::GC} + }; + + using AuxLayouts = AuxiliaryTensorLayouts; + + EXPECT_EQ(AuxLayouts::Size, 1); + using ExpectedType = ck::Tuple; + EXPECT_TRUE((std::is_same_v)); +} + + +TEST(ConvTensorLayoutsWithAuxiliary, Conv2DWithSingleBiasG_K) +{ + using OutputOp = TensorOperation; + + static constexpr auto sig = ConvSignature< + ConvolutionTensor<>, + ConvolutionTensor<>, + ConvolutionTensor>{ + .spatial_dim = 2, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NGCHW}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = { + .config = {.layout = TensorLayout::NGKHW}, + .operation = OutputOp{.elementwise_operation = ElementwiseOperation::SCALE} + } + }; + + using TensorLayouts = ConvTensorLayouts; + + EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v)); + + using ExpectedDsLayout = ck::Tuple; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(ConvTensorLayoutsWithAuxiliary, Conv2DWithSingleBiasGC) +{ + using OutputOp = TensorOperation; + + static constexpr auto sig = ConvSignature< + ConvolutionTensor<>, + ConvolutionTensor<>, + ConvolutionTensor>{ + .spatial_dim = 2, + .direction = FORWARD, + .data_type = DataType::BF16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NHWGC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = { + .config = {.layout = TensorLayout::NHWGK}, + .operation = OutputOp{.elementwise_operation = ElementwiseOperation::SCALE} + } + }; + + using TensorLayouts = ConvTensorLayouts; + + EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v)); + + using ExpectedDsLayout = ck::Tuple; + 
EXPECT_TRUE((std::is_same_v)); +} + +TEST(ConvTensorLayoutsWithAuxiliary, Conv2DWithTwoAuxiliaryTensors) +{ + using OutputOp = TensorOperation< + TensorConfig{.layout = TensorLayout::G_K_strided}, + TensorConfig{.layout = TensorLayout::GC}>; + + static constexpr auto sig = ConvSignature< + ConvolutionTensor<>, + ConvolutionTensor<>, + ConvolutionTensor>{ + .spatial_dim = 2, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNHWC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = { + .config = {.layout = TensorLayout::GNHWK}, + .operation = OutputOp{.elementwise_operation = ElementwiseOperation::SCALEADD_SCALEADD_RELU} + } + }; + + using TensorLayouts = ConvTensorLayouts; + + EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v)); + + using ExpectedDsLayout = ck::Tuple; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(ConvTensorLayoutsWithAuxiliary, Conv1DWithBias) +{ + using OutputOp = TensorOperation; + + static constexpr auto sig = ConvSignature< + ConvolutionTensor<>, + ConvolutionTensor<>, + ConvolutionTensor>{ + .spatial_dim = 1, + .direction = FORWARD, + .data_type = DataType::FP32, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NWGC}}, + .weight = {.config = {.layout = TensorLayout::GKXC}}, + .output = { + .config = {.layout = TensorLayout::NWGK}, + .operation = OutputOp{.elementwise_operation = ElementwiseOperation::SCALE} + } + }; + + using TensorLayouts = ConvTensorLayouts; + + EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v)); + + using ExpectedDsLayout = ck::Tuple; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(ConvTensorLayoutsWithAuxiliary, Conv3DWithBias) +{ + using OutputOp = TensorOperation; + + static constexpr auto sig = ConvSignature< + ConvolutionTensor<>, + ConvolutionTensor<>, + ConvolutionTensor>{ + .spatial_dim = 3, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NDHWGC}}, + .weight = {.config = {.layout = TensorLayout::GKZYXC}}, + .output = { + .config = {.layout = TensorLayout::NDHWGK}, + .operation = OutputOp{.elementwise_operation = ElementwiseOperation::BIAS_BNORM_CLAMP} + } + }; + + using TensorLayouts = ConvTensorLayouts; + + EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v)); + + using ExpectedDsLayout = ck::Tuple; + EXPECT_TRUE((std::is_same_v)); } } // namespace From b847ca57f5314e41af7c760cd295810888ba0513 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Wed, 3 Dec 2025 14:51:26 +0000 Subject: [PATCH 39/41] Clang-format. 
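
Mechanical formatting pass over the files touched earlier in this series;
no functional changes intended.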
--- .../builder/factory/conv_fwd_dl_factory.hpp | 6 +- .../factory/conv_fwd_large_tensor_factory.hpp | 6 +- .../builder/factory/conv_fwd_v3_factory.hpp | 6 +- .../builder/factory/conv_fwd_wmma_factory.hpp | 6 +- .../builder/factory/conv_fwd_xdl_factory.hpp | 6 +- .../factory/helpers/conv_tensor_layout.hpp | 11 +- .../builder/test/unit_conv_elementwise_op.cpp | 8 +- .../builder/test/unit_conv_tensor_layout.cpp | 416 ++++++++---------- 8 files changed, 211 insertions(+), 254 deletions(-) diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_dl_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_dl_factory.hpp index d2d25d99a4..051c844d75 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_dl_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_dl_factory.hpp @@ -24,9 +24,9 @@ template ; - using Types = internal::FwdConvTensorDataTypes; - using Ops = decltype(internal::GetElementwiseOps()); + using Layouts = internal::ConvTensorLayouts; + using Types = internal::FwdConvTensorDataTypes; + using Ops = decltype(internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); static constexpr auto FWD_CONV_SPECIALIZATION = internal::SetFwdConvSpecialization(); diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_large_tensor_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_large_tensor_factory.hpp index 2aeae0ad80..d323eaf240 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_large_tensor_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_large_tensor_factory.hpp @@ -26,9 +26,9 @@ template ; - using Types = internal::FwdConvTensorDataTypes; - using Ops = decltype(internal::GetElementwiseOps()); + using Layouts = internal::ConvTensorLayouts; + using Types = internal::FwdConvTensorDataTypes; + using Ops = decltype(internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); static constexpr auto BASE_ALGORITHM = ALGORITHM.base_algorithm; diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_v3_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_v3_factory.hpp index 971490706b..6dd5330bb9 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_v3_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_v3_factory.hpp @@ -26,9 +26,9 @@ template ; - using Types = internal::FwdConvTensorDataTypes; - using Ops = decltype(internal::GetElementwiseOps()); + using Layouts = internal::ConvTensorLayouts; + using Types = internal::FwdConvTensorDataTypes; + using Ops = decltype(internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); static_assert(ALGORITHM.transfer.a.lds_transfer.is_direct_load == diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_wmma_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_wmma_factory.hpp index 10ea19a317..86fadb6a5d 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_wmma_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_wmma_factory.hpp @@ -26,9 +26,9 @@ template ; - using Types = internal::FwdConvTensorDataTypes; - using Ops = decltype(internal::GetElementwiseOps()); + using Layouts = internal::ConvTensorLayouts; + using Types = internal::FwdConvTensorDataTypes; + using Ops = decltype(internal::GetElementwiseOps()); using AlgorithmType = 
decltype(ALGORITHM); static constexpr auto FWD_CONV_SPECIALIZATION = internal::SetFwdConvSpecialization(); diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_xdl_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_xdl_factory.hpp index 5d36d6bfaf..c1f7c63b96 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_xdl_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_xdl_factory.hpp @@ -26,9 +26,9 @@ template ; - using Types = internal::FwdConvTensorDataTypes; - using Ops = decltype(internal::GetElementwiseOps()); + using Layouts = internal::ConvTensorLayouts; + using Types = internal::FwdConvTensorDataTypes; + using Ops = decltype(internal::GetElementwiseOps()); using AlgorithmType = decltype(ALGORITHM); static constexpr auto FWD_CONV_SPECIALIZATION = internal::SetFwdConvSpecialization(); diff --git a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_layout.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_layout.hpp index c7bf4d88e4..a6c0b48c54 100644 --- a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_layout.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_layout.hpp @@ -216,9 +216,7 @@ consteval auto GetAuxiliaryTensorLayouts() return EmptyAuxiliaryTensorLayout{}; } -template +template requires(ConvSpatialDim && ValidConvInputLayoutForSpatialDim && ValidConvWeightLayoutForSpatialDim && @@ -226,11 +224,10 @@ template ()); - using BLayout = decltype(TensorLayoutToCK()); - using ELayout = decltype(TensorLayoutToCK()); + using ALayout = decltype(TensorLayoutToCK()); + using BLayout = decltype(TensorLayoutToCK()); + using ELayout = decltype(TensorLayoutToCK()); using DsLayout = decltype(GetAuxiliaryTensorLayouts())::type; }; - } // namespace ck_tile::builder::factory::internal diff --git a/experimental/builder/test/unit_conv_elementwise_op.cpp b/experimental/builder/test/unit_conv_elementwise_op.cpp index 7f73a3c599..84a9c533f6 100644 --- a/experimental/builder/test/unit_conv_elementwise_op.cpp +++ b/experimental/builder/test/unit_conv_elementwise_op.cpp @@ -8,14 +8,13 @@ namespace { -using ::ck_tile::builder::factory::internal::ElementwiseOpToCK; using ::ck_tile::builder::ElementwiseOperation; +using ::ck_tile::builder::factory::internal::ElementwiseOpToCK; TEST(ConvElementwiseOp, AssignsOpsForPassThrough) { using Op = ElementwiseOpToCK::Op; - EXPECT_TRUE( - (std::is_same_v)); + EXPECT_TRUE((std::is_same_v)); } TEST(ConvElementwiseOp, AssignsOpsForScale) @@ -39,7 +38,8 @@ TEST(ConvElementwiseOp, AssignsOpsForScaleAddScaleAddRelu) TEST(ConvElementwiseOp, AssignsOpsForBiasNormClamp) { using Op = ElementwiseOpToCK::Op; - EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE( + (std::is_same_v)); } } // namespace diff --git a/experimental/builder/test/unit_conv_tensor_layout.cpp b/experimental/builder/test/unit_conv_tensor_layout.cpp index f5932f6c87..7764e94dc6 100644 --- a/experimental/builder/test/unit_conv_tensor_layout.cpp +++ b/experimental/builder/test/unit_conv_tensor_layout.cpp @@ -10,27 +10,26 @@ namespace { namespace ckb = ::ck_tile::builder; -using ::ck_tile::builder::factory::internal::ConvTensorLayouts; -using ::ck_tile::builder::TensorLayout; using ::ck_tile::builder::DataType; using ::ck_tile::builder::ElementwiseOperation; -using ::ck_tile::builder::factory::internal::LayoutToCK; +using ::ck_tile::builder::TensorLayout; using 
::ck_tile::builder::factory::internal::AuxiliaryTensorLayouts; +using ::ck_tile::builder::factory::internal::ConvTensorLayouts; +using ::ck_tile::builder::factory::internal::LayoutToCK; using namespace ::ck_tile::builder::test; using enum ::ck_tile::builder::ConvDirection; TEST(ConvTensorLayout, AssignsLayoutsFor1D_NWGC_GKXC_NWGK) { - static constexpr auto sig = ConvSignature<>{ - .spatial_dim = 1, - .direction = FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = {.config = {.layout = TensorLayout::NWGC}}, - .weight = {.config = {.layout = TensorLayout::GKXC}}, - .output = {.config = {.layout = TensorLayout::NWGK}} - }; + static constexpr auto sig = + ConvSignature<>{.spatial_dim = 1, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NWGC}}, + .weight = {.config = {.layout = TensorLayout::GKXC}}, + .output = {.config = {.layout = TensorLayout::NWGK}}}; using TensorLayouts = ConvTensorLayouts; @@ -42,15 +41,14 @@ TEST(ConvTensorLayout, AssignsLayoutsFor1D_NWGC_GKXC_NWGK) TEST(ConvTensorLayout, AssignsLayoutsFor1D_NGCW_GKXC_NGKW) { - static constexpr auto sig = ConvSignature<>{ - .spatial_dim = 1, - .direction = FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = {.config = {.layout = TensorLayout::NGCW}}, - .weight = {.config = {.layout = TensorLayout::GKXC}}, - .output = {.config = {.layout = TensorLayout::NGKW}} - }; + static constexpr auto sig = + ConvSignature<>{.spatial_dim = 1, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NGCW}}, + .weight = {.config = {.layout = TensorLayout::GKXC}}, + .output = {.config = {.layout = TensorLayout::NGKW}}}; using TensorLayouts = ConvTensorLayouts; @@ -62,15 +60,14 @@ TEST(ConvTensorLayout, AssignsLayoutsFor1D_NGCW_GKXC_NGKW) TEST(ConvTensorLayout, AssignsLayoutsFor1D_GNWC_GKXC_GNWK) { - static constexpr auto sig = ConvSignature<>{ - .spatial_dim = 1, - .direction = FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = {.config = {.layout = TensorLayout::GNWC}}, - .weight = {.config = {.layout = TensorLayout::GKXC}}, - .output = {.config = {.layout = TensorLayout::GNWK}} - }; + static constexpr auto sig = + ConvSignature<>{.spatial_dim = 1, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNWC}}, + .weight = {.config = {.layout = TensorLayout::GKXC}}, + .output = {.config = {.layout = TensorLayout::GNWK}}}; using TensorLayouts = ConvTensorLayouts; @@ -82,15 +79,14 @@ TEST(ConvTensorLayout, AssignsLayoutsFor1D_GNWC_GKXC_GNWK) TEST(ConvTensorLayout, AssignsLayoutsFor1D_NGCW_GKCX_NGKW) { - static constexpr auto sig = ConvSignature<>{ - .spatial_dim = 1, - .direction = FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = {.config = {.layout = TensorLayout::NGCW}}, - .weight = {.config = {.layout = TensorLayout::GKCX}}, - .output = {.config = {.layout = TensorLayout::NGKW}} - }; + static constexpr auto sig = + ConvSignature<>{.spatial_dim = 1, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NGCW}}, + .weight = {.config = {.layout = TensorLayout::GKCX}}, + .output = {.config = {.layout = 
TensorLayout::NGKW}}}; using TensorLayouts = ConvTensorLayouts; @@ -102,15 +98,14 @@ TEST(ConvTensorLayout, AssignsLayoutsFor1D_NGCW_GKCX_NGKW) TEST(ConvTensorLayout, AssignsLayoutsFor2D_NGCHW_GKYXC_NGKHW) { - static constexpr auto sig = ConvSignature<>{ - .spatial_dim = 2, - .direction = FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = {.config = {.layout = TensorLayout::NGCHW}}, - .weight = {.config = {.layout = TensorLayout::GKYXC}}, - .output = {.config = {.layout = TensorLayout::NGKHW}} - }; + static constexpr auto sig = + ConvSignature<>{.spatial_dim = 2, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NGCHW}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::NGKHW}}}; using TensorLayouts = ConvTensorLayouts; @@ -122,15 +117,14 @@ TEST(ConvTensorLayout, AssignsLayoutsFor2D_NGCHW_GKYXC_NGKHW) TEST(ConvTensorLayout, AssignsLayoutsFor2D_NHWGC_GKYXC_NHWGK) { - static constexpr auto sig = ConvSignature<>{ - .spatial_dim = 2, - .direction = FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = {.config = {.layout = TensorLayout::NHWGC}}, - .weight = {.config = {.layout = TensorLayout::GKYXC}}, - .output = {.config = {.layout = TensorLayout::NHWGK}} - }; + static constexpr auto sig = + ConvSignature<>{.spatial_dim = 2, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NHWGC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::NHWGK}}}; using TensorLayouts = ConvTensorLayouts; @@ -142,15 +136,14 @@ TEST(ConvTensorLayout, AssignsLayoutsFor2D_NHWGC_GKYXC_NHWGK) TEST(ConvTensorLayout, AssignsLayoutsFor2D_GNHWC_GKYXC_GNHWK) { - static constexpr auto sig = ConvSignature<>{ - .spatial_dim = 2, - .direction = FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = {.config = {.layout = TensorLayout::GNHWC}}, - .weight = {.config = {.layout = TensorLayout::GKYXC}}, - .output = {.config = {.layout = TensorLayout::GNHWK}} - }; + static constexpr auto sig = + ConvSignature<>{.spatial_dim = 2, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNHWC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::GNHWK}}}; using TensorLayouts = ConvTensorLayouts; @@ -162,15 +155,14 @@ TEST(ConvTensorLayout, AssignsLayoutsFor2D_GNHWC_GKYXC_GNHWK) TEST(ConvTensorLayout, AssignsLayoutsFor2D_NGCHW_GKCYX_NGKHW) { - static constexpr auto sig = ConvSignature<>{ - .spatial_dim = 2, - .direction = FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = {.config = {.layout = TensorLayout::NGCHW}}, - .weight = {.config = {.layout = TensorLayout::GKCYX}}, - .output = {.config = {.layout = TensorLayout::NGKHW}} - }; + static constexpr auto sig = + ConvSignature<>{.spatial_dim = 2, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NGCHW}}, + .weight = {.config = {.layout = TensorLayout::GKCYX}}, + .output = {.config = {.layout = TensorLayout::NGKHW}}}; using TensorLayouts = ConvTensorLayouts; @@ -182,15 +174,14 @@ 
TEST(ConvTensorLayout, AssignsLayoutsFor2D_NGCHW_GKCYX_NGKHW) TEST(ConvTensorLayout, AssignsLayoutsFor3D_NGCDHW_GKCZYX_NGKDHW) { - static constexpr auto sig = ConvSignature<>{ - .spatial_dim = 3, - .direction = FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = {.config = {.layout = TensorLayout::NGCDHW}}, - .weight = {.config = {.layout = TensorLayout::GKCZYX}}, - .output = {.config = {.layout = TensorLayout::NGKDHW}} - }; + static constexpr auto sig = + ConvSignature<>{.spatial_dim = 3, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NGCDHW}}, + .weight = {.config = {.layout = TensorLayout::GKCZYX}}, + .output = {.config = {.layout = TensorLayout::NGKDHW}}}; using TensorLayouts = ConvTensorLayouts; @@ -202,15 +193,14 @@ TEST(ConvTensorLayout, AssignsLayoutsFor3D_NGCDHW_GKCZYX_NGKDHW) TEST(ConvTensorLayout, AssignsLayoutsFor3D_NDHWGC_GKZYXC_NDHWGK) { - static constexpr auto sig = ConvSignature<>{ - .spatial_dim = 3, - .direction = FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = {.config = {.layout = TensorLayout::NDHWGC}}, - .weight = {.config = {.layout = TensorLayout::GKZYXC}}, - .output = {.config = {.layout = TensorLayout::NDHWGK}} - }; + static constexpr auto sig = + ConvSignature<>{.spatial_dim = 3, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NDHWGC}}, + .weight = {.config = {.layout = TensorLayout::GKZYXC}}, + .output = {.config = {.layout = TensorLayout::NDHWGK}}}; using TensorLayouts = ConvTensorLayouts; @@ -222,15 +212,14 @@ TEST(ConvTensorLayout, AssignsLayoutsFor3D_NDHWGC_GKZYXC_NDHWGK) TEST(ConvTensorLayout, AssignsLayoutsFor3D_GNDHWC_GKZYXC_GNDHWK) { - static constexpr auto sig = ConvSignature<>{ - .spatial_dim = 3, - .direction = FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = {.config = {.layout = TensorLayout::GNDHWC}}, - .weight = {.config = {.layout = TensorLayout::GKZYXC}}, - .output = {.config = {.layout = TensorLayout::GNDHWK}} - }; + static constexpr auto sig = + ConvSignature<>{.spatial_dim = 3, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNDHWC}}, + .weight = {.config = {.layout = TensorLayout::GKZYXC}}, + .output = {.config = {.layout = TensorLayout::GNDHWK}}}; using TensorLayouts = ConvTensorLayouts; @@ -273,11 +262,10 @@ struct MockAuxiliaryTensorConfig TEST(AuxiliaryTensorLayoutIntegration, SingleBiasTensorWithG_K_Layout) { static constexpr std::array aux_configs = { - MockAuxiliaryTensorConfig{.layout = TensorLayout::G_K_strided} - }; - + MockAuxiliaryTensorConfig{.layout = TensorLayout::G_K_strided}}; + using AuxLayouts = AuxiliaryTensorLayouts; - + EXPECT_EQ(AuxLayouts::Size, 1); using ExpectedType = ck::Tuple; EXPECT_TRUE((std::is_same_v)); @@ -286,11 +274,10 @@ TEST(AuxiliaryTensorLayoutIntegration, SingleBiasTensorWithG_K_Layout) TEST(AuxiliaryTensorLayoutIntegration, SingleBiasTensorWithGC_Layout) { static constexpr std::array aux_configs = { - MockAuxiliaryTensorConfig{.layout = TensorLayout::GC} - }; - + MockAuxiliaryTensorConfig{.layout = TensorLayout::GC}}; + using AuxLayouts = AuxiliaryTensorLayouts; - + EXPECT_EQ(AuxLayouts::Size, 1); using ExpectedType = ck::Tuple; EXPECT_TRUE((std::is_same_v)); @@ -299,11 +286,10 
@@ TEST(AuxiliaryTensorLayoutIntegration, SingleBiasTensorWithGC_Layout) TEST(AuxiliaryTensorLayoutIntegration, SingleBiasTensorWithG_C_Layout) { static constexpr std::array aux_configs = { - MockAuxiliaryTensorConfig{.layout = TensorLayout::G_C_strided} - }; - + MockAuxiliaryTensorConfig{.layout = TensorLayout::G_C_strided}}; + using AuxLayouts = AuxiliaryTensorLayouts; - + EXPECT_EQ(AuxLayouts::Size, 1); using ExpectedType = ck::Tuple; EXPECT_TRUE((std::is_same_v)); @@ -313,14 +299,13 @@ TEST(AuxiliaryTensorLayoutIntegration, TwoAuxiliaryTensors) { static constexpr std::array aux_configs = { MockAuxiliaryTensorConfig{.layout = TensorLayout::G_K_strided}, - MockAuxiliaryTensorConfig{.layout = TensorLayout::GC} - }; - + MockAuxiliaryTensorConfig{.layout = TensorLayout::GC}}; + using AuxLayouts = AuxiliaryTensorLayouts; - + EXPECT_EQ(AuxLayouts::Size, 2); - using ExpectedType = ck::Tuple; + using ExpectedType = + ck::Tuple; EXPECT_TRUE((std::is_same_v)); } @@ -329,27 +314,25 @@ TEST(AuxiliaryTensorLayoutIntegration, ThreeAuxiliaryTensors) static constexpr std::array aux_configs = { MockAuxiliaryTensorConfig{.layout = TensorLayout::G_K_strided}, MockAuxiliaryTensorConfig{.layout = TensorLayout::GC}, - MockAuxiliaryTensorConfig{.layout = TensorLayout::G_C_strided} - }; - + MockAuxiliaryTensorConfig{.layout = TensorLayout::G_C_strided}}; + using AuxLayouts = AuxiliaryTensorLayouts; - - EXPECT_EQ(AuxLayouts::Size, 3); + + EXPECT_EQ(AuxLayouts::Size, 3); using ExpectedType = ck::Tuple; + ck::tensor_layout::convolution::GC, + ck::tensor_layout::convolution::G_C>; EXPECT_TRUE((std::is_same_v)); } TEST(AuxiliaryTensorLayoutIntegration, WorksWith1DConvolution) { static constexpr std::array aux_configs = { - MockAuxiliaryTensorConfig{.layout = TensorLayout::G_K_strided} - }; - + MockAuxiliaryTensorConfig{.layout = TensorLayout::G_K_strided}}; + using AuxLayouts = AuxiliaryTensorLayouts; - - EXPECT_EQ(AuxLayouts::Size, 1); + + EXPECT_EQ(AuxLayouts::Size, 1); using ExpectedType = ck::Tuple; EXPECT_TRUE((std::is_same_v)); } @@ -357,43 +340,37 @@ TEST(AuxiliaryTensorLayoutIntegration, WorksWith1DConvolution) TEST(AuxiliaryTensorLayoutIntegration, WorksWith3DConvolution) { static constexpr std::array aux_configs = { - MockAuxiliaryTensorConfig{.layout = TensorLayout::GC} - }; - + MockAuxiliaryTensorConfig{.layout = TensorLayout::GC}}; + using AuxLayouts = AuxiliaryTensorLayouts; - - EXPECT_EQ(AuxLayouts::Size, 1); + + EXPECT_EQ(AuxLayouts::Size, 1); using ExpectedType = ck::Tuple; EXPECT_TRUE((std::is_same_v)); } - TEST(ConvTensorLayoutsWithAuxiliary, Conv2DWithSingleBiasG_K) { using OutputOp = TensorOperation; - - static constexpr auto sig = ConvSignature< - ConvolutionTensor<>, - ConvolutionTensor<>, - ConvolutionTensor>{ - .spatial_dim = 2, - .direction = FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = {.config = {.layout = TensorLayout::NGCHW}}, - .weight = {.config = {.layout = TensorLayout::GKYXC}}, - .output = { - .config = {.layout = TensorLayout::NGKHW}, - .operation = OutputOp{.elementwise_operation = ElementwiseOperation::SCALE} - } - }; + + static constexpr auto sig = + ConvSignature, ConvolutionTensor<>, ConvolutionTensor>{ + .spatial_dim = 2, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NGCHW}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::NGKHW}, + .operation = + 
OutputOp{.elementwise_operation = ElementwiseOperation::SCALE}}}; using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); - + using ExpectedDsLayout = ck::Tuple; EXPECT_TRUE((std::is_same_v)); } @@ -401,92 +378,79 @@ TEST(ConvTensorLayoutsWithAuxiliary, Conv2DWithSingleBiasG_K) TEST(ConvTensorLayoutsWithAuxiliary, Conv2DWithSingleBiasGC) { using OutputOp = TensorOperation; - - static constexpr auto sig = ConvSignature< - ConvolutionTensor<>, - ConvolutionTensor<>, - ConvolutionTensor>{ - .spatial_dim = 2, - .direction = FORWARD, - .data_type = DataType::BF16, - .accumulation_data_type = DataType::FP32, - .input = {.config = {.layout = TensorLayout::NHWGC}}, - .weight = {.config = {.layout = TensorLayout::GKYXC}}, - .output = { - .config = {.layout = TensorLayout::NHWGK}, - .operation = OutputOp{.elementwise_operation = ElementwiseOperation::SCALE} - } - }; + + static constexpr auto sig = + ConvSignature, ConvolutionTensor<>, ConvolutionTensor>{ + .spatial_dim = 2, + .direction = FORWARD, + .data_type = DataType::BF16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NHWGC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::NHWGK}, + .operation = + OutputOp{.elementwise_operation = ElementwiseOperation::SCALE}}}; using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); - + using ExpectedDsLayout = ck::Tuple; EXPECT_TRUE((std::is_same_v)); } TEST(ConvTensorLayoutsWithAuxiliary, Conv2DWithTwoAuxiliaryTensors) { - using OutputOp = TensorOperation< - TensorConfig{.layout = TensorLayout::G_K_strided}, - TensorConfig{.layout = TensorLayout::GC}>; - - static constexpr auto sig = ConvSignature< - ConvolutionTensor<>, - ConvolutionTensor<>, - ConvolutionTensor>{ - .spatial_dim = 2, - .direction = FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = {.config = {.layout = TensorLayout::GNHWC}}, - .weight = {.config = {.layout = TensorLayout::GKYXC}}, - .output = { - .config = {.layout = TensorLayout::GNHWK}, - .operation = OutputOp{.elementwise_operation = ElementwiseOperation::SCALEADD_SCALEADD_RELU} - } - }; + using OutputOp = TensorOperation; + + static constexpr auto sig = + ConvSignature, ConvolutionTensor<>, ConvolutionTensor>{ + .spatial_dim = 2, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNHWC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::GNHWK}, + .operation = OutputOp{.elementwise_operation = + ElementwiseOperation::SCALEADD_SCALEADD_RELU}}}; using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); - - using ExpectedDsLayout = ck::Tuple; + + using ExpectedDsLayout = + ck::Tuple; EXPECT_TRUE((std::is_same_v)); } TEST(ConvTensorLayoutsWithAuxiliary, Conv1DWithBias) { using OutputOp = TensorOperation; - - static constexpr auto sig = ConvSignature< - ConvolutionTensor<>, - ConvolutionTensor<>, - ConvolutionTensor>{ - .spatial_dim = 1, - .direction = FORWARD, - .data_type = DataType::FP32, - .accumulation_data_type = DataType::FP32, - .input = {.config = {.layout = TensorLayout::NWGC}}, - .weight = {.config = {.layout = TensorLayout::GKXC}}, - .output = { - .config = 
{.layout = TensorLayout::NWGK}, - .operation = OutputOp{.elementwise_operation = ElementwiseOperation::SCALE} - } - }; + + static constexpr auto sig = + ConvSignature, ConvolutionTensor<>, ConvolutionTensor>{ + .spatial_dim = 1, + .direction = FORWARD, + .data_type = DataType::FP32, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NWGC}}, + .weight = {.config = {.layout = TensorLayout::GKXC}}, + .output = {.config = {.layout = TensorLayout::NWGK}, + .operation = + OutputOp{.elementwise_operation = ElementwiseOperation::SCALE}}}; using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); - + using ExpectedDsLayout = ck::Tuple; EXPECT_TRUE((std::is_same_v)); } @@ -494,29 +458,25 @@ TEST(ConvTensorLayoutsWithAuxiliary, Conv1DWithBias) TEST(ConvTensorLayoutsWithAuxiliary, Conv3DWithBias) { using OutputOp = TensorOperation; - - static constexpr auto sig = ConvSignature< - ConvolutionTensor<>, - ConvolutionTensor<>, - ConvolutionTensor>{ - .spatial_dim = 3, - .direction = FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = {.config = {.layout = TensorLayout::NDHWGC}}, - .weight = {.config = {.layout = TensorLayout::GKZYXC}}, - .output = { - .config = {.layout = TensorLayout::NDHWGK}, - .operation = OutputOp{.elementwise_operation = ElementwiseOperation::BIAS_BNORM_CLAMP} - } - }; + + static constexpr auto sig = + ConvSignature, ConvolutionTensor<>, ConvolutionTensor>{ + .spatial_dim = 3, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NDHWGC}}, + .weight = {.config = {.layout = TensorLayout::GKZYXC}}, + .output = {.config = {.layout = TensorLayout::NDHWGK}, + .operation = OutputOp{.elementwise_operation = + ElementwiseOperation::BIAS_BNORM_CLAMP}}}; using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); - + using ExpectedDsLayout = ck::Tuple; EXPECT_TRUE((std::is_same_v)); } From aed873042bd61e27abb3f0a2ff04f64e7bdb5f35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Wed, 3 Dec 2025 15:08:37 +0000 Subject: [PATCH 40/41] Fix merge conflicts. 
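
The previous merge resolved conv_description.hpp with a stale spelling of the layout fields: the
ConvDescription member is signature_ (trailing underscore), and after the layout split the fields are
plain input_layout / weight_layout, so signature._input_layout does not name anything. The corrected
accesses match the output_layout line that was already right, as in the diff below.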
--- .../include/ck_tile/builder/reflect/conv_description.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp b/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp index 2960ec8cfc..261c3f103d 100644 --- a/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp +++ b/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp @@ -107,8 +107,8 @@ class ConvDescription : public Description f.writeLine(0, signature_.spatial_dim, "D ", signature_.direction, " Convolution Kernel"); f.writeLine(1, "Signature"); f.writeLine(2, "Tensor Type: ", signature_.data_type); - f.writeLine(2, "Input Layout: ", signature._input_layout); - f.writeLine(2, "Weight Layout: ", signature._weight_layout); + f.writeLine(2, "Input Layout: ", signature_.input_layout); + f.writeLine(2, "Weight Layout: ", signature_.weight_layout); f.writeLine(2, "Output Layout: ", signature_.output_layout); f.writeLine(2, "Input elementwise operation: ", signature_.input_element_op); f.writeLine(2, "Weights elementwise operation: ", signature_.weight_element_op); From cd37f4ee90311163d1b81caf394fd32f03b1a47a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Wed, 3 Dec 2025 15:20:20 +0000 Subject: [PATCH 41/41] Improve elementwise op handling. --- .../builder/factory/conv_fwd_dl_factory.hpp | 2 +- .../factory/conv_fwd_large_tensor_factory.hpp | 2 +- .../builder/factory/conv_fwd_v3_factory.hpp | 2 +- .../builder/factory/conv_fwd_wmma_factory.hpp | 2 +- .../builder/factory/conv_fwd_xdl_factory.hpp | 2 +- .../factory/helpers/conv_elementwise_op.hpp | 14 ++++---------- 6 files changed, 9 insertions(+), 15 deletions(-) diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_dl_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_dl_factory.hpp index 051c844d75..0c675ac7f1 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_dl_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_dl_factory.hpp @@ -26,7 +26,7 @@ struct ConvFwdDlFactory static constexpr size_t SPATIAL_DIM = SIGNATURE.spatial_dim; using Layouts = internal::ConvTensorLayouts; using Types = internal::FwdConvTensorDataTypes; - using Ops = decltype(internal::GetElementwiseOps()); + using Ops = internal::ElementwiseOps; using AlgorithmType = decltype(ALGORITHM); static constexpr auto FWD_CONV_SPECIALIZATION = internal::SetFwdConvSpecialization(); diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_large_tensor_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_large_tensor_factory.hpp index d323eaf240..98e368ca61 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_large_tensor_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_large_tensor_factory.hpp @@ -28,7 +28,7 @@ struct ConvFwdLargeTensorFactory static constexpr size_t SPATIAL_DIM = SIGNATURE.spatial_dim; using Layouts = internal::ConvTensorLayouts; using Types = internal::FwdConvTensorDataTypes; - using Ops = decltype(internal::GetElementwiseOps()); + using Ops = internal::ElementwiseOps; using AlgorithmType = decltype(ALGORITHM); static constexpr auto BASE_ALGORITHM = ALGORITHM.base_algorithm; diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_v3_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_v3_factory.hpp index 
6dd5330bb9..79955a1f44 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_v3_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_v3_factory.hpp @@ -28,7 +28,7 @@ struct ConvFwdXdlV3Factory static constexpr size_t SPATIAL_DIM = SIGNATURE.spatial_dim; using Layouts = internal::ConvTensorLayouts; using Types = internal::FwdConvTensorDataTypes; - using Ops = decltype(internal::GetElementwiseOps()); + using Ops = internal::ElementwiseOps; using AlgorithmType = decltype(ALGORITHM); static_assert(ALGORITHM.transfer.a.lds_transfer.is_direct_load == diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_wmma_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_wmma_factory.hpp index 86fadb6a5d..fcce46aea7 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_wmma_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_wmma_factory.hpp @@ -28,7 +28,7 @@ struct ConvFwdWmmaFactory static constexpr size_t SPATIAL_DIM = SIGNATURE.spatial_dim; using Layouts = internal::ConvTensorLayouts; using Types = internal::FwdConvTensorDataTypes; - using Ops = decltype(internal::GetElementwiseOps()); + using Ops = internal::ElementwiseOps; using AlgorithmType = decltype(ALGORITHM); static constexpr auto FWD_CONV_SPECIALIZATION = internal::SetFwdConvSpecialization(); diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_xdl_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_xdl_factory.hpp index c1f7c63b96..df7fb25168 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_xdl_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_xdl_factory.hpp @@ -28,7 +28,7 @@ struct ConvFwdXdlFactory static constexpr size_t SPATIAL_DIM = SIGNATURE.spatial_dim; using Layouts = internal::ConvTensorLayouts; using Types = internal::FwdConvTensorDataTypes; - using Ops = decltype(internal::GetElementwiseOps()); + using Ops = internal::ElementwiseOps; using AlgorithmType = decltype(ALGORITHM); static constexpr auto FWD_CONV_SPECIALIZATION = internal::SetFwdConvSpecialization(); diff --git a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op.hpp index 9137c3dc82..a39cd7410b 100644 --- a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op.hpp @@ -61,21 +61,15 @@ consteval auto GetElementwiseOp() } } -template +template struct ElementwiseOps { - static constexpr auto input_op = GetElementwiseOp(); - static constexpr auto weight_op = GetElementwiseOp(); - static constexpr auto output_op = GetElementwiseOp(); + static constexpr auto input_op = GetElementwiseOp(); + static constexpr auto weight_op = GetElementwiseOp(); + static constexpr auto output_op = GetElementwiseOp(); using AElementwiseOp = typename decltype(input_op)::Op; using BElementwiseOp = typename decltype(weight_op)::Op; using CDEElementwiseOp = typename decltype(output_op)::Op; }; -template -constexpr auto GetElementwiseOps() -{ - return ElementwiseOps{}; -} - } // namespace ck_tile::builder::factory::internal
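
With GetElementwiseOps() removed, factories name the helper type directly. For reference, a minimal
sketch of the before/after at a factory call site, assuming the ConvSignature non-type template
parameter is the SIGNATURE constant used elsewhere in these factories (the angle-bracket argument
lists below are illustrative reconstructions, not verbatim from this patch):

    // Before: instantiated through a consteval factory function, so the
    // alias needed decltype around a call expression.
    using Ops = decltype(internal::GetElementwiseOps<SIGNATURE>());

    // After: the struct template is named as a type directly.
    using Ops = internal::ElementwiseOps<SIGNATURE>;

    // The nested aliases consumed by the CK device ops are unchanged:
    using AOp   = typename Ops::AElementwiseOp;   // input elementwise op
    using BOp   = typename Ops::BElementwiseOp;   // weight elementwise op
    using CDEOp = typename Ops::CDEElementwiseOp; // output elementwise op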