diff --git a/experimental/builder/README.md b/experimental/builder/README.md index aa7c7d969d..18e9e58739 100644 --- a/experimental/builder/README.md +++ b/experimental/builder/README.md @@ -10,6 +10,10 @@ The builder provides a high-level, semantically-clear interface for constructing This project is a prototype for a more general builder pattern for all of composable_kernel (CK) and CKTile, but is currently limited to formalizing the interface between MIOpen and CK. +## Design descriptions + +- [CK Builder design description](include/ck_tile/builder/README.md) + ## Directory Structure - `include/ck_tile/builder/` diff --git a/experimental/builder/include/ck_tile/builder/README.md b/experimental/builder/include/ck_tile/builder/README.md new file mode 100644 index 0000000000..a0522a50d6 --- /dev/null +++ b/experimental/builder/include/ck_tile/builder/README.md @@ -0,0 +1,244 @@ +# Composable Kernel Builder Design Documentation + +This directory contains the builder framework for Composable Kernel, which provides a compile-time, type-safe interface for constructing convolution operations with various configurations. + +## Table of Contents + +- [Convolution Signature Design](#convolution-signature-design) + - [Overview](#overview) + - [Architecture](#architecture) + - [Core Components](#core-components) + - [Concepts and Validation](#concepts-and-validation) +--- + +## Convolution Signature Design + +### Overview + +The convolution signature system provides a **compile-time description** of grouped convolution operations. A signature is a collection of properties that fully characterize a convolution kernel's mathematical and operational behavior, enabling: + +- **Compile-time validation**: Ensures type safety and correctness before kernel instantiation +- **Kernel selection**: Matches user requirements to optimized implementations +- **Specialization**: Enables optimized code paths for specific configurations +- **Composability**: Supports building complex operations from simpler components + +The signature leverages modern C++20 features, particularly **concepts**, to provide expressive, self-documenting interfaces with compile-time guarantees. 
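+
+As a usage sketch, a signature can be declared with C++20 designated initializers. The example
+below is adapted from the unit tests under `test/conv/` in this project (the `ConvSignature`
+aggregate and `TensorLayout` enum are defined in this directory):
+
+```cpp
+constexpr ConvSignature FwdConvSignature{
+    .spatial_dim            = 1,
+    .direction              = ConvDirection::FORWARD,
+    .data_type              = DataType::FP16,
+    .accumulation_data_type = DataType::FP32,
+    .input  = {.config = {.layout = TensorLayout::NWGC}},
+    .weight = {.config = {.layout = TensorLayout::GKXC}},
+    .output = {.config = {.layout = TensorLayout::NWGK}}};
+```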
+
+### Architecture
+
+The signature system is organized into a hierarchical structure:
+
+```
+┌─────────────────────────────────────────────────────────┐
+│                      ConvSignature                      │
+├─────────────────────────────────────────────────────────┤
+│ Properties:                                             │
+│  • spatial_dim: int (1D, 2D, or 3D)                     │
+│  • direction: ConvDirection (Fwd/BwdData/BwdWeight)     │
+│  • data_type: DataType (default data type)              │
+│  • accumulation_data_type: DataType                     │
+│  • input:  ConvTensor ──┐                               │
+│  • weight: ConvTensor ──┤                               │
+│  • output: ConvTensor ──┘                               │
+└─────────────────────────┼───────────────────────────────┘
+                          │
+                          ▼
+       ┌─────────────────────────────────────────┐
+       │               ConvTensor                │
+       ├─────────────────────────────────────────┤
+       │ ╔═════════════════════════════════════╗ │
+       │ ║ TensorConfig (required)             ║ │
+       │ ╠═════════════════════════════════════╣ │
+       │ ║ • layout: ConvLayout                ║ │
+       │ ║ • data_type: DataType (optional)    ║ │
+       │ ║ • compute_type: DataType (optional) ║ │
+       │ ╚═════════════════════════════════════╝ │
+       │                                         │
+       │ ┌─────────────────────────────────────┐ │
+       │ │ TensorOperation (optional)          │ │
+       │ ├─────────────────────────────────────┤ │
+       │ │ • elementwise_operation             │ │
+       │ │ • auxiliary_operand_configs[]       │ │
+       │ │   (each is also ConvTensor) ◄───────┼─┼─┐
+       │ └─────────────────────────────────────┘ │ │
+       └─────────────────────────────────────────┘ │
+                                                   │
+                           Recursive ──────────────┘
+```
+
+**Key design points:**
+- `ConvSignature` contains three `ConvTensor` instances (input, weight, output)
+- All tensors share the same `ConvTensor` structure
+- Each `ConvTensor` has:
+  - `TensorConfig` (required): defines the layout as well as optional data and compute type overrides
+  - `TensorOperation` (optional): defines fused elementwise operations
+- Auxiliary operands (e.g., bias) in `TensorOperation` also use the `ConvTensor` type
+
+### Core Components
+
+#### 1. Signature Level
+
+The top-level signature contains global properties that apply to the entire convolution operation:
+
+```cpp
+template <typename T>
+concept ConvSignatureDescriptor = requires(T t) {
+    { t.spatial_dim } -> std::convertible_to<int>;       // 1, 2, or 3
+    { t.data_type } -> std::convertible_to<DataType>;    // Default data type
+    { t.input } -> ConvTensorDescriptor;
+    { t.weight } -> ConvTensorDescriptor;
+    { t.output } -> ConvTensorDescriptor;
+    requires ConvolutionDirectionWellDefinedIfProvided<T>; // Optional direction
+};
+```
+
+**Properties:**
+- **`spatial_dim`**: Dimensionality of the convolution (1D, 2D, or 3D)
+- **`direction`**: Operation type (optional, defaults to `FORWARD`)
+  - `FORWARD`: Standard forward convolution
+  - `BACKWARD_DATA`: Gradient computation w.r.t. input
+  - `BACKWARD_WEIGHT`: Gradient computation w.r.t. weights
+- **`data_type`**: Default data type for all tensors (FP32, FP16, BF16, FP8, I8, U8)
+- **`accumulation_data_type`**: Type used for internal accumulation
+
+#### 2. Tensor Level
+
+Each tensor (input, weight, output) has its own descriptor:
+
+```cpp
+template <typename T>
+concept ConvTensorDescriptor = requires(T t) {
+    { t.config } -> TensorConfigDescriptor;
+    requires ElementwiseOpWellDefinedIfProvided<T>;
+};
+```
+
+A tensor descriptor encapsulates:
+- **Configuration**: Layout and data type information
+- **Operation** (optional): Fused elementwise operations on this tensor
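+
+For example, a fused scale on the output tensor is expressed by populating the optional
+`operation` member of that tensor only. This sketch is adapted from the 1D BF16 test in
+`test/conv/` (`ScaledConvSignature` is an illustrative name; the input and weight tensors
+keep the implicit `PASS_THROUGH` default):
+
+```cpp
+constexpr ConvSignature ScaledConvSignature{
+    .spatial_dim            = 1,
+    .direction              = ConvDirection::FORWARD,
+    .data_type              = DataType::BF16,
+    .accumulation_data_type = DataType::FP32,
+    .input  = {.config = {.layout = TensorLayout::NGCW}},
+    .weight = {.config = {.layout = TensorLayout::GKXC}},
+    .output = {.config    = {.layout = TensorLayout::NGKW},
+               .operation = {.elementwise_operation = ElementwiseOperation::SCALE}}};
+```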
+
+#### 3. Tensor Configuration
+
+Describes the memory layout and data types:
+
+```cpp
+template <typename T>
+concept TensorConfigDescriptor = requires(T t) {
+    { t.layout } -> std::convertible_to<ConvLayout>;
+    { t.data_type } -> std::convertible_to<DataType>; // Optional override
+};
+```
+
+**Layout Types** (dimension-specific):
+- **1D Convolution**:
+  - Input: `GNCW`, `GNWC`, `NWGC`, `NGCW`, `G_NW_C_strided`
+  - Weight: `GKXC`, `GKCX`, `KXGC`, `G_K_X_C_strided`
+  - Output: `GNKW`, `GNWK`, `NWGK`, `NGKW`, `G_NW_K_strided`
+
+- **2D Convolution**:
+  - Input: `GNCHW`, `GNHWC`, `NHWGC`, `NGCHW`, `G_NHW_C_strided`
+  - Weight: `GKYXC`, `GKCYX`, `KYXGC`, `G_K_YX_C_strided`
+  - Output: `GNKHW`, `GNHWK`, `NHWGK`, `NGKHW`, `G_NHW_K_strided`
+
+- **3D Convolution**:
+  - Input: `GNCDHW`, `GNDHWC`, `NDHWGC`, `NGCDHW`, `G_NDHW_C_strided`
+  - Weight: `GKZYXC`, `GKCZYX`, `KZYXGC`, `G_K_ZYX_C_strided`
+  - Output: `GNKDHW`, `GNDHWK`, `NDHWGK`, `NGKDHW`, `G_NDHW_K_strided`
+
+Where:
+- `G` = Groups
+- `N` = Batch size
+- `C` = Input channels
+- `K` = Output channels (filters)
+- `W`, `H`, `D` = Width, Height, Depth (spatial dimensions)
+- `X`, `Y`, `Z` = Filter dimensions
+
+#### 4. Tensor Operations
+
+Describes fused elementwise operations applied to a tensor:
+
+```cpp
+template <typename T>
+concept TensorOperatorDescriptor = requires(T t) {
+    { t.elementwise_operation } -> std::convertible_to<ElementwiseOperation>;
+    requires AuxiliaryOperandConfigsWellDefinedIfProvided<T>;
+};
+```
+
+**Supported Operations:**
+- `PASS_THROUGH`: No operation (identity)
+- `SCALE`: Multiply by a scalar
+- `CLAMP`: Clamp values to a range
+- `BIAS_BNORM_CLAMP`: Bias addition + batch normalization + clamp
+- `SCALEADD_SCALEADD_RELU`: Fused scale-add operations + ReLU activation
+
+**Auxiliary Operands:**
+Some operations require additional tensor inputs (e.g., bias tensors, scaling factors). These are specified through `auxiliary_operand_configs`, which is an array of `TensorConfigDescriptor` objects describing the layout and data type of each auxiliary input.
+
+### Concepts and Validation
+
+The signature system uses C++20 concepts for compile-time validation at multiple levels:
+
+#### Constraint Concepts
+
+```cpp
+// Spatial dimension must be 1, 2, or 3
+template <auto N>
+concept ConvSpatialDim = std::is_integral_v<decltype(N)> && (N == 1 || N == 2 || N == 3);
+
+// Valid data types for convolution
+template <DataType T>
+concept ValidConvDataType =
+    (T == DataType::FP32) || (T == DataType::FP16) || (T == DataType::BF16) ||
+    (T == DataType::FP8) || (T == DataType::I8) || (T == DataType::U8);
+```
+
+#### Validation Concept
+
+```cpp
+// Validates a complete signature
+template <auto Sig>
+concept ValidConvSignature = requires {
+    requires ConvSpatialDim<Sig.spatial_dim>;
+    requires ValidConvDataType<Sig.data_type>;
+};
+```
+
+#### Tensor Descriptors
+
+The layout, data type, and elementwise operation are described per tensor. This multi-level hierarchy allows:
+- **Flexibility**: Each tensor can have an independent layout and data type
+- **Reusability**: Common configurations can be shared across different signatures
+- **Extensibility**: New properties can be added to specific levels without affecting others
+- **Clarity**: Separates concerns (global properties vs. tensor-specific properties)
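+
+These concepts can also be exercised directly. A minimal sketch, assuming the
+`FwdConvSignature` object declared in the Overview (the factories in `factory/` apply
+equivalent constraints internally when a builder is instantiated):
+
+```cpp
+// Compile-time checks: the first verifies the structural shape of the signature
+// type, the second validates its spatial_dim and data_type values.
+static_assert(ConvSignatureDescriptor<decltype(FwdConvSignature)>);
+static_assert(ValidConvSignature<FwdConvSignature>);
+```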
+
+#### Optional Signature Fields
+
+Several fields in the signature are optional:
+- **`direction`**: Defaults to `FORWARD` if not specified, reducing boilerplate for the common case
+- **Tensor `data_type`**: Falls back to the signature's default, allowing mixed precision with minimal specification
+- **Tensor `operation`**: Defaults to `PASS_THROUGH`, supporting both fused and non-fused operations with the same interface
+
+This design follows the principle of "make the common case simple, the complex case possible."
+
+#### Union-Based Layout Representation
+
+The `ConvLayout` type uses unions to support dimension-agnostic code:
+
+```cpp
+struct ConvLayout {
+    union {
+        ConvInputLayout _input_layout;
+        ConvWeightLayout _weight_layout;
+        ConvOutputLayout _output_layout;
+        ConvAuxiliaryTensorLayout _aux_tensor_layout;
+    };
+    // ... constructors for each type
+};
+```
+
+This allows:
+- A single type to represent all layout variants
+- Type-safe construction through overloaded constructors
+- Compile-time enforcement of valid combinations through concepts
+
+---
diff --git a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp
index 05575590c4..8dc92c6bef 100644
--- a/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp
+++ b/experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp
@@ -28,24 +28,104 @@ namespace ck_tile::builder {
 template <auto N>
 concept ConvSpatialDim = std::is_integral_v<decltype(N)> && (N == 1 || N == 2 || N == 3);
 
-// Constraints for forward convolution layouts.
-template <int SpatialDim, typename Layout>
-concept ValidConvLayoutForSpatialDim =
-    (SpatialDim == 1 && std::same_as<Layout, GroupConvLayout1D>) ||
-    (SpatialDim == 2 && std::same_as<Layout, GroupConvLayout2D>) ||
-    (SpatialDim == 3 && std::same_as<Layout, GroupConvLayout3D>);
-
-// Constrains convolution data types to common floating-point types.
 template <DataType T>
-concept ConvDataType = (T == DataType::FP32) || (T == DataType::FP16) || (T == DataType::BF16) ||
-                       (T == DataType::FP8) || (T == DataType::I8) || (T == DataType::U8);
+concept ValidConvDataType =
+    (T == DataType::FP32) || (T == DataType::FP16) || (T == DataType::BF16) ||
+    (T == DataType::FP8) || (T == DataType::I8) || (T == DataType::U8);
+
+template <TensorLayout L>
+concept BiasTensorLayout =
+    (L == TensorLayout::GC) || (L == TensorLayout::G_C_strided) || (L == TensorLayout::G_K_strided);
+
+template <TensorLayout L>
+concept ConvInputLayout1D =
+    (L == TensorLayout::GNCW) || (L == TensorLayout::GNWC) || (L == TensorLayout::NWGC) ||
+    (L == TensorLayout::NGCW) || (L == TensorLayout::G_NW_C_strided);
+
+template <TensorLayout L>
+concept ConvInputLayout2D =
+    (L == TensorLayout::GNCHW) || (L == TensorLayout::GNHWC) || (L == TensorLayout::NHWGC) ||
+    (L == TensorLayout::NGCHW) || (L == TensorLayout::G_NHW_C_strided);
+
+template <TensorLayout L>
+concept ConvInputLayout3D =
+    (L == TensorLayout::GNCDHW) || (L == TensorLayout::GNDHWC) || (L == TensorLayout::NDHWGC) ||
+    (L == TensorLayout::NGCDHW) || (L == TensorLayout::G_NDHW_C_strided);
+
+template <TensorLayout L>
+concept ConvWeightLayout1D = (L == TensorLayout::GKXC) || (L == TensorLayout::GKCX) ||
+                             (L == TensorLayout::KXGC) || (L == TensorLayout::G_K_X_C_strided);
+
+template <TensorLayout L>
+concept ConvWeightLayout2D = (L == TensorLayout::GKYXC) || (L == TensorLayout::GKCYX) ||
+                             (L == TensorLayout::KYXGC) || (L == TensorLayout::G_K_YX_C_strided);
+
+template <TensorLayout L>
+concept ConvWeightLayout3D = (L == TensorLayout::GKZYXC) || (L == TensorLayout::GKCZYX) ||
+                             (L == TensorLayout::KZYXGC) || (L == TensorLayout::G_K_ZYX_C_strided);
+
+template <TensorLayout L>
+concept ConvOutputLayout1D =
+    (L == TensorLayout::GNKW) || (L == TensorLayout::GNWK) || (L == TensorLayout::NWGK) ||
+    (L == TensorLayout::NGKW) || (L == TensorLayout::G_NW_K_strided);
+
+template <TensorLayout L>
+concept ConvOutputLayout2D =
+    (L == TensorLayout::GNKHW) || (L == TensorLayout::GNHWK) || (L == TensorLayout::NHWGK) ||
+    (L == TensorLayout::NGKHW) || (L == TensorLayout::G_NHW_K_strided);
+
+template <TensorLayout L>
+concept ConvOutputLayout3D =
+    (L == TensorLayout::GNKDHW) || (L == TensorLayout::GNDHWK) || (L == TensorLayout::NDHWGK) ||
+    (L == TensorLayout::NGKDHW) || (L == TensorLayout::G_NDHW_K_strided);
+
+template <typename T>
+concept TensorConfigDescriptor = requires(T t) {
+    { t.layout } -> std::convertible_to<TensorLayout>;
+    // Only require that a data type is defined. It might be set to an undefined value, in which
+    // case the signature's data type is used.
+    { t.data_type } -> std::convertible_to<DataType>;
+};
+
+template <typename T>
+concept HasAuxiliaryOperandConfigs = requires(T t) {
+    { t.auxiliary_operand_configs };
+};
+
+namespace detail {
 template <typename T>
-concept ConvLayout = std::same_as<std::remove_cvref_t<T>, GroupConvLayout>;
+struct IsArrayOfTensorConfigDescriptors : std::false_type
+{
+};
+
+template <typename T, std::size_t N>
+    requires TensorConfigDescriptor<T>
+struct IsArrayOfTensorConfigDescriptors<std::array<T, N>> : std::true_type
+{
+};
+} // namespace detail
+
+template <typename T>
+concept ConvertibleToArrayOfTensorConfigs =
+    detail::IsArrayOfTensorConfigDescriptors<std::remove_cvref_t<T>>::value;
 
 template <typename T>
-concept HasElementwiseOp = requires(T t) {
-    { t.elementwise_operation };
+concept AuxiliaryOperandConfigsWellDefinedIfProvided = requires(T t) {
+    requires !HasAuxiliaryOperandConfigs<T> || requires {
+        { t.auxiliary_operand_configs } -> ConvertibleToArrayOfTensorConfigs;
+    };
+};
+
+template <typename T>
+concept TensorOperatorDescriptor = requires(T t) {
+    { t.elementwise_operation } -> std::convertible_to<ElementwiseOperation>;
+    requires AuxiliaryOperandConfigsWellDefinedIfProvided<T>;
+};
+
+template <typename T>
+concept HasTensorOp = requires(T t) {
+    { t.operation };
 };
 
 template <typename T>
@@ -56,11 +136,8 @@ concept HasConvolutionDirection = requires(T t) {
 // Note: it is not required to provide an ElementwiseOp, but if one is provided, check if well
 // defined
 template <typename T>
-concept ElementwiseOpWellDefinedIfProvided = requires(T t) {
-    requires !HasElementwiseOp<T> || requires {
-        { t.elementwise_operation } -> std::convertible_to<ElementwiseOperation>;
-    };
-};
+concept ElementwiseOpWellDefinedIfProvided =
+    !HasTensorOp<T> || requires(T t) { requires TensorOperatorDescriptor<decltype(t.operation)>; };
 
 // Note: it is not required to provide a convolution, but if one is provided, check if well defined
 template <typename T>
@@ -70,13 +147,27 @@ concept ConvolutionDirectionWellDefinedIfProvided = requires(T t) {
     };
 };
 
+// Concept for the convolution tensor
+template <typename T>
+concept ConvTensorDescriptor = requires(T t) {
+    { t.config } -> TensorConfigDescriptor;
+    requires ElementwiseOpWellDefinedIfProvided<T>;
+};
+
+template <typename T>
+concept HasElementwiseOpWithAuxiliaryOperands = requires(T t) {
+    requires HasTensorOp<T>;
+    requires HasAuxiliaryOperandConfigs<decltype(t.operation)>;
+};
+
 // Concept for a type that defines a convolution's operational signature.
 template <typename T>
 concept ConvSignatureDescriptor = requires(T t) {
     { t.spatial_dim } -> std::convertible_to<int>;
-    { t.layout } -> ConvLayout;
     { t.data_type } -> std::convertible_to<DataType>;
-    requires ElementwiseOpWellDefinedIfProvided<T>;
+    { t.input } -> ConvTensorDescriptor;
+    { t.weight } -> ConvTensorDescriptor;
+    { t.output } -> ConvTensorDescriptor;
     requires ConvolutionDirectionWellDefinedIfProvided<T>;
 };
 
@@ -84,7 +175,7 @@ template <auto Sig>
 concept ValidConvSignature = requires {
     requires ConvSpatialDim<Sig.spatial_dim>;
-    requires ConvDataType<Sig.data_type>;
+    requires ValidConvDataType<Sig.data_type>;
 };
 
 // Predicate for forward convolution (default if direction is not included).
@@ -100,4 +191,22 @@ template <auto Sig>
 concept ConvDirectionIsBackwardData = (Sig.direction == ConvDirection::BACKWARD_DATA);
 
 template <auto Sig>
 concept ConvDirectionIsBackwardWeight = (Sig.direction == ConvDirection::BACKWARD_WEIGHT);
 
+// Constraints for forward convolution input layouts.
+template <int SpatialDim, TensorLayout L>
+concept ValidConvInputLayoutForSpatialDim =
+    (SpatialDim == 1 && ConvInputLayout1D<L>) || (SpatialDim == 2 && ConvInputLayout2D<L>) ||
+    (SpatialDim == 3 && ConvInputLayout3D<L>);
+
+// Constraints for forward convolution output layouts.
+template <int SpatialDim, TensorLayout L>
+concept ValidConvOutputLayoutForSpatialDim =
+    (SpatialDim == 1 && ConvOutputLayout1D<L>) || (SpatialDim == 2 && ConvOutputLayout2D<L>) ||
+    (SpatialDim == 3 && ConvOutputLayout3D<L>);
+
+// Constraints for forward convolution weight layouts.
+template <int SpatialDim, TensorLayout L>
+concept ValidConvWeightLayoutForSpatialDim =
+    (SpatialDim == 1 && ConvWeightLayout1D<L>) || (SpatialDim == 2 && ConvWeightLayout2D<L>) ||
+    (SpatialDim == 3 && ConvWeightLayout3D<L>);
+
 } // namespace ck_tile::builder
diff --git a/experimental/builder/include/ck_tile/builder/conv_signature_utils.hpp b/experimental/builder/include/ck_tile/builder/conv_signature_utils.hpp
deleted file mode 100644
index 65a4b60588..0000000000
--- a/experimental/builder/include/ck_tile/builder/conv_signature_utils.hpp
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
-// SPDX-License-Identifier: MIT
-
-#pragma once
-
-#include
-#include
-
-#include "ck_tile/builder/types.hpp"
-
-namespace ck_tile::builder {
-/**********************************************
- * constexpr helper functions for optional parameters
- **********************************************/
-
-template <auto Sig>
-concept ProvidesElementwiseOperation = requires { Sig.elementwiseOperation; };
-
-template <auto Sig>
-concept ProvidesConvolutionDirection = requires { Sig.direction; };
-
-template <auto Sig>
-constexpr auto get_elementwise_operation()
-{
-    if constexpr(ProvidesElementwiseOperation<Sig>)
-    {
-        return Sig.elementwise_operation;
-    }
-    else
-    {
-        return ElementwiseOperation::PASS_THROUGH;
-    }
-}
-
-template <auto Sig>
-constexpr auto get_conv_direction()
-{
-    if constexpr(ProvidesConvolutionDirection<Sig>)
-    {
-        return Sig.direction;
-    }
-    else
-    {
-        return ConvDirection::FORWARD;
-    }
-}
-} // namespace ck_tile::builder
diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_dl_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_dl_factory.hpp
index dee918cc1f..0c675ac7f1 100644
--- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_dl_factory.hpp
+++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_dl_factory.hpp
@@ -7,7 +7,6 @@
 #include "ck_tile/builder/conv_signature_concepts.hpp"
 #include "ck_tile/builder/conv_algorithm_concepts.hpp"
 #include "ck_tile/builder/builder_utils.hpp"
-#include "ck_tile/builder/conv_signature_utils.hpp"
 #include "ck_tile/builder/factory/helpers/conv_tensor_layout.hpp"
 #include "ck_tile/builder/factory/helpers/conv_tensor_type.hpp"
 #include "ck_tile/builder/factory/helpers/conv_elementwise_op.hpp"
@@ -25,11 +24,9 @@ template ());
-    using Types = internal::ConvTensorTypes;
-    using Ops = internal::ElementwiseOps()>;
+    using Layouts = internal::ConvTensorLayouts;
+    using Types = internal::FwdConvTensorDataTypes;
+    using Ops = internal::ElementwiseOps;
     using AlgorithmType = decltype(ALGORITHM);
 
     static constexpr auto FWD_CONV_SPECIALIZATION = internal::SetFwdConvSpecialization();
diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_large_tensor_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_large_tensor_factory.hpp
index 383ecbf8c9..98e368ca61 100644
--- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_large_tensor_factory.hpp
+++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_large_tensor_factory.hpp
@@ -8,7 +8,6 @@
 #include "ck_tile/builder/conv_algorithm_concepts.hpp"
 #include "ck_tile/builder/conv_algorithm_limits.hpp"
 #include "ck_tile/builder/builder_utils.hpp"
-#include "ck_tile/builder/conv_signature_utils.hpp"
#include "ck_tile/builder/factory/helpers/conv_tensor_layout.hpp" #include "ck_tile/builder/factory/helpers/conv_tensor_type.hpp" #include "ck_tile/builder/factory/helpers/conv_elementwise_op.hpp" @@ -27,11 +26,9 @@ template ()); - using Types = internal::ConvTensorTypes; - using Ops = internal::ElementwiseOps()>; + using Layouts = internal::ConvTensorLayouts; + using Types = internal::FwdConvTensorDataTypes; + using Ops = internal::ElementwiseOps; using AlgorithmType = decltype(ALGORITHM); static constexpr auto BASE_ALGORITHM = ALGORITHM.base_algorithm; diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_v3_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_v3_factory.hpp index 90d4abe3e7..79955a1f44 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_v3_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_v3_factory.hpp @@ -8,7 +8,6 @@ #include "ck_tile/builder/conv_algorithm_concepts.hpp" #include "ck_tile/builder/conv_algorithm_limits.hpp" #include "ck_tile/builder/builder_utils.hpp" -#include "ck_tile/builder/conv_signature_utils.hpp" #include "ck_tile/builder/factory/helpers/conv_tensor_layout.hpp" #include "ck_tile/builder/factory/helpers/conv_tensor_type.hpp" #include "ck_tile/builder/factory/helpers/conv_elementwise_op.hpp" @@ -27,11 +26,9 @@ template ()); - using Types = internal::ConvTensorTypes; - using Ops = internal::ElementwiseOps()>; + using Layouts = internal::ConvTensorLayouts; + using Types = internal::FwdConvTensorDataTypes; + using Ops = internal::ElementwiseOps; using AlgorithmType = decltype(ALGORITHM); static_assert(ALGORITHM.transfer.a.lds_transfer.is_direct_load == diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_wmma_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_wmma_factory.hpp index e35b3f3d46..fcce46aea7 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_wmma_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_wmma_factory.hpp @@ -8,7 +8,6 @@ #include "ck_tile/builder/conv_algorithm_concepts.hpp" #include "ck_tile/builder/conv_algorithm_limits.hpp" #include "ck_tile/builder/builder_utils.hpp" -#include "ck_tile/builder/conv_signature_utils.hpp" #include "ck_tile/builder/factory/helpers/conv_tensor_layout.hpp" #include "ck_tile/builder/factory/helpers/conv_tensor_type.hpp" #include "ck_tile/builder/factory/helpers/conv_elementwise_op.hpp" @@ -27,11 +26,9 @@ template ()); - using Types = internal::ConvTensorTypes; - using Ops = internal::ElementwiseOps()>; + using Layouts = internal::ConvTensorLayouts; + using Types = internal::FwdConvTensorDataTypes; + using Ops = internal::ElementwiseOps; using AlgorithmType = decltype(ALGORITHM); static constexpr auto FWD_CONV_SPECIALIZATION = internal::SetFwdConvSpecialization(); diff --git a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_xdl_factory.hpp b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_xdl_factory.hpp index fc5b32f799..df7fb25168 100644 --- a/experimental/builder/include/ck_tile/builder/factory/conv_fwd_xdl_factory.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/conv_fwd_xdl_factory.hpp @@ -8,7 +8,6 @@ #include "ck_tile/builder/conv_algorithm_concepts.hpp" #include "ck_tile/builder/conv_algorithm_limits.hpp" #include "ck_tile/builder/builder_utils.hpp" -#include "ck_tile/builder/conv_signature_utils.hpp" #include 
"ck_tile/builder/factory/helpers/conv_tensor_layout.hpp" #include "ck_tile/builder/factory/helpers/conv_tensor_type.hpp" #include "ck_tile/builder/factory/helpers/conv_elementwise_op.hpp" @@ -27,11 +26,9 @@ template ()); - using Types = internal::ConvTensorTypes; - using Ops = internal::ElementwiseOps()>; + using Layouts = internal::ConvTensorLayouts; + using Types = internal::FwdConvTensorDataTypes; + using Ops = internal::ElementwiseOps; using AlgorithmType = decltype(ALGORITHM); static constexpr auto FWD_CONV_SPECIALIZATION = internal::SetFwdConvSpecialization(); diff --git a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op.hpp index 4a13f4e508..a39cd7410b 100644 --- a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_elementwise_op.hpp @@ -6,32 +6,70 @@ #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck_tile/builder/builder_utils.hpp" #include "ck_tile/builder/types.hpp" +#include "ck_tile/builder/conv_signature_concepts.hpp" namespace ck_tile::builder::factory::internal { -template -struct ElementwiseOps +template +struct ElementwiseOpToCK +{ + static_assert(sizeof(UnsupportedEnumValue) == 0, + "Unsupported elementwise operation conversion to CK."); +}; + +template <> +struct ElementwiseOpToCK { - // This will trigger if a specialization for the given DataType is not found. - // We should always catch this in an earlier validation check. - static_assert(sizeof(UnsupportedEnumValue) == 0, - "Internal error. Unsupported elementwise operation for convolution factory."); + using Op = ck::tensor_operation::element_wise::PassThrough; }; template <> -struct ElementwiseOps +struct ElementwiseOpToCK { - using AElementwiseOp = ck::tensor_operation::element_wise::PassThrough; - using BElementwiseOp = ck::tensor_operation::element_wise::PassThrough; - using CDEElementwiseOp = ck::tensor_operation::element_wise::PassThrough; + using Op = ck::tensor_operation::element_wise::Scale; }; template <> -struct ElementwiseOps +struct ElementwiseOpToCK +{ + using Op = ck::tensor_operation::element_wise::Clamp; +}; + +template <> +struct ElementwiseOpToCK +{ + using Op = ck::tensor_operation::element_wise::ScaleAddScaleAddRelu; +}; + +template <> +struct ElementwiseOpToCK +{ + using Op = ck::tensor_operation::element_wise::BiasNormalizeInInferClamp; +}; + +template +consteval auto GetElementwiseOp() +{ + if constexpr(HasTensorOp) + { + constexpr auto op = TensorDesc.operation.elementwise_operation; + return ElementwiseOpToCK{}; + } + else + { + return ElementwiseOpToCK{}; + } +} + +template +struct ElementwiseOps { - using AElementwiseOp = ck::tensor_operation::element_wise::PassThrough; - using BElementwiseOp = ck::tensor_operation::element_wise::PassThrough; - using CDEElementwiseOp = ck::tensor_operation::element_wise::Scale; + static constexpr auto input_op = GetElementwiseOp(); + static constexpr auto weight_op = GetElementwiseOp(); + static constexpr auto output_op = GetElementwiseOp(); + using AElementwiseOp = typename decltype(input_op)::Op; + using BElementwiseOp = typename decltype(weight_op)::Op; + using CDEElementwiseOp = typename decltype(output_op)::Op; }; } // namespace ck_tile::builder::factory::internal diff --git a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_layout.hpp 
b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_layout.hpp index b3effa782e..a6c0b48c54 100644 --- a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_layout.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_layout.hpp @@ -6,141 +6,228 @@ #include "ck/tensor_operation/gpu/device/tensor_layout.hpp" #include "ck/utility/tuple.hpp" #include "ck_tile/builder/conv_signature_concepts.hpp" +#include "ck_tile/builder/builder_utils.hpp" namespace ck_tile::builder::factory::internal { -// Type mappings from the builder FwdGroupConvLayout enum classes to the CK tensor data types. -template - requires(ConvSpatialDim && ValidConvLayoutForSpatialDim) -struct ConvTensorLayouts +template +struct LayoutToCK { - // This will trigger if a specialization for the given layout is not found. - // We should always catch this in an earlier validation check. - using Layout = decltype(LayoutValue); - static_assert(sizeof(Layout) == 0, - "Internal error. Unsupported layout for convolution factory."); + static_assert(sizeof(UnsupportedEnumValue) == 0, + "Unsupported layout conversion to CK."); }; -// 1D Forward Convolution Layout Specializations +// Bias layouts template <> -struct ConvTensorLayouts +struct LayoutToCK { - using ALayout = ck::tensor_layout::convolution::NWGC; - using BLayout = ck::tensor_layout::convolution::GKXC; - using DsLayout = ck::Tuple<>; - using ELayout = ck::tensor_layout::convolution::NWGK; + using type = ck::tensor_layout::convolution::G_K; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GC; }; - template <> -struct ConvTensorLayouts +struct LayoutToCK { - using ALayout = ck::tensor_layout::convolution::NGCW; - using BLayout = ck::tensor_layout::convolution::GKXC; - using DsLayout = ck::Tuple<>; - using ELayout = ck::tensor_layout::convolution::NGKW; + using type = ck::tensor_layout::convolution::G_C; }; +// Input 1D template <> -struct ConvTensorLayouts +struct LayoutToCK { - using ALayout = ck::tensor_layout::convolution::GNWC; - using BLayout = ck::tensor_layout::convolution::GKXC; - using DsLayout = ck::Tuple<>; - using ELayout = ck::tensor_layout::convolution::GNWK; + using type = ck::tensor_layout::convolution::NWGC; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NGCW; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GNWC; }; +// Input 2D +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NGCHW; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NHWGC; +}; template <> -struct ConvTensorLayouts +struct LayoutToCK { - using ALayout = ck::tensor_layout::convolution::NGCW; - using BLayout = ck::tensor_layout::convolution::GKCX; - using DsLayout = ck::Tuple<>; - using ELayout = ck::tensor_layout::convolution::NGKW; + using type = ck::tensor_layout::convolution::GNHWC; }; +// Input 3D template <> -struct ConvTensorLayouts +struct LayoutToCK { - using ALayout = ck::tensor_layout::convolution::NGCHW; - using BLayout = ck::tensor_layout::convolution::GKYXC; - using DsLayout = ck::Tuple<>; - using ELayout = ck::tensor_layout::convolution::NGKHW; + using type = ck::tensor_layout::convolution::NGCDHW; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NDHWGC; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GNDHWC; }; +// Weight 1D +template <> +struct LayoutToCK +{ + 
using type = ck::tensor_layout::convolution::GKXC; +}; template <> -struct ConvTensorLayouts +struct LayoutToCK { - using ALayout = ck::tensor_layout::convolution::NHWGC; - using BLayout = ck::tensor_layout::convolution::GKYXC; - using DsLayout = ck::Tuple<>; - using ELayout = ck::tensor_layout::convolution::NHWGK; + using type = ck::tensor_layout::convolution::GKCX; }; +// Weight 2D template <> -struct ConvTensorLayouts +struct LayoutToCK { - using ALayout = ck::tensor_layout::convolution::GNHWC; - using BLayout = ck::tensor_layout::convolution::GKYXC; - using DsLayout = ck::Tuple<>; - using ELayout = ck::tensor_layout::convolution::GNHWK; + using type = ck::tensor_layout::convolution::GKYXC; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GKCYX; }; +// Weight 3D +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GKCZYX; +}; template <> -struct ConvTensorLayouts +struct LayoutToCK { - using ALayout = ck::tensor_layout::convolution::NGCHW; - using BLayout = ck::tensor_layout::convolution::GKCYX; - using DsLayout = ck::Tuple<>; - using ELayout = ck::tensor_layout::convolution::NGKHW; + using type = ck::tensor_layout::convolution::GKZYXC; }; +// Output 1D +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NWGK; +}; template <> -struct ConvTensorLayouts +struct LayoutToCK { - using ALayout = ck::tensor_layout::convolution::NGCDHW; - using BLayout = ck::tensor_layout::convolution::GKCZYX; - using DsLayout = ck::Tuple<>; - using ELayout = ck::tensor_layout::convolution::NGKDHW; + using type = ck::tensor_layout::convolution::NGKW; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GNWK; }; +// Output 2D template <> -struct ConvTensorLayouts +struct LayoutToCK { - using ALayout = ck::tensor_layout::convolution::NDHWGC; - using BLayout = ck::tensor_layout::convolution::GKZYXC; - using DsLayout = ck::Tuple<>; - using ELayout = ck::tensor_layout::convolution::NDHWGK; + using type = ck::tensor_layout::convolution::NGKHW; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NHWGK; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GNHWK; }; +// Output 3D +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NGKDHW; +}; template <> -struct ConvTensorLayouts +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::NDHWGK; +}; +template <> +struct LayoutToCK +{ + using type = ck::tensor_layout::convolution::GNDHWK; +}; + +template +consteval auto TensorLayoutToCK() +{ + return typename LayoutToCK::type{}; +} + +struct EmptyAuxiliaryTensorLayout { - using ALayout = ck::tensor_layout::convolution::GNDHWC; - using BLayout = ck::tensor_layout::convolution::GKZYXC; - using DsLayout = ck::Tuple<>; - using ELayout = ck::tensor_layout::convolution::GNDHWK; + using type = ck::Tuple<>; }; -template -consteval auto GetTensorLayout() +template +consteval auto GetAuxiliaryTensorLayoutTuple(std::index_sequence) { + return ck::Tuple< + decltype(TensorLayoutToCK())...>{}; +} + +template + requires(ConvSpatialDim) +struct AuxiliaryTensorLayouts +{ + static constexpr auto Size = AuxiliaryTensorConfigsValue.size(); + using type = decltype(GetAuxiliaryTensorLayoutTuple( + std::make_index_sequence{})); +}; - if constexpr(SPATIAL_DIM == 1) - { - return internal::ConvTensorLayouts{}; - } - else if constexpr(SPATIAL_DIM == 2) - { - return internal::ConvTensorLayouts{}; - } - else if 
constexpr(SPATIAL_DIM == 3) - { - return internal::ConvTensorLayouts{}; - } - else - { - static_assert(false, "Unsupported spatial dimension for convolution layout."); - } +// TODO: Currently only the ouput tensor can have auxiliary tensors (e.g., bias). +template + requires(HasElementwiseOpWithAuxiliaryOperands) +consteval auto GetAuxiliaryTensorLayouts() +{ + return AuxiliaryTensorLayouts{}; +} + +template + requires(!HasElementwiseOpWithAuxiliaryOperands) +consteval auto GetAuxiliaryTensorLayouts() +{ + return EmptyAuxiliaryTensorLayout{}; } +template + requires(ConvSpatialDim && + ValidConvInputLayoutForSpatialDim && + ValidConvWeightLayoutForSpatialDim && + ValidConvOutputLayoutForSpatialDim) +struct ConvTensorLayouts +{ + static_assert(DIR == ConvDirection::FORWARD, "Only Forward convolution is supported."); + using ALayout = decltype(TensorLayoutToCK()); + using BLayout = decltype(TensorLayoutToCK()); + using ELayout = decltype(TensorLayoutToCK()); + using DsLayout = decltype(GetAuxiliaryTensorLayouts())::type; +}; + } // namespace ck_tile::builder::factory::internal diff --git a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_type.hpp b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_type.hpp index d8a8eb5da0..81de2140f2 100644 --- a/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_type.hpp +++ b/experimental/builder/include/ck_tile/builder/factory/helpers/conv_tensor_type.hpp @@ -6,82 +6,172 @@ #include "ck/utility/data_type.hpp" #include "ck_tile/builder/types.hpp" #include "ck_tile/builder/builder_utils.hpp" +#include "ck_tile/builder/conv_signature_concepts.hpp" namespace ck_tile::builder::factory::internal { -// Type mappings from builder convolution data type to CK tensor types. -template -struct ConvTensorTypes +template +struct DataTypeToCK { - // This will trigger if a specialization for the given DataType is not found. - // We should always catch this in an earlier validation check. - static_assert(sizeof(UnsupportedEnumValue) == 0, - "Internal error. Unsupported data type for convolution factory."); + // Catch unsupported data types at compile time + static_assert(sizeof(UnsupportedEnumValue
) == 0, "Unsupported data type conversion to CK."); }; template <> -struct ConvTensorTypes -{ - using ADataType = ck::half_t; - using AComputeType = ck::half_t; - using BDataType = ck::half_t; - using BComputeType = ck::half_t; - using CShuffleDataType = ck::half_t; - using DsDataTypes = ck::Tuple<>; - using AccDataType = float; - using EDataType = ck::half_t; +struct DataTypeToCK +{ + using type = ck::half_t; }; - template <> -struct ConvTensorTypes -{ - using ADataType = ck::bhalf_t; - using AComputeType = ck::bhalf_t; - using BDataType = ck::bhalf_t; - using BComputeType = ck::bhalf_t; - using CShuffleDataType = ck::bhalf_t; - using DsDataTypes = ck::Tuple<>; - using AccDataType = float; - using EDataType = ck::bhalf_t; +struct DataTypeToCK +{ + using type = ck::bhalf_t; }; - template <> -struct ConvTensorTypes -{ - using ADataType = float; - using AComputeType = float; - using BDataType = float; - using BComputeType = float; - using CShuffleDataType = float; - using DsDataTypes = ck::Tuple<>; - using AccDataType = float; - using EDataType = float; +struct DataTypeToCK +{ + using type = float; }; - template <> -struct ConvTensorTypes -{ - using ADataType = int8_t; - using AComputeType = int8_t; - using BDataType = int8_t; - using BComputeType = int8_t; - using CShuffleDataType = int8_t; - using DsDataTypes = ck::Tuple<>; - using AccDataType = int32_t; - using EDataType = int8_t; +struct DataTypeToCK +{ + using type = int32_t; +}; +template <> +struct DataTypeToCK +{ + using type = int8_t; }; - template <> -struct ConvTensorTypes -{ - using ADataType = ck::f8_t; - using AComputeType = ck::f8_t; - using BDataType = ck::f8_t; - using BComputeType = ck::f8_t; - using CShuffleDataType = ck::f8_t; - using DsDataTypes = ck::Tuple<>; - using AccDataType = float; - using EDataType = ck::f8_t; +struct DataTypeToCK +{ + using type = ck::f8_t; +}; + +struct CK_empty_tuple +{ + using type = ck::Tuple<>; +}; + +template +consteval auto ConvertDataTypeToCK() +{ + return DataTypeToCK
{}; +} + +template +consteval auto GetTensorDataAndComputeTypes() +{ + constexpr auto data_type = Config.data_type; + constexpr auto compute_type = Config.compute_type; + + if constexpr(data_type == DataType::UNDEFINDED && compute_type == DataType::UNDEFINDED) + { + return std::make_pair(ConvertDataTypeToCK(), + ConvertDataTypeToCK()); + } + else if constexpr(data_type == DataType::UNDEFINDED) + { + return std::make_pair(ConvertDataTypeToCK(), + ConvertDataTypeToCK()); + } + else if constexpr(compute_type == DataType::UNDEFINDED) + { + return std::make_pair(ConvertDataTypeToCK(), + ConvertDataTypeToCK()); + } + else + { + return std::make_pair(ConvertDataTypeToCK(), + ConvertDataTypeToCK()); + } +} + +template +consteval auto GetTensorAccumulationType() +{ + constexpr auto data_type = SignatureAccDataType; + if constexpr(data_type == DataType::UNDEFINDED) + { + return ConvertDataTypeToCK(); + } + else + { + return ConvertDataTypeToCK(); + } +} + +template +consteval auto GetAuxiliaryTensorDataTypeValue() +{ + constexpr auto data_type = Config.data_type; + if constexpr(data_type == DataType::UNDEFINDED) + { + return ConvertDataTypeToCK(); + } + else + { + return ConvertDataTypeToCK(); + } +} + +template +consteval auto GetAuxiliaryTensorDataTypeTuple(std::index_sequence) +{ + return ck::Tuple< + typename decltype(GetAuxiliaryTensorDataTypeValue())::type...>{}; +} + +template +struct AuxiliaryTensorDataTypes +{ + static constexpr auto Size = AuxiliaryTensorConfigsValue.size(); + using type = + decltype(GetAuxiliaryTensorDataTypeTuple( + std::make_index_sequence{})); +}; + +// TODO: Currently only the ouput tensor can have auxiliary tensors (e.g., bias). +template + requires(HasElementwiseOpWithAuxiliaryOperands) +consteval auto GetAuxiliaryTensorDataTypes() +{ + return AuxiliaryTensorDataTypes{}; +} + +template + requires(!HasElementwiseOpWithAuxiliaryOperands) +consteval auto GetAuxiliaryTensorDataTypes() +{ + return CK_empty_tuple{}; +} + +template +struct FwdConvTensorDataTypes +{ + static constexpr auto input_types = + GetTensorDataAndComputeTypes(); + static constexpr auto weight_types = + GetTensorDataAndComputeTypes(); + static constexpr auto output_types = + GetTensorDataAndComputeTypes(); + + using ADataType = typename decltype(input_types.first)::type; + using AComputeType = typename decltype(input_types.second)::type; + using BDataType = typename decltype(weight_types.first)::type; + using BComputeType = typename decltype(weight_types.second)::type; + using AccDataType = + typename decltype(GetTensorAccumulationType())::type; + using EDataType = typename decltype(output_types.first)::type; + + // This is the "compute" type for output. + using CShuffleDataType = typename decltype(output_types.second)::type; + + // Data types for the auxiliary tensors (e.g., bias). 
+ using DsDataTypes = typename decltype(GetAuxiliaryTensorDataTypes())::type; }; } // namespace ck_tile::builder::factory::internal diff --git a/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp b/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp index be3c208ba8..261c3f103d 100644 --- a/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp +++ b/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp @@ -41,8 +41,9 @@ struct ConvSignatureInfo { int spatial_dim; builder::ConvDirection direction; - std::variant - layout; + builder::TensorLayout input_layout; + builder::TensorLayout weight_layout; + builder::TensorLayout output_layout; builder::DataType data_type; builder::ElementwiseOperation input_element_op; builder::ElementwiseOperation weight_element_op; @@ -106,7 +107,9 @@ class ConvDescription : public Description f.writeLine(0, signature_.spatial_dim, "D ", signature_.direction, " Convolution Kernel"); f.writeLine(1, "Signature"); f.writeLine(2, "Tensor Type: ", signature_.data_type); - f.writeLine(2, "Memory Layout: ", signature_.layout); + f.writeLine(2, "Input Layout: ", signature_.input_layout); + f.writeLine(2, "Weight Layout: ", signature_.weight_layout); + f.writeLine(2, "Output Layout: ", signature_.output_layout); f.writeLine(2, "Input elementwise operation: ", signature_.input_element_op); f.writeLine(2, "Weights elementwise operation: ", signature_.weight_element_op); f.writeLast(2, "Output elementwise operation: ", signature_.output_element_op); @@ -264,7 +267,9 @@ conv::ConvDescription describe() conv::ConvSignatureInfo{ .spatial_dim = Traits::spatial_dim, .direction = Traits::direction, - .layout = Traits::layout, + .input_layout = Traits::layout[0], + .weight_layout = Traits::layout[1], + .output_layout = Traits::layout[2], .data_type = Traits::data_type, .input_element_op = Traits::input_element_op, .weight_element_op = Traits::weight_element_op, diff --git a/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp b/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp index 316f570bcd..29ac49e549 100644 --- a/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp +++ b/experimental/builder/include/ck_tile/builder/reflect/conv_traits.hpp @@ -298,7 +298,10 @@ constexpr auto conv_spec() /// @brief Derives the grouped convolution layout from a device kernel `Instance` type. /// @tparam Instance The device kernel instance type. -/// @return A `builder::GroupConvLayout{1D|2D|3D}` enum value corresponding to the tensor layouts. 
+/// @return An std::array corresponding to the tensor layouts: +/// index 0 -> Input layout +/// index 1 -> Weight layout +/// index 2 -> Output layout template constexpr auto conv_layout() { @@ -314,22 +317,30 @@ constexpr auto conv_layout() if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout1D::GNWC_GKXC_GNWK; + return std::array{builder::TensorLayout::GNWC, + builder::TensorLayout::GKXC, + builder::TensorLayout::GNWK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout1D::NWGC_GKXC_NWGK; + return std::array{builder::TensorLayout::NWGC, + builder::TensorLayout::GKXC, + builder::TensorLayout::NWGK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout1D::NGCW_GKXC_NGKW; + return std::array{builder::TensorLayout::NGCW, + builder::TensorLayout::GKXC, + builder::TensorLayout::NGKW}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout1D::NGCW_GKCX_NGKW; + return std::array{builder::TensorLayout::NGCW, + builder::TensorLayout::GKCX, + builder::TensorLayout::NGKW}; } } else if constexpr(InstTraits::kSpatialDim == 2) @@ -337,25 +348,33 @@ constexpr auto conv_layout() if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout2D::GNHWC_GKYXC_GNHWK; + return std::array{builder::TensorLayout::GNHWC, + builder::TensorLayout::GKYXC, + builder::TensorLayout::GNHWK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout2D::NHWGC_GKYXC_NHWGK; + return std::array{builder::TensorLayout::NHWGC, + builder::TensorLayout::GKYXC, + builder::TensorLayout::NHWGK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout2D::NGCHW_GKYXC_NGKHW; + return std::array{builder::TensorLayout::NGCHW, + builder::TensorLayout::GKYXC, + builder::TensorLayout::NGKHW}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout2D::NGCHW_GKCYX_NGKHW; + return std::array{builder::TensorLayout::NGCHW, + builder::TensorLayout::GKCYX, + builder::TensorLayout::NGKHW}; } } else if constexpr(InstTraits::kSpatialDim == 3) @@ -363,25 +382,33 @@ constexpr auto conv_layout() if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout3D::GNDHWC_GKZYXC_GNDHWK; + return std::array{builder::TensorLayout::GNDHWC, + builder::TensorLayout::GKZYXC, + builder::TensorLayout::GNDHWK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout3D::NDHWGC_GKZYXC_NDHWGK; + return std::array{builder::TensorLayout::NDHWGC, + builder::TensorLayout::GKZYXC, + builder::TensorLayout::NDHWGK}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout3D::NGCDHW_GKZYXC_NGKDHW; + return std::array{builder::TensorLayout::NGCDHW, + builder::TensorLayout::GKZYXC, + builder::TensorLayout::NGKDHW}; } else if constexpr(std::is_same_v && std::is_same_v && std::is_same_v) { - return builder::GroupConvLayout3D::NGCDHW_GKCZYX_NGKDHW; + return std::array{builder::TensorLayout::NGCDHW, + builder::TensorLayout::GKCZYX, + builder::TensorLayout::NGKDHW}; } } } @@ -433,22 +460,10 @@ template constexpr builder::ElementwiseOperation elementwise_op() { constexpr std::string_view name = detail::elementwise_op_name(); - if 
constexpr(detail::case_insensitive_equal(name, "Bias")) - { - return builder::ElementwiseOperation::BIAS; - } - else if constexpr(detail::case_insensitive_equal(name, "BiasClamp")) - { - return builder::ElementwiseOperation::BIAS_CLAMP; - } - else if constexpr(detail::case_insensitive_equal(name, "BiasBnormClamp")) + if constexpr(detail::case_insensitive_equal(name, "BiasBnormClamp")) { return builder::ElementwiseOperation::BIAS_BNORM_CLAMP; } - else if constexpr(detail::case_insensitive_equal(name, "Bilinear")) - { - return builder::ElementwiseOperation::BILINEAR; - } else if constexpr(detail::case_insensitive_equal(name, "Clamp")) { return builder::ElementwiseOperation::CLAMP; @@ -461,6 +476,10 @@ constexpr builder::ElementwiseOperation elementwise_op() { return builder::ElementwiseOperation::PASS_THROUGH; } + else if constexpr(detail::case_insensitive_equal(name, "ScaleAddScaleAddRelu")) + { + return builder::ElementwiseOperation::SCALEADD_SCALEADD_RELU; + } } /// @brief Derives a gemm padding from a kernel instance type. diff --git a/experimental/builder/include/ck_tile/builder/types.hpp b/experimental/builder/include/ck_tile/builder/types.hpp index 1aeb71af10..565bb98528 100644 --- a/experimental/builder/include/ck_tile/builder/types.hpp +++ b/experimental/builder/include/ck_tile/builder/types.hpp @@ -6,64 +6,91 @@ #include #include #include +#include +#include namespace ck_tile::builder { enum class DataType { + UNDEFINDED = 0, FP32, FP16, BF16, FP8, + INT32, I8, U8 }; -// Memory layouts for 1D convolution tensors. -// G: Group, N: Batch, K: Output Channel, C: Input Channel, W: Width -// Enum defines Input, Weight, and Output tensor layouts respectively. -enum class GroupConvLayout1D +enum class TensorLayout { - GNWC_GKXC_GNWK, - NWGC_GKXC_NWGK, - NGCW_GKXC_NGKW, - NGCW_GKCX_NGKW -}; - -// Memory layouts for 2D convolution tensors. -// G: Group, N: Batch, K: Output Channel, C: Input Channel, Y: Height, X: Width, H: Height -// Enum defines Input, Weight, and Output tensor layouts respectively. -enum class GroupConvLayout2D -{ - GNHWC_GKYXC_GNHWK, - NHWGC_GKYXC_NHWGK, - NGCHW_GKYXC_NGKHW, - NGCHW_GKCYX_NGKHW -}; - -// Memory layouts for 3D convolution tensors. -// G: Group, N: Batch, K: Output Channel, C: Input Channel, Z: Depth, Y: Height, X: Width, D: Depth, -// H: Height Enum defines Input, Weight, and Output tensor layouts respectively. 
-enum class GroupConvLayout3D -{ - GNDHWC_GKZYXC_GNDHWK, - NDHWGC_GKZYXC_NDHWGK, - NGCDHW_GKZYXC_NGKDHW, - NGCDHW_GKCZYX_NGKDHW, -}; - -struct GroupConvLayout -{ - union - { - GroupConvLayout1D _1d; - GroupConvLayout2D _2d; - GroupConvLayout3D _3d; - }; - - constexpr GroupConvLayout(GroupConvLayout1D layout) : _1d(layout) {} - constexpr GroupConvLayout(GroupConvLayout2D layout) : _2d(layout) {} - constexpr GroupConvLayout(GroupConvLayout3D layout) : _3d(layout) {} + UNDEFINED, + + // Bias tensors + GC, + G_C_strided, + G_K_strided, + + // 1D conv input tensor + GNCW, + GNWC, + NWGC, + NGCW, + G_NW_C_strided, + + // 2D conv input tensor + GNCHW, + GNHWC, + NHWGC, + NGCHW, + G_NHW_C_strided, + + // 3D conv input tensor + GNCDHW, + GNDHWC, + NDHWGC, + NGCDHW, + G_NDHW_C_strided, + + // 1D conv weight tensor + GKXC, + GKCX, + KXGC, + G_K_X_C_strided, + + // 2D conv weight tensor + GKYXC, + GKCYX, + KYXGC, + G_K_YX_C_strided, + + // 3D conv weight tensor + GKZYXC, + GKCZYX, + KZYXGC, + G_K_ZYX_C_strided, + + // 1D conv output tensor + GNKW, + GNWK, + NWGK, + NGKW, + G_NW_K_strided, + + // 2D conv output tensor + GNKHW, + GNHWK, + NHWGK, + NGKHW, + G_NHW_K_strided, + + // 3D conv output tensor + GNKDHW, + GNDHWK, + NDHWGK, + NGKDHW, + G_NDHW_K_strided }; // Direction of the convolution operation. @@ -77,13 +104,11 @@ enum class ConvDirection // Fused element-wise operations. enum class ElementwiseOperation { - BIAS, - BIAS_CLAMP, BIAS_BNORM_CLAMP, - BILINEAR, - CLAMP, SCALE, - PASS_THROUGH + CLAMP, + PASS_THROUGH, + SCALEADD_SCALEADD_RELU }; // Enums for pipeline versions & schedulers @@ -188,8 +213,10 @@ inline std::ostream& operator<<(std::ostream& os, DataType dt) case FP32: return os << "FP32"; case BF16: return os << "BF16"; case FP8: return os << "FP8"; + case INT32: return os << "INT32"; case I8: return os << "I8"; case U8: return os << "U8"; + case UNDEFINDED: return os << "UNDEFINDED"; default: return os << "Unknown"; } } @@ -206,57 +233,16 @@ inline std::ostream& operator<<(std::ostream& os, ConvDirection dir) } } -inline std::ostream& operator<<(std::ostream& os, GroupConvLayout1D layout) -{ - using enum GroupConvLayout1D; - switch(layout) - { - case GNWC_GKXC_GNWK: return os << "GNWC_GKXC_GNWK"; - case NWGC_GKXC_NWGK: return os << "NWGC_GKXC_NWGK"; - case NGCW_GKXC_NGKW: return os << "NGCW_GKXC_NGKW"; - case NGCW_GKCX_NGKW: return os << "NGCW_GKCX_NGKW"; - default: return os << "Unknown"; - } -} - -inline std::ostream& operator<<(std::ostream& os, GroupConvLayout2D layout) -{ - using enum GroupConvLayout2D; - switch(layout) - { - case GNHWC_GKYXC_GNHWK: return os << "GNHWC_GKYXC_GNHWK"; - case NHWGC_GKYXC_NHWGK: return os << "NHWGC_GKYXC_NHWGK"; - case NGCHW_GKYXC_NGKHW: return os << "NGCHW_GKYXC_NGKHW"; - case NGCHW_GKCYX_NGKHW: return os << "NGCHW_GKCYX_NGKHW"; - default: return os << "Unknown"; - } -} - -inline std::ostream& operator<<(std::ostream& os, GroupConvLayout3D layout) -{ - using enum GroupConvLayout3D; - switch(layout) - { - case GNDHWC_GKZYXC_GNDHWK: return os << "GNDHWC_GKZYXC_GNDHWK"; - case NDHWGC_GKZYXC_NDHWGK: return os << "NDHWGC_GKZYXC_NDHWGK"; - case NGCDHW_GKZYXC_NGKDHW: return os << "NGCDHW_GKZYXC_NGKDHW"; - case NGCDHW_GKCZYX_NGKDHW: return os << "NGCDHW_GKCZYX_NGKDHW"; - default: return os << "Unknown"; - } -} - inline std::ostream& operator<<(std::ostream& os, ElementwiseOperation op) { using enum ElementwiseOperation; switch(op) { - case BIAS: return os << "BIAS"; - case BIAS_CLAMP: return os << "BIAS_CLAMP"; - case BIAS_BNORM_CLAMP: return os << 
"BIAS_BNORM_CLAMP"; - case BILINEAR: return os << "BILINEAR"; case CLAMP: return os << "CLAMP"; case SCALE: return os << "SCALE"; case PASS_THROUGH: return os << "PASS_THROUGH"; + case BIAS_BNORM_CLAMP: return os << "BIAS_BNORM_CLAMP"; + case SCALEADD_SCALEADD_RELU: return os << "SCALEADD_SCALEADD_RELU"; default: return os << "Unknown"; } } @@ -375,13 +361,59 @@ inline std::ostream& operator<<(std::ostream& os, PipelineScheduler sched) } } -// ostream operator overload for std::variant of layout types -inline std::ostream& -operator<<(std::ostream& os, - const std::variant& layout) +inline std::ostream& operator<<(std::ostream& os, TensorLayout layout) { - std::visit([&os](const auto& l) { os << l; }, layout); - return os; + using enum TensorLayout; + switch(layout) + { + case GNCW: return os << "GNCW"; + case GNWC: return os << "GNWC"; + case NWGC: return os << "NWGC"; + case NGCW: return os << "NGCW"; + case G_NW_C_strided: return os << "G_NW_C_strided"; + case GNCHW: return os << "GNCHW"; + case GNHWC: return os << "GNHWC"; + case NHWGC: return os << "NHWGC"; + case NGCHW: return os << "NGCHW"; + case G_NHW_C_strided: return os << "G_NHW_C_strided"; + case GNCDHW: return os << "GNCDHW"; + case GNDHWC: return os << "GNDHWC"; + case NDHWGC: return os << "NDHWGC"; + case NGCDHW: return os << "NGCDHW"; + case G_NDHW_C_strided: return os << "G_NDHW_C_strided"; + case GKXC: return os << "GKXC"; + case GKCX: return os << "GKCX"; + case KXGC: return os << "KXGC"; + case G_K_X_C_strided: return os << "G_K_X_C_strided"; + case GKYXC: return os << "GKYXC"; + case GKCYX: return os << "GKCYX"; + case KYXGC: return os << "KYXGC"; + case G_K_YX_C_strided: return os << "G_K_YX_C_strided"; + case GKZYXC: return os << "GKZYXC"; + case GKCZYX: return os << "GKCZYX"; + case KZYXGC: return os << "KZYXGC"; + case G_K_ZYX_C_strided: return os << "G_K_ZYX_C_strided"; + case GNKW: return os << "GNKW"; + case GNWK: return os << "GNWK"; + case NWGK: return os << "NWGK"; + case NGKW: return os << "NGKW"; + case G_NW_K_strided: return os << "G_NW_K_strided"; + case GNKHW: return os << "GNKHW"; + case GNHWK: return os << "GNHWK"; + case NHWGK: return os << "NHWGK"; + case NGKHW: return os << "NGKHW"; + case G_NHW_K_strided: return os << "G_NHW_K_strided"; + case GNKDHW: return os << "GNKDHW"; + case GNDHWK: return os << "GNDHWK"; + case NDHWGK: return os << "NDHWGK"; + case NGKDHW: return os << "NGKDHW"; + case G_NDHW_K_strided: return os << "G_NDHW_K_strided"; + case GC: return os << "GC"; + case G_C_strided: return os << "G_C_strided"; + case G_K_strided: return os << "G_K_strided"; + case UNDEFINED: return os << "UNDEFINED"; + default: return os << "Unknown"; + } } // ostream operator overload for std::variant of convolution specializations diff --git a/experimental/builder/test/CMakeLists.txt b/experimental/builder/test/CMakeLists.txt index e43c88c7a7..a340a789de 100644 --- a/experimental/builder/test/CMakeLists.txt +++ b/experimental/builder/test/CMakeLists.txt @@ -119,6 +119,7 @@ add_ck_builder_test(test_ckb_instance_string # Tests the forward convolution builder across multiple data types and dimensions. # Individual tests are split into separate files to enable parallel compilation. 
add_ck_builder_test(test_ckb_build_fwd_instances + conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp conv/test_ckb_conv_fwd_1d_fp16.cpp conv/test_ckb_conv_fwd_1d_bf16.cpp conv/test_ckb_conv_fwd_1d_i8.cpp diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_bf16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_bf16.cpp index 1cace0cf9a..937d17a1ff 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_bf16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_bf16.cpp @@ -13,11 +13,15 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_1D_BF16_ChannelsFirst_scale) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 1, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout1D::NGCW_GKXC_NGKW, - .data_type = DataType::BF16, - .elementwise_operation = ElementwiseOperation::SCALE}; + constexpr ConvSignature FwdConvSignature{ + .spatial_dim = 1, + .direction = ConvDirection::FORWARD, + .data_type = DataType::BF16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NGCW}}, + .weight = {.config = {.layout = TensorLayout::GKXC}}, + .output = {.config = {.layout = TensorLayout::NGKW}, + .operation = {.elementwise_operation = ElementwiseOperation::SCALE}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -30,10 +34,13 @@ TEST(FwdConvInstances, using Builder = ConvBuilder; run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", - "256, 256, 256, 32", + "256,256,256,32", + "NGCW,GKXC,EmptyTuple,NGKW", + "PassThrough,PassThrough,Scale", "Filter1x1Stride1Pad0", - "BlkGemmPipelineScheduler: Intrawave", - "BlkGemmPipelineVersion: v2"}); + "MNKPadding", + "Intrawave", + "v2"}); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp index 3315eb6f64..e8cd8fb136 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_fp16.cpp @@ -10,14 +10,15 @@ using namespace ck_tile::builder::test_utils; // 1D FP16 (channels-last) with DEFAULT specialization TEST(FwdConvInstances, - Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_Instance_1D_FP16_ChannelsFirst_scale) + Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_Instance_1D_FP16_ChannelsFirst) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 1, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout1D::NWGC_GKXC_NWGK, - .data_type = DataType::FP16, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 1, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NWGC}}, + .weight = {.config = {.layout = TensorLayout::GKXC}}, + .output = {.config = {.layout = TensorLayout::NWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle{} @@ -28,8 +29,12 @@ TEST(FwdConvInstances, .with_prefetch_config(1, 2, PipelineScheduler::DEFAULT); using Builder = ConvBuilder; - run_test( - {"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle", "64, 64, 32, 32", "Default"}); + run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle", + "NWGC,GKXC,EmptyTuple,NWGK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding", + "64,64,32,32", + 
"Default"}); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp index f6b18747b7..014e221101 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_1d_i8.cpp @@ -14,12 +14,13 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleD_Wmma_CShuffle_Instance_1D_FP32_ChannelsFirst_scale) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 1, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout1D::GNWC_GKXC_GNWK, - .data_type = DataType::I8, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 1, + .direction = ConvDirection::FORWARD, + .data_type = DataType::I8, + .accumulation_data_type = DataType::INT32, + .input = {.config = {.layout = TensorLayout::GNWC}}, + .weight = {.config = {.layout = TensorLayout::GKXC}}, + .output = {.config = {.layout = TensorLayout::GNWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleD_Wmma_CShuffle{} @@ -30,8 +31,11 @@ TEST(FwdConvInstances, .with_prefetch_config(1, 0, PipelineScheduler::DEFAULT); using Builder = ConvBuilder; - run_test( - {"DeviceGroupedConvFwdMultipleD_Wmma_CShuffle", "128, 64, 64, 64", "Default"}); + run_test({"DeviceGroupedConvFwdMultipleD_Wmma_CShuffle", + "128,64,64,64", + "GNWC,GKXC,EmptyTuple,GNWK", + "PassThrough,PassThrough,PassThrough", + "Default"}); } #endif diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp index e0dc3225fa..b98e28c45a 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16.cpp @@ -12,12 +12,13 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_2D_BF16_ChannelsLast) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout2D::NHWGC_GKYXC_NHWGK, - .data_type = DataType::BF16, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::BF16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NHWGC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::NHWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -29,22 +30,26 @@ TEST(FwdConvInstances, using Builder = ConvBuilder; run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", - "256, 256, 256, 32", + "256,256,256,32", "Default", - "BlkGemmPipelineScheduler: Intrawave", - "BlkGemmPipelineVersion: v1"}); + "NHWGC,GKYXC,EmptyTuple,NHWGK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding", + "Intrawave", + "v1"}); } // 2D BF16 NHWGC (channels-last) with Pipeline V5 and FILTER_3x3 TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_2D_BF16_NHWGC_Filter3x3) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout2D::NHWGC_GKYXC_NHWGK, - .data_type = DataType::BF16, - .elementwise_operation = - 
ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::BF16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NHWGC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::NHWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -57,7 +62,10 @@ TEST(FwdConvInstances, using Builder = ConvBuilder; run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", "Filter3x3", - "BlkGemmPipelineVersion: v5"}); + "NHWGC,GKYXC,EmptyTuple,NHWGK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding", + "v5"}); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp new file mode 100644 index 0000000000..bc4a5e1047 --- /dev/null +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_bf16_scaleadd_relu.cpp @@ -0,0 +1,46 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "utils/ckb_conv_test_configs.hpp" +#include "utils/ckb_conv_test_utils.hpp" + +namespace { + +using namespace ck_tile::builder; +using namespace ck_tile::builder::test_utils; + +TEST(FwdConvInstances, + Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_Instance_2D_BF16_scale_add_relu) +{ + constexpr ConvSignature FwdConvSignature{ + .spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::BF16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NHWGC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC, .data_type = DataType::BF16}}, + .output = ConvolutionTensor{ + .config = {.layout = TensorLayout::NHWGK}, + .operation = TensorOperation<>{.elementwise_operation = + ElementwiseOperation::SCALEADD_SCALEADD_RELU} + .with_auxiliary_operand_configs()}}; + + constexpr auto FwdConvAlgorithm = + ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle{} + .with_thread_block(FwdThreadBlock_64_64x32x32) + .with_gemm_config(FwdGemmParams_Xdl_2x2_per_wave) + .with_transfer(FwdTransfer_4x16x1) + .with_specializations(ConvFwdSpecialization::DEFAULT, GemmSpecialization::MNKPadding) + .with_prefetch_config(1, 1, PipelineScheduler::DEFAULT); + + using Builder = ConvBuilder; + run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle", + "NHWGC,GKYXC,Tuple(NHWGK,G_K),NHWGK", + "PassThrough,PassThrough,ScaleAddScaleAddRelu", + "64,64,32,32", + "MNKPadding", + "Default"}); +} + +} // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp index 4c4d128717..7af1448403 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_dl_fp16.cpp @@ -10,12 +10,13 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK_Instance_2D_FP16_GNHWC) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout2D::GNHWC_GKYXC_GNHWK, - .data_type = DataType::FP16, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + 
.accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNHWC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK{} @@ -26,19 +27,24 @@ TEST(FwdConvInstances, Create_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK_Ins .with_dl_transfer(DlFwdTransfer); using Builder = ConvBuilder; - run_test( - {"DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK", "256, 128, 128, 16", "Default"}); + run_test({"DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK", + "256,128,128,16", + "Default", + "MNKPadding", + "GNHWC,GKYXC,EmptyTuple,GNHWK", + "PassThrough,PassThrough,PassThrough"}); } TEST(FwdConvInstances, Create_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK_Instance_2D_FP16_FILTER_1X1_PAD0) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout2D::GNHWC_GKYXC_GNHWK, - .data_type = DataType::FP16, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNHWC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK{} @@ -50,8 +56,12 @@ TEST(FwdConvInstances, .with_dl_transfer(DlFwdTransfer); using Builder = ConvBuilder; - run_test( - {"DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK", "256, 128, 128, 16", "Filter1x1Pad0"}); + run_test({"DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK", + "256,128,128,16", + "Filter1x1Pad0", + "MNKPadding", + "GNHWC,GKYXC,EmptyTuple,GNHWK", + "PassThrough,PassThrough,PassThrough"}); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp index 36b44ffb41..7b522403d3 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp16.cpp @@ -11,12 +11,13 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_2D_FP16_GNHWC) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout2D::GNHWC_GKYXC_GNHWK, - .data_type = DataType::FP16, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNHWC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -29,10 +30,13 @@ TEST(FwdConvInstances, using Builder = ConvBuilder; run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", - "256, 256, 256, 32", + "256,256,256,32", "Filter1x1Pad0", - "BlkGemmPipelineScheduler: Intrawave", - "BlkGemmPipelineVersion: v3"}); + "Intrawave", + "v3", + "GNHWC,GKYXC,EmptyTuple,GNHWK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding"}); } } // 
namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp index b2943d91b9..615d098c7c 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp32.cpp @@ -11,12 +11,13 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_2D_FP32_NGCHW_GKCYX) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout2D::NGCHW_GKCYX_NGKHW, - .data_type = DataType::FP32, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP32, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NGCHW}}, + .weight = {.config = {.layout = TensorLayout::GKCYX}}, + .output = {.config = {.layout = TensorLayout::NGKHW}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -29,10 +30,13 @@ TEST(FwdConvInstances, using Builder = ConvBuilder; run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", - "256, 128, 128, 32", + "256,128,128,32", "Filter1x1Stride1Pad0", - "BlkGemmPipelineScheduler: Intrawave", - "BlkGemmPipelineVersion: v4"}); + "Intrawave", + "v4", + "NGCHW,GKCYX,EmptyTuple,NGKHW", + "PassThrough,PassThrough,PassThrough", + "MNKPadding"}); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp index d24df998fd..4dd9e2beef 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_fp8.cpp @@ -12,12 +12,13 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_Instance_2D_FP8_ChannelsLast) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout2D::NHWGC_GKYXC_NHWGK, - .data_type = DataType::FP8, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP8, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NHWGC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::NHWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle{} @@ -28,8 +29,12 @@ TEST(FwdConvInstances, .with_prefetch_config(1, 1, PipelineScheduler::DEFAULT); using Builder = ConvBuilder; - run_test( - {"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle", "256, 256, 128, 32", "Default"}); + run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle", + "256,256,128,32", + "Default", + "NHWGC,GKYXC,EmptyTuple,NHWGK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding"}); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp index be0ea3d0a5..8fe58dbe82 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_2d_large_tensor_fp16.cpp @@ -11,12 
+11,13 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor_Instance_2D_FP16_GNHWC) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout2D::GNHWC_GKYXC_GNHWK, - .data_type = DataType::FP16, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNHWC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor{ @@ -30,20 +31,24 @@ TEST(FwdConvInstances, using Builder = ConvBuilder; run_test({"DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor", - "256, 256, 128, 32", - "Default"}); + "256,256,128,32", + "Default", + "GNHWC,GKYXC,EmptyTuple,GNHWK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding"}); } TEST( FwdConvInstances, Create_DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor_Instance_2D_FP16_GNHWC_Filter1x1Pad0) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout2D::GNHWC_GKYXC_GNHWK, - .data_type = DataType::FP16, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNHWC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::GNHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor{ @@ -57,8 +62,11 @@ TEST( using Builder = ConvBuilder; run_test({"DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor", - "128, 128, 128, 32", - "Filter1x1Pad0"}); + "128,128,128,32", + "Filter1x1Pad0", + "GNHWC,GKYXC,EmptyTuple,GNHWK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding"}); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp index 0db89669f7..2df76ab3e0 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_bf16.cpp @@ -12,12 +12,14 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_3D_BF16_GNDHWC) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 3, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout3D::GNDHWC_GKZYXC_GNDHWK, - .data_type = DataType::BF16, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature{ + .spatial_dim = 3, + .direction = ConvDirection::FORWARD, + .data_type = DataType::BF16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNDHWC}}, + .weight = {.config = {.layout = TensorLayout::GKZYXC}}, + .output = {.config = {.layout = TensorLayout::GNDHWK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -29,10 +31,13 @@ TEST(FwdConvInstances, using Builder = 
ConvBuilder; run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", - "256, 256, 256, 32", + "256,256,256,32", "Default", - "BlkGemmPipelineScheduler: Intrawave", - "BlkGemmPipelineVersion: v3"}); + "Intrawave", + "v3", + "GNDHWC,GKZYXC,EmptyTuple,GNDHWK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding"}); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp index 80e12f9572..ad626d9a15 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp16.cpp @@ -12,12 +12,14 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_3D_FP16_NDHWGC_ChannelsLast) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 3, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout3D::NDHWGC_GKZYXC_NDHWGK, - .data_type = DataType::FP16, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature{ + .spatial_dim = 3, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NDHWGC}}, + .weight = {.config = {.layout = TensorLayout::GKZYXC}}, + .output = {.config = {.layout = TensorLayout::NDHWGK}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -30,10 +32,13 @@ TEST(FwdConvInstances, using Builder = ConvBuilder; run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", - "256, 128, 128, 32", + "256,128,128,32", "Filter1x1Pad0", - "BlkGemmPipelineScheduler: Intrawave", - "BlkGemmPipelineVersion: v4"}); + "Intrawave", + "v4", + "NDHWGC,GKZYXC,EmptyTuple,NDHWGK", + "PassThrough,PassThrough,PassThrough", + "MNKPadding"}); } } // namespace diff --git a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp index bfddd6efcb..85974ace5d 100644 --- a/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp +++ b/experimental/builder/test/conv/test_ckb_conv_fwd_3d_fp32.cpp @@ -12,12 +12,14 @@ using namespace ck_tile::builder::test_utils; TEST(FwdConvInstances, Create_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_Instance_3D_FP32_ChannelsFirst) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 3, - .direction = ConvDirection::FORWARD, - .layout = GroupConvLayout3D::NGCDHW_GKCZYX_NGKDHW, - .data_type = DataType::FP32, - .elementwise_operation = - ElementwiseOperation::PASS_THROUGH}; + constexpr ConvSignature FwdConvSignature{ + .spatial_dim = 3, + .direction = ConvDirection::FORWARD, + .data_type = DataType::FP32, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NGCDHW}}, + .weight = {.config = {.layout = TensorLayout::GKCZYX}}, + .output = {.config = {.layout = TensorLayout::NGKDHW}}}; constexpr auto FwdConvAlgorithm = ConvAlgorithm_DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3{} @@ -30,10 +32,13 @@ TEST(FwdConvInstances, using Builder = ConvBuilder; run_test({"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3", - "256, 256, 256, 32", + "256,256,256,32", "Filter1x1Pad0", - "BlkGemmPipelineScheduler: Intrawave", - "BlkGemmPipelineVersion: v1"}); + "Intrawave", + "v1", + "NGCDHW,GKCZYX,EmptyTuple,NGKDHW", + "PassThrough,PassThrough,PassThrough", + "MNKPadding"}); } } // namespace diff --git 
a/experimental/builder/test/conv/test_conv_traits.cpp b/experimental/builder/test/conv/test_conv_traits.cpp
index 3684ae1c86..a6a7694703 100644
--- a/experimental/builder/test/conv/test_conv_traits.cpp
+++ b/experimental/builder/test/conv/test_conv_traits.cpp
@@ -85,7 +85,10 @@ TEST_F(ConvTraitsTest, ConvFwdTraitsExtraction)
     // Verify signature information
     EXPECT_EQ(Traits::spatial_dim, 2);
     EXPECT_EQ(Traits::direction, ck_tile::builder::ConvDirection::FORWARD);
-    EXPECT_EQ(Traits::layout, ck_tile::builder::GroupConvLayout2D::GNHWC_GKYXC_GNHWK);
+    EXPECT_THAT(Traits::layout,
+                ::testing::ElementsAre(ck_tile::builder::TensorLayout::GNHWC,
+                                       ck_tile::builder::TensorLayout::GKYXC,
+                                       ck_tile::builder::TensorLayout::GNHWK));
     EXPECT_EQ(Traits::data_type, ck_tile::builder::DataType::FP16);
     EXPECT_EQ(Traits::input_element_op, ck_tile::builder::ElementwiseOperation::PASS_THROUGH);
     EXPECT_EQ(Traits::weight_element_op, ck_tile::builder::ElementwiseOperation::PASS_THROUGH);
@@ -212,7 +215,10 @@ TEST_F(ConvTraitsTest, ConvFwdBaseTraitsExtraction)
     // Verify signature information
     EXPECT_EQ(Traits::spatial_dim, 2);
     EXPECT_EQ(Traits::direction, ck_tile::builder::ConvDirection::FORWARD);
-    EXPECT_EQ(Traits::layout, ck_tile::builder::GroupConvLayout2D::GNHWC_GKYXC_GNHWK);
+    EXPECT_THAT(Traits::layout,
+                ::testing::ElementsAre(ck_tile::builder::TensorLayout::GNHWC,
+                                       ck_tile::builder::TensorLayout::GKYXC,
+                                       ck_tile::builder::TensorLayout::GNHWK));
     EXPECT_EQ(Traits::data_type, ck_tile::builder::DataType::FP16);
     EXPECT_EQ(Traits::input_element_op, ck_tile::builder::ElementwiseOperation::PASS_THROUGH);
     EXPECT_EQ(Traits::weight_element_op, ck_tile::builder::ElementwiseOperation::PASS_THROUGH);
@@ -295,7 +301,10 @@ TEST_F(ConvTraitsTest, ConvFwdLargeTensorTraitsExtraction)
     // Verify signature information
     EXPECT_EQ(Traits::spatial_dim, 2);
     EXPECT_EQ(Traits::direction, ck_tile::builder::ConvDirection::FORWARD);
-    EXPECT_EQ(Traits::layout, ck_tile::builder::GroupConvLayout2D::GNHWC_GKYXC_GNHWK);
+    EXPECT_THAT(Traits::layout,
+                ::testing::ElementsAre(ck_tile::builder::TensorLayout::GNHWC,
+                                       ck_tile::builder::TensorLayout::GKYXC,
+                                       ck_tile::builder::TensorLayout::GNHWK));
     EXPECT_EQ(Traits::data_type, ck_tile::builder::DataType::FP16);
     EXPECT_EQ(Traits::input_element_op, ck_tile::builder::ElementwiseOperation::PASS_THROUGH);
     EXPECT_EQ(Traits::weight_element_op, ck_tile::builder::ElementwiseOperation::PASS_THROUGH);
diff --git a/experimental/builder/test/impl/conv_signature_types.hpp b/experimental/builder/test/impl/conv_signature_types.hpp
index f18abb1c8d..ef87981c3d 100644
--- a/experimental/builder/test/impl/conv_signature_types.hpp
+++ b/experimental/builder/test/impl/conv_signature_types.hpp
@@ -10,14 +10,48 @@ namespace ck_tile::builder::test {
 
 using namespace ck_tile::builder;
 
+struct TensorConfig
+{
+    TensorLayout layout;
+    // Optional data types; if provided, they override the default type
+    // defined in the signature.
+    DataType data_type{DataType::UNDEFINED};
+    DataType compute_type{DataType::UNDEFINED};
+};
+
+template <TensorConfig... Configs>
+struct TensorOperation
+{
+    ElementwiseOperation elementwise_operation{ElementwiseOperation::PASS_THROUGH};
+    std::array<TensorConfig, sizeof...(Configs)> auxiliary_operand_configs{Configs...};
+
+    // Builder method that returns a copy of this operation with the given
+    // auxiliary tensor configs attached.
+    template <TensorConfig... NewConfigs>
+    constexpr auto with_auxiliary_operand_configs() const
+    {
+        return TensorOperation<NewConfigs...>{
+            .elementwise_operation = this->elementwise_operation};
+    }
+};
+
+template <typename Op = TensorOperation<>>
+struct ConvolutionTensor
+{
+    TensorConfig config;
+    Op operation{};
+};
+
+template <typename InputTensor  = ConvolutionTensor<>,
+          typename WeightTensor = ConvolutionTensor<>,
+          typename OutputTensor = ConvolutionTensor<>>
 struct ConvSignature
 {
     int spatial_dim;
     ConvDirection direction;
-    GroupConvLayout layout;
     DataType data_type;
-    ElementwiseOperation elementwise_operation;
+    DataType accumulation_data_type;
+    InputTensor input;
+    WeightTensor weight;
+    OutputTensor output;
 };
-static_assert(ConvSignatureDescriptor<ConvSignature>);
 
 } // namespace ck_tile::builder::test
diff --git a/experimental/builder/test/test_conv_description.cpp b/experimental/builder/test/test_conv_description.cpp
index 5480c2740a..689577fb3b 100644
--- a/experimental/builder/test/test_conv_description.cpp
+++ b/experimental/builder/test/test_conv_description.cpp
@@ -16,40 +16,79 @@ namespace ckb = ck_tile::builder;
 namespace ckr = ck_tile::reflect;
 namespace ckt = ck_tile::test;
 
+struct TensorOp
+{
+    ckb::ElementwiseOperation elementwise_operation{ckb::ElementwiseOperation::PASS_THROUGH};
+};
+
+struct InvalidTensorOp
+{
+    int elementwise_operation = 7; // invalid value
+};
+static_assert(!ckb::TensorOperatorDescriptor<InvalidTensorOp>);
+
+struct TensorConfig
+{
+    ckb::TensorLayout layout;
+    ckb::DataType data_type{ckb::DataType::UNDEFINED};
+    ckb::DataType compute_type{ckb::DataType::UNDEFINED};
+};
+
+struct ConvTensorSimple
+{
+    TensorConfig config;
+};
+
+struct ConvTensorWithOp
+{
+    TensorConfig config;
+    TensorOp operation{};
+};
+
+struct ConvTensorWithInvalidOp
+{
+    TensorConfig config;
+    InvalidTensorOp operation{};
+};
+
 // Defines the signature of the convolution operation to be tested.
 // This includes dimensionality, direction, data layout, and data type.
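For orientation, a minimal composition sketch of the signature types defined above. It assumes the template parameter lists as reconstructed in this patch; the names `kResidual`, `kBias`, `OutputOp`, and `kSig` are illustrative only, with the auxiliary layouts mirroring the `"Tuple(NHWGK,G_K)"` component expected by the new SCALEADD_SCALEADD_RELU test.

```cpp
// Sketch only: assumes the reconstructed template parameter lists above.
#include "impl/conv_signature_types.hpp"

using namespace ck_tile::builder;
using namespace ck_tile::builder::test;

// Auxiliary operands for SCALEADD_SCALEADD_RELU: a full-size residual (NHWGK)
// and a per-channel bias (G_K), as in the new 2D BF16 test's expected string.
constexpr TensorConfig kResidual{.layout = TensorLayout::NHWGK};
constexpr TensorConfig kBias{.layout = TensorLayout::G_K_strided};

// The output operation carries its auxiliary configs as template arguments;
// the builder method produces the same type as naming them directly.
using OutputOp = TensorOperation<kResidual, kBias>;
constexpr OutputOp kOutputOp =
    TensorOperation<>{.elementwise_operation = ElementwiseOperation::SCALEADD_SCALEADD_RELU}
        .with_auxiliary_operand_configs<kResidual, kBias>();

// A complete signature: defaulted input/weight tensors, fused output.
constexpr ConvSignature<ConvolutionTensor<>,
                        ConvolutionTensor<>,
                        ConvolutionTensor<OutputOp>>
    kSig{.spatial_dim            = 2,
         .direction              = ConvDirection::FORWARD,
         .data_type              = DataType::BF16,
         .accumulation_data_type = DataType::FP32,
         .input                  = {.config = {.layout = TensorLayout::NHWGC}},
         .weight                 = {.config = {.layout = TensorLayout::GKYXC}},
         .output                 = {.config    = {.layout = TensorLayout::NHWGK},
                                    .operation = kOutputOp}};
```

The hand-rolled signature structs used by `test_conv_description.cpp` continue below.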
 struct ConvSignature
 {
-    int spatial_dim = 2;
-    ckb::GroupConvLayout layout = ckb::GroupConvLayout2D::GNHWC_GKYXC_GNHWK;
-    ckb::DataType data_type = ckb::DataType::FP16;
-    // ckb::GroupConvDeviceOp device_operation =
-    //     ckb::FwdGroupConvDeviceOperation::DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3;
+    int spatial_dim                      = 2;
+    ckb::DataType data_type              = ckb::DataType::FP16;
+    ckb::DataType accumulation_data_type = ckb::DataType::FP32;
+    ConvTensorSimple input  = {.config = {ckb::TensorLayout::GNHWC}};
+    ConvTensorSimple weight = {.config = {ckb::TensorLayout::GKYXC}};
+    ConvTensorSimple output = {.config = {ckb::TensorLayout::GNHWK}};
 };
 static_assert(ckb::ConvSignatureDescriptor<ConvSignature>);
 
 // Compile time tests for concepts
 struct ConvSignatureWithOptionalParams
 {
-    int spatial_dim = 2;
-    ckb::ConvDirection direction = ckb::ConvDirection::FORWARD;
-    ckb::GroupConvLayout layout = ckb::GroupConvLayout2D::GNHWC_GKYXC_GNHWK;
-    ckb::DataType data_type = ckb::DataType::FP16;
-    ckb::ElementwiseOperation elementwise_operation = ckb::ElementwiseOperation::PASS_THROUGH;
+    int spatial_dim                      = 2;
+    ckb::DataType data_type              = ckb::DataType::FP16;
+    ckb::DataType accumulation_data_type = ckb::DataType::FP32;
+    ckb::ConvDirection direction         = ckb::ConvDirection::FORWARD;
+    ConvTensorWithOp input = {
+        .config = {ckb::TensorLayout::GNHWC, ckb::DataType::FP16},
+    };
+    ConvTensorWithOp weight = {.config = {ckb::TensorLayout::GKYXC, ckb::DataType::FP16}};
+    ConvTensorWithOp output = {.config    = {ckb::TensorLayout::GNHWK, ckb::DataType::FP16},
+                               .operation = {ckb::ElementwiseOperation::SCALE}};
 };
 static_assert(ckb::ConvSignatureDescriptor<ConvSignatureWithOptionalParams>);
 
 struct ConvSignatureWithInvalidOptionalParams
 {
-    int spatial_dim = 2;
-    ckb::ConvDirection direction = ckb::ConvDirection::FORWARD;
-    ckb::GroupConvLayout layout = ckb::GroupConvLayout2D::GNHWC_GKYXC_GNHWK;
-    ckb::DataType data_type = ckb::DataType::FP16;
-    int elementwise_operation = 7; // this should fail
-    // ckb::GroupConvDeviceOp device_operation =
-    //     ckb::FwdGroupConvDeviceOperation::DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3;
+    int spatial_dim                      = 2;
+    ckb::DataType data_type              = ckb::DataType::FP16;
+    ckb::DataType accumulation_data_type = ckb::DataType::FP32;
+    ConvTensorWithInvalidOp input  = {.config = {ckb::TensorLayout::GNHWC}};
+    ConvTensorWithInvalidOp weight = {.config = {ckb::TensorLayout::GKYXC}};
+    ConvTensorWithInvalidOp output = {.config = {ckb::TensorLayout::GNHWK}};
 };
-
 static_assert(!ckb::ConvSignatureDescriptor<ConvSignatureWithInvalidOptionalParams>);
 
 struct DefaultAlgorithm
@@ -123,7 +162,9 @@ TEST(ConvDescriptionTest, DefaultInstanceHasDetailedDescription)
         "2D Forward Convolution Kernel\n"
         "├─ Signature\n"
         "│ ├─ Tensor Type: FP16\n"
-        "│ ├─ Memory Layout: GNHWC_GKYXC_GNHWK\n"
+        "│ ├─ Input Layout: GNHWC\n"
+        "│ ├─ Weight Layout: GKYXC\n"
+        "│ ├─ Output Layout: GNHWK\n"
         "│ ├─ Input elementwise operation: PASS_THROUGH\n"
         "│ ├─ Weights elementwise operation: PASS_THROUGH\n"
         "│ └─ Output elementwise operation: PASS_THROUGH\n"
diff --git a/experimental/builder/test/unit_conv_elementwise_op.cpp b/experimental/builder/test/unit_conv_elementwise_op.cpp
index 66593bf802..84a9c533f6 100644
--- a/experimental/builder/test/unit_conv_elementwise_op.cpp
+++ b/experimental/builder/test/unit_conv_elementwise_op.cpp
@@ -8,30 +8,38 @@
 
 namespace {
 
-using ::ck_tile::builder::factory::internal::ElementwiseOps;
-using enum ::ck_tile::builder::ElementwiseOperation;
+using ::ck_tile::builder::ElementwiseOperation;
+using ::ck_tile::builder::factory::internal::ElementwiseOpToCK;
 
 TEST(ConvElementwiseOp,
AssignsOpsForPassThrough) { - using Ops = ElementwiseOps; - - EXPECT_TRUE( - (std::is_same_v)); - EXPECT_TRUE( - (std::is_same_v)); - EXPECT_TRUE( - (std::is_same_v)); + using Op = ElementwiseOpToCK::Op; + EXPECT_TRUE((std::is_same_v)); } TEST(ConvElementwiseOp, AssignsOpsForScale) { - using Ops = ElementwiseOps; + using Op = ElementwiseOpToCK::Op; + EXPECT_TRUE((std::is_same_v)); +} +TEST(ConvElementwiseOp, AssignsOpsForClamp) +{ + using Op = ElementwiseOpToCK::Op; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(ConvElementwiseOp, AssignsOpsForScaleAddScaleAddRelu) +{ + using Op = ElementwiseOpToCK::Op; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(ConvElementwiseOp, AssignsOpsForBiasNormClamp) +{ + using Op = ElementwiseOpToCK::Op; EXPECT_TRUE( - (std::is_same_v)); - EXPECT_TRUE( - (std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); + (std::is_same_v)); } } // namespace diff --git a/experimental/builder/test/unit_conv_tensor_layout.cpp b/experimental/builder/test/unit_conv_tensor_layout.cpp index 6cdcc429dd..7764e94dc6 100644 --- a/experimental/builder/test/unit_conv_tensor_layout.cpp +++ b/experimental/builder/test/unit_conv_tensor_layout.cpp @@ -4,116 +4,481 @@ #include #include -// Include the helper file we're testing #include "ck_tile/builder/factory/helpers/conv_tensor_layout.hpp" +#include "impl/conv_signature_types.hpp" namespace { namespace ckb = ::ck_tile::builder; +using ::ck_tile::builder::DataType; +using ::ck_tile::builder::ElementwiseOperation; +using ::ck_tile::builder::TensorLayout; +using ::ck_tile::builder::factory::internal::AuxiliaryTensorLayouts; using ::ck_tile::builder::factory::internal::ConvTensorLayouts; -using ::ck_tile::builder::factory::internal::GetTensorLayout; +using ::ck_tile::builder::factory::internal::LayoutToCK; + +using namespace ::ck_tile::builder::test; using enum ::ck_tile::builder::ConvDirection; TEST(ConvTensorLayout, AssignsLayoutsFor1D_NWGC_GKXC_NWGK) { - using TensorLayouts = ConvTensorLayouts; + static constexpr auto sig = + ConvSignature<>{.spatial_dim = 1, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NWGC}}, + .weight = {.config = {.layout = TensorLayout::GKXC}}, + .output = {.config = {.layout = TensorLayout::NWGK}}}; + + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v>)); } TEST(ConvTensorLayout, AssignsLayoutsFor1D_NGCW_GKXC_NGKW) { - using TensorLayouts = ConvTensorLayouts; + static constexpr auto sig = + ConvSignature<>{.spatial_dim = 1, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NGCW}}, + .weight = {.config = {.layout = TensorLayout::GKXC}}, + .output = {.config = {.layout = TensorLayout::NGKW}}}; + + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v>)); } TEST(ConvTensorLayout, AssignsLayoutsFor1D_GNWC_GKXC_GNWK) { - using TensorLayouts = ConvTensorLayouts; + static constexpr auto sig = + ConvSignature<>{.spatial_dim = 1, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNWC}}, + .weight = {.config = {.layout = TensorLayout::GKXC}}, + .output = {.config = {.layout = TensorLayout::GNWK}}}; + + using TensorLayouts = 
ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v>)); } TEST(ConvTensorLayout, AssignsLayoutsFor1D_NGCW_GKCX_NGKW) { - using TensorLayouts = ConvTensorLayouts; + static constexpr auto sig = + ConvSignature<>{.spatial_dim = 1, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NGCW}}, + .weight = {.config = {.layout = TensorLayout::GKCX}}, + .output = {.config = {.layout = TensorLayout::NGKW}}}; + + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v>)); } TEST(ConvTensorLayout, AssignsLayoutsFor2D_NGCHW_GKYXC_NGKHW) { - using TensorLayouts = ConvTensorLayouts; + static constexpr auto sig = + ConvSignature<>{.spatial_dim = 2, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NGCHW}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::NGKHW}}}; + + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v>)); } TEST(ConvTensorLayout, AssignsLayoutsFor2D_NHWGC_GKYXC_NHWGK) { - using TensorLayouts = ConvTensorLayouts; + static constexpr auto sig = + ConvSignature<>{.spatial_dim = 2, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NHWGC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::NHWGK}}}; + + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v>)); } TEST(ConvTensorLayout, AssignsLayoutsFor2D_GNHWC_GKYXC_GNHWK) { - using TensorLayouts = ConvTensorLayouts; + static constexpr auto sig = + ConvSignature<>{.spatial_dim = 2, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNHWC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::GNHWK}}}; + + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v>)); } TEST(ConvTensorLayout, AssignsLayoutsFor2D_NGCHW_GKCYX_NGKHW) { - using TensorLayouts = ConvTensorLayouts; + static constexpr auto sig = + ConvSignature<>{.spatial_dim = 2, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NGCHW}}, + .weight = {.config = {.layout = TensorLayout::GKCYX}}, + .output = {.config = {.layout = TensorLayout::NGKHW}}}; + + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v>)); } TEST(ConvTensorLayout, AssignsLayoutsFor3D_NGCDHW_GKCZYX_NGKDHW) { - using TensorLayouts = - ConvTensorLayouts; + static constexpr auto sig = + ConvSignature<>{.spatial_dim = 3, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = 
TensorLayout::NGCDHW}}, + .weight = {.config = {.layout = TensorLayout::GKCZYX}}, + .output = {.config = {.layout = TensorLayout::NGKDHW}}}; + + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v>)); } TEST(ConvTensorLayout, AssignsLayoutsFor3D_NDHWGC_GKZYXC_NDHWGK) { - using TensorLayouts = - ConvTensorLayouts; + static constexpr auto sig = + ConvSignature<>{.spatial_dim = 3, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NDHWGC}}, + .weight = {.config = {.layout = TensorLayout::GKZYXC}}, + .output = {.config = {.layout = TensorLayout::NDHWGK}}}; + + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v>)); } TEST(ConvTensorLayout, AssignsLayoutsFor3D_GNDHWC_GKZYXC_GNDHWK) { - using TensorLayouts = - ConvTensorLayouts; + static constexpr auto sig = + ConvSignature<>{.spatial_dim = 3, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNDHWC}}, + .weight = {.config = {.layout = TensorLayout::GKZYXC}}, + .output = {.config = {.layout = TensorLayout::GNDHWK}}}; + + using TensorLayouts = ConvTensorLayouts; EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v>)); +} + +TEST(AuxiliaryTensorLayout, AssignsLayoutForG_K_strided) +{ + using CKLayout = LayoutToCK::type; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(AuxiliaryTensorLayout, AssignsLayoutForGC) +{ + using CKLayout = LayoutToCK::type; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(AuxiliaryTensorLayout, AssignsLayoutForG_C_strided) +{ + using CKLayout = LayoutToCK::type; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(AuxiliaryTensorLayout, EmptyAuxiliaryTensorLayoutIsEmptyTuple) +{ + using ::ck_tile::builder::factory::internal::EmptyAuxiliaryTensorLayout; + using EmptyLayout = EmptyAuxiliaryTensorLayout::type; + EXPECT_TRUE((std::is_same_v>)); +} + +struct MockAuxiliaryTensorConfig +{ + TensorLayout layout; +}; + +TEST(AuxiliaryTensorLayoutIntegration, SingleBiasTensorWithG_K_Layout) +{ + static constexpr std::array aux_configs = { + MockAuxiliaryTensorConfig{.layout = TensorLayout::G_K_strided}}; + + using AuxLayouts = AuxiliaryTensorLayouts; + + EXPECT_EQ(AuxLayouts::Size, 1); + using ExpectedType = ck::Tuple; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(AuxiliaryTensorLayoutIntegration, SingleBiasTensorWithGC_Layout) +{ + static constexpr std::array aux_configs = { + MockAuxiliaryTensorConfig{.layout = TensorLayout::GC}}; + + using AuxLayouts = AuxiliaryTensorLayouts; + + EXPECT_EQ(AuxLayouts::Size, 1); + using ExpectedType = ck::Tuple; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(AuxiliaryTensorLayoutIntegration, SingleBiasTensorWithG_C_Layout) +{ + static constexpr std::array aux_configs = { + MockAuxiliaryTensorConfig{.layout = TensorLayout::G_C_strided}}; + + using AuxLayouts = AuxiliaryTensorLayouts; + + EXPECT_EQ(AuxLayouts::Size, 1); + using ExpectedType = ck::Tuple; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(AuxiliaryTensorLayoutIntegration, TwoAuxiliaryTensors) +{ + static constexpr std::array aux_configs = { + MockAuxiliaryTensorConfig{.layout = TensorLayout::G_K_strided}, + MockAuxiliaryTensorConfig{.layout = TensorLayout::GC}}; + + using AuxLayouts = 
AuxiliaryTensorLayouts; + + EXPECT_EQ(AuxLayouts::Size, 2); + using ExpectedType = + ck::Tuple; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(AuxiliaryTensorLayoutIntegration, ThreeAuxiliaryTensors) +{ + static constexpr std::array aux_configs = { + MockAuxiliaryTensorConfig{.layout = TensorLayout::G_K_strided}, + MockAuxiliaryTensorConfig{.layout = TensorLayout::GC}, + MockAuxiliaryTensorConfig{.layout = TensorLayout::G_C_strided}}; + + using AuxLayouts = AuxiliaryTensorLayouts; + + EXPECT_EQ(AuxLayouts::Size, 3); + using ExpectedType = ck::Tuple; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(AuxiliaryTensorLayoutIntegration, WorksWith1DConvolution) +{ + static constexpr std::array aux_configs = { + MockAuxiliaryTensorConfig{.layout = TensorLayout::G_K_strided}}; + + using AuxLayouts = AuxiliaryTensorLayouts; + + EXPECT_EQ(AuxLayouts::Size, 1); + using ExpectedType = ck::Tuple; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(AuxiliaryTensorLayoutIntegration, WorksWith3DConvolution) +{ + static constexpr std::array aux_configs = { + MockAuxiliaryTensorConfig{.layout = TensorLayout::GC}}; + + using AuxLayouts = AuxiliaryTensorLayouts; + + EXPECT_EQ(AuxLayouts::Size, 1); + using ExpectedType = ck::Tuple; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(ConvTensorLayoutsWithAuxiliary, Conv2DWithSingleBiasG_K) +{ + using OutputOp = TensorOperation; + + static constexpr auto sig = + ConvSignature, ConvolutionTensor<>, ConvolutionTensor>{ + .spatial_dim = 2, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NGCHW}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::NGKHW}, + .operation = + OutputOp{.elementwise_operation = ElementwiseOperation::SCALE}}}; + + using TensorLayouts = ConvTensorLayouts; + + EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v)); + + using ExpectedDsLayout = ck::Tuple; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(ConvTensorLayoutsWithAuxiliary, Conv2DWithSingleBiasGC) +{ + using OutputOp = TensorOperation; + + static constexpr auto sig = + ConvSignature, ConvolutionTensor<>, ConvolutionTensor>{ + .spatial_dim = 2, + .direction = FORWARD, + .data_type = DataType::BF16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NHWGC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::NHWGK}, + .operation = + OutputOp{.elementwise_operation = ElementwiseOperation::SCALE}}}; + + using TensorLayouts = ConvTensorLayouts; + + EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v)); + + using ExpectedDsLayout = ck::Tuple; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(ConvTensorLayoutsWithAuxiliary, Conv2DWithTwoAuxiliaryTensors) +{ + using OutputOp = TensorOperation; + + static constexpr auto sig = + ConvSignature, ConvolutionTensor<>, ConvolutionTensor>{ + .spatial_dim = 2, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::GNHWC}}, + .weight = {.config = {.layout = TensorLayout::GKYXC}}, + .output = {.config = {.layout = TensorLayout::GNHWK}, + .operation = OutputOp{.elementwise_operation = + ElementwiseOperation::SCALEADD_SCALEADD_RELU}}}; + + using TensorLayouts = ConvTensorLayouts; + + EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v)); + 
EXPECT_TRUE((std::is_same_v)); + + using ExpectedDsLayout = + ck::Tuple; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(ConvTensorLayoutsWithAuxiliary, Conv1DWithBias) +{ + using OutputOp = TensorOperation; + + static constexpr auto sig = + ConvSignature, ConvolutionTensor<>, ConvolutionTensor>{ + .spatial_dim = 1, + .direction = FORWARD, + .data_type = DataType::FP32, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NWGC}}, + .weight = {.config = {.layout = TensorLayout::GKXC}}, + .output = {.config = {.layout = TensorLayout::NWGK}, + .operation = + OutputOp{.elementwise_operation = ElementwiseOperation::SCALE}}}; + + using TensorLayouts = ConvTensorLayouts; + + EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v)); + + using ExpectedDsLayout = ck::Tuple; + EXPECT_TRUE((std::is_same_v)); +} + +TEST(ConvTensorLayoutsWithAuxiliary, Conv3DWithBias) +{ + using OutputOp = TensorOperation; + + static constexpr auto sig = + ConvSignature, ConvolutionTensor<>, ConvolutionTensor>{ + .spatial_dim = 3, + .direction = FORWARD, + .data_type = DataType::FP16, + .accumulation_data_type = DataType::FP32, + .input = {.config = {.layout = TensorLayout::NDHWGC}}, + .weight = {.config = {.layout = TensorLayout::GKZYXC}}, + .output = {.config = {.layout = TensorLayout::NDHWGK}, + .operation = OutputOp{.elementwise_operation = + ElementwiseOperation::BIAS_BNORM_CLAMP}}}; + + using TensorLayouts = ConvTensorLayouts; + + EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v)); + EXPECT_TRUE((std::is_same_v)); + + using ExpectedDsLayout = ck::Tuple; + EXPECT_TRUE((std::is_same_v)); } } // namespace diff --git a/experimental/builder/test/unit_conv_tensor_type.cpp b/experimental/builder/test/unit_conv_tensor_type.cpp index 5aa82774da..c92b24626e 100644 --- a/experimental/builder/test/unit_conv_tensor_type.cpp +++ b/experimental/builder/test/unit_conv_tensor_type.cpp @@ -9,71 +9,42 @@ namespace { namespace ckb = ck_tile::builder; -using ck_tile::builder::factory::internal::ConvTensorTypes; +using ck_tile::builder::factory::internal::DataTypeToCK; TEST(ConvTensorType, AssignsTypesForFP16) { - using Types = ConvTensorTypes; - - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); + using CKType = DataTypeToCK::type; + EXPECT_TRUE((std::is_same_v)); } TEST(ConvTensorType, AssignsTypesForBF16) { - using Types = ConvTensorTypes; - - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); + using CKType = DataTypeToCK::type; + EXPECT_TRUE((std::is_same_v)); } TEST(ConvTensorType, AssignsTypesForFP32) { - using Types = ConvTensorTypes; + using CKType = DataTypeToCK::type; + EXPECT_TRUE((std::is_same_v)); +} - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); +TEST(ConvTensorType, AssignsTypesForINT32) +{ + using CKType = DataTypeToCK::type; + EXPECT_TRUE((std::is_same_v)); } TEST(ConvTensorType, AssignsTypesForI8) { - using Types = ConvTensorTypes; - - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - 
EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); + using CKType = DataTypeToCK::type; + EXPECT_TRUE((std::is_same_v)); } TEST(ConvTensorType, AssignsTypesForFP8) { - using Types = ConvTensorTypes; - - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); - EXPECT_TRUE((std::is_same_v)); + using CKType = DataTypeToCK::type; + EXPECT_TRUE((std::is_same_v)); } } // namespace diff --git a/experimental/builder/test/utils/ckb_conv_test_configs.hpp b/experimental/builder/test/utils/ckb_conv_test_configs.hpp index 5436755608..403c2ffd79 100644 --- a/experimental/builder/test/utils/ckb_conv_test_configs.hpp +++ b/experimental/builder/test/utils/ckb_conv_test_configs.hpp @@ -178,6 +178,9 @@ constexpr GridwiseXdlGemm FwdGemmParams_Xdl_4x4_per_wave{ constexpr GridwiseXdlGemm FwdGemmParams_Xdl_4x2_per_wave{ .ak1 = 8, .bk1 = 8, .m_per_xdl = 32, .n_per_xdl = 32, .m_xdl_per_wave = 4, .n_xdl_per_wave = 2}; +constexpr GridwiseXdlGemm FwdGemmParams_Xdl_2x2_per_wave{ + .ak1 = 8, .bk1 = 8, .m_per_xdl = 32, .n_per_xdl = 32, .m_xdl_per_wave = 2, .n_xdl_per_wave = 2}; + constexpr GridwiseXdlGemm FwdGemmParams_Xdl_2x1_per_wave{ .ak1 = 8, .bk1 = 8, .m_per_xdl = 32, .n_per_xdl = 32, .m_xdl_per_wave = 2, .n_xdl_per_wave = 1}; diff --git a/experimental/builder/test/utils/ckb_conv_test_utils.hpp b/experimental/builder/test/utils/ckb_conv_test_utils.hpp index f3db734da8..508c621c2e 100644 --- a/experimental/builder/test/utils/ckb_conv_test_utils.hpp +++ b/experimental/builder/test/utils/ckb_conv_test_utils.hpp @@ -15,7 +15,7 @@ constexpr void run_test(const std::vector& kernel_instance_componen { auto instance = typename Builder::Instance{}; - const auto kernel_string = instance.GetTypeString(); + const auto kernel_string = instance.GetInstanceString(); std::cout << "Generated kernel: " << kernel_string << std::endl; EXPECT_GT(kernel_string.size(), 0);
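The excerpt ends inside `run_test`. As a hedged sketch (an assumption, not code from this repository), the check that presumably follows verifies that each expected component appears in the generated instance string; `kernel_instance_components` is the presumed full parameter name abbreviated in the hunk header above:

```cpp
// Presumed continuation of run_test (sketch only): every expected component
// must occur as a substring of the generated instance string.
for(const auto& component : kernel_instance_components)
{
    EXPECT_NE(kernel_string.find(component), std::string::npos)
        << "Generated instance string is missing component: " << component;
}
```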