ROCm
diff --git a/dnn-providers/ck-dsl-provider/src/adapters/conv_implicit_gemm/ConvImplicitGemmAdapter.cpp b/dnn-providers/ck-dsl-provider/src/adapters/conv_implicit_gemm/ConvImplicitGemmAdapter.cpp
Lines changed: 13 additions & 12 deletions
diff --git a/dnn-providers/ck-dsl-provider/src/engines/conv_implicit_gemm/CkDslConvImplicitGemmEngine.hpp b/dnn-providers/ck-dsl-provider/src/engines/conv_implicit_gemm/CkDslConvImplicitGemmEngine.hpp
Lines changed: 0 additions & 6 deletions
diff --git a/dnn-providers/ck-dsl-provider/src/graph/GraphSignature.cpp b/dnn-providers/ck-dsl-provider/src/graph/GraphSignature.cpp
Lines changed: 44 additions & 112 deletions
@@ -18,7 +18,7 @@ using ConvMode = hipdnn_flatbuffers_sdk::data_objects::ConvMode;
 using TensorAttributes = hipdnn_flatbuffers_sdk::data_objects::TensorAttributes;
 using TensorMap = ConvImplicitGemmAdapter::TensorMap;
 
-[[noreturn]] void badParam(const std::string& msg) {
+[[noreturn]] void throwBadParam(const std::string& msg) {
     throw hipdnn_plugin_sdk::HipdnnPluginException(HIPDNN_PLUGIN_STATUS_BAD_PARAM,
                                                    "ConvImplicitGemmAdapter: " + msg);
 }
@@ -29,7 +29,7 @@ const TensorAttributes& lookupTensor(const TensorMap& tensorMap, std::int64_t ui
     if (it == tensorMap.end() || it->second == nullptr) {
         std::ostringstream oss;
         oss << "tensor map missing entry for " << role << " uid=" << uid;
-        badParam(oss.str());
+        throwBadParam(oss.str());
     }
     return *it->second;
 }
@@ -39,7 +39,7 @@ std::int32_t narrowToI32(std::int64_t value, const char* fieldName) {
         value > std::numeric_limits<std::int32_t>::max()) {
         std::ostringstream oss;
         oss << "field '" << fieldName << "' value " << value << " does not fit in int32_t";
-        badParam(oss.str());
+        throwBadParam(oss.str());
     }
     return static_cast<std::int32_t>(value);
 }
@@ -52,7 +52,7 @@ void checkDtypeFp16(const TensorAttributes& t, const char* role) {
     if (t.data_type() != DataType::HALF) {
         std::ostringstream oss;
         oss << role << " data_type must be HALF (FP16); got " << static_cast<int>(t.data_type());
-        badParam(oss.str());
+        throwBadParam(oss.str());
     }
 }
 
@@ -61,7 +61,7 @@ void check4dDims(const TensorAttributes& t, const char* role) {
         std::ostringstream oss;
         oss << role << " dims must be 4-D (logical NCHW for X/Y, KCRS for W); got size "
             << (t.dims() == nullptr ? 0u : t.dims()->size());
-        badParam(oss.str());
+        throwBadParam(oss.str());
     }
 }
 
@@ -74,13 +74,13 @@ std::int32_t getDim(const TensorAttributes& t, std::uint32_t idx, const char* ro
 
 void checkSpatialAttr(const flatbuffers::Vector<std::int64_t>* attr, const char* name) {
     if (attr == nullptr) {
-        badParam(std::string("conv attribute '") + name + "' must be set");
+        throwBadParam(std::string("conv attribute '") + name + "' must be set");
     }
     if (attr->size() != 2) {
         std::ostringstream oss;
         oss << "conv attribute '" << name << "' must have size 2 (2-D conv only for M1); got size "
             << attr->size();
-        badParam(oss.str());
+        throwBadParam(oss.str());
     }
 }
 
@@ -89,7 +89,8 @@ void checkSpatialAttr(const flatbuffers::Vector<std::int64_t>* attr, const char*
 ConvImplicitGemmSpec ConvImplicitGemmAdapter::buildSpec(const ConvolutionFwdAttributes& convAttr,
                                                         const TensorMap& tensorMap) {
     if (convAttr.conv_mode() != ConvMode::CROSS_CORRELATION) {
-        badParam("conv_mode must be CROSS_CORRELATION (true convolution is unsupported for M1)");
+        throwBadParam(
+            "conv_mode must be CROSS_CORRELATION (true convolution is unsupported for M1)");
     }
 
     const auto& X = lookupTensor(tensorMap, convAttr.x_tensor_uid(), "X");
@@ -125,7 +126,7 @@ ConvImplicitGemmSpec ConvImplicitGemmAdapter::buildSpec(const ConvolutionFwdAttr
         std::ostringstream oss;
         oss << "X.C (" << Cx << ") must equal W.C (" << Cw
             << "); grouped convolutions are unsupported for M1";
-        badParam(oss.str());
+        throwBadParam(oss.str());
     }
 
     // We don't lift K/R/S from Y -- the conv-fwd math determines Y's
@@ -134,10 +135,10 @@ ConvImplicitGemmSpec ConvImplicitGemmAdapter::buildSpec(const ConvolutionFwdAttr
     // deferred to I-7 (where it can use the spec's Ho()/Wo() helpers
     // once the spec is built).
     if (getDim(Y, 0, "Y", "N") != N) {
-        badParam("Y.N must equal X.N");
+        throwBadParam("Y.N must equal X.N");
     }
     if (getDim(Y, 1, "Y", "K") != K) {
-        badParam("Y.K must equal W.K");
+        throwBadParam("Y.K must equal W.K");
     }
 
     checkSpatialAttr(convAttr.pre_padding(), "pre_padding");
@@ -150,7 +151,7 @@ ConvImplicitGemmSpec ConvImplicitGemmAdapter::buildSpec(const ConvolutionFwdAttr
     // axis; encoding asymmetric pads would require descriptor changes.
     if (convAttr.pre_padding()->Get(0) != convAttr.post_padding()->Get(0) ||
         convAttr.pre_padding()->Get(1) != convAttr.post_padding()->Get(1)) {
-        badParam("asymmetric padding is not supported");
+        throwBadParam("asymmetric padding is not supported");
     }
 
     ConvImplicitGemmSpec spec{};
 
@@ -54,12 +54,6 @@ class CkDslConvImplicitGemmEngine
         const hipdnn_flatbuffers_sdk::flatbuffer_utilities::IEngineConfig& engineConfig,
         CkDslContext& executionContext) const override;
 
-    /// Test-only accessor: lets the plan-builder test exercise the
-    /// same cache the engine uses for cache-hit verification.
-    ConvImplicitGemmPlanBuilder& planBuilderForTesting() const {
-        return *_planBuilder;
-    }
-
    private:
     std::int64_t _id;
     std::unique_ptr<ConvImplicitGemmPlanBuilder> _planBuilder;
 
@@ -4,10 +4,7 @@
 #include "GraphSignature.hpp"
 
 #include <cstdint>
-#include <cstring>
-#include <hipdnn_plugin_sdk/PluginException.hpp>
-#include <sstream>
-#include <string>
+#include <optional>
 #include <string_view>
 
 #include "version.h"
@@ -35,46 +32,23 @@ inline std::uint64_t fnv1aString(std::uint64_t h, std::string_view s) {
     return fnv1aBytes(h, s.data(), s.size());
 }
 
-inline std::uint64_t fnv1aI64(std::uint64_t h, std::int64_t v) {
-    return fnv1aBytes(h, &v, sizeof(v));
-}
-
 inline std::uint64_t fnv1aI32(std::uint64_t h, std::int32_t v) {
     return fnv1aBytes(h, &v, sizeof(v));
 }
 
-[[noreturn]] void badParam(const std::string& msg) {
-    throw hipdnn_plugin_sdk::HipdnnPluginException(HIPDNN_PLUGIN_STATUS_BAD_PARAM,
-                                                   "GraphSignature: " + msg);
-}
-
-const GraphSignature::TensorAttributes& lookupTensor(const GraphSignature::TensorMap& tensorMap,
-                                                     std::int64_t uid, const char* role) {
-    auto it = tensorMap.find(uid);
-    if (it == tensorMap.end() || it->second == nullptr) {
-        std::ostringstream oss;
-        oss << "tensor map missing entry for " << role << " uid=" << uid;
-        badParam(oss.str());
-    }
-    return *it->second;
-}
-
-void check4dDims(const GraphSignature::TensorAttributes& t, const char* role) {
-    if (t.dims() == nullptr || t.dims()->size() != 4) {
-        std::ostringstream oss;
-        oss << role << " dims must be 4-D; got size "
-            << (t.dims() == nullptr ? 0u : t.dims()->size());
-        badParam(oss.str());
-    }
+inline std::uint64_t fnv1aBool(std::uint64_t h, bool b) {
+    return fnv1aFold(h, b ? 0x01 : 0x00);
 }
 
-void checkSpatialAttr(const flatbuffers::Vector<std::int64_t>* attr, const char* name) {
-    if (attr == nullptr || attr->size() != 2) {
-        std::ostringstream oss;
-        oss << "conv attribute '" << name << "' must be a 2-element vector (2-D conv); got size "
-            << (attr == nullptr ? 0u : attr->size());
-        badParam(oss.str());
+// Fold an optional<i32> as a presence discriminator followed by the
+// value when set. The discriminator keeps ``nullopt`` distinct from a
+// present ``0`` (otherwise both would fold nothing / a zero and alias).
+inline std::uint64_t fnv1aOptI32(std::uint64_t h, const std::optional<std::int32_t>& v) {
+    h = fnv1aFold(h, v.has_value() ? 0x01 : 0x00);
+    if (v.has_value()) {
+        h = fnv1aI32(h, *v);
     }
+    return h;
 }
 
 }  // namespace
@@ -115,88 +89,46 @@ SignatureHash GraphSignature::computeForSpec(std::string_view opKind,
     h = fnv1aFold(h, 0x00);
     h = fnv1aI32(h, p.dH);
     h = fnv1aI32(h, p.dW);
-
-    return static_cast<SignatureHash>(h);
-}
-
-SignatureHash GraphSignature::computeForConvFwd(std::string_view opKind,
-                                                const ConvolutionFwdAttributes& convAttr,
-                                                const TensorMap& tensorMap) {
-    const auto& X = lookupTensor(tensorMap, convAttr.x_tensor_uid(), "X");
-    const auto& W = lookupTensor(tensorMap, convAttr.w_tensor_uid(), "W");
-    const auto& Y = lookupTensor(tensorMap, convAttr.y_tensor_uid(), "Y");
-
-    check4dDims(X, "X");
-    check4dDims(W, "W");
-    check4dDims(Y, "Y");
-    checkSpatialAttr(convAttr.pre_padding(), "pre_padding");
-    checkSpatialAttr(convAttr.stride(), "stride");
-    checkSpatialAttr(convAttr.dilation(), "dilation");
-
-    std::uint64_t h = kFnv1aOffset;
-
-    // Provider/DSL version string. Fold the entire macro contents
-    // (including the git SHA suffix) so any DSL or provider change
-    // bumps the namespace. Using the C string literal keeps the
-    // dependency at compile time -- no need to thread the version
-    // through the signature inputs at runtime.
-    h = fnv1aString(h, CK_DSL_PROVIDER_VERSION_STRING);
-
-    // Separator byte. Defensive against accidental aliasing if a
-    // future input happens to abut a numerically-identical version
-    // suffix.
     h = fnv1aFold(h, 0x00);
 
-    h = fnv1aString(h, opKind);
+    // Codegen knobs. Every field below changes the emitted HSACO (tile
+    // shape, MFMA atom, pipeline/epilogue, occupancy hints, grid
+    // swizzle, kernel name). They are all constexpr defaults in M1, so
+    // folding them is behaviour-identical today -- but it makes the key
+    // correct-by-construction for M2 autotuning, which will vary these
+    // per shape/arch. Omitting them would let an autotuned kernel
+    // collide with a default-tuned one of the same shape and hand back
+    // the wrong module. New knobs append at the bottom, same as the
+    // ConvProblem block.
+    h = fnv1aString(h, spec.name);
     h = fnv1aFold(h, 0x00);
-
-    // Dtype trio. Encoded as the raw enum value (single i32) per
-    // tensor so a dtype change (HALF -> FLOAT, etc.) gives a
-    // different hash even if the shape is unchanged.
-    h = fnv1aI32(h, static_cast<std::int32_t>(X.data_type()));
-    h = fnv1aI32(h, static_cast<std::int32_t>(W.data_type()));
-    h = fnv1aI32(h, static_cast<std::int32_t>(Y.data_type()));
-
-    // Shape trio. Fold all four logical dims per tensor (NCHW order
-    // for X/Y, KCRS for W). We include Y's dims even though they're
-    // derivable from X/W + conv attrs -- a malformed graph where Y is
-    // a different shape than the conv arithmetic predicts should miss
-    // the cache and not collide with a well-formed graph.
-    for (std::uint32_t i = 0; i < 4; ++i) {
-        h = fnv1aI64(h, X.dims()->Get(i));
-    }
-    for (std::uint32_t i = 0; i < 4; ++i) {
-        h = fnv1aI64(h, W.dims()->Get(i));
-    }
-    for (std::uint32_t i = 0; i < 4; ++i) {
-        h = fnv1aI64(h, Y.dims()->Get(i));
-    }
-
-    // Conv knobs. Padding/stride/dilation are 2-element vectors per
-    // ``checkSpatialAttr``; post_padding is folded as a defense in
-    // depth so an asymmetric-padding regression hashes differently
-    // (the adapter would reject it, but the cache shouldn't return a
-    // symmetric-padding kernel from a similar-looking key).
-    if (convAttr.post_padding() != nullptr) {
-        for (std::uint32_t i = 0; i < convAttr.post_padding()->size(); ++i) {
-            h = fnv1aI64(h, convAttr.post_padding()->Get(i));
-        }
-    }
+    h = fnv1aI32(h, spec.tile_m);
+    h = fnv1aI32(h, spec.tile_n);
+    h = fnv1aI32(h, spec.tile_k);
     h = fnv1aFold(h, 0x00);
-    for (std::uint32_t i = 0; i < 2; ++i) {
-        h = fnv1aI64(h, convAttr.pre_padding()->Get(i));
-    }
+    h = fnv1aI32(h, spec.warp_m);
+    h = fnv1aI32(h, spec.warp_n);
     h = fnv1aFold(h, 0x00);
-    for (std::uint32_t i = 0; i < 2; ++i) {
-        h = fnv1aI64(h, convAttr.stride()->Get(i));
-    }
+    h = fnv1aI32(h, spec.warp_tile_m);
+    h = fnv1aI32(h, spec.warp_tile_n);
+    h = fnv1aI32(h, spec.warp_tile_k);
     h = fnv1aFold(h, 0x00);
-    for (std::uint32_t i = 0; i < 2; ++i) {
-        h = fnv1aI64(h, convAttr.dilation()->Get(i));
-    }
+    h = fnv1aI32(h, spec.wave_size);
     h = fnv1aFold(h, 0x00);
-
-    h = fnv1aI32(h, static_cast<std::int32_t>(convAttr.conv_mode()));
+    h = fnv1aString(h, spec.pipeline);
+    h = fnv1aFold(h, 0x00);
+    h = fnv1aString(h, spec.epilogue);
+    h = fnv1aFold(h, 0x00);
+    h = fnv1aBool(h, spec.async_dma);
+    h = fnv1aBool(h, spec.unroll_k);
+    h = fnv1aOptI32(h, spec.lds_k_pad);
+    h = fnv1aFold(h, 0x00);
+    h = fnv1aBool(h, spec.chiplet_swizzle);
+    h = fnv1aI32(h, spec.chiplet_wgm);
+    h = fnv1aI32(h, spec.chiplet_num_xcds);
+    h = fnv1aI32(h, spec.chiplet_chunk_size);
+    h = fnv1aFold(h, 0x00);
+    h = fnv1aOptI32(h, spec.waves_per_eu);
 
     return static_cast<SignatureHash>(h);
 }
Original file line number	Diff line number	Diff line change
`@@ -18,7 +18,7 @@ using ConvMode = hipdnn_flatbuffers_sdk::data_objects::ConvMode;`
`18`	`18`	`using TensorAttributes = hipdnn_flatbuffers_sdk::data_objects::TensorAttributes;`
`19`	`19`	`using TensorMap = ConvImplicitGemmAdapter::TensorMap;`
`20`	`20`
`21`		`-[[noreturn]] void badParam(const std::string& msg) {`
	`21`	`+[[noreturn]] void throwBadParam(const std::string& msg) {`
`22`	`22`	`throw hipdnn_plugin_sdk::HipdnnPluginException(HIPDNN_PLUGIN_STATUS_BAD_PARAM,`
`23`	`23`	`"ConvImplicitGemmAdapter: " + msg);`
`24`	`24`	`}`
`@@ -29,7 +29,7 @@ const TensorAttributes& lookupTensor(const TensorMap& tensorMap, std::int64_t ui`
`29`	`29`	`if (it == tensorMap.end() || it->second == nullptr) {`
`30`	`30`	`std::ostringstream oss;`
`31`	`31`	`oss << "tensor map missing entry for " << role << " uid=" << uid;`
`32`		`- badParam(oss.str());`
	`32`	`+ throwBadParam(oss.str());`
`33`	`33`	`}`
`34`	`34`	`return *it->second;`
`35`	`35`	`}`
`@@ -39,7 +39,7 @@ std::int32_t narrowToI32(std::int64_t value, const char* fieldName) {`
`39`	`39`	`value > std::numeric_limits<std::int32_t>::max()) {`
`40`	`40`	`std::ostringstream oss;`
`41`	`41`	`oss << "field '" << fieldName << "' value " << value << " does not fit in int32_t";`
`42`		`- badParam(oss.str());`
	`42`	`+ throwBadParam(oss.str());`
`43`	`43`	`}`
`44`	`44`	`return static_cast<std::int32_t>(value);`
`45`	`45`	`}`
`@@ -52,7 +52,7 @@ void checkDtypeFp16(const TensorAttributes& t, const char* role) {`
`52`	`52`	`if (t.data_type() != DataType::HALF) {`
`53`	`53`	`std::ostringstream oss;`
`54`	`54`	`oss << role << " data_type must be HALF (FP16); got " << static_cast<int>(t.data_type());`
`55`		`- badParam(oss.str());`
	`55`	`+ throwBadParam(oss.str());`
`56`	`56`	`}`
`57`	`57`	`}`
`58`	`58`
`@@ -61,7 +61,7 @@ void check4dDims(const TensorAttributes& t, const char* role) {`
`61`	`61`	`std::ostringstream oss;`
`62`	`62`	`oss << role << " dims must be 4-D (logical NCHW for X/Y, KCRS for W); got size "`
`63`	`63`	`<< (t.dims() == nullptr ? 0u : t.dims()->size());`
`64`		`- badParam(oss.str());`
	`64`	`+ throwBadParam(oss.str());`
`65`	`65`	`}`
`66`	`66`	`}`
`67`	`67`
`@@ -74,13 +74,13 @@ std::int32_t getDim(const TensorAttributes& t, std::uint32_t idx, const char* ro`
`74`	`74`
`75`	`75`	`void checkSpatialAttr(const flatbuffers::Vector<std::int64_t>* attr, const char* name) {`
`76`	`76`	`if (attr == nullptr) {`
`77`		`- badParam(std::string("conv attribute '") + name + "' must be set");`
	`77`	`+ throwBadParam(std::string("conv attribute '") + name + "' must be set");`
`78`	`78`	`}`
`79`	`79`	`if (attr->size() != 2) {`
`80`	`80`	`std::ostringstream oss;`
`81`	`81`	`oss << "conv attribute '" << name << "' must have size 2 (2-D conv only for M1); got size "`
`82`	`82`	`<< attr->size();`
`83`		`- badParam(oss.str());`
	`83`	`+ throwBadParam(oss.str());`
`84`	`84`	`}`
`85`	`85`	`}`
`86`	`86`
`@@ -89,7 +89,8 @@ void checkSpatialAttr(const flatbuffers::Vector<std::int64_t>* attr, const char*`
`89`	`89`	`ConvImplicitGemmSpec ConvImplicitGemmAdapter::buildSpec(const ConvolutionFwdAttributes& convAttr,`
`90`	`90`	`const TensorMap& tensorMap) {`
`91`	`91`	`if (convAttr.conv_mode() != ConvMode::CROSS_CORRELATION) {`
`92`		`- badParam("conv_mode must be CROSS_CORRELATION (true convolution is unsupported for M1)");`
	`92`	`+ throwBadParam(`
	`93`	`+ "conv_mode must be CROSS_CORRELATION (true convolution is unsupported for M1)");`
`93`	`94`	`}`
`94`	`95`
`95`	`96`	`const auto& X = lookupTensor(tensorMap, convAttr.x_tensor_uid(), "X");`
`@@ -125,7 +126,7 @@ ConvImplicitGemmSpec ConvImplicitGemmAdapter::buildSpec(const ConvolutionFwdAttr`
`125`	`126`	`std::ostringstream oss;`
`126`	`127`	`oss << "X.C (" << Cx << ") must equal W.C (" << Cw`
`127`	`128`	`<< "); grouped convolutions are unsupported for M1";`
`128`		`- badParam(oss.str());`
	`129`	`+ throwBadParam(oss.str());`
`129`	`130`	`}`
`130`	`131`
`131`	`132`	`// We don't lift K/R/S from Y -- the conv-fwd math determines Y's`
`@@ -134,10 +135,10 @@ ConvImplicitGemmSpec ConvImplicitGemmAdapter::buildSpec(const ConvolutionFwdAttr`
`134`	`135`	`// deferred to I-7 (where it can use the spec's Ho()/Wo() helpers`
`135`	`136`	`// once the spec is built).`
`136`	`137`	`if (getDim(Y, 0, "Y", "N") != N) {`
`137`		`- badParam("Y.N must equal X.N");`
	`138`	`+ throwBadParam("Y.N must equal X.N");`
`138`	`139`	`}`
`139`	`140`	`if (getDim(Y, 1, "Y", "K") != K) {`
`140`		`- badParam("Y.K must equal W.K");`
	`141`	`+ throwBadParam("Y.K must equal W.K");`
`141`	`142`	`}`
`142`	`143`
`143`	`144`	`checkSpatialAttr(convAttr.pre_padding(), "pre_padding");`
`@@ -150,7 +151,7 @@ ConvImplicitGemmSpec ConvImplicitGemmAdapter::buildSpec(const ConvolutionFwdAttr`
`150`	`151`	`// axis; encoding asymmetric pads would require descriptor changes.`
`151`	`152`	`if (convAttr.pre_padding()->Get(0) != convAttr.post_padding()->Get(0) ||`
`152`	`153`	`convAttr.pre_padding()->Get(1) != convAttr.post_padding()->Get(1)) {`
`153`		`- badParam("asymmetric padding is not supported");`
	`154`	`+ throwBadParam("asymmetric padding is not supported");`
`154`	`155`	`}`
`155`	`156`
`156`	`157`	`ConvImplicitGemmSpec spec{};`