Skip to content

Commit a43ac83

Browse files
[CK DSL] Provider cleanup: dedup graph signature, drop dead code
Non-functional cleanups in ck-dsl-provider; no change to the JIT or launch behaviour. Graph signature: - Remove the legacy FlatBuffer-walking GraphSignature::computeForConvFwd. The cache key now derives solely from the adapter-built ConvImplicitGemmSpec via computeForSpec, keeping the adapter the single FlatBuffer reader and removing a parallel read path that could drift. - Rewrite GraphSignatureTest around computeForSpec: per-field and codegen-knob sensitivity, position-aliasing, optional-knob presence, and an adapter-built-spec round-trip. - Clarify the SignatureHash doc and spell out the on-disk-cache (M3) precondition: dtype/arch/layout must join the key before entries can be persisted. Naming: - Rename badParam -> throwBadParam in ConvImplicitGemmAdapter to reflect that it is [[noreturn]]. Dead code (zero consumers, confirmed against the provider, tests, and the plugin ABI): - EmbeddedInterpreter::importCheck -- I-3 bring-up scaffolding, unused since the integration test landed. - HipModule::kind() and its write-only _kind member. - The unused HipModule::launch(std::vector, ...) overload. - CkDslConvImplicitGemmEngine::planBuilderForTesting() -- a test seam with no test. Kept deliberately as forward-compat per dsl_docs/hipdnn_provider/plan.md: KernelArtifact::isa (M3 disk cache) and the I64/F32/F16 launch-ABI surface (M2/M5 ops). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent fedcc74 commit a43ac83

10 files changed

Lines changed: 275 additions & 292 deletions

File tree

dnn-providers/ck-dsl-provider/src/adapters/conv_implicit_gemm/ConvImplicitGemmAdapter.cpp

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ using ConvMode = hipdnn_flatbuffers_sdk::data_objects::ConvMode;
1818
using TensorAttributes = hipdnn_flatbuffers_sdk::data_objects::TensorAttributes;
1919
using TensorMap = ConvImplicitGemmAdapter::TensorMap;
2020

21-
[[noreturn]] void badParam(const std::string& msg) {
21+
[[noreturn]] void throwBadParam(const std::string& msg) {
2222
throw hipdnn_plugin_sdk::HipdnnPluginException(HIPDNN_PLUGIN_STATUS_BAD_PARAM,
2323
"ConvImplicitGemmAdapter: " + msg);
2424
}
@@ -29,7 +29,7 @@ const TensorAttributes& lookupTensor(const TensorMap& tensorMap, std::int64_t ui
2929
if (it == tensorMap.end() || it->second == nullptr) {
3030
std::ostringstream oss;
3131
oss << "tensor map missing entry for " << role << " uid=" << uid;
32-
badParam(oss.str());
32+
throwBadParam(oss.str());
3333
}
3434
return *it->second;
3535
}
@@ -39,7 +39,7 @@ std::int32_t narrowToI32(std::int64_t value, const char* fieldName) {
3939
value > std::numeric_limits<std::int32_t>::max()) {
4040
std::ostringstream oss;
4141
oss << "field '" << fieldName << "' value " << value << " does not fit in int32_t";
42-
badParam(oss.str());
42+
throwBadParam(oss.str());
4343
}
4444
return static_cast<std::int32_t>(value);
4545
}
@@ -52,7 +52,7 @@ void checkDtypeFp16(const TensorAttributes& t, const char* role) {
5252
if (t.data_type() != DataType::HALF) {
5353
std::ostringstream oss;
5454
oss << role << " data_type must be HALF (FP16); got " << static_cast<int>(t.data_type());
55-
badParam(oss.str());
55+
throwBadParam(oss.str());
5656
}
5757
}
5858

@@ -61,7 +61,7 @@ void check4dDims(const TensorAttributes& t, const char* role) {
6161
std::ostringstream oss;
6262
oss << role << " dims must be 4-D (logical NCHW for X/Y, KCRS for W); got size "
6363
<< (t.dims() == nullptr ? 0u : t.dims()->size());
64-
badParam(oss.str());
64+
throwBadParam(oss.str());
6565
}
6666
}
6767

@@ -74,13 +74,13 @@ std::int32_t getDim(const TensorAttributes& t, std::uint32_t idx, const char* ro
7474

7575
void checkSpatialAttr(const flatbuffers::Vector<std::int64_t>* attr, const char* name) {
7676
if (attr == nullptr) {
77-
badParam(std::string("conv attribute '") + name + "' must be set");
77+
throwBadParam(std::string("conv attribute '") + name + "' must be set");
7878
}
7979
if (attr->size() != 2) {
8080
std::ostringstream oss;
8181
oss << "conv attribute '" << name << "' must have size 2 (2-D conv only for M1); got size "
8282
<< attr->size();
83-
badParam(oss.str());
83+
throwBadParam(oss.str());
8484
}
8585
}
8686

@@ -89,7 +89,8 @@ void checkSpatialAttr(const flatbuffers::Vector<std::int64_t>* attr, const char*
8989
ConvImplicitGemmSpec ConvImplicitGemmAdapter::buildSpec(const ConvolutionFwdAttributes& convAttr,
9090
const TensorMap& tensorMap) {
9191
if (convAttr.conv_mode() != ConvMode::CROSS_CORRELATION) {
92-
badParam("conv_mode must be CROSS_CORRELATION (true convolution is unsupported for M1)");
92+
throwBadParam(
93+
"conv_mode must be CROSS_CORRELATION (true convolution is unsupported for M1)");
9394
}
9495

9596
const auto& X = lookupTensor(tensorMap, convAttr.x_tensor_uid(), "X");
@@ -125,7 +126,7 @@ ConvImplicitGemmSpec ConvImplicitGemmAdapter::buildSpec(const ConvolutionFwdAttr
125126
std::ostringstream oss;
126127
oss << "X.C (" << Cx << ") must equal W.C (" << Cw
127128
<< "); grouped convolutions are unsupported for M1";
128-
badParam(oss.str());
129+
throwBadParam(oss.str());
129130
}
130131

131132
// We don't lift K/R/S from Y -- the conv-fwd math determines Y's
@@ -134,10 +135,10 @@ ConvImplicitGemmSpec ConvImplicitGemmAdapter::buildSpec(const ConvolutionFwdAttr
134135
// deferred to I-7 (where it can use the spec's Ho()/Wo() helpers
135136
// once the spec is built).
136137
if (getDim(Y, 0, "Y", "N") != N) {
137-
badParam("Y.N must equal X.N");
138+
throwBadParam("Y.N must equal X.N");
138139
}
139140
if (getDim(Y, 1, "Y", "K") != K) {
140-
badParam("Y.K must equal W.K");
141+
throwBadParam("Y.K must equal W.K");
141142
}
142143

143144
checkSpatialAttr(convAttr.pre_padding(), "pre_padding");
@@ -150,7 +151,7 @@ ConvImplicitGemmSpec ConvImplicitGemmAdapter::buildSpec(const ConvolutionFwdAttr
150151
// axis; encoding asymmetric pads would require descriptor changes.
151152
if (convAttr.pre_padding()->Get(0) != convAttr.post_padding()->Get(0) ||
152153
convAttr.pre_padding()->Get(1) != convAttr.post_padding()->Get(1)) {
153-
badParam("asymmetric padding is not supported");
154+
throwBadParam("asymmetric padding is not supported");
154155
}
155156

156157
ConvImplicitGemmSpec spec{};

dnn-providers/ck-dsl-provider/src/engines/conv_implicit_gemm/CkDslConvImplicitGemmEngine.hpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,6 @@ class CkDslConvImplicitGemmEngine
5454
const hipdnn_flatbuffers_sdk::flatbuffer_utilities::IEngineConfig& engineConfig,
5555
CkDslContext& executionContext) const override;
5656

57-
/// Test-only accessor: lets the plan-builder test exercise the
58-
/// same cache the engine uses for cache-hit verification.
59-
ConvImplicitGemmPlanBuilder& planBuilderForTesting() const {
60-
return *_planBuilder;
61-
}
62-
6357
private:
6458
std::int64_t _id;
6559
std::unique_ptr<ConvImplicitGemmPlanBuilder> _planBuilder;

dnn-providers/ck-dsl-provider/src/graph/GraphSignature.cpp

Lines changed: 44 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,7 @@
44
#include "GraphSignature.hpp"
55

66
#include <cstdint>
7-
#include <cstring>
8-
#include <hipdnn_plugin_sdk/PluginException.hpp>
9-
#include <sstream>
10-
#include <string>
7+
#include <optional>
118
#include <string_view>
129

1310
#include "version.h"
@@ -35,46 +32,23 @@ inline std::uint64_t fnv1aString(std::uint64_t h, std::string_view s) {
3532
return fnv1aBytes(h, s.data(), s.size());
3633
}
3734

38-
inline std::uint64_t fnv1aI64(std::uint64_t h, std::int64_t v) {
39-
return fnv1aBytes(h, &v, sizeof(v));
40-
}
41-
4235
inline std::uint64_t fnv1aI32(std::uint64_t h, std::int32_t v) {
4336
return fnv1aBytes(h, &v, sizeof(v));
4437
}
4538

46-
[[noreturn]] void badParam(const std::string& msg) {
47-
throw hipdnn_plugin_sdk::HipdnnPluginException(HIPDNN_PLUGIN_STATUS_BAD_PARAM,
48-
"GraphSignature: " + msg);
49-
}
50-
51-
const GraphSignature::TensorAttributes& lookupTensor(const GraphSignature::TensorMap& tensorMap,
52-
std::int64_t uid, const char* role) {
53-
auto it = tensorMap.find(uid);
54-
if (it == tensorMap.end() || it->second == nullptr) {
55-
std::ostringstream oss;
56-
oss << "tensor map missing entry for " << role << " uid=" << uid;
57-
badParam(oss.str());
58-
}
59-
return *it->second;
60-
}
61-
62-
void check4dDims(const GraphSignature::TensorAttributes& t, const char* role) {
63-
if (t.dims() == nullptr || t.dims()->size() != 4) {
64-
std::ostringstream oss;
65-
oss << role << " dims must be 4-D; got size "
66-
<< (t.dims() == nullptr ? 0u : t.dims()->size());
67-
badParam(oss.str());
68-
}
39+
inline std::uint64_t fnv1aBool(std::uint64_t h, bool b) {
40+
return fnv1aFold(h, b ? 0x01 : 0x00);
6941
}
7042

71-
void checkSpatialAttr(const flatbuffers::Vector<std::int64_t>* attr, const char* name) {
72-
if (attr == nullptr || attr->size() != 2) {
73-
std::ostringstream oss;
74-
oss << "conv attribute '" << name << "' must be a 2-element vector (2-D conv); got size "
75-
<< (attr == nullptr ? 0u : attr->size());
76-
badParam(oss.str());
43+
// Fold an optional<i32> as a presence discriminator followed by the
44+
// value when set. The discriminator keeps ``nullopt`` distinct from a
45+
// present ``0`` (otherwise both would fold nothing / a zero and alias).
46+
inline std::uint64_t fnv1aOptI32(std::uint64_t h, const std::optional<std::int32_t>& v) {
47+
h = fnv1aFold(h, v.has_value() ? 0x01 : 0x00);
48+
if (v.has_value()) {
49+
h = fnv1aI32(h, *v);
7750
}
51+
return h;
7852
}
7953

8054
} // namespace
@@ -115,88 +89,46 @@ SignatureHash GraphSignature::computeForSpec(std::string_view opKind,
11589
h = fnv1aFold(h, 0x00);
11690
h = fnv1aI32(h, p.dH);
11791
h = fnv1aI32(h, p.dW);
118-
119-
return static_cast<SignatureHash>(h);
120-
}
121-
122-
SignatureHash GraphSignature::computeForConvFwd(std::string_view opKind,
123-
const ConvolutionFwdAttributes& convAttr,
124-
const TensorMap& tensorMap) {
125-
const auto& X = lookupTensor(tensorMap, convAttr.x_tensor_uid(), "X");
126-
const auto& W = lookupTensor(tensorMap, convAttr.w_tensor_uid(), "W");
127-
const auto& Y = lookupTensor(tensorMap, convAttr.y_tensor_uid(), "Y");
128-
129-
check4dDims(X, "X");
130-
check4dDims(W, "W");
131-
check4dDims(Y, "Y");
132-
checkSpatialAttr(convAttr.pre_padding(), "pre_padding");
133-
checkSpatialAttr(convAttr.stride(), "stride");
134-
checkSpatialAttr(convAttr.dilation(), "dilation");
135-
136-
std::uint64_t h = kFnv1aOffset;
137-
138-
// Provider/DSL version string. Fold the entire macro contents
139-
// (including the git SHA suffix) so any DSL or provider change
140-
// bumps the namespace. Using the C string literal keeps the
141-
// dependency at compile time -- no need to thread the version
142-
// through the signature inputs at runtime.
143-
h = fnv1aString(h, CK_DSL_PROVIDER_VERSION_STRING);
144-
145-
// Separator byte. Defensive against accidental aliasing if a
146-
// future input happens to abut a numerically-identical version
147-
// suffix.
14892
h = fnv1aFold(h, 0x00);
14993

150-
h = fnv1aString(h, opKind);
94+
// Codegen knobs. Every field below changes the emitted HSACO (tile
95+
// shape, MFMA atom, pipeline/epilogue, occupancy hints, grid
96+
// swizzle, kernel name). They are all constexpr defaults in M1, so
97+
// folding them is behaviour-identical today -- but it makes the key
98+
// correct-by-construction for M2 autotuning, which will vary these
99+
// per shape/arch. Omitting them would let an autotuned kernel
100+
// collide with a default-tuned one of the same shape and hand back
101+
// the wrong module. New knobs append at the bottom, same as the
102+
// ConvProblem block.
103+
h = fnv1aString(h, spec.name);
151104
h = fnv1aFold(h, 0x00);
152-
153-
// Dtype trio. Encoded as the raw enum value (single i32) per
154-
// tensor so a dtype change (HALF -> FLOAT, etc.) gives a
155-
// different hash even if the shape is unchanged.
156-
h = fnv1aI32(h, static_cast<std::int32_t>(X.data_type()));
157-
h = fnv1aI32(h, static_cast<std::int32_t>(W.data_type()));
158-
h = fnv1aI32(h, static_cast<std::int32_t>(Y.data_type()));
159-
160-
// Shape trio. Fold all four logical dims per tensor (NCHW order
161-
// for X/Y, KCRS for W). We include Y's dims even though they're
162-
// derivable from X/W + conv attrs -- a malformed graph where Y is
163-
// a different shape than the conv arithmetic predicts should miss
164-
// the cache and not collide with a well-formed graph.
165-
for (std::uint32_t i = 0; i < 4; ++i) {
166-
h = fnv1aI64(h, X.dims()->Get(i));
167-
}
168-
for (std::uint32_t i = 0; i < 4; ++i) {
169-
h = fnv1aI64(h, W.dims()->Get(i));
170-
}
171-
for (std::uint32_t i = 0; i < 4; ++i) {
172-
h = fnv1aI64(h, Y.dims()->Get(i));
173-
}
174-
175-
// Conv knobs. Padding/stride/dilation are 2-element vectors per
176-
// ``checkSpatialAttr``; post_padding is folded as a defense in
177-
// depth so an asymmetric-padding regression hashes differently
178-
// (the adapter would reject it, but the cache shouldn't return a
179-
// symmetric-padding kernel from a similar-looking key).
180-
if (convAttr.post_padding() != nullptr) {
181-
for (std::uint32_t i = 0; i < convAttr.post_padding()->size(); ++i) {
182-
h = fnv1aI64(h, convAttr.post_padding()->Get(i));
183-
}
184-
}
105+
h = fnv1aI32(h, spec.tile_m);
106+
h = fnv1aI32(h, spec.tile_n);
107+
h = fnv1aI32(h, spec.tile_k);
185108
h = fnv1aFold(h, 0x00);
186-
for (std::uint32_t i = 0; i < 2; ++i) {
187-
h = fnv1aI64(h, convAttr.pre_padding()->Get(i));
188-
}
109+
h = fnv1aI32(h, spec.warp_m);
110+
h = fnv1aI32(h, spec.warp_n);
189111
h = fnv1aFold(h, 0x00);
190-
for (std::uint32_t i = 0; i < 2; ++i) {
191-
h = fnv1aI64(h, convAttr.stride()->Get(i));
192-
}
112+
h = fnv1aI32(h, spec.warp_tile_m);
113+
h = fnv1aI32(h, spec.warp_tile_n);
114+
h = fnv1aI32(h, spec.warp_tile_k);
193115
h = fnv1aFold(h, 0x00);
194-
for (std::uint32_t i = 0; i < 2; ++i) {
195-
h = fnv1aI64(h, convAttr.dilation()->Get(i));
196-
}
116+
h = fnv1aI32(h, spec.wave_size);
197117
h = fnv1aFold(h, 0x00);
198-
199-
h = fnv1aI32(h, static_cast<std::int32_t>(convAttr.conv_mode()));
118+
h = fnv1aString(h, spec.pipeline);
119+
h = fnv1aFold(h, 0x00);
120+
h = fnv1aString(h, spec.epilogue);
121+
h = fnv1aFold(h, 0x00);
122+
h = fnv1aBool(h, spec.async_dma);
123+
h = fnv1aBool(h, spec.unroll_k);
124+
h = fnv1aOptI32(h, spec.lds_k_pad);
125+
h = fnv1aFold(h, 0x00);
126+
h = fnv1aBool(h, spec.chiplet_swizzle);
127+
h = fnv1aI32(h, spec.chiplet_wgm);
128+
h = fnv1aI32(h, spec.chiplet_num_xcds);
129+
h = fnv1aI32(h, spec.chiplet_chunk_size);
130+
h = fnv1aFold(h, 0x00);
131+
h = fnv1aOptI32(h, spec.waves_per_eu);
200132

201133
return static_cast<SignatureHash>(h);
202134
}

0 commit comments

Comments
 (0)