Skip to content

Commit a7e19ae

Browse files
authored
# 📢 cuDNN Frontend v1.14 — Release Notes (#158)
**Preferred version for:** [cuDNN 9.12.0](https://docs.nvidia.com/deeplearning/cudnn/backend/latest/release-notes.html#cudnn-9-12-0) and above

**Minimum Python version:** `3.9` (previously `3.8`, now obsolete)

**Updated pip wheels:** Available for Python `3.13`

---

## 🚀 Improvements

### 🔹 SDPA

- Introduced a **unified SDPA node** → reduces graph creation latency & simplifies the SDPA graph creation. *(No API changes required from users.)*
- Improved support checks for **SDPA FP8 forward propagation (fprop)**.
- Improved support checks for **Hopper backward propagation (bprop)** to fix a bug (introduced in `9.11`) affecting certain large head-dimension combinations of *d_qk* and *d_v*.
- Added new SDPA samples with:
  - **Paged prefill**
  - **Ragged Q tensor decode**

---

### 🔹 Normalizations

- Added support for **fused LayerNorm with ReLU**.
- Included sample: [`LayerNorm with ReLU bitmask dump`](samples/cpp/norm/layernorm_bitmask_relu.cpp).

---

### 🔹 Matmul

- Added Python sample for **low-precision FP8/FP4 matrix multiplications**.

---

### 🔹 Other Updates

- Added Python bindings for **deviceless graph compilation**.
  → Sample: [`test_deviceless_aot_compilation.py`](test/python/test_deviceless_aot_compilation.py)
- Addressed GitHub issue: [#151](#151).

---

✅ **Recommended Action**: Upgrade to cuDNN Frontend v1.14.0 for full compatibility with cuDNN `9.12.0+`, improved SDPA support, additional normalization support, and deviceless graph compilation features.
1 parent 31b2c5d commit a7e19ae

40 files changed

+3215
-1300
lines changed

‎CMakeLists.txt‎

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
cmake_minimum_required(VERSION 3.23)
22

3-
project(cudnn_frontend VERSION 1.13.0)
3+
project(cudnn_frontend VERSION 1.14.0)
44

55
option(CUDNN_FRONTEND_SKIP_JSON_LIB "Defines whether FE should not include nlohmann/json.hpp." OFF)
66
option(CUDNN_FRONTEND_BUILD_SAMPLES "Defines if samples are built or not." ON)

‎include/cudnn_frontend/backend/backend_descriptor.h‎

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -89,7 +89,7 @@ class backend_descriptor {
8989
*/
9090
error_t
9191
initialize(cudnnBackendDescriptorType_t type) {
92-
CHECK_CUDNN_ERROR(detail::create_descriptor(type, &desc));
92+
_CUDNN_CHECK_CUDNN_ERROR(detail::create_descriptor(type, &desc));
9393
return {error_code_t::OK, ""};
9494
}
9595

@@ -99,7 +99,7 @@ class backend_descriptor {
9999
*/
100100
error_t
101101
finalize() {
102-
CHECK_CUDNN_ERROR(detail::finalize(desc));
102+
_CUDNN_CHECK_CUDNN_ERROR(detail::finalize(desc));
103103
return {error_code_t::OK, ""};
104104
}
105105

‎include/cudnn_frontend/backend/device_properties.h‎

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -82,16 +82,16 @@ class DeviceProperties : public detail::backend_descriptor {
8282
"CUDNN_ATTR_DEVICEPROP_JSON_REPRESENTATION is only available starting 9.8.");
8383

8484
int64_t serializationSize;
85-
CHECK_CUDNN_ERROR(detail::get_attribute(
85+
_CUDNN_CHECK_CUDNN_ERROR(detail::get_attribute(
8686
get_ptr(), CUDNN_ATTR_DEVICEPROP_JSON_REPRESENTATION, CUDNN_TYPE_CHAR, 0, &serializationSize, nullptr));
8787
serialization_buf.resize(static_cast<size_t>(serializationSize));
8888

89-
CHECK_CUDNN_ERROR(detail::get_attribute(get_ptr(),
90-
CUDNN_ATTR_DEVICEPROP_JSON_REPRESENTATION,
91-
CUDNN_TYPE_CHAR,
92-
serializationSize,
93-
&serializationSize,
94-
serialization_buf.data()));
89+
_CUDNN_CHECK_CUDNN_ERROR(detail::get_attribute(get_ptr(),
90+
CUDNN_ATTR_DEVICEPROP_JSON_REPRESENTATION,
91+
CUDNN_TYPE_CHAR,
92+
serializationSize,
93+
&serializationSize,
94+
serialization_buf.data()));
9595
return {};
9696
#else
9797
(void)serialization_buf;
@@ -107,18 +107,18 @@ class DeviceProperties : public detail::backend_descriptor {
107107
error_code_t::CUDNN_BACKEND_API_FAILED,
108108
"CUDNN_ATTR_DEVICEPROP_JSON_REPRESENTATION is only available starting 9.8.");
109109

110-
// Check if the kernel cache is already initialized
110+
// Check if the device properties is already initialized
111111
RETURN_CUDNN_FRONTEND_ERROR_IF(
112112
get_ptr() != nullptr, error_code_t::CUDNN_BACKEND_API_FAILED, "Device properties is already initialized.");
113113

114-
// // Initialize the device properties descriptor
114+
// Initialize the device properties descriptor
115115
CHECK_CUDNN_FRONTEND_ERROR(initialize(CUDNN_BACKEND_DEVICEPROP_DESCRIPTOR));
116116

117-
CHECK_CUDNN_ERROR(detail::set_attribute(get_ptr(),
118-
CUDNN_ATTR_DEVICEPROP_JSON_REPRESENTATION,
119-
CUDNN_TYPE_CHAR,
120-
serialized_buf.size(),
121-
serialized_buf.data()));
117+
_CUDNN_CHECK_CUDNN_ERROR(detail::set_attribute(get_ptr(),
118+
CUDNN_ATTR_DEVICEPROP_JSON_REPRESENTATION,
119+
CUDNN_TYPE_CHAR,
120+
serialized_buf.size(),
121+
serialized_buf.data()));
122122

123123
CHECK_CUDNN_FRONTEND_ERROR(finalize());
124124
return {};
@@ -141,12 +141,12 @@ class DeviceProperties : public detail::backend_descriptor {
141141
}
142142

143143
if (handle != nullptr) {
144-
CHECK_CUDNN_ERROR(
144+
_CUDNN_CHECK_CUDNN_ERROR(
145145
detail::set_attribute(get_ptr(), CUDNN_ATTR_DEVICEPROP_HANDLE, CUDNN_TYPE_HANDLE, 1, &handle));
146146
}
147147

148148
if (device_id >= 0) {
149-
CHECK_CUDNN_ERROR(
149+
_CUDNN_CHECK_CUDNN_ERROR(
150150
detail::set_attribute(get_ptr(), CUDNN_ATTR_DEVICEPROP_DEVICE_ID, CUDNN_TYPE_INT32, 1, &device_id));
151151
}
152152

‎include/cudnn_frontend/backend/execution_helpers.h‎

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -26,19 +26,19 @@ create_variant_pack(backend_descriptor& variant_pack,
2626
std::vector<void*>& device_ptrs,
2727
std::vector<int64_t> const& uids,
2828
void* workspace_ptr) {
29-
CHECK_CUDNN_ERROR(detail::set_attribute(
29+
_CUDNN_CHECK_CUDNN_ERROR(detail::set_attribute(
3030
variant_pack.get_ptr(), CUDNN_ATTR_VARIANT_PACK_WORKSPACE, CUDNN_TYPE_VOID_PTR, 1, &workspace_ptr));
3131

32-
CHECK_CUDNN_ERROR(detail::set_attribute(variant_pack.get_ptr(),
33-
CUDNN_ATTR_VARIANT_PACK_DATA_POINTERS,
34-
CUDNN_TYPE_VOID_PTR,
35-
device_ptrs.size(),
36-
device_ptrs.data()));
32+
_CUDNN_CHECK_CUDNN_ERROR(detail::set_attribute(variant_pack.get_ptr(),
33+
CUDNN_ATTR_VARIANT_PACK_DATA_POINTERS,
34+
CUDNN_TYPE_VOID_PTR,
35+
device_ptrs.size(),
36+
device_ptrs.data()));
3737

38-
CHECK_CUDNN_ERROR(detail::set_attribute(
38+
_CUDNN_CHECK_CUDNN_ERROR(detail::set_attribute(
3939
variant_pack.get_ptr(), CUDNN_ATTR_VARIANT_PACK_UNIQUE_IDS, CUDNN_TYPE_INT64, uids.size(), uids.data()));
4040

41-
CHECK_CUDNN_ERROR(detail::finalize(variant_pack.get_ptr()));
41+
_CUDNN_CHECK_CUDNN_ERROR(detail::finalize(variant_pack.get_ptr()));
4242

4343
return {error_code_t::OK, ""};
4444
}

‎include/cudnn_frontend/backend/kernel_cache.h‎

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -81,16 +81,16 @@ class KernelCache : public detail::backend_descriptor {
8181

8282
int64_t serializationSize;
8383
std::vector<char> serialization_buf;
84-
CHECK_CUDNN_ERROR(detail::get_attribute(
84+
_CUDNN_CHECK_CUDNN_ERROR(detail::get_attribute(
8585
get_ptr(), CUDNN_ATTR_KERNEL_CACHE_JSON_REPRESENTATION, CUDNN_TYPE_CHAR, 0, &serializationSize, nullptr));
8686
serialization_buf.resize(static_cast<size_t>(serializationSize));
8787

88-
CHECK_CUDNN_ERROR(detail::get_attribute(get_ptr(),
89-
CUDNN_ATTR_KERNEL_CACHE_JSON_REPRESENTATION,
90-
CUDNN_TYPE_CHAR,
91-
serializationSize,
92-
&serializationSize,
93-
serialization_buf.data()));
88+
_CUDNN_CHECK_CUDNN_ERROR(detail::get_attribute(get_ptr(),
89+
CUDNN_ATTR_KERNEL_CACHE_JSON_REPRESENTATION,
90+
CUDNN_TYPE_CHAR,
91+
serializationSize,
92+
&serializationSize,
93+
serialization_buf.data()));
9494
std::string json_string(serialization_buf.begin(), serialization_buf.end());
9595
str_json = std::move(json_string);
9696
return {};
@@ -117,11 +117,11 @@ class KernelCache : public detail::backend_descriptor {
117117

118118
std::vector<char> serialization_buf;
119119
serialization_buf.assign(json_cache.begin(), json_cache.end());
120-
CHECK_CUDNN_ERROR(detail::set_attribute(get_ptr(),
121-
CUDNN_ATTR_KERNEL_CACHE_JSON_REPRESENTATION,
122-
CUDNN_TYPE_CHAR,
123-
serialization_buf.size(),
124-
serialization_buf.data()));
120+
_CUDNN_CHECK_CUDNN_ERROR(detail::set_attribute(get_ptr(),
121+
CUDNN_ATTR_KERNEL_CACHE_JSON_REPRESENTATION,
122+
CUDNN_TYPE_CHAR,
123+
serialization_buf.size(),
124+
serialization_buf.data()));
125125
return {};
126126
#else
127127
(void)json_cache;
@@ -146,7 +146,7 @@ class KernelCache : public detail::backend_descriptor {
146146
error_code_t::GRAPH_NOT_SUPPORTED,
147147
"CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH is only available starting 9.5.");
148148
if (op_graph) {
149-
CHECK_CUDNN_ERROR(detail::set_attribute(
149+
_CUDNN_CHECK_CUDNN_ERROR(detail::set_attribute(
150150
get_ptr(), CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH, CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &op_graph));
151151
}
152152
#else

‎include/cudnn_frontend/backend/plan_helpers.h‎

Lines changed: 56 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -25,12 +25,12 @@ namespace cudnn_frontend::detail {
2525
inline error_t
2626
get_workspace_size(ManagedOpaqueDescriptor& engine_config, int64_t& workspace) {
2727
#if CUDNN_VERSION >= 90200
28-
CHECK_CUDNN_ERROR(detail::get_attribute(engine_config->get_backend_descriptor(),
29-
CUDNN_ATTR_ENGINECFG_WORKSPACE_SIZE,
30-
CUDNN_TYPE_INT64,
31-
1,
32-
nullptr,
33-
&workspace));
28+
_CUDNN_CHECK_CUDNN_ERROR(detail::get_attribute(engine_config->get_backend_descriptor(),
29+
CUDNN_ATTR_ENGINECFG_WORKSPACE_SIZE,
30+
CUDNN_TYPE_INT64,
31+
1,
32+
nullptr,
33+
&workspace));
3434
return {error_code_t::OK, ""};
3535
#else
3636
(void)engine_config;
@@ -43,12 +43,12 @@ get_workspace_size(ManagedOpaqueDescriptor& engine_config, int64_t& workspace) {
4343
inline error_t
4444
get_shared_memory_size(ManagedOpaqueDescriptor& engine_config, int32_t& shared_memory_size) {
4545
#if CUDNN_VERSION >= 90200
46-
CHECK_CUDNN_ERROR(detail::get_attribute(engine_config->get_backend_descriptor(),
47-
CUDNN_ATTR_ENGINECFG_SHARED_MEMORY_USED,
48-
CUDNN_TYPE_INT32,
49-
1,
50-
nullptr,
51-
&shared_memory_size));
46+
_CUDNN_CHECK_CUDNN_ERROR(detail::get_attribute(engine_config->get_backend_descriptor(),
47+
CUDNN_ATTR_ENGINECFG_SHARED_MEMORY_USED,
48+
CUDNN_TYPE_INT32,
49+
1,
50+
nullptr,
51+
&shared_memory_size));
5252
return {error_code_t::OK, ""};
5353
#else
5454
(void)engine_config;
@@ -63,30 +63,30 @@ create_engine(backend_descriptor& engine,
6363
int64_t const engine_id,
6464
cudnnBackendDescriptor_t op_graph,
6565
std::shared_ptr<const DeviceProperties> device_properties = nullptr) {
66-
CHECK_CUDNN_ERROR(detail::set_attribute(
66+
_CUDNN_CHECK_CUDNN_ERROR(detail::set_attribute(
6767
engine.get_ptr(), CUDNN_ATTR_ENGINE_OPERATION_GRAPH, CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &op_graph));
6868

6969
// Validate before setting
7070
int64_t count;
71-
CHECK_CUDNN_ERROR(detail::get_attribute(
71+
_CUDNN_CHECK_CUDNN_ERROR(detail::get_attribute(
7272
op_graph, CUDNN_ATTR_OPERATIONGRAPH_ENGINE_GLOBAL_COUNT, CUDNN_TYPE_INT64, 1, nullptr, &count));
7373
RETURN_CUDNN_FRONTEND_ERROR_IF(
7474
engine_id >= count || engine_id < 0, error_code_t::INVALID_VALUE, "Invalid engine id.");
7575

76-
CHECK_CUDNN_ERROR(
76+
_CUDNN_CHECK_CUDNN_ERROR(
7777
detail::set_attribute(engine.get_ptr(), CUDNN_ATTR_ENGINE_GLOBAL_INDEX, CUDNN_TYPE_INT64, 1, &engine_id));
7878

7979
if (device_properties != nullptr) {
8080
#if (CUDNN_VERSION >= 90800)
81-
CHECK_CUDNN_ERROR(detail::set_attribute(engine.get_ptr(),
82-
CUDNN_ATTR_ENGINE_DEVICEPROP,
83-
CUDNN_TYPE_BACKEND_DESCRIPTOR,
84-
1,
85-
&device_properties->get_ptr()));
81+
_CUDNN_CHECK_CUDNN_ERROR(detail::set_attribute(engine.get_ptr(),
82+
CUDNN_ATTR_ENGINE_DEVICEPROP,
83+
CUDNN_TYPE_BACKEND_DESCRIPTOR,
84+
1,
85+
&device_properties->get_ptr()));
8686
#endif
8787
}
8888

89-
CHECK_CUDNN_ERROR(detail::finalize(engine.get_ptr()));
89+
_CUDNN_CHECK_CUDNN_ERROR(detail::finalize(engine.get_ptr()));
9090

9191
return {error_code_t::OK, ""};
9292
}
@@ -119,37 +119,37 @@ query_knobs(int64_t const engine_id, cudnnBackendDescriptor_t op_graph, std::vec
119119

120120
// This is the actual number of knobs that is supported by the engine
121121
int64_t knobs_size;
122-
CHECK_CUDNN_ERROR(detail::get_attribute(engine.get_ptr(),
123-
CUDNN_ATTR_ENGINE_KNOB_INFO,
124-
CUDNN_TYPE_BACKEND_DESCRIPTOR,
125-
CUDNN_KNOB_TYPE_COUNTS,
126-
&knobs_size,
127-
backend_knobs.data()));
122+
_CUDNN_CHECK_CUDNN_ERROR(detail::get_attribute(engine.get_ptr(),
123+
CUDNN_ATTR_ENGINE_KNOB_INFO,
124+
CUDNN_TYPE_BACKEND_DESCRIPTOR,
125+
CUDNN_KNOB_TYPE_COUNTS,
126+
&knobs_size,
127+
backend_knobs.data()));
128128

129129
for (int64_t i = 0; i < knobs_size; i++) {
130130
cudnnBackendKnobType_t type;
131131
int64_t elemCount;
132-
CHECK_CUDNN_ERROR(detail::get_attribute(
132+
_CUDNN_CHECK_CUDNN_ERROR(detail::get_attribute(
133133
frontend_knobs[i].get_ptr(), CUDNN_ATTR_KNOB_INFO_TYPE, CUDNN_TYPE_KNOB_TYPE, 1, &elemCount, &type));
134134

135135
int64_t maxValue;
136-
CHECK_CUDNN_ERROR(detail::get_attribute(frontend_knobs[i].get_ptr(),
137-
CUDNN_ATTR_KNOB_INFO_MAXIMUM_VALUE,
138-
CUDNN_TYPE_INT64,
139-
1,
140-
&elemCount,
141-
&maxValue));
136+
_CUDNN_CHECK_CUDNN_ERROR(detail::get_attribute(frontend_knobs[i].get_ptr(),
137+
CUDNN_ATTR_KNOB_INFO_MAXIMUM_VALUE,
138+
CUDNN_TYPE_INT64,
139+
1,
140+
&elemCount,
141+
&maxValue));
142142

143143
int64_t minValue;
144-
CHECK_CUDNN_ERROR(detail::get_attribute(frontend_knobs[i].get_ptr(),
145-
CUDNN_ATTR_KNOB_INFO_MINIMUM_VALUE,
146-
CUDNN_TYPE_INT64,
147-
1,
148-
&elemCount,
149-
&minValue));
144+
_CUDNN_CHECK_CUDNN_ERROR(detail::get_attribute(frontend_knobs[i].get_ptr(),
145+
CUDNN_ATTR_KNOB_INFO_MINIMUM_VALUE,
146+
CUDNN_TYPE_INT64,
147+
1,
148+
&elemCount,
149+
&minValue));
150150

151151
int64_t stride;
152-
CHECK_CUDNN_ERROR(detail::get_attribute(
152+
_CUDNN_CHECK_CUDNN_ERROR(detail::get_attribute(
153153
frontend_knobs[i].get_ptr(), CUDNN_ATTR_KNOB_INFO_STRIDE, CUDNN_TYPE_INT64, 1, &elemCount, &stride));
154154

155155
auto frontend_knob_type = convert_from_backend_knob_type(type);
@@ -169,13 +169,13 @@ set_knob_choices(std::unordered_map<KnobType_t, int64_t> const& user_choices,
169169
"Failed to create knob_choice's backend descriptor.");
170170

171171
cudnnBackendKnobType_t backend_type;
172-
CHECK_CUDNN_ERROR(convert_to_backend_knob_type(type, backend_type));
173-
CHECK_CUDNN_ERROR(detail::set_attribute(
172+
_CUDNN_CHECK_CUDNN_ERROR(convert_to_backend_knob_type(type, backend_type));
173+
_CUDNN_CHECK_CUDNN_ERROR(detail::set_attribute(
174174
knob_choice.get_ptr(), CUDNN_ATTR_KNOB_CHOICE_KNOB_TYPE, CUDNN_TYPE_KNOB_TYPE, 1, &backend_type));
175-
CHECK_CUDNN_ERROR(detail::set_attribute(
175+
_CUDNN_CHECK_CUDNN_ERROR(detail::set_attribute(
176176
knob_choice.get_ptr(), CUDNN_ATTR_KNOB_CHOICE_KNOB_VALUE, CUDNN_TYPE_INT64, 1, &choice));
177177

178-
CHECK_CUDNN_ERROR(detail::finalize(knob_choice.get_ptr()));
178+
_CUDNN_CHECK_CUDNN_ERROR(detail::finalize(knob_choice.get_ptr()));
179179

180180
knob_choices.push_back(std::move(knob_choice));
181181
}
@@ -187,24 +187,24 @@ inline error_t
187187
create_engine_config(ManagedOpaqueDescriptor& engine_config,
188188
backend_descriptor& engine,
189189
std::vector<detail::backend_descriptor>& knob_choices) {
190-
CHECK_CUDNN_ERROR(detail::set_attribute(engine_config->get_backend_descriptor(),
191-
CUDNN_ATTR_ENGINECFG_ENGINE,
192-
CUDNN_TYPE_BACKEND_DESCRIPTOR,
193-
1,
194-
&(engine.get_ptr())));
190+
_CUDNN_CHECK_CUDNN_ERROR(detail::set_attribute(engine_config->get_backend_descriptor(),
191+
CUDNN_ATTR_ENGINECFG_ENGINE,
192+
CUDNN_TYPE_BACKEND_DESCRIPTOR,
193+
1,
194+
&(engine.get_ptr())));
195195

196196
std::vector<cudnnBackendDescriptor_t> backend_knob_choices(CUDNN_KNOB_TYPE_COUNTS);
197197
for (size_t i = 0; i < knob_choices.size(); i++) {
198198
backend_knob_choices[i] = knob_choices[i].get_ptr();
199199
}
200-
CHECK_CUDNN_ERROR(detail::set_attribute(engine_config->get_backend_descriptor(),
201-
CUDNN_ATTR_ENGINECFG_KNOB_CHOICES,
202-
CUDNN_TYPE_BACKEND_DESCRIPTOR,
203-
knob_choices.size(),
204-
backend_knob_choices.data()));
200+
_CUDNN_CHECK_CUDNN_ERROR(detail::set_attribute(engine_config->get_backend_descriptor(),
201+
CUDNN_ATTR_ENGINECFG_KNOB_CHOICES,
202+
CUDNN_TYPE_BACKEND_DESCRIPTOR,
203+
knob_choices.size(),
204+
backend_knob_choices.data()));
205205

206206
// Finalizing the descriptor
207-
CHECK_CUDNN_ERROR(detail::finalize(engine_config->get_backend_descriptor()));
207+
_CUDNN_CHECK_CUDNN_ERROR(detail::finalize(engine_config->get_backend_descriptor()));
208208

209209
return {error_code_t::OK, ""};
210210
}

0 commit comments

Comments
 (0)