
Commit 666996f

# v1.12.0 release (#141)
## cudnn frontend v1.12 release notes

cudnn frontend v1.12 is the preferred cudnn frontend version for cudnn version 9.9.0 and above. cudnn frontend v1.12 is also the minimum cudnn frontend version required to work with cuda 13.0 and above.

This release updates the dlpack version and raises the cmake minimum required version to 3.18.

## New API

- Allows compilation and loading of cudnn frontend with cudnn-jit packages.
- Introduces the Adaptive Layernorm (fprop and bprop) operation in cudnn:

  ```
  std::array<std::shared_ptr<Tensor_attributes>, 3> adalayernorm(std::shared_ptr<Tensor_attributes>& input,
                                                                 std::shared_ptr<Tensor_attributes>& scale,
                                                                 std::shared_ptr<Tensor_attributes>& bias,
                                                                 AdaLayernorm_attributes attributes);

  std::array<std::shared_ptr<Tensor_attributes>, 3> adalayernorm_backward(std::shared_ptr<Tensor_attributes> dy,
                                                                          std::shared_ptr<Tensor_attributes> x,
                                                                          std::shared_ptr<Tensor_attributes> scale,
                                                                          AdaLayernorm_backward_attributes options);
  ```

  Please refer to the [samples](samples/cpp/norm/adaptive_layernorm.cpp) for usage; a hedged build sketch also follows these notes.

- The cudnn frontend python API introduces two decorator functions, `cudnn.jit` and `cudnn.graph`, for simpler graph creation in python. Refer to the [matmul sample](samples/python/01_matmul_bias.ipynb) for usage.

## Improvements

### SDPA

- Allows large embedded dimension (d > 128) for fprop across Ampere, Hopper, and Blackwell architectures for bf16/fp16.
- Added better validation checks for sliding window attention for cudnn version 9.9.0 and below.
- Sliding window attention now supports cases where s_q > s_kv.
- The sdpa_fp8 operation now pads the masking operation with negative infinity rather than a large negative value. This improves the numerical stability of the sdpa operation with the fp8 data type.
- Paged attention now supports page tables in a packed format.

### Normalizations

- Allow zero-centered scale in layer norm. Refer to this [sample](samples/cpp/norm/norm_zero_centered_gamma.cpp) for usage.

### Others

- cudnn frontend now supports serialization of the dynamic kernel cache.

## Bug Fixes

- Fixed the dlopen of cudart.so to look for the binary with the version name.
- Correctly fail when SDPA bprop is called on Blackwell with embedded dimension (d) > 128.
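To make the new entry point concrete, here is a minimal sketch of building an adaptive layernorm forward graph with the C++ API, patterned after the existing layernorm samples. The dimensions, tensor properties, and the `fe` namespace alias are illustrative assumptions; `samples/cpp/norm/adaptive_layernorm.cpp` remains the authoritative reference.

```cpp
#include <cudnn_frontend.h>

namespace fe = cudnn_frontend;

// Sketch: forward (TRAINING) adaptive layernorm graph.
// Shapes are illustrative: batch 4, sequence 1024, hidden 128.
fe::graph::Graph
build_adalayernorm_fprop() {
    fe::graph::Graph graph;
    graph.set_io_data_type(fe::DataType_t::HALF)
        .set_intermediate_data_type(fe::DataType_t::FLOAT)
        .set_compute_data_type(fe::DataType_t::FLOAT);

    auto X     = graph.tensor(fe::graph::Tensor_attributes()
                                  .set_name("X")
                                  .set_dim({4, 1024, 128})
                                  .set_stride({1024 * 128, 128, 1}));
    auto scale = graph.tensor(fe::graph::Tensor_attributes()
                                  .set_name("scale")
                                  .set_dim({4, 1, 128})
                                  .set_stride({128, 128, 1}));
    auto bias  = graph.tensor(fe::graph::Tensor_attributes()
                                  .set_name("bias")
                                  .set_dim({4, 1, 128})
                                  .set_stride({128, 128, 1}));

    // Epsilon is passed by value as a scalar tensor, as in the layernorm samples.
    auto epsilon = graph.tensor(fe::graph::Tensor_attributes()
                                    .set_name("epsilon")
                                    .set_dim({1, 1, 1})
                                    .set_stride({1, 1, 1})
                                    .set_data_type(fe::DataType_t::FLOAT)
                                    .set_is_pass_by_value(true));

    auto attributes = fe::graph::AdaLayernorm_attributes()
                          .set_name("adalayernorm")
                          .set_forward_phase(fe::NormFwdPhase_t::TRAINING)
                          .set_epsilon(epsilon);

    // Returns {Y, MEAN, INV_VARIANCE}; the statistics are produced only in the TRAINING phase.
    auto [Y, mean, inv_variance] = graph.adalayernorm(X, scale, bias, attributes);
    Y->set_output(true);
    mean->set_output(true);
    inv_variance->set_output(true);
    return graph;
}
```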
1 parent 8801fd7 commit 666996f

File tree: 69 files changed (+15810 / −9987 lines)


CMakeLists.txt

Lines changed: 2 additions & 2 deletions
```diff
@@ -1,6 +1,6 @@
-cmake_minimum_required(VERSION 3.17)
+cmake_minimum_required(VERSION 3.18)
 
-project(cudnn_frontend VERSION 1.11.0)
+project(cudnn_frontend VERSION 1.12.0)
 
 option(CUDNN_FRONTEND_SKIP_JSON_LIB "Defines whether FE should not include nlohmann/json.hpp." OFF)
 option(CUDNN_FRONTEND_BUILD_SAMPLES "Defines if samples are built or not." ON)
```

cmake/cuDNN.cmake

Lines changed: 15 additions & 11 deletions
```diff
@@ -12,11 +12,17 @@ string(REGEX MATCH "#define CUDNN_MAJOR [1-9]+" macrodef "${cudnn_version_header
 string(REGEX MATCH "[1-9]+" CUDNN_MAJOR_VERSION "${macrodef}")
 
 function(find_cudnn_library NAME)
+    if(NOT "${ARGV1}" STREQUAL "OPTIONAL")
+        set(_cudnn_required "REQUIRED")
+    else()
+        set(_cudnn_required "")
+    endif()
+
     find_library(
         ${NAME}_LIBRARY ${NAME} "lib${NAME}.so.${CUDNN_MAJOR_VERSION}"
         HINTS $ENV{CUDNN_LIBRARY_PATH} ${CUDNN_LIBRARY_PATH} $ENV{CUDNN_PATH} ${CUDNN_PATH} ${Python_SITEARCH}/nvidia/cudnn ${CUDAToolkit_LIBRARY_DIR}
         PATH_SUFFIXES lib64 lib/x64 lib
-        REQUIRED
+        ${_cudnn_required}
     )
 
     if(${NAME}_LIBRARY)
@@ -30,8 +36,6 @@ function(find_cudnn_library NAME)
     else()
         message(STATUS "${NAME} not found.")
     endif()
-
-
 endfunction()
 
 find_cudnn_library(cudnn)
@@ -87,22 +91,22 @@ if(CUDNN_MAJOR_VERSION EQUAL 8)
         CUDNN::cudnn_ops_infer
     )
 elseif(CUDNN_MAJOR_VERSION EQUAL 9)
-    find_cudnn_library(cudnn_cnn)
-    find_cudnn_library(cudnn_adv)
     find_cudnn_library(cudnn_graph)
-    find_cudnn_library(cudnn_ops)
     find_cudnn_library(cudnn_engines_runtime_compiled)
-    find_cudnn_library(cudnn_engines_precompiled)
-    find_cudnn_library(cudnn_heuristic)
+    find_cudnn_library(cudnn_ops OPTIONAL)
+    find_cudnn_library(cudnn_cnn OPTIONAL)
+    find_cudnn_library(cudnn_adv OPTIONAL)
+    find_cudnn_library(cudnn_engines_precompiled OPTIONAL)
+    find_cudnn_library(cudnn_heuristic OPTIONAL)
 
     target_link_libraries(
         CUDNN::cudnn_all
         INTERFACE
-        CUDNN::cudnn_adv
-        CUDNN::cudnn_ops
-        CUDNN::cudnn_cnn
         CUDNN::cudnn_graph
         CUDNN::cudnn_engines_runtime_compiled
+        CUDNN::cudnn_ops
+        CUDNN::cudnn_cnn
+        CUDNN::cudnn_adv
         CUDNN::cudnn_engines_precompiled
         CUDNN::cudnn_heuristic
     )
```

include/cudnn_frontend/backend/kernel_cache.h

Lines changed: 69 additions & 6 deletions
```diff
@@ -68,10 +68,68 @@ class KernelCache : public detail::backend_descriptor {
             return {error_code_t::CUDNN_BACKEND_API_FAILED,
                     "CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR: Check CUDNN_VERSION >= 9.4"};
         }
-        return {error_code_t::OK, ""};
+        return {};
+    }
+
+    error_t
+    to_json(std::string &str_json) const {
+        str_json.clear();
+#if (CUDNN_VERSION >= 91000)
+        RETURN_CUDNN_FRONTEND_ERROR_IF(detail::get_backend_version() < 91000,
+                                       error_code_t::CUDNN_BACKEND_API_FAILED,
+                                       "CUDNN_ATTR_KERNEL_CACHE_JSON_REPRESENTATION is only available starting 9.10.");
+
+        int64_t serializationSize;
+        std::vector<char> serialization_buf;
+        CHECK_CUDNN_ERROR(detail::get_attribute(
+            get_ptr(), CUDNN_ATTR_KERNEL_CACHE_JSON_REPRESENTATION, CUDNN_TYPE_CHAR, 0, &serializationSize, nullptr));
+        serialization_buf.resize(static_cast<size_t>(serializationSize));
+
+        CHECK_CUDNN_ERROR(detail::get_attribute(get_ptr(),
+                                                CUDNN_ATTR_KERNEL_CACHE_JSON_REPRESENTATION,
+                                                CUDNN_TYPE_CHAR,
+                                                serializationSize,
+                                                &serializationSize,
+                                                serialization_buf.data()));
+        std::string json_string(serialization_buf.begin(), serialization_buf.end());
+        str_json = json_string;
+        return {};
+#else
+        (void)str_json;
+        return {error_code_t::CUDNN_BACKEND_API_FAILED,
+                "CUDNN_ATTR_KERNEL_CACHE_JSON_REPRESENTATION is only available starting 9.10."};
+#endif
+    }
+
+    error_t
+    from_json(const std::string &json_cache) {
+#if (CUDNN_VERSION >= 91000)
+        RETURN_CUDNN_FRONTEND_ERROR_IF(detail::get_backend_version() < 91000,
+                                       error_code_t::CUDNN_BACKEND_API_FAILED,
+                                       "CUDNN_ATTR_KERNEL_CACHE_JSON_REPRESENTATION is only available starting 9.10.");
+
+        // Check if the kernel cache is already initialized
+        RETURN_CUDNN_FRONTEND_ERROR_IF(
+            get_ptr() != nullptr, error_code_t::CUDNN_BACKEND_API_FAILED, "Kernel cache is already initialized.");
+
+        // Initialize the kernel cache descriptor
+        CHECK_CUDNN_FRONTEND_ERROR(initialize(CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR));
+
+        std::vector<char> serialization_buf;
+        serialization_buf.assign(json_cache.begin(), json_cache.end());
+        CHECK_CUDNN_ERROR(detail::set_attribute(get_ptr(),
+                                                CUDNN_ATTR_KERNEL_CACHE_JSON_REPRESENTATION,
+                                                CUDNN_TYPE_CHAR,
+                                                serialization_buf.size(),
+                                                serialization_buf.data()));
+        return {};
+#else
+        (void)json_cache;
+        return {error_code_t::CUDNN_BACKEND_API_FAILED,
+                "CUDNN_ATTR_KERNEL_CACHE_JSON_REPRESENTATION is only available starting 9.10."};
+#endif
     }
 
-   private:
     // Responsible for initializing, setting operation graph attribute, and finalizing kernel cache
     // Check for both compile-time and runtime cuDNN version
     error_t
@@ -80,26 +138,31 @@ class KernelCache : public detail::backend_descriptor {
         RETURN_CUDNN_FRONTEND_ERROR_IF(detail::get_backend_version() < 90400,
                                        error_code_t::GRAPH_NOT_SUPPORTED,
                                        "CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR is only available starting 9.4.");
-        CHECK_CUDNN_FRONTEND_ERROR(initialize(CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR));
+        if (get_ptr() == nullptr) {
+            CHECK_CUDNN_FRONTEND_ERROR(initialize(CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR));
+        }
 #if (CUDNN_VERSION >= 90500)
         RETURN_CUDNN_FRONTEND_ERROR_IF(detail::get_backend_version() < 90500,
                                        error_code_t::GRAPH_NOT_SUPPORTED,
                                        "CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH is only available starting 9.5.");
-        CHECK_CUDNN_ERROR(detail::set_attribute(
-            get_ptr(), CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH, CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &op_graph));
+        if (op_graph) {
+            CHECK_CUDNN_ERROR(detail::set_attribute(
+                get_ptr(), CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH, CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &op_graph));
+        }
 #else
         (void)op_graph;
 #endif
         CHECK_CUDNN_FRONTEND_ERROR(finalize());
         finalized = true;
-        return {error_code_t::OK, ""};
+        return {};
 #else
         (void)op_graph;
         return {error_code_t::CUDNN_BACKEND_API_FAILED,
                 "CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR is only available starting 9.4."};
 #endif
     }
 
+   private:
     bool finalized = false;
 };
 } // namespace cudnn_frontend
```
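The `to_json`/`from_json` methods above back the "serialization of the dynamic kernel cache" item in the release notes. Below is a sketch of one possible save/load round trip; the file helpers are hypothetical, and handing the restored cache to a graph (for example via `set_kernel_cache`) follows the existing dynamic-shape workflow rather than anything introduced in this commit.

```cpp
#include <cudnn_frontend.h>

#include <fstream>
#include <iterator>
#include <memory>
#include <string>

namespace fe = cudnn_frontend;

// Sketch: persist a kernel cache to disk. Requires cuDNN >= 9.10 at build and run time.
bool
save_kernel_cache(fe::KernelCache const& cache, std::string const& path) {
    std::string serialized;
    if (!cache.to_json(serialized).is_good()) {  // serialize the cache into a JSON string
        return false;
    }
    std::ofstream out(path, std::ios::binary);
    out << serialized;
    return static_cast<bool>(out);
}

// Sketch: restore a kernel cache from disk; returns nullptr on failure.
std::shared_ptr<fe::KernelCache>
load_kernel_cache(std::string const& path) {
    std::ifstream in(path, std::ios::binary);
    std::string serialized((std::istreambuf_iterator<char>(in)), std::istreambuf_iterator<char>());

    auto cache = std::make_shared<fe::KernelCache>();
    if (!cache->from_json(serialized).is_good()) {  // rebuilds the backend descriptor from the JSON blob
        return nullptr;
    }
    return cache;  // attach to a graph before building plans, e.g. graph.set_kernel_cache(cache)
}
```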

include/cudnn_frontend/graph_interface.h

Lines changed: 58 additions & 3 deletions
```diff
@@ -15,6 +15,7 @@
 #include "node/dbn_weight.h"
 #include "node/genstats.h"
 #include "node/layernorm.h"
+#include "node/adaptive_layernorm.h"
 #include "node/instancenorm.h"
 #include "node/rmsnorm.h"
 #include "node/resample.h"
@@ -557,7 +558,6 @@ class Graph : public ICudnn, public INode {
 
         // Validate the nodes, which in turn also infers missing tensor attributes.
         CHECK_CUDNN_FRONTEND_ERROR(validate_subtree());
-
         // Validate all outputs, which should now have everything set to be lowered to backend.
         for (auto const &output : full_graph_outputs) {
             CHECK_CUDNN_FRONTEND_ERROR(output->validate());
@@ -914,6 +914,11 @@ class Graph : public ICudnn, public INode {
                                                                 std::shared_ptr<Tensor_attributes>,
                                                                 Layernorm_attributes);
 
+    std::array<std::shared_ptr<Tensor_attributes>, 3> adalayernorm(std::shared_ptr<Tensor_attributes>,
+                                                                   std::shared_ptr<Tensor_attributes>,
+                                                                   std::shared_ptr<Tensor_attributes>,
+                                                                   AdaLayernorm_attributes);
+
     std::array<std::shared_ptr<Tensor_attributes>, 3> instancenorm(std::shared_ptr<Tensor_attributes>,
                                                                    std::shared_ptr<Tensor_attributes>,
                                                                    std::shared_ptr<Tensor_attributes>,
@@ -968,6 +973,11 @@ class Graph : public ICudnn, public INode {
                                                                          std::shared_ptr<Tensor_attributes>,
                                                                          Layernorm_backward_attributes);
 
+    std::array<std::shared_ptr<Tensor_attributes>, 3> adalayernorm_backward(std::shared_ptr<Tensor_attributes>,
+                                                                            std::shared_ptr<Tensor_attributes>,
+                                                                            std::shared_ptr<Tensor_attributes>,
+                                                                            AdaLayernorm_backward_attributes);
+
     std::array<std::shared_ptr<Tensor_attributes>, 3> instancenorm_backward(std::shared_ptr<Tensor_attributes>,
                                                                             std::shared_ptr<Tensor_attributes>,
                                                                             std::shared_ptr<Tensor_attributes>,
@@ -1182,7 +1192,6 @@ class Graph : public ICudnn, public INode {
         j["nodes"];
         j["tensors"];
         std::unordered_set<std::string> tensors;
-
         for (const auto &sub_node : full_json["nodes"]) {
             // Create a short version of the node
             auto short_node = sub_node;
@@ -1212,7 +1221,6 @@ class Graph : public ICudnn, public INode {
             }
 
             std::string tensor_name = tensor_info["name"].get<std::string>();
-
             // Update short_node inputs
             short_node["inputs"][port_name] = tensor_name;
 
@@ -1699,6 +1707,31 @@ Graph::layernorm(std::shared_ptr<Tensor_attributes> x,
     return {Y, MEAN, INV_VARIANCE};
 }
 
+inline std::array<std::shared_ptr<Tensor_attributes>, 3>
+Graph::adalayernorm(std::shared_ptr<Tensor_attributes> x,
+                    std::shared_ptr<Tensor_attributes> scale,
+                    std::shared_ptr<Tensor_attributes> bias,
+                    AdaLayernorm_attributes attributes) {
+    // Set outputs
+    auto Y = attributes.outputs[AdaLayernorm_attributes::output_names::Y] = output_tensor(attributes.name + "::Y");
+    std::shared_ptr<Tensor_attributes> MEAN         = nullptr;
+    std::shared_ptr<Tensor_attributes> INV_VARIANCE = nullptr;
+    if (attributes.forward_phase == NormFwdPhase_t::TRAINING) {
+        MEAN = attributes.outputs[AdaLayernorm_attributes::output_names::MEAN] =
+            output_tensor(attributes.name + "::MEAN");
+        INV_VARIANCE = attributes.outputs[AdaLayernorm_attributes::output_names::INV_VARIANCE] =
+            output_tensor(attributes.name + "::INV_VARIANCE");
+    }
+    // Set inputs
+    attributes.inputs[AdaLayernorm_attributes::input_names::X]     = x;
+    attributes.inputs[AdaLayernorm_attributes::input_names::SCALE] = scale;
+    attributes.inputs[AdaLayernorm_attributes::input_names::BIAS]  = bias;
+
+    sub_nodes.emplace_back(std::make_unique<AdaLayerNormNode>(std::move(attributes), context));
+
+    return {Y, MEAN, INV_VARIANCE};
+}
+
 inline std::array<std::shared_ptr<Tensor_attributes>, 3>
 Graph::instancenorm(std::shared_ptr<Tensor_attributes> x,
                     std::shared_ptr<Tensor_attributes> scale,
@@ -1848,6 +1881,28 @@ Graph::layernorm_backward(std::shared_ptr<Tensor_attributes> dy,
     return {DX, DSCALE, DBIAS};
 }
 
+inline std::array<std::shared_ptr<Tensor_attributes>, 3>
+Graph::adalayernorm_backward(std::shared_ptr<Tensor_attributes> dy,
+                             std::shared_ptr<Tensor_attributes> x,
+                             std::shared_ptr<Tensor_attributes> scale,
+                             AdaLayernorm_backward_attributes attributes) {
+    // Set outputs
+    auto DX = attributes.outputs[AdaLayernorm_backward_attributes::output_names::DX] =
+        output_tensor(attributes.name + "::DX");
+    auto DSCALE = attributes.outputs[AdaLayernorm_backward_attributes::output_names::DSCALE] =
+        output_tensor(attributes.name + "::DSCALE");
+    auto DBIAS = attributes.outputs[AdaLayernorm_backward_attributes::output_names::DBIAS] =
+        output_tensor(attributes.name + "::DBIAS");
+    // Set inputs
+    attributes.inputs[AdaLayernorm_backward_attributes::input_names::DY]    = dy;
+    attributes.inputs[AdaLayernorm_backward_attributes::input_names::X]     = x;
+    attributes.inputs[AdaLayernorm_backward_attributes::input_names::SCALE] = scale;
+
+    sub_nodes.emplace_back(std::make_unique<DAdaLayerNormNode>(std::move(attributes), context));
+
+    return {DX, DSCALE, DBIAS};
+}
+
 inline std::shared_ptr<Tensor_attributes>
 Graph::conv_fprop(std::shared_ptr<Tensor_attributes> x,
                   std::shared_ptr<Tensor_attributes> w,
```
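Once the adalayernorm node has been added to a graph (as in the sketch after the release notes above), the rest of the flow is the standard frontend validate/build/execute sequence, which this commit does not change. The wrapper below is only an illustrative recap; its name and parameters are assumptions.

```cpp
#include <cudnn_frontend.h>

#include <memory>
#include <unordered_map>

namespace fe = cudnn_frontend;

// Sketch: the usual finishing steps for any frontend graph, including one with adalayernorm.
fe::error_t
finalize_and_run(fe::graph::Graph& graph,
                 cudnnHandle_t handle,
                 std::unordered_map<std::shared_ptr<fe::graph::Tensor_attributes>, void*>& variant_pack,
                 void* workspace) {
    auto status = graph.validate();  // infers missing tensor attributes
    if (!status.is_good()) return status;

    status = graph.build_operation_graph(handle);  // lower to backend descriptors
    if (!status.is_good()) return status;

    status = graph.create_execution_plans({fe::HeurMode_t::A});  // heuristics-driven plan candidates
    if (!status.is_good()) return status;

    status = graph.check_support(handle);
    if (!status.is_good()) return status;

    status = graph.build_plans(handle);
    if (!status.is_good()) return status;

    // variant_pack maps each graph tensor (X, scale, bias, epsilon, Y, ...) to its device pointer.
    return graph.execute(handle, variant_pack, workspace);
}
```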

include/cudnn_frontend/graph_properties.h

Lines changed: 48 additions & 0 deletions
```diff
@@ -943,6 +943,54 @@ class Layernorm_attributes : public Attributes<Layernorm_attributes> {
     }
 };
 
+class AdaLayernorm_attributes : public Attributes<AdaLayernorm_attributes> {
+    friend class Attributes<AdaLayernorm_attributes>;
+    friend class AdaLayerNormNode;
+    friend class Graph;
+
+    NormFwdPhase_t forward_phase = NormFwdPhase_t::NOT_SET;
+
+   public:
+    enum class input_names { X, SCALE, BIAS, EPSILON };
+    std::unordered_map<input_names, std::shared_ptr<Tensor_attributes>> inputs;
+    enum class output_names { Y, MEAN, INV_VARIANCE };
+    std::unordered_map<output_names, std::shared_ptr<Tensor_attributes>> outputs;
+    NLOHMANN_DEFINE_TYPE_INTRUSIVE(AdaLayernorm_attributes, name, compute_data_type, inputs, outputs, forward_phase)
+
+    AdaLayernorm_attributes&
+    set_forward_phase(NormFwdPhase_t const value) {
+        forward_phase = value;
+        return *this;
+    }
+
+    AdaLayernorm_attributes&
+    set_epsilon(std::shared_ptr<Tensor_attributes>& value) {
+        inputs[AdaLayernorm_attributes::input_names::EPSILON] = value;
+        return *this;
+    }
+};
+
+class AdaLayernorm_backward_attributes : public Attributes<AdaLayernorm_backward_attributes> {
+    friend class Attributes<AdaLayernorm_backward_attributes>;
+    friend class DAdaLayerNormNode;
+    friend class Graph;
+
+   public:
+    enum class input_names { DY, X, SCALE, MEAN, INV_VARIANCE, EPSILON };
+    std::unordered_map<input_names, std::shared_ptr<Tensor_attributes>> inputs;
+    enum class output_names { DX, DSCALE, DBIAS };
+    std::unordered_map<output_names, std::shared_ptr<Tensor_attributes>> outputs;
+    NLOHMANN_DEFINE_TYPE_INTRUSIVE(AdaLayernorm_backward_attributes, name, compute_data_type, inputs, outputs)
+
+    AdaLayernorm_backward_attributes&
+    set_saved_mean_and_inv_variance(std::shared_ptr<Tensor_attributes> mean,
+                                    std::shared_ptr<Tensor_attributes> inv_variance) {
+        inputs[AdaLayernorm_backward_attributes::input_names::MEAN]         = mean;
+        inputs[AdaLayernorm_backward_attributes::input_names::INV_VARIANCE] = inv_variance;
+        return *this;
+    }
+};
+
 class Instancenorm_attributes : public Attributes<Instancenorm_attributes> {
     friend class Attributes<Instancenorm_attributes>;
     friend class InstanceNormNode;
```
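For the backward direction, the attribute classes above combine with `Graph::adalayernorm_backward` as sketched below. The helper name is hypothetical, and it assumes the tensor handles were created on the same graph, with MEAN and INV_VARIANCE saved by a TRAINING forward pass.

```cpp
#include <cudnn_frontend.h>

#include <array>
#include <memory>

namespace fe = cudnn_frontend;

// Sketch: request AdaLayernorm gradients, reusing the saved forward statistics.
std::array<std::shared_ptr<fe::graph::Tensor_attributes>, 3>
build_adalayernorm_bprop(fe::graph::Graph& graph,
                         std::shared_ptr<fe::graph::Tensor_attributes> dy,
                         std::shared_ptr<fe::graph::Tensor_attributes> x,
                         std::shared_ptr<fe::graph::Tensor_attributes> scale,
                         std::shared_ptr<fe::graph::Tensor_attributes> mean,
                         std::shared_ptr<fe::graph::Tensor_attributes> inv_variance) {
    auto attributes = fe::graph::AdaLayernorm_backward_attributes()
                          .set_name("adalayernorm_bwd")
                          // Reuse the statistics produced by the TRAINING forward pass.
                          .set_saved_mean_and_inv_variance(mean, inv_variance);

    auto [dx, dscale, dbias] = graph.adalayernorm_backward(dy, x, scale, attributes);
    dx->set_output(true);      // DX
    dscale->set_output(true);  // DSCALE
    dbias->set_output(true);   // DBIAS
    return {dx, dscale, dbias};
}
```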
