Commit 91b7532
# cudnn frontend v1.10 release notes (#126)

cudnn frontend v1.10 is the preferred cudnn frontend for cudnn backend 9.7.0 and later, as it adds Blackwell-specific features.

## New API

- cudnn frontend v1.10 introduces two new operators, `block_scale_quantize` and `block_scale_dequantize`, to specify the scaling and de-scaling of the low-precision datatypes supported from Blackwell GPUs onwards.
- `create_execution_plan(int64_t const engine_id, std::unordered_map<KnobType_t, int64_t> const &knobs)` allows creation of a custom execution plan with a hardcoded engine and knobs. A sample in `samples/cpp/misc/custom_plan.cpp` showcases how to work with different `Engine`s and `Knobs`.

## Improvements

- Users can now query the behavior notes of a particular execution plan using the `get_behavior_notes(std::vector<BehaviorNote_t> &notes) const` and `get_behavior_notes_for_plan_at_index(int64_t const index, std::vector<BehaviorNote_t> &notes) const` functions.
- SDPA operations now accept both a left and a right window size with respect to the diagonal. See Attention.md for more details.
- SDPA operations now accept a diagonal alignment for the attention score matrix, used to describe the window above. When `s_q != s_kv` and the causal mask is on, this specifies whether the diagonal is top-left or bottom-right aligned.
- Bottom-right causal masking can now be enabled on the sdpa_fp8 operation.

## Bug fixes

- Fixed a regression in cudnn frontend v1.9.0 where the softmax node would override user-set dims and strides for softmax_stats and m_zinv. This also affected the sdpa_forward and sdpa_fp8_forward nodes.

## New samples

- Added an example showcasing how native CUDA graphs can be constructed from the SDPA operation graph.
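The windowed-attention semantics above can be sketched as a plain predicate. This is an illustrative sketch only, not cuDNN's implementation; the function name, the inclusive window bounds, and the `s_kv - s_q` diagonal offset for bottom-right alignment are assumptions made for the example:

```cpp
#include <cassert>
#include <cstdint>

// Illustrative sketch (not the cuDNN implementation): decides whether the
// attention score at (row, col) is kept under a windowed mask. With
// bottom-right diagonal alignment the diagonal is shifted by (s_kv - s_q),
// so the last query row attends up to the last key column.
bool is_attended(int64_t row, int64_t col, int64_t s_q, int64_t s_kv,
                 int64_t left_window, int64_t right_window,
                 bool bottom_right) {
    // Column on which the diagonal sits for this query row.
    int64_t diag_offset = bottom_right ? (s_kv - s_q) : 0;
    int64_t center = row + diag_offset;
    // Keep columns inside [center - left_window, center + right_window].
    return col >= center - left_window && col <= center + right_window;
}
```

With `right_window = 0` this reduces to a causal mask; the `bottom_right` flag only matters when `s_q != s_kv`, matching the release note above.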
1 parent ee971b1 commit 91b7532

File tree: 112 files changed (+5276, −1444 lines)

CMakeLists.txt — 1 addition, 1 deletion

```diff
@@ -1,6 +1,6 @@
 cmake_minimum_required(VERSION 3.17)
 
-project(cudnn_frontend VERSION 1.9.0)
+project(cudnn_frontend VERSION 1.10.0)
 
 option(CUDNN_FRONTEND_SKIP_JSON_LIB "Defines whether FE should not include nlohmann/json.hpp." OFF)
 option(CUDNN_FRONTEND_BUILD_SAMPLES "Defines if samples are built or not." ON)
```

README.FE.1.0.md — 24 additions, 12 deletions

````diff
@@ -121,6 +121,30 @@ This method guarantees that executing the graph using plans queried will succeed
 cudnn_frontend::error_t cudnn_frontend::graph::Graph::check_support(cudnnHandle_t h);
 ```
 
+### Querying Plan Properties (Optional)
+You can query the properties of the plan at a given index, or of the candidate plan using the following methods:
+
+```
+cudnn_frontend::graph::Graph& cudnn_frontend::graph::Plans::get_behavior_notes_for_plan_at_index(int64_t const plan_index, std::vector<cudnn_frontend::BehaviorNote_t> &);
+cudnn_frontend::graph::Graph& cudnn_frontend::graph::Plans::get_behavior_notes(std::vector<cudnn_frontend::BehaviorNote_t> &);
+```
+
+The `notes` argument acts as the out parameter.
+
+### Filtering Plans (Optional)
+Users can filter plans on numerical, behavioral notes, or plans that do not provide desired functional correctness.
+
+```
+cudnn_frontend::graph::Graph& cudnn_frontend::graph::Plans::select_numeric_notes(std::vector<cudnn_frontend::NumericalNote_t> const&);
+cudnn_frontend::graph::Graph& cudnn_frontend::graph::Plans::select_behavior_notes(std::vector<cudnn_frontend::BehaviorNote_t> const&);
+
+cudnn_frontend::graph::Graph& cudnn_frontend::graph::Plans::deselect_numeric_notes(std::vector<cudnn_frontend::NumericalNote_t> const&);
+cudnn_frontend::graph::Graph& cudnn_frontend::graph::Plans::deselect_behavior_notes(std::vector<cudnn_frontend::BehaviorNote_t> const&);
+cudnn_frontend::graph::Graph& cudnn_frontend::graph::Plans::deselect_workspace_greater_than(int64_t const workspace);
+cudnn_frontend::graph::Graph& cudnn_frontend::graph::Plans::deselect_shared_mem_greater_than(int64_t const shared_memory);
+```
+
+
 ### Building the Execution Plan
 
 This function builds execution plans queried with `create_execution_plan(...)` API.
@@ -146,18 +170,6 @@ cudnn_frontend::Graph::build_plan_at_index(
     int64_t plan_index
 );
 ```
-### Filtering Plans (Optional)
-Users can filter plans on numerical, behavioral notes, or plans that do not provide desired functional correctness.
-
-```
-cudnn_frontend::graph::Graph& cudnn_frontend::graph::Plans::select_numeric_notes(std::vector<cudnn_frontend::NumericalNote_t> const&);
-cudnn_frontend::graph::Graph& cudnn_frontend::graph::Plans::select_behavior_notes(std::vector<cudnn_frontend::BehaviorNote_t> const&);
-
-cudnn_frontend::graph::Graph& cudnn_frontend::graph::Plans::deselect_numeric_notes(std::vector<cudnn_frontend::NumericalNote_t> const&);
-cudnn_frontend::graph::Graph& cudnn_frontend::graph::Plans::deselect_behavior_notes(std::vector<cudnn_frontend::BehaviorNote_t> const&);
-cudnn_frontend::graph::Graph& cudnn_frontend::graph::Plans::deselect_workspace_greater_than(int64_t const workspace);
-cudnn_frontend::graph::Graph& cudnn_frontend::graph::Plans::deselect_shared_mem_greater_than(int64_t const shared_memory);
-```
 
 ### Autotuning
````
benchmark/Dockerfile — 1 addition, 1 deletion

```diff
@@ -1,4 +1,4 @@
-FROM nvcr.io/nvidia/pytorch:24.07-py3
+FROM nvcr.io/nvidia/pytorch:24.12-py3
 
 RUN apt-get update && \
     wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
```

benchmark/benchmark_flash_attention.py — 1 addition, 0 deletions

```diff
@@ -565,6 +565,7 @@ def time_fwd(func, *args, **kwargs):
     is_inference=is_infer,
     attn_scale=attn_scale,
     use_causal_mask=is_causal,
+    use_padding_mask=False,
 )
 
 o_fwd.set_output(True).set_dim(o_gpu.size()).set_stride(
```
7 additions, 7 deletions

```diff
@@ -1,31 +1,31 @@
+# CUDA Graphs
 
-
-### `populate_cuda_graph`
+## `populate_cuda_graph`
 
 The `populate_cuda_graph` function is a member function of the `Graph` class. It is used to populate a CUDA graph with the necessary data and operations.
 
-#### Parameters
+### Parameters
 
 - `handle`: A cuDNN handle.
 - `uid_to_device_ptrs`: A map of tensor UIDs to device pointers.
 - `workspace`: A pointer to the workspace memory.
 - `cudnn_cuda_graph`: A pointer to the CUDA graph.
 
-#### Return Value
+### Return Value
 
 - An `error_t` object indicating the success or failure of the function.
 
-### `update_cuda_graph`
+## `update_cuda_graph`
 
 The `update_cuda_graph` function is a member function of the `Graph` class. It is used to update a CUDA graph with the necessary data and operations.
 
-#### Parameters
+### Parameters
 
 - `handle`: A cuDNN handle.
 - `uid_to_device_ptrs`: A map of tensor UIDs to device pointers.
 - `workspace`: A pointer to the workspace memory.
 - `cudnn_cuda_graph`: A pointer to the CUDA graph.
 
-#### Return Value
+### Return Value
 
 - An `error_t` object indicating the success or failure of the function.
```
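The reason for the populate/update split is that graph instantiation is expensive, while rebinding device pointers is cheap. A self-contained mock of that pattern — the types and function bodies are stand-ins for illustration, not the cuDNN or CUDA APIs:

```cpp
#include <cstdint>
#include <map>

// Stand-in for an instantiated CUDA graph; tracks how often each path runs.
struct MockCudaGraph {
    std::map<int64_t, void*> uid_to_ptr;  // tensor UID -> device pointer
    int instantiations = 0;               // expensive-path counter
    int pointer_updates = 0;              // cheap-path counter
};

// Expensive: builds the graph structure and captures the initial bindings.
void populate_cuda_graph(MockCudaGraph& g, const std::map<int64_t, void*>& bindings) {
    g.uid_to_ptr = bindings;
    ++g.instantiations;
}

// Cheap: rebinds device pointers without re-instantiating the graph.
void update_cuda_graph(MockCudaGraph& g, const std::map<int64_t, void*>& bindings) {
    for (const auto& kv : bindings) g.uid_to_ptr[kv.first] = kv.second;
    ++g.pointer_updates;
}
```

The intended call sequence mirrors the docs above: `populate_cuda_graph` once, then `update_cuda_graph` per iteration as tensor addresses change.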

docs/custom-execution-plan.md — new file, 39 additions

````diff
@@ -0,0 +1,39 @@
+Here is an example of creating a custom execution plan with hardcoded engine and knobs. Please see the corresponding C++ sample in `samples/cpp/misc/custom_plan.cpp`.
+
+### Get engine count
+```
+inline error_t
+get_engine_count(int64_t &count);
+```
+#### Parameters
+
+- `count`: number of engines [out parameter]
+
+#### Return Value
+- An `error_t` object indicating the success or failure of the function.
+
+### Get knobs supported by an engine
+```
+inline error_t
+get_knobs_for_engine(int64_t const engine, std::vector<Knob> &);
+```
+#### Parameters
+
+- `engine`: engine index
+- `knobs`: list of knobs [out parameter]
+
+#### Return Value
+- An `error_t` object indicating the success or failure of the function.
+
+### Create a plan with particular engine and knobs
+```
+error_t
+create_execution_plan(int64_t const engine_id, std::unordered_map<KnobType_t, int64_t> const &knobs);
+```
+#### Parameters
+
+- `engine_id`: engine index
+- `knobs`: knobs
+
+#### Return Value
+- An `error_t` object indicating the success or failure of the function.
````
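Before calling `create_execution_plan(engine_id, knobs)`, a caller typically picks one value per knob from the range the engine reports. A hypothetical sketch of that selection step — `KnobRange` and its field names are assumptions for the example, not the real cudnn_frontend `Knob` class:

```cpp
#include <cstdint>
#include <vector>

// Hypothetical knob descriptor; the real cudnn_frontend Knob class differs.
struct KnobRange {
    int64_t min_value;
    int64_t max_value;
    int64_t stride;
};

// Enumerate the values a knob may take: min, min+stride, ..., up to max.
std::vector<int64_t> knob_choices(const KnobRange& k) {
    std::vector<int64_t> out;
    for (int64_t v = k.min_value; v <= k.max_value; v += k.stride) out.push_back(v);
    return out;
}
```

Each chosen value would then go into the `std::unordered_map<KnobType_t, int64_t>` that `create_execution_plan` accepts.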

docs/dynamic_kernel_cache.md renamed to docs/dynamic-kernel-cache.md — 4 additions, 6 deletions

````diff
@@ -1,16 +1,15 @@
-## Table of Contents
-1. [Dynamic Shapes APIs](#Dynamic-Shapes)
-2. [Kernel Cache APIs](#Kernel-Cache)
+# Dynamic Shapes and Kernel Cache
+
+## Dynamic Shapes
 
-### Dynamic Shapes
 Causes other APIs (such as the kernel cache) to treat the graph as a dynamic shape graph.
 
 The API to achieve the above is:
 ```cpp
 graph.set_dynamic_shape_enabled(true)
 ```
 
-### Kernel Cache
+## Kernel Cache
 The kernel cache significantly reduces plan build time by re-using a previously compiled kernel for a given execution plan. Kernel caching is enabled only for dynamic shape graphs.
 
 If a graph's kernel cache attribute is set, the kernel cache will store the kernel which was compiled for the graph's execution plan.
@@ -25,4 +24,3 @@ The API to set a dynamic shape graph's kernel cache is:
 ```cpp
 graph.set_kernel_cache(kernel_cache)
 ```
-
````
