Initialize num_splits and debug tracking fields in the CUDA Attention GQA path

Copilot · titaiwangms · Copilot · commit 53c333fd83da · 2026-01-20T21:59:42.000Z
Co-authored-by: titaiwangms <18010845+titaiwangms@users.noreply.github.com>
diff --git a/onnxruntime/core/providers/cuda/llm/attention.cc b/onnxruntime/core/providers/cuda/llm/attention.cc
@@ -225,6 +225,7 @@ Status Attention<T>::ComputeInternal(OpKernelContext* context) const {
     gqa_parameters.local_window_size = -1;  // No local window for standard attention
     gqa_parameters.zeros_count = 0;
     gqa_parameters.zero_ptr = nullptr;
+    gqa_parameters.num_splits = 1;  // No splits for unfused path
 
     // Construct GroupQueryAttentionData
     onnxruntime::contrib::cuda::GroupQueryAttentionData<CudaT> gqa_data;
@@ -278,6 +279,16 @@ Status Attention<T>::ComputeInternal(OpKernelContext* context) const {
     gqa_data.k = nullptr;
     gqa_data.v = nullptr;
 
+#ifndef NDEBUG
+    // Initialize debug tracking fields
+    gqa_data.unpacked_qkv_buffer_size = 0;
+    gqa_data.rotary_buffer_size = 0;
+    gqa_data.position_ids_buffer_size = 0;
+    gqa_data.unpacked_qkv_max_used = 0;
+    gqa_data.rotary_max_used = 0;
+    gqa_data.position_ids_max_used = 0;
+#endif
+
     // Call GQA kernel
     auto& device_prop = GetDeviceProp();
     cublasHandle_t cublas = GetCublasHandle(context);