Set softcap to 0.0f explicitly with comment

Copilot · titaiwangms · Copilot · commit 0e7a632d578b · 2026-01-20T22:02:11.000Z
Co-authored-by: titaiwangms <18010845+titaiwangms@users.noreply.github.com>
diff --git a/onnxruntime/core/providers/cuda/llm/attention.cc b/onnxruntime/core/providers/cuda/llm/attention.cc
@@ -218,7 +218,7 @@ Status Attention<T>::ComputeInternal(OpKernelContext* context) const {
     gqa_parameters.rotary_interleaved = false;
     gqa_parameters.use_smooth_softmax = false;
     gqa_parameters.scale = parameters.scale;
-    gqa_parameters.softcap = parameters.softcap;
+    gqa_parameters.softcap = 0.0f;  // Validated to be 0.0f above
     gqa_parameters.mask_type = onnxruntime::contrib::AttentionMaskType::MASK_NONE;
     gqa_parameters.qkv_format = contribop_parameters.qkv_format;
     gqa_parameters.past_kv_format = onnxruntime::contrib::AttentionQkvFormat::Q_K_V_BNSH;