We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 042ff32 · commit 0e7a632 (Copy full SHA for 0e7a632)
onnxruntime/core/providers/cuda/llm/attention.cc
@@ -218,7 +218,7 @@ Status Attention<T>::ComputeInternal(OpKernelContext* context) const {
218
gqa_parameters.rotary_interleaved = false;
219
gqa_parameters.use_smooth_softmax = false;
220
gqa_parameters.scale = parameters.scale;
221
- gqa_parameters.softcap = parameters.softcap;
+ gqa_parameters.softcap = 0.0f; // Validated to be 0.0f above
222
gqa_parameters.mask_type = onnxruntime::contrib::AttentionMaskType::MASK_NONE;
223
gqa_parameters.qkv_format = contribop_parameters.qkv_format;
224
gqa_parameters.past_kv_format = onnxruntime::contrib::AttentionQkvFormat::Q_K_V_BNSH;
0 commit comments