@@ -554,13 +554,13 @@ Status LaunchGetSequenceLengths(
554554}
555555
556556// Trace macro for debugging: prints the GQA flags read from `parameters`, which must be in scope at the call site.
557- #define ORT_GQA_TRACE (func_name ) \
558- DEBUG_PRINTF (" [GQA %s] is_packed_qkv: %d, is_first_prompt: %d, is_subsequent_prompt: %d, past_present_share_buffer: %d" , \
559- func_name, \
560- static_cast <int >(parameters.is_packed_qkv), \
561- static_cast <int >(parameters.is_first_prompt), \
562- static_cast <int >(parameters.is_subsequent_prompt), \
563- static_cast <int >(parameters.past_present_share_buffer));
// ORT_GQA_TRACE(func_name): debug trace helper.
// Expands to one DUMP_PRINTF call that prints func_name (as %s) followed by four flags:
// is_packed_qkv, is_first_prompt, is_subsequent_prompt and past_present_share_buffer.
// The flags are read from a variable named `parameters`, which is NOT a macro argument, so it
// must be visible at the expansion site. Each flag is cast to int to match the %d conversions.
// NOTE(review): the expansion already ends with ';', so `ORT_GQA_TRACE(x);` produces an extra
// empty statement; take care when using it as the sole body of an unbraced if/else.
// NOTE(review): DUMP_PRINTF is defined elsewhere; when/whether it emits output is not visible here.
557+ #define ORT_GQA_TRACE (func_name ) \
558+ DUMP_PRINTF (" [GQA %s] is_packed_qkv: %d, is_first_prompt: %d, is_subsequent_prompt: %d, past_present_share_buffer: %d" , \
559+ func_name, \
560+ static_cast <int >(parameters.is_packed_qkv), \
561+ static_cast <int >(parameters.is_first_prompt), \
562+ static_cast <int >(parameters.is_subsequent_prompt), \
563+ static_cast <int >(parameters.past_present_share_buffer));
564564
565565////////// Kernels (supports right padding but not left padding)
566566// Use flash attention for all workloads (rotary, kv append, attention, etc.). No extra kernel is used in this path.
@@ -706,8 +706,8 @@ Status FlashDecoding(
706706
707707 bool past_bsnh = past_kv_format == AttentionQkvFormat::Q_K_V_BSNH;
708708
709- DEBUG_PRINTF (" [FlashDecoding] key=%p, value=%p, present_key=%p, present_value=%p, seqlens_k=%p, is_packed_qkv=%d" ,
710- key, value, present_key, present_value, seqlens_k, static_cast <int >(parameters.is_packed_qkv ));
709+ DUMP_PRINTF (" [FlashDecoding] key=%p, value=%p, present_key=%p, present_value=%p, seqlens_k=%p, is_packed_qkv=%d" ,
710+ key, value, present_key, present_value, seqlens_k, static_cast <int >(parameters.is_packed_qkv ));
711711
712712 ORT_RETURN_IF_ERROR (onnxruntime::flash::mha_fwd_kvcache (
713713 device_prop, stream, query, present_key, present_value, key, value, data.output ,
0 commit comments