We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 63d8f2a, commit 90d18a6 (Copy full SHA for 90d18a6)
1 file changed
benchmarks/bench_trtllm_gen_mla.py
@@ -82,7 +82,7 @@ def bench_trtllm_mla(batch_size, q_len_per_request, seq_len, page_size, dtype):
82
bmm1_scale=1.0 / ((128 + 64) ** 0.5),
83
bmm2_scale=1.0,
84
)
85
- # benchmark using CUPTI
+ # benchmark
86
measurements = bench_gpu_time(
87
lambda: flashinfer.decode.trtllm_batch_decode_with_kv_cache_mla(
88
query=query,
0 commit comments