Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 48 additions & 1 deletion benchmarks/routines/flashinfer_benchmark_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["fa2", "fa2_tc", "auto", "cudnn", "trtllm-gen", "trtllm-native"],
"10.3": ["fa2", "fa2_tc", "auto", "cudnn", "trtllm-gen", "trtllm-native"],
"12.0": ["fa2", "fa2_tc", "auto", "cudnn", "trtllm-native"],
"12.1": ["fa2", "fa2_tc", "auto", "cudnn", "trtllm-native"],
},
"BatchPrefillWithPagedKVCacheWrapper": {
# NOTE: trtllm-native calls trtllm_batch_context_with_kv_cache
Expand All @@ -310,6 +311,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["fa2", "auto", "cudnn", "cudnn-native", "trtllm-gen", "trtllm-native"],
"10.3": ["fa2", "auto", "cudnn", "cudnn-native", "trtllm-gen", "trtllm-native"],
"12.0": ["fa2", "auto", "cudnn", "cudnn-native"],
"12.1": ["fa2", "auto", "cudnn", "cudnn-native"],
},
"BatchPrefillWithRaggedKVCacheWrapper": {
# NOTE: trtllm-native calls trtllm_ragged_attention_deepseek
Expand All @@ -322,6 +324,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["fa2", "cudnn", "cudnn-native", "cutlass", "trtllm-native"],
"10.3": ["fa2", "cudnn", "cudnn-native", "cutlass", "trtllm-native"],
"12.0": ["fa2", "cudnn", "cudnn-native"],
"12.1": ["fa2", "cudnn", "cudnn-native"],
},
"BatchMLAPagedAttentionWrapper": {
# NOTE: trtllm-native calls trtllm_batch_decode_with_kv_cache_mla
Expand All @@ -334,6 +337,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["fa2", "cutlass", "trtllm-native", "cute-dsl"],
"10.3": ["fa2", "cutlass", "trtllm-native"],
"12.0": ["fa2"],
"12.1": ["fa2"],
},
# GEMM
"gemm_fp8_nt_groupwise": {
Expand All @@ -345,6 +349,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cutlass"],
"10.3": ["cutlass"],
"12.0": [],
"12.1": [],
},
"group_gemm_fp8_nt_groupwise": {
"7.5": [],
Expand All @@ -355,6 +360,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cutlass"],
"10.3": ["cutlass"],
"12.0": [],
"12.1": [],
},
"bmm_fp8": {
"7.5": [],
Expand All @@ -365,6 +371,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cudnn", "cublas", "cutlass"],
"10.3": ["cudnn", "cublas", "cutlass"],
"12.0": ["cudnn", "cublas"],
"12.1": ["cudnn", "cublas"],
},
"bmm_mxfp8": {
"7.5": [],
Expand All @@ -375,6 +382,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cudnn"],
"10.3": ["cudnn"],
"12.0": [],
"12.1": [],
},
"mm_mxfp8": {
"7.5": [],
Expand All @@ -386,6 +394,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.3": ["cutlass", "cute-dsl", "trtllm"],
"11.0": ["cutlass"],
"12.0": [],
"12.1": [],
},
# Note: mm_fp4, mm_bf16, and bmm_bf16 use support checkers to filter backends, so they are not listed here
# MOE
Expand All @@ -398,6 +407,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["trtllm"],
"10.3": ["trtllm"],
"12.0": [],
"12.1": [],
},
"trtllm_fp8_block_scale_moe": {
"7.5": [],
Expand All @@ -408,6 +418,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["trtllm"],
"10.3": ["trtllm"],
"12.0": [],
"12.1": [],
},
"trtllm_fp8_per_tensor_scale_moe": {
"7.5": [],
Expand All @@ -418,6 +429,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["trtllm"],
"10.3": ["trtllm"],
"12.0": [],
"12.1": [],
},
"cutlass_fused_moe": {
"7.5": [],
Expand All @@ -427,7 +439,8 @@ def dtype_str_to_torch_dtype(dtype_str):
"9.0": [],
"10.0": ["cutlass"],
"10.3": ["cutlass"],
- "12.0": [],
+ "12.0": ["cutlass"],
+ "12.1": ["cutlass"],
},
"cute_dsl_fp4_block_scale_moe": {
"7.5": [],
Expand All @@ -438,6 +451,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cute-dsl"],
"10.3": ["cute-dsl"],
"12.0": [],
"12.1": [],
},
# NORM
"rmsnorm": {
Expand All @@ -449,6 +463,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"rmsnorm_quant": {
"7.5": ["cuda"],
Expand All @@ -459,6 +474,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"fused_add_rmsnorm_quant": {
"7.5": ["cuda"],
Expand All @@ -469,6 +485,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
# NORM - FP4 Quantization (Blackwell SM100+ only, CuTe-DSL kernels)
"rmsnorm_fp4quant": {
Expand All @@ -480,6 +497,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cute-dsl"],
"10.3": ["cute-dsl"],
"12.0": ["cute-dsl"],
"12.1": ["cute-dsl"],
},
"add_rmsnorm_fp4quant": {
"7.5": [],
Expand All @@ -490,6 +508,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cute-dsl"],
"10.3": ["cute-dsl"],
"12.0": ["cute-dsl"],
"12.1": ["cute-dsl"],
},
# QUANTIZATION
"mxfp8_quantize": {
Expand All @@ -501,6 +520,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda", "cute-dsl"],
"10.3": ["cuda", "cute-dsl"],
"12.0": ["cuda", "cute-dsl"],
"12.1": ["cuda", "cute-dsl"],
},
"mxfp4_quantize": {
"7.5": [],
Expand All @@ -511,6 +531,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda", "cute-dsl"],
"10.3": ["cuda", "cute-dsl"],
"12.0": ["cuda", "cute-dsl"],
"12.1": ["cuda", "cute-dsl"],
},
"nvfp4_quantize": {
"7.5": [],
Expand All @@ -521,6 +542,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda", "cute-dsl"],
"10.3": ["cuda", "cute-dsl"],
"12.0": ["cuda", "cute-dsl"],
"12.1": ["cuda", "cute-dsl"],
},
"nvfp4_batched_quantize": {
"7.5": [],
Expand All @@ -531,6 +553,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
# SAMPLING
"softmax": {
Expand All @@ -542,6 +565,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"sampling_from_probs": {
"7.5": ["cuda"],
Expand All @@ -552,6 +576,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"sampling_from_logits": {
"7.5": ["cuda"],
Expand All @@ -562,6 +587,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"top_k_sampling_from_probs": {
"7.5": ["cuda"],
Expand All @@ -572,6 +598,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"top_p_sampling_from_probs": {
"7.5": ["cuda"],
Expand All @@ -582,6 +609,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"top_k_top_p_sampling_from_probs": {
"7.5": ["cuda"],
Expand All @@ -592,6 +620,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"top_k_top_p_sampling_from_logits": {
"7.5": ["cuda"],
Expand All @@ -602,6 +631,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"min_p_sampling_from_probs": {
"7.5": ["cuda"],
Expand All @@ -612,6 +642,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"top_k_renorm_probs": {
"7.5": ["cuda"],
Expand All @@ -622,6 +653,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"top_p_renorm_probs": {
"7.5": ["cuda"],
Expand All @@ -632,6 +664,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"top_k_mask_logits": {
"7.5": ["cuda"],
Expand All @@ -642,6 +675,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"chain_speculative_sampling": {
"7.5": ["cuda"],
Expand All @@ -652,6 +686,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"top_k": {
"7.5": ["cuda"],
Expand All @@ -662,6 +697,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"top_k_page_table_transform": {
"7.5": ["cuda"],
Expand All @@ -672,6 +708,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"top_k_ragged_transform": {
"7.5": ["cuda"],
Expand All @@ -682,6 +719,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
# ROPE
"apply_rope": {
Expand All @@ -693,6 +731,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"apply_rope_pos_ids": {
"7.5": ["cuda"],
Expand All @@ -703,6 +742,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"apply_llama31_rope": {
"7.5": ["cuda"],
Expand All @@ -713,6 +753,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"apply_llama31_rope_pos_ids": {
"7.5": ["cuda"],
Expand All @@ -723,6 +764,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"apply_rope_with_cos_sin_cache": {
"7.5": ["cuda"],
Expand All @@ -733,6 +775,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"mla_rope_quantize_fp8": {
"7.5": [],
Expand All @@ -743,6 +786,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"rope_quantize_fp8": {
"7.5": [],
Expand All @@ -753,6 +797,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
"rope_quantize_fp8_append_paged_kv_cache": {
"7.5": [],
Expand All @@ -763,6 +808,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.0": ["cuda"],
"10.3": ["cuda"],
"12.0": ["cuda"],
"12.1": ["cuda"],
},
# MAMBA
"selective_state_update": {
Expand All @@ -775,6 +821,7 @@ def dtype_str_to_torch_dtype(dtype_str):
"10.3": ["flashinfer", "triton"],
"11.0": ["flashinfer", "triton"],
"12.0": ["flashinfer", "triton"],
"12.1": ["flashinfer", "triton"],
},
}

Expand Down
Loading