| example_dequant_gemv_fp16xint4 |
0.0052611 |
0.00556904 |
0.944705 |
| example_mha_fwd_bhsd |
0.009312 |
0.009664 |
0.963576 |
| example_mha_fwd_bshd_wgmma_pipelined |
0.014688 |
0.015104 |
0.972458 |
| example_per_token_cast_to_fp8 |
0.00885861 |
0.0091083 |
0.972587 |
| example_dequant_gemm_bf16_mxfp4_hopper |
0.011872 |
0.012096 |
0.981481 |
| example_gqa_sink_fwd_bhsd_wgmma_pipelined |
0.013135 |
0.0133685 |
0.982534 |
| example_tilelang_nsa_decode |
0.00833827 |
0.00848195 |
0.98306 |
| example_mha_bwd_bshd |
0.0323905 |
0.032925 |
0.983766 |
| example_dequant_gemm_fp4_hopper |
0.010944 |
0.011104 |
0.985591 |
| example_mha_sink_fwd_bhsd |
0.0112312 |
0.0113914 |
0.985937 |
| example_mha_sink_bwd_bhsd_sliding_window |
0.0250769 |
0.0253728 |
0.988338 |
| example_mha_fwd_bhsd_wgmma_pipelined |
0.015296 |
0.015456 |
0.989648 |
| example_warp_specialize_gemm_copy_0_gemm_1 |
0.03088 |
0.031168 |
0.99076 |
| example_gemm_intrinsics |
0.027712 |
0.027904 |
0.993119 |
| sparse_mla_fwd_pipelined |
0.0984533 |
0.0987996 |
0.996495 |
| example_vertical_slash_sparse_attn |
0.00148107 |
0.00148577 |
0.996837 |
| example_tilelang_gemm_fp8_intrinsic |
0.005984 |
0.006 |
0.997333 |
| example_mha_sink_fwd_bhsd_wgmma_pipelined |
0.0149988 |
0.0150285 |
0.998024 |
| example_gemm |
0.017281 |
0.017312 |
0.998209 |
| example_fusedmoe_tilelang |
0.217889 |
0.218217 |
0.998497 |
| example_tilelang_gemm_fp8 |
0.01376 |
0.013776 |
0.998839 |
| example_mha_sink_fwd_bhsd_sliding_window |
0.0113744 |
0.0113844 |
0.999122 |
| example_mha_sink_bwd_bhsd |
0.0410323 |
0.0410627 |
0.99926 |
| example_gqa_bwd |
0.0362193 |
0.0362422 |
0.999368 |
| example_group_per_split_token_cast_to_fp8 |
0.0100313 |
0.0100362 |
0.999512 |
| example_gemv |
0.0481227 |
0.048128 |
0.99989 |
| example_linear_attn_bwd |
0.114526 |
0.114538 |
0.999895 |
| sparse_mla_bwd |
0.247906 |
0.247916 |
0.99996 |
| example_gemm_autotune |
0.020544 |
0.020544 |
1 |
| example_dequant_gemm_w4a8 |
0.00624 |
0.00624 |
1 |
| block_sparse_attn_tilelang |
0.0112801 |
0.0112754 |
1.00042 |
| topk_selector |
0.0449657 |
0.044939 |
1.00059 |
| example_tilelang_gemm_splitk_vectorize_atomicadd |
0.0413707 |
0.0413452 |
1.00062 |
| tilelang_example_sparse_tensorcore |
0.0133828 |
0.0133715 |
1.00085 |
| example_convolution_autotune |
0.69616 |
0.695552 |
1.00087 |
| example_linear_attn_fwd |
0.0277008 |
0.0276757 |
1.00091 |
| example_mha_inference |
0.0652448 |
0.0651648 |
1.00123 |
| example_tilelang_gemm_splitk |
0.0402086 |
0.0401445 |
1.0016 |
| example_dequant_groupedgemm_bf16_mxfp4_hopper |
0.0145726 |
0.0145491 |
1.00162 |
| example_mha_fwd_varlen |
1.43004 |
1.42759 |
1.00172 |
| example_tilelang_nsa_fwd |
0.00870593 |
0.00868398 |
1.00253 |
| example_dequant_gemm_bf16_mxfp4_hopper_tma |
0.01168 |
0.011648 |
1.00275 |
| example_topk |
0.010048 |
0.010016 |
1.00319 |
| example_mha_sink_fwd_bhsd_wgmma_pipelined_sliding_window |
0.0147675 |
0.0147171 |
1.00342 |
| example_gqa_bwd_wgmma_pipelined |
0.0461091 |
0.0459494 |
1.00348 |
| example_mha_bwd_bhsd |
0.0310782 |
0.03097 |
1.00349 |
| example_gqa_sink_bwd_bhsd |
0.0315594 |
0.0314257 |
1.00425 |
| example_blocksparse_gemm |
0.020372 |
0.0202743 |
1.00482 |
| example_gqa_sink_fwd_bhsd_wgmma_pipelined_sliding_window |
0.0132417 |
0.0131672 |
1.00566 |
| example_mla_decode |
0.327616 |
0.32576 |
1.0057 |
| example_gqa_bwd_tma_reduce_varlen |
0.0477759 |
0.0474919 |
1.00598 |
| example_mha_fwd_bshd |
0.023712 |
0.023552 |
1.00679 |
| fp8_lighting_indexer |
0.0293298 |
0.0291277 |
1.00694 |
| example_gqa_decode |
0.043808 |
0.043488 |
1.00736 |
| example_tilelang_gemm_fp8_2xAcc |
0.135392 |
0.134336 |
1.00786 |
| example_gqa_fwd_bshd |
0.056896 |
0.056448 |
1.00794 |
| example_warp_specialize_gemm_barrierpipe_stage2 |
0.031456 |
0.031136 |
1.01028 |
| example_warp_specialize_gemm_copy_1_gemm_0 |
0.030368 |
0.030048 |
1.01065 |
| example_gemm_schedule |
0.0276354 |
0.0273229 |
1.01144 |
| example_convolution |
0.94832 |
0.937536 |
1.0115 |
| example_tilelang_sparse_gqa_decode_varlen_mask |
0.0211596 |
0.0208851 |
1.01314 |
| example_elementwise_add |
0.0193343 |
0.0190617 |
1.0143 |
| example_dynamic |
0.023712 |
0.02336 |
1.01507 |
| example_mha_bwd_bshd_wgmma_pipelined |
0.0221314 |
0.0218026 |
1.01508 |
| sparse_mla_fwd |
0.358282 |
0.351806 |
1.01841 |
| example_gqa_sink_bwd_bhsd_sliding_window |
0.0203378 |
0.0199598 |
1.01894 |
| example_dequant_gemm_bf16_fp4_hopper |
0.010816 |
0.010592 |
1.02115 |
| example_gqa_fwd_bshd_wgmma_pipelined |
0.046112 |
0.04512 |
1.02199 |
| example_tilelang_block_sparse_attn |
0.00847182 |
0.0081466 |
1.03992 |
| example_tilelang_sparse_gqa_decode_varlen_indice |
0.0162253 |
0.013691 |
1.18511 |