flashinfer-ai · flashinfer-bot · Mar 6, 2026 · Feb 28, 2026 · Feb 28, 2026 · Feb 28, 2026
@@ -368,8 +368,8 @@ def dtype_str_to_torch_dtype(dtype_str):
         "8.6": [],
         "8.9": [],
         "9.0": [],
-        "10.0": ["cutlass"],
-        "10.3": ["cutlass"],
+        "10.0": ["cutlass", "cute-dsl"],
+        "10.3": ["cutlass", "cute-dsl"],
         "11.0": ["cutlass"],
         "12.0": [],
     },

@@ -1091,7 +1091,11 @@ def testMmFp4(args):
                 b=mat2_fp4.T if backend != "trtllm" else mat2_fp4_trtllm.T,
                 a_descale=input_inv_s,
                 b_descale=mat2_inv_s.T if backend != "trtllm" else mat2_inv_s_trtllm.T,
-                alpha=alpha,
+                alpha=(
+                    torch.tensor([1.0], dtype=torch.float32, device=device)
+                    if (not use_nvfp4 and backend == "cute-dsl")
+                    else alpha
+                ),
                 out_dtype=res_dtype,
                 block_size=16
                 if use_nvfp4
@@ -1129,7 +1133,11 @@ def run_backend(
                 b=mat2_fp4.T if backend != "trtllm" else mat2_fp4_trtllm.T,
                 a_descale=input_inv_s,
                 b_descale=mat2_inv_s.T if backend != "trtllm" else mat2_inv_s_trtllm.T,
-                alpha=alpha,
+                alpha=(
+                    torch.tensor([1.0], dtype=torch.float32, device=device)
+                    if (not use_nvfp4 and backend == "cute-dsl")
+                    else alpha
+                ),
                 out_dtype=res_dtype,
                 block_size=block_size,
                 use_8x4_sf_layout=not use_128x4_sf_layout,
@@ -1289,9 +1297,7 @@ def testMmMxfp8(args):
     res_dtype = args.out_dtype
     is_cuda_graph_compatible = not args.no_cuda_graph
     run_refcheck = args.refcheck
-    autotune_supported_backends = [
-        "cutlass",
-    ]
+    autotune_supported_backends = ["cutlass", "cute-dsl"]
     res = []
 
     backends = filter_backends_by_compute_capability(backends, args.routine, device)
@@ -1344,7 +1350,7 @@ def testMmMxfp8(args):
         print(f"[VVERBOSE] {mat2_scale.dtype = }")
 
     def run_backend(backend, input_mxfp8, mat2_mxfp8, input_scale, mat2_scale):
-        if backend == "cutlass":
+        if backend in ["cutlass", "cute-dsl", "auto"]:
             return flashinfer.gemm.mm_mxfp8(
                 a=input_mxfp8,
                 b=mat2_mxfp8.t(),  # mm_mxfp8 expects b.t()