Skip to content

Commit 57a19c7

Browse files
committed
[GDN] Address review feedback from Gemini
Signed-off-by: Artem Perevedentsev <aperevedents@nvidia.com>
1 parent f7db516 commit 57a19c7

2 files changed

Lines changed: 29 additions & 16 deletions

File tree

vllm/model_executor/layers/mamba/gdn_linear_attn.py

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -68,20 +68,6 @@
6868
logger = init_logger(__name__)
6969

7070

71-
def _has_cutlass_dsl_cu13() -> bool:
72-
"""Whether the CUDA-13 CuTe-DSL shared libs are installed.
73-
"""
74-
try:
75-
from importlib.metadata import distribution
76-
except ImportError:
77-
return False
78-
try:
79-
distribution("nvidia-cutlass-dsl-libs-cu13")
80-
except Exception:
81-
return False
82-
return True
83-
84-
8571
def _should_use_flashinfer_gdn_prefill(
8672
backend: str, head_k_dim: int | None
8773
) -> bool:
@@ -106,7 +92,9 @@ def _should_use_flashinfer_gdn_prefill(
10692
return False # Neither Hopper nor Blackwell.
10793
if head_k_dim != 128:
10894
return False
109-
if not _has_cutlass_dsl_cu13():
95+
if current_platform.get_cuda_runtime_major() < 13:
96+
return False
97+
if not current_platform.has_cutlass_dsl_cu13():
11098
return False
11199
return True
112100

@@ -121,7 +109,7 @@ def _log_gdn_backend_decision(
121109
device_cap = (
122110
str(current_platform.get_device_capability()) if is_cuda else "n/a"
123111
)
124-
cutlass_dsl_cu13_installed = _has_cutlass_dsl_cu13()
112+
cutlass_dsl_cu13_installed = current_platform.has_cutlass_dsl_cu13()
125113
logger.info_once(
126114
"GDN prefill backend inputs:\n"
127115
" requested=%s\n"
@@ -202,6 +190,12 @@ def __init__(self, head_k_dim: int | None = None) -> None:
202190
backend = str(backend_cfg).strip().lower()
203191

204192
use_flashinfer = _should_use_flashinfer_gdn_prefill(backend, head_k_dim)
193+
if backend == "flashinfer" and not use_flashinfer:
194+
logger.warning_once(
195+
"GDN prefill backend 'flashinfer' is selected but "
196+
"cannot use this kernel on the current platform. "
197+
"Falling back to Triton/FLA."
198+
)
205199
_log_gdn_backend_decision(backend, head_k_dim, use_flashinfer)
206200

207201
self._forward_method = (

vllm/platforms/interface.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,25 @@ def is_device_capability_family(
359359
return False
360360
return (current_capability.to_int() // 10) == (capability // 10)
361361

362+
@classmethod
363+
def get_cuda_runtime_major(cls) -> int:
364+
"""Major ``torch.version.cuda`` version, or ``0`` if undetermined."""
365+
major = (torch.version.cuda or "0").split(".", 1)[0]
366+
return int(major) if major.isdigit() else 0
367+
368+
@classmethod
369+
def has_cutlass_dsl_cu13(cls) -> bool:
370+
"""Whether ``nvidia-cutlass-dsl-libs-cu13`` is installed."""
371+
try:
372+
from importlib.metadata import distribution
373+
except ImportError:
374+
return False
375+
try:
376+
distribution("nvidia-cutlass-dsl-libs-cu13")
377+
except Exception:
378+
return False
379+
return True
380+
362381
@classmethod
363382
def get_device_name(cls, device_id: int = 0) -> str:
364383
"""Get the name of a device."""

0 commit comments

Comments (0)