67 | 67 |     )
68 | 68 |
69 | 69 |     from .test_fmha_utils import make_packed_qkv
   | 70 | +
   | 71 | +    HAS_FLASH_V2 = True
70 | 72 | except (ImportError, IOError, AttributeError):
71 |    | -    pass
   | 73 | +    HAS_FLASH_V2 = False
72 | 74 |
73 | 75 | HAS_CUDA_124 = (
74 | 76 |     torch.cuda.is_available() and torch.version.cuda and torch.version.cuda >= "12.4"
75 | 77 | )
76 | 78 |
77 | 79 | # [Optional] flash_attn v3
78 |    | -HAS_FLASH_V3 = True
79 | 80 | try:
80 | 81 |     torch_lib_path = os.path.join(os.path.dirname(__file__), "lib")
81 | 82 |     with add_ld_library_path(torch_lib_path):
85 | 86 |         from ai_codesign.gen_ai.flash_attention_v2.hopper.flash_attn_interface import (
86 | 87 |             flash_attn_func as flash_attn_v3,
87 | 88 |         )
   | 89 | +
   | 90 | +    HAS_FLASH_V3 = True
88 | 91 | except (ImportError, IOError, AttributeError):
89 | 92 |     HAS_FLASH_V3 = False
90 | 93 |
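This hunk swaps the silent `pass` in the flash_attn v2 import guard for an explicit `HAS_FLASH_V2` flag, and moves `HAS_FLASH_V3 = True` from before the v3 try block to after its imports, so each flag only becomes True once every optional import has actually succeeded. Below is a minimal sketch of that guard pattern, assuming flash_attn's `flash_attn_qkvpacked_func` as the guarded import (the actual import statement sits just above this excerpt) and omitting the rest of the benchmark module:

    # Sketch of the optional-import guard; the exact imports in the real
    # module differ, only the flag placement matters here.
    try:
        from flash_attn.flash_attn_interface import (
            flash_attn_qkvpacked_func as flash_attn_func,
        )

        # Set the flag only after every import above succeeded, so a partial
        # failure still leaves it False instead of raising at module import.
        HAS_FLASH_V2 = True
    except (ImportError, IOError, AttributeError):
        HAS_FLASH_V2 = False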
@@ -244,7 +247,7 @@ def sdpa_flash_attention(q, k, v):
244 | 247 |             v,
245 | 248 |         )
246 | 249 |
247 |     | -    @register_benchmark()
    | 250 | +    @register_benchmark(enabled=HAS_FLASH_V2)
248 | 251 |     def flash_v2(
249 | 252 |         self,
250 | 253 |         q: torch.Tensor,
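`register_benchmark(enabled=HAS_FLASH_V2)` keeps flash_v2 out of the run when the optional import failed, rather than registering a benchmark that would only crash at call time. The sketch below is a hypothetical, stripped-down decorator showing how such an `enabled` flag can gate registration; it is not tritonbench's actual register_benchmark implementation, and the `_BENCHMARKS` dict is made up for illustration:

    # Hypothetical registry + decorator, for illustration only.
    _BENCHMARKS = {}

    def register_benchmark(enabled: bool = True):
        def decorator(func):
            if enabled:
                # Only expose the benchmark when its backend imported cleanly.
                _BENCHMARKS[func.__name__] = func
            return func
        return decorator

    HAS_FLASH_V2 = False  # e.g. flash_attn v2 is not installed

    @register_benchmark(enabled=HAS_FLASH_V2)
    def flash_v2(q, k, v):
        ...

    assert "flash_v2" not in _BENCHMARKS  # gated off instead of failing later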
@@ -533,6 +536,10 @@ def get_ctx_vals():
533 | 536 |         shapes = ctx_vals
534 | 537 |         requires_grad = True
535 | 538 |         for shape in shapes:
    | 539 | +            if torch.version.hip is not None and shape == (4, 32, 1, 128):
    | 540 | +                # AMD ROCm has an issue running triton_tutorial_flash_v2
    | 541 | +                # on shape (4, 32, 1, 128). Skip it for now.
    | 542 | +                continue
536 | 543 |             BATCH, H, N_CTX, D_HEAD = shape
537 | 544 |             q = torch.randn(
538 | 545 |                 (BATCH, H, N_CTX, D_HEAD),
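`torch.version.hip` is None on CUDA builds of PyTorch and a version string on ROCm builds, so the added guard skips the problematic (4, 32, 1, 128) input only on AMD hardware. A standalone sketch of the same filter, with a made-up shape list:

    import torch

    # Hypothetical shape list; (4, 32, 1, 128) is the case skipped in the hunk.
    shapes = [(4, 32, 1, 128), (8, 16, 512, 64)]
    for shape in shapes:
        if torch.version.hip is not None and shape == (4, 32, 1, 128):
            # Skipped only under ROCm; CUDA builds keep every shape.
            continue
        BATCH, H, N_CTX, D_HEAD = shape
        print(BATCH, H, N_CTX, D_HEAD)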