
Commit 7aa7818

[None][feat] Add triton paged attention for AutoDeploy (NVIDIA#12642)
Signed-off-by: Chenghao Zhang <211069071+nvchenghaoz@users.noreply.github.com>
Co-authored-by: Suyog Gupta <41447211+suyoggupta@users.noreply.github.com>
1 parent 4c97a03 commit 7aa7818

File tree

6 files changed: +1886 -2 lines


tensorrt_llm/_torch/auto_deploy/custom_ops/attention/__init__.py

Lines changed: 2 additions & 1 deletion

```diff
@@ -22,7 +22,7 @@
 - trtllm_attention: TRT-LLM thop.attention-based optimized attention
 - triton_attention: Triton-based attention implementations
 - triton_attention_with_kv_cache: Triton attention with KV cache support
-- triton_attention_with_paged_kv_cache: Triton attention with paged KV cache
+- triton_paged_attention: Triton paged attention (two-stage flash-decode) with HND layout
 - onnx_attention: Placeholder ops for ONNX export of attention mechanisms
 """

@@ -34,5 +34,6 @@
     "triton_attention",
     "triton_attention_with_kv_cache",
     "triton_attention_with_paged_kv_cache",
+    "triton_paged_attention",
     "onnx_attention",
 ]
```
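The new op's docstring describes it as a "two-stage flash-decode" attention. To make that term concrete, here is a minimal pure-Python sketch of the idea: stage 1 computes a partial softmax-attention result per KV page (keeping each page's score max and sum-of-exponentials), and stage 2 merges the per-page partials with a numerically stable rescaling. This is an illustrative model of the algorithm, not TensorRT-LLM's Triton kernel; all names, shapes, and the page layout here are hypothetical.

```python
import math

def attend_block(q, k_block, v_block, scale):
    """Stage 1: partial attention over one KV page (hypothetical helper).

    Returns (unnormalized_out, block_max, block_sumexp), computed
    relative to this page's own score maximum for stability."""
    scores = [scale * sum(qi * ki for qi, ki in zip(q, k)) for k in k_block]
    m = max(scores)
    exps = [math.exp(s - m) for s in scores]
    denom = sum(exps)
    out = [sum(e * v[d] for e, v in zip(exps, v_block)) for d in range(len(q))]
    return out, m, denom

def flash_decode(q, pages, scale):
    """Stage 2: merge per-page partials into the exact attention output.

    Each partial is rescaled by exp(page_max - global_max) so all
    exponentials end up relative to the same global maximum."""
    partials = [attend_block(q, k, v, scale) for k, v in pages]
    g = max(m for _, m, _ in partials)  # global score max across pages
    denom = sum(d * math.exp(m - g) for _, m, d in partials)
    out = [0.0] * len(q)
    for o, m, _ in partials:
        w = math.exp(m - g)
        for d in range(len(q)):
            out[d] += w * o[d]
    return [x / denom for x in out]

def reference(q, ks, vs, scale):
    """Single-pass softmax attention, for checking the two-stage result."""
    scores = [scale * sum(qi * ki for qi, ki in zip(q, k)) for k in ks]
    m = max(scores)
    exps = [math.exp(s - m) for s in scores]
    denom = sum(exps)
    return [sum(e * v[d] for e, v in zip(exps, vs)) / denom
            for d in range(len(q))]
```

In a real paged-attention kernel, each stage-1 unit runs as an independent GPU program over one fixed-size KV page located through a page table, which is what lets the cache be non-contiguous; the two-stage split exists so decode-time parallelism scales with sequence length rather than batch size alone.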
