Skip to content
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions csrc/batch_prefill_customize_config.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,22 @@ struct RaggedParams {
// Number of KV tokens for request `batch_idx`, computed as the difference of
// consecutive entries in the KV indptr (CSR-style offsets) array.
__host__ __device__ __forceinline__ uint32_t get_kv_len(uint32_t batch_idx) const {
  const uint32_t begin = kv_indptr[batch_idx];
  const uint32_t end = kv_indptr[batch_idx + 1];
  return end - begin;
}

// Per-request query block-expanding offset. Returns 0 unless the optional
// `maybe_q_block_expanding_offset` parameter was declared for this template
// instantiation and a non-null pointer was supplied at runtime.
__host__ __device__ __forceinline__ uint32_t get_q_block_expanding_offset(uint32_t batch_idx) const {
{% if 'maybe_q_block_expanding_offset' in additional_params_decl %}
  if (maybe_q_block_expanding_offset == nullptr) {
    return 0;
  }
  return maybe_q_block_expanding_offset[batch_idx];
{% else %}
  return 0;
{% endif %}
}

// Per-request KV block-expanding offset. Returns 0 unless the optional
// `maybe_kv_block_expanding_offset` parameter was declared for this template
// instantiation and a non-null pointer was supplied at runtime.
__host__ __device__ __forceinline__ uint32_t get_kv_block_expanding_offset(uint32_t batch_idx) const {
{% if 'maybe_kv_block_expanding_offset' in additional_params_decl %}
  if (maybe_kv_block_expanding_offset == nullptr) {
    return 0;
  }
  return maybe_kv_block_expanding_offset[batch_idx];
{% else %}
  return 0;
{% endif %}
}
};

struct PagedParams {
Expand Down Expand Up @@ -116,6 +132,22 @@ struct PagedParams {
// Number of KV tokens for request `batch_idx`, delegated to the paged KV
// cache's per-request length query.
__host__ __device__ __forceinline__ uint32_t get_kv_len(uint32_t batch_idx) const {
return paged_kv.get_length(batch_idx);
}

// Per-request query block-expanding offset. Returns 0 unless the optional
// `maybe_q_block_expanding_offset` parameter was declared for this template
// instantiation and a non-null pointer was supplied at runtime.
__host__ __device__ __forceinline__ uint32_t get_q_block_expanding_offset(uint32_t batch_idx) const {
{% if 'maybe_q_block_expanding_offset' in additional_params_decl %}
  if (maybe_q_block_expanding_offset == nullptr) {
    return 0;
  }
  return maybe_q_block_expanding_offset[batch_idx];
{% else %}
  return 0;
{% endif %}
}

// Per-request KV block-expanding offset. Returns 0 unless the optional
// `maybe_kv_block_expanding_offset` parameter was declared for this template
// instantiation and a non-null pointer was supplied at runtime.
__host__ __device__ __forceinline__ uint32_t get_kv_block_expanding_offset(uint32_t batch_idx) const {
{% if 'maybe_kv_block_expanding_offset' in additional_params_decl %}
  if (maybe_kv_block_expanding_offset == nullptr) {
    return 0;
  }
  return maybe_kv_block_expanding_offset[batch_idx];
{% else %}
  return 0;
{% endif %}
}
};

{{ variant_decl }}
50 changes: 50 additions & 0 deletions csrc/batch_prefill_sm90_customize_config.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,31 @@ struct RaggedParams {
int window_left;

bool causal;

// Block Expanding support
// Number of query/output tokens for request `batch_idx`, read from the
// per-request qo_lens array.
__host__ __device__ __forceinline__ uint32_t get_qo_len(uint32_t batch_idx) const {
return qo_lens[batch_idx];
}

// Number of KV tokens for request `batch_idx`, read from the per-request
// kv_lens array.
__host__ __device__ __forceinline__ uint32_t get_kv_len(uint32_t batch_idx) const {
return kv_lens[batch_idx];
}

// Per-request query block-expanding offset (SM90 path: the optional pointer
// lives inside `additional_params`). Returns 0 when the parameter is not
// declared for this instantiation, or when the pointer is null.
__host__ __device__ __forceinline__ uint32_t get_q_block_expanding_offset(uint32_t batch_idx) const {
{% if 'maybe_q_block_expanding_offset' in additional_params_decl %}
  const auto* offsets = additional_params.maybe_q_block_expanding_offset;
  return offsets ? offsets[batch_idx] : 0;
{% else %}
  return 0;
{% endif %}
}

// Per-request KV block-expanding offset (SM90 path: the optional pointer
// lives inside `additional_params`). Returns 0 when the parameter is not
// declared for this instantiation, or when the pointer is null.
__host__ __device__ __forceinline__ uint32_t get_kv_block_expanding_offset(uint32_t batch_idx) const {
{% if 'maybe_kv_block_expanding_offset' in additional_params_decl %}
  const auto* offsets = additional_params.maybe_kv_block_expanding_offset;
  return offsets ? offsets[batch_idx] : 0;
{% else %}
  return 0;
{% endif %}
}
};

struct PagedParams {
Expand Down Expand Up @@ -117,6 +142,31 @@ struct PagedParams {
int window_left;

bool causal;

// Block Expanding support
// Number of query/output tokens for request `batch_idx`, read from the
// per-request qo_lens array.
__host__ __device__ __forceinline__ uint32_t get_qo_len(uint32_t batch_idx) const {
return qo_lens[batch_idx];
}

// Number of KV tokens for request `batch_idx`, read from the per-request
// kv_lens array.
__host__ __device__ __forceinline__ uint32_t get_kv_len(uint32_t batch_idx) const {
return kv_lens[batch_idx];
}

// Per-request query block-expanding offset (SM90 path: the optional pointer
// lives inside `additional_params`). Returns 0 when the parameter is not
// declared for this instantiation, or when the pointer is null.
__host__ __device__ __forceinline__ uint32_t get_q_block_expanding_offset(uint32_t batch_idx) const {
{% if 'maybe_q_block_expanding_offset' in additional_params_decl %}
  const auto* offsets = additional_params.maybe_q_block_expanding_offset;
  return offsets ? offsets[batch_idx] : 0;
{% else %}
  return 0;
{% endif %}
}

// Per-request KV block-expanding offset (SM90 path: the optional pointer
// lives inside `additional_params`). Returns 0 when the parameter is not
// declared for this instantiation, or when the pointer is null.
__host__ __device__ __forceinline__ uint32_t get_kv_block_expanding_offset(uint32_t batch_idx) const {
{% if 'maybe_kv_block_expanding_offset' in additional_params_decl %}
  const auto* offsets = additional_params.maybe_kv_block_expanding_offset;
  return offsets ? offsets[batch_idx] : 0;
{% else %}
  return 0;
{% endif %}
}
};

{{ variant_decl }}
20 changes: 20 additions & 0 deletions csrc/single_prefill_customize_config.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,26 @@ struct Params {
// KV sequence length; single-prefill has exactly one request, so the scalar
// kv_len member is returned and `batch_idx` is not consulted.
__host__ __device__ __forceinline__ uint32_t get_kv_len(uint32_t batch_idx) const {
return kv_len;
}

// SinglePrefill: q_block_expanding_offset support
// If q_block_expanding_offset parameter is provided, use it; otherwise return 0
__host__ __device__ __forceinline__ uint32_t get_q_block_expanding_offset(uint32_t batch_idx) const {
{% if has_q_block_expanding_offset %}
  // Single request: a scalar offset, so `batch_idx` is not consulted.
  const uint32_t offset = static_cast<uint32_t>(q_block_expanding_offset);
  return offset;
{% else %}
  return 0;
{% endif %}
}

// SinglePrefill: kv_block_expanding_offset support (for Cascade Current Chunk)
// If kv_block_expanding_offset parameter is provided, use it; otherwise return 0
__host__ __device__ __forceinline__ uint32_t get_kv_block_expanding_offset(uint32_t batch_idx) const {
{% if has_kv_block_expanding_offset %}
  // Single request: a scalar offset, so `batch_idx` is not consulted.
  const uint32_t offset = static_cast<uint32_t>(kv_block_expanding_offset);
  return offset;
{% else %}
  return 0;
{% endif %}
}
};

{{ variant_decl }}
29 changes: 28 additions & 1 deletion csrc/single_prefill_sm90_customize_config.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,33 @@ struct Params {
int window_left;

bool causal;

// Block Expanding support
// Query/output sequence length; single-prefill has exactly one request, so
// the scalar qo_len member is returned and `batch_idx` is not consulted.
__host__ __device__ __forceinline__ uint32_t get_qo_len(uint32_t batch_idx) const {
return qo_len;
}

// KV sequence length; single-prefill has exactly one request, so the scalar
// kv_len member is returned and `batch_idx` is not consulted.
__host__ __device__ __forceinline__ uint32_t get_kv_len(uint32_t batch_idx) const {
return kv_len;
}

// SinglePrefill: q_block_expanding_offset support
__host__ __device__ __forceinline__ uint32_t get_q_block_expanding_offset(uint32_t batch_idx) const {
{% if has_q_block_expanding_offset %}
  // SM90 path: the scalar offset lives inside `additional_params`; the single
  // request means `batch_idx` is not consulted.
  const uint32_t offset = static_cast<uint32_t>(additional_params.q_block_expanding_offset);
  return offset;
{% else %}
  return 0;
{% endif %}
}

// SinglePrefill: kv_block_expanding_offset support (for Cascade Current Chunk)
__host__ __device__ __forceinline__ uint32_t get_kv_block_expanding_offset(uint32_t batch_idx) const {
{% if has_kv_block_expanding_offset %}
  // SM90 path: the scalar offset lives inside `additional_params`; the single
  // request means `batch_idx` is not consulted.
  const uint32_t offset = static_cast<uint32_t>(additional_params.kv_block_expanding_offset);
  return offset;
{% else %}
  return 0;
{% endif %}
}
};

{{ variant_decl }}
{{ variant_decl }}
1 change: 1 addition & 0 deletions flashinfer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from .version import __version__ as __version__
from .version import __git_version__ as __git_version__

from . import dllm as dllm

from . import jit as jit
from .activation import gelu_and_mul as gelu_and_mul
Expand Down
37 changes: 37 additions & 0 deletions flashinfer/dllm/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""Public surface of the ``flashinfer.dllm`` subpackage.

Re-exports the single-prefill and batch-prefill block-extend entry points,
the offset-capable wrappers, and the variant declaration strings used to
instantiate the customized kernels.
"""

from .block_extend import (
    block_extend_attention_with_offset,
    block_extend_cascade,
    get_block_extend_module_with_offset,
    BLOCK_EXTEND_V2_WITH_OFFSET_VARIANT_DECL,
    BLOCK_EXTEND_V3_WITH_OFFSET_VARIANT_DECL,
)

from .batch_block_extend import (
    BatchBlockExtendPagedOffsetWrapper,
    BatchBlockExtendRaggedOffsetWrapper,
    batch_block_extend_cascade,
    sglang_style_cascade_attention,
    _BATCH_BE_OFFSET_VARIANT_DECL,
    _BATCH_BE_OFFSET_VARIANT_DECL_FA3,
)

# Sorted isort-style (case-sensitive ASCII order) to satisfy Ruff RUF022.
# Covers: single-prefill with offset (FA2/FA3 auto-select), cascade + block
# extend (SGLang-style: causal + merge_state), batch-prefill offset wrappers,
# and the batch offset variant declarations.
__all__ = [
    "BLOCK_EXTEND_V2_WITH_OFFSET_VARIANT_DECL",
    "BLOCK_EXTEND_V3_WITH_OFFSET_VARIANT_DECL",
    "BatchBlockExtendPagedOffsetWrapper",
    "BatchBlockExtendRaggedOffsetWrapper",
    "_BATCH_BE_OFFSET_VARIANT_DECL",
    "_BATCH_BE_OFFSET_VARIANT_DECL_FA3",
    "batch_block_extend_cascade",
    "block_extend_attention_with_offset",
    "block_extend_cascade",
    "get_block_extend_module_with_offset",
    "sglang_style_cascade_attention",
]
Comment on lines +21 to +37
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Sort __all__ to satisfy Ruff.

Ruff flags this list with RUF022; please apply the project’s __all__ sorting convention or the lint step may fail.

🛠️ Proposed fix
 __all__ = [
-    # Single Prefill with offset (FA2/FA3 auto-select)
-    "block_extend_attention_with_offset",
-    "get_block_extend_module_with_offset",
     "BLOCK_EXTEND_V2_WITH_OFFSET_VARIANT_DECL",
     "BLOCK_EXTEND_V3_WITH_OFFSET_VARIANT_DECL",
-    # Cascade + block extend (SGLang style: causal + merge_state)
-    "block_extend_cascade",
-    "batch_block_extend_cascade",
-    "sglang_style_cascade_attention",
-    # Batch Prefill with offset versions
     "BatchBlockExtendPagedOffsetWrapper",
     "BatchBlockExtendRaggedOffsetWrapper",
-    # Batch Offset variant declarations
     "_BATCH_BE_OFFSET_VARIANT_DECL",
     "_BATCH_BE_OFFSET_VARIANT_DECL_FA3",
+    "batch_block_extend_cascade",
+    "block_extend_attention_with_offset",
+    "block_extend_cascade",
+    "get_block_extend_module_with_offset",
+    "sglang_style_cascade_attention",
 ]
🧰 Tools
🪛 Ruff (0.15.10)

[warning] 21-37: __all__ is not sorted

Apply an isort-style sorting to __all__

(RUF022)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `flashinfer/dllm/__init__.py` around lines 21-37, Ruff flags the `__all__`
list ordering (RUF022). Sort the entries of `__all__` alphabetically
(isort-style, case-sensitive ASCII order) so that the exported names —
"BLOCK_EXTEND_V2_WITH_OFFSET_VARIANT_DECL",
"BLOCK_EXTEND_V3_WITH_OFFSET_VARIANT_DECL",
"BatchBlockExtendPagedOffsetWrapper", "BatchBlockExtendRaggedOffsetWrapper",
"_BATCH_BE_OFFSET_VARIANT_DECL", "_BATCH_BE_OFFSET_VARIANT_DECL_FA3",
"batch_block_extend_cascade", "block_extend_attention_with_offset",
"block_extend_cascade", "get_block_extend_module_with_offset", and
"sglang_style_cascade_attention" — appear in the required sorted order;
update the `__all__` declaration in flashinfer/dllm/__init__.py accordingly
so the RUF022 lint check passes.

Loading