File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 66
77from vllm .logger import init_logger
88from vllm .model_executor .models import ModelRegistry
9- from vllm .platforms import current_platform
109from vllm .utils .math_utils import cdiv , round_up
1110from vllm .utils .torch_utils import STR_DTYPE_TO_TORCH_DTYPE
1211from vllm .v1 .attention .backends .registry import AttentionBackendEnum
@@ -148,17 +147,6 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
148147 ).page_size_bytes
149148 else :
150149 kernel_block_alignment_size = 16
151- if (
152- current_platform .is_device_capability_family (100 )
153- and model_config .get_head_size () == 256
154- and (
155- attention_config .backend is None
156- or attention_config .backend == AttentionBackendEnum .FLASHINFER
157- )
158- ):
159- # https://github.com/flashinfer-ai/flashinfer/issues/1993 reports that`
160- # head size 256 and block size 16 is not supported on blackwell.
161- kernel_block_alignment_size = 32
162150 attn_page_size_1_token = FullAttentionSpec (
163151 block_size = 1 ,
164152 num_kv_heads = model_config .get_num_kv_heads (parallel_config ),
Original file line number Diff line number Diff line change @@ -630,15 +630,6 @@ def __init__(
630630 self .paged_kv_indices = self ._make_buffer (max_num_pages )
631631 self .paged_kv_last_page_len = self ._make_buffer (max_num_reqs )
632632
633- if self .head_dim == 256 and current_platform .is_device_capability_family (100 ):
634- # https://github.com/flashinfer-ai/flashinfer/issues/1993 reports that
635- # head size 256 and block size 16 is not supported on blackwell.
636- assert kv_cache_spec .block_size != 16 , (
637- "There is a bug in FlashInfer "
638- "block_size 16 head size 256 support. Please avoid this combination by "
639- "passing --block-size 32 or --block-size 64."
640- )
641-
642633 def _make_buffer (
643634 self , * size : int | torch .SymInt , dtype : torch .dtype = torch .int32
644635 ) -> CpuGpuBuffer :
You can’t perform that action at this time.
0 commit comments