File tree Expand file tree Collapse file tree
vllm_rbln/v1/attention/backends Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -1203,7 +1203,7 @@ def build(
12031203 query_start_loc = query_start_loc ,
12041204 max_seq_len = query_max_seq_len ,
12051205 seq_lens = seq_lens_tensor .to (self .device )
1206- if not self .is_batch_attention_opt or is_prefills [0 ]
1206+ if not self .is_batch_attention_opt or is_prefills [0 ] or batch_pad <= 1
12071207 else seq_idx .to (self .device ),
12081208 block_tables = block_tables_tensor .to (self .device ),
12091209 slot_mapping = slot_mapping ,
@@ -1437,15 +1437,15 @@ def forward(
14371437 value ,
14381438 kv_cache ,
14391439 attn_metadata .cache_seq_lens .to (torch .int32 )
1440- if self .is_batch_attention_opt
1440+ if self .is_batch_attention_opt and b_size > 1
14411441 else attn_metadata .cache_seq_lens ,
14421442 attn_metadata .cache_offsets ,
14431443 self .scale ,
14441444 attn_metadata .local_block_tables ,
14451445 self .scale , # dummy
14461446 ]
14471447 if not envs .VLLM_RBLN_USE_CUSTOM_KERNEL :
1448- if self .is_batch_attention_opt :
1448+ if self .is_batch_attention_opt and b_size > 1 :
14491449 decode_args .append (attn_metadata .swa_attn_masks )
14501450 else :
14511451 decode_args .append (None )
You can’t perform that action at this time.
0 commit comments