We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 0836be3 · commit 006aea1 — Copy full SHA for 006aea1
vllm/v1/attention/backends/utils.py
@@ -528,7 +528,6 @@ def split_decodes_and_prefills(
528
# requests may have a query length of 0 but since they are padding its fine
529
# to treat them as decodes (ensures num_decodes matches the captured size)
530
if torch.all((query_lens == query_lens[0]) | (query_lens == 0)):
531
- assert num_reqs * query_lens[0] == num_tokens, "tokens not padded correctly"
532
return num_reqs, 0, num_tokens, 0 # all decodes
533
is_prefill = query_lens != query_lens[0]
534
else:
0 commit comments