Skip to content

Commit d979e00

Browse files
author
Rui Wang
committed
[Fix] minicpm flashinfer backend: fix begin_forward args
1 parent 915b0b6 commit d979e00

1 file changed

Lines changed: 13 additions & 1 deletion

File tree

python/sglang/srt/layers/attention/minicpm_backend.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1756,14 +1756,26 @@ def init_forward_metadata_replay_cuda_graph(
17561756
kv_indptr_view = self.decode_cuda_graph_metadata[
17571757
"flashinfer_kv_indptr"
17581758
][: sparse_bs + 1]
1759+
kv_indptr_view[0] = 0
1760+
if sparse_real_bs > 0:
1761+
actual_seqlens = metadata.sparse_cache_seqlens_int32[
1762+
:sparse_real_bs
1763+
].clone()
1764+
actual_seqlens = torch.clamp(
1765+
actual_seqlens, max=self.num_sparse_topk_tokens
1766+
)
1767+
kv_indptr_view[1 : sparse_real_bs + 1] = torch.cumsum(
1768+
actual_seqlens, dim=0
1769+
)
1770+
kv_indptr_view[sparse_real_bs:].fill_(kv_indptr_view[sparse_real_bs])
1771+
17591772
# kv_indices only needs num_sparse_topk_tokens entries per batch slot (sparse_bs * num_sparse_topk_tokens in total)
17601773
kv_indices_view = self.decode_cuda_graph_metadata[
17611774
"flashinfer_kv_indices"
17621775
][: sparse_bs * self.num_sparse_topk_tokens]
17631776
kv_last_page_len_view = self.decode_cuda_graph_metadata[
17641777
"flashinfer_kv_last_page_len"
17651778
][:sparse_bs]
1766-
kv_indptr_view[sparse_real_bs:].fill_(kv_indptr_view[-1])
17671779
kv_last_page_len_view[sparse_real_bs:].fill_(0)
17681780

17691781
# Retrieve the wrapper stored during capture

0 commit comments

Comments
 (0)