File tree Expand file tree Collapse file tree
python/sglang/srt/layers/attention Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -1756,14 +1756,26 @@ def init_forward_metadata_replay_cuda_graph(
17561756 kv_indptr_view = self .decode_cuda_graph_metadata [
17571757 "flashinfer_kv_indptr"
17581758 ][: sparse_bs + 1 ]
1759+ kv_indptr_view [0 ] = 0
1760+ if sparse_real_bs > 0 :
1761+ actual_seqlens = metadata .sparse_cache_seqlens_int32 [
1762+ :sparse_real_bs
1763+ ].clone ()
1764+ actual_seqlens = torch .clamp (
1765+ actual_seqlens , max = self .num_sparse_topk_tokens
1766+ )
1767+ kv_indptr_view [1 : sparse_real_bs + 1 ] = torch .cumsum (
1768+ actual_seqlens , dim = 0
1769+ )
1770+ kv_indptr_view [sparse_real_bs :].fill_ (kv_indptr_view [sparse_real_bs ])
1771+
17591772 # kv_indices only needs num_sparse_topk_tokens entries per batch entry (sparse_bs * num_sparse_topk_tokens in total)
17601773 kv_indices_view = self .decode_cuda_graph_metadata [
17611774 "flashinfer_kv_indices"
17621775 ][: sparse_bs * self .num_sparse_topk_tokens ]
17631776 kv_last_page_len_view = self .decode_cuda_graph_metadata [
17641777 "flashinfer_kv_last_page_len"
17651778 ][:sparse_bs ]
1766- kv_indptr_view [sparse_real_bs :].fill_ (kv_indptr_view [- 1 ])
17671779 kv_last_page_len_view [sparse_real_bs :].fill_ (0 )
17681780
17691781 # Retrieve the wrapper stored during capture
You can’t perform that action at this time.
0 commit comments