Skip to content

Commit 16aac38

Browse files
Fix seq_lens race in EAGLE overlap mode when concurrency > 1
1 parent c7368e7 commit 16aac38

File tree

1 file changed

+8
-0
lines changed

1 file changed

+8
-0
lines changed

python/sglang/srt/managers/schedule_batch.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1663,6 +1663,14 @@ def filter_batch(
16631663
if self.multimodal_inputs is not None:
16641664
self.multimodal_inputs = [self.multimodal_inputs[i] for i in keep_indices]
16651665
self.req_pool_indices = self.req_pool_indices[keep_indices_device]
1666+
1667+
if self.spec_algorithm.is_eagle() and self.enable_overlap:
1668+
# In eagle overlap mode, seq_lens is mutated in the EagleWorkerClient's forward_stream,
1669+
# but we copy seq_lens in the scheduler's stream. This is a problem because seq_lens may
1670+
# not have been mutated by EagleWorkerClient before the scheduler stream starts making
1671+
# a copy of it. To avoid this, we synchronize all streams on the current CUDA device before copying seq_lens.
1672+
torch.cuda.synchronize()
1673+
16661674
self.seq_lens = self.seq_lens[keep_indices_device]
16671675
self.orig_seq_lens = self.orig_seq_lens[keep_indices_device]
16681676
self.out_cache_loc = None

0 commit comments

Comments
 (0)