Undo faulty in-place seq_lens update for EAGLE speculative decoding

timmy-feng · timmy-feng · commit 9eb686abe5e6 · 2025-08-23T16:44:28.000-04:00
diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py
@@ -1764,6 +1764,7 @@ def run_batch(
 
                 model_worker_batch = batch.get_model_worker_batch()
                 if self.enable_overlap:
+                    # TODO (timmy): Do not alias seq_lens between forward and scheduler threads.
                     # Optimistically estimate the seq_lens_cpu for the next draft forward
                     model_worker_batch.seq_lens_cpu.add_(self.server_args.speculative_num_steps + 1)
 
diff --git a/python/sglang/srt/managers/scheduler_output_processor_mixin.py b/python/sglang/srt/managers/scheduler_output_processor_mixin.py
@@ -231,10 +231,6 @@ def process_batch_result_decode(
             self.token_to_kv_pool_allocator.free(free_cache_loc_cpu.to("cuda", non_blocking=True))
 
         if self.spec_algorithm.is_eagle():
-            # TODO (timmy): when does this happen?
-            if batch.seq_lens is not None:
-                batch.seq_lens.add_(logits_output.accept_length + 1)
-
             accept_length = logits_output.accept_length.tolist()
             idx_to_batch = [i for i, length in enumerate(accept_length) for _ in range(length + 1)]
         else:
diff --git a/python/sglang/srt/speculative/eagle_worker.py b/python/sglang/srt/speculative/eagle_worker.py
@@ -338,9 +338,6 @@ def forward_batch_speculative_generation(
                 )
             return logits_output, next_token_ids, None, bid, False, batch.spec_info
         else:
-            # Clone seq_lens because it will be modified in-place by verify
-            batch.seq_lens = batch.seq_lens.clone()
-
             with self.draft_tp_context(self.draft_model_runner.tp_group):
                 spec_info = self.draft(batch)
             logits_output, verify_output, can_run_cuda_graph = (