Skip to content

Commit a060901

Browse files
committed
add release and resume mr
1 parent dd70beb commit a060901

File tree

1 file changed

+103
-37
lines changed

1 file changed

+103
-37
lines changed

docker/patch/latest/sglang.patch

Lines changed: 103 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
diff --git a/python/sglang/srt/disaggregation/decode.py b/python/sglang/srt/disaggregation/decode.py
2-
index 199885244..742ad0639 100644
2+
index 1998852..742ad06 100644
33
--- a/python/sglang/srt/disaggregation/decode.py
44
+++ b/python/sglang/srt/disaggregation/decode.py
55
@@ -314,6 +314,13 @@ class DecodePreallocQueue:
@@ -17,7 +17,7 @@ index 199885244..742ad0639 100644
1717
"""Add a request to the pending queue."""
1818
if self._check_if_req_exceed_kv_capacity(req):
1919
diff --git a/python/sglang/srt/disaggregation/mooncake/conn.py b/python/sglang/srt/disaggregation/mooncake/conn.py
20-
index 32e8c0b69..df913da7b 100644
20+
index 32e8c0b..df913da 100644
2121
--- a/python/sglang/srt/disaggregation/mooncake/conn.py
2222
+++ b/python/sglang/srt/disaggregation/mooncake/conn.py
2323
@@ -1079,6 +1079,19 @@ class MooncakeKVManager(CommonKVManager):
@@ -41,7 +41,7 @@ index 32e8c0b69..df913da7b 100644
4141
class MooncakeKVSender(CommonKVSender):
4242

4343
diff --git a/python/sglang/srt/disaggregation/prefill.py b/python/sglang/srt/disaggregation/prefill.py
44-
index ac11013f8..478e469f6 100644
44+
index ac11013..478e469 100644
4545
--- a/python/sglang/srt/disaggregation/prefill.py
4646
+++ b/python/sglang/srt/disaggregation/prefill.py
4747
@@ -309,6 +309,13 @@ class PrefillBootstrapQueue:
@@ -59,7 +59,7 @@ index ac11013f8..478e469f6 100644
5959
class SchedulerDisaggregationPrefillMixin:
6060
"""
6161
diff --git a/python/sglang/srt/distributed/parallel_state.py b/python/sglang/srt/distributed/parallel_state.py
62-
index 0478526ef..cfb1aa669 100644
62+
index 0478526..cfb1aa6 100644
6363
--- a/python/sglang/srt/distributed/parallel_state.py
6464
+++ b/python/sglang/srt/distributed/parallel_state.py
6565
@@ -1797,7 +1797,10 @@ def get_tensor_model_parallel_world_size():
@@ -75,7 +75,7 @@ index 0478526ef..cfb1aa669 100644
7575

7676
def get_pipeline_model_parallel_world_size():
7777
diff --git a/python/sglang/srt/entrypoints/engine.py b/python/sglang/srt/entrypoints/engine.py
78-
index 21909706b..8fac5f162 100644
78+
index 2190970..8fac5f1 100644
7979
--- a/python/sglang/srt/entrypoints/engine.py
8080
+++ b/python/sglang/srt/entrypoints/engine.py
8181
@@ -49,6 +49,7 @@ from sglang.srt.managers.io_struct import (
@@ -112,7 +112,7 @@ index 21909706b..8fac5f162 100644
112112
"""Get weights by parameter name."""
113113
obj = GetWeightsByNameReqInput(name=name, truncate_size=truncate_size)
114114
diff --git a/python/sglang/srt/entrypoints/http_server.py b/python/sglang/srt/entrypoints/http_server.py
115-
index 88705cc35..c8dc052f1 100644
115+
index 88705cc..c8dc052 100644
116116
--- a/python/sglang/srt/entrypoints/http_server.py
117117
+++ b/python/sglang/srt/entrypoints/http_server.py
118118
@@ -107,6 +107,7 @@ from sglang.srt.managers.io_struct import (
@@ -146,7 +146,7 @@ index 88705cc35..c8dc052f1 100644
146146
@app.post("/update_weight_version")
147147
async def update_weight_version(obj: UpdateWeightVersionReqInput, request: Request):
148148
diff --git a/python/sglang/srt/layers/attention/nsa/nsa_indexer.py b/python/sglang/srt/layers/attention/nsa/nsa_indexer.py
149-
index c9e82e4b1..58270e34a 100644
149+
index c9e82e4..58270e3 100644
150150
--- a/python/sglang/srt/layers/attention/nsa/nsa_indexer.py
151151
+++ b/python/sglang/srt/layers/attention/nsa/nsa_indexer.py
152152
@@ -3,6 +3,7 @@ from __future__ import annotations
@@ -190,7 +190,7 @@ index c9e82e4b1..58270e34a 100644
190190
if enable_dual_stream:
191191
current_stream = torch.cuda.current_stream()
192192
diff --git a/python/sglang/srt/layers/layernorm.py b/python/sglang/srt/layers/layernorm.py
193-
index b07164c53..8e6722ce0 100644
193+
index b07164c..8e6722c 100644
194194
--- a/python/sglang/srt/layers/layernorm.py
195195
+++ b/python/sglang/srt/layers/layernorm.py
196196
@@ -83,15 +83,12 @@ class RMSNorm(MultiPlatformOp):
@@ -249,7 +249,7 @@ index b07164c53..8e6722ce0 100644
249249
hidden_size = x.shape[-1]
250250
if hidden_size != self.hidden_size:
251251
diff --git a/python/sglang/srt/layers/logits_processor.py b/python/sglang/srt/layers/logits_processor.py
252-
index fa7431048..cd33ea735 100644
252+
index fa74310..cd33ea7 100644
253253
--- a/python/sglang/srt/layers/logits_processor.py
254254
+++ b/python/sglang/srt/layers/logits_processor.py
255255
@@ -878,11 +878,6 @@ class LogitsProcessor(nn.Module):
@@ -265,7 +265,7 @@ index fa7431048..cd33ea735 100644
265265
logits = torch.matmul(
266266
hidden_states.to(lm_head.weight.dtype), lm_head.weight.T
267267
diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py b/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py
268-
index a1885fade..14d692365 100644
268+
index a1885fa..14d6923 100644
269269
--- a/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py
270270
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py
271271
@@ -14,6 +14,7 @@ import torch.nn.functional as F
@@ -289,7 +289,7 @@ index a1885fade..14d692365 100644
289289
intermediate_cache3.view(*intermediate_cache3.shape),
290290
out_hidden_states[begin_chunk_idx:end_chunk_idx],
291291
diff --git a/python/sglang/srt/layers/moe/routed_experts_capturer.py b/python/sglang/srt/layers/moe/routed_experts_capturer.py
292-
index 00bd68755..5a3ca8a67 100644
292+
index 00bd687..5a3ca8a 100644
293293
--- a/python/sglang/srt/layers/moe/routed_experts_capturer.py
294294
+++ b/python/sglang/srt/layers/moe/routed_experts_capturer.py
295295
@@ -1,5 +1,6 @@
@@ -360,7 +360,7 @@ index 00bd68755..5a3ca8a67 100644
360360

361361
def get_routed_experts(
362362
diff --git a/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py
363-
index c5e5a11fc..6b788fb1d 100644
363+
index c5e5a11..6b788fb 100644
364364
--- a/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py
365365
+++ b/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py
366366
@@ -1016,13 +1016,38 @@ class CompressedTensorsWNA16MoEMethod(CompressedTensorsMoEMethod):
@@ -456,7 +456,7 @@ index c5e5a11fc..6b788fb1d 100644
456456
def create_moe_runner(
457457
self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig
458458
diff --git a/python/sglang/srt/layers/rotary_embedding.py b/python/sglang/srt/layers/rotary_embedding.py
459-
index 56516b41b..cb2ebca60 100644
459+
index 56516b4..cb2ebca 100644
460460
--- a/python/sglang/srt/layers/rotary_embedding.py
461461
+++ b/python/sglang/srt/layers/rotary_embedding.py
462462
@@ -135,9 +135,7 @@ class RotaryEmbedding(MultiPlatformOp):
@@ -481,7 +481,7 @@ index 56516b41b..cb2ebca60 100644
481481
assert (
482482
fused_set_kv_buffer_arg is None
483483
diff --git a/python/sglang/srt/layers/sampler.py b/python/sglang/srt/layers/sampler.py
484-
index 55bef5652..35ad68b1c 100644
484+
index 55bef56..35ad68b 100644
485485
--- a/python/sglang/srt/layers/sampler.py
486486
+++ b/python/sglang/srt/layers/sampler.py
487487
@@ -108,16 +108,11 @@ class Sampler(nn.Module):
@@ -505,7 +505,7 @@ index 55bef5652..35ad68b1c 100644
505505
if not get_global_server_args().sampling_backend == "ascend" or (
506506
return_logprob and not SGLANG_RETURN_ORIGINAL_LOGPROB
507507
diff --git a/python/sglang/srt/managers/io_struct.py b/python/sglang/srt/managers/io_struct.py
508-
index 879e1bfa6..de52085fa 100644
508+
index 879e1bf..de52085 100644
509509
--- a/python/sglang/srt/managers/io_struct.py
510510
+++ b/python/sglang/srt/managers/io_struct.py
511511
@@ -1286,6 +1286,19 @@ class UpdateWeightsFromIPCReqOutput(BaseReq):
@@ -529,7 +529,7 @@ index 879e1bfa6..de52085fa 100644
529529
@dataclass
530530
class InitWeightsSendGroupForRemoteInstanceReqOutput(BaseReq):
531531
diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py
532-
index 468d8fb8a..229a9a2dc 100644
532+
index 468d8fb..229a9a2 100644
533533
--- a/python/sglang/srt/managers/schedule_batch.py
534534
+++ b/python/sglang/srt/managers/schedule_batch.py
535535
@@ -2181,7 +2181,8 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
@@ -543,7 +543,7 @@ index 468d8fb8a..229a9a2dc 100644
543543

544544

545545
diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py
546-
index bca1c31e6..0c82e37a4 100644
546+
index bca1c31..0c82e37 100644
547547
--- a/python/sglang/srt/managers/scheduler.py
548548
+++ b/python/sglang/srt/managers/scheduler.py
549549
@@ -97,6 +97,7 @@ from sglang.srt.managers.io_struct import (
@@ -563,7 +563,7 @@ index bca1c31e6..0c82e37a4 100644
563563
(ReleaseMemoryOccupationReqInput, self.release_memory_occupation),
564564
(ResumeMemoryOccupationReqInput, self.resume_memory_occupation),
565565
diff --git a/python/sglang/srt/managers/scheduler_output_processor_mixin.py b/python/sglang/srt/managers/scheduler_output_processor_mixin.py
566-
index e40586c24..32d98aee4 100644
566+
index e40586c..32d98ae 100644
567567
--- a/python/sglang/srt/managers/scheduler_output_processor_mixin.py
568568
+++ b/python/sglang/srt/managers/scheduler_output_processor_mixin.py
569569
@@ -10,6 +10,7 @@ from sglang.srt.disaggregation.utils import DisaggregationMode
@@ -575,7 +575,7 @@ index e40586c24..32d98aee4 100644
575575
AbortReq,
576576
BatchEmbeddingOutput,
577577
diff --git a/python/sglang/srt/managers/scheduler_update_weights_mixin.py b/python/sglang/srt/managers/scheduler_update_weights_mixin.py
578-
index 293a84350..68911c433 100644
578+
index 293a843..d0404db 100644
579579
--- a/python/sglang/srt/managers/scheduler_update_weights_mixin.py
580580
+++ b/python/sglang/srt/managers/scheduler_update_weights_mixin.py
581581
@@ -1,6 +1,7 @@
@@ -617,7 +617,7 @@ index 293a84350..68911c433 100644
617617

618618
def get_weights_by_name(self: Scheduler, recv_req: GetWeightsByNameReqInput):
619619
parameter = self.tp_worker.get_weights_by_name(recv_req)
620-
@@ -137,6 +148,13 @@ class SchedulerUpdateWeightsMixin:
620+
@@ -137,11 +148,19 @@ class SchedulerUpdateWeightsMixin:
621621
self.memory_saver_adapter.pause(GPU_MEMORY_TYPE_KV_CACHE)
622622
self.flush_cache()
623623

@@ -631,7 +631,18 @@ index 293a84350..68911c433 100644
631631
if GPU_MEMORY_TYPE_WEIGHTS in tags:
632632
self.stashed_model_static_state = _export_static_state(
633633
self.tp_worker.model_runner.model
634-
@@ -177,6 +195,13 @@ class SchedulerUpdateWeightsMixin:
634+
)
635+
torch.distributed.barrier(self.tp_cpu_group)
636+
+ self.tp_worker.model_runner.remote_instance_unregister_memory_region()
637+
self.memory_saver_adapter.pause(GPU_MEMORY_TYPE_WEIGHTS)
638+
639+
if GPU_MEMORY_TYPE_CUDA_GRAPH in tags:
640+
@@ -173,10 +192,18 @@ class SchedulerUpdateWeightsMixin:
641+
self.stashed_model_static_state,
642+
)
643+
del self.stashed_model_static_state
644+
+ self.tp_worker.model_runner.remote_instance_register_memory_region()
645+
635646
if GPU_MEMORY_TYPE_KV_CACHE in tags:
636647
self.memory_saver_adapter.resume(GPU_MEMORY_TYPE_KV_CACHE)
637648

@@ -646,7 +657,7 @@ index 293a84350..68911c433 100644
646657

647658
def check_weights(self: Scheduler, recv_req: CheckWeightsReqInput):
648659
diff --git a/python/sglang/srt/managers/tokenizer_communicator_mixin.py b/python/sglang/srt/managers/tokenizer_communicator_mixin.py
649-
index e5d42bed8..412293b30 100644
660+
index e5d42be..412293b 100644
650661
--- a/python/sglang/srt/managers/tokenizer_communicator_mixin.py
651662
+++ b/python/sglang/srt/managers/tokenizer_communicator_mixin.py
652663
@@ -49,6 +49,8 @@ from sglang.srt.managers.io_struct import (
@@ -698,7 +709,7 @@ index e5d42bed8..412293b30 100644
698709
self,
699710
obj: InitWeightsSendGroupForRemoteInstanceReqInput,
700711
diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py
701-
index f4fc29e29..5ef12cca6 100644
712+
index f4fc29e..5ef12cc 100644
702713
--- a/python/sglang/srt/managers/tokenizer_manager.py
703714
+++ b/python/sglang/srt/managers/tokenizer_manager.py
704715
@@ -1652,12 +1652,13 @@ class TokenizerManager(TokenizerCommunicatorMixin, TokenizerManagerMultiItemMixi
@@ -722,7 +733,7 @@ index f4fc29e29..5ef12cca6 100644
722733
recv_obj.output_token_logprobs_val[recv_obj_index]
723734
)
724735
diff --git a/python/sglang/srt/managers/tp_worker.py b/python/sglang/srt/managers/tp_worker.py
725-
index 1f1875254..51d8651ce 100644
736+
index 1f18752..51d8651 100644
726737
--- a/python/sglang/srt/managers/tp_worker.py
727738
+++ b/python/sglang/srt/managers/tp_worker.py
728739
@@ -27,6 +27,7 @@ from sglang.srt.managers.io_struct import (
@@ -746,7 +757,7 @@ index 1f1875254..51d8651ce 100644
746757
parameter = self.model_runner.get_weights_by_name(
747758
recv_req.name, recv_req.truncate_size
748759
diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py
749-
index 1d69c0582..c849913e9 100644
760+
index 1d69c05..b516607 100644
750761
--- a/python/sglang/srt/model_executor/model_runner.py
751762
+++ b/python/sglang/srt/model_executor/model_runner.py
752763
@@ -558,7 +558,8 @@ class ModelRunner(ModelRunnerKVCacheMixin):
@@ -759,7 +770,45 @@ index 1d69c0582..c849913e9 100644
759770

760771
if self.device == "cuda":
761772
self.init_cublas()
762-
@@ -2224,11 +2225,19 @@ class ModelRunner(ModelRunnerKVCacheMixin):
773+
@@ -635,6 +636,37 @@ class ModelRunner(ModelRunnerKVCacheMixin):
774+
f"{local_ip}:{self.remote_instance_transfer_engine.get_rpc_port()}"
775+
)
776+
777+
+ def remote_instance_register_memory_region(self):
778+
+ if self.remote_instance_transfer_engine is None:
779+
+ return
780+
+
781+
+ logger.debug("Registering memory regions to transfer engine after memory saver resume")
782+
+ self.remote_instance_transfer_engine_weight_info = register_memory_region(
783+
+ self.model, self.remote_instance_transfer_engine
784+
+ )
785+
+
786+
+ def remote_instance_unregister_memory_region(self):
787+
+ if self.remote_instance_transfer_engine is None:
788+
+ return
789+
+
790+
+ logger.debug("Unregistering old memory regions from transfer engine")
791+
+ registered_blocks = []
792+
+ old_addrs = set()
793+
+ for name, (data_ptr, numel, element_size) in (
794+
+ self.remote_instance_transfer_engine_weight_info.items()
795+
+ ):
796+
+ if data_ptr not in old_addrs:
797+
+ old_addrs.add(data_ptr)
798+
+ registered_blocks.append((data_ptr, numel * element_size))
799+
+
800+
+ for addr, size in registered_blocks:
801+
+ try:
802+
+ self.remote_instance_transfer_engine.unregister_memory(addr)
803+
+ except Exception as e:
804+
+ logger.debug(f"Failed to unregister memory at {addr}: {e}")
805+
+
806+
+ self.remote_instance_transfer_engine_weight_info = None
807+
+
808+
def model_specific_adjustment(self):
809+
server_args = self.server_args
810+
811+
@@ -2224,11 +2256,19 @@ class ModelRunner(ModelRunnerKVCacheMixin):
763812
output.expert_distribution_metrics = recorder_outputs.get("metrics")
764813

765814
# Copy cached routing experts' buffers back to CPU cache
@@ -784,7 +833,7 @@ index 1d69c0582..c849913e9 100644
784833

785834
if self.eplb_manager is not None:
786835
self.eplb_manager.on_forward_pass_end()
787-
@@ -2436,6 +2445,42 @@ class ModelRunner(ModelRunnerKVCacheMixin):
836+
@@ -2436,6 +2476,42 @@ class ModelRunner(ModelRunnerKVCacheMixin):
788837
logger.error(f"IPC weight update failed: {e}")
789838
return False, str(e)
790839

@@ -828,7 +877,7 @@ index 1d69c0582..c849913e9 100644
828877
def _model_load_weights_direct(model, named_tensors: List[Tuple[str, torch.Tensor]]):
829878
params_dict = dict(model.named_parameters())
830879
diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py
831-
index 2918461d3..d44c8aaa0 100644
880+
index 2918461..d44c8aa 100644
832881
--- a/python/sglang/srt/models/deepseek_v2.py
833882
+++ b/python/sglang/srt/models/deepseek_v2.py
834883
@@ -2704,7 +2704,11 @@ class DeepseekV2AttentionMLA(nn.Module):
@@ -873,7 +922,7 @@ index 2918461d3..d44c8aaa0 100644
873922
if is_nextn and enable_nextn_moe_bf16_cast_to_fp8(self.quant_config):
874923
self._mark_nextn_moe_weights_as_ue8m0()
875924
diff --git a/python/sglang/srt/models/qwen2.py b/python/sglang/srt/models/qwen2.py
876-
index a7dbadec6..c83a41338 100644
925+
index a7dbade..c83a413 100644
877926
--- a/python/sglang/srt/models/qwen2.py
878927
+++ b/python/sglang/srt/models/qwen2.py
879928
@@ -90,9 +90,6 @@ class Qwen2MLP(nn.Module):
@@ -911,7 +960,7 @@ index a7dbadec6..c83a41338 100644
911960
if get_global_server_args().rl_on_policy_target is not None
912961
else {}
913962
diff --git a/python/sglang/srt/models/qwen2_moe.py b/python/sglang/srt/models/qwen2_moe.py
914-
index 3ad9f6736..0b9c7f499 100644
963+
index 3ad9f67..0b9c7f4 100644
915964
--- a/python/sglang/srt/models/qwen2_moe.py
916965
+++ b/python/sglang/srt/models/qwen2_moe.py
917966
@@ -586,7 +586,17 @@ class Qwen2MoeModel(nn.Module):
@@ -934,7 +983,7 @@ index 3ad9f6736..0b9c7f499 100644
934983
self.norm = PPMissingLayer(return_tuple=True)
935984

936985
diff --git a/python/sglang/srt/models/qwen3.py b/python/sglang/srt/models/qwen3.py
937-
index 9220831f6..47a1a4e4c 100644
986+
index 9220831..47a1a4e 100644
938987
--- a/python/sglang/srt/models/qwen3.py
939988
+++ b/python/sglang/srt/models/qwen3.py
940989
@@ -90,8 +90,8 @@ class Qwen3Attention(nn.Module):
@@ -960,7 +1009,7 @@ index 9220831f6..47a1a4e4c 100644
9601009
if get_global_server_args().rl_on_policy_target is not None
9611010
else {}
9621011
diff --git a/python/sglang/srt/models/qwen3_moe.py b/python/sglang/srt/models/qwen3_moe.py
963-
index e11678a9e..e277d46f2 100644
1012+
index e11678a..e277d46 100644
9641013
--- a/python/sglang/srt/models/qwen3_moe.py
9651014
+++ b/python/sglang/srt/models/qwen3_moe.py
9661015
@@ -22,6 +22,7 @@ import math
@@ -1070,7 +1119,7 @@ index e11678a9e..e277d46f2 100644
10701119

10711120
self.layer_communicator = LayerCommunicator(
10721121
diff --git a/python/sglang/srt/models/qwen3_vl.py b/python/sglang/srt/models/qwen3_vl.py
1073-
index 891913078..c9dbecd23 100644
1122+
index 8919130..c9dbecd 100644
10741123
--- a/python/sglang/srt/models/qwen3_vl.py
10751124
+++ b/python/sglang/srt/models/qwen3_vl.py
10761125
@@ -397,28 +397,68 @@ class Qwen3VLMoeVisionModel(nn.Module, RotaryPosMixin):
@@ -1186,7 +1235,7 @@ index 891913078..c9dbecd23 100644
11861235
positions,
11871236
hidden_states,
11881237
diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py
1189-
index 54d4e415a..de7620c20 100644
1238+
index 54d4e41..84831ad 100644
11901239
--- a/python/sglang/srt/server_args.py
11911240
+++ b/python/sglang/srt/server_args.py
11921241
@@ -523,6 +523,7 @@ class ServerArgs:
@@ -1209,8 +1258,25 @@ index 54d4e415a..de7620c20 100644
12091258
parser.add_argument(
12101259
"--disable-cuda-graph-padding",
12111260
action="store_true",
1261+
@@ -4972,11 +4978,11 @@ class ServerArgs:
1262+
f"Failed to import mooncake.engine. Does not support using TransferEngine as remote instance weight loader backend."
1263+
)
1264+
return False
1265+
- elif self.enable_memory_saver:
1266+
- logger.warning(
1267+
- "Memory saver is enabled, which is not compatible with TransferEngine. Does not support using TransferEngine as remote instance weight loader backend."
1268+
- )
1269+
- return False
1270+
+ # elif self.enable_memory_saver:
1271+
+ # logger.warning(
1272+
+ # "Memory saver is enabled, which is not compatible with TransferEngine. Does not support using TransferEngine as remote instance weight loader backend."
1273+
+ # )
1274+
+ # return False
1275+
else:
1276+
return True
1277+
12121278
diff --git a/python/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py b/python/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py
1213-
index 5fe45086c..c95fbd0f6 100644
1279+
index 5fe4508..c95fbd0 100644
12141280
--- a/python/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py
12151281
+++ b/python/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py
12161282
@@ -341,7 +341,10 @@ class EAGLEDraftCudaGraphRunner:
@@ -1237,7 +1303,7 @@ index 5fe45086c..c95fbd0f6 100644
12371303
self.req_pool_indices[:raw_bs].copy_(forward_batch.req_pool_indices)
12381304

12391305
diff --git a/python/sglang/srt/speculative/eagle_info.py b/python/sglang/srt/speculative/eagle_info.py
1240-
index 1bf3816e9..b5b41dba4 100644
1306+
index 1bf3816..b5b41db 100644
12411307
--- a/python/sglang/srt/speculative/eagle_info.py
12421308
+++ b/python/sglang/srt/speculative/eagle_info.py
12431309
@@ -778,6 +778,10 @@ class EagleDraftInput(SpecInput, EagleDraftInputV2Mixin):
@@ -1280,7 +1346,7 @@ index 1bf3816e9..b5b41dba4 100644
12801346

12811347
@dataclass
12821348
diff --git a/python/sglang/srt/speculative/eagle_worker.py b/python/sglang/srt/speculative/eagle_worker.py
1283-
index a702df4f8..61d9ae366 100644
1349+
index a702df4..61d9ae3 100644
12841350
--- a/python/sglang/srt/speculative/eagle_worker.py
12851351
+++ b/python/sglang/srt/speculative/eagle_worker.py
12861352
@@ -231,7 +231,7 @@ class EAGLEWorker(TpModelWorker):

0 commit comments

Comments
 (0)