11diff --git a/python/sglang/srt/disaggregation/decode.py b/python/sglang/srt/disaggregation/decode.py
2- index 199885244..742ad0639 100644
2+ index 1998852..742ad06 100644
33--- a/python/sglang/srt/disaggregation/decode.py
44+++ b/python/sglang/srt/disaggregation/decode.py
55@@ -314,6 +314,13 @@ class DecodePreallocQueue:
@@ -17,7 +17,7 @@ index 199885244..742ad0639 100644
1717 """Add a request to the pending queue."""
1818 if self._check_if_req_exceed_kv_capacity(req):
1919diff --git a/python/sglang/srt/disaggregation/mooncake/conn.py b/python/sglang/srt/disaggregation/mooncake/conn.py
20- index 32e8c0b69..df913da7b 100644
20+ index 32e8c0b..df913da 100644
2121--- a/python/sglang/srt/disaggregation/mooncake/conn.py
2222+++ b/python/sglang/srt/disaggregation/mooncake/conn.py
2323@@ -1079,6 +1079,19 @@ class MooncakeKVManager(CommonKVManager):
@@ -41,7 +41,7 @@ index 32e8c0b69..df913da7b 100644
4141 class MooncakeKVSender(CommonKVSender):
4242
4343diff --git a/python/sglang/srt/disaggregation/prefill.py b/python/sglang/srt/disaggregation/prefill.py
44- index ac11013f8..478e469f6 100644
44+ index ac11013..478e469 100644
4545--- a/python/sglang/srt/disaggregation/prefill.py
4646+++ b/python/sglang/srt/disaggregation/prefill.py
4747@@ -309,6 +309,13 @@ class PrefillBootstrapQueue:
@@ -59,7 +59,7 @@ index ac11013f8..478e469f6 100644
5959 class SchedulerDisaggregationPrefillMixin:
6060 """
6161diff --git a/python/sglang/srt/distributed/parallel_state.py b/python/sglang/srt/distributed/parallel_state.py
62- index 0478526ef..cfb1aa669 100644
62+ index 0478526..cfb1aa6 100644
6363--- a/python/sglang/srt/distributed/parallel_state.py
6464+++ b/python/sglang/srt/distributed/parallel_state.py
6565@@ -1797,7 +1797,10 @@ def get_tensor_model_parallel_world_size():
@@ -75,7 +75,7 @@ index 0478526ef..cfb1aa669 100644
7575
7676 def get_pipeline_model_parallel_world_size():
7777diff --git a/python/sglang/srt/entrypoints/engine.py b/python/sglang/srt/entrypoints/engine.py
78- index 21909706b..8fac5f162 100644
78+ index 2190970..8fac5f1 100644
7979--- a/python/sglang/srt/entrypoints/engine.py
8080+++ b/python/sglang/srt/entrypoints/engine.py
8181@@ -49,6 +49,7 @@ from sglang.srt.managers.io_struct import (
@@ -112,7 +112,7 @@ index 21909706b..8fac5f162 100644
112112 """Get weights by parameter name."""
113113 obj = GetWeightsByNameReqInput(name=name, truncate_size=truncate_size)
114114diff --git a/python/sglang/srt/entrypoints/http_server.py b/python/sglang/srt/entrypoints/http_server.py
115- index 88705cc35..c8dc052f1 100644
115+ index 88705cc..c8dc052 100644
116116--- a/python/sglang/srt/entrypoints/http_server.py
117117+++ b/python/sglang/srt/entrypoints/http_server.py
118118@@ -107,6 +107,7 @@ from sglang.srt.managers.io_struct import (
@@ -146,7 +146,7 @@ index 88705cc35..c8dc052f1 100644
146146 @app.post("/update_weight_version")
147147 async def update_weight_version(obj: UpdateWeightVersionReqInput, request: Request):
148148diff --git a/python/sglang/srt/layers/attention/nsa/nsa_indexer.py b/python/sglang/srt/layers/attention/nsa/nsa_indexer.py
149- index c9e82e4b1..58270e34a 100644
149+ index c9e82e4..58270e3 100644
150150--- a/python/sglang/srt/layers/attention/nsa/nsa_indexer.py
151151+++ b/python/sglang/srt/layers/attention/nsa/nsa_indexer.py
152152@@ -3,6 +3,7 @@ from __future__ import annotations
@@ -190,7 +190,7 @@ index c9e82e4b1..58270e34a 100644
190190 if enable_dual_stream:
191191 current_stream = torch.cuda.current_stream()
192192diff --git a/python/sglang/srt/layers/layernorm.py b/python/sglang/srt/layers/layernorm.py
193- index b07164c53..8e6722ce0 100644
193+ index b07164c..8e6722c 100644
194194--- a/python/sglang/srt/layers/layernorm.py
195195+++ b/python/sglang/srt/layers/layernorm.py
196196@@ -83,15 +83,12 @@ class RMSNorm(MultiPlatformOp):
@@ -249,7 +249,7 @@ index b07164c53..8e6722ce0 100644
249249 hidden_size = x.shape[-1]
250250 if hidden_size != self.hidden_size:
251251diff --git a/python/sglang/srt/layers/logits_processor.py b/python/sglang/srt/layers/logits_processor.py
252- index fa7431048..cd33ea735 100644
252+ index fa74310..cd33ea7 100644
253253--- a/python/sglang/srt/layers/logits_processor.py
254254+++ b/python/sglang/srt/layers/logits_processor.py
255255@@ -878,11 +878,6 @@ class LogitsProcessor(nn.Module):
@@ -265,7 +265,7 @@ index fa7431048..cd33ea735 100644
265265 logits = torch.matmul(
266266 hidden_states.to(lm_head.weight.dtype), lm_head.weight.T
267267diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py b/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py
268- index a1885fade..14d692365 100644
268+ index a1885fa..14d6923 100644
269269--- a/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py
270270+++ b/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py
271271@@ -14,6 +14,7 @@ import torch.nn.functional as F
@@ -289,7 +289,7 @@ index a1885fade..14d692365 100644
289289 intermediate_cache3.view(*intermediate_cache3.shape),
290290 out_hidden_states[begin_chunk_idx:end_chunk_idx],
291291diff --git a/python/sglang/srt/layers/moe/routed_experts_capturer.py b/python/sglang/srt/layers/moe/routed_experts_capturer.py
292- index 00bd68755..5a3ca8a67 100644
292+ index 00bd687..5a3ca8a 100644
293293--- a/python/sglang/srt/layers/moe/routed_experts_capturer.py
294294+++ b/python/sglang/srt/layers/moe/routed_experts_capturer.py
295295@@ -1,5 +1,6 @@
@@ -360,7 +360,7 @@ index 00bd68755..5a3ca8a67 100644
360360
361361 def get_routed_experts(
362362diff --git a/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py
363- index c5e5a11fc..6b788fb1d 100644
363+ index c5e5a11..6b788fb 100644
364364--- a/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py
365365+++ b/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py
366366@@ -1016,13 +1016,38 @@ class CompressedTensorsWNA16MoEMethod(CompressedTensorsMoEMethod):
@@ -456,7 +456,7 @@ index c5e5a11fc..6b788fb1d 100644
456456 def create_moe_runner(
457457 self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig
458458diff --git a/python/sglang/srt/layers/rotary_embedding.py b/python/sglang/srt/layers/rotary_embedding.py
459- index 56516b41b..cb2ebca60 100644
459+ index 56516b4..cb2ebca 100644
460460--- a/python/sglang/srt/layers/rotary_embedding.py
461461+++ b/python/sglang/srt/layers/rotary_embedding.py
462462@@ -135,9 +135,7 @@ class RotaryEmbedding(MultiPlatformOp):
@@ -481,7 +481,7 @@ index 56516b41b..cb2ebca60 100644
481481 assert (
482482 fused_set_kv_buffer_arg is None
483483diff --git a/python/sglang/srt/layers/sampler.py b/python/sglang/srt/layers/sampler.py
484- index 55bef5652..35ad68b1c 100644
484+ index 55bef56..35ad68b 100644
485485--- a/python/sglang/srt/layers/sampler.py
486486+++ b/python/sglang/srt/layers/sampler.py
487487@@ -108,16 +108,11 @@ class Sampler(nn.Module):
@@ -505,7 +505,7 @@ index 55bef5652..35ad68b1c 100644
505505 if not get_global_server_args().sampling_backend == "ascend" or (
506506 return_logprob and not SGLANG_RETURN_ORIGINAL_LOGPROB
507507diff --git a/python/sglang/srt/managers/io_struct.py b/python/sglang/srt/managers/io_struct.py
508- index 879e1bfa6..de52085fa 100644
508+ index 879e1bf..de52085 100644
509509--- a/python/sglang/srt/managers/io_struct.py
510510+++ b/python/sglang/srt/managers/io_struct.py
511511@@ -1286,6 +1286,19 @@ class UpdateWeightsFromIPCReqOutput(BaseReq):
@@ -529,7 +529,7 @@ index 879e1bfa6..de52085fa 100644
529529 @dataclass
530530 class InitWeightsSendGroupForRemoteInstanceReqOutput(BaseReq):
531531diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py
532- index 468d8fb8a..229a9a2dc 100644
532+ index 468d8fb..229a9a2 100644
533533--- a/python/sglang/srt/managers/schedule_batch.py
534534+++ b/python/sglang/srt/managers/schedule_batch.py
535535@@ -2181,7 +2181,8 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
@@ -543,7 +543,7 @@ index 468d8fb8a..229a9a2dc 100644
543543
544544
545545diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py
546- index bca1c31e6..0c82e37a4 100644
546+ index bca1c31..0c82e37 100644
547547--- a/python/sglang/srt/managers/scheduler.py
548548+++ b/python/sglang/srt/managers/scheduler.py
549549@@ -97,6 +97,7 @@ from sglang.srt.managers.io_struct import (
@@ -563,7 +563,7 @@ index bca1c31e6..0c82e37a4 100644
563563 (ReleaseMemoryOccupationReqInput, self.release_memory_occupation),
564564 (ResumeMemoryOccupationReqInput, self.resume_memory_occupation),
565565diff --git a/python/sglang/srt/managers/scheduler_output_processor_mixin.py b/python/sglang/srt/managers/scheduler_output_processor_mixin.py
566- index e40586c24..32d98aee4 100644
566+ index e40586c..32d98ae 100644
567567--- a/python/sglang/srt/managers/scheduler_output_processor_mixin.py
568568+++ b/python/sglang/srt/managers/scheduler_output_processor_mixin.py
569569@@ -10,6 +10,7 @@ from sglang.srt.disaggregation.utils import DisaggregationMode
@@ -575,7 +575,7 @@ index e40586c24..32d98aee4 100644
575575 AbortReq,
576576 BatchEmbeddingOutput,
577577diff --git a/python/sglang/srt/managers/scheduler_update_weights_mixin.py b/python/sglang/srt/managers/scheduler_update_weights_mixin.py
578- index 293a84350..68911c433 100644
578+ index 293a843..d0404db 100644
579579--- a/python/sglang/srt/managers/scheduler_update_weights_mixin.py
580580+++ b/python/sglang/srt/managers/scheduler_update_weights_mixin.py
581581@@ -1,6 +1,7 @@
@@ -617,7 +617,7 @@ index 293a84350..68911c433 100644
617617
618618 def get_weights_by_name(self: Scheduler, recv_req: GetWeightsByNameReqInput):
619619 parameter = self.tp_worker.get_weights_by_name(recv_req)
620- @@ -137,6 +148,13 @@ class SchedulerUpdateWeightsMixin:
620+ @@ -137,11 +148,19 @@ class SchedulerUpdateWeightsMixin:
621621 self.memory_saver_adapter.pause(GPU_MEMORY_TYPE_KV_CACHE)
622622 self.flush_cache()
623623
@@ -631,7 +631,18 @@ index 293a84350..68911c433 100644
631631 if GPU_MEMORY_TYPE_WEIGHTS in tags:
632632 self.stashed_model_static_state = _export_static_state(
633633 self.tp_worker.model_runner.model
634- @@ -177,6 +195,13 @@ class SchedulerUpdateWeightsMixin:
634+ )
635+ torch.distributed.barrier(self.tp_cpu_group)
636+ + self.tp_worker.model_runner.remote_instance_unregister_memory_region()
637+ self.memory_saver_adapter.pause(GPU_MEMORY_TYPE_WEIGHTS)
638+
639+ if GPU_MEMORY_TYPE_CUDA_GRAPH in tags:
640+ @@ -173,10 +192,18 @@ class SchedulerUpdateWeightsMixin:
641+ self.stashed_model_static_state,
642+ )
643+ del self.stashed_model_static_state
644+ + self.tp_worker.model_runner.remote_instance_register_memory_region()
645+
635646 if GPU_MEMORY_TYPE_KV_CACHE in tags:
636647 self.memory_saver_adapter.resume(GPU_MEMORY_TYPE_KV_CACHE)
637648
@@ -646,7 +657,7 @@ index 293a84350..68911c433 100644
646657
647658 def check_weights(self: Scheduler, recv_req: CheckWeightsReqInput):
648659diff --git a/python/sglang/srt/managers/tokenizer_communicator_mixin.py b/python/sglang/srt/managers/tokenizer_communicator_mixin.py
649- index e5d42bed8..412293b30 100644
660+ index e5d42be..412293b 100644
650661--- a/python/sglang/srt/managers/tokenizer_communicator_mixin.py
651662+++ b/python/sglang/srt/managers/tokenizer_communicator_mixin.py
652663@@ -49,6 +49,8 @@ from sglang.srt.managers.io_struct import (
@@ -698,7 +709,7 @@ index e5d42bed8..412293b30 100644
698709 self,
699710 obj: InitWeightsSendGroupForRemoteInstanceReqInput,
700711diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py
701- index f4fc29e29..5ef12cca6 100644
712+ index f4fc29e..5ef12cc 100644
702713--- a/python/sglang/srt/managers/tokenizer_manager.py
703714+++ b/python/sglang/srt/managers/tokenizer_manager.py
704715@@ -1652,12 +1652,13 @@ class TokenizerManager(TokenizerCommunicatorMixin, TokenizerManagerMultiItemMixi
@@ -722,7 +733,7 @@ index f4fc29e29..5ef12cca6 100644
722733 recv_obj.output_token_logprobs_val[recv_obj_index]
723734 )
724735diff --git a/python/sglang/srt/managers/tp_worker.py b/python/sglang/srt/managers/tp_worker.py
725- index 1f1875254..51d8651ce 100644
736+ index 1f18752..51d8651 100644
726737--- a/python/sglang/srt/managers/tp_worker.py
727738+++ b/python/sglang/srt/managers/tp_worker.py
728739@@ -27,6 +27,7 @@ from sglang.srt.managers.io_struct import (
@@ -746,7 +757,7 @@ index 1f1875254..51d8651ce 100644
746757 parameter = self.model_runner.get_weights_by_name(
747758 recv_req.name, recv_req.truncate_size
748759diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py
749- index 1d69c0582..c849913e9 100644
760+ index 1d69c05..b516607 100644
750761--- a/python/sglang/srt/model_executor/model_runner.py
751762+++ b/python/sglang/srt/model_executor/model_runner.py
752763@@ -558,7 +558,8 @@ class ModelRunner(ModelRunnerKVCacheMixin):
@@ -759,7 +770,45 @@ index 1d69c0582..c849913e9 100644
759770
760771 if self.device == "cuda":
761772 self.init_cublas()
762- @@ -2224,11 +2225,19 @@ class ModelRunner(ModelRunnerKVCacheMixin):
773+ @@ -635,6 +636,37 @@ class ModelRunner(ModelRunnerKVCacheMixin):
774+ f"{local_ip}:{self.remote_instance_transfer_engine.get_rpc_port()}"
775+ )
776+
777+ + def remote_instance_register_memory_region(self):
778+ + if self.remote_instance_transfer_engine is None:
779+ + return
780+ +
781+ + logger.debug("Registering memory regions to transfer engine after memory saver resume")
782+ + self.remote_instance_transfer_engine_weight_info = register_memory_region(
783+ + self.model, self.remote_instance_transfer_engine
784+ + )
785+ +
786+ + def remote_instance_unregister_memory_region(self):
787+ + if self.remote_instance_transfer_engine is None:
788+ + return
789+ +
790+ + logger.debug("Unregistering old memory regions from transfer engine")
791+ + registered_blocks = []
792+ + old_addrs = set()
793+ + for name, (data_ptr, numel, element_size) in (
794+ + self.remote_instance_transfer_engine_weight_info.items()
795+ + ):
796+ + if data_ptr not in old_addrs:
797+ + old_addrs.add(data_ptr)
798+ + registered_blocks.append((data_ptr, numel * element_size))
799+ +
800+ + for addr, size in registered_blocks:
801+ + try:
802+ + self.remote_instance_transfer_engine.unregister_memory(addr)
803+ + except Exception as e:
804+ + logger.debug(f"Failed to unregister memory at {addr}: {e}")
805+ +
806+ + self.remote_instance_transfer_engine_weight_info = None
807+ +
808+ def model_specific_adjustment(self):
809+ server_args = self.server_args
810+
811+ @@ -2224,11 +2256,19 @@ class ModelRunner(ModelRunnerKVCacheMixin):
763812 output.expert_distribution_metrics = recorder_outputs.get("metrics")
764813
765814 # Copy cached routing experts' buffers back to CPU cache
@@ -784,7 +833,7 @@ index 1d69c0582..c849913e9 100644
784833
785834 if self.eplb_manager is not None:
786835 self.eplb_manager.on_forward_pass_end()
787- @@ -2436,6 +2445 ,42 @@ class ModelRunner(ModelRunnerKVCacheMixin):
836+ @@ -2436,6 +2476 ,42 @@ class ModelRunner(ModelRunnerKVCacheMixin):
788837 logger.error(f"IPC weight update failed: {e}")
789838 return False, str(e)
790839
@@ -828,7 +877,7 @@ index 1d69c0582..c849913e9 100644
828877 def _model_load_weights_direct(model, named_tensors: List[Tuple[str, torch.Tensor]]):
829878 params_dict = dict(model.named_parameters())
830879diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py
831- index 2918461d3..d44c8aaa0 100644
880+ index 2918461..d44c8aa 100644
832881--- a/python/sglang/srt/models/deepseek_v2.py
833882+++ b/python/sglang/srt/models/deepseek_v2.py
834883@@ -2704,7 +2704,11 @@ class DeepseekV2AttentionMLA(nn.Module):
@@ -873,7 +922,7 @@ index 2918461d3..d44c8aaa0 100644
873922 if is_nextn and enable_nextn_moe_bf16_cast_to_fp8(self.quant_config):
874923 self._mark_nextn_moe_weights_as_ue8m0()
875924diff --git a/python/sglang/srt/models/qwen2.py b/python/sglang/srt/models/qwen2.py
876- index a7dbadec6..c83a41338 100644
925+ index a7dbade..c83a413 100644
877926--- a/python/sglang/srt/models/qwen2.py
878927+++ b/python/sglang/srt/models/qwen2.py
879928@@ -90,9 +90,6 @@ class Qwen2MLP(nn.Module):
@@ -911,7 +960,7 @@ index a7dbadec6..c83a41338 100644
911960 if get_global_server_args().rl_on_policy_target is not None
912961 else {}
913962diff --git a/python/sglang/srt/models/qwen2_moe.py b/python/sglang/srt/models/qwen2_moe.py
914- index 3ad9f6736..0b9c7f499 100644
963+ index 3ad9f67..0b9c7f4 100644
915964--- a/python/sglang/srt/models/qwen2_moe.py
916965+++ b/python/sglang/srt/models/qwen2_moe.py
917966@@ -586,7 +586,17 @@ class Qwen2MoeModel(nn.Module):
@@ -934,7 +983,7 @@ index 3ad9f6736..0b9c7f499 100644
934983 self.norm = PPMissingLayer(return_tuple=True)
935984
936985diff --git a/python/sglang/srt/models/qwen3.py b/python/sglang/srt/models/qwen3.py
937- index 9220831f6..47a1a4e4c 100644
986+ index 9220831..47a1a4e 100644
938987--- a/python/sglang/srt/models/qwen3.py
939988+++ b/python/sglang/srt/models/qwen3.py
940989@@ -90,8 +90,8 @@ class Qwen3Attention(nn.Module):
@@ -960,7 +1009,7 @@ index 9220831f6..47a1a4e4c 100644
9601009 if get_global_server_args().rl_on_policy_target is not None
9611010 else {}
9621011diff --git a/python/sglang/srt/models/qwen3_moe.py b/python/sglang/srt/models/qwen3_moe.py
963- index e11678a9e..e277d46f2 100644
1012+ index e11678a..e277d46 100644
9641013--- a/python/sglang/srt/models/qwen3_moe.py
9651014+++ b/python/sglang/srt/models/qwen3_moe.py
9661015@@ -22,6 +22,7 @@ import math
@@ -1070,7 +1119,7 @@ index e11678a9e..e277d46f2 100644
10701119
10711120 self.layer_communicator = LayerCommunicator(
10721121diff --git a/python/sglang/srt/models/qwen3_vl.py b/python/sglang/srt/models/qwen3_vl.py
1073- index 891913078..c9dbecd23 100644
1122+ index 8919130..c9dbecd 100644
10741123--- a/python/sglang/srt/models/qwen3_vl.py
10751124+++ b/python/sglang/srt/models/qwen3_vl.py
10761125@@ -397,28 +397,68 @@ class Qwen3VLMoeVisionModel(nn.Module, RotaryPosMixin):
@@ -1186,7 +1235,7 @@ index 891913078..c9dbecd23 100644
11861235 positions,
11871236 hidden_states,
11881237diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py
1189- index 54d4e415a..de7620c20 100644
1238+ index 54d4e41..84831ad 100644
11901239--- a/python/sglang/srt/server_args.py
11911240+++ b/python/sglang/srt/server_args.py
11921241@@ -523,6 +523,7 @@ class ServerArgs:
@@ -1209,8 +1258,25 @@ index 54d4e415a..de7620c20 100644
12091258 parser.add_argument(
12101259 "--disable-cuda-graph-padding",
12111260 action="store_true",
1261+ @@ -4972,11 +4978,11 @@ class ServerArgs:
1262+ f"Failed to import mooncake.engine. Does not support using TransferEngine as remote instance weight loader backend."
1263+ )
1264+ return False
1265+ - elif self.enable_memory_saver:
1266+ - logger.warning(
1267+ - "Memory saver is enabled, which is not compatible with TransferEngine. Does not support using TransferEngine as remote instance weight loader backend."
1268+ - )
1269+ - return False
1270+ + # elif self.enable_memory_saver:
1271+ + # logger.warning(
1272+ + # "Memory saver is enabled, which is not compatible with TransferEngine. Does not support using TransferEngine as remote instance weight loader backend."
1273+ + # )
1274+ + # return False
1275+ else:
1276+ return True
1277+
12121278diff --git a/python/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py b/python/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py
1213- index 5fe45086c..c95fbd0f6 100644
1279+ index 5fe4508..c95fbd0 100644
12141280--- a/python/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py
12151281+++ b/python/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py
12161282@@ -341,7 +341,10 @@ class EAGLEDraftCudaGraphRunner:
@@ -1237,7 +1303,7 @@ index 5fe45086c..c95fbd0f6 100644
12371303 self.req_pool_indices[:raw_bs].copy_(forward_batch.req_pool_indices)
12381304
12391305diff --git a/python/sglang/srt/speculative/eagle_info.py b/python/sglang/srt/speculative/eagle_info.py
1240- index 1bf3816e9..b5b41dba4 100644
1306+ index 1bf3816..b5b41db 100644
12411307--- a/python/sglang/srt/speculative/eagle_info.py
12421308+++ b/python/sglang/srt/speculative/eagle_info.py
12431309@@ -778,6 +778,10 @@ class EagleDraftInput(SpecInput, EagleDraftInputV2Mixin):
@@ -1280,7 +1346,7 @@ index 1bf3816e9..b5b41dba4 100644
12801346
12811347 @dataclass
12821348diff --git a/python/sglang/srt/speculative/eagle_worker.py b/python/sglang/srt/speculative/eagle_worker.py
1283- index a702df4f8..61d9ae366 100644
1349+ index a702df4..61d9ae3 100644
12841350--- a/python/sglang/srt/speculative/eagle_worker.py
12851351+++ b/python/sglang/srt/speculative/eagle_worker.py
12861352@@ -231,7 +231,7 @@ class EAGLEWorker(TpModelWorker):
0 commit comments