@@ -16,32 +16,35 @@ index aa10cb08d..d41c31a09 100644
1616 self.hf_config.architectures[0] = "Glm4MoeForCausalLMNextN"
1717
1818diff --git a/python/sglang/srt/disaggregation/decode.py b/python/sglang/srt/disaggregation/decode.py
19- index 51af67636..54716de5c 100644
19+ index 51af67636..661ea6fd6 100644
2020--- a/python/sglang/srt/disaggregation/decode.py
2121+++ b/python/sglang/srt/disaggregation/decode.py
22- @@ -315,6 +315,13 @@ class DecodePreallocQueue:
22+ @@ -315,6 +315,16 @@ class DecodePreallocQueue:
2323 )
2424 return kv_manager
2525
2626+ def release_memory_occupation(self):
27- + if hasattr(self.kv_manager, "close"):
28- + self.kv_manager.close()
27+ + self.queue.clear()
28+ + self.retracted_queue.clear()
29+ + if hasattr(self.kv_manager, "deregister_buffer_to_engine"):
30+ + self.kv_manager.deregister_buffer_to_engine()
2931+
3032+ def resume_memory_occupation(self):
31- + self.kv_manager = self._init_kv_manager()
33+ + if hasattr(self.kv_manager, "register_buffer_to_engine"):
34+ + self.kv_manager.register_buffer_to_engine()
3235+
3336 def add(self, req: Req, is_retracted: bool = False) -> None:
3437 """Add a request to the pending queue."""
3538 if self._check_if_req_exceed_kv_capacity(req):
3639diff --git a/python/sglang/srt/disaggregation/mooncake/conn.py b/python/sglang/srt/disaggregation/mooncake/conn.py
37- index 32e8c0b69..df913da7b 100644
40+ index 32e8c0b69..dc93c5c5f 100644
3841--- a/python/sglang/srt/disaggregation/mooncake/conn.py
3942+++ b/python/sglang/srt/disaggregation/mooncake/conn.py
40- @@ -1079,6 +1079,19 @@ class MooncakeKVManager(CommonKVManager):
41- f"Losing connection with prefill instance (bootstrap_addr: {failed_bootstrap_addr}), {len(affected_rooms)} requests affected"
42- )
43+ @@ -253,6 +253,19 @@ class MooncakeKVManager(CommonKVManager):
44+ self.kv_args.state_data_ptrs, self.kv_args.state_data_lens
45+ )
4346
44- + def close(self):
47+ + def deregister_buffer_to_engine(self):
4548+ # Batch deregister KV data buffers
4649+ if self.kv_args.kv_data_ptrs:
4750+ self.engine.batch_deregister(self.kv_args.kv_data_ptrs)
@@ -54,23 +57,25 @@ index 32e8c0b69..df913da7b 100644
5457+ if self.kv_args.state_data_ptrs:
5558+ self.engine.batch_deregister(self.kv_args.state_data_ptrs)
5659+
57-
58- class MooncakeKVSender(CommonKVSender):
59-
60+ def _transfer_data(self, mooncake_session_id, transfer_blocks):
61+ if not transfer_blocks:
62+ return 0
6063diff --git a/python/sglang/srt/disaggregation/prefill.py b/python/sglang/srt/disaggregation/prefill.py
61- index a6eed743a..0124d8917 100644
64+ index a6eed743a..24a72ca70 100644
6265--- a/python/sglang/srt/disaggregation/prefill.py
6366+++ b/python/sglang/srt/disaggregation/prefill.py
64- @@ -306,6 +306,13 @@ class PrefillBootstrapQueue:
67+ @@ -306,6 +306,15 @@ class PrefillBootstrapQueue:
6568 else:
6669 return bootstrapped_reqs, failed_reqs
6770
6871+ def release_memory_occupation(self):
69- + if hasattr(self.kv_manager, "close"):
70- + self.kv_manager.close()
72+ + self.queue.clear()
73+ + if hasattr(self.kv_manager, "deregister_buffer_to_engine"):
74+ + self.kv_manager.deregister_buffer_to_engine()
7175+
7276+ def resume_memory_occupation(self):
73- + self.kv_manager = self._init_kv_manager()
77+ + if hasattr(self.kv_manager, "register_buffer_to_engine"):
78+ + self.kv_manager.register_buffer_to_engine()
7479+
7580
7681 class SchedulerDisaggregationPrefillMixin:
0 commit comments