Skip to content

Commit 6485964

Browse files
authored
[docker] alleviate pd memory leakage (#1525)
1 parent e70f2b1 commit 6485964

File tree

2 files changed

+24
-19
lines changed

2 files changed

+24
-19
lines changed

docker/patch/latest/sglang.patch

Lines changed: 23 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -16,32 +16,35 @@ index aa10cb08d..d41c31a09 100644
1616
self.hf_config.architectures[0] = "Glm4MoeForCausalLMNextN"
1717

1818
diff --git a/python/sglang/srt/disaggregation/decode.py b/python/sglang/srt/disaggregation/decode.py
19-
index 51af67636..54716de5c 100644
19+
index 51af67636..661ea6fd6 100644
2020
--- a/python/sglang/srt/disaggregation/decode.py
2121
+++ b/python/sglang/srt/disaggregation/decode.py
22-
@@ -315,6 +315,13 @@ class DecodePreallocQueue:
22+
@@ -315,6 +315,16 @@ class DecodePreallocQueue:
2323
)
2424
return kv_manager
2525

2626
+ def release_memory_occupation(self):
27-
+ if hasattr(self.kv_manager, "close"):
28-
+ self.kv_manager.close()
27+
+ self.queue.clear()
28+
+ self.retracted_queue.clear()
29+
+ if hasattr(self.kv_manager, "deregister_buffer_to_engine"):
30+
+ self.kv_manager.deregister_buffer_to_engine()
2931
+
3032
+ def resume_memory_occupation(self):
31-
+ self.kv_manager = self._init_kv_manager()
33+
+ if hasattr(self.kv_manager, "register_buffer_to_engine"):
34+
+ self.kv_manager.register_buffer_to_engine()
3235
+
3336
def add(self, req: Req, is_retracted: bool = False) -> None:
3437
"""Add a request to the pending queue."""
3538
if self._check_if_req_exceed_kv_capacity(req):
3639
diff --git a/python/sglang/srt/disaggregation/mooncake/conn.py b/python/sglang/srt/disaggregation/mooncake/conn.py
37-
index 32e8c0b69..df913da7b 100644
40+
index 32e8c0b69..dc93c5c5f 100644
3841
--- a/python/sglang/srt/disaggregation/mooncake/conn.py
3942
+++ b/python/sglang/srt/disaggregation/mooncake/conn.py
40-
@@ -1079,6 +1079,19 @@ class MooncakeKVManager(CommonKVManager):
41-
f"Losing connection with prefill instance (bootstrap_addr: {failed_bootstrap_addr}), {len(affected_rooms)} requests affected"
42-
)
43+
@@ -253,6 +253,19 @@ class MooncakeKVManager(CommonKVManager):
44+
self.kv_args.state_data_ptrs, self.kv_args.state_data_lens
45+
)
4346

44-
+ def close(self):
47+
+ def deregister_buffer_to_engine(self):
4548
+ # Batch deregister KV data buffers
4649
+ if self.kv_args.kv_data_ptrs:
4750
+ self.engine.batch_deregister(self.kv_args.kv_data_ptrs)
@@ -54,23 +57,25 @@ index 32e8c0b69..df913da7b 100644
5457
+ if self.kv_args.state_data_ptrs:
5558
+ self.engine.batch_deregister(self.kv_args.state_data_ptrs)
5659
+
57-
58-
class MooncakeKVSender(CommonKVSender):
59-
60+
def _transfer_data(self, mooncake_session_id, transfer_blocks):
61+
if not transfer_blocks:
62+
return 0
6063
diff --git a/python/sglang/srt/disaggregation/prefill.py b/python/sglang/srt/disaggregation/prefill.py
61-
index a6eed743a..0124d8917 100644
64+
index a6eed743a..24a72ca70 100644
6265
--- a/python/sglang/srt/disaggregation/prefill.py
6366
+++ b/python/sglang/srt/disaggregation/prefill.py
64-
@@ -306,6 +306,13 @@ class PrefillBootstrapQueue:
67+
@@ -306,6 +306,15 @@ class PrefillBootstrapQueue:
6568
else:
6669
return bootstrapped_reqs, failed_reqs
6770

6871
+ def release_memory_occupation(self):
69-
+ if hasattr(self.kv_manager, "close"):
70-
+ self.kv_manager.close()
72+
+ self.queue.clear()
73+
+ if hasattr(self.kv_manager, "deregister_buffer_to_engine"):
74+
+ self.kv_manager.deregister_buffer_to_engine()
7175
+
7276
+ def resume_memory_occupation(self):
73-
+ self.kv_manager = self._init_kv_manager()
77+
+ if hasattr(self.kv_manager, "register_buffer_to_engine"):
78+
+ self.kv_manager.register_buffer_to_engine()
7479
+
7580

7681
class SchedulerDisaggregationPrefillMixin:

docker/version.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
nightly-dev-20260129a
1+
nightly-dev-20260130a

0 commit comments

Comments
 (0)