Fix ps allocation error and avoid memory fragmentation

youzhedian · hongchao · commit 2aea6080615f · 2026-01-29T15:39:23.000Z
diff --git a/checkpoint_engine/worker.py b/checkpoint_engine/worker.py
@@ -52,9 +52,13 @@ def update_weights_from_ipc(
     zmq_handle: str,
     device_id: int,
     *,
-    run: Callable[[list[tuple[str, torch.Tensor]]], None],
+    weight_loader: Callable[[list[tuple[str, torch.Tensor]]], None],
+    pre_hook: Callable[[], None] | None = None,
+    process_weight_after_loading: Callable[[], None] | None = None,
     post_hook: Callable[[], None] | None = None,
 ):
+    if pre_hook is not None:
+        pre_hook()
     socket = zmq_ctx.socket(zmq.REP)
     socket.connect(zmq_handle)
     buffer: torch.Tensor | None = None
@@ -74,14 +78,14 @@ def update_weights_from_ipc(
         while True:
             payload: list[FlattenedTensorMetadata] | Exception | None = socket.recv_pyobj()
             if payload is None:  # done signal
-                if post_hook is not None:
-                    post_hook()
+                if process_weight_after_loading is not None:
+                    process_weight_after_loading()
                 device_manager.device_module.synchronize()
                 socket.send(b"")
                 break
             if isinstance(payload, list):  # still updating weights
                 try:
-                    run(_extract_weights(payload, buffer))
+                    weight_loader(_extract_weights(payload, buffer))
                     device_manager.device_module.synchronize()
                     socket.send(b"")
                 except Exception as e:  # noqa: BLE001
@@ -102,6 +106,9 @@ def update_weights_from_ipc(
         gc.collect()
         device_manager.device_module.empty_cache()
 
+        if post_hook is not None:
+            post_hook()
+
 
 class VllmColocateWorkerExtension:
     """
@@ -177,7 +184,7 @@ def _load_weights(weights: _WEIGHTS_TYPE):
             ):
                 self.model_runner.drafter.model.load_weights(weights=weights)
 
-        def _post_hook():
+        def _process_weight_after_loading():
             process_weights_after_loading(self.model_runner.model, self.model_config, self.device)
             # Also trigger drafter model's post processing if MTP is enabled
             if (
@@ -188,10 +195,15 @@ def _post_hook():
                     self.model_runner.drafter.model, self.model_config, self.device
                 )
 
+        def _pre_hook():
+            torch.cuda.empty_cache()
+
         update_weights_from_ipc(
             self._zmq_ctx,
             zmq_handles[self._device_uuid],
             device_id=self.device.index,
-            run=_load_weights,
-            post_hook=_post_hook,
+            pre_hook=_pre_hook,
+            weight_loader=_load_weights,
+            process_weight_after_loading=_process_weight_after_loading,
+            post_hook=getattr(self, "_sampler_warmup", None),
         )