From 4b6d203adcb1739491f835fd52f087273d755000 Mon Sep 17 00:00:00 2001
From: kip-cxj <cuixiaojin@huawei.com>
Date: Thu, 15 Jan 2026 20:32:38 +0800
Subject: [PATCH 1/3] fix: npu free host cache

---
 checkpoint_engine/ps.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/checkpoint_engine/ps.py b/checkpoint_engine/ps.py
index 4990575..26e9899 100644
--- a/checkpoint_engine/ps.py
+++ b/checkpoint_engine/ps.py
@@ -407,7 +407,11 @@ def _unpin(t: torch.Tensor):
             del self._memory_pool[checkpoint_name]
         # see https://github.com/pytorch/pytorch/blob/31d5c675394705f8a6bc767f80ae14bf4f01246b/torch/csrc/cuda/Module.cpp#L2018
         # this works by using torch>=2.5.0
-        torch._C._host_emptyCache()
+        if self.device_manager.device_type == "cuda":
+            torch._C._host_emptyCache()
+        else:
+            import gc
+            gc.collect()
 
     def gather_metas(self, checkpoint_name: str):
         """

From e767a80a014864031fb50087a7aebe9a60db43f2 Mon Sep 17 00:00:00 2001
From: kip-cxj <cuixiaojin@huawei.com>
Date: Thu, 15 Jan 2026 20:37:46 +0800
Subject: [PATCH 2/3] fix pre-commit

---
 checkpoint_engine/ps.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/checkpoint_engine/ps.py b/checkpoint_engine/ps.py
index 26e9899..8986a00 100644
--- a/checkpoint_engine/ps.py
+++ b/checkpoint_engine/ps.py
@@ -411,6 +411,7 @@ def _unpin(t: torch.Tensor):
             torch._C._host_emptyCache()
         else:
             import gc
+
             gc.collect()
 
     def gather_metas(self, checkpoint_name: str):

From ca65a2235f7e8b697dc5e7a00b6a2f963205b749 Mon Sep 17 00:00:00 2001
From: kip-cxj <cuixiaojin@huawei.com>
Date: Mon, 19 Jan 2026 15:33:10 +0800
Subject: [PATCH 3/3] add comments

---
 checkpoint_engine/ps.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/checkpoint_engine/ps.py b/checkpoint_engine/ps.py
index 8986a00..20f5be6 100644
--- a/checkpoint_engine/ps.py
+++ b/checkpoint_engine/ps.py
@@ -410,6 +410,7 @@ def _unpin(t: torch.Tensor):
         if self.device_manager.device_type == "cuda":
             torch._C._host_emptyCache()
         else:
+            # torch._C._host_emptyCache() is unsupported on NPU; use gc.collect() to free the host cache.
             import gc
 
             gc.collect()