#include "ops.h"

#include <c10/core/Device.h>
#include <c10/xpu/XPUFunctions.h>

#include <cstddef>
#include <memory>
#include <utility>
67namespace vllm ::xpu {
78
@@ -17,13 +18,17 @@ namespace vllm::xpu {
1718
1819class XPUHostViewAllocator : public c10 ::Allocator {
1920 public:
21+ struct OwnerContext {
22+ torch::Tensor owner;
23+ };
24+
2025 /* *
2126 * @brief Constructor
2227 * @param host_ptr Pre-allocated host memory pointer
2328 * @param size Size of the host memory (in bytes)
2429 */
25- XPUHostViewAllocator (void * host_ptr, size_t size)
26- : host_ptr_(host_ptr), size_(size) {}
30+ XPUHostViewAllocator (void * host_ptr, size_t size, torch::Tensor owner )
31+ : host_ptr_(host_ptr), size_(size), owner_(std::move(owner)) {}
2732
2833 /* *
2934 * @brief Allocate memory (actually just validates and wraps existing host
@@ -36,15 +41,20 @@ class XPUHostViewAllocator : public c10::Allocator {
3641 // Verify requested memory size doesn't exceed pre-allocated memory size
3742 TORCH_CHECK (
3843 n <= size_, " Requested size exceeds allocated host pointer size" );
39- // Return wrapped data pointer with no-op deleter since memory is externally
40- // managed
44+ // Use unique_ptr for RAII: if current_device() or DataPtr construction
45+ // throws, the OwnerContext is automatically cleaned up instead of leaked.
46+ auto ctx = std::make_unique<OwnerContext>(OwnerContext{owner_});
4147 auto device_id = c10::xpu::current_device ();
42- return {
43- host_ptr_, // Actual data pointer
44- host_ptr_, // Context pointer (same as data pointer here)
45- [](void *) {}, // No-op deleter, doesn't actually free memory
46- c10::Device (c10::DeviceType::XPU, device_id) // Device type set to XPU
47- };
48+
49+ c10::DataPtr data_ptr{
50+ host_ptr_,
51+ ctx.get (),
52+ [](void * ptr) { delete static_cast <OwnerContext*>(ptr); },
53+ c10::Device (c10::DeviceType::XPU, device_id)};
54+
55+ // DataPtr now owns the context via its deleter — release from unique_ptr.
56+ ctx.release ();
57+ return data_ptr;
4858 }
4959
5060 /* *
@@ -71,6 +81,7 @@ class XPUHostViewAllocator : public c10::Allocator {
7181 private:
7282 void * const host_ptr_; // Pre-allocated host memory pointer
7383 const size_t size_; // Size of pre-allocated memory
84+ torch::Tensor owner_; // Keeps pinned host storage alive
7485};
7586} // namespace vllm::xpu
7687
@@ -92,7 +103,8 @@ torch::Tensor get_xpu_view_from_cpu_tensor(torch::Tensor& cpu_tensor) {
92103 auto scalar_type = cpu_tensor.scalar_type ();
93104
94105 size_t byte_size = cpu_tensor.numel () * cpu_tensor.element_size ();
95- vllm::xpu::XPUHostViewAllocator allocator (host_ptr, byte_size);
106+ // Keep `cpu_tensor` storage alive through the view tensor's lifetime.
107+ vllm::xpu::XPUHostViewAllocator allocator (host_ptr, byte_size, cpu_tensor);
96108 c10::DataPtr data_ptr = allocator.allocate (byte_size);
97109 c10::Storage storage (
98110 c10::Storage::use_byte_size_t (), byte_size, std::move (data_ptr));
0 commit comments