Skip to content

Commit 3fb2f1e

Browse files
Fix XPU CPU-view tensor lifetime (vllm-project#262)
Signed-off-by: chaojun-zhang <chaojun.zhang@intel.com>
Co-authored-by: Harish Subramony <harish.subramony@intel.com>
1 parent 9ff7faa commit 3fb2f1e

2 files changed

Lines changed: 44 additions & 11 deletions

File tree

csrc/xpu_view.cpp

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include "ops.h"
33
#include <c10/core/Device.h>
44
#include <c10/xpu/XPUFunctions.h>
5+
#include <memory>
56

67
namespace vllm::xpu {
78

@@ -17,13 +18,17 @@ namespace vllm::xpu {
1718

1819
class XPUHostViewAllocator : public c10::Allocator {
1920
public:
21+
struct OwnerContext {
22+
torch::Tensor owner;
23+
};
24+
2025
/**
2126
* @brief Constructor
2227
* @param host_ptr Pre-allocated host memory pointer
2328
* @param size Size of the host memory (in bytes)
2429
*/
25-
XPUHostViewAllocator(void* host_ptr, size_t size)
26-
: host_ptr_(host_ptr), size_(size) {}
30+
XPUHostViewAllocator(void* host_ptr, size_t size, torch::Tensor owner)
31+
: host_ptr_(host_ptr), size_(size), owner_(std::move(owner)) {}
2732

2833
/**
2934
* @brief Allocate memory (actually just validates and wraps existing host
@@ -36,15 +41,20 @@ class XPUHostViewAllocator : public c10::Allocator {
3641
// Verify requested memory size doesn't exceed pre-allocated memory size
3742
TORCH_CHECK(
3843
n <= size_, "Requested size exceeds allocated host pointer size");
39-
// Return wrapped data pointer with no-op deleter since memory is externally
40-
// managed
44+
// Use unique_ptr for RAII: if current_device() or DataPtr construction
45+
// throws, the OwnerContext is automatically cleaned up instead of leaked.
46+
auto ctx = std::make_unique<OwnerContext>(OwnerContext{owner_});
4147
auto device_id = c10::xpu::current_device();
42-
return {
43-
host_ptr_, // Actual data pointer
44-
host_ptr_, // Context pointer (same as data pointer here)
45-
[](void*) {}, // No-op deleter, doesn't actually free memory
46-
c10::Device(c10::DeviceType::XPU, device_id) // Device type set to XPU
47-
};
48+
49+
c10::DataPtr data_ptr{
50+
host_ptr_,
51+
ctx.get(),
52+
[](void* ptr) { delete static_cast<OwnerContext*>(ptr); },
53+
c10::Device(c10::DeviceType::XPU, device_id)};
54+
55+
// DataPtr now owns the context via its deleter — release from unique_ptr.
56+
ctx.release();
57+
return data_ptr;
4858
}
4959

5060
/**
@@ -71,6 +81,7 @@ class XPUHostViewAllocator : public c10::Allocator {
7181
private:
7282
void* const host_ptr_; // Pre-allocated host memory pointer
7383
const size_t size_; // Size of pre-allocated memory
84+
torch::Tensor owner_; // Keeps pinned host storage alive
7485
};
7586
} // namespace vllm::xpu
7687

@@ -92,7 +103,8 @@ torch::Tensor get_xpu_view_from_cpu_tensor(torch::Tensor& cpu_tensor) {
92103
auto scalar_type = cpu_tensor.scalar_type();
93104

94105
size_t byte_size = cpu_tensor.numel() * cpu_tensor.element_size();
95-
vllm::xpu::XPUHostViewAllocator allocator(host_ptr, byte_size);
106+
// Keep `cpu_tensor` storage alive through the view tensor's lifetime.
107+
vllm::xpu::XPUHostViewAllocator allocator(host_ptr, byte_size, cpu_tensor);
96108
c10::DataPtr data_ptr = allocator.allocate(byte_size);
97109
c10::Storage storage(
98110
c10::Storage::use_byte_size_t(), byte_size, std::move(data_ptr));

tests/test_uva.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
import gc
34

45
import pytest
56
import torch
@@ -69,3 +70,23 @@ def test_gpu_write(device):
6970
assert cpu_tensor[0, 0] == 2
7071
assert cpu_tensor[2, 3] == 4
7172
assert cpu_tensor[4, 5] == -2
73+
74+
75+
@pytest.mark.parametrize("device", XPU_DEVICES)
76+
def test_view_lifetime_after_owner_drop(device):
77+
torch.set_default_device(device)
78+
cpu_tensor = torch.arange(100,
79+
dtype=torch.int32,
80+
device="cpu",
81+
pin_memory=True).view(10, 10)
82+
xpu_view = torch.ops._C.get_xpu_view_from_cpu_tensor(cpu_tensor)
83+
84+
# Drop the original owner reference and force Python GC.
85+
del cpu_tensor
86+
gc.collect()
87+
88+
# Exercise both read and write from the XPU view after owner drop.
89+
assert xpu_view[2, 3].item() == 23
90+
xpu_view.add_(1)
91+
assert xpu_view[0, 0].item() == 1
92+
assert xpu_view[9, 9].item() == 100

0 commit comments

Comments (0)