Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 23 additions & 11 deletions csrc/xpu_view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "ops.h"
#include <c10/core/Device.h>
#include <c10/xpu/XPUFunctions.h>
#include <memory>

namespace vllm::xpu {

Expand All @@ -17,13 +18,17 @@ namespace vllm::xpu {

class XPUHostViewAllocator : public c10::Allocator {
public:
struct OwnerContext {
torch::Tensor owner;
};

/**
* @brief Constructor
* @param host_ptr Pre-allocated host memory pointer
* @param size Size of the host memory (in bytes)
*/
XPUHostViewAllocator(void* host_ptr, size_t size)
: host_ptr_(host_ptr), size_(size) {}
XPUHostViewAllocator(void* host_ptr, size_t size, torch::Tensor owner)
: host_ptr_(host_ptr), size_(size), owner_(std::move(owner)) {}

/**
* @brief Allocate memory (actually just validates and wraps existing host
Expand All @@ -36,15 +41,20 @@ class XPUHostViewAllocator : public c10::Allocator {
// Verify requested memory size doesn't exceed pre-allocated memory size
TORCH_CHECK(
n <= size_, "Requested size exceeds allocated host pointer size");
// Return wrapped data pointer with no-op deleter since memory is externally
// managed
// Use unique_ptr for RAII: if current_device() or DataPtr construction
// throws, the OwnerContext is automatically cleaned up instead of leaked.
auto ctx = std::make_unique<OwnerContext>(OwnerContext{owner_});
auto device_id = c10::xpu::current_device();
return {
host_ptr_, // Actual data pointer
host_ptr_, // Context pointer (same as data pointer here)
[](void*) {}, // No-op deleter, doesn't actually free memory
c10::Device(c10::DeviceType::XPU, device_id) // Device type set to XPU
};

c10::DataPtr data_ptr{
host_ptr_,
ctx.get(),
[](void* ptr) { delete static_cast<OwnerContext*>(ptr); },
c10::Device(c10::DeviceType::XPU, device_id)};

// DataPtr now owns the context via its deleter — release from unique_ptr.
ctx.release();
return data_ptr;
}

/**
Expand All @@ -71,6 +81,7 @@ class XPUHostViewAllocator : public c10::Allocator {
private:
void* const host_ptr_; // Pre-allocated host memory pointer
const size_t size_; // Size of pre-allocated memory
torch::Tensor owner_; // Keeps pinned host storage alive
};
} // namespace vllm::xpu

Expand All @@ -92,7 +103,8 @@ torch::Tensor get_xpu_view_from_cpu_tensor(torch::Tensor& cpu_tensor) {
auto scalar_type = cpu_tensor.scalar_type();

size_t byte_size = cpu_tensor.numel() * cpu_tensor.element_size();
vllm::xpu::XPUHostViewAllocator allocator(host_ptr, byte_size);
// Keep `cpu_tensor` storage alive through the view tensor's lifetime.
vllm::xpu::XPUHostViewAllocator allocator(host_ptr, byte_size, cpu_tensor);
c10::DataPtr data_ptr = allocator.allocate(byte_size);
c10::Storage storage(
c10::Storage::use_byte_size_t(), byte_size, std::move(data_ptr));
Expand Down
21 changes: 21 additions & 0 deletions tests/test_uva.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import gc

import pytest
import torch
Expand Down Expand Up @@ -69,3 +70,23 @@ def test_gpu_write(device):
assert cpu_tensor[0, 0] == 2
assert cpu_tensor[2, 3] == 4
assert cpu_tensor[4, 5] == -2


@pytest.mark.parametrize("device", XPU_DEVICES)
def test_view_lifetime_after_owner_drop(device):
    """The XPU view must keep the pinned host storage alive on its own.

    Regression test: the allocator now stores an owning reference to the
    CPU tensor, so dropping the last Python reference and collecting must
    not invalidate the memory backing the view.
    """
    torch.set_default_device(device)
    host_tensor = torch.arange(100,
                               dtype=torch.int32,
                               device="cpu",
                               pin_memory=True).view(10, 10)
    view = torch.ops._C.get_xpu_view_from_cpu_tensor(host_tensor)

    # Release the only Python-side owner and force a collection pass.
    del host_tensor
    gc.collect()

    # Both reads and an in-place write must still work through the view.
    assert view[2, 3].item() == 23
    view.add_(1)
    assert view[0, 0].item() == 1
    assert view[9, 9].item() == 100
Loading