Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 62 additions & 54 deletions src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,44 +38,27 @@ UsedVersion getUsedVclVersion(uint16_t pluginMajor, uint16_t pluginMinor, const
return {usedMajor, usedMinor};
}

struct vcl_allocator : vcl_allocator2_t {
vcl_allocator() : vcl_allocator2_t{allocate, deallocate} {}

static uint8_t* allocate(vcl_allocator2_t* allocator, size_t size) {
vcl_allocator* vclAllocator = static_cast<vcl_allocator*>(allocator);
vclAllocator->m_size = intel_npu::utils::align_size_to_standard_page_size(size);
auto allocatedPtr = reinterpret_cast<uint8_t*>(
vclAllocator->m_allocator.allocate(vclAllocator->m_size, intel_npu::utils::STANDARD_PAGE_SIZE));
if (allocatedPtr == nullptr) {
OPENVINO_THROW("Failed to allocate aligned memory for allocator");
}
memset(allocatedPtr + size, 0, vclAllocator->m_size - size);
vclAllocator->m_allocated = allocatedPtr;
return allocatedPtr;
}

static void deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) {
if (ptr == nullptr) {
OPENVINO_THROW("Pointer is nullptr in deallocate!");
struct vcl_allocator_3 : vcl_allocator2_t {
vcl_allocator_3() : vcl_allocator2_t{allocate, deallocate}, m_allocator(intel_npu::utils::STANDARD_PAGE_SIZE) {}

~vcl_allocator_3() {
for (auto& item : m_info) {
if (item.first) {
m_allocator.deallocate(item.first, item.second, intel_npu::utils::STANDARD_PAGE_SIZE);
}
}
vcl_allocator* vclAllocator = static_cast<vcl_allocator*>(allocator);
vclAllocator->m_allocator.deallocate(ptr, vclAllocator->m_size, intel_npu::utils::STANDARD_PAGE_SIZE);
m_info.clear();
}
ov::Allocator m_allocator;
uint8_t* m_allocated = nullptr;
size_t m_size = 0;
};

struct vcl_allocator_2 : vcl_allocator2_t {
vcl_allocator_2() : vcl_allocator2_t{allocate, deallocate} {}

static uint8_t* allocate(vcl_allocator2_t* allocator, size_t size) {
vcl_allocator_2* vclAllocator = static_cast<vcl_allocator_2*>(allocator);
vcl_allocator_3* vclAllocator = static_cast<vcl_allocator_3*>(allocator);
size_t alignedSize = intel_npu::utils::align_size_to_standard_page_size(size);
auto allocatedPtr = reinterpret_cast<uint8_t*>(

uint8_t* allocatedPtr = static_cast<uint8_t*>(
vclAllocator->m_allocator.allocate(alignedSize, intel_npu::utils::STANDARD_PAGE_SIZE));

if (allocatedPtr == nullptr) {
OPENVINO_THROW("Failed to allocate aligned memory for allocator");
OPENVINO_THROW("Failed to allocate aligned memory in vcl_allocator_3");
}
memset(allocatedPtr + size, 0, alignedSize - size);
vclAllocator->m_info.emplace_back(std::make_pair(allocatedPtr, alignedSize));
Expand All @@ -86,23 +69,32 @@ struct vcl_allocator_2 : vcl_allocator2_t {
if (ptr == nullptr) {
OPENVINO_THROW("Pointer is nullptr in deallocate!");
}
vcl_allocator_2* vclAllocator = static_cast<vcl_allocator_2*>(allocator);
vcl_allocator_3* vclAllocator = static_cast<vcl_allocator_3*>(allocator);

for (auto it = vclAllocator->m_info.begin(); it != vclAllocator->m_info.end(); ++it) {
if (it->first == ptr) {
vclAllocator->m_info.erase(it);
break;
}
}

// 1 is the placeholder value, as size is not needed in deallocate
vclAllocator->m_allocator.deallocate(ptr, 1, intel_npu::utils::STANDARD_PAGE_SIZE);
}
ov::Allocator m_allocator;
intel_npu::utils::AlignedAllocator m_allocator;
std::vector<std::pair<uint8_t*, size_t>> m_info;
};

ov::Tensor make_tensor_from_aligned_addr(uint8_t* allocated, size_t size) {
ov::Allocator allocator;
ov::Tensor make_tensor_from_aligned_addr(uint8_t* allocated,
size_t size,
std::shared_ptr<vcl_allocator_3> sourceAllocator) {
auto tensor = ov::Tensor(ov::element::u8, ov::Shape{size}, allocated);
auto impl = ov::get_tensor_impl(std::move(tensor));
std::shared_ptr<void> ptr(allocated, [allocator = std::move(allocator), size](uint8_t* p) mutable {
std::shared_ptr<void> ptr(allocated, [sourceAllocator](uint8_t* p) {
if (p == nullptr) {
OPENVINO_THROW("Pointer is nullptr in memory deallocation of make_tensor_from_aligned_addr!");
}
allocator.deallocate(p, size, intel_npu::utils::STANDARD_PAGE_SIZE);
vcl_allocator_3::deallocate(sourceAllocator.get(), p);
});
impl._so = std::move(ptr);
return ov::make_tensor(impl);
Expand Down Expand Up @@ -310,11 +302,11 @@ ov::Tensor VCLCompilerImpl::compile(const std::shared_ptr<const ov::Model>& mode
// support the latest vcl api
// For VCL 7.4 and later, we can use vclAllocatedExecutableCreate2
_logger.debug("Using vclAllocatedExecutableCreate2 for 7.4 <= VCL");
vcl_allocator allocator;
auto allocator = std::make_shared<vcl_allocator_3>();
uint8_t* blob = nullptr;
size_t size = 0;
size_t blobSize = 0;

auto result = vclAllocatedExecutableCreate2(_compilerHandle, exeDesc, &allocator, &blob, &size);
auto result = vclAllocatedExecutableCreate2(_compilerHandle, exeDesc, allocator.get(), &blob, &blobSize);
if (result != VCL_RESULT_SUCCESS) {
OPENVINO_THROW("Compilation failed. vclAllocatedExecutableCreate2 result: 0x",
std::hex,
Expand All @@ -323,17 +315,30 @@ ov::Tensor VCLCompilerImpl::compile(const std::shared_ptr<const ov::Model>& mode
getLatestVCLLog(_logHandle));
}

if (size == 0 || blob == nullptr) {
OPENVINO_THROW("Failed to create VCL executable, size is zero or blob is null");
}
OPENVINO_ASSERT(blobSize != 0 && blob != nullptr,
"Failed to create VCL executable, the blob size is zero or the blob is null");

// Retrieve the real allocated size for the blob from the allocator
auto it = std::find_if(allocator->m_info.begin(),
allocator->m_info.end(),
[blob](const std::pair<uint8_t*, size_t>& item) {
return item.first == blob;
});

OPENVINO_ASSERT(it != allocator->m_info.end(), "Failed to find the allocated blob in the allocator records");
size_t alignedBlobSize = it->second;

// The blob size reported by VCL will be equal to or smaller than the size recorded by the allocator
_logger.debug("Blob size from VCL: %zu ptr %p", size, static_cast<void*>(blob));
_logger.debug("Allocated vector size: %zu ptr: %p",
allocator.m_size,
static_cast<void*>(allocator.m_allocated));
_logger.debug("Blob size from VCL: %zu ptr %p", blobSize, static_cast<void*>(blob));
_logger.debug("Allocated vector size: %zu ptr: %p", alignedBlobSize, static_cast<void*>(blob));

_logger.debug("compile end, blob size:%d", allocator.m_size);
return make_tensor_from_aligned_addr(allocator.m_allocated, allocator.m_size);
ov::Tensor alignedBlob = make_tensor_from_aligned_addr(blob, alignedBlobSize, allocator);

_logger.debug("compile end, blob size:%zu", alignedBlobSize);
// Remove only the transferred blob, leaving any possible temporary allocations to be safely freed by
// ~vcl_allocator_3
allocator->m_info.erase(it);
return alignedBlob;
} else {
OPENVINO_THROW("Not supported VCL version: %d.%d, please use VCL 6.1 or later",
_vclVersion.major,
Expand Down Expand Up @@ -392,20 +397,23 @@ std::vector<ov::Tensor> VCLCompilerImpl::compileWsOneShot(const std::shared_ptr<
_logger.debug("compiler vcl version: %d.%d", _vclVersion.major, _vclVersion.minor);

_logger.debug("Using vclAllocatedExecutableCreateWSOneShot");
vcl_allocator_2 allocator;
auto allocator = std::make_shared<vcl_allocator_3>();

THROW_ON_FAIL_FOR_VCL("vclAllocatedExecutableCreateWSOneShot",
vclAllocatedExecutableCreateWSOneShot(_compilerHandle, exeDesc, &allocator),
vclAllocatedExecutableCreateWSOneShot(_compilerHandle, exeDesc, allocator.get()),
_logHandle);

if (allocator.m_info.size() == 0) {
if (allocator->m_info.size() == 0) {
OPENVINO_THROW("Failed to create VCL executable, blobCount is zero");
}

std::vector<ov::Tensor> initMainTensors;
for (auto& blob : allocator.m_info) {
initMainTensors.emplace_back(make_tensor_from_aligned_addr(blob.first, blob.second));
for (const auto& blob : allocator->m_info) {
initMainTensors.emplace_back(make_tensor_from_aligned_addr(blob.first, blob.second, allocator));
}
// Clean up m_info, delegating actual physical frees strictly to the Tensor/Deleter from now on.
allocator->m_info.clear();

return initMainTensors;
}

Expand Down
Loading