diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp index 76dd94241f6371..59a249d42f0674 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp @@ -38,44 +38,27 @@ UsedVersion getUsedVclVersion(uint16_t pluginMajor, uint16_t pluginMinor, const return {usedMajor, usedMinor}; } -struct vcl_allocator : vcl_allocator2_t { - vcl_allocator() : vcl_allocator2_t{allocate, deallocate} {} - - static uint8_t* allocate(vcl_allocator2_t* allocator, size_t size) { - vcl_allocator* vclAllocator = static_cast(allocator); - vclAllocator->m_size = intel_npu::utils::align_size_to_standard_page_size(size); - auto allocatedPtr = reinterpret_cast( - vclAllocator->m_allocator.allocate(vclAllocator->m_size, intel_npu::utils::STANDARD_PAGE_SIZE)); - if (allocatedPtr == nullptr) { - OPENVINO_THROW("Failed to allocate aligned memory for allocator"); - } - memset(allocatedPtr + size, 0, vclAllocator->m_size - size); - vclAllocator->m_allocated = allocatedPtr; - return allocatedPtr; - } - - static void deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) { - if (ptr == nullptr) { - OPENVINO_THROW("Pointer is nullptr in deallocate!"); +struct vcl_allocator_3 : vcl_allocator2_t { + vcl_allocator_3() : vcl_allocator2_t{allocate, deallocate}, m_allocator(intel_npu::utils::STANDARD_PAGE_SIZE) {} + + ~vcl_allocator_3() { + for (auto& item : m_info) { + if (item.first) { + m_allocator.deallocate(item.first, item.second, intel_npu::utils::STANDARD_PAGE_SIZE); + } } - vcl_allocator* vclAllocator = static_cast(allocator); - vclAllocator->m_allocator.deallocate(ptr, vclAllocator->m_size, intel_npu::utils::STANDARD_PAGE_SIZE); + m_info.clear(); } - ov::Allocator m_allocator; - uint8_t* m_allocated = nullptr; - size_t m_size = 0; -}; - -struct vcl_allocator_2 : vcl_allocator2_t { - vcl_allocator_2() : vcl_allocator2_t{allocate, deallocate} {} static uint8_t* allocate(vcl_allocator2_t* allocator, size_t size) { - vcl_allocator_2* vclAllocator = static_cast(allocator); + vcl_allocator_3* vclAllocator = static_cast(allocator); size_t alignedSize = intel_npu::utils::align_size_to_standard_page_size(size); - auto allocatedPtr = reinterpret_cast( + + uint8_t* allocatedPtr = static_cast( vclAllocator->m_allocator.allocate(alignedSize, intel_npu::utils::STANDARD_PAGE_SIZE)); + if (allocatedPtr == nullptr) { - OPENVINO_THROW("Failed to allocate aligned memory for allocator"); + OPENVINO_THROW("Failed to allocate aligned memory in vcl_allocator_3"); } memset(allocatedPtr + size, 0, alignedSize - size); vclAllocator->m_info.emplace_back(std::make_pair(allocatedPtr, alignedSize)); @@ -86,23 +69,32 @@ struct vcl_allocator_2 : vcl_allocator2_t { if (ptr == nullptr) { OPENVINO_THROW("Pointer is nullptr in deallocate!"); } - vcl_allocator_2* vclAllocator = static_cast(allocator); + vcl_allocator_3* vclAllocator = static_cast(allocator); + + for (auto it = vclAllocator->m_info.begin(); it != vclAllocator->m_info.end(); ++it) { + if (it->first == ptr) { + vclAllocator->m_info.erase(it); + break; + } + } + // 1 is the placeholder value, as size is not needed in deallocate vclAllocator->m_allocator.deallocate(ptr, 1, intel_npu::utils::STANDARD_PAGE_SIZE); } - ov::Allocator m_allocator; + intel_npu::utils::AlignedAllocator m_allocator; std::vector> m_info; }; -ov::Tensor make_tensor_from_aligned_addr(uint8_t* allocated, size_t size) { - ov::Allocator allocator; +ov::Tensor make_tensor_from_aligned_addr(uint8_t* allocated, + size_t size, + std::shared_ptr sourceAllocator) { auto tensor = ov::Tensor(ov::element::u8, ov::Shape{size}, allocated); auto impl = ov::get_tensor_impl(std::move(tensor)); - std::shared_ptr ptr(allocated, [allocator = std::move(allocator), size](uint8_t* p) mutable { + std::shared_ptr ptr(allocated, [sourceAllocator](uint8_t* p) { if (p == nullptr) { OPENVINO_THROW("Pointer is nullptr in memory deallocation of make_tensor_from_aligned_addr!"); } - allocator.deallocate(p, size, intel_npu::utils::STANDARD_PAGE_SIZE); + vcl_allocator_3::deallocate(sourceAllocator.get(), p); }); impl._so = std::move(ptr); return ov::make_tensor(impl); @@ -310,11 +302,11 @@ ov::Tensor VCLCompilerImpl::compile(const std::shared_ptr& mode // support the lastest vcl api // For VCL 7.4 and later, we can use vclAllocatedExecutableCreate2 _logger.debug("Using vclAllocatedExecutableCreate2 for 7.4 <= VCL"); - vcl_allocator allocator; + auto allocator = std::make_shared(); uint8_t* blob = nullptr; - size_t size = 0; + size_t blobSize = 0; - auto result = vclAllocatedExecutableCreate2(_compilerHandle, exeDesc, &allocator, &blob, &size); + auto result = vclAllocatedExecutableCreate2(_compilerHandle, exeDesc, allocator.get(), &blob, &blobSize); if (result != VCL_RESULT_SUCCESS) { OPENVINO_THROW("Compilation failed. vclAllocatedExecutableCreate2 result: 0x", std::hex, @@ -323,17 +315,30 @@ ov::Tensor VCLCompilerImpl::compile(const std::shared_ptr& mode getLatestVCLLog(_logHandle)); } - if (size == 0 || blob == nullptr) { - OPENVINO_THROW("Failed to create VCL executable, size is zero or blob is null"); - } + OPENVINO_ASSERT(blobSize != 0 && blob != nullptr, + "Failed to create VCL executable, the blob size is zero or the blob is null"); + + // Retrieve the real allocated size for the blob from the allocator + auto it = std::find_if(allocator->m_info.begin(), + allocator->m_info.end(), + [blob](const std::pair& item) { + return item.first == blob; + }); + + OPENVINO_ASSERT(it != allocator->m_info.end(), "Failed to find the allocated blob in the allocator records"); + size_t alignedBlobSize = it->second; + // The allocated size from VCL will be equal or smaller than the allocated size in allocator - _logger.debug("Blob size from VCL: %zu ptr %p", size, static_cast(blob)); - _logger.debug("Allocated vector size: %zu ptr: %p", - allocator.m_size, - static_cast(allocator.m_allocated)); + _logger.debug("Blob size from VCL: %zu ptr %p", blobSize, static_cast(blob)); + _logger.debug("Allocated vector size: %zu ptr: %p", alignedBlobSize, static_cast(blob)); - _logger.debug("compile end, blob size:%d", allocator.m_size); - return make_tensor_from_aligned_addr(allocator.m_allocated, allocator.m_size); + ov::Tensor alignedBlob = make_tensor_from_aligned_addr(blob, alignedBlobSize, allocator); + + _logger.debug("compile end, blob size:%zu", alignedBlobSize); + // Remove only the transferred blob, leaving any possible temporary allocations to be safely freed by + // ~vcl_allocator_3 + allocator->m_info.erase(it); + return alignedBlob; } else { OPENVINO_THROW("Not supported VCL version: %d.%d, please use VCL 6.1 or later", _vclVersion.major, @@ -392,20 +397,23 @@ std::vector VCLCompilerImpl::compileWsOneShot(const std::shared_ptr< _logger.debug("compiler vcl version: %d.%d", _vclVersion.major, _vclVersion.minor); _logger.debug("Using vclAllocatedExecutableCreateWSOneShot"); - vcl_allocator_2 allocator; + auto allocator = std::make_shared(); THROW_ON_FAIL_FOR_VCL("vclAllocatedExecutableCreateWSOneShot", - vclAllocatedExecutableCreateWSOneShot(_compilerHandle, exeDesc, &allocator), + vclAllocatedExecutableCreateWSOneShot(_compilerHandle, exeDesc, allocator.get()), _logHandle); - if (allocator.m_info.size() == 0) { + if (allocator->m_info.size() == 0) { OPENVINO_THROW("Failed to create VCL executable, blobCount is zero"); } std::vector initMainTensors; - for (auto& blob : allocator.m_info) { - initMainTensors.emplace_back(make_tensor_from_aligned_addr(blob.first, blob.second)); + for (const auto& blob : allocator->m_info) { + initMainTensors.emplace_back(make_tensor_from_aligned_addr(blob.first, blob.second, allocator)); } + // Clean up m_info, delegating actual physical frees strictly to the Tensor/Deleter from now on. + allocator->m_info.clear(); + return initMainTensors; }