Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 62 additions & 54 deletions src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,44 +38,27 @@ UsedVersion getUsedVclVersion(uint16_t pluginMajor, uint16_t pluginMinor, const
return {usedMajor, usedMinor};
}

struct vcl_allocator : vcl_allocator2_t {
vcl_allocator() : vcl_allocator2_t{allocate, deallocate} {}

static uint8_t* allocate(vcl_allocator2_t* allocator, size_t size) {
vcl_allocator* vclAllocator = static_cast<vcl_allocator*>(allocator);
vclAllocator->m_size = intel_npu::utils::align_size_to_standard_page_size(size);
auto allocatedPtr = reinterpret_cast<uint8_t*>(
vclAllocator->m_allocator.allocate(vclAllocator->m_size, intel_npu::utils::STANDARD_PAGE_SIZE));
if (allocatedPtr == nullptr) {
OPENVINO_THROW("Failed to allocate aligned memory for allocator");
}
memset(allocatedPtr + size, 0, vclAllocator->m_size - size);
vclAllocator->m_allocated = allocatedPtr;
return allocatedPtr;
}

static void deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) {
if (ptr == nullptr) {
OPENVINO_THROW("Pointer is nullptr in deallocate!");
struct vcl_allocator_3 : vcl_allocator2_t {
vcl_allocator_3() : vcl_allocator2_t{allocate, deallocate}, m_allocator(intel_npu::utils::STANDARD_PAGE_SIZE) {}

~vcl_allocator_3() {
for (auto& item : m_info) {
if (item.first) {
m_allocator.deallocate(item.first, item.second, intel_npu::utils::STANDARD_PAGE_SIZE);
}
}
vcl_allocator* vclAllocator = static_cast<vcl_allocator*>(allocator);
vclAllocator->m_allocator.deallocate(ptr, vclAllocator->m_size, intel_npu::utils::STANDARD_PAGE_SIZE);
m_info.clear();
}
ov::Allocator m_allocator;
uint8_t* m_allocated = nullptr;
size_t m_size = 0;
};

struct vcl_allocator_2 : vcl_allocator2_t {
vcl_allocator_2() : vcl_allocator2_t{allocate, deallocate} {}

static uint8_t* allocate(vcl_allocator2_t* allocator, size_t size) {
vcl_allocator_2* vclAllocator = static_cast<vcl_allocator_2*>(allocator);
vcl_allocator_3* vclAllocator = static_cast<vcl_allocator_3*>(allocator);
size_t alignedSize = intel_npu::utils::align_size_to_standard_page_size(size);
auto allocatedPtr = reinterpret_cast<uint8_t*>(

uint8_t* allocatedPtr = static_cast<uint8_t*>(
vclAllocator->m_allocator.allocate(alignedSize, intel_npu::utils::STANDARD_PAGE_SIZE));

if (allocatedPtr == nullptr) {
OPENVINO_THROW("Failed to allocate aligned memory for allocator");
OPENVINO_THROW("Failed to allocate aligned memory in vcl_allocator_3");
}
memset(allocatedPtr + size, 0, alignedSize - size);
vclAllocator->m_info.emplace_back(std::make_pair(allocatedPtr, alignedSize));
Expand All @@ -86,23 +69,32 @@ struct vcl_allocator_2 : vcl_allocator2_t {
if (ptr == nullptr) {
OPENVINO_THROW("Pointer is nullptr in deallocate!");
}
vcl_allocator_2* vclAllocator = static_cast<vcl_allocator_2*>(allocator);
vcl_allocator_3* vclAllocator = static_cast<vcl_allocator_3*>(allocator);

for (auto it = vclAllocator->m_info.begin(); it != vclAllocator->m_info.end(); ++it) {
if (it->first == ptr) {
vclAllocator->m_info.erase(it);
break;
}
}

// 1 is the placeholder value, as size is not needed in deallocate
vclAllocator->m_allocator.deallocate(ptr, 1, intel_npu::utils::STANDARD_PAGE_SIZE);
}
ov::Allocator m_allocator;
intel_npu::utils::AlignedAllocator m_allocator;
std::vector<std::pair<uint8_t*, size_t>> m_info;
};

ov::Tensor make_tensor_from_aligned_addr(uint8_t* allocated, size_t size) {
ov::Allocator allocator;
ov::Tensor make_tensor_from_aligned_addr(uint8_t* allocated,
size_t size,
std::shared_ptr<vcl_allocator_3> sourceAllocator) {
auto tensor = ov::Tensor(ov::element::u8, ov::Shape{size}, allocated);
auto impl = ov::get_tensor_impl(std::move(tensor));
std::shared_ptr<void> ptr(allocated, [allocator = std::move(allocator), size](uint8_t* p) mutable {
std::shared_ptr<void> ptr(allocated, [sourceAllocator](uint8_t* p) {
if (p == nullptr) {
OPENVINO_THROW("Pointer is nullptr in memory deallocation of make_tensor_from_aligned_addr!");
}
allocator.deallocate(p, size, intel_npu::utils::STANDARD_PAGE_SIZE);
vcl_allocator_3::deallocate(sourceAllocator.get(), p);
});
impl._so = std::move(ptr);
return ov::make_tensor(impl);
Expand Down Expand Up @@ -310,11 +302,11 @@ ov::Tensor VCLCompilerImpl::compile(const std::shared_ptr<const ov::Model>& mode
// support the latest vcl api
// For VCL 7.4 and later, we can use vclAllocatedExecutableCreate2
_logger.debug("Using vclAllocatedExecutableCreate2 for 7.4 <= VCL");
vcl_allocator allocator;
auto allocator = std::make_shared<vcl_allocator_3>();
uint8_t* blob = nullptr;
size_t size = 0;
size_t blobSize = 0;

auto result = vclAllocatedExecutableCreate2(_compilerHandle, exeDesc, &allocator, &blob, &size);
auto result = vclAllocatedExecutableCreate2(_compilerHandle, exeDesc, allocator.get(), &blob, &blobSize);
if (result != VCL_RESULT_SUCCESS) {
OPENVINO_THROW("Compilation failed. vclAllocatedExecutableCreate2 result: 0x",
std::hex,
Expand All @@ -323,17 +315,30 @@ ov::Tensor VCLCompilerImpl::compile(const std::shared_ptr<const ov::Model>& mode
getLatestVCLLog(_logHandle));
}

if (size == 0 || blob == nullptr) {
OPENVINO_THROW("Failed to create VCL executable, size is zero or blob is null");
}
OPENVINO_ASSERT(blobSize != 0 && blob != nullptr,
"Failed to create VCL executable, the blob size is zero or the blob is null");

// Retrieve the real allocated size for the blob from the allocator
auto it = std::find_if(allocator->m_info.begin(),
allocator->m_info.end(),
[blob](const std::pair<uint8_t*, size_t>& item) {
return item.first == blob;
});

OPENVINO_ASSERT(it != allocator->m_info.end(), "Failed to find the allocated blob in the allocator records");
size_t alignedBlobSize = it->second;

// The blob size reported by VCL will be equal to or smaller than the size recorded by the allocator
_logger.debug("Blob size from VCL: %zu ptr %p", size, static_cast<void*>(blob));
_logger.debug("Allocated vector size: %zu ptr: %p",
allocator.m_size,
static_cast<void*>(allocator.m_allocated));
_logger.debug("Blob size from VCL: %zu ptr %p", blobSize, static_cast<void*>(blob));
_logger.debug("Allocated vector size: %zu ptr: %p", alignedBlobSize, static_cast<void*>(blob));

_logger.debug("compile end, blob size:%d", allocator.m_size);
return make_tensor_from_aligned_addr(allocator.m_allocated, allocator.m_size);
ov::Tensor alignedBlob = make_tensor_from_aligned_addr(blob, alignedBlobSize, allocator);

_logger.debug("compile end, blob size:%zu", alignedBlobSize);
// Remove only the transferred blob, leaving any possible temporary allocations to be safely freed by
// ~vcl_allocator_3
allocator->m_info.erase(it);
return alignedBlob;
} else {
OPENVINO_THROW("Not supported VCL version: %d.%d, please use VCL 6.1 or later",
_vclVersion.major,
Expand Down Expand Up @@ -392,20 +397,23 @@ std::vector<ov::Tensor> VCLCompilerImpl::compileWsOneShot(const std::shared_ptr<
_logger.debug("compiler vcl version: %d.%d", _vclVersion.major, _vclVersion.minor);

_logger.debug("Using vclAllocatedExecutableCreateWSOneShot");
vcl_allocator_2 allocator;
auto allocator = std::make_shared<vcl_allocator_3>();

THROW_ON_FAIL_FOR_VCL("vclAllocatedExecutableCreateWSOneShot",
vclAllocatedExecutableCreateWSOneShot(_compilerHandle, exeDesc, &allocator),
vclAllocatedExecutableCreateWSOneShot(_compilerHandle, exeDesc, allocator.get()),
_logHandle);

if (allocator.m_info.size() == 0) {
if (allocator->m_info.size() == 0) {
OPENVINO_THROW("Failed to create VCL executable, blobCount is zero");
}

std::vector<ov::Tensor> initMainTensors;
for (auto& blob : allocator.m_info) {
initMainTensors.emplace_back(make_tensor_from_aligned_addr(blob.first, blob.second));
for (const auto& blob : allocator->m_info) {
initMainTensors.emplace_back(make_tensor_from_aligned_addr(blob.first, blob.second, allocator));
}
// Clean up m_info, delegating actual physical frees strictly to the Tensor/Deleter from now on.
allocator->m_info.clear();

return initMainTensors;
}

Expand Down
Loading