openvinotoolkit · pereanub · Feb 13, 2026 · Feb 11, 2026 · Feb 11, 2026 · Feb 11, 2026
@@ -165,12 +165,8 @@ struct NetworkMetadata final {
  * name and compiled network in a format executable by device
  */
 struct NetworkDescription final {
-    NetworkDescription(std::vector<uint8_t>&& compiledNetwork, NetworkMetadata&& metadata)
-        : compiledNetwork(std::move(compiledNetwork)),
-          metadata(std::move(metadata)) {}
     NetworkDescription(ov::Tensor&& compiledNetWorkTensor, NetworkMetadata&& metadata)
-        : compiledNetwork(),
-          metadata(std::move(metadata)),
+        : metadata(std::move(metadata)),
           compiledNetworkTensor(std::move(compiledNetWorkTensor)) {}
     // Force move semantics to prevent blob copies
     NetworkDescription(const NetworkDescription&) = delete;
@@ -179,8 +175,6 @@ struct NetworkDescription final {
     NetworkDescription& operator=(NetworkDescription&&) = default;
     ~NetworkDescription() = default;
 
-    std::vector<uint8_t> compiledNetwork;
-
     NetworkMetadata metadata;
 
     ov::Tensor compiledNetworkTensor;

@@ -18,6 +18,7 @@
 #include "ze_graph_ext_wrappers.hpp"
 
 namespace {
+
 struct UsedVersion {
     int Major;
     int Minor;
@@ -58,123 +59,75 @@ bool isUseBaseModelSerializer(UsedVersion useVersion, const intel_npu::FilteredC
     return false;
 }
 
-template <typename T>
-class ByteAlignedAllocator {
-private:
-    intel_npu::utils::AlignedAllocator allocator_;
-
-public:
-    using value_type = T;
-    using size_type = std::size_t;
-    using difference_type = std::ptrdiff_t;
-    using pointer = T*;
-    using const_pointer = const T*;
-
-    template <typename U>
-    struct rebind {
-        using other = ByteAlignedAllocator<U>;
-    };
-
-    ByteAlignedAllocator() : allocator_(intel_npu::utils::STANDARD_PAGE_SIZE) {}
-
-    ByteAlignedAllocator(const ByteAlignedAllocator& other) : allocator_(intel_npu::utils::STANDARD_PAGE_SIZE) {}
-
-    template <typename U>
-    ByteAlignedAllocator(const ByteAlignedAllocator<U>& other) : allocator_(intel_npu::utils::STANDARD_PAGE_SIZE) {}
-
-    ByteAlignedAllocator& operator=(const ByteAlignedAllocator& other) {
-        return *this;
-    }
-
-    T* allocate(size_t n) {
-        size_t aligned_size = intel_npu::utils::align_size_to_standard_page_size(n);
-        return static_cast<T*>(allocator_.allocate(aligned_size, 1));
-    }
-
-    void deallocate(T* ptr, size_t n) {
-        size_t aligned_size = intel_npu::utils::align_size_to_standard_page_size(n);
-        allocator_.deallocate(ptr, aligned_size, 1);
-    }
-
-    template <typename U>
-    bool operator==(const ByteAlignedAllocator<U>& other) const {
-        return allocator_.is_equal(other.allocator_);
-    }
+struct vcl_allocator : vcl_allocator2_t {
+    vcl_allocator() : vcl_allocator2_t{allocate, deallocate} {}
 
-    template <typename U>
-    bool operator!=(const ByteAlignedAllocator<U>& other) const {
-        return !(*this == other);
-    }
-
-    size_type max_size() const noexcept {
-        return std::numeric_limits<size_type>::max() / sizeof(T);
-    }
-};
-
-using AlignedVector = std::vector<uint8_t, ByteAlignedAllocator<uint8_t>>;
-struct vcl_allocator_vector : vcl_allocator2_t {
-    vcl_allocator_vector() : vcl_allocator2_t{vector_allocate, vector_deallocate} {}
-
-    static uint8_t* vector_allocate(vcl_allocator2_t* allocator, size_t size) {
-        vcl_allocator_vector* vecAllocator = static_cast<vcl_allocator_vector*>(allocator);
-        size_t aligned_size = intel_npu::utils::align_size_to_standard_page_size(size);
-        auto newVec = std::make_shared<AlignedVector>();
-        vecAllocator->m_vec = newVec;
-        vecAllocator->m_vec->resize(aligned_size);
-        if (intel_npu::utils::memory_and_size_aligned_to_standard_page_size(vecAllocator->m_vec->data(),
-                                                                            vecAllocator->m_vec->size()) == false) {
-            OPENVINO_THROW("vcl_allocator_vector: allocated memory is not aligned to standard page size");
+    static uint8_t* allocate(vcl_allocator2_t* allocator, size_t size) {
+        vcl_allocator* vclAllocator = static_cast<vcl_allocator*>(allocator);
+        vclAllocator->m_size = intel_npu::utils::align_size_to_standard_page_size(size);
+        auto allocatedPtr = reinterpret_cast<uint8_t*>(
+            vclAllocator->m_allocator.allocate(vclAllocator->m_size, intel_npu::utils::STANDARD_PAGE_SIZE));
+        if (allocatedPtr == nullptr) {
+            OPENVINO_THROW("Failed to allocate aligned memory for allocator");
+        } else {
+            memset(allocatedPtr + size, 0, vclAllocator->m_size - size);
         }
-        return vecAllocator->m_vec->data();
-    }
-
-    static void vector_deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) {
-        vcl_allocator_vector* vecAllocator = static_cast<vcl_allocator_vector*>(allocator);
-        vecAllocator->m_vec->clear();
-        vecAllocator->m_vec->shrink_to_fit();
+        vclAllocator->m_allocated = allocatedPtr;
+        return allocatedPtr;
     }
 
-    std::shared_ptr<AlignedVector> m_vec;
-};
-
-struct vcl_allocator_vector_2 : vcl_allocator2_t {
-    vcl_allocator_vector_2() : vcl_allocator2_t{vector_allocate, vector_deallocate} {}
-
-    static uint8_t* vector_allocate(vcl_allocator2_t* allocator, size_t size) {
-        vcl_allocator_vector_2* vecAllocator = static_cast<vcl_allocator_vector_2*>(allocator);
-        size_t aligned_size = intel_npu::utils::align_size_to_standard_page_size(size);
-        auto newVec = std::make_shared<AlignedVector>();
-        newVec->resize(aligned_size);
-        uint8_t* ptr = newVec->data();
-        if (intel_npu::utils::memory_and_size_aligned_to_standard_page_size(newVec->data(), newVec->size()) == false) {
-            OPENVINO_THROW("vcl_allocator_vector: allocated memory is not aligned to standard page size");
+    static void deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) {
+        if (ptr == nullptr) {
+            OPENVINO_THROW("Pointer is nullptr in deallocate!");
         }
-        vecAllocator->m_vector.emplace_back(newVec);
-
-        return ptr;
+        vcl_allocator* vclAllocator = static_cast<vcl_allocator*>(allocator);
+        vclAllocator->m_allocator.deallocate(ptr, vclAllocator->m_size, intel_npu::utils::STANDARD_PAGE_SIZE);
     }
+    ov::Allocator m_allocator;
+    uint8_t* m_allocated = nullptr;
+    size_t m_size = 0;
+};
 
-    static void vector_deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) {
-        vcl_allocator_vector_2* vecAllocator = static_cast<vcl_allocator_vector_2*>(allocator);
-        auto it = std::find_if(vecAllocator->m_vector.begin(), vecAllocator->m_vector.end(), [ptr](const auto& vec) {
-            return vec->data() == ptr;
-        });
+struct vcl_allocator_2 : vcl_allocator2_t {
+    vcl_allocator_2() : vcl_allocator2_t{allocate, deallocate} {}
 
-        if (it != vecAllocator->m_vector.end()) {
-            vecAllocator->m_vector.erase(it);
-            vecAllocator->m_vector.shrink_to_fit();
+    static uint8_t* allocate(vcl_allocator2_t* allocator, size_t size) {
+        vcl_allocator_2* vclAllocator = static_cast<vcl_allocator_2*>(allocator);
+        size_t alignedSize = intel_npu::utils::align_size_to_standard_page_size(size);
+        auto allocatedPtr = reinterpret_cast<uint8_t*>(
+            vclAllocator->m_allocator.allocate(alignedSize, intel_npu::utils::STANDARD_PAGE_SIZE));
+        if (allocatedPtr == nullptr) {
+            OPENVINO_THROW("Failed to allocate aligned memory for allocator");
         } else {
-            OPENVINO_THROW("vcl_allocator_vector_2: pointer to deallocate not found");
+            memset(allocatedPtr + size, 0, alignedSize - size);
         }
+        vclAllocator->m_info.emplace_back(std::make_pair(allocatedPtr, alignedSize));
+        return allocatedPtr;
     }
 
-    std::vector<std::shared_ptr<AlignedVector>> m_vector;
+    static void deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) {
+        if (ptr == nullptr) {
+            OPENVINO_THROW("Pointer is nullptr in deallocate!");
+        }
+        vcl_allocator_2* vclAllocator = static_cast<vcl_allocator_2*>(allocator);
+        // 1 is a placeholder value, as the size is not needed in deallocate
+        vclAllocator->m_allocator.deallocate(ptr, 1, intel_npu::utils::STANDARD_PAGE_SIZE);
+    }
+    ov::Allocator m_allocator;
+    std::vector<std::pair<uint8_t*, size_t>> m_info;
 };
 
-ov::Tensor make_tensor_from_aligned_vector(std::shared_ptr<AlignedVector> vector) {
-    auto tensor = ov::Tensor(ov::element::u8, ov::Shape{vector->size()}, vector->data());
+ov::Tensor make_tensor_from_aligned_addr(uint8_t* allocated, size_t size) {
+    ov::Allocator allocator;
+    auto tensor = ov::Tensor(ov::element::u8, ov::Shape{size}, allocated);
     auto impl = ov::get_tensor_impl(std::move(tensor));
-    impl._so = vector;
+    std::shared_ptr<void> ptr(allocated, [allocator, size](uint8_t* p) mutable {
+        if (p == nullptr) {
+            OPENVINO_THROW("Pointer is nullptr in memory deallocation of make_tensor_from_aligned_addr!");
+        }
+        allocator.deallocate(p, size, intel_npu::utils::STANDARD_PAGE_SIZE);
+    });
+    impl._so = ptr;
     return ov::make_tensor(impl);
 }
 
@@ -481,7 +434,7 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr<const ov::Mode
         // support the latest VCL API
         // For VCL 7.4 and later, we can use vclAllocatedExecutableCreate2
         _logger.debug("Using vclAllocatedExecutableCreate2 for 7.4 <= VCL");
-        vcl_allocator_vector allocator;
+        vcl_allocator allocator;
         uint8_t* blob = nullptr;
         size_t size = 0;
 
@@ -494,14 +447,15 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr<const ov::Mode
         // The blob size reported by VCL will be equal to or smaller than the size allocated by the allocator
         _logger.debug("Blob size from VCL: %zu ptr %p", size, static_cast<void*>(blob));
         _logger.debug("Allocated vector size: %zu ptr: %p",
-                      allocator.m_vec->size(),
-                      static_cast<void*>(allocator.m_vec->data()));
+                      allocator.m_size,
+                      static_cast<void*>(allocator.m_allocated));
 
         // Use empty metadata as VCL does not support metadata extraction
         NetworkMetadata metadata;
 
-        _logger.debug("compile end, blob size:%d", allocator.m_vec->size());
-        return NetworkDescription(make_tensor_from_aligned_vector(allocator.m_vec), std::move(metadata));
+        _logger.debug("compile end, blob size:%d", allocator.m_size);
+        return NetworkDescription(make_tensor_from_aligned_addr(allocator.m_allocated, allocator.m_size),
+                                  std::move(metadata));
     } else {
         OPENVINO_THROW("Not supported VCL version: %d.%d, please use VCL 6.1 or later",
                        _vclVersion.major,
@@ -554,22 +508,23 @@ std::vector<std::shared_ptr<NetworkDescription>> VCLCompilerImpl::compileWsOneSh
     _logger.debug("compiler vcl version: %d.%d", _vclVersion.major, _vclVersion.minor);
 
     _logger.debug("Using vclAllocatedExecutableCreateWSOneShot");
-    vcl_allocator_vector_2 allocator;
+    vcl_allocator_2 allocator;
 
     THROW_ON_FAIL_FOR_VCL("vclAllocatedExecutableCreateWSOneShot",
                           vclAllocatedExecutableCreateWSOneShot(_compilerHandle, exeDesc, &allocator),
                           _logHandle);
 
-    if (allocator.m_vector.size() == 0) {
+    if (allocator.m_info.size() == 0) {
         OPENVINO_THROW("Failed to create VCL executable, blobCount is zero");
     }
 
     std::vector<std::shared_ptr<NetworkDescription>> networkDescrs;
-    for (auto& blob : allocator.m_vector) {
+    for (auto& blob : allocator.m_info) {
         // Use empty metadata as VCL does not support metadata extraction
         NetworkMetadata metadata;
         networkDescrs.emplace_back(
-            std::make_shared<NetworkDescription>(make_tensor_from_aligned_vector(blob), std::move(metadata)));
+            std::make_shared<NetworkDescription>(make_tensor_from_aligned_addr(blob.first, blob.second),
+                                                 std::move(metadata)));
     }
     return networkDescrs;
 }

@@ -24,19 +24,6 @@
 #include "weightless_graph.hpp"
 #include "weightless_utils.hpp"
 
-namespace {
-
-ov::Tensor make_tensor_from_vector(std::vector<uint8_t>& vector) {
-    auto tensor = ov::Tensor(ov::element::u8, ov::Shape{vector.size()}, vector.data());
-    auto impl = ov::get_tensor_impl(std::move(tensor));
-    std::shared_ptr<std::vector<uint8_t>> sharedCompiledNetwork =
-        std::make_shared<std::vector<uint8_t>>(std::move(vector));
-    impl._so = std::move(sharedCompiledNetwork);
-    return ov::make_tensor(impl);
-}
-
-}  // namespace
-
 namespace intel_npu {
 
 PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct)
@@ -80,11 +67,8 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compile(const std::shared_ptr<con
     _logger.debug("compile end");
 
     ov::Tensor tensor;
-    if (networkDesc.compiledNetwork.size() > 0) {
-        tensor = make_tensor_from_vector(networkDesc.compiledNetwork);
-    } else {
-        tensor = std::move(networkDesc.compiledNetworkTensor);
-    }
+    tensor = std::move(networkDesc.compiledNetworkTensor);
+
     GraphDescriptor graphDesc;
     NetworkMetadata networkMeta;
 
@@ -151,12 +135,7 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<o
         OPENVINO_ASSERT(initMainNetworkDescriptions.size() > 0, "No init schedules have been returned by the compiler");
         std::vector<std::shared_ptr<NetworkDescription>> initNetworkDescriptions =
             std::move(initMainNetworkDescriptions);
-
-        if (mainNetworkDescription->compiledNetwork.size() > 0) {
-            tensorMain = make_tensor_from_vector(mainNetworkDescription->compiledNetwork);
-        } else {
-            tensorMain = std::move(mainNetworkDescription->compiledNetworkTensor);
-        }
+        tensorMain = std::move(mainNetworkDescription->compiledNetworkTensor);
 
         if (_zeGraphExt) {
             // Depending on the config, we may get an error when trying to
@@ -179,11 +158,8 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<o
         initNetworkMetadata.reserve(initNetworkDescriptions.size());
         for (auto& networkDesc : initNetworkDescriptions) {
             ov::Tensor tensor;
-            if (networkDesc->compiledNetwork.size() > 0) {
-                tensor = make_tensor_from_vector(networkDesc->compiledNetwork);
-            } else {
-                tensor = std::move(networkDesc->compiledNetworkTensor);
-            }
+            tensor = std::move(networkDesc->compiledNetworkTensor);
+
             GraphDescriptor initGraphDesc;
             NetworkMetadata initNetworkMeta;
             if (_zeGraphExt) {
@@ -219,11 +195,8 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<o
         while (auto networkDescription =
                    std::make_shared<NetworkDescription>(_compiler->compileWsIterative(targetModel, localConfig, i++))) {
             ov::Tensor tensor;
-            if (networkDescription->compiledNetwork.size() > 0) {
-                tensor = make_tensor_from_vector(networkDescription->compiledNetwork);
-            } else {
-                tensor = std::move(networkDescription->compiledNetworkTensor);
-            }
+            tensor = std::move(networkDescription->compiledNetworkTensor);
+
             GraphDescriptor graphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size());
             NetworkMetadata networkMetadata = _zeGraphExt->getNetworkMeta(graphDesc);