diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/network_metadata.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/network_metadata.hpp index 78f5492e890805..6133f1efac69b2 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/network_metadata.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/network_metadata.hpp @@ -165,12 +165,8 @@ struct NetworkMetadata final { * name and compiled network in a format executable by device */ struct NetworkDescription final { - NetworkDescription(std::vector&& compiledNetwork, NetworkMetadata&& metadata) - : compiledNetwork(std::move(compiledNetwork)), - metadata(std::move(metadata)) {} NetworkDescription(ov::Tensor&& compiledNetWorkTensor, NetworkMetadata&& metadata) - : compiledNetwork(), - metadata(std::move(metadata)), + : metadata(std::move(metadata)), compiledNetworkTensor(std::move(compiledNetWorkTensor)) {} // Force move semantics to prevent blob copies NetworkDescription(const NetworkDescription&) = delete; @@ -179,8 +175,6 @@ struct NetworkDescription final { NetworkDescription& operator=(NetworkDescription&&) = default; ~NetworkDescription() = default; - std::vector compiledNetwork; - NetworkMetadata metadata; ov::Tensor compiledNetworkTensor; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp index 6bf736f9bff797..2d7c114b31e317 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp @@ -18,6 +18,7 @@ #include "ze_graph_ext_wrappers.hpp" namespace { + struct UsedVersion { int Major; int Minor; @@ -58,123 +59,73 @@ bool isUseBaseModelSerializer(UsedVersion useVersion, const intel_npu::FilteredC return false; } -template -class ByteAlignedAllocator { -private: - intel_npu::utils::AlignedAllocator allocator_; - -public: - using value_type = T; - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - using pointer = T*; - using const_pointer = const T*; - - template - struct rebind { - using other = ByteAlignedAllocator; - }; - - ByteAlignedAllocator() : allocator_(intel_npu::utils::STANDARD_PAGE_SIZE) {} - - ByteAlignedAllocator(const ByteAlignedAllocator& other) : allocator_(intel_npu::utils::STANDARD_PAGE_SIZE) {} - - template - ByteAlignedAllocator(const ByteAlignedAllocator& other) : allocator_(intel_npu::utils::STANDARD_PAGE_SIZE) {} +struct vcl_allocator : vcl_allocator2_t { + vcl_allocator() : vcl_allocator2_t{allocate, deallocate} {} - ByteAlignedAllocator& operator=(const ByteAlignedAllocator& other) { - return *this; - } - - T* allocate(size_t n) { - size_t aligned_size = intel_npu::utils::align_size_to_standard_page_size(n); - return static_cast(allocator_.allocate(aligned_size, 1)); - } - - void deallocate(T* ptr, size_t n) { - size_t aligned_size = intel_npu::utils::align_size_to_standard_page_size(n); - allocator_.deallocate(ptr, aligned_size, 1); - } - - template - bool operator==(const ByteAlignedAllocator& other) const { - return allocator_.is_equal(other.allocator_); - } - - template - bool operator!=(const ByteAlignedAllocator& other) const { - return !(*this == other); - } - - size_type max_size() const noexcept { - return std::numeric_limits::max() / sizeof(T); - } -}; - -using AlignedVector = std::vector>; -struct vcl_allocator_vector : vcl_allocator2_t { - vcl_allocator_vector() : vcl_allocator2_t{vector_allocate, vector_deallocate} {} - - static uint8_t* vector_allocate(vcl_allocator2_t* allocator, size_t size) { - vcl_allocator_vector* vecAllocator = static_cast(allocator); - size_t aligned_size = intel_npu::utils::align_size_to_standard_page_size(size); - auto newVec = std::make_shared(); - vecAllocator->m_vec = newVec; - vecAllocator->m_vec->resize(aligned_size); - if (intel_npu::utils::memory_and_size_aligned_to_standard_page_size(vecAllocator->m_vec->data(), - vecAllocator->m_vec->size()) == false) { - OPENVINO_THROW("vcl_allocator_vector: allocated memory is not aligned to standard page size"); + static uint8_t* allocate(vcl_allocator2_t* allocator, size_t size) { + vcl_allocator* vclAllocator = static_cast(allocator); + vclAllocator->m_size = intel_npu::utils::align_size_to_standard_page_size(size); + auto allocatedPtr = reinterpret_cast( + vclAllocator->m_allocator.allocate(vclAllocator->m_size, intel_npu::utils::STANDARD_PAGE_SIZE)); + if (allocatedPtr == nullptr) { + OPENVINO_THROW("Failed to allocate aligned memory for allocator"); } - return vecAllocator->m_vec->data(); + memset(allocatedPtr + size, 0, vclAllocator->m_size - size); + vclAllocator->m_allocated = allocatedPtr; + return allocatedPtr; } - static void vector_deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) { - vcl_allocator_vector* vecAllocator = static_cast(allocator); - vecAllocator->m_vec->clear(); - vecAllocator->m_vec->shrink_to_fit(); + static void deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) { + if (ptr == nullptr) { + OPENVINO_THROW("Pointer is nullptr in deallocate!"); + } + vcl_allocator* vclAllocator = static_cast(allocator); + vclAllocator->m_allocator.deallocate(ptr, vclAllocator->m_size, intel_npu::utils::STANDARD_PAGE_SIZE); } - - std::shared_ptr m_vec; + ov::Allocator m_allocator; + uint8_t* m_allocated = nullptr; + size_t m_size = 0; }; -struct vcl_allocator_vector_2 : vcl_allocator2_t { - vcl_allocator_vector_2() : vcl_allocator2_t{vector_allocate, vector_deallocate} {} - - static uint8_t* vector_allocate(vcl_allocator2_t* allocator, size_t size) { - vcl_allocator_vector_2* vecAllocator = static_cast(allocator); - size_t aligned_size = intel_npu::utils::align_size_to_standard_page_size(size); - auto newVec = std::make_shared(); - newVec->resize(aligned_size); - uint8_t* ptr = newVec->data(); - if (intel_npu::utils::memory_and_size_aligned_to_standard_page_size(newVec->data(), newVec->size()) == false) { - OPENVINO_THROW("vcl_allocator_vector: allocated memory is not aligned to standard page size"); - } - vecAllocator->m_vector.emplace_back(newVec); +struct vcl_allocator_2 : vcl_allocator2_t { + vcl_allocator_2() : vcl_allocator2_t{allocate, deallocate} {} - return ptr; + static uint8_t* allocate(vcl_allocator2_t* allocator, size_t size) { + vcl_allocator_2* vclAllocator = static_cast(allocator); + size_t alignedSize = intel_npu::utils::align_size_to_standard_page_size(size); + auto allocatedPtr = reinterpret_cast( + vclAllocator->m_allocator.allocate(alignedSize, intel_npu::utils::STANDARD_PAGE_SIZE)); + if (allocatedPtr == nullptr) { + OPENVINO_THROW("Failed to allocate aligned memory for allocator"); + } + memset(allocatedPtr + size, 0, alignedSize - size); + vclAllocator->m_info.emplace_back(std::make_pair(allocatedPtr, alignedSize)); + return allocatedPtr; } - static void vector_deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) { - vcl_allocator_vector_2* vecAllocator = static_cast(allocator); - auto it = std::find_if(vecAllocator->m_vector.begin(), vecAllocator->m_vector.end(), [ptr](const auto& vec) { - return vec->data() == ptr; - }); - - if (it != vecAllocator->m_vector.end()) { - vecAllocator->m_vector.erase(it); - vecAllocator->m_vector.shrink_to_fit(); - } else { - OPENVINO_THROW("vcl_allocator_vector_2: pointer to deallocate not found"); + static void deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) { + if (ptr == nullptr) { + OPENVINO_THROW("Pointer is nullptr in deallocate!"); } + vcl_allocator_2* vclAllocator = static_cast(allocator); + // 1 is the placeholder value, as size is not needed in deallocate + vclAllocator->m_allocator.deallocate(ptr, 1, intel_npu::utils::STANDARD_PAGE_SIZE); } - - std::vector> m_vector; + ov::Allocator m_allocator; + std::vector> m_info; }; -ov::Tensor make_tensor_from_aligned_vector(std::shared_ptr vector) { - auto tensor = ov::Tensor(ov::element::u8, ov::Shape{vector->size()}, vector->data()); +ov::Tensor make_tensor_from_aligned_addr(uint8_t* allocated, size_t size) { + ov::Allocator allocator; + auto tensor = ov::Tensor(ov::element::u8, ov::Shape{size}, allocated); auto impl = ov::get_tensor_impl(std::move(tensor)); - impl._so = vector; + std::shared_ptr ptr(allocated, [allocator, size](uint8_t* p) mutable { + if (p == nullptr) { + OPENVINO_THROW("Pointer is nullptr in memory deallocation of make_tensor_from_aligned_addr!"); + } + allocator.deallocate(p, size, intel_npu::utils::STANDARD_PAGE_SIZE); + }); + impl._so = ptr; return ov::make_tensor(impl); } @@ -481,7 +432,7 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr(blob)); _logger.debug("Allocated vector size: %zu ptr: %p", - allocator.m_vec->size(), - static_cast(allocator.m_vec->data())); + allocator.m_size, + static_cast(allocator.m_allocated)); // Use empty metadata as VCL does not support metadata extraction NetworkMetadata metadata; - _logger.debug("compile end, blob size:%d", allocator.m_vec->size()); - return NetworkDescription(make_tensor_from_aligned_vector(allocator.m_vec), std::move(metadata)); + _logger.debug("compile end, blob size:%d", allocator.m_size); + return NetworkDescription(make_tensor_from_aligned_addr(allocator.m_allocated, allocator.m_size), + std::move(metadata)); } else { OPENVINO_THROW("Not supported VCL version: %d.%d, please use VCL 6.1 or later", _vclVersion.major, @@ -554,22 +506,23 @@ std::vector> VCLCompilerImpl::compileWsOneSh _logger.debug("compiler vcl version: %d.%d", _vclVersion.major, _vclVersion.minor); _logger.debug("Using vclAllocatedExecutableCreateWSOneShot"); - vcl_allocator_vector_2 allocator; + vcl_allocator_2 allocator; THROW_ON_FAIL_FOR_VCL("vclAllocatedExecutableCreateWSOneShot", vclAllocatedExecutableCreateWSOneShot(_compilerHandle, exeDesc, &allocator), _logHandle); - if (allocator.m_vector.size() == 0) { + if (allocator.m_info.size() == 0) { OPENVINO_THROW("Failed to create VCL executable, blobCount is zero"); } std::vector> networkDescrs; - for (auto& blob : allocator.m_vector) { + for (auto& blob : allocator.m_info) { // Use empty metadata as VCL does not support metadata extraction NetworkMetadata metadata; networkDescrs.emplace_back( - std::make_shared(make_tensor_from_aligned_vector(blob), std::move(metadata))); + std::make_shared(make_tensor_from_aligned_addr(blob.first, blob.second), + std::move(metadata))); } return networkDescrs; } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index f33f9ed4098134..5af149fd9ab67a 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -24,19 +24,6 @@ #include "weightless_graph.hpp" #include "weightless_utils.hpp" -namespace { - -ov::Tensor make_tensor_from_vector(std::vector& vector) { - auto tensor = ov::Tensor(ov::element::u8, ov::Shape{vector.size()}, vector.data()); - auto impl = ov::get_tensor_impl(std::move(tensor)); - std::shared_ptr> sharedCompiledNetwork = - std::make_shared>(std::move(vector)); - impl._so = std::move(sharedCompiledNetwork); - return ov::make_tensor(impl); -} - -} // namespace - namespace intel_npu { PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct) @@ -80,11 +67,8 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr 0) { - tensor = make_tensor_from_vector(networkDesc.compiledNetwork); - } else { - tensor = std::move(networkDesc.compiledNetworkTensor); - } + tensor = std::move(networkDesc.compiledNetworkTensor); + GraphDescriptor graphDesc; NetworkMetadata networkMeta; @@ -151,12 +135,7 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr 0, "No init schedules have been returned by the compiler"); std::vector> initNetworkDescriptions = std::move(initMainNetworkDescriptions); - - if (mainNetworkDescription->compiledNetwork.size() > 0) { - tensorMain = make_tensor_from_vector(mainNetworkDescription->compiledNetwork); - } else { - tensorMain = std::move(mainNetworkDescription->compiledNetworkTensor); - } + tensorMain = std::move(mainNetworkDescription->compiledNetworkTensor); if (_zeGraphExt) { // Depending on the config, we may get an error when trying to @@ -179,11 +158,8 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptrcompiledNetwork.size() > 0) { - tensor = make_tensor_from_vector(networkDesc->compiledNetwork); - } else { - tensor = std::move(networkDesc->compiledNetworkTensor); - } + tensor = std::move(networkDesc->compiledNetworkTensor); + GraphDescriptor initGraphDesc; NetworkMetadata initNetworkMeta; if (_zeGraphExt) { @@ -219,11 +195,8 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr(_compiler->compileWsIterative(targetModel, localConfig, i++))) { ov::Tensor tensor; - if (networkDescription->compiledNetwork.size() > 0) { - tensor = make_tensor_from_vector(networkDescription->compiledNetwork); - } else { - tensor = std::move(networkDescription->compiledNetworkTensor); - } + tensor = std::move(networkDescription->compiledNetworkTensor); + GraphDescriptor graphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size()); NetworkMetadata networkMetadata = _zeGraphExt->getNetworkMeta(graphDesc);