Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -165,12 +165,8 @@ struct NetworkMetadata final {
* name and compiled network in a format executable by device
*/
struct NetworkDescription final {
NetworkDescription(std::vector<uint8_t>&& compiledNetwork, NetworkMetadata&& metadata)
: compiledNetwork(std::move(compiledNetwork)),
metadata(std::move(metadata)) {}
NetworkDescription(ov::Tensor&& compiledNetWorkTensor, NetworkMetadata&& metadata)
: compiledNetwork(),
metadata(std::move(metadata)),
: metadata(std::move(metadata)),
compiledNetworkTensor(std::move(compiledNetWorkTensor)) {}
// Force move semantics to prevent blob copies
NetworkDescription(const NetworkDescription&) = delete;
Expand All @@ -179,8 +175,6 @@ struct NetworkDescription final {
NetworkDescription& operator=(NetworkDescription&&) = default;
~NetworkDescription() = default;

std::vector<uint8_t> compiledNetwork;

NetworkMetadata metadata;

ov::Tensor compiledNetworkTensor;
Expand Down
177 changes: 66 additions & 111 deletions src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "ze_graph_ext_wrappers.hpp"

namespace {

struct UsedVersion {
int Major;
int Minor;
Expand Down Expand Up @@ -58,123 +59,75 @@ bool isUseBaseModelSerializer(UsedVersion useVersion, const intel_npu::FilteredC
return false;
}

template <typename T>
class ByteAlignedAllocator {
private:
intel_npu::utils::AlignedAllocator allocator_;

public:
using value_type = T;
using size_type = std::size_t;
using difference_type = std::ptrdiff_t;
using pointer = T*;
using const_pointer = const T*;

template <typename U>
struct rebind {
using other = ByteAlignedAllocator<U>;
};

ByteAlignedAllocator() : allocator_(intel_npu::utils::STANDARD_PAGE_SIZE) {}

ByteAlignedAllocator(const ByteAlignedAllocator& other) : allocator_(intel_npu::utils::STANDARD_PAGE_SIZE) {}

template <typename U>
ByteAlignedAllocator(const ByteAlignedAllocator<U>& other) : allocator_(intel_npu::utils::STANDARD_PAGE_SIZE) {}

ByteAlignedAllocator& operator=(const ByteAlignedAllocator& other) {
return *this;
}

T* allocate(size_t n) {
size_t aligned_size = intel_npu::utils::align_size_to_standard_page_size(n);
return static_cast<T*>(allocator_.allocate(aligned_size, 1));
}

void deallocate(T* ptr, size_t n) {
size_t aligned_size = intel_npu::utils::align_size_to_standard_page_size(n);
allocator_.deallocate(ptr, aligned_size, 1);
}

template <typename U>
bool operator==(const ByteAlignedAllocator<U>& other) const {
return allocator_.is_equal(other.allocator_);
}
struct vcl_allocator : vcl_allocator2_t {
vcl_allocator() : vcl_allocator2_t{allocate, deallocate} {}

template <typename U>
bool operator!=(const ByteAlignedAllocator<U>& other) const {
return !(*this == other);
}

size_type max_size() const noexcept {
return std::numeric_limits<size_type>::max() / sizeof(T);
}
};

using AlignedVector = std::vector<uint8_t, ByteAlignedAllocator<uint8_t>>;
struct vcl_allocator_vector : vcl_allocator2_t {
vcl_allocator_vector() : vcl_allocator2_t{vector_allocate, vector_deallocate} {}

static uint8_t* vector_allocate(vcl_allocator2_t* allocator, size_t size) {
vcl_allocator_vector* vecAllocator = static_cast<vcl_allocator_vector*>(allocator);
size_t aligned_size = intel_npu::utils::align_size_to_standard_page_size(size);
auto newVec = std::make_shared<AlignedVector>();
vecAllocator->m_vec = newVec;
vecAllocator->m_vec->resize(aligned_size);
if (intel_npu::utils::memory_and_size_aligned_to_standard_page_size(vecAllocator->m_vec->data(),
vecAllocator->m_vec->size()) == false) {
OPENVINO_THROW("vcl_allocator_vector: allocated memory is not aligned to standard page size");
static uint8_t* allocate(vcl_allocator2_t* allocator, size_t size) {
vcl_allocator* vclAllocator = static_cast<vcl_allocator*>(allocator);
vclAllocator->m_size = intel_npu::utils::align_size_to_standard_page_size(size);
auto allocatedPtr = reinterpret_cast<uint8_t*>(
vclAllocator->m_allocator.allocate(vclAllocator->m_size, intel_npu::utils::STANDARD_PAGE_SIZE));
if (allocatedPtr == nullptr) {
OPENVINO_THROW("Failed to allocate aligned memory for allocator");
} else {
memset(allocatedPtr + size, 0, vclAllocator->m_size - size);
}
return vecAllocator->m_vec->data();
}

static void vector_deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) {
vcl_allocator_vector* vecAllocator = static_cast<vcl_allocator_vector*>(allocator);
vecAllocator->m_vec->clear();
vecAllocator->m_vec->shrink_to_fit();
vclAllocator->m_allocated = allocatedPtr;
return allocatedPtr;
}

std::shared_ptr<AlignedVector> m_vec;
};

struct vcl_allocator_vector_2 : vcl_allocator2_t {
vcl_allocator_vector_2() : vcl_allocator2_t{vector_allocate, vector_deallocate} {}

static uint8_t* vector_allocate(vcl_allocator2_t* allocator, size_t size) {
vcl_allocator_vector_2* vecAllocator = static_cast<vcl_allocator_vector_2*>(allocator);
size_t aligned_size = intel_npu::utils::align_size_to_standard_page_size(size);
auto newVec = std::make_shared<AlignedVector>();
newVec->resize(aligned_size);
uint8_t* ptr = newVec->data();
if (intel_npu::utils::memory_and_size_aligned_to_standard_page_size(newVec->data(), newVec->size()) == false) {
OPENVINO_THROW("vcl_allocator_vector: allocated memory is not aligned to standard page size");
static void deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) {
if (ptr == nullptr) {
OPENVINO_THROW("Pointer is nullptr in deallocate!");
}
vecAllocator->m_vector.emplace_back(newVec);

return ptr;
vcl_allocator* vclAllocator = static_cast<vcl_allocator*>(allocator);
vclAllocator->m_allocator.deallocate(ptr, vclAllocator->m_size, intel_npu::utils::STANDARD_PAGE_SIZE);
}
ov::Allocator m_allocator;
uint8_t* m_allocated = nullptr;
size_t m_size = 0;
};

static void vector_deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) {
vcl_allocator_vector_2* vecAllocator = static_cast<vcl_allocator_vector_2*>(allocator);
auto it = std::find_if(vecAllocator->m_vector.begin(), vecAllocator->m_vector.end(), [ptr](const auto& vec) {
return vec->data() == ptr;
});
struct vcl_allocator_2 : vcl_allocator2_t {
vcl_allocator_2() : vcl_allocator2_t{allocate, deallocate} {}

if (it != vecAllocator->m_vector.end()) {
vecAllocator->m_vector.erase(it);
vecAllocator->m_vector.shrink_to_fit();
static uint8_t* allocate(vcl_allocator2_t* allocator, size_t size) {
vcl_allocator_2* vclAllocator = static_cast<vcl_allocator_2*>(allocator);
size_t alignedSize = intel_npu::utils::align_size_to_standard_page_size(size);
auto allocatedPtr = reinterpret_cast<uint8_t*>(
vclAllocator->m_allocator.allocate(alignedSize, intel_npu::utils::STANDARD_PAGE_SIZE));
if (allocatedPtr == nullptr) {
OPENVINO_THROW("Failed to allocate aligned memory for allocator");
} else {
OPENVINO_THROW("vcl_allocator_vector_2: pointer to deallocate not found");
memset(allocatedPtr + size, 0, alignedSize - size);
}
vclAllocator->m_info.emplace_back(std::make_pair(allocatedPtr, alignedSize));
return allocatedPtr;
}

std::vector<std::shared_ptr<AlignedVector>> m_vector;
static void deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) {
if (ptr == nullptr) {
OPENVINO_THROW("Pointer is nullptr in deallocate!");
}
vcl_allocator_2* vclAllocator = static_cast<vcl_allocator_2*>(allocator);
// 1 is the placeholder value, as size is not needed in deallocate
vclAllocator->m_allocator.deallocate(ptr, 1, intel_npu::utils::STANDARD_PAGE_SIZE);
}
ov::Allocator m_allocator;
std::vector<std::pair<uint8_t*, size_t>> m_info;
};

ov::Tensor make_tensor_from_aligned_vector(std::shared_ptr<AlignedVector> vector) {
auto tensor = ov::Tensor(ov::element::u8, ov::Shape{vector->size()}, vector->data());
ov::Tensor make_tensor_from_aligned_addr(uint8_t* allocated, size_t size) {
ov::Allocator allocator;
auto tensor = ov::Tensor(ov::element::u8, ov::Shape{size}, allocated);
auto impl = ov::get_tensor_impl(std::move(tensor));
impl._so = vector;
std::shared_ptr<void> ptr(allocated, [allocator, size](uint8_t* p) mutable {
if (p == nullptr) {
OPENVINO_THROW("Pointer is nullptr in memory deallocation of make_tensor_from_aligned_addr!");
}
allocator.deallocate(p, size, intel_npu::utils::STANDARD_PAGE_SIZE);
});
impl._so = ptr;
return ov::make_tensor(impl);
}

Expand Down Expand Up @@ -481,7 +434,7 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr<const ov::Mode
// support the latest vcl api
// For VCL 7.4 and later, we can use vclAllocatedExecutableCreate2
_logger.debug("Using vclAllocatedExecutableCreate2 for 7.4 <= VCL");
vcl_allocator_vector allocator;
vcl_allocator allocator;
uint8_t* blob = nullptr;
size_t size = 0;

Expand All @@ -494,14 +447,15 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr<const ov::Mode
// The blob size reported by VCL is equal to or smaller than the size allocated by the allocator
_logger.debug("Blob size from VCL: %zu ptr %p", size, static_cast<void*>(blob));
_logger.debug("Allocated vector size: %zu ptr: %p",
allocator.m_vec->size(),
static_cast<void*>(allocator.m_vec->data()));
allocator.m_size,
static_cast<void*>(allocator.m_allocated));

// Use empty metadata as VCL does not support metadata extraction
NetworkMetadata metadata;

_logger.debug("compile end, blob size:%d", allocator.m_vec->size());
return NetworkDescription(make_tensor_from_aligned_vector(allocator.m_vec), std::move(metadata));
_logger.debug("compile end, blob size:%d", allocator.m_size);
return NetworkDescription(make_tensor_from_aligned_addr(allocator.m_allocated, allocator.m_size),
std::move(metadata));
} else {
OPENVINO_THROW("Not supported VCL version: %d.%d, please use VCL 6.1 or later",
_vclVersion.major,
Expand Down Expand Up @@ -554,22 +508,23 @@ std::vector<std::shared_ptr<NetworkDescription>> VCLCompilerImpl::compileWsOneSh
_logger.debug("compiler vcl version: %d.%d", _vclVersion.major, _vclVersion.minor);

_logger.debug("Using vclAllocatedExecutableCreateWSOneShot");
vcl_allocator_vector_2 allocator;
vcl_allocator_2 allocator;

THROW_ON_FAIL_FOR_VCL("vclAllocatedExecutableCreateWSOneShot",
vclAllocatedExecutableCreateWSOneShot(_compilerHandle, exeDesc, &allocator),
_logHandle);

if (allocator.m_vector.size() == 0) {
if (allocator.m_info.size() == 0) {
OPENVINO_THROW("Failed to create VCL executable, blobCount is zero");
}

std::vector<std::shared_ptr<NetworkDescription>> networkDescrs;
for (auto& blob : allocator.m_vector) {
for (auto& blob : allocator.m_info) {
// Use empty metadata as VCL does not support metadata extraction
NetworkMetadata metadata;
networkDescrs.emplace_back(
std::make_shared<NetworkDescription>(make_tensor_from_aligned_vector(blob), std::move(metadata)));
std::make_shared<NetworkDescription>(make_tensor_from_aligned_addr(blob.first, blob.second),
std::move(metadata)));
}
return networkDescrs;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,6 @@
#include "weightless_graph.hpp"
#include "weightless_utils.hpp"

namespace {

/**
 * @brief Wraps a byte vector in a one-dimensional u8 ov::Tensor without copying the bytes.
 *
 * The tensor is created over the vector's current data pointer and size, after which the
 * vector's storage is moved into a shared holder attached to the tensor implementation.
 *
 * @param vector Source bytes. It is moved from, so the caller's vector is left in a
 *               moved-from state when this function returns.
 * @return A tensor of element type u8 and shape {vector.size()} (size taken before the move).
 */
ov::Tensor make_tensor_from_vector(std::vector<uint8_t>& vector) {
    // Build the view first: size and data pointer must be read while `vector` still owns the buffer.
    auto impl = ov::get_tensor_impl(ov::Tensor(ov::element::u8, ov::Shape{vector.size()}, vector.data()));
    // Hand the storage to a shared holder; the view created above relies on the moved-to
    // vector reusing the same buffer. NOTE(review): `_so` presumably keeps this holder alive
    // for as long as the tensor exists - confirm against the tensor impl type.
    impl._so = std::make_shared<std::vector<uint8_t>>(std::move(vector));
    return ov::make_tensor(impl);
}

} // namespace

namespace intel_npu {

PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct)
Expand Down Expand Up @@ -80,11 +67,8 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compile(const std::shared_ptr<con
_logger.debug("compile end");

ov::Tensor tensor;
if (networkDesc.compiledNetwork.size() > 0) {
tensor = make_tensor_from_vector(networkDesc.compiledNetwork);
} else {
tensor = std::move(networkDesc.compiledNetworkTensor);
}
tensor = std::move(networkDesc.compiledNetworkTensor);

GraphDescriptor graphDesc;
NetworkMetadata networkMeta;

Expand Down Expand Up @@ -151,12 +135,7 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<o
OPENVINO_ASSERT(initMainNetworkDescriptions.size() > 0, "No init schedules have been returned by the compiler");
std::vector<std::shared_ptr<NetworkDescription>> initNetworkDescriptions =
std::move(initMainNetworkDescriptions);

if (mainNetworkDescription->compiledNetwork.size() > 0) {
tensorMain = make_tensor_from_vector(mainNetworkDescription->compiledNetwork);
} else {
tensorMain = std::move(mainNetworkDescription->compiledNetworkTensor);
}
tensorMain = std::move(mainNetworkDescription->compiledNetworkTensor);

if (_zeGraphExt) {
// Depending on the config, we may get an error when trying to
Expand All @@ -179,11 +158,8 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<o
initNetworkMetadata.reserve(initNetworkDescriptions.size());
for (auto& networkDesc : initNetworkDescriptions) {
ov::Tensor tensor;
if (networkDesc->compiledNetwork.size() > 0) {
tensor = make_tensor_from_vector(networkDesc->compiledNetwork);
} else {
tensor = std::move(networkDesc->compiledNetworkTensor);
}
tensor = std::move(networkDesc->compiledNetworkTensor);

GraphDescriptor initGraphDesc;
NetworkMetadata initNetworkMeta;
if (_zeGraphExt) {
Expand Down Expand Up @@ -219,11 +195,8 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<o
while (auto networkDescription =
std::make_shared<NetworkDescription>(_compiler->compileWsIterative(targetModel, localConfig, i++))) {
ov::Tensor tensor;
if (networkDescription->compiledNetwork.size() > 0) {
tensor = make_tensor_from_vector(networkDescription->compiledNetwork);
} else {
tensor = std::move(networkDescription->compiledNetworkTensor);
}
tensor = std::move(networkDescription->compiledNetworkTensor);

GraphDescriptor graphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size());
NetworkMetadata networkMetadata = _zeGraphExt->getNetworkMeta(graphDesc);

Expand Down
Loading