@@ -149,7 +149,8 @@ class DynamicGraph final : public IDynamicGraph {
DynamicGraph(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct,
ov::Tensor blob,
bool blobAllocatedByPlugin,
const FilteredConfig& config);
const FilteredConfig& config,
bool isOptimizedDynamicStridesSupported);

std::pair<uint64_t, std::optional<std::vector<uint64_t>>> export_blob(std::ostream& stream) const override;

@@ -230,7 +231,7 @@ class DynamicGraph final : public IDynamicGraph {
* @details The attribute contains a value only if the plugin performs the batches splitting operation.
*/
std::optional<std::size_t> _batchSize = std::nullopt;

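/**
 * @brief True when the driver reports support for the OPTIMIZED_DYNAMIC_STRIDE
 * compiler option, i.e. command lists may be patched in place when only tensor
 * pointers or strides change while the shape stays the same.
 */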
bool _isOptimizedDynamicStridesSupported = false;
Logger _logger;

std::unique_ptr<Impl> _impl;
@@ -90,6 +90,10 @@ class ZeGraphExtWrappers {

void evict_memory(const GraphDescriptor& graphDescriptor) const;

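// Whether the driver supports the OPTIMIZED_DYNAMIC_STRIDE compiler option
// (queried once in the constructor via pfnCompilerIsOptionSupported).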
bool isOptimizedDynamicStridesSupported() const {
return _isOptimizedDynamicStridesSupported;
}

private:
void getMetadata(ze_graph_handle_t graphHandle,
uint32_t indexUsedByDriver,
@@ -103,6 +107,7 @@
std::shared_ptr<ZeroInitStructsHolder> _zeroInitStruct;
uint32_t _graphExtVersion;
bool _isCompilerOptionQuerySupported;
bool _isOptimizedDynamicStridesSupported;

Logger _logger;
};
@@ -24,7 +24,10 @@ class DynamicGraphImpl : public DynamicGraph::Impl {
using MemRefType = DynamicGraph::MemRefType;

public:
DynamicGraphImpl() : _engineProperties{}, _logger("DynamicGraphImpl", Logger::global().level()) {}
explicit DynamicGraphImpl(bool isOptimizedDynamicStridesSupported)
: _engineProperties{},
_isOptimizedDynamicStridesSupported(isOptimizedDynamicStridesSupported),
_logger("DynamicGraphImpl", Logger::global().level()) {}
void initialize(std::optional<ov::Tensor>& blob, NetworkMetadata& metadata) override;
void createExecutionEngine(std::optional<ov::Tensor>& blob);
void prepareMetadata(NetworkMetadata& metadata);
@@ -62,6 +65,7 @@ class DynamicGraphImpl : public DynamicGraph::Impl {
npu_vm_runtime_properties_t _engineProperties;
DynamicGraph::GraphArguments _binding;
bool _initialized = false;
bool _isOptimizedDynamicStridesSupported = false;
Logger _logger;
};

@@ -309,9 +313,12 @@ void DynamicGraphImpl::executeGraph(const std::shared_ptr<ZeroInitStructsHolder>
args._impl ? std::static_pointer_cast<DynamicGraph::GraphArgumentsImpl>(args._impl)
: std::make_shared<DynamicGraph::GraphArgumentsImpl>();

std::vector<uint64_t> commandListIndexArray;
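// commandListIndexArray collects the flattened argument indices whose buffers
// changed in place: inputs occupy [0, inputSize) and outputs follow at
// [inputSize, inputSize + outputCount). It is handed to
// npuVMRuntimeUpdateMutableCommandList below so that only those arguments are
// patched instead of re-recording the command lists.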
bool noTensorChange = true;
npu_vm_runtime_execute_params_t* params = &argsImpl->_executeParams;
for (auto& in : args._inputs) {
auto inputSize = args._inputs.size();
for (size_t i = 0; i < inputSize; ++i) {
auto& in = args._inputs[i];
std::shared_ptr<DynamicGraph::MemRefTypeImpl> inImpl =
std::static_pointer_cast<DynamicGraph::MemRefTypeImpl>(in._impl);
if (inImpl == nullptr) {
@@ -323,9 +330,18 @@
argsImpl->_inputMemRefs.push_back(inImpl->_memRef);
} else if (inImpl->_ptrUpdated || inImpl->_shapeUpdated || inImpl->_strideUpdated) {
noTensorChange = false;
if (!inImpl->_shapeUpdated && (inImpl->_strideUpdated || inImpl->_ptrUpdated)) {
_logger.debug("Input tensor stride or pointer change detected for index %d, but shape is not updated, "
"which is an optimized case for dynamic shape with static dimensions.",
static_cast<int>(i));
commandListIndexArray.push_back(i);
} else {
_logger.debug("Input tensor pointer change detected for index %d", static_cast<int>(i));
}
}
}
for (auto& out : args._outputs) {
for (size_t i = 0; i < args._outputs.size(); ++i) {
auto& out = args._outputs[i];
std::shared_ptr<DynamicGraph::MemRefTypeImpl> outImpl =
std::static_pointer_cast<DynamicGraph::MemRefTypeImpl>(out._impl);
if (outImpl == nullptr) {
@@ -337,6 +353,14 @@
argsImpl->_outputMemRefs.push_back(outImpl->_memRef);
} else if (outImpl->_ptrUpdated || outImpl->_shapeUpdated || outImpl->_strideUpdated) {
noTensorChange = false;
if (!outImpl->_shapeUpdated && (outImpl->_strideUpdated || outImpl->_ptrUpdated)) {
_logger.debug("Output tensor stride or pointer change detected for index %d, but shape is not updated, "
"which is an optimized case for dynamic shape with static dimensions. ",
static_cast<int>(i));
commandListIndexArray.push_back(inputSize + i);
} else {
_logger.debug("Output tensor pointer change detected for index %d", static_cast<int>(i));
}
}
}

@@ -348,7 +372,28 @@
zeCommandListReset(cmdList);
}
} else {
_logger.debug("Reuse command list without update since no tensor change detected");
if (!commandListIndexArray.empty()) {
_logger.debug("Update command list with new tensor pointer");
if (params->executionContext == nullptr) {
OPENVINO_THROW(
"Execution context is not created; cannot reuse the command list via the UpdateMutableCommandList API");
}

if (npuVMRuntimeUpdateMutableCommandList(_engine,
params,
commandListIndexArray.data(),
commandListIndexArray.size()) != NPU_VM_RUNTIME_RESULT_SUCCESS) {
OPENVINO_THROW("Failed to update the mutable command list via the VM runtime engine");
}

// According to the spec, zeCommandListClose must be called after
// UpdateMutableCommandList.
for (auto& cmdList : commandLists) {
zeCommandListClose(cmdList);
}
} else {
_logger.debug("Reuse command list without update since no tensor change detected");
}

auto result = zeCommandQueueExecuteCommandLists(commandQueue,
static_cast<uint32_t>(commandLists.size()),
@@ -441,17 +486,19 @@ void DynamicGraphImpl::predictOutputShape(std::vector<MemRefType>& inputDescript
DynamicGraph::DynamicGraph(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct,
ov::Tensor blob,
bool blobAllocatedByPlugin,
const FilteredConfig& config)
const FilteredConfig& config,
bool isOptimizedDynamicStridesSupported)
: _zeroInitStruct(zeroInitStruct),
_blob(std::move(blob)),
_isOptimizedDynamicStridesSupported(isOptimizedDynamicStridesSupported),
_logger("DynamicGraph", config.get<LOG_LEVEL>()) {
_logger.info("Create DynamicGraph");
if (!config.get<CREATE_EXECUTOR>() || config.get<DEFER_WEIGHTS_LOAD>()) {
_logger.info("Graph initialize is deferred from the \"Graph\" constructor");
return;
}

_impl = std::make_unique<DynamicGraphImpl>();
_impl = std::make_unique<DynamicGraphImpl>(isOptimizedDynamicStridesSupported);

// TODO: metadata needs to be parsed even when CREATE_EXECUTOR is 0 or DEFER_WEIGHTS_LOAD is YES, keep here to
// support pure compilation without vm runtime initialize VM execution engine, metadata, input&output
@@ -583,7 +630,7 @@ void DynamicGraph::initialize_impl(const FilteredConfig& config) {
_logger.debug("Graph initialize start");

if (!_impl) {
_impl = std::make_unique<DynamicGraphImpl>();
_impl = std::make_unique<DynamicGraphImpl>(_isOptimizedDynamicStridesSupported);
// initialize VM execution engine, metadata, input&output descriptors
_impl->initialize(_blob, _metadata);
_num_of_subgraphs = _impl->getNumSubgraphs();
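A condensed sketch of the reuse branch above may help (illustrative only: logging and error handling are elided, and the outer condition that selects this branch lies outside the visible hunk):

// Reuse path: shapes are unchanged, so the recorded command lists stay valid.
if (!commandListIndexArray.empty()) {
    // Pointers/strides moved: patch the affected arguments in place...
    npuVMRuntimeUpdateMutableCommandList(_engine, params,
                                         commandListIndexArray.data(),
                                         commandListIndexArray.size());
    // ...and re-close the lists, as the spec requires after an update.
    for (auto& cmdList : commandLists) {
        zeCommandListClose(cmdList);
    }
}
// Submit the (possibly patched) command lists without re-recording them.
zeCommandQueueExecuteCommandLists(commandQueue,
                                  static_cast<uint32_t>(commandLists.size()),
                                  commandLists.data(), nullptr);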
6 changes: 5 additions & 1 deletion src/plugins/intel_npu/src/compiler_adapter/src/parser.cpp
@@ -40,7 +40,11 @@ std::shared_ptr<IGraph> Parser::parse(const ov::Tensor& mainBlob,
}
if (header.find("llvm") != std::string::npos || header.find("NPUByte\x00") != std::string::npos) {
_logger.debug("Create graph for dynamic blob, use internal function to get metadata!");
return std::make_shared<DynamicGraph>(_zeroInitStruct, mainBlob, true, config);
return std::make_shared<DynamicGraph>(_zeroInitStruct,
mainBlob,
true,
config,
_zeGraphExt->isOptimizedDynamicStridesSupported());
}

GraphDescriptor mainGraphDesc;
@@ -70,7 +70,11 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compile(const std::shared_ptr<con
if (config.get<COMPILATION_MODE>() == "HostCompile") {
// metadata will be obtained in initialize() of DynamicGraph
_logger.debug("Use dynamicGraph to hold blob for HostCompile mode!");
return std::make_shared<DynamicGraph>(_zeroInitStruct, std::move(tensor), true, config);
return std::make_shared<DynamicGraph>(_zeroInitStruct,
std::move(tensor),
true,
config,
_zeGraphExt->isOptimizedDynamicStridesSupported());
}

GraphDescriptor graphDesc;
@@ -146,6 +146,15 @@ ZeGraphExtWrappers::ZeGraphExtWrappers(const std::shared_ptr<ZeroInitStructsHold
nullptr) == ZE_RESULT_SUCCESS;
}
#endif
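// Reuse the same query mechanism to probe the driver for the
// OPTIMIZED_DYNAMIC_STRIDE option; the result is forwarded to DynamicGraph
// through Parser::parse and PluginCompilerAdapter::compile (see above).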
if (_isCompilerOptionQuerySupported) {
_isOptimizedDynamicStridesSupported =
_zeroInitStruct->getGraphDdiTable().pfnCompilerIsOptionSupported(_zeroInitStruct->getDevice(),
ZE_NPU_DRIVER_OPTIONS,
"OPTIMIZED_DYNAMIC_STRIDE",
nullptr) == ZE_RESULT_SUCCESS;
_logger.debug("OPTIMIZED_DYNAMIC_STRIDE compiler option is %s",
_isOptimizedDynamicStridesSupported ? "supported" : "not supported");
}
}

ZeGraphExtWrappers::~ZeGraphExtWrappers() {
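Taken together, the changes thread one driver capability bit from the extension wrapper down to the runtime implementation. A condensed, illustrative sketch of the plumbing (abbreviated arguments; not the literal call sites):

// 1. ZeGraphExtWrappers probes the driver once at construction time.
ZeGraphExtWrappers ext(zeroInitStruct);
bool optimized = ext.isOptimizedDynamicStridesSupported();

// 2. Parser::parse and PluginCompilerAdapter::compile forward the flag
//    through the new DynamicGraph constructor parameter.
auto graph = std::make_shared<DynamicGraph>(zeroInitStruct, blob,
                                            /*blobAllocatedByPlugin=*/true,
                                            config, optimized);

// 3. DynamicGraph stores it in _isOptimizedDynamicStridesSupported and hands
//    it to DynamicGraphImpl, where the optimized update path can consult it.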