Skip to content

Commit 5604706

Browse files
[NPU] Add EliminateIdentity pass before IR serialize (#33166)
### Details:
- *Eliminates `Identity` ops in the NPU plugin before passing the serialized model to the NPU driver.*
- *WIP*

### Tickets:
- *CVS-177222*

---------

Signed-off-by: Stefania Hergane <stefania-persida.hergane@intel.com>
Co-authored-by: Mircea-Aurelian Dan <mircea-aurelian.dan@intel.com>
1 parent aab89e9 commit 5604706

2 files changed

Lines changed: 20 additions & 6 deletions

File tree

src/plugins/intel_npu/src/compiler_adapter/include/ir_serializer.hpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,9 @@ class IRSerializer {
3535
/**
3636
* @brief Serialize OpenVINO model to target buffer
3737
*/
38-
void serializeModelToBuffer(uint8_t* xml, uint8_t* weights);
38+
void serializeModelToBuffer(uint8_t* xml,
39+
uint8_t* weights,
40+
std::optional<ze_graph_compiler_version_info_t> compilerVersionOpt = std::nullopt);
3941

4042
/**
4143
* @brief Serialize input / output information to string format.
@@ -65,7 +67,9 @@ class IRSerializer {
6567
/**
6668
* @brief Serialize OpenVINO model to target stream
6769
*/
68-
void serializeModelToStream(std::ostream& xml, std::ostream& weights);
70+
void serializeModelToStream(std::ostream& xml,
71+
std::ostream& weights,
72+
std::optional<ze_graph_compiler_version_info_t> compilerVersionOpt = std::nullopt);
6973

7074
/**
7175
* @brief Get size of xml and weights from model

src/plugins/intel_npu/src/compiler_adapter/src/ir_serializer.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "intel_npu/config/options.hpp"
1414
#include "openvino/pass/manager.hpp"
1515
#include "openvino/pass/serialize.hpp"
16+
#include "transformations/common_optimizations/nop_elimination.hpp"
1617
#include "transformations/op_conversions/convert_interpolate11_downgrade.hpp"
1718

1819
namespace {
@@ -156,7 +157,9 @@ IRSerializer::IRSerializer(const std::shared_ptr<const ov::Model>& origModel, co
156157
countModelSize();
157158
}
158159

159-
void IRSerializer::serializeModelToStream(std::ostream& xml, std::ostream& weights) {
160+
void IRSerializer::serializeModelToStream(std::ostream& xml,
161+
std::ostream& weights,
162+
std::optional<ze_graph_compiler_version_info_t> compilerVersionOpt) {
160163
_logger.debug("serializeModelToStream");
161164
const auto passConfig = std::make_shared<ov::pass::PassConfig>();
162165
ov::pass::Manager manager(std::move(passConfig), "NPU:serializeModelToStream");
@@ -167,6 +170,11 @@ void IRSerializer::serializeModelToStream(std::ostream& xml, std::ostream& weigh
167170
_logger.info("Downgrade op for opset smaller than 11");
168171
}
169172

173+
if (compilerVersionOpt.has_value() &&
174+
(compilerVersionOpt.value().major < 7 ||
175+
(compilerVersionOpt.value().major == 7 && compilerVersionOpt.value().minor <= 26))) {
176+
manager.register_pass<ov::pass::EliminateIdentity>();
177+
}
170178
manager.register_pass<ov::pass::Serialize>(xml, weights);
171179

172180
// Depending on the driver version, the compiler attached to it may request this information as an indicator of the
@@ -214,15 +222,17 @@ void IRSerializer::countModelSize() {
214222
_logger.debug("countModelSize completed, xml size: %d, weights size: %d", _xmlSize, _weightsSize);
215223
}
216224

217-
void IRSerializer::serializeModelToBuffer(uint8_t* xml, uint8_t* weights) {
225+
void IRSerializer::serializeModelToBuffer(uint8_t* xml,
226+
uint8_t* weights,
227+
std::optional<ze_graph_compiler_version_info_t> compilerVersionOpt) {
218228
_logger.debug("serializeModelToBuffer");
219229

220230
writer_streambuf xmlStreamBuf(xml);
221231
writer_streambuf weightsStreamBuf(weights);
222232
std::ostream xmlStream(&xmlStreamBuf);
223233
std::ostream weightsStream(&weightsStreamBuf);
224234

225-
serializeModelToStream(xmlStream, weightsStream);
235+
serializeModelToStream(xmlStream, weightsStream, std::move(compilerVersionOpt));
226236

227237
_logger.debug("serializeModelToBuffer end");
228238
}
@@ -274,7 +284,7 @@ SerializedIR IRSerializer::serializeIR(const std::shared_ptr<const ov::Model>& m
274284
uint64_t weightsOffset = offset;
275285
offset += weightsSize;
276286

277-
serializeModelToBuffer(serializedIR + xmlOffset, serializedIR + weightsOffset);
287+
serializeModelToBuffer(serializedIR + xmlOffset, serializedIR + weightsOffset, compilerVersion);
278288

279289
OPENVINO_ASSERT(offset == sizeOfSerializedIR);
280290

0 commit comments

Comments
 (0)