From 6ad8b26806e5a462fe353f1483aa712001fa4b18 Mon Sep 17 00:00:00 2001
From: Eugene Smirnov <eugene.smirnov@intel.com>
Date: Sat, 18 Oct 2025 00:39:39 +0200
Subject: [PATCH 01/14] gemma-2 patterns added to preserve tail constants
 matcher

---
 .../plugin/npuw/partitioning/patterns/opt.cpp | 31 ++++++++++++++++---
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
index 2f8df0ed28e326..7860470234aa38 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
@@ -1929,7 +1929,17 @@ PreserveConstDictMatMulAsymm::PreserveConstDictMatMulAsymm(PreserveConstDictMatM
     auto qcvtm = opp::wrap_type<ov::op::v0::Convert>({qmuls});
     auto qmmi = opp::any_input();
     auto qmm = opp::wrap_type<ov::op::v0::MatMul>({qmmi, qcvtm});
-    auto qres = opp::wrap_type<ov::op::v0::Result>({qmm});
+
+
+    // MatMul -> Divide -> Tanh -> Multiply -> Result
+    auto div = opp::wrap_type<ov::op::v1::Multiply, ov::op::v1::Divide>({qmm, opp::any_input()});
+    auto tanh = opp::wrap_type<ov::op::v0::Tanh>({div});
+    auto matmul_multiply = opp::wrap_type<ov::op::v1::Multiply>({tanh, opp::any_input()});
+
+    auto matmul_or = std::make_shared<ov::pass::pattern::op::Or>(ov::OutputVector{qmm->output(0),
+                                                                                  matmul_multiply->output(0)});
+
+    auto qres = opp::wrap_type<ov::op::v0::Result>({matmul_or});
 
     // Note: Use [=] to make sure the above objects stay alive in the callback
     auto callback = [=](ov::pass::pattern::Matcher& m) {
@@ -1968,9 +1978,19 @@ PreserveConstDictMatMulSymm::PreserveConstDictMatMulSymm(PreserveConstDictMatMul
     auto qcoeff = opp::wrap_type<ov::op::v0::Constant>();
     auto qcvtw = opp::wrap_type<ov::op::v0::Convert>({qweight});
     auto qmuls = opp::wrap_type<ov::op::v1::Multiply>({qcvtw, qcoeff});
+    auto optional_kvt = opp::optional<ov::op::v0::Convert>({qmuls});
     auto qmmi = opp::any_input();
-    auto qmm = opp::wrap_type<ov::op::v0::MatMul>({qmmi, qmuls});
-    auto qres = opp::wrap_type<ov::op::v0::Result>({qmm});
+    auto qmm = opp::wrap_type<ov::op::v0::MatMul>({qmmi, optional_kvt});
+
+    // MatMul -> Divide -> Tanh -> Multiply -> Result
+    auto div = opp::wrap_type<ov::op::v1::Multiply, ov::op::v1::Divide>({qmm, opp::any_input()});
+    auto tanh = opp::wrap_type<ov::op::v0::Tanh>({div});
+    auto matmul_multiply = opp::wrap_type<ov::op::v1::Multiply>({tanh, opp::any_input()});
+
+    auto matmul_or = std::make_shared<ov::pass::pattern::op::Or>(ov::OutputVector{qmm->output(0),
+                                                                                  matmul_multiply->output(0)});
+
+    auto qres = opp::wrap_type<ov::op::v0::Result>({matmul_or});
 
     // Note: Use [=] to make sure the above objects stay alive in the callback
     auto callback = [=](ov::pass::pattern::Matcher& m) {
@@ -1988,11 +2008,12 @@ PreserveConstDictMatMulSymm::PreserveConstDictMatMulSymm(PreserveConstDictMatMul
 
         if ((ov::element::f8e4m3 == matched_qweight->get_element_type() ||
              ov::element::f8e5m2 == matched_qweight->get_element_type() ||
-             ov::element::f8e8m0 == matched_qweight->get_element_type()) &&
+             ov::element::f8e8m0 == matched_qweight->get_element_type() ||
+             ov::element::i8 == matched_qweight->get_element_type()) &&
             qcoeff_shape[1] == 1 && !matched_matmul->get_transpose_a() && matched_matmul->get_transpose_b()) {
             to_keep.get().push_back(matched_qweight);
             to_keep.get().push_back(matched_qcoeff);
-            return false;  // root hasn't changed
+            return true;  // root hasn't changed
         }
         return false;  // root hasn't changed
     };

From 29cb65e8b145c8d64dfe368b3cbd918d35d58a79 Mon Sep 17 00:00:00 2001
From: esmirno1 <eugene.smirnov@intel.com>
Date: Mon, 15 Dec 2025 11:46:00 +0000
Subject: [PATCH 02/14] review fixes

---
 .../src/plugin/npuw/partitioning/patterns/opt.cpp | 15 +++++++--------
 .../src/plugin/npuw/partitioning/patterns/opt.hpp |  6 +++---
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
index 7860470234aa38..6e26bf8e98a2fe 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
@@ -1930,14 +1930,13 @@ PreserveConstDictMatMulAsymm::PreserveConstDictMatMulAsymm(PreserveConstDictMatM
     auto qmmi = opp::any_input();
     auto qmm = opp::wrap_type<ov::op::v0::MatMul>({qmmi, qcvtm});
 
-
     // MatMul -> Divide -> Tanh -> Multiply -> Result
     auto div = opp::wrap_type<ov::op::v1::Multiply, ov::op::v1::Divide>({qmm, opp::any_input()});
     auto tanh = opp::wrap_type<ov::op::v0::Tanh>({div});
     auto matmul_multiply = opp::wrap_type<ov::op::v1::Multiply>({tanh, opp::any_input()});
 
-    auto matmul_or = std::make_shared<ov::pass::pattern::op::Or>(ov::OutputVector{qmm->output(0),
-                                                                                  matmul_multiply->output(0)});
+    auto matmul_or =
+        std::make_shared<ov::pass::pattern::op::Or>(ov::OutputVector{qmm->output(0), matmul_multiply->output(0)});
 
     auto qres = opp::wrap_type<ov::op::v0::Result>({matmul_or});
 
@@ -1973,7 +1972,7 @@ PreserveConstDictMatMulAsymm::PreserveConstDictMatMulAsymm(PreserveConstDictMatM
 //     Const(S) ----------------> Multiply -> MatMul -> Result
 //     ???(Act) ---------------------------->
 
-PreserveConstDictMatMulSymm::PreserveConstDictMatMulSymm(PreserveConstDictMatMulSymm::Results to_keep) {
+PreserveConstDictMatMulFP8::PreserveConstDictMatMulFP8(PreserveConstDictMatMulFP8::Results to_keep) {
     auto qweight = opp::wrap_type<ov::op::v0::Constant>();
     auto qcoeff = opp::wrap_type<ov::op::v0::Constant>();
     auto qcvtw = opp::wrap_type<ov::op::v0::Convert>({qweight});
@@ -1987,8 +1986,8 @@ PreserveConstDictMatMulSymm::PreserveConstDictMatMulSymm(PreserveConstDictMatMul
     auto tanh = opp::wrap_type<ov::op::v0::Tanh>({div});
     auto matmul_multiply = opp::wrap_type<ov::op::v1::Multiply>({tanh, opp::any_input()});
 
-    auto matmul_or = std::make_shared<ov::pass::pattern::op::Or>(ov::OutputVector{qmm->output(0),
-                                                                                  matmul_multiply->output(0)});
+    auto matmul_or =
+        std::make_shared<ov::pass::pattern::op::Or>(ov::OutputVector{qmm->output(0), matmul_multiply->output(0)});
 
     auto qres = opp::wrap_type<ov::op::v0::Result>({matmul_or});
 
@@ -2013,11 +2012,11 @@ PreserveConstDictMatMulSymm::PreserveConstDictMatMulSymm(PreserveConstDictMatMul
             qcoeff_shape[1] == 1 && !matched_matmul->get_transpose_a() && matched_matmul->get_transpose_b()) {
             to_keep.get().push_back(matched_qweight);
             to_keep.get().push_back(matched_qcoeff);
-            return true;  // root hasn't changed
+            return false;  // root hasn't changed
         }
         return false;  // root hasn't changed
     };
-    register_matcher(std::make_shared<opp::Matcher>(qres, "OptPreserveConstDictMatMulSymm"), std::move(callback));
+    register_matcher(std::make_shared<opp::Matcher>(qres, "OptPreserveConstDictMatMulFP8"), std::move(callback));
 }
 
 SliceLastMatmul::SliceLastMatmul() {
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.hpp
index f25f03c4b0bb94..9cc5c4ca5b1b04 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.hpp
@@ -232,14 +232,14 @@ class PreserveConstDictMatMulAsymm : public ov::pass::MatcherPass {
     PreserveConstDictMatMulAsymm(Results to_keep);
 };
 
-class PreserveConstDictMatMulSymm : public ov::pass::MatcherPass {
+class PreserveConstDictMatMulFP8 : public ov::pass::MatcherPass {
 public:
-    OPENVINO_MATCHER_PASS_RTTI("npuw::patterns::opt::PreserveConstDictMatMulSymm");
+    OPENVINO_MATCHER_PASS_RTTI("npuw::patterns::opt::PreserveConstDictMatMulFP8");
 
     using CPtr = std::shared_ptr<ov::op::v0::Constant>;
     using Results = std::reference_wrapper<std::vector<CPtr>>;
 
-    PreserveConstDictMatMulSymm(Results to_keep);
+    PreserveConstDictMatMulFP8(Results to_keep);
 };
 
 // Slice last Matmul

From 3c19c327fea170d3af8ee7690378b31fb1c193d0 Mon Sep 17 00:00:00 2001
From: Eugene Smirnov <eugene.smirnov@intel.com>
Date: Mon, 15 Dec 2025 23:04:10 +0100
Subject: [PATCH 03/14] build fixes

---
 src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp        | 2 +-
 .../intel_npu/src/plugin/npuw/partitioning/partitioning.cpp     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
index 27a21d64704001..4167a744b59eb1 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -609,7 +609,7 @@ bool ov::npuw::CompiledModel::should_use_quantized_host_gather(const std::shared
 
     ov::pass::GraphRewrite rewr2;
     rewr2.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulAsymm>(std::ref(to_keep));
-    rewr2.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulSymm>(std::ref(to_keep));
+    rewr2.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulFP8>(std::ref(to_keep));
     rewr2.run_on_model(model);
     // FIXME: since 3-model pipeline is the default option, the tail will be separate,
     // so we need to match either head or tail pattern here for host gather quantized feature to work.
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
index 491f1bd477eff5..cfb883e7c9966f 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
@@ -1462,7 +1462,7 @@ void Partitioner::saveTailDictConstants(const std::string& func_name) {
 
     ov::pass::GraphRewrite rewr;
     rewr.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulAsymm>(std::ref(to_keep));
-    rewr.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulSymm>(std::ref(to_keep));
+    rewr.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulFP8>(std::ref(to_keep));
     rewr.run_on_model(model_group.front());
 
     for (auto&& const_to_keep : to_keep) {

From 31252936d73460329c8c272ba1b173bf34443d3c Mon Sep 17 00:00:00 2001
From: Eugene Smirnov <eugene.smirnov@intel.com>
Date: Mon, 9 Mar 2026 18:34:20 +0100
Subject: [PATCH 04/14] version check added to preserve constants feature

---
 .../al/include/intel_npu/config/config.hpp    |  7 +++
 .../src/al/include/intel_npu/config/npuw.hpp  |  1 +
 .../intel_npu/npuw_private_properties.hpp     |  9 ++++
 .../intel_npu/src/al/src/config/npuw.cpp      |  1 +
 .../src/plugin/npuw/compiled_model.cpp        |  7 ++-
 .../src/plugin/npuw/llm_compiled_model.cpp    | 23 +++++++++-
 .../plugin/npuw/partitioning/partitioning.cpp |  7 ++-
 .../plugin/npuw/partitioning/partitioning.hpp |  9 ++--
 .../plugin/npuw/partitioning/patterns/opt.cpp | 45 +++++++++++--------
 .../plugin/npuw/partitioning/patterns/opt.hpp |  5 ++-
 .../npuw/partitioning/patterns/sdpa.cpp       |  4 +-
 .../intel_npu/src/plugin/src/plugin.cpp       |  1 +
 .../intel_npu/src/plugin/src/properties.cpp   |  1 +
 .../internal/plugin/test_properties.hpp       |  1 +
 14 files changed, 89 insertions(+), 32 deletions(-)

diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/config.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/config.hpp
index ebae76152394f6..8204217ed27e79 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/config/config.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/config.hpp
@@ -57,6 +57,13 @@ TYPE_PRINTER(std::size_t)
 #ifndef ONEAPI_MAKE_VERSION
 /// @brief Generates generic 'oneAPI' API versions
 #    define ONEAPI_MAKE_VERSION(_major, _minor) ((_major << 16) | (_minor & 0x0000ffff))
+
+/// @brief extract 'oneAPI' API major version
+#    define ONEAPI_VERSION_MAJOR(_version) ((_version) >> 16)
+
+/// @brief extract 'oneAPI' API minor version
+#    define ONEAPI_VERSION_MINOR(_version) ((_version) & 0x0000ffff)
+
 #endif  // ONEAPI_MAKE_VERSION
 
 //
diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/npuw.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/npuw.hpp
index 4773cf0c06c476..31825a5de6ae77 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/config/npuw.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/npuw.hpp
@@ -112,6 +112,7 @@ DEFINE_OPT(NPUW_CWAI, bool, false, npuw::partitioning::cwai, RunTime);
 DEFINE_OPT(NPUW_DQ, bool, false, npuw::partitioning::dyn_quant, RunTime);
 DEFINE_OPT(NPUW_DQ_FULL, bool, true, npuw::partitioning::dyn_quant_full, RunTime);
 DEFINE_OPT(NPUW_PMM, std::string, "2", npuw::partitioning::par_matmul_merge_dims, RunTime);
+DEFINE_OPT(NPUW_MM_GATED, bool, true, npuw::partitioning::matmul_gate_preserve_constants, RunTime);
 DEFINE_OPT(NPUW_SLICE_OUT, bool, false, npuw::partitioning::slice_out, RunTime);
 DEFINE_OPT(NPUW_HOST_GATHER, bool, true, npuw::partitioning::host_gather, RunTime);
 DEFINE_OPT(NPUW_SPATIAL, bool, false, npuw::partitioning::spatial, RunTime);
diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp
index 7771184166bb12..8fe337c82094ac 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp
@@ -216,6 +216,15 @@ static constexpr ov::Property<bool> dyn_quant_full{"NPUW_DQ_FULL"};
  */
 static constexpr ov::Property<std::string> par_matmul_merge_dims{"NPUW_PMM"};
 
+/**
+ * @brief
+ * Type: bool.
+ * Whether to preserve constants for the gated MatMul tail (MatMul -> Divide -> Tanh -> Multiply -> Result).
+ * With some compiler versions, enabling this might produce incorrect results.
+ * Default value: YES
+ */
+static constexpr ov::Property<bool> matmul_gate_preserve_constants{"NPUW_MM_GATED"};
+
 /**
  * @brief
  * Type: bool.
diff --git a/src/plugins/intel_npu/src/al/src/config/npuw.cpp b/src/plugins/intel_npu/src/al/src/config/npuw.cpp
index bdd984c2aac873..b503377ab061f3 100644
--- a/src/plugins/intel_npu/src/al/src/config/npuw.cpp
+++ b/src/plugins/intel_npu/src/al/src/config/npuw.cpp
@@ -29,6 +29,7 @@ void intel_npu::registerNPUWOptions(OptionsDesc& desc) {
     desc.add<NPUW_DQ>();
     desc.add<NPUW_DQ_FULL>();
     desc.add<NPUW_PMM>();
+    desc.add<NPUW_MM_GATED>();
     desc.add<NPUW_SLICE_OUT>();
     desc.add<NPUW_SPATIAL>();
     desc.add<NPUW_SPATIAL_NWAY>();
diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
index 4e6233a56e969d..8556124c02a317 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -635,8 +635,10 @@ bool ov::npuw::CompiledModel::should_use_quantized_host_gather(const std::shared
     std::vector<CPtr> to_keep;
 
     ov::pass::GraphRewrite rewr2;
-    rewr2.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulAsymm>(std::ref(to_keep));
-    rewr2.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulFP8>(std::ref(to_keep));
+    ctx.mm_gate =  m_cfg.get<::intel_npu::NPUW_MM_GATED>();
+
+    rewr2.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulAsymm>(std::ref(ctx), std::ref(to_keep));
+    rewr2.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulFP8>(std::ref(ctx), std::ref(to_keep));
     rewr2.run_on_model(model);
     // FIXME: since 3-model pipeline is the default option, the tail will be separate,
     // so we need to match either head or tail pattern here for host gather quantized feature to work.
@@ -2506,6 +2508,7 @@ void ov::npuw::CompiledModel::implement_properties() {
                           BIND(npuw::partitioning::dyn_quant, NPUW_DQ),
                           BIND(npuw::partitioning::dyn_quant_full, NPUW_DQ_FULL),
                           BIND(npuw::partitioning::par_matmul_merge_dims, NPUW_PMM),
+                          BIND(npuw::partitioning::matmul_gate_preserve_constants, NPUW_MM_GATED),
                           BIND(npuw::partitioning::slice_out, NPUW_SLICE_OUT),
                           BIND(npuw::partitioning::spatial, NPUW_SPATIAL),
                           BIND(npuw::partitioning::spatial_nway, NPUW_SPATIAL_NWAY),
diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
index b00973605e5a2e..9f5ba74a2e944f 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
@@ -1162,6 +1162,7 @@ struct NPUDesc {
     std::string arch;
     int64_t max_tiles = 0;
     bool compiler_dq = false;
+    bool compiler_matmul_gate = false;
     int64_t compiler_ver = 0;
 };
 
@@ -1184,6 +1185,18 @@ std::optional<NPUDesc> extract_npu_descriptor(const std::shared_ptr<const ov::IP
     }
 
     desc.compiler_ver = plugin->get_property(ov::intel_npu::compiler_version.name(), ov::AnyMap{}).as<int64_t>();
+    LOG_INFO("Compiler version: " << ONEAPI_VERSION_MAJOR(desc.compiler_ver) << "." << ONEAPI_VERSION_MINOR(desc.compiler_ver));
+
+    constexpr std::string_view compiler_gate_support_msg
+        = "Compiler: accurate gated matmul (MatMul -> Divide -> Tanh -> Multiply -> Result) : ";
+
+    if (desc.compiler_ver >= ONEAPI_MAKE_VERSION(7, 28)) {
+        // accuracy for gated matmul fixed at 7.28
+        desc.compiler_matmul_gate = true;
+        LOG_INFO(compiler_gate_support_msg << "supported");
+    } else {
+        LOG_WARN(compiler_gate_support_msg << "unsupported");
+    }
 
     return std::make_optional(std::move(desc));
 }
@@ -1227,6 +1240,12 @@ ov::AnyMap get_baseline_common_config(const std::optional<NPUDesc>& npudesc) {
         config.erase("NPUW_DCOFF_TYPE");
         config.erase("NPUW_DCOFF_SCALE");
     }
+
+    // Enabled by default; turned off when the detected compiler predates the gated-MatMul accuracy fix
+    if (npudesc.has_value()) {
+        config.emplace("NPUW_MM_GATED", (npudesc->compiler_matmul_gate ? "YES" : "NO"));
+    }
+
     return config;
 }
 
@@ -1239,6 +1258,7 @@ ov::AnyMap get_default_common_config(const std::optional<NPUDesc>& npudesc) {
     } else {
         config.emplace("NPUW_FUNCALL_FOR_ALL", "YES");
     }
+
     return config;
 }
 
@@ -1255,6 +1275,7 @@ ov::AnyMap get_default_prefill_config(const std::shared_ptr<ov::Model>& model, c
             config.emplace("NPUW_PMM", "NO");
         }
     }
+
     return config;
 }
 
@@ -1838,7 +1859,7 @@ ov::npuw::LLMCompiledModel::LLMCompiledModel(const std::shared_ptr<ov::Model>& m
     if (!m_is_embedding) {
         if (!m_use_chunk_prefill) {
             // TODO: sometimes it is ok if we cannot find any empty inputs or not?
-            NPUW_ASSERT(remove_empty_kv_inputs(prefill_model));
+            remove_empty_kv_inputs(prefill_model);
         } else {
             LOG_DEBUG("Don't remove input key/values from prefill model.");
             LOG_DEBUG("Ask prefill model to output key/values for prefill chunk size tokens.");
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
index 5b3b4800b3be69..afd6c69de967c4 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
@@ -1494,9 +1494,12 @@ void Partitioner::saveTailDictConstants(const std::string& func_name) {
     using CPtr = std::shared_ptr<ov::op::v0::Constant>;
     std::vector<CPtr> to_keep;
 
+    ov::npuw::patterns::opt::Context ctx;
+    ctx.mm_gate =  cfg.get<::intel_npu::NPUW_MM_GATED>();
+
     ov::pass::GraphRewrite rewr;
-    rewr.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulAsymm>(std::ref(to_keep));
-    rewr.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulFP8>(std::ref(to_keep));
+    rewr.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulAsymm>(std::ref(ctx), std::ref(to_keep));
+    rewr.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulFP8>(std::ref(ctx), std::ref(to_keep));
     rewr.run_on_model(model_group.front());
 
     for (auto&& const_to_keep : to_keep) {
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp
index c8dff61741a6dd..51389725663892 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp
@@ -76,7 +76,7 @@ struct Subgraph {
 
     using Ref = std::reference_wrapper<Subgraph>;
 
-    void settag(const std::string& t) {
+    void settag(const std::string & t) {
         LOG_DEBUG("Subgraph set-tag=" << t);
         _tag = t;
     }
@@ -111,16 +111,15 @@ struct Function {
     // FIXME: shouldn't be here. Needed to not unpack some lazy closures in DCOFF
     std::set<std::size_t> _idx_lazy_unpack;
 
-    void settag(const std::string& t) {
+    void settag(const std::string & t) {
         LOG_DEBUG("Function set-tag=" << t);
         _tag = t;
     }
     std::string gettag() const {
         return _tag;
     }
-
 private:
-    std::string _tag;  // derived from the partitioning
+    std::string _tag; // derived from the partitioning
 };
 
 struct Group {
@@ -140,7 +139,7 @@ struct Group {
 
     ov::npuw::Subgraph sg;
 
-    void settag(const std::string& t) {
+    void settag(const std::string & t) {
         LOG_DEBUG("group set-tag=" << t);
         _tag = t;
     }
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
index 3fe2ae44830602..29bb144a5314ae 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
@@ -1919,7 +1919,7 @@ CompressDictMatMulf32::CompressDictMatMulf32(Context::Ref ctx) {
 //     Const(S) ---------------------> Multiply -> to(f32) -> MatMul -> Result
 //     ???(Act) -------------------------------------------->
 
-PreserveConstDictMatMulAsymm::PreserveConstDictMatMulAsymm(PreserveConstDictMatMulAsymm::Results to_keep) {
+PreserveConstDictMatMulAsymm::PreserveConstDictMatMulAsymm(Context::Ref ctx, PreserveConstDictMatMulAsymm::Results to_keep) {
     auto qweight = opp::wrap_type<ov::op::v0::Constant>();
     auto qcoeff = opp::wrap_type<ov::op::v0::Constant>();
     auto qzerop = opp::wrap_type<ov::op::v0::Constant>();
@@ -1930,16 +1930,22 @@ PreserveConstDictMatMulAsymm::PreserveConstDictMatMulAsymm(PreserveConstDictMatM
     auto qcvtm = opp::wrap_type<ov::op::v0::Convert>({qmuls});
     auto qmmi = opp::any_input();
     auto qmm = opp::wrap_type<ov::op::v0::MatMul>({qmmi, qcvtm});
+    std::shared_ptr<Node> qres;
+
 
     // MatMul -> Divide -> Tanh -> Multiply -> Result
-    auto div = opp::wrap_type<ov::op::v1::Multiply, ov::op::v1::Divide>({qmm, opp::any_input()});
-    auto tanh = opp::wrap_type<ov::op::v0::Tanh>({div});
-    auto matmul_multiply = opp::wrap_type<ov::op::v1::Multiply>({tanh, opp::any_input()});
+    if (ctx.get().mm_gate) {
+        auto div = opp::wrap_type<ov::op::v1::Multiply, ov::op::v1::Divide>({qmm, opp::any_input()});
+        auto tanh = opp::wrap_type<ov::op::v0::Tanh>({div});
+        auto matmul_multiply = opp::wrap_type<ov::op::v1::Multiply>({tanh, opp::any_input()});
 
-    auto matmul_or =
-        std::make_shared<ov::pass::pattern::op::Or>(ov::OutputVector{qmm->output(0), matmul_multiply->output(0)});
+        auto matmul_or =
+            std::make_shared<ov::pass::pattern::op::Or>(ov::OutputVector{qmm->output(0), matmul_multiply->output(0)});
 
-    auto qres = opp::wrap_type<ov::op::v0::Result>({matmul_or});
+        qres = opp::wrap_type<ov::op::v0::Result>({matmul_or});
+    } else {
+        qres = opp::wrap_type<ov::op::v0::Result>({qmm});
+    }
 
     // Note: Use [=] to make sure the above objects stay alive in the callback
     auto callback = [=](ov::pass::pattern::Matcher& m) {
@@ -1973,7 +1979,7 @@ PreserveConstDictMatMulAsymm::PreserveConstDictMatMulAsymm(PreserveConstDictMatM
 //     Const(S) ----------------> Multiply -> MatMul -> Result
 //     ???(Act) ---------------------------->
 
-PreserveConstDictMatMulFP8::PreserveConstDictMatMulFP8(PreserveConstDictMatMulFP8::Results to_keep) {
+PreserveConstDictMatMulFP8::PreserveConstDictMatMulFP8(Context::Ref ctx, PreserveConstDictMatMulFP8::Results to_keep) {
     auto qweight = opp::wrap_type<ov::op::v0::Constant>();
     auto qcoeff = opp::wrap_type<ov::op::v0::Constant>();
     auto qcvtw = opp::wrap_type<ov::op::v0::Convert>({qweight});
@@ -1981,16 +1987,20 @@ PreserveConstDictMatMulFP8::PreserveConstDictMatMulFP8(PreserveConstDictMatMulFP
     auto optional_kvt = opp::optional<ov::op::v0::Convert>({qmuls});
     auto qmmi = opp::any_input();
     auto qmm = opp::wrap_type<ov::op::v0::MatMul>({qmmi, optional_kvt});
+    std::shared_ptr<Node> qres;
+    // MatMul -> Divide -> Tanh -> Multiply -> Result
+    if (ctx.get().mm_gate) {
+        auto div = opp::wrap_type<ov::op::v1::Multiply, ov::op::v1::Divide>({qmm, opp::any_input()});
+        auto tanh = opp::wrap_type<ov::op::v0::Tanh>({div});
+        auto matmul_multiply = opp::wrap_type<ov::op::v1::Multiply>({tanh, opp::any_input()});
 
-    // MatMul -> Divide -> Tanh -> Multiply -> Result
-    auto div = opp::wrap_type<ov::op::v1::Multiply, ov::op::v1::Divide>({qmm, opp::any_input()});
-    auto tanh = opp::wrap_type<ov::op::v0::Tanh>({div});
-    auto matmul_multiply = opp::wrap_type<ov::op::v1::Multiply>({tanh, opp::any_input()});
-
-    auto matmul_or =
-        std::make_shared<ov::pass::pattern::op::Or>(ov::OutputVector{qmm->output(0), matmul_multiply->output(0)});
+        auto matmul_or =
+            std::make_shared<ov::pass::pattern::op::Or>(ov::OutputVector{qmm->output(0), matmul_multiply->output(0)});
 
-    auto qres = opp::wrap_type<ov::op::v0::Result>({matmul_or});
+        qres = opp::wrap_type<ov::op::v0::Result>({matmul_or});
+    } else {
+        qres = opp::wrap_type<ov::op::v0::Result>({qmm});
+    }
 
     // Note: Use [=] to make sure the above objects stay alive in the callback
     auto callback = [=](ov::pass::pattern::Matcher& m) {
@@ -2008,8 +2018,7 @@ PreserveConstDictMatMulFP8::PreserveConstDictMatMulFP8(PreserveConstDictMatMulFP
 
         if ((ov::element::f8e4m3 == matched_qweight->get_element_type() ||
              ov::element::f8e5m2 == matched_qweight->get_element_type() ||
-             ov::element::f8e8m0 == matched_qweight->get_element_type() ||
-             ov::element::i8 == matched_qweight->get_element_type()) &&
+             ov::element::f8e8m0 == matched_qweight->get_element_type()) &&
             qcoeff_shape[1] == 1 && !matched_matmul->get_transpose_a() && matched_matmul->get_transpose_b()) {
             to_keep.get().push_back(matched_qweight);
             to_keep.get().push_back(matched_qcoeff);
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.hpp
index 03d69748866823..c9463f249cb8cb 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.hpp
@@ -23,6 +23,7 @@ struct Context {
     std::string pmm_dims;
     bool is_spatial = false;
     bool mm_dq_full = true;
+    bool mm_gate = false;
 
     using PPtr = std::shared_ptr<ov::op::v0::Parameter>;
     using NPtr = std::shared_ptr<ov::Node>;
@@ -229,7 +230,7 @@ class PreserveConstDictMatMulAsymm : public ov::pass::MatcherPass {
     using CPtr = std::shared_ptr<ov::op::v0::Constant>;
     using Results = std::reference_wrapper<std::vector<CPtr>>;
 
-    PreserveConstDictMatMulAsymm(Results to_keep);
+    PreserveConstDictMatMulAsymm(Context::Ref ctx, Results to_keep);
 };
 
 class PreserveConstDictMatMulFP8 : public ov::pass::MatcherPass {
@@ -239,7 +240,7 @@ class PreserveConstDictMatMulFP8 : public ov::pass::MatcherPass {
     using CPtr = std::shared_ptr<ov::op::v0::Constant>;
     using Results = std::reference_wrapper<std::vector<CPtr>>;
 
-    PreserveConstDictMatMulFP8(Results to_keep);
+    PreserveConstDictMatMulFP8(Context::Ref ctx, Results to_keep);
 };
 
 // Slice last Matmul
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/sdpa.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/sdpa.cpp
index f1fc3e31449c51..eff81a3bc4ddeb 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/sdpa.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/sdpa.cpp
@@ -109,7 +109,7 @@ SDPADecomposed::SDPADecomposed(const std::shared_ptr<ov::npuw::online::Snapshot>
     auto convert1 = opp::wrap_type<ov::op::v0::Convert>({opp::any_input()});
     auto concat1 = opp::wrap_type<ov::op::v0::Concat>({convert1, opp::any_input()});
 
-    // GQA optional nodes
+    //GQA optional nodes
     auto unsqueeze1 = opp::optional<ov::op::v0::Unsqueeze>({concat1, opp::any_input()});
     auto broadcast1 = opp::optional<ov::op::v3::Broadcast>({unsqueeze1, opp::any_input()});
     auto reshape1 = opp::optional<ov::op::v1::Reshape>({broadcast1, opp::any_input()});
@@ -117,7 +117,7 @@ SDPADecomposed::SDPADecomposed(const std::shared_ptr<ov::npuw::online::Snapshot>
     auto convert2 = opp::wrap_type<ov::op::v0::Convert>({opp::any_input()});
     auto concat2 = opp::wrap_type<ov::op::v0::Concat>({convert2, opp::any_input()});
 
-    // GQA optional nodes
+    //GQA optional nodes
     auto unsqueeze2 = opp::optional<ov::op::v0::Unsqueeze>({concat2, opp::any_input()});
     auto broadcast2 = opp::optional<ov::op::v3::Broadcast>({unsqueeze2, opp::any_input()});
     auto reshape2 = opp::optional<ov::op::v1::Reshape>({broadcast2, opp::any_input()});
diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index 85b73eaec383d7..36c483efbf390d 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -304,6 +304,7 @@ void init_config(const IEngineBackend* backend, OptionsDesc& options, FilteredCo
     REGISTER_OPTION(NPUW_DQ);
     REGISTER_OPTION(NPUW_DQ_FULL);
     REGISTER_OPTION(NPUW_PMM);
+    REGISTER_OPTION(NPUW_MM_GATED);
     REGISTER_OPTION(NPUW_SLICE_OUT);
     REGISTER_OPTION(NPUW_SPATIAL);
     REGISTER_OPTION(NPUW_SPATIAL_NWAY);
diff --git a/src/plugins/intel_npu/src/plugin/src/properties.cpp b/src/plugins/intel_npu/src/plugin/src/properties.cpp
index c46b3b80c26a02..dbbd48e254ec86 100644
--- a/src/plugins/intel_npu/src/plugin/src/properties.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/properties.cpp
@@ -591,6 +591,7 @@ void Properties::registerPluginProperties() {
     TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::npuw::partitioning::dyn_quant, NPUW_DQ);
     TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::npuw::partitioning::dyn_quant_full, NPUW_DQ_FULL);
     TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::npuw::partitioning::par_matmul_merge_dims, NPUW_PMM);
+    TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::npuw::partitioning::matmul_gate_preserve_constants, NPUW_MM_GATED);
     TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::npuw::partitioning::slice_out, NPUW_SLICE_OUT);
     TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::npuw::partitioning::spatial, NPUW_SPATIAL);
     TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::npuw::partitioning::spatial_nway, NPUW_SPATIAL_NWAY);
diff --git a/src/plugins/intel_npu/tests/functional/internal/plugin/test_properties.hpp b/src/plugins/intel_npu/tests/functional/internal/plugin/test_properties.hpp
index f5f67d180c776c..4b96ec16fb6b62 100644
--- a/src/plugins/intel_npu/tests/functional/internal/plugin/test_properties.hpp
+++ b/src/plugins/intel_npu/tests/functional/internal/plugin/test_properties.hpp
@@ -199,6 +199,7 @@ class PropertiesManagerTests : public ov::test::behavior::OVPluginTestBase,
         REGISTER_OPTION(NPUW_DQ);
         REGISTER_OPTION(NPUW_DQ_FULL);
         REGISTER_OPTION(NPUW_PMM);
+        REGISTER_OPTION(NPUW_MM_GATED);
         REGISTER_OPTION(NPUW_SLICE_OUT);
         REGISTER_OPTION(NPUW_SPATIAL);
         REGISTER_OPTION(NPUW_SPATIAL_NWAY);

From 5274f046a80d9e5752a972459b0dafacc13af6c1 Mon Sep 17 00:00:00 2001
From: Eugene Smirnov <eugene.smirnov@intel.com>
Date: Mon, 9 Mar 2026 22:45:58 +0100
Subject: [PATCH 05/14] extra spaces removed

---
 src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
index d0f2e7d18dbc50..ae62c28cd0bb14 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
@@ -1259,7 +1259,6 @@ ov::AnyMap get_baseline_common_config(const std::optional<NPUDesc>& npudesc) {
     if (npudesc.has_value()) {
         config.emplace("NPUW_MM_GATED", (npudesc->compiler_matmul_gate ? "YES" : "NO"));
     }
-
     return config;
 }
 
@@ -1272,7 +1271,6 @@ ov::AnyMap get_default_common_config(const std::optional<NPUDesc>& npudesc) {
     } else {
         config.emplace("NPUW_FUNCALL_FOR_ALL", "YES");
     }
-
     return config;
 }
 
@@ -1289,7 +1287,6 @@ ov::AnyMap get_default_prefill_config(const std::shared_ptr<ov::Model>& model, c
             config.emplace("NPUW_PMM", "NO");
         }
     }
-
     return config;
 }
 

From 268048a84bb77f62b26b52b431af0c76fbec5a90 Mon Sep 17 00:00:00 2001
From: esmirno1 <eugene.smirnov@intel.com>
Date: Mon, 9 Mar 2026 22:03:28 +0000
Subject: [PATCH 06/14] clang-format fixed

---
 src/plugins/intel_cpu/thirdparty/shl                     | 1 +
 src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp | 2 +-
 .../intel_npu/src/plugin/npuw/llm_compiled_model.cpp     | 9 +++++----
 .../src/plugin/npuw/partitioning/partitioning.cpp        | 2 +-
 .../src/plugin/npuw/partitioning/partitioning.hpp        | 9 +++++----
 .../src/plugin/npuw/partitioning/patterns/opt.cpp        | 4 ++--
 .../src/plugin/npuw/partitioning/patterns/sdpa.cpp       | 4 ++--
 7 files changed, 17 insertions(+), 14 deletions(-)
 create mode 160000 src/plugins/intel_cpu/thirdparty/shl

diff --git a/src/plugins/intel_cpu/thirdparty/shl b/src/plugins/intel_cpu/thirdparty/shl
new file mode 160000
index 00000000000000..27992eaf41ef96
--- /dev/null
+++ b/src/plugins/intel_cpu/thirdparty/shl
@@ -0,0 +1 @@
+Subproject commit 27992eaf41ef967ed228ea8d801b1aa489ea8997
diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
index 8556124c02a317..72e68655d1658b 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -635,7 +635,7 @@ bool ov::npuw::CompiledModel::should_use_quantized_host_gather(const std::shared
     std::vector<CPtr> to_keep;
 
     ov::pass::GraphRewrite rewr2;
-    ctx.mm_gate =  m_cfg.get<::intel_npu::NPUW_MM_GATED>();
+    ctx.mm_gate = m_cfg.get<::intel_npu::NPUW_MM_GATED>();
 
     rewr2.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulAsymm>(std::ref(ctx), std::ref(to_keep));
     rewr2.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulFP8>(std::ref(ctx), std::ref(to_keep));
diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
index ae62c28cd0bb14..9c0e3adf9fc826 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
@@ -1199,10 +1199,11 @@ std::optional<NPUDesc> extract_npu_descriptor(const std::shared_ptr<const ov::IP
                                                ov::AnyMap{{ov::intel_npu::compiler_type.name(), target_compiler_type}})
                                 .as<int64_t>();
     }
-    LOG_INFO("Compiler version: " << ONEAPI_VERSION_MAJOR(desc.compiler_ver) << "." << ONEAPI_VERSION_MINOR(desc.compiler_ver));
+    LOG_INFO("Compiler version: " << ONEAPI_VERSION_MAJOR(desc.compiler_ver) << "."
+                                  << ONEAPI_VERSION_MINOR(desc.compiler_ver));
 
-    constexpr std::string_view compiler_gate_support_msg
-        = "Compiler: accurate gated matmul (MatMul -> Divide -> Tanh -> Multiply -> Result) : ";
+    constexpr std::string_view compiler_gate_support_msg =
+        "Compiler: accurate gated matmul (MatMul -> Divide -> Tanh -> Multiply -> Result) : ";
 
     if (desc.compiler_ver >= ONEAPI_MAKE_VERSION(7, 28)) {
         // accuracy for gated matmul fixed at 7.28
@@ -1255,7 +1256,7 @@ ov::AnyMap get_baseline_common_config(const std::optional<NPUDesc>& npudesc) {
         config.erase("NPUW_DCOFF_SCALE");
     }
 
-    //default version is ON - while for older compiler it might be turned off
+    // default version is ON - while for older compiler it might be turned off
     if (npudesc.has_value()) {
         config.emplace("NPUW_MM_GATED", (npudesc->compiler_matmul_gate ? "YES" : "NO"));
     }
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
index afd6c69de967c4..d558932e187155 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
@@ -1495,7 +1495,7 @@ void Partitioner::saveTailDictConstants(const std::string& func_name) {
     std::vector<CPtr> to_keep;
 
     ov::npuw::patterns::opt::Context ctx;
-    ctx.mm_gate =  cfg.get<::intel_npu::NPUW_MM_GATED>();
+    ctx.mm_gate = cfg.get<::intel_npu::NPUW_MM_GATED>();
 
     ov::pass::GraphRewrite rewr;
     rewr.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulAsymm>(std::ref(ctx), std::ref(to_keep));
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp
index 51389725663892..c8dff61741a6dd 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp
@@ -76,7 +76,7 @@ struct Subgraph {
 
     using Ref = std::reference_wrapper<Subgraph>;
 
-    void settag(const std::string & t) {
+    void settag(const std::string& t) {
         LOG_DEBUG("Subgraph set-tag=" << t);
         _tag = t;
     }
@@ -111,15 +111,16 @@ struct Function {
     // FIXME: shouldn't be here. Needed to not unpack some lazy closures in DCOFF
     std::set<std::size_t> _idx_lazy_unpack;
 
-    void settag(const std::string & t) {
+    void settag(const std::string& t) {
         LOG_DEBUG("Function set-tag=" << t);
         _tag = t;
     }
     std::string gettag() const {
         return _tag;
     }
+
 private:
-    std::string _tag; // derived from the partitioning
+    std::string _tag;  // derived from the partitioning
 };
 
 struct Group {
@@ -139,7 +140,7 @@ struct Group {
 
     ov::npuw::Subgraph sg;
 
-    void settag(const std::string & t) {
+    void settag(const std::string& t) {
         LOG_DEBUG("group set-tag=" << t);
         _tag = t;
     }
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
index 29bb144a5314ae..ef510b32e9c4e3 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
@@ -1919,7 +1919,8 @@ CompressDictMatMulf32::CompressDictMatMulf32(Context::Ref ctx) {
 //     Const(S) ---------------------> Multiply -> to(f32) -> MatMul -> Result
 //     ???(Act) -------------------------------------------->
 
-PreserveConstDictMatMulAsymm::PreserveConstDictMatMulAsymm(Context::Ref ctx, PreserveConstDictMatMulAsymm::Results to_keep) {
+PreserveConstDictMatMulAsymm::PreserveConstDictMatMulAsymm(Context::Ref ctx,
+                                                           PreserveConstDictMatMulAsymm::Results to_keep) {
     auto qweight = opp::wrap_type<ov::op::v0::Constant>();
     auto qcoeff = opp::wrap_type<ov::op::v0::Constant>();
     auto qzerop = opp::wrap_type<ov::op::v0::Constant>();
@@ -1932,7 +1933,6 @@ PreserveConstDictMatMulAsymm::PreserveConstDictMatMulAsymm(Context::Ref ctx, Pre
     auto qmm = opp::wrap_type<ov::op::v0::MatMul>({qmmi, qcvtm});
     std::shared_ptr<Node> qres;
 
-
     // MatMul -> Divide -> Tanh -> Multiply -> Result
     if (ctx.get().mm_gate) {
         auto div = opp::wrap_type<ov::op::v1::Multiply, ov::op::v1::Divide>({qmm, opp::any_input()});
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/sdpa.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/sdpa.cpp
index eff81a3bc4ddeb..f1fc3e31449c51 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/sdpa.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/sdpa.cpp
@@ -109,7 +109,7 @@ SDPADecomposed::SDPADecomposed(const std::shared_ptr<ov::npuw::online::Snapshot>
     auto convert1 = opp::wrap_type<ov::op::v0::Convert>({opp::any_input()});
     auto concat1 = opp::wrap_type<ov::op::v0::Concat>({convert1, opp::any_input()});
 
-    //GQA optional nodes
+    // GQA optional nodes
     auto unsqueeze1 = opp::optional<ov::op::v0::Unsqueeze>({concat1, opp::any_input()});
     auto broadcast1 = opp::optional<ov::op::v3::Broadcast>({unsqueeze1, opp::any_input()});
     auto reshape1 = opp::optional<ov::op::v1::Reshape>({broadcast1, opp::any_input()});
@@ -117,7 +117,7 @@ SDPADecomposed::SDPADecomposed(const std::shared_ptr<ov::npuw::online::Snapshot>
     auto convert2 = opp::wrap_type<ov::op::v0::Convert>({opp::any_input()});
     auto concat2 = opp::wrap_type<ov::op::v0::Concat>({convert2, opp::any_input()});
 
-    //GQA optional nodes
+    // GQA optional nodes
     auto unsqueeze2 = opp::optional<ov::op::v0::Unsqueeze>({concat2, opp::any_input()});
     auto broadcast2 = opp::optional<ov::op::v3::Broadcast>({unsqueeze2, opp::any_input()});
     auto reshape2 = opp::optional<ov::op::v1::Reshape>({broadcast2, opp::any_input()});

From 0a811c9fd2006c6bc29ed179ab1d71592f4fb51c Mon Sep 17 00:00:00 2001
From: esmirno1 <eugene.smirnov@intel.com>
Date: Mon, 9 Mar 2026 22:12:28 +0000
Subject: [PATCH 07/14] Revert "clang-format fixed"

This reverts commit 268048a84bb77f62b26b52b431af0c76fbec5a90.
---
 src/plugins/intel_cpu/thirdparty/shl                     | 1 -
 src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp | 2 +-
 .../intel_npu/src/plugin/npuw/llm_compiled_model.cpp     | 9 ++++-----
 .../src/plugin/npuw/partitioning/partitioning.cpp        | 2 +-
 .../src/plugin/npuw/partitioning/partitioning.hpp        | 9 ++++-----
 .../src/plugin/npuw/partitioning/patterns/opt.cpp        | 4 ++--
 .../src/plugin/npuw/partitioning/patterns/sdpa.cpp       | 4 ++--
 7 files changed, 14 insertions(+), 17 deletions(-)
 delete mode 160000 src/plugins/intel_cpu/thirdparty/shl

diff --git a/src/plugins/intel_cpu/thirdparty/shl b/src/plugins/intel_cpu/thirdparty/shl
deleted file mode 160000
index 27992eaf41ef96..00000000000000
--- a/src/plugins/intel_cpu/thirdparty/shl
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 27992eaf41ef967ed228ea8d801b1aa489ea8997
diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
index 72e68655d1658b..8556124c02a317 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -635,7 +635,7 @@ bool ov::npuw::CompiledModel::should_use_quantized_host_gather(const std::shared
     std::vector<CPtr> to_keep;
 
     ov::pass::GraphRewrite rewr2;
-    ctx.mm_gate = m_cfg.get<::intel_npu::NPUW_MM_GATED>();
+    ctx.mm_gate =  m_cfg.get<::intel_npu::NPUW_MM_GATED>();
 
     rewr2.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulAsymm>(std::ref(ctx), std::ref(to_keep));
     rewr2.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulFP8>(std::ref(ctx), std::ref(to_keep));
diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
index 9c0e3adf9fc826..ae62c28cd0bb14 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
@@ -1199,11 +1199,10 @@ std::optional<NPUDesc> extract_npu_descriptor(const std::shared_ptr<const ov::IP
                                                ov::AnyMap{{ov::intel_npu::compiler_type.name(), target_compiler_type}})
                                 .as<int64_t>();
     }
-    LOG_INFO("Compiler version: " << ONEAPI_VERSION_MAJOR(desc.compiler_ver) << "."
-                                  << ONEAPI_VERSION_MINOR(desc.compiler_ver));
+    LOG_INFO("Compiler version: " << ONEAPI_VERSION_MAJOR(desc.compiler_ver) << "." << ONEAPI_VERSION_MINOR(desc.compiler_ver));
 
-    constexpr std::string_view compiler_gate_support_msg =
-        "Compiler: accurate gated matmul (MatMul -> Divide -> Tanh -> Multiply -> Result) : ";
+    constexpr std::string_view compiler_gate_support_msg
+        = "Compiler: accurate gated matmul (MatMul -> Divide -> Tanh -> Multiply -> Result) : ";
 
     if (desc.compiler_ver >= ONEAPI_MAKE_VERSION(7, 28)) {
         // accuracy for gated matmul fixed at 7.28
@@ -1256,7 +1255,7 @@ ov::AnyMap get_baseline_common_config(const std::optional<NPUDesc>& npudesc) {
         config.erase("NPUW_DCOFF_SCALE");
     }
 
-    // default version is ON - while for older compiler it might be turned off
+    //default version is ON - while for older compiler it might be turned off
     if (npudesc.has_value()) {
         config.emplace("NPUW_MM_GATED", (npudesc->compiler_matmul_gate ? "YES" : "NO"));
     }
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
index d558932e187155..afd6c69de967c4 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
@@ -1495,7 +1495,7 @@ void Partitioner::saveTailDictConstants(const std::string& func_name) {
     std::vector<CPtr> to_keep;
 
     ov::npuw::patterns::opt::Context ctx;
-    ctx.mm_gate = cfg.get<::intel_npu::NPUW_MM_GATED>();
+    ctx.mm_gate =  cfg.get<::intel_npu::NPUW_MM_GATED>();
 
     ov::pass::GraphRewrite rewr;
     rewr.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulAsymm>(std::ref(ctx), std::ref(to_keep));
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp
index c8dff61741a6dd..51389725663892 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp
@@ -76,7 +76,7 @@ struct Subgraph {
 
     using Ref = std::reference_wrapper<Subgraph>;
 
-    void settag(const std::string& t) {
+    void settag(const std::string & t) {
         LOG_DEBUG("Subgraph set-tag=" << t);
         _tag = t;
     }
@@ -111,16 +111,15 @@ struct Function {
     // FIXME: shouldn't be here. Needed to not unpack some lazy closures in DCOFF
     std::set<std::size_t> _idx_lazy_unpack;
 
-    void settag(const std::string& t) {
+    void settag(const std::string & t) {
         LOG_DEBUG("Function set-tag=" << t);
         _tag = t;
     }
     std::string gettag() const {
         return _tag;
     }
-
 private:
-    std::string _tag;  // derived from the partitioning
+    std::string _tag; // derived from the partitioning
 };
 
 struct Group {
@@ -140,7 +139,7 @@ struct Group {
 
     ov::npuw::Subgraph sg;
 
-    void settag(const std::string& t) {
+    void settag(const std::string & t) {
         LOG_DEBUG("group set-tag=" << t);
         _tag = t;
     }
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
index ef510b32e9c4e3..29bb144a5314ae 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
@@ -1919,8 +1919,7 @@ CompressDictMatMulf32::CompressDictMatMulf32(Context::Ref ctx) {
 //     Const(S) ---------------------> Multiply -> to(f32) -> MatMul -> Result
 //     ???(Act) -------------------------------------------->
 
-PreserveConstDictMatMulAsymm::PreserveConstDictMatMulAsymm(Context::Ref ctx,
-                                                           PreserveConstDictMatMulAsymm::Results to_keep) {
+PreserveConstDictMatMulAsymm::PreserveConstDictMatMulAsymm(Context::Ref ctx, PreserveConstDictMatMulAsymm::Results to_keep) {
     auto qweight = opp::wrap_type<ov::op::v0::Constant>();
     auto qcoeff = opp::wrap_type<ov::op::v0::Constant>();
     auto qzerop = opp::wrap_type<ov::op::v0::Constant>();
@@ -1933,6 +1932,7 @@ PreserveConstDictMatMulAsymm::PreserveConstDictMatMulAsymm(Context::Ref ctx,
     auto qmm = opp::wrap_type<ov::op::v0::MatMul>({qmmi, qcvtm});
     std::shared_ptr<Node> qres;
 
+
     // MatMul -> Divide -> Tanh -> Multiply -> Result
     if (ctx.get().mm_gate) {
         auto div = opp::wrap_type<ov::op::v1::Multiply, ov::op::v1::Divide>({qmm, opp::any_input()});
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/sdpa.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/sdpa.cpp
index f1fc3e31449c51..eff81a3bc4ddeb 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/sdpa.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/sdpa.cpp
@@ -109,7 +109,7 @@ SDPADecomposed::SDPADecomposed(const std::shared_ptr<ov::npuw::online::Snapshot>
     auto convert1 = opp::wrap_type<ov::op::v0::Convert>({opp::any_input()});
     auto concat1 = opp::wrap_type<ov::op::v0::Concat>({convert1, opp::any_input()});
 
-    // GQA optional nodes
+    //GQA optional nodes
     auto unsqueeze1 = opp::optional<ov::op::v0::Unsqueeze>({concat1, opp::any_input()});
     auto broadcast1 = opp::optional<ov::op::v3::Broadcast>({unsqueeze1, opp::any_input()});
     auto reshape1 = opp::optional<ov::op::v1::Reshape>({broadcast1, opp::any_input()});
@@ -117,7 +117,7 @@ SDPADecomposed::SDPADecomposed(const std::shared_ptr<ov::npuw::online::Snapshot>
     auto convert2 = opp::wrap_type<ov::op::v0::Convert>({opp::any_input()});
     auto concat2 = opp::wrap_type<ov::op::v0::Concat>({convert2, opp::any_input()});
 
-    // GQA optional nodes
+    //GQA optional nodes
     auto unsqueeze2 = opp::optional<ov::op::v0::Unsqueeze>({concat2, opp::any_input()});
     auto broadcast2 = opp::optional<ov::op::v3::Broadcast>({unsqueeze2, opp::any_input()});
     auto reshape2 = opp::optional<ov::op::v1::Reshape>({broadcast2, opp::any_input()});

From b872130e2a0da1864f31fe11b5b5c616347c64cc Mon Sep 17 00:00:00 2001
From: esmirno1 <eugene.smirnov@intel.com>
Date: Mon, 9 Mar 2026 22:15:16 +0000
Subject: [PATCH 08/14] clang-format fixed

---
 src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp | 2 +-
 .../intel_npu/src/plugin/npuw/llm_compiled_model.cpp     | 9 +++++----
 .../src/plugin/npuw/partitioning/partitioning.cpp        | 2 +-
 .../src/plugin/npuw/partitioning/partitioning.hpp        | 9 +++++----
 .../src/plugin/npuw/partitioning/patterns/opt.cpp        | 4 ++--
 .../src/plugin/npuw/partitioning/patterns/sdpa.cpp       | 4 ++--
 6 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
index 8556124c02a317..72e68655d1658b 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -635,7 +635,7 @@ bool ov::npuw::CompiledModel::should_use_quantized_host_gather(const std::shared
     std::vector<CPtr> to_keep;
 
     ov::pass::GraphRewrite rewr2;
-    ctx.mm_gate =  m_cfg.get<::intel_npu::NPUW_MM_GATED>();
+    ctx.mm_gate = m_cfg.get<::intel_npu::NPUW_MM_GATED>();
 
     rewr2.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulAsymm>(std::ref(ctx), std::ref(to_keep));
     rewr2.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulFP8>(std::ref(ctx), std::ref(to_keep));
diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
index ae62c28cd0bb14..9c0e3adf9fc826 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
@@ -1199,10 +1199,11 @@ std::optional<NPUDesc> extract_npu_descriptor(const std::shared_ptr<const ov::IP
                                                ov::AnyMap{{ov::intel_npu::compiler_type.name(), target_compiler_type}})
                                 .as<int64_t>();
     }
-    LOG_INFO("Compiler version: " << ONEAPI_VERSION_MAJOR(desc.compiler_ver) << "." << ONEAPI_VERSION_MINOR(desc.compiler_ver));
+    LOG_INFO("Compiler version: " << ONEAPI_VERSION_MAJOR(desc.compiler_ver) << "."
+                                  << ONEAPI_VERSION_MINOR(desc.compiler_ver));
 
-    constexpr std::string_view compiler_gate_support_msg
-        = "Compiler: accurate gated matmul (MatMul -> Divide -> Tanh -> Multiply -> Result) : ";
+    constexpr std::string_view compiler_gate_support_msg =
+        "Compiler: accurate gated matmul (MatMul -> Divide -> Tanh -> Multiply -> Result) : ";
 
     if (desc.compiler_ver >= ONEAPI_MAKE_VERSION(7, 28)) {
         // accuracy for gated matmul fixed at 7.28
@@ -1255,7 +1256,7 @@ ov::AnyMap get_baseline_common_config(const std::optional<NPUDesc>& npudesc) {
         config.erase("NPUW_DCOFF_SCALE");
     }
 
-    //default version is ON - while for older compiler it might be turned off
+    // default version is ON - while for older compiler it might be turned off
     if (npudesc.has_value()) {
         config.emplace("NPUW_MM_GATED", (npudesc->compiler_matmul_gate ? "YES" : "NO"));
     }
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
index afd6c69de967c4..d558932e187155 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
@@ -1495,7 +1495,7 @@ void Partitioner::saveTailDictConstants(const std::string& func_name) {
     std::vector<CPtr> to_keep;
 
     ov::npuw::patterns::opt::Context ctx;
-    ctx.mm_gate =  cfg.get<::intel_npu::NPUW_MM_GATED>();
+    ctx.mm_gate = cfg.get<::intel_npu::NPUW_MM_GATED>();
 
     ov::pass::GraphRewrite rewr;
     rewr.add_matcher<ov::npuw::patterns::opt::PreserveConstDictMatMulAsymm>(std::ref(ctx), std::ref(to_keep));
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp
index 51389725663892..c8dff61741a6dd 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp
@@ -76,7 +76,7 @@ struct Subgraph {
 
     using Ref = std::reference_wrapper<Subgraph>;
 
-    void settag(const std::string & t) {
+    void settag(const std::string& t) {
         LOG_DEBUG("Subgraph set-tag=" << t);
         _tag = t;
     }
@@ -111,15 +111,16 @@ struct Function {
     // FIXME: shouldn't be here. Needed to not unpack some lazy closures in DCOFF
     std::set<std::size_t> _idx_lazy_unpack;
 
-    void settag(const std::string & t) {
+    void settag(const std::string& t) {
         LOG_DEBUG("Function set-tag=" << t);
         _tag = t;
     }
     std::string gettag() const {
         return _tag;
     }
+
 private:
-    std::string _tag; // derived from the partitioning
+    std::string _tag;  // derived from the partitioning
 };
 
 struct Group {
@@ -139,7 +140,7 @@ struct Group {
 
     ov::npuw::Subgraph sg;
 
-    void settag(const std::string & t) {
+    void settag(const std::string& t) {
         LOG_DEBUG("group set-tag=" << t);
         _tag = t;
     }
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
index 29bb144a5314ae..ef510b32e9c4e3 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
@@ -1919,7 +1919,8 @@ CompressDictMatMulf32::CompressDictMatMulf32(Context::Ref ctx) {
 //     Const(S) ---------------------> Multiply -> to(f32) -> MatMul -> Result
 //     ???(Act) -------------------------------------------->
 
-PreserveConstDictMatMulAsymm::PreserveConstDictMatMulAsymm(Context::Ref ctx, PreserveConstDictMatMulAsymm::Results to_keep) {
+PreserveConstDictMatMulAsymm::PreserveConstDictMatMulAsymm(Context::Ref ctx,
+                                                           PreserveConstDictMatMulAsymm::Results to_keep) {
     auto qweight = opp::wrap_type<ov::op::v0::Constant>();
     auto qcoeff = opp::wrap_type<ov::op::v0::Constant>();
     auto qzerop = opp::wrap_type<ov::op::v0::Constant>();
@@ -1932,7 +1933,6 @@ PreserveConstDictMatMulAsymm::PreserveConstDictMatMulAsymm(Context::Ref ctx, Pre
     auto qmm = opp::wrap_type<ov::op::v0::MatMul>({qmmi, qcvtm});
     std::shared_ptr<Node> qres;
 
-
     // MatMul -> Divide -> Tanh -> Multiply -> Result
     if (ctx.get().mm_gate) {
         auto div = opp::wrap_type<ov::op::v1::Multiply, ov::op::v1::Divide>({qmm, opp::any_input()});
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/sdpa.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/sdpa.cpp
index eff81a3bc4ddeb..f1fc3e31449c51 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/sdpa.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/sdpa.cpp
@@ -109,7 +109,7 @@ SDPADecomposed::SDPADecomposed(const std::shared_ptr<ov::npuw::online::Snapshot>
     auto convert1 = opp::wrap_type<ov::op::v0::Convert>({opp::any_input()});
     auto concat1 = opp::wrap_type<ov::op::v0::Concat>({convert1, opp::any_input()});
 
-    //GQA optional nodes
+    // GQA optional nodes
     auto unsqueeze1 = opp::optional<ov::op::v0::Unsqueeze>({concat1, opp::any_input()});
     auto broadcast1 = opp::optional<ov::op::v3::Broadcast>({unsqueeze1, opp::any_input()});
     auto reshape1 = opp::optional<ov::op::v1::Reshape>({broadcast1, opp::any_input()});
@@ -117,7 +117,7 @@ SDPADecomposed::SDPADecomposed(const std::shared_ptr<ov::npuw::online::Snapshot>
     auto convert2 = opp::wrap_type<ov::op::v0::Convert>({opp::any_input()});
     auto concat2 = opp::wrap_type<ov::op::v0::Concat>({convert2, opp::any_input()});
 
-    //GQA optional nodes
+    // GQA optional nodes
     auto unsqueeze2 = opp::optional<ov::op::v0::Unsqueeze>({concat2, opp::any_input()});
     auto broadcast2 = opp::optional<ov::op::v3::Broadcast>({unsqueeze2, opp::any_input()});
     auto reshape2 = opp::optional<ov::op::v1::Reshape>({broadcast2, opp::any_input()});

From fb12f9ac50e3c5875d498a80c8ccc81c841a29a3 Mon Sep 17 00:00:00 2001
From: Eugene Smirnov <eugene.smirnov@intel.com>
Date: Wed, 11 Mar 2026 23:53:16 +0100
Subject: [PATCH 09/14] code-review-fixes

---
 .../src/plugin/include/properties.hpp         |  1 +
 .../src/plugin/npuw/llm_compiled_model.cpp    | 80 ++++++++++++-------
 .../plugin/npuw/partitioning/patterns/opt.cpp |  4 +-
 3 files changed, 52 insertions(+), 33 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/include/properties.hpp b/src/plugins/intel_npu/src/plugin/include/properties.hpp
index c95e07fc17fa6b..fbab3a9451b1fa 100644
--- a/src/plugins/intel_npu/src/plugin/include/properties.hpp
+++ b/src/plugins/intel_npu/src/plugin/include/properties.hpp
@@ -156,6 +156,7 @@ class Properties final {
         ov::intel_npu::npuw::partitioning::dyn_quant.name(),
         ov::intel_npu::npuw::partitioning::dyn_quant_full.name(),
         ov::intel_npu::npuw::partitioning::par_matmul_merge_dims.name(),
+        ov::intel_npu::npuw::partitioning::matmul_gate_preserve_constants.name(),
         ov::intel_npu::npuw::partitioning::slice_out.name(),
         ov::intel_npu::npuw::partitioning::spatial.name(),
         ov::intel_npu::npuw::partitioning::spatial_nway.name(),
diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
index e869a9c486d21e..b2ed9557bf5960 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
@@ -43,6 +43,23 @@
 
 namespace opp = ov::pass::pattern;
 
+// Builds the pattern LPT leaves behind: `input` -> Convert(f32) -> Multiply -> Multiply -> Convert(f8).
+auto match_down_up_convert_subgraph_after_lpt = [](const ov::Output<ov::Node>& input) {
+    auto upconvert = opp::wrap_type<ov::op::v0::Convert>({input}, opp::type_matches(ov::element::f32));
+    // First Multiply: combines the f32 Convert with a rank-0 (scalar) Constant.
+    auto upscale = opp::wrap_type<ov::op::v0::Constant>(opp::rank_equals(0));
+    auto upmul = opp::wrap_type<ov::op::v1::Multiply>({upconvert, upscale});
+    // Second Multiply: combines the first product with another rank-0 (scalar) Constant.
+    auto downscale = opp::wrap_type<ov::op::v0::Constant>(opp::rank_equals(0));
+    auto downmul = opp::wrap_type<ov::op::v1::Multiply>({upmul, downscale});
+    // Final Convert is restricted to f8e4m3 / f8e5m2; its pattern node is what the caller receives.
+    auto downconvert =
+        opp::wrap_type<ov::op::v0::Convert>({downmul},
+                                            opp::type_matches_any({ov::element::f8e4m3, ov::element::f8e5m2}));
+
+    return downconvert;
+};
+
 class RemoveEmptyKVTensors : public ov::pass::MatcherPass {
 public:
     OPENVINO_MATCHER_PASS_RTTI("npuw::LLMCompiledModel::RemoveEmptyKVTensors");
@@ -54,7 +71,10 @@ class RemoveEmptyKVTensors : public ov::pass::MatcherPass {
 
     RemoveEmptyKVTensors(Context::Ref ctx) {
         auto param = opp::wrap_type<ov::op::v0::Parameter>();
-        auto concat = opp::wrap_type<ov::op::v0::Concat>({param, opp::any_input()});
+        auto param_or =
+             std::make_shared<opp::op::Or>(ov::OutputVector{param, match_down_up_convert_subgraph_after_lpt(param)});
+
+        auto concat = opp::wrap_type<ov::op::v0::Concat>({param_or, opp::any_input()});
 
         auto callback = [=](opp::Matcher& m) {
             auto& node_to_output = m.get_pattern_value_map();
@@ -63,15 +83,28 @@ class RemoveEmptyKVTensors : public ov::pass::MatcherPass {
 
             ctx.get().old_params.push_back(matched_param);
 
-            auto users = matched_param->get_users();
-            if (users.size() == 2u) {
-                auto shapeof_node = ov::is_type<ov::op::v3::ShapeOf>(users[0]) ? users[0] : users[1];
-                NPUW_ASSERT(ov::is_type<ov::op::v3::ShapeOf>(shapeof_node));
-                auto cst_node =
-                    ov::op::v0::Constant::create(ov::element::i64, ov::Shape{4}, matched_param->get_shape());
-                ov::replace_node(shapeof_node, cst_node);
-            } else {
-                NPUW_ASSERT(users.size() == 1u);
+            // Use concat's first input source node to find ShapeOf users.
+            // This works universally for both plain parameter and down_up_convert subgraph cases,
+            // because in the subgraph case matched_param->get_users() would return the Convert node
+            // (first node of the subgraph), not the ShapeOf.
+            auto concat_input0_node = matched_node_concat->input(0).get_source_output().get_node_shared_ptr();
+            auto users = concat_input0_node->get_users();
+
+            // In subgraph case the parameter itself may also have a ShapeOf user,
+            // so check both the concat input node and the parameter.
+            if (concat_input0_node != matched_param) {
+                auto param_users = matched_param->get_users();
+                users.insert(users.end(), param_users.begin(), param_users.end());
+            }
+
+            // Remove duplicates (concat itself will appear in users)
+            // Find and replace ShapeOf nodes with constants
+            for (auto& user : users) {
+                if (ov::is_type<ov::op::v3::ShapeOf>(user)) {
+                    auto cst_node =
+                        ov::op::v0::Constant::create(ov::element::i64, ov::Shape{4}, matched_param->get_shape());
+                    ov::replace_node(user, cst_node);
+                }
             }
 
             // Redirect second concat input to every node which reads from concat
@@ -323,22 +356,6 @@ class GroupQueryAttentionDecomposition : public ov::pass::MatcherPass {
 class RedirectNewKvToOutput : public ov::pass::MatcherPass {
 public:
     RedirectNewKvToOutput() {
-        auto match_down_up_convert_subgraph = [](const ov::Output<ov::Node>& input) {
-            auto upconvert = opp::wrap_type<ov::op::v0::Convert>({input}, opp::type_matches(ov::element::f32));
-
-            auto upscale = opp::wrap_type<ov::op::v0::Constant>(opp::rank_equals(0));
-            auto upmul = opp::wrap_type<ov::op::v1::Multiply>({upconvert, upscale});
-
-            auto downscale = opp::wrap_type<ov::op::v0::Constant>(opp::rank_equals(0));
-            auto downmul = opp::wrap_type<ov::op::v1::Multiply>({upmul, downscale});
-
-            auto downconvert =
-                opp::wrap_type<ov::op::v0::Convert>({downmul},
-                                                    opp::type_matches_any({ov::element::f8e4m3, ov::element::f8e5m2}));
-
-            return downconvert;
-        };
-
         // example of fp8 inputs to concat
         // input0 : float8e4m3[1,32,1151,96]
         // input1 : float8e4m3[1,32,1,96]
@@ -348,13 +365,13 @@ class RedirectNewKvToOutput : public ov::pass::MatcherPass {
         // TODO: this matcher logic better to cover with unit-tests
         auto input0 = opp::wrap_type<ov::op::v0::Parameter>();
         auto input0_or =
-            std::make_shared<opp::op::Or>(ov::OutputVector{input0, match_down_up_convert_subgraph(input0)});
+            std::make_shared<opp::op::Or>(ov::OutputVector{input0, match_down_up_convert_subgraph_after_lpt(input0)});
 
         auto input1 = opp::any_input();
 
         auto kv_concat = opp::wrap_type<ov::op::v0::Concat>({input0_or, input1});
         auto result1 = opp::wrap_type<ov::op::v0::Result>(kv_concat);
-        auto result2 = opp::wrap_type<ov::op::v0::Result>(match_down_up_convert_subgraph(kv_concat));
+        auto result2 = opp::wrap_type<ov::op::v0::Result>(match_down_up_convert_subgraph_after_lpt(kv_concat));
 
         auto result_or = std::make_shared<opp::op::Or>(ov::OutputVector{result1, result2});
 
@@ -1262,7 +1279,9 @@ ov::AnyMap get_baseline_common_config(const std::optional<NPUDesc>& npudesc) {
         config.erase("NPUW_DCOFF_SCALE");
     }
 
-    // default version is ON - while for older compiler it might be turned off
+    // NPUW_MM_GATED is ON by default;
+    // for compiler versions >= 7.28 the value is ON,
+    // for other compiler versions the value is OFF.
     if (npudesc.has_value()) {
         config.emplace("NPUW_MM_GATED", (npudesc->compiler_matmul_gate ? "YES" : "NO"));
     }
@@ -1896,8 +1915,7 @@ ov::npuw::LLMCompiledModel::LLMCompiledModel(const std::shared_ptr<ov::Model>& m
 
     if (!m_is_embedding) {
         if (!m_use_chunk_prefill) {
-            // TODO: sometimes it is ok if we cannot find any empty inputs or not?
-            remove_empty_kv_inputs(prefill_model);
+            NPUW_ASSERT(remove_empty_kv_inputs(prefill_model));
         } else {
             LOG_DEBUG("Don't remove input key/values from prefill model.");
             LOG_DEBUG("Ask prefill model to output key/values for prefill chunk size tokens.");
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
index ef510b32e9c4e3..a0a941f55e2209 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
@@ -1984,9 +1984,9 @@ PreserveConstDictMatMulFP8::PreserveConstDictMatMulFP8(Context::Ref ctx, Preserv
     auto qcoeff = opp::wrap_type<ov::op::v0::Constant>();
     auto qcvtw = opp::wrap_type<ov::op::v0::Convert>({qweight});
     auto qmuls = opp::wrap_type<ov::op::v1::Multiply>({qcvtw, qcoeff});
-    auto optional_kvt = opp::optional<ov::op::v0::Convert>({qmuls});
+    auto optional_cvt = opp::optional<ov::op::v0::Convert>({qmuls});
     auto qmmi = opp::any_input();
-    auto qmm = opp::wrap_type<ov::op::v0::MatMul>({qmmi, optional_kvt});
+    auto qmm = opp::wrap_type<ov::op::v0::MatMul>({qmmi, optional_cvt});
     std::shared_ptr<Node> qres;
     // // MatMul -> Divide -> Tanh -> Multiply -> Result
     if (ctx.get().mm_gate) {

From a0186bd99dca06cda0a54a4b375eae2dcd135523 Mon Sep 17 00:00:00 2001
From: esmirno1 <eugene.smirnov@intel.com>
Date: Wed, 11 Mar 2026 23:23:59 +0000
Subject: [PATCH 10/14] clang-format-fixes

---
 src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
index 3d4a6f97d068b4..e8ceead8870324 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
@@ -72,7 +72,7 @@ class RemoveEmptyKVTensors : public ov::pass::MatcherPass {
     RemoveEmptyKVTensors(Context::Ref ctx) {
         auto param = opp::wrap_type<ov::op::v0::Parameter>();
         auto param_or =
-             std::make_shared<opp::op::Or>(ov::OutputVector{param, match_down_up_convert_subgraph_after_lpt(param)});
+            std::make_shared<opp::op::Or>(ov::OutputVector{param, match_down_up_convert_subgraph_after_lpt(param)});
 
         auto concat = opp::wrap_type<ov::op::v0::Concat>({param_or, opp::any_input()});
 

From d673f36f07ea6f6bc6d63478652d6daac85b21e0 Mon Sep 17 00:00:00 2001
From: Eugene Smirnov <eugene.smirnov@intel.com>
Date: Thu, 12 Mar 2026 00:51:23 +0100
Subject: [PATCH 11/14] clang-format-fixes

---
 .../src/plugin/npuw/llm_compiled_model.cpp      | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
index e8ceead8870324..966c957f756632 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
@@ -72,22 +72,25 @@ class RemoveEmptyKVTensors : public ov::pass::MatcherPass {
     RemoveEmptyKVTensors(Context::Ref ctx) {
         auto param = opp::wrap_type<ov::op::v0::Parameter>();
         auto param_or =
-            std::make_shared<opp::op::Or>(ov::OutputVector{param, match_down_up_convert_subgraph_after_lpt(param)});
+            std::make_shared<opp::op::Or>(ov::OutputVector{param,
+                match_down_up_convert_subgraph_after_lpt(param)});
 
         auto concat = opp::wrap_type<ov::op::v0::Concat>({param_or, opp::any_input()});
 
         auto callback = [=](opp::Matcher& m) {
             auto& node_to_output = m.get_pattern_value_map();
-            auto matched_param = ov::as_type_ptr<ov::op::v0::Parameter>(node_to_output.at(param).get_node_shared_ptr());
+            auto matched_param =
+                ov::as_type_ptr<ov::op::v0::Parameter>(node_to_output.at(param).get_node_shared_ptr());
             auto matched_node_concat = node_to_output.at(concat).get_node_shared_ptr();
 
             ctx.get().old_params.push_back(matched_param);
 
             // Use concat's first input source node to find ShapeOf users.
             // This works universally for both plain parameter and down_up_convert subgraph cases,
-            // because in the subgraph case matched_param->get_users() would return the Convert node
-            // (first node of the subgraph), not the ShapeOf.
-            auto concat_input0_node = matched_node_concat->input(0).get_source_output().get_node_shared_ptr();
+            // because in the subgraph case matched_param->get_users() would return the Convert
+            // node (first node of the subgraph), not the ShapeOf.
+            auto concat_input0_node =
+                matched_node_concat->input(0).get_source_output().get_node_shared_ptr();
             auto users = concat_input0_node->get_users();
 
             // In subgraph case the parameter itself may also have a ShapeOf user,
@@ -542,7 +545,7 @@ class OldPhi3SlidingMaskMatcher : public ov::pass::MatcherPass {
                             "Sliding window size constant must be of size 1, but got " +
                                 std::to_string(matched_neg_window_size->get_output_size()));
 
-            // 1.(K range <= (Q_pos range - sliding window).T) | (K range > Q range.T)
+            // 1.(K range > (Q_pos range - sliding window).T) & (K range <= Q range.T)
             auto query_range_as_pos_ids =
                 std::make_shared<ov::op::v0::Convert>(matched_pos_ids_input, ov::element::f32);
             std::vector<int64_t> vector_shape{-1, 1};
@@ -556,7 +559,7 @@ class OldPhi3SlidingMaskMatcher : public ov::pass::MatcherPass {
                 std::make_shared<ov::op::v1::LessEqual>(matched_key_range_f32, query_range_as_pos_left_bound);
             matched_bitwise_or->input(1).replace_source_output(forget_left_mask_for_right_padding);
 
-            // 2. (K range <= (Q range - sliding window).T) & (K range >= shape(past_key_values, 2))
+            // 2. (K range > (Q range - sliding window).T) | (K range < shape(past_key_values, 2))
             auto past_kv_len_f32 = std::make_shared<ov::op::v0::Convert>(matched_past_kv_len, ov::element::f32);
             auto only_present_tokens_mask =
                 std::make_shared<ov::op::v1::GreaterEqual>(matched_key_range_f32, past_kv_len_f32);

From d52dc2e3d767310b7bff0a861d584cf8ab34fae6 Mon Sep 17 00:00:00 2001
From: Eugene Smirnov <eugene.smirnov@intel.com>
Date: Thu, 12 Mar 2026 02:07:09 +0100
Subject: [PATCH 12/14] clang-format fixes

---
 .../intel_npu/src/plugin/npuw/llm_compiled_model.cpp     | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
index 966c957f756632..e43781e03b080f 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
@@ -72,15 +72,13 @@ class RemoveEmptyKVTensors : public ov::pass::MatcherPass {
     RemoveEmptyKVTensors(Context::Ref ctx) {
         auto param = opp::wrap_type<ov::op::v0::Parameter>();
         auto param_or =
-            std::make_shared<opp::op::Or>(ov::OutputVector{param,
-                match_down_up_convert_subgraph_after_lpt(param)});
+            std::make_shared<opp::op::Or>(ov::OutputVector{param, match_down_up_convert_subgraph_after_lpt(param)});
 
         auto concat = opp::wrap_type<ov::op::v0::Concat>({param_or, opp::any_input()});
 
         auto callback = [=](opp::Matcher& m) {
             auto& node_to_output = m.get_pattern_value_map();
-            auto matched_param =
-                ov::as_type_ptr<ov::op::v0::Parameter>(node_to_output.at(param).get_node_shared_ptr());
+            auto matched_param = ov::as_type_ptr<ov::op::v0::Parameter>(node_to_output.at(param).get_node_shared_ptr());
             auto matched_node_concat = node_to_output.at(concat).get_node_shared_ptr();
 
             ctx.get().old_params.push_back(matched_param);
@@ -89,8 +87,7 @@ class RemoveEmptyKVTensors : public ov::pass::MatcherPass {
             // This works universally for both plain parameter and down_up_convert subgraph cases,
             // because in the subgraph case matched_param->get_users() would return the Convert
             // node (first node of the subgraph), not the ShapeOf.
-            auto concat_input0_node =
-                matched_node_concat->input(0).get_source_output().get_node_shared_ptr();
+            auto concat_input0_node = matched_node_concat->input(0).get_source_output().get_node_shared_ptr();
             auto users = concat_input0_node->get_users();
 
             // In subgraph case the parameter itself may also have a ShapeOf user,

From 8e04ee147c02c52b25af50168ff849ae2a1f901b Mon Sep 17 00:00:00 2001
From: Eugene Smirnov <eugene.smirnov@intel.com>
Date: Thu, 12 Mar 2026 13:13:44 +0100
Subject: [PATCH 13/14] comments restored

---
 src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
index e43781e03b080f..603e55bd38ecc1 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
@@ -542,7 +542,7 @@ class OldPhi3SlidingMaskMatcher : public ov::pass::MatcherPass {
                             "Sliding window size constant must be of size 1, but got " +
                                 std::to_string(matched_neg_window_size->get_output_size()));
 
-            // 1.(K range > (Q_pos range - sliding window).T) & (K range <= Q range.T)
+            // 1.(K range <= (Q_pos range - sliding window).T) | (K range > Q range.T)
             auto query_range_as_pos_ids =
                 std::make_shared<ov::op::v0::Convert>(matched_pos_ids_input, ov::element::f32);
             std::vector<int64_t> vector_shape{-1, 1};
@@ -556,7 +556,7 @@ class OldPhi3SlidingMaskMatcher : public ov::pass::MatcherPass {
                 std::make_shared<ov::op::v1::LessEqual>(matched_key_range_f32, query_range_as_pos_left_bound);
             matched_bitwise_or->input(1).replace_source_output(forget_left_mask_for_right_padding);
 
-            // 2. (K range > (Q range - sliding window).T) | (K range < shape(past_key_values, 2))
+            // 2. (K range <= (Q range - sliding window).T) & (K range >= shape(past_key_values, 2))
             auto past_kv_len_f32 = std::make_shared<ov::op::v0::Convert>(matched_past_kv_len, ov::element::f32);
             auto only_present_tokens_mask =
                 std::make_shared<ov::op::v1::GreaterEqual>(matched_key_range_f32, past_kv_len_f32);

From 87029d78794a045304ea0083f25233ead5c83ea9 Mon Sep 17 00:00:00 2001
From: Eugene Smirnov <eugene.smirnov@intel.com>
Date: Thu, 12 Mar 2026 13:36:13 +0100
Subject: [PATCH 14/14] comment corrected

---
 src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
index 603e55bd38ecc1..fd1db77da35611 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
@@ -97,7 +97,6 @@ class RemoveEmptyKVTensors : public ov::pass::MatcherPass {
                 users.insert(users.end(), param_users.begin(), param_users.end());
             }
 
-            // Remove duplicates (concat itself will appear in users)
             // Find and replace ShapeOf nodes with constants
             for (auto& user : users) {
                 if (ov::is_type<ov::op::v3::ShapeOf>(user)) {