openvinotoolkit · p-durandin · Apr 23, 2025 · Feb 18, 2025 · Feb 18, 2025 · Feb 18, 2025
@@ -0,0 +1,22 @@
+// Copyright (C) 2024 Intel Corporation
-// Copyright (C) 2024 Intel Corporation
+// Copyright (C) 2025 Intel Corporation
-// Copyright (C) 2024 Intel Corporation
+// Copyright (C) 2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "transformations_visibility.hpp"
+#include "openvino/pass/graph_rewrite.hpp"
+
+namespace ov {
+namespace pass {
+
+/**
+ * @brief ConstantsReduce removes duplicated small Constant nodes from a model.
+ *
+ * The pass walks all operations of the model, groups v0::Constant nodes that have the same
+ * element type, shape and byte content, and reconnects the consumers of every duplicate to the
+ * first constant that was seen with that content.
+ *
+ * NOTE(review): the pass overrides run_on_model() and registers no matchers, yet derives from
+ * GraphRewrite; confirm whether a plain model pass base would be the better fit.
+ */
+class TRANSFORMATIONS_API ConstantsReduce : public ov::pass::GraphRewrite {
+public:
+    OPENVINO_GRAPH_REWRITE_RTTI("ConstantsReduce");
+
+    // Declared exactly once; the definition lives in constants_reduce.cpp.
+    ConstantsReduce();
+
+    /**
+     * @brief Runs the de-duplication over the whole model.
+     * @param m Model whose constants are inspected and re-linked.
+     * @return true if at least one duplicated constant was replaced, false otherwise.
+     */
+    bool run_on_model(const std::shared_ptr<ov::Model>& m) override;
+};
+
+}  // namespace pass
+}  // namespace ov
@@ -0,0 +1,110 @@
+// Copyright (C) 2024 Intel Corporation
-// Copyright (C) 2024 Intel Corporation
+// Copyright (C) 2025 Intel Corporation
-// Copyright (C) 2024 Intel Corporation
+// Copyright (C) 2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/common_optimizations/constants_reduce.hpp"
+
+#include <cstddef>
+#include <cstring>
+#include <functional>
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "itt.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/util/log.hpp"
+
+namespace ov {
+namespace pass {
+
+using BlobCacheKey = std::shared_ptr<ov::Node>;
+
+/**
+ * Hash functor for the constant de-duplication cache.
+ *
+ * The hash covers everything KeyEqual compares: element type, shape (order-sensitive) and the
+ * raw bytes of the constant. Two keys that KeyEqual reports as equal therefore always hash to
+ * the same value, while constants that merely share type and shape no longer collide.
+ *
+ * The key must reference an op::v0::Constant; the cast result is dereferenced without a check.
+ */
+struct KeyHash {
+    std::size_t operator()(const BlobCacheKey& key) const {
+        // Order-sensitive mixing step (boost::hash_combine style). A plain XOR would make
+        // shapes {2, 3} and {3, 2} collide and let equal dimensions cancel each other out.
+        const auto mix = [](std::size_t& seed, std::size_t value) {
+            seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+        };
+
+        const auto node = ov::as_type_ptr<op::v0::Constant>(key);
+
+        std::size_t hash_value = std::hash<std::string>{}(node->get_output_element_type(0).c_type_string());
+
+        for (const auto dim : node->get_shape()) {
+            mix(hash_value, std::hash<size_t>{}(dim));
+        }
+
+        // Fold the payload in as well, so that the map does not degrade into one long bucket
+        // for the very common case of many scalars of the same type.
+        const std::size_t byte_size = node->get_byte_size();
+        const char* data = node->get_data_ptr<char>();
+        for (std::size_t i = 0; i < byte_size; ++i) {
+            mix(hash_value, static_cast<unsigned char>(data[i]));
+        }
+
+        return hash_value;
+    }
+};
+
+/**
+ * Equality functor for the constant de-duplication cache.
+ *
+ * Two keys are equal when both constants have the same element type, the same shape, the same
+ * byte size and byte-wise identical content. Both keys must reference op::v0::Constant nodes;
+ * the cast results are dereferenced without a check.
+ */
+struct KeyEqual {
+    bool operator()(const BlobCacheKey& lhs, const BlobCacheKey& rhs) const {
+        // The very same node is trivially equal to itself.
+        if (lhs == rhs)
+            return true;
+
+        const auto lhs_node = ov::as_type_ptr<op::v0::Constant>(lhs);
+        const auto rhs_node = ov::as_type_ptr<op::v0::Constant>(rhs);
+
+        if (lhs_node->get_output_element_type(0) != rhs_node->get_output_element_type(0))
+            return false;
+
+        if (lhs_node->get_shape() != rhs_node->get_shape())
+            return false;
+
+        const std::size_t lhs_size = lhs_node->get_byte_size();
+        const std::size_t rhs_size = rhs_node->get_byte_size();
+
+        if (lhs_size != rhs_size)
+            return false;
+
+        // Empty constants of the same type and shape have nothing left to compare. Returning
+        // here also keeps possibly-null data pointers away from std::memcmp, which is
+        // undefined for null arguments even when the length is zero.
+        if (lhs_size == 0)
+            return true;
+
+        // Retrieve buffer pointers
+        const char* lhs_data = lhs_node->get_data_ptr<char>();
+        const char* rhs_data = rhs_node->get_data_ptr<char>();
+
+        // Constants sharing one buffer are equal without reading it.
+        if (lhs_data == rhs_data)
+            return true;
+
+        return std::memcmp(lhs_data, rhs_data, lhs_size) == 0;
+    }
+};
+
+// The pass keeps no state of its own, so the compiler-generated constructor is sufficient.
+ConstantsReduce::ConstantsReduce() = default;
+
+/**
+ * Replaces duplicated small constants in the model by a single shared node.
+ *
+ * Every v0::Constant of at most 256 bytes is looked up in a cache keyed by element type, shape
+ * and content. The first node with a given content is kept; for each later duplicate all
+ * consumer inputs fed by its output 0 are re-connected to the kept node.
+ *
+ * @param m Model to process.
+ * @return true if at least one duplicate was replaced, false otherwise.
+ */
+bool ConstantsReduce::run_on_model(const std::shared_ptr<ov::Model>& m) {
+    RUN_ON_MODEL_SCOPE(ConstantsReduce);
+
+    // Limit size of node reading to avoid reading large tensors
+    constexpr std::size_t max_byte_size = 256;
+
+    std::unordered_map<BlobCacheKey, std::shared_ptr<ov::Node>, KeyHash, KeyEqual> blobMemCache;
+
+    int copies = 0;
+
+    const auto& ops = m->get_ops();
+    for (const auto& op : ops) {
+        const auto const_node = ov::as_type_ptr<ov::op::v0::Constant>(op);
+        if (!const_node)
+            continue;
+
+        if (const_node->get_byte_size() > max_byte_size)
+            continue;
+
+        // Single lookup: either registers this constant as the representative for its
+        // content, or yields the representative that was registered earlier.
+        const auto lookup = blobMemCache.emplace(op, op);
+        if (lookup.second)
+            continue;
+
+        const auto& kept = lookup.first->second;
+        copies++;
+
+        // Re-route every consumer input that reads the duplicate to the kept constant.
+        const auto duplicate_output = op->output(0);
+        const auto users = const_node->get_users();
+        for (const auto& user : users) {
+            for (size_t i = 0; i < user->get_input_size(); i++) {
+                if (user->input_value(i) == duplicate_output) {
+                    user->input(i).replace_source_output(kept);
+                }
+            }
+        }
+    }
+
+    OPENVINO_DEBUG("Reduced ", copies, " constant node duplications from model");
+
+    // Return true if we have made any replacements
+    return copies > 0;
+}
+
+}  // namespace pass
+}  // namespace ov
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp
@@ -97,8 +97,6 @@ class ProgramBuilder final {
     std::vector<cldnn::primitive_id> profiling_ids;
 
     std::map<size_t, cldnn::layout> inputLayouts;
-    using BlobCacheKey = std::tuple<const char*, ov::Shape, ov::element::Type>;
-    std::map<BlobCacheKey, cldnn::primitive_id> blobMemCache;
 
     std::shared_ptr<cldnn::program> get_compiled_program() const;
     std::shared_ptr<cldnn::topology> get_topology() const { return m_topology; }

diff --git a/src/plugins/intel_gpu/src/plugin/ops/constant.cpp b/src/plugins/intel_gpu/src/plugin/ops/constant.cpp
@@ -88,38 +88,26 @@ static void create_data(ProgramBuilder& p, const ov::Shape& const_shape, const s
     cldnn::primitive_id constPrimID;
     auto data = op->get_data_ptr<char>();
 
-    const auto cache_key = std::make_tuple(data, const_shape, op->get_output_element_type(0));
-
-    auto bufIter = p.blobMemCache.find(cache_key);
-
-    if (bufIter != p.blobMemCache.end()) {
-        constPrimID = bufIter->second;
-        p.primitive_ids[initialconstPrimID] = constPrimID;
-        p.profiling_ids.push_back(initialconstPrimID);
-    } else {
-        cldnn::memory::ptr mem = nullptr;
-        if (constLayout.bytes_count() > 0) {
+    cldnn::memory::ptr mem = nullptr;
+    if (constLayout.bytes_count() > 0) {
             mem = p.get_engine().allocate_memory(constLayout, false);
-        } else {
-            // In the case of empty const data with {0} shape, it has zero byte.
-            // To avoid zero byte memory allocation issue, reinterpret one dimension memory to zero dimension memory.
-            auto one_dim_layout = cldnn::layout(ov::PartialShape({1}), constLayout.data_type, constLayout.format);
-            auto one_dim_mem = p.get_engine().allocate_memory(one_dim_layout, false);
-            mem = p.get_engine().reinterpret_buffer(*one_dim_mem, constLayout);
-        }
-
-        GPU_DEBUG_LOG << "[" << initialconstPrimID << ": constant] layout: "
-                        << constLayout.to_short_string() << ", mem_ptr(" << mem << ", " << mem->size() << " bytes)"<< std::endl;
-        auto& stream = p.get_engine().get_service_stream();
-        cldnn::mem_lock<char> lock{mem, stream};
-        auto buf = lock.data();
-        auto bufSize = constLayout.bytes_count();
-
-        std::memcpy(&buf[0], &data[0], bufSize);
-        p.add_primitive(*op, cldnn::data(initialconstPrimID, mem));
-        p.blobMemCache[cache_key] = initialconstPrimID;
-        constPrimID = initialconstPrimID;
+    } else {
+        // To avoid zero byte memory allocation issue, reinterpret one dimension memory to zero dimension memory.
+        auto one_dim_layout = cldnn::layout(ov::PartialShape({1}), constLayout.data_type, constLayout.format);
+        auto one_dim_mem = p.get_engine().allocate_memory(one_dim_layout, false);
+        mem = p.get_engine().reinterpret_buffer(*one_dim_mem, constLayout);
     }
+
+    GPU_DEBUG_LOG << "[" << initialconstPrimID << ": constant] layout: "
+                    << constLayout.to_short_string() << ", mem_ptr(" << mem << ", " << mem->size() << " bytes)"<< std::endl;
+    auto& stream = p.get_engine().get_service_stream();
+    cldnn::mem_lock<char> lock{mem, stream};
+    auto buf = lock.data();
+    auto bufSize = constLayout.bytes_count();
+
+    std::memcpy(&buf[0], &data[0], bufSize);
+    p.add_primitive(*op, cldnn::data(initialconstPrimID, mem));
+    constPrimID = initialconstPrimID;
 }
 
 static bool is_btiwise(Node* node) {

@@ -106,6 +106,7 @@
 #include "transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp"
 #include "transformations/common_optimizations/wrap_interpolate_into_transposes.hpp"
 #include "transformations/common_optimizations/fuse_rotary_positional_embeddings.hpp"
+#include "transformations/common_optimizations/constants_reduce.hpp"
 #include "transformations/control_flow/unroll_tensor_iterator.hpp"
 #include "transformations/convert_pooling_to_reduce.hpp"
 #include "transformations/convert_precision.hpp"
@@ -1192,6 +1193,8 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         // Remove Pad in front of MaxPool if both the pads_begin and pads_end are zero.
         manager.register_pass<ov::pass::EliminatePad>();
 
+        manager.register_pass<ov::pass::ConstantsReduce>();
+
         // This is supposed to be the last pass to ensure that we don't have name collisions until
         // GPU plugin stops using friendly names for program creation
         manager.register_pass<ov::pass::ResolveNameCollisions>(true);