-
Notifications
You must be signed in to change notification settings - Fork 2.6k
[Transformations][GPU] Constant tensor deduplication pass #29052
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 19 commits
Commits
Show all changes
24 commits
Select commit
Hold shift + click to select a range
81a251c
[GPU] Constants duplicate reduction
dnkurek b0319c0
[GPU] Add data computation hash
dnkurek 36561a8
[GPU] Add hash collision safety and size limits
dnkurek bb4e391
[GPU] Improve and fix
dnkurek 8ebcc72
Improve
dnkurek 3a6a3dd
Merge branch 'master' into constants
dnkurek a9bb558
Merge branch 'master' into constants
akladiev 2714bfc
Merge branch 'master' into constants
dnkurek 11dc772
Update commit
dnkurek 6ea44e8
Merge branch 'master' into constants
dnkurek 01fb8e6
Update transformations_pipeline.cpp
dnkurek 52a8e48
Update constants_reduce.cpp
dnkurek b1e498b
Merge branch 'master' into constants
dnkurek 42ffbc4
Update constants_reduce.hpp
dnkurek f14c431
Update constants_reduce.cpp
dnkurek 286250e
Update constants_reduce.hpp
dnkurek 68ea41b
Update constants_reduce.cpp
dnkurek 8ddc160
Update constants_reduce.hpp
dnkurek 37f289f
Update constants_reduce.cpp
dnkurek 49ff00c
Update constant.cpp
dnkurek 53f6caa
Update program_builder.hpp
dnkurek 7294f31
Update constants_reduce.hpp
dnkurek 341a535
Update constants_reduce.cpp
dnkurek a4b3b7e
Update constants_reduce.cpp
dnkurek File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
19 changes: 19 additions & 0 deletions
19
src/common/transformations/include/transformations/common_optimizations/constants_reduce.hpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
// Copyright (C) 2025 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "openvino/pass/matcher_pass.hpp" | ||
#include "transformations_visibility.hpp" | ||
|
||
namespace ov::pass { | ||
|
||
/// Model pass that de-duplicates small Constant nodes. | ||
/// | ||
/// run_on_model() walks the ops of the model, skips every Constant whose byte size exceeds | ||
/// the limit defined in constants_reduce.cpp, and treats two Constants as duplicates when | ||
/// their element type, shape and raw data bytes are equal. Consumers of a duplicate are | ||
/// re-connected to the first equal Constant that was encountered. | ||
class TRANSFORMATIONS_API ConstantsReduce : public ov::pass::ModelPass { | ||
public: | ||
OPENVINO_MODEL_PASS_RTTI("ConstantsReduce"); | ||
ConstantsReduce() = default; | ||
/// @param m model whose Constant nodes are examined | ||
/// @return true when at least one duplicate Constant was found and its consumers re-connected | ||
bool run_on_model(const std::shared_ptr<ov::Model>& m) override; | ||
}; | ||
|
||
} // namespace ov::pass |
119 changes: 119 additions & 0 deletions
119
src/common/transformations/src/transformations/common_optimizations/constants_reduce.cpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
// Copyright (C) 2025 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include "transformations/common_optimizations/constants_reduce.hpp"

#include <cstddef>
#include <cstring>
#include <functional>
#include <memory>
#include <unordered_map>

#include "itt.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/util/log.hpp"
|
||
namespace {
// Constants whose payload is larger than this many bytes are not considered for
// de-duplication, so that hashing/comparing never has to read a large tensor.
constexpr std::size_t LARGE_TENSOR_BYTE_SIZE = 64;
}  // namespace
|
||
namespace ov::pass { | ||
|
||
using BlobCacheKey = std::shared_ptr<ov::Node>; | ||
|
||
struct KeyHash { | ||
std::size_t operator()(const BlobCacheKey& key) const { | ||
std::size_t hash = 0; | ||
|
||
auto node = ov::as_type_ptr<op::v0::Constant>(key); | ||
dnkurek marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
auto type = node->get_output_element_type(0); | ||
auto shape = node->get_shape(); | ||
std::size_t size = node->get_byte_size(); | ||
const char* data = node->get_data_ptr<char>(); | ||
|
||
for (auto dim : shape) { | ||
hash ^= std::hash<size_t>{}(dim); | ||
} | ||
|
||
for (std::size_t i = 0; i < size; i++) { | ||
hash ^= ((hash << 5) + hash) + data[i]; | ||
} | ||
|
||
hash ^= type.hash(); | ||
hash ^= size; | ||
|
||
return hash; | ||
} | ||
}; | ||
|
||
struct KeyEqual { | ||
bool operator()(const BlobCacheKey& lhs, const BlobCacheKey& rhs) const { | ||
auto lhs_node = ov::as_type_ptr<op::v0::Constant>(lhs); | ||
auto rhs_node = ov::as_type_ptr<op::v0::Constant>(rhs); | ||
dnkurek marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
auto lhs_type = lhs_node->get_output_element_type(0); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This part of code looks similar to this function Consider expert these function to tensor_util.hpp (part of dev API ) and re-use it. The Constant node can provide tensor view |
||
auto rhs_type = rhs_node->get_output_element_type(0); | ||
|
||
if (lhs_type != rhs_type) | ||
return false; | ||
|
||
auto lhs_shape = lhs_node->get_shape(); | ||
auto rhs_shape = rhs_node->get_shape(); | ||
|
||
if (lhs_shape != rhs_shape) | ||
return false; | ||
|
||
std::size_t lhs_size = lhs_node->get_byte_size(); | ||
std::size_t rhs_size = rhs_node->get_byte_size(); | ||
|
||
if (lhs_size != rhs_size) | ||
return false; | ||
|
||
// Retrieve buffer pointers | ||
const char* lhs_data = lhs_node->get_data_ptr<char>(); | ||
const char* rhs_data = rhs_node->get_data_ptr<char>(); | ||
|
||
if (lhs_data == rhs_data) | ||
dnkurek marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return true; | ||
|
||
return std::memcmp(lhs_data, rhs_data, lhs_size) == 0; | ||
} | ||
}; | ||
|
||
/// Re-connects the consumers of duplicate small Constants to a single shared Constant.
///
/// Every Constant of at most LARGE_TENSOR_BYTE_SIZE bytes is looked up in a cache keyed by
/// element type, shape and data (see KeyHash / KeyEqual). The first Constant with a given
/// content is kept; for each later equal Constant, all inputs of its users that read from
/// it are switched to the kept one.
///
/// @param m model to process
/// @return true if at least one duplicate Constant was replaced
bool ConstantsReduce::run_on_model(const std::shared_ptr<ov::Model>& m) {
    // This is a ModelPass, so the model-pass scope macro is the matching one
    // (MATCHER_SCOPE is intended for MatcherPass constructors)
    RUN_ON_MODEL_SCOPE(ConstantsReduce);

    std::unordered_map<BlobCacheKey, std::shared_ptr<ov::Node>, KeyHash, KeyEqual> blobMemCache;

    unsigned int copies = 0;

    for (const auto& node : m->get_ops()) {
        const auto const_node = ov::as_type_ptr<ov::op::v0::Constant>(node);
        if (!const_node)
            continue;

        // Limit size of node reading to avoid reading large tensors
        if (const_node->get_byte_size() > LARGE_TENSOR_BYTE_SIZE)
            continue;

        // Single lookup: inserts the node when its content is new, otherwise yields the
        // previously cached Constant with the same content
        const auto [cached, inserted] = blobMemCache.try_emplace(node, node);
        if (inserted)
            continue;

        copies++;
        const auto& replacement = cached->second;
        for (const auto& user : const_node->get_users()) {
            for (size_t i = 0; i < user->get_input_size(); i++) {
                if (user->input_value(i) == node->output(0)) {
                    user->input(i).replace_source_output(replacement);
                }
            }
        }
    }
    OPENVINO_DEBUG("Reduced ", copies, " constant node duplications from model");

    // Return true if we have made any replacements
    return copies > 0;
}
|
||
} // namespace ov::pass |
111 changes: 111 additions & 0 deletions
111
src/common/transformations/tests/common_optimizations/constants_reduce.cpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
// Copyright (C) 2025 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#define _USE_MATH_DEFINES | ||
|
||
#include "transformations/common_optimizations/constants_reduce.hpp" | ||
|
||
#include <gtest/gtest.h> | ||
#include <math.h> | ||
|
||
#include <memory> | ||
|
||
#include "common_test_utils/ov_test_utils.hpp" | ||
#include "openvino/core/model.hpp" | ||
#include "openvino/opsets/opset8.hpp" | ||
#include "openvino/pass/manager.hpp" | ||
|
||
using namespace testing; | ||
using namespace ov; | ||
|
||
// Two Add nodes fed by constants with identical type, shape and values:
// after the pass only one Constant may remain in the model.
TEST(TransformationTests, ConstantsReduce) {
    const auto input = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 4});

    // Both constants carry exactly the same data on purpose
    const auto first_const = opset8::Constant::create(element::f32, Shape{1, 4}, {1.0, 2.0, 3.0, 4.0});
    const auto second_const = opset8::Constant::create(element::f32, Shape{1, 4}, {1.0, 2.0, 3.0, 4.0});

    const auto first_add = std::make_shared<opset8::Add>(input, first_const);
    const auto second_add = std::make_shared<opset8::Add>(first_add, second_const);

    const auto model = std::make_shared<Model>(NodeVector{second_add}, ParameterVector{input});

    pass::Manager manager;
    manager.register_pass<ov::pass::ConstantsReduce>();
    manager.run_passes(model);

    // The duplicate has been folded into the surviving constant
    ASSERT_EQ(count_ops_of_type<opset8::Constant>(model), 1);
}
|
||
// Chain of five Adds: the first four use identical constants, the last one uses a
// constant with different values. Expect the four equal ones to collapse into one.
TEST(TransformationTests, ConstantsReduceChain) {
    const auto input = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 4});

    // Four links of the chain, each with its own copy of the same constant
    std::shared_ptr<Node> tail = input;
    for (int link = 0; link < 4; ++link) {
        const auto same_const = opset8::Constant::create(element::f32, Shape{1, 4}, {1.0, 2.0, 3.0, 4.0});
        tail = std::make_shared<opset8::Add>(tail, same_const);
    }

    // Final link with a constant that differs in its first element
    const auto other_const = opset8::Constant::create(element::f32, Shape{1, 4}, {2.0, 2.0, 3.0, 4.0});
    tail = std::make_shared<opset8::Add>(tail, other_const);

    const auto model = std::make_shared<Model>(NodeVector{tail}, ParameterVector{input});

    pass::Manager manager;
    manager.register_pass<ov::pass::ConstantsReduce>();
    manager.run_passes(model);

    // One shared constant plus the distinct one
    ASSERT_EQ(count_ops_of_type<opset8::Constant>(model), 2);
}
|
||
// Chain of five Adds that all use identical constants: exactly one Constant must survive.
TEST(TransformationTests, ConstantsReduceChain2) {
    const auto input = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 4});

    // Every link gets its own copy of the very same constant
    std::shared_ptr<Node> tail = input;
    for (int link = 0; link < 5; ++link) {
        const auto same_const = opset8::Constant::create(element::f32, Shape{1, 4}, {1.0, 2.0, 3.0, 4.0});
        tail = std::make_shared<opset8::Add>(tail, same_const);
    }

    const auto model = std::make_shared<Model>(NodeVector{tail}, ParameterVector{input});

    pass::Manager manager;
    manager.register_pass<ov::pass::ConstantsReduce>();
    manager.run_passes(model);

    // All five copies have been merged into a single constant
    ASSERT_EQ(count_ops_of_type<opset8::Constant>(model), 1);
}
|
||
// Negative case: the two constants differ in their last element, so the pass must
// leave both of them in the model.
TEST(TransformationTests, ConstantsReduceNeg) {
    const auto input = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 4});

    // Same type and shape, different values
    const auto first_const = opset8::Constant::create(element::f32, Shape{1, 4}, {1.0, 2.0, 3.0, 4.0});
    const auto second_const = opset8::Constant::create(element::f32, Shape{1, 4}, {1.0, 2.0, 3.0, 4.5});

    const auto first_add = std::make_shared<opset8::Add>(input, first_const);
    const auto second_add = std::make_shared<opset8::Add>(first_add, second_const);

    const auto model = std::make_shared<Model>(NodeVector{second_add}, ParameterVector{input});

    pass::Manager manager;
    manager.register_pass<ov::pass::ConstantsReduce>();
    manager.run_passes(model);

    // Nothing to merge
    ASSERT_EQ(count_ops_of_type<opset8::Constant>(model), 2);
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to use ModelPass here?
It looks like this could be a MatcherPass, and we could match with wrap_type inside.