Skip to content

Hetero support continuous batching #30371

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
c0da1ef
hetero support remote tensor
WeldonWangwang Apr 24, 2025
33063bd
Hetero support continuous batching
WeldonWangwang Apr 29, 2025
0e6c022
clean code
WeldonWangwang Apr 30, 2025
df95fdc
Merge branch 'master' into wangwang/hetero_support_cb
WeldonWangwang May 11, 2025
940e5be
Fix submodel with paged-attention ops
WeldonWangwang May 15, 2025
82c0609
clean code
WeldonWangwang May 15, 2025
589f559
clean code
WeldonWangwang May 15, 2025
1b6f2ee
Fix build error
WeldonWangwang May 15, 2025
1b7fd80
Fix code style
WeldonWangwang May 16, 2025
0d6328e
Fix code style
WeldonWangwang May 16, 2025
284e025
Fix build error
WeldonWangwang May 16, 2025
d67fb74
Fix test cases
WeldonWangwang May 16, 2025
a5f05e7
Rename some classes
WeldonWangwang May 19, 2025
74cad18
Remove the debug log
WeldonWangwang May 26, 2025
2ca7b07
Merge branch 'master' into wangwang/hetero_support_cb
WeldonWangwang May 27, 2025
97acbdd
Get num_kv_heads from PA op
WeldonWangwang Jun 10, 2025
2dcde0f
Fix reshape ops connected to PA
WeldonWangwang Jun 12, 2025
a4b653a
Refactor the code
WeldonWangwang Jun 18, 2025
c3772d4
Fix the code style
WeldonWangwang Jun 18, 2025
c71b235
Fix the code style
WeldonWangwang Jun 18, 2025
e0d4927
Fix test cases
WeldonWangwang Jun 18, 2025
8661919
Fix test cases and build error
WeldonWangwang Jun 19, 2025
5ba6afd
Simplify compile_model API
WeldonWangwang Jun 19, 2025
ba80016
Merge branch 'master' into wangwang/hetero_support_cb
WeldonWangwang Jun 19, 2025
7b15bae
Merge branch 'master' into wangwang/hetero_support_cb
WeldonWangwang Jun 20, 2025
9b03405
Rename some variables
WeldonWangwang Jun 24, 2025
940e509
Fix code style
WeldonWangwang Jun 24, 2025
bf67c0f
Merge branch 'master' into wangwang/hetero_support_cb
wangleis Jun 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 28 additions & 76 deletions src/plugins/hetero/src/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,110 +17,62 @@
#include "openvino/runtime/properties.hpp"
#include "openvino/util/common_util.hpp"
#include "openvino/util/xml_parse_utils.hpp"
#include "plugin.hpp"
#include "properties.hpp"

// Constructs a HETERO compiled model from submodels that were already split per device.
// The model, plugin and remote context are forwarded to ov::ICompiledModel, cfg and mapping_info are
// stored, and the submodels are compiled by compile_model(). Any exception raised during compilation
// is re-raised through OPENVINO_THROW.
// NOTE(review): this span is rendered from a diff, so some lines appear twice (previous version
// immediately followed by its replacement): the base-class initializer, the m_loaded_from_cache
// initializer and the compile_model() call.
ov::hetero::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
const std::vector<ov::hetero::SubmodelInfo>& submodels,
const SubgraphsMappingInfo& mapping_info,
const std::shared_ptr<const ov::IPlugin>& plugin,
ov::hetero::RemoteContext::Ptr context,
const Configuration& cfg)
: ov::ICompiledModel(model, plugin),
: ov::ICompiledModel(model, plugin, context),
m_cfg(cfg),
m_name(model->get_friendly_name()),
m_loaded_from_cache(false) {
m_loaded_from_cache(false),
m_mapping_info(mapping_info) {
try {
compile_model(model);
compile_model(submodels);
} catch (const std::exception& e) {
// keep the original message of the underlying failure
OPENVINO_THROW("Standard exception from compilation library: ", e.what());
} catch (...) {
OPENVINO_THROW("Generic exception is thrown");
}
}

// NOTE(review): this span is rendered from a diff (28 additions, 76 deletions), so the previous
// compile_model(model) overload and the new compile_model(submodels) overload appear interleaved.
// Previous version: begins by collecting the "affinity" rt_info entry of every ordered op.
void ov::hetero::CompiledModel::compile_model(const std::shared_ptr<ov::Model>& model) {
ov::SupportedOpsMap query_model_result;
bool user_set_affinities = false;
// Get user defined affinity
for (const auto& node : model->get_ordered_ops()) {
auto& node_info = node->get_rt_info();
auto it_info = node_info.find("affinity");
if (it_info != node_info.end()) {
OPENVINO_ASSERT(it_info->second.is<std::string>(), "Unexpected type of \"affinity\" attribute");
query_model_result.emplace(node->get_friendly_name(), it_info->second.as<std::string>());
user_set_affinities = true;
}
}
// New version: compiles every (device, submodel) pair on its device through the core, stores one
// CompiledModelDesc per pair in m_compiled_submodels and finally calls set_inputs_and_outputs().
void ov::hetero::CompiledModel::compile_model(const std::vector<ov::hetero::SubmodelInfo>& submodels) {
// exclusive async requests are only requested when there is more than one submodel
const bool add_exclusive = submodels.size() > 1;
const auto& hetero_plugin = get_hetero_plugin();
const auto& core = hetero_plugin->get_core();
const auto& device_properties = m_cfg.get_device_properties();

m_compiled_submodels.clear();
m_compiled_submodels.reserve(submodels.size());

for (const auto& [device, sub_model] : submodels) {
// get meta devices properties for the target device
auto meta_devices = hetero_plugin->get_properties_per_device(device, device_properties);

// Previous version: per-submodel compilation was a lambda working on a CompiledModelDesc.
auto compile_device_model = [&](CompiledModelDesc& compiled_model_desc, bool add_exclusive) {
auto meta_devices =
get_hetero_plugin()->get_properties_per_device(compiled_model_desc.device, m_cfg.get_device_properties());
// disable caching for subgraphs, because the whole HETERO model is cached
auto device_config = meta_devices[compiled_model_desc.device];
auto device_config = meta_devices.at(device);
device_config[ov::cache_dir.name()] = "";

// set exclusive_async_requests in case when model is split
if (add_exclusive) {
auto supported_internal_properties =
get_hetero_plugin()->get_core()->get_property(compiled_model_desc.device,
ov::internal::supported_properties);
auto supported_internal_properties = core->get_property(device, ov::internal::supported_properties);
if (std::find(supported_internal_properties.begin(),
supported_internal_properties.end(),
ov::internal::exclusive_async_requests) != supported_internal_properties.end()) {
// adds property if it is not set yet
device_config.insert(ov::internal::exclusive_async_requests(true));
}
}
compiled_model_desc.compiled_model = get_hetero_plugin()->get_core()->compile_model(compiled_model_desc.model,
compiled_model_desc.device,
device_config);
};

// Previous version: the model was split into subgraphs here, inside the compiled model.
if (user_set_affinities) {
// All affinities must be defined by user
ov::hetero::SubgraphsVector ordered_subgraphs;
std::tie(ordered_subgraphs, m_mapping_info) =
get_model_subgraphs(model, query_model_result, user_set_affinities, m_cfg.dump_dot_files());

m_compiled_submodels.resize(ordered_subgraphs.size());
bool add_exclusive = ordered_subgraphs.size() > 1;
size_t id = 0;
for (const auto& subgraph : ordered_subgraphs) {
m_compiled_submodels[id].device = subgraph._affinity;
m_compiled_submodels[id].model = std::make_shared<ov::Model>(subgraph._results,
subgraph._sinks,
subgraph._parameters,
m_name + '_' + std::to_string(id));
compile_device_model(m_compiled_submodels[id], add_exclusive);
++id;
}
} else {
// Restore properties in order to pass "device priorities" together
// with devices properties
auto full_properties = m_cfg.get_hetero_properties();
for (const auto& property : m_cfg.get_device_properties())
full_properties[property.first] = property.second;

// This function modifies the original model
auto cloned_model = model->clone();
std::tie(query_model_result, m_mapping_info) =
get_hetero_plugin()->query_model_update(cloned_model, full_properties, true);

ov::hetero::op::DeviceSubgraphVector ordered_subgraphs;
for (const auto& op : cloned_model->get_ordered_ops()) {
if (const auto& subgraph = ov::as_type_ptr<ov::hetero::op::DeviceSubgraph>(op)) {
ordered_subgraphs.push_back(subgraph);
} else {
OPENVINO_ASSERT(ov::op::util::is_output(op) || ov::op::util::is_parameter(op) ||
ov::op::util::is_sink(op));
}
}
m_compiled_submodels.resize(ordered_subgraphs.size());
bool add_exclusive = ordered_subgraphs.size() > 1;
size_t id = 0;
for (const auto& subgraph : ordered_subgraphs) {
m_compiled_submodels[id].device = subgraph->get_affinity();
m_compiled_submodels[id].model = subgraph->get_function();
compile_device_model(m_compiled_submodels[id], add_exclusive);
++id;
}
// compile the submodel and add to the compiled submodels list
CompiledModelDesc desc;
desc.device = device;
desc.model = sub_model;
desc.compiled_model = core->compile_model(sub_model, device, device_config);
m_compiled_submodels.emplace_back(std::move(desc));
}
set_inputs_and_outputs();
}
Expand Down
9 changes: 7 additions & 2 deletions src/plugins/hetero/src/compiled_model.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include "config.hpp"
#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/so_ptr.hpp"
#include "plugin.hpp"
#include "remote_context.hpp"
#include "subgraph_collector.hpp"

namespace ov {
Expand All @@ -18,7 +20,10 @@ class InferRequest;
class CompiledModel : public ov::ICompiledModel {
public:
CompiledModel(const std::shared_ptr<ov::Model>& model,
const std::vector<ov::hetero::SubmodelInfo>& compiled_submodels,
const SubgraphsMappingInfo& mapping_info,
const std::shared_ptr<const ov::IPlugin>& plugin,
ov::hetero::RemoteContext::Ptr context,
const Configuration& cfg);

CompiledModel(std::istream& model,
Expand All @@ -43,7 +48,7 @@ class CompiledModel : public ov::ICompiledModel {
private:
friend class InferRequest;

void compile_model(const std::shared_ptr<ov::Model>& model);
void compile_model(const std::vector<ov::hetero::SubmodelInfo>& submodels);

std::shared_ptr<const Plugin> get_hetero_plugin() const;

Expand All @@ -66,4 +71,4 @@ class CompiledModel : public ov::ICompiledModel {
std::vector<CompiledModelDesc> m_compiled_submodels;
};
} // namespace hetero
} // namespace ov
} // namespace ov
89 changes: 87 additions & 2 deletions src/plugins/hetero/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,103 @@
#include "openvino/runtime/properties.hpp"
#include "openvino/util/common_util.hpp"
#include "properties.hpp"
#include "remote_context.hpp"

// Default constructor: registers the plugin under the device name "HETERO".
ov::hetero::Plugin::Plugin() {
set_device_name("HETERO");
}

/**
 * @brief Splits a model into an ordered list of (device, submodel) pairs.
 *
 * When at least one node carries an "affinity" rt_info entry, the user-defined affinities are used and
 * the subgraphs are produced by get_model_subgraphs(). Otherwise a clone of the model is passed to
 * query_model_update() and every resulting DeviceSubgraph op becomes one submodel.
 *
 * @param model  Model to split.
 * @param config Configuration of the current request. It supplies the dump_dot_files flag as well as
 *               the hetero and per-device properties, so settings passed for this call are honoured
 *               instead of the plugin-level m_cfg.
 * @return Pair of the subgraphs mapping info and the submodels, in subgraph order.
 * @throws ov::Exception (via OPENVINO_ASSERT) if an "affinity" entry is not a string, or if an
 *         unexpected node type remains after query_model_update().
 */
std::pair<ov::hetero::SubgraphsMappingInfo, std::vector<ov::hetero::SubmodelInfo>> ov::hetero::Plugin::split_graph(
    const std::shared_ptr<ov::Model>& model,
    Configuration config) const {
    ov::SupportedOpsMap query_model_result;
    bool user_set_affinities = false;
    // Get user defined affinity
    for (const auto& node : model->get_ordered_ops()) {
        const auto& rt_info = node->get_rt_info();
        const auto it = rt_info.find("affinity");
        if (it != rt_info.end()) {
            OPENVINO_ASSERT(it->second.is<std::string>(), "Unexpected type of \"affinity\" attribute");
            query_model_result.emplace(node->get_friendly_name(), it->second.as<std::string>());
            user_set_affinities = true;
        }
    }

    std::vector<ov::hetero::SubmodelInfo> submodels;
    SubgraphsMappingInfo mapping_info;

    if (user_set_affinities) {
        // All affinities must be defined by user
        const std::string model_name = model->get_friendly_name();
        ov::hetero::SubgraphsVector ordered_subgraphs;
        // Use the per-call configuration, not the plugin-wide one, so that a dump_dot_files
        // value passed together with this request is respected.
        std::tie(ordered_subgraphs, mapping_info) =
            get_model_subgraphs(model, query_model_result, true, config.dump_dot_files());

        submodels.reserve(ordered_subgraphs.size());
        for (size_t i = 0; i < ordered_subgraphs.size(); ++i) {
            const auto& subgraph = ordered_subgraphs[i];
            submodels.emplace_back(subgraph._affinity,
                                   std::make_shared<ov::Model>(subgraph._results,
                                                               subgraph._sinks,
                                                               subgraph._parameters,
                                                               model_name + "_" + std::to_string(i)));
        }

        return {std::move(mapping_info), std::move(submodels)};
    }

    // Restore properties in order to pass "device priorities" together
    // with devices properties
    auto full_properties = config.get_hetero_properties();
    for (const auto& [device, props] : config.get_device_properties()) {
        full_properties[device] = props;
    }

    // query_model_update() works on a clone, so the caller's model is not the one being rewritten
    auto cloned_model = model->clone();
    std::tie(query_model_result, mapping_info) = query_model_update(cloned_model, full_properties, true);

    ov::hetero::op::DeviceSubgraphVector ordered_subgraphs;
    for (const auto& op : cloned_model->get_ordered_ops()) {
        if (const auto& subgraph = ov::as_type_ptr<ov::hetero::op::DeviceSubgraph>(op)) {
            ordered_subgraphs.push_back(subgraph);
        } else {
            OPENVINO_ASSERT(ov::op::util::is_output(op) || ov::op::util::is_parameter(op) || ov::op::util::is_sink(op),
                            "Unexpected node type found in model after query_model_update()");
        }
    }

    submodels.reserve(ordered_subgraphs.size());
    for (const auto& subgraph : ordered_subgraphs) {
        submodels.emplace_back(subgraph->get_affinity(), subgraph->get_function());
    }

    return {std::move(mapping_info), std::move(submodels)};
}

// Compiles a model for the HETERO device: builds a Configuration from `properties` and the plugin's
// m_cfg, splits a clone of the model with split_graph() and wraps the result in a CompiledModel.
std::shared_ptr<ov::ICompiledModel> ov::hetero::Plugin::compile_model(const std::shared_ptr<const ov::Model>& model,
const ov::AnyMap& properties) const {
OV_ITT_SCOPED_TASK(itt::domains::Hetero, "Plugin::compile_model");

auto config = Configuration{properties, m_cfg};
// NOTE(review): the next two statements are the two deleted lines of this diff hunk (they use the
// previous 3-argument CompiledModel constructor); the statements after them are the replacement.
auto compiled_model = std::make_shared<CompiledModel>(model->clone(), shared_from_this(), config);
return compiled_model;
auto cloned_model = model->clone();
SubgraphsMappingInfo mapping_info;
std::vector<ov::hetero::SubmodelInfo> submodels;
std::tie(mapping_info, submodels) = split_graph(cloned_model, config);
ov::hetero::RemoteContext::Ptr remote_context;
try {
// collect the default remote context of every device that received a submodel
std::map<std::string, ov::SoPtr<ov::IRemoteContext>> contexts_map;
for (const auto& [device_name, _] : submodels) {
contexts_map.insert({device_name, get_core()->get_default_context(device_name)});
}
remote_context = std::make_shared<ov::hetero::RemoteContext>(std::move(contexts_map));
} catch (const ov::Exception&) {
// intentionally ignored: if an ov::Exception is thrown above, remote_context stays null
}
return std::make_shared<CompiledModel>(cloned_model,
submodels,
mapping_info,
shared_from_this(),
remote_context,
config);
}

std::shared_ptr<ov::ICompiledModel> ov::hetero::Plugin::compile_model(
Expand Down
6 changes: 6 additions & 0 deletions src/plugins/hetero/src/plugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
namespace ov {
namespace hetero {

using SubmodelInfo = std::pair<std::string, std::shared_ptr<ov::Model>>;

class CompiledModel;

class Plugin : public ov::IPlugin {
Expand Down Expand Up @@ -68,6 +70,10 @@ class Plugin : public ov::IPlugin {
const ov::AnyMap& properties,
bool allow_exception = false) const;

std::pair<ov::hetero::SubgraphsMappingInfo, std::vector<SubmodelInfo>> split_graph(
const std::shared_ptr<ov::Model>& model,
Configuration config) const;

Configuration m_cfg;

mutable size_t independent_submodel_size = 0;
Expand Down
47 changes: 47 additions & 0 deletions src/plugins/hetero/src/remote_context.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "remote_context.hpp"

#include <memory>

#include "openvino/runtime/make_tensor.hpp"
#include "remote_tensor.hpp"

namespace ov {
namespace hetero {

/**
 * @brief Creates a HETERO remote context on top of the given underlying contexts.
 * @param contexts Underlying remote contexts keyed by string; must contain at least one entry.
 * @throws ov::Exception (via OPENVINO_ASSERT) if `contexts` is empty.
 */
RemoteContext::RemoteContext(std::map<std::string, ov::SoPtr<ov::IRemoteContext>> contexts)
    : m_contexts(std::move(contexts)) {
    // get_property() dereferences m_contexts.begin(), so an empty map has to be rejected here.
    // The condition must be passed explicitly: a bare string literal is always "true".
    OPENVINO_ASSERT(!m_contexts.empty(), "HETERO RemoteContext must have at least one underlying context");
}
/// Returns the properties reported by the first underlying context (first entry of m_contexts).
const ov::AnyMap& RemoteContext::get_property() const {
    const auto& first_entry = *m_contexts.begin();
    return first_entry.second->get_property();
}

/// Returns the shared pointer obtained from shared_from_this(), cast to the HETERO RemoteContext type.
std::shared_ptr<RemoteContext> RemoteContext::get_this_shared_ptr() {
    auto self = shared_from_this();
    return std::static_pointer_cast<RemoteContext>(std::move(self));
}

/**
 * @brief Creates one remote tensor per underlying context and wraps them in a HETERO RemoteTensor.
 * @param type   Element type forwarded to every underlying context.
 * @param shape  Shape forwarded to every underlying context.
 * @param params Parameters forwarded to every underlying context.
 * @return The wrapping HETERO remote tensor.
 */
ov::SoPtr<ov::IRemoteTensor> RemoteContext::create_tensor(const ov::element::Type& type,
                                                          const ov::Shape& shape,
                                                          const ov::AnyMap& params) {
    std::vector<ov::SoPtr<ov::IRemoteTensor>> per_context_tensors;
    per_context_tensors.reserve(m_contexts.size());
    for (auto it = m_contexts.begin(); it != m_contexts.end(); ++it) {
        per_context_tensors.emplace_back(it->second->create_tensor(type, shape, params));
    }
    auto hetero_tensor = std::make_shared<ov::hetero::RemoteTensor>(get_this_shared_ptr(), per_context_tensors);
    return ov::SoPtr<ov::IRemoteTensor>(hetero_tensor);
}

/// Returns the name of the device this context belongs to, which is always "HETERO".
const std::string& RemoteContext::get_device_name() const {
    static const std::string hetero_device_name{"HETERO"};
    return hetero_device_name;
}

} // namespace hetero
} // namespace ov
32 changes: 32 additions & 0 deletions src/plugins/hetero/src/remote_context.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <string>

#include "openvino/runtime/iremote_context.hpp"

namespace ov {
namespace hetero {
class RemoteContext : public ov::IRemoteContext {
public:
using Ptr = std::shared_ptr<RemoteContext>;

RemoteContext(std::map<std::string, ov::SoPtr<ov::IRemoteContext>> contexts);

const std::string& get_device_name() const override;
const ov::AnyMap& get_property() const override;

ov::SoPtr<ov::IRemoteTensor> create_tensor(const ov::element::Type& type,
const ov::Shape& shape,
const ov::AnyMap& params) override;

private:
std::shared_ptr<RemoteContext> get_this_shared_ptr();
std::map<std::string, ov::SoPtr<ov::IRemoteContext>> m_contexts;
};

} // namespace hetero
} // namespace ov
Loading
Loading