Skip to content

Commit 0a59cdb

Browse files
committed
Rename ENABLE_DYNAMIC_LOAD_MODEL_WEIGHTS -> ENABLE_DYNAMIC_WEIGHT_MANAGEMENT
Signed-off-by: xiping.yan <xiping.yan@intel.com>
1 parent b996dea commit 0a59cdb

File tree

5 files changed

+15
-16
lines changed

5 files changed

+15
-16
lines changed

cmake/features.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ option(ENABLE_TESTS "Enable tests build" ON)
1010
option(ENABLE_TOOLS "Enable tools build" ON)
1111
option(ENABLE_GGUF "Enable support for GGUF format" ON)
1212
option(ENABLE_XGRAMMAR "Enable support for structured output generation with xgrammar backend" ON)
13-
option(ENABLE_DYNAMIC_LOAD_MODEL_WEIGHTS "Enable offloading model weights (load/release)" OFF)
13+
option(ENABLE_DYNAMIC_WEIGHT_MANAGEMENT "Enable offloading model weights (load/release)" OFF)
1414

1515
# Disable building samples for NPM package
1616
if(CPACK_GENERATOR STREQUAL "NPM")

src/cpp/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,8 +146,8 @@ if(ENABLE_GGUF)
146146
target_compile_definitions(${TARGET_NAME_OBJ} PRIVATE ENABLE_GGUF)
147147
endif()
148148

149-
if(ENABLE_DYNAMIC_LOAD_MODEL_WEIGHTS)
150-
target_compile_definitions(${TARGET_NAME_OBJ} PRIVATE ENABLE_DYNAMIC_LOAD_MODEL_WEIGHTS=1)
149+
if(ENABLE_DYNAMIC_WEIGHT_MANAGEMENT)
150+
target_compile_definitions(${TARGET_NAME_OBJ} PRIVATE ENABLE_DYNAMIC_WEIGHT_MANAGEMENT=1)
151151
endif()
152152

153153
target_include_directories(${TARGET_NAME_OBJ} SYSTEM PRIVATE "${safetensors.h_SOURCE_DIR}")

src/cpp/src/module_genai/modules/md_denoiser_loop/splitted_model_infer.cpp

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@ CSplittedModelInfer::CSplittedModelInfer(const std::string& model_path,
1515
: m_dynamic_load_model_weights(dynamic_load_model_weights),
1616
m_is_gpu(device.find("GPU") != std::string::npos || device.find("gpu") != std::string::npos),
1717
m_properties(properties) {
18-
#ifndef ENABLE_DYNAMIC_LOAD_MODEL_WEIGHTS
18+
#ifndef ENABLE_DYNAMIC_WEIGHT_MANAGEMENT
1919
OPENVINO_ASSERT(!m_dynamic_load_model_weights,
2020
"Dynamic loading of model weights is not enabled in this build. Please set "
21-
"ENABLE_DYNAMIC_LOAD_MODEL_WEIGHTS to 1 and rebuild.");
21+
"ENABLE_DYNAMIC_WEIGHT_MANAGEMENT to 'ON' and rebuild.");
2222
#endif
2323

2424
if (m_dynamic_load_model_weights) {
@@ -89,8 +89,7 @@ void CSplittedModelInfer::get_splitted_model_paths(const std::string& model_path
8989
void CSplittedModelInfer::load_model(const std::string& model_path,
9090
const ov::AnyMap& properties,
9191
const std::string& device) {
92-
#if USE_FULL_MODEL
93-
#else
92+
#if !USE_FULL_MODEL
9493
{
9594
auto model = utils::singleton_core().read_model(m_preprocess_model_path);
9695
m_preprocess_compiled_model = utils::singleton_core().compile_model(model, device, properties);
@@ -119,7 +118,7 @@ void CSplittedModelInfer::load_model(const std::string& model_path,
119118
properties_splitted_model[ov::weights_path.name()] =
120119
std::filesystem::path(path).replace_extension(".bin").string();
121120
auto cm = utils::singleton_core().compile_model(model, m_context, properties_splitted_model);
122-
# ifdef ENABLE_DYNAMIC_LOAD_MODEL_WEIGHTS
121+
# ifdef ENABLE_DYNAMIC_WEIGHT_MANAGEMENT
123122
// Release model weights after compilation to save GPU memory. Load weights again in infer() when
124123
// weights are needed.
125124
cm.release_model_weights();
@@ -163,7 +162,7 @@ void CSplittedModelInfer::infer(const ov::AnyMap& inputs) {
163162
OPENVINO_ASSERT(num_splitted_models > 1,
164163
"Splitted models should be at least 2, but got " + std::to_string(num_splitted_models));
165164

166-
# ifdef ENABLE_DYNAMIC_LOAD_MODEL_WEIGHTS
165+
# ifdef ENABLE_DYNAMIC_WEIGHT_MANAGEMENT
167166
# if ENABLE_MULTIPLE_THREAD_LOAD_MODEL_WEIGHT
168167
std::future<bool> future_flag;
169168
if (m_dynamic_load_model_weights) {
@@ -175,7 +174,7 @@ void CSplittedModelInfer::infer(const ov::AnyMap& inputs) {
175174
m_compiled_models[0].load_model_weights();
176175
}
177176
# endif // ENABLE_MULTIPLE_THREAD_LOAD_MODEL_WEIGHT
178-
# endif // ENABLE_DYNAMIC_LOAD_MODEL_WEIGHTS
177+
# endif // ENABLE_DYNAMIC_WEIGHT_MANAGEMENT
179178

180179
// Preprocess
181180
for (const auto& input : inputs) {
@@ -209,7 +208,7 @@ void CSplittedModelInfer::infer(const ov::AnyMap& inputs) {
209208
PROFILE(pm, "splitted_model_infer_" + std::to_string(i));
210209
ov::InferRequest curInferRequest;
211210
if (m_dynamic_load_model_weights) {
212-
# ifdef ENABLE_DYNAMIC_LOAD_MODEL_WEIGHTS
211+
# ifdef ENABLE_DYNAMIC_WEIGHT_MANAGEMENT
213212
if (i + 1 < num_splitted_models) {
214213
# if ENABLE_MULTIPLE_THREAD_LOAD_MODEL_WEIGHT
215214
next_future_flag = thread_utils::load_model_weights_async(m_compiled_models[i + 1]);
@@ -222,7 +221,7 @@ void CSplittedModelInfer::infer(const ov::AnyMap& inputs) {
222221
future_flag.wait();
223222
# endif // ENABLE_MULTIPLE_THREAD_LOAD_MODEL_WEIGHT
224223
curInferRequest = m_compiled_models[i].create_infer_request();
225-
# endif // ENABLE_DYNAMIC_LOAD_MODEL_WEIGHTS
224+
# endif // ENABLE_DYNAMIC_WEIGHT_MANAGEMENT
226225
} else {
227226
curInferRequest = m_infer_requests[i];
228227
}
@@ -238,7 +237,7 @@ void CSplittedModelInfer::infer(const ov::AnyMap& inputs) {
238237
curInferRequest.infer();
239238
}
240239

241-
# ifdef ENABLE_DYNAMIC_LOAD_MODEL_WEIGHTS
240+
# ifdef ENABLE_DYNAMIC_WEIGHT_MANAGEMENT
242241
if (m_dynamic_load_model_weights) {
243242
# if ENABLE_MULTIPLE_THREAD_LOAD_MODEL_WEIGHT
244243
auto release_future =
@@ -256,7 +255,7 @@ void CSplittedModelInfer::infer(const ov::AnyMap& inputs) {
256255
# if ENABLE_MULTIPLE_THREAD_LOAD_MODEL_WEIGHT
257256
future_flag = std::move(next_future_flag);
258257
# endif
259-
# endif // ENABLE_DYNAMIC_LOAD_MODEL_WEIGHTS
258+
# endif // ENABLE_DYNAMIC_WEIGHT_MANAGEMENT
260259
}
261260

262261
GENAI_DEBUG(

src/cpp/src/module_genai/utils/thread_helper.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ namespace ov::genai::module::thread_utils {
1616
# define ENABLE_MULTIPLE_THREAD_LOAD_MODEL_WEIGHT 0 // Multi-threaded weight loading currently may cause a GPU crash.
1717
#endif
1818

19-
#ifdef ENABLE_DYNAMIC_LOAD_MODEL_WEIGHTS
19+
#ifdef ENABLE_DYNAMIC_WEIGHT_MANAGEMENT
2020
inline std::future<bool> load_model_weights_async(ov::CompiledModel compiled_model) {
2121
auto load_fun = [compiled_model]() mutable -> bool {
2222
PROFILE(pm, "load_model_weights async");

tests/module_genai/cpp/modules/DenoiserLoopModule.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ std::vector<DenoiserLoopTestData> denoiser_loop_test_data() {
5858
wan_data_splitted_model.splitted_model = true;
5959
datas.push_back(wan_data_splitted_model);
6060

61-
#ifdef ENABLE_DYNAMIC_LOAD_MODEL_WEIGHTS
61+
#ifdef ENABLE_DYNAMIC_WEIGHT_MANAGEMENT
6262
// Dynamically load weights for the split model
6363
DenoiserLoopTestData wan_data_dyn_weights = wan_data;
6464
wan_data_dyn_weights.splitted_model = true;

0 commit comments

Comments
 (0)