Skip to content

Commit 623e9e8

Browse files
authored
add profiling (#122)
Signed-off-by: xiping.yan <xiping.yan@intel.com>
1 parent b7b5982 commit 623e9e8

File tree

6 files changed: 83 additions and 22 deletions.

src/cpp/src/module_genai/modules/autoencoder_kl_wan.cpp

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -157,7 +157,10 @@ void AutoencoderKLWan::warmup(size_t num_frames) {
157157

158158
// Run inference to trigger JIT compilation
159159
m_decoder_request.set_input_tensor(dummy);
160-
m_decoder_request.infer();
160+
{
161+
PROFILE(pm, "AutoencoderKLWan::Warmup infer");
162+
m_decoder_request.infer();
163+
}
161164

162165
auto warmup_end = std::chrono::high_resolution_clock::now();
163166
double warmup_time_ms = std::chrono::duration<double, std::milli>(warmup_end - warmup_start).count();
@@ -245,7 +248,7 @@ ov::Tensor AutoencoderKLWan::decode(ov::Tensor latents) {
245248
ov::Tensor AutoencoderKLWan::decode_single(ov::Tensor latents) {
246249
m_decoder_request.set_input_tensor(latents);
247250
{
248-
PROFILE(pm, "vae_decoder infer");
251+
PROFILE(pm, "AutoencoderKLWan::decode_single infer");
249252
m_decoder_request.infer();
250253
}
251254
ov::Tensor output = m_decoder_request.get_output_tensor();

src/cpp/src/module_genai/modules/md_denoiser_loop/class.cpp

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -287,16 +287,17 @@ ov::Tensor DenoiserLoopModule::run(
287287
}
288288

289289
if (m_splitted_model) {
290-
PROFILE(pm, "splitted_model_infer");
291290
ov::AnyMap splitted_model_inputs = {{"hidden_states", input_hidden_states},
292291
{"timestep", input_timestep},
293292
{"encoder_hidden_states", input_encoder_hidden_states}};
293+
PROFILE(pm, "DenoiserLoopModule::run m_splitted_model_infer infer");
294294
m_splitted_model_infer->infer(splitted_model_inputs);
295295
} else {
296296
m_request.set_tensor("hidden_states", input_hidden_states);
297297
m_request.set_tensor("timestep", input_timestep);
298298
m_request.set_tensor("encoder_hidden_states", input_encoder_hidden_states);
299-
PROFILE(pm, "infer");
299+
300+
PROFILE(pm, "DenoiserLoopModule::run infer");
300301
m_request.infer();
301302
}
302303

@@ -367,34 +368,40 @@ ov::Tensor DenoiserLoopModule::run(
367368
}
368369

369370
if (m_splitted_model) {
370-
PROFILE(pm, "splitted_model_infer");
371371
ov::AnyMap splitted_model_inputs = {{"hidden_states", latents},
372372
{"timestep", timestep},
373373
{"encoder_hidden_states", prompt_tensor}};
374374
m_splitted_model_infer->set_output_tensor(0, noise_pred);
375+
376+
PROFILE(pm, "DenoiserLoopModule::run m_splitted_model_infer infer");
375377
m_splitted_model_infer->infer(splitted_model_inputs);
376378
} else {
377379
m_request.set_tensor("hidden_states", latents);
378380
m_request.set_tensor("timestep", timestep);
379381
m_request.set_tensor("encoder_hidden_states", prompt_tensor);
380382
m_request.set_output_tensor(0, noise_pred);
383+
384+
PROFILE(pm, "DenoiserLoopModule::run m_request infer");
381385
m_request.infer();
382386
}
383387

384388
if (guidance_scale > 1.0f && negative_prompt_tensor.has_value()) {
385389
if (m_splitted_model) {
386-
PROFILE(pm, "splitted_model_infer_uncond");
387390
ov::AnyMap splitted_model_inputs = {{"hidden_states", latents},
388391
{"timestep", timestep},
389392
{"encoder_hidden_states", negative_prompt_tensor.value()}};
390393
m_splitted_model_infer->set_output_tensor(0, noise_uncond);
394+
395+
PROFILE(pm, "DenoiserLoopModule::run m_splitted_model_infer infer");
391396
m_splitted_model_infer->infer(splitted_model_inputs);
392397
}
393398
else {
394399
m_request.set_tensor("hidden_states", latents);
395400
m_request.set_tensor("timestep", timestep);
396401
m_request.set_tensor("encoder_hidden_states", negative_prompt_tensor.value());
397402
m_request.set_output_tensor(0, noise_uncond);
403+
404+
PROFILE(pm, "DenoiserLoopModule::run m_request infer_uncond");
398405
m_request.infer();
399406
}
400407

src/cpp/src/module_genai/modules/md_denoiser_loop/splitted_model_infer.cpp

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -156,7 +156,10 @@ void CSplittedModelInfer::infer(const ov::AnyMap& inputs) {
156156
m_full_infer_request.set_tensor(input.first, input.second.as<ov::Tensor>());
157157
}
158158

159-
m_full_infer_request.infer();
159+
{
160+
PROFILE(pm, "CSplittedModelInfer::infer m_full_infer_request");
161+
m_full_infer_request.infer();
162+
}
160163
#else
161164
int num_splitted_models = static_cast<int>(m_compiled_models.size());
162165
OPENVINO_ASSERT(num_splitted_models > 1,
@@ -180,7 +183,11 @@ void CSplittedModelInfer::infer(const ov::AnyMap& inputs) {
180183
for (const auto& input : inputs) {
181184
m_preprocess_infer_request.set_tensor(input.first, input.second.as<ov::Tensor>());
182185
}
183-
m_preprocess_infer_request.infer();
186+
187+
{
188+
PROFILE(pm, "CSplittedModelInfer::infer m_preprocess_infer_request");
189+
m_preprocess_infer_request.infer();
190+
}
184191

185192
// The "tokens" tensor produced by the preprocess stage is used as the initial hidden_states.
186193
ov::Tensor hidden_states_tensor = m_preprocess_infer_request.get_tensor("tokens");
@@ -233,7 +240,7 @@ void CSplittedModelInfer::infer(const ov::AnyMap& inputs) {
233240
curInferRequest.set_tensor("rotary_cos", rotary_cos_tensor);
234241
curInferRequest.set_tensor("rotary_sin", rotary_sin_tensor);
235242
{
236-
PROFILE(pmi, "infer");
243+
PROFILE(pmi, "CSplittedModelInfer::infer curInferRequest");
237244
curInferRequest.infer();
238245
}
239246

@@ -268,7 +275,10 @@ void CSplittedModelInfer::infer(const ov::AnyMap& inputs) {
268275
m_postprocess_infer_request.set_tensor("ppf", ppf_tensor);
269276
m_postprocess_infer_request.set_tensor("pph", pph_tensor);
270277
m_postprocess_infer_request.set_tensor("ppw", ppw_tensor);
271-
m_postprocess_infer_request.infer();
278+
{
279+
PROFILE(pm, "CSplittedModelInfer::infer m_postprocess_infer_request");
280+
m_postprocess_infer_request.infer();
281+
}
272282
#endif
273283
}
274284

src/cpp/src/module_genai/modules/md_llm_inference_sdpa.cpp

Lines changed: 27 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
#include "module_genai/utils/com_utils.hpp"
2828
#include "modeling/models/qwen3_vl/processing_qwen3_vl.hpp"
2929
#include "modeling/models/qwen3_omni/processing_qwen3_omni.hpp"
30+
#include "module_genai/utils/profiler.hpp"
3031

3132
namespace ov {
3233
namespace genai {
@@ -357,7 +358,10 @@ std::string LLMInferenceSDPAModule::run_text_decode(const ov::Tensor& input_ids,
357358
}
358359

359360
const auto t_prefill0 = std::chrono::steady_clock::now();
360-
text_req.infer();
361+
{
362+
PROFILE(pm, "LLMInferenceSDPAModule::run_text_decode prefill infer");
363+
text_req.infer();
364+
}
361365
const auto t_prefill1 = std::chrono::steady_clock::now();
362366
int64_t next_id = argmax_last(text_req.get_tensor(TIO::kLogits));
363367

@@ -396,7 +400,11 @@ std::string LLMInferenceSDPAModule::run_text_decode(const ov::Tensor& input_ids,
396400
text_req.set_tensor(TIO::kVisualPosMask, dec_vis_mask);
397401
}
398402

399-
text_req.infer();
403+
{
404+
PROFILE(pm, "LLMInferenceSDPAModule::run_text_decode step infer");
405+
text_req.infer();
406+
}
407+
400408
next_id = argmax_last(text_req.get_tensor(TIO::kLogits));
401409
generated.push_back(next_id);
402410
++decode_steps;
@@ -481,7 +489,10 @@ std::string LLMInferenceSDPAModule::run_vl_decode(const ov::Tensor& input_ids,
481489
}
482490

483491
const auto t_prefill0 = std::chrono::steady_clock::now();
484-
text_req.infer();
492+
{
493+
PROFILE(pm, "LLMInferenceSDPAModule::run_vl_decode prefill infer");
494+
text_req.infer();
495+
}
485496
const auto t_prefill1 = std::chrono::steady_clock::now();
486497
int64_t next_id = argmax_last(text_req.get_tensor(TIO::kLogits));
487498

@@ -540,7 +551,10 @@ std::string LLMInferenceSDPAModule::run_vl_decode(const ov::Tensor& input_ids,
540551
text_req.set_tensor(TIO::kVisualEmbeds, dec_vis);
541552
text_req.set_tensor(TIO::kVisualPosMask, dec_vis_mask);
542553

543-
text_req.infer();
554+
{
555+
PROFILE(pm, "LLMInferenceSDPAModule::run_vl_decode step infer");
556+
text_req.infer();
557+
}
544558
next_id = argmax_last(text_req.get_tensor(TIO::kLogits));
545559
generated.push_back(next_id);
546560
++decode_steps;
@@ -629,7 +643,10 @@ std::string LLMInferenceSDPAModule::run_vl_decode(const ov::Tensor& input_ids,
629643
}
630644

631645
const auto t_prefill0 = std::chrono::steady_clock::now();
632-
text_req.infer();
646+
{
647+
PROFILE(pm, "LLMInferenceSDPAModule::run_text_decode prefill infer");
648+
text_req.infer();
649+
}
633650
const auto t_prefill1 = std::chrono::steady_clock::now();
634651
int64_t next_id = argmax_last(text_req.get_tensor(TIO::kLogits));
635652

@@ -682,7 +699,11 @@ std::string LLMInferenceSDPAModule::run_vl_decode(const ov::Tensor& input_ids,
682699
text_req.set_tensor(name, decode_deepstack[i]);
683700
}
684701

685-
text_req.infer();
702+
{
703+
PROFILE(pm, "LLMInferenceSDPAModule::run_qwen3_omni_decode step infer");
704+
text_req.infer();
705+
}
706+
686707
next_id = argmax_last(text_req.get_tensor(TIO::kLogits));
687708
generated.push_back(next_id);
688709
++decode_steps;

src/cpp/src/module_genai/modules/md_vision_encoder.cpp

Lines changed: 17 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
#include "visual_language/vision_encoder.hpp"
2424
#include "visual_language/vl_sdpa_transformations.hpp"
2525
#include "models/qwen3_omni/qwen3_omni_config.hpp"
26+
#include "module_genai/utils/profiler.hpp"
2627

2728

2829
namespace ov {
@@ -377,7 +378,11 @@ std::pair<ov::Tensor, ov::Tensor> VisionEncoderModule::embed(const EncodedImage
377378
}
378379
vision_embeddings_merger.set_tensor("rotary_pos_emb", rotary_pos_emb);
379380
vision_embeddings_merger.set_tensor("window_index", window_index);
380-
vision_embeddings_merger.infer();
381+
{
382+
PROFILE(pm, "VisionEncoderModule::embed vision_embeddings_merger infer");
383+
vision_embeddings_merger.infer();
384+
}
385+
381386
ov::Tensor processed_vision_embeds = vision_embeddings_merger.get_output_tensor();
382387

383388
auto out_vision_shape = processed_vision_embeds.get_shape();
@@ -440,8 +445,12 @@ Qwen3_5VisionEmbeddingResult VisionEncoderModule::embed(
440445
if (model_type == VLMModelType::QWEN3_OMNI) {
441446
vision_embed_request.set_tensor("attention_mask", build_vision_attention_mask(grid_thw));
442447
}
443-
444-
vision_embed_request.infer();
448+
449+
{
450+
PROFILE(pm, "VisionEncoderModule::embed vision_embed_request infer");
451+
vision_embed_request.infer();
452+
}
453+
445454
ov::Tensor vision_embeds = vision_embed_request.get_tensor("visual_embeds");
446455

447456
const auto &ids_shape = input_ids.get_shape();
@@ -651,7 +660,11 @@ Qwen3OmniVisionEmbeddingResult VisionEncoderModule::embed(
651660
vision_embed_request.set_tensor("rotary_sin", vision_input.value().rotary_sin);
652661
vision_embed_request.set_tensor("attention_mask", build_vision_attention_mask(vision_input.value().grid_thw));
653662

654-
vision_embed_request.infer();
663+
{
664+
PROFILE(pm, "VisionEncoderModule::embed vision_embed_request infer");
665+
vision_embed_request.infer();
666+
}
667+
655668
vision_embeds = vision_embed_request.get_tensor("visual_embeds");
656669
grid_thw = vision_input.value().grid_thw;
657670

src/cpp/src/module_genai/modules/unipc_multistep_scheduler.cpp

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
#include "openvino/op/strided_slice.hpp"
2525
#include "utils.hpp"
2626
#include "module_genai/utils/tensor_utils.hpp"
27+
#include "module_genai/utils/profiler.hpp"
2728

2829

2930
namespace ov::genai::module {
@@ -1085,7 +1086,10 @@ ov::Tensor UniPCMultistepScheduler::multistep_uni_c_bh_update(
10851086
} else {
10861087
m_c_solver.set_input_tensor(0, R);
10871088
m_c_solver.set_input_tensor(1, b);
1088-
m_c_solver.infer();
1089+
{
1090+
PROFILE(pm, "UniPCMultistepScheduler::multistep_uni_p_bh_update m_c_solver infer");
1091+
m_c_solver.infer();
1092+
}
10891093
rhos_c = m_c_solver.get_output_tensor(0);
10901094
}
10911095

@@ -1226,7 +1230,10 @@ ov::Tensor UniPCMultistepScheduler::multistep_uni_p_bh_update(
12261230
} else {
12271231
m_p_solver.set_input_tensor(0, R);
12281232
m_p_solver.set_input_tensor(1, b);
1229-
m_p_solver.infer();
1233+
{
1234+
PROFILE(pm, "UniPCMultistepScheduler::multistep_uni_p_bh_update m_p_solver infer");
1235+
m_p_solver.infer();
1236+
}
12301237
rhos_p = m_p_solver.get_output_tensor(0);
12311238
}
12321239
}

0 commit comments

Comments
 (0)