Skip to content

Commit 7b82fb7

Browse files
committed
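Fix merged code-predictor output embeddings (concatenate only model_1 and model_2 output embeddings), merge the code-predictor models only when merge_ar_and_sce_ov_models is also enabled, and add a Qwen3-Omni text-to-speech pipeline config.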
Signed-off-by: xipingya <xiping.yan@intel.com>
1 parent f3d9e81 commit 7b82fb7

File tree

2 files changed

+59
-7
lines changed

2 files changed

+59
-7
lines changed
Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,51 @@
1+
global_context:
2+
model_type: "qwen3_omni"
3+
4+
pipeline_modules:
5+
pipeline_params:
6+
type: "ParameterModule"
7+
outputs:
8+
- name: "prompts"
9+
type: "String"
10+
11+
text_to_speech:
12+
type: "TextToSpeechModule"
13+
device: "CPU"
14+
inputs:
15+
- name: "text"
16+
type: "String"
17+
source: "pipeline_params.prompts"
18+
outputs:
19+
- name: "audios"
20+
type: "VecOVTensor"
21+
- name: "sample_rates"
22+
type: "VecInt"
23+
- name: "generated_texts"
24+
type: "VecString"
25+
params:
26+
config_path: "./tests/module_genai/cpp/test_models/Qwen3-Omni-4B-Instruct-multilingual-int4/config.json"
27+
tokenizer_path: "./tests/module_genai/cpp/test_models/Qwen3-Omni-4B-Instruct-multilingual-int4/"
28+
embedding_model_path: "./tests/module_genai/cpp/test_models/Qwen3-Omni-4B-Instruct-multilingual-int4/qwen3_omni_talker_embedding_model.xml"
29+
prefill_model_path: "./tests/module_genai/cpp/test_models/Qwen3-Omni-4B-Instruct-multilingual-int4/qwen3_omni_talker_prefill_model.xml"
30+
decode_model_path: "./tests/module_genai/cpp/test_models/Qwen3-Omni-4B-Instruct-multilingual-int4/qwen3_omni_talker_decode_model.xml"
31+
codec_embedding_model_path: "./tests/module_genai/cpp/test_models/Qwen3-Omni-4B-Instruct-multilingual-int4/qwen3_omni_talker_codec_embedding_model.xml"
32+
code_predictor_ar_model_path: "./tests/module_genai/cpp/test_models/Qwen3-Omni-4B-Instruct-multilingual-int4"
33+
code_predictor_single_codec_embed_model_path: "./tests/module_genai/cpp/test_models/Qwen3-Omni-4B-Instruct-multilingual-int4"
34+
code_predictor_single_codec_embedding_model_path: "./tests/module_genai/cpp/test_models/Qwen3-Omni-4B-Instruct-multilingual-int4/qwen3_omni_code_predictor_codec_embed_model.xml"
35+
speech_decoder_model_path: "./tests/module_genai/cpp/test_models/Qwen3-Omni-4B-Instruct-multilingual-int4/qwen3_omni_speech_decoder_model.xml"
36+
sample_codec_token_greedy_search: true
37+
merge_ar_and_sce_ov_models: true
38+
39+
pipeline_result:
40+
type: "ResultModule"
41+
description: "Collects final results and formats the output structure."
42+
inputs:
43+
- name: "audios"
44+
type: "VecOVTensor"
45+
source: "text_to_speech.audios"
46+
- name: "sample_rates"
47+
type: "VecInt"
48+
source: "text_to_speech.sample_rates"
49+
- name: "generated_texts"
50+
type: "VecString"
51+
source: "text_to_speech.generated_texts"

src/cpp/src/module_genai/modules/md_text_to_speech/models/qwen3_omni.cpp

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -203,7 +203,7 @@ void TextToSpeechImpl_Qwen3Omni::load_code_predictor_models(const ov::AnyMap& tt
203203
}
204204
OPENVINO_ASSERT(!m_code_predictor_single_codec_embed_infers.empty(), "No single-codec-embed step models found in " + sce_dir.string());
205205

206-
if (m_sample_codec_token_greedy_search) {
206+
if (m_sample_codec_token_greedy_search && m_merge_ar_and_sce_ov_models) {
207207
GENAI_INFO("TextToSpeechModule[" + module_desc->name +
208208
"]: sample_codec_token_greedy_search is enabled, will use greedy decoding in sample_codec_token");
209209
merge_code_predictor_ov_models(ar_models, sce_models);
@@ -361,7 +361,7 @@ std::shared_ptr<ov::Model> merge_neighbor_models(std::shared_ptr<ov::Model>& mod
361361
auto model_1_output_embeddings = model_1->get_results()[0]->input_value(0);
362362
auto model_1_output_layer_tokens = model_1->get_results()[1]->input_value(0);
363363

364-
// Append model_1's output Embeddings to model_1's input inputs_embeds, and then take it as model_2's inputs_embeds.
364+
// Append model_1's output embeddings (all previous steps) to inputs_embeds for model_2.
365365
auto merged_inputs_embeds = std::make_shared<ov::op::v0::Concat>(
366366
ov::OutputVector{model_1_inputs_embeds->output(0), model_1_output_embeddings},
367367
1);
@@ -370,12 +370,13 @@ std::shared_ptr<ov::Model> merge_neighbor_models(std::shared_ptr<ov::Model>& mod
370370
model_2->inputs()[1].replace(model_1_output_layer_tokens);
371371

372372
auto model_2_output_embeddings = model_2->get_results()[0]->input_value(0);
373-
auto merged_2_outputs_embeddings =
374-
std::make_shared<ov::op::v0::Concat>(ov::OutputVector{merged_inputs_embeds, model_2_output_embeddings}, 1);
375-
376-
auto merged_2_outputs_embeddings_result = std::make_shared<ov::op::v0::Result>(merged_2_outputs_embeddings);
373+
auto merged_output_embeddings = std::make_shared<ov::op::v0::Concat>(
374+
ov::OutputVector{model_1_output_embeddings, model_2_output_embeddings},
375+
1);
376+
377+
auto merged_output_embeddings_result = std::make_shared<ov::op::v0::Result>(merged_output_embeddings);
377378

378-
return std::make_shared<ov::Model>(ov::ResultVector{merged_2_outputs_embeddings_result, model_2->get_results()[1]},
379+
return std::make_shared<ov::Model>(ov::ResultVector{merged_output_embeddings_result, model_2->get_results()[1]},
379380
ov::ParameterVector{model_1_inputs_embeds, model_1_input_current_layer_tokens},
380381
"merged_model");
381382
};

0 commit comments

Comments
 (0)