@@ -203,7 +203,7 @@ void TextToSpeechImpl_Qwen3Omni::load_code_predictor_models(const ov::AnyMap& tt
203203 }
204204 OPENVINO_ASSERT (!m_code_predictor_single_codec_embed_infers.empty (), " No single-codec-embed step models found in " + sce_dir.string ());
205205
206- if (m_sample_codec_token_greedy_search) {
206+ if (m_sample_codec_token_greedy_search && m_merge_ar_and_sce_ov_models ) {
207207 GENAI_INFO (" TextToSpeechModule[" + module_desc->name +
208208 " ]: sample_codec_token_greedy_search is enabled, will use greedy decoding in sample_codec_token" );
209209 merge_code_predictor_ov_models (ar_models, sce_models);
@@ -361,7 +361,7 @@ std::shared_ptr<ov::Model> merge_neighbor_models(std::shared_ptr<ov::Model>& mod
361361 auto model_1_output_embeddings = model_1->get_results ()[0 ]->input_value (0 );
362362 auto model_1_output_layer_tokens = model_1->get_results ()[1 ]->input_value (0 );
363363
364- // Append model_1's output Embeddings to model_1's input inputs_embeds, and then take it as model_2's inputs_embeds .
364+ // Append model_1's output embeddings (all previous steps) to inputs_embeds for model_2.
365365 auto merged_inputs_embeds = std::make_shared<ov::op::v0::Concat>(
366366 ov::OutputVector{model_1_inputs_embeds->output (0 ), model_1_output_embeddings},
367367 1 );
@@ -370,12 +370,13 @@ std::shared_ptr<ov::Model> merge_neighbor_models(std::shared_ptr<ov::Model>& mod
370370 model_2->inputs ()[1 ].replace (model_1_output_layer_tokens);
371371
372372 auto model_2_output_embeddings = model_2->get_results ()[0 ]->input_value (0 );
373- auto merged_2_outputs_embeddings =
374- std::make_shared<ov::op::v0::Concat>(ov::OutputVector{merged_inputs_embeds, model_2_output_embeddings}, 1 );
375-
376- auto merged_2_outputs_embeddings_result = std::make_shared<ov::op::v0::Result>(merged_2_outputs_embeddings);
373+ auto merged_output_embeddings = std::make_shared<ov::op::v0::Concat>(
374+ ov::OutputVector{model_1_output_embeddings, model_2_output_embeddings},
375+ 1 );
376+
377+ auto merged_output_embeddings_result = std::make_shared<ov::op::v0::Result>(merged_output_embeddings);
377378
378- return std::make_shared<ov::Model>(ov::ResultVector{merged_2_outputs_embeddings_result , model_2->get_results ()[1 ]},
379+ return std::make_shared<ov::Model>(ov::ResultVector{merged_output_embeddings_result , model_2->get_results ()[1 ]},
379380 ov::ParameterVector{model_1_inputs_embeds, model_1_input_current_layer_tokens},
380381 " merged_model" );
381382};
0 commit comments