TTS test: update to the best pipeline config.

xipingyan · xipingyan · commit 7a062039e3ae · 2026-03-25T09:14:02.000+08:00
Signed-off-by: xiping.yan <xiping.yan@intel.com>
diff --git a/samples/cpp/module_genai/config_yaml/Qwen3-Omni/config_prompt_tts_int4.yaml b/samples/cpp/module_genai/config_yaml/Qwen3-Omni/config_prompt_tts_int4.yaml
@@ -34,7 +34,7 @@ pipeline_modules:
       code_predictor_single_codec_embedding_model_path: "./tests/module_genai/cpp/test_models/Qwen3-Omni-4B-Instruct-multilingual-int4/qwen3_omni_code_predictor_codec_embed_model.xml"
       speech_decoder_model_path: "./tests/module_genai/cpp/test_models/Qwen3-Omni-4B-Instruct-multilingual-int4/qwen3_omni_speech_decoder_model.xml"
       sample_codec_token_greedy_search: true
-      merge_ar_and_sce_ov_models: false
+      merge_ar_and_sce_ov_models: true
       force_ar_model_inference_precision_f32: false
 
   pipeline_result:
diff --git a/src/cpp/src/module_genai/modules/md_text_to_speech/models/qwen3_omni.cpp b/src/cpp/src/module_genai/modules/md_text_to_speech/models/qwen3_omni.cpp
@@ -209,7 +209,7 @@ void TextToSpeechImpl_Qwen3Omni::load_code_predictor_models(const ov::AnyMap& tt
     if (m_sample_codec_token_greedy_search && m_merge_ar_and_sce_ov_models) {
         GENAI_INFO("TextToSpeechModule[" + module_desc->name +
                    "]: sample_codec_token_greedy_search is enabled, will use greedy decoding in sample_codec_token");
-        merge_code_predictor_ov_models(ar_models, sce_models);
+        merge_code_predictor_ov_models(ar_models, sce_models, tts_props);
         if (m_enable_merge_ov_models) {
             // Release original infer requests to save memory since they won't be used anymore.
             m_code_predictor_ar_infers.clear();
@@ -387,7 +387,8 @@ std::shared_ptr<ov::Model> merge_neighbor_models(std::shared_ptr<ov::Model>& mod
 };
 
 void TextToSpeechImpl_Qwen3Omni::merge_code_predictor_ov_models(std::vector<std::shared_ptr<ov::Model>>& ar_models,
-                                                                std::vector<std::shared_ptr<ov::Model>>& sce_models) {
+                                                                std::vector<std::shared_ptr<ov::Model>>& sce_models,
+                                                                const ov::AnyMap& tts_props) {
     if (ar_models.size() < 2) {
         GENAI_WARN("TextToSpeechModule[" + module_desc->name + "]: Not enough AR models to merge (found " +
                    std::to_string(ar_models.size()) + "), will skip merging and use separate AR/SCE infer requests");
diff --git a/src/cpp/src/module_genai/modules/md_text_to_speech/models/qwen3_omni.hpp b/src/cpp/src/module_genai/modules/md_text_to_speech/models/qwen3_omni.hpp
@@ -69,7 +69,8 @@ class TextToSpeechImpl_Qwen3Omni : public TextToSpeechModule {
 
     void load_code_predictor_models(const ov::AnyMap& tts_props);
     void merge_code_predictor_ov_models(std::vector<std::shared_ptr<ov::Model>>& ar_models,
-                                        std::vector<std::shared_ptr<ov::Model>>& sce_models);
+                                        std::vector<std::shared_ptr<ov::Model>>& sce_models,
+                                        const ov::AnyMap& tts_props);
     std::vector<int64_t> code_predictor_ar_infers_merged_ov(int cp_steps,
                                                             std::vector<float>& autoregressive_sequence,
                                                             size_t batch,