Commit 603d66f

Change input shape for 3D position_ids of Qwen 2.5 VL with M-RoPE (openvinotoolkit#3400)
The analysis has shown that the correct shape for the 3D position_ids tensor of Qwen 2.5 VL in the ContinuousBatching mode is not a flattened one, but [3, total_token_num]. This preserves the correct 3D semantics of the model's M-RoPE. Instead of reshaping on the transformation side, it is correct and logical to provide the proper tensor from the GenAI side.

- Ticket: [CVS-167316](https://jira.devtools.intel.com/browse/CVS-167316)

Signed-off-by: Andrii Staikov <andrii.staikov@intel.com>

Co-authored-by: Yaroslav Tarkan <yaroslav.tarkan@intel.com>
1 parent 0e5f39c commit 603d66f

File tree

2 files changed, +7 -7 lines changed

src/cpp/src/continuous_batching/model_runner.hpp

Lines changed: 5 additions & 3 deletions

@@ -514,9 +514,11 @@ class ModelRunner {
         if (hidden_state_input && hidden_state_input.get_size() > 0) {
             m_request.set_tensor("hidden_states", hidden_state_input);
         }
-        if (position_ids.get_shape().size() == 3) {
-            // flatten positions ids for 3D position ids case
-            position_ids.set_shape({ov::shape_size(position_ids.get_shape())});
+        if (position_ids.get_shape().size() == 3 && position_ids.get_shape()[0] == 3 &&
+            position_ids.get_shape()[1] == 1) {
+            // M-RoPE: squeeze pseudo-batch dim [3, 1, total_token_num] -> [3, total_token_num]
+            const auto& position_ids_shape = position_ids.get_shape();
+            position_ids.set_shape({position_ids_shape[0], position_ids_shape[2]});
         }
         // typical LLM parameters
         if (!m_cached_position_ids) {

src/cpp/src/visual_language/pipeline.cpp

Lines changed: 2 additions & 4 deletions

@@ -637,12 +637,10 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{
     }
 };

-// TODO: remove it when QWEN ticket-167316/GEMMA3 ticket-171180 is fixed
+// TODO: remove it when GEMMA3 ticket-171180 is fixed
 bool requires_sdpa(const std::filesystem::path& models_dir) {
     auto vlm_config = utils::from_config_json_if_exists<VLMConfig>(models_dir, "config.json");
-    return vlm_config.model_type == VLMModelType::QWEN2_VL ||
-        vlm_config.model_type == VLMModelType::QWEN2_5_VL ||
-        vlm_config.model_type == VLMModelType::GEMMA3;
+    return vlm_config.model_type == VLMModelType::GEMMA3;
 }

 VLMPipeline::VLMPipeline(
