Skip to content

Commit 863ae6f

Browse files
committed
Add Qwen 3.5 cpp pipeline sample
Add Qwen 3.5 cpp pipeline sample. Signed-off-by: Ziniu Lin <ziniu.lin@intel.com>
1 parent 641aef8 commit 863ae6f

File tree

8 files changed

+191
-19
lines changed

8 files changed

+191
-19
lines changed
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
global_context:
2+
model_type: "qwen3_5"
3+
4+
pipeline_modules:
5+
pipeline_params:
6+
type: "ParameterModule"
7+
outputs:
8+
- name: "image"
9+
type: "OVTensor"
10+
- name: "prompt"
11+
type: "String"
12+
13+
image_preprocessor:
14+
type: "ImagePreprocessModule"
15+
device: "GPU"
16+
description: "Image or Video preprocessing."
17+
inputs:
18+
- name: "image"
19+
type: "OVTensor"
20+
source: "pipeline_params.image"
21+
outputs:
22+
- name: "pixel_values"
23+
type: "OVTensor"
24+
- name: "grid_thw"
25+
type: "OVTensor"
26+
- name: "pos_embeds"
27+
type: "OVTensor"
28+
- name: "rotary_cos"
29+
type: "OVTensor"
30+
- name: "rotary_sin"
31+
type: "OVTensor"
32+
params:
33+
model_path: "./tests/module_genai/cpp/test_models/Qwen3.5-0.8B/"
34+
35+
prompt_encoder:
36+
type: "TextEncoderModule"
37+
device: "GPU"
38+
inputs:
39+
- name: "prompt"
40+
type: "String"
41+
source: "pipeline_params.prompt"
42+
- name: "grid_thw"
43+
type: "OVTensor"
44+
source: "image_preprocessor.grid_thw"
45+
outputs:
46+
- name: "input_ids"
47+
type: "OVTensor"
48+
- name: "mask"
49+
type: "OVTensor"
50+
params:
51+
model_path: "./tests/module_genai/cpp/test_models/Qwen3.5-0.8B/"
52+
53+
vision_encoder:
54+
type: "VisionEncoderModule"
55+
device: "GPU"
56+
inputs:
57+
- name: "preprocessed_image"
58+
type: "OVTensor"
59+
source: "image_preprocessor.pixel_values"
60+
- name: "grid_thw"
61+
type: "OVTensor"
62+
source: "image_preprocessor.grid_thw"
63+
- name: "pos_embeds"
64+
type: "OVTensor"
65+
source: "image_preprocessor.pos_embeds"
66+
- name: "rotary_cos"
67+
type: "OVTensor"
68+
source: "image_preprocessor.rotary_cos"
69+
- name: "rotary_sin"
70+
type: "OVTensor"
71+
source: "image_preprocessor.rotary_sin"
72+
- name: "input_ids"
73+
type: "OVTensor"
74+
source: "prompt_encoder.input_ids"
75+
- name: "attention_mask"
76+
type: "OVTensor"
77+
source: "prompt_encoder.mask"
78+
outputs:
79+
- name: "image_embedding"
80+
type: "OVTensor"
81+
- name: "visual_pos_mask"
82+
type: "OVTensor"
83+
- name: "position_ids"
84+
type: "OVTensor"
85+
- name: "rope_delta"
86+
type: "OVTensor"
87+
params:
88+
model_path: "./tests/module_genai/cpp/test_models/Qwen3.5-0.8B/"
89+
vision_start_token_id: 248053
90+
91+
llm:
92+
type: "LLMInferenceSDPAModule"
93+
device: "GPU"
94+
inputs:
95+
- name: "input_ids"
96+
type: "OVTensor"
97+
source: "prompt_encoder.input_ids"
98+
- name: "visual_embeds"
99+
type: "OVTensor"
100+
source: "vision_encoder.image_embedding"
101+
- name: "visual_pos_mask"
102+
type: "OVTensor"
103+
source: "vision_encoder.visual_pos_mask"
104+
- name: "grid_thw"
105+
type: "OVTensor"
106+
source: "image_preprocessor.grid_thw"
107+
- name: "position_ids"
108+
type: "OVTensor"
109+
source: "vision_encoder.position_ids"
110+
- name: "rope_delta"
111+
type: "OVTensor"
112+
source: "vision_encoder.rope_delta"
113+
outputs:
114+
- name: "generated_text"
115+
type: "String"
116+
params:
117+
model_path: "./tests/module_genai/cpp/test_models/Qwen3.5-0.8B/"
118+
max_new_tokens: 512
119+
120+
pipeline_result:
121+
type: "ResultModule"
122+
description: "Collects final results and formats the output structure."
123+
inputs:
124+
- name: "generated_text"
125+
type: "String"
126+
source: "llm.generated_text"
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
# Qwen 3.5 text-only pipeline configuration.
# Same module wiring convention as the VL sample: "source" values are
# "<module_name>.<output_name>" references.
global_context:
  model_type: "qwen3_5"

pipeline_modules:
  # External parameters supplied by the caller at pipeline invocation.
  pipeline_params:
    type: "ParameterModule"
    outputs:
      - name: "prompt"
        type: "String"

  # Tokenizes the prompt into input_ids (+ attention mask).
  prompt_encoder:
    type: "TextEncoderModule"
    device: "GPU"
    inputs:
      - name: "prompt"
        type: "String"
        source: "pipeline_params.prompt"
    outputs:
      - name: "input_ids"
        type: "OVTensor"
      - name: "mask"
        type: "OVTensor"
    params:
      model_path: "./tests/module_genai/cpp/test_models/Qwen3.5-0.8B/"

  # Decoder; no visual inputs are wired, so the module runs in text-only mode.
  llm:
    type: "LLMInferenceSDPAModule"
    device: "GPU"
    inputs:
      - name: "input_ids"
        type: "OVTensor"
        source: "prompt_encoder.input_ids"
    outputs:
      - name: "generated_text"
        type: "String"
    params:
      model_path: "./tests/module_genai/cpp/test_models/Qwen3.5-0.8B/"
      max_new_tokens: 512

  # Terminal sink for the pipeline output.
  pipeline_result:
    type: "ResultModule"
    description: "Collects final results and formats the output structure."
    inputs:
      - name: "generated_text"
        type: "String"
        source: "llm.generated_text"

src/cpp/src/module_genai/modules/md_llm_inference_sdpa.cpp

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,9 @@ void LLMInferenceSDPAModule::run() {
461461
// Determine VL mode: all three additional inputs must be present
462462
const bool is_vl = (this->inputs.find("visual_embeds") != this->inputs.end() &&
463463
this->inputs.find("visual_pos_mask") != this->inputs.end() &&
464-
this->inputs.find("grid_thw") != this->inputs.end());
464+
this->inputs.find("grid_thw") != this->inputs.end() &&
465+
this->inputs.find("position_ids") != this->inputs.end() &&
466+
this->inputs.find("rope_delta") != this->inputs.end());
465467

466468
ov::genai::modeling::models::Qwen3_5InputPlanner planner(m_model_config);
467469

@@ -470,18 +472,13 @@ void LLMInferenceSDPAModule::run() {
470472
ov::Tensor visual_embeds = inputs["visual_embeds"].data.as<ov::Tensor>();
471473
ov::Tensor visual_pos_mask = inputs["visual_pos_mask"].data.as<ov::Tensor>();
472474
ov::Tensor grid_thw = inputs["grid_thw"].data.as<ov::Tensor>();
475+
ov::Tensor position_ids = inputs["position_ids"].data.as<ov::Tensor>();
476+
ov::Tensor rope_delta = inputs["rope_delta"].data.as<ov::Tensor>();
473477

474-
// Compute 3D MRoPE position_ids and rope_deltas from grid_thw
475-
auto plan = planner.build_plan(input_ids, &attention_mask, &grid_thw);
476-
477-
// Scatter raw visual embeddings to full sequence length
478-
auto visual_padded =
479-
ov::genai::modeling::models::Qwen3_5InputPlanner::scatter_visual_embeds(
480-
visual_embeds, plan.visual_pos_mask);
481478

482479
std::string generated_text = run_vl_decode(input_ids, attention_mask,
483-
plan.position_ids, plan.rope_deltas,
484-
visual_padded, plan.visual_pos_mask);
480+
position_ids, rope_delta,
481+
visual_embeds, visual_pos_mask);
485482
GENAI_INFO("LLM output: " + generated_text);
486483
this->outputs["generated_text"].data = generated_text;
487484
} else {

src/cpp/src/module_genai/modules/md_vision_encoder.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ VisionEncoderModule::~VisionEncoderModule() {}
9696

9797
bool VisionEncoderModule::initialize() {
9898
const auto &params = module_desc->params;
99+
VLMModelType model_type = to_vlm_model_type(module_desc->model_type);
99100
auto it_path = params.find("model_path");
100101
if (it_path == params.end()) {
101102
GENAI_ERR("VisionEncoderModule[" + module_desc->name + "]: 'model_path' not found in params");
@@ -121,16 +122,18 @@ bool VisionEncoderModule::initialize() {
121122
model = utils::singleton_core().read_model(model_path);
122123
model_path = model_path.parent_path();
123124
} else {
124-
if (!std::filesystem::exists(model_path / "openvino_vision_embeddings_merger_model.xml")) {
125+
auto model_file_path = model_path / "openvino_vision_embeddings_merger_model.xml";
126+
if (model_type == VLMModelType::QWEN3_5) {
127+
model_file_path = model_path / "qwen3_5_vision.xml";
128+
}
129+
if (!std::filesystem::exists(model_file_path)) {
125130
GENAI_ERR("VisionEncoderModule[" + module_desc->name + "]: model file not found at " +
126-
(model_path / "openvino_vision_embeddings_merger_model.xml").string());
131+
model_file_path.string());
127132
return false;
128133
}
129-
model = utils::singleton_core().read_model(
130-
model_path / "openvino_vision_embeddings_merger_model.xml");
134+
model = utils::singleton_core().read_model(model_file_path);
131135
}
132136

133-
auto model_type = to_vlm_model_type(module_desc->model_type);
134137
if (model_type == VLMModelType::QWEN2_VL || model_type == VLMModelType::QWEN2_5_VL) {
135138
utils::request_vl_sdpa_transformations(model);
136139
}

tests/module_genai/cpp/modules/ImagePreprocesModule.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ class Qwen3_5ImagePreprocessModuleTest : public ModuleTestBase, public ::testing
233233

234234
image_preprocessor["outputs"] = outputs;
235235
YAML::Node model_path;
236-
model_path["model_path"] = TEST_MODEL::Qwen3_5();
236+
model_path["model_path"] = TEST_MODEL::Qwen3_5_0_8B();
237237
image_preprocessor["params"] = model_path;
238238
pipeline_modules["image_preprocessor"] = image_preprocessor;
239239

tests/module_genai/cpp/modules/TextEncoderModule.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ class Qwen3_5TextEncoderModuleTest : public ModuleTestBase, public ::testing::Te
189189
cur_node["outputs"].push_back(output_node("rotary_cos", to_string(DataType::OVTensor)));
190190
cur_node["outputs"].push_back(output_node("rotary_sin", to_string(DataType::OVTensor)));
191191
cur_node["params"] = YAML::Node();
192-
cur_node["params"]["model_path"] = TEST_MODEL::Qwen3_5();
192+
cur_node["params"]["model_path"] = TEST_MODEL::Qwen3_5_0_8B();
193193
pipeline_modules[image_preprocessor_name] = cur_node;
194194
}
195195

tests/module_genai/cpp/modules/VisionEncoderModule.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ class Qwen3_5VisionEncoderModuleTest
279279
cur_node["outputs"].push_back(output_node("position_ids", to_string(DataType::OVTensor)));
280280
cur_node["outputs"].push_back(output_node("rope_delta", to_string(DataType::OVTensor)));
281281
cur_node["params"] = YAML::Node();
282-
cur_node["params"]["model_path"] = TEST_MODEL::Qwen3_5() + "qwen3_5_vision.xml";
282+
cur_node["params"]["model_path"] = TEST_MODEL::Qwen3_5_0_8B() + "qwen3_5_vision.xml";
283283
cur_node["params"]["vision_start_token_id"] = 248053;
284284
pipeline_modules[vision_encoder_name] = cur_node;
285285
}

tests/module_genai/cpp/utils/model_yaml.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ std::string Wan_2_1() {
3030
}
3131

3232
std::string Qwen3_5() {
33-
return get_model_path() + "/Qwen3.5-0.8B/";
33+
return get_model_path() + "/Qwen3.5-35B-A3B-Base_VL_OV_IR/";
3434
}
3535

3636
std::string Qwen3_5_0_8B() {

0 commit comments

Comments (0)