# Top-level context block. It sits outside `pipeline_modules`, so it is
# presumably visible to every module -- TODO confirm against the loader.
global_context:
  model_type: "qwen3_5"
3+
# Module graph. Each child key is a module instance name. A module's
# `inputs[].source` refers to an upstream value as `<module>.<output name>`,
# so the strings must match the declared names exactly.
pipeline_modules:
  # Entry point: declares the two values the other modules read as
  # `pipeline_params.image` and `pipeline_params.prompt`.
  pipeline_params:
    type: "ParameterModule"
    outputs:
      - name: "image"
        type: "OVTensor"
      - name: "prompt"
        type: "String"

  # Takes the raw image tensor and emits the five tensors consumed by
  # `vision_encoder` (`grid_thw` is also read by `prompt_encoder` and `llm`).
  image_preprocessor:
    type: "ImagePreprocessModule"
    device: "GPU"
    description: "Image or Video preprocessing."
    inputs:
      - name: "image"
        type: "OVTensor"
        source: "pipeline_params.image"
    outputs:
      - name: "pixel_values"
        type: "OVTensor"
      - name: "grid_thw"
        type: "OVTensor"
      - name: "pos_embeds"
        type: "OVTensor"
      - name: "rotary_cos"
        type: "OVTensor"
      - name: "rotary_sin"
        type: "OVTensor"
    params:
      model_path: "./tests/module_genai/cpp/test_models/Qwen3.5-0.8B/"

  # Takes the prompt string plus `grid_thw` and emits `input_ids` and `mask`.
  prompt_encoder:
    type: "TextEncoderModule"
    device: "GPU"
    inputs:
      - name: "prompt"
        type: "String"
        source: "pipeline_params.prompt"
      - name: "grid_thw"
        type: "OVTensor"
        source: "image_preprocessor.grid_thw"
    outputs:
      - name: "input_ids"
        type: "OVTensor"
      - name: "mask"
        type: "OVTensor"
    params:
      model_path: "./tests/module_genai/cpp/test_models/Qwen3.5-0.8B/"

  # Joins both branches: all five preprocessor outputs plus the prompt
  # encoder's `input_ids` and `mask`.
  vision_encoder:
    type: "VisionEncoderModule"
    device: "GPU"
    inputs:
      # Input name differs from the upstream output name (`pixel_values`).
      - name: "preprocessed_image"
        type: "OVTensor"
        source: "image_preprocessor.pixel_values"
      - name: "grid_thw"
        type: "OVTensor"
        source: "image_preprocessor.grid_thw"
      - name: "pos_embeds"
        type: "OVTensor"
        source: "image_preprocessor.pos_embeds"
      - name: "rotary_cos"
        type: "OVTensor"
        source: "image_preprocessor.rotary_cos"
      - name: "rotary_sin"
        type: "OVTensor"
        source: "image_preprocessor.rotary_sin"
      - name: "input_ids"
        type: "OVTensor"
        source: "prompt_encoder.input_ids"
      # Input name differs from the upstream output name (`mask`).
      - name: "attention_mask"
        type: "OVTensor"
        source: "prompt_encoder.mask"
    outputs:
      - name: "image_embedding"
        type: "OVTensor"
      - name: "visual_pos_mask"
        type: "OVTensor"
      - name: "position_ids"
        type: "OVTensor"
      - name: "rope_delta"
        type: "OVTensor"
    params:
      model_path: "./tests/module_genai/cpp/test_models/Qwen3.5-0.8B/"
      # NOTE(review): presumably must match the model's tokenizer config --
      # verify against the model directory above.
      vision_start_token_id: 248053

  # Takes token ids, the vision encoder's outputs and `grid_thw`, and emits
  # the generated text as a String.
  llm:
    type: "LLMInferenceSDPAModule"
    device: "GPU"
    inputs:
      - name: "input_ids"
        type: "OVTensor"
        source: "prompt_encoder.input_ids"
      # Input name differs from the upstream output name (`image_embedding`).
      - name: "visual_embeds"
        type: "OVTensor"
        source: "vision_encoder.image_embedding"
      - name: "visual_pos_mask"
        type: "OVTensor"
        source: "vision_encoder.visual_pos_mask"
      - name: "grid_thw"
        type: "OVTensor"
        source: "image_preprocessor.grid_thw"
      - name: "position_ids"
        type: "OVTensor"
        source: "vision_encoder.position_ids"
      - name: "rope_delta"
        type: "OVTensor"
        source: "vision_encoder.rope_delta"
    outputs:
      - name: "generated_text"
        type: "String"
    params:
      model_path: "./tests/module_genai/cpp/test_models/Qwen3.5-0.8B/"
      max_new_tokens: 512

  # Sink: receives the text produced by `llm`.
  pipeline_result:
    type: "ResultModule"
    description: "Collects final results and formats the output structure."
    inputs:
      - name: "generated_text"
        type: "String"
        source: "llm.generated_text"