Skip to content

Commit 99dbde6

Browse files
committed
Added a unit test for LLMInferenceSDPAModule covering Qwen3.5 in both text and vision-language modes.
Signed-off-by: Zhang, Xiaolin <xiaolin.zhang@intel.com>
1 parent a6dccf2 commit 99dbde6

File tree

1 file changed

+200
-0
lines changed

1 file changed

+200
-0
lines changed
Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
// Copyright (C) 2026 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
#include "../utils/load_image.hpp"
5+
#include "../utils/model_yaml.hpp"
6+
#include "../utils/ut_modules_base.hpp"
7+
#include "../utils/utils.hpp"
8+
9+
#include "modeling/models/qwen3_5/processing_qwen3_5.hpp"
10+
11+
// Parameters for test:
12+
// string: mode ("text" or "vl")
13+
// string: device
14+
using test_params = std::tuple<std::string, std::string>;
15+
using namespace ov::genai::module;
16+
17+
// ============================================================================
18+
// Test fixture
19+
// ============================================================================
20+
21+
class LLMInferenceSDPAModuleTest : public ModuleTestBase,
22+
public ::testing::TestWithParam<test_params> {
23+
private:
24+
std::string _mode;
25+
std::string _device;
26+
27+
std::string _module_name = "sdpa_llm_infer";
28+
29+
public:
30+
static std::string get_test_case_name(const testing::TestParamInfo<test_params>& obj) {
31+
const auto& mode = std::get<0>(obj.param);
32+
const auto& device = std::get<1>(obj.param);
33+
std::string result;
34+
result += "Device_" + device;
35+
result += "_Mode_" + mode;
36+
return result;
37+
}
38+
39+
void SetUp() override {
40+
REGISTER_TEST_NAME();
41+
std::tie(_mode, _device) = GetParam();
42+
}
43+
44+
void TearDown() override {}
45+
46+
protected:
47+
// ------------------------------------------------------------------
48+
// YAML generation — configure the module for text or VL mode
49+
// ------------------------------------------------------------------
50+
std::string get_yaml_content() override {
51+
YAML::Node config;
52+
config["global_context"]["model_type"] = "qwen3_5";
53+
54+
YAML::Node pipeline_modules = config["pipeline_modules"];
55+
56+
YAML::Node llm_sdpa;
57+
llm_sdpa["type"] = "LLMInferenceSDPAModule";
58+
llm_sdpa["device"] = _device;
59+
llm_sdpa["description"] = "LLM Inference SDPA Module test.";
60+
61+
// ---- Inputs ----
62+
YAML::Node inputs;
63+
// input_ids is always required
64+
inputs.push_back(input_node("input_ids", "OVTensor"));
65+
66+
if (_mode == "vl") {
67+
inputs.push_back(input_node("visual_embeds", "OVTensor"));
68+
inputs.push_back(input_node("visual_pos_mask", "OVTensor"));
69+
inputs.push_back(input_node("grid_thw", "OVTensor"));
70+
}
71+
llm_sdpa["inputs"] = inputs;
72+
73+
// ---- Outputs ----
74+
YAML::Node outputs;
75+
outputs.push_back(output_node("generated_text", "String"));
76+
llm_sdpa["outputs"] = outputs;
77+
78+
// ---- Params ----
79+
YAML::Node params;
80+
params["model_path"] = TEST_MODEL::Qwen3_5();
81+
params["max_new_tokens"] = "16";
82+
llm_sdpa["params"] = params;
83+
84+
pipeline_modules[_module_name] = llm_sdpa;
85+
return YAML::Dump(config);
86+
}
87+
88+
// ------------------------------------------------------------------
89+
// Input preparation
90+
// ------------------------------------------------------------------
91+
ov::AnyMap prepare_inputs() override {
92+
ov::AnyMap inputs;
93+
94+
if (_mode == "text") {
95+
// Text mode: provide tokenized input_ids directly.
96+
// Token ids for a simple prompt (placeholder ids).
97+
std::vector<int64_t> token_values = {9707}; // e.g. "Hello"
98+
ov::Tensor input_ids(ov::element::i64, {1, token_values.size()});
99+
std::copy(token_values.begin(), token_values.end(), input_ids.data<int64_t>());
100+
inputs["input_ids"] = input_ids;
101+
102+
} else {
103+
// VL mode: provide input_ids with vision placeholder tokens,
104+
// plus synthetic visual embeddings.
105+
//
106+
// Sequence layout: [vision_start] [img_tok × N_vis] [vision_end] [text_tok]
107+
//
108+
// Read model config to get hidden_size and special token ids dynamically
109+
auto model_cfg = ov::genai::modeling::models::Qwen3_5Config::from_json_file(
110+
TEST_MODEL::Qwen3_5());
111+
const int64_t vision_start_id = model_cfg.vision_start_token_id;
112+
const int64_t image_token_id = model_cfg.image_token_id;
113+
const int64_t vision_end_id = model_cfg.vision_end_token_id;
114+
constexpr int64_t text_token_id = 9707;
115+
116+
// grid_thw = [1, 4, 4] with spatial_merge_size=2 gives:
117+
// n_vis = T * (H/merge) * (W/merge) = 1 * 2 * 2 = 4
118+
constexpr size_t n_vis = 4; // visual tokens count
119+
constexpr size_t seq_len = 2 + n_vis + 1; // start + vis×N + end + text
120+
const size_t hidden = static_cast<size_t>(model_cfg.text.hidden_size);
121+
122+
// input_ids: [vision_start, img, img, img, img, vision_end, text]
123+
ov::Tensor input_ids(ov::element::i64, {1, seq_len});
124+
auto* ids = input_ids.data<int64_t>();
125+
ids[0] = vision_start_id;
126+
for (size_t i = 0; i < n_vis; ++i)
127+
ids[1 + i] = image_token_id;
128+
ids[1 + n_vis] = vision_end_id;
129+
ids[2 + n_vis] = text_token_id;
130+
inputs["input_ids"] = input_ids;
131+
132+
// visual_embeds: compact visual embeddings [n_vis, hidden]
133+
// scatter_visual_embeds expects 2D [V, H] (flat across all images)
134+
ov::Tensor visual_embeds(ov::element::f32, {n_vis, hidden});
135+
std::fill_n(visual_embeds.data<float>(), n_vis * hidden, 0.01f);
136+
inputs["visual_embeds"] = visual_embeds;
137+
138+
// visual_pos_mask: boolean [1, seq_len] — true at visual token positions
139+
ov::Tensor visual_pos_mask(ov::element::boolean, {1, seq_len});
140+
auto* mask = visual_pos_mask.data<bool>();
141+
mask[0] = false; // vision_start
142+
for (size_t i = 0; i < n_vis; ++i)
143+
mask[1 + i] = true; // image tokens
144+
mask[1 + n_vis] = false; // vision_end
145+
mask[2 + n_vis] = false; // text
146+
inputs["visual_pos_mask"] = visual_pos_mask;
147+
148+
// grid_thw: [N_images, 3] — (T, H, W) for 3D MRoPE
149+
// Single image: T=1, H=4, W=4, spatial_merge_size=2 → n_vis = 1×2×2 = 4
150+
ov::Tensor grid_thw(ov::element::i64, {1, 3});
151+
auto* thw = grid_thw.data<int64_t>();
152+
thw[0] = 1; // T
153+
thw[1] = 4; // H
154+
thw[2] = 4; // W
155+
inputs["grid_thw"] = grid_thw;
156+
}
157+
return inputs;
158+
}
159+
160+
// ------------------------------------------------------------------
161+
// Output verification — print generated text to stdout
162+
// ------------------------------------------------------------------
163+
void check_outputs(ov::genai::module::ModulePipeline& pipe) override {
164+
auto generated_text = pipe.get_output("generated_text").as<std::string>();
165+
166+
if (std::getenv("VERBOSE")) {
167+
std::cout << "\n======== LLMInferenceSDPA Output ========\n"
168+
<< " Mode : " << _mode << "\n"
169+
<< " Device : " << _device << "\n"
170+
<< " Output : \"" << generated_text << "\"\n"
171+
<< "=========================================\n" << std::endl;
172+
}
173+
174+
EXPECT_FALSE(generated_text.empty()) << "Generated text should not be empty";
175+
}
176+
};
177+
178+
// ============================================================================
179+
// Test cases
180+
// ============================================================================
181+
182+
TEST_P(LLMInferenceSDPAModuleTest, ModuleTest) {
183+
run();
184+
}
185+
186+
// ============================================================================
187+
// Parameterised instantiation
188+
// ============================================================================
189+
190+
static std::vector<test_params> g_test_params = {
191+
// Text mode
192+
{"text", TEST_MODEL::get_device()},
193+
// VL mode
194+
{"vl", TEST_MODEL::get_device()},
195+
};
196+
197+
INSTANTIATE_TEST_SUITE_P(ModuleTestSuite,
198+
LLMInferenceSDPAModuleTest,
199+
::testing::ValuesIn(g_test_params),
200+
LLMInferenceSDPAModuleTest::get_test_case_name);

0 commit comments

Comments (0)