Skip to content

Commit e711ea5

Browse files
committed
tmp
Signed-off-by: xiping.yan <xiping.yan@intel.com>
1 parent 5428546 commit e711ea5

File tree

3 files changed

+67
-9
lines changed

3 files changed

+67
-9
lines changed

src/cpp/src/modeling/models/qwen3_5/processing_qwen3_5.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1414,6 +1414,7 @@ Qwen3_5VisionInputs Qwen3_5VisionPreprocessor::preprocess(const ov::Tensor& imag
14141414
}
14151415

14161416
std::vector<float> frame;
1417+
float* frame_ptr = frame.data();
14171418
resize_bilinear_to_chw(src_img,
14181419
in_h,
14191420
in_w,
@@ -1423,7 +1424,7 @@ Qwen3_5VisionInputs Qwen3_5VisionPreprocessor::preprocess(const ov::Tensor& imag
14231424
out_w,
14241425
preprocess_cfg_.image_mean,
14251426
preprocess_cfg_.image_std,
1426-
frame);
1427+
frame_ptr);
14271428

14281429
const size_t frames = 1;
14291430
size_t padded_frames = frames;

src/cpp/src/module_genai/modules/model/qwen3_5/qwen3_5preprocessor.cpp

Lines changed: 62 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -70,14 +70,15 @@ Qwen3_5PreprocessorOutput Qwen3_5Preprocessor::preprocess(const ov::Tensor &imag
7070
}
7171

7272
std::vector<float> frame;
73+
float* frame_data = frame.data();
7374
resize_bilinear_to_chw(src_img,
7475
in_h,
7576
in_w,
7677
channels,
7778
nchw,
7879
out_h,
7980
out_w,
80-
frame);
81+
frame_data);
8182

8283
const size_t frames = 1;
8384
size_t padded_frames = frames;
@@ -194,7 +195,8 @@ Qwen3_5PreprocessorOutput Qwen3_5Preprocessor::preprocess_video(const ov::Tensor
194195
OPENVINO_ASSERT(channels == 3U, "video must have 3 channels");
195196

196197
const size_t factor = static_cast<size_t>(m_preprocess_config.patch_size * m_preprocess_config.merge_size);
197-
198+
199+
ov::Tensor resized_video;
198200
if (m_preprocess_config.do_resize) {
199201
auto resized_size = qwen3vl_utils::smart_resize(frame_num,
200202
in_h,
@@ -206,11 +208,15 @@ Qwen3_5PreprocessorOutput Qwen3_5Preprocessor::preprocess_video(const ov::Tensor
206208
if (resized_size.height % m_preprocess_config.patch_size != 0 || resized_size.width % m_preprocess_config.patch_size != 0) {
207209
OPENVINO_THROW("Resized image must be divisible by patch_size");
208210
}
209-
}
210-
211-
212211

212+
resized_video = resize(video, resized_size);
213+
}
214+
else {
215+
resized_video = video;
216+
}
213217

218+
// rescale_and_normalize
219+
OPENVINO_THROW("Video preprocessing is not implemented yet");
214220
return {};
215221
}
216222

@@ -284,15 +290,64 @@ std::pair<size_t, size_t> Qwen3_5Preprocessor::smart_resize(size_t height,
284290
return {h_bar, w_bar};
285291
}
286292

293+
/// @brief Converts a batch of uint8 frames into float32 planar frames of the
///        requested spatial size.
///
/// The source must be a 4-D u8 tensor interpreted as [B, H, W, C] with C == 3.
/// Every frame is either bilinearly resized through resize_bilinear_to_chw()
/// or, when the source already has the requested height and width, copied
/// element by element while being cast to float and transposed from HWC to CHW.
/// No rescaling or normalization is applied here.
///
/// @param src       Source tensor, element type u8, shape [B, H, W, C].
/// @param dst_size  Requested output height and width.
/// @return f32 tensor of shape [B, C, dst_size.height, dst_size.width]. The
///         leading batch dimension is required so that every frame has its own
///         storage; for B == 1 the memory layout equals a plain [C, H, W] tensor.
/// @throws ov::Exception if the element type is not u8, the rank is not 4, or
///         the channel count is not 3.
ov::Tensor Qwen3_5Preprocessor::resize(const ov::Tensor& src, ImageSize dst_size) {
    if (src.get_element_type() != ov::element::u8) {
        OPENVINO_THROW("Only uint8 source tensor is supported for resizing");
    }

    // Read the shape once instead of re-querying the tensor for every dimension.
    const auto& src_shape = src.get_shape();
    if (src_shape.size() != 4) {
        OPENVINO_THROW("Source tensor must have shape [B, H, W, C]");
    }

    const size_t batch = src_shape[0];
    const size_t src_h = src_shape[1];
    const size_t src_w = src_shape[2];
    const size_t channels = src_shape[3];
    OPENVINO_ASSERT(channels == 3U, "Source tensor must have 3 channels");

    const size_t dst_h = static_cast<size_t>(dst_size.height);
    const size_t dst_w = static_cast<size_t>(dst_size.width);

    // Element counts of a single frame on each side; used as per-frame strides.
    const size_t src_frame_elems = src_h * src_w * channels;
    const size_t dst_frame_elems = channels * dst_h * dst_w;

    // The destination must hold ALL frames. Allocating only [C, H, W] while
    // writing at offset b * C * H * W overruns the buffer as soon as B > 1.
    ov::Tensor dst(ov::element::f32, {batch, channels, dst_h, dst_w});

    const uint8_t* const src_base = src.data<const uint8_t>();
    float* const dst_base = dst.data<float>();

    const bool same_size = (src_h == dst_h) && (src_w == dst_w);

    for (size_t b = 0; b < batch; ++b) {
        const uint8_t* src_frame = src_base + b * src_frame_elems;
        // Kept as a named non-const lvalue: resize_bilinear_to_chw() takes its
        // destination as float*&, which cannot bind to a temporary.
        float* dst_frame = dst_base + b * dst_frame_elems;

        if (same_size) {
            // No resampling needed: cast to float and transpose HWC -> CHW.
            // This runs for every frame, not just the first one.
            for (size_t c = 0; c < channels; ++c) {
                for (size_t h = 0; h < src_h; ++h) {
                    for (size_t w = 0; w < src_w; ++w) {
                        const size_t src_idx = (h * src_w + w) * channels + c;
                        const size_t dst_idx = (c * src_h + h) * src_w + w;
                        dst_frame[dst_idx] = static_cast<float>(src_frame[src_idx]);
                    }
                }
            }
            continue;
        }

        resize_bilinear_to_chw(src_frame,
                               src_h,
                               src_w,
                               channels,
                               false,  // `nchw` flag: frames are channel-last here (flag handling is outside this view)
                               dst_h,
                               dst_w,
                               dst_frame);
    }
    return dst;
}
341+
287342
void Qwen3_5Preprocessor::resize_bilinear_to_chw(const uint8_t *src,
288343
size_t src_h,
289344
size_t src_w,
290345
size_t channels,
291346
bool nchw,
292347
size_t dst_h,
293348
size_t dst_w,
294-
std::vector<float> &dst_chw) {
295-
dst_chw.assign(channels * dst_h * dst_w, 0.0f);
349+
float*& dst_chw) {
350+
OPENVINO_ASSERT(dst_chw != nullptr, "dst_chw pointer cannot be null");
296351
const float scale_y = static_cast<float>(src_h) / static_cast<float>(dst_h);
297352
const float scale_x = static_cast<float>(src_w) / static_cast<float>(dst_w);
298353

src/cpp/src/module_genai/modules/model/qwen3_5/qwen3_5preprocessor.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "openvino/runtime/tensor.hpp"
1212
#include "qwen3_5config.hpp"
1313
#include "module_genai/utils/vision_preprocess.hpp"
14+
#include "visual_language/vision_encoder.hpp"
1415

1516
namespace ov::genai::module {
1617

@@ -49,14 +50,15 @@ class Qwen3_5Preprocessor {
4950

5051
std::pair<size_t, size_t> smart_resize(size_t height, size_t width, size_t factor);
5152

53+
ov::Tensor resize(const ov::Tensor& src, ImageSize dst_size);
5254
void resize_bilinear_to_chw(const uint8_t* src,
5355
size_t src_h,
5456
size_t src_w,
5557
size_t channels,
5658
bool nchw,
5759
size_t dst_h,
5860
size_t dst_w,
59-
std::vector<float>& dst_chw);
61+
float*& dst_chw);
6062

6163
ov::Tensor build_pos_embeds(const ov::Tensor &grid_thw);
6264

0 commit comments

Comments
 (0)