@@ -70,14 +70,15 @@ Qwen3_5PreprocessorOutput Qwen3_5Preprocessor::preprocess(const ov::Tensor &imag
7070 }
7171
7272 std::vector<float > frame;
73+ float * frame_data = frame.data ();
7374 resize_bilinear_to_chw (src_img,
7475 in_h,
7576 in_w,
7677 channels,
7778 nchw,
7879 out_h,
7980 out_w,
80- frame );
81+ frame_data );
8182
8283 const size_t frames = 1 ;
8384 size_t padded_frames = frames;
@@ -194,7 +195,8 @@ Qwen3_5PreprocessorOutput Qwen3_5Preprocessor::preprocess_video(const ov::Tensor
194195 OPENVINO_ASSERT (channels == 3U , " video must have 3 channels" );
195196
196197 const size_t factor = static_cast <size_t >(m_preprocess_config.patch_size * m_preprocess_config.merge_size );
197-
198+
199+ ov::Tensor resized_video;
198200 if (m_preprocess_config.do_resize ) {
199201 auto resized_size = qwen3vl_utils::smart_resize (frame_num,
200202 in_h,
@@ -206,11 +208,15 @@ Qwen3_5PreprocessorOutput Qwen3_5Preprocessor::preprocess_video(const ov::Tensor
206208 if (resized_size.height % m_preprocess_config.patch_size != 0 || resized_size.width % m_preprocess_config.patch_size != 0 ) {
207209 OPENVINO_THROW (" Resized image must be divisible by patch_size" );
208210 }
209- }
210-
211-
212211
212+ resized_video = resize (video, resized_size);
213+ }
214+ else {
215+ resized_video = video;
216+ }
213217
218+ // rescale_and_normalize
219+ OPENVINO_THROW (" Video preprocessing is not implemented yet" );
214220 return {};
215221}
216222
@@ -284,15 +290,64 @@ std::pair<size_t, size_t> Qwen3_5Preprocessor::smart_resize(size_t height,
284290 return {h_bar, w_bar};
285291}
286292
293+ ov::Tensor Qwen3_5Preprocessor::resize (const ov::Tensor& src, ImageSize dst_size) {
294+ if (src.get_element_type () != ov::element::u8 ) {
295+ OPENVINO_THROW (" Only uint8 source tensor is supported for resizing" );
296+ }
297+ if (src.get_shape ().size () != 4 ) {
298+ OPENVINO_THROW (" Source tensor must have shape [B, H, W, C]" );
299+ }
300+
301+ const size_t batch = src.get_shape ()[0 ];
302+ const size_t src_h = src.get_shape ()[1 ];
303+ const size_t src_w = src.get_shape ()[2 ];
304+ const size_t channels = src.get_shape ()[3 ];
305+ OPENVINO_ASSERT (channels == 3U , " Source tensor must have 3 channels" );
306+
307+ ov::Tensor dst (ov::element::f32 , {channels, static_cast <size_t >(dst_size.height ), static_cast <size_t >(dst_size.width )});
308+
309+ if (src_h == dst_size.height && src_w == dst_size.width ) {
310+ // No resizing needed, just convert to f32 and change layout to CHW
311+ const uint8_t * src_data = src.data <const uint8_t >();
312+ float * dst_data = dst.data <float >();
313+ for (size_t c = 0 ; c < channels; ++c) {
314+ for (size_t h = 0 ; h < src_h; ++h) {
315+ for (size_t w = 0 ; w < src_w; ++w) {
316+ size_t src_idx = (h * src_w + w) * channels + c;
317+ size_t dst_idx = (c * src_h + h) * src_w + w;
318+ dst_data[dst_idx] = static_cast <float >(src_data[src_idx]);
319+ }
320+ }
321+ }
322+ return dst;
323+ }
324+
325+ // Process each batch.
326+ for (size_t b = 0 ; b < batch; ++b) {
327+ const uint8_t * src_data = src.data <const uint8_t >() + b * src_h * src_w * channels;
328+ float * dst_data = dst.data <float >() + b * channels * dst_size.height * dst_size.width ;
329+
330+ resize_bilinear_to_chw (src_data,
331+ src_h,
332+ src_w,
333+ channels,
334+ false ,
335+ dst_size.height ,
336+ dst_size.width ,
337+ dst_data);
338+ }
339+ return dst;
340+ }
341+
287342void Qwen3_5Preprocessor::resize_bilinear_to_chw (const uint8_t *src,
288343 size_t src_h,
289344 size_t src_w,
290345 size_t channels,
291346 bool nchw,
292347 size_t dst_h,
293348 size_t dst_w,
294- std::vector< float > & dst_chw) {
295- dst_chw. assign (channels * dst_h * dst_w, 0 . 0f );
349+ float *& dst_chw) {
350+ OPENVINO_ASSERT (dst_chw != nullptr , " dst_chw pointer cannot be null " );
296351 const float scale_y = static_cast <float >(src_h) / static_cast <float >(dst_h);
297352 const float scale_x = static_cast <float >(src_w) / static_cast <float >(dst_w);
298353
0 commit comments