Skip to content

Commit fe19a77

Browse files
committed
Change image mean and std from model Parameters to Constants.
Signed-off-by: xipingya <xiping.yan@intel.com>
1 parent e7cce01 commit fe19a77

File tree

2 files changed

+29
-78
lines changed

2 files changed

+29
-78
lines changed

src/cpp/src/visual_language/inputs_embedder.cpp

Lines changed: 1 addition & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -166,21 +166,13 @@ std::vector<ov::Tensor> InputsEmbedder::IInputsEmbedder::to_single_image_tensors
166166
std::vector<ov::genai::EncodedImage> InputsEmbedder::IInputsEmbedder::encode_images(const std::vector<ov::Tensor>& images, const bool& is_video) {
167167
std::vector<ov::Tensor> single_images = to_single_image_tensors(images);
168168
if (is_video) {
169-
auto t1 = std::chrono::high_resolution_clock::now();
170-
auto res = m_vision_encoder->encode_video(single_images);
171-
auto t2 = std::chrono::high_resolution_clock::now();
172-
std::cout << "encode video time: " << std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count()
173-
<< std::endl;
169+
return m_vision_encoder->encode_video(single_images);
174170
}
175171

176-
auto t1 = std::chrono::high_resolution_clock::now();
177172
std::vector<EncodedImage> embeds;
178173
for (const ov::Tensor& image : single_images) {
179174
embeds.emplace_back(m_vision_encoder->encode(image));
180175
}
181-
auto t2 = std::chrono::high_resolution_clock::now();
182-
std::cout << "image video time: " << std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count()
183-
<< std::endl;
184176
return embeds;
185177
}
186178

src/cpp/src/visual_language/qwen2vl/classes.cpp

Lines changed: 28 additions & 69 deletions
Original file line number | Diff line number | Diff line change
@@ -31,10 +31,9 @@ const std::string NATIVE_TAG = "<|vision_start|><|image_pad|><|vision_end|>";
3131
std::pair<std::shared_ptr<ov::Model>, std::shared_ptr<ov::op::v0::Result>> patch_preprocess_branch_image(
3232
std::shared_ptr<ov::op::v0::Parameter> raw_images_1,
3333
std::shared_ptr<ov::op::v0::Parameter> resize_target_shape,
34-
std::shared_ptr<ov::op::v0::Parameter> image_mean,
35-
std::shared_ptr<ov::op::v0::Parameter> image_scale,
34+
std::shared_ptr<ov::op::v0::Constant> image_mean,
35+
std::shared_ptr<ov::op::v0::Constant> image_scale,
3636
std::shared_ptr<ov::op::v0::Parameter> broadcast_shape) {
37-
std::cout << "patch_preprocess_branch_image" << std::endl;
3837
auto raw_images_f32_1 = std::make_shared<ov::op::v0::Convert>(raw_images_1, ov::element::f32);
3938
auto img_trans_1 = std::make_shared<ov::op::v1::Transpose>(
4039
raw_images_f32_1,
@@ -55,11 +54,7 @@ std::pair<std::shared_ptr<ov::Model>, std::shared_ptr<ov::op::v0::Result>> patch
5554
auto temporal_images = std::make_shared<ov::op::v3::Broadcast>(resized_images_s_1, broadcast_shape);
5655
auto results = std::make_shared<ov::op::v0::Result>(temporal_images);
5756
return {std::make_shared<ov::Model>(results,
58-
ov::ParameterVector{raw_images_1,
59-
resize_target_shape,
60-
image_mean,
61-
image_scale,
62-
broadcast_shape},
57+
ov::ParameterVector{raw_images_1, resize_target_shape, broadcast_shape},
6358
"then_body"),
6459
results};
6560
}
@@ -69,21 +64,16 @@ std::pair<std::shared_ptr<ov::Model>, std::shared_ptr<ov::op::v0::Result>> patch
6964
std::shared_ptr<ov::op::v0::Parameter> raw_images_1,
7065
std::shared_ptr<ov::op::v0::Parameter> raw_images_2,
7166
std::shared_ptr<ov::op::v0::Parameter> resize_target_shape,
72-
std::shared_ptr<ov::op::v0::Parameter> image_mean,
73-
std::shared_ptr<ov::op::v0::Parameter> image_scale) {
74-
std::cout << "patch_preprocess_branch_video" << std::endl;
67+
std::shared_ptr<ov::op::v0::Constant> image_mean,
68+
std::shared_ptr<ov::op::v0::Constant> image_scale) {
7569
auto raw_images_f32_1 = std::make_shared<ov::op::v0::Convert>(raw_images_1, ov::element::f32);
7670
auto raw_images_f32_2 = std::make_shared<ov::op::v0::Convert>(raw_images_2, ov::element::f32);
7771
auto img_trans_1 = std::make_shared<ov::op::v1::Transpose>(
7872
raw_images_f32_1,
7973
std::make_shared<ov::op::v0::Constant>(ov::element::i32, Shape{4}, std::vector<int32_t>{0, 3, 1, 2}));
80-
img_trans_1->set_friendly_name("img_trans_1");
81-
img_trans_1->output(0).get_tensor().set_names({"img_trans_1"});
8274
auto img_trans_2 = std::make_shared<ov::op::v1::Transpose>(
8375
raw_images_f32_2,
8476
std::make_shared<ov::op::v0::Constant>(ov::element::i32, Shape{4}, std::vector<int32_t>{0, 3, 1, 2}));
85-
img_trans_2->set_friendly_name("img_trans_2");
86-
img_trans_2->output(0).get_tensor().set_names({"img_trans_2"});
8777

8878
ov::op::v0::Interpolate::Attributes attrs = {};
8979
attrs.axes = {2, 3};
@@ -109,24 +99,28 @@ std::pair<std::shared_ptr<ov::Model>, std::shared_ptr<ov::op::v0::Result>> patch
10999

110100
auto result_temperal_images = std::make_shared<ov::op::v0::Result>(temporal_images);
111101
auto result_tmp = std::make_shared<ov::op::v0::Result>(same_image);
112-
return {std::make_shared<ov::Model>(
113-
ov::ResultVector{result_temperal_images, result_tmp},
114-
ov::ParameterVector{same_image, raw_images_1, raw_images_2, resize_target_shape, image_mean, image_scale},
115-
"else_body"),
116-
result_temperal_images};
102+
return {
103+
std::make_shared<ov::Model>(ov::ResultVector{result_temperal_images, result_tmp},
104+
ov::ParameterVector{same_image, raw_images_1, raw_images_2, resize_target_shape},
105+
"else_body"),
106+
result_temperal_images};
117107
}
118108

119-
std::shared_ptr<ov::Model> patch_preprocess_into_model(std::shared_ptr<ov::Model> model_org) {
120-
std::cout << "patch_preprocess_into_model" << std::endl;
109+
std::shared_ptr<ov::Model> patch_preprocess_into_model(std::shared_ptr<ov::Model> model_org, const ProcessorConfig& config) {
110+
std::vector<float> a_image_mean(config.image_mean.begin(), config.image_mean.end());
111+
std::vector<float> a_image_std(config.image_std.begin(), config.image_std.end());
112+
for (auto& v : a_image_mean) v *= 255.0f;
113+
for (auto& v : a_image_std) v = 1.0f / (v * 255.0f);
114+
auto image_mean = std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{1, config.image_mean.size(), 1, 1}, a_image_mean.data());
115+
auto image_std = std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{1, config.image_mean.size(), 1, 1}, a_image_std.data());
116+
121117
auto same_image = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1});
122118
same_image->set_friendly_name("same_image");
123119
same_image->output(0).get_tensor().set_names({"same_image"});
124120

125121
auto raw_images_1 = std::make_shared<ov::op::v0::Parameter>(ov::element::u8, ov::PartialShape{-1, -1, -1, -1});
126122
auto raw_images_2 = std::make_shared<ov::op::v0::Parameter>(ov::element::u8, ov::PartialShape{-1, -1, -1, -1});
127123
auto resize_target_shape = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::PartialShape{2});
128-
auto image_mean = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, -1, 1, 1});
129-
auto image_scale = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, -1, 1, 1});
130124

131125
auto broadcast_shape = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::PartialShape{4});
132126
auto temp_shape8d = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::PartialShape{8});
@@ -141,39 +135,28 @@ std::shared_ptr<ov::Model> patch_preprocess_into_model(std::shared_ptr<ov::Model
141135
resize_target_shape->set_friendly_name("resize_target_shape");
142136
resize_target_shape->output(0).get_tensor().set_names({"resize_target_shape"});
143137

144-
image_mean->set_friendly_name("image_mean");
145-
image_mean->output(0).get_tensor().set_names({"image_mean"});
146-
147-
image_scale->set_friendly_name("image_scale");
148-
image_scale->output(0).get_tensor().set_names({"image_scale"});
149-
150138
broadcast_shape->set_friendly_name("broadcast_shape");
151139
broadcast_shape->output(0).get_tensor().set_names({"broadcast_shape"});
152140

153141
auto then_raw_images_1 = std::make_shared<ov::op::v0::Parameter>(ov::element::u8, ov::PartialShape{-1, -1, -1, -1});
154-
auto then_raw_images_2 = std::make_shared<ov::op::v0::Parameter>(ov::element::u8, ov::PartialShape{-1, -1, -1, -1});
155142
auto then_resize_target_shape = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::PartialShape{2});
156-
auto then_image_mean = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, -1, 1, 1});
157-
auto then_image_scale = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, -1, 1, 1});
158143
auto then_broadcast_shape = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::PartialShape{4});
159144
auto model_then = patch_preprocess_branch_image(then_raw_images_1,
160145
then_resize_target_shape,
161-
then_image_mean,
162-
then_image_scale,
146+
image_mean,
147+
image_std,
163148
then_broadcast_shape);
164149

165150
auto else_same_image = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1});
166151
auto else_raw_images_1 = std::make_shared<ov::op::v0::Parameter>(ov::element::u8, ov::PartialShape{-1, -1, -1, -1});
167152
auto else_raw_images_2 = std::make_shared<ov::op::v0::Parameter>(ov::element::u8, ov::PartialShape{-1, -1, -1, -1});
168153
auto else_resize_target_shape = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::PartialShape{2});
169-
auto else_image_mean = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, -1, 1, 1});
170-
auto else_image_scale = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, -1, 1, 1});
171154
auto model_else = patch_preprocess_branch_video(else_same_image,
172155
else_raw_images_1,
173156
else_raw_images_2,
174157
else_resize_target_shape,
175-
else_image_mean,
176-
else_image_scale);
158+
image_mean,
159+
image_std);
177160

178161
auto if_op = std::make_shared<ov::op::v8::If>();
179162
if_op->set_then_body(model_then.first);
@@ -183,13 +166,9 @@ std::shared_ptr<ov::Model> patch_preprocess_into_model(std::shared_ptr<ov::Model
183166
if_op->set_input(raw_images_1->output(0), nullptr, else_raw_images_1);
184167
if_op->set_input(raw_images_2->output(0), nullptr, else_raw_images_2);
185168
if_op->set_input(resize_target_shape->output(0), nullptr, else_resize_target_shape);
186-
if_op->set_input(image_mean->output(0), nullptr, else_image_mean);
187-
if_op->set_input(image_scale->output(0), nullptr, else_image_scale);
188169

189170
if_op->set_input(raw_images_1->output(0), then_raw_images_1, nullptr);
190171
if_op->set_input(resize_target_shape->output(0), then_resize_target_shape, nullptr);
191-
if_op->set_input(image_mean->output(0), then_image_mean, nullptr);
192-
if_op->set_input(image_scale->output(0), then_image_scale, nullptr);
193172
if_op->set_input(broadcast_shape->output(0), then_broadcast_shape, nullptr);
194173

195174
auto temporal_images = if_op->set_output(model_then.second, model_else.second);
@@ -223,8 +202,6 @@ std::shared_ptr<ov::Model> patch_preprocess_into_model(std::shared_ptr<ov::Model
223202
raw_images_1,
224203
raw_images_2,
225204
resize_target_shape,
226-
image_mean,
227-
image_scale,
228205
broadcast_shape,
229206
temp_shape8d,
230207
temp_shape4d,
@@ -539,8 +516,9 @@ std::unique_ptr<CircularBufferQueue<ov::InferRequest>> VisionEncoderQwen2VL::cre
539516
}
540517

541518
VisionEncoderQwen2VL::VisionEncoderQwen2VL(const std::filesystem::path& model_dir, const std::string& device, const ov::AnyMap properties) : VisionEncoder(model_dir, device, properties) {
519+
ProcessorConfig config = utils::from_any_map({}, m_processor_config);
542520
auto model_org = utils::singleton_core().read_model(model_dir / "openvino_vision_embeddings_model.xml");
543-
auto model = patch_preprocess_into_model(model_org);
521+
auto model = patch_preprocess_into_model(model_org, config);
544522
auto compiled_model = utils::singleton_core().compile_model(model, device, properties);
545523
m_ireq_queue_vision_encoder = create_ireq(compiled_model);
546524
}
@@ -552,8 +530,9 @@ VisionEncoderQwen2VL::VisionEncoderQwen2VL(const ModelsMap& models_map,
552530
const auto& vision_encoder_model = utils::get_model_weights_pair(models_map, "vision_embeddings").first;
553531
const auto& vision_encoder_weights = utils::get_model_weights_pair(models_map, "vision_embeddings").second;
554532

533+
ProcessorConfig config = utils::from_any_map({}, m_processor_config);
555534
auto model_org = utils::singleton_core().read_model(vision_encoder_model, vision_encoder_weights);
556-
auto model = patch_preprocess_into_model(model_org);
535+
auto model = patch_preprocess_into_model(model_org, config);
557536

558537
auto compiled_model = utils::singleton_core().compile_model(model, device, device_config);
559538
m_ireq_queue_vision_encoder = create_ireq(compiled_model);
@@ -644,23 +623,13 @@ std::vector<EncodedImage> VisionEncoderQwen2VL::encode_video(const std::vector<o
644623
ov::Tensor temp_shape8d(ov::element::i64, ov::Shape{8}, a_temp_shape8d);
645624
ov::Tensor temp_shape4d(ov::element::i64, ov::Shape{4}, a_temp_shape4d);
646625
ov::Tensor last_shape(ov::element::i64, ov::Shape{2}, last_output_shape);
647-
648-
std::vector<float> a_image_mean(config.image_mean.begin(), config.image_mean.end());
649-
std::vector<float> a_image_scale(config.image_std.begin(), config.image_std.end());
650-
for(auto& v : a_image_mean) v *= 255.0f;
651-
for(auto& v : a_image_scale) v = 1.0f / (v*255.0f);
652-
653-
ov::Tensor image_mean(ov::element::f32, ov::Shape{1,a_image_mean.size(),1,1}, a_image_mean.data());
654-
ov::Tensor image_scale(ov::element::f32, ov::Shape{1,a_image_scale.size(),1,1}, a_image_scale.data());
655-
626+
656627
ov::Tensor same_image(ov::element::f32, ov::Shape{1}, std::vector<float>{0}.data());
657628

658629
encoder.set_tensor("same_image", same_image);
659630
encoder.set_tensor("raw_images_1", raw_image_1);
660631
encoder.set_tensor("raw_images_2", raw_image_2);
661632
encoder.set_tensor("resize_target_shape", target_shape);
662-
encoder.set_tensor("image_mean", image_mean);
663-
encoder.set_tensor("image_scale", image_scale);
664633
encoder.set_tensor("broadcast_shape", broadcast_shape);
665634
encoder.set_tensor("temp_shape8d", temp_shape8d);
666635
encoder.set_tensor("temp_shape4d", temp_shape4d);
@@ -722,22 +691,12 @@ EncodedImage VisionEncoderQwen2VL::encode(const ov::Tensor& image, const ov::Any
722691
ov::Tensor temp_shape8d(ov::element::i64, ov::Shape{8}, a_temp_shape8d);
723692
ov::Tensor temp_shape4d(ov::element::i64, ov::Shape{4}, a_temp_shape4d);
724693
ov::Tensor last_shape(ov::element::i64, ov::Shape{2}, last_output_shape);
725-
726-
std::vector<float> a_image_mean(config.image_mean.begin(), config.image_mean.end());
727-
std::vector<float> a_image_scale(config.image_std.begin(), config.image_std.end());
728-
for(auto& v : a_image_mean) v *= 255.0f;
729-
for(auto& v : a_image_scale) v = 1.0f / (v*255.0f);
730-
731-
ov::Tensor image_mean(ov::element::f32, ov::Shape{1,a_image_mean.size(),1,1}, a_image_mean.data());
732-
ov::Tensor image_scale(ov::element::f32, ov::Shape{1,a_image_scale.size(),1,1}, a_image_scale.data());
733-
734694
ov::Tensor same_image(ov::element::f32, ov::Shape{1}, std::vector<float>{0}.data());
695+
735696
encoder.set_tensor("same_image", same_image);
736697
encoder.set_tensor("raw_images_1", raw_images);
737698
encoder.set_tensor("raw_images_2", raw_images);
738699
encoder.set_tensor("resize_target_shape", target_shape);
739-
encoder.set_tensor("image_mean", image_mean);
740-
encoder.set_tensor("image_scale", image_scale);
741700
encoder.set_tensor("broadcast_shape", broadcast_shape);
742701
encoder.set_tensor("temp_shape8d", temp_shape8d);
743702
encoder.set_tensor("temp_shape4d", temp_shape4d);

0 commit comments

Comments
 (0)