-
Notifications
You must be signed in to change notification settings - Fork 369
Enable VLM lookup. #2707
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Enable VLM lookup. #2707
Changes from all commits
db9da07
4a8901c
bbb9de3
eec1fa7
e96d490
3a7b3a5
f2fc501
5ee3df6
432de9b
04450fd
6752836
89b3422
53c26aa
3f4c58d
b402f64
9d618ef
9d2af10
d00f80c
98b351c
de46c3c
10ba409
88162d4
05a7557
ab20000
097070a
300e236
58c9458
6820fb8
70f83e0
c577083
2cf0d60
c4f85c1
a7c2d9b
9ecec20
961f3f4
0b4721a
623ded6
0988a02
42bf6d0
4309d87
4d96def
af046aa
c6550d9
fbca036
fccebd1
e31400b
fd31b9b
612478e
774248c
d5e0025
33207dd
3439216
1d95cf7
0e2ddbf
7a5f4c9
95bb668
0943c86
bfa6964
ff4902b
60aaf30
7fd1a81
e0348e3
8db3eab
1306f1e
1b2b2c4
dce056a
db1efa5
6d3ef16
aa3d7d2
8ad719a
20caac3
001f2c2
8a8e4a8
96db59d
490d6e1
52c6526
e4afba8
798367b
6fab469
327e1f7
16264af
e4af71f
1168413
27872c1
4ffa17e
79d84c0
e2dd8e1
b485321
a808c6a
7498964
34c464d
341e75e
272b0ad
3f6ea3f
75ca555
b1fccab
5ba3083
e89dbcf
2ed13af
d6f521d
4283371
0029288
6da42d4
b9ee7d4
c2b1e78
32a79d8
fe814bd
67dcdee
5cc1ecb
d10014f
25894e9
9d958c7
31a734d
7abcc46
9025af0
d3a85a7
c237463
1d86405
de8ddd8
b2ac629
b16ea2f
e7d60df
ae135d3
30b6aa1
f300e46
e3a3d12
7352267
8479454
99b125b
59d5b57
59ce498
2d25b5b
2872b9e
5796a27
33765a2
e094876
b871180
3ae64d0
eb787a0
24bc943
5bd9a8a
3778f5a
0077763
a694712
0f55024
dfdc600
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -268,6 +268,7 @@ ContinuousBatchingPipeline::IContinuousBatchingPipeline::generate( | |
| std::vector<ov::Tensor> input_embeds_list; | ||
| std::vector<ov::Tensor> token_type_ids_list; | ||
| std::vector<std::pair<ov::Tensor, std::optional<int64_t>>> position_ids_list; | ||
| std::vector<ov::Tensor> original_prompt_ids_list; | ||
|
|
||
| std::vector<VLMPerfMetrics> vlm_perf_metrics(prompts.size()); | ||
| std::vector<EncodedImage> encoded_images = {}; | ||
|
|
@@ -300,6 +301,12 @@ ContinuousBatchingPipeline::IContinuousBatchingPipeline::generate( | |
| std::string templated_history = m_tokenizer.apply_chat_template(m_history, true); | ||
|
|
||
| m_inputs_embedder->set_apply_chat_template_status(false); | ||
|
|
||
| if (sampling_params[0].is_prompt_lookup()) { | ||
| auto prompt_ids = m_inputs_embedder->encode_prompt(prompt); | ||
xipingyan marked this conversation as resolved.
Show resolved
Hide resolved
popovaan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| original_prompt_ids_list.push_back(prompt_ids); | ||
| } | ||
sunxiaoxia2022 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| if (m_inputs_embedder->has_token_type_ids()) { | ||
| auto [embeds, tt_ids] = m_inputs_embedder->get_inputs_embeds_with_token_type_ids(templated_history, | ||
xipingyan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| m_history_images, | ||
|
|
@@ -340,6 +347,11 @@ ContinuousBatchingPipeline::IContinuousBatchingPipeline::generate( | |
|
|
||
| m_inputs_embedder->set_apply_chat_template_status(sampling_params[i].apply_chat_template); | ||
|
|
||
| if (sampling_params[i].is_prompt_lookup()) { | ||
| auto prompt_ids = m_inputs_embedder->encode_prompt(prompt); | ||
xipingyan marked this conversation as resolved.
Show resolved
Hide resolved
xipingyan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| original_prompt_ids_list.push_back(prompt_ids); | ||
| } | ||
|
Comment on lines
+350
to
+353
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should prompt lookup also be applied to other
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is a good question, @yatarkan. Currently I don't have a plan to apply it to other
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's OK for now but
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hi @Wovchena @yatarkan, regarding start_chat and finish_chat, there is no issue. BTW, @sunxiaoxia2022 will help me to fix
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hi @yatarkan @Wovchena @xipingyan, the ChatHistory issue has been resolved, and a test case with ChatHistory has been added in
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hi @yatarkan @Wovchena @xipingyan, I have added Python samples and an accuracy test. Please take a look, thank you!
Wovchena marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| if (m_inputs_embedder->has_token_type_ids()) { | ||
| auto [embeds, tt_ids] = m_inputs_embedder->get_inputs_embeds_with_token_type_ids(unified_prompt, | ||
xipingyan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| encoded_images, | ||
|
|
@@ -360,7 +372,12 @@ ContinuousBatchingPipeline::IContinuousBatchingPipeline::generate( | |
| } | ||
| } | ||
| std::vector<VLMDecodedResults> results; | ||
| std::vector<EncodedGenerationResult> encoded_results = generate(input_embeds_list, sampling_params, streamer, token_type_ids_list, position_ids_list); | ||
| std::vector<EncodedGenerationResult> encoded_results = generate(input_embeds_list, | ||
| sampling_params, | ||
| streamer, | ||
| token_type_ids_list, | ||
| position_ids_list, | ||
| original_prompt_ids_list); | ||
| for (size_t i = 0; i < prompts.size(); i++) { | ||
| auto result = encoded_results[i]; | ||
| VLMDecodedResults gen_result; | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -113,6 +113,8 @@ ContinuousBatchingPipeline::ContinuousBatchingImpl::~ContinuousBatchingImpl() { | |||||||||||
| } | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| void ContinuousBatchingPipeline::ContinuousBatchingImpl::generate_candidates_for_prompt_lookup() {} | ||||||||||||
|
||||||||||||
| void ContinuousBatchingPipeline::ContinuousBatchingImpl::generate_candidates_for_prompt_lookup() {} | |
| void ContinuousBatchingPipeline::ContinuousBatchingImpl::generate_candidates() { | |
| // Intentionally left as a no-op in the base implementation. | |
| // PromptLookupImpl overrides this method to perform candidate generation. | |
| } |
Uh oh!
There was an error while loading. Please reload this page.