Skip to content

Commit 222e2cc

Browse files
Wovchena and AlexKoff88 authored
[WWB]: align phi3_v (openvinotoolkit#1853)
Improve similarity score from 0.83 to 0.93. Ticket 162669

---------

Co-authored-by: Alexander Kozlov <kozzzloff@list.ru>
1 parent 8c0aeb1 commit 222e2cc

File tree

2 files changed

+22
-1
lines changed

2 files changed

+22
-1
lines changed

tools/who_what_benchmark/whowhatbench/visualtext_evaluator.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,19 @@ def prepare_default_data(num_samples=None):
3030
)
3131

3232

33+
def fix_phi3_v_eos_token_id(model_type, tokenizer):
    """
    Build extra ``generate()`` keyword arguments for phi3_v models.

    phi3_v configs aren't consistent. Override the default
    eos_token_id with the one from a tokenizer similar to
    an example in
    https://huggingface.co/microsoft/Phi-3.5-vision-instruct

    Args:
        model_type: Model type string (callers pass
            ``model.config.model_type``).
        tokenizer: Object exposing an ``eos_token_id`` attribute.

    Returns:
        ``{"eos_token_id": <tokenizer EOS id>}`` when ``model_type`` is
        ``"phi3_v"`` and the tokenizer defines an EOS id; otherwise an
        empty dict. The result is always safe to unpack with ``**``.
    """
    if model_type != "phi3_v":
        return {}

    eos_token_id = getattr(tokenizer, "eos_token_id", None)
    if eos_token_id is None:
        # Without a tokenizer EOS id there is nothing to override with;
        # forwarding an explicit None would replace the model's own default.
        return {}

    return {"eos_token_id": eos_token_id}
44+
45+
3346
@register_evaluator("visual-text")
3447
class VisualTextEvaluator(TextEvaluator):
3548
def __init__(
@@ -121,6 +134,7 @@ def default_gen_answer(
121134
inputs = preprocess_inputs(prompt, image, processor, tokenizer, config=model.config)
122135
tokens = model.generate(
123136
**inputs,
137+
**fix_phi3_v_eos_token_id(model.config.model_type, tokenizer),
124138
do_sample=False,
125139
max_new_tokens=max_new_tokens,
126140
tokenizer=tokenizer,

tools/who_what_benchmark/whowhatbench/wwb.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
from whowhatbench.model_loaders import load_model
1515
from whowhatbench import EVALUATOR_REGISTRY
16+
from whowhatbench.visualtext_evaluator import fix_phi3_v_eos_token_id
1617

1718
# Configure logging
1819
logging.basicConfig(level=logging.INFO)
@@ -334,7 +335,13 @@ def genai_gen_inpainting(model, prompt, image, mask, num_inference_steps, genera
334335

335336
def genai_gen_visual_text(model, prompt, image, processor, tokenizer, max_new_tokens, crop_question):
    """
    Generate a text answer for a prompt/image pair and return the first text.

    Args:
        model: Pipeline object exposing ``generate`` and ``config.model_type``.
        prompt: Text prompt passed positionally to ``model.generate``.
        image: Image convertible with ``np.array``.
        processor: Unused here; part of the shared generation signature.
        tokenizer: Passed to ``fix_phi3_v_eos_token_id`` to resolve the
            EOS override.
        max_new_tokens: Forwarded to ``model.generate``.
        crop_question: Unused here; part of the shared generation signature.

    Returns:
        The first entry of the ``texts`` attribute of the generation result.
    """
    # Indexing with None prepends a new leading axis to the image array.
    image_tensor = ov.Tensor(np.array(image)[None])
    # Empty for every model type except phi3_v (see fix_phi3_v_eos_token_id).
    eos_override = fix_phi3_v_eos_token_id(model.config.model_type, tokenizer)
    result = model.generate(
        prompt,
        **eos_override,
        image=image_tensor,
        do_sample=False,
        max_new_tokens=max_new_tokens,
    )
    return result.texts[0]
339346

340347

0 commit comments

Comments
 (0)