Skip to content
Draft
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
e6df5a0
transformers v5.5
echarlaix Apr 15, 2026
4fcd786
update setup
echarlaix Apr 15, 2026
9b439b4
set min required transformers version to v4.57
echarlaix Apr 15, 2026
88d4f1a
fix position ids generation
echarlaix Apr 16, 2026
c31a88b
convbert
echarlaix Apr 16, 2026
da7c410
fix position_ids generation for qwenvl models
echarlaix Apr 17, 2026
5ca59b8
check for _prepare_position_ids_for_generation
echarlaix Apr 17, 2026
661f0a5
fix qwen3vl
echarlaix Apr 17, 2026
b52550f
fix transformers version
echarlaix Apr 20, 2026
c5f9c47
transformers v5.3
echarlaix Apr 20, 2026
ee941ff
datasets test extra setup
echarlaix Apr 20, 2026
4a77bce
create attention_mask when needed as not created in generate since v5.2
echarlaix Apr 22, 2026
06187d0
fix mm_token_type_ids
echarlaix Apr 23, 2026
9857954
add torchcodec tests install
echarlaix Apr 23, 2026
95a6efd
add missing get_vision_position_ids
echarlaix Apr 24, 2026
dbc13a9
add transformers v5.3 check
echarlaix Apr 24, 2026
fd94a59
workflow
echarlaix Apr 24, 2026
43bd816
workflow
echarlaix Apr 24, 2026
31ffbb4
fix for Qwen2_5
echarlaix Apr 24, 2026
8ae40d3
revert
echarlaix Apr 24, 2026
6c05f54
disable for datasets metric test when datasets <v4
echarlaix Apr 24, 2026
c09aab1
transformers v5.4
echarlaix Apr 24, 2026
620f7a5
Merge branch 'main' into transformers-v5.5
echarlaix Apr 24, 2026
24982a5
add missing
echarlaix Apr 24, 2026
a80912e
use sdpa_mask instead of sdpa_mask_without_vmap for transformers v5 o…
echarlaix Apr 27, 2026
1652bc8
fix eager mask
echarlaix Apr 27, 2026
2699f22
ov_sdpa_mask_without_vmap
echarlaix Apr 28, 2026
d1a61da
remove inheritance to Qwen3VLVisionModel and Qwen3VLModel
echarlaix Apr 28, 2026
dbdf3af
transformers v5.3
echarlaix Apr 28, 2026
fd8d155
transformers-v5 branch
echarlaix Apr 28, 2026
cef3755
Merge branch 'main' into transformers-v5.5
echarlaix Apr 28, 2026
888bfb9
transformers v5.4
echarlaix Apr 28, 2026
60582f8
add transformers v5.3 for tests temporarily
echarlaix Apr 29, 2026
ad5aea6
use sdpa_mask for transformers v5 or higher
echarlaix Apr 29, 2026
f9788e3
remove tests for v5.4 for incompatible models
echarlaix Apr 29, 2026
ea462e4
add comments
echarlaix Apr 29, 2026
9acdc49
simplify
echarlaix Apr 29, 2026
588f89e
add comments
echarlaix Apr 29, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test_openvino.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
"*diffusion*",
"*quantization*",
]
transformers-version: ["4.45.0", "4.57.6", "latest"]
transformers-version: ["4.57.6", "latest"]

runs-on: ubuntu-22.04

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_openvino_slow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
fail-fast: false
matrix:
os: ["ubuntu-22.04", "windows-2022"]
transformers-version: ["4.45.0", "latest"]
transformers-version: ["4.57.6", "latest"]
include:
- transformers-version: "main"
os: "ubuntu-22.04"
Expand Down
34 changes: 24 additions & 10 deletions optimum/exporters/openvino/model_patcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,19 +251,33 @@ def patch_cos_sin_cached_fp32(model):
)


def ov_sdpa_mask_without_vmap(batch_size, **kwargs) -> Optional[torch.Tensor]:
    """Build an SDPA attention mask with vmap disabled, across transformers versions.

    On transformers >= 5.4 this delegates to ``sdpa_mask(..., use_vmap=False)``,
    translating a tensor ``q_length`` (plus optional ``q_offset``) into explicit
    query positions; on older versions it falls back to the legacy
    ``sdpa_mask_without_vmap`` helper, forwarding all remaining kwargs.
    """
    # vmap is always disabled here, so discard any caller-provided flag.
    kwargs.pop("use_vmap", None)

    if is_transformers_version("<", "5.4"):
        return sdpa_mask_without_vmap(batch_size=batch_size, **kwargs)

    query_length = kwargs.pop("q_length", None)
    if isinstance(query_length, torch.Tensor):
        # Newer sdpa_mask takes query positions; expand offset + length into a range.
        start = kwargs.pop("q_offset", 0)
        query_length = torch.arange(start, start + query_length, device=query_length.device)
    return sdpa_mask(batch_size=batch_size, q_length=query_length, use_vmap=False, **kwargs)


# Adapted from https://github.com/huggingface/transformers/blob/v4.53.0/src/transformers/masking_utils.py#L433
# Specifically for OpenVINO, we use torch.finfo(torch.float16).min instead of torch.finfo(dtype).min
def eager_mask_without_vmap(*args, **kwargs) -> Optional[torch.Tensor]:
def eager_mask_without_vmap(batch_size, **kwargs) -> Optional[torch.Tensor]:
kwargs.pop("allow_is_causal_skip", None)
dtype = kwargs.get("dtype", torch.float32)
mask = sdpa_mask_without_vmap(*args, allow_is_causal_skip=False, **kwargs)
# we use torch.finfo(torch.float16).min instead torch.finfo(dtype).min to avoid an overflow but not
# sure this is the right way to handle this, we are basically pretending that -65,504 is -inf
mask = torch.where(
mask,
torch.tensor(0.0, device=mask.device, dtype=dtype),
torch.tensor(torch.finfo(torch.float16).min, device=mask.device, dtype=dtype),
)
kwargs.pop("allow_torch_fix", None)
dtype = kwargs.pop("dtype", torch.float32)
mask = ov_sdpa_mask_without_vmap(batch_size, allow_is_causal_skip=False, allow_torch_fix=False, **kwargs)
if mask is not None:
# we use torch.finfo(torch.float16).min instead torch.finfo(dtype).min to avoid an overflow but not
# sure this is the right way to handle this, we are basically pretending that -65,504 is -inf
mask = torch.where(
mask,
torch.tensor(0.0, device=mask.device, dtype=dtype),
torch.tensor(torch.finfo(torch.float16).min, device=mask.device, dtype=dtype),
)
return mask


Expand Down
3 changes: 2 additions & 1 deletion optimum/intel/openvino/modeling_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -668,7 +668,8 @@ def _update_model_kwargs_for_generation(
outputs=outputs, model_kwargs=model_kwargs, is_encoder_decoder=is_encoder_decoder, **kwargs
)

if "position_ids" in model_kwargs:
# _prepare_position_ids_for_generation will infer position ids since transformers v5.2
if "position_ids" in model_kwargs and not hasattr(self, "_prepare_position_ids_for_generation"):
position_ids = model_kwargs["position_ids"]
new_position_id = position_ids[..., -1:].clone()
new_position_id += 1
Expand Down
2 changes: 2 additions & 0 deletions optimum/intel/openvino/modeling_seq2seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -875,6 +875,8 @@ def forward(

# Add the attention_mask inputs when needed
if "attention_mask" in self.input_names:
if attention_mask is None:
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

before v5.2 attention_mask was created in generate by calling _prepare_attention_mask_for_generation https://github.com/huggingface/transformers/blob/v5.1.0/src/transformers/generation/utils.py#L2530 not the case of encoder_decoder model since v5.2 https://github.com/huggingface/transformers/blob/v5.2.0/src/transformers/generation/utils.py#L2555

attention_mask = torch.ones_like(inputs[self.main_input_name])
inputs["attention_mask"] = attention_mask

# Run inference
Expand Down
478 changes: 161 additions & 317 deletions optimum/intel/openvino/modeling_visual_language.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@

INSTALL_REQUIRE = [
"torch>=2.1",
"optimum-onnx@git+https://github.com/huggingface/optimum-onnx.git@transformers-v5",
"transformers>=4.45,<5.1",
"optimum-onnx@git+https://github.com/huggingface/optimum-onnx.git@xadupre/transformers5",
"transformers>=4.57,<5.5",
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

setting min transformers version to v4.57, will replicate everywhere once validated cc @rkazants

"setuptools",
"huggingface-hub>=0.23.2,<2.0",
"nncf>=2.19.0",
Expand Down
12 changes: 7 additions & 5 deletions tests/openvino/test_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@
TemporaryDirectory,
)
from optimum.intel.pipelines import pipeline as optimum_pipeline
from optimum.intel.utils.import_utils import _langchain_hf_available, is_transformers_version
from optimum.intel.utils.import_utils import _langchain_hf_available, is_datasets_version, is_transformers_version
from optimum.intel.utils.modeling_utils import _find_files_matching_pattern
from optimum.utils import (
DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER,
Expand Down Expand Up @@ -931,8 +931,8 @@ def test_pipeline(self, model_arch):
@pytest.mark.run_slow
@slow
@pytest.mark.skipif(
is_transformers_version(">=", "5.3"),
reason="requires transformers < v5.3 since question-answering pipeline is deprecated in v5.3",
is_transformers_version(">=", "5.3") or is_datasets_version("<", "4"),
reason="requires datasets >= 4 or transformers < v5.3 since question-answering pipeline is deprecated in v5.3",
)
def test_metric(self):
model_id = "distilbert-base-cased-distilled-squad"
Expand Down Expand Up @@ -1113,7 +1113,6 @@ class OVModelForMaskedLMIntegrationTest(unittest.TestCase):
"albert",
"bert",
"camembert",
"convbert",
"deberta",
"deberta-v2",
"distilbert",
Expand All @@ -1131,13 +1130,16 @@ class OVModelForMaskedLMIntegrationTest(unittest.TestCase):
)

# accuracy issue, need additional investigation
if is_transformers_version("<", "4.51.0"):
if is_transformers_version("<", "4.51"):
SUPPORTED_ARCHITECTURES += ("nystromformer",)

# TODO: add fix for v5 and update MAX_TRANSFORMERS_VERSION accordingly
if is_transformers_version("<", "5"):
SUPPORTED_ARCHITECTURES += ("data2vec-text", "flaubert", "xlm")

if is_transformers_version("!=", "5.2"):
SUPPORTED_ARCHITECTURES += ("convbert",)
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


@parameterized.expand(SUPPORTED_ARCHITECTURES)
def test_compare_to_transformers(self, model_arch):
model_id = MODEL_NAMES[model_arch]
Expand Down
Loading