Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/nncf/common/graph/patterns/patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,14 +233,14 @@ def join_patterns(self, other: "GraphPattern", edges: list[tuple[Hashable, Hasha
remapped_edges.append(new_edge)
self._graph.add_edges_from(remapped_edges)

def add_node(self, **attrs: dict[str, Any]) -> int:
def add_node(self, **attrs: Any) -> int:
if GraphPattern.METATYPE_ATTR in attrs and not isinstance(attrs[GraphPattern.METATYPE_ATTR], list):
attrs[GraphPattern.METATYPE_ATTR] = cast(Any, [attrs[GraphPattern.METATYPE_ATTR]])
self._graph.add_node(self._node_counter, **attrs)
self._node_counter += 1
return self._node_counter - 1

def add_edge(self, u_name: str, v_name: str) -> None:
def add_edge(self, u_name: int | str, v_name: int | str) -> None:
self._graph.add_edge(u_name, v_name)

def add_edges_from(self, ebunch_to_add: list[Any], **attr: dict[str, Any]) -> None:
Expand Down
24 changes: 7 additions & 17 deletions src/nncf/onnx/quantization/ignored_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from nncf.onnx.graph.metatypes import onnx_metatypes as om
from nncf.onnx.graph.metatypes.groups import MATMUL_METATYPES
from nncf.onnx.hardware.fused_patterns import atomic_activations_operations
from nncf.quantization.ignored_patterns import create_rope_pattern

ONNX_IGNORED_PATTERNS = Registry("IGNORED_PATTERNS")

Expand Down Expand Up @@ -161,24 +162,13 @@ def create_se_block() -> GraphPattern:

@ONNX_IGNORED_PATTERNS.register(IgnoredPatternNames.ROPE)
def create_rope() -> GraphPattern:
pattern = GraphPattern()
matmul_node = pattern.add_node(
**{GraphPattern.LABEL_ATTR: "MATMUL", GraphPattern.METATYPE_ATTR: om.ONNXMatMulMetatype}
)
transpose_node = pattern.add_node(
**{GraphPattern.LABEL_ATTR: "TRANSPOSE", GraphPattern.METATYPE_ATTR: om.ONNXTransposeMetatype}
)
concat_node = pattern.add_node(
**{GraphPattern.LABEL_ATTR: "CONCAT", GraphPattern.METATYPE_ATTR: om.ONNXConcatMetatype}
return create_rope_pattern(
mm_metatype=om.ONNXMatMulMetatype,
transpose_metatype=om.ONNXTransposeMetatype,
concat_metatype=om.ONNXConcatMetatype,
cos_metatype=om.ONNXCosMetatype,
sin_metatype=om.ONNXSinMetatype,
)
cos_node = pattern.add_node(**{GraphPattern.LABEL_ATTR: "COS", GraphPattern.METATYPE_ATTR: om.ONNXCosMetatype})
sin_node = pattern.add_node(**{GraphPattern.LABEL_ATTR: "SIN", GraphPattern.METATYPE_ATTR: om.ONNXSinMetatype})

pattern.add_edge(matmul_node, transpose_node)
pattern.add_edge(transpose_node, concat_node)
pattern.add_edge(concat_node, cos_node)
pattern.add_edge(concat_node, sin_node)
return pattern


@ONNX_IGNORED_PATTERNS.register(IgnoredPatternNames.SAM_PE)
Expand Down
24 changes: 7 additions & 17 deletions src/nncf/openvino/quantization/ignored_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from nncf.common.utils.registry import Registry
from nncf.openvino.graph.metatypes import openvino_metatypes as om
from nncf.openvino.graph.metatypes.groups import LINEAR_OPERATIONS
from nncf.quantization.ignored_patterns import create_rope_pattern

OPENVINO_IGNORED_PATTERNS = Registry("IGNORED_PATTERNS")

Expand Down Expand Up @@ -168,24 +169,13 @@ def create_se_block() -> GraphPattern:

@OPENVINO_IGNORED_PATTERNS.register(IgnoredPatternNames.ROPE)
def create_rope() -> GraphPattern:
pattern = GraphPattern()
matmul_node = pattern.add_node(
**{GraphPattern.LABEL_ATTR: "MATMUL", GraphPattern.METATYPE_ATTR: om.OVMatMulMetatype}
)
transpose_node = pattern.add_node(
**{GraphPattern.LABEL_ATTR: "TRANSPOSE", GraphPattern.METATYPE_ATTR: om.OVTransposeMetatype}
)
concat_node = pattern.add_node(
**{GraphPattern.LABEL_ATTR: "CONCAT", GraphPattern.METATYPE_ATTR: om.OVConcatMetatype}
return create_rope_pattern(
mm_metatype=om.OVMatMulMetatype,
transpose_metatype=om.OVTransposeMetatype,
concat_metatype=om.OVConcatMetatype,
cos_metatype=om.OVCosMetatype,
sin_metatype=om.OVSinMetatype,
)
cos_node = pattern.add_node(**{GraphPattern.LABEL_ATTR: "COS", GraphPattern.METATYPE_ATTR: om.OVCosMetatype})
sin_node = pattern.add_node(**{GraphPattern.LABEL_ATTR: "SIN", GraphPattern.METATYPE_ATTR: om.OVSinMetatype})

pattern.add_edge(matmul_node, transpose_node)
pattern.add_edge(transpose_node, concat_node)
pattern.add_edge(concat_node, cos_node)
pattern.add_edge(concat_node, sin_node)
return pattern


@OPENVINO_IGNORED_PATTERNS.register(IgnoredPatternNames.SAM_PE)
Expand Down
63 changes: 63 additions & 0 deletions src/nncf/quantization/ignored_patterns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Copyright (c) 2026 Intel Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from nncf.common.graph.operator_metatypes import OperatorMetatype
from nncf.common.graph.patterns.patterns import GraphPattern


def create_rope_pattern(
    mm_metatype: type[OperatorMetatype],
    transpose_metatype: type[OperatorMetatype],
    concat_metatype: type[OperatorMetatype],
    cos_metatype: type[OperatorMetatype],
    sin_metatype: type[OperatorMetatype],
) -> GraphPattern:
    """
    Creates Rotary Positional Embedding (RoPE) pattern.

    Two alternatives are registered — with and without a transpose
    between the matmul and the concat:

      (matmul)            (matmul)
         |                   |
      (transpose)         (concat)
         |                 /    \\
      (concat)          (cos)  (sin)
       /    \\
    (cos)  (sin)

    :param mm_metatype: MatMul metatype.
    :param transpose_metatype: Transpose metatype.
    :param concat_metatype: Concat metatype.
    :param cos_metatype: Cos metatype.
    :param sin_metatype: Sin metatype.
    :return: The Rotary Positional Embedding (RoPE) pattern.
    """
    result = GraphPattern()
    for use_transpose in (True, False):
        alternative = GraphPattern()

        def _add(label: str, metatype: type[OperatorMetatype]) -> int:
            # Small shortcut to register a labeled node in the current alternative.
            return alternative.add_node(**{GraphPattern.LABEL_ATTR: label, GraphPattern.METATYPE_ATTR: metatype})

        # Node creation order matches the original implementation so that
        # internal node ids are assigned identically.
        matmul = _add("MATMUL", mm_metatype)
        concat = _add("CONCAT", concat_metatype)
        cos = _add("COS", cos_metatype)
        sin = _add("SIN", sin_metatype)

        if use_transpose:
            transpose = _add("TRANSPOSE", transpose_metatype)
            alternative.add_edge(matmul, transpose)
            alternative.add_edge(transpose, concat)
        else:
            alternative.add_edge(matmul, concat)
        alternative.add_edge(concat, cos)
        alternative.add_edge(concat, sin)
        result.add_pattern_alternative(alternative)
    return result
22 changes: 7 additions & 15 deletions src/nncf/torch/quantization/ignored_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from nncf.common.graph.patterns.patterns import GraphPattern
from nncf.common.graph.patterns.patterns import IgnoredPatternNames
from nncf.common.utils.registry import Registry
from nncf.quantization.ignored_patterns import create_rope_pattern
from nncf.torch.graph import operator_metatypes as om
from nncf.torch.graph.pattern_operations import ATOMIC_ACTIVATIONS_OPERATIONS
from nncf.torch.graph.pattern_operations import LINEAR_OPERATIONS
Expand Down Expand Up @@ -234,22 +235,13 @@ def get_se_block_with_bias_and_reshape() -> GraphPattern:

@PT_IGNORED_PATTERNS.register(IgnoredPatternNames.ROPE)
def create_rope() -> GraphPattern:
pattern = GraphPattern()
matmul_node = pattern.add_node(
**{GraphPattern.LABEL_ATTR: "MATMUL", GraphPattern.METATYPE_ATTR: om.PTMatMulMetatype}
)
transpose_node = pattern.add_node(
**{GraphPattern.LABEL_ATTR: "TRANSPOSE", GraphPattern.METATYPE_ATTR: om.PTTransposeMetatype}
return create_rope_pattern(
mm_metatype=om.PTMatMulMetatype,
transpose_metatype=om.PTTransposeMetatype,
concat_metatype=om.PTCatMetatype,
cos_metatype=om.PTCosMetatype,
sin_metatype=om.PTSinMetatype,
)
concat_node = pattern.add_node(**{GraphPattern.LABEL_ATTR: "CONCAT", GraphPattern.METATYPE_ATTR: om.PTCatMetatype})
cos_node = pattern.add_node(**{GraphPattern.LABEL_ATTR: "COS", GraphPattern.METATYPE_ATTR: om.PTCosMetatype})
sin_node = pattern.add_node(**{GraphPattern.LABEL_ATTR: "SIN", GraphPattern.METATYPE_ATTR: om.PTSinMetatype})

pattern.add_edge(matmul_node, transpose_node)
pattern.add_edge(transpose_node, concat_node)
pattern.add_edge(concat_node, cos_node)
pattern.add_edge(concat_node, sin_node)
return pattern


@PT_IGNORED_PATTERNS.register(IgnoredPatternNames.SAM_PE)
Expand Down
18 changes: 13 additions & 5 deletions tests/cross_fw/test_templates/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,16 +494,24 @@ def forward(self, x):
class RoPEModel(nn.Module):
INPUT_SIZE = [1, 10]

def __init__(self):
def __init__(self, with_transpose: bool, with_reshape: bool):
super().__init__()
self._with_transpose = with_transpose
self._with_reshape = with_reshape
data_shape = [5] if with_reshape else [1, 5, 1]
with set_torch_seed():
self.data = torch.randn([5])
self.data = nn.Parameter(torch.randn(data_shape))

def forward(self, x):
x = torch.unsqueeze(x, dim=0)
reshape = torch.reshape(self.data, [1, 5, 1])
x = torch.matmul(reshape, x)
x = torch.transpose(x, 2, 1)

if self._with_reshape:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this added? Is it part of pattern?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was changed because the model with reshape is not compressed by default, by the WC algorithm. It is not the part of the pattern, but with the reshape it is not possible to check that ignored pattern worked out properly. The reason why the reshape was added in the first place is unclear #3059

data = torch.reshape(self.data, [1, 5, 1])
else:
data = self.data
x = torch.matmul(data, x)
if self._with_transpose:
x = torch.transpose(x, 2, 1)
x = torch.cat([x], dim=2)
x1 = x.sin()
x2 = x.cos()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def get_matmul_model() -> TModel:

@staticmethod
@abstractmethod
def get_RoPE_model() -> TModel:
def get_RoPE_model(with_transpose: bool) -> TModel:
"""Returns a backend model for test_rope_weight_compression."""

@staticmethod
Expand Down Expand Up @@ -407,6 +407,11 @@ def get_different_channel_size_model(channel_sizes: list[int]) -> TModel:
def get_num_int4_nodes(model: TModel):
"Returns number of int4 nodes."

@staticmethod
@abstractmethod
def get_num_int8_nodes(model: TModel):
    "Returns number of int8 nodes."
Comment on lines +410 to +413
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The docstring for get_num_int8_nodes incorrectly says it returns the number of int4 nodes. This is misleading for implementers of the template and readers of the test expectations; update it to describe int8 nodes.

Copilot uses AI. Check for mistakes.

@staticmethod
@abstractmethod
def get_num_int4_group_sizes(model: TModel) -> dict[int, int]:
Expand Down Expand Up @@ -445,15 +450,20 @@ def test_awq_with_ignored_scope(self, mocker, is_3d_weights):
int4_num_nodes = self.get_num_int4_nodes(compressed_model)
assert int4_num_nodes == int4_ref_num_compressed, int4_num_nodes

def test_rope_weight_compression(self):
model = self.get_RoPE_model()
@pytest.mark.parametrize("with_transpose", [True, False])
def test_rope_weight_compression(self, with_transpose):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How ``test_rope_weight_compression` checks ROPE patterns?
If remove ROPE patterns, this tests still passed.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

model = self.get_RoPE_model(with_transpose=with_transpose)
sz = 8
n_samples = 10

dataset = Dataset(
[self.to_tensor(np.ones([1, i + 1, sz], dtype=np.float32)) for i in range(n_samples)],
self.get_transform_func(),
)
    # The model contains a single MatMul; with the default backup mode it
    # would be compressed to INT8. Asserting that zero INT8 nodes are
    # present therefore verifies that the ignored RoPE pattern was applied
    # and the MatMul was excluded from compression.
Comment on lines +463 to +466
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The explanatory comment about the RoPE test is confusing/contradictory: it states the MatMul is always compressed to INT8, but then references expecting int8_num_nodes == 0 without explaining that this assertion is specifically checking the ignored-pattern path (i.e., compression would otherwise happen). Reword to clearly tie the expected INT8 compression to the default backup-mode behavior and why zero INT8 nodes indicates RoPE was ignored.

Copilot uses AI. Check for mistakes.
compressed_model = compress_weights(
model,
mode=CompressWeightsMode.INT4_SYM,
Expand All @@ -462,9 +472,9 @@ def test_rope_weight_compression(self):
dataset=dataset,
)

int4_ref_num_compressed = 0
int4_num_nodes = self.get_num_int4_nodes(compressed_model)
assert int4_num_nodes == int4_ref_num_compressed
int8_ref_num_compressed = 0
int8_num_nodes = self.get_num_int8_nodes(compressed_model)
assert int8_num_nodes == int8_ref_num_compressed

def test_sam_pe_weight_compression(self):
model = self.get_SAM_PE_model()
Expand Down
22 changes: 16 additions & 6 deletions tests/onnx/quantization/test_weights_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ def get_matmul_model() -> onnx.ModelProto:
return mb.build()

@staticmethod
def get_RoPE_model() -> onnx.ModelProto:
def get_RoPE_model(with_transpose: bool) -> onnx.ModelProto:
"""
Builds a model to be used in the TemplateWeightCompression.test_rope_weight_compression() test.
"""
Expand All @@ -387,13 +387,15 @@ def get_RoPE_model() -> onnx.ModelProto:
x = mb.add_input("input", (1, 10))
x = mb.add_unsqueeze(x, axes=(2,))
x = mb.add_matmul(x, shape=(1, 5))
x = mb.add_transpose(x, perm=[0, 2, 1])
if with_transpose:
x = mb.add_transpose(x, perm=[0, 2, 1])
x = mb.add_concat([x], axis=-1)
x1 = mb.add_sin(x)
x2 = mb.add_cos(x)

mb.add_output(x1, (1, 5, 10))
mb.add_output(x2, (1, 5, 10))
output_shape = (1, 5, 10) if with_transpose else (1, 10, 5)
mb.add_output(x1, output_shape)
mb.add_output(x2, output_shape)

return mb.build()

Expand Down Expand Up @@ -804,11 +806,19 @@ def get_different_channel_size_model(channel_sizes: list[int]) -> onnx.ModelProt

return mb.build(opset_version=21)

@classmethod
def get_num_int4_nodes(cls, model: onnx.ModelProto) -> int:
return cls._get_num_typed_nodes(model, [onnx.TensorProto.UINT4, onnx.TensorProto.INT4])

@classmethod
def get_num_int8_nodes(cls, model: onnx.ModelProto) -> int:
return cls._get_num_typed_nodes(model, [onnx.TensorProto.UINT8, onnx.TensorProto.INT8])

@staticmethod
def get_num_int4_nodes(model: onnx.ModelProto) -> int:
def _get_num_typed_nodes(model: onnx.ModelProto, types: list[onnx.TensorProto]):
num = 0
for i in model.graph.initializer:
if i.data_type in [onnx.TensorProto.UINT4, onnx.TensorProto.INT4]:
if i.data_type in types:
num += 1
Comment on lines 817 to 822
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

types is a list of ONNX tensor data type enum values (ints like onnx.TensorProto.INT8), but the annotation list[onnx.TensorProto] refers to the TensorProto message type. Consider annotating as Sequence[int] (or the specific DataType enum type) to match actual usage and avoid misleading typing.

Copilot uses AI. Check for mistakes.
return num

Expand Down
16 changes: 10 additions & 6 deletions tests/openvino/native/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1320,18 +1320,22 @@ def _create_ov_model(self):


class RoPEModel(OVReferenceModel):
def _create_ov_model(self):
def _create_ov_model(self, with_transpose: bool, with_broadcast: bool):
position_ids = opset.parameter([1, 10], name="position_ids")

unsqueeze = opset.unsqueeze(position_ids, 0, name="unsqueeze")
convert = opset.convert(unsqueeze, ov.Type.f32, name="convert")

broadcast_data = self._rng.random((1, 5, 1)).astype(np.float32)
broadcast_shape = [1, 5, 1]
broadcast = opset.broadcast(broadcast_data, broadcast_shape, name="broadcast")
data = self._rng.random((1, 5, 1)).astype(np.float32)
if with_broadcast:
broadcast_shape = [1, 5, 1]
data = opset.broadcast(data, broadcast_shape, name="broadcast")

matmul = opset.matmul(broadcast, convert, transpose_a=False, transpose_b=False, name="MatMul")
transpose = opset.transpose(matmul, [0, 2, 1], name="transpose")
matmul = opset.matmul(data, convert, transpose_a=False, transpose_b=False, name="MatMul")
if with_transpose:
transpose = opset.transpose(matmul, [0, 2, 1], name="transpose")
else:
transpose = matmul
concat = opset.concat([transpose], axis=0, name="concat")
sin = opset.sin(concat, name="sin")
cos = opset.cos(concat, name="cos")
Expand Down
7 changes: 6 additions & 1 deletion tests/openvino/native/quantization/test_graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# limitations under the License.


from functools import partial
from pathlib import Path

import numpy as np
Expand Down Expand Up @@ -95,7 +96,11 @@ def test_real_models_fq_placement(model_name_params, tmp_path):
compare_nncf_graphs(quantized_model, path_ref_graph)


@pytest.mark.parametrize("model_creator_func", [MatmulSoftmaxMatmulBlock, RoPEModel])
@pytest.mark.parametrize(
"model_creator_func",
[MatmulSoftmaxMatmulBlock, partial(RoPEModel, with_transpose=True, with_broadcast=True)],
ids=["MatmulSoftmaxMatmulBlock", "RoPEModel"],
)
def test_transformer_models_fq_placement(model_creator_func, tmp_path):
model = model_creator_func()
quantized_model = quantize_model(
Expand Down
Loading