Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/nncf/common/graph/patterns/patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,14 +233,14 @@ def join_patterns(self, other: "GraphPattern", edges: list[tuple[Hashable, Hasha
remapped_edges.append(new_edge)
self._graph.add_edges_from(remapped_edges)

def add_node(self, **attrs: dict[str, Any]) -> int:
def add_node(self, **attrs: Any) -> int:
    """
    Add a node with the given attributes to the pattern graph.

    :param attrs: Node attributes, e.g. label and metatype
        (``GraphPattern.LABEL_ATTR`` / ``GraphPattern.METATYPE_ATTR``).
    :return: The integer id assigned to the newly added node.
    """
    # Normalize a single metatype to a one-element list so downstream
    # matching code can always iterate the attribute uniformly.
    if GraphPattern.METATYPE_ATTR in attrs and not isinstance(attrs[GraphPattern.METATYPE_ATTR], list):
        attrs[GraphPattern.METATYPE_ATTR] = cast(Any, [attrs[GraphPattern.METATYPE_ATTR]])
    self._graph.add_node(self._node_counter, **attrs)
    # Node ids are sequential; return the id that was just used.
    self._node_counter += 1
    return self._node_counter - 1

def add_edge(self, u_name: str, v_name: str) -> None:
def add_edge(self, u_name: int | str, v_name: int | str) -> None:
    """
    Add a directed edge between two pattern nodes.

    :param u_name: Id (as returned by ``add_node``) or name of the source node.
    :param v_name: Id or name of the destination node.
    """
    self._graph.add_edge(u_name, v_name)

def add_edges_from(self, ebunch_to_add: list[Any], **attr: dict[str, Any]) -> None:
Expand Down
24 changes: 7 additions & 17 deletions src/nncf/onnx/quantization/ignored_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from nncf.onnx.graph.metatypes import onnx_metatypes as om
from nncf.onnx.graph.metatypes.groups import MATMUL_METATYPES
from nncf.onnx.hardware.fused_patterns import atomic_activations_operations
from nncf.quantization.ignored_patterns import create_rope_pattern

ONNX_IGNORED_PATTERNS = Registry("IGNORED_PATTERNS")

Expand Down Expand Up @@ -161,24 +162,13 @@ def create_se_block() -> GraphPattern:

@ONNX_IGNORED_PATTERNS.register(IgnoredPatternNames.ROPE)
def create_rope() -> GraphPattern:
    """
    Builds the Rotary Positional Embedding (RoPE) ignored pattern for the ONNX
    backend by instantiating the backend-agnostic RoPE pattern with ONNX metatypes.

    :return: The RoPE ignored pattern.
    """
    return create_rope_pattern(
        mm_metatype=om.ONNXMatMulMetatype,
        transpose_metatype=om.ONNXTransposeMetatype,
        concat_metatype=om.ONNXConcatMetatype,
        cos_metatype=om.ONNXCosMetatype,
        sin_metatype=om.ONNXSinMetatype,
    )


@ONNX_IGNORED_PATTERNS.register(IgnoredPatternNames.SAM_PE)
Expand Down
24 changes: 7 additions & 17 deletions src/nncf/openvino/quantization/ignored_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from nncf.common.utils.registry import Registry
from nncf.openvino.graph.metatypes import openvino_metatypes as om
from nncf.openvino.graph.metatypes.groups import LINEAR_OPERATIONS
from nncf.quantization.ignored_patterns import create_rope_pattern

OPENVINO_IGNORED_PATTERNS = Registry("IGNORED_PATTERNS")

Expand Down Expand Up @@ -168,24 +169,13 @@ def create_se_block() -> GraphPattern:

@OPENVINO_IGNORED_PATTERNS.register(IgnoredPatternNames.ROPE)
def create_rope() -> GraphPattern:
    """
    Builds the Rotary Positional Embedding (RoPE) ignored pattern for the
    OpenVINO backend by instantiating the backend-agnostic RoPE pattern
    with OpenVINO metatypes.

    :return: The RoPE ignored pattern.
    """
    return create_rope_pattern(
        mm_metatype=om.OVMatMulMetatype,
        transpose_metatype=om.OVTransposeMetatype,
        concat_metatype=om.OVConcatMetatype,
        cos_metatype=om.OVCosMetatype,
        sin_metatype=om.OVSinMetatype,
    )


@OPENVINO_IGNORED_PATTERNS.register(IgnoredPatternNames.SAM_PE)
Expand Down
63 changes: 63 additions & 0 deletions src/nncf/quantization/ignored_patterns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Copyright (c) 2026 Intel Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from nncf.common.graph.operator_metatypes import OperatorMetatype
from nncf.common.graph.patterns.patterns import GraphPattern


def create_rope_pattern(
    mm_metatype: type[OperatorMetatype],
    transpose_metatype: type[OperatorMetatype],
    concat_metatype: type[OperatorMetatype],
    cos_metatype: type[OperatorMetatype],
    sin_metatype: type[OperatorMetatype],
) -> GraphPattern:
    """
    Creates Rotary Positional Embedding (RoPE) pattern.
    Scheme:

    (matmul)       (matmul)
       |              |
    (transpose)    (concat)
       |            /    \\
    (concat)     (cos)  (sin)
     /    \\
    (cos) (sin)

    :param mm_metatype: MatMul metatype.
    :param transpose_metatype: Transpose metatype.
    :param concat_metatype: Concat metatype.
    :param cos_metatype: Cos metatype.
    :param sin_metatype: Sin metatype.
    :return: The Rotary Positional Embedding (RoPE) pattern.
    """
    result = GraphPattern()
    # Two alternatives: matmul -> (transpose ->) concat -> {cos, sin}.
    for use_transpose in (True, False):
        branch = GraphPattern()

        def _node(label: str, metatype: type[OperatorMetatype]) -> int:
            # Helper: add one labeled/metatyped node to the current branch.
            return branch.add_node(**{GraphPattern.LABEL_ATTR: label, GraphPattern.METATYPE_ATTR: metatype})

        mm = _node("MATMUL", mm_metatype)
        cat = _node("CONCAT", concat_metatype)
        cos = _node("COS", cos_metatype)
        sin = _node("SIN", sin_metatype)

        if use_transpose:
            tr = _node("TRANSPOSE", transpose_metatype)
            branch.add_edge(mm, tr)
            branch.add_edge(tr, cat)
        else:
            branch.add_edge(mm, cat)
        branch.add_edge(cat, cos)
        branch.add_edge(cat, sin)
        result.add_pattern_alternative(branch)
    return result
22 changes: 7 additions & 15 deletions src/nncf/torch/quantization/ignored_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from nncf.common.graph.patterns.patterns import GraphPattern
from nncf.common.graph.patterns.patterns import IgnoredPatternNames
from nncf.common.utils.registry import Registry
from nncf.quantization.ignored_patterns import create_rope_pattern
from nncf.torch.graph import operator_metatypes as om
from nncf.torch.graph.pattern_operations import ATOMIC_ACTIVATIONS_OPERATIONS
from nncf.torch.graph.pattern_operations import LINEAR_OPERATIONS
Expand Down Expand Up @@ -234,22 +235,13 @@ def get_se_block_with_bias_and_reshape() -> GraphPattern:

@PT_IGNORED_PATTERNS.register(IgnoredPatternNames.ROPE)
def create_rope() -> GraphPattern:
    """
    Builds the Rotary Positional Embedding (RoPE) ignored pattern for the
    PyTorch backend by instantiating the backend-agnostic RoPE pattern
    with PyTorch metatypes.

    :return: The RoPE ignored pattern.
    """
    return create_rope_pattern(
        mm_metatype=om.PTMatMulMetatype,
        transpose_metatype=om.PTTransposeMetatype,
        concat_metatype=om.PTCatMetatype,
        cos_metatype=om.PTCosMetatype,
        sin_metatype=om.PTSinMetatype,
    )


@PT_IGNORED_PATTERNS.register(IgnoredPatternNames.SAM_PE)
Expand Down
18 changes: 13 additions & 5 deletions tests/cross_fw/test_templates/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,16 +494,24 @@ def forward(self, x):
class RoPEModel(nn.Module):
INPUT_SIZE = [1, 10]

def __init__(self):
def __init__(self, with_transpose: bool, with_reshape: bool):
super().__init__()
self._with_transpose = with_transpose
self._with_reshape = with_reshape
data_shape = [5] if with_reshape else [1, 5, 1]
with set_torch_seed():
self.data = torch.randn([5])
self.data = nn.Parameter(torch.randn(data_shape))

def forward(self, x):
x = torch.unsqueeze(x, dim=0)
reshape = torch.reshape(self.data, [1, 5, 1])
x = torch.matmul(reshape, x)
x = torch.transpose(x, 2, 1)

if self._with_reshape:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this added? Is it part of pattern?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was changed because the model with reshape is not compressed by default, by the WC algorithm. It is not the part of the pattern, but with the reshape it is not possible to check that ignored pattern worked out properly. The reason why the reshape was added in the first place is unclear #3059

data = torch.reshape(self.data, [1, 5, 1])
else:
data = self.data
x = torch.matmul(data, x)
if self._with_transpose:
x = torch.transpose(x, 2, 1)
x = torch.cat([x], dim=2)
x1 = x.sin()
x2 = x.cos()
Expand Down
62 changes: 62 additions & 0 deletions tests/cross_fw/test_templates/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,3 +590,65 @@ def __init__(
]
original_mock_graph = create_mock_graph(nodes, node_edges)
self.nncf_graph = get_nncf_graph_from_mock_nx_graph(original_mock_graph, nncf_graph_cls=nncf_graph_cls)


class NNCFGraphRoPE:
    """
    Mock NNCF graph reproducing the Rotary Positional Embedding (RoPE)
    subgraph, with an optional Transpose between MatMul and Concat.
    """

    # Original graph
    #          Input_1   Const_1
    #                |   /
    #             MatMul_1
    #                |
    #          [Transpose_1]   <- present only when with_transpose=True
    #                |
    #             Concat_1
    #             /      \
    #         Sin_1      Cos_1
    #           |          |
    #        Output_1   Output_2

    def __init__(
        self,
        matmul_metatype,
        concat_metatype,
        sin_metatype,
        cos_metatype,
        const_metatype,
        transpose_metatype=None,
        with_transpose: bool = False,
        matmul_layer_attrs=None,
        default_layer_attrs=None,
        nncf_graph_cls=NNCFGraph,
    ):
        """
        :param matmul_metatype: Backend MatMul metatype.
        :param concat_metatype: Backend Concat metatype.
        :param sin_metatype: Backend Sin metatype.
        :param cos_metatype: Backend Cos metatype.
        :param const_metatype: Backend Constant metatype for the weight input.
        :param transpose_metatype: Backend Transpose metatype; required when
            with_transpose is True.
        :param with_transpose: Whether to insert Transpose_1 between MatMul_1
            and Concat_1.
        :param matmul_layer_attrs: Layer attributes for the MatMul node.
        :param default_layer_attrs: Layer attributes for all other nodes.
        :param nncf_graph_cls: NNCF graph class used to build the mock graph.
        """
        nodes = [
            NodeWithType("Input_1", InputNoopMetatype, layer_attributes=default_layer_attrs),
            NodeWithType("Const_1", const_metatype, layer_attributes=default_layer_attrs),
            NodeWithType("MatMul_1", matmul_metatype, layer_attributes=matmul_layer_attrs),
            NodeWithType("Concat_1", concat_metatype, layer_attributes=default_layer_attrs),
            NodeWithType("Sin_1", sin_metatype, layer_attributes=default_layer_attrs),
            NodeWithType("Cos_1", cos_metatype, layer_attributes=default_layer_attrs),
            NodeWithType("Output_1", OutputNoopMetatype, layer_attributes=default_layer_attrs),
            NodeWithType("Output_2", OutputNoopMetatype, layer_attributes=default_layer_attrs),
        ]
        node_edges = [
            ("Input_1", "MatMul_1"),
            ("Const_1", "MatMul_1"),
        ]

        if with_transpose:
            # The transpose variant needs its metatype supplied by the caller.
            assert transpose_metatype is not None
            nodes.append(NodeWithType("Transpose_1", transpose_metatype, layer_attributes=default_layer_attrs))
            node_edges += [
                ("MatMul_1", "Transpose_1"),
                ("Transpose_1", "Concat_1"),
            ]
        else:
            node_edges.append(("MatMul_1", "Concat_1"))

        node_edges += [
            ("Concat_1", "Sin_1"),
            ("Concat_1", "Cos_1"),
            ("Sin_1", "Output_1"),
            ("Cos_1", "Output_2"),
        ]
        original_mock_graph = create_mock_graph(nodes, node_edges)
        self.nncf_graph = get_nncf_graph_from_mock_nx_graph(original_mock_graph, nncf_graph_cls=nncf_graph_cls)
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def get_matmul_model() -> TModel:

@staticmethod
@abstractmethod
def get_RoPE_model(with_transpose: bool) -> TModel:
    """
    Returns a backend model for test_rope_weight_compression.

    :param with_transpose: Whether the model contains a Transpose between
        the MatMul and Concat of the RoPE subgraph.
    """

@staticmethod
Expand Down Expand Up @@ -407,6 +407,11 @@ def get_different_channel_size_model(channel_sizes: list[int]) -> TModel:
def get_num_int4_nodes(model: TModel):
"Returns number of int4 nodes."

@staticmethod
@abstractmethod
def get_num_int8_nodes(model: TModel):
    """Returns number of int8 nodes."""
Comment on lines +410 to +413
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The docstring for get_num_int8_nodes incorrectly says it returns the number of int4 nodes. This is misleading for implementers of the template and readers of the test expectations; update it to describe int8 nodes.

Copilot uses AI. Check for mistakes.

@staticmethod
@abstractmethod
def get_num_int4_group_sizes(model: TModel) -> dict[int, int]:
Expand Down Expand Up @@ -445,15 +450,20 @@ def test_awq_with_ignored_scope(self, mocker, is_3d_weights):
int4_num_nodes = self.get_num_int4_nodes(compressed_model)
assert int4_num_nodes == int4_ref_num_compressed, int4_num_nodes

def test_rope_weight_compression(self):
model = self.get_RoPE_model()
@pytest.mark.parametrize("with_transpose", [True, False])
def test_rope_weight_compression(self, with_transpose):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How ``test_rope_weight_compression` checks ROPE patterns?
If remove ROPE patterns, this tests still passed.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

model = self.get_RoPE_model(with_transpose=with_transpose)
sz = 8
n_samples = 10

dataset = Dataset(
[self.to_tensor(np.ones([1, i + 1, sz], dtype=np.float32)) for i in range(n_samples)],
self.get_transform_func(),
)
# With INT4_SYM and default backup mode, the model's single MatMul would
# normally still be compressed (to INT8, as the first/last layer).
# Asserting zero INT8 nodes therefore verifies that the RoPE ignored
# pattern excluded it from compression.
Comment on lines +463 to +466
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The explanatory comment about the RoPE test is confusing/contradictory: it states the MatMul is always compressed to INT8, but then references expecting int8_num_nodes == 0 without explaining that this assertion is specifically checking the ignored-pattern path (i.e., compression would otherwise happen). Reword to clearly tie the expected INT8 compression to the default backup-mode behavior and why zero INT8 nodes indicates RoPE was ignored.

Copilot uses AI. Check for mistakes.
compressed_model = compress_weights(
model,
mode=CompressWeightsMode.INT4_SYM,
Expand All @@ -462,9 +472,9 @@ def test_rope_weight_compression(self):
dataset=dataset,
)

int4_ref_num_compressed = 0
int4_num_nodes = self.get_num_int4_nodes(compressed_model)
assert int4_num_nodes == int4_ref_num_compressed
int8_ref_num_compressed = 0
int8_num_nodes = self.get_num_int8_nodes(compressed_model)
assert int8_num_nodes == int8_ref_num_compressed

def test_sam_pe_weight_compression(self):
model = self.get_SAM_PE_model()
Expand Down
29 changes: 26 additions & 3 deletions tests/cross_fw/test_templates/test_quantizer_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization
from nncf.quantization.passes import transform_to_inference_graph
from nncf.quantization.range_estimator import RangeEstimatorParametersSet
from tests.cross_fw.test_templates.models import NNCFGraphRoPE
from tests.cross_fw.test_templates.models import NNCFGraphToTest
from tests.cross_fw.test_templates.models import NNCFGraphToTestDepthwiseConv
from tests.cross_fw.test_templates.models import NNCFGraphToTestSumAggregation
Expand Down Expand Up @@ -469,12 +470,34 @@ def test_constant_branch_model_qconfig(self, constant_branch_nncf_graph):
state = self._get_q_setup(constant_branch_nncf_graph.nncf_graph)
assert state == self.REF_CONSTANT_BRANCH_SETUP_STATE

def _get_q_setup(self, nncf_graph):
min_max_algo = MinMaxQuantization()
@staticmethod
@abstractmethod
def get_rope_nncf_graph(with_transpose: bool) -> NNCFGraphRoPE:
    """
    Returns a backend-specific mock RoPE graph wrapper.

    :param with_transpose: Whether the graph contains a Transpose between
        MatMul and Concat.
    """
    pass

# Expected quantizer setup for the RoPE graph: no quantization points are
# expected, since the whole subgraph should be excluded by the ignored pattern.
REF_ROPE_SETUP_STATE = {
    "quantization_points": {},
    "unified_scale_groups": {},
    "shared_input_operation_set_groups": {},
}

@pytest.mark.parametrize("with_transpose", [True, False])
def test_rope_model_qconfig(self, with_transpose):
    """
    Checks that building a quantizer setup for the RoPE graph (with and
    without the optional Transpose) yields no quantization points, i.e. the
    RoPE ignored pattern is applied under ModelType.TRANSFORMER.
    """
    rope_nncf_graph = self.get_rope_nncf_graph(with_transpose)
    state = self._get_q_setup(rope_nncf_graph.nncf_graph, model_type=ModelType.TRANSFORMER)
    assert state == self.REF_ROPE_SETUP_STATE

def _get_q_setup(self, nncf_graph, model_type=None):
    """
    Builds a quantizer setup for the given graph and returns its state.

    :param nncf_graph: The NNCF graph to build the setup for.
    :param model_type: Optional model type; when provided, the backend's full
        ignored-pattern graph for that model type is applied.
    :return: The serialized state of the resulting quantizer setup.
    """
    min_max_algo = MinMaxQuantization(model_type=model_type) if model_type is not None else MinMaxQuantization()
    min_max_algo._backend_entity = self.get_algo_backend()
    inference_nncf_graph = self._transform_to_inference_graph(nncf_graph, min_max_algo)
    ignored_patterns = GraphPattern()
    if model_type is not None:
        ignored_patterns = PatternsManager.get_full_ignored_pattern_graph(
            backend=self.get_backend_type(), device=TargetDevice.ANY, model_type=model_type
        )
    return min_max_algo._get_quantizer_setup(
        nncf_graph, inference_nncf_graph, hw_patterns=GraphPattern(), ignored_patterns=ignored_patterns
    ).get_state()

@pytest.mark.parametrize(
Expand Down
Loading