-
Notifications
You must be signed in to change notification settings - Fork 277
[WC] Scale Estimation transpose_a support #3839
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -139,17 +139,21 @@ def apply( | |
| continue | ||
| _, weight_port_id = weight_data[0] | ||
|
|
||
| if self._backend_entity.matmul_has_transposed_activations(wp.node_with_weight, graph): | ||
| msg = "Transposed activations are not supported yet for the Scale Estimation algorithm" | ||
| raise nncf.UnsupportedModelError(msg) | ||
|
|
||
| weight = self._backend_entity.get_weight(wp.node_with_weight, weight_port_id, model, graph) | ||
|
|
||
| activation_port_id = self._backend_entity.get_activation_port_id(wp.node_with_weight, graph) | ||
| act_shape = graph.get_input_edge_by_port_id(wp.node_with_weight, activation_port_id).tensor_shape | ||
| act_ch_axis = self._backend_entity.get_activation_channel_axis( | ||
| wp.node_with_weight, activation_port_id, act_shape | ||
| ) | ||
| act_ch_axis %= len(act_shape) | ||
|
||
|
|
||
| scale, zero_point = self.calculate_quantization_params( | ||
| stats, | ||
| weight, | ||
| wp.reduction_axes, | ||
| config, | ||
| act_ch_axis, | ||
| self._subset_size, | ||
| self._initial_steps, | ||
| self._scale_steps, | ||
|
|
@@ -165,6 +169,7 @@ def calculate_quantization_params( | |
| weight: Tensor, | ||
| reduction_axes: tuple[int, ...], | ||
| config: WeightCompressionConfig, | ||
| act_ch_axis: int = -1, | ||
| subset_size: int = 32, | ||
| initial_steps: int = 5, | ||
| scale_steps: int = 10, | ||
|
|
@@ -185,6 +190,7 @@ def calculate_quantization_params( | |
| :param weight: The weight tensor that is being quantized. | ||
| :param reduction_axes: Tuple specifying the axes along which the reduction is performed for quantization. | ||
| :param config: Configuration parameters for the weight compression, including quantization settings. | ||
| :param act_ch_axis: The activation channel axis. | ||
| :param subset_size: The number of samples to use for scale estimation. Defaults to 32. | ||
| :param initial_steps: The number of steps for initial scale rectification using activation statistics. | ||
| Defaults to 5. | ||
|
|
@@ -195,7 +201,7 @@ def calculate_quantization_params( | |
| """ | ||
| reduction_axis = reduction_axes[0] | ||
|
|
||
| s, X = process_stats(statistics, subset_size) | ||
| s, X = process_stats(statistics, subset_size, act_ch_axis=act_ch_axis) | ||
|
|
||
| X = X.astype(TensorDataType.float32) | ||
| weight = weight.astype(TensorDataType.float32) | ||
|
|
@@ -382,23 +388,6 @@ def calculate_quantization_params( | |
|
|
||
| return result_scale, zp | ||
|
|
||
| @staticmethod | ||
| def activations_to_wc_statistics(activations: list[Tensor]) -> WCTensorStatistic: | ||
| """ | ||
| Mimic the activation reducing logic from WeightCompression.get_statistic_points. | ||
|
|
||
| :param activations: List of raw activations. | ||
| :return: Instance of WCTensorStatistic class containing reduced activations and shapes. | ||
| """ | ||
| mean_values = [] | ||
| shapes = [] | ||
| for act in activations: | ||
| shapes.append(act.shape) | ||
| reduction_shape = tuple(range(act.ndim - 1)) | ||
| mean_values.append(fns.mean(act, axis=reduction_shape)) | ||
| wc_statistics = WCTensorStatistic(mean_values, shapes) | ||
| return wc_statistics | ||
|
|
||
|
|
||
| def get_target_zero_mask(compressed_weights: Tensor, zp: Optional[Tensor] = None) -> tuple[Tensor, Tensor]: | ||
| """ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -78,14 +78,18 @@ def _create_ov_model(self, input_shape=None, reshape_shape=None, matmul_w_shape= | |
|
|
||
|
|
||
| class SimpleMoEModel(OVReferenceModel): | ||
| def _create_ov_model(self, num_experts=2, hidden_dim=8, out_dim=16, seq_len=4): | ||
| def _create_ov_model(self, num_experts=2, hidden_dim=8, out_dim=16, seq_len=4, tranpsose_a: bool = False): | ||
daniil-lyakhov marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| input_shape = [num_experts, seq_len, hidden_dim] | ||
| input_1 = opset.parameter(input_shape, name="Input") | ||
|
|
||
| weight_data = np.arange(0, num_experts * hidden_dim * out_dim, dtype=np.float32) | ||
| weight_data = weight_data.reshape(num_experts, hidden_dim, out_dim) | ||
|
|
||
| matmul = opset.matmul(input_1, weight_data, transpose_a=False, transpose_b=False, name="MoE_MatMul") | ||
| if tranpsose_a: | ||
| transpose = opset.transpose(input_1, (0, 2, 1)) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please check — it looks like this branch never runs.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch, I fixed the test and asked @anzr299 to remove the skip. Ticket 179366 |
||
| else: | ||
| transpose = input_1 | ||
| matmul = opset.matmul(transpose, weight_data, transpose_a=False, transpose_b=False, name="MoE_MatMul") | ||
|
|
||
| result = opset.result(matmul, name="Result") | ||
| result.get_output_tensor(0).set_names(set(["Result"])) | ||
|
|
@@ -1366,13 +1370,17 @@ def _create_ov_model(self): | |
|
|
||
|
|
||
| class MatMul(OVReferenceModel): | ||
| def _create_ov_model(self): | ||
| def _create_ov_model(self, transpose_a: bool = False): | ||
| input_node = opset.parameter([1, 4, 8], name="Input") | ||
|
|
||
| weights_data = np.arange(0, 16 * 8, dtype=np.float32).reshape(16, 8) | ||
| weights_node = opset.constant(weights_data, dtype=np.float32, name="Weights") | ||
|
|
||
| matmul_node = opset.matmul(input_node, weights_node, transpose_a=False, transpose_b=True, name="MatMul") | ||
| if transpose_a: | ||
| transpose = opset.transpose(input_node, (0, 2, 1)) | ||
| else: | ||
| transpose = input_node | ||
| matmul_node = opset.matmul(transpose, weights_node, transpose_a=transpose_a, transpose_b=True, name="MatMul") | ||
|
|
||
| result_node = opset.result(matmul_node, name="Result") | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks like copy-paste from awq.py.
Please consider refactoring it into a shared function.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
WeightCompressionAlgoBackend.get_activation_channel_axis_and_shape is introduced — please check.