Commit e25cbf1

[bugfix][AWQ] make compute_layer_means not modify weight (#2114)
Summary: _compute_layer_means was making in-place changes to the weights, which made the first iteration (until the weights were refreshed from the cached versions) useless. The flow was:

1) cache weight
2) compute mean (modifies weight)
3) scale weight
4) calculate loss
5) restore weight, go to 3

so the first run of steps 3 and 4 was gibberish. Note: this bug had been around for a while and had gone undetected because throwing away a single iteration was fine as long as the rest worked.

TEST PLAN: (new unit test)
python tests/llmcompressor/modifiers/awq/test_base.py

Signed-off-by: HDCharles <[email protected]>
1 parent 8cfb5ec commit e25cbf1
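For illustration, a minimal sketch (not the repository's code) of the aliasing bug this commit fixes: in PyTorch, binding layer.weight to a local name does not copy the tensor, so in-place ops such as abs_() and div_() mutate the layer's actual parameter. Cloning first keeps the parameter intact:

import torch

@torch.no_grad()
def demo():
    layer = torch.nn.Linear(4, 2)
    snapshot = layer.weight.clone()

    # Buggy pattern: `weight` is just another name for the same storage,
    # so the in-place ops mutate the layer's parameter directly.
    weight = layer.weight
    weight.abs_().div_(2.0)
    print(torch.equal(layer.weight, snapshot))  # False: parameter changed

    # Fixed pattern (what this commit does): clone, then mutate the copy.
    layer.weight.copy_(snapshot)  # restore for the demo
    weight = layer.weight.clone()
    weight.abs_().div_(2.0)
    print(torch.equal(layer.weight, snapshot))  # True: parameter intact

demo()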

File tree

2 files changed: +42 -1 lines changed

src/llmcompressor/modifiers/awq/base.py

Lines changed: 1 addition & 1 deletion
@@ -694,7 +694,7 @@ def _compute_layer_means(layers: list[Module]) -> torch.Tensor:
                 f" layer {type(layer)}, skipping"
             )
             continue
-        weight = layer.weight
+        weight = layer.weight.clone()
         orig_shape = weight.shape

         q_args = getattr_chain(layer, "quantization_scheme.weights", None)
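The one-line fix clones the tensor so the in-place math later in _compute_layer_means touches only the copy. An out-of-place chain would be an equally safe alternative; a minimal sketch, assuming a per-channel mean of |W| (the actual reduction and grouping in _compute_layer_means are not shown in this hunk):

import torch

def abs_channel_mean(weight: torch.Tensor) -> torch.Tensor:
    # abs() and mean() allocate fresh tensors, so the caller's `weight`
    # is never mutated and no explicit clone() is needed.
    return weight.abs().mean(dim=0)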

tests/llmcompressor/modifiers/awq/test_base.py

Lines changed: 41 additions & 0 deletions
@@ -296,6 +296,47 @@ def test_compute_layer_means(n_balance_layers, group_size, n_input_features):
     assert_close(auto_awq_means, llmc_awq_means)


+@pytest.mark.unit
+@torch.no_grad
+def test_compute_layer_means_does_not_modify_weights():
+    """
+    Test that _compute_layer_means does not modify the original layer weights.
+    This is a regression test for a bug where in-place operations (abs_, div_)
+    were modifying the original weights.
+    """
+    # Create test layers with known weight values
+    n_layers = 3
+    n_input_features = 16
+    layers = [torch.nn.Linear(n_input_features, 8) for _ in range(n_layers)]
+
+    # Set up quantization scheme for channel-wise quantization
+    for layer in layers:
+        setattr(
+            layer,
+            "quantization_scheme",
+            QuantizationScheme(
+                targets=["Linear"],
+                weights=QuantizationArgs(
+                    strategy=QuantizationStrategy.CHANNEL,
+                ),
+            ),
+        )
+
+    # Store copies of original weights before calling _compute_layer_means
+    original_weights = [layer.weight.clone() for layer in layers]
+
+    # Call _compute_layer_means which should NOT modify the original weights
+    AWQModifier._compute_layer_means(layers)
+
+    # Verify that the original weights remain unchanged
+    for i, layer in enumerate(layers):
+        assert_close(
+            layer.weight,
+            original_weights[i],
+            msg=f"Layer {i} weight was modified by _compute_layer_means",
+        )
+
+
 @pytest.mark.unit
 @pytest.mark.parametrize(
     "rows, cols, block_height, block_width",
