Commit e25cbf1

[bugfix][AWQ] make compute_layer_means not modify weight (#2114)
Summary: _compute_layer_means was making in-place changes to the weights, which made the first iteration (until the weights were refreshed from the cached versions) useless. The flow was:

1) cache weight
2) compute mean (modifies weight)
3) scale weight
4) calculate loss
5) restore weight, go to 3

so the first run of steps 3 and 4 was gibberish. Note: this bug had been around for a while and had gone undetected because throwing away a single iteration was fine as long as the rest worked.

TEST PLAN: (new unit test)
python tests/llmcompressor/modifiers/awq/test_base.py

Signed-off-by: HDCharles <[email protected]>
1 parent 8cfb5ec commit e25cbf1
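For illustration, a minimal sketch (not the repository's code) of the aliasing bug this commit fixes: in PyTorch, binding layer.weight to a local name does not copy the tensor, so in-place ops such as abs_() and div_() mutate the layer's actual parameter. Cloning first keeps the parameter intact:

import torch

@torch.no_grad()
def demo():
    layer = torch.nn.Linear(4, 2)
    snapshot = layer.weight.clone()

    # Buggy pattern: `weight` is just another name for the same storage,
    # so the in-place ops mutate the layer's parameter directly.
    weight = layer.weight
    weight.abs_().div_(2.0)
    print(torch.equal(layer.weight, snapshot))  # False: parameter changed

    # Fixed pattern (what this commit does): clone, then mutate the copy.
    layer.weight.copy_(snapshot)  # restore for the demo
    weight = layer.weight.clone()
    weight.abs_().div_(2.0)
    print(torch.equal(layer.weight, snapshot))  # True: parameter intact

demo()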

File tree

2 files changed: +42 -1 lines changed

src/llmcompressor/modifiers/awq/base.py

Lines changed: 1 addition & 1 deletion
@@ -694,7 +694,7 @@ def _compute_layer_means(layers: list[Module]) -> torch.Tensor:
                 f" layer {type(layer)}, skipping"
             )
             continue
-        weight = layer.weight
+        weight = layer.weight.clone()
         orig_shape = weight.shape

         q_args = getattr_chain(layer, "quantization_scheme.weights", None)
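The one-line fix clones the tensor so the in-place math later in _compute_layer_means touches only the copy. An out-of-place chain would be an equally safe alternative; a minimal sketch, assuming a per-channel mean of |W| (the actual reduction and grouping in _compute_layer_means are not shown in this hunk):

import torch

def abs_channel_mean(weight: torch.Tensor) -> torch.Tensor:
    # abs() and mean() allocate fresh tensors, so the caller's `weight`
    # is never mutated and no explicit clone() is needed.
    return weight.abs().mean(dim=0)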

tests/llmcompressor/modifiers/awq/test_base.py

Lines changed: 41 additions & 0 deletions
@@ -296,6 +296,47 @@ def test_compute_layer_means(n_balance_layers, group_size, n_input_features):
     assert_close(auto_awq_means, llmc_awq_means)


+@pytest.mark.unit
+@torch.no_grad
+def test_compute_layer_means_does_not_modify_weights():
+    """
+    Test that _compute_layer_means does not modify the original layer weights.
+    This is a regression test for a bug where in-place operations (abs_, div_)
+    were modifying the original weights.
+    """
+    # Create test layers with known weight values
+    n_layers = 3
+    n_input_features = 16
+    layers = [torch.nn.Linear(n_input_features, 8) for _ in range(n_layers)]
+
+    # Set up quantization scheme for channel-wise quantization
+    for layer in layers:
+        setattr(
+            layer,
+            "quantization_scheme",
+            QuantizationScheme(
+                targets=["Linear"],
+                weights=QuantizationArgs(
+                    strategy=QuantizationStrategy.CHANNEL,
+                ),
+            ),
+        )
+
+    # Store copies of original weights before calling _compute_layer_means
+    original_weights = [layer.weight.clone() for layer in layers]
+
+    # Call _compute_layer_means which should NOT modify the original weights
+    AWQModifier._compute_layer_means(layers)
+
+    # Verify that the original weights remain unchanged
+    for i, layer in enumerate(layers):
+        assert_close(
+            layer.weight,
+            original_weights[i],
+            msg=f"Layer {i} weight was modified by _compute_layer_means",
+        )
+
+
 @pytest.mark.unit
 @pytest.mark.parametrize(
     "rows, cols, block_height, block_width",
