Skip to content

Commit ab5c2fe

Browse files
authored
Fix g_idx init in transformers-like API (#2204)
Signed-off-by: Kaihui-intel <[email protected]>
1 parent 1be4b3b commit ab5c2fe

File tree

2 files changed

+4
-9
lines changed

2 files changed

+4
-9
lines changed

neural_compressor/transformers/models/modeling_auto.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ def build_woq_model(model, quantization_config):
             not getattr(quantization_config, "sym", False),
         )
         use_optimum_format = True
+        g_idx = hasattr(m, "g_idx") and m.g_idx is not None

         with init_empty_weights():
             new_module = INCWeightOnlyLinear(
@@ -80,7 +81,7 @@ def build_woq_model(model, quantization_config):
                 group_size=quantization_config.group_size,
                 zp=zp,
                 bias=m.bias is not None,
-                g_idx=True,
+                g_idx=g_idx,
                 use_optimum_format=use_optimum_format,
             )
             set_module(model, n, new_module)

neural_compressor/transformers/quantization/utils.py

+2-8
Original file line numberDiff line numberDiff line change
@@ -206,14 +206,8 @@ def _replace_linear(
                 device=device,
                 use_optimum_format=getattr(module, "use_optimum_format", True),
             )
-            if quantization_config.quant_method.value == "gptq":
-                g_idx = getattr(
-                    module,
-                    "g_idx",
-                    torch.zeros(in_features, dtype=torch.int32).to(device),
-                )
-            else:
-                g_idx = None
+            # g_idx is only present when using GPTQ quantization method
+            g_idx = module.g_idx if hasattr(module, "g_idx") else None
             model._modules[name].set_scales_zps_gidx(
                 (
                     module.scales

0 commit comments

Comments
 (0)