-
Notifications
You must be signed in to change notification settings - Fork 490
Add actorder support for GPTQ block quantization #2616
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 5 commits
bd31a8e
bcff7ba
fe28610
d4135b9
89642b2
dad5c97
ee13f33
cfe55d9
ea8229f
293427d
ae53389
2539e72
52f5fd7
2499c14
30e2f9e
7682c0a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -111,12 +111,19 @@ def quantize_weight( | |
|
|
||
| scale, zero_point = observer(W) | ||
| # handle g_idx and activation ordering | ||
| if strategy in (QuantizationStrategy.GROUP, QuantizationStrategy.TENSOR_GROUP): | ||
| g_idx_to_save = None | ||
| if strategy in ( | ||
| QuantizationStrategy.GROUP, | ||
| QuantizationStrategy.TENSOR_GROUP, | ||
| QuantizationStrategy.BLOCK, | ||
| ): | ||
| # mapping from column index to group index | ||
| g_idx = ( | ||
| torch.arange(num_columns, device=W.device, dtype=torch.int) | ||
| // quant_args.group_size | ||
| divisor = ( | ||
| quant_args.group_size | ||
| if strategy != QuantizationStrategy.BLOCK | ||
| else quant_args.block_structure[1] | ||
| ) | ||
| g_idx = torch.arange(num_columns, device=W.device, dtype=torch.int) // divisor | ||
|
|
||
| if actorder == ActivationOrdering.GROUP: | ||
| W, H, perm = _apply_activation_ordering(W, H) | ||
|
|
@@ -217,8 +224,8 @@ def quantize_weight( | |
| global_scale=global_scale, | ||
| ) | ||
| elif strategy == QuantizationStrategy.BLOCK: | ||
| block_width = quant_args.block_structure[1] | ||
| block_column_idx = (i1 + i) // block_width | ||
| column_idx = i1 + i | ||
|
rk119 marked this conversation as resolved.
|
||
| block_column_idx = g_idx[column_idx] | ||
| q = fake_quantize( | ||
| q.unsqueeze(1), | ||
| scale[:, block_column_idx : block_column_idx + 1], | ||
|
|
@@ -253,24 +260,18 @@ def quantize_weight( | |
| else: | ||
| W[:, i2:] -= w_err | ||
|
|
||
| has_gidx = False | ||
| if strategy in (QuantizationStrategy.GROUP, QuantizationStrategy.TENSOR_GROUP): | ||
| if actorder == ActivationOrdering.WEIGHT: | ||
| # restore original permutation | ||
| invperm = torch.argsort(perm) | ||
| W = W[:, invperm] | ||
|
|
||
| elif actorder == ActivationOrdering.GROUP: | ||
| if strategy in ( | ||
|
rk119 marked this conversation as resolved.
Outdated
|
||
| QuantizationStrategy.GROUP, | ||
| QuantizationStrategy.TENSOR_GROUP, | ||
| QuantizationStrategy.BLOCK, | ||
| ): | ||
| if actorder in (ActivationOrdering.WEIGHT, ActivationOrdering.GROUP): | ||
| # restore original permutation | ||
| invperm = torch.argsort(perm) | ||
| W = W[:, invperm] | ||
| g_idx = g_idx[invperm] | ||
|
|
||
| # only save g_idx if mapping is not identity | ||
| has_gidx = True | ||
|
|
||
| if not has_gidx: | ||
| g_idx = None | ||
| if actorder == ActivationOrdering.GROUP: | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We can simplify this, only group act order saves g_idx, can just check for that
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep, done! |
||
| g_idx_to_save = g_idx[invperm] | ||
|
|
||
| if isinstance(module, transformers.Conv1D): | ||
| W.transpose_(0, 1) | ||
|
|
@@ -282,8 +283,8 @@ def quantize_weight( | |
| "weight_scale": scale.to(dtype=final_dtype), | ||
| "weight_zero_point": zero_point.to(dtype=quant_args.zp_dtype), | ||
| } | ||
| if g_idx is not None: | ||
| q_param_dict["weight_g_idx"] = g_idx | ||
| if g_idx_to_save is not None: | ||
| q_param_dict["weight_g_idx"] = g_idx_to_save | ||
| return (loss, q_param_dict) | ||
|
|
||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.