Skip to content

Commit d765109

Browse files
kylesayrs and claude committed
Remove sequential_targets from modifiers
- Remove sequential_targets argument from AWQ, GPTQ, SparseGPT, and Wanda modifiers
- Remove get_no_split_params() utility function
- Update all code to use model._get_no_split_modules("auto") directly
- Remove sequential_targets validation and inference logic
- Update tests and documentation

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent 531a5a3 commit d765109

File tree

13 files changed

+14
-143
lines changed

13 files changed

+14
-143
lines changed

examples/multimodal_audio/README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ recipe = [
3232
GPTQModifier(
3333
targets="Linear",
3434
scheme="W4A16",
35-
sequential_targets=["WhisperEncoderLayer", "WhisperDecoderLayer"],
3635
ignore=["lm_head"],
3736
)
3837
]

src/llmcompressor/modifiers/awq/base.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,6 @@ class AWQModifier(Modifier, QuantizationMixin):
120120
- on_finalize
121121
- clear resolved mappings and captured activations
122122
123-
:param sequential_targets: list of module names to compress in
124-
the same calibration pass
125123
:param mappings: list activation layers to smooth, and which layers to
126124
scale the output such that activations are smoothed.
127125
Each entry of the mapping list should be a list itself, in which the first
@@ -157,7 +155,6 @@ class AWQModifier(Modifier, QuantizationMixin):
157155
model_config: ConfigDict = ConfigDict(arbitrary_types_allowed=True)
158156

159157
# User-provided vars (in addition to QuantizationMixin args)
160-
sequential_targets: str | list[str] | None = None
161158
mappings: list[AWQMapping] | None = None
162159
offload_device: torch.device | None | Sentinel = Sentinel("not_provided")
163160
duo_scaling: bool | Literal["both"] = True

src/llmcompressor/modifiers/gptq/base.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,6 @@ class GPTQModifier(Modifier, QuantizationMixin):
8282
- remove_hooks()
8383
- model.apply(freeze_module_quantization)
8484
85-
:param sequential_targets: list of layer names to compress during GPTQ, or
86-
'__ALL__' to compress every layer in the model
8785
:param block_size: Used to determine number of columns to compress in one pass
8886
:param dampening_frac: Amount of dampening to apply to H, as a fraction of the
8987
diagonal norm
@@ -118,7 +116,6 @@ class GPTQModifier(Modifier, QuantizationMixin):
118116
"""
119117

120118
# gptq modifier arguments
121-
sequential_targets: Union[str, List[str], None] = None
122119
block_size: int = 128
123120
dampening_frac: Optional[float] = 0.01
124121
# TODO: this does not serialize / will be incorrectly written

src/llmcompressor/modifiers/pruning/sparsegpt/base.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,11 +65,8 @@ class SparseGPTModifier(SparsityModifierBase):
6565
previously pruned model, defaults to False.
6666
:param offload_hessians: Set to True for decreased memory usage but increased
6767
runtime.
68-
:param sequential_targets: list of layer names to compress
69-
during SparseGPT, or '__ALL__' to compress every layer
70-
in the model. Alias for `targets`
71-
:param targets: list of layer names to compress during SparseGPT, or '__ALL__'
72-
to compress every layer in the model. Alias for `sequential_targets`
68+
:param targets: list of module names to quantize if a scheme is provided. Defaults
69+
to Linear layers
7370
:param ignore: optional list of module class names or submodule names to not
7471
quantize even if they match a target. Defaults to empty list.
7572
"""

src/llmcompressor/modifiers/pruning/sparsegpt/sgpt_base.py

Lines changed: 2 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,6 @@ class SparsityModifierBase(Modifier):
3232
owl_lmbda: float | None = None
3333

3434
# data pipeline arguments
35-
sequential_update: bool | None = False # deprecated
36-
sequential_targets: str | list[str] | None = None
3735
targets: str | list[str] = ["Linear"]
3836
ignore: list[str] = Field(default_factory=list)
3937

@@ -44,17 +42,6 @@ class SparsityModifierBase(Modifier):
4442
_target_layers: dict[str, torch.nn.Module] = PrivateAttr(default_factory=dict)
4543
_module_sparsities: dict[torch.nn.Module, str] = PrivateAttr(default_factory=dict)
4644

47-
@field_validator("sequential_update", mode="before")
48-
def validate_sequential_update(cls, value: bool) -> bool:
49-
if not value:
50-
warnings.warn(
51-
"`sequential_update=False` is no longer supported, setting "
52-
"sequential_update=True",
53-
DeprecationWarning,
54-
)
55-
56-
return True
57-
5845
@field_validator("sparsity_profile", mode="before")
5946
def validate_sparsity_profile(cls, value: str | None) -> bool:
6047
if value is None:
@@ -111,12 +98,8 @@ def on_initialize(self, state: "State", **kwargs) -> bool:
11198
dataloader: torch.utils.data.DataLoader = state.data.calib
11299

113100
# infer module and sequential targets
114-
# Note: only pass sequential_targets from kwargs, not the full kwargs dict
115-
# which may contain 'model' and cause duplicate argument errors
116-
self.sequential_targets = self._infer_sequential_targets(
117-
model, sequential_targets=kwargs.get("sequential_targets")
118-
)
119-
layers = dict(match_named_modules(model, self.sequential_targets))
101+
sequential_targets = model._get_no_split_modules("auto")
102+
layers = dict(match_named_modules(model, sequential_targets))
120103
self._target_layers = dict(
121104
match_named_modules(model, self.targets)
122105
) # layers containing targets
@@ -194,33 +177,6 @@ def on_end(self, state: State, event: Event, **kwargs):
194177
self.ended_ = True
195178
self.remove_hooks()
196179

197-
def _infer_sequential_targets(
198-
self, model: torch.nn.Module, **kwargs
199-
) -> str | list[str]:
200-
targets_from_kwargs = kwargs.get("sequential_targets")
201-
202-
# Validate that sequential_targets is not provided from both sources
203-
if self.sequential_targets is not None and targets_from_kwargs is not None:
204-
raise ValueError(
205-
"sequential_targets was provided both in the modifier config and in "
206-
"oneshot() dataset_args. Please provide sequential_targets in only "
207-
"one location to avoid conflicts."
208-
)
209-
210-
match self.sequential_targets:
211-
case None:
212-
# Check if sequential_targets was passed via kwargs (from dataset_args)
213-
if targets_from_kwargs is not None:
214-
if isinstance(targets_from_kwargs, str):
215-
return [targets_from_kwargs]
216-
return targets_from_kwargs
217-
# Fall back to auto-inference
218-
return get_no_split_params(model)
219-
case str():
220-
return [self.sequential_targets]
221-
case _:
222-
return self.sequential_targets
223-
224180
def _infer_owl_layer_sparsity(
225181
self,
226182
model: torch.nn.Module,

src/llmcompressor/modifiers/pruning/wanda/base.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,8 @@ class WandaPruningModifier(SparsityModifierBase):
5555
shape. Defaults to 0:0 which represents an unstructured mask.
5656
:param owl_m: Number of outliers to use for OWL
5757
:param owl_lmbda: Lambda value to use for OWL
58-
:param sequential_targets: list of layer names to compress during OBCQ, or '__ALL__'
59-
to compress every layer in the model. Alias for `targets`
60-
:param targets: list of layer names to compress during OBCQ, or '__ALL__'
61-
to compress every layer in the model. Alias for `sequential_targets`
58+
:param targets: list of module names to quantize if a scheme is provided. Defaults
59+
to Linear layers
6260
:param ignore: optional list of module class names or submodule names to not
6361
quantize even if they match a target. Defaults to empty list.
6462
"""

src/llmcompressor/pipelines/sequential/helpers.py

Lines changed: 4 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -430,55 +430,21 @@ def graph_is_well_formed(graph: Graph) -> bool:
430430

431431

432432
def get_sequential_targets(
433-
modifiers: list[Modifier], model: PreTrainedModel, args: "DatasetArguments"
433+
model: PreTrainedModel, args: "DatasetArguments"
434434
) -> list[str]:
435435
"""
436-
Infer sequential targets from modifiers list and dataset args
436+
Infer sequential targets from dataset args
437437
438438
:param model: model being calibrated
439-
:param modifiers: list of modifiers being applied during calibration
440439
:param dataset_args: dataset arguments passed by user
441440
:return: list of sequential targets
442441
"""
443-
modifier_targets = [
444-
(modifier, modifier.sequential_targets)
445-
for modifier in modifiers
446-
if getattr(modifier, "sequential_targets", None) is not None
447-
]
448-
449-
# deprecation warning
450-
if len(modifier_targets) >= 1:
451-
logger.warning(
452-
"Passing sequential targets through modifiers is deprecated, "
453-
"please use `oneshot(sequential_targets=...)`"
454-
)
455-
456-
# cannot infer from multiple modifiers
457-
if len(modifier_targets) >= 2:
458-
types = [type(modifier) for modifier, _ in modifier_targets]
459-
raise ValueError(
460-
"Cannot infer sequential targets from multiple sequential modifiers "
461-
f"({types})"
462-
)
463-
464-
# resolve single modifier
465-
if len(modifier_targets) == 1:
466-
if args.sequential_targets is not None:
467-
raise ValueError(
468-
f"Got sequential targets from both {type(modifier_targets[0][0])} "
469-
"and dataset arguments `sequential_targets`"
470-
)
471-
472-
sequential_targets = modifier_targets[0][1]
473-
474-
# if no modifiers, use data args
475-
else:
476-
sequential_targets = args.sequential_targets # may be `None`
442+
sequential_targets = args.sequential_targets
477443

478444
# validate and infer
479445
match sequential_targets:
480446
case None:
481-
return get_no_split_params(model)
447+
return model._get_no_split_modules("auto")
482448
case str():
483449
return [sequential_targets]
484450
case _:

src/llmcompressor/pipelines/sequential/pipeline.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,7 @@ def __call__(
9292
dispatch_for_sequential(model, onload_device)
9393

9494
# prepare to trace subgraphs
95-
modifiers = session.lifecycle.recipe.modifiers
96-
sequential_targets = get_sequential_targets(modifiers, model, dataset_args)
95+
sequential_targets = get_sequential_targets(model, dataset_args)
9796
ignore = dataset_args.tracing_ignore
9897

9998
# trace subgraphs

src/llmcompressor/transformers/tracing/debug.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import transformers
88
from transformers import AutoProcessor, PreTrainedModel
99

10-
from llmcompressor.utils.pytorch.module import get_no_split_params
1110
from llmcompressor.pipelines.sequential.helpers import trace_subgraphs, Subgraph
1211
from llmcompressor.transformers import TextGenerationDataset
1312
from llmcompressor.args import DatasetArguments
@@ -87,7 +86,7 @@ def trace(
8786

8887
# infer sequential targets
8988
if sequential_targets is None:
90-
sequential_targets = get_no_split_params(model)
89+
sequential_targets = model._get_no_split_modules("auto")
9190
if isinstance(sequential_targets, str):
9291
sequential_targets = [sequential_targets]
9392

src/llmcompressor/utils/pytorch/module.py

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
"expand_special_targets",
1919
"build_parameterized_layers",
2020
"qat_active",
21-
"get_no_split_params",
2221
]
2322

2423
ALL_TARGET = "__ALL__"
@@ -128,26 +127,6 @@ def qat_active(module: Module) -> bool:
128127
return False
129128

130129

131-
def get_no_split_params(model: PreTrainedModel) -> Union[str, List[str]]:
132-
"""
133-
Get list of module classes that shouldn't be split when sharding. For
134-
Hugging Face Transformer models, this is the decoder layer type. For other
135-
types of models, this just returns all module names.
136-
137-
:return: list of class names that shouldn't be split
138-
"""
139-
try:
140-
# Transformers < v5 support
141-
no_split_modules = model._get_no_split_modules("auto")
142-
except AttributeError:
143-
# Transformers v5 support
144-
no_split_modules = model._no_split_modules
145-
if len(no_split_modules) <= 0:
146-
return ALL_TARGET
147-
148-
return no_split_modules
149-
150-
151130
# https://discuss.pytorch.org/t/how-to-access-to-a-layer-by-module-name/83797/8
152131

153132

0 commit comments

Comments (0)