Skip to content

Commit d765109

Browse files
kylesayrs and claude committed
Remove sequential_targets from modifiers
- Remove sequential_targets argument from AWQ, GPTQ, SparseGPT, and Wanda modifiers
- Remove get_no_split_params() utility function
- Update all code to use model._get_no_split_modules("auto") directly
- Remove sequential_targets validation and inference logic
- Update tests and documentation

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent 531a5a3 commit d765109

File tree

13 files changed

+14
-143
lines changed

13 files changed

+14
-143
lines changed

examples/multimodal_audio/README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ recipe = [
3232
GPTQModifier(
3333
targets="Linear",
3434
scheme="W4A16",
35-
sequential_targets=["WhisperEncoderLayer", "WhisperDecoderLayer"],
3635
ignore=["lm_head"],
3736
)
3837
]

src/llmcompressor/modifiers/awq/base.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,6 @@ class AWQModifier(Modifier, QuantizationMixin):
120120
- on_finalize
121121
- clear resolved mappings and captured activations
122122
123-
:param sequential_targets: list of module names to compress in
124-
the same calibration pass
125123
:param mappings: list activation layers to smooth, and which layers to
126124
scale the output such that activations are smoothed.
127125
Each entry of the mapping list should be a list itself, in which the first
@@ -157,7 +155,6 @@ class AWQModifier(Modifier, QuantizationMixin):
157155
model_config: ConfigDict = ConfigDict(arbitrary_types_allowed=True)
158156

159157
# User-provided vars (in addition to QuantizationMixin args)
160-
sequential_targets: str | list[str] | None = None
161158
mappings: list[AWQMapping] | None = None
162159
offload_device: torch.device | None | Sentinel = Sentinel("not_provided")
163160
duo_scaling: bool | Literal["both"] = True

src/llmcompressor/modifiers/gptq/base.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,6 @@ class GPTQModifier(Modifier, QuantizationMixin):
8282
- remove_hooks()
8383
- model.apply(freeze_module_quantization)
8484
85-
:param sequential_targets: list of layer names to compress during GPTQ, or
86-
'__ALL__' to compress every layer in the model
8785
:param block_size: Used to determine number of columns to compress in one pass
8886
:param dampening_frac: Amount of dampening to apply to H, as a fraction of the
8987
diagonal norm
@@ -118,7 +116,6 @@ class GPTQModifier(Modifier, QuantizationMixin):
118116
"""
119117

120118
# gptq modifier arguments
121-
sequential_targets: Union[str, List[str], None] = None
122119
block_size: int = 128
123120
dampening_frac: Optional[float] = 0.01
124121
# TODO: this does not serialize / will be incorrectly written

src/llmcompressor/modifiers/pruning/sparsegpt/base.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,11 +65,8 @@ class SparseGPTModifier(SparsityModifierBase):
6565
previously pruned model, defaults to False.
6666
:param offload_hessians: Set to True for decreased memory usage but increased
6767
runtime.
68-
:param sequential_targets: list of layer names to compress
69-
during SparseGPT, or '__ALL__' to compress every layer
70-
in the model. Alias for `targets`
71-
:param targets: list of layer names to compress during SparseGPT, or '__ALL__'
72-
to compress every layer in the model. Alias for `sequential_targets`
68+
:param targets: list of module names to quantize if a scheme is provided. Defaults
69+
to Linear layers
7370
:param ignore: optional list of module class names or submodule names to not
7471
quantize even if they match a target. Defaults to empty list.
7572
"""

src/llmcompressor/modifiers/pruning/sparsegpt/sgpt_base.py

Lines changed: 2 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,6 @@ class SparsityModifierBase(Modifier):
3232
owl_lmbda: float | None = None
3333

3434
# data pipeline arguments
35-
sequential_update: bool | None = False # deprecated
36-
sequential_targets: str | list[str] | None = None
3735
targets: str | list[str] = ["Linear"]
3836
ignore: list[str] = Field(default_factory=list)
3937

@@ -44,17 +42,6 @@ class SparsityModifierBase(Modifier):
4442
_target_layers: dict[str, torch.nn.Module] = PrivateAttr(default_factory=dict)
4543
_module_sparsities: dict[torch.nn.Module, str] = PrivateAttr(default_factory=dict)
4644

47-
@field_validator("sequential_update", mode="before")
48-
def validate_sequential_update(cls, value: bool) -> bool:
49-
if not value:
50-
warnings.warn(
51-
"`sequential_update=False` is no longer supported, setting "
52-
"sequential_update=True",
53-
DeprecationWarning,
54-
)
55-
56-
return True
57-
5845
@field_validator("sparsity_profile", mode="before")
5946
def validate_sparsity_profile(cls, value: str | None) -> bool:
6047
if value is None:
@@ -111,12 +98,8 @@ def on_initialize(self, state: "State", **kwargs) -> bool:
11198
dataloader: torch.utils.data.DataLoader = state.data.calib
11299

113100
# infer module and sequential targets
114-
# Note: only pass sequential_targets from kwargs, not the full kwargs dict
115-
# which may contain 'model' and cause duplicate argument errors
116-
self.sequential_targets = self._infer_sequential_targets(
117-
model, sequential_targets=kwargs.get("sequential_targets")
118-
)
119-
layers = dict(match_named_modules(model, self.sequential_targets))
101+
sequential_targets = model._get_no_split_modules("auto")
102+
layers = dict(match_named_modules(model, sequential_targets))
120103
self._target_layers = dict(
121104
match_named_modules(model, self.targets)
122105
) # layers containing targets
@@ -194,33 +177,6 @@ def on_end(self, state: State, event: Event, **kwargs):
194177
self.ended_ = True
195178
self.remove_hooks()
196179

197-
def _infer_sequential_targets(
198-
self, model: torch.nn.Module, **kwargs
199-
) -> str | list[str]:
200-
targets_from_kwargs = kwargs.get("sequential_targets")
201-
202-
# Validate that sequential_targets is not provided from both sources
203-
if self.sequential_targets is not None and targets_from_kwargs is not None:
204-
raise ValueError(
205-
"sequential_targets was provided both in the modifier config and in "
206-
"oneshot() dataset_args. Please provide sequential_targets in only "
207-
"one location to avoid conflicts."
208-
)
209-
210-
match self.sequential_targets:
211-
case None:
212-
# Check if sequential_targets was passed via kwargs (from dataset_args)
213-
if targets_from_kwargs is not None:
214-
if isinstance(targets_from_kwargs, str):
215-
return [targets_from_kwargs]
216-
return targets_from_kwargs
217-
# Fall back to auto-inference
218-
return get_no_split_params(model)
219-
case str():
220-
return [self.sequential_targets]
221-
case _:
222-
return self.sequential_targets
223-
224180
def _infer_owl_layer_sparsity(
225181
self,
226182
model: torch.nn.Module,

src/llmcompressor/modifiers/pruning/wanda/base.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,8 @@ class WandaPruningModifier(SparsityModifierBase):
5555
shape. Defaults to 0:0 which represents an unstructured mask.
5656
:param owl_m: Number of outliers to use for OWL
5757
:param owl_lmbda: Lambda value to use for OWL
58-
:param sequential_targets: list of layer names to compress during OBCQ, or '__ALL__'
59-
to compress every layer in the model. Alias for `targets`
60-
:param targets: list of layer names to compress during OBCQ, or '__ALL__'
61-
to compress every layer in the model. Alias for `sequential_targets`
58+
:param targets: list of module names to quantize if a scheme is provided. Defaults
59+
to Linear layers
6260
:param ignore: optional list of module class names or submodule names to not
6361
quantize even if they match a target. Defaults to empty list.
6462
"""

src/llmcompressor/pipelines/sequential/helpers.py

Lines changed: 4 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -430,55 +430,21 @@ def graph_is_well_formed(graph: Graph) -> bool:
430430

431431

432432
def get_sequential_targets(
433-
modifiers: list[Modifier], model: PreTrainedModel, args: "DatasetArguments"
433+
model: PreTrainedModel, args: "DatasetArguments"
434434
) -> list[str]:
435435
"""
436-
Infer sequential targets from modifiers list and dataset args
436+
Infer sequential targets from dataset args
437437
438438
:param model: model being calibrated
439-
:param modifiers: list of modifiers being applied during calibration
440439
:param dataset_args: dataset arguments passed by user
441440
:return: list of sequential targets
442441
"""
443-
modifier_targets = [
444-
(modifier, modifier.sequential_targets)
445-
for modifier in modifiers
446-
if getattr(modifier, "sequential_targets", None) is not None
447-
]
448-
449-
# deprecation warning
450-
if len(modifier_targets) >= 1:
451-
logger.warning(
452-
"Passing sequential targets through modifiers is deprecated, "
453-
"please use `oneshot(sequential_targets=...)`"
454-
)
455-
456-
# cannot infer from multiple modifiers
457-
if len(modifier_targets) >= 2:
458-
types = [type(modifier) for modifier, _ in modifier_targets]
459-
raise ValueError(
460-
"Cannot infer sequential targets from multiple sequential modifiers "
461-
f"({types})"
462-
)
463-
464-
# resolve single modifier
465-
if len(modifier_targets) == 1:
466-
if args.sequential_targets is not None:
467-
raise ValueError(
468-
f"Got sequential targets from both {type(modifier_targets[0][0])} "
469-
"and dataset arguments `sequential_targets`"
470-
)
471-
472-
sequential_targets = modifier_targets[0][1]
473-
474-
# if no modifiers, use data args
475-
else:
476-
sequential_targets = args.sequential_targets # may be `None`
442+
sequential_targets = args.sequential_targets
477443

478444
# validate and infer
479445
match sequential_targets:
480446
case None:
481-
return get_no_split_params(model)
447+
return model._get_no_split_modules("auto")
482448
case str():
483449
return [sequential_targets]
484450
case _:

src/llmcompressor/pipelines/sequential/pipeline.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,7 @@ def __call__(
9292
dispatch_for_sequential(model, onload_device)
9393

9494
# prepare to trace subgraphs
95-
modifiers = session.lifecycle.recipe.modifiers
96-
sequential_targets = get_sequential_targets(modifiers, model, dataset_args)
95+
sequential_targets = get_sequential_targets(model, dataset_args)
9796
ignore = dataset_args.tracing_ignore
9897

9998
# trace subgraphs

src/llmcompressor/transformers/tracing/debug.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import transformers
88
from transformers import AutoProcessor, PreTrainedModel
99

10-
from llmcompressor.utils.pytorch.module import get_no_split_params
1110
from llmcompressor.pipelines.sequential.helpers import trace_subgraphs, Subgraph
1211
from llmcompressor.transformers import TextGenerationDataset
1312
from llmcompressor.args import DatasetArguments
@@ -87,7 +86,7 @@ def trace(
8786

8887
# infer sequential targets
8988
if sequential_targets is None:
90-
sequential_targets = get_no_split_params(model)
89+
sequential_targets = model._get_no_split_modules("auto")
9190
if isinstance(sequential_targets, str):
9291
sequential_targets = [sequential_targets]
9392

src/llmcompressor/utils/pytorch/module.py

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
"expand_special_targets",
1919
"build_parameterized_layers",
2020
"qat_active",
21-
"get_no_split_params",
2221
]
2322

2423
ALL_TARGET = "__ALL__"
@@ -128,26 +127,6 @@ def qat_active(module: Module) -> bool:
128127
return False
129128

130129

131-
def get_no_split_params(model: PreTrainedModel) -> Union[str, List[str]]:
132-
"""
133-
Get list of module classes that shouldn't be split when sharding. For
134-
Hugging Face Transformer models, this is the decoder layer type. For other
135-
types of models, this just returns all module names.
136-
137-
:return: list of class names that shouldn't be split
138-
"""
139-
try:
140-
# Transformers < v5 support
141-
no_split_modules = model._get_no_split_modules("auto")
142-
except AttributeError:
143-
# Transformers v5 support
144-
no_split_modules = model._no_split_modules
145-
if len(no_split_modules) <= 0:
146-
return ALL_TARGET
147-
148-
return no_split_modules
149-
150-
151130
# https://discuss.pytorch.org/t/how-to-access-to-a-layer-by-module-name/83797/8
152131

153132

0 commit comments

Comments (0)