Commit 4b5fe76

update example, do not start on initialize

Signed-off-by: Kyle Sayers <[email protected]>
1 parent 03117ae

File tree: 2 files changed (+14, -27 lines)

examples/sparse_2of4_quantization_fp8/llama3_8b_2of4.py (+14, -21)
@@ -5,7 +5,6 @@
 
 from llmcompressor import oneshot
 from llmcompressor.modifiers.obcq import SparseGPTModifier
-from llmcompressor.modifiers.pruning import ConstantPruningModifier
 from llmcompressor.modifiers.quantization import QuantizationModifier
 
 # Configuration
@@ -52,29 +51,23 @@ def get_recipe(fp8_enabled):
     save_dir = MODEL_ID.split("/")[1] + "2of4-sparse"
 
     if fp8_enabled:
-        base_recipe.extend(
-            [
-                QuantizationModifier(
-                    targets=["Linear"],
-                    ignore=["lm_head"],
-                    scheme="FP8_DYNAMIC",
-                ),
-                ConstantPruningModifier(
-                    targets=[
-                        r"re:.*q_proj.weight",
-                        r"re:.*k_proj.weight",
-                        r"re:.*v_proj.weight",
-                        r"re:.*o_proj.weight",
-                        r"re:.*gate_proj.weight",
-                        r"re:.*up_proj.weight",
-                        r"re:.*down_proj.weight",
-                    ],
-                    start=0,
-                ),
-            ]
+        base_recipe.append(
+            QuantizationModifier(
+                targets=["Linear"],
+                ignore=["lm_head"],
+                scheme="FP8_DYNAMIC",
+            )
         )
         save_dir = MODEL_ID.split("/")[1] + "2of4-W8A8-FP8-Dynamic-Per-Token"
 
+        # check that asymmetric quantization is not being used
+        q_scheme = base_recipe[1].scheme
+        if not isinstance(q_scheme, str) and not q_scheme["weights"].symmetric:
+            raise ValueError(
+                "Asymmetric quantization with 2of4 sparsity is not supported by vLLM. "
+                "Please use symmetric quantization"
+            )
+
     return base_recipe, save_dir
 
 
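Note on the guard added above: `QuantizationModifier.scheme` is either a string preset such as "FP8_DYNAMIC" (string presets use symmetric weights and pass the check unexamined) or a dict-style scheme whose "weights" entry exposes a `symmetric` flag, which is what `q_scheme["weights"].symmetric` inspects. A minimal sketch of a scheme this guard would reject; `QuantizationArgs` and its import path are assumptions drawn from the compressed-tensors library, not from this commit:

    # Hypothetical example, not from this commit: a dict-style scheme with
    # asymmetric weights, the case the new guard in llama3_8b_2of4.py rejects.
    # QuantizationArgs and its import path are assumed from compressed-tensors.
    from compressed_tensors.quantization import QuantizationArgs

    from llmcompressor.modifiers.quantization import QuantizationModifier

    asym_modifier = QuantizationModifier(
        targets=["Linear"],
        ignore=["lm_head"],
        scheme={"weights": QuantizationArgs(num_bits=8, symmetric=False)},
    )

    # Mirrors the check added in the example script above: string presets
    # pass through, dict schemes with asymmetric weights raise.
    q_scheme = asym_modifier.scheme
    if not isinstance(q_scheme, str) and not q_scheme["weights"].symmetric:
        raise ValueError(
            "Asymmetric quantization with 2of4 sparsity is not supported by vLLM. "
            "Please use symmetric quantization"
        )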

src/llmcompressor/modifiers/modifier.py (-6)
@@ -89,12 +89,6 @@ def initialize(self, state: State, **kwargs):
 
         self.initialized_ = self.on_initialize(state=state, **kwargs)
 
-        # trigger start
-        fake_start_event = Event(type_=EventType.BATCH_START, global_step=0)
-        if self.should_start(fake_start_event):
-            self.on_start(state, fake_start_event, **kwargs)
-            self.started_ = True
-
     def finalize(self, state: State, **kwargs):
         """
         Finalize the modifier for the given model and state.
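This deletion is the "do not start on initialize" half of the commit: `initialize()` previously fired a synthetic step-0 `BATCH_START` event, so any modifier with `start=0` began as a side effect of initialization (presumably also why the example above drops its `ConstantPruningModifier(start=0)`). After this change, starting is left to the event lifecycle. A rough sketch of that flow; `Event`, `EventType`, `should_start`, `on_start`, and `started_` appear in the removed code, while the dispatch helper and the `llmcompressor.core` import path are illustrative assumptions:

    # Illustrative sketch, not code from this commit: after the change, a
    # modifier starts only when a real BATCH_START event satisfies
    # should_start(), rather than via a fake event fired inside initialize().
    # The import path for Event/EventType is an assumption.
    from llmcompressor.core import Event, EventType

    def dispatch_batch_start(modifier, state, global_step: int):
        """Hypothetical lifecycle hook dispatching a genuine batch-start event."""
        event = Event(type_=EventType.BATCH_START, global_step=global_step)
        if not modifier.started_ and modifier.should_start(event):
            modifier.on_start(state, event)
            modifier.started_ = True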
