Remove ConstantPruningModifier (CMP) from all 2of4 examples

kylesayrs · kylesayrs · commit 93a12f1dc9c0 · 2025-03-19T13:16:39.000-04:00
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
diff --git a/examples/quantization_2of4_sparse_w4a16/2of4_w4a16_group-128_recipe.yaml b/examples/quantization_2of4_sparse_w4a16/2of4_w4a16_group-128_recipe.yaml
@@ -6,20 +6,6 @@ sparsity_stage:
       mask_structure: "2:4"
       targets: ["Linear"]
       ignore: ["re:.*lm_head"]
-finetuning_stage:
-  run_type: train
-  finetuning_modifiers:
-    ConstantPruningModifier:
-      targets: [
-        're:.*q_proj.weight',
-        're:.*k_proj.weight', 
-        're:.*v_proj.weight',
-        're:.*o_proj.weight',
-        're:.*gate_proj.weight',
-        're:.*up_proj.weight',
-        're:.*down_proj.weight',
-      ]
-      start: 0
 quantization_stage:
   run_type: oneshot
   quantization_modifiers:
diff --git a/examples/quantization_2of4_sparse_w4a16/2of4_w4a16_recipe.yaml b/examples/quantization_2of4_sparse_w4a16/2of4_w4a16_recipe.yaml
@@ -6,20 +6,6 @@ sparsity_stage:
       mask_structure: "2:4"
       targets: ["Linear"]
       ignore: ["re:.*lm_head"]
-finetuning_stage:
-  run_type: train
-  finetuning_modifiers:
-    ConstantPruningModifier:
-      targets: [
-        're:.*q_proj.weight',
-        're:.*k_proj.weight', 
-        're:.*v_proj.weight',
-        're:.*o_proj.weight',
-        're:.*gate_proj.weight',
-        're:.*up_proj.weight',
-        're:.*down_proj.weight',
-      ]
-      start: 0
 quantization_stage:
   run_type: oneshot
   quantization_modifiers:
diff --git a/examples/sparse_2of4_quantization_fp8/README.md b/examples/sparse_2of4_quantization_fp8/README.md
@@ -63,21 +63,13 @@ recipe = [
 ]
 
 if fp8_enabled:
-    recipe.extend([
+    recipe.append(
         QuantizationModifier(
             targets=["Linear"],
             ignore=["lm_head"],
             scheme="FP8_DYNAMIC",
         ),
-        ConstantPruningModifier(
-            targets=[
-                r"re:.*q_proj.weight", r"re:.*k_proj.weight", r"re:.*v_proj.weight",
-                r"re:.*o_proj.weight", r"re:.*gate_proj.weight", r"re:.*up_proj.weight",
-                r"re:.*down_proj.weight",
-            ],
-            start=0,
-        ),
-    ])
+    )
 ```
 
 2. **Apply Compression**
diff --git a/tests/e2e/vLLM/recipes/Sparse_2of4/recipe_sparse_2of4_fp8_dynamic.yaml b/tests/e2e/vLLM/recipes/Sparse_2of4/recipe_sparse_2of4_fp8_dynamic.yaml
@@ -9,17 +9,6 @@ sparsity_stage:
 quantization_stage:
   run_type: oneshot
   quantization_modifiers:
-    ConstantPruningModifier:
-      targets: [
-        're:.*q_proj.weight',
-        're:.*k_proj.weight', 
-        're:.*v_proj.weight',
-        're:.*o_proj.weight',
-        're:.*gate_proj.weight',
-        're:.*up_proj.weight',
-        're:.*down_proj.weight',
-      ]
-      start: 0
     QuantizationModifier:
       targets: ["Linear"]
       ignore: ["lm_head"]