Commit 4b5fe76

update example, do not start on initialize

Signed-off-by: Kyle Sayers <[email protected]>
1 parent 03117ae

File tree: 2 files changed (+14, -27 lines)

examples/sparse_2of4_quantization_fp8/llama3_8b_2of4.py (+14, -21)
@@ -5,7 +5,6 @@
 
 from llmcompressor import oneshot
 from llmcompressor.modifiers.obcq import SparseGPTModifier
-from llmcompressor.modifiers.pruning import ConstantPruningModifier
 from llmcompressor.modifiers.quantization import QuantizationModifier
 
 # Configuration
@@ -52,29 +51,23 @@ def get_recipe(fp8_enabled):
     save_dir = MODEL_ID.split("/")[1] + "2of4-sparse"
 
     if fp8_enabled:
-        base_recipe.extend(
-            [
-                QuantizationModifier(
-                    targets=["Linear"],
-                    ignore=["lm_head"],
-                    scheme="FP8_DYNAMIC",
-                ),
-                ConstantPruningModifier(
-                    targets=[
-                        r"re:.*q_proj.weight",
-                        r"re:.*k_proj.weight",
-                        r"re:.*v_proj.weight",
-                        r"re:.*o_proj.weight",
-                        r"re:.*gate_proj.weight",
-                        r"re:.*up_proj.weight",
-                        r"re:.*down_proj.weight",
-                    ],
-                    start=0,
-                ),
-            ]
+        base_recipe.append(
+            QuantizationModifier(
+                targets=["Linear"],
+                ignore=["lm_head"],
+                scheme="FP8_DYNAMIC",
+            )
         )
         save_dir = MODEL_ID.split("/")[1] + "2of4-W8A8-FP8-Dynamic-Per-Token"
 
+        # check that asymmetric quantization is not being used
+        q_scheme = base_recipe[1].scheme
+        if not isinstance(q_scheme, str) and not q_scheme["weights"].symmetric:
+            raise ValueError(
+                "Asymmetric quantization with 2of4 sparsity is not supported by vLLM. "
+                "Please use symmetric quantization"
+            )
+
     return base_recipe, save_dir
 
 
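Note on the guard added above: `QuantizationModifier.scheme` is either a string preset such as "FP8_DYNAMIC" (string presets use symmetric weights and pass the check unexamined) or a dict-style scheme whose "weights" entry exposes a `symmetric` flag, which is what `q_scheme["weights"].symmetric` inspects. A minimal sketch of a scheme this guard would reject; `QuantizationArgs` and its import path are assumptions drawn from the compressed-tensors library, not from this commit:

    # Hypothetical example, not from this commit: a dict-style scheme with
    # asymmetric weights, the case the new guard in llama3_8b_2of4.py rejects.
    # QuantizationArgs and its import path are assumed from compressed-tensors.
    from compressed_tensors.quantization import QuantizationArgs

    from llmcompressor.modifiers.quantization import QuantizationModifier

    asym_modifier = QuantizationModifier(
        targets=["Linear"],
        ignore=["lm_head"],
        scheme={"weights": QuantizationArgs(num_bits=8, symmetric=False)},
    )

    # Mirrors the check added in the example script above: string presets
    # pass through, dict schemes with asymmetric weights raise.
    q_scheme = asym_modifier.scheme
    if not isinstance(q_scheme, str) and not q_scheme["weights"].symmetric:
        raise ValueError(
            "Asymmetric quantization with 2of4 sparsity is not supported by vLLM. "
            "Please use symmetric quantization"
        )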

src/llmcompressor/modifiers/modifier.py (-6)
@@ -89,12 +89,6 @@ def initialize(self, state: State, **kwargs):
 
         self.initialized_ = self.on_initialize(state=state, **kwargs)
 
-        # trigger start
-        fake_start_event = Event(type_=EventType.BATCH_START, global_step=0)
-        if self.should_start(fake_start_event):
-            self.on_start(state, fake_start_event, **kwargs)
-            self.started_ = True
-
     def finalize(self, state: State, **kwargs):
         """
         Finalize the modifier for the given model and state.
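This deletion is the "do not start on initialize" half of the commit: `initialize()` previously fired a synthetic step-0 `BATCH_START` event, so any modifier with `start=0` began as a side effect of initialization (presumably also why the example above drops its `ConstantPruningModifier(start=0)`). After this change, starting is left to the event lifecycle. A rough sketch of that flow; `Event`, `EventType`, `should_start`, `on_start`, and `started_` appear in the removed code, while the dispatch helper and the `llmcompressor.core` import path are illustrative assumptions:

    # Illustrative sketch, not code from this commit: after the change, a
    # modifier starts only when a real BATCH_START event satisfies
    # should_start(), rather than via a fake event fired inside initialize().
    # The import path for Event/EventType is an assumption.
    from llmcompressor.core import Event, EventType

    def dispatch_batch_start(modifier, state, global_step: int):
        """Hypothetical lifecycle hook dispatching a genuine batch-start event."""
        event = Event(type_=EventType.BATCH_START, global_step=global_step)
        if not modifier.started_ and modifier.should_start(event):
            modifier.on_start(state, event)
            modifier.started_ = True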
