Skip to content

Commit 91089fe

Browse files
committed
Remove gpt_oss test code and add in examples
Signed-off-by: Sharif Inamdar <[email protected]>
1 parent b6200cd commit 91089fe

File tree

1 file changed

+0
-42
lines changed

1 file changed

+0
-42
lines changed

src/llmcompressor/modeling/gpt_oss.py

Lines changed: 0 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -172,45 +172,3 @@ def forward(self, hidden_states):
172 172
router_scores = router_scores.view(B * T, -1) # shape doesn't matter much; it’s ignored by the decoder
173 173
return out, router_scores
174 174

175-
176-
model_id = "unsloth/gpt-oss-120b-BF16"
177-
178-
model = AutoModelForCausalLM.from_pretrained(
179-
model_id,
180-
torch_dtype=torch.bfloat16,
181-
device_map="auto",
182-
trust_remote_code=True,
183-
)
184-
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
185-
186-
convert_model_for_quantization_gptoss(model)
187-
188-
# -----------------------------
189-
# Quantization recipe
190-
# -----------------------------
191-
recipe = QuantizationModifier(
192-
targets="Linear",
193-
scheme="FP8_DYNAMIC",
194-
ignore=[
195-
"re:.*lm_head",
196-
"re:.*self_attn",
197-
"re:.*attn",
198-
"re:.*attention.*",
199-
"re:.*router",
200-
],
201-
)
202-
203-
SAVE_DIR = f"{model_id.split('/')[-1]}-FP8-Dynamic"
204-
205-
# Oneshot quantization
206-
oneshot(
207-
model=model,
208-
tokenizer=tokenizer,
209-
recipe=recipe,
210-
trust_remote_code_model=True,
211-
output_dir=SAVE_DIR,
212-
)
213-
214-
# Save compressed
215-
model.save_pretrained(SAVE_DIR, save_compressed=True)
216-
tokenizer.save_pretrained(SAVE_DIR)

0 commit comments

Comments
 (0)