1 file changed: src/llmcompressor/modeling (+0 −42 lines)

@@ -172,45 +172,3 @@ def forward(self, hidden_states):
         router_scores = router_scores.view(B * T, -1)  # shape doesn't matter much; it's ignored by the decoder
         return out, router_scores
 
-
-model_id = "unsloth/gpt-oss-120b-BF16"
-
-model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    torch_dtype=torch.bfloat16,
-    device_map="auto",
-    trust_remote_code=True,
-)
-tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
-
-convert_model_for_quantization_gptoss(model)
-
-# -----------------------------
-# Quantization recipe
-# -----------------------------
-recipe = QuantizationModifier(
-    targets="Linear",
-    scheme="FP8_DYNAMIC",
-    ignore=[
-        "re:.*lm_head",
-        "re:.*self_attn",
-        "re:.*attn",
-        "re:.*attention.*",
-        "re:.*router",
-    ],
-)
-
-SAVE_DIR = f"{model_id.split('/')[-1]}-FP8-Dynamic"
-
-# Oneshot quantization
-oneshot(
-    model=model,
-    tokenizer=tokenizer,
-    recipe=recipe,
-    trust_remote_code_model=True,
-    output_dir=SAVE_DIR,
-)
-
-# Save compressed
-model.save_pretrained(SAVE_DIR, save_compressed=True)
-tokenizer.save_pretrained(SAVE_DIR)
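
For reference, the deleted hunk was a standalone usage script that relied on imports defined earlier in the file, which this diff does not show. Below is a minimal self-contained sketch of the same flow; the llmcompressor import paths and the location of convert_model_for_quantization_gptoss are assumptions based on the library's public API, not confirmed by this diff:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumed entry points; older llmcompressor releases expose oneshot
# via llmcompressor.transformers instead.
from llmcompressor import oneshot
from llmcompressor.modifiers.quantization import QuantizationModifier

# Assumed import path: the helper is defined in the file this diff modifies.
from llmcompressor.modeling.gpt_oss import convert_model_for_quantization_gptoss

model_id = "unsloth/gpt-oss-120b-BF16"

# Load the BF16 checkpoint, then run the repo's helper to prepare the
# GPT-OSS model for quantization before applying the recipe.
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
convert_model_for_quantization_gptoss(model)

# FP8 dynamic quantization of Linear layers; attention, the router,
# and lm_head are excluded via regex ignore patterns.
recipe = QuantizationModifier(
    targets="Linear",
    scheme="FP8_DYNAMIC",
    ignore=["re:.*lm_head", "re:.*self_attn", "re:.*attn", "re:.*attention.*", "re:.*router"],
)

save_dir = f"{model_id.split('/')[-1]}-FP8-Dynamic"
oneshot(
    model=model,
    tokenizer=tokenizer,
    recipe=recipe,
    trust_remote_code_model=True,
    output_dir=save_dir,
)

model.save_pretrained(save_dir, save_compressed=True)
tokenizer.save_pretrained(save_dir)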