from llmcompressor import model_free_ptq

# Hugging Face stub of the model to quantize.
MODEL_ID = "zai-org/GLM-4.6"
# Output directory: the repo name (path tail) with an FP8-BLOCK suffix,
# e.g. "GLM-4.6-FP8-BLOCK".
SAVE_DIR = MODEL_ID.rstrip("/").rpartition("/")[-1] + "-FP8-BLOCK"

# Data-free PTQ: quantize the checkpoint to FP8 with block-wise scales
# and write the result in compressed-tensors format to SAVE_DIR.
# The `ignore` patterns keep the MoE router gates, the LM head, and the
# embedding table in their original precision.
model_free_ptq(
    model_stub=MODEL_ID,
    save_directory=SAVE_DIR,
    scheme="FP8_BLOCK",
    ignore=[
        "re:.*gate$",
        "lm_head",
        "model.embed_tokens",
    ],
    max_workers=15,
    device="cuda:0",
)