Skip to content

Commit 7d1df8d

Browse files
committed
chore: update svdq dq configs
1 parent 95b0800 commit 7d1df8d

2 files changed

Lines changed: 5 additions & 0 deletions

File tree

examples/configs/quantize_svdq.yaml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,8 @@ quantize_config:
22
quant_type: "svdq_int4_r128_dq"
33
svdq_kwargs:
44
quantize_device: "cuda"
5+
runtime_kernel: "v2"
6+
fused_mlp: true
57
exclude_layers:
68
- "embedder"
79
- "embed"

examples/configs/quantize_svdq_few_shot.yaml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ quantize_config:
44
smooth_strategy: "few_shot"
55
few_shot_steps: 2
66
few_shot_auto_compile: true
7+
few_shot_relax_strategy: "fixed"
78
# Device used for SVD decomposition and W4A4 packing math.
89
# - "cuda": force CUDA-side SVD + packing, even when float weights are on CPU.
910
# - "cpu": force CPU-side SVD + packing (slow, for low-memory GPUs).
@@ -19,6 +20,8 @@ quantize_config:
1920
# scenarios, set `few_shot_auto_compile: false`, then compile manually after
2021
# moving the pipeline to CUDA.
2122
quantize_device: "cuda"
23+
runtime_kernel: "v2"
24+
fused_mlp: true
2225
exclude_layers:
2326
- "embedder"
2427
- "embed"

0 commit comments

Comments
 (0)