We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 0d5b267 commit 0487c85 (Copy full SHA for 0487c85)
1 file changed
examples/configs/blackwell/parallel_svdq.yaml
@@ -0,0 +1,13 @@
1
+parallelism_config:
2
+ ulysses_size: auto
3
+ attention_backend: native
4
+quantize_config:
5
+ quant_type: "svdq_nvfp4_r128_dq"
6
+ svdq_kwargs:
7
+ quantize_device: "cuda"
8
+ runtime_kernel: "v2"
9
+ fused_mlp: true
10
+ exclude_layers:
11
+ - "embedder"
12
+ - "embed"
13
+ verbose: false
0 commit comments