We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent ad98b01 · commit 9aa8d24 (Copy full SHA for 9aa8d24)
scripts/models/qwen3-4B-Instruct-2507.sh
@@ -0,0 +1 @@
1
+MODEL_ARGS_ROTARY_BASE=5000000 source "$(dirname -- "${BASH_SOURCE[0]}")/qwen3-4B.sh"  # reuse the qwen3-4B args with rotary base overridden; BASH_SOURCE (not $0) so the sibling path is still found if this file is itself sourced
scripts/models/qwen3-4B.sh
@@ -10,7 +10,7 @@ MODEL_ARGS=(
10
--disable-bias-linear
11
--normalization "RMSNorm"
12
--norm-epsilon 1e-6
13
- --rotary-base 1000000
+ --rotary-base "${MODEL_ARGS_ROTARY_BASE:-1000000}"
14
--vocab-size 151936
15
--kv-channels 128
16
--qk-layernorm
0 commit comments