|
17 | 17 | # The general thinking params use presence_penalty=1.5, but that setting causes thinking loops on simple questions |
18 | 18 | model = /models/Qwen3.5-35B-A3B-UD-Q4_K_XL.gguf |
19 | 19 | mmproj = /models/mmproj-F16.gguf |
20 | | - alias = qwen3.5, qwen 3.5, general, vision, image, multimodal, coder, code |
| 20 | + alias = qwen3.5, qwen 3.5 |
21 | 21 | ctx-size = 131072 |
22 | 22 | n-gpu-layers = 99 |
23 | 23 | temp = 0.6 |
|
35 | 35 | [nothink - qwen3.5] |
36 | 36 | model = /models/Qwen3.5-35B-A3B-UD-Q4_K_XL.gguf |
37 | 37 | mmproj = /models/mmproj-F16.gguf |
38 | | - alias = nothink |
| 38 | + alias = nothink-qwen |
39 | 39 | ctx-size = 131072 |
40 | 40 | n-gpu-layers = 99 |
41 | 41 | temp = 0.6 |
|
46 | 46 | chat-template-kwargs = {"enable_thinking": false} |
47 | 47 | jinja = 1 |
48 | 48 |
|
| 49 | + # ========================================================== |
| 50 | + # GEMMA 4 26B-A4B [MULTIMODAL] - THINK |
| 51 | + # 26B total / 3.8B active (MoE, 128 experts, 8+1 active) |
| 52 | + # Q4_K_XL (~17GB) + mmproj (~850MB) — ~6GB VRAM headroom |
| 53 | + # 256K native context, vision, thinking mode |
| 54 | + # Google recommended params: temp=1.0, top_p=0.95, top_k=64 |
| 55 | + # ========================================================== |
| 56 | + [gemma4 - gemma4-26b] |
| 57 | + model = /models/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf |
| 58 | + mmproj = /models/mmproj-gemma4-BF16.gguf |
| 59 | + alias = gemma4, gemma 4, gemma4-think, general, vision, image, multimodal, coder, code |
| 60 | + ctx-size = 131072 |
| 61 | + n-gpu-layers = 99 |
| 62 | + temp = 1.0 |
| 63 | + top-p = 0.95 |
| 64 | + top-k = 64 |
| 65 | + min-p = 0.0 |
| 66 | + presence-penalty = 0.0 |
| 67 | + chat-template-kwargs = {"enable_thinking": true} |
| 68 | + jinja = 1 |
| 69 | +
|
| 70 | + # ========================================================== |
| 71 | + # GEMMA 4 NO-THINK - Same model, thinking disabled |
| 72 | + # Same GGUF as think variant — switching is instant (no reload) |
| 73 | + # ========================================================== |
| 74 | + [gemma4-nothink - gemma4-26b-nothink] |
| 75 | + model = /models/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf |
| 76 | + mmproj = /models/mmproj-gemma4-BF16.gguf |
| 77 | + alias = gemma4-nothink, gemma4-fast, nothink |
| 78 | + ctx-size = 131072 |
| 79 | + n-gpu-layers = 99 |
| 80 | + temp = 1.0 |
| 81 | + top-p = 0.95 |
| 82 | + top-k = 64 |
| 83 | + min-p = 0.0 |
| 84 | + presence-penalty = 0.0 |
| 85 | + chat-template-kwargs = {"enable_thinking": false} |
| 86 | + jinja = 1 |
| 87 | +
|
49 | 88 | # ========================================================== |
50 | 89 | # UNCENSORED (THINK) - HauhauCS Aggressive variant |
51 | 90 | # Same Qwen3.5-35B-A3B base, uncensored fine-tune |
|
0 commit comments