Skip to content

Commit cbe8d4e

Browse files
committed
Add the --use-slow-tokenizer flag
1 parent 6ae9ec5 commit cbe8d4e

1 file changed

Lines changed: 6 additions & 0 deletions

File tree

src/kvboost/server/__main__.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -81,6 +81,12 @@ def parse_args():
8181
"hqq-4bit / hqq-2bit: HQQ, no calibration, lower load-time memory than bnb. "
8282
"Pre-quantized AWQ/GPTQ checkpoints are detected automatically — "
8383
"leave this 'none' and just point --model at e.g. Qwen/Qwen3-8B-AWQ.")
84+
p.add_argument("--use-slow-tokenizer", action="store_true",
85+
help="Force the SentencePiece-based slow tokenizer. "
86+
"Workaround for fast-tokenizer builds whose byte-level "
87+
"decoder is missing/broken (symptom: decoded text drops "
88+
"spaces/newlines or shows literal 'Ġ' and 'Ċ'). "
89+
"Seen on some Llama-3 / DeepSeek-R1-Distill checkpoints.")
8490
p.add_argument("--max-memory", default=None,
8591
help="Per-device memory cap for CPU offload, JSON dict. "
8692
'Example: \'{"0": "7GiB", "cpu": "32GiB"}\'. When set, uses '

0 commit comments

Comments
 (0)