|
2 | 2 | # |
3 | 3 | # Build the YuhoLens-14B GGUF release set from a HuggingFace checkpoint. |
4 | 4 | # |
5 | | -# Required tools (operator must install before running): |
6 | | -# - python (matching the llama.cpp checkout's environment) |
7 | | -# - llama.cpp cloned and built with the quantize binary: |
8 | | -# git clone https://github.com/ggerganov/llama.cpp ../llama.cpp |
9 | | -# cd ../llama.cpp && cmake -B build && cmake --build build --target llama-quantize |
10 | | -# pip install -r ../llama.cpp/requirements.txt |
11 | | -# - At least 80 GB free on the target disk (f16 + four quants for a 14B model). |
| 5 | +# Two-directory layout: |
| 6 | +# - LLAMACPP_REPO: the cloned llama.cpp repo (provides convert_hf_to_gguf.py). |
| 7 | +# - LLAMACPP_BIN: the directory holding the llama-quantize binary. May be |
| 8 | +# the repo's build/bin (when llama.cpp is built from source) |
| 9 | +# OR a flat directory of prebuilt Windows binaries (which |
| 10 | +# is what ggml-org publishes on the GitHub releases page). |
| 11 | +# |
| 12 | +# When LLAMACPP_BIN is unset, it defaults to "$LLAMACPP_REPO/build/bin". |
| 13 | +# Both the bare ("llama-quantize") and the .exe ("llama-quantize.exe") name |
| 14 | +# are probed, so the same script works on Linux source builds and on |
| 15 | +# Windows prebuilt-binary checkouts. |
| 16 | +# |
| 17 | +# Required tools the operator must install BEFORE running: |
| 18 | +# - python with `gguf` and `safetensors` packages (pip install gguf |
| 19 | +# safetensors). |
| 20 | +# - llama.cpp cloned somewhere readable (default ../llama.cpp). |
| 21 | +# - A llama-quantize binary, either built from source or unzipped from |
| 22 | +# the official prebuilt Windows release. |
| 23 | +# - At least 80 GB free disk for a 14B model (f16 intermediate + 5 quants). |
12 | 24 | # |
13 | 25 | # Usage: |
14 | 26 | # scripts/build_gguf.sh <checkpoint_dir> [output_dir] |
15 | 27 | # |
16 | 28 | # Defaults: |
17 | | -# - LLAMACPP env var overrides the llama.cpp path (default: ../llama.cpp). |
18 | 29 | # - output_dir defaults to <checkpoint_dir> when omitted. |
| 30 | +# - LLAMACPP_REPO defaults to ../llama.cpp (the legacy LLAMACPP variable is used as a fallback when LLAMACPP_REPO is unset). |
| 31 | +# - LLAMACPP_BIN defaults to $LLAMACPP_REPO/build/bin. |
19 | 32 | # |
20 | 33 | # This script does not run automatically. Operator runs it after the HF |
21 | 34 | # checkpoint is downloaded locally; the resulting GGUFs are uploaded to |
|
38 | 51 |
|
39 | 52 | mkdir -p "$OUT_DIR" |
40 | 53 |
|
41 | | -LLAMACPP="${LLAMACPP:-../llama.cpp}" |
42 | | -CONVERT_SCRIPT="$LLAMACPP/convert_hf_to_gguf.py" |
43 | | -QUANT_BIN="$LLAMACPP/build/bin/llama-quantize" |
| 54 | +# Back-compat: legacy LLAMACPP env var maps to LLAMACPP_REPO. |
| 55 | +LLAMACPP_REPO="${LLAMACPP_REPO:-${LLAMACPP:-../llama.cpp}}" |
| 56 | +LLAMACPP_BIN="${LLAMACPP_BIN:-$LLAMACPP_REPO/build/bin}" |
| 57 | +CONVERT_SCRIPT="$LLAMACPP_REPO/convert_hf_to_gguf.py" |
| 58 | + |
| 59 | +resolve_quant_bin() { |
| 60 | + for candidate in "$LLAMACPP_BIN/llama-quantize" "$LLAMACPP_BIN/llama-quantize.exe"; do |
| 61 | + if [[ -f "$candidate" ]]; then |
| 62 | + echo "$candidate" |
| 63 | + return 0 |
| 64 | + fi |
| 65 | + done |
| 66 | + return 1 |
| 67 | +} |
44 | 68 |
|
45 | 69 | if [[ ! -f "$CONVERT_SCRIPT" ]]; then |
46 | 70 | echo "error: convert script not found at $CONVERT_SCRIPT" >&2 |
47 | | - echo " set LLAMACPP=/path/to/llama.cpp or clone llama.cpp at ../llama.cpp" >&2 |
| 71 | + echo " set LLAMACPP_REPO=/path/to/llama.cpp clone" >&2 |
48 | 72 | exit 66 |
49 | 73 | fi |
50 | 74 |
|
51 | | -if [[ ! -x "$QUANT_BIN" ]]; then |
52 | | - echo "error: llama-quantize binary not found at $QUANT_BIN" >&2 |
53 | | - echo " build llama.cpp first: cmake -B build && cmake --build build --target llama-quantize" >&2 |
| 75 | +if ! QUANT_BIN="$(resolve_quant_bin)"; then |
| 76 | + echo "error: llama-quantize binary not found in $LLAMACPP_BIN" >&2 |
| 77 | + echo " set LLAMACPP_BIN to the directory containing llama-quantize(.exe)" >&2 |
| 78 | + echo " (build/bin/ for a source build, or the unzip dir for prebuilts)" >&2 |
54 | 79 | exit 66 |
55 | 80 | fi |
56 | 81 |
|
57 | 82 | OUT_F16="${OUT_DIR%/}/yuholens-14b-f16.gguf" |
58 | 83 |
|
| 84 | +echo "[gguf] using convert script: $CONVERT_SCRIPT" |
| 85 | +echo "[gguf] using quantize bin: $QUANT_BIN" |
59 | 86 | echo "[gguf] converting $CKPT -> $OUT_F16" |
60 | 87 | python "$CONVERT_SCRIPT" \ |
61 | 88 | --outfile "$OUT_F16" \ |
62 | 89 | --outtype f16 \ |
63 | 90 | "$CKPT" |
64 | 91 |
|
65 | | -QUANTS=("Q4_K_M" "Q5_K_M" "Q6_K" "Q8_0") |
| 92 | +# Q3_K_M is the 8 GB consumer headline quant; everything from Q4_K_M up |
| 93 | +# wants 10 GB+ VRAM or partial CPU offload at runtime. |
| 94 | +QUANTS=("Q3_K_M" "Q4_K_M" "Q5_K_M" "Q6_K" "Q8_0") |
66 | 95 | for quant in "${QUANTS[@]}"; do |
67 | 96 | out="${OUT_DIR%/}/yuholens-14b-${quant}.gguf" |
68 | 97 | echo "[gguf] quantising $quant -> $out" |
|
0 commit comments