diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts index 3093227f7c..85ada6cc6f 100644 --- a/packages/gguf/src/gguf.spec.ts +++ b/packages/gguf/src/gguf.spec.ts @@ -8,6 +8,7 @@ import { ggufAllShards, parseGgufShardFilename, parseGGUFQuantLabel, + parseGGUFFileVariant, GGUF_QUANT_ORDER, findNearestQuantType, serializeGgufMetadata, @@ -343,6 +344,47 @@ describe("gguf", () => { expect(parseGGUFQuantLabel("Qwen3-4B-UD-Q2_K_XL.gguf")).toEqual("UD-Q2_K_XL"); // unsloth UD (Unsloth Dynamic) prefix }); + it("parse file variant", async () => { + // Plain model files → empty array + expect(parseGGUFFileVariant("Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf")).toEqual([]); + expect(parseGGUFFileVariant("Codestral-22B-v0.1.gguf")).toEqual([]); + + // Spec slot values + expect(parseGGUFFileVariant("Model-Q4_K_M-LoRA.gguf")).toEqual(["LoRA"]); + expect(parseGGUFFileVariant("Model-Q4_K_M-vocab.gguf")).toEqual(["vocab"]); + expect(parseGGUFFileVariant("Qwen3.6-27B-MTP-Q8_0.gguf")).toEqual(["MTP"]); // MTP before encoding (am17an/RDson style) + + // imatrix community marker + expect(parseGGUFFileVariant("Model-IQ2_XXS-imatrix.gguf")).toEqual(["imatrix"]); + expect(parseGGUFFileVariant("Model-imatrix-Q4_K_M.gguf")).toEqual(["imatrix"]); // prefix position + + // Multiple distinct tokens preserved in first-occurrence order + expect(parseGGUFFileVariant("Model-MTP-imatrix-Q4_K_M.gguf")).toEqual(["MTP", "imatrix"]); + expect(parseGGUFFileVariant("Model-imatrix-MTP-Q4_K_M.gguf")).toEqual(["imatrix", "MTP"]); + expect(parseGGUFFileVariant("Model-LoRA-MTP-Q4_K_M.gguf")).toEqual(["LoRA", "MTP"]); + + // Exact duplicates deduped + expect(parseGGUFFileVariant("Model-imatrix-Q4_K_M-imatrix.gguf")).toEqual(["imatrix"]); + expect(parseGGUFFileVariant("Model-MTP-MTP-Q4_K_M.gguf")).toEqual(["MTP"]); + + // Case-insensitive match, canonical case returned + expect(parseGGUFFileVariant("Model-q4_k_m-lora.gguf")).toEqual(["LoRA"]); + 
expect(parseGGUFFileVariant("Model-LORA-mtp.gguf")).toEqual(["LoRA", "MTP"]); + + // `.` separator works (mradermacher-style stems) + expect(parseGGUFFileVariant("Model.imatrix.Q4_K_M.gguf")).toEqual(["imatrix"]); + + // `i1-` prefix is NOT a recognized marker on its own + expect(parseGGUFFileVariant("DeepSeek-V3.i1-Q4_K_M.gguf")).toEqual([]); + + // Path prefix is stripped before parsing + expect(parseGGUFFileVariant("subdir/Model-Q4_K_M-LoRA.gguf")).toEqual(["LoRA"]); + + // Substrings inside other tokens must NOT match (delimited only) + expect(parseGGUFFileVariant("Llama-imatrixed-Q4_K_M.gguf")).toEqual([]); + expect(parseGGUFFileVariant("MTPiston-Q4_K_M.gguf")).toEqual([]); + }); + it("calculate tensor data offset", async () => { const { tensorDataOffset } = await gguf(URL_LLAMA); expect(tensorDataOffset).toEqual(741056n); diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index b91658e4d0..409a5c0e78 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -17,12 +17,14 @@ export { GGUFValueType, GGMLQuantizationType, Architecture } from "./types"; export { GGUF_QUANT_DESCRIPTIONS } from "./quant-descriptions"; export { parseGGUFQuantLabel, + parseGGUFFileVariant, GGUF_QUANT_RE, GGUF_QUANT_RE_GLOBAL, GGUF_QUANT_ORDER, findNearestQuantType, GGMLFileQuantizationType, } from "@huggingface/tasks"; +export type { GGUFFileVariant } from "@huggingface/tasks"; export const RE_GGUF_FILE = /\.gguf$/; export const RE_GGUF_SHARD_FILE = /^(?.*?)-(?\d{5})-of-(?\d{5})\.gguf$/; diff --git a/packages/tasks/src/gguf.ts b/packages/tasks/src/gguf.ts index bfbb3f7fb5..98570dd3c8 100644 --- a/packages/tasks/src/gguf.ts +++ b/packages/tasks/src/gguf.ts @@ -209,3 +209,42 @@ export enum GGMLQuantizationType { NVFP4 = 40, Q1_0 = 41, } + +/** + * Filename-level GGUF variants. `LoRA` / `vocab` / `MTP` come from the GGUF + * naming spec's `` slot (see `ggml-org/ggml#1488` for MTP). 
/**
 * Filename-level GGUF variants. `LoRA` / `vocab` / `MTP` come from the GGUF
 * naming spec's `<Type>` slot (see `ggml-org/ggml#1488` for MTP). `imatrix`
 * is a community marker that the file was quantized with an importance
 * matrix; it is not part of the spec `<Type>` slot but appears widely in
 * filenames and is reported here for convenience.
 */
export type GGUFFileVariant = "LoRA" | "vocab" | "MTP" | "imatrix";

/** Every recognised variant, spelled in the canonical case that callers receive. */
const GGUF_FILE_VARIANTS: readonly GGUFFileVariant[] = ["LoRA", "vocab", "MTP", "imatrix"];

/** Lower-cased token -> canonical spelling; built once rather than on every call. */
const GGUF_FILE_VARIANT_BY_LOWERCASE: ReadonlyMap<string, GGUFFileVariant> = new Map(
	GGUF_FILE_VARIANTS.map((variant): [string, GGUFFileVariant] => [variant.toLowerCase(), variant])
);

// Match any variant as a delimited token: it must be preceded by the start of
// the string, `-` or `.`, and followed by the end of the string, `-` or `.`.
// Case-insensitive. The named group `v` holds the token in whatever case it
// appeared in the filename; `parseGGUFFileVariant` reads it via `groups.v`.
const GGUF_FILE_VARIANT_RE = new RegExp(`(?<=^|[-.])(?<v>${GGUF_FILE_VARIANTS.join("|")})(?=$|[-.])`, "gi");

/**
 * Parse the variant tokens out of a GGUF filename.
 *
 * @param fname - File name or `/`-separated path; only the last path segment
 *   is inspected, and a trailing `.gguf` (any case) is dropped before matching.
 * @returns The variants found, in first-occurrence order, in canonical case,
 *   with duplicates removed. Returns `[]` for plain model files.
 */
export function parseGGUFFileVariant(fname: string): GGUFFileVariant[] {
	const base = fname.split("/").pop() ?? fname;
	const stem = base.replace(/\.gguf$/i, "");
	const out: GGUFFileVariant[] = [];
	const seen = new Set<GGUFFileVariant>();
	// `matchAll` iterates over an internal clone of the regex, so the shared
	// module-level instance never has its `lastIndex` advanced between calls.
	for (const m of stem.matchAll(GGUF_FILE_VARIANT_RE)) {
		const token = m.groups?.v;
		if (!token) continue;
		const variant = GGUF_FILE_VARIANT_BY_LOWERCASE.get(token.toLowerCase());
		if (!variant || seen.has(variant)) continue;
		seen.add(variant);
		out.push(variant);
	}
	return out;
}