Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions packages/gguf/src/gguf.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
ggufAllShards,
parseGgufShardFilename,
parseGGUFQuantLabel,
parseGGUFFileVariant,
GGUF_QUANT_ORDER,
findNearestQuantType,
serializeGgufMetadata,
Expand Down Expand Up @@ -343,6 +344,47 @@ describe("gguf", () => {
expect(parseGGUFQuantLabel("Qwen3-4B-UD-Q2_K_XL.gguf")).toEqual("UD-Q2_K_XL"); // unsloth UD (Unsloth Dynamic) prefix
});

it("parse file variant", async () => {
// Plain model files → empty array
expect(parseGGUFFileVariant("Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf")).toEqual([]);
expect(parseGGUFFileVariant("Codestral-22B-v0.1.gguf")).toEqual([]);

// Spec <Type> slot values
expect(parseGGUFFileVariant("Model-Q4_K_M-LoRA.gguf")).toEqual(["LoRA"]);
expect(parseGGUFFileVariant("Model-Q4_K_M-vocab.gguf")).toEqual(["vocab"]);
expect(parseGGUFFileVariant("Qwen3.6-27B-MTP-Q8_0.gguf")).toEqual(["MTP"]); // MTP before encoding (am17an/RDson style)

// imatrix community marker
expect(parseGGUFFileVariant("Model-IQ2_XXS-imatrix.gguf")).toEqual(["imatrix"]);
expect(parseGGUFFileVariant("Model-imatrix-Q4_K_M.gguf")).toEqual(["imatrix"]); // prefix position
Comment on lines +358 to +359

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you sure we want to support both orders? The spec defines one canonical order, no?


// Multiple distinct tokens preserved in first-occurrence order
expect(parseGGUFFileVariant("Model-MTP-imatrix-Q4_K_M.gguf")).toEqual(["MTP", "imatrix"]);
expect(parseGGUFFileVariant("Model-imatrix-MTP-Q4_K_M.gguf")).toEqual(["imatrix", "MTP"]);
expect(parseGGUFFileVariant("Model-LoRA-MTP-Q4_K_M.gguf")).toEqual(["LoRA", "MTP"]);

// Exact duplicates deduped
expect(parseGGUFFileVariant("Model-imatrix-Q4_K_M-imatrix.gguf")).toEqual(["imatrix"]);
expect(parseGGUFFileVariant("Model-MTP-MTP-Q4_K_M.gguf")).toEqual(["MTP"]);

// Case-insensitive match, canonical case returned
expect(parseGGUFFileVariant("Model-q4_k_m-lora.gguf")).toEqual(["LoRA"]);
expect(parseGGUFFileVariant("Model-LORA-mtp.gguf")).toEqual(["LoRA", "MTP"]);

// `.` separator works (mradermacher-style stems)
expect(parseGGUFFileVariant("Model.imatrix.Q4_K_M.gguf")).toEqual(["imatrix"]);

// `i1-` prefix is NOT a recognized marker on its own
expect(parseGGUFFileVariant("DeepSeek-V3.i1-Q4_K_M.gguf")).toEqual([]);

// Path prefix is stripped before parsing
expect(parseGGUFFileVariant("subdir/Model-Q4_K_M-LoRA.gguf")).toEqual(["LoRA"]);

// Substrings inside other tokens must NOT match (delimited only)
expect(parseGGUFFileVariant("Llama-imatrixed-Q4_K_M.gguf")).toEqual([]);
expect(parseGGUFFileVariant("MTPiston-Q4_K_M.gguf")).toEqual([]);
});

it("calculate tensor data offset", async () => {
const { tensorDataOffset } = await gguf(URL_LLAMA);
expect(tensorDataOffset).toEqual(741056n);
Expand Down
2 changes: 2 additions & 0 deletions packages/gguf/src/gguf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@ export { GGUFValueType, GGMLQuantizationType, Architecture } from "./types";
export { GGUF_QUANT_DESCRIPTIONS } from "./quant-descriptions";
export {
parseGGUFQuantLabel,
parseGGUFFileVariant,
GGUF_QUANT_RE,
GGUF_QUANT_RE_GLOBAL,
GGUF_QUANT_ORDER,
findNearestQuantType,
GGMLFileQuantizationType,
} from "@huggingface/tasks";
export type { GGUFFileVariant } from "@huggingface/tasks";

/** Matches any string that ends with the `.gguf` extension (case-sensitive, no `i` flag). */
export const RE_GGUF_FILE = /\.gguf$/;
/**
 * Matches shard filenames of the form `<prefix>-<shard>-of-<total>.gguf`, anchored
 * to the whole string. `shard` and `total` are each exactly five digits; `prefix`
 * is matched lazily, so it ends at the first `-NNNNN-of-NNNNN.gguf` suffix that
 * lets the rest of the pattern succeed. All three parts are exposed as named
 * capture groups.
 */
export const RE_GGUF_SHARD_FILE = /^(?<prefix>.*?)-(?<shard>\d{5})-of-(?<total>\d{5})\.gguf$/;
Expand Down
39 changes: 39 additions & 0 deletions packages/tasks/src/gguf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -209,3 +209,42 @@ export enum GGMLQuantizationType {
NVFP4 = 40,
Q1_0 = 41,
}

/**
 * Filename-level GGUF variants. `LoRA` / `vocab` / `MTP` come from the GGUF
 * naming spec's `<Type>` slot (see `ggml-org/ggml#1488` for MTP). `imatrix`
 * is a community marker that the file was quantized with an importance
 * matrix; it is not part of the spec `<Type>` slot but appears widely in
 * filenames and is reported here for convenience.
 */
export type GGUFFileVariant = "LoRA" | "vocab" | "MTP" | "imatrix";

/** Every recognized variant, spelled in the canonical case that is reported to callers. */
const GGUF_FILE_VARIANTS: readonly GGUFFileVariant[] = ["LoRA", "vocab", "MTP", "imatrix"];

/**
 * Lower-cased token → canonical spelling. Built once at module load: the
 * variant list is static, so there is nothing to recompute per call.
 */
const GGUF_FILE_VARIANT_BY_LOWERCASE: ReadonlyMap<string, GGUFFileVariant> = new Map(
	GGUF_FILE_VARIANTS.map((v) => [v.toLowerCase(), v] as const),
);

// Match any variant token as a delimited token (`^`, `-`, or `.` on each side),
// case-insensitive. Capture group is the matched token in whatever case appeared.
// This regex is only ever handed to `String.prototype.matchAll`, which iterates
// over an internal clone and never writes to the original's `lastIndex`, so the
// single shared instance is safe to reuse across calls.
const GGUF_FILE_VARIANT_RE = new RegExp(`(?<=^|[-.])(?<v>${GGUF_FILE_VARIANTS.join("|")})(?=$|[-.])`, "gi");

/**
 * Parse the variant tokens out of a GGUF filename.
 *
 * Only the last `/`-separated path segment is inspected, and a trailing
 * `.gguf` extension (any case) is dropped before matching. Tokens are
 * recognized case-insensitively but only when delimited by `-`, `.`, or the
 * start/end of the stem, so substrings such as `imatrixed` do not match.
 *
 * @param fname - File name or `/`-separated path of a GGUF file.
 * @returns The variants in first-occurrence order, in canonical case, with
 *   duplicates removed. Returns `[]` for plain model files.
 */
export function parseGGUFFileVariant(fname: string): GGUFFileVariant[] {
	// `lastIndexOf` yields -1 when there is no slash, so the slice starts at 0.
	const base = fname.slice(fname.lastIndexOf("/") + 1);
	const stem = base.replace(/\.gguf$/i, "");
	// A Set iterates in insertion order, giving first-occurrence ordering and
	// de-duplication in one structure.
	const found = new Set<GGUFFileVariant>();
	for (const m of stem.matchAll(GGUF_FILE_VARIANT_RE)) {
		const variant = GGUF_FILE_VARIANT_BY_LOWERCASE.get(m.groups?.v?.toLowerCase() ?? "");
		if (variant) found.add(variant);
	}
	return [...found];
}
Loading