huggingface · mishig25 · May 13, 2026 · julien-c · May 13, 2026
@@ -8,6 +8,7 @@ import {
 	ggufAllShards,
 	parseGgufShardFilename,
 	parseGGUFQuantLabel,
+	parseGGUFFileVariant,
 	GGUF_QUANT_ORDER,
 	findNearestQuantType,
 	serializeGgufMetadata,
@@ -343,6 +344,47 @@ describe("gguf", () => {
 		expect(parseGGUFQuantLabel("Qwen3-4B-UD-Q2_K_XL.gguf")).toEqual("UD-Q2_K_XL"); // unsloth UD (Unsloth Dynamic) prefix
 	});
 
+	it("parse file variant", async () => {
+		// Table of [filename, expected variants]. Rows are grouped by the rule they
+		// exercise; the group comments state the rule.
+		const cases: Array<[string, string[]]> = [
+			// Plain model files → empty array
+			["Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf", []],
+			["Codestral-22B-v0.1.gguf", []],
+			// Spec <Type> slot values
+			["Model-Q4_K_M-LoRA.gguf", ["LoRA"]],
+			["Model-Q4_K_M-vocab.gguf", ["vocab"]],
+			["Qwen3.6-27B-MTP-Q8_0.gguf", ["MTP"]], // MTP before encoding (am17an/RDson style)
+			// imatrix community marker
+			["Model-IQ2_XXS-imatrix.gguf", ["imatrix"]],
+			["Model-imatrix-Q4_K_M.gguf", ["imatrix"]], // prefix position
+			// Multiple distinct tokens preserved in first-occurrence order
+			["Model-MTP-imatrix-Q4_K_M.gguf", ["MTP", "imatrix"]],
+			["Model-imatrix-MTP-Q4_K_M.gguf", ["imatrix", "MTP"]],
+			["Model-LoRA-MTP-Q4_K_M.gguf", ["LoRA", "MTP"]],
+			// Exact duplicates deduped
+			["Model-imatrix-Q4_K_M-imatrix.gguf", ["imatrix"]],
+			["Model-MTP-MTP-Q4_K_M.gguf", ["MTP"]],
+			// Case-insensitive match, canonical case returned
+			["Model-q4_k_m-lora.gguf", ["LoRA"]],
+			["Model-LORA-mtp.gguf", ["LoRA", "MTP"]],
+			// `.` separator works (mradermacher-style stems)
+			["Model.imatrix.Q4_K_M.gguf", ["imatrix"]],
+			// `i1-` prefix is NOT a recognized marker on its own
+			["DeepSeek-V3.i1-Q4_K_M.gguf", []],
+			// Path prefix is stripped before parsing
+			["subdir/Model-Q4_K_M-LoRA.gguf", ["LoRA"]],
+			// Substrings inside other tokens must NOT match (delimited only)
+			["Llama-imatrixed-Q4_K_M.gguf", []],
+			["MTPiston-Q4_K_M.gguf", []],
+		];
+
+		// One assertion per row, in table order, so the first failing row stops the test.
+		for (const [fname, expected] of cases) {
+			expect(parseGGUFFileVariant(fname)).toEqual(expected);
+		}
+	});
+
 	it("calculate tensor data offset", async () => {
 		const { tensorDataOffset } = await gguf(URL_LLAMA);
 		expect(tensorDataOffset).toEqual(741056n);

@@ -17,12 +17,14 @@ export { GGUFValueType, GGMLQuantizationType, Architecture } from "./types";
 export { GGUF_QUANT_DESCRIPTIONS } from "./quant-descriptions";
 export {
 	parseGGUFQuantLabel,
+	parseGGUFFileVariant,
 	GGUF_QUANT_RE,
 	GGUF_QUANT_RE_GLOBAL,
 	GGUF_QUANT_ORDER,
 	findNearestQuantType,
 	GGMLFileQuantizationType,
 } from "@huggingface/tasks";
+export type { GGUFFileVariant } from "@huggingface/tasks";
 
 export const RE_GGUF_FILE = /\.gguf$/;
 export const RE_GGUF_SHARD_FILE = /^(?<prefix>.*?)-(?<shard>\d{5})-of-(?<total>\d{5})\.gguf$/;

@@ -209,3 +209,42 @@ export enum GGMLQuantizationType {
 	NVFP4 = 40,
 	Q1_0 = 41,
 }
+
+/**
+ * Filename-level GGUF variants. `LoRA` / `vocab` / `MTP` come from the GGUF
+ * naming spec's `<Type>` slot (see `ggml-org/ggml#1488` for MTP). `imatrix`
+ * is a community marker that the file was quantized with an importance
+ * matrix; it is not part of the spec `<Type>` slot but appears widely in
+ * filenames and is reported here for convenience.
+ */
+export type GGUFFileVariant = "LoRA" | "vocab" | "MTP" | "imatrix";
+
+const GGUF_FILE_VARIANTS: readonly GGUFFileVariant[] = ["LoRA", "vocab", "MTP", "imatrix"];
+
+// Matches a variant token only when delimited: preceded by start-of-string, `-` or `.`, and
+// followed by end-of-string, `-` or `.`. Case-insensitive; group `v` holds the token as written.
+const GGUF_FILE_VARIANT_RE = new RegExp(`(?<=^|[-.])(?<v>${GGUF_FILE_VARIANTS.join("|")})(?=$|[-.])`, "gi");
+
+/**
+ * Extract the variant markers listed in `GGUF_FILE_VARIANTS` from a GGUF
+ * filename. Only the last `/`-separated segment is inspected, with a trailing
+ * `.gguf` removed. Tokens match case-insensitively but are reported in their
+ * canonical spelling, in order of first appearance, each at most once.
+ *
+ * @param fname - GGUF file name, optionally with a `/`-separated path prefix.
+ * @returns The distinct variants found; `[]` when the name carries none.
+ */
+export function parseGGUFFileVariant(fname: string): GGUFFileVariant[] {
+	// Drop any directory prefix and the `.gguf` extension before scanning for tokens.
+	const stem = fname.slice(fname.lastIndexOf("/") + 1).replace(/\.gguf$/i, "");
+	// Fresh regex instance per call, so no `lastIndex` state is shared between calls.
+	const matcher = new RegExp(GGUF_FILE_VARIANT_RE.source, "gi");
+	// A Set iterates in insertion order: first-occurrence ordering and dedup in one structure.
+	const found = new Set<GGUFFileVariant>();
+	for (let hit = matcher.exec(stem); hit !== null; hit = matcher.exec(stem)) {
+		const lowered = hit.groups?.v?.toLowerCase();
+		const variant = GGUF_FILE_VARIANTS.find((candidate) => candidate.toLowerCase() === lowered);
+		if (variant !== undefined) found.add(variant);
+	}
+	return Array.from(found);
+}