Skip to content

Commit 3222eae

Browse files
authored
Merge branch 'main' into hoytak/260305-cas-xorb-change
2 parents 67f4d83 + 7f903df commit 3222eae

File tree

13 files changed

+166
-33
lines changed

13 files changed

+166
-33
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or
9898

9999
```html
100100
<script type="module">
101-
import { InferenceClient } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@4.13.14/+esm';
101+
import { InferenceClient } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@4.13.15/+esm';
102102
import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/hub@2.10.5/+esm";
103103
</script>
104104
```

packages/gguf/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@huggingface/gguf",
3-
"version": "0.3.5",
3+
"version": "0.3.6",
44
"description": "a GGUF parser that works on remotely hosted files",
55
"keywords": [
66
"gguf",

packages/gguf/src/gguf.spec.ts

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ const URL_V1 =
2828
const URL_SHARDED_GROK =
2929
"https://huggingface.co/Arki05/Grok-1-GGUF/resolve/ecafa8d8eca9b8cd75d11a0d08d3a6199dc5a068/grok-1-IQ3_XS-split-00001-of-00009.gguf";
3030
const URL_BIG_METADATA = "https://huggingface.co/ngxson/test_gguf_models/resolve/main/gguf_test_big_metadata.gguf";
31+
const URL_KIMI_K25 =
32+
"https://huggingface.co/unsloth/Kimi-K2.5-GGUF/resolve/5f759b07a564a5cb9fcfa9ad456cf254e539ac77/UD-Q3_K_XL/Kimi-K2.5-UD-Q3_K_XL-00001-of-00011.gguf";
3133

3234
describe("gguf", () => {
3335
beforeAll(async () => {
@@ -255,6 +257,46 @@ describe("gguf", () => {
255257
});
256258
});
257259

260+
it("should parse a large MoE model (Kimi-K2.5, 160K vocab)", async () => {
261+
const { metadata, typedMetadata, tensorInfos } = await gguf(URL_KIMI_K25, { typedMetadata: true });
262+
263+
expect(metadata).toMatchObject({
264+
version: 3,
265+
"general.architecture": "deepseek2",
266+
"general.name": "Kimi-K2.5",
267+
"deepseek2.block_count": 61,
268+
"deepseek2.embedding_length": 7168,
269+
"deepseek2.expert_count": 384,
270+
"deepseek2.expert_used_count": 8,
271+
"deepseek2.expert_shared_count": 1,
272+
"deepseek2.vocab_size": 163840,
273+
});
274+
275+
expect(typedMetadata["general.architecture"]).toEqual({
276+
value: "deepseek2",
277+
type: GGUFValueType.STRING,
278+
});
279+
expect(typedMetadata["deepseek2.expert_count"]).toEqual({
280+
value: 384,
281+
type: GGUFValueType.UINT32,
282+
});
283+
expect(typedMetadata["tokenizer.ggml.tokens"]).toMatchObject({
284+
type: GGUFValueType.ARRAY,
285+
subType: GGUFValueType.STRING,
286+
});
287+
const tokens = typedMetadata["tokenizer.ggml.tokens"].value;
288+
expect(Array.isArray(tokens)).toBe(true);
289+
if (Array.isArray(tokens)) {
290+
expect(tokens.length).toEqual(163_840);
291+
}
292+
293+
expect(tensorInfos.length).toBeGreaterThan(0);
294+
expect(tensorInfos[0]).toMatchObject({
295+
name: "output.weight",
296+
shape: [7168n, 163840n],
297+
});
298+
});
299+
258300
it("should parse a local file", async () => {
259301
const parsedGguf = await gguf(".cache/model.gguf", { allowLocalFile: true });
260302
const { metadata } = parsedGguf as GGUFParseOutput<{ strict: false }>; // custom metadata arch, no need for typing

packages/gguf/src/gguf.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,15 @@ export {
2727
export const RE_GGUF_FILE = /\.gguf$/;
2828
export const RE_GGUF_SHARD_FILE = /^(?<prefix>.*?)-(?<shard>\d{5})-of-(?<total>\d{5})\.gguf$/;
2929
const GGUF_DEFAULT_ALIGNMENT = 32; // defined in ggml.h
30+
31+
/**
32+
* Safety limits to prevent OOM from crafted GGUF files (CWE-770).
33+
* Values are set well above any known real-world model (e.g. Kimi-K2.5 at 1T params,
34+
* 160K vocab, 384 experts) while still preventing billion-element allocations.
35+
*/
36+
const MAX_METADATA_ARRAY_LENGTH = 1_000_000;
37+
const MAX_KV_COUNT = 100_000;
38+
const MAX_TENSOR_COUNT = 10_000_000;
3039
const GGML_PAD = (x: number, n: number) => (x + n - 1) & ~(n - 1); // defined in ggml.h
3140
const PARALLEL_DOWNLOADS = 20;
3241

@@ -223,6 +232,11 @@ function readMetadataValue(
223232
case GGUFValueType.ARRAY: {
224233
const arrayType = view.getUint32(offset, littleEndian);
225234
const arrayLength = readVersionedSize(view, offset + 4, version, littleEndian);
235+
if (arrayLength.value > MAX_METADATA_ARRAY_LENGTH) {
236+
throw new Error(
237+
`Metadata array length ${arrayLength.value} exceeds maximum allowed (${MAX_METADATA_ARRAY_LENGTH})`,
238+
);
239+
}
226240
let length = 4 + arrayLength.length;
227241
const arrayValues: MetadataValue[] = [];
228242
for (let i = 0; i < arrayLength.value; i++) {
@@ -340,8 +354,14 @@ export async function gguf(
340354
// initial offset after header
341355
let offset = 8;
342356
const tensorCount = readVersionedSize(r.view, offset, version, littleEndian);
357+
if (tensorCount.value > MAX_TENSOR_COUNT) {
358+
throw new Error(`Tensor count ${tensorCount.value} exceeds maximum allowed (${MAX_TENSOR_COUNT})`);
359+
}
343360
offset += tensorCount.length;
344361
const numKv = readVersionedSize(r.view, offset, version, littleEndian);
362+
if (numKv.value > MAX_KV_COUNT) {
363+
throw new Error(`KV metadata count ${numKv.value} exceeds maximum allowed (${MAX_KV_COUNT})`);
364+
}
345365
offset += numKv.length;
346366
const metadata: GGUFMetadata<{ strict: false }> = {
347367
version,

packages/inference/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@huggingface/inference",
3-
"version": "4.13.14",
3+
"version": "4.13.15",
44
"description": "Typescript client for the Hugging Face Inference Providers and Inference Endpoints",
55
"keywords": [
66
"ai",

packages/inference/src/lib/getProviderHelper.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ export const PROVIDERS: Record<InferenceProvider, Partial<Record<InferenceTask,
198198
"zai-org": {
199199
conversational: new Zai.ZaiConversationalTask(),
200200
"text-to-image": new Zai.ZaiTextToImageTask(),
201+
"image-to-text": new Zai.ZaiImageToTextTask(),
201202
},
202203
};
203204

packages/inference/src/package.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
// Generated file from package.json. Issues importing JSON directly when publishing on commonjs/ESM - see https://github.com/microsoft/TypeScript/issues/51783
2-
export const PACKAGE_VERSION = "4.13.14";
2+
export const PACKAGE_VERSION = "4.13.15";
33
export const PACKAGE_NAME = "@huggingface/inference";

packages/inference/src/providers/hf-inference.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ import type { ImageToImageArgs } from "../tasks/cv/imageToImage.js";
7878
import type { AutomaticSpeechRecognitionArgs } from "../tasks/audio/automaticSpeechRecognition.js";
7979
import { omit } from "../utils/omit.js";
8080
import type { ImageSegmentationArgs } from "../tasks/cv/imageSegmentation.js";
81+
import type { ImageToTextArgs } from "../tasks/cv/imageToText.js";
8182
interface Base64ImageGeneration {
8283
data: Array<{
8384
b64_json: string;
@@ -378,6 +379,10 @@ export class HFInferenceImageToTextTask extends HFInferenceTask implements Image
378379
}
379380
return response;
380381
}
382+
383+
/**
 * Normalizes image-to-text arguments so that the image is carried under `data`.
 *
 * Arguments that already have a `data` key are returned untouched; otherwise the
 * value found under `inputs` is moved to `data` and the `inputs` key is dropped.
 */
async preparePayloadAsync(args: ImageToTextArgs): Promise<RequestArgs> {
	// Guard clause: nothing to rewrite when the caller already used `data`.
	if ("data" in args) {
		return args;
	}
	const withoutInputs = omit(args, "inputs");
	return { ...withoutInputs, data: args.inputs };
}
381386
}
382387

383388
export class HFInferenceImageToImageTask extends HFInferenceTask implements ImageToImageTaskHelper {

packages/inference/src/providers/providerHelper.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ import type { ImageToVideoArgs } from "../tasks/cv/imageToVideo.js";
6767
import type { ImageTextToImageArgs } from "../tasks/cv/imageTextToImage.js";
6868
import type { ImageTextToVideoArgs } from "../tasks/cv/imageTextToVideo.js";
6969
import type { ImageSegmentationArgs } from "../tasks/cv/imageSegmentation.js";
70+
import type { ImageToTextArgs } from "../tasks/cv/imageToText.js";
7071

7172
/**
7273
* Base class for task-specific provider helpers
@@ -205,6 +206,7 @@ export interface ObjectDetectionTaskHelper {
205206
/**
 * Contract that a provider helper implements to support the image-to-text task.
 */
export interface ImageToTextTaskHelper {
206207
/** Turns the raw provider response into an `ImageToTextOutput`. */
getResponse(response: unknown, url?: string, headers?: HeadersInit): Promise<ImageToTextOutput>;
207208
/** Builds the request body from the body parameters. */
preparePayload(params: BodyParams<ImageToTextInput & BaseArgs>): Record<string, unknown> | BodyInit;
209+
/**
 * Asynchronous preparation step: receives the caller's `ImageToTextArgs` and resolves to
 * the `RequestArgs` to use for the request.
 * NOTE(review): presumably invoked before `preparePayload` — confirm against the task entry point.
 */
preparePayloadAsync(args: ImageToTextArgs): Promise<RequestArgs>;
208210
}
209211

210212
export interface ZeroShotImageClassificationTaskHelper {

packages/inference/src/providers/zai-org.ts

Lines changed: 80 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,41 @@
1414
*
1515
* Thanks!
1616
*/
17+
import type { ImageToTextOutput } from "@huggingface/tasks";
1718
import {
1819
InferenceClientInputError,
1920
InferenceClientProviderApiError,
2021
InferenceClientProviderOutputError,
2122
} from "../errors.js";
2223
import { isUrl } from "../lib/isUrl.js";
23-
import type { BodyParams, HeaderParams, OutputType } from "../types.js";
24+
import type { BodyParams, HeaderParams, OutputType, RequestArgs } from "../types.js";
25+
import { base64FromBytes } from "../utils/base64FromBytes.js";
2426
import { dataUrlFromBlob } from "../utils/dataUrlFromBlob.js";
2527
import { delay } from "../utils/delay.js";
2628
import { omit } from "../utils/omit.js";
27-
import { BaseConversationalTask, TaskProviderHelper, type TextToImageTaskHelper } from "./providerHelper.js";
29+
import type { ImageToTextArgs } from "../tasks/cv/imageToText.js";
30+
import {
31+
BaseConversationalTask,
32+
TaskProviderHelper,
33+
type ImageToTextTaskHelper,
34+
type TextToImageTaskHelper,
35+
} from "./providerHelper.js";
2836

2937
const ZAI_API_BASE_URL = "https://api.z.ai";
3038

39+
/**
 * Shared base for Z.ai task helpers.
 *
 * Registers the helper under the "zai-org" provider with the Z.ai base URL and
 * adds the Z.ai-specific headers on top of the headers produced by the parent class.
 */
abstract class ZaiTask extends TaskProviderHelper {
	constructor() {
		super("zai-org", ZAI_API_BASE_URL);
	}

	/**
	 * Returns the parent's headers with `x-source-channel` and `accept-language` set.
	 * The object returned by the parent is extended in place and returned as-is.
	 */
	override prepareHeaders(params: HeaderParams, binary: boolean): Record<string, string> {
		return Object.assign(super.prepareHeaders(params, binary), {
			"x-source-channel": "hugging_face",
			"accept-language": "en-US,en",
		});
	}
}
51+
3152
export class ZaiConversationalTask extends BaseConversationalTask {
3253
constructor() {
3354
super("zai-org", ZAI_API_BASE_URL);
@@ -63,28 +84,12 @@ interface ZaiAsyncResultResponse {
6384
const MAX_POLL_ATTEMPTS = 60;
6485
const POLL_INTERVAL_MS = 5000;
6586

66-
export class ZaiTextToImageTask extends TaskProviderHelper implements TextToImageTaskHelper {
67-
constructor() {
68-
super("zai-org", ZAI_API_BASE_URL);
69-
}
70-
71-
override prepareHeaders(params: HeaderParams, binary: boolean): Record<string, string> {
72-
const headers: Record<string, string> = {
73-
Authorization: `Bearer ${params.accessToken}`,
74-
"x-source-channel": "hugging_face",
75-
"accept-language": "en-US,en",
76-
};
77-
if (!binary) {
78-
headers["Content-Type"] = "application/json";
79-
}
80-
return headers;
81-
}
82-
83-
makeRoute(): string {
87+
export class ZaiTextToImageTask extends ZaiTask implements TextToImageTaskHelper {
88+
override makeRoute(): string {
8489
return "/api/paas/v4/async/images/generations";
8590
}
8691

87-
preparePayload(params: BodyParams): Record<string, unknown> {
92+
override preparePayload(params: BodyParams): Record<string, unknown> {
8893
return {
8994
...omit(params.args, ["inputs", "parameters"]),
9095
...(params.args.parameters as Record<string, unknown>),
@@ -93,7 +98,7 @@ export class ZaiTextToImageTask extends TaskProviderHelper implements TextToImag
9398
};
9499
}
95100

96-
async getResponse(
101+
override async getResponse(
97102
response: ZaiTextToImageResponse,
98103
url?: string,
99104
headers?: Record<string, string>,
@@ -190,3 +195,56 @@ export class ZaiTextToImageTask extends TaskProviderHelper implements TextToImag
190195
);
191196
}
192197
}
198+
199+
/**
 * Part of the Z.ai `layout_parsing` response that this module reads.
 */
interface ZaiLayoutParsingResponse {
200+
/** Text returned as the generated text; presumably Markdown, judging by the name — confirm against the Z.ai API docs. */
md_results?: string;
201+
}
202+
203+
/**
 * Image-to-text helper for Z.ai, backed by the `layout_parsing` route.
 *
 * The image is sent as a base64 data URL in the `file` field of the payload, and the
 * `md_results` string of the response is returned as the generated text.
 */
export class ZaiImageToTextTask extends ZaiTask implements ImageToTextTaskHelper {
	override makeRoute(): string {
		return "/api/paas/v4/layout_parsing";
	}

	/**
	 * Resolves the image to a Blob and replaces it with a base64 data URL stored under `inputs`.
	 *
	 * Accepted sources, in order of precedence: a Blob in `data`, a URL string in `inputs`
	 * (downloaded here), or a Blob in `inputs`.
	 *
	 * @param args - Image-to-text arguments carrying the image in `data` or `inputs`.
	 * @returns The arguments without `data`, and with `inputs` set to the data URL.
	 * @throws InferenceClientInputError when no usable image is provided, or when the image
	 * URL answers with a non-success HTTP status.
	 */
	async preparePayloadAsync(args: ImageToTextArgs): Promise<RequestArgs> {
		let blob: Blob | undefined;
		if ("data" in args && args.data instanceof Blob) {
			blob = args.data;
		} else if ("inputs" in args) {
			if (typeof args.inputs === "string" && isUrl(args.inputs)) {
				const imageResponse = await fetch(args.inputs);
				// fetch() resolves on HTTP errors too; without this check an error page
				// would be base64-encoded and sent to the provider as if it were the image.
				if (!imageResponse.ok) {
					throw new InferenceClientInputError(
						`ZAI image-to-text failed to download the input image: HTTP ${imageResponse.status} ${imageResponse.statusText}`,
					);
				}
				blob = await imageResponse.blob();
			} else if (args.inputs instanceof Blob) {
				blob = args.inputs;
			}
		}

		if (!blob) {
			throw new InferenceClientInputError("ZAI image-to-text requires a URL string or Blob as inputs");
		}

		// `||` rather than `??` on purpose: Blob.type is an empty string when the type is unknown.
		const mimeType = blob.type || "image/png";
		const b64 = base64FromBytes(new Uint8Array(await blob.arrayBuffer()));
		const file = `data:${mimeType};base64,${b64}`;

		return {
			...("data" in args ? omit(args, "data") : omit(args, "inputs")),
			inputs: file,
		} as RequestArgs;
	}

	/**
	 * Builds the JSON body: the model id plus the data URL (read from `inputs`) as `file`.
	 */
	override preparePayload(params: BodyParams): Record<string, unknown> {
		return {
			model: params.model,
			file: params.args.inputs,
		};
	}

	/**
	 * Extracts `md_results` from the response and returns it as the generated text.
	 *
	 * @throws InferenceClientProviderOutputError when `md_results` is not a string.
	 */
	override async getResponse(response: ZaiLayoutParsingResponse): Promise<ImageToTextOutput> {
		const mdResults = response?.md_results;
		if (typeof mdResults !== "string") {
			throw new InferenceClientProviderOutputError(
				`Received malformed response from ZAI layout_parsing API: expected { md_results: string }, got: ${JSON.stringify(response)}`,
			);
		}
		return { generated_text: mdResults, generatedText: mdResults };
	}
}

0 commit comments

Comments
 (0)