Skip to content

Commit dc74c13

Browse files
committed
fix(chunk): assign list-typed conversion options directly instead of JSON.stringify
`ChunkService.buildFormFields` JSON-stringified `from_formats`, `ocr_lang`, and `page_range` into single multipart form-field values. docling-serve expects these as repeated multi-value fields, so any request that sets one of them fails Pydantic validation and is rejected with HTTP 422. Mirror the file.ts:508 pattern: widen the (private) return type to `Record<string, unknown>` and assign the arrays directly. The platform multipart encoder (platform/http.ts) already iterates arrays and coerces each element via `String(item)`, so numeric tuples like `page_range` serialize correctly without explicit help. Add a vitest case asserting that the three fields pass through to `streamUpload` as arrays, not JSON strings.
1 parent 78926dd commit dc74c13

2 files changed

Lines changed: 75 additions & 5 deletions

File tree

src/services/chunk.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -416,14 +416,14 @@ export class ChunkService {
416416
private buildFormFields(
417417
options: ConversionOptions,
418418
targetType: "inbody" | "zip"
419-
): Record<string, string> {
420-
const fields: Record<string, string> = {
419+
): Record<string, unknown> {
420+
const fields: Record<string, unknown> = {
421421
target_type: targetType,
422422
};
423423

424424
// Add conversion options
425425
if (options.from_formats) {
426-
fields.convert_from_formats = JSON.stringify(options.from_formats);
426+
fields.convert_from_formats = options.from_formats;
427427
}
428428
if (options.image_export_mode) {
429429
fields.convert_image_export_mode = options.image_export_mode;
@@ -438,7 +438,7 @@ export class ChunkService {
438438
fields.convert_ocr_engine = options.ocr_engine;
439439
}
440440
if (options.ocr_lang) {
441-
fields.convert_ocr_lang = JSON.stringify(options.ocr_lang);
441+
fields.convert_ocr_lang = options.ocr_lang;
442442
}
443443
if (options.pdf_backend) {
444444
fields.convert_pdf_backend = options.pdf_backend;
@@ -453,7 +453,7 @@ export class ChunkService {
453453
fields.convert_pipeline = options.pipeline;
454454
}
455455
if (options.page_range) {
456-
fields.convert_page_range = JSON.stringify(options.page_range);
456+
fields.convert_page_range = options.page_range;
457457
}
458458
if (options.document_timeout !== undefined) {
459459
fields.convert_document_timeout = String(options.document_timeout);

tests/unit/chunk-service.test.ts

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import { describe, it, expect, vi, beforeEach } from "vitest";
2+
import { ChunkService } from "../../src/services/chunk";
3+
import { HttpClient } from "../../src/api";
4+
import type {
5+
ApiClientConfig,
6+
ChunkDocumentResponse,
7+
} from "../../src/types/api";
8+
9+
const mockConfig: ApiClientConfig = {
10+
baseUrl: "http://localhost:5001",
11+
timeout: 30000,
12+
};
13+
14+
const capturedFields: Array<Record<string, unknown>> = [];
15+
16+
vi.mock("../../src/api/http", async (importOriginal) => {
17+
const mod: any = await importOriginal();
18+
return {
19+
HttpClient: class extends mod.HttpClient {
20+
async streamUpload(
21+
_endpoint: string,
22+
_files: any,
23+
fields: Record<string, unknown>
24+
) {
25+
capturedFields.push(fields);
26+
return {
27+
data: {
28+
documents: [],
29+
chunks: [],
30+
processing_time: 1,
31+
} satisfies ChunkDocumentResponse,
32+
status: 200,
33+
statusText: "OK",
34+
headers: { "content-type": "application/json" },
35+
};
36+
}
37+
},
38+
};
39+
});
40+
41+
describe("ChunkService.buildFormFields", () => {
42+
let http: HttpClient;
43+
let service: ChunkService;
44+
45+
beforeEach(() => {
46+
capturedFields.length = 0;
47+
http = new HttpClient(mockConfig);
48+
service = new ChunkService(http);
49+
});
50+
51+
it("passes list-typed conversion options as arrays (not JSON strings) to streamUpload", async () => {
52+
await service.chunkHybridSync(Buffer.from("pdf"), "example.pdf", {
53+
from_formats: ["pdf", "docx"],
54+
ocr_lang: ["en", "fr"],
55+
page_range: [1, 2],
56+
});
57+
58+
expect(capturedFields).toHaveLength(1);
59+
const fields = capturedFields[0];
60+
61+
expect(fields.convert_from_formats).toEqual(["pdf", "docx"]);
62+
expect(fields.convert_ocr_lang).toEqual(["en", "fr"]);
63+
expect(fields.convert_page_range).toEqual([1, 2]);
64+
65+
// Guard against regression to JSON.stringify.
66+
expect(typeof fields.convert_from_formats).not.toBe("string");
67+
expect(typeof fields.convert_ocr_lang).not.toBe("string");
68+
expect(typeof fields.convert_page_range).not.toBe("string");
69+
});
70+
});

0 commit comments

Comments
 (0)