Skip to content

Commit ce4600f

Browse files
committed
test(pdf-server): bridge coverage gaps and switch fixture to gated loopback HTTP
PdfCacheRangeTransport.deliver(): pdf.js's reader is keyed by the original begin and removed after one delivery, so accumulate slices and call onDataRange once with the full buffer (the previous multi-call approach threw inside pdfjs). Covered by a new integration test that drives getDocument()/getPage(1) on a >1MB PDF through a clamping in-memory readPdfRange. validateUrl: allow http://127.0.0.1|localhost|[::1] only when PDF_SERVER_ALLOW_LOOPBACK_HTTP is set, so a remote deploy can't be made to probe its own ports. Covered by env-on/off unit tests. Fixture switched to plain HTTP (no openssl, no NODE_TLS_REJECT_UNAUTHORIZED). Adds /error.pdf (500s after 50KB) and two e2e tests: page 2 renders after stall release (>512KB object path), and display_pdf returns gracefully on mid-load 500. Existing <30% test now samples stats before the iframe loads. New unit tests: display_pdf returns (not hangs) on mid-load fetch failure via in-memory MCP client; computeDiff/serializeDiff contract tests pinning the restoredRemovedIds tombstone-preservation behaviour.
1 parent 3c9630c commit ce4600f

6 files changed

Lines changed: 337 additions & 65 deletions

File tree

examples/pdf-server/server.test.ts

Lines changed: 145 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ describe("PDF Cache with Timeouts", () => {
295295
});
296296

297297
describe("PdfCacheRangeTransport", () => {
298-
it("delivers ranges larger than MAX_CHUNK_BYTES in multiple onDataRange calls", async () => {
298+
it("accumulates ranges larger than MAX_CHUNK_BYTES into one onDataRange call", async () => {
299299
const big = MAX_CHUNK_BYTES * 2 + 100;
300300
const reads: Array<[number, number]> = [];
301301
const t = new PdfCacheRangeTransport("u", big, async (_u, off, n) => {
@@ -311,7 +311,11 @@ describe("PdfCacheRangeTransport", () => {
311311
);
312312
t.requestDataRange(0, big);
313313
await new Promise((r) => setTimeout(r, 10));
314-
expect(delivered).toEqual([
314+
// pdf.js's reader is keyed by the original begin and removed after one
315+
// delivery, so deliver() must call onDataRange exactly once with the
316+
// accumulated buffer — multiple calls would throw inside pdfjs.
317+
expect(delivered).toEqual([[0, big]]);
318+
expect(reads).toEqual([
315319
[0, MAX_CHUNK_BYTES],
316320
[MAX_CHUNK_BYTES, MAX_CHUNK_BYTES],
317321
[MAX_CHUNK_BYTES * 2, 100],
@@ -339,6 +343,127 @@ describe("PdfCacheRangeTransport", () => {
339343
t.requestDataRange(0, 100);
340344
await expect(t.failed).rejects.toThrow(/empty range/);
341345
});
346+
347+
it("getDocument resolves on a >1MB PDF when readPdfRange clamps to MAX_CHUNK_BYTES", async () => {
348+
// pdfjs coalesces adjacent missing chunks into one requestDataRange that
349+
// can exceed MAX_CHUNK_BYTES. deliver() must accumulate clamped reads and
350+
// hand pdfjs a single onDataRange(begin, fullBuffer). This test fails if
351+
// deliver() either truncates or calls onDataRange more than once per
352+
// requestDataRange (pdf.mjs _onReceiveData matches by exact begin).
353+
function makeRandomJpeg(len: number): Uint8Array {
354+
const header = Uint8Array.from([
355+
0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
356+
0x01, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0xff, 0xc0, 0x00, 0x0b,
357+
0x08, 0x00, 0x08, 0x00, 0x08, 0x01, 0x01, 0x11, 0x00, 0xff, 0xc4, 0x00,
358+
0x14, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
359+
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xda, 0x00, 0x08, 0x01,
360+
0x01, 0x00, 0x00, 0x3f, 0x00,
361+
]);
362+
const scan = new Uint8Array(len);
363+
for (let i = 0; i < len; i++) {
364+
const b = (i * 1103515245 + 12345) & 0xff;
365+
scan[i] = b === 0xff ? 0xfe : b;
366+
}
367+
const eoi = Uint8Array.from([0xff, 0xd9]);
368+
const out = new Uint8Array(header.length + len + 2);
369+
out.set(header, 0);
370+
out.set(scan, header.length);
371+
out.set(eoi, header.length + len);
372+
return out;
373+
}
374+
375+
const d = await PDFDocument.create();
376+
const img = await d.embedJpg(makeRandomJpeg(1_100_000));
377+
const page = d.addPage([612, 792]);
378+
page.drawImage(img, { x: 36, y: 36, width: 540, height: 720 });
379+
const bytes = await d.save();
380+
expect(bytes.length).toBeGreaterThan(2 * MAX_CHUNK_BYTES);
381+
382+
let maxReadLen = 0;
383+
const readClamped: PdfCache["readPdfRange"] = async (_u, off, n) => {
384+
const len = Math.min(n, MAX_CHUNK_BYTES, bytes.length - off);
385+
maxReadLen = Math.max(maxReadLen, len);
386+
return { data: bytes.slice(off, off + len), totalBytes: bytes.length };
387+
};
388+
const transport = new PdfCacheRangeTransport(
389+
"mem://big",
390+
bytes.length,
391+
readClamped,
392+
);
393+
394+
const doc = await Promise.race([
395+
getDocument({
396+
range: transport,
397+
length: bytes.length,
398+
disableAutoFetch: true,
399+
disableStream: true,
400+
rangeChunkSize: 64 * 1024,
401+
}).promise,
402+
transport.failed,
403+
new Promise<never>((_, rej) =>
404+
setTimeout(() => rej(new Error("getDocument hung")), 5000),
405+
),
406+
]);
407+
const p1 = await Promise.race([
408+
doc.getPage(1),
409+
transport.failed,
410+
new Promise<never>((_, rej) =>
411+
setTimeout(() => rej(new Error("getPage hung")), 5000),
412+
),
413+
]);
414+
expect(p1).toBeDefined();
415+
expect(maxReadLen).toBeLessThanOrEqual(MAX_CHUNK_BYTES);
416+
doc.destroy();
417+
});
418+
});
419+
420+
describe("display_pdf transport-error handling", () => {
421+
it("returns (does not hang) when range fetches fail mid-load", async () => {
422+
// First fetch = the 1-byte size probe → 206 with Content-Range so
423+
// display_pdf gets totalBytes. Every subsequent fetch (made by
424+
// PdfCacheRangeTransport via readPdfRange) rejects, which must surface
425+
// through transport.failed → orFail() → outer catch, not hang.
426+
let calls = 0;
427+
const mockFetch = spyOn(globalThis, "fetch").mockImplementation(
428+
async () => {
429+
if (calls++ === 0) {
430+
return new Response(new Uint8Array(1), {
431+
status: 206,
432+
headers: { "Content-Range": "bytes 0-0/50000" },
433+
});
434+
}
435+
throw new Error("network down");
436+
},
437+
);
438+
439+
const server = createServer();
440+
const client = new Client({ name: "t", version: "1" });
441+
const [ct, st] = InMemoryTransport.createLinkedPair();
442+
await Promise.all([server.connect(st), client.connect(ct)]);
443+
444+
try {
445+
const result = await Promise.race([
446+
client.callTool({
447+
name: "display_pdf",
448+
arguments: { url: "https://arxiv.org/pdf/err-test" },
449+
}),
450+
new Promise<never>((_, rej) =>
451+
setTimeout(
452+
() => rej(new Error("display_pdf hung on transport error")),
453+
3000,
454+
),
455+
),
456+
]);
457+
expect(result.isError).toBeFalsy();
458+
const sc = result.structuredContent as { formFields?: unknown };
459+
expect(sc.formFields).toBeUndefined();
460+
expect(calls).toBeGreaterThan(1);
461+
} finally {
462+
mockFetch.mockRestore();
463+
await client.close();
464+
await server.close();
465+
}
466+
});
342467
});
343468

344469
describe("extractFormSchema field-tree handling", () => {
@@ -419,6 +544,24 @@ describe("extractFormSchema field-tree handling", () => {
419544
});
420545
});
421546

547+
describe("validateUrl loopback HTTP allow (PDF_SERVER_ALLOW_LOOPBACK_HTTP)", () => {
548+
it("rejects http://127.0.0.1 by default", () => {
549+
expect(validateUrl("http://127.0.0.1:9999/x.pdf").valid).toBe(false);
550+
});
551+
552+
it("accepts http://127.0.0.1 only when the env gate is set, and never non-loopback http", () => {
553+
const prev = process.env.PDF_SERVER_ALLOW_LOOPBACK_HTTP;
554+
process.env.PDF_SERVER_ALLOW_LOOPBACK_HTTP = "1";
555+
try {
556+
expect(validateUrl("http://127.0.0.1:9999/x.pdf").valid).toBe(true);
557+
expect(validateUrl("http://169.254.169.254/").valid).toBe(false);
558+
} finally {
559+
if (prev === undefined) delete process.env.PDF_SERVER_ALLOW_LOOPBACK_HTTP;
560+
else process.env.PDF_SERVER_ALLOW_LOOPBACK_HTTP = prev;
561+
}
562+
});
563+
});
564+
422565
describe("validateUrl with MCP roots (allowedLocalDirs)", () => {
423566
const savedFiles = new Set(allowedLocalFiles);
424567
const savedDirs = new Set(allowedLocalDirs);

examples/pdf-server/server.ts

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -641,10 +641,19 @@ export function validateUrl(url: string): {
641641
// Remote URL - require HTTPS
642642
try {
643643
const parsed = new URL(url);
644-
if (parsed.protocol !== "https:") {
645-
return { valid: false, error: `Only HTTPS URLs are allowed: ${url}` };
644+
if (parsed.protocol === "https:") return { valid: true };
645+
// Loopback HTTP is opt-in (test fixtures, local dev). Off by default so a
646+
// remotely-deployed server can't be made to probe its own internal ports.
647+
if (
648+
process.env.PDF_SERVER_ALLOW_LOOPBACK_HTTP &&
649+
parsed.protocol === "http:" &&
650+
(parsed.hostname === "127.0.0.1" ||
651+
parsed.hostname === "localhost" ||
652+
parsed.hostname === "[::1]")
653+
) {
654+
return { valid: true };
646655
}
647-
return { valid: true };
656+
return { valid: false, error: `Only HTTPS URLs are allowed: ${url}` };
648657
} catch {
649658
return { valid: false, error: `Invalid URL: ${url}` };
650659
}
@@ -914,20 +923,23 @@ export class PdfCacheRangeTransport extends PDFDataRangeTransport {
914923

915924
/**
916925
* pdf.js coalesces adjacent missing chunks into one unbounded request, but
917-
* readPdfRange clamps each call to MAX_CHUNK_BYTES. Deliver in slices so
918-
* the worker marks every requested chunk as loaded.
926+
* readPdfRange clamps each call to MAX_CHUNK_BYTES. Its reader is keyed by
927+
* the original `begin` and removed after one delivery, so we must accumulate
928+
* slices and call onDataRange exactly once with the full buffer.
919929
*/
920930
private async deliver(begin: number, end: number): Promise<void> {
921-
let off = begin;
922-
while (off < end) {
923-
const want = Math.min(end - off, MAX_CHUNK_BYTES);
924-
const { data } = await this.readPdfRange(this.url, off, want);
931+
const buf = new Uint8Array(end - begin);
932+
let off = 0;
933+
while (off < buf.length) {
934+
const want = Math.min(buf.length - off, MAX_CHUNK_BYTES);
935+
const { data } = await this.readPdfRange(this.url, begin + off, want);
925936
if (data.length === 0) {
926-
throw new Error(`empty range at ${off} for ${this.url}`);
937+
throw new Error(`empty range at ${begin + off} for ${this.url}`);
927938
}
928-
this.onDataRange(off, data);
939+
buf.set(data.subarray(0, Math.min(data.length, buf.length - off)), off);
929940
off += data.length;
930941
}
942+
this.onDataRange(begin, buf);
931943
}
932944
}
933945

examples/pdf-server/src/pdf-annotations.test.ts

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,92 @@ describe("computeDiff", () => {
367367
current.map((a) => a.id).sort(),
368368
);
369369
});
370+
371+
// Backs the post-computeDiff union step in mcp-app.ts persistAnnotations /
372+
// getAnnotatedPdfBytes: with the lazy per-page baseline scan, computeDiff
373+
// alone cannot produce `removed` for natives on pages not yet visited, so
374+
// callers must union restoredRemovedIds. These tests pin the contract those
375+
// call sites depend on.
376+
describe("partial baseline (lazy-scan tombstone preservation)", () => {
377+
const tombstoned = "pdf-5-0";
378+
const userNote: PdfAnnotationDef = {
379+
type: "note",
380+
id: "u1",
381+
page: 1,
382+
x: 10,
383+
y: 10,
384+
content: "unrelated edit",
385+
};
386+
387+
/** Mirrors the union loop in mcp-app.ts persistAnnotations. */
388+
function unionRestored(
389+
diff: AnnotationDiff,
390+
restored: Iterable<string>,
391+
currentIds: Set<string>,
392+
): void {
393+
for (const id of restored) {
394+
if (!currentIds.has(id) && !diff.removed.includes(id)) {
395+
diff.removed.push(id);
396+
}
397+
}
398+
}
399+
400+
it("computeDiff alone drops tombstones for unscanned pages; the union step preserves them", () => {
401+
const baseline: PdfAnnotationDef[] = []; // page with the native not yet scanned
402+
const current = [userNote];
403+
const diff = computeDiff(baseline, current, new Map());
404+
expect(diff.removed).toEqual([]); // proves the union step is load-bearing
405+
406+
unionRestored(diff, [tombstoned], new Set(current.map((a) => a.id)));
407+
408+
expect(diff.removed).toEqual([tombstoned]);
409+
expect(isDiffEmpty(diff)).toBe(false);
410+
expect(deserializeDiff(serializeDiff(diff)).removed).toEqual([
411+
tombstoned,
412+
]);
413+
});
414+
415+
it("a tombstone the user re-added is excluded from the union", () => {
416+
const reAdded: PdfAnnotationDef = {
417+
type: "note",
418+
id: tombstoned,
419+
page: 3,
420+
x: 0,
421+
y: 0,
422+
content: "back",
423+
};
424+
const current = [userNote, reAdded];
425+
const diff = computeDiff([], current, new Map());
426+
unionRestored(diff, [tombstoned], new Set(current.map((a) => a.id)));
427+
expect(diff.removed).toEqual([]);
428+
});
429+
430+
it("union does not duplicate ids once the page is scanned and computeDiff produces them", () => {
431+
const native: PdfAnnotationDef = {
432+
type: "note",
433+
id: tombstoned,
434+
page: 3,
435+
x: 0,
436+
y: 0,
437+
content: "native",
438+
};
439+
const diff = computeDiff([native], [userNote], new Map());
440+
expect(diff.removed).toEqual([tombstoned]);
441+
unionRestored(diff, [tombstoned], new Set([userNote.id]));
442+
expect(diff.removed).toEqual([tombstoned]);
443+
});
444+
445+
it("removedRefs from restored tombstones parse for buildAnnotatedPdfBytes; non-ref ids are skipped", () => {
446+
const restored = new Set(["pdf-5-0", "pdf-12R", "pdf-2-idx-7"]);
447+
const removedRefs = [...restored]
448+
.map(parseAnnotationRef)
449+
.filter((r): r is NonNullable<typeof r> => r !== null);
450+
expect(removedRefs).toEqual([
451+
{ objectNumber: 5, generationNumber: 0 },
452+
{ objectNumber: 12, generationNumber: 0 },
453+
]);
454+
});
455+
});
370456
});
371457

372458
// =============================================================================

playwright.config.ts

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,10 @@ export default defineConfig({
4747
env: {
4848
...process.env,
4949
EXAMPLE: process.env.EXAMPLE ?? "",
50-
// pdf-incremental-load.spec.ts serves test PDFs over self-signed HTTPS
51-
// and pdf-server's outbound fetch must accept that cert. This disables
52-
// TLS verification for the entire e2e webServer process tree (all
53-
// example servers launched by examples:start), not just pdf-server —
54-
// run-all.ts inherits env to every child. Acceptable for the e2e
55-
// sandbox; never set in production. Tightening to pdf-server alone
56-
// requires a validateUrl localhost allow (tracked separately).
57-
NODE_TLS_REJECT_UNAUTHORIZED: "0",
50+
// Let pdf-server fetch from the http://127.0.0.1 range-counting fixture
51+
// (validateUrl rejects loopback HTTP unless this is set). Scoped to this
52+
// server's check only — does not touch Node's TLS verification.
53+
PDF_SERVER_ALLOW_LOOPBACK_HTTP: "1",
5854
},
5955
},
6056
// Snapshot configuration

0 commit comments

Comments (0)