Skip to content

Commit 0201ea1

Browse files
chelojimenez and claude
authored
fix(evals): honor widget-declared CSP in the browser harness network gate (#2553)
Co-authored-by: Claude <noreply@anthropic.com>
1 parent 4952271 commit 0201ea1

6 files changed

Lines changed: 853 additions & 42 deletions

File tree

mcpjam-inspector/server/utils/__tests__/mcp-app-browser-harness.test.ts

Lines changed: 299 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ import { build } from "esbuild";
33
import {
44
McpAppBrowserHarness,
55
ChromiumNotInstalledError,
6+
cspSourceMatchesUrl,
7+
injectCspMeta,
68
type McpAppBrowserHarnessOptions,
79
} from "../mcp-app-browser-harness";
810

@@ -67,14 +69,14 @@ beforeAll(async () => {
6769

6870
const harnesses: McpAppBrowserHarness[] = [];
6971
function makeHarness(
70-
overrides: Partial<McpAppBrowserHarnessOptions> = {},
72+
overrides: Partial<McpAppBrowserHarnessOptions> = {}
7173
): McpAppBrowserHarness & { calls: Array<{ name: string }> } {
7274
const calls: Array<{ name: string; args: Record<string, unknown> }> = [];
7375
const callTool = vi.fn(
7476
async (_serverId: string, name: string, args: Record<string, unknown>) => {
7577
calls.push({ name, args });
7678
return { content: [{ type: "text", text: "ok" }] };
77-
},
79+
}
7880
);
7981
const h = new McpAppBrowserHarness({
8082
callTool,
@@ -133,7 +135,7 @@ describe("McpAppBrowserHarness — render classification", () => {
133135
expect(obs.status).toBe("rendered");
134136
expect(obs.bridgeInitialized).toBe(true);
135137
expect(obs.screenshotBase64 && obs.screenshotBase64.length).toBeGreaterThan(
136-
0,
138+
0
137139
);
138140
// screenshot within the byte budget (256 KiB default).
139141
const bytes = Buffer.from(obs.screenshotBase64!, "base64").byteLength;
@@ -233,10 +235,13 @@ describe("McpAppBrowserHarness — interaction", () => {
233235
});
234236

235237
expect(result.widgetToolCalls.length).toBe(1);
236-
expect(result.widgetToolCalls[0]).toMatchObject({ name: "reserve", ok: true });
237-
expect(result.screenshotBase64 && result.screenshotBase64.length).toBeGreaterThan(
238-
0,
239-
);
238+
expect(result.widgetToolCalls[0]).toMatchObject({
239+
name: "reserve",
240+
ok: true,
241+
});
242+
expect(
243+
result.screenshotBase64 && result.screenshotBase64.length
244+
).toBeGreaterThan(0);
240245
// dispatched through the injected callTool with the widget's serverId.
241246
expect(h.calls).toEqual([{ name: "reserve", args: { seat: 12 } }]);
242247
}, 30_000);
@@ -330,3 +335,290 @@ describe("McpAppBrowserHarness — interaction", () => {
330335
expect(third.note).toBe("no_rendered_widget");
331336
}, 30_000);
332337
});
338+
339+
describe("McpAppBrowserHarness — unmount network-allowance lifecycle", () => {
340+
const sourcesOf = (h: McpAppBrowserHarness) =>
341+
(h as unknown as { widgetCspSources: string[] }).widgetCspSources;
342+
343+
it("keeps the live widget's declared origins when a STALE tool-call is dismissed", async () => {
344+
const h = makeHarness();
345+
await h.renderWidget({
346+
toolCallId: "live-1",
347+
toolName: "show",
348+
serverId: "srv",
349+
html: buttonHtml,
350+
cspMeta: { connect_domains: ["https://api.allowed.invalid"] },
351+
keepMounted: true,
352+
});
353+
expect(h.getMountedWidgetId()).toBe("live-1");
354+
expect(sourcesOf(h)).toContain("https://api.allowed.invalid");
355+
356+
// Dismissing a tool-call that is NOT the live mount (e.g. a carried id that
357+
// was already replaced) must not strip the current widget's allowances —
358+
// otherwise its subsequent subresource fetches abort at the route gate.
359+
await h.dismissWidget("stale-never-mounted");
360+
expect(h.getMountedWidgetId()).toBe("live-1");
361+
expect(sourcesOf(h)).toContain("https://api.allowed.invalid");
362+
363+
// Dismissing the actual live widget DOES clear them (fail closed).
364+
await h.dismissWidget("live-1");
365+
expect(h.getMountedWidgetId()).toBeNull();
366+
expect(sourcesOf(h)).toEqual([]);
367+
}, 30_000);
368+
});
369+
370+
describe("cspSourceMatchesUrl — CSP host-source matching", () => {
371+
const u = (s: string) => new URL(s);
372+
373+
it("matches exact origins and rejects scheme/host mismatches", () => {
374+
expect(
375+
cspSourceMatchesUrl("https://esm.sh", u("https://esm.sh/react"))
376+
).toBe(true);
377+
expect(
378+
cspSourceMatchesUrl("https://esm.sh", u("http://esm.sh/react"))
379+
).toBe(false);
380+
expect(
381+
cspSourceMatchesUrl("https://esm.sh", u("https://evil.sh/react"))
382+
).toBe(false);
383+
});
384+
385+
it("matches wildcard subdomains but not the bare apex", () => {
386+
const src = "https://*.excalidraw.com";
387+
expect(
388+
cspSourceMatchesUrl(src, u("https://cdn.excalidraw.com/a.woff2"))
389+
).toBe(true);
390+
expect(cspSourceMatchesUrl(src, u("https://a.b.excalidraw.com/x"))).toBe(
391+
true
392+
);
393+
expect(cspSourceMatchesUrl(src, u("https://excalidraw.com/x"))).toBe(false);
394+
expect(cspSourceMatchesUrl(src, u("https://notexcalidraw.com/x"))).toBe(
395+
false
396+
);
397+
});
398+
399+
it("scheme-less host-sources match http(s)/ws(s) on that host only", () => {
400+
expect(cspSourceMatchesUrl("esm.sh", u("https://esm.sh/x"))).toBe(true);
401+
expect(cspSourceMatchesUrl("esm.sh", u("http://esm.sh/x"))).toBe(true);
402+
expect(cspSourceMatchesUrl("esm.sh", u("wss://esm.sh/socket"))).toBe(true);
403+
expect(cspSourceMatchesUrl("esm.sh", u("ftp://esm.sh/x"))).toBe(false);
404+
expect(cspSourceMatchesUrl("esm.sh", u("https://other.sh/x"))).toBe(false);
405+
});
406+
407+
it("scheme-only sources allow any host on that scheme", () => {
408+
expect(cspSourceMatchesUrl("https:", u("https://anything.example/x"))).toBe(
409+
true
410+
);
411+
expect(cspSourceMatchesUrl("https:", u("http://anything.example/x"))).toBe(
412+
false
413+
);
414+
});
415+
416+
it("honors ports (explicit, wildcard, and scheme defaults)", () => {
417+
expect(
418+
cspSourceMatchesUrl("https://cdn.x.io:8443", u("https://cdn.x.io:8443/a"))
419+
).toBe(true);
420+
expect(
421+
cspSourceMatchesUrl("https://cdn.x.io:8443", u("https://cdn.x.io/a"))
422+
).toBe(false);
423+
expect(
424+
cspSourceMatchesUrl("https://cdn.x.io:443", u("https://cdn.x.io/a"))
425+
).toBe(true);
426+
expect(
427+
cspSourceMatchesUrl("https://cdn.x.io:*", u("https://cdn.x.io:9999/a"))
428+
).toBe(true);
429+
});
430+
431+
it("treats an omitted source port as the scheme default only (not any port)", () => {
432+
// CSP: a source without a port matches only the URL scheme's default port.
433+
expect(
434+
cspSourceMatchesUrl(
435+
"https://api.example.com",
436+
u("https://api.example.com/x")
437+
)
438+
).toBe(true);
439+
expect(
440+
cspSourceMatchesUrl(
441+
"https://api.example.com",
442+
u("https://api.example.com:443/x")
443+
)
444+
).toBe(true);
445+
expect(
446+
cspSourceMatchesUrl(
447+
"https://api.example.com",
448+
u("https://api.example.com:8443/x")
449+
)
450+
).toBe(false);
451+
// http default is 80.
452+
expect(cspSourceMatchesUrl("http://h.io", u("http://h.io/x"))).toBe(true);
453+
expect(cspSourceMatchesUrl("http://h.io", u("http://h.io:8080/x"))).toBe(
454+
false
455+
);
456+
});
457+
458+
it("ignores paths in host-sources (origin-granular gate)", () => {
459+
expect(
460+
cspSourceMatchesUrl(
461+
"https://cdn.x.io/assets/",
462+
u("https://cdn.x.io/other/file.js")
463+
)
464+
).toBe(true);
465+
});
466+
467+
it("never matches quoted keywords or empty sources", () => {
468+
expect(cspSourceMatchesUrl("'self'", u("https://esm.sh/x"))).toBe(false);
469+
expect(cspSourceMatchesUrl("'unsafe-inline'", u("https://esm.sh/x"))).toBe(
470+
false
471+
);
472+
expect(cspSourceMatchesUrl("", u("https://esm.sh/x"))).toBe(false);
473+
});
474+
});
475+
476+
describe("injectCspMeta", () => {
477+
it("inserts the policy as the first child of <head>", () => {
478+
const out = injectCspMeta(
479+
"<!doctype html><html><head><title>x</title></head><body></body></html>",
480+
"default-src 'self'"
481+
);
482+
expect(out).toContain(
483+
`<head><meta http-equiv="Content-Security-Policy" content="default-src 'self'">`
484+
);
485+
// Must precede any resource-bearing tag it governs.
486+
expect(out.indexOf("Content-Security-Policy")).toBeLessThan(
487+
out.indexOf("<title>")
488+
);
489+
});
490+
491+
it("synthesizes a <head> when the document omits one", () => {
492+
expect(
493+
injectCspMeta("<html><body>x</body></html>", "default-src 'none'")
494+
).toContain(
495+
`<html><head><meta http-equiv="Content-Security-Policy" content="default-src 'none'"></head>`
496+
);
497+
// No <html> at all: prepend so document.write still parses it first.
498+
expect(injectCspMeta("<p>x</p>", "default-src 'none'")).toBe(
499+
`<meta http-equiv="Content-Security-Policy" content="default-src 'none'"><p>x</p>`
500+
);
501+
});
502+
503+
it("escapes attribute-breaking chars so widget metadata can't corrupt the policy", () => {
504+
// A `"` in widget-derived CSP content must not break out of content="…"
505+
// and truncate/disable the injected policy (or inject sibling markup).
506+
const out = injectCspMeta(
507+
"<!doctype html><html><head></head><body></body></html>",
508+
`connect-src 'self' https://x"></head><script>alert(1)</script>`
509+
);
510+
expect(out).not.toContain(`x"></head>`); // no real breakout
511+
expect(out).not.toContain("<script>alert(1)</script>"); // not injected as markup
512+
expect(out).toContain("&quot;");
513+
expect(out).toContain("&lt;script&gt;alert(1)&lt;/script&gt;");
514+
// Single quotes are valid inside a double-quoted attribute -> left as-is.
515+
expect(out).toContain("connect-src 'self'");
516+
});
517+
});
518+
519+
describe("McpAppBrowserHarness — widget-declared CSP enforcement", () => {
520+
// Guest that probes three origins via fetch (a connect-src concern) the
521+
// instant it parses — before the bridge — so the injected <meta> CSP (first
522+
// in <head>) governs them. `.invalid` is a reserved TLD (RFC 2606): a
523+
// CSP-allowed probe leaves the machine and merely fails DNS, while a
524+
// CSP-blocked probe never makes a network attempt and logs a violation.
525+
const PROBE_GUEST_SRC = `
526+
fetch("https://conn-ok.invalid/a").catch(() => {});
527+
fetch("https://res-only.invalid/b").catch(() => {});
528+
fetch("https://nope.invalid/c").catch(() => {});
529+
import { App } from "@modelcontextprotocol/ext-apps";
530+
const app = new App({ name: "fixture-csp", version: "1.0.0" });
531+
(async () => {
532+
await app.connect();
533+
const d = document.createElement("div");
534+
d.textContent = "csp fixture";
535+
d.style.cssText = "font-size:32px;padding:40px";
536+
document.body.appendChild(d);
537+
})();
538+
`;
539+
540+
// Single-origin probe used for the undeclared-default and reset cases.
541+
const ONE_PROBE_GUEST_SRC = `
542+
fetch("https://anywhere.invalid/x").catch(() => {});
543+
import { App } from "@modelcontextprotocol/ext-apps";
544+
const app = new App({ name: "fixture-csp1", version: "1.0.0" });
545+
(async () => {
546+
await app.connect();
547+
const d = document.createElement("div");
548+
d.textContent = "csp1 fixture";
549+
d.style.cssText = "font-size:32px;padding:40px";
550+
document.body.appendChild(d);
551+
})();
552+
`;
553+
554+
let probeHtml = "";
555+
let oneProbeHtml = "";
556+
beforeAll(async () => {
557+
probeHtml = guestHtml(await bundleGuest(PROBE_GUEST_SRC));
558+
oneProbeHtml = guestHtml(await bundleGuest(ONE_PROBE_GUEST_SRC));
559+
}, 60_000);
560+
561+
it("enforces directive separation: fetch obeys connect_domains, not resource_domains", async () => {
562+
const h = makeHarness();
563+
const obs = await h.renderWidget({
564+
toolCallId: "csp-1",
565+
toolName: "show_widget",
566+
serverId: "srv",
567+
html: probeHtml,
568+
cspMeta: {
569+
connect_domains: ["https://conn-ok.invalid"],
570+
resource_domains: ["https://res-only.invalid"],
571+
},
572+
});
573+
expect(obs.status).toBe("rendered");
574+
const errs = (obs.consoleErrors ?? []).join("\n");
575+
// The "Connecting to '<url>'" prefix names the BLOCKED url itself
576+
// (so it can't be confused with conn-ok appearing in the echoed directive).
577+
expect(errs).toMatch(/Connecting to 'https:\/\/res-only\.invalid/);
578+
expect(errs).toMatch(/Connecting to 'https:\/\/nope\.invalid/);
579+
// connect_domains origin is permitted by connect-src -> no CSP violation.
580+
expect(errs).not.toMatch(/Connecting to 'https:\/\/conn-ok\.invalid/);
581+
}, 30_000);
582+
583+
it("policies undeclared widgets with the SEP restrictive default", async () => {
584+
const h = makeHarness();
585+
const obs = await h.renderWidget({
586+
toolCallId: "csp-default",
587+
toolName: "show_widget",
588+
serverId: "srv",
589+
html: oneProbeHtml,
590+
// no cspMeta -> widget-declared default: connect-src 'self' + loopback.
591+
});
592+
expect(obs.status).toBe("rendered");
593+
const errs = (obs.consoleErrors ?? []).join("\n");
594+
expect(errs).toMatch(/Connecting to 'https:\/\/anywhere\.invalid/);
595+
}, 30_000);
596+
597+
it("re-derives the policy per widget: a later undeclared widget loses the grant", async () => {
598+
const h = makeHarness();
599+
const first = await h.renderWidget({
600+
toolCallId: "csp-2a",
601+
toolName: "show_widget",
602+
serverId: "srv",
603+
html: oneProbeHtml,
604+
cspMeta: { connect_domains: ["https://anywhere.invalid"] },
605+
});
606+
expect(first.status).toBe("rendered");
607+
expect((first.consoleErrors ?? []).join("\n")).not.toMatch(
608+
/Connecting to 'https:\/\/anywhere\.invalid/
609+
);
610+
611+
// Same harness, same probe — but THIS widget declares nothing, so the
612+
// injected CSP reverts to the restrictive default and blocks the fetch.
613+
const second = await h.renderWidget({
614+
toolCallId: "csp-2b",
615+
toolName: "show_widget",
616+
serverId: "srv",
617+
html: oneProbeHtml,
618+
});
619+
expect(second.status).toBe("rendered");
620+
expect((second.consoleErrors ?? []).join("\n")).toMatch(
621+
/Connecting to 'https:\/\/anywhere\.invalid/
622+
);
623+
}, 45_000);
624+
});

0 commit comments

Comments
 (0)