Skip to content

Commit 2026876

Browse files
security: strip dangerous HTML from LLM outputs to prevent stored XSS (#5589)
1 parent 3aa2d93 commit 2026876

File tree

5 files changed

+70
-4
lines changed

5 files changed

+70
-4
lines changed

web/components/templates/requests/components/ChatOnlyView.tsx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { stripDangerousHtml } from "@/lib/sanitizeContent";
12
import { cn } from "@/lib/utils";
23
import {
34
FunctionCall,
@@ -271,7 +272,7 @@ function ChatBubble({
271272
)}
272273
>
273274
<Streamdown shikiTheme={shikiTheme}>
274-
{preserveLineBreaksForMarkdown(displayContent)}
275+
{preserveLineBreaksForMarkdown(stripDangerousHtml(displayContent))}
275276
</Streamdown>
276277
</div>
277278

web/components/templates/requests/components/Realtime.tsx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { stripDangerousHtml } from "@/lib/sanitizeContent";
12
import GlassHeader from "@/components/shared/universal/GlassHeader";
23
import { JsonRenderer } from "@/components/templates/requests/components/chatComponent/single/JsonRenderer";
34
import { logger } from "@/lib/telemetry/logger";
@@ -644,7 +645,7 @@ const SessionUpdate: React.FC<SessionUpdateProps> = ({ content }) => {
644645
>
645646
<div className="prose prose-sm dark:prose-invert prose-headings:text-slate-50 prose-p:text-slate-200 prose-a:text-cyan-200 hover:prose-a:text-cyan-100 prose-blockquote:border-slate-400 prose-blockquote:text-slate-300 prose-strong:text-white prose-em:text-slate-300 prose-code:text-yellow-200 prose-pre:bg-slate-800/50 prose-pre:text-slate-200 prose-ol:text-slate-200 prose-ul:text-slate-200 prose-li:text-slate-200 [&_ol>li::marker]:text-white [&_ul>li::marker]:text-white">
646647
<Streamdown shikiTheme={shikiTheme}>
647-
{preserveLineBreaksForMarkdown(sessionData.instructions)}
648+
{preserveLineBreaksForMarkdown(stripDangerousHtml(sessionData.instructions))}
648649
</Streamdown>
649650
</div>
650651
</div>

web/components/templates/requests/components/chatComponent/single/AssistantToolCalls.tsx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import AssistantToolCall from "./AssistantToolCall";
22

33
import MarkdownEditor from "@/components/shared/markdownEditor";
4+
import { stripDangerousHtml } from "@/lib/sanitizeContent";
45
import { cn } from "@/lib/utils";
56
import {
67
FunctionCall,
@@ -67,7 +68,7 @@ export default function AssistantToolCalls({
6768
content && (
6869
<div className="w-full whitespace-pre-wrap break-words p-2 text-xs">
6970
<Streamdown shikiTheme={shikiTheme}>
70-
{preserveLineBreaksForMarkdown(content)}
71+
{preserveLineBreaksForMarkdown(stripDangerousHtml(content))}
7172
</Streamdown>
7273
</div>
7374
)

web/components/templates/requests/components/chatComponent/single/TextMessage.tsx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { stripDangerousHtml } from "@/lib/sanitizeContent";
12
import { MappedLLMRequest, Message } from "@helicone-package/llm-mapper/types";
23
import { isJson } from "../ChatMessage";
34
import { JsonRenderer } from "./JsonRenderer";
@@ -124,7 +125,7 @@ export default function TextMessage({
124125
<>
125126
<div className="w-full whitespace-pre-wrap break-words text-sm">
126127
<Streamdown shikiTheme={shikiTheme}>
127-
{preserveLineBreaksForMarkdown(displayContent)}
128+
{preserveLineBreaksForMarkdown(stripDangerousHtml(displayContent))}
128129
</Streamdown>
129130
</div>
130131
{annotations && annotations.length > 0 && (

web/lib/sanitizeContent.ts

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/**
 * Tag names whose elements are removed outright (opening tag, content and
 * closing tag when paired; the lone tag otherwise).
 */
const DANGEROUS_TAGS = [
  "script",
  "iframe",
  "object",
  "embed",
  "style",
  "form",
  "input",
  "button",
  "textarea",
  "select",
  "applet",
  "base",
  "link",
  "meta",
  "svg",
  "math",
] as const;

/**
 * Pre-compiled patterns, one pair per dangerous tag.
 *
 * The `(?=[\s/>])` lookahead pins the end of the tag name so that only the
 * listed element is matched: `<meta ...>` is stripped, `<metadata>` is not.
 */
const DANGEROUS_TAG_PATTERNS = DANGEROUS_TAGS.map((tag) => ({
  // Paired tags with content: <tag ...>...</tag>
  paired: new RegExp(
    `<\\s*${tag}(?=[\\s/>])[^>]*>[\\s\\S]*?<\\s*/\\s*${tag}\\s*>`,
    "gi"
  ),
  // Self-closing or unclosed: <tag ... /> or <tag ...>
  unpaired: new RegExp(`<\\s*${tag}(?=[\\s/>])[^>]*>`, "gi"),
}));

/**
 * Inline event handler attributes (onclick, onload, onerror, ...).
 *
 * HTML tokenizers accept an attribute directly after a quoted value or a `/`
 * (`<img src="x"onerror=...>`, `<img/onerror=...>`), so those separators are
 * recognised in addition to whitespace. Leading whitespace is consumed with
 * the attribute; a leading quote or slash is left in place.
 */
const EVENT_HANDLER_ATTRIBUTE =
  /(?:\s+|(?<=["'\/]))on\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)/gi;

// "javascript" with optional tab/newline between letters: URL parsers drop
// those characters, so `java\tscript:` is still a javascript: URL.
const JAVASCRIPT_SCHEME =
  Array.from("javascript").join("[\\t\\n\\r]*") + "[\\t\\n\\r]*:";

/**
 * `javascript:` URLs in href/src/action attributes, whether the value is
 * double-quoted, single-quoted or unquoted. Leading whitespace and control
 * characters inside a quoted value are tolerated because URL parsers trim
 * them before reading the scheme.
 */
const JAVASCRIPT_URL_ATTRIBUTE = new RegExp(
  "(href|src|action)\\s*=\\s*(?:" +
    `"[\\x00-\\x20]*${JAVASCRIPT_SCHEME}[^"]*"` +
    `|'[\\x00-\\x20]*${JAVASCRIPT_SCHEME}[^']*'` +
    `|${JAVASCRIPT_SCHEME}[^\\s>]*` +
    ")",
  "gi"
);

/** Runs every stripping rule exactly once over `text`. */
function stripDangerousHtmlOnce(text: string): string {
  let cleaned = text;

  for (const { paired, unpaired } of DANGEROUS_TAG_PATTERNS) {
    cleaned = cleaned.replace(paired, "").replace(unpaired, "");
  }

  return cleaned
    .replace(EVENT_HANDLER_ATTRIBUTE, "")
    .replace(JAVASCRIPT_URL_ATTRIBUTE, '$1=""');
}

/**
 * Strips dangerous HTML tags from text content before markdown rendering.
 * Preserves markdown syntax and safe HTML tags (like <b>, <em>, <a>, etc.).
 *
 * This prevents stored XSS via LLM outputs containing malicious HTML
 * (e.g., <iframe srcdoc="<script>document.cookie</script>">).
 *
 * We use regex-based stripping rather than DOMPurify because DOMPurify
 * parses input as HTML, which mangles markdown syntax (backticks,
 * asterisks, brackets, etc.).
 *
 * Stripping is repeated until the text stops changing. A single pass is not
 * enough: deleting one match can splice the surrounding characters into a new
 * dangerous construct (`<scr<iframe>ipt src=x>` becomes `<script src=x>`).
 * Every replacement shortens the text, so the loop always terminates.
 *
 * Known limitation: the event-handler and `javascript:` rules run over the
 * whole text, not only inside tags, so look-alike plain text (for example
 * ` only=1`) is removed as well. This is regex-based defence in depth, not a
 * full HTML sanitizer; entity-encoded payloads are not decoded here.
 *
 * @param text - Raw text (typically an LLM output) that may embed HTML.
 * @returns The text with dangerous tags, inline event handlers and
 *   `javascript:` URLs removed. Non-string input is returned unchanged.
 */
export function stripDangerousHtml(text: string): string {
  // Callers are not always type-checked; pass anything else through untouched.
  if (typeof text !== "string") return text;

  let cleaned = text;
  let previous: string;

  do {
    previous = cleaned;
    cleaned = stripDangerousHtmlOnce(previous);
  } while (cleaned !== previous);

  return cleaned;
}

0 commit comments

Comments
 (0)