Skip to content

Commit 50a068c

Browse files
refactor(security): SanitizeUrl uses Uri.TryCreate, not manual scheme parse
The original comment claimed Uri.TryCreate was "too lenient" because it parses `javascript:alert(1)` as a valid Uri. That's true, but the SafeUrlSchemes allow-list filter on uri.Scheme runs *after* parsing and rejects `javascript` (and every other non-http/https/mailto scheme) the same way. So the System.Uri version is equally safe and avoids the hand-rolled RFC 3986 scheme-character validator that was the actual maintenance risk. Behavior preserved: - http/https/mailto pass through unchanged (allow-list, case-insensitive) - javascript:/data:/vbscript:/file:/ftp:/ssh: → "about:blank" - Relative URLs (no scheme) pass through unchanged - Empty input passes through unchanged - unsafeAllowed=true bypasses the filter entirely Tests added (new file, 25 tests): TASK-045 XSS-fence had zero direct coverage before this commit. The theory rows pin each known XSS-vector scheme and the case-insensitive normalization contract. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent e647680 commit 50a068c

2 files changed

Lines changed: 114 additions & 17 deletions

File tree

src/Reactor/Markdown/MarkdownHtml.cs

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -89,23 +89,17 @@ internal static string SanitizeUrl(string url, bool unsafeAllowed)
8989
{
9090
if (unsafeAllowed) return url;
9191
if (string.IsNullOrEmpty(url)) return url;
92-
// Relative URLs (no scheme) are safe; only absolute schemes need
93-
// checking. Pull the scheme by hand because Uri.TryCreate is too
94-
// lenient — it would parse `javascript:alert(1)` as a valid Uri.
95-
int colon = url.IndexOf(':');
96-
if (colon <= 0) return url; // no scheme = treat as path-relative
97-
// Schemes per RFC 3986: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
98-
for (int i = 0; i < colon; i++)
99-
{
100-
var c = url[i];
101-
bool ok = (i == 0)
102-
? (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
103-
: (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
104-
|| (c >= '0' && c <= '9') || c == '+' || c == '-' || c == '.';
105-
if (!ok) return url; // not a real scheme — treat as relative
106-
}
107-
var scheme = url[..colon];
108-
return SafeUrlSchemes.Contains(scheme) ? url : "about:blank";
92+
93+
// Uri.TryCreate happily parses `javascript:alert(1)` as a valid
94+
// absolute Uri — that's fine, because the allow-list filter on
95+
// uri.Scheme below rejects it. The point is to use System.Uri's
96+
// RFC 3986 scheme/host decomposition rather than hand-rolled
97+
// string parsing, which is fragile and a known source of bypass
98+
// bugs in URL sanitizers.
99+
if (!Uri.TryCreate(url, UriKind.Absolute, out var uri))
100+
return url; // Not absolute — treat as path-relative; safe.
101+
102+
return SafeUrlSchemes.Contains(uri.Scheme) ? url : "about:blank";
109103
}
110104

111105
private sealed class HtmlRenderer
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
using System.Reflection;
2+
using Xunit;
3+
4+
namespace Microsoft.UI.Reactor.Tests.Markdown;
5+
6+
/// <summary>
7+
/// Tests for the internal MarkdownHtml.SanitizeUrl scheme-allow-list.
8+
/// TASK-045 — the XSS fence on href/src attributes emitted by the
9+
/// Markdown renderer. Allow-list: http, https, mailto. Anything else
10+
/// becomes `about:blank`.
11+
/// </summary>
12+
public class SanitizeUrlTests
13+
{
14+
private static readonly Type MarkdownHtmlType =
15+
typeof(Microsoft.UI.Reactor.Factories).Assembly
16+
.GetType("Microsoft.UI.Reactor.Markdown.MarkdownHtml")
17+
?? throw new InvalidOperationException(
18+
"MarkdownHtml type not found in Reactor assembly.");
19+
20+
private static string Sanitize(string url, bool unsafeAllowed = false)
21+
{
22+
var mi = MarkdownHtmlType.GetMethod("SanitizeUrl",
23+
BindingFlags.Static | BindingFlags.NonPublic)!;
24+
return (string)mi.Invoke(null, new object?[] { url, unsafeAllowed })!;
25+
}
26+
27+
// ── Allow-listed schemes pass through unchanged ───────────────
28+
29+
[Theory]
30+
[InlineData("http://example.com/")]
31+
[InlineData("https://example.com/path?q=1")]
32+
[InlineData("HTTPS://EXAMPLE.COM")] // case-insensitive scheme
33+
[InlineData("mailto:foo@bar.com")]
34+
[InlineData("mailto:foo@bar.com?subject=hi")]
35+
public void Safe_Schemes_Pass_Through(string url)
36+
{
37+
Assert.Equal(url, Sanitize(url));
38+
}
39+
40+
// ── Relative URLs are not absolute — pass through unchanged ───
41+
42+
[Theory]
43+
[InlineData("/path/to/thing")]
44+
[InlineData("relative/path.html")]
45+
[InlineData("../up/one")]
46+
[InlineData("#fragment-only")]
47+
[InlineData("?query=only")]
48+
public void Relative_Urls_Pass_Through(string url)
49+
{
50+
Assert.Equal(url, Sanitize(url));
51+
}
52+
53+
// ── XSS vectors are rewritten to about:blank ──────────────────
54+
55+
[Theory]
56+
// The canonical XSS payload. A renderer that emits this in an href
57+
// attribute lets the page execute arbitrary JS on click.
58+
[InlineData("javascript:alert(1)")]
59+
[InlineData("JAVASCRIPT:alert(1)")] // case-insensitive
60+
[InlineData("javascript:void(0)")]
61+
// data: URIs can carry script in text/html or SVG payloads.
62+
[InlineData("data:text/html,<script>alert(1)</script>")]
63+
[InlineData("data:image/svg+xml,<svg onload=alert(1)>")]
64+
// vbscript: — IE legacy but still a known fingerprint payload.
65+
[InlineData("vbscript:msgbox(1)")]
66+
// file:// — disclosure / SSRF surface even on non-script targets.
67+
[InlineData("file:///C:/windows/win.ini")]
68+
// ftp, ssh, etc. — not on the allow-list.
69+
[InlineData("ftp://example.com/")]
70+
[InlineData("ssh://user@host")]
71+
// Capitalized variants — System.Uri normalizes scheme to lowercase
72+
// before comparison, and SafeUrlSchemes is OrdinalIgnoreCase, so
73+
// both layers fail closed regardless.
74+
[InlineData("Javascript:alert(1)")]
75+
[InlineData("DATA:text/html,x")]
76+
public void Disallowed_Schemes_Become_AboutBlank(string url)
77+
{
78+
Assert.Equal("about:blank", Sanitize(url));
79+
}
80+
81+
// ── Empty / null-ish input ────────────────────────────────────
82+
83+
[Fact]
84+
public void Empty_String_Passes_Through()
85+
{
86+
Assert.Equal("", Sanitize(""));
87+
}
88+
89+
// ── unsafeAllowed escape hatch bypasses the filter entirely ───
90+
91+
[Theory]
92+
[InlineData("javascript:alert(1)")]
93+
[InlineData("data:text/html,x")]
94+
[InlineData("file:///C:/x")]
95+
public void UnsafeAllowed_Bypasses_AllowList(string url)
96+
{
97+
// Pin: when the renderer is configured with AllowUnsafeUrls (host
98+
// is presenting trusted markdown, e.g. local devtools), every URL
99+
// passes through. A regression that filtered even in unsafe mode
100+
// would silently break that opt-in.
101+
Assert.Equal(url, Sanitize(url, unsafeAllowed: true));
102+
}
103+
}

0 commit comments

Comments
 (0)