Skip to content

Commit 71d723b

Browse files
kblok and claude committed
perf: optimize url blocking on navigation (upstream #14945)
Throw immediately in Frame.GoToAsync when the destination URL is blocked by blocklist/allowlist rules, instead of waiting for the browser to fail the request. Saves a network round-trip and aligns with upstream Puppeteer behavior.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 1ff280a commit 71d723b

7 files changed

Lines changed: 109 additions & 52 deletions

File tree

lib/PuppeteerSharp.Tests/NetworkRestrictionTests/NetworkRestrictionsTests.cs

Lines changed: 47 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ await page.GoToAsync(blockedUrl).ContinueWith(t =>
3434
});
3535

3636
Assert.That(error, Is.Not.Null);
37-
Assert.That(error.Message, Does.Contain("net::ERR_INTERNET_DISCONNECTED"));
37+
Assert.That(error.Message, Does.Contain("is blocked by blocklist/allowlist rules"));
3838
}
3939

4040
[Test, PuppeteerTest("network_restrictions.spec", "Network Restrictions", "should block window.location.href navigation to URLs in the blocklist")]
@@ -156,20 +156,29 @@ public async Task ShouldDetachFromTargetsViolatingBlocklistWhenConnectingToRunni
156156
}
157157
}
158158

159-
[Test, PuppeteerTest("network_restrictions.spec", "Network Restrictions", "should not block chrome://version/ even if it matches blocklist")]
160-
public async Task ShouldNotBlockChromeVersionEvenIfItMatchesBlocklist()
159+
[Test, PuppeteerTest("network_restrictions.spec", "Network Restrictions", "should block chrome://version/ when it matches blocklist")]
160+
public async Task ShouldBlockChromeVersionWhenItMatchesBlocklist()
161161
{
162-
const string chromeUrl = "chrome://version/";
162+
const string blockedUrl = "chrome://version/";
163163
var options = TestConstants.DefaultBrowserOptions();
164-
options.BlockList = [chromeUrl];
164+
options.BlockList = [blockedUrl];
165165

166166
await using var browser = await Puppeteer.LaunchAsync(options, TestConstants.LoggerFactory);
167167
await using var page = await browser.NewPageAsync();
168168

169-
await page.GoToAsync(chromeUrl);
169+
Exception error = null;
170+
await page.GoToAsync(blockedUrl).ContinueWith(t =>
171+
{
172+
if (t.IsFaulted)
173+
{
174+
error = t.Exception?.InnerException ?? t.Exception;
175+
}
170176

171-
// Navigation should succeed as chrome:// URLs usually bypass the network
172-
Assert.That(page.Url, Is.EqualTo(chromeUrl));
177+
return t;
178+
});
179+
180+
Assert.That(error, Is.Not.Null);
181+
Assert.That(error.Message, Does.Contain("is blocked by blocklist/allowlist rules"));
173182
}
174183

175184
[Test, PuppeteerTest("network_restrictions.spec", "Network Restrictions", "should only allow navigation to URLs in the allowlist")]
@@ -199,7 +208,7 @@ await page.GoToAsync(blockedUrl).ContinueWith(t =>
199208

200209
Assert.That(page.Url, Is.Not.EqualTo(blockedUrl));
201210
Assert.That(error, Is.Not.Null);
202-
Assert.That(error.Message, Does.Contain("net::ERR_INTERNET_DISCONNECTED"));
211+
Assert.That(error.Message, Does.Contain("is blocked by blocklist/allowlist rules"));
203212
}
204213

205214
[Test, PuppeteerTest("network_restrictions.spec", "Network Restrictions", "should block window.location.href navigation to URLs not in the allowlist")]
@@ -382,4 +391,33 @@ public async Task ShouldThrowAnErrorForAnInvalidPattern()
382391

383392
Assert.That(error, Is.Not.Null);
384393
}
394+
395+
[Test, PuppeteerTest("network_restrictions.spec", "Network Restrictions", "should block frame.goto when the destination is in the blocklist")]
396+
public async Task ShouldBlockFrameGotoWhenDestinationIsInBlocklist()
397+
{
398+
var options = TestConstants.DefaultBrowserOptions();
399+
options.BlockList = ["*://*:*/empty.html"];
400+
401+
await using var browser = await Puppeteer.LaunchAsync(options, TestConstants.LoggerFactory);
402+
await using var page = await browser.NewPageAsync();
403+
404+
await page.GoToAsync(TestConstants.ServerUrl + "/frames/one-frame.html");
405+
var frame = Array.Find(page.Frames, f => f != page.MainFrame);
406+
Assert.That(frame, Is.Not.Null);
407+
408+
var blockedUrl = TestConstants.ServerUrl + "/empty.html";
409+
Exception error = null;
410+
await frame.GoToAsync(blockedUrl).ContinueWith(t =>
411+
{
412+
if (t.IsFaulted)
413+
{
414+
error = t.Exception?.InnerException ?? t.Exception;
415+
}
416+
417+
return t;
418+
});
419+
420+
Assert.That(error, Is.Not.Null);
421+
Assert.That(error.Message, Does.Contain("is blocked by blocklist/allowlist rules"));
422+
}
385423
}

lib/PuppeteerSharp/Cdp/CdpFrame.cs

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,13 @@ public override async Task<IResponse> GoToAsync(string url, NavigationOptions op
8888
throw new ArgumentNullException(nameof(options));
8989
}
9090

91+
if (CdpPage != null && !CdpPage.IsUrlAllowed(url))
92+
{
93+
throw new NavigationException(
94+
$"Navigation to {url} is blocked by blocklist/allowlist rules",
95+
url);
96+
}
97+
9198
var referrer = string.IsNullOrEmpty(options.Referer)
9299
? FrameManager.NetworkManager.ExtraHTTPHeaders?.GetValue(RefererHeaderName)
93100
: options.Referer;

lib/PuppeteerSharp/Cdp/CdpPage.cs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -901,6 +901,8 @@ internal static decimal ConvertPrintParameterToInches(object parameter)
901901
return pixels / 96;
902902
}
903903

904+
internal bool IsUrlAllowed(string url) => _targetManager.IsUrlAllowed(url);
905+
904906
/// <inheritdoc />
905907
protected override async Task ExposeFunctionAsync(string name, Delegate puppeteerFunction)
906908
{

lib/PuppeteerSharp/Cdp/ChromeTargetManager.cs

Lines changed: 43 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -112,6 +112,49 @@ await _connection.SendAsync(
112112

113113
public IEnumerable<ITarget> GetChildTargets(ITarget target) => target.ChildTargets;
114114

115+
public bool IsUrlAllowed(string url)
116+
{
117+
var hasBlockList = _blockList != null && _blockList.Length > 0;
118+
var hasAllowList = _allowList != null && _allowList.Length > 0;
119+
120+
if (!hasBlockList && !hasAllowList)
121+
{
122+
return true;
123+
}
124+
125+
// Always allow internal or setup pages
126+
if (string.IsNullOrEmpty(url) || url == "about:blank")
127+
{
128+
return true;
129+
}
130+
131+
if (hasBlockList)
132+
{
133+
foreach (var pattern in _blockList)
134+
{
135+
if (MatchesUrlPattern(url, pattern))
136+
{
137+
return false;
138+
}
139+
}
140+
}
141+
142+
if (hasAllowList)
143+
{
144+
foreach (var pattern in _allowList)
145+
{
146+
if (MatchesUrlPattern(url, pattern))
147+
{
148+
return true;
149+
}
150+
}
151+
152+
return false;
153+
}
154+
155+
return true;
156+
}
157+
115158
private static void ValidateUrlPatterns(string[] patterns)
116159
{
117160
if (patterns == null)
@@ -459,49 +502,6 @@ private void OnDetachedFromTarget(object sender, TargetDetachedFromTargetRespons
459502
TargetGone?.Invoke(this, new TargetChangedArgs { Target = target });
460503
}
461504

462-
private bool IsUrlAllowed(string url)
463-
{
464-
var hasBlockList = _blockList != null && _blockList.Length > 0;
465-
var hasAllowList = _allowList != null && _allowList.Length > 0;
466-
467-
if (!hasBlockList && !hasAllowList)
468-
{
469-
return true;
470-
}
471-
472-
// Always allow internal or setup pages
473-
if (string.IsNullOrEmpty(url) || url == "about:blank")
474-
{
475-
return true;
476-
}
477-
478-
if (hasBlockList)
479-
{
480-
foreach (var pattern in _blockList)
481-
{
482-
if (MatchesUrlPattern(url, pattern))
483-
{
484-
return false;
485-
}
486-
}
487-
}
488-
489-
if (hasAllowList)
490-
{
491-
foreach (var pattern in _allowList)
492-
{
493-
if (MatchesUrlPattern(url, pattern))
494-
{
495-
return true;
496-
}
497-
}
498-
499-
return false;
500-
}
501-
502-
return true;
503-
}
504-
505505
private async Task MaybeSetupNetworkBlockListAsync(CDPSession session)
506506
{
507507
var hasBlockList = _blockList != null && _blockList.Length > 0;

lib/PuppeteerSharp/Cdp/FirefoxTargetManager.cs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,8 @@ public async Task InitializeAsync()
6262

6363
public IEnumerable<ITarget> GetChildTargets(ITarget target) => [];
6464

65+
public bool IsUrlAllowed(string url) => true;
66+
6567
private void OnMessageReceived(object sender, MessageEventArgs e)
6668
{
6769
try

lib/PuppeteerSharp/Cdp/ITargetManager.cs

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,5 +49,12 @@ internal interface ITargetManager
4949
/// <param name="target">Target to evaluate.</param>
5050
/// <returns>A list of targets.</returns>
5151
IEnumerable<ITarget> GetChildTargets(ITarget target);
52+
53+
/// <summary>
54+
/// Validates a URL against the configured blocklist/allowlist patterns.
55+
/// </summary>
56+
/// <param name="url">URL to validate.</param>
57+
/// <returns><c>true</c> if the URL is allowed, otherwise <c>false</c>.</returns>
58+
bool IsUrlAllowed(string url);
5259
}
5360
}

lib/PuppeteerSharp/IFrame.cs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -243,6 +243,7 @@ public interface IFrame
243243
/// - the `timeout` is exceeded during navigation.
244244
/// - the remote server does not respond or is unreachable.
245245
/// - the main resource failed to load.
246+
/// - the URL is blocked by blocklist/allowlist rules.
246247
///
247248
/// <see cref="GoToAsync(string, int?, WaitUntilNavigation[])"/> will not throw an error when any valid HTTP status code is returned by the remote server,
248249
/// including 404 "Not Found" and 500 "Internal Server Error". The status code for such responses can be retrieved by calling <see cref="IResponse.Status"/>

0 commit comments

Comments
 (0)