Skip to content

Commit 4228707

Browse files
committed
feat: route HTTP proxy credentials through --proxy-server
Bypass Playwright's CDP Fetch.authRequired interceptor for authenticated HTTP proxies by passing inline credentials via Chrome's --proxy-server flag. Chrome sends Proxy-Authorization preemptively, avoiding the 407 round-trip that breaks on some proxies and Google domains (#182). Gated on platform (linux-x64, windows-x64) and binary version >= 146.0.7680.177.5. Unsupported platforms fall back to Playwright's proxy dict. Puppeteer falls back to page.authenticate() on unsupported platforms.
1 parent 864cae2 commit 4228707

8 files changed

Lines changed: 544 additions & 86 deletions

File tree

cloakbrowser/browser.py

Lines changed: 96 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -774,7 +774,7 @@ def _ensure_proxy_scheme(proxy_url: str) -> str:
774774
return proxy_url if "://" in proxy_url else f"http://{proxy_url}"
775775

776776

777-
def _assemble_socks_url(
777+
def _assemble_proxy_url(
778778
scheme: str,
779779
host: str,
780780
port: int | None,
@@ -785,7 +785,7 @@ def _assemble_socks_url(
785785
query: str = "",
786786
fragment: str = "",
787787
) -> str:
788-
"""Build a SOCKS URL from already-percent-encoded credentials and host parts.
788+
"""Build a proxy URL from already-percent-encoded credentials and host parts.
789789
790790
``enc_pass is None`` means no password (no colon in userinfo). Empty string
791791
means present-but-empty (colon preserved). This mirrors the distinction
@@ -816,7 +816,7 @@ def _reconstruct_socks_url(proxy: ProxySettings) -> str:
816816
enc_user = quote(username, safe="")
817817
# Dict convention: empty/missing password → no colon.
818818
enc_pass = quote(password, safe="") if password else None
819-
return _assemble_socks_url(
819+
return _assemble_proxy_url(
820820
parsed.scheme, parsed.hostname or "", parsed.port,
821821
enc_user, enc_pass, parsed.path,
822822
)
@@ -856,7 +856,7 @@ def _normalize_socks_string_url(url: str) -> str:
856856
else:
857857
raw_pass = None
858858
enc_pass = None
859-
normalized = _assemble_socks_url(
859+
normalized = _assemble_proxy_url(
860860
parsed.scheme, parsed.hostname or "", parsed.port,
861861
enc_user, enc_pass,
862862
parsed.path, parsed.params, parsed.query, parsed.fragment,
@@ -1061,6 +1061,81 @@ def _parse_proxy_url(proxy: str) -> dict[str, Any]:
10611061
return result
10621062

10631063

1064+
def _has_credentials(proxy: str | ProxySettings) -> bool:
1065+
"""Check if the proxy has inline or dict-level credentials."""
1066+
if isinstance(proxy, dict):
1067+
return bool(proxy.get("username"))
1068+
return "@" in proxy
1069+
1070+
1071+
def _reconstruct_http_url(proxy: ProxySettings) -> str:
1072+
"""Reconstruct an HTTP(S) proxy URL with inline credentials from a Playwright proxy dict."""
1073+
server = proxy.get("server", "")
1074+
username = proxy.get("username", "")
1075+
password = proxy.get("password", "")
1076+
if not username:
1077+
return server
1078+
parsed = urlparse(_ensure_proxy_scheme(server))
1079+
enc_user = quote(username, safe="")
1080+
enc_pass = quote(password, safe="") if password else None
1081+
return _assemble_proxy_url(
1082+
parsed.scheme, parsed.hostname or "", parsed.port,
1083+
enc_user, enc_pass, parsed.path,
1084+
)
1085+
1086+
1087+
def _normalize_http_string_url(url: str) -> str:
1088+
"""Re-encode credentials in an HTTP(S) proxy URL string for --proxy-server.
1089+
1090+
Same pattern as ``_normalize_socks_string_url`` — decode then re-encode to
1091+
ensure Chromium's proxy URL parser handles special chars correctly.
1092+
"""
1093+
normalized = url if "://" in url else f"http://{url}"
1094+
try:
1095+
parsed = urlparse(normalized)
1096+
_ = parsed.port
1097+
except ValueError as e:
1098+
logger.warning("Malformed HTTP proxy URL, passing through unchanged: %s", e)
1099+
return normalized
1100+
if parsed.username is None and parsed.password is None:
1101+
return normalized
1102+
raw_user = parsed.username or ""
1103+
enc_user = quote(unquote(raw_user), safe="") if raw_user else ""
1104+
if parsed.password is not None:
1105+
raw_pass = parsed.password
1106+
enc_pass = quote(unquote(raw_pass), safe="") if raw_pass else ""
1107+
else:
1108+
raw_pass = None
1109+
enc_pass = None
1110+
result = _assemble_proxy_url(
1111+
parsed.scheme, parsed.hostname or "", parsed.port,
1112+
enc_user, enc_pass,
1113+
parsed.path, parsed.params, parsed.query, parsed.fragment,
1114+
)
1115+
if enc_user != raw_user or enc_pass != raw_pass:
1116+
logger.info(
1117+
"Auto URL-encoded HTTP proxy credentials (special characters "
1118+
"detected). Pre-encode the URL to suppress this notice."
1119+
)
1120+
return result
1121+
1122+
1123+
# Minimum binary version required for HTTP proxy inline credentials.
_HTTP_PROXY_INLINE_AUTH_MIN_VERSION = "146.0.7680.177.5"
# Platform tags on which HTTP proxy inline credentials are enabled.
_HTTP_PROXY_INLINE_AUTH_PLATFORMS = {"linux-x64", "windows-x64"}


def _supports_http_proxy_inline_auth() -> bool:
    """Check if the current platform's binary supports HTTP proxy inline credentials.

    Requires both a supported platform AND a binary version with preemptive
    proxy auth.

    Returns:
        True when the platform tag is in ``_HTTP_PROXY_INLINE_AUTH_PLATFORMS``
        and the binary version is at least
        ``_HTTP_PROXY_INLINE_AUTH_MIN_VERSION``. False otherwise, including
        when the platform tag or version cannot be determined or parsed, so
        callers fall back to the non-inline proxy path.
    """
    from .config import get_platform_tag, get_chromium_version, _version_tuple

    try:
        if get_platform_tag() not in _HTTP_PROXY_INLINE_AUTH_PLATFORMS:
            return False
        current = _version_tuple(get_chromium_version())
        minimum = _version_tuple(_HTTP_PROXY_INLINE_AUTH_MIN_VERSION)
    except Exception as e:
        # This is a capability probe: a failure to detect the platform or
        # version must not abort proxy resolution. Treat it as "unsupported",
        # matching the JS wrapper's supportsHttpProxyInlineAuth().
        logger.debug("Could not determine HTTP proxy inline-auth support: %s", e)
        return False
    return current >= minimum
1137+
1138+
10641139
def _is_socks_proxy(proxy: str | ProxySettings | None) -> bool:
10651140
"""Check if the proxy uses SOCKS5 protocol."""
10661141
if proxy is None:
@@ -1074,8 +1149,9 @@ def _resolve_proxy_config(
10741149
) -> tuple[dict[str, Any], list[str]]:
10751150
"""Resolve proxy into Playwright kwargs and Chrome args.
10761151
1077-
Playwright rejects SOCKS5 proxies with credentials in its proxy dict,
1078-
so SOCKS5 is passed via --proxy-server Chrome arg instead.
1152+
Proxies with credentials (SOCKS5, or HTTP/HTTPS on supported platforms) are passed via Chrome's
1153+
--proxy-server flag with inline credentials, bypassing Playwright's CDP
1154+
auth interceptor which breaks on some proxies and Google domains (#182).
10791155
10801156
Returns:
10811157
(proxy_kwargs, extra_chrome_args) — one or both will be empty.
@@ -1096,7 +1172,20 @@ def _resolve_proxy_config(
10961172
# passwords at '=' and other special chars (#157).
10971173
return {}, [f"--proxy-server={_normalize_socks_string_url(proxy)}"]
10981174

1099-
# HTTP/HTTPS: use Playwright's proxy dict as before
1175+
# HTTP/HTTPS with credentials on supported platforms: bypass Playwright's
1176+
# CDP auth interceptor, pass directly to Chrome via --proxy-server with
1177+
# inline creds. Chrome sends Proxy-Authorization preemptively, avoiding
1178+
# the 407 round-trip that breaks on some proxies (#182).
1179+
if _has_credentials(proxy) and _supports_http_proxy_inline_auth():
1180+
if isinstance(proxy, dict):
1181+
url = _reconstruct_http_url(proxy)
1182+
extra_args = [f"--proxy-server={url}"]
1183+
if proxy.get("bypass"):
1184+
extra_args.append(f"--proxy-bypass-list={proxy['bypass']}")
1185+
return {}, extra_args
1186+
return {}, [f"--proxy-server={_normalize_http_string_url(proxy)}"]
1187+
1188+
# HTTP/HTTPS without credentials (or unsupported platform): use Playwright's proxy dict
11001189
if isinstance(proxy, dict):
11011190
return {"proxy": proxy}, []
11021191
return {"proxy": _parse_proxy_url(proxy)}, []

js/src/proxy.ts

Lines changed: 112 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
* Shared proxy URL parsing for Playwright and Puppeteer wrappers.
33
*/
44

5+
import { getChromiumVersion, getPlatformTag, parseVersion } from "./config.js";
6+
57
export interface ParsedProxy {
68
server: string;
79
username?: string;
@@ -155,11 +157,106 @@ export function normalizeSocksStringUrl(urlStr: string): string {
155157
}
156158
}
157159

160+
/** Minimum binary version required for HTTP proxy inline credentials. */
const HTTP_PROXY_INLINE_AUTH_MIN_VERSION = "146.0.7680.177.5";
/** Platform tags on which HTTP proxy inline credentials are enabled. */
const HTTP_PROXY_INLINE_AUTH_PLATFORMS = new Set(["linux-x64", "windows-x64"]);

/**
 * Report whether the current platform's binary supports HTTP proxy inline
 * credentials: the platform tag must be in the supported set and the binary
 * version must be at least the minimum. Any error while detecting either
 * yields `false`.
 */
export function supportsHttpProxyInlineAuth(): boolean {
  try {
    const platform = getPlatformTag();
    if (!HTTP_PROXY_INLINE_AUTH_PLATFORMS.has(platform)) {
      return false;
    }
    const have = parseVersion(getChromiumVersion());
    const need = parseVersion(HTTP_PROXY_INLINE_AUTH_MIN_VERSION);
    const width = Math.max(have.length, need.length);
    // Compare component by component; missing components count as 0.
    for (let idx = 0; idx < width; idx++) {
      const havePart = have[idx] ?? 0;
      const needPart = need[idx] ?? 0;
      if (havePart > needPart) return true;
      if (havePart < needPart) return false;
    }
    // All components equal: the minimum version itself is supported.
    return true;
  } catch {
    return false;
  }
}
178+
179+
/**
 * Report whether the proxy carries credentials: a string counts when it
 * contains "@", a dict counts when its `username` is truthy.
 */
function hasCredentials(proxy: string | ProxyDict): boolean {
  return typeof proxy === "string" ? proxy.includes("@") : Boolean(proxy.username);
}
183+
184+
/**
 * Build an HTTP(S) proxy URL with inline credentials from a proxy dict.
 *
 * Without a username the `server` value is returned untouched. Otherwise the
 * server is parsed (a scheme is added first if missing), the
 * URI-component-encoded username and, when non-empty, password are set on
 * it, and one trailing slash is stripped from the serialized URL.
 */
export function reconstructHttpUrl(proxy: ProxyDict): string {
  if (!proxy.username) {
    return proxy.server;
  }
  const parsed = new URL(ensureProxyScheme(proxy.server));
  parsed.username = encodeURIComponent(proxy.username);
  if (proxy.password) {
    parsed.password = encodeURIComponent(proxy.password);
  }
  const serialized = parsed.href;
  return serialized.endsWith("/") ? serialized.slice(0, -1) : serialized;
}
194+
195+
/**
 * Re-encode the credentials of an HTTP(S) proxy URL string for --proxy-server.
 * Same pattern as normalizeSocksStringUrl.
 *
 * A URL without a scheme gets `http://` prepended. The URL is split by hand
 * into scheme, userinfo, host part and suffix; the userinfo is decoded
 * leniently and encoded again. URLs with no userinfo, with a non-numeric
 * port, or whose credentials cannot be normalized are returned as the
 * scheme-prefixed input.
 */
export function normalizeHttpStringUrl(urlStr: string): string {
  const normalized = urlStr.includes("://") ? urlStr : `http://${urlStr}`;
  const parts = /^([a-z][a-z0-9+\-.]*):\/\/(.*)$/i.exec(normalized);
  if (parts === null) return normalized;
  const scheme = parts[1];
  const rest = parts[2];

  // The authority ends at the first path, query or fragment delimiter.
  const cut = rest.search(/[/?#]/);
  const authority = cut === -1 ? rest : rest.slice(0, cut);
  const suffix = cut === -1 ? "" : rest.slice(cut);

  // The last "@" separates userinfo from host.
  const atIdx = authority.lastIndexOf("@");
  if (atIdx === -1) return normalized;
  const userinfo = authority.slice(0, atIdx);
  const hostPart = authority.slice(atIdx + 1);

  // Search for the port colon only after a closing "]" so that colons inside
  // a bracketed IPv6 literal are not mistaken for the port separator.
  const searchFrom = Math.max(hostPart.lastIndexOf("]"), 0);
  const portColonIdx = hostPart.indexOf(":", searchFrom);
  const portStr = portColonIdx === -1 ? "" : hostPart.slice(portColonIdx + 1);
  if (portStr !== "" && !/^\d+$/.test(portStr)) {
    console.warn(`[cloakbrowser] Malformed HTTP proxy URL, passing through unchanged: invalid port`);
    return normalized;
  }

  // The first ":" in userinfo separates user from password. A colon with
  // nothing after it means "present but empty".
  const colonIdx = userinfo.indexOf(":");
  const hasPassword = colonIdx !== -1;
  const rawUserEnc = hasPassword ? userinfo.slice(0, colonIdx) : userinfo;
  const rawPassEnc = hasPassword ? userinfo.slice(colonIdx + 1) : "";

  try {
    const reencode = (piece: string): string =>
      piece ? encodeURIComponent(lenientDecodeURIComponent(piece)) : "";
    const encUser = reencode(rawUserEnc);
    const encPass = hasPassword ? reencode(rawPassEnc) : null;

    let userinfoPart = "";
    if (encPass !== null) {
      userinfoPart = `${encUser}:${encPass}@`;
    } else if (encUser) {
      userinfoPart = `${encUser}@`;
    }
    const result = `${scheme}://${userinfoPart}${hostPart}${suffix}`;

    const credsChanged = encUser !== rawUserEnc || (hasPassword && encPass !== rawPassEnc);
    if (credsChanged) {
      console.info(
        "[cloakbrowser] Auto URL-encoded HTTP proxy credentials (special " +
          "characters detected). Pre-encode the URL to suppress this notice.",
      );
    }
    return result;
  } catch (e) {
    console.warn(`[cloakbrowser] Could not normalize HTTP proxy URL, passing through unchanged: ${(e as Error).message}`);
    return normalized;
  }
}
}
253+
158254
/**
159255
* Resolve proxy into Playwright option and/or Chrome args.
160256
*
161-
* Playwright rejects SOCKS5 proxies with credentials in its proxy dict,
162-
* so SOCKS5 is passed via --proxy-server Chrome arg instead.
257+
* Proxies with credentials (SOCKS5 or HTTP/HTTPS on supported platforms) are
258+
* passed via Chrome's --proxy-server flag with inline credentials, bypassing
259+
* Playwright's CDP auth interceptor which breaks on some proxies (#182).
163260
*/
164261
export function resolveProxyConfig(proxy: string | ProxyDict | undefined): ProxyConfig {
165262
if (!proxy) return { proxyArgs: [] };
@@ -177,7 +274,19 @@ export function resolveProxyConfig(proxy: string | ProxyDict | undefined): Proxy
177274
return { proxyArgs: args };
178275
}
179276

180-
// HTTP/HTTPS: use Playwright's proxy dict
277+
// HTTP/HTTPS with credentials on supported platforms: bypass Playwright's
278+
// CDP auth interceptor, use Chrome's preemptive Proxy-Authorization (#182).
279+
if (hasCredentials(proxy) && supportsHttpProxyInlineAuth()) {
280+
if (typeof proxy === "string") {
281+
return { proxyArgs: [`--proxy-server=${normalizeHttpStringUrl(proxy)}`] };
282+
}
283+
const httpUrl = reconstructHttpUrl(proxy);
284+
const args = [`--proxy-server=${httpUrl}`];
285+
if (proxy.bypass) args.push(`--proxy-bypass-list=${proxy.bypass}`);
286+
return { proxyArgs: args };
287+
}
288+
289+
// HTTP/HTTPS without credentials (or unsupported platform): use Playwright's proxy dict
181290
if (typeof proxy === "string") {
182291
return { proxyOption: parseProxyUrl(proxy), proxyArgs: [] };
183292
}

js/src/puppeteer.ts

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import type { LaunchOptions } from "./types.js";
99
import { IGNORE_DEFAULT_ARGS } from "./config.js";
1010
import { buildArgs } from "./args.js";
1111
import { ensureBinary } from "./download.js";
12-
import { isSocksProxy, parseProxyUrl, resolveProxyConfig } from "./proxy.js";
12+
import { isSocksProxy, normalizeHttpStringUrl, parseProxyUrl, reconstructHttpUrl, resolveProxyConfig, supportsHttpProxyInlineAuth } from "./proxy.js";
1313
import { maybeResolveGeoip, resolveWebrtcArgs } from "./geoip.js";
1414

1515
/** Resolve binary path, geoip, webrtc, and build final Chrome args. */
@@ -26,9 +26,9 @@ async function resolveArgs(options: LaunchOptions): Promise<{ binaryPath: string
2626

2727
/**
2828
* Resolve proxy into Chrome CLI args and optional HTTP auth credentials.
29-
* SOCKS5: Chrome supports inline credentials natively (RFC 1929 auth).
30-
* HTTP: Chrome does NOT support inline credentials — strip them and
31-
* use page.authenticate() for Proxy-Authorization headers instead.
29+
* SOCKS5: Chrome handles inline credentials natively (RFC 1929 auth).
30+
* HTTP on supported platforms: inline credentials via --proxy-server.
31+
* HTTP on unsupported platforms: strip credentials, use page.authenticate() fallback.
3232
*/
3333
function resolveProxy(options: LaunchOptions, args: string[]): { username: string; password: string } | undefined {
3434
if (!options.proxy) return undefined;
@@ -39,6 +39,23 @@ function resolveProxy(options: LaunchOptions, args: string[]): { username: strin
3939
return undefined;
4040
}
4141

42+
// On supported platforms: pass full URL with inline creds to --proxy-server
43+
if (supportsHttpProxyInlineAuth()) {
44+
if (typeof options.proxy === "string") {
45+
args.push(`--proxy-server=${normalizeHttpStringUrl(options.proxy)}`);
46+
return undefined;
47+
}
48+
const url = options.proxy.username
49+
? reconstructHttpUrl(options.proxy)
50+
: options.proxy.server;
51+
args.push(`--proxy-server=${url}`);
52+
if (options.proxy.bypass) {
53+
args.push(`--proxy-bypass-list=${options.proxy.bypass}`);
54+
}
55+
return undefined;
56+
}
57+
58+
// Unsupported platform: strip credentials, fall back to page.authenticate()
4259
if (typeof options.proxy === "string") {
4360
const { server, username, password } = parseProxyUrl(options.proxy);
4461
args.push(`--proxy-server=${server}`);
@@ -55,7 +72,7 @@ function resolveProxy(options: LaunchOptions, args: string[]): { username: strin
5572
return username ? { username, password: password ?? "" } : undefined;
5673
}
5774

58-
/** Apply proxy auth monkey-patch and humanize behavioral patching. */
75+
/** Apply proxy auth fallback (unsupported platforms) and humanize patching. */
5976
async function applyPostLaunch(
6077
browser: Browser,
6178
options: LaunchOptions,

0 commit comments

Comments
 (0)