@@ -774,7 +774,7 @@ def _ensure_proxy_scheme(proxy_url: str) -> str:
774774 return proxy_url if "://" in proxy_url else f"http://{ proxy_url } "
775775
776776
777- def _assemble_socks_url (
777+ def _assemble_proxy_url (
778778 scheme : str ,
779779 host : str ,
780780 port : int | None ,
@@ -785,7 +785,7 @@ def _assemble_socks_url(
785785 query : str = "" ,
786786 fragment : str = "" ,
787787) -> str :
788- """Build a SOCKS URL from already-percent-encoded credentials and host parts.
788+ """Build a proxy URL from already-percent-encoded credentials and host parts.
789789
790790 ``enc_pass is None`` means no password (no colon in userinfo). Empty string
791791 means present-but-empty (colon preserved). This mirrors the distinction
@@ -816,7 +816,7 @@ def _reconstruct_socks_url(proxy: ProxySettings) -> str:
816816 enc_user = quote (username , safe = "" )
817817 # Dict convention: empty/missing password → no colon.
818818 enc_pass = quote (password , safe = "" ) if password else None
819- return _assemble_socks_url (
819+ return _assemble_proxy_url (
820820 parsed .scheme , parsed .hostname or "" , parsed .port ,
821821 enc_user , enc_pass , parsed .path ,
822822 )
@@ -856,7 +856,7 @@ def _normalize_socks_string_url(url: str) -> str:
856856 else :
857857 raw_pass = None
858858 enc_pass = None
859- normalized = _assemble_socks_url (
859+ normalized = _assemble_proxy_url (
860860 parsed .scheme , parsed .hostname or "" , parsed .port ,
861861 enc_user , enc_pass ,
862862 parsed .path , parsed .params , parsed .query , parsed .fragment ,
@@ -1061,6 +1061,81 @@ def _parse_proxy_url(proxy: str) -> dict[str, Any]:
10611061 return result
10621062
10631063
1064+ def _has_credentials (proxy : str | ProxySettings ) -> bool :
1065+ """Check if the proxy has inline or dict-level credentials."""
1066+ if isinstance (proxy , dict ):
1067+ return bool (proxy .get ("username" ))
1068+ return "@" in proxy
1069+
1070+
1071+ def _reconstruct_http_url (proxy : ProxySettings ) -> str :
1072+ """Reconstruct an HTTP(S) proxy URL with inline credentials from a Playwright proxy dict."""
1073+ server = proxy .get ("server" , "" )
1074+ username = proxy .get ("username" , "" )
1075+ password = proxy .get ("password" , "" )
1076+ if not username :
1077+ return server
1078+ parsed = urlparse (_ensure_proxy_scheme (server ))
1079+ enc_user = quote (username , safe = "" )
1080+ enc_pass = quote (password , safe = "" ) if password else None
1081+ return _assemble_proxy_url (
1082+ parsed .scheme , parsed .hostname or "" , parsed .port ,
1083+ enc_user , enc_pass , parsed .path ,
1084+ )
1085+
1086+
1087+ def _normalize_http_string_url (url : str ) -> str :
1088+ """Re-encode credentials in an HTTP(S) proxy URL string for --proxy-server.
1089+
1090+ Same pattern as ``_normalize_socks_string_url`` — decode then re-encode to
1091+ ensure Chromium's proxy URL parser handles special chars correctly.
1092+ """
1093+ normalized = url if "://" in url else f"http://{ url } "
1094+ try :
1095+ parsed = urlparse (normalized )
1096+ _ = parsed .port
1097+ except ValueError as e :
1098+ logger .warning ("Malformed HTTP proxy URL, passing through unchanged: %s" , e )
1099+ return normalized
1100+ if parsed .username is None and parsed .password is None :
1101+ return normalized
1102+ raw_user = parsed .username or ""
1103+ enc_user = quote (unquote (raw_user ), safe = "" ) if raw_user else ""
1104+ if parsed .password is not None :
1105+ raw_pass = parsed .password
1106+ enc_pass = quote (unquote (raw_pass ), safe = "" ) if raw_pass else ""
1107+ else :
1108+ raw_pass = None
1109+ enc_pass = None
1110+ result = _assemble_proxy_url (
1111+ parsed .scheme , parsed .hostname or "" , parsed .port ,
1112+ enc_user , enc_pass ,
1113+ parsed .path , parsed .params , parsed .query , parsed .fragment ,
1114+ )
1115+ if enc_user != raw_user or enc_pass != raw_pass :
1116+ logger .info (
1117+ "Auto URL-encoded HTTP proxy credentials (special characters "
1118+ "detected). Pre-encode the URL to suppress this notice."
1119+ )
1120+ return result
1121+
1122+
# Lowest binary version accepted by _supports_http_proxy_inline_auth().
_HTTP_PROXY_INLINE_AUTH_MIN_VERSION = "146.0.7680.177.5"
# Platform tags on which inline HTTP proxy credentials are allowed.
_HTTP_PROXY_INLINE_AUTH_PLATFORMS = {"linux-x64", "windows-x64"}


def _supports_http_proxy_inline_auth() -> bool:
    """Tell whether inline HTTP proxy credentials can be used here.

    True only when the current platform tag is listed in
    ``_HTTP_PROXY_INLINE_AUTH_PLATFORMS`` and the Chromium version reported
    by the config module is at least ``_HTTP_PROXY_INLINE_AUTH_MIN_VERSION``.
    """
    from .config import get_platform_tag, get_chromium_version, _version_tuple

    if get_platform_tag() in _HTTP_PROXY_INLINE_AUTH_PLATFORMS:
        installed = _version_tuple(get_chromium_version())
        required = _version_tuple(_HTTP_PROXY_INLINE_AUTH_MIN_VERSION)
        return installed >= required
    # Unsupported platform: the version is never consulted.
    return False
1137+
1138+
10641139def _is_socks_proxy (proxy : str | ProxySettings | None ) -> bool :
10651140 """Check if the proxy uses SOCKS5 protocol."""
10661141 if proxy is None :
@@ -1074,8 +1149,9 @@ def _resolve_proxy_config(
10741149) -> tuple [dict [str , Any ], list [str ]]:
10751150 """Resolve proxy into Playwright kwargs and Chrome args.
10761151
1077- Playwright rejects SOCKS5 proxies with credentials in its proxy dict,
1078- so SOCKS5 is passed via --proxy-server Chrome arg instead.
1152+ Proxies with credentials (SOCKS5 or HTTP/HTTPS) are passed via Chrome's
1153+ --proxy-server flag with inline credentials, bypassing Playwright's CDP
1154+ auth interceptor which breaks on some proxies and Google domains (#182).
10791155
10801156 Returns:
10811157 (proxy_kwargs, extra_chrome_args) — one or both will be empty.
@@ -1096,7 +1172,20 @@ def _resolve_proxy_config(
10961172 # passwords at '=' and other special chars (#157).
10971173 return {}, [f"--proxy-server={ _normalize_socks_string_url (proxy )} " ]
10981174
1099- # HTTP/HTTPS: use Playwright's proxy dict as before
1175+ # HTTP/HTTPS with credentials on supported platforms: bypass Playwright's
1176+ # CDP auth interceptor, pass directly to Chrome via --proxy-server with
1177+ # inline creds. Chrome sends Proxy-Authorization preemptively, avoiding
1178+ # the 407 round-trip that breaks on some proxies (#182).
1179+ if _has_credentials (proxy ) and _supports_http_proxy_inline_auth ():
1180+ if isinstance (proxy , dict ):
1181+ url = _reconstruct_http_url (proxy )
1182+ extra_args = [f"--proxy-server={ url } " ]
1183+ if proxy .get ("bypass" ):
1184+ extra_args .append (f"--proxy-bypass-list={ proxy ['bypass' ]} " )
1185+ return {}, extra_args
1186+ return {}, [f"--proxy-server={ _normalize_http_string_url (proxy )} " ]
1187+
1188+ # HTTP/HTTPS without credentials: use Playwright's proxy dict
11001189 if isinstance (proxy , dict ):
11011190 return {"proxy" : proxy }, []
11021191 return {"proxy" : _parse_proxy_url (proxy )}, []
0 commit comments