Skip to content

Commit 317dc49

Browse files
committed
Release 2.2.1
1 parent 218633b commit 317dc49

File tree

2 files changed

+66
-27
lines changed

2 files changed

+66
-27
lines changed

src/downloader/http_gateway.py

Lines changed: 53 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,10 @@ def debug(self, *args: Any) -> None: ...
3737

3838

3939
class HttpConfig(TypedDict):
    """Proxy settings consumed by the HTTP gateway.

    Every key is always present; a value of None means "not configured".
    """

    # Parsed proxy URL used for plain-http requests (result of urlparse).
    http_proxy: Optional[ParseResult]
    # Parsed proxy URL used for https requests (result of urlparse).
    https_proxy: Optional[ParseResult]
    # Extra headers merged into plain-http requests sent through http_proxy,
    # e.g. {'Proxy-Authorization': 'Basic ...'} when the URL carries credentials.
    http_proxy_headers: Optional[Dict[str, str]]
    # Extra headers for the https proxy, e.g. a Proxy-Authorization entry.
    https_proxy_headers: Optional[Dict[str, str]]
4244

4345

4446
class HttpGateway:
@@ -79,13 +81,14 @@ def open(self, url: str, method: Optional[str] = None, body: Any = None, headers
7981
if self._logger is not None: self._logger.debug(f'^^^^ {method} {url}')
8082
url = self._process_url(url)
8183
parsed_url = urlparse(url)
82-
if parsed_url.scheme not in {'http', 'https'}: raise HttpGatewayException(f"URL '{url}' has wrong scheme '{parsed_url.scheme}'.")
84+
scheme_code = _scheme_dict.get(parsed_url.scheme, -1)
85+
if scheme_code == -1: raise HttpGatewayException(f"URL '{url}' has wrong scheme '{parsed_url.scheme}'.")
8386
final_url, conn = self._request(
8487
url,
8588
parsed_url,
8689
method,
8790
body,
88-
{**_default_headers, **headers} if isinstance(headers, dict) else _default_headers,
91+
self._make_headers(headers, is_http=scheme_code==0),
8992
)
9093
if self._logger is not None: self._logger.debug(f'HTTP {conn.response.status}: {final_url}\n'
9194
f'1st byte @ {time.monotonic() - now:.3f}s\nvvvv\n')
@@ -95,6 +98,13 @@ def open(self, url: str, method: Optional[str] = None, body: Any = None, headers
9598
conn.finish_response()
9699
if self._logger is not None: self._logger.print(f'|||| Done: {final_url} ({time.monotonic() - now:.3f}s)')
97100

101+
def _make_headers(self, headers: Any, is_http: bool) -> dict[str, str]:
    """Build the header mapping for one outgoing request.

    Args:
        headers: Caller-supplied headers; anything that is not a dict is ignored.
        is_http: True when the target URL uses the plain 'http' scheme.

    Returns:
        The default headers overlaid with the caller's headers and, for
        plain-http requests with configured proxy headers, overlaid with
        those proxy headers last so they take precedence. When there is
        nothing to overlay, the shared default-header mapping itself is
        returned (not a copy).
    """
    caller_headers = headers if isinstance(headers, dict) else None

    # .get() instead of indexing: a config dict that lacks the
    # 'http_proxy_headers' key must behave like "no proxy headers"
    # rather than raise KeyError.
    proxy_headers = None
    if is_http and self._config:
        proxy_headers = self._config.get('http_proxy_headers')

    if proxy_headers:
        return {**_default_headers, **(caller_headers or {}), **proxy_headers}
    if caller_headers is None:
        return _default_headers
    return {**_default_headers, **caller_headers}
107+
98108
def cleanup(self) -> None:
99109
self._out_of_service = True
100110
total_cleared = 0
@@ -245,23 +255,51 @@ def _fill_redirects_swap(self, now: float, lock: threading.Lock, redirects: Dict
245255
return size != len(self._redirects_swap)
246256

247257

258+
# Supported URL schemes mapped to small integer codes (0 = http, 1 = https).
# Lookups for any other scheme are expected to fall back to -1 via .get().
_scheme_dict = {
    'http': 0,
    'https': 1,
}
262+
248263
def http_config(http_proxy: Optional[str], https_proxy: Optional[str]) -> HttpConfig:
    """Build the gateway proxy configuration from two proxy URLs.

    Args:
        http_proxy: Proxy URL for plain-http requests, or None/empty.
        https_proxy: Proxy URL for https requests, or None/empty. When it is
            missing, the http proxy URL is used for https requests as well.

    Returns:
        An HttpConfig with all four keys present. A proxy URL that has no
        hostname or whose scheme is not 'http'/'https' is ignored, leaving
        the corresponding entries as None.
    """
    config: HttpConfig = {
        "http_proxy": None,
        "https_proxy": None,
        "http_proxy_headers": None,
        "https_proxy_headers": None,
    }

    if not http_proxy and not https_proxy:
        return config

    if http_proxy:
        config['http_proxy'], config['http_proxy_headers'] = _proxy_settings(http_proxy)

    # At least one URL is set at this point, so the https side always has a
    # source: its own URL, or the http proxy URL as a fallback.
    config['https_proxy'], config['https_proxy_headers'] = _proxy_settings(https_proxy or http_proxy)

    return config


def _proxy_settings(proxy_url: str) -> tuple[Optional[ParseResult], Optional[Dict[str, str]]]:
    """Parse one proxy URL into (parsed_url, auth_headers); (None, None) if unusable."""
    parsed = urlparse(proxy_url)
    if not parsed.hostname or parsed.scheme not in ('http', 'https'):
        return None, None

    auth_header = _make_proxy_auth_header(parsed)
    return parsed, ({'Proxy-Authorization': auth_header} if auth_header else None)
264293

294+
295+
def _make_proxy_auth_header(proxy: ParseResult) -> Optional[str]:
    """Return a 'Basic ...' Proxy-Authorization value for a parsed proxy URL.

    Args:
        proxy: Parsed proxy URL, possibly carrying 'user:password@' userinfo.

    Returns:
        The header value, or None unless both a username and a password
        are present in the URL.
    """
    if not (proxy.username and proxy.password):
        return None

    import base64
    from urllib.parse import unquote

    # urlparse leaves userinfo percent-encoded; credentials containing
    # reserved characters (e.g. '@' written as '%40') must be decoded
    # before they are sent to the proxy.
    credentials = f"{unquote(proxy.username)}:{unquote(proxy.password)}"
    encoded = base64.b64encode(credentials.encode('utf-8')).decode('ascii')
    return f"Basic {encoded}"
302+
265303
USER_AGENT = 'Downloader/2.X (Linux; [email protected])'
266304
_default_headers = {'User-Agent': USER_AGENT, 'Connection': 'keep-alive', 'Keep-Alive': 'timeout=120'}
267305

@@ -420,23 +458,27 @@ def clear_timed_outs(self, now: float) -> int:
420458
def create_http_connection(scheme: str, netloc: str, timeout: float, ctx: ssl.SSLContext, config: Optional[dict[str, Any]]) -> HTTPConnection:
421459
if scheme == 'http':
422460
if config and config['http_proxy']:
423-
proxy_scheme, proxy_host, proxy_port = config['http_proxy']
424-
if proxy_scheme == 'https':
461+
proxy = config['http_proxy']
462+
proxy_host = proxy.hostname
463+
proxy_port = proxy.port or (443 if proxy.scheme == 'https' else 80)
464+
if proxy.scheme == 'https':
425465
return HTTPSConnection(proxy_host, proxy_port, timeout=timeout, context=ctx)
426466
return HTTPConnection(proxy_host, proxy_port, timeout=timeout)
427467
return HTTPConnection(netloc, timeout=timeout)
428468

429469
elif scheme == 'https':
430470
if config and config['https_proxy']:
431-
proxy_scheme, proxy_host, proxy_port = config['https_proxy']
471+
proxy = config['https_proxy']
472+
proxy_host = proxy.hostname
473+
proxy_port = proxy.port or (443 if proxy.scheme == 'https' else 80)
432474
parsed_netloc = urlparse(f'//{netloc}')
433475
target_host = parsed_netloc.hostname
434476
target_port = parsed_netloc.port or 443
435477
if not target_host:
436478
raise HttpGatewayException(f"Invalid netloc: {netloc}")
437479

438480
conn = HTTPSConnection(proxy_host, proxy_port, timeout=timeout, context=ctx)
439-
conn.set_tunnel(target_host, target_port)
481+
conn.set_tunnel(target_host, target_port, headers=config.get('https_proxy_headers'))
440482
return conn
441483
return HTTPSConnection(netloc, timeout=timeout, context=ctx)
442484

src/test/exploratory/http_gateway_connections/explore_http_gateway_with_real_urls.py

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -71,23 +71,20 @@ def main() -> None:
7171

7272
http_proxy_url = os.environ.get('HTTP_PROXY')
7373
https_proxy_url = os.environ.get('HTTPS_PROXY')
74-
config = http_config(http_proxy=http_proxy_url, https_proxy=https_proxy_url) if (http_proxy_url or https_proxy_url) else None
75-
76-
if config:
77-
logger.print('PROXY CONFIGURATION:')
78-
if config.get('http_proxy'):
79-
scheme, host, port = config['http_proxy']
80-
logger.print(f' HTTP Proxy: {scheme}://{host}:{port}')
81-
else:
82-
logger.print(f' HTTP Proxy: None (direct connection)')
83-
84-
if config.get('https_proxy'):
85-
scheme, host, port = config['https_proxy']
86-
logger.print(f' HTTPS Proxy: {scheme}://{host}:{port}')
87-
else:
88-
logger.print(f' HTTPS Proxy: None (direct connection)')
74+
config = http_config(http_proxy=http_proxy_url, https_proxy=https_proxy_url)
75+
76+
logger.print('PROXY CONFIGURATION:')
77+
if config.get('http_proxy'):
78+
scheme, host, port = config['http_proxy'].scheme, config['http_proxy'].hostname, config['http_proxy'].port
79+
logger.print(f' HTTP Proxy: {scheme}://{host}:{port}')
80+
else:
81+
logger.print(f' HTTP Proxy: None (direct connection)')
82+
83+
if config.get('https_proxy'):
84+
scheme, host, port = config['https_proxy'].scheme, config['https_proxy'].hostname, config['https_proxy'].port
85+
logger.print(f' HTTPS Proxy: {scheme}://{host}:{port}')
8986
else:
90-
logger.print('NO PROXY - Using direct connection')
87+
logger.print(f' HTTPS Proxy: None (direct connection)')
9188

9289
with HttpGateway(ssl_ctx=ssl.create_default_context(), timeout=180, logger=logger, config=config) as gateway:
9390
def fetch_url(input_url: str):

0 commit comments

Comments
 (0)