2222import time
2323from contextlib import contextmanager
2424from email .utils import parsedate_to_datetime
25- from typing import Type , Tuple , Any , Optional , Generator , List , Dict , Union , Protocol , TypeVar , Generic
25+ from typing import Type , Tuple , Any , Optional , Generator , List , Dict , Union , Protocol , TypeVar , Generic , TypedDict
2626from urllib .parse import urlparse , ParseResult , urlunparse
2727from http .client import HTTPConnection , HTTPSConnection , HTTPResponse , HTTPException
2828from types import TracebackType
@@ -36,12 +36,18 @@ def print(self, *args: Any) -> None: ...
3636 def debug (self , * args : Any ) -> None : ...
3737
3838
class HttpConfig(TypedDict):
    """Proxy endpoints for plain-HTTP and HTTPS requests.

    Each entry is either ``None`` or a ``(scheme, host, port)`` tuple.
    """
    http_proxy: Optional[tuple[str, str, int]]  # (scheme, host, port)
    https_proxy: Optional[tuple[str, str, int]]  # (scheme, host, port)
42+
43+
3944class HttpGateway :
40- def __init__ (self , ssl_ctx : ssl .SSLContext , timeout : float , logger : Optional [HttpLogger ] = None ) -> None :
45+ def __init__ (self , ssl_ctx : ssl .SSLContext , timeout : float , logger : Optional [HttpLogger ] = None , config : Optional [ dict [ str , Any ]] = None ) -> None :
4146 now = time .monotonic ()
4247 self ._ssl_ctx = ssl_ctx
4348 self ._timeout = timeout
4449 self ._logger = logger
50+ self ._config = config
4551 self ._connections : Dict [_QueueId , _ConnectionQueue ] = {}
4652 self ._connections_lock = threading .Lock ()
4753 self ._clean_timeout_connections_timer = now
@@ -79,7 +85,7 @@ def open(self, url: str, method: Optional[str] = None, body: Any = None, headers
7985 parsed_url ,
8086 method ,
8187 body ,
82- headers or _default_headers ,
88+ { ** _default_headers , ** headers } if isinstance ( headers , dict ) else _default_headers ,
8389 )
8490 if self ._logger is not None : self ._logger .debug (f'HTTP { conn .response .status } : { final_url } \n '
8591 f'1st byte @ { time .monotonic () - now :.3f} s\n vvvv\n ' )
@@ -166,7 +172,7 @@ def _process_queue_id(self, queue_id: '_QueueId') -> '_QueueId': return _redirec
166172 def _take_connection (self , queue_id : '_QueueId' ) -> '_Connection' :
167173 with self ._connections_lock :
168174 if queue_id not in self ._connections :
169- self ._connections [queue_id ] = _ConnectionQueue (queue_id , self ._timeout , self ._ssl_ctx , self ._logger )
175+ self ._connections [queue_id ] = _ConnectionQueue (queue_id , self ._timeout , self ._ssl_ctx , self ._logger , self . _config )
170176 return self ._connections [queue_id ].pull ()
171177
172178 def _clean_timeout_connections (self , now : float ) -> None :
@@ -238,7 +244,26 @@ def _fill_redirects_swap(self, now: float, lock: threading.Lock, redirects: Dict
238244
239245 return size != len (self ._redirects_swap )
240246
241- _default_headers = {'Connection' : 'keep-alive' , 'Keep-Alive' : 'timeout=120' }
247+
def _parse_proxy(proxy_url: Optional[str]) -> Optional[Tuple[str, str, int]]:
    """Return ``(scheme, host, port)`` for *proxy_url*, or ``None`` if it is unusable."""
    if not proxy_url:
        return None

    try:
        parsed = urlparse(proxy_url)
        # Accessing .port validates it: non-numeric or out-of-range ports raise ValueError.
        port = parsed.port
    except ValueError:
        return None

    if not parsed.hostname or parsed.scheme not in ('http', 'https'):
        return None

    return parsed.scheme, parsed.hostname, port or (443 if parsed.scheme == 'https' else 80)


def http_config(http_proxy: Optional[str], https_proxy: Optional[str]) -> 'HttpConfig':
    """Build the proxy configuration from proxy URLs.

    Args:
        http_proxy: URL of the proxy for ``http`` requests, or ``None``/empty.
        https_proxy: URL of the proxy for ``https`` requests. When it is
            ``None``/empty, *http_proxy* is used for ``https`` requests too.

    Returns:
        A mapping with ``http_proxy`` and ``https_proxy`` entries, each either
        ``None`` or a ``(scheme, host, port)`` tuple. The port defaults to 443
        for ``https`` proxies and 80 otherwise.

    A proxy URL that cannot be used (no hostname, a scheme other than
    ``http``/``https``, or a malformed port) yields ``None`` for that entry
    instead of raising.
    """
    return {
        'http_proxy': _parse_proxy(http_proxy),
        'https_proxy': _parse_proxy(https_proxy or http_proxy),
    }
264+
# Value sent in the User-Agent request header.
# NOTE(review): the contact address inside this literal looks redacted/obfuscated — confirm against the upstream source.
USER_AGENT = 'Downloader/2.X (Linux; [email protected] )'
# Baseline request headers; HttpGateway.open layers caller-supplied headers on top of these.
_default_headers = {'User-Agent': USER_AGENT, 'Connection': 'keep-alive', 'Keep-Alive': 'timeout=120'}


# Connection-queue key: element 0 is passed to create_http_connection as the scheme, element 1 as the netloc.
_QueueId = Tuple[str, str]
@@ -274,7 +299,7 @@ def __init__(self, conn_id: int, http: HTTPConnection, connection_queue: '_Conne
274299 self ._timeout : float = http .timeout if http .timeout is not None else 120.0
275300 self ._last_use_time : float = 0.0
276301 self ._uses : int = 0
277- self ._max_uses : float = sys .float_info . max
302+ self ._max_uses : int = sys .maxsize
278303 self ._response : Optional [Union [HTTPResponse , '_FinishedResponse' ]] = None
279304 self ._response_headers = _ResponseHeaders (logger )
280305
@@ -306,8 +331,11 @@ def response_headers(self) -> '_ResponseHeaders':
306331 return self ._response_headers
307332
308333 def finish_response (self ) -> None :
309- if self ._close_response () and self ._uses < self ._max_uses :
310- self ._connection_queue .push (self )
334+ if self ._close_response ():
335+ if self ._uses < self ._max_uses :
336+ self ._connection_queue .push (self )
337+ else :
338+ self ._http .close ()
311339
312340 def _close_response (self ) -> bool :
313341 if isinstance (self ._response , _FinishedResponse ):
@@ -340,11 +368,12 @@ class _FinishedResponse: pass
340368
341369
342370class _ConnectionQueue :
343- def __init__ (self , queue_id : _QueueId , timeout : float , ctx : ssl .SSLContext , logger : Optional [HttpLogger ]) -> None :
371+ def __init__ (self , queue_id : _QueueId , timeout : float , ctx : ssl .SSLContext , logger : Optional [HttpLogger ], config : Optional [ dict [ str , Any ]] ) -> None :
344372 self .id = queue_id
345373 self ._timeout = timeout
346374 self ._ctx = ctx
347375 self ._logger = logger
376+ self ._config = config
348377 self ._queue : List [_Connection ] = []
349378 self ._queue_swap : List [_Connection ] = []
350379 self ._lock = threading .Lock ()
@@ -354,7 +383,7 @@ def pull(self) -> _Connection:
354383 with self ._lock :
355384 if len (self ._queue ) == 0 :
356385 self ._last_conn_id += 1
357- http_conn = create_http_connection (self .id [0 ], self .id [1 ], self ._timeout , self ._ctx )
386+ http_conn = create_http_connection (self .id [0 ], self .id [1 ], self ._timeout , self ._ctx , self . _config )
358387 return _Connection (conn_id = self ._last_conn_id , http = http_conn , connection_queue = self , logger = self ._logger )
359388 return self ._queue .pop ()
360389
@@ -388,10 +417,31 @@ def clear_timed_outs(self, now: float) -> int:
388417 return expired_count
389418
390419
def create_http_connection(scheme: str, netloc: str, timeout: float, ctx: ssl.SSLContext, config: Optional[dict[str, Any]] = None) -> HTTPConnection:
    """Create an unconnected HTTP(S) connection object for *netloc*.

    Args:
        scheme: ``'http'`` or ``'https'``.
        netloc: Target ``host`` or ``host:port``.
        timeout: Socket timeout, in seconds, given to the connection.
        ctx: SSL context used for every TLS connection created here.
        config: Optional proxy settings. The ``http_proxy`` and ``https_proxy``
            entries are each ``None`` or a ``(scheme, host, port)`` tuple. A
            missing entry is treated the same as ``None``.

    Returns:
        A connection to the target, or to the configured proxy for *scheme*.

    Raises:
        HttpGatewayException: If *scheme* is not supported, or if *netloc*
            cannot be parsed while setting up an HTTPS tunnel.
    """
    if scheme == 'http':
        proxy = config.get('http_proxy') if config else None
        if not proxy:
            return HTTPConnection(netloc, timeout=timeout)

        # The connection is opened to the proxy rather than to netloc.
        # NOTE(review): callers presumably need to send an absolute-URI request target through it — confirm.
        proxy_scheme, proxy_host, proxy_port = proxy
        if proxy_scheme == 'https':
            return HTTPSConnection(proxy_host, proxy_port, timeout=timeout, context=ctx)
        return HTTPConnection(proxy_host, proxy_port, timeout=timeout)

    if scheme == 'https':
        proxy = config.get('https_proxy') if config else None
        if not proxy:
            return HTTPSConnection(netloc, timeout=timeout, context=ctx)

        # The proxy's own scheme is not consulted in this branch.
        _, proxy_host, proxy_port = proxy
        try:
            parsed_netloc = urlparse(f'//{netloc}')
            target_host = parsed_netloc.hostname
            # .port raises ValueError for a non-numeric or out-of-range port.
            target_port = parsed_netloc.port or 443
        except ValueError as e:
            raise HttpGatewayException(f"Invalid netloc: {netloc}") from e
        if not target_host:
            raise HttpGatewayException(f"Invalid netloc: {netloc}")

        conn = HTTPSConnection(proxy_host, proxy_port, timeout=timeout, context=ctx)
        # Ask the proxy for a CONNECT tunnel to the real target.
        conn.set_tunnel(target_host, target_port)
        return conn

    raise HttpGatewayException(f"Scheme {scheme} not supported")
395445
396446
397447class _ResponseHeaders :
@@ -426,16 +476,15 @@ def redirect_params(self, status: int) -> Tuple[Optional[str], Optional[float]]:
426476 return new_url , None
427477
428478 age = self ._headers .get ('age' , 0 )
429- try :
430- age = int (age )
431- except Exception as e :
432- if self ._logger is not None : self ._logger .debug (f"Could not parse Age from { age } " , e )
433- age = 0
479+ if age != 0 :
480+ try :
481+ age = int (age )
482+ except Exception as e :
483+ if self ._logger is not None : self ._logger .debug (f"Could not parse Age from { age } " , e )
484+ age = 0
434485
435486 return new_url , time .monotonic () + max_age - age
436487
437- pass
438-
439488 expires = self ._headers .get ('expires' , None )
440489 if expires is not None :
441490 try :
0 commit comments