Skip to content

Commit 111ca0c

Browse files
committed
Added envvar proxy support to http gateway and minor improvements.
1 parent a5d5af2 commit 111ca0c

File tree

8 files changed

+120
-28
lines changed

8 files changed

+120
-28
lines changed

src/downloader/config.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
DEFAULT_MINIMUM_SYSTEM_FREE_SPACE_MB, DEFAULT_MINIMUM_EXTERNAL_FREE_SPACE_MB
2626
from downloader.db_options import DbOptions
2727
from downloader.error import DownloaderError
28+
from downloader.http_gateway import HttpConfig
2829

2930

3031
class Environment(TypedDict):
@@ -43,6 +44,8 @@ class Environment(TypedDict):
4344
PC_LAUNCHER: Optional[str]
4445
DEBUG: str
4546
FAIL_ON_FILE_ERROR: str
47+
HTTP_PROXY: str
48+
HTTPS_PROXY: str
4649

4750

4851
@unique
@@ -85,7 +88,7 @@ class ConfigMisterSection(TypedDict):
8588
user_defined_options: List[str]
8689

8790

88-
class ConfigRequired(ConfigMisterSection):
91+
class ConfigRequired(ConfigMisterSection, HttpConfig):
8992
zip_file_count_threshold: int
9093
zip_accumulated_mb_threshold: int
9194
debug: bool
@@ -138,7 +141,9 @@ def default_config() -> Config:
138141
'commit': 'unknown',
139142
'fail_on_file_error': False,
140143
'minimum_system_free_space_mb': DEFAULT_MINIMUM_SYSTEM_FREE_SPACE_MB,
141-
'minimum_external_free_space_mb': DEFAULT_MINIMUM_EXTERNAL_FREE_SPACE_MB
144+
'minimum_external_free_space_mb': DEFAULT_MINIMUM_EXTERNAL_FREE_SPACE_MB,
145+
'http_proxy': None,
146+
'https_proxy': None
142147
}
143148

144149

src/downloader/config_reader.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
K_MINIMUM_EXTERNAL_FREE_SPACE_MB, STORAGE_PRIORITY_OFF, STORAGE_PRIORITY_PREFER_SD, \
3333
STORAGE_PRIORITY_PREFER_EXTERNAL, EXIT_ERROR_WRONG_SETUP
3434
from downloader.db_options import DbOptions, DbOptionsProps, DbOptionsValidationException
35+
from downloader.http_gateway import http_config
3536
from downloader.logger import Logger, time_str
3637

3738

@@ -158,6 +159,9 @@ def read_config(self, config_path: str) -> Config:
158159
result['logfile'] = str(launcher_path.with_suffix('.log'))
159160
result['curl_ssl'] = ''
160161

162+
if self._env['HTTP_PROXY'] or self._env['HTTPS_PROXY']:
163+
result.update(http_config(http_proxy=self._env['HTTP_PROXY'], https_proxy=self._env['HTTPS_PROXY']))
164+
161165
result['environment'] = self._env
162166

163167
if result['verbose']: self._logger.print(f'BENCH {time_str(self._start_time)}| Read config done.')

src/downloader/constants.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,10 @@ def FILE_7z_util_uninstalled_description() -> SafeFetchInfo: return {
180180
KENV_FAIL_ON_FILE_ERROR: Final[str] = 'FAIL_ON_FILE_ERROR'
181181
KENV_LOGFILE: Final[str] = 'LOGFILE'
182182
KENV_LOGLEVEL: Final[str] = 'LOGLEVEL'
183+
KENV_HTTP_PROXY: Final[str] = 'HTTP_PROXY'
184+
KENV_LC_HTTP_PROXY: Final[str] = 'http_proxy'
185+
KENV_HTTPS_PROXY: Final[str] = 'HTTPS_PROXY'
186+
KENV_LC_HTTPS_PROXY: Final[str] = 'https_proxy'
183187

184188
# Db State Signature
185189
DB_STATE_SIGNATURE_NO_HASH: Final[str] = 'non_initialized_hash'

src/downloader/full_run_service_factory.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,8 @@ def create(self, config: Config):
7777
http_gateway = HttpGateway(
7878
ssl_ctx=ssl_ctx,
7979
timeout=http_connection_timeout,
80-
logger=DebugOnlyLoggerDecorator(self._logger) if config['http_logging'] else None
80+
logger=DebugOnlyLoggerDecorator(self._logger) if config['http_logging'] else None,
81+
config=config
8182
)
8283
atexit.register(http_gateway.cleanup)
8384
safe_file_fetcher = SafeFileFetcher(config, system_file_system, self._logger, http_gateway, waiter)

src/downloader/http_gateway.py

Lines changed: 70 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
import time
2323
from contextlib import contextmanager
2424
from email.utils import parsedate_to_datetime
25-
from typing import Type, Tuple, Any, Optional, Generator, List, Dict, Union, Protocol, TypeVar, Generic
25+
from typing import Type, Tuple, Any, Optional, Generator, List, Dict, Union, Protocol, TypeVar, Generic, TypedDict
2626
from urllib.parse import urlparse, ParseResult, urlunparse
2727
from http.client import HTTPConnection, HTTPSConnection, HTTPResponse, HTTPException
2828
from types import TracebackType
@@ -36,12 +36,18 @@ def print(self, *args: Any) -> None: ...
3636
def debug(self, *args: Any) -> None: ...
3737

3838

39+
class HttpConfig(TypedDict):
    """Proxy settings consumed by the HTTP gateway.

    Each entry is either None (no proxy for that kind of URL) or a
    (scheme, host, port) triple describing the proxy endpoint.
    """

    # Proxy used for plain 'http' targets.
    http_proxy: Optional[Tuple[str, str, int]]
    # Proxy used for 'https' targets.
    https_proxy: Optional[Tuple[str, str, int]]
42+
43+
3944
class HttpGateway:
40-
def __init__(self, ssl_ctx: ssl.SSLContext, timeout: float, logger: Optional[HttpLogger] = None) -> None:
45+
def __init__(self, ssl_ctx: ssl.SSLContext, timeout: float, logger: Optional[HttpLogger] = None, config: Optional[dict[str, Any]] = None) -> None:
4146
now = time.monotonic()
4247
self._ssl_ctx = ssl_ctx
4348
self._timeout = timeout
4449
self._logger = logger
50+
self._config = config
4551
self._connections: Dict[_QueueId, _ConnectionQueue] = {}
4652
self._connections_lock = threading.Lock()
4753
self._clean_timeout_connections_timer = now
@@ -79,7 +85,7 @@ def open(self, url: str, method: Optional[str] = None, body: Any = None, headers
7985
parsed_url,
8086
method,
8187
body,
82-
headers or _default_headers,
88+
{**_default_headers, **headers} if isinstance(headers, dict) else _default_headers,
8389
)
8490
if self._logger is not None: self._logger.debug(f'HTTP {conn.response.status}: {final_url}\n'
8591
f'1st byte @ {time.monotonic() - now:.3f}s\nvvvv\n')
@@ -166,7 +172,7 @@ def _process_queue_id(self, queue_id: '_QueueId') -> '_QueueId': return _redirec
166172
def _take_connection(self, queue_id: '_QueueId') -> '_Connection':
167173
with self._connections_lock:
168174
if queue_id not in self._connections:
169-
self._connections[queue_id] = _ConnectionQueue(queue_id, self._timeout, self._ssl_ctx, self._logger)
175+
self._connections[queue_id] = _ConnectionQueue(queue_id, self._timeout, self._ssl_ctx, self._logger, self._config)
170176
return self._connections[queue_id].pull()
171177

172178
def _clean_timeout_connections(self, now: float) -> None:
@@ -238,7 +244,26 @@ def _fill_redirects_swap(self, now: float, lock: threading.Lock, redirects: Dict
238244

239245
return size != len(self._redirects_swap)
240246

241-
_default_headers = {'Connection': 'keep-alive', 'Keep-Alive': 'timeout=120'}
247+
248+
def _parse_proxy_url(url: Optional[str]) -> Optional[Tuple[str, str, int]]:
    """Turn a proxy URL into a (scheme, host, port) triple, or None if it is unusable."""
    if not url:
        return None

    try:
        parsed = urlparse(url)
        # Accessing .port validates it and raises ValueError for a
        # non-numeric or out-of-range value.
        port = parsed.port
    except ValueError:
        # A malformed value is ignored just like any other unusable proxy URL.
        return None

    if not parsed.hostname or parsed.scheme not in ('http', 'https'):
        return None

    return parsed.scheme, parsed.hostname, port or (443 if parsed.scheme == 'https' else 80)


def http_config(http_proxy: Optional[str], https_proxy: Optional[str]) -> 'HttpConfig':
    """Build the proxy part of the configuration from two proxy URLs.

    Args:
        http_proxy: Proxy URL for http targets; may be None or empty.
        https_proxy: Proxy URL for https targets; may be None or empty.
            When it is empty, the http_proxy URL is used for https as well.

    Returns:
        A dict with the keys 'http_proxy' and 'https_proxy'. Each value is a
        (scheme, host, port) triple, or None when the corresponding URL is
        missing, has no host, uses a scheme other than http/https, or cannot
        be parsed. A missing port defaults to 443 for https proxies and 80
        for http proxies.
    """
    return {
        'http_proxy': _parse_proxy_url(http_proxy),
        'https_proxy': _parse_proxy_url(https_proxy or http_proxy),
    }
264+
265+
USER_AGENT = 'Downloader/2.X (Linux; [email protected])'
266+
_default_headers = {'User-Agent': USER_AGENT, 'Connection': 'keep-alive', 'Keep-Alive': 'timeout=120'}
242267

243268

244269
_QueueId = Tuple[str, str]
@@ -274,7 +299,7 @@ def __init__(self, conn_id: int, http: HTTPConnection, connection_queue: '_Conne
274299
self._timeout: float = http.timeout if http.timeout is not None else 120.0
275300
self._last_use_time: float = 0.0
276301
self._uses: int = 0
277-
self._max_uses: float = sys.float_info.max
302+
self._max_uses: int = sys.maxsize
278303
self._response: Optional[Union[HTTPResponse, '_FinishedResponse']] = None
279304
self._response_headers = _ResponseHeaders(logger)
280305

@@ -306,8 +331,11 @@ def response_headers(self) -> '_ResponseHeaders':
306331
return self._response_headers
307332

308333
def finish_response(self) -> None:
309-
if self._close_response() and self._uses < self._max_uses:
310-
self._connection_queue.push(self)
334+
if self._close_response():
335+
if self._uses < self._max_uses:
336+
self._connection_queue.push(self)
337+
else:
338+
self._http.close()
311339

312340
def _close_response(self) -> bool:
313341
if isinstance(self._response, _FinishedResponse):
@@ -340,11 +368,12 @@ class _FinishedResponse: pass
340368

341369

342370
class _ConnectionQueue:
343-
def __init__(self, queue_id: _QueueId, timeout: float, ctx: ssl.SSLContext, logger: Optional[HttpLogger]) -> None:
371+
def __init__(self, queue_id: _QueueId, timeout: float, ctx: ssl.SSLContext, logger: Optional[HttpLogger], config: Optional[dict[str, Any]]) -> None:
344372
self.id = queue_id
345373
self._timeout = timeout
346374
self._ctx = ctx
347375
self._logger = logger
376+
self._config = config
348377
self._queue: List[_Connection] = []
349378
self._queue_swap: List[_Connection] = []
350379
self._lock = threading.Lock()
@@ -354,7 +383,7 @@ def pull(self) -> _Connection:
354383
with self._lock:
355384
if len(self._queue) == 0:
356385
self._last_conn_id += 1
357-
http_conn = create_http_connection(self.id[0], self.id[1], self._timeout, self._ctx)
386+
http_conn = create_http_connection(self.id[0], self.id[1], self._timeout, self._ctx, self._config)
358387
return _Connection(conn_id=self._last_conn_id, http=http_conn, connection_queue=self, logger=self._logger)
359388
return self._queue.pop()
360389

@@ -388,10 +417,31 @@ def clear_timed_outs(self, now: float) -> int:
388417
return expired_count
389418

390419

391-
def create_http_connection(scheme: str, netloc: str, timeout: float, ctx: ssl.SSLContext) -> HTTPConnection:
392-
if scheme == 'http': return HTTPConnection(netloc, timeout=timeout)
393-
elif scheme == 'https': return HTTPSConnection(netloc, timeout=timeout, context=ctx)
394-
else: raise HttpGatewayException(f"Scheme {scheme} not supported")
420+
def create_http_connection(scheme: str, netloc: str, timeout: float, ctx: ssl.SSLContext, config: Optional[dict[str, Any]]) -> HTTPConnection:
    """Create the connection object for a target, routed through a proxy when one is configured.

    Args:
        scheme: Target URL scheme; only 'http' and 'https' are supported.
        netloc: Target network location ('host' or 'host:port').
        timeout: Timeout handed to the connection object.
        ctx: SSL context used for every HTTPS connection created here.
        config: Optional mapping that may carry 'http_proxy' / 'https_proxy'
            entries, each a (scheme, host, port) triple or None. Missing keys
            are treated the same as None.

    Returns:
        For 'http': a connection to the proxy (HTTPS if the proxy scheme is
        'https', plain HTTP otherwise), or a plain connection to netloc when
        no proxy is set. For 'https': an HTTPSConnection to the proxy with a
        tunnel to the target registered via set_tunnel, or an HTTPSConnection
        to netloc when no proxy is set.

    Raises:
        HttpGatewayException: If the scheme is unsupported, or if netloc has
            no host or an invalid port while an https proxy is in use.
    """
    if scheme == 'http':
        proxy = config.get('http_proxy') if config else None
        if not proxy:
            return HTTPConnection(netloc, timeout=timeout)

        proxy_scheme, proxy_host, proxy_port = proxy
        if proxy_scheme == 'https':
            return HTTPSConnection(proxy_host, proxy_port, timeout=timeout, context=ctx)
        return HTTPConnection(proxy_host, proxy_port, timeout=timeout)

    if scheme == 'https':
        proxy = config.get('https_proxy') if config else None
        if not proxy:
            return HTTPSConnection(netloc, timeout=timeout, context=ctx)

        # The proxy's own scheme is not consulted on this path.
        _, proxy_host, proxy_port = proxy

        try:
            parsed_netloc = urlparse(f'//{netloc}')
            target_host = parsed_netloc.hostname
            # .port raises ValueError for a non-numeric or out-of-range port.
            target_port = parsed_netloc.port or 443
        except ValueError as e:
            # Surface it as the gateway's own exception type, like every other
            # failure raised from this function.
            raise HttpGatewayException(f"Invalid netloc: {netloc}") from e

        if not target_host:
            raise HttpGatewayException(f"Invalid netloc: {netloc}")

        conn = HTTPSConnection(proxy_host, proxy_port, timeout=timeout, context=ctx)
        conn.set_tunnel(target_host, target_port)
        return conn

    raise HttpGatewayException(f"Scheme {scheme} not supported")
395445

396446

397447
class _ResponseHeaders:
@@ -426,16 +476,15 @@ def redirect_params(self, status: int) -> Tuple[Optional[str], Optional[float]]:
426476
return new_url, None
427477

428478
age = self._headers.get('age', 0)
429-
try:
430-
age = int(age)
431-
except Exception as e:
432-
if self._logger is not None: self._logger.debug(f"Could not parse Age from {age}", e)
433-
age = 0
479+
if age != 0:
480+
try:
481+
age = int(age)
482+
except Exception as e:
483+
if self._logger is not None: self._logger.debug(f"Could not parse Age from {age}", e)
484+
age = 0
434485

435486
return new_url, time.monotonic() + max_age - age
436487

437-
pass
438-
439488
expires = self._headers.get('expires', None)
440489
if expires is not None:
441490
try:

src/downloader/main.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@
2626

2727
from downloader.config import Environment
2828
from downloader.config_reader import ConfigReader
29-
from downloader.constants import KENV_LOGLEVEL
29+
from downloader.constants import KENV_LOGLEVEL, KENV_LC_HTTP_PROXY, KENV_HTTP_PROXY, KENV_HTTPS_PROXY, \
30+
KENV_LC_HTTPS_PROXY
3031
from downloader.logger import TopLogger
3132
from downloader.full_run_service_factory import FullRunServiceFactory
3233

@@ -75,6 +76,8 @@ def read_env(default_commit: Optional[str]) -> Environment:
7576
'PC_LAUNCHER': os.getenv(KENV_PC_LAUNCHER, None),
7677
'DEBUG': os.getenv(KENV_DEBUG, 'false').lower(),
7778
'FAIL_ON_FILE_ERROR': os.getenv(KENV_FAIL_ON_FILE_ERROR, 'false'),
79+
'HTTP_PROXY': os.getenv(KENV_HTTP_PROXY) or os.getenv(KENV_LC_HTTP_PROXY),
80+
'HTTPS_PROXY': os.getenv(KENV_HTTPS_PROXY) or os.getenv(KENV_LC_HTTPS_PROXY)
7881
}
7982

8083

src/test/exploratory/http_gateway_connections/explore_http_gateway_with_real_urls.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from pathlib import Path
2828
from typing import List
2929

30-
from downloader.http_gateway import HttpGateway, HttpLogger
30+
from downloader.http_gateway import HttpGateway, HttpLogger, http_config
3131

3232
urls = [
3333
'https://google.com',
@@ -69,7 +69,27 @@ def main() -> None:
6969
cancelled = 0
7070
dir_path = f'{os.path.dirname(os.path.realpath(__file__))}/delme'
7171

72-
with HttpGateway(ssl_ctx=ssl.create_default_context(), timeout=180, logger=logger) as gateway:
72+
http_proxy_url = os.environ.get('HTTP_PROXY')
73+
https_proxy_url = os.environ.get('HTTPS_PROXY')
74+
config = http_config(http_proxy=http_proxy_url, https_proxy=https_proxy_url) if (http_proxy_url or https_proxy_url) else None
75+
76+
if config:
77+
logger.print('PROXY CONFIGURATION:')
78+
if config.get('http_proxy'):
79+
scheme, host, port = config['http_proxy']
80+
logger.print(f' HTTP Proxy: {scheme}://{host}:{port}')
81+
else:
82+
logger.print(f' HTTP Proxy: None (direct connection)')
83+
84+
if config.get('https_proxy'):
85+
scheme, host, port = config['https_proxy']
86+
logger.print(f' HTTPS Proxy: {scheme}://{host}:{port}')
87+
else:
88+
logger.print(f' HTTPS Proxy: None (direct connection)')
89+
else:
90+
logger.print('NO PROXY - Using direct connection')
91+
92+
with HttpGateway(ssl_ctx=ssl.create_default_context(), timeout=180, logger=logger, config=config) as gateway:
7393
def fetch_url(input_url: str):
7494
nonlocal interrupted, gateway, dir_path
7595
with gateway.open(input_url) as (url, res):

src/test/objects.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
K_FILTER, KENV_DEFAULT_DB_URL, KENV_DEFAULT_DB_ID, KENV_DEFAULT_BASE_PATH, KENV_ALLOW_REBOOT, KENV_DEBUG, MEDIA_FAT, MEDIA_USB0, MEDIA_USB1, \
2525
MEDIA_USB2, KENV_FAIL_ON_FILE_ERROR, KENV_UPDATE_LINUX, KENV_CURL_SSL, KENV_COMMIT, DEFAULT_CURL_SSL_OPTIONS, \
2626
MEDIA_USB3, KENV_LOGFILE, KENV_PC_LAUNCHER, DEFAULT_UPDATE_LINUX_ENV, K_DB_URL, K_SECTION, K_OPTIONS, KENV_FORCED_BASE_PATH, \
27-
FILE_MiSTer_old
27+
FILE_MiSTer_old, KENV_HTTP_PROXY, KENV_HTTPS_PROXY
2828
from downloader.db_options import DbOptions
2929
from downloader.other import empty_store_without_base_path
3030
from downloader.db_entity import DbEntity
@@ -150,6 +150,8 @@ def config_with(
150150
default_db_id=None,
151151
user_defined_options=None,
152152
minimum_free_space=None,
153+
file_checking=None,
154+
153155
databases: Dict[str, Any] = None):
154156

155157
config = default_config()
@@ -177,6 +179,8 @@ def config_with(
177179
config['user_defined_options'] = user_defined_options
178180
if minimum_free_space is not None:
179181
config['minimum_system_free_space_mb'] = minimum_free_space
182+
if file_checking is not None:
183+
config['file_checking'] = file_checking
180184
return config
181185

182186

@@ -782,6 +786,8 @@ def default_env() -> Environment:
782786
KENV_LOGFILE: None,
783787
KENV_LOGLEVEL: '',
784788
KENV_PC_LAUNCHER: None,
789+
KENV_HTTP_PROXY: None,
790+
KENV_HTTPS_PROXY: None
785791
}
786792

787793

0 commit comments

Comments
 (0)