@@ -281,7 +281,7 @@ def enqueue(self, *,
281281 'max_retries' : max_retries }
282282 if isinstance (settings , dict ):
283283 try :
284- to_enqueue = CaptureSettings ( ** settings )
284+ to_enqueue = CaptureSettings . model_validate ( settings )
285285 except ValidationError as e :
286286 self .master_logger .warning (f'Unable to validate settings: { e } .' )
287287 raise CaptureSettingsError ('Invalid settings' , e )
@@ -446,16 +446,24 @@ async def _capture(self, uuid: str, priority: int) -> None:
446446 retry = False
447447 try :
448448 result : CaptureResponse = {}
449- _to_capture : dict [bytes , Any ] = {}
450449 url : str = ''
451- _to_capture = self .redis .hgetall (f'lacus:capture_settings:{ uuid } ' )
450+ _to_capture_b = self .redis .hgetall (f'lacus:capture_settings:{ uuid } ' )
452451
453- if not _to_capture :
452+ if not _to_capture_b :
454453 result = {'error' : f'No capture settings for { uuid } ' }
455454 raise CaptureError (f'No capture settings for { uuid } ' )
456455
456+ _to_capture = {k .decode (): v .decode () for k , v in _to_capture_b .items ()}
457+ _domain : str | None = None
458+ if 'url' in _to_capture and _to_capture ['url' ]:
459+ # allows to pass the right context for cookies provided as {name: value}
460+ try :
461+ _domain = urlsplit (_to_capture ['url' ]).hostname
462+ except Exception :
463+ # not capturing a url, ignore
464+ pass
457465 try :
458- to_capture = CaptureSettings ( ** { k . decode (): v . decode () for k , v in _to_capture . items () })
466+ to_capture = CaptureSettings . model_validate ( _to_capture , context = { 'domain' : _domain })
459467 except ValidationError as e :
460468 logger .warning (f'Settings invalid: { e } ' )
461469 raise CaptureSettingsError ('Invalid settings' , e )
@@ -555,25 +563,6 @@ async def _capture(self, uuid: str, priority: int) -> None:
555563 else :
556564 browser_engine = 'webkit'
557565
558- cookies : list [Cookie ] = []
559- if to_capture .cookies :
560- # In order to properly pass the cookies to playwright,
561- # each of then must have a name, a value and either a domain + path or a URL
562- # Name and value are mandatory, and we cannot auto-fill them.
563- # If the cookie doesn't have a domain + path OR a URL, we fill the domain
564- # with the hostname of the URL we try to capture and the path with "/"
565- # NOTE: these changes can only be done here because we need the URL.
566- for cookie in to_capture .cookies :
567- if not cookie .name or not cookie .value :
568- logger .warning (f'Invalid cookie: { cookie } ' )
569- continue
570- if not cookie .domain and not cookie .url :
571- if not splitted_url .hostname :
572- # If for any reason we cannot get the hostname there, ignore the cookie
573- continue
574- cookie .domain = splitted_url .hostname
575- cookie .path = '/'
576- cookies .append (cookie )
577566 try :
578567 logger .debug (f'Capturing { url } ' )
579568 stats_pipeline .sadd (f'stats:{ today } :captures' , url )
@@ -590,12 +579,12 @@ async def _capture(self, uuid: str, priority: int) -> None:
590579 uuid = uuid ) as capture :
591580 # required by Mypy: https://github.com/python/mypy/issues/3004
592581 capture .headers = to_capture .headers
593- capture .cookies = cookies # type: ignore[assignment]
582+ capture .cookies = [ c . model_dump ( exclude_none = True ) for c in to_capture . cookies ] if to_capture . cookies else None
594583 capture .storage = to_capture .storage
595- capture .viewport = to_capture .viewport .model_dump () if to_capture .viewport else None
584+ capture .viewport = to_capture .viewport .model_dump (exclude_none = True ) if to_capture .viewport else None
596585 capture .user_agent = to_capture .user_agent
597- capture .http_credentials = to_capture .http_credentials .model_dump () if to_capture .http_credentials else None
598- capture .geolocation = to_capture .geolocation .model_dump () if to_capture .geolocation else None
586+ capture .http_credentials = to_capture .http_credentials .model_dump (exclude_none = True ) if to_capture .http_credentials else None
587+ capture .geolocation = to_capture .geolocation .model_dump (exclude_none = True ) if to_capture .geolocation else None
599588 capture .timezone_id = to_capture .timezone_id
600589 capture .locale = to_capture .locale
601590 capture .color_scheme = to_capture .color_scheme
0 commit comments