Skip to content

Commit ea78c0d

Browse files
committed
chg: Move cookies to models
1 parent 6ed8da9 commit ea78c0d

File tree

3 files changed

+31
-42
lines changed

3 files changed

+31
-42
lines changed

lacuscore/lacuscore.py

Lines changed: 17 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ def enqueue(self, *,
281281
'max_retries': max_retries}
282282
if isinstance(settings, dict):
283283
try:
284-
to_enqueue = CaptureSettings(**settings)
284+
to_enqueue = CaptureSettings.model_validate(settings)
285285
except ValidationError as e:
286286
self.master_logger.warning(f'Unable to validate settings: {e}.')
287287
raise CaptureSettingsError('Invalid settings', e)
@@ -446,16 +446,24 @@ async def _capture(self, uuid: str, priority: int) -> None:
446446
retry = False
447447
try:
448448
result: CaptureResponse = {}
449-
_to_capture: dict[bytes, Any] = {}
450449
url: str = ''
451-
_to_capture = self.redis.hgetall(f'lacus:capture_settings:{uuid}')
450+
_to_capture_b = self.redis.hgetall(f'lacus:capture_settings:{uuid}')
452451

453-
if not _to_capture:
452+
if not _to_capture_b:
454453
result = {'error': f'No capture settings for {uuid}'}
455454
raise CaptureError(f'No capture settings for {uuid}')
456455

456+
_to_capture = {k.decode(): v.decode() for k, v in _to_capture_b.items()}
457+
_domain: str | None = None
458+
if 'url' in _to_capture and _to_capture['url']:
459+
# allows passing the right context for cookies provided as {name: value}
460+
try:
461+
_domain = urlsplit(_to_capture['url']).hostname
462+
except Exception:
463+
# not capturing a url, ignore
464+
pass
457465
try:
458-
to_capture = CaptureSettings(**{k.decode(): v.decode() for k, v in _to_capture.items()})
466+
to_capture = CaptureSettings.model_validate(_to_capture, context={'domain': _domain})
459467
except ValidationError as e:
460468
logger.warning(f'Settings invalid: {e}')
461469
raise CaptureSettingsError('Invalid settings', e)
@@ -555,25 +563,6 @@ async def _capture(self, uuid: str, priority: int) -> None:
555563
else:
556564
browser_engine = 'webkit'
557565

558-
cookies: list[Cookie] = []
559-
if to_capture.cookies:
560-
# In order to properly pass the cookies to playwright,
561-
# each of them must have a name, a value and either a domain + path or a URL
562-
# Name and value are mandatory, and we cannot auto-fill them.
563-
# If the cookie doesn't have a domain + path OR a URL, we fill the domain
564-
# with the hostname of the URL we try to capture and the path with "/"
565-
# NOTE: these changes can only be done here because we need the URL.
566-
for cookie in to_capture.cookies:
567-
if not cookie.name or not cookie.value:
568-
logger.warning(f'Invalid cookie: {cookie}')
569-
continue
570-
if not cookie.domain and not cookie.url:
571-
if not splitted_url.hostname:
572-
# If for any reason we cannot get the hostname there, ignore the cookie
573-
continue
574-
cookie.domain = splitted_url.hostname
575-
cookie.path = '/'
576-
cookies.append(cookie)
577566
try:
578567
logger.debug(f'Capturing {url}')
579568
stats_pipeline.sadd(f'stats:{today}:captures', url)
@@ -590,12 +579,12 @@ async def _capture(self, uuid: str, priority: int) -> None:
590579
uuid=uuid) as capture:
591580
# required by Mypy: https://github.com/python/mypy/issues/3004
592581
capture.headers = to_capture.headers
593-
capture.cookies = cookies # type: ignore[assignment]
582+
capture.cookies = [c.model_dump(exclude_none=True) for c in to_capture.cookies] if to_capture.cookies else None
594583
capture.storage = to_capture.storage
595-
capture.viewport = to_capture.viewport.model_dump() if to_capture.viewport else None
584+
capture.viewport = to_capture.viewport.model_dump(exclude_none=True) if to_capture.viewport else None
596585
capture.user_agent = to_capture.user_agent
597-
capture.http_credentials = to_capture.http_credentials.model_dump() if to_capture.http_credentials else None
598-
capture.geolocation = to_capture.geolocation.model_dump() if to_capture.geolocation else None
586+
capture.http_credentials = to_capture.http_credentials.model_dump(exclude_none=True) if to_capture.http_credentials else None
587+
capture.geolocation = to_capture.geolocation.model_dump(exclude_none=True) if to_capture.geolocation else None
599588
capture.timezone_id = to_capture.timezone_id
600589
capture.locale = to_capture.locale
601590
capture.color_scheme = to_capture.color_scheme

poetry.lock

Lines changed: 12 additions & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "lacuscore"
3-
version = "1.22.1"
3+
version = "1.22.2"
44
description = "Core of Lacus, usable as a module"
55
authors = [
66
{name="Raphaël Vinot", email="raphael.vinot@circl.lu"}
@@ -15,7 +15,7 @@ dynamic = [ "classifiers" ]
1515

1616
dependencies = [
1717
"requests (>=2.32.5)",
18-
"playwrightcapture[recaptcha] (>=1.37.1)",
18+
"playwrightcapture[recaptcha] (>=1.37.2)",
1919
"defang (>=0.5.3)",
2020
"ua-parser[regex] (>=1.0.1)",
2121
"redis[hiredis] (>=5.3.0,<6.0.0)",

0 commit comments

Comments
 (0)