Skip to content

Commit c2a0357

Browse files
committed
replace dateutil parser with clean_datetime utility for event date handling
1 parent 6742cad commit c2a0357

File tree

2 files changed

+27
-11
lines changed

2 files changed

+27
-11
lines changed

backend/scraping/instagram_feed.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
import traceback
1717
from datetime import datetime, timedelta, timezone as pytimezone
1818
from pathlib import Path
19-
from dateutil import parser as dateutil_parser
2019
from difflib import SequenceMatcher
2120

2221
import requests
@@ -36,6 +35,7 @@
3635
from services.storage_service import upload_image_from_url
3736
from shared.constants.user_agents import USER_AGENTS
3837
from utils.embedding_utils import find_similar_events
38+
from utils.events_utils import clean_datetime
3939

4040
MAX_POSTS = int(os.getenv("MAX_POSTS", "100"))
4141
MAX_CONSEC_OLD_POSTS = 10
@@ -121,10 +121,12 @@ def append_event_to_csv(
121121
csv_file.parent.mkdir(parents=True, exist_ok=True)
122122
file_exists = csv_file.exists()
123123

124-
dtstart = dateutil_parser.parse(event_data.get("dtstart")).replace(tzinfo=pytimezone.utc) if event_data.get("dtstart") else None
125-
dtend = dateutil_parser.parse(event_data.get("dtend")).replace(tzinfo=pytimezone.utc) if event_data.get("dtend") else None
126-
dtstart_utc = event_data.get("dtstart_utc")
127-
dtend_utc = event_data.get("dtend_utc")
124+
dtstart = clean_datetime(event_data.get("dtstart"))
125+
dtend = clean_datetime(event_data.get("dtend"))
126+
dtstart = dtstart.replace(tzinfo=pytimezone.utc) if dtstart else None
127+
dtend = dtend.replace(tzinfo=pytimezone.utc) if dtend else None
128+
dtstart_utc = clean_datetime(event_data.get("dtstart_utc"))
129+
dtend_utc = clean_datetime(event_data.get("dtend_utc"))
128130
duration = event_data.get("duration")
129131
all_day = event_data.get("all_day")
130132
location = event_data.get("location", "")
@@ -207,8 +209,14 @@ def append_event_to_csv(
207209
def insert_event_to_db(event_data, ig_handle, source_url):
208210
"""Map scraped event data to Event model fields, insert to DB"""
209211
title = event_data.get("title", "")
210-
dtstart = dateutil_parser.parse(event_data.get("dtstart")).replace(tzinfo=pytimezone.utc) if event_data.get("dtstart") else None
211-
dtend = dateutil_parser.parse(event_data.get("dtend")).replace(tzinfo=pytimezone.utc) if event_data.get("dtend") else None
212+
dtstart = clean_datetime(event_data.get("dtstart"))
213+
dtend = clean_datetime(event_data.get("dtend"))
214+
dtstart = dtstart.replace(tzinfo=pytimezone.utc) if dtstart else None
215+
dtend = dtend.replace(tzinfo=pytimezone.utc) if dtend else None
216+
dtstart_utc = clean_datetime(event_data.get("dtstart_utc"))
217+
dtend_utc = clean_datetime(event_data.get("dtend_utc"))
218+
duration = clean_datetime(event_data.get("duration"))
219+
all_day = event_data.get("all_day")
212220
source_image_url = event_data.get("source_image_url") or ""
213221
description = event_data.get("description", "") or ""
214222
location = event_data.get("location")
@@ -217,10 +225,6 @@ def insert_event_to_db(event_data, ig_handle, source_url):
217225
registration = bool(event_data.get("registration", False))
218226
embedding = event_data.get("embedding") or ""
219227
date = dtstart.date()
220-
dtstart_utc = event_data.get("dtstart_utc")
221-
dtend_utc = event_data.get("dtend_utc")
222-
duration = event_data.get("duration")
223-
all_day = event_data.get("all_day")
224228
tz = event_data.get("tz", "")
225229
latitude = event_data.get("latitude", None)
226230
longitude = event_data.get("longitude", None)

backend/utils/events_utils.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
from dateutil import parser as dateutil_parser
2+
3+
4+
def clean_datetime(val):
    """Normalise a date/time value to a ``datetime``, or return ``None``.

    Args:
        val: The value to normalise. A ``datetime`` instance is returned
            unchanged; a non-blank string is handed to
            ``dateutil_parser.parse``.

    Returns:
        The resulting ``datetime`` (timezone info is left exactly as given
        or as parsed), or ``None`` when ``val`` is falsy, is a blank string,
        is of any other type, or cannot be parsed.
    """
    # Function-scope import keeps the helper self-contained.
    from datetime import datetime

    # An already-parsed value passes through instead of being discarded.
    if isinstance(val, datetime):
        return val

    if not isinstance(val, str) or not val.strip():
        return None

    try:
        return dateutil_parser.parse(val)
    except Exception:
        # Best-effort by design: any parser failure is reported as None
        # rather than raised to the caller.
        return None
11+
12+
113
def determine_display_handle(event):
214
"""
315
Determine a display handle for an event.

0 commit comments

Comments
 (0)