1616import traceback
1717from datetime import datetime , timedelta , timezone as pytimezone
1818from pathlib import Path
19- from dateutil import parser as dateutil_parser
2019from difflib import SequenceMatcher
2120
2221import requests
3635from services .storage_service import upload_image_from_url
3736from shared .constants .user_agents import USER_AGENTS
3837from utils .embedding_utils import find_similar_events
38+ from utils .events_utils import clean_datetime
3939
4040MAX_POSTS = int (os .getenv ("MAX_POSTS" , "100" ))
4141MAX_CONSEC_OLD_POSTS = 10
@@ -121,10 +121,12 @@ def append_event_to_csv(
121121 csv_file .parent .mkdir (parents = True , exist_ok = True )
122122 file_exists = csv_file .exists ()
123123
124- dtstart = dateutil_parser .parse (event_data .get ("dtstart" )).replace (tzinfo = pytimezone .utc ) if event_data .get ("dtstart" ) else None
125- dtend = dateutil_parser .parse (event_data .get ("dtend" )).replace (tzinfo = pytimezone .utc ) if event_data .get ("dtend" ) else None
126- dtstart_utc = event_data .get ("dtstart_utc" )
127- dtend_utc = event_data .get ("dtend_utc" )
124+ dtstart = clean_datetime (event_data .get ("dtstart" ))
125+ dtend = clean_datetime (event_data .get ("dtend" ))
126+ dtstart = dtstart .replace (tzinfo = pytimezone .utc ) if dtstart else None
127+ dtend = dtend .replace (tzinfo = pytimezone .utc ) if dtend else None
128+ dtstart_utc = clean_datetime (event_data .get ("dtstart_utc" ))
129+ dtend_utc = clean_datetime (event_data .get ("dtend_utc" ))
128130 duration = event_data .get ("duration" )
129131 all_day = event_data .get ("all_day" )
130132 location = event_data .get ("location" , "" )
@@ -207,8 +209,14 @@ def append_event_to_csv(
207209def insert_event_to_db (event_data , ig_handle , source_url ):
208210 """Map scraped event data to Event model fields, insert to DB"""
209211 title = event_data .get ("title" , "" )
210- dtstart = dateutil_parser .parse (event_data .get ("dtstart" )).replace (tzinfo = pytimezone .utc ) if event_data .get ("dtstart" ) else None
211- dtend = dateutil_parser .parse (event_data .get ("dtend" )).replace (tzinfo = pytimezone .utc ) if event_data .get ("dtend" ) else None
212+ dtstart = clean_datetime (event_data .get ("dtstart" ))
213+ dtend = clean_datetime (event_data .get ("dtend" ))
214+ dtstart = dtstart .replace (tzinfo = pytimezone .utc ) if dtstart else None
215+ dtend = dtend .replace (tzinfo = pytimezone .utc ) if dtend else None
216+ dtstart_utc = clean_datetime (event_data .get ("dtstart_utc" ))
217+ dtend_utc = clean_datetime (event_data .get ("dtend_utc" ))
218+ duration = clean_datetime (event_data .get ("duration" ))
219+ all_day = event_data .get ("all_day" )
212220 source_image_url = event_data .get ("source_image_url" ) or ""
213221 description = event_data .get ("description" , "" ) or ""
214222 location = event_data .get ("location" )
@@ -217,10 +225,6 @@ def insert_event_to_db(event_data, ig_handle, source_url):
217225 registration = bool (event_data .get ("registration" , False ))
218226 embedding = event_data .get ("embedding" ) or ""
219227 date = dtstart .date ()
220- dtstart_utc = event_data .get ("dtstart_utc" )
221- dtend_utc = event_data .get ("dtend_utc" )
222- duration = event_data .get ("duration" )
223- all_day = event_data .get ("all_day" )
224228 tz = event_data .get ("tz" , "" )
225229 latitude = event_data .get ("latitude" , None )
226230 longitude = event_data .get ("longitude" , None )
0 commit comments