2222
2323from example .embedding_utils import generate_event_embedding , is_duplicate_event
2424from services .openai_service import extract_event_from_caption
25- from services .storage_service import upload_image_from_url
25+ from services .storage_service import upload_image_from_url , delete_image
2626
2727
2828USER_AGENTS = [
@@ -110,6 +110,12 @@ def wrapper(*args, **kwargs):
110110 return wrapper
111111
112112
def extract_s3_filename_from_url(image_url: str) -> "str | None":
    """Derive the ``events/<filename>`` storage key from an uploaded image URL.

    Only the path component of the URL is inspected, so a query string or
    fragment (for example the signature parameters of a presigned URL) never
    leaks into the returned key.

    Args:
        image_url: URL of the uploaded image. May be empty or ``None``.

    Returns:
        ``"events/<filename>"`` where ``<filename>`` is the last path segment
        of ``image_url``, or ``None`` when the URL is empty or has no final
        path segment (e.g. it ends with ``/`` or consists of a host only).
    """
    # Local import keeps this block self-contained without touching the
    # module's import section.
    from urllib.parse import urlsplit

    if not image_url:
        return None

    # Split on the parsed path rather than the raw URL: "?a/b" or "#x" would
    # otherwise be mistaken for part of (or all of) the filename.
    filename = urlsplit(image_url).path.rsplit("/", 1)[-1]
    if not filename:
        # Nothing identifies a single object; returning None lets callers'
        # existing truthiness checks skip the delete instead of targeting
        # the bare "events/" prefix.
        return None
    return f"events/{filename}"
118+
113119def append_event_to_csv (event_data , club_ig , post_url , status = "success" , embedding = None ):
114120 csv_file = Path (__file__ ).resolve ().parent / "events_scraped.csv"
115121 csv_file .parent .mkdir (parents = True , exist_ok = True )
@@ -301,6 +307,11 @@ def process_recent_feed(
301307 event_data = extract_event_from_caption (post .caption , image_url )
302308 if event_data is None :
303309 logger .warning (f"AI client returned None for post { post .shortcode } " )
310+ # Delete uploaded S3 file if event extraction failed
311+ if image_url :
312+ s3_filename = extract_s3_filename_from_url (image_url )
313+ if s3_filename and delete_image (s3_filename ):
314+ logger .info (f"Deleted S3 file for failed event extraction: { s3_filename } " )
304315 continue
305316
306317 post_url = f"https://www.instagram.com/p/{ post .shortcode } /"
@@ -313,6 +324,12 @@ def process_recent_feed(
313324 if insert_event_to_db (event_data , post .owner_username , post_url ):
314325 events_added += 1
315326 logger .info (f"Successfully added event from { post .owner_username } " )
327+ else :
328+ # Event failed to insert to DB, delete S3 file
329+ if image_url :
330+ s3_filename = extract_s3_filename_from_url (image_url )
331+ if s3_filename and delete_image (s3_filename ):
332+ logger .info (f"Deleted S3 file for failed DB insert: { s3_filename } " )
316333 else :
317334 missing_fields = [
318335 key
@@ -323,6 +340,11 @@ def process_recent_feed(
323340 f"Missing required fields: { missing_fields } , skipping event"
324341 )
325342 embedding = generate_event_embedding (event_data )
343+ # Delete S3 file for events with missing required fields
344+ if image_url :
345+ s3_filename = extract_s3_filename_from_url (image_url )
346+ if s3_filename and delete_image (s3_filename ):
347+ logger .info (f"Deleted S3 file for event with missing fields: { s3_filename } " )
326348 append_event_to_csv (
327349 event_data , post .owner_username , post_url , status = "missing_fields" , embedding = embedding
328350 )
0 commit comments