Skip to content

Commit 3cc4af5

Browse files
committed
delete s3 file in scraper if not event
1 parent 0ec8de6 commit 3cc4af5

File tree

1 file changed

+23
-1
lines changed

1 file changed

+23
-1
lines changed

backend/scraping/instagram_feed.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
from example.embedding_utils import generate_event_embedding, is_duplicate_event
2424
from services.openai_service import extract_event_from_caption
25-
from services.storage_service import upload_image_from_url
25+
from services.storage_service import upload_image_from_url, delete_image
2626

2727

2828
USER_AGENTS = [
@@ -110,6 +110,12 @@ def wrapper(*args, **kwargs):
110110
return wrapper
111111

112112

113+
def extract_s3_filename_from_url(image_url: str) -> "str | None":
    """Derive the storage key to delete from an uploaded image URL.

    The key is built by taking the last path segment of ``image_url`` and
    prefixing it with ``events/``.  Any query string or fragment (for
    example the signature parameters of a presigned URL) is removed first
    so it cannot end up inside the key.

    Args:
        image_url: URL of the uploaded image.  May be empty or ``None``.

    Returns:
        ``"events/<filename>"``, or ``None`` when ``image_url`` is empty or
        has no filename segment (e.g. it ends with ``/``).  Callers are
        expected to skip deletion on ``None``.
    """
    if not image_url:
        return None

    # Drop "#fragment" and "?query" before looking for the filename; a "/"
    # inside a query value would otherwise be mistaken for a path separator.
    path = image_url.partition("#")[0].partition("?")[0]
    filename = path.rsplit("/", 1)[-1]

    # A trailing slash leaves no filename; returning the bare "events/"
    # prefix would not identify any single object.
    if not filename:
        return None

    # NOTE(review): assumes uploads are stored under the "events/" prefix —
    # confirm against the upload helper in services.storage_service.
    return f"events/{filename}"
118+
113119
def append_event_to_csv(event_data, club_ig, post_url, status="success", embedding=None):
114120
csv_file = Path(__file__).resolve().parent / "events_scraped.csv"
115121
csv_file.parent.mkdir(parents=True, exist_ok=True)
@@ -301,6 +307,11 @@ def process_recent_feed(
301307
event_data = extract_event_from_caption(post.caption, image_url)
302308
if event_data is None:
303309
logger.warning(f"AI client returned None for post {post.shortcode}")
310+
# Delete uploaded S3 file if event extraction failed
311+
if image_url:
312+
s3_filename = extract_s3_filename_from_url(image_url)
313+
if s3_filename and delete_image(s3_filename):
314+
logger.info(f"Deleted S3 file for failed event extraction: {s3_filename}")
304315
continue
305316

306317
post_url = f"https://www.instagram.com/p/{post.shortcode}/"
@@ -313,6 +324,12 @@ def process_recent_feed(
313324
if insert_event_to_db(event_data, post.owner_username, post_url):
314325
events_added += 1
315326
logger.info(f"Successfully added event from {post.owner_username}")
327+
else:
328+
# Event failed to insert to DB, delete S3 file
329+
if image_url:
330+
s3_filename = extract_s3_filename_from_url(image_url)
331+
if s3_filename and delete_image(s3_filename):
332+
logger.info(f"Deleted S3 file for failed DB insert: {s3_filename}")
316333
else:
317334
missing_fields = [
318335
key
@@ -323,6 +340,11 @@ def process_recent_feed(
323340
f"Missing required fields: {missing_fields}, skipping event"
324341
)
325342
embedding = generate_event_embedding(event_data)
343+
# Delete S3 file for events with missing required fields
344+
if image_url:
345+
s3_filename = extract_s3_filename_from_url(image_url)
346+
if s3_filename and delete_image(s3_filename):
347+
logger.info(f"Deleted S3 file for event with missing fields: {s3_filename}")
326348
append_event_to_csv(
327349
event_data, post.owner_username, post_url, status="missing_fields", embedding=embedding
328350
)

0 commit comments

Comments
 (0)