Skip to content

Commit a5c902a

Browse files
committed
refactor: streamline post processing logic in recent feed handling
1 parent 9067f7a commit a5c902a

File tree

1 file changed

+21
-20
lines changed

1 file changed

+21
-20
lines changed

backend/scraping/instagram_feed.py

Lines changed: 21 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -368,19 +368,16 @@ def check_post_limit():
368368
for post in safe_feed_posts(loader):
369369
try:
370370
post_time = timezone.make_aware(post.date_utc) if timezone.is_naive(post.date_utc) else post.date_utc
371-
if post.shortcode in seen_shortcodes or post_time < cutoff:
371+
if post_time < cutoff:
372372
consec_old_posts += 1
373373
logger.debug(
374-
f"[{post.shortcode}] [{post.owner_username}] Skipping post; consec_old_posts={consec_old_posts}"
374+
f"[{post.shortcode}] [{post.owner_username}] Skipping old post; consec_old_posts={consec_old_posts}"
375+
)
376+
continue
377+
if post.shortcode in seen_shortcodes:
378+
logger.debug(
379+
f"[{post.shortcode}] [{post.owner_username}] Skipping previously seen post"
375380
)
376-
if consec_old_posts >= max_consec_old_posts:
377-
termination_reason = (
378-
f"reached_consecutive_old_posts={max_consec_old_posts}"
379-
)
380-
logger.info(
381-
f"Reached {max_consec_old_posts} consecutive old posts, stopping."
382-
)
383-
break
384381
continue
385382

386383
consec_old_posts = 0
@@ -399,17 +396,13 @@ def check_post_limit():
399396
)
400397
source_image_url = None
401398

402-
posts_processed += 1
403399
extracted_list = extract_events_from_caption(
404400
post.caption, source_image_url, post.date_local
405401
)
406402
if not extracted_list:
407403
logger.warning(
408404
f"[{post.shortcode}] [{post.owner_username}] AI client returned no events for post"
409405
)
410-
IgnoredPost.objects.get_or_create(shortcode=post.shortcode)
411-
if check_post_limit():
412-
break
413406
continue
414407

415408
source_url = f"https://www.instagram.com/p/{post.shortcode}/"
@@ -475,19 +468,27 @@ def check_post_limit():
475468
added_to_db=added_to_db or "unknown",
476469
)
477470

478-
IgnoredPost.objects.get_or_create(shortcode=post.shortcode)
479-
if check_post_limit():
480-
break
481-
482-
time.sleep(random.uniform(30, 60))
483-
484471
except Exception as e:
485472
logger.error(
486473
f"[{post.shortcode}] [{post.owner_username}] Error processing post: {e!s}"
487474
)
488475
logger.error(f"[{post.shortcode}] [{post.owner_username}] Traceback: {traceback.format_exc()}")
489476
time.sleep(random.uniform(3, 8))
490477
continue
478+
finally:
479+
posts_processed += 1
480+
IgnoredPost.objects.get_or_create(shortcode=post.shortcode)
481+
if consec_old_posts >= max_consec_old_posts:
482+
termination_reason = (
483+
f"reached_consecutive_old_posts={max_consec_old_posts}"
484+
)
485+
logger.info(
486+
f"Reached {max_consec_old_posts} consecutive old posts, stopping."
487+
)
488+
break
489+
if check_post_limit():
490+
break
491+
time.sleep(random.uniform(30, 60))
491492

492493
if not termination_reason:
493494
termination_reason = "no_more_posts"

0 commit comments

Comments
 (0)