Skip to content

Commit 1b1d95d

Browse files
committed
added delay between requests, reduced consecutive old posts threshold before stopping
1 parent 28f8431 commit 1b1d95d

File tree

1 file changed

+3
-1
lines changed

1 file changed

+3
-1
lines changed

backend/scraping/instagram_feed.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
from datetime import datetime
1111
import sys
1212
from fuzzywuzzy import fuzz
13+
import time
1314

1415

1516
logging.basicConfig(
@@ -173,7 +174,7 @@ def insert_event_to_db(event_data, club_ig, post_url, sim_threshold=80):
173174
conn.close()
174175

175176

176-
def process_recent_feed(cutoff=datetime.now(timezone.utc) - timedelta(days=1), max_posts=100, max_consec_old_posts=10):
177+
def process_recent_feed(cutoff=datetime.now(timezone.utc) - timedelta(days=1), max_posts=100, max_consec_old_posts=3):
177178
# Process Instagram feed posts and extract event info. Stops
178179
# scraping once posts become older than cutoff.
179180
try:
@@ -215,6 +216,7 @@ def process_recent_feed(cutoff=datetime.now(timezone.utc) - timedelta(days=1), m
215216
else:
216217
logger.debug(f"No caption for post {post.shortcode}, skipping...")
217218
print("No caption found, skipping...")
219+
time.sleep(5)
218220
except Exception as e:
219221
logger.error(f"Error processing post {post.shortcode} by {post.owner_username}: {str(e)}")
220222
logger.error(f"Traceback: {traceback.format_exc()}")

0 commit comments

Comments
 (0)