Skip to content

Commit cfcd891

Browse files
committed
better print
1 parent 8e72ad6 commit cfcd891

File tree

1 file changed

+48
-31
lines changed

1 file changed

+48
-31
lines changed

backend/scraping/instagram_feed.py

Lines changed: 48 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import sys
1313
from fuzzywuzzy import fuzz
1414
import time
15-
15+
import json
1616

1717
logging.basicConfig(
1818
level=logging.DEBUG,
@@ -24,6 +24,32 @@
2424
)
2525
logger = logging.getLogger(__name__)
2626

27+
def get_post_image_url(post):
    """
    Safely extract image URL from Instagram post with fallback options.

    Sources are tried in order and the first non-empty value wins:

    1. ``post.url``
    2. ``post._node['display_url']``
    3. ``post._node['display_src']``

    Args:
        post: Instagram post object from instaloader

    Returns:
        str or None: Image URL if found, None otherwise
    """
    access_error = None

    # Read the attribute directly instead of probing with hasattr():
    # hasattr() evaluates properties and only swallows AttributeError, so a
    # KeyError raised by the property would skip every fallback below.
    try:
        url = post.url
    except (KeyError, AttributeError) as e:
        access_error = e
        url = None
    if url:
        return url

    # Fall back to the raw node data; a missing or non-subscriptable _node
    # simply means there is nothing to fall back to.
    node = getattr(post, '_node', None)
    for key in ('display_url', 'display_src'):
        try:
            candidate = node[key]
        except (KeyError, TypeError):
            continue
        if candidate:
            return candidate

    # Resolve the shortcode only for logging, and never let it raise.
    try:
        shortcode = getattr(post, 'shortcode', 'unknown')
    except KeyError:
        shortcode = 'unknown'

    if access_error is not None:
        logger.error(f"Error accessing image URL for post {shortcode}: {str(access_error)}")
    else:
        logger.warning(f"No image URL found for post {shortcode}")
    return None
51+
52+
2753
def handle_instagram_errors(func):
2854
# Handle common Instagram errors?
2955
def wrapper(*args, **kwargs):
@@ -89,33 +115,6 @@ def update_event_csv(event_data, club_name, url):
89115
print(f"Added event: {event_data.get('name')}")
90116
return True
91117

92-
93-
def process_instagram_posts(max_posts=10):
    """
    Process Instagram posts and extract event information.

    Iterates over the posts returned by ``profile.get_posts()`` for the
    hard-coded club profile, parses each caption with
    ``parse_caption_for_event`` and counts every post for which
    ``update_event_csv`` returns a truthy value. A summary is printed at
    the end.

    Args:
        max_posts: Maximum number of posts to process.
    """
    club_name = "uw.wealthmanagement"

    profile = Profile.from_username(L.context, club_name)
    events_added = 0
    posts_processed = 0

    for i, post in enumerate(profile.get_posts()):
        # Check the limit first so nothing is printed for a post that will
        # not be processed.
        if i >= max_posts:
            break

        print(f"\n--- Processing post {i+1} ---")
        # Post objects have no `post` attribute; identify the post by its
        # shortcode instead.
        print(f"Post: {post.shortcode}")

        event_data = parse_caption_for_event(post.caption, post.url)

        if update_event_csv(event_data, club_name, post.url):
            events_added += 1
        posts_processed += 1

    print("\n--- Summary ---")
    # Report what was actually processed; the profile may have fewer than
    # max_posts posts.
    print(f"Processed {posts_processed} posts")
    print(f"Added {events_added} events to CSV")
117-
118-
119118
def insert_event_to_db(event_data, club_ig, post_url, sim_threshold=80):
120119
# Check if an event already exists in db and insert it if not
121120
event_name = event_data.get("name") #.title()
@@ -177,9 +176,22 @@ def process_recent_feed(cutoff=datetime.now(timezone.utc) - timedelta(days=2), m
177176
posts_processed = 0
178177
consec_old_posts = 0
179178
s3_uploader = S3ImageUploader() # Initialize S3 uploader
179+
180180

181181
for post in L.get_feed_posts():
182-
print(post._node.keys())
182+
for k in post._node.keys():
183+
print(k)
184+
if "image_versions2" in post._node:
185+
print(json.dumps(post._node["image_versions2"], indent=2))
186+
187+
if "carousel_media" in post._node:
188+
for i, media in enumerate(post._node["carousel_media"]):
189+
print(f"carousel item {i}:")
190+
print(json.dumps(media["image_versions2"], indent=2))
191+
192+
if "display_url" in post._node:
193+
print(json.dumps(post._node["display_url"], indent=2))
194+
183195
try:
184196
posts_processed += 1
185197
logger.info("\n" + "-" * 50)
@@ -198,7 +210,13 @@ def process_recent_feed(cutoff=datetime.now(timezone.utc) - timedelta(days=2), m
198210
logger.info(f"Reached max post limit of {max_posts}, stopping.")
199211
break
200212

201-
image_url = s3_uploader.upload_image(post.url)
213+
# Safely get image URL and upload to S3
214+
raw_image_url = get_post_image_url(post)
215+
if raw_image_url:
216+
image_url = s3_uploader.upload_image(raw_image_url)
217+
else:
218+
logger.warning(f"No image URL found for post {post.shortcode}, skipping image upload")
219+
image_url = None
202220

203221
event_data = parse_caption_for_event(post.caption, image_url)
204222

@@ -252,6 +270,5 @@ def session():
252270

253271

254272
if __name__ == "__main__":
255-
# process_instagram_posts(max_posts=10)
256273
L = session()
257274
process_recent_feed()

0 commit comments

Comments
 (0)