1414import re
1515import time
1616import traceback
17- from datetime import datetime , timedelta , timezone
17+ from datetime import datetime , timedelta
1818from pathlib import Path
1919
2020import requests
2121from dotenv import load_dotenv
2222from instaloader import Instaloader
2323from logging_config import logger
2424from zyte_setup import setup_zyte
25+ from django .utils import timezone
2526
2627from apps .clubs .models import Clubs
2728from apps .events .models import Events
@@ -124,58 +125,85 @@ def append_event_to_csv(
124125
125126 dtstart = event_data .get ("dtstart" , "" )
126127 dtend = event_data .get ("dtend" , "" )
127- dtstart_utc , dtend_utc , duration , all_day = tz_compute (dtstart , dtend )
128+ dtstart_utc = event_data .get ("dtstart_utc" , "" )
129+ dtend_utc = event_data .get ("dtend_utc" , "" )
130+ duration = event_data .get ("duration" , "" )
131+ all_day = event_data .get ("all_day" , False )
132+ location = event_data .get ("location" , "" )
133+ food = event_data .get ("food" , "" )
134+ price = event_data .get ("price" , "" )
135+ registration = bool (event_data .get ("registration" , False ))
136+ description = event_data .get ("description" , "" )
137+ rrule = event_data .get ("rrule" , "" )
138+ latitude = event_data .get ("latitude" , None )
139+ longitude = event_data .get ("longitude" , None )
140+ tz = event_data .get ("tz" , "" )
141+ school = event_data .get ("school" , "" )
142+ source_image_url = event_data .get ("source_image_url" , "" )
143+ title = event_data .get ("title" , "" )
144+
145+ fieldnames = [
146+ "ig_handle" ,
147+ "title" ,
148+ "source_url" ,
149+ "dtstart" ,
150+ "dtstart_utc" ,
151+ "dtend" ,
152+ "dtend_utc" ,
153+ "duration" ,
154+ "location" ,
155+ "food" ,
156+ "price" ,
157+ "registration" ,
158+ "description" ,
159+ "rrule" ,
160+ "latitude" ,
161+ "longitude" ,
162+ "tz" ,
163+ "school" ,
164+ "source_image_url" ,
165+ "all_day" ,
166+ "club_type" ,
167+ "raw_json" ,
168+ "added_to_db" ,
169+ "status" ,
170+ "embedding" ,
171+ ]
128172
129173 with open (csv_file , "a" , newline = "" , encoding = "utf-8" ) as csvfile :
130- fieldnames = [
131- "ig_handle" ,
132- "title" ,
133- "source_url" ,
134- "dtstart" ,
135- "dtstart_utc" ,
136- "dtend" ,
137- "dtend_utc" ,
138- "duration" ,
139- "location" ,
140- "food" ,
141- "price" ,
142- "registration" ,
143- "description" ,
144- "reactions" ,
145- "embedding" ,
146- "source_image_url" ,
147- "all_day" ,
148- "club_type" ,
149- "raw_json" ,
150- "added_to_db" ,
151- ]
152- writer = csv .DictWriter (csvfile , fieldnames = fieldnames )
174+ writer = csv .DictWriter (csvfile , fieldnames = fieldnames , lineterminator = "\n " )
153175 if not file_exists :
154176 writer .writeheader ()
155177 writer .writerow (
156178 {
157179 "ig_handle" : ig_handle ,
158- "title" : event_data . get ( " title" ) ,
180+ "title" : title ,
159181 "source_url" : source_url ,
160182 "dtstart" : dtstart ,
161183 "dtstart_utc" : dtstart_utc ,
162184 "dtend" : dtend ,
163185 "dtend_utc" : dtend_utc ,
164186 "duration" : duration ,
165- "location" : event_data .get ("location" ),
166- "food" : event_data .get ("food" , "" ),
167- "price" : event_data .get ("price" , "" ),
168- "registration" : bool (event_data .get ("registration" , False )),
169- "description" : event_data .get ("description" , "" ),
170- "reactions" : json .dumps (event_data .get ("reactions" ) or {}),
171- "embedding" : embedding or "" ,
172- "source_image_url" : event_data .get ("source_image_url" ) or "" ,
187+ "location" : location ,
188+ "food" : food ,
189+ "price" : price ,
190+ "registration" : registration ,
191+ "description" : description ,
192+ "rrule" : rrule ,
193+ "latitude" : latitude ,
194+ "longitude" : longitude ,
195+ "tz" : tz ,
196+ "school" : school ,
197+ "source_image_url" : source_image_url ,
173198 "all_day" : all_day ,
174199 "club_type" : club_type or event_data .get ("club_type" ) or "" ,
175200 "raw_json" : json .dumps (event_data , ensure_ascii = False ),
176201 "added_to_db" : added_to_db ,
202+ "status" : "CONFIRMED" ,
203+ "embedding" : embedding or "" ,
177204 }
178205 )
206+ logger .info (f"Event written to CSV with status: { added_to_db } " )
179207
180208
181209def insert_event_to_db (event_data , ig_handle , source_url ):
@@ -271,17 +299,21 @@ def insert_event_to_db(event_data, ig_handle, source_url):
271299 embedding = embedding ,
272300 club_type = club_type ,
273301 )
302+ logger .info ("Event added successfully" )
274303 return True
275304 except Exception as e :
276- logger .error (f"Error inserting event to db: { e } " )
277- append_event_to_csv (
278- event_data ,
279- ig_handle ,
280- source_url ,
281- added_to_db = "failed" ,
282- embedding = embedding ,
283- club_type = club_type ,
284- )
305+ logger .error (f"Error inserting event to DB: { e } " )
306+ try :
307+ append_event_to_csv (
308+ event_data ,
309+ ig_handle ,
310+ source_url ,
311+ added_to_db = "failed" ,
312+ embedding = embedding ,
313+ club_type = club_type ,
314+ )
315+ except Exception as csv_e :
316+ logger .error (f"Error writing event to CSV after DB failure: { csv_e } " )
285317 return False
286318
287319
@@ -327,7 +359,7 @@ def process_recent_feed(
327359 if post .shortcode in seen_shortcodes or post_time < cutoff :
328360 consec_old_posts += 1
329361 logger .debug (
330- f"Skipping post { post .shortcode } ; consec_old_posts={ consec_old_posts } "
362+ f"[ { post . shortcode } ] [ { post .owner_username } ] Skipping post ; consec_old_posts={ consec_old_posts } "
331363 )
332364 if consec_old_posts >= max_consec_old_posts :
333365 termination_reason = (
@@ -342,19 +374,17 @@ def process_recent_feed(
342374 consec_old_posts = 0
343375 posts_processed += 1
344376 logger .info ("-" * 100 )
345- logger .info (
346- f"Processing post: { post .shortcode } by { post .owner_username } "
347- )
377+ logger .info (f"[{ post .shortcode } ] [{ post .owner_username } ] Processing post" )
348378
349379 # Safely get image URL and upload to S3
350380 raw_image_url = get_post_image_url (post )
351381 if raw_image_url :
352382 time .sleep (random .uniform (1 , 3 ))
353383 source_image_url = upload_image_from_url (raw_image_url )
354- logger .info (f"Uploaded image to S3: { source_image_url } " )
384+ logger .info (f"[ { post . shortcode } ] [ { post . owner_username } ] Uploaded image to S3: { source_image_url } " )
355385 else :
356386 logger .warning (
357- f"No image URL found for post { post . shortcode } , skipping image upload"
387+ f"[ { post . shortcode } ] [ { post . owner_username } ] No image URL found for post, skipping image upload"
358388 )
359389 source_image_url = None
360390
@@ -363,14 +393,15 @@ def process_recent_feed(
363393 )
364394 if not events_data or len (events_data ) == 0 :
365395 logger .warning (
366- f"AI client returned no events for post { post . shortcode } "
396+ f"[ { post . shortcode } ] [ { post . owner_username } ] AI client returned no events for post"
367397 )
368398 if posts_processed >= max_posts :
369399 termination_reason = f"reached_max_posts={ max_posts } "
370400 logger .info (f"Reached max post limit of { max_posts } , stopping" )
371401 break
372402 continue
373403
404+ logger .debug (f"[{ post .shortcode } ] [{ post .owner_username } ] Event data: { json .dumps (events_data , ensure_ascii = False , separators = (',' , ':' ))} " )
374405 source_url = f"https://www.instagram.com/p/{ post .shortcode } /"
375406 today = datetime .now (timezone .utc ).date ()
376407
@@ -388,7 +419,7 @@ def process_recent_feed(
388419 if not event_data .get (key )
389420 ]
390421 logger .warning (
391- f"Missing required fields for event '{ event_data .get ('title' , 'Unknown' )} ': { missing_fields } , skipping event"
422+ f"[ { post . shortcode } ] [ { post . owner_username } ] Missing required fields for event '{ event_data .get ('title' , 'Unknown' )} ': { missing_fields } , skipping event"
392423 )
393424 embedding = generate_event_embedding (event_data )
394425 append_event_to_csv (
@@ -403,18 +434,18 @@ def process_recent_feed(
403434 date = datetime .fromisoformat (event_data .get ("dtstart" )).date ()
404435 if date < today :
405436 logger .info (
406- f"Skipping event '{ event_data .get ('title' )} ' with past date { date } "
437+ f"[ { post . shortcode } ] [ { post . owner_username } ] Skipping event '{ event_data .get ('title' )} ' with past date { date } "
407438 )
408439 continue
409440
410441 if insert_event_to_db (event_data , post .owner_username , source_url ):
411442 events_added += 1
412443 logger .info (
413- f"Successfully added event '{ event_data .get ('title' )} ' from { post . owner_username } "
444+ f"[ { post . shortcode } ] [ { post . owner_username } ] Successfully added event '{ event_data .get ('title' )} '"
414445 )
415446 else :
416447 logger .error (
417- f"Failed to add event '{ event_data .get ('title' )} ' from { post . owner_username } "
448+ f"[ { post . shortcode } ] [ { post . owner_username } ] Failed to add event '{ event_data .get ('title' )} '"
418449 )
419450
420451 if posts_processed >= max_posts :
@@ -426,9 +457,9 @@ def process_recent_feed(
426457
427458 except Exception as e :
428459 logger .error (
429- f"Error processing post { post .shortcode } by { post .owner_username } : { e !s} "
460+ f"[ { post .shortcode } ] [ { post .owner_username } ] Error processing post : { e !s} "
430461 )
431- logger .error (f"Traceback: { traceback .format_exc ()} " )
462+ logger .error (f"[ { post . shortcode } ] [ { post . owner_username } ] Traceback: { traceback .format_exc ()} " )
432463 time .sleep (random .uniform (3 , 8 ))
433464 continue
434465
0 commit comments