44import json
55from json_repair import repair_json
66import logging
7+ import math
78import os
89import random
910import re
@@ -146,10 +147,10 @@ def analyze_listening_profile(self, favorited_songs: List[Dict], top_artists: Li
146147
147148 # Diversity score: higher when more evenly distributed
148149 if total > 0 :
149- # Calculate normalized entropy
150- entropy = sum (- (count / total ) * (count / total ). bit_length ( ) for count in artist_counts .values () if count > 0 )
151- max_entropy = total . bit_length () if total > 1 else 1
152- profile ["diversity_score" ] = entropy / max_entropy if max_entropy > 0 else 0
150+ # Calculate normalized Shannon entropy
151+ entropy = - sum ((count / total ) * math . log2 (count / total ) for count in artist_counts .values () if count > 0 )
152+ max_entropy = math . log2 ( len ( artist_counts )) if len ( artist_counts ) > 1 else 1
153+ profile ["diversity_score" ] = min ( entropy / max_entropy , 1.0 ) if max_entropy > 0 else 0
153154
154155 profile ["artist_distribution" ] = dict (artist_counts .most_common (10 ))
155156
@@ -267,6 +268,59 @@ def _invalidate_cache(self) -> None:
267268 # Note: We don't delete call tracker to preserve daily limit
268269 logger .info ("Cache invalidation complete" )
269270
271+ def _load_recent_songs (self ) -> set :
272+ """Load recently recommended songs from disk.
273+
274+ Returns:
275+ Set of "artist - title" strings from recent runs, or empty set on failure.
276+ """
277+ recent_file = self .data_dir / "recent_playlist_songs.json"
278+ try :
279+ if recent_file .exists ():
280+ with open (recent_file , 'r' , encoding = 'utf-8' ) as f :
281+ data = json .load (f )
282+ if isinstance (data , list ):
283+ return set (data )
284+ except Exception as e :
285+ logger .warning ("Could not load recent songs: %s" , str (e )[:100 ])
286+ return set ()
287+
288+ def _save_recent_songs (self , songs : list ) -> None :
289+ """Save recently recommended songs to disk (capped at 200 entries across last 2 runs).
290+
291+ The file is written atomically (temp file + os.replace) so an interrupted
292+ write never leaves a corrupt or empty file on disk.
293+
294+ Args:
295+ songs: List of song dicts with "artist" and "title" keys from the new playlists.
296+ """
297+ recent_file = self .data_dir / "recent_playlist_songs.json"
298+ try :
299+ existing = self ._load_recent_songs ()
300+ new_entries = [
301+ f"{ s .get ('artist' , '' )} - { s .get ('title' , '' )} "
302+ for s in songs
303+ if s .get ('artist' ) and s .get ('title' )
304+ ]
305+ # Build ordered list: existing first (oldest), new entries appended last
306+ # so that truncation with [-200:] always keeps the most recent songs.
307+ seen : set = set ()
308+ ordered : list = []
309+ for entry in list (existing ) + new_entries :
310+ if entry not in seen :
311+ seen .add (entry )
312+ ordered .append (entry )
313+ # Cap at 200 entries (approximately 2 runs worth); drop oldest first
314+ combined = ordered [- 200 :]
315+ # Atomic write: write to a sibling temp file then replace
316+ tmp_file = recent_file .with_suffix (".json.tmp" )
317+ with open (tmp_file , 'w' , encoding = 'utf-8' ) as f :
318+ json .dump (combined , f , ensure_ascii = False )
319+ os .replace (tmp_file , recent_file )
320+ logger .info ("Saved %d recent songs to disk (%d total)" , len (new_entries ), len (combined ))
321+ except Exception as e :
322+ logger .warning ("Could not save recent songs: %s" , str (e )[:100 ])
323+
270324 def _build_cached_context (
271325 self ,
272326 top_artists : List [str ],
@@ -285,13 +339,14 @@ def _build_cached_context(
285339 Returns:
286340 Context string for AI
287341 """
288- artist_list = ", " .join (top_artists [:10 ] )
289- genre_list = ", " .join (top_genres [:6 ] )
342+ artist_list = ", " .join (random . sample ( top_artists [:20 ], min ( 10 , len ( top_artists [: 20 ]))) )
343+ genre_list = ", " .join (random . sample ( top_genres [:12 ], min ( 6 , len ( top_genres [: 12 ]))) )
290344
291- # Limit context for memory efficiency
345+ # Randomly sample a subset for variety — avoids O(n) shuffle of the full library
346+ k = min (self .max_context_songs , len (favorited_songs ))
292347 favorited_sample = [
293348 f"{ s .get ('artist' ,'' )} - { s .get ('title' ,'' )} "
294- for s in favorited_songs [: self . max_context_songs ]
349+ for s in random . sample ( favorited_songs , k )
295350 ]
296351 favorited_context = "\n " .join (favorited_sample )
297352
@@ -392,12 +447,18 @@ def _get_or_create_gemini_cache(
392447 logger .info ("Cache created: %s (expires in 24 hours)" , cached_content .name )
393448 return cached_content
394449
395- def _build_task_prompt (self , top_genres : List [str ], time_context : Optional [Dict [str , str ]] = None ) -> str :
450+ def _build_task_prompt (
451+ self ,
452+ top_genres : List [str ],
453+ time_context : Optional [Dict [str , str ]] = None ,
454+ recent_songs : Optional [set ] = None ,
455+ ) -> str :
396456 """Build the task-specific prompt with optional time-of-day awareness.
397457
398458 Args:
399459 top_genres: List of top genres
400460 time_context: Optional time-of-day context from get_time_context()
461+ recent_songs: Optional set of recently recommended "artist - title" strings to avoid
401462
402463 Returns:
403464 Task prompt string
@@ -411,7 +472,11 @@ def _build_task_prompt(self, top_genres: List[str], time_context: Optional[Dict[
411472 f'{ i + 2 } . "Daily Mix { i + 1 } " (30 songs, genre: { genre_name } ): 25 library + 5 new'
412473 )
413474
414- variety_seed = random .randint (1000 , 9999 )
475+ variety_seed = random .randint (100000 , 999999 ) # 6-digit range reduces collision probability across runs
476+
477+ # Pick a random decade bias hint for added variety
478+ decade_hints = ["1970s" , "1980s" , "1990s" , "2000s" , "2010s" , "2020s" , "Mix of all eras" ]
479+ decade_hint = random .choice (decade_hints )
415480
416481 # Add time-of-day context if provided
417482 time_guidance = ""
@@ -424,6 +489,16 @@ def _build_task_prompt(self, top_genres: List[str], time_context: Optional[Dict[
424489Guidance: { time_context .get ('guidance' , '' )}
425490
426491Apply this context when selecting NEW songs to match the current time of day.
492+ """
493+
494+ # Add recently recommended songs section if provided
495+ recent_songs_section = ""
496+ if recent_songs :
497+ sample_size = min (40 , len (recent_songs ))
498+ recent_sample = random .sample (sorted (recent_songs ), sample_size )
499+ recent_songs_section = f"""
500+ RECENTLY RECOMMENDED (avoid repeating these):
501+ { chr (10 ).join (recent_sample )}
427502"""
428503
429504 return f"""Generate exactly 11 playlists (Variety Seed: { variety_seed } ):
@@ -434,7 +509,8 @@ def _build_task_prompt(self, top_genres: List[str], time_context: Optional[Dict[
4345099. "Workout Energy" (30 songs): 25 library + 5 new high-energy
43551010. "Focus Flow" (30 songs): 25 library + 5 new ambient/instrumental
43651111. "Drive Time" (30 songs): 25 library + 5 new upbeat
437- { time_guidance }
512+ Decade focus: { decade_hint } — lean toward this era for new discoveries
513+ { time_guidance } { recent_songs_section }
438514Respond ONLY with a valid JSON array of objects, each with "artist" and "title" fields, using double quotes.
439515
440516{{
@@ -454,6 +530,9 @@ def _build_task_prompt(self, top_genres: List[str], time_context: Optional[Dict[
454530- ESCAPE ALL BACKSLASHES: Use \\ \\ not \\
455531- If song title has backslash, use double backslash
456532- Example: "AC\\ \\ DC" not "AC\\ DC"
533+ - Maximize variety: no artist should appear more than 2 times per playlist
534+ - Each playlist MUST have a different set of songs - NO song should appear in more than one playlist
535+ - Prioritize LESS POPULAR and DEEPER CUTS over well-known hits
457536"""
458537
459538 def _generate_with_gemini (
@@ -483,7 +562,8 @@ def _generate_with_gemini(
483562 if time_context :
484563 logger .info (f"🕐 Time context: { time_context .get ('description' )} - { time_context .get ('mood' )} " )
485564
486- prompt = self ._build_task_prompt (top_genres , time_context )
565+ recent_songs = self ._load_recent_songs ()
566+ prompt = self ._build_task_prompt (top_genres , time_context , recent_songs )
487567
488568 # Set thinking budget
489569 thinking_budget = 5000
@@ -515,6 +595,19 @@ def _generate_with_gemini(
515595 logger .warning ("Thinking budget nearly exhausted (%d/%d tokens)" ,
516596 thoughts , thinking_budget )
517597
598+ # Check for empty response
599+ if not response .text or response .text .strip () == "" :
600+ logger .error ("Gemini returned empty response" )
601+ raise ValueError ("Empty response from Gemini" )
602+
603+ # Validate JSON structure
604+ try :
605+ json .loads (response .text )
606+ except json .JSONDecodeError as e :
607+ logger .error (f"Gemini returned invalid JSON: { e } " )
608+ logger .debug (f"Problematic response start: { response .text [:500 ]} " )
609+ raise ValueError ("Invalid JSON response from Gemini" ) from e
610+
518611 return response .text
519612
520613 def _generate_with_openai (
@@ -544,7 +637,8 @@ def _generate_with_openai(
544637 if time_context :
545638 logger .info (f"🕐 Time context: { time_context .get ('description' )} - { time_context .get ('mood' )} " )
546639
547- task_prompt = self ._build_task_prompt (top_genres , time_context )
640+ recent_songs = self ._load_recent_songs ()
641+ task_prompt = self ._build_task_prompt (top_genres , time_context , recent_songs )
548642 full_prompt = f"{ cached_context } \n \n { task_prompt } "
549643
550644 response = self .client .chat .completions .create (
@@ -714,6 +808,11 @@ def generate_all_playlists(
714808 self ._record_ai_call ()
715809 total = sum (len (songs ) for songs in all_playlists .values ())
716810 logger .info ("Generated %d playlists (%d songs)" , len (all_playlists ), total )
811+
812+ # Persist all new songs for cross-run deduplication
813+ all_new_songs = [song for songs in all_playlists .values () for song in songs ]
814+ self ._save_recent_songs (all_new_songs )
815+
717816 return all_playlists , None
718817
719818 def _generate_with_retry (self , generate_func , * args , ** kwargs ) -> str :
0 commit comments