Skip to content

Commit ea37b9f

Browse files
authored
Sync dev with main
Added empty response check, JSON validation, improved playlist variety, general code cleanup/polishing
2 parents d4548a3 + 585eb37 commit ea37b9f

2 files changed

Lines changed: 191 additions & 95 deletions

File tree

octogen/ai/engine.py

Lines changed: 112 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import json
55
from json_repair import repair_json
66
import logging
7+
import math
78
import os
89
import random
910
import re
@@ -146,10 +147,10 @@ def analyze_listening_profile(self, favorited_songs: List[Dict], top_artists: Li
146147

147148
# Diversity score: higher when more evenly distributed
148149
if total > 0:
149-
# Calculate normalized entropy
150-
entropy = sum(-(count/total) * (count/total).bit_length() for count in artist_counts.values() if count > 0)
151-
max_entropy = total.bit_length() if total > 1 else 1
152-
profile["diversity_score"] = entropy / max_entropy if max_entropy > 0 else 0
150+
# Calculate normalized Shannon entropy
151+
entropy = -sum((count/total) * math.log2(count/total) for count in artist_counts.values() if count > 0)
152+
max_entropy = math.log2(len(artist_counts)) if len(artist_counts) > 1 else 1
153+
profile["diversity_score"] = min(entropy / max_entropy, 1.0) if max_entropy > 0 else 0
153154

154155
profile["artist_distribution"] = dict(artist_counts.most_common(10))
155156

@@ -267,6 +268,59 @@ def _invalidate_cache(self) -> None:
267268
# Note: We don't delete call tracker to preserve daily limit
268269
logger.info("Cache invalidation complete")
269270

271+
def _load_recent_songs(self) -> set:
    """Load recently recommended songs from disk.

    Reads ``recent_playlist_songs.json`` from ``self.data_dir``. The file is
    expected to contain a JSON list of "artist - title" strings. Entries that
    are not strings are dropped, so the returned set can always be sorted and
    joined as text.

    Returns:
        Set of "artist - title" strings from recent runs, or an empty set
        when the file is missing, unreadable, or does not hold a JSON list.
    """
    recent_file = self.data_dir / "recent_playlist_songs.json"
    try:
        with open(recent_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        # No history yet (e.g. first run): nothing to report.
        return set()
    except Exception as e:
        # Deliberately broad: the history is optional, so a damaged file must
        # never abort the caller; log it and carry on without history.
        logger.warning("Could not load recent songs: %s", str(e)[:100])
        return set()

    if not isinstance(data, list):
        return set()
    # Keep only strings: other JSON values (numbers, null, objects) would be
    # unhashable or would break sorting/joining of the result.
    return {entry for entry in data if isinstance(entry, str)}
287+
288+
def _save_recent_songs(self, songs: list) -> None:
    """Save recently recommended songs to disk (capped at 200 entries).

    The stored JSON list is kept in oldest-to-newest order. Songs from this
    call are placed at the newest end (a song that was already stored is
    moved there rather than duplicated), and when the list exceeds the cap
    the oldest entries are dropped first.

    The file is written atomically (temp file + os.replace) so an interrupted
    write never leaves a corrupt or empty file on disk. Failures are logged
    as warnings and never raised to the caller.

    Args:
        songs: List of song dicts with "artist" and "title" keys from the new
            playlists. Items that are not dicts, or that lack either key, are
            skipped.
    """
    recent_file = self.data_dir / "recent_playlist_songs.json"
    max_entries = 200  # approximately 2 runs worth
    try:
        # Read the stored list directly instead of going through a set:
        # a set has no defined order, which would make "drop oldest first"
        # truncation below drop arbitrary entries.
        try:
            with open(recent_file, 'r', encoding='utf-8') as f:
                stored = json.load(f)
        except FileNotFoundError:
            stored = []
        except Exception as load_err:
            logger.warning("Could not load recent songs: %s", str(load_err)[:100])
            stored = []
        if not isinstance(stored, list):
            stored = []

        new_entries = [
            f"{s.get('artist', '')} - {s.get('title', '')}"
            for s in songs
            if isinstance(s, dict) and s.get('artist') and s.get('title')
        ]

        # dict.fromkeys de-duplicates while preserving first-seen order.
        newest = dict.fromkeys(new_entries)
        previous = dict.fromkeys(e for e in stored if isinstance(e, str))
        # Older entries first, minus anything recommended again in this run;
        # this run's songs go last so [-max_entries:] keeps the most recent.
        ordered = [e for e in previous if e not in newest]
        ordered.extend(newest)
        combined = ordered[-max_entries:]

        # Atomic write: write to a sibling temp file then replace
        tmp_file = recent_file.with_suffix(".json.tmp")
        try:
            with open(tmp_file, 'w', encoding='utf-8') as f:
                json.dump(combined, f, ensure_ascii=False)
            os.replace(tmp_file, recent_file)
        except Exception:
            # Do not leave a stray temp file behind after a failed write.
            try:
                os.remove(tmp_file)
            except OSError:
                pass
            raise
        logger.info("Saved %d recent songs to disk (%d total)", len(new_entries), len(combined))
    except Exception as e:
        logger.warning("Could not save recent songs: %s", str(e)[:100])
323+
270324
def _build_cached_context(
271325
self,
272326
top_artists: List[str],
@@ -285,13 +339,14 @@ def _build_cached_context(
285339
Returns:
286340
Context string for AI
287341
"""
288-
artist_list = ", ".join(top_artists[:10])
289-
genre_list = ", ".join(top_genres[:6])
342+
artist_list = ", ".join(random.sample(top_artists[:20], min(10, len(top_artists[:20]))))
343+
genre_list = ", ".join(random.sample(top_genres[:12], min(6, len(top_genres[:12]))))
290344

291-
# Limit context for memory efficiency
345+
# Randomly sample a subset for variety — avoids O(n) shuffle of the full library
346+
k = min(self.max_context_songs, len(favorited_songs))
292347
favorited_sample = [
293348
f"{s.get('artist','')} - {s.get('title','')}"
294-
for s in favorited_songs[: self.max_context_songs]
349+
for s in random.sample(favorited_songs, k)
295350
]
296351
favorited_context = "\n".join(favorited_sample)
297352

@@ -392,12 +447,18 @@ def _get_or_create_gemini_cache(
392447
logger.info("Cache created: %s (expires in 24 hours)", cached_content.name)
393448
return cached_content
394449

395-
def _build_task_prompt(self, top_genres: List[str], time_context: Optional[Dict[str, str]] = None) -> str:
450+
def _build_task_prompt(
451+
self,
452+
top_genres: List[str],
453+
time_context: Optional[Dict[str, str]] = None,
454+
recent_songs: Optional[set] = None,
455+
) -> str:
396456
"""Build the task-specific prompt with optional time-of-day awareness.
397457
398458
Args:
399459
top_genres: List of top genres
400460
time_context: Optional time-of-day context from get_time_context()
461+
recent_songs: Optional set of recently recommended "artist - title" strings to avoid
401462
402463
Returns:
403464
Task prompt string
@@ -411,7 +472,11 @@ def _build_task_prompt(self, top_genres: List[str], time_context: Optional[Dict[
411472
f'{i+2}. "Daily Mix {i+1}" (30 songs, genre: {genre_name}): 25 library + 5 new'
412473
)
413474

414-
variety_seed = random.randint(1000, 9999)
475+
variety_seed = random.randint(100000, 999999) # 6-digit range reduces collision probability across runs
476+
477+
# Pick a random decade bias hint for added variety
478+
decade_hints = ["1970s", "1980s", "1990s", "2000s", "2010s", "2020s", "Mix of all eras"]
479+
decade_hint = random.choice(decade_hints)
415480

416481
# Add time-of-day context if provided
417482
time_guidance = ""
@@ -424,6 +489,16 @@ def _build_task_prompt(self, top_genres: List[str], time_context: Optional[Dict[
424489
Guidance: {time_context.get('guidance', '')}
425490
426491
Apply this context when selecting NEW songs to match the current time of day.
492+
"""
493+
494+
# Add recently recommended songs section if provided
495+
recent_songs_section = ""
496+
if recent_songs:
497+
sample_size = min(40, len(recent_songs))
498+
recent_sample = random.sample(sorted(recent_songs), sample_size)
499+
recent_songs_section = f"""
500+
RECENTLY RECOMMENDED (avoid repeating these):
501+
{chr(10).join(recent_sample)}
427502
"""
428503

429504
return f"""Generate exactly 11 playlists (Variety Seed: {variety_seed}):
@@ -434,7 +509,8 @@ def _build_task_prompt(self, top_genres: List[str], time_context: Optional[Dict[
434509
9. "Workout Energy" (30 songs): 25 library + 5 new high-energy
435510
10. "Focus Flow" (30 songs): 25 library + 5 new ambient/instrumental
436511
11. "Drive Time" (30 songs): 25 library + 5 new upbeat
437-
{time_guidance}
512+
Decade focus: {decade_hint} — lean toward this era for new discoveries
513+
{time_guidance}{recent_songs_section}
438514
Respond ONLY with a valid JSON array of objects, each with "artist" and "title" fields, using double quotes.
439515
440516
{{
@@ -454,6 +530,9 @@ def _build_task_prompt(self, top_genres: List[str], time_context: Optional[Dict[
454530
- ESCAPE ALL BACKSLASHES: Use \\\\ not \\
455531
- If song title has backslash, use double backslash
456532
- Example: "AC\\\\DC" not "AC\\DC"
533+
- Maximize variety: no artist should appear more than 2 times per playlist
534+
- Each playlist MUST have a different set of songs - NO song should appear in more than one playlist
535+
- Prioritize LESS POPULAR and DEEPER CUTS over well-known hits
457536
"""
458537

459538
def _generate_with_gemini(
@@ -483,7 +562,8 @@ def _generate_with_gemini(
483562
if time_context:
484563
logger.info(f"🕐 Time context: {time_context.get('description')} - {time_context.get('mood')}")
485564

486-
prompt = self._build_task_prompt(top_genres, time_context)
565+
recent_songs = self._load_recent_songs()
566+
prompt = self._build_task_prompt(top_genres, time_context, recent_songs)
487567

488568
# Set thinking budget
489569
thinking_budget = 5000
@@ -515,6 +595,19 @@ def _generate_with_gemini(
515595
logger.warning("Thinking budget nearly exhausted (%d/%d tokens)",
516596
thoughts, thinking_budget)
517597

598+
# Check for empty response
599+
if not response.text or response.text.strip() == "":
600+
logger.error("Gemini returned empty response")
601+
raise ValueError("Empty response from Gemini")
602+
603+
# Validate JSON structure
604+
try:
605+
json.loads(response.text)
606+
except json.JSONDecodeError as e:
607+
logger.error(f"Gemini returned invalid JSON: {e}")
608+
logger.debug(f"Problematic response start: {response.text[:500]}")
609+
raise ValueError("Invalid JSON response from Gemini") from e
610+
518611
return response.text
519612

520613
def _generate_with_openai(
@@ -544,7 +637,8 @@ def _generate_with_openai(
544637
if time_context:
545638
logger.info(f"🕐 Time context: {time_context.get('description')} - {time_context.get('mood')}")
546639

547-
task_prompt = self._build_task_prompt(top_genres, time_context)
640+
recent_songs = self._load_recent_songs()
641+
task_prompt = self._build_task_prompt(top_genres, time_context, recent_songs)
548642
full_prompt = f"{cached_context}\n\n{task_prompt}"
549643

550644
response = self.client.chat.completions.create(
@@ -714,6 +808,11 @@ def generate_all_playlists(
714808
self._record_ai_call()
715809
total = sum(len(songs) for songs in all_playlists.values())
716810
logger.info("Generated %d playlists (%d songs)", len(all_playlists), total)
811+
812+
# Persist all new songs for cross-run deduplication
813+
all_new_songs = [song for songs in all_playlists.values() for song in songs]
814+
self._save_recent_songs(all_new_songs)
815+
717816
return all_playlists, None
718817

719818
def _generate_with_retry(self, generate_func, *args, **kwargs) -> str:

0 commit comments

Comments
 (0)