Merge remote-tracking branch 'origin/main' into prod

owenisas · owenisas · commit c14cb30033f7 · 2026-03-22T16:34:15.000-07:00
diff --git a/backend/agent.py b/backend/agent.py
@@ -973,7 +973,7 @@ def assess_context_or_answer_simple(
                 include_reasoning=False,
             )
             _record_generation_usage(usage_tracker, followup)
-            return False, followup.get("content") or ""
+            return False, _sanitize_clarification_response(followup.get("content") or "", session=session)
 
     # Check if context is sufficient.
     content_upper = content.upper().strip()
@@ -985,7 +985,7 @@ def assess_context_or_answer_simple(
     if depth == "Short":
         return True, ""
 
-    return False, content
+    return False, _sanitize_clarification_response(content, session=session)
 
 def execute_tool(name, arguments, session=None):
     """Execute a tool by name with the given arguments.
@@ -1209,17 +1209,16 @@ def _build_content_nodes(created_main_block, tool_results=None):
         return []
 
     content = str(created_main_block.get("content") or "").strip()
-    if not content:
-        return []
-
     nodes = []
 
     # --- 1. Split by ## headings into markdown sections -----------------------
     import re as _re
-    sections = _re.split(r"\n(?=##\s|###\s)", content)
-    sections = [s.strip() for s in sections if s.strip()]
-    if not sections:
-        sections = [content]
+    sections = []
+    if content:
+        sections = _re.split(r"\n(?=##\s|###\s)", content)
+        sections = [s.strip() for s in sections if s.strip()]
+        if not sections:
+            sections = [content]
 
     # --- 2. Collect media from tool_results (deduplicated by URL) ---------------
     seen_urls = set()
@@ -1275,44 +1274,49 @@ def _build_content_nodes(created_main_block, tool_results=None):
         re.IGNORECASE,
     )
 
-    # Split sections into those eligible for images and those that aren't.
-    eligible_indices = []
-    for idx, section in enumerate(sections):
-        first_line = section.split("\n", 1)[0].strip()
-        if _IMAGE_ONLY_HEADING.match(first_line):
-            continue  # drop image-only sections entirely
-        if _CONCLUSION_HEADING.match(first_line):
-            sections[idx] = section  # keep but don't attach images
-        else:
-            eligible_indices.append(idx)
-
-    # Remove image-only sections from the list.
-    sections = [s for s in sections if not _IMAGE_ONLY_HEADING.match(s.split("\n", 1)[0].strip())]
-    # Recompute eligible indices after removal.
-    eligible_indices = []
-    for idx, section in enumerate(sections):
-        first_line = section.split("\n", 1)[0].strip()
-        if not _CONCLUSION_HEADING.match(first_line):
-            eligible_indices.append(idx)
-
-    max_media = max(len(eligible_indices) * 2, 1)  # up to 2 images per section
-    media_items = media_items[:max_media]
-
-    # --- 3. Interleave markdown sections + media (up to 2 per eligible section)
-    # Distribute media across eligible sections, skipping conclusion/preamble.
-    media_assignment: dict[int, list] = {}  # section_index -> [media_items]
-    for i, media in enumerate(media_items):
-        # Round-robin: fill each eligible section with 1, then loop back for 2nd
-        slot = i % len(eligible_indices) if eligible_indices else 0
-        sec_idx = eligible_indices[slot] if slot < len(eligible_indices) else 0
-        media_assignment.setdefault(sec_idx, [])
-        if len(media_assignment[sec_idx]) < 2:
-            media_assignment[sec_idx].append(media)
-
-    for idx, section in enumerate(sections):
-        nodes.append({"type": "markdown", "content": section})
-        for media in media_assignment.get(idx, []):
-            nodes.append(media)
+    if sections:
+        # Split sections into those eligible for images and those that aren't.
+        eligible_indices = []
+        for idx, section in enumerate(sections):
+            first_line = section.split("\n", 1)[0].strip()
+            if _IMAGE_ONLY_HEADING.match(first_line):
+                continue  # drop image-only sections entirely
+            if _CONCLUSION_HEADING.match(first_line):
+                sections[idx] = section  # keep but don't attach images
+            else:
+                eligible_indices.append(idx)
+
+        # Remove image-only sections from the list.
+        sections = [s for s in sections if not _IMAGE_ONLY_HEADING.match(s.split("\n", 1)[0].strip())]
+        # Recompute eligible indices after removal.
+        eligible_indices = []
+        for idx, section in enumerate(sections):
+            first_line = section.split("\n", 1)[0].strip()
+            if not _CONCLUSION_HEADING.match(first_line):
+                eligible_indices.append(idx)
+
+        max_media = max(len(eligible_indices) * 2, 1)  # up to 2 images per section
+        media_items = media_items[:max_media]
+
+        # --- 3. Interleave markdown sections + media (up to 2 per eligible section)
+        # Distribute media across eligible sections, skipping conclusion/preamble.
+        media_assignment: dict[int, list] = {}  # section_index -> [media_items]
+        for i, media in enumerate(media_items):
+            # Round-robin: fill each eligible section with 1, then loop back for 2nd
+            slot = i % len(eligible_indices) if eligible_indices else 0
+            sec_idx = eligible_indices[slot] if slot < len(eligible_indices) else 0
+            media_assignment.setdefault(sec_idx, [])
+            if len(media_assignment[sec_idx]) < 2:
+                media_assignment[sec_idx].append(media)
+
+        for idx, section in enumerate(sections):
+            nodes.append({"type": "markdown", "content": section})
+            for media in media_assignment.get(idx, []):
+                nodes.append(media)
+    else:
+        # Media-only / structured-only results are still renderable and should
+        # not collapse into an empty frontend block.
+        nodes.extend(media_items)
 
     # --- 4. Append structured nodes -------------------------------------------
     objectives = created_main_block.get("learning_objectives")
@@ -2131,6 +2135,12 @@ def _exec_sub(section):
             )
             if content_nodes:
                 response_block.metadata["content_nodes"] = content_nodes
+            elif not (response_block.content or "").strip():
+                response_block.content = (
+                    "I couldn't assemble a usable response for that request. "
+                    "Please try again."
+                )
+                response_block.metadata["empty_generation_fallback"] = True
             
             response_block.metadata["response_kind"] = "answer"
             depth_val = (session.user_profile or {}).get("explanation_depth", "Medium")
@@ -2227,6 +2237,12 @@ def _exec_sub(section):
                     )
                     if content_nodes:
                         response_block.metadata["content_nodes"] = content_nodes
+                    elif not (response_block.content or "").strip():
+                        response_block.content = (
+                            "I couldn't assemble a usable response for that request. "
+                            "Please try again."
+                        )
+                        response_block.metadata["empty_generation_fallback"] = True
                     response_block.metadata["response_kind"] = "answer"
                     if is_main_block:
                         depth_val = (session.user_profile or {}).get("explanation_depth", "Medium")
diff --git a/backend/tests/test_agent_tool_choice.py b/backend/tests/test_agent_tool_choice.py
@@ -1,8 +1,10 @@
 import sys
+from unittest.mock import patch
 
 sys.path.insert(0, ".")
 
 from agent import (
+    assess_context_or_answer_simple,
     _context_missing_fields,
     _merge_other_context_notes,
     _requests_text_only,
@@ -91,3 +93,18 @@ def test_sanitize_clarification_keeps_short_question():
     session = DummySession({})
     short_question = "What is your background with this topic, and what is your goal?"
     assert _sanitize_clarification_response(short_question, session=session) == short_question
+
+
+def test_assess_context_or_answer_simple_rewrites_empty_clarification():
+    session = DummySession({"__context_slots__": "familiarity"})
+
+    with patch("agent.llm_chat") as mock_chat:
+        mock_chat.return_value = {"content": "", "tool_calls": None}
+        is_ready, prompt = assess_context_or_answer_simple(
+            session=session,
+            user_message="Explain ASMR history",
+            model="gpt-5.4",
+        )
+
+    assert is_ready is False
+    assert "What are you learning this for" in prompt
diff --git a/backend/tests/test_block_generation.py b/backend/tests/test_block_generation.py
@@ -1097,6 +1097,34 @@ def test_build_content_nodes_assembles_all_node_types():
     assert quiz_node["questions"][0]["id"] == "q1"
 
 
+def test_build_content_nodes_preserves_media_without_markdown_content():
+    """Media-only responses should still produce renderable content nodes."""
+    from agent import _build_content_nodes
+
+    created_main_block = {
+        "title": "Visuals Only",
+        "summary": "Only media is available",
+        "content": "",
+    }
+    tool_results = [
+        {"images": [{"url": "https://img.example.com/asmr.jpg", "title": "ASMR setup"}]},
+    ]
+
+    nodes = _build_content_nodes(created_main_block, tool_results)
+
+    assert nodes == [
+        {
+            "type": "media",
+            "item": {
+                "id": "img-0",
+                "type": "image",
+                "source": "https://img.example.com/asmr.jpg",
+                "label": "ASMR setup",
+            },
+        }
+    ]
+
+
 def test_create_main_block_with_structured_fields_stores_content_nodes():
     """create_main_block with structured fields stores content_nodes in block metadata."""
     from agent import run_agent_with_session
@@ -1150,3 +1178,31 @@ def test_create_main_block_with_structured_fields_stores_content_nodes():
     assert "learningObjectives" in types
     assert "keyTerms" in types
     assert "quiz" in types
+
+
+def test_run_agent_with_session_uses_nonempty_fallback_when_orchestrator_returns_nothing():
+    """Main-block orchestration should never persist an entirely empty assistant block."""
+    from agent import run_agent_with_session
+    from models import Session
+
+    session = Session(system_prompt="You are a tutor.", user_profile={"__context_slots__": "familiarity,goal"})
+
+    with patch("agent.assess_context_or_answer_simple") as mock_preflight, \
+         patch("agent.run_orchestrator") as mock_orch, \
+         patch("agent.llm_chat") as mock_chat:
+        mock_preflight.return_value = (True, "")
+        mock_orch.return_value = [{"title": "Main", "instructions": "Answer directly"}]
+        mock_chat.side_effect = [
+            {"content": "", "tool_calls": None},
+        ]
+        block = run_agent_with_session(
+            session=session,
+            user_message="Explain the history of ASMR.",
+            model="test-model",
+            max_turns=1,
+            verbose=False,
+            persist_block=True,
+        )
+
+    assert block.content == "I couldn't assemble a usable response for that request. Please try again."
+    assert block.metadata.get("empty_generation_fallback") is True