Skip to content

Commit 557c655

Browse files
Implement memory management features: add memory retrieval, clearing, and mode settings; enhance conversation handling with memory context.
1 parent 0ab50fa commit 557c655

11 files changed

Lines changed: 740 additions & 19 deletions

File tree

backend/config.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,8 @@
2929

3030
# Data directory for conversation storage
DATA_DIR = "data/conversations"


# Memory summarization mode: 'local' uses the on-device mem0 summarizer,
# 'model' delegates summarization to a model.
MEMORY_MODE = os.getenv("MEMORY_MODE", "local")

# Maximum number of sentences kept in a locally produced summary.
MEMORY_LOCAL_MAX_SENTENCES = int(os.getenv("MEMORY_LOCAL_MAX_SENTENCES", "3"))

backend/council.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,16 @@ async def stage1_collect_responses(messages: List[Dict[str, Any]]) -> List[Dict[
3333

3434
async def stage2_collect_rankings(
3535
messages: List[Dict[str, Any]],
36-
stage1_results: List[Dict[str, Any]]
36+
stage1_results: List[Dict[str, Any]],
37+
memory_summary: str = ""
3738
) -> Tuple[List[Dict[str, Any]], Dict[str, str]]:
3839
"""
39-
Stage 2: Each model ranks the anonymized responses using conversation context.
40+
Stage 2: Each model ranks the anonymized responses using conversation context and memory.
4041
4142
Args:
4243
messages: The conversation messages (including the latest user message)
4344
stage1_results: Results from Stage 1
45+
memory_summary: Optional concise memory summary to include for context
4446
4547
Returns:
4648
Tuple of (rankings list, label_to_model mapping)
@@ -74,9 +76,12 @@ async def stage2_collect_rankings(
7476

7577
conversation_text = "\n".join(conversation_history)
7678

79+
# Include memory summary if available
80+
memory_block = f"Memory summary: {memory_summary}\n\n" if memory_summary else ""
81+
7782
ranking_prompt = f"""You are evaluating different responses to the user's latest question.
7883
79-
Conversation history:
84+
{memory_block}Conversation history:
8085
{conversation_text}
8186
8287
Latest question: {user_query}
@@ -120,7 +125,8 @@ async def stage2_collect_rankings(
120125
async def stage3_synthesize_final(
121126
messages: List[Dict[str, Any]],
122127
stage1_results: List[Dict[str, Any]],
123-
stage2_results: List[Dict[str, Any]]
128+
stage2_results: List[Dict[str, Any]],
129+
memory_summary: str = ""
124130
) -> Dict[str, Any]:
125131
"""
126132
Stage 3: Chairman synthesizes final response using conversation context.
@@ -157,9 +163,11 @@ async def stage3_synthesize_final(
157163
conversation_history.append(f"Assistant: {content}")
158164
conversation_text = "\n".join(conversation_history)
159165

166+
memory_block = f"Memory summary: {memory_summary}\n\n" if memory_summary else ""
167+
160168
chairman_prompt = f"""You are the Chairman of an LLM Council. Multiple AI models have provided responses to a user's question, and then ranked each other's responses.
161169
162-
Conversation history:
170+
{memory_block}Conversation history:
163171
{conversation_text}
164172
165173
Latest question: {latest_user}
@@ -315,12 +323,13 @@ async def generate_conversation_title(user_query: str) -> str:
315323
return title
316324

317325

318-
async def run_full_council(messages: List[Dict[str, Any]]) -> Tuple[List, List, Dict, Dict]:
326+
async def run_full_council(messages: List[Dict[str, Any]], memory_summary: str = "") -> Tuple[List, List, Dict, Dict]:
319327
"""
320-
Run the complete 3-stage council process with conversation context.
328+
Run the complete 3-stage council process with conversation context and memory.
321329
322330
Args:
323331
messages: The conversation messages (including the latest user message)
332+
memory_summary: Optional concise memory summary to include for context
324333
325334
Returns:
326335
Tuple of (stage1_results, stage2_results, stage3_result, metadata)
@@ -336,7 +345,7 @@ async def run_full_council(messages: List[Dict[str, Any]]) -> Tuple[List, List,
336345
}, {}
337346

338347
# Stage 2: Collect rankings
339-
stage2_results, label_to_model = await stage2_collect_rankings(messages, stage1_results)
348+
stage2_results, label_to_model = await stage2_collect_rankings(messages, stage1_results, memory_summary)
340349

341350
# Calculate aggregate rankings
342351
aggregate_rankings = calculate_aggregate_rankings(stage2_results, label_to_model)
@@ -345,7 +354,8 @@ async def run_full_council(messages: List[Dict[str, Any]]) -> Tuple[List, List,
345354
stage3_result = await stage3_synthesize_final(
346355
messages,
347356
stage1_results,
348-
stage2_results
357+
stage2_results,
358+
memory_summary
349359
)
350360

351361
# Prepare metadata
@@ -354,4 +364,4 @@ async def run_full_council(messages: List[Dict[str, Any]]) -> Tuple[List, List,
354364
"aggregate_rankings": aggregate_rankings
355365
}
356366

357-
return stage1_results, stage2_results, stage3_result, metadata
367+
return stage1_results, stage2_results, stage3_result, metadata

backend/main.py

Lines changed: 86 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import asyncio
1111

1212
from . import storage
13+
from . import memory
1314
from .council import run_full_council, generate_conversation_title, stage1_collect_responses, stage2_collect_rankings, stage3_synthesize_final, calculate_aggregate_rankings
1415

1516
app = FastAPI(title="LLM Council API")
@@ -79,6 +80,52 @@ async def get_conversation(conversation_id: str):
7980
return conversation
8081

8182

83+
@app.get("/api/conversations/{conversation_id}/memory")
async def get_conversation_memory(conversation_id: str):
    """Return the memory (short entries and summary) for a conversation.

    Responds 404 when the memory record cannot be loaded.
    """
    try:
        record = memory.get_memory(conversation_id)
    except Exception:
        # NOTE(review): every load failure is reported as a missing
        # conversation, which also masks genuine server errors as 404 —
        # confirm memory.get_memory only raises for unknown conversation ids.
        raise HTTPException(status_code=404, detail="Conversation not found")
    return record
91+
92+
93+
@app.post("/api/conversations/{conversation_id}/memory/clear")
async def clear_conversation_memory(conversation_id: str):
    """Clear a conversation's memory.

    Responds 404 when the clear operation fails.
    """
    try:
        memory.clear_memory(conversation_id)
    except Exception:
        # NOTE(review): assumes clear_memory fails only for unknown
        # conversation ids — verify against the memory module.
        raise HTTPException(status_code=404, detail="Conversation not found")
    return {"status": "ok"}
101+
102+
103+
@app.get("/api/memory/mode")
async def get_memory_mode():
    """Return the current runtime memory mode and local settings."""
    # Imported inside the handler rather than at module top — presumably to
    # avoid an import cycle or pick up config lazily; confirm before hoisting.
    from .config import MEMORY_LOCAL_MAX_SENTENCES

    try:
        current_mode = memory.get_runtime_mode()
    except Exception:
        raise HTTPException(status_code=500, detail="Unable to retrieve memory mode")
    return {"mode": current_mode, "local_max_sentences": MEMORY_LOCAL_MAX_SENTENCES}
112+
113+
114+
@app.post("/api/memory/mode")
async def set_memory_mode(payload: Dict[str, str]):
    """Set the runtime memory mode.

    Expects a JSON body of the form {"mode": "local"|"model"}; any other
    value is rejected with 400 before touching the memory layer.
    """
    mode = payload.get("mode")
    # Guard clause: validate the requested mode up front.
    if mode not in ("local", "model"):
        raise HTTPException(status_code=400, detail="mode must be 'local' or 'model'")
    try:
        memory.set_runtime_mode(mode)
    except ValueError as e:
        # The memory layer itself rejected the value — report as a client error.
        raise HTTPException(status_code=400, detail=str(e))
    except Exception:
        raise HTTPException(status_code=500, detail="Unable to set memory mode")
    return {"status": "ok", "mode": mode}
127+
128+
82129
@app.post("/api/conversations/{conversation_id}/message")
83130
async def send_message(conversation_id: str, request: SendMessageRequest):
84131
"""
@@ -117,8 +164,15 @@ async def send_message(conversation_id: str, request: SendMessageRequest):
117164
# Append the new user message (most recent)
118165
messages.append({"role": "user", "content": request.content})
119166

120-
# Run the 3-stage council process with conversation context
121-
stage1_results, stage2_results, stage3_result, metadata = await run_full_council(messages)
167+
# Get existing memory summary and pass it into the council
168+
try:
169+
mem = memory.get_memory(conversation_id)
170+
memory_summary = mem.get("summary", "")
171+
except Exception:
172+
memory_summary = ""
173+
174+
# Run the 3-stage council process with conversation context and memory
175+
stage1_results, stage2_results, stage3_result, metadata = await run_full_council(messages, memory_summary)
122176

123177
# Add assistant message with all stages
124178
storage.add_assistant_message(
@@ -128,6 +182,17 @@ async def send_message(conversation_id: str, request: SendMessageRequest):
128182
stage3_result
129183
)
130184

185+
# Update the conversation-level memory in the background (do not block the response)
186+
try:
187+
asyncio.create_task(memory.add_exchange_and_update_summary(
188+
conversation_id,
189+
request.content,
190+
stage3_result.get("response", "") if isinstance(stage3_result, dict) else ""
191+
))
192+
except Exception:
193+
# Non-fatal if memory update fails
194+
pass
195+
131196
# Return the complete response with metadata
132197
return {
133198
"stage1": stage1_results,
@@ -173,20 +238,27 @@ async def event_generator():
173238
messages.append({"role": "assistant", "content": assistant_content})
174239
messages.append({"role": "user", "content": request.content})
175240

241+
# Get existing memory summary and pass it into the council
242+
try:
243+
mem = memory.get_memory(conversation_id)
244+
memory_summary = mem.get("summary", "")
245+
except Exception:
246+
memory_summary = ""
247+
176248
# Stage 1: Collect responses
177249
yield f"data: {json.dumps({'type': 'stage1_start'})}\n\n"
178250
stage1_results = await stage1_collect_responses(messages)
179251
yield f"data: {json.dumps({'type': 'stage1_complete', 'data': stage1_results})}\n\n"
180252

181253
# Stage 2: Collect rankings
182254
yield f"data: {json.dumps({'type': 'stage2_start'})}\n\n"
183-
stage2_results, label_to_model = await stage2_collect_rankings(messages, stage1_results)
255+
stage2_results, label_to_model = await stage2_collect_rankings(messages, stage1_results, memory_summary)
184256
aggregate_rankings = calculate_aggregate_rankings(stage2_results, label_to_model)
185257
yield f"data: {json.dumps({'type': 'stage2_complete', 'data': stage2_results, 'metadata': {'label_to_model': label_to_model, 'aggregate_rankings': aggregate_rankings}})}\n\n"
186258

187259
# Stage 3: Synthesize final answer
188260
yield f"data: {json.dumps({'type': 'stage3_start'})}\n\n"
189-
stage3_result = await stage3_synthesize_final(messages, stage1_results, stage2_results)
261+
stage3_result = await stage3_synthesize_final(messages, stage1_results, stage2_results, memory_summary)
190262
yield f"data: {json.dumps({'type': 'stage3_complete', 'data': stage3_result})}\n\n"
191263

192264
# Wait for title generation if it was started
@@ -203,6 +275,16 @@ async def event_generator():
203275
stage3_result
204276
)
205277

278+
# Update memory in the background
279+
try:
280+
asyncio.create_task(memory.add_exchange_and_update_summary(
281+
conversation_id,
282+
request.content,
283+
stage3_result.get("response", "") if isinstance(stage3_result, dict) else ""
284+
))
285+
except Exception:
286+
pass
287+
206288
# Send completion event
207289
yield f"data: {json.dumps({'type': 'complete'})}\n\n"
208290

0 commit comments

Comments (0)