Skip to content

Commit 557c655

Browse files
Implement memory management features: add memory retrieval, clearing, and mode settings; enhance conversation handling with memory context.
1 parent 0ab50fa commit 557c655

11 files changed

Lines changed: 740 additions & 19 deletions

File tree

backend/config.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,8 @@
2929

3030
# Data directory for conversation storage
DATA_DIR = "data/conversations"


# Memory summarization mode: 'local' uses the on-device mem0 summarizer,
# 'model' delegates summarization to a model.
MEMORY_MODE = os.getenv("MEMORY_MODE", "local")

# Maximum number of sentences kept in a locally produced summary.
MEMORY_LOCAL_MAX_SENTENCES = int(os.getenv("MEMORY_LOCAL_MAX_SENTENCES", "3"))

backend/council.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,16 @@ async def stage1_collect_responses(messages: List[Dict[str, Any]]) -> List[Dict[
3333

3434
async def stage2_collect_rankings(
3535
messages: List[Dict[str, Any]],
36-
stage1_results: List[Dict[str, Any]]
36+
stage1_results: List[Dict[str, Any]],
37+
memory_summary: str = ""
3738
) -> Tuple[List[Dict[str, Any]], Dict[str, str]]:
3839
"""
39-
Stage 2: Each model ranks the anonymized responses using conversation context.
40+
Stage 2: Each model ranks the anonymized responses using conversation context and memory.
4041
4142
Args:
4243
messages: The conversation messages (including the latest user message)
4344
stage1_results: Results from Stage 1
45+
memory_summary: Optional concise memory summary to include for context
4446
4547
Returns:
4648
Tuple of (rankings list, label_to_model mapping)
@@ -74,9 +76,12 @@ async def stage2_collect_rankings(
7476

7577
conversation_text = "\n".join(conversation_history)
7678

79+
# Include memory summary if available
80+
memory_block = f"Memory summary: {memory_summary}\n\n" if memory_summary else ""
81+
7782
ranking_prompt = f"""You are evaluating different responses to the user's latest question.
7883
79-
Conversation history:
84+
{memory_block}Conversation history:
8085
{conversation_text}
8186
8287
Latest question: {user_query}
@@ -120,7 +125,8 @@ async def stage2_collect_rankings(
120125
async def stage3_synthesize_final(
121126
messages: List[Dict[str, Any]],
122127
stage1_results: List[Dict[str, Any]],
123-
stage2_results: List[Dict[str, Any]]
128+
stage2_results: List[Dict[str, Any]],
129+
memory_summary: str = ""
124130
) -> Dict[str, Any]:
125131
"""
126132
Stage 3: Chairman synthesizes final response using conversation context.
@@ -157,9 +163,11 @@ async def stage3_synthesize_final(
157163
conversation_history.append(f"Assistant: {content}")
158164
conversation_text = "\n".join(conversation_history)
159165

166+
memory_block = f"Memory summary: {memory_summary}\n\n" if memory_summary else ""
167+
160168
chairman_prompt = f"""You are the Chairman of an LLM Council. Multiple AI models have provided responses to a user's question, and then ranked each other's responses.
161169
162-
Conversation history:
170+
{memory_block}Conversation history:
163171
{conversation_text}
164172
165173
Latest question: {latest_user}
@@ -315,12 +323,13 @@ async def generate_conversation_title(user_query: str) -> str:
315323
return title
316324

317325

318-
async def run_full_council(messages: List[Dict[str, Any]]) -> Tuple[List, List, Dict, Dict]:
326+
async def run_full_council(messages: List[Dict[str, Any]], memory_summary: str = "") -> Tuple[List, List, Dict, Dict]:
319327
"""
320-
Run the complete 3-stage council process with conversation context.
328+
Run the complete 3-stage council process with conversation context and memory.
321329
322330
Args:
323331
messages: The conversation messages (including the latest user message)
332+
memory_summary: Optional concise memory summary to include for context
324333
325334
Returns:
326335
Tuple of (stage1_results, stage2_results, stage3_result, metadata)
@@ -336,7 +345,7 @@ async def run_full_council(messages: List[Dict[str, Any]]) -> Tuple[List, List,
336345
}, {}
337346

338347
# Stage 2: Collect rankings
339-
stage2_results, label_to_model = await stage2_collect_rankings(messages, stage1_results)
348+
stage2_results, label_to_model = await stage2_collect_rankings(messages, stage1_results, memory_summary)
340349

341350
# Calculate aggregate rankings
342351
aggregate_rankings = calculate_aggregate_rankings(stage2_results, label_to_model)
@@ -345,7 +354,8 @@ async def run_full_council(messages: List[Dict[str, Any]]) -> Tuple[List, List,
345354
stage3_result = await stage3_synthesize_final(
346355
messages,
347356
stage1_results,
348-
stage2_results
357+
stage2_results,
358+
memory_summary
349359
)
350360

351361
# Prepare metadata
@@ -354,4 +364,4 @@ async def run_full_council(messages: List[Dict[str, Any]]) -> Tuple[List, List,
354364
"aggregate_rankings": aggregate_rankings
355365
}
356366

357-
return stage1_results, stage2_results, stage3_result, metadata
367+
return stage1_results, stage2_results, stage3_result, metadata

backend/main.py

Lines changed: 86 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import asyncio
1111

1212
from . import storage
13+
from . import memory
1314
from .council import run_full_council, generate_conversation_title, stage1_collect_responses, stage2_collect_rankings, stage3_synthesize_final, calculate_aggregate_rankings
1415

1516
app = FastAPI(title="LLM Council API")
@@ -79,6 +80,52 @@ async def get_conversation(conversation_id: str):
7980
return conversation
8081

8182

83+
@app.get("/api/conversations/{conversation_id}/memory")
async def get_conversation_memory(conversation_id: str):
    """Return the memory (short entries and summary) for a conversation.

    Responds 404 when the memory record cannot be loaded.
    """
    try:
        record = memory.get_memory(conversation_id)
    except Exception:
        # NOTE(review): every load failure is reported as a missing
        # conversation, which also masks genuine server errors as 404 —
        # confirm memory.get_memory only raises for unknown conversation ids.
        raise HTTPException(status_code=404, detail="Conversation not found")
    return record
91+
92+
93+
@app.post("/api/conversations/{conversation_id}/memory/clear")
async def clear_conversation_memory(conversation_id: str):
    """Clear a conversation's memory.

    Responds 404 when the clear operation fails.
    """
    try:
        memory.clear_memory(conversation_id)
    except Exception:
        # NOTE(review): assumes clear_memory fails only for unknown
        # conversation ids — verify against the memory module.
        raise HTTPException(status_code=404, detail="Conversation not found")
    return {"status": "ok"}
101+
102+
103+
@app.get("/api/memory/mode")
async def get_memory_mode():
    """Return the current runtime memory mode and local settings."""
    # Imported inside the handler rather than at module top — presumably to
    # avoid an import cycle or pick up config lazily; confirm before hoisting.
    from .config import MEMORY_LOCAL_MAX_SENTENCES

    try:
        current_mode = memory.get_runtime_mode()
    except Exception:
        raise HTTPException(status_code=500, detail="Unable to retrieve memory mode")
    return {"mode": current_mode, "local_max_sentences": MEMORY_LOCAL_MAX_SENTENCES}
112+
113+
114+
@app.post("/api/memory/mode")
async def set_memory_mode(payload: Dict[str, str]):
    """Set the runtime memory mode.

    Expects a JSON body of the form {"mode": "local"|"model"}; any other
    value is rejected with 400 before touching the memory layer.
    """
    mode = payload.get("mode")
    # Guard clause: validate the requested mode up front.
    if mode not in ("local", "model"):
        raise HTTPException(status_code=400, detail="mode must be 'local' or 'model'")
    try:
        memory.set_runtime_mode(mode)
    except ValueError as e:
        # The memory layer itself rejected the value — report as a client error.
        raise HTTPException(status_code=400, detail=str(e))
    except Exception:
        raise HTTPException(status_code=500, detail="Unable to set memory mode")
    return {"status": "ok", "mode": mode}
127+
128+
82129
@app.post("/api/conversations/{conversation_id}/message")
83130
async def send_message(conversation_id: str, request: SendMessageRequest):
84131
"""
@@ -117,8 +164,15 @@ async def send_message(conversation_id: str, request: SendMessageRequest):
117164
# Append the new user message (most recent)
118165
messages.append({"role": "user", "content": request.content})
119166

120-
# Run the 3-stage council process with conversation context
121-
stage1_results, stage2_results, stage3_result, metadata = await run_full_council(messages)
167+
# Get existing memory summary and pass it into the council
168+
try:
169+
mem = memory.get_memory(conversation_id)
170+
memory_summary = mem.get("summary", "")
171+
except Exception:
172+
memory_summary = ""
173+
174+
# Run the 3-stage council process with conversation context and memory
175+
stage1_results, stage2_results, stage3_result, metadata = await run_full_council(messages, memory_summary)
122176

123177
# Add assistant message with all stages
124178
storage.add_assistant_message(
@@ -128,6 +182,17 @@ async def send_message(conversation_id: str, request: SendMessageRequest):
128182
stage3_result
129183
)
130184

185+
# Update the conversation-level memory in the background (do not block the response)
186+
try:
187+
asyncio.create_task(memory.add_exchange_and_update_summary(
188+
conversation_id,
189+
request.content,
190+
stage3_result.get("response", "") if isinstance(stage3_result, dict) else ""
191+
))
192+
except Exception:
193+
# Non-fatal if memory update fails
194+
pass
195+
131196
# Return the complete response with metadata
132197
return {
133198
"stage1": stage1_results,
@@ -173,20 +238,27 @@ async def event_generator():
173238
messages.append({"role": "assistant", "content": assistant_content})
174239
messages.append({"role": "user", "content": request.content})
175240

241+
# Get existing memory summary and pass it into the council
242+
try:
243+
mem = memory.get_memory(conversation_id)
244+
memory_summary = mem.get("summary", "")
245+
except Exception:
246+
memory_summary = ""
247+
176248
# Stage 1: Collect responses
177249
yield f"data: {json.dumps({'type': 'stage1_start'})}\n\n"
178250
stage1_results = await stage1_collect_responses(messages)
179251
yield f"data: {json.dumps({'type': 'stage1_complete', 'data': stage1_results})}\n\n"
180252

181253
# Stage 2: Collect rankings
182254
yield f"data: {json.dumps({'type': 'stage2_start'})}\n\n"
183-
stage2_results, label_to_model = await stage2_collect_rankings(messages, stage1_results)
255+
stage2_results, label_to_model = await stage2_collect_rankings(messages, stage1_results, memory_summary)
184256
aggregate_rankings = calculate_aggregate_rankings(stage2_results, label_to_model)
185257
yield f"data: {json.dumps({'type': 'stage2_complete', 'data': stage2_results, 'metadata': {'label_to_model': label_to_model, 'aggregate_rankings': aggregate_rankings}})}\n\n"
186258

187259
# Stage 3: Synthesize final answer
188260
yield f"data: {json.dumps({'type': 'stage3_start'})}\n\n"
189-
stage3_result = await stage3_synthesize_final(messages, stage1_results, stage2_results)
261+
stage3_result = await stage3_synthesize_final(messages, stage1_results, stage2_results, memory_summary)
190262
yield f"data: {json.dumps({'type': 'stage3_complete', 'data': stage3_result})}\n\n"
191263

192264
# Wait for title generation if it was started
@@ -203,6 +275,16 @@ async def event_generator():
203275
stage3_result
204276
)
205277

278+
# Update memory in the background
279+
try:
280+
asyncio.create_task(memory.add_exchange_and_update_summary(
281+
conversation_id,
282+
request.content,
283+
stage3_result.get("response", "") if isinstance(stage3_result, dict) else ""
284+
))
285+
except Exception:
286+
pass
287+
206288
# Send completion event
207289
yield f"data: {json.dumps({'type': 'complete'})}\n\n"
208290

0 commit comments

Comments (0)