-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathcodec_compaction.py
More file actions
98 lines (85 loc) · 3.26 KB
/
codec_compaction.py
File metadata and controls
98 lines (85 loc) · 3.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
"""Context compaction — summarize old conversations, keep recent raw"""
import os
import sys
import json
import logging
log = logging.getLogger('codec')
# Load config once at import time (single source of truth via codec_config)
try:
_repo_dir = os.path.dirname(os.path.abspath(__file__))
if _repo_dir not in sys.path:
sys.path.insert(0, _repo_dir)
from codec_config import cfg as _cfg
_LLM_BASE_URL = _cfg.get("llm_base_url", "http://localhost:8081/v1")
_LLM_MODEL = _cfg.get("llm_model", "")
_LLM_API_KEY = _cfg.get("llm_api_key", "")
_LLM_KWARGS = _cfg.get("llm_kwargs", {})
except ImportError:
_LLM_BASE_URL = "http://localhost:8081/v1"
_LLM_MODEL = ""
_LLM_API_KEY = ""
_LLM_KWARGS = {}
def compact_context(recent_messages: list, max_recent: int = 5, max_summary_tokens: int = 200) -> str:
    """Compact a conversation into a bounded context string.

    The last *max_recent* messages are kept raw (content truncated to 200
    chars each); older messages are condensed into a short summary via the
    configured LLM, with a cheap text fallback when the call fails.

    Args:
        recent_messages: Conversation messages oldest-first; each a dict
            with 'role' and 'content' string keys.
        max_recent: Number of trailing messages kept verbatim.
        max_summary_tokens: Token budget for the LLM-generated summary.

    Returns:
        A single context string, or "" for an empty message list.
    """
    if not recent_messages:
        return ""
    if len(recent_messages) <= max_recent:
        # Everything fits in the raw window — nothing to compact.
        return "\n".join(f"[{m['role']}] {m['content'][:200]}" for m in recent_messages)

    old_messages = recent_messages[:-max_recent]
    recent = recent_messages[-max_recent:]

    # Summarize old messages via the configured LLM (best-effort).
    old_text = "\n".join(
        f"[{m['role']}] {m['content'][:150]}"
        for m in old_messages[-20:]  # cap at last 20 old messages
    )
    summary = None
    try:
        import httpx

        headers = {"Content-Type": "application/json"}
        if _LLM_API_KEY:
            headers["Authorization"] = f"Bearer {_LLM_API_KEY}"
        payload = {
            "model": _LLM_MODEL,
            "messages": [
                {
                    "role": "system",
                    "content": (
                        "Summarize these conversation snippets in 2-3 sentences. "
                        "Focus on key facts, decisions, and action items. "
                        "Be extremely concise."
                    ),
                },
                {"role": "user", "content": old_text},
            ],
            "max_tokens": max_summary_tokens,
            "temperature": 0.1,
        }
        # Config-supplied kwargs deliberately override the defaults above.
        payload.update(_LLM_KWARGS)
        r = httpx.post(
            f"{_LLM_BASE_URL}/chat/completions",
            json=payload,
            headers=headers,
            timeout=15,
        )
        if r.status_code == 200:
            summary = r.json()["choices"][0]["message"]["content"].strip()
            # Lazy %-args: the message is only formatted if the record is emitted.
            log.info("Context compacted: %d old msgs → %d char summary",
                     len(old_messages), len(summary))
        else:
            # Fix: a non-200 response previously fell through to the fallback
            # silently — surface it so misconfiguration is visible.
            log.warning("Compaction LLM returned HTTP %d, using fallback", r.status_code)
    except Exception as e:
        # Best-effort by design: any failure (no httpx, network, bad JSON
        # shape) degrades to the textual fallback below.
        log.warning("Compaction LLM failed, using fallback: %s", e)
    if not summary:
        # Fallback: take key phrases from the last 5 old messages
        summary = "Previous context: " + ". ".join(
            m['content'][:50] for m in old_messages[-5:]
        )
    context = f"[SUMMARY OF EARLIER CONVERSATION]\n{summary}\n\n[RECENT MESSAGES]\n"
    context += "\n".join(f"[{m['role']}] {m['content'][:200]}" for m in recent)
    return context