-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathcodec_compaction.py
More file actions
98 lines (85 loc) · 3.26 KB
/
codec_compaction.py
File metadata and controls
98 lines (85 loc) · 3.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
"""Context compaction — summarize old conversations, keep recent raw"""
import os
import sys
import json
import logging
log = logging.getLogger('codec')
# Load config once at import time (single source of truth via codec_config)
try:
_repo_dir = os.path.dirname(os.path.abspath(__file__))
if _repo_dir not in sys.path:
sys.path.insert(0, _repo_dir)
from codec_config import cfg as _cfg
_LLM_BASE_URL = _cfg.get("llm_base_url", "http://localhost:8081/v1")
_LLM_MODEL = _cfg.get("llm_model", "")
_LLM_API_KEY = _cfg.get("llm_api_key", "")
_LLM_KWARGS = _cfg.get("llm_kwargs", {})
except ImportError:
_LLM_BASE_URL = "http://localhost:8081/v1"
_LLM_MODEL = ""
_LLM_API_KEY = ""
_LLM_KWARGS = {}
def compact_context(recent_messages: list, max_recent: int = 5, max_summary_tokens: int = 200) -> str:
    """Compact a conversation into a bounded context string.

    The last *max_recent* messages are kept raw (content truncated to 200
    chars each); older messages are condensed into a short summary via the
    configured LLM, with a cheap text fallback when the call fails.

    Args:
        recent_messages: Conversation messages oldest-first; each a dict
            with 'role' and 'content' string keys.
        max_recent: Number of trailing messages kept verbatim.
        max_summary_tokens: Token budget for the LLM-generated summary.

    Returns:
        A single context string, or "" for an empty message list.
    """
    if not recent_messages:
        return ""
    if len(recent_messages) <= max_recent:
        # Everything fits in the raw window — nothing to compact.
        return "\n".join(f"[{m['role']}] {m['content'][:200]}" for m in recent_messages)

    old_messages = recent_messages[:-max_recent]
    recent = recent_messages[-max_recent:]

    # Summarize old messages via the configured LLM (best-effort).
    old_text = "\n".join(
        f"[{m['role']}] {m['content'][:150]}"
        for m in old_messages[-20:]  # cap at last 20 old messages
    )
    summary = None
    try:
        import httpx

        headers = {"Content-Type": "application/json"}
        if _LLM_API_KEY:
            headers["Authorization"] = f"Bearer {_LLM_API_KEY}"
        payload = {
            "model": _LLM_MODEL,
            "messages": [
                {
                    "role": "system",
                    "content": (
                        "Summarize these conversation snippets in 2-3 sentences. "
                        "Focus on key facts, decisions, and action items. "
                        "Be extremely concise."
                    ),
                },
                {"role": "user", "content": old_text},
            ],
            "max_tokens": max_summary_tokens,
            "temperature": 0.1,
        }
        # Config-supplied kwargs deliberately override the defaults above.
        payload.update(_LLM_KWARGS)
        r = httpx.post(
            f"{_LLM_BASE_URL}/chat/completions",
            json=payload,
            headers=headers,
            timeout=15,
        )
        if r.status_code == 200:
            summary = r.json()["choices"][0]["message"]["content"].strip()
            # Lazy %-args: the message is only formatted if the record is emitted.
            log.info("Context compacted: %d old msgs → %d char summary",
                     len(old_messages), len(summary))
        else:
            # Fix: a non-200 response previously fell through to the fallback
            # silently — surface it so misconfiguration is visible.
            log.warning("Compaction LLM returned HTTP %d, using fallback", r.status_code)
    except Exception as e:
        # Best-effort by design: any failure (no httpx, network, bad JSON
        # shape) degrades to the textual fallback below.
        log.warning("Compaction LLM failed, using fallback: %s", e)
    if not summary:
        # Fallback: take key phrases from the last 5 old messages
        summary = "Previous context: " + ". ".join(
            m['content'][:50] for m in old_messages[-5:]
        )
    context = f"[SUMMARY OF EARLIER CONVERSATION]\n{summary}\n\n[RECENT MESSAGES]\n"
    context += "\n".join(f"[{m['role']}] {m['content'][:200]}" for m in recent)
    return context