Skip to content

Commit 04cb5a1

Browse files
feat(summarize): hierarchical multi-level conversation summarization
Implements issue #2. Compresses conversation turns progressively as they age using extractive techniques (no LLM required): - LevelFull (0): original content, recent turns - LevelParagraph (1): first paragraph + code blocks preserved - LevelSentence (2): first 1-2 sentences - LevelKeywords (3): top-12 significant words - LevelEvicted (4): dropped entirely when budget is exhausted Key behaviours: - PreserveRecent: N most recent turns always kept at LevelFull - ImportanceThreshold: turns above threshold never exceed LevelParagraph - ScoreImportance: heuristic scoring (code blocks, error keywords, role) - enforceTokenBudget: second pass with progressive eviction when over budget - estimateTokens: 4-chars-per-token approximation Co-authored-by: Ona <no-reply@ona.com>
1 parent 2e16c7d commit 04cb5a1

4 files changed

Lines changed: 773 additions & 0 deletions

File tree

pkg/summarize/hierarchy.go

Lines changed: 367 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,367 @@
1+
package summarize
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"strings"
7+
"time"
8+
"unicode"
9+
)
10+
11+
// HierarchicalSummarizer implements Summarizer using rule-based compression.
12+
// It does not require an LLM — compression is performed locally using
13+
// extractive techniques (sentence selection, keyword extraction).
14+
//
15+
// For LLM-backed summarization, wrap this with an LLMSummarizer that
16+
// overrides the compress method.
17+
type HierarchicalSummarizer struct{}
18+
19+
// NewHierarchicalSummarizer creates a new summarizer.
20+
func NewHierarchicalSummarizer() *HierarchicalSummarizer {
21+
return &HierarchicalSummarizer{}
22+
}
23+
24+
// Summarize compresses turns to fit within opts.MaxTokens.
25+
// Turns are processed oldest-first; recent turns and high-importance turns
26+
// are preserved at full fidelity.
27+
func (s *HierarchicalSummarizer) Summarize(
28+
ctx context.Context,
29+
turns []Turn,
30+
opts SummarizeOptions,
31+
) ([]Turn, SummarizeStats, error) {
32+
start := time.Now()
33+
34+
if opts.PreserveRecent < 0 {
35+
opts.PreserveRecent = 10
36+
}
37+
if opts.ImportanceThreshold <= 0 {
38+
opts.ImportanceThreshold = 0.7
39+
}
40+
if len(opts.AgeLevels) == 0 {
41+
opts.AgeLevels = DefaultOptions().AgeLevels
42+
}
43+
44+
// Score importance for turns that don't have it set.
45+
ScoreTurns(turns)
46+
47+
// Count input tokens.
48+
inputTokens := 0
49+
for i := range turns {
50+
turns[i].TokenCount = estimateTokens(turns[i].Content)
51+
inputTokens += turns[i].TokenCount
52+
}
53+
54+
stats := SummarizeStats{
55+
InputTurns: len(turns),
56+
InputTokens: inputTokens,
57+
}
58+
59+
// Determine which turns to compress.
60+
now := time.Now()
61+
result := make([]Turn, len(turns))
62+
copy(result, turns)
63+
64+
recentCutoff := len(result) - opts.PreserveRecent
65+
if recentCutoff < 0 {
66+
recentCutoff = 0
67+
}
68+
69+
for i := range result {
70+
t := &result[i]
71+
72+
// Always preserve recent turns (only when PreserveRecent > 0).
73+
if opts.PreserveRecent > 0 && i >= recentCutoff {
74+
stats.PreservedTurns++
75+
continue
76+
}
77+
78+
// Preserve high-importance turns at LevelFull or LevelParagraph.
79+
maxLevel := s.maxLevelForAge(now.Sub(t.Timestamp), opts.AgeLevels)
80+
if t.Importance >= opts.ImportanceThreshold && maxLevel > LevelParagraph {
81+
maxLevel = LevelParagraph
82+
}
83+
84+
if maxLevel <= t.Level {
85+
// Already at or beyond target level.
86+
stats.PreservedTurns++
87+
continue
88+
}
89+
90+
// Compress to target level.
91+
if err := s.compressTo(t, maxLevel); err != nil {
92+
return nil, stats, fmt.Errorf("compress turn %s: %w", t.ID, err)
93+
}
94+
t.TokenCount = estimateTokens(t.Content)
95+
stats.CompressedTurns++
96+
}
97+
98+
// If MaxTokens is set and we're still over budget, do a second pass
99+
// compressing more aggressively from oldest to newest.
100+
if opts.MaxTokens > 0 {
101+
result = s.enforceTokenBudget(result, opts, recentCutoff)
102+
}
103+
104+
// Compute output stats.
105+
outputTokens := 0
106+
for _, t := range result {
107+
outputTokens += t.TokenCount
108+
}
109+
stats.OutputTurns = len(result)
110+
stats.OutputTokens = outputTokens
111+
if stats.InputTokens > 0 {
112+
stats.ReductionPct = float64(stats.InputTokens-stats.OutputTokens) / float64(stats.InputTokens) * 100
113+
}
114+
stats.Latency = time.Since(start)
115+
116+
return result, stats, nil
117+
}
118+
119+
// enforceTokenBudget does a second compression pass when still over budget.
120+
// It progressively compresses oldest turns through all levels, including
121+
// eviction (dropping turns entirely) as a last resort.
122+
func (s *HierarchicalSummarizer) enforceTokenBudget(
123+
turns []Turn,
124+
opts SummarizeOptions,
125+
recentCutoff int,
126+
) []Turn {
127+
total := 0
128+
for _, t := range turns {
129+
total += t.TokenCount
130+
}
131+
if total <= opts.MaxTokens {
132+
return turns
133+
}
134+
135+
// Compress oldest non-recent turns progressively through all levels.
136+
for level := LevelParagraph; level <= LevelEvicted && total > opts.MaxTokens; level++ {
137+
for i := range turns {
138+
if opts.PreserveRecent > 0 && i >= recentCutoff {
139+
break
140+
}
141+
t := &turns[i]
142+
if t.Level >= level {
143+
continue
144+
}
145+
if t.Importance >= opts.ImportanceThreshold && level > LevelParagraph {
146+
continue
147+
}
148+
before := t.TokenCount
149+
if level == LevelEvicted {
150+
t.Level = LevelEvicted
151+
t.Content = ""
152+
t.TokenCount = 0
153+
} else {
154+
_ = s.compressTo(t, level)
155+
t.TokenCount = estimateTokens(t.Content)
156+
}
157+
total -= before - t.TokenCount
158+
if total <= opts.MaxTokens {
159+
break
160+
}
161+
}
162+
}
163+
164+
// Remove evicted turns from the slice.
165+
out := turns[:0]
166+
for _, t := range turns {
167+
if t.Level != LevelEvicted {
168+
out = append(out, t)
169+
}
170+
}
171+
return out
172+
}
173+
174+
// maxLevelForAge returns the maximum compression level for a given age.
175+
func (s *HierarchicalSummarizer) maxLevelForAge(age time.Duration, levels []AgeLevel) Level {
176+
max := LevelFull
177+
for _, al := range levels {
178+
if age >= al.After && al.MaxLevel > max {
179+
max = al.MaxLevel
180+
}
181+
}
182+
return max
183+
}
184+
185+
// compressTo compresses a turn to the target level in-place.
186+
// The original content is preserved in Turn.Original on first compression.
187+
func (s *HierarchicalSummarizer) compressTo(t *Turn, target Level) error {
188+
if t.Original == "" {
189+
t.Original = t.Content
190+
}
191+
192+
switch target {
193+
case LevelParagraph:
194+
t.Content = extractParagraphSummary(t.Original)
195+
case LevelSentence:
196+
t.Content = extractSentenceSummary(t.Original)
197+
case LevelKeywords:
198+
t.Content = extractKeywordSummary(t.Original)
199+
}
200+
t.Level = target
201+
return nil
202+
}
203+
204+
// extractParagraphSummary keeps the first paragraph and any code blocks.
205+
func extractParagraphSummary(text string) string {
206+
lines := strings.Split(text, "\n")
207+
var out []string
208+
inCode := false
209+
paragraphDone := false
210+
211+
for _, line := range lines {
212+
if strings.HasPrefix(line, "```") {
213+
inCode = !inCode
214+
out = append(out, line)
215+
continue
216+
}
217+
if inCode {
218+
out = append(out, line)
219+
continue
220+
}
221+
if !paragraphDone {
222+
out = append(out, line)
223+
if line == "" && len(out) > 1 {
224+
paragraphDone = true
225+
}
226+
}
227+
}
228+
result := strings.TrimSpace(strings.Join(out, "\n"))
229+
if result == "" {
230+
return truncate(text, 300)
231+
}
232+
return result
233+
}
234+
235+
// extractSentenceSummary returns the first 1–2 sentences.
236+
func extractSentenceSummary(text string) string {
237+
// Strip code blocks first.
238+
text = stripCodeBlocks(text)
239+
sentences := splitSentences(text)
240+
if len(sentences) == 0 {
241+
return truncate(text, 150)
242+
}
243+
if len(sentences) == 1 {
244+
return sentences[0]
245+
}
246+
return sentences[0] + " " + sentences[1]
247+
}
248+
249+
// extractKeywordSummary extracts the most significant words.
250+
func extractKeywordSummary(text string) string {
251+
text = stripCodeBlocks(text)
252+
words := strings.Fields(text)
253+
var keywords []string
254+
seen := map[string]bool{}
255+
for _, w := range words {
256+
w = strings.Trim(w, `.,;:!?"'()[]{}`)
257+
lower := strings.ToLower(w)
258+
if len(w) < 4 || isStopWord(lower) || seen[lower] {
259+
continue
260+
}
261+
seen[lower] = true
262+
keywords = append(keywords, w)
263+
if len(keywords) >= 12 {
264+
break
265+
}
266+
}
267+
return strings.Join(keywords, ", ")
268+
}
269+
270+
func stripCodeBlocks(text string) string {
271+
var out strings.Builder
272+
inCode := false
273+
for _, line := range strings.Split(text, "\n") {
274+
if strings.HasPrefix(line, "```") {
275+
inCode = !inCode
276+
continue
277+
}
278+
if !inCode {
279+
out.WriteString(line)
280+
out.WriteByte('\n')
281+
}
282+
}
283+
return out.String()
284+
}
285+
286+
func splitSentences(text string) []string {
287+
var sentences []string
288+
var cur strings.Builder
289+
for _, r := range text {
290+
cur.WriteRune(r)
291+
if r == '.' || r == '!' || r == '?' {
292+
s := strings.TrimSpace(cur.String())
293+
if s != "" {
294+
sentences = append(sentences, s)
295+
}
296+
cur.Reset()
297+
}
298+
}
299+
if s := strings.TrimSpace(cur.String()); s != "" {
300+
sentences = append(sentences, s)
301+
}
302+
return sentences
303+
}
304+
305+
func truncate(s string, maxRunes int) string {
306+
runes := []rune(s)
307+
if len(runes) <= maxRunes {
308+
return s
309+
}
310+
return string(runes[:maxRunes]) + "…"
311+
}
312+
313+
func isStopWord(w string) bool {
314+
return stopWords[w]
315+
}
316+
317+
var stopWords = func() map[string]bool {
318+
words := []string{
319+
"the", "and", "for", "that", "this", "with", "from", "have",
320+
"will", "been", "were", "they", "their", "there", "when",
321+
"what", "which", "would", "could", "should", "about", "into",
322+
"more", "also", "some", "than", "then", "just", "like",
323+
}
324+
m := map[string]bool{}
325+
for _, w := range words {
326+
m[w] = true
327+
}
328+
return m
329+
}()
330+
331+
// DetectTurns segments a flat message list into Turn structs, assigning
332+
// timestamps based on index when real timestamps are unavailable.
333+
func DetectTurns(messages []struct {
334+
Role string
335+
Content string
336+
}) []Turn {
337+
now := time.Now()
338+
turns := make([]Turn, len(messages))
339+
for i, m := range messages {
340+
turns[i] = Turn{
341+
ID: fmt.Sprintf("turn-%d", i),
342+
Role: m.Role,
343+
Content: m.Content,
344+
Original: m.Content,
345+
Timestamp: now.Add(-time.Duration(len(messages)-i) * time.Minute),
346+
Level: LevelFull,
347+
TokenCount: estimateTokens(m.Content),
348+
}
349+
}
350+
return turns
351+
}
352+
353+
// TotalTokens returns the sum of token counts across all turns.
354+
func TotalTokens(turns []Turn) int {
355+
total := 0
356+
for _, t := range turns {
357+
total += t.TokenCount
358+
}
359+
return total
360+
}
361+
362+
// isLetter is used by keyword extraction.
363+
func isLetter(r rune) bool {
364+
return unicode.IsLetter(r)
365+
}
366+
367+
var _ = isLetter // suppress unused warning

0 commit comments

Comments
 (0)