Skip to content

Commit b671cd6

Browse files
feat(cache): TTL-aware cache tracker for batch workloads (#61)
Add TTLTracker to pkg/cache. Monitors Anthropic's 5-minute prompt cache TTL per prefix hash and detects cold-start penalties. - Touch(prefixHash): records request, returns wasAlive (warm/cold) - NextDeadline / TimeUntilExpiry: time remaining before cache expires - ScheduleDeadline(hash, margin): latest safe time for next batch request - ExpiredEntries: all prefixes past their TTL deadline - Evict: remove entry on boundary retreat - Stats: aggregate alive/expired counts and hit/miss totals AnthropicCacheTTL constant (5m) exported for use by callers. Co-authored-by: Ona <no-reply@ona.com>
1 parent 7ca5e8a commit b671cd6

3 files changed

Lines changed: 331 additions & 0 deletions

File tree

README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -795,6 +795,28 @@ Response stats when prefix is frozen:
795795
}
796796
```
797797

798+
#### TTL-aware cache tracker
799+
800+
`TTLTracker` monitors Anthropic's 5-minute prompt cache TTL per prefix hash. Use it to detect cold-start penalties and schedule batch requests before the cache expires:
801+
802+
```go
803+
tracker := cache.NewTTLTracker(0) // 0 = use AnthropicCacheTTL (5 min)
804+
805+
// After each request that carries a cache_control marker:
806+
wasAlive := tracker.Touch(plan.PrefixHash)
807+
if !wasAlive {
808+
log.Warn("cache cold start — first request or TTL expired")
809+
}
810+
811+
// For batch workloads: latest safe time to send next request
812+
deadline := tracker.ScheduleDeadline(plan.PrefixHash, 30*time.Second)
813+
time.Sleep(time.Until(deadline))
814+
815+
// Inspect expiry state
816+
entry := tracker.Entry(plan.PrefixHash)
817+
fmt.Printf("hits: %d misses: %d alive: %v\n", entry.HitCount, entry.MissCount, entry.IsAlive())
818+
```
819+
798820
#### Prefix stability validator
799821

800822
Detects dynamic content (timestamps, request IDs, UUIDs) bleeding into cached prefixes — the most common cause of 0% cache hit rates:
@@ -899,6 +921,7 @@ Distill is evolving from a dedup utility into a context intelligence layer. Here
899921
| **Cache-aware dedup** | [#50](https://github.com/Siddhant-K-code/distill/issues/50) | Shipped | `preserve_cache_prefix` option freezes chunks before the last `cache_control` marker so dedup cannot reorder them. Prefix hash and token count reported in stats. |
900922
| **Prefix stability validator** | [#48](https://github.com/Siddhant-K-code/distill/issues/48) | Shipped | `StabilityValidator` tracks prefix hashes across requests and detects dynamic content (timestamps, request IDs, UUIDs) bleeding into cached prefixes. |
901923
| **Per-call-site hit rate tracking** | [#47](https://github.com/Siddhant-K-code/distill/issues/47) | Shipped | `CallSiteTracker` records Anthropic cache usage per call site; `AllStats()` returns worst performers first. |
924+
| **TTL-aware cache tracker** | [#49](https://github.com/Siddhant-K-code/distill/issues/49) | Shipped | `TTLTracker` monitors Anthropic's 5-minute cache TTL per prefix hash. `ScheduleDeadline` tells batch jobs the latest safe time to send the next request. |
902925
903926
### Code Intelligence
904927

pkg/cache/ttl.go

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
package cache
2+
3+
import (
4+
"sync"
5+
"time"
6+
)
7+
8+
// AnthropicCacheTTL is the Anthropic prompt cache TTL. The cache entry is
9+
// kept alive as long as requests arrive within this window; it expires if
10+
// no request references it for this duration.
11+
const AnthropicCacheTTL = 5 * time.Minute
12+
13+
// TTLEntry tracks the last-seen time and expiry state of a single cache
14+
// prefix identified by its hash.
15+
type TTLEntry struct {
16+
PrefixHash string
17+
LastSeen time.Time
18+
ExpiresAt time.Time
19+
HitCount int
20+
MissCount int
21+
// Expired is true when the entry has not been refreshed within AnthropicCacheTTL.
22+
Expired bool
23+
}
24+
25+
// IsAlive returns true when the entry is still within its TTL window.
26+
func (e *TTLEntry) IsAlive() bool {
27+
return time.Now().Before(e.ExpiresAt)
28+
}
29+
30+
// TTLTracker monitors cache prefix liveness across requests. It detects
31+
// when a prefix has gone cold (no requests within AnthropicCacheTTL) and
32+
// records the resulting cold-start penalty.
33+
//
34+
// For batch workloads, use Schedule to determine the latest safe time to
35+
// send the next request without letting the cache expire.
36+
type TTLTracker struct {
37+
mu sync.Mutex
38+
entries map[string]*TTLEntry
39+
ttl time.Duration
40+
}
41+
42+
// NewTTLTracker creates a tracker with the given TTL.
43+
// Pass 0 to use AnthropicCacheTTL (5 minutes).
44+
func NewTTLTracker(ttl time.Duration) *TTLTracker {
45+
if ttl <= 0 {
46+
ttl = AnthropicCacheTTL
47+
}
48+
return &TTLTracker{
49+
entries: make(map[string]*TTLEntry),
50+
ttl: ttl,
51+
}
52+
}
53+
54+
// Touch records a request for the given prefix hash and returns whether the
55+
// cache was alive (warm) or had expired (cold start).
56+
//
57+
// Call Touch after every request that carries a cache_control marker.
58+
func (t *TTLTracker) Touch(prefixHash string) (wasAlive bool) {
59+
t.mu.Lock()
60+
defer t.mu.Unlock()
61+
62+
now := time.Now()
63+
entry, ok := t.entries[prefixHash]
64+
if !ok {
65+
// First time seeing this prefix.
66+
t.entries[prefixHash] = &TTLEntry{
67+
PrefixHash: prefixHash,
68+
LastSeen: now,
69+
ExpiresAt: now.Add(t.ttl),
70+
MissCount: 1,
71+
}
72+
return false
73+
}
74+
75+
wasAlive = now.Before(entry.ExpiresAt)
76+
if wasAlive {
77+
entry.HitCount++
78+
} else {
79+
entry.MissCount++
80+
entry.Expired = true
81+
}
82+
entry.LastSeen = now
83+
entry.ExpiresAt = now.Add(t.ttl)
84+
return wasAlive
85+
}
86+
87+
// NextDeadline returns the time by which the next request must arrive to
88+
// keep the cache entry alive. Returns zero time if the prefix is unknown.
89+
func (t *TTLTracker) NextDeadline(prefixHash string) time.Time {
90+
t.mu.Lock()
91+
defer t.mu.Unlock()
92+
e := t.entries[prefixHash]
93+
if e == nil {
94+
return time.Time{}
95+
}
96+
return e.ExpiresAt
97+
}
98+
99+
// TimeUntilExpiry returns how long until the cache entry expires.
100+
// Returns 0 if already expired or unknown.
101+
func (t *TTLTracker) TimeUntilExpiry(prefixHash string) time.Duration {
102+
deadline := t.NextDeadline(prefixHash)
103+
if deadline.IsZero() {
104+
return 0
105+
}
106+
d := time.Until(deadline)
107+
if d < 0 {
108+
return 0
109+
}
110+
return d
111+
}
112+
113+
// ScheduleDeadline returns the latest time a batch job can wait before
114+
// sending its next request without letting the cache expire. It subtracts
115+
// a safety margin from the TTL deadline.
116+
//
117+
// safetyMargin should account for network latency and scheduling jitter.
118+
// A value of 30s is reasonable for most deployments.
119+
func (t *TTLTracker) ScheduleDeadline(prefixHash string, safetyMargin time.Duration) time.Time {
120+
deadline := t.NextDeadline(prefixHash)
121+
if deadline.IsZero() {
122+
return time.Time{}
123+
}
124+
return deadline.Add(-safetyMargin)
125+
}
126+
127+
// Entry returns a snapshot of the TTL entry for a prefix hash, or nil.
128+
func (t *TTLTracker) Entry(prefixHash string) *TTLEntry {
129+
t.mu.Lock()
130+
defer t.mu.Unlock()
131+
e := t.entries[prefixHash]
132+
if e == nil {
133+
return nil
134+
}
135+
cp := *e
136+
return &cp
137+
}
138+
139+
// ExpiredEntries returns all entries that have passed their TTL deadline.
140+
func (t *TTLTracker) ExpiredEntries() []*TTLEntry {
141+
t.mu.Lock()
142+
defer t.mu.Unlock()
143+
now := time.Now()
144+
var out []*TTLEntry
145+
for _, e := range t.entries {
146+
if now.After(e.ExpiresAt) {
147+
cp := *e
148+
out = append(out, &cp)
149+
}
150+
}
151+
return out
152+
}
153+
154+
// Evict removes the entry for a prefix hash (e.g. after a boundary retreat).
155+
func (t *TTLTracker) Evict(prefixHash string) {
156+
t.mu.Lock()
157+
defer t.mu.Unlock()
158+
delete(t.entries, prefixHash)
159+
}
160+
161+
// Stats returns a summary of all tracked prefixes.
162+
type TTLStats struct {
163+
TotalPrefixes int
164+
AlivePrefixes int
165+
ExpiredPrefixes int
166+
TotalHits int64
167+
TotalMisses int64
168+
}
169+
170+
// Stats returns aggregate TTL statistics.
171+
func (t *TTLTracker) Stats() TTLStats {
172+
t.mu.Lock()
173+
defer t.mu.Unlock()
174+
now := time.Now()
175+
var s TTLStats
176+
s.TotalPrefixes = len(t.entries)
177+
for _, e := range t.entries {
178+
if now.Before(e.ExpiresAt) {
179+
s.AlivePrefixes++
180+
} else {
181+
s.ExpiredPrefixes++
182+
}
183+
s.TotalHits += int64(e.HitCount)
184+
s.TotalMisses += int64(e.MissCount)
185+
}
186+
return s
187+
}

pkg/cache/ttl_test.go

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
package cache
2+
3+
import (
4+
"testing"
5+
"time"
6+
)
7+
8+
func TestTTLTracker_FirstTouch_ColdStart(t *testing.T) {
9+
tr := NewTTLTracker(5 * time.Minute)
10+
alive := tr.Touch("hash-abc")
11+
if alive {
12+
t.Error("expected cold start (wasAlive=false) on first touch")
13+
}
14+
e := tr.Entry("hash-abc")
15+
if e == nil {
16+
t.Fatal("expected entry after touch")
17+
}
18+
if e.MissCount != 1 {
19+
t.Errorf("expected MissCount=1, got %d", e.MissCount)
20+
}
21+
}
22+
23+
func TestTTLTracker_SecondTouch_WarmHit(t *testing.T) {
24+
tr := NewTTLTracker(5 * time.Minute)
25+
tr.Touch("hash-abc")
26+
alive := tr.Touch("hash-abc")
27+
if !alive {
28+
t.Error("expected warm hit (wasAlive=true) on second touch within TTL")
29+
}
30+
e := tr.Entry("hash-abc")
31+
if e.HitCount != 1 {
32+
t.Errorf("expected HitCount=1, got %d", e.HitCount)
33+
}
34+
}
35+
36+
func TestTTLTracker_ExpiredEntry(t *testing.T) {
37+
// Use a very short TTL to simulate expiry.
38+
tr := NewTTLTracker(1 * time.Millisecond)
39+
tr.Touch("hash-xyz")
40+
time.Sleep(5 * time.Millisecond)
41+
42+
alive := tr.Touch("hash-xyz")
43+
if alive {
44+
t.Error("expected cold start after TTL expiry")
45+
}
46+
e := tr.Entry("hash-xyz")
47+
if !e.Expired {
48+
t.Error("expected Expired=true after TTL expiry")
49+
}
50+
}
51+
52+
func TestTTLTracker_TimeUntilExpiry(t *testing.T) {
53+
tr := NewTTLTracker(5 * time.Minute)
54+
tr.Touch("hash-abc")
55+
d := tr.TimeUntilExpiry("hash-abc")
56+
if d <= 0 || d > 5*time.Minute {
57+
t.Errorf("expected 0 < expiry <= 5m, got %v", d)
58+
}
59+
}
60+
61+
func TestTTLTracker_TimeUntilExpiry_Unknown(t *testing.T) {
62+
tr := NewTTLTracker(5 * time.Minute)
63+
if tr.TimeUntilExpiry("unknown") != 0 {
64+
t.Error("expected 0 for unknown prefix")
65+
}
66+
}
67+
68+
func TestTTLTracker_ScheduleDeadline(t *testing.T) {
69+
tr := NewTTLTracker(5 * time.Minute)
70+
tr.Touch("hash-abc")
71+
margin := 30 * time.Second
72+
deadline := tr.ScheduleDeadline("hash-abc", margin)
73+
if deadline.IsZero() {
74+
t.Fatal("expected non-zero deadline")
75+
}
76+
// Deadline should be ~4m30s from now.
77+
remaining := time.Until(deadline)
78+
if remaining <= 0 || remaining > 5*time.Minute {
79+
t.Errorf("unexpected deadline remaining: %v", remaining)
80+
}
81+
}
82+
83+
func TestTTLTracker_ExpiredEntries(t *testing.T) {
84+
tr := NewTTLTracker(1 * time.Millisecond)
85+
tr.Touch("a")
86+
tr.Touch("b")
87+
time.Sleep(5 * time.Millisecond)
88+
tr.Touch("c") // fresh entry
89+
90+
expired := tr.ExpiredEntries()
91+
if len(expired) != 2 {
92+
t.Errorf("expected 2 expired entries, got %d", len(expired))
93+
}
94+
}
95+
96+
func TestTTLTracker_Evict(t *testing.T) {
97+
tr := NewTTLTracker(5 * time.Minute)
98+
tr.Touch("hash-abc")
99+
tr.Evict("hash-abc")
100+
if tr.Entry("hash-abc") != nil {
101+
t.Error("expected nil after eviction")
102+
}
103+
}
104+
105+
func TestTTLTracker_Stats(t *testing.T) {
106+
tr := NewTTLTracker(5 * time.Minute)
107+
tr.Touch("a")
108+
tr.Touch("a") // hit
109+
tr.Touch("b")
110+
111+
s := tr.Stats()
112+
if s.TotalPrefixes != 2 {
113+
t.Errorf("expected 2 prefixes, got %d", s.TotalPrefixes)
114+
}
115+
if s.AlivePrefixes != 2 {
116+
t.Errorf("expected 2 alive, got %d", s.AlivePrefixes)
117+
}
118+
if s.TotalHits != 1 {
119+
t.Errorf("expected 1 total hit, got %d", s.TotalHits)
120+
}
121+
}

0 commit comments

Comments
 (0)