Skip to content

Commit 7ca5e8a

Browse files
feat(metrics): per-call-site cache hit rate tracking (#60)
* feat(metrics): per-call-site cache hit rate tracking (#47) Add CallSiteTracker to pkg/metrics. Records Anthropic API usage per call site and exposes HitRate, WriteEfficiency, RequestHitRate. - Record(callSite, UsageRecord): accumulates token counters and request counts; marks CacheHitRequests when cache_read_input_tokens > 0 - Stats(callSite): snapshot for a single call site - AllStats(): all call sites sorted by hit rate ascending (worst first) - Reset / ResetAll for test isolation - Summary(): human-readable table of call site, hit%, efficiency, reqs Thread-safe via sync.RWMutex. Co-authored-by: Ona <no-reply@ona.com> * docs: document per-call-site hit rate tracking in README (#47) Co-authored-by: Ona <no-reply@ona.com> --------- Co-authored-by: Ona <no-reply@ona.com>
1 parent 0917f08 commit 7ca5e8a

3 files changed

Lines changed: 309 additions & 0 deletions

File tree

README.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -583,6 +583,30 @@ Record Anthropic API usage with `metrics.RecordCacheUsage(UsageRecord{...})` aft
583583
| `distill_cache_hit_rate` | Gauge | Rolling hit rate: `cache_read / (cache_read + cache_creation + input)` |
584584
| `distill_cache_write_efficiency` | Gauge | Reads/writes ratio — values < 1.0 mean more tokens were written to the cache than were read back, i.e. cache writes are expiring before they are reused |
585585

586+
**Per-call-site hit rate tracking**
587+
588+
`CallSiteTracker` records Anthropic API usage per call site and surfaces the worst performers first:
589+
590+
```go
591+
tracker := metrics.NewCallSiteTracker()
592+
593+
// After each Anthropic API call:
594+
tracker.Record("agent/planner.go:84", metrics.UsageRecord{
595+
CacheCreationInputTokens: resp.Usage.CacheCreationInputTokens,
596+
CacheReadInputTokens: resp.Usage.CacheReadInputTokens,
597+
InputTokens: resp.Usage.InputTokens,
598+
})
599+
600+
// Inspect
601+
s := tracker.Stats("agent/planner.go:84")
602+
fmt.Printf("hit rate: %.0f%% efficiency: %.1fx\n", s.HitRate()*100, s.WriteEfficiency())
603+
604+
// All call sites, worst hit rate first
605+
for _, s := range tracker.AllStats() {
606+
fmt.Printf("%-40s %.0f%%\n", s.CallSite, s.HitRate()*100)
607+
}
608+
```
609+
586610
**Cache boundary metrics** (populated by the session boundary manager)
587611

588612
| Metric | Type | Description |
@@ -874,6 +898,7 @@ Distill is evolving from a dedup utility into a context intelligence layer. Here
874898
| **Memory decay lifecycle events** | [#54](https://github.com/Siddhant-K-code/distill/issues/54) | Shipped | `DecayWorker` emits `EventCompressed` and `EventEvicted` on each transition. `RecallResult` includes a `CacheBoundaryHint` for high-relevance entries. |
875899
| **Cache-aware dedup** | [#50](https://github.com/Siddhant-K-code/distill/issues/50) | Shipped | `preserve_cache_prefix` option freezes chunks before the last `cache_control` marker so dedup cannot reorder them. Prefix hash and token count reported in stats. |
876900
| **Prefix stability validator** | [#48](https://github.com/Siddhant-K-code/distill/issues/48) | Shipped | `StabilityValidator` tracks prefix hashes across requests and detects dynamic content (timestamps, request IDs, UUIDs) bleeding into cached prefixes. |
901+
| **Per-call-site hit rate tracking** | [#47](https://github.com/Siddhant-K-code/distill/issues/47) | Shipped | `CallSiteTracker` records Anthropic cache usage per call site; `AllStats()` returns worst performers first. |
877902
878903
### Code Intelligence
879904

pkg/metrics/callsite.go

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
package metrics
2+
3+
import (
4+
"fmt"
5+
"sync"
6+
"time"
7+
)
8+
9+
// CallSiteRecord holds cumulative cache usage for a single call site.
//
// The counters are running sums of the values passed to
// CallSiteTracker.Record. A nil *CallSiteRecord is treated as "no data":
// every ratio method returns 0 for it, so the result of
// CallSiteTracker.Stats (which is nil for an unknown call site) can be used
// without a nil check.
type CallSiteRecord struct {
	// CallSite is the caller-supplied identifier, e.g. "agent/planner.go:84".
	CallSite string

	// Token counters from Anthropic API usage blocks.
	CacheCreationTokens int64
	CacheReadTokens     int64
	UncachedInputTokens int64
	OutputTokens        int64

	// Request counts.
	TotalRequests int64
	// CacheHitRequests is the number of requests where cache_read_input_tokens > 0.
	CacheHitRequests int64

	// FirstSeen and LastSeen are the times of the first and the most recent
	// recorded observation for this call site.
	FirstSeen time.Time
	LastSeen  time.Time
}

// HitRate returns cache_read / (cache_read + cache_creation + uncached_input).
// Returns 0 when no tokens have been recorded or when r is nil.
func (r *CallSiteRecord) HitRate() float64 {
	if r == nil {
		return 0
	}
	total := r.CacheReadTokens + r.CacheCreationTokens + r.UncachedInputTokens
	if total == 0 {
		return 0
	}
	return float64(r.CacheReadTokens) / float64(total)
}

// WriteEfficiency returns cache_read / cache_creation.
// Returns 0 when no cache writes have been recorded or when r is nil.
func (r *CallSiteRecord) WriteEfficiency() float64 {
	if r == nil || r.CacheCreationTokens == 0 {
		return 0
	}
	return float64(r.CacheReadTokens) / float64(r.CacheCreationTokens)
}

// RequestHitRate returns the fraction of requests that had at least one
// cache read token (i.e. the cache was warm for that request).
// Returns 0 when no requests have been recorded or when r is nil.
func (r *CallSiteRecord) RequestHitRate() float64 {
	if r == nil || r.TotalRequests == 0 {
		return 0
	}
	return float64(r.CacheHitRequests) / float64(r.TotalRequests)
}
55+
56+
// CallSiteTracker records per-call-site Anthropic cache usage and exposes
57+
// aggregated hit rate statistics. It is safe for concurrent use.
58+
//
59+
// Typical usage:
60+
//
61+
// tracker := metrics.NewCallSiteTracker()
62+
// // after each Anthropic API call:
63+
// tracker.Record("agent/planner.go:84", metrics.UsageRecord{
64+
// CacheCreationInputTokens: resp.Usage.CacheCreationInputTokens,
65+
// CacheReadInputTokens: resp.Usage.CacheReadInputTokens,
66+
// InputTokens: resp.Usage.InputTokens,
67+
// OutputTokens: resp.Usage.OutputTokens,
68+
// })
69+
// stats := tracker.Stats("agent/planner.go:84")
70+
// fmt.Printf("hit rate: %.0f%%\n", stats.HitRate()*100)
71+
type CallSiteTracker struct {
72+
mu sync.RWMutex
73+
records map[string]*CallSiteRecord
74+
}
75+
76+
// NewCallSiteTracker creates a new tracker.
77+
func NewCallSiteTracker() *CallSiteTracker {
78+
return &CallSiteTracker{
79+
records: make(map[string]*CallSiteRecord),
80+
}
81+
}
82+
83+
// Record adds a usage observation for callSite.
84+
func (t *CallSiteTracker) Record(callSite string, u UsageRecord) {
85+
t.mu.Lock()
86+
defer t.mu.Unlock()
87+
88+
rec, ok := t.records[callSite]
89+
if !ok {
90+
rec = &CallSiteRecord{
91+
CallSite: callSite,
92+
FirstSeen: time.Now(),
93+
}
94+
t.records[callSite] = rec
95+
}
96+
97+
rec.CacheCreationTokens += int64(u.CacheCreationInputTokens)
98+
rec.CacheReadTokens += int64(u.CacheReadInputTokens)
99+
rec.UncachedInputTokens += int64(u.InputTokens)
100+
rec.OutputTokens += int64(u.OutputTokens)
101+
rec.TotalRequests++
102+
if u.CacheReadInputTokens > 0 {
103+
rec.CacheHitRequests++
104+
}
105+
rec.LastSeen = time.Now()
106+
}
107+
108+
// Stats returns a snapshot of the record for callSite, or nil if not found.
109+
func (t *CallSiteTracker) Stats(callSite string) *CallSiteRecord {
110+
t.mu.RLock()
111+
defer t.mu.RUnlock()
112+
r := t.records[callSite]
113+
if r == nil {
114+
return nil
115+
}
116+
cp := *r
117+
return &cp
118+
}
119+
120+
// AllStats returns snapshots of all recorded call sites, sorted by hit rate
121+
// ascending (worst performers first).
122+
func (t *CallSiteTracker) AllStats() []*CallSiteRecord {
123+
t.mu.RLock()
124+
defer t.mu.RUnlock()
125+
126+
out := make([]*CallSiteRecord, 0, len(t.records))
127+
for _, r := range t.records {
128+
cp := *r
129+
out = append(out, &cp)
130+
}
131+
132+
// Sort worst hit rate first so callers can surface actionable items.
133+
for i := 1; i < len(out); i++ {
134+
for j := i; j > 0 && out[j].HitRate() < out[j-1].HitRate(); j-- {
135+
out[j], out[j-1] = out[j-1], out[j]
136+
}
137+
}
138+
return out
139+
}
140+
141+
// Reset clears all observations for callSite.
142+
func (t *CallSiteTracker) Reset(callSite string) {
143+
t.mu.Lock()
144+
defer t.mu.Unlock()
145+
delete(t.records, callSite)
146+
}
147+
148+
// ResetAll clears all observations.
149+
func (t *CallSiteTracker) ResetAll() {
150+
t.mu.Lock()
151+
defer t.mu.Unlock()
152+
t.records = make(map[string]*CallSiteRecord)
153+
}
154+
155+
// Summary returns a human-readable summary of all call sites.
156+
func (t *CallSiteTracker) Summary() string {
157+
stats := t.AllStats()
158+
if len(stats) == 0 {
159+
return "no call sites recorded"
160+
}
161+
out := fmt.Sprintf("%-40s %8s %8s %8s\n", "call site", "hit%", "eff", "reqs")
162+
for _, s := range stats {
163+
out += fmt.Sprintf("%-40s %7.0f%% %7.1fx %8d\n",
164+
s.CallSite,
165+
s.HitRate()*100,
166+
s.WriteEfficiency(),
167+
s.TotalRequests,
168+
)
169+
}
170+
return out
171+
}

pkg/metrics/callsite_test.go

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
package metrics
2+
3+
import (
4+
"testing"
5+
)
6+
7+
func TestCallSiteTracker_HitRate(t *testing.T) {
8+
tr := NewCallSiteTracker()
9+
10+
// First request: cache write (no reads yet).
11+
tr.Record("agent.go:42", UsageRecord{
12+
CacheCreationInputTokens: 8000,
13+
InputTokens: 200,
14+
})
15+
16+
s := tr.Stats("agent.go:42")
17+
if s == nil {
18+
t.Fatal("expected stats, got nil")
19+
}
20+
if s.HitRate() != 0 {
21+
t.Errorf("expected 0 hit rate after write-only request, got %f", s.HitRate())
22+
}
23+
if s.TotalRequests != 1 {
24+
t.Errorf("expected 1 request, got %d", s.TotalRequests)
25+
}
26+
27+
// Second request: cache read.
28+
tr.Record("agent.go:42", UsageRecord{
29+
CacheReadInputTokens: 8000,
30+
})
31+
32+
s = tr.Stats("agent.go:42")
33+
// hit rate = 8000 / (8000 + 8000 + 200) = ~0.49
34+
if s.HitRate() <= 0 {
35+
t.Errorf("expected positive hit rate after cache read, got %f", s.HitRate())
36+
}
37+
if s.CacheHitRequests != 1 {
38+
t.Errorf("expected 1 cache hit request, got %d", s.CacheHitRequests)
39+
}
40+
if s.RequestHitRate() != 0.5 {
41+
t.Errorf("expected 0.5 request hit rate, got %f", s.RequestHitRate())
42+
}
43+
}
44+
45+
func TestCallSiteTracker_WriteEfficiency(t *testing.T) {
46+
tr := NewCallSiteTracker()
47+
48+
tr.Record("planner.go:84", UsageRecord{CacheCreationInputTokens: 4000})
49+
tr.Record("planner.go:84", UsageRecord{CacheReadInputTokens: 4000})
50+
tr.Record("planner.go:84", UsageRecord{CacheReadInputTokens: 4000})
51+
52+
s := tr.Stats("planner.go:84")
53+
// efficiency = 8000 / 4000 = 2.0
54+
if s.WriteEfficiency() != 2.0 {
55+
t.Errorf("expected write efficiency 2.0, got %f", s.WriteEfficiency())
56+
}
57+
}
58+
59+
func TestCallSiteTracker_AllStats_SortedByHitRate(t *testing.T) {
60+
tr := NewCallSiteTracker()
61+
62+
// good: 100% hit rate
63+
tr.Record("good.go:1", UsageRecord{CacheReadInputTokens: 1000})
64+
// bad: 0% hit rate
65+
tr.Record("bad.go:1", UsageRecord{CacheCreationInputTokens: 1000})
66+
67+
all := tr.AllStats()
68+
if len(all) != 2 {
69+
t.Fatalf("expected 2 records, got %d", len(all))
70+
}
71+
// Worst first.
72+
if all[0].CallSite != "bad.go:1" {
73+
t.Errorf("expected bad.go:1 first (worst hit rate), got %s", all[0].CallSite)
74+
}
75+
}
76+
77+
func TestCallSiteTracker_Reset(t *testing.T) {
78+
tr := NewCallSiteTracker()
79+
tr.Record("x.go:1", UsageRecord{InputTokens: 100})
80+
tr.Reset("x.go:1")
81+
if tr.Stats("x.go:1") != nil {
82+
t.Error("expected nil after reset")
83+
}
84+
}
85+
86+
func TestCallSiteTracker_ResetAll(t *testing.T) {
87+
tr := NewCallSiteTracker()
88+
tr.Record("a.go:1", UsageRecord{InputTokens: 100})
89+
tr.Record("b.go:1", UsageRecord{InputTokens: 100})
90+
tr.ResetAll()
91+
if len(tr.AllStats()) != 0 {
92+
t.Error("expected empty after ResetAll")
93+
}
94+
}
95+
96+
func TestCallSiteTracker_Summary(t *testing.T) {
97+
tr := NewCallSiteTracker()
98+
tr.Record("agent.go:42", UsageRecord{
99+
CacheCreationInputTokens: 4000,
100+
CacheReadInputTokens: 4000,
101+
})
102+
s := tr.Summary()
103+
if s == "" || s == "no call sites recorded" {
104+
t.Error("expected non-empty summary")
105+
}
106+
}
107+
108+
func TestCallSiteTracker_NilStats(t *testing.T) {
109+
tr := NewCallSiteTracker()
110+
if tr.Stats("nonexistent.go:1") != nil {
111+
t.Error("expected nil for unknown call site")
112+
}
113+
}

0 commit comments

Comments
 (0)