Skip to content

Commit ef02c5c

Browse files
authored
Merge pull request #1388 from simonferquel/split-invariant-messages
Optimize prompt caching by splitting system messages
2 parents 8f8550d + 421d1b0 commit ef02c5c

File tree

2 files changed

+94
-17
lines changed

2 files changed

+94
-17
lines changed

pkg/session/session.go

Lines changed: 61 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -393,9 +393,19 @@ func New(opts ...Opt) *Session {
393393
return s
394394
}
395395

396-
func (s *Session) GetMessages(a *agent.Agent) []chat.Message {
397-
slog.Debug("Getting messages for agent", "agent", a.Name(), "session_id", s.ID)
396+
// markLastMessageAsCacheControl sets CacheControl to true on the final element of messages, in place; an empty slice is left untouched.
func markLastMessageAsCacheControl(messages []chat.Message) {
397+
if len(messages) > 0 {
398+
messages[len(messages)-1].CacheControl = true
399+
}
400+
}
398401

402+
// buildInvariantSystemMessages builds system messages that are identical
403+
// for all users of a given agent configuration. These messages can be
404+
// cached efficiently as they don't change between sessions, users, or projects.
405+
//
406+
// These messages are determined solely by the agent configuration and
407+
// remain constant across different sessions, users, and working directories.
408+
func buildInvariantSystemMessages(a *agent.Agent) []chat.Message {
399409
var messages []chat.Message
400410

401411
if a.HasSubAgents() {
@@ -465,11 +475,20 @@ func (s *Session) GetMessages(a *agent.Agent) []chat.Message {
465475
}
466476
}
467477

468-
// Cache control checkpoint #1 out of 4
469-
// At the end of the system messages that are most likely to be invariant.
470-
if len(messages) > 0 {
471-
messages[len(messages)-1].CacheControl = true
472-
}
478+
markLastMessageAsCacheControl(messages)
479+
480+
return messages
481+
}
482+
483+
// buildContextSpecificSystemMessages builds system messages that vary
484+
// per user, project, or time. These messages should come after
485+
// the invariant checkpoint to maintain optimal caching behavior.
486+
//
487+
// These messages depend on runtime context (working directory, current date,
488+
// user-specific skills) and cannot be cached across sessions or users.
489+
// Note: Session summary is handled separately in buildSessionSummaryMessages.
490+
func buildContextSpecificSystemMessages(a *agent.Agent, s *Session) []chat.Message {
491+
var messages []chat.Message
473492

474493
if a.AddDate() {
475494
messages = append(messages, chat.Message{
@@ -520,6 +539,20 @@ func (s *Session) GetMessages(a *agent.Agent) []chat.Message {
520539
}
521540
}
522541

542+
// It is still useful to mark these messages for cache control: if a user starts a second prompt for the same project, the first prompt's cache, including the user specifics, can be leveraged.
543+
markLastMessageAsCacheControl(messages)
544+
545+
return messages
546+
}
547+
548+
// buildSessionSummaryMessages builds system messages containing the session summary
549+
// if one exists. Session summaries are context-specific per session and thus should not have a checkpoint (they will be cached alongside the first user message anyway)
550+
//
551+
// lastSummaryIndex is the index of the last summary item in s.Messages, or -1 if none exists.
552+
func buildSessionSummaryMessages(s *Session) ([]chat.Message, int) {
553+
var messages []chat.Message
554+
// Find the last summary index to determine where conversation messages start
555+
// and to include the summary in session summary messages
523556
lastSummaryIndex := -1
524557
for i := len(s.Messages) - 1; i >= 0; i-- {
525558
if s.Messages[i].Summary != "" {
@@ -528,24 +561,35 @@ func (s *Session) GetMessages(a *agent.Agent) []chat.Message {
528561
}
529562
}
530563

531-
if lastSummaryIndex != -1 {
564+
if lastSummaryIndex >= 0 && lastSummaryIndex < len(s.Messages) {
532565
messages = append(messages, chat.Message{
533566
Role: chat.MessageRoleSystem,
534567
Content: "Session Summary: " + s.Messages[lastSummaryIndex].Summary,
535568
CreatedAt: time.Now().Format(time.RFC3339),
536569
})
537570
}
538571

539-
startIndex := lastSummaryIndex + 1
540-
if lastSummaryIndex == -1 {
541-
startIndex = 0
542-
}
572+
return messages, lastSummaryIndex
573+
}
543574

544-
// Cache control checkpoint #2 out of 4
545-
// At the end of all the system messages.
546-
if len(messages) > 0 {
547-
messages[len(messages)-1].CacheControl = true
548-
}
575+
func (s *Session) GetMessages(a *agent.Agent) []chat.Message {
576+
slog.Debug("Getting messages for agent", "agent", a.Name(), "session_id", s.ID)
577+
578+
var messages []chat.Message
579+
580+
// Build invariant system messages (cacheable across sessions/users/projects)
581+
invariantMessages := buildInvariantSystemMessages(a)
582+
messages = append(messages, invariantMessages...)
583+
584+
// Build context-specific system messages (vary per user/project/time)
585+
contextMessages := buildContextSpecificSystemMessages(a, s)
586+
messages = append(messages, contextMessages...)
587+
588+
// Build session summary messages (vary per session)
589+
summaryMessages, lastSummaryIndex := buildSessionSummaryMessages(s)
590+
messages = append(messages, summaryMessages...)
591+
592+
startIndex := lastSummaryIndex + 1
549593

550594
// Begin adding conversation messages
551595
for i := startIndex; i < len(s.Messages); i++ {

pkg/session/session_test.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,3 +184,36 @@ func TestGetMessages_CacheControl(t *testing.T) {
184184
assert.Contains(t, messages[1].Content, "Using the Todo Tools")
185185
assert.True(t, messages[1].CacheControl)
186186
}
187+
188+
func TestGetMessages_CacheControlWithSummary(t *testing.T) {
189+
// Create an agent with a toolset and the add-date option enabled; the session below carries the summary
190+
testAgent := agent.New("root", "instructions",
191+
agent.WithToolSets(&builtin.TodoTool{}),
192+
agent.WithAddDate(true),
193+
)
194+
195+
s := New()
196+
s.Messages = append(s.Messages, Item{Summary: "Test summary"})
197+
messages := s.GetMessages(testAgent)
198+
199+
// Should have: instructions, toolset instructions, date, summary
200+
// Checkpoint #1: last invariant message (toolset instructions)
201+
// Checkpoint #2: last context-specific message (date)
202+
// No checkpoint is expected on the summary message: the assertions below require exactly 2
203+
204+
var checkpointIndices []int
205+
for i, msg := range messages {
206+
if msg.Role == chat.MessageRoleSystem && msg.CacheControl {
207+
checkpointIndices = append(checkpointIndices, i)
208+
}
209+
}
210+
211+
// Verify that exactly 2 system messages carry a cache-control checkpoint
212+
assert.Len(t, checkpointIndices, 2, "should have 2 checkpoints")
213+
214+
// Verify checkpoint #1 is on toolset instructions
215+
assert.Contains(t, messages[checkpointIndices[0]].Content, "Using the Todo Tools", "checkpoint #1 should be on toolset instructions")
216+
217+
// Verify checkpoint #2 is on date
218+
assert.Contains(t, messages[checkpointIndices[1]].Content, "Today's date", "checkpoint #2 should be on date message")
219+
}

0 commit comments

Comments
 (0)