Skip to content

Commit 64d1934

Browse files
authored
Merge pull request #520 from stakpak/fix/lazy-context-trimming
Add lazy context trimming
2 parents f8d68fd + 49ae15f commit 64d1934

15 files changed

Lines changed: 3387 additions & 60 deletions

File tree

AGENTS.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ Vec<ChatMessage> # OpenAI-shaped messages (cli/mode_interacti
7373
ContextManager::reduce_context() # History reduction (libs/api/context_managers/)
7474
↓ merge_consecutive_same_role() # Merge tool messages
7575
↓ dedup_tool_results() # Deduplicate within merged messages
76+
↓ reduce_context_with_budget() # Budget-aware trimming (if over threshold)
7677
7778
Vec<LLMMessage> # Provider-neutral messages
7879
@@ -177,6 +178,22 @@ The codebase uses **three layers** to prevent invalid message sequences:
177178
2. **Pre-API sanitization** (`sanitize_tool_results`): Dedup and remove orphans from `Vec<ChatMessage>` before every API call
178179
3. **Context manager** (`task_board_context_manager.rs`): Merge consecutive same-role messages and dedup tool_results in the `reduce_context()` pipeline
179180

181+
### Context Trimming with Cache Preservation
182+
183+
Long sessions accumulate messages that approach the context window limit. The `TaskBoardContextManager` implements budget-aware trimming:
184+
185+
1. **Lazy trimming**: Only triggers when estimated tokens exceed `context_window × threshold` (default 80%)
186+
2. **Stable prefix**: Trimmed messages are replaced with `[trimmed]` placeholders, preserving message structure (roles, tool_call_ids) for API validity
187+
3. **Cache-friendly**: The trimmed prefix produces identical output across turns, so Anthropic's prompt cache stays valid
188+
4. **Metadata persistence**: Trimming state (`trimmed_up_to_message_index`) is stored in `CheckpointState.metadata` and flows through:
189+
- `CheckpointState.metadata``AgentState.metadata` → Hook updates → `save_checkpoint()` → persisted
190+
191+
Key files:
192+
- `libs/api/src/local/context_managers/task_board_context_manager.rs``reduce_context_with_budget()`, `estimate_tokens()`, `trim_message()`
193+
- `libs/api/src/local/hooks/task_board_context/mod.rs` — Wires budget-aware trimming into the hook lifecycle
194+
- `libs/api/src/storage.rs``CheckpointState.metadata` field
195+
- `libs/api/src/models.rs``AgentState.metadata` field
196+
180197
## Build & Test
181198

182199
```bash

cli/src/commands/acp/server.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1844,7 +1844,14 @@ impl acp::Agent for StakpakAcpAgent {
18441844
let model = self.model.read().await.clone();
18451845
let session_id = self.current_session_id.get();
18461846
let (stream, _request_id) = client
1847-
.chat_completion_stream(model, messages, tools_option.clone(), None, session_id)
1847+
.chat_completion_stream(
1848+
model,
1849+
messages,
1850+
tools_option.clone(),
1851+
None,
1852+
session_id,
1853+
None,
1854+
)
18481855
.await
18491856
.map_err(|e| {
18501857
log::error!("Chat completion stream failed: {e}");
@@ -2032,6 +2039,7 @@ impl acp::Agent for StakpakAcpAgent {
20322039
tools_option.clone(),
20332040
None,
20342041
session_id,
2042+
None,
20352043
)
20362044
.await
20372045
.map_err(|e| {

cli/src/commands/agent/run/checkpoint.rs

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use uuid::Uuid;
1111
pub async fn get_checkpoint_messages(
1212
client: &dyn AgentProvider,
1313
checkpoint_id: &str,
14-
) -> Result<Vec<ChatMessage>, String> {
14+
) -> Result<(Vec<ChatMessage>, Option<serde_json::Value>), String> {
1515
let checkpoint_uuid = Uuid::parse_str(checkpoint_id).map_err(|_| {
1616
format!(
1717
"Invalid checkpoint ID '{}' - must be a valid UUID",
@@ -24,7 +24,7 @@ pub async fn get_checkpoint_messages(
2424
.await
2525
.map_err(|e| e.to_string())?;
2626

27-
Ok(checkpoint.state.messages)
27+
Ok((checkpoint.state.messages, checkpoint.state.metadata))
2828
}
2929

3030
pub async fn extract_checkpoint_messages_and_tool_calls(
@@ -178,19 +178,28 @@ pub async fn resume_session_from_checkpoint(
178178
client: &dyn AgentProvider,
179179
session_id: &str,
180180
input_tx: &tokio::sync::mpsc::Sender<InputEvent>,
181-
) -> Result<(Vec<ChatMessage>, Vec<ToolCall>, Uuid), String> {
181+
) -> Result<
182+
(
183+
Vec<ChatMessage>,
184+
Vec<ToolCall>,
185+
Uuid,
186+
Option<serde_json::Value>,
187+
),
188+
String,
189+
> {
182190
let session_uuid = Uuid::parse_str(session_id).map_err(|e| e.to_string())?;
183191

184192
match client.get_active_checkpoint(session_uuid).await {
185193
Ok(checkpoint) => {
194+
let metadata = checkpoint.state.metadata.clone();
186195
let (chat_messages, tool_calls) = extract_checkpoint_messages_and_tool_calls(
187196
&checkpoint.id.to_string(),
188197
input_tx,
189198
checkpoint.state.messages,
190199
)
191200
.await?;
192201

193-
Ok((chat_messages, tool_calls, checkpoint.session_id))
202+
Ok((chat_messages, tool_calls, checkpoint.session_id, metadata))
194203
}
195204
Err(e) => {
196205
send_input_event(

cli/src/commands/agent/run/mode_async.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,7 @@ pub async fn run_async(ctx: AppConfig, config: RunAsyncConfig) -> Result<AsyncOu
235235

236236
let mut current_session_id: Option<Uuid> = None;
237237
let mut current_checkpoint_id: Option<Uuid> = None;
238+
let mut current_metadata: Option<serde_json::Value> = None;
238239
let mut prior_steps: usize = 0;
239240

240241
// Load checkpoint/session messages if provided
@@ -250,6 +251,7 @@ pub async fn run_async(ctx: AppConfig, config: RunAsyncConfig) -> Result<AsyncOu
250251

251252
current_session_id = Some(checkpoint.session_id);
252253
current_checkpoint_id = Some(checkpoint.id);
254+
current_metadata = checkpoint.state.metadata;
253255
chat_messages.extend(checkpoint.state.messages);
254256

255257
llm_response_time += checkpoint_start.elapsed();
@@ -269,6 +271,7 @@ pub async fn run_async(ctx: AppConfig, config: RunAsyncConfig) -> Result<AsyncOu
269271
Ok(checkpoint) => {
270272
current_session_id = Some(checkpoint.session_id);
271273
current_checkpoint_id = Some(checkpoint_uuid);
274+
current_metadata = checkpoint.state.metadata;
272275
prior_steps = checkpoint
273276
.state
274277
.messages
@@ -452,6 +455,7 @@ pub async fn run_async(ctx: AppConfig, config: RunAsyncConfig) -> Result<AsyncOu
452455
chat_messages.clone(),
453456
Some(tools.clone()),
454457
current_session_id,
458+
current_metadata.clone(),
455459
)
456460
.await
457461
.map_err(|e| e.to_string())?;

cli/src/commands/agent/run/mode_interactive.rs

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ pub async fn run_interactive(
238238
let ctx_clone = ctx.clone(); // Clone ctx for use in client task
239239
let client_handle: tokio::task::JoinHandle<ClientTaskResult> = tokio::spawn(async move {
240240
let mut current_session_id: Option<Uuid> = None;
241+
let mut current_metadata: Option<serde_json::Value> = None;
241242

242243
// Build unified AgentClient config
243244
let providers = ctx_clone.get_llm_provider_config();
@@ -324,11 +325,12 @@ pub async fn run_interactive(
324325
}
325326

326327
if let Some(session_id_str) = session_id {
327-
let (chat_messages, tool_calls, session_id_uuid) =
328+
let (chat_messages, tool_calls, session_id_uuid, checkpoint_metadata) =
328329
resume_session_from_checkpoint(client.as_ref(), &session_id_str, &input_tx)
329330
.await?;
330331

331332
current_session_id = Some(session_id_uuid);
333+
current_metadata = checkpoint_metadata;
332334
should_update_rulebooks_on_next_message = true;
333335
tools_queue.extend(tool_calls.clone());
334336

@@ -354,7 +356,7 @@ pub async fn run_interactive(
354356
current_session_id = Some(checkpoint.session_id);
355357
}
356358

357-
let checkpoint_messages =
359+
let (checkpoint_messages, _checkpoint_metadata) =
358360
get_checkpoint_messages(client.as_ref(), &checkpoint_id_str).await?;
359361

360362
let (chat_messages, tool_calls) = extract_checkpoint_messages_and_tool_calls(
@@ -688,9 +690,15 @@ pub async fn run_interactive(
688690
)
689691
.await
690692
{
691-
Ok((chat_messages, tool_calls, session_id_uuid)) => {
693+
Ok((
694+
chat_messages,
695+
tool_calls,
696+
session_id_uuid,
697+
checkpoint_metadata,
698+
)) => {
692699
// Track the current session ID
693700
current_session_id = Some(session_id_uuid);
701+
current_metadata = checkpoint_metadata;
694702

695703
// Mark that we need to update rulebooks on the next user message
696704
should_update_rulebooks_on_next_message = true;
@@ -757,9 +765,15 @@ pub async fn run_interactive(
757765
)
758766
.await
759767
{
760-
Ok((chat_messages, tool_calls, session_id_uuid)) => {
768+
Ok((
769+
chat_messages,
770+
tool_calls,
771+
session_id_uuid,
772+
checkpoint_metadata,
773+
)) => {
761774
// Track the current session ID
762775
current_session_id = Some(session_id_uuid);
776+
current_metadata = checkpoint_metadata;
763777

764778
// Mark that we need to update rulebooks on the next user message
765779
should_update_rulebooks_on_next_message = true;
@@ -992,6 +1006,7 @@ pub async fn run_interactive(
9921006
Some(tools.clone()),
9931007
headers.clone(),
9941008
current_session_id,
1009+
current_metadata.clone(),
9951010
)
9961011
.await;
9971012

libs/api/src/client/mod.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -230,9 +230,8 @@ impl AgentClient {
230230
hook_registry.register(
231231
LifecycleEvent::BeforeInference,
232232
Box::new(TaskBoardContextHook::new(TaskBoardContextHookOptions {
233-
history_action_message_size_limit: Some(100),
234-
history_action_message_keep_last_n: Some(50),
235-
history_action_result_keep_last_n: Some(50),
233+
keep_last_n_assistant_messages: Some(10),
234+
context_budget_threshold: Some(0.8), // defaults to 0.8 (80%)
236235
})),
237236
);
238237
let hook_registry = Arc::new(hook_registry);

libs/api/src/client/provider.rs

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,8 +181,10 @@ impl AgentProvider for AgentClient {
181181
messages: Vec<ChatMessage>,
182182
tools: Option<Vec<Tool>>,
183183
session_id: Option<Uuid>,
184+
metadata: Option<serde_json::Value>,
184185
) -> Result<ChatCompletionResponse, String> {
185186
let mut ctx = HookContext::new(session_id, AgentState::new(model, messages, tools));
187+
ctx.state.metadata = metadata;
186188

187189
// Execute before request hooks
188190
self.hook_registry
@@ -201,7 +203,11 @@ impl AgentProvider for AgentClient {
201203

202204
// Save checkpoint
203205
let result = self
204-
.save_checkpoint(&current_session, ctx.state.messages.clone())
206+
.save_checkpoint(
207+
&current_session,
208+
ctx.state.messages.clone(),
209+
ctx.state.metadata.clone(),
210+
)
205211
.await?;
206212
let checkpoint_created_at = result.checkpoint_created_at.timestamp() as u64;
207213
ctx.set_new_checkpoint_id(result.checkpoint_id);
@@ -265,6 +271,7 @@ impl AgentProvider for AgentClient {
265271
tools: Option<Vec<Tool>>,
266272
_headers: Option<HeaderMap>,
267273
session_id: Option<Uuid>,
274+
metadata: Option<serde_json::Value>,
268275
) -> Result<
269276
(
270277
Pin<
@@ -275,6 +282,7 @@ impl AgentProvider for AgentClient {
275282
String,
276283
> {
277284
let mut ctx = HookContext::new(session_id, AgentState::new(model, messages, tools));
285+
ctx.state.metadata = metadata;
278286

279287
// Execute before request hooks
280288
self.hook_registry
@@ -332,7 +340,11 @@ impl AgentProvider for AgentClient {
332340
}
333341

334342
let result = client
335-
.save_checkpoint(&current_session, ctx_clone.state.messages.clone())
343+
.save_checkpoint(
344+
&current_session,
345+
ctx_clone.state.messages.clone(),
346+
ctx_clone.state.metadata.clone(),
347+
)
336348
.await;
337349

338350
match result {
@@ -784,10 +796,15 @@ impl AgentClient {
784796
&self,
785797
current: &SessionInfo,
786798
messages: Vec<ChatMessage>,
799+
metadata: Option<serde_json::Value>,
787800
) -> Result<SessionInfo, String> {
788-
let checkpoint_request =
801+
let mut checkpoint_request =
789802
StorageCreateCheckpointRequest::new(messages).with_parent(current.checkpoint_id);
790803

804+
if let Some(meta) = metadata {
805+
checkpoint_request = checkpoint_request.with_metadata(meta);
806+
}
807+
791808
let checkpoint = self
792809
.session_storage
793810
.create_checkpoint(current.session_id, &checkpoint_request)

libs/api/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ pub trait AgentProvider: SessionStorage + Send + Sync {
147147
messages: Vec<ChatMessage>,
148148
tools: Option<Vec<Tool>>,
149149
session_id: Option<Uuid>,
150+
metadata: Option<serde_json::Value>,
150151
) -> Result<ChatCompletionResponse, String>;
151152
async fn chat_completion_stream(
152153
&self,
@@ -155,6 +156,7 @@ pub trait AgentProvider: SessionStorage + Send + Sync {
155156
tools: Option<Vec<Tool>>,
156157
headers: Option<HeaderMap>,
157158
session_id: Option<Uuid>,
159+
metadata: Option<serde_json::Value>,
158160
) -> Result<
159161
(
160162
std::pin::Pin<

0 commit comments

Comments
 (0)