Skip to content

Commit 8f94e84

Browse files
committed
feat(knowledge_review): add background skill review matching Hermes
Two separate triggers for background review, matching Hermes: - Memory review: fires after 5+ user turns (unchanged) - Skill review: fires after 10+ tool iterations in a single reply (complex work that required many tool calls = likely worth capturing) When both trigger at once, sends a combined prompt that asks for both memory AND skill extraction in one call (avoids double API cost). Skill review prompt asks specifically: 'Was a non-trivial approach used that required trial and error, or changing course, or did the user expect a different method?' — focused on learnable struggle, not routine. Review agent gets both memory + skill tools (create_skill, patch_skill, load_skill) so it can create new skills or update existing ones. Signed-off-by: Michael Neale <michael.neale@gmail.com>
1 parent 29d54bc commit 8f94e84

2 files changed

Lines changed: 106 additions & 41 deletions

File tree

crates/goose/src/agents/agent.rs

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1806,15 +1806,30 @@ impl Agent {
18061806
tracing::info!(target: "goose::agents::agent", trace_output = last_assistant_text.as_str());
18071807
}
18081808

1809-
// Spawn background memory review if enough turns have passed
1810-
if turns_taken >= super::knowledge_review::DEFAULT_MEMORY_REVIEW_INTERVAL {
1809+
// Spawn background knowledge review if triggers are met
1810+
// Memory review: fires after N user turns
1811+
// Skill review: fires after N tool iterations (complex work)
1812+
let should_review_memory =
1813+
turns_taken >= super::knowledge_review::DEFAULT_MEMORY_REVIEW_INTERVAL;
1814+
1815+
let final_tool_count = conversation.messages().iter()
1816+
.flat_map(|m| m.content.iter())
1817+
.filter(|c| matches!(c, MessageContent::ToolRequest(_)))
1818+
.count()
1819+
.saturating_sub(pre_turn_tool_count);
1820+
let should_review_skills =
1821+
final_tool_count as u32 >= super::knowledge_review::DEFAULT_SKILL_REVIEW_ITERATIONS;
1822+
1823+
if should_review_memory || should_review_skills {
18111824
if let Ok(provider) = self.provider().await {
18121825
super::knowledge_review::spawn_background_review(
18131826
provider,
18141827
Arc::clone(&self.extension_manager),
18151828
conversation.clone(),
18161829
session_config.id.clone(),
18171830
working_dir.clone(),
1831+
should_review_memory,
1832+
should_review_skills,
18181833
);
18191834
}
18201835
}

crates/goose/src/agents/knowledge_review.rs

Lines changed: 89 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
//! Background knowledge review and pre-compression memory flush.
22
//!
3-
//! Two mechanisms for autonomous memory extraction:
3+
//! Three mechanisms for autonomous knowledge extraction:
44
//!
5-
//! 1. **Background review**: After a reply is delivered, if enough user turns
6-
//! have elapsed, spawn a background task that reviews the conversation
7-
//! and calls memory tools to extract durable facts.
5+
//! 1. **Background memory review**: After a reply, if enough user turns have
6+
//! elapsed, review the conversation for memory-worthy facts.
87
//!
9-
//! 2. **Pre-compression flush**: Before context compaction, give the model
10-
//! one cheap API call with only memory tools to save anything worth
11-
//! keeping before it gets summarized away.
8+
//! 2. **Background skill review**: After a reply, if enough tool iterations
9+
//! occurred (complex work), review whether a skill should be created/updated.
10+
//!
11+
//! 3. **Pre-compression flush**: Before context compaction, give the model
12+
//! one cheap API call with only memory tools to save anything worth keeping.
1213
1314
use std::sync::Arc;
1415

@@ -24,10 +25,12 @@ use tracing::{debug, info, warn};
2425
/// Default interval: review memory every N user turns.
2526
pub const DEFAULT_MEMORY_REVIEW_INTERVAL: u32 = 5;
2627

28+
/// Default interval: review skills every N tool iterations in a single reply.
29+
pub const DEFAULT_SKILL_REVIEW_ITERATIONS: u32 = 10;
30+
2731
/// Maximum tool calls the review agent can make per review.
2832
const MAX_REVIEW_TOOL_CALLS: usize = 8;
2933

30-
/// The prompt injected for background memory review.
3134
const MEMORY_REVIEW_PROMPT: &str = r#"Review the conversation above and extract any durable facts worth saving to persistent memory. Focus on:
3235
3336
1. User identity/preferences: name, role, timezone, coding style, communication preferences, pet peeves
@@ -49,10 +52,26 @@ Rules:
4952
5053
Make your memory tool calls now."#;
5154

52-
/// The prompt injected for pre-compression flush.
55+
const SKILL_REVIEW_PROMPT: &str = r#"Review the conversation above and consider saving or updating a skill if appropriate.
56+
57+
Focus on: was a non-trivial approach used to complete a task that required trial and error, or changing course due to experiential findings along the way, or did the user expect or desire a different method or outcome?
58+
59+
If a relevant skill already exists, update it with what you learned.
60+
Otherwise, create a new skill if the approach is reusable.
61+
If nothing is worth saving, just say "Nothing to save." and stop."#;
62+
63+
const COMBINED_REVIEW_PROMPT: &str = r#"Review the conversation above and consider two things:
64+
65+
**Memory**: Has the user revealed things about themselves — their persona, desires, preferences, or personal details? Has the user expressed expectations about how you should behave, their work style, or ways they want you to operate? If so, save using the memory tool.
66+
67+
**Skills**: Was a non-trivial approach used to complete a task that required trial and error, or changing course due to experiential findings along the way, or did the user expect or desire a different method or outcome? If a relevant skill already exists, update it. Otherwise, create a new one if the approach is reusable.
68+
69+
Only act if there's something genuinely worth saving.
70+
If nothing stands out, just say "Nothing to save." and stop."#;
71+
5372
const FLUSH_PROMPT: &str = "[System: The session context is being compressed. Save anything worth remembering permanently — prioritize user preferences, corrections, environment facts, and recurring patterns over task-specific details. This is your last chance before earlier conversation turns are summarized away.]";
5473

55-
/// Spawn a background task to review the conversation for memory-worthy facts.
74+
/// Spawn a background task to review the conversation for memory and/or skill saves.
5675
///
5776
/// Runs AFTER the reply is delivered. The user never sees this.
5877
pub fn spawn_background_review(
@@ -61,20 +80,42 @@ pub fn spawn_background_review(
6180
conversation: Conversation,
6281
session_id: String,
6382
working_dir: std::path::PathBuf,
83+
review_memory: bool,
84+
review_skills: bool,
6485
) {
86+
let prompt = if review_memory && review_skills {
87+
COMBINED_REVIEW_PROMPT
88+
} else if review_skills {
89+
SKILL_REVIEW_PROMPT
90+
} else {
91+
MEMORY_REVIEW_PROMPT
92+
};
93+
94+
let task_name = if review_memory && review_skills {
95+
"combined_review"
96+
} else if review_skills {
97+
"skill_review"
98+
} else {
99+
"memory_review"
100+
};
101+
65102
tokio::spawn(async move {
66-
if let Err(e) = run_memory_extraction(
103+
if let Err(e) = run_knowledge_extraction(
67104
provider.as_ref(),
68105
&extension_manager,
69106
&conversation,
70107
&session_id,
71108
&working_dir,
72-
MEMORY_REVIEW_PROMPT,
73-
"review",
109+
prompt,
110+
task_name,
111+
ReviewScope {
112+
include_memory_tools: review_memory,
113+
include_skill_tools: review_skills,
114+
},
74115
)
75116
.await
76117
{
77-
warn!("Background memory review failed: {}", e);
118+
warn!("Background {} failed: {}", task_name, e);
78119
}
79120
});
80121
}
@@ -103,53 +144,65 @@ pub async fn flush_memories_before_compaction(
103144
return Ok(());
104145
}
105146

106-
run_memory_extraction(
147+
run_knowledge_extraction(
107148
provider,
108149
extension_manager,
109150
conversation,
110151
session_id,
111152
working_dir,
112153
FLUSH_PROMPT,
113154
"flush",
155+
ReviewScope {
156+
include_memory_tools: true,
157+
include_skill_tools: false,
158+
},
114159
)
115160
.await
116161
}
117162

118-
/// Core extraction logic shared by background review and flush.
119-
///
120-
/// 1. Build messages from conversation + extraction prompt
121-
/// 2. Find memory tools from extension manager
122-
/// 3. Call complete_fast() with only memory tools
123-
/// 4. Execute any memory tool calls from the response
124-
async fn run_memory_extraction(
163+
/// What to review in a knowledge extraction call.
164+
struct ReviewScope {
165+
include_memory_tools: bool,
166+
include_skill_tools: bool,
167+
}
168+
169+
/// Core extraction logic shared by all review types.
170+
#[allow(clippy::too_many_arguments)]
171+
async fn run_knowledge_extraction(
125172
provider: &dyn Provider,
126173
extension_manager: &ExtensionManager,
127174
conversation: &Conversation,
128175
session_id: &str,
129176
working_dir: &std::path::Path,
130177
extraction_prompt: &str,
131178
task_name: &str,
179+
scope: ReviewScope,
132180
) -> anyhow::Result<()> {
133-
// Find memory tools
134181
let all_tools = extension_manager
135182
.get_prefixed_tools(session_id, None)
136183
.await
137184
.unwrap_or_default();
138185

139-
let memory_tools: Vec<Tool> = all_tools
186+
let review_tools: Vec<Tool> = all_tools
140187
.into_iter()
141188
.filter(|t| {
142189
let name: &str = &t.name;
143-
name == "memory" || name.ends_with("__memory")
190+
let is_memory = name == "memory" || name.ends_with("__memory");
191+
let is_skill = name == "create_skill"
192+
|| name == "patch_skill"
193+
|| name == "load_skill"
194+
|| name.ends_with("__create_skill")
195+
|| name.ends_with("__patch_skill")
196+
|| name.ends_with("__load_skill");
197+
(scope.include_memory_tools && is_memory) || (scope.include_skill_tools && is_skill)
144198
})
145199
.collect();
146200

147-
if memory_tools.is_empty() {
148-
debug!("No memory tools found, skipping {} extraction", task_name);
201+
if review_tools.is_empty() {
202+
debug!("No relevant tools found, skipping {} extraction", task_name);
149203
return Ok(());
150204
}
151205

152-
// Build messages: conversation snapshot + extraction prompt
153206
let mut messages: Vec<Message> = conversation
154207
.messages()
155208
.iter()
@@ -160,11 +213,10 @@ async fn run_memory_extraction(
160213
messages.push(Message::user().with_text(extraction_prompt));
161214

162215
let system_prompt =
163-
"You are reviewing a conversation to extract durable facts for persistent memory. \
164-
You have access to the memory tool. Use it to save important facts. \
165-
Be selective — only save things that will matter in future sessions.";
216+
"You are reviewing a conversation to extract durable knowledge. \
217+
You have access to memory and/or skill tools. Use them to save important facts \
218+
or reusable approaches. Be selective — only save things that will matter in future sessions.";
166219

167-
// Mini agent loop: call model, execute tool calls, repeat
168220
let mut tool_calls_made = 0;
169221

170222
loop {
@@ -173,18 +225,17 @@ async fn run_memory_extraction(
173225
}
174226

175227
let result = provider
176-
.complete_fast(session_id, system_prompt, &messages, &memory_tools)
228+
.complete_fast(session_id, system_prompt, &messages, &review_tools)
177229
.await;
178230

179231
let (response_message, _usage) = match result {
180232
Ok(r) => r,
181233
Err(e) => {
182-
debug!("Memory {} model call failed: {}", task_name, e);
234+
debug!("{} model call failed: {}", task_name, e);
183235
break;
184236
}
185237
};
186238

187-
// Extract tool requests from response
188239
let tool_requests: Vec<_> = response_message
189240
.content
190241
.iter()
@@ -203,7 +254,6 @@ async fn run_memory_extraction(
203254

204255
messages.push(response_message);
205256

206-
// Execute each tool call
207257
for tool_request in &tool_requests {
208258
tool_calls_made += 1;
209259

@@ -225,7 +275,7 @@ async fn run_memory_extraction(
225275
Ok(tool_result) => {
226276
let call_result = tool_result.result.await;
227277
debug!(
228-
"Memory {} tool call completed: {:?}",
278+
"{} tool call completed: {:?}",
229279
task_name,
230280
call_result.is_ok()
231281
);
@@ -234,14 +284,14 @@ async fn run_memory_extraction(
234284
messages.push(response);
235285
}
236286
Err(e) => {
237-
debug!("Memory {} tool dispatch failed: {}", task_name, e);
287+
debug!("{} tool dispatch failed: {}", task_name, e);
238288
}
239289
}
240290
}
241291
}
242292

243293
info!(
244-
"Memory {} complete: {} tool calls made",
294+
"{} complete: {} tool calls made",
245295
task_name, tool_calls_made
246296
);
247297
Ok(())

0 commit comments

Comments
 (0)