71 changes: 64 additions & 7 deletions src/openhuman/agent/harness/session/builder.rs
@@ -792,7 +792,14 @@ impl Agent {
let mut post_turn_hooks: Vec<Arc<dyn crate::openhuman::agent::hooks::PostTurnHook>> =
Vec::new();
if config.learning.enabled {
if config.learning.reflection_enabled {
// #1419: reflection only fires for the user-facing orchestrator.
// Sub-agent transcripts are short-lived, scoped to a single
// delegation, and almost never carry durable user context worth
// surfacing in future chats. Gating here keeps the hook off
// every specialist / archivist / welcome session.
let is_orchestrator = agent_id == "orchestrator";

if config.learning.reflection_enabled && is_orchestrator {
// Only the reflection hook needs an owned snapshot of the
// full config, so create the `Arc` lazily inside this
// branch instead of paying for the clone whenever
@@ -814,16 +821,66 @@
} else {
None
};
post_turn_hooks.push(Arc::new(crate::openhuman::learning::ReflectionHook::new(
config.learning.clone(),
full_config.clone(),
memory.clone(),
reflection_provider,
)));

// #1419: opt-in dedicated summarizer. When the cloud
// source is selected we reuse the same routed provider
// and just route the call through a different model
// hint. Local summarizers are handled inside
// ReflectionHook itself via the legacy local-AI path,
// so we skip the trait wiring there.
let summarizer: Option<Arc<dyn crate::openhuman::learning::SummarizerProvider>> =
if config.learning.summarizer.enabled
&& config.learning.summarizer.source
== crate::openhuman::config::SummarizerSource::Cloud
{
let provider: Arc<dyn crate::openhuman::providers::Provider> =
match reflection_provider.clone() {
Some(p) => p,
None => Arc::from(providers::create_routed_provider(
config.api_url.as_deref(),
config.api_key.as_deref(),
&config.reliability,
&config.model_routes,
&model_name,
)?),
};
Some(Arc::new(
crate::openhuman::learning::ConfiguredSummarizer::new(
provider,
config.learning.summarizer.model_hint.clone(),
config.learning.summarizer.max_context_chars,
"reflection-summarizer",
),
))
} else {
None
};
Comment on lines +825 to +857
⚠️ Potential issue | 🟠 Major | 🏗️ Heavy lift

Local summarizer config is still a no-op.

This only builds a dedicated SummarizerProvider for SummarizerSource::Cloud, but ReflectionHook::run_reflection only takes the dedicated path when self.summarizer.is_some(). With learning.summarizer.enabled = true and source = Local, reflection falls back to the legacy route and ignores the summarizer model hint/context cap entirely. Please either wire a local SummarizerProvider here or reject that config explicitly so it cannot silently do nothing.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/openhuman/agent/harness/session/builder.rs` around lines 825-857: the
current builder only constructs a SummarizerProvider when
learning.summarizer.source == SummarizerSource::Cloud, leaving a local-enabled
config ineffective because ReflectionHook::run_reflection checks
self.summarizer.is_some(); update the builder to either (A) instantiate and
assign a local implementation of crate::openhuman::learning::SummarizerProvider
when config.learning.summarizer.source == SummarizerSource::Local (e.g., wrap
the existing local summarizer implementation used by ReflectionHook into an
Arc<dyn SummarizerProvider> similar to ConfiguredSummarizer) or (B) explicitly
reject/return an Err when config.learning.summarizer.enabled == true and source
== Local so the config cannot silently be no-op; modify the Summarizer creation
block (the let summarizer: Option<Arc<dyn
crate::openhuman::learning::SummarizerProvider>> = ...) to implement one of
these behaviors so ReflectionHook::run_reflection sees a Some(...) for local
setups or the config fails fast.
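
A minimal sketch of option (B), rejecting the no-op combination up front, is shown below. It assumes the builder function returns `anyhow::Result` (it already uses `?` on `create_routed_provider`); the `bail!` message and exact placement are illustrative, not the shipped fix.

```rust
// Sketch only (review option B): fail fast instead of silently
// ignoring the summarizer config. Assumes this builder function
// returns anyhow::Result, consistent with the `?` on
// `create_routed_provider` above.
if config.learning.summarizer.enabled
    && config.learning.summarizer.source
        == crate::openhuman::config::SummarizerSource::Local
{
    anyhow::bail!(
        "learning.summarizer.source = \"local\" is not wired to a \
         dedicated SummarizerProvider yet; set source = \"cloud\" or \
         disable the summarizer"
    );
}
```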

if summarizer.is_some() {
log::info!(
"[learning] reflection summarizer enabled (model_hint={}, cap={})",
config.learning.summarizer.model_hint,
config.learning.summarizer.max_context_chars,
);
}

post_turn_hooks.push(Arc::new(
crate::openhuman::learning::ReflectionHook::with_summarizer(
config.learning.clone(),
full_config.clone(),
memory.clone(),
reflection_provider,
summarizer,
),
));
log::info!(
"[learning] reflection hook registered (source={:?})",
config.learning.reflection_source
);
} else if config.learning.reflection_enabled {
log::debug!(
"[learning] reflection hook skipped — agent '{}' is not the orchestrator",
agent_id
);
}

if config.learning.user_profile_enabled {
11 changes: 11 additions & 0 deletions src/openhuman/agent/harness/session/turn.rs
@@ -1495,6 +1495,17 @@ impl Agent {
///
/// Fire-and-forget: failures are logged, never propagated.
pub(super) fn spawn_transcript_ingestion(&self) {
// #1419: transcript ingest only fires for the user-facing
// orchestrator. Sub-agent transcripts are short-lived, scoped
// to a single delegation, and almost never carry durable user
// context worth lifting into `conversation_memory`.
if self.agent_definition_name != "orchestrator" {
log::debug!(
"[transcript_ingest] skipping spawn — agent '{}' is not the orchestrator",
self.agent_definition_name
);
return;
}
let Some(path) = self.session_transcript_path.clone() else {
log::debug!("[transcript_ingest] no session transcript path yet — skipping spawn");
return;
7 changes: 4 additions & 3 deletions src/openhuman/config/mod.rs
@@ -34,9 +34,10 @@ pub use schema::{
ReliabilityConfig, ResourceLimitsConfig, RuntimeConfig, SandboxBackend, SandboxConfig,
SchedulerConfig, SchedulerGateConfig, SchedulerGateMode, ScreenIntelligenceConfig,
SecretsConfig, SecurityConfig, SlackConfig, StorageConfig, StorageProviderConfig,
StorageProviderSection, StreamMode, TelegramConfig, UpdateConfig, UpdateRestartStrategy,
VoiceActivationMode, VoiceServerConfig, WebSearchConfig, WebhookConfig,
DEFAULT_CLOUD_LLM_MODEL, DEFAULT_MODEL, MODEL_AGENTIC_V1, MODEL_CODING_V1, MODEL_REASONING_V1,
StorageProviderSection, StreamMode, SummarizerConfig, SummarizerSource, TelegramConfig,
UpdateConfig, UpdateRestartStrategy, VoiceActivationMode, VoiceServerConfig, WebSearchConfig,
WebhookConfig, DEFAULT_CLOUD_LLM_MODEL, DEFAULT_MODEL, MODEL_AGENTIC_V1, MODEL_CODING_V1,
MODEL_REASONING_V1,
};
pub use schema::{
clear_active_user, default_root_openhuman_dir, pre_login_user_dir, read_active_user_id,
73 changes: 73 additions & 0 deletions src/openhuman/config/schema/learning.rs
@@ -78,6 +78,78 @@ pub struct LearningConfig {
/// How often the periodic rebuild loop runs in seconds. Default: 1800 (30 minutes).
#[serde(default = "default_rebuild_interval_secs")]
pub rebuild_interval_secs: u64,

/// Summarizer LLM configuration (#1419).
///
/// Reflection and transcript ingest fire often; running them on the
/// orchestrator-tier model is wasteful. When `summarizer.enabled =
/// true` and a model hint is configured, the learning pipeline routes
/// its LLM calls to this cheaper model instead. When disabled (the
/// default), callers fall back to the heuristic path from #1406 / the
/// orchestrator-tier provider, so users without a summarizer
/// configured never silently break.
#[serde(default)]
pub summarizer: SummarizerConfig,
}

/// Where the summarizer model runs.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum SummarizerSource {
/// Use the configured cloud provider routed via a model hint.
#[default]
Cloud,
/// Use the local Ollama summarizer model via `LocalAiService::prompt()`.
Local,
}

/// Configuration for the dedicated cheap summarizer used by the
/// reflection / transcript-ingest paths (#1419). A pluggable model
/// separate from the orchestrator provider, it carries its own
/// context-window cap so callers can budget compression.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct SummarizerConfig {
/// Master switch. Default: false — leave the heuristic-only path
/// from #1406 as the source of truth for offline users.
#[serde(default)]
pub enabled: bool,

/// Where the summarizer runs (cloud routed via model hint vs local
/// Ollama). Default: cloud.
#[serde(default)]
pub source: SummarizerSource,

/// Model hint string passed to `Provider::simple_chat` when
/// `source == Cloud`. Defaults to `"hint:fast"` so a typical
/// `model_routes` config routes summarizer traffic to the cheapest
/// tier without the user touching this field.
#[serde(default = "default_summarizer_model_hint")]
pub model_hint: String,

/// Approximate character budget for the summarizer's prompt input.
/// Callers compress / truncate inputs to stay under this cap. The
/// implementation clamps to a sane minimum (1024 chars).
#[serde(default = "default_summarizer_context_chars")]
pub max_context_chars: usize,
}

fn default_summarizer_model_hint() -> String {
"hint:fast".to_string()
}

fn default_summarizer_context_chars() -> usize {
6_000
}

impl Default for SummarizerConfig {
fn default() -> Self {
Self {
enabled: false,
source: SummarizerSource::default(),
model_hint: default_summarizer_model_hint(),
max_context_chars: default_summarizer_context_chars(),
}
}
}

fn default_rebuild_interval_secs() -> u64 {
@@ -110,6 +182,7 @@ impl Default for LearningConfig {
chat_to_tree_enabled: default_true(),
stability_detector_enabled: default_true(),
rebuild_interval_secs: default_rebuild_interval_secs(),
summarizer: SummarizerConfig::default(),
}
}
}
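
For reference, the defaults above imply the behavior below, written as a small test sketch; the assertions restate values from this diff, while the test module itself is hypothetical and not part of the change.

```rust
#[cfg(test)]
mod summarizer_defaults {
    use super::*;

    #[test]
    fn summarizer_is_off_and_cheap_by_default() {
        let cfg = SummarizerConfig::default();
        // Heuristic-only path from #1406 stays authoritative by default.
        assert!(!cfg.enabled);
        assert_eq!(cfg.source, SummarizerSource::Cloud);
        // "hint:fast" routes summarizer traffic to the cheapest tier.
        assert_eq!(cfg.model_hint, "hint:fast");
        assert_eq!(cfg.max_context_chars, 6_000);
    }
}
```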
2 changes: 1 addition & 1 deletion src/openhuman/config/schema/mod.rs
@@ -44,7 +44,7 @@ pub use context::ContextConfig;
pub use dictation::{DictationActivationMode, DictationConfig};
pub use heartbeat_cron::{CronConfig, HeartbeatConfig};
pub use identity_cost::{CostConfig, ModelPricing};
pub use learning::{LearningConfig, ReflectionSource};
pub use learning::{LearningConfig, ReflectionSource, SummarizerConfig, SummarizerSource};
pub use local_ai::{LocalAiConfig, LocalAiUsage};
pub use meet::MeetConfig;
pub use node::NodeConfig;
5 changes: 5 additions & 0 deletions src/openhuman/learning/mod.rs
@@ -31,6 +31,7 @@ pub mod reflection;
pub mod scheduler;
pub mod schemas;
pub mod stability_detector;
pub mod summarizer;
pub mod tool_tracker;
pub mod transcript_ingest;
pub mod user_profile;
@@ -47,5 +48,9 @@ pub use schemas::{
all_learning_controller_schemas, all_learning_registered_controllers, learning_schemas,
};
pub use stability_detector::StabilityDetector;
pub use summarizer::{
compress_tool_calls, render_tool_digests, ConfiguredSummarizer, SummarizerProvider,
ToolCallDigest,
};
pub use tool_tracker::ToolTrackerHook;
pub use user_profile::UserProfileHook;
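
The `SummarizerProvider` trait definition is not part of this diff. Reconstructed from its call sites in `reflection.rs` below (`label()`, `context_window_chars()`, and an async `prompt()` returning `anyhow::Result<String>`), it plausibly looks like the sketch here; the `async_trait` attribute and exact receiver/return types are assumptions.

```rust
/// Sketch reconstructed from the call sites in `reflection.rs`; not
/// the shipped definition.
#[async_trait::async_trait]
pub trait SummarizerProvider: Send + Sync {
    /// Short label used in log lines (e.g. "reflection-summarizer").
    fn label(&self) -> &str;

    /// Approximate character budget; callers truncate input to fit.
    fn context_window_chars(&self) -> usize;

    /// Run one cheap summarization call.
    async fn prompt(&self, input: &str) -> anyhow::Result<String>;
}
```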
88 changes: 80 additions & 8 deletions src/openhuman/learning/reflection.rs
@@ -45,6 +45,12 @@ pub struct ReflectionHook {
full_config: Arc<Config>,
memory: Arc<dyn Memory>,
provider: Option<Arc<dyn crate::openhuman::providers::Provider>>,
/// Optional dedicated cheap summarizer (#1419). When present, the
/// reflection LLM call is routed here instead of through the
/// orchestrator-tier provider / local-AI service so frequent
/// reflection passes do not burn high-tier inference. `None` falls
/// back to the legacy routing in `run_reflection`.
summarizer: Option<Arc<dyn crate::openhuman::learning::SummarizerProvider>>,
/// Per-session reflection counts for throttling. Key is session_id (or "__global__").
session_counts: Mutex<HashMap<String, usize>>,
}
@@ -55,12 +61,30 @@ impl ReflectionHook {
full_config: Arc<Config>,
memory: Arc<dyn Memory>,
provider: Option<Arc<dyn crate::openhuman::providers::Provider>>,
) -> Self {
Self::with_summarizer(config, full_config, memory, provider, None)
}

/// Construct a reflection hook with an explicit summarizer override.
///
/// When `summarizer.is_some()` the reflection LLM call is routed to
/// the dedicated cheap summarizer instead of the orchestrator-tier
/// provider / local AI service. This is the path #1419 wires from
/// the session builder when `LearningConfig::summarizer.enabled` is
/// true and the agent is the orchestrator.
pub fn with_summarizer(
config: LearningConfig,
full_config: Arc<Config>,
memory: Arc<dyn Memory>,
provider: Option<Arc<dyn crate::openhuman::providers::Provider>>,
summarizer: Option<Arc<dyn crate::openhuman::learning::SummarizerProvider>>,
) -> Self {
Self {
config,
full_config,
memory,
provider,
summarizer,
session_counts: Mutex::new(HashMap::new()),
}
}
@@ -132,15 +156,33 @@ impl ReflectionHook {
));

if !ctx.tool_calls.is_empty() {
// #1419: compress raw multi-call history into per-tool
// digests before it hits the summarizer's smaller context
// window. The legacy per-call block is preserved when the
// turn has at most one call per distinct tool — the digest
// and the legacy line look identical in that case, but the
// per-call line keeps the existing "success=true,
// duration=Xms" surface intact for older test assertions.
let digests = crate::openhuman::learning::compress_tool_calls(&ctx.tool_calls);
let compressed_any = digests.iter().any(|d| d.count > 1);
prompt.push_str("## Tool Calls\n");
for tc in &ctx.tool_calls {
prompt.push_str(&format!(
"- {} (success={}, duration={}ms): {}\n",
tc.name,
tc.success,
tc.duration_ms,
truncate(&tc.output_summary, 100)
));
if compressed_any {
prompt.push_str(&crate::openhuman::learning::render_tool_digests(&digests));
log::debug!(
"[learning::reflection] compressed {} tool calls into {} per-tool digest(s)",
ctx.tool_calls.len(),
digests.len(),
);
} else {
for tc in &ctx.tool_calls {
prompt.push_str(&format!(
"- {} (success={}, duration={}ms): {}\n",
tc.name,
tc.success,
tc.duration_ms,
truncate(&tc.output_summary, 100)
));
}
}
prompt.push('\n');
}
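
For context, a rough sketch of the digest shape this compression implies; only the `count` field is confirmed by the diff (via `d.count > 1`), so the remaining fields are assumptions about what a per-tool aggregate would carry.

```rust
/// Sketch of a per-tool digest produced by `compress_tool_calls`.
/// `count` is confirmed by the check above; the other fields are
/// assumed aggregates over a tool's calls in the turn.
pub struct ToolCallDigest {
    pub name: String,
    pub count: usize,
    pub successes: usize,
    pub total_duration_ms: u64,
}
```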
Expand All @@ -155,6 +197,36 @@ impl ReflectionHook {

/// Call the configured LLM for reflection.
async fn run_reflection(&self, prompt: &str) -> anyhow::Result<String> {
// #1419: when a dedicated summarizer is configured, route the
// reflection call through it instead of the orchestrator-tier
// provider / local AI service. The summarizer carries its own
// context window cap and we log a context-fill metric so trigger
// thresholds can be tuned against real fill ratios.
if let Some(summarizer) = self.summarizer.as_ref() {
let cap = summarizer.context_window_chars();
let input_chars = prompt.chars().count();
let bounded: std::borrow::Cow<'_, str> = if input_chars > cap {
log::info!(
"[learning::reflection] summarizer={} input {} chars > cap {} — truncating",
summarizer.label(),
input_chars,
cap,
);
let head: String = prompt.chars().take(cap).collect();
std::borrow::Cow::Owned(head)
} else {
std::borrow::Cow::Borrowed(prompt)
};
log::debug!(
"[learning::reflection] dispatch via summarizer label={} input_chars={} cap={} fill={:.2}",
summarizer.label(),
bounded.chars().count(),
cap,
(bounded.chars().count() as f32) / (cap.max(1) as f32),
);
return summarizer.prompt(bounded.as_ref()).await;
}

match self.config.reflection_source {
ReflectionSource::Local => {
// Gate: local reflection requires the per-feature flag.
Expand Down