71 changes: 64 additions & 7 deletions src/openhuman/agent/harness/session/builder.rs
@@ -792,7 +792,14 @@ impl Agent {
let mut post_turn_hooks: Vec<Arc<dyn crate::openhuman::agent::hooks::PostTurnHook>> =
Vec::new();
if config.learning.enabled {
if config.learning.reflection_enabled {
// #1419: reflection only fires for the user-facing orchestrator.
// Sub-agent transcripts are short-lived, scoped to a single
// delegation, and almost never carry durable user context worth
// surfacing in future chats. Gating here keeps the hook off
// every specialist / archivist / welcome session.
let is_orchestrator = agent_id == "orchestrator";

if config.learning.reflection_enabled && is_orchestrator {
// Only the reflection hook needs an owned snapshot of the
// full config, so create the `Arc` lazily inside this
// branch instead of paying for the clone whenever
@@ -814,16 +821,66 @@
} else {
None
};
post_turn_hooks.push(Arc::new(crate::openhuman::learning::ReflectionHook::new(
config.learning.clone(),
full_config.clone(),
memory.clone(),
reflection_provider,
)));

// #1419: opt-in dedicated summarizer. When the cloud
// source is selected we reuse the same routed provider
// and just route the call through a different model
// hint. Local summarizers are handled inside
// ReflectionHook itself via the legacy local-AI path,
// so we skip the trait wiring there.
let summarizer: Option<Arc<dyn crate::openhuman::learning::SummarizerProvider>> =
if config.learning.summarizer.enabled
&& config.learning.summarizer.source
== crate::openhuman::config::SummarizerSource::Cloud
{
let provider: Arc<dyn crate::openhuman::providers::Provider> =
match reflection_provider.clone() {
Some(p) => p,
None => Arc::from(providers::create_routed_provider(
config.api_url.as_deref(),
config.api_key.as_deref(),
&config.reliability,
&config.model_routes,
&model_name,
)?),
};
Some(Arc::new(
crate::openhuman::learning::ConfiguredSummarizer::new(
provider,
config.learning.summarizer.model_hint.clone(),
config.learning.summarizer.max_context_chars,
"reflection-summarizer",
),
))
} else {
None
};
Comment on lines +825 to +857
⚠️ Potential issue | 🟠 Major | 🏗️ Heavy lift

Local summarizer config is still a no-op.

This only builds a dedicated SummarizerProvider for SummarizerSource::Cloud, but ReflectionHook::run_reflection only takes the dedicated path when self.summarizer.is_some(). With learning.summarizer.enabled = true and source = Local, reflection falls back to the legacy route and ignores the summarizer model hint/context cap entirely. Please either wire a local SummarizerProvider here or reject that config explicitly so it cannot silently do nothing.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/openhuman/agent/harness/session/builder.rs` around lines 825-857: the
current builder only constructs a SummarizerProvider when
learning.summarizer.source == SummarizerSource::Cloud, leaving a local-enabled
config ineffective because ReflectionHook::run_reflection checks
self.summarizer.is_some(); update the builder to either (A) instantiate and
assign a local implementation of crate::openhuman::learning::SummarizerProvider
when config.learning.summarizer.source == SummarizerSource::Local (e.g., wrap
the existing local summarizer implementation used by ReflectionHook into an
Arc<dyn SummarizerProvider> similar to ConfiguredSummarizer) or (B) explicitly
reject/return an Err when config.learning.summarizer.enabled == true and source
== Local so the config cannot silently be no-op; modify the Summarizer creation
block (the let summarizer: Option<Arc<dyn
crate::openhuman::learning::SummarizerProvider>> = ...) to implement one of
these behaviors so ReflectionHook::run_reflection sees a Some(...) for local
setups or the config fails fast.
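
A minimal sketch of option (B), rejecting the no-op combination up front, is shown below. It assumes the builder function returns `anyhow::Result` (it already uses `?` on `create_routed_provider`); the `bail!` message and exact placement are illustrative, not the shipped fix.

```rust
// Sketch only (review option B): fail fast instead of silently
// ignoring the summarizer config. Assumes this builder function
// returns anyhow::Result, consistent with the `?` on
// `create_routed_provider` above.
if config.learning.summarizer.enabled
    && config.learning.summarizer.source
        == crate::openhuman::config::SummarizerSource::Local
{
    anyhow::bail!(
        "learning.summarizer.source = \"local\" is not wired to a \
         dedicated SummarizerProvider yet; set source = \"cloud\" or \
         disable the summarizer"
    );
}
```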

if summarizer.is_some() {
log::info!(
"[learning] reflection summarizer enabled (model_hint={}, cap={})",
config.learning.summarizer.model_hint,
config.learning.summarizer.max_context_chars,
);
}

post_turn_hooks.push(Arc::new(
crate::openhuman::learning::ReflectionHook::with_summarizer(
config.learning.clone(),
full_config.clone(),
memory.clone(),
reflection_provider,
summarizer,
),
));
log::info!(
"[learning] reflection hook registered (source={:?})",
config.learning.reflection_source
);
} else if config.learning.reflection_enabled {
log::debug!(
"[learning] reflection hook skipped — agent '{}' is not the orchestrator",
agent_id
);
}

if config.learning.user_profile_enabled {
11 changes: 11 additions & 0 deletions src/openhuman/agent/harness/session/turn.rs
@@ -1495,6 +1495,17 @@ impl Agent {
///
/// Fire-and-forget: failures are logged, never propagated.
pub(super) fn spawn_transcript_ingestion(&self) {
// #1419: transcript ingest only fires for the user-facing
// orchestrator. Sub-agent transcripts are short-lived, scoped
// to a single delegation, and almost never carry durable user
// context worth lifting into `conversation_memory`.
if self.agent_definition_name != "orchestrator" {
log::debug!(
"[transcript_ingest] skipping spawn — agent '{}' is not the orchestrator",
self.agent_definition_name
);
return;
}
let Some(path) = self.session_transcript_path.clone() else {
log::debug!("[transcript_ingest] no session transcript path yet — skipping spawn");
return;
7 changes: 4 additions & 3 deletions src/openhuman/config/mod.rs
@@ -34,9 +34,10 @@ pub use schema::{
ReliabilityConfig, ResourceLimitsConfig, RuntimeConfig, SandboxBackend, SandboxConfig,
SchedulerConfig, SchedulerGateConfig, SchedulerGateMode, ScreenIntelligenceConfig,
SecretsConfig, SecurityConfig, SlackConfig, StorageConfig, StorageProviderConfig,
StorageProviderSection, StreamMode, TelegramConfig, UpdateConfig, UpdateRestartStrategy,
VoiceActivationMode, VoiceServerConfig, WebSearchConfig, WebhookConfig,
DEFAULT_CLOUD_LLM_MODEL, DEFAULT_MODEL, MODEL_AGENTIC_V1, MODEL_CODING_V1, MODEL_REASONING_V1,
StorageProviderSection, StreamMode, SummarizerConfig, SummarizerSource, TelegramConfig,
UpdateConfig, UpdateRestartStrategy, VoiceActivationMode, VoiceServerConfig, WebSearchConfig,
WebhookConfig, DEFAULT_CLOUD_LLM_MODEL, DEFAULT_MODEL, MODEL_AGENTIC_V1, MODEL_CODING_V1,
MODEL_REASONING_V1,
};
pub use schema::{
clear_active_user, default_root_openhuman_dir, pre_login_user_dir, read_active_user_id,
73 changes: 73 additions & 0 deletions src/openhuman/config/schema/learning.rs
@@ -78,6 +78,78 @@ pub struct LearningConfig {
/// How often the periodic rebuild loop runs in seconds. Default: 1800 (30 minutes).
#[serde(default = "default_rebuild_interval_secs")]
pub rebuild_interval_secs: u64,

/// Summarizer LLM configuration (#1419).
///
/// Reflection and transcript ingest fire often; running them on the
/// orchestrator-tier model is wasteful. When `summarizer.enabled =
/// true` and a model hint is configured, the learning pipeline routes
/// its LLM calls to this cheaper model instead. When disabled (the
/// default), callers fall back to the heuristic path from #1406 / the
/// orchestrator-tier provider, so users without a summarizer
/// configured never silently break.
#[serde(default)]
pub summarizer: SummarizerConfig,
}

/// Where the summarizer model runs.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum SummarizerSource {
/// Use the configured cloud provider routed via a model hint.
#[default]
Cloud,
/// Use the local Ollama summarizer model via `LocalAiService::prompt()`.
Local,
}

/// Configuration for the dedicated cheap summarizer used by the
/// reflection / transcript-ingest paths (#1419). A pluggable model
/// separate from the orchestrator provider, it carries its own
/// context-window cap so callers can budget compression.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct SummarizerConfig {
/// Master switch. Default: false — leave the heuristic-only path
/// from #1406 as the source of truth for offline users.
#[serde(default)]
pub enabled: bool,

/// Where the summarizer runs (cloud routed via model hint vs local
/// Ollama). Default: cloud.
#[serde(default)]
pub source: SummarizerSource,

/// Model hint string passed to `Provider::simple_chat` when
/// `source == Cloud`. Defaults to `"hint:fast"` so a typical
/// `model_routes` config routes summarizer traffic to the cheapest
/// tier without the user touching this field.
#[serde(default = "default_summarizer_model_hint")]
pub model_hint: String,

/// Approximate character budget for the summarizer's prompt input.
/// Callers compress / truncate inputs to stay under this cap. The
/// implementation clamps to a sane minimum (1024 chars).
#[serde(default = "default_summarizer_context_chars")]
pub max_context_chars: usize,
}

fn default_summarizer_model_hint() -> String {
"hint:fast".to_string()
}

fn default_summarizer_context_chars() -> usize {
6_000
}

impl Default for SummarizerConfig {
fn default() -> Self {
Self {
enabled: false,
source: SummarizerSource::default(),
model_hint: default_summarizer_model_hint(),
max_context_chars: default_summarizer_context_chars(),
}
}
}

fn default_rebuild_interval_secs() -> u64 {
@@ -110,6 +182,7 @@ impl Default for LearningConfig {
chat_to_tree_enabled: default_true(),
stability_detector_enabled: default_true(),
rebuild_interval_secs: default_rebuild_interval_secs(),
summarizer: SummarizerConfig::default(),
}
}
}
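
For reference, the defaults above imply the behavior below, written as a small test sketch; the assertions restate values from this diff, while the test module itself is hypothetical and not part of the change.

```rust
#[cfg(test)]
mod summarizer_defaults {
    use super::*;

    #[test]
    fn summarizer_is_off_and_cheap_by_default() {
        let cfg = SummarizerConfig::default();
        // Heuristic-only path from #1406 stays authoritative by default.
        assert!(!cfg.enabled);
        assert_eq!(cfg.source, SummarizerSource::Cloud);
        // "hint:fast" routes summarizer traffic to the cheapest tier.
        assert_eq!(cfg.model_hint, "hint:fast");
        assert_eq!(cfg.max_context_chars, 6_000);
    }
}
```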
2 changes: 1 addition & 1 deletion src/openhuman/config/schema/mod.rs
@@ -44,7 +44,7 @@ pub use context::ContextConfig;
pub use dictation::{DictationActivationMode, DictationConfig};
pub use heartbeat_cron::{CronConfig, HeartbeatConfig};
pub use identity_cost::{CostConfig, ModelPricing};
pub use learning::{LearningConfig, ReflectionSource};
pub use learning::{LearningConfig, ReflectionSource, SummarizerConfig, SummarizerSource};
pub use local_ai::{LocalAiConfig, LocalAiUsage};
pub use meet::MeetConfig;
pub use node::NodeConfig;
5 changes: 5 additions & 0 deletions src/openhuman/learning/mod.rs
@@ -31,6 +31,7 @@ pub mod reflection;
pub mod scheduler;
pub mod schemas;
pub mod stability_detector;
pub mod summarizer;
pub mod tool_tracker;
pub mod transcript_ingest;
pub mod user_profile;
@@ -47,5 +48,9 @@ pub use schemas::{
all_learning_controller_schemas, all_learning_registered_controllers, learning_schemas,
};
pub use stability_detector::StabilityDetector;
pub use summarizer::{
compress_tool_calls, render_tool_digests, ConfiguredSummarizer, SummarizerProvider,
ToolCallDigest,
};
pub use tool_tracker::ToolTrackerHook;
pub use user_profile::UserProfileHook;
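
The `SummarizerProvider` trait definition is not part of this diff. Reconstructed from its call sites in `reflection.rs` below (`label()`, `context_window_chars()`, and an async `prompt()` returning `anyhow::Result<String>`), it plausibly looks like the sketch here; the `async_trait` attribute and exact receiver/return types are assumptions.

```rust
/// Sketch reconstructed from the call sites in `reflection.rs`; not
/// the shipped definition.
#[async_trait::async_trait]
pub trait SummarizerProvider: Send + Sync {
    /// Short label used in log lines (e.g. "reflection-summarizer").
    fn label(&self) -> &str;

    /// Approximate character budget; callers truncate input to fit.
    fn context_window_chars(&self) -> usize;

    /// Run one cheap summarization call.
    async fn prompt(&self, input: &str) -> anyhow::Result<String>;
}
```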
88 changes: 80 additions & 8 deletions src/openhuman/learning/reflection.rs
@@ -45,6 +45,12 @@ pub struct ReflectionHook {
full_config: Arc<Config>,
memory: Arc<dyn Memory>,
provider: Option<Arc<dyn crate::openhuman::providers::Provider>>,
/// Optional dedicated cheap summarizer (#1419). When present, the
/// reflection LLM call is routed here instead of through the
/// orchestrator-tier provider / local-AI service so frequent
/// reflection passes do not burn high-tier inference. `None` falls
/// back to the legacy routing in `run_reflection`.
summarizer: Option<Arc<dyn crate::openhuman::learning::SummarizerProvider>>,
/// Per-session reflection counts for throttling. Key is session_id (or "__global__").
session_counts: Mutex<HashMap<String, usize>>,
}
@@ -55,12 +61,30 @@ impl ReflectionHook {
full_config: Arc<Config>,
memory: Arc<dyn Memory>,
provider: Option<Arc<dyn crate::openhuman::providers::Provider>>,
) -> Self {
Self::with_summarizer(config, full_config, memory, provider, None)
}

/// Construct a reflection hook with an explicit summarizer override.
///
/// When `summarizer.is_some()` the reflection LLM call is routed to
/// the dedicated cheap summarizer instead of the orchestrator-tier
/// provider / local AI service. This is the path #1419 wires from
/// the session builder when `LearningConfig::summarizer.enabled` is
/// true and the agent is the orchestrator.
pub fn with_summarizer(
config: LearningConfig,
full_config: Arc<Config>,
memory: Arc<dyn Memory>,
provider: Option<Arc<dyn crate::openhuman::providers::Provider>>,
summarizer: Option<Arc<dyn crate::openhuman::learning::SummarizerProvider>>,
) -> Self {
Self {
config,
full_config,
memory,
provider,
summarizer,
session_counts: Mutex::new(HashMap::new()),
}
}
@@ -132,15 +156,33 @@ impl ReflectionHook {
));

if !ctx.tool_calls.is_empty() {
// #1419: compress raw multi-call history into per-tool
// digests before it hits the summarizer's smaller context
// window. The legacy per-call block is preserved when the
// turn has at most one call per distinct tool — the digest
// and the legacy line look identical in that case, but the
// per-call line keeps the existing "success=true,
// duration=Xms" surface intact for older test assertions.
let digests = crate::openhuman::learning::compress_tool_calls(&ctx.tool_calls);
let compressed_any = digests.iter().any(|d| d.count > 1);
prompt.push_str("## Tool Calls\n");
for tc in &ctx.tool_calls {
prompt.push_str(&format!(
"- {} (success={}, duration={}ms): {}\n",
tc.name,
tc.success,
tc.duration_ms,
truncate(&tc.output_summary, 100)
));
if compressed_any {
prompt.push_str(&crate::openhuman::learning::render_tool_digests(&digests));
log::debug!(
"[learning::reflection] compressed {} tool calls into {} per-tool digest(s)",
ctx.tool_calls.len(),
digests.len(),
);
} else {
for tc in &ctx.tool_calls {
prompt.push_str(&format!(
"- {} (success={}, duration={}ms): {}\n",
tc.name,
tc.success,
tc.duration_ms,
truncate(&tc.output_summary, 100)
));
}
}
prompt.push('\n');
}
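
For context, a rough sketch of the digest shape this compression implies; only the `count` field is confirmed by the diff (via `d.count > 1`), so the remaining fields are assumptions about what a per-tool aggregate would carry.

```rust
/// Sketch of a per-tool digest produced by `compress_tool_calls`.
/// `count` is confirmed by the check above; the other fields are
/// assumed aggregates over a tool's calls in the turn.
pub struct ToolCallDigest {
    pub name: String,
    pub count: usize,
    pub successes: usize,
    pub total_duration_ms: u64,
}
```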
Expand All @@ -155,6 +197,36 @@ impl ReflectionHook {

/// Call the configured LLM for reflection.
async fn run_reflection(&self, prompt: &str) -> anyhow::Result<String> {
// #1419: when a dedicated summarizer is configured, route the
// reflection call through it instead of the orchestrator-tier
// provider / local AI service. The summarizer carries its own
// context window cap and we log a context-fill metric so trigger
// thresholds can be tuned against real fill ratios.
if let Some(summarizer) = self.summarizer.as_ref() {
let cap = summarizer.context_window_chars();
let input_chars = prompt.chars().count();
let bounded: std::borrow::Cow<'_, str> = if input_chars > cap {
log::info!(
"[learning::reflection] summarizer={} input {} chars > cap {} — truncating",
summarizer.label(),
input_chars,
cap,
);
let head: String = prompt.chars().take(cap).collect();
std::borrow::Cow::Owned(head)
} else {
std::borrow::Cow::Borrowed(prompt)
};
log::debug!(
"[learning::reflection] dispatch via summarizer label={} input_chars={} cap={} fill={:.2}",
summarizer.label(),
bounded.chars().count(),
cap,
(bounded.chars().count() as f32) / (cap.max(1) as f32),
);
return summarizer.prompt(bounded.as_ref()).await;
}

match self.config.reflection_source {
ReflectionSource::Local => {
// Gate: local reflection requires the per-feature flag.
Expand Down