diff --git a/app/src/utils/__tests__/toolTimelineFormatting.test.ts b/app/src/utils/__tests__/toolTimelineFormatting.test.ts index 983753f49..5eb9f882c 100644 --- a/app/src/utils/__tests__/toolTimelineFormatting.test.ts +++ b/app/src/utils/__tests__/toolTimelineFormatting.test.ts @@ -57,6 +57,45 @@ describe('formatTimelineEntry', () => { }); }); + it('formats delegate_to_integrations_agent with a known toolkit arg', () => { + expect( + formatTimelineEntry( + entry({ + name: 'delegate_to_integrations_agent', + argsBuffer: JSON.stringify({ + toolkit: 'gmail', + prompt: 'Find the latest invoice from Stripe.', + }), + }) + ) + ).toEqual({ + title: 'Making requests to your Gmail account', + detail: 'Find the latest invoice from Stripe.', + }); + }); + + it('formats delegate_to_integrations_agent with an unknown toolkit arg', () => { + expect( + formatTimelineEntry( + entry({ + name: 'delegate_to_integrations_agent', + argsBuffer: JSON.stringify({ toolkit: 'slack_bot', prompt: 'post update' }), + }) + ) + ).toEqual({ title: 'Checking your Slack Bot', detail: 'post update' }); + }); + + it('formats delegate_to_integrations_agent without a toolkit arg as a generic connected-app label', () => { + expect( + formatTimelineEntry( + entry({ + name: 'delegate_to_integrations_agent', + argsBuffer: JSON.stringify({ prompt: 'do something useful' }), + }) + ) + ).toEqual({ title: 'Checking your connected app', detail: 'do something useful' }); + }); + it('formats delegate_tools_agent with toolkit context from args', () => { expect( formatTimelineEntry( diff --git a/app/src/utils/toolTimelineFormatting.ts b/app/src/utils/toolTimelineFormatting.ts index ecf9f07c4..82aab5fef 100644 --- a/app/src/utils/toolTimelineFormatting.ts +++ b/app/src/utils/toolTimelineFormatting.ts @@ -55,10 +55,29 @@ export function formatTimelineEntry(entry: ToolTimelineEntry): { title: string; inferIntegrationName(parsedArgs?.toolkit) ?? inferIntegrationNameFromPrompt(parsedArgs?.prompt) ?? inferIntegrationName(entry.name); - return { - title: provider ? integrationActivityTitle(provider) : humanizeIdentifier(entry.name), - detail: entry.detail ?? parsedArgs?.prompt, - }; + + // The collapsed `delegate_to_integrations_agent` tool has no toolkit + // baked into its name; if we couldn't infer a provider from args or + // prompt, surface a generic connected-app label (matches the + // `spawn_subagent → integrations_agent` fallback above) instead of + // humanising the tool name into "To Integrations Agent". Unknown + // toolkit slugs from args fall back to a humanised toolkit label so + // composio integrations outside KNOWN_TOOLKIT_RE still render + // meaningfully (e.g. `slack_bot` → "Slack Bot") rather than the + // generic copy. + let title: string; + if (provider) { + title = integrationActivityTitle(provider); + } else if (entry.name === 'delegate_to_integrations_agent') { + const rawToolkit = parsedArgs?.toolkit?.trim(); + title = rawToolkit + ? integrationActivityTitle(humanizeIdentifier(rawToolkit)) + : 'Checking your connected app'; + } else { + title = humanizeIdentifier(entry.name); + } + + return { title, detail: entry.detail ?? parsedArgs?.prompt }; } return { diff --git a/src/openhuman/agent/agents/orchestrator/prompt.md b/src/openhuman/agent/agents/orchestrator/prompt.md index 0122fbee0..01d3b3aa2 100644 --- a/src/openhuman/agent/agents/orchestrator/prompt.md +++ b/src/openhuman/agent/agents/orchestrator/prompt.md @@ -21,7 +21,7 @@ Follow this sequence for every user message: - Yes: use direct tools first (`current_time`, `cron_*`, `memory_*`, `composio_list_connections`, etc.). - No: continue. 3. **Does this need specialised execution?** - - If external SaaS integration work is required, use `delegate_{toolkit}` (e.g. `delegate_gmail`, `delegate_notion`). + - If external SaaS integration work is required, use `delegate_to_integrations_agent` with `toolkit` set to the relevant slug (see the **Connected Integrations** section for the current list). - If code writing/execution/debugging is required, use `delegate_run_code`. - If web/doc crawling is required, use `delegate_researcher`. - If complex multi-step decomposition is required, use `delegate_plan`. @@ -29,9 +29,7 @@ Follow this sequence for every user message: - If memory archiving or distillation is required, use `delegate_archivist`. 4. **After delegation**, summarise results clearly and concisely. -Default bias: **do not spawn a sub-agent when a direct response or direct tool call is sufficient**. - -When delegating: use `delegate_researcher` for web/doc lookups, `delegate_run_code` for coding, `delegate_plan` for complex decomposition, `delegate_critic` for reviews, `delegate_archivist` for memory writes, `delegate_{toolkit}` for external integrations. Use `spawn_worker_thread` for long tasks that need their own thread. +Default bias: **do not spawn a sub-agent when a direct response or direct tool call is sufficient**. Use `spawn_worker_thread` for long tasks that need their own thread. ## Rules @@ -59,7 +57,7 @@ multi-file refactors, or batch integration work. It creates a persisted **worker thread the user can open from the thread list, and returns a compact `[worker_thread_ref]` (thread id + brief summary) to the parent instead of the full transcript. -For routine delegation use the matching `delegate_*` tool and surface the result inline. +For routine delegation use the matching specialist `delegate_*` tool (or `delegate_to_integrations_agent` for external services) and surface the result inline. Worker threads are one level deep by design: a sub-agent spawned via `spawn_worker_thread` cannot itself call `spawn_worker_thread`, so workers never nest. diff --git a/src/openhuman/agent/agents/orchestrator/prompt.rs b/src/openhuman/agent/agents/orchestrator/prompt.rs index 3046e85a9..41b19a88c 100644 --- a/src/openhuman/agent/agents/orchestrator/prompt.rs +++ b/src/openhuman/agent/agents/orchestrator/prompt.rs @@ -3,9 +3,10 @@ //! The orchestrator follows a direct-first policy: respond directly or use //! cheap direct tools whenever possible, and delegate only for specialised //! execution. It never executes Composio actions itself; the integration -//! block points to `delegate_{toolkit}` tools (synthesised by -//! `orchestrator_tools::collect_orchestrator_tools`) for true -//! external-service operations. That prose lives here (not in the shared +//! block points to the single collapsed `delegate_to_integrations_agent` +//! tool (synthesised by `orchestrator_tools::collect_orchestrator_tools`, +//! #1335) for true external-service operations, with the toolkit slug +//! passed as an argument. That prose lives here (not in the shared //! prompts module) so the skill-executor voice stays in //! `integrations_agent/prompt.rs` and nobody has to branch on `agent_id` //! in a shared section impl. @@ -90,15 +91,16 @@ fn render_delegation_guide(integrations: &[ConnectedIntegration]) -> String { } let mut out = String::from( "## Connected Integrations\n\n\ - Delegate tasks for these services using the matching `delegate_{toolkit}` tool:\n\n", + Delegate tasks for these services with `delegate_to_integrations_agent`, passing the toolkit slug as `toolkit`:\n\n", ); for ci in connected { // Use the same slug canonicalisation as `collect_orchestrator_tools` - // so the tool name in the prompt always matches the synthesised tool. + // so the `toolkit` arg the orchestrator emits always matches the + // enum the synthesised tool accepts. let slug = sanitise_slug(&ci.toolkit); let _ = writeln!( out, - "- **{}** (delegate via `delegate_{}`): {}", + "- **{}** (`toolkit: \"{}\"`): {}", ci.toolkit, slug, ci.description ); } @@ -161,7 +163,7 @@ mod tests { } #[test] - fn build_emits_delegation_guide_with_spawn_snippet() { + fn build_emits_delegation_guide_with_collapsed_tool() { let integrations = vec![ConnectedIntegration { toolkit: "gmail".into(), description: "Email access.".into(), @@ -170,7 +172,10 @@ mod tests { }]; let body = build(&ctx_with(&integrations)).unwrap(); assert!(body.contains("## Connected Integrations")); - assert!(body.contains("delegate_gmail")); + assert!(body.contains("delegate_to_integrations_agent")); + assert!(body.contains("toolkit: \"gmail\"")); + // Must NOT contain the old per-toolkit fan-out tool names. + assert!(!body.contains("delegate_gmail")); // Must NOT contain the old verbose spawn_subagent snippet. assert!(!body.contains("spawn_subagent(agent_id=\"integrations_agent\"")); // Delegator voice must NOT use the skill-executor wording. @@ -178,7 +183,7 @@ mod tests { } #[test] - fn delegation_guide_uses_compact_delegate_format() { + fn delegation_guide_uses_compact_collapsed_format() { let integrations = vec![ConnectedIntegration { toolkit: "gmail".into(), description: "Email access.".into(), @@ -187,15 +192,16 @@ mod tests { }]; let body = build(&ctx_with(&integrations)).unwrap(); assert!(body.contains("## Connected Integrations")); - assert!(body.contains("delegate_gmail")); - // Must NOT contain the old verbose spawn_subagent snippet. + assert!(body.contains("delegate_to_integrations_agent")); + // Old verbose / per-toolkit forms must be gone. + assert!(!body.contains("delegate_gmail")); assert!(!body.contains("spawn_subagent(agent_id=\"integrations_agent\"")); } #[test] fn build_hides_unconnected_integrations() { // Only connected toolkits make it into the Delegation Guide - // — unconnected entries would just trigger a spawn_subagent + // — unconnected entries would just trigger a downstream // pre-flight rejection, so keeping them out keeps the prompt // focused on what the orchestrator can actually delegate. let integrations = vec![ diff --git a/src/openhuman/agent/harness/definition.rs b/src/openhuman/agent/harness/definition.rs index d71f6fc81..9548a3926 100644 --- a/src/openhuman/agent/harness/definition.rs +++ b/src/openhuman/agent/harness/definition.rs @@ -157,10 +157,11 @@ pub struct AgentDefinition { /// agent's `delegate_name` override) and whose description is the /// target agent's [`AgentDefinition::when_to_use`]. /// - /// * [`SubagentEntry::Skills`] — one [`SkillDelegationTool`] per - /// connected Composio toolkit, each named `delegate_{toolkit}`, - /// all routing to the generic `integrations_agent` with an appropriate - /// `skill_filter` pre-populated. + /// * [`SubagentEntry::Skills`] — a single collapsed + /// [`SkillDelegationTool`] named `delegate_to_integrations_agent` + /// that takes the toolkit slug as an argument and routes to the + /// generic `integrations_agent` with the corresponding + /// `skill_filter` pre-populated (#1335). /// /// `subagents` is intentionally separate from [`AgentDefinition::tools`] /// so that reading a TOML makes the distinction obvious: `tools` is @@ -207,9 +208,11 @@ pub struct AgentDefinition { pub enum SubagentEntry { /// Delegate to a specific built-in or custom agent by id. AgentId(String), - /// Expand at build time to one `delegate_{toolkit}` tool per - /// connected Composio toolkit, each routing to the generic - /// `integrations_agent` with `skill_filter` pre-set. + /// Expand at build time to a single collapsed + /// `delegate_to_integrations_agent` tool whose `toolkit` argument + /// selects which connected Composio toolkit to route to, with + /// `skill_filter` pre-set on the underlying `integrations_agent` + /// dispatch (#1335). Skills(SkillsWildcard), } diff --git a/src/openhuman/agent/harness/session/builder.rs b/src/openhuman/agent/harness/session/builder.rs index e3c20fe9b..1ce03d67b 100644 --- a/src/openhuman/agent/harness/session/builder.rs +++ b/src/openhuman/agent/harness/session/builder.rs @@ -848,8 +848,10 @@ impl Agent { // // For an agent with `subagents = [...]` in its TOML (today: // orchestrator), `collect_orchestrator_tools` synthesises one - // `ArchetypeDelegationTool` per named sub-agent plus one - // `SkillDelegationTool` per connected Composio toolkit. + // `ArchetypeDelegationTool` per named sub-agent plus a single + // collapsed `SkillDelegationTool` + // (`delegate_to_integrations_agent`) whose `toolkit` argument + // selects among the connected Composio toolkits (#1335). // // For an agent without `subagents` (today: welcome, critic, // archivist, etc.), no delegation tools are synthesised — the diff --git a/src/openhuman/agent/harness/tool_loop.rs b/src/openhuman/agent/harness/tool_loop.rs index f10dda515..b4f9b9473 100644 --- a/src/openhuman/agent/harness/tool_loop.rs +++ b/src/openhuman/agent/harness/tool_loop.rs @@ -87,10 +87,11 @@ pub(crate) async fn agent_turn( /// /// * `extra_tools` — per-turn synthesised tools to splice alongside the /// persistent `tools_registry`. The agent-dispatch path uses this to -/// surface delegation tools (`research`, `delegate_gmail`, …) that -/// are synthesised fresh per turn from the active agent's -/// `subagents` field and the current Composio integration list, and -/// therefore are not registered in the global startup-time registry. +/// surface delegation tools (`research`, `plan`, +/// `delegate_to_integrations_agent`, …) that are synthesised fresh +/// per turn from the active agent's `subagents` field and the +/// current Composio integration list, and therefore are not +/// registered in the global startup-time registry. /// /// The combined tool list seen by the LLM this turn is /// `tools_registry.iter().chain(extra_tools.iter())`, further narrowed diff --git a/src/openhuman/channels/runtime/dispatch.rs b/src/openhuman/channels/runtime/dispatch.rs index 804fcc5f0..12cdb0e49 100644 --- a/src/openhuman/channels/runtime/dispatch.rs +++ b/src/openhuman/channels/runtime/dispatch.rs @@ -499,7 +499,8 @@ async fn resolve_target_agent(channel: &str) -> AgentScoping { /// * every tool name in the agent's `[tools] named = [...]` list /// (when the scope is [`ToolScope::Named`]); and /// * every name produced by the per-turn synthesised delegation tools -/// in `extra_tools` (e.g. `research`, `delegate_gmail`). +/// in `extra_tools` (e.g. `research`, `plan`, +/// `delegate_to_integrations_agent`). /// /// When the agent's tool scope is [`ToolScope::Wildcard`] **and** there /// are no `extra_tools`, returns `None` to preserve the legacy @@ -632,9 +633,11 @@ mod scoping_tests { /// `ToolScope::Named` with extras returns the union of the TOML /// named list and the extras' names. This is the orchestrator's - /// path: 4 direct tools from the TOML + N synthesised delegation - /// tools (`research`, `plan`, `delegate_gmail`, …) → all of them - /// visible to the orchestrator's LLM. + /// path: direct tools from the TOML + the synthesised delegation + /// tools (`research`, `plan`, `delegate_to_integrations_agent`) + /// → all of them visible to the orchestrator's LLM. The stub + /// names in this test are arbitrary; they exercise the union + /// logic, not the real synthesiser. #[test] fn named_scope_with_extras_returns_union() { let def = def_with_scope(ToolScope::Named(vec![ diff --git a/src/openhuman/tools/impl/agent/dispatch.rs b/src/openhuman/tools/impl/agent/dispatch.rs index 7ac382f5d..7db9e6121 100644 --- a/src/openhuman/tools/impl/agent/dispatch.rs +++ b/src/openhuman/tools/impl/agent/dispatch.rs @@ -54,9 +54,10 @@ pub(crate) async fn dispatch_subagent( ); // Propagate the per-call toolkit scope into the subagent runner so - // that `SkillDelegationTool`s can narrow `integrations_agent` to a single - // Composio toolkit (e.g. `delegate_gmail` → integrations_agent + - // toolkit="gmail"). Earlier code plumbed this through + // that the collapsed `SkillDelegationTool` can narrow + // `integrations_agent` to a single Composio toolkit (e.g. + // `delegate_to_integrations_agent { toolkit: "gmail" }` → + // integrations_agent + toolkit="gmail"). Earlier code plumbed this through // `skill_filter_override` (which matches `{skill}__` QuickJS-style // names), but Composio actions are named `GMAIL_*` / `NOTION_*` — // so the filter excluded every Composio tool instead of narrowing diff --git a/src/openhuman/tools/impl/agent/skill_delegation.rs b/src/openhuman/tools/impl/agent/skill_delegation.rs index f9c761364..c109f37a7 100644 --- a/src/openhuman/tools/impl/agent/skill_delegation.rs +++ b/src/openhuman/tools/impl/agent/skill_delegation.rs @@ -1,14 +1,85 @@ +//! Single collapsed delegation tool for Composio-backed integrations +//! (#1335). +//! +//! Replaces the previous per-toolkit fan-out where the orchestrator's +//! function-calling schema gained a new `delegate_` entry for +//! every connected integration. Every one of those tools dispatched to +//! the same `integrations_agent` with a different `skill_filter`, so +//! exposing them separately bloated the orchestrator's tool list +//! linearly with no behavioural benefit. +//! +//! The collapsed tool keeps the routing handle the orchestrator needs +//! ("send this to integrations, scoped to toolkit X") while making the +//! orchestrator's schema cost constant in the integration dimension. +//! +//! The list of connected toolkits is rendered inline in the tool +//! description so the orchestrator still discovers which integrations +//! are available without each one being its own schema entry. + use async_trait::async_trait; use serde_json::json; +use crate::openhuman::tools::orchestrator_tools::sanitise_slug; use crate::openhuman::tools::traits::{PermissionLevel, Tool, ToolCategory, ToolResult}; +/// Canonical tool name surfaced to the orchestrator LLM. +pub const INTEGRATIONS_DELEGATE_TOOL_NAME: &str = "delegate_to_integrations_agent"; + +/// Single collapsed delegation tool for all connected Composio toolkits. +/// +/// Carries the slugs + one-line descriptions of every connected toolkit +/// so the tool's `description()` (which is what the orchestrator's LLM +/// sees) enumerates the routing choices without needing N tools to +/// represent them. pub struct SkillDelegationTool { pub tool_name: String, - pub skill_id: String, + /// `(slug, description)` for every currently-connected toolkit. + /// `slug` is already `sanitise_slug`'d so it can be matched against + /// the LLM-provided `toolkit` argument with a plain `==`. + pub connected_toolkits: Vec<(String, String)>, pub tool_description: String, } +impl SkillDelegationTool { + /// Build the canonical collapsed tool from the connected-toolkit + /// list. Returns `None` when there are zero connected toolkits — + /// callers in `collect_orchestrator_tools` interpret that as "don't + /// expose any integrations delegation surface at all", which is the + /// right thing to do because the orchestrator can't usefully route + /// to an empty set. + pub fn for_connected(connected: Vec<(String, String)>) -> Option { + if connected.is_empty() { + return None; + } + let description = build_description(&connected); + Some(Self { + tool_name: INTEGRATIONS_DELEGATE_TOOL_NAME.to_string(), + connected_toolkits: connected, + tool_description: description, + }) + } +} + +fn build_description(connected: &[(String, String)]) -> String { + let mut buf = String::from( + "Use only when direct response/direct tools are insufficient and the task truly \ + requires external integration actions. Routes the work to the integrations_agent \ + with the named toolkit pre-selected. Required argument `toolkit` must be one of \ + the currently-connected slugs below; pass the user's task verbatim as `prompt`. \ + Connected toolkits:", + ); + for (slug, desc) in connected { + buf.push_str("\n - "); + buf.push_str(slug); + let trimmed = desc.trim(); + if !trimmed.is_empty() { + buf.push_str(": "); + buf.push_str(trimmed); + } + } + buf +} + #[async_trait] impl Tool for SkillDelegationTool { fn name(&self) -> &str { @@ -20,10 +91,21 @@ impl Tool for SkillDelegationTool { } fn parameters_schema(&self) -> serde_json::Value { + let slugs: Vec<&str> = self + .connected_toolkits + .iter() + .map(|(slug, _)| slug.as_str()) + .collect(); json!({ "type": "object", - "required": ["prompt"], + "required": ["toolkit", "prompt"], "properties": { + "toolkit": { + "type": "string", + "enum": slugs, + "description": "Composio toolkit slug to route to (e.g. `gmail`, `notion`). \ + Must match one of the connected toolkits enumerated in this tool's description." + }, "prompt": { "type": "string", "description": "Clear instruction for what to do. Include all relevant context — the sub-agent has no memory of your conversation." @@ -41,26 +123,225 @@ impl Tool for SkillDelegationTool { } async fn execute(&self, args: serde_json::Value) -> anyhow::Result { + let raw_toolkit = args + .get("toolkit") + .and_then(|v| v.as_str()) + .unwrap_or("") + .trim() + .to_string(); + log::debug!( + "[skill-delegation] execute start tool='{}' raw_toolkit={:?} prompt_chars={}", + self.tool_name, + raw_toolkit, + args.get("prompt") + .and_then(|v| v.as_str()) + .map(|s| s.chars().count()) + .unwrap_or(0) + ); + if raw_toolkit.is_empty() { + log::debug!( + "[skill-delegation] reject: missing `toolkit` argument for tool='{}'", + self.tool_name + ); + return Ok(ToolResult::error(format!( + "{}: `toolkit` is required and must match a connected integration slug", + self.tool_name + ))); + } + let slug = sanitise_slug(&raw_toolkit); + let known = self + .connected_toolkits + .iter() + .any(|(known_slug, _)| known_slug == &slug); + if !known { + let allowed: Vec<&str> = self + .connected_toolkits + .iter() + .map(|(slug, _)| slug.as_str()) + .collect(); + log::debug!( + "[skill-delegation] reject: toolkit '{}' (sanitised='{}') not in connected set {:?}", + raw_toolkit, + slug, + allowed + ); + return Ok(ToolResult::error(format!( + "{}: toolkit `{raw_toolkit}` is not connected — allowed: [{}]", + self.tool_name, + allowed.join(", ") + ))); + } + let prompt = args .get("prompt") .and_then(|v| v.as_str()) .unwrap_or("") .trim() .to_string(); - if prompt.is_empty() { + log::debug!( + "[skill-delegation] reject: empty `prompt` for tool='{}' toolkit='{}'", + self.tool_name, + slug + ); return Ok(ToolResult::error(format!( "{}: `prompt` is required", self.tool_name ))); } - super::dispatch_subagent( - "integrations_agent", - &self.tool_name, - &prompt, - Some(&self.skill_id), - ) - .await + log::debug!( + "[skill-delegation] dispatching toolkit='{}' to integrations_agent (prompt_chars={})", + slug, + prompt.chars().count() + ); + super::dispatch_subagent("integrations_agent", &self.tool_name, &prompt, Some(&slug)).await + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn for_connected_returns_none_when_no_toolkits() { + assert!(SkillDelegationTool::for_connected(vec![]).is_none()); + } + + #[test] + fn for_connected_uses_canonical_tool_name() { + let tool = SkillDelegationTool::for_connected(vec![( + "gmail".to_string(), + "Email access.".to_string(), + )]) + .unwrap(); + assert_eq!(tool.name(), INTEGRATIONS_DELEGATE_TOOL_NAME); + assert_eq!(tool.name(), "delegate_to_integrations_agent"); + } + + #[test] + fn description_enumerates_connected_toolkits() { + let tool = SkillDelegationTool::for_connected(vec![ + ("gmail".to_string(), "Email access.".to_string()), + ("notion".to_string(), "Pages and databases.".to_string()), + ]) + .unwrap(); + let desc = tool.description(); + assert!(desc.contains("gmail")); + assert!(desc.contains("notion")); + assert!(desc.contains("Email access.")); + assert!(desc.contains("Pages and databases.")); + } + + #[test] + fn parameters_schema_enforces_toolkit_enum_against_connected_slugs() { + let tool = SkillDelegationTool::for_connected(vec![ + ("gmail".to_string(), "Email.".to_string()), + ("notion".to_string(), "Docs.".to_string()), + ]) + .unwrap(); + let schema = tool.parameters_schema(); + let enum_vals = schema["properties"]["toolkit"]["enum"] + .as_array() + .expect("toolkit enum is an array"); + let collected: Vec<&str> = enum_vals.iter().map(|v| v.as_str().unwrap()).collect(); + assert_eq!(collected, vec!["gmail", "notion"]); + + let required = schema["required"].as_array().expect("required is an array"); + let required: Vec<&str> = required.iter().map(|v| v.as_str().unwrap()).collect(); + assert!(required.contains(&"toolkit")); + assert!(required.contains(&"prompt")); + } + + #[tokio::test] + async fn execute_rejects_missing_toolkit_argument() { + let tool = + SkillDelegationTool::for_connected(vec![("gmail".to_string(), "Email.".to_string())]) + .unwrap(); + let result = tool.execute(json!({"prompt": "x"})).await.unwrap(); + assert!(result.is_error); + assert!(result.output().contains("toolkit")); + } + + #[tokio::test] + async fn execute_rejects_unknown_toolkit_with_allowed_list() { + let tool = SkillDelegationTool::for_connected(vec![ + ("gmail".to_string(), "Email.".to_string()), + ("notion".to_string(), "Docs.".to_string()), + ]) + .unwrap(); + let result = tool + .execute(json!({"toolkit": "slack", "prompt": "hi"})) + .await + .unwrap(); + assert!(result.is_error); + let body = result.output(); + assert!(body.contains("slack")); + assert!(body.contains("gmail")); + assert!(body.contains("notion")); + } + + #[tokio::test] + async fn execute_rejects_empty_prompt() { + let tool = + SkillDelegationTool::for_connected(vec![("gmail".to_string(), "Email.".to_string())]) + .unwrap(); + let result = tool + .execute(json!({"toolkit": "gmail", "prompt": " "})) + .await + .unwrap(); + assert!(result.is_error); + assert!(result.output().contains("prompt")); + } + + #[tokio::test] + async fn execute_normalises_toolkit_input_before_matching() { + // Mixed-case + odd-character user input must collapse onto the + // canonical slug before the connectedness check fires. + let tool = SkillDelegationTool::for_connected(vec![( + "google_calendar".to_string(), + "Calendar.".to_string(), + )]) + .unwrap(); + // "GMail" sanitises to `gmail` — NOT in the connected set, so it + // must be rejected with the unknown-toolkit message that + // enumerates the allowed slugs. + let bad = tool + .execute(json!({"toolkit": "GMail", "prompt": "x"})) + .await + .unwrap(); + assert!(bad.is_error); + let bad_body = bad.output(); + assert!( + bad_body.contains("not connected"), + "expected unknown-toolkit error path, got: {bad_body}" + ); + assert!(bad_body.contains("google_calendar")); + + // "Google-Calendar" sanitises to `google_calendar`, which IS in + // the connected set, so the toolkit gate must let it through. + // Dispatch will then fail because no agent registry is wired up + // in this unit-test process — but the error must NOT be the + // unknown-toolkit branch, because that branch was supposed to + // be bypassed by the slug normalisation. + let ok = tool + .execute(json!({"toolkit": "Google-Calendar", "prompt": "do thing"})) + .await; + match ok { + Ok(result) => { + let body = result.output(); + assert!( + !body.contains("not connected"), + "normalised slug should pass the toolkit gate, got: {body}" + ); + } + Err(err) => { + let msg = err.to_string(); + assert!( + !msg.contains("not connected"), + "normalised slug should pass the toolkit gate, got: {msg}" + ); + } + } } } diff --git a/src/openhuman/tools/orchestrator_tools.rs b/src/openhuman/tools/orchestrator_tools.rs index 77e162861..c61ed81ea 100644 --- a/src/openhuman/tools/orchestrator_tools.rs +++ b/src/openhuman/tools/orchestrator_tools.rs @@ -3,10 +3,16 @@ //! The orchestrator agent is direct-first and only delegates specialised //! work. Rather than exposing a single generic //! `spawn_subagent(agent_id, prompt)` mega-tool, we synthesise one named -//! tool per entry in the orchestrator's `subagents = [...]` TOML field, -//! so the LLM's function-calling schema contains discoverable, well-named -//! tools like `research`, `plan`, `run_code`, `delegate_gmail`, -//! `delegate_github`, etc. +//! tool per [`SubagentEntry::AgentId`] in the orchestrator's +//! `subagents = [...]` TOML field, so the LLM's function-calling schema +//! contains discoverable, well-named tools like `research`, `plan`, +//! `run_code`, etc. +//! +//! For [`SubagentEntry::Skills`] wildcard expansions (#1335) we synthesise +//! a single collapsed `delegate_to_integrations_agent` tool that takes the +//! toolkit slug as an argument — keeping the orchestrator's schema cost +//! constant in the integration dimension instead of scaling with the +//! number of connected toolkits. //! //! Each synthesised tool's description is pulled live from the target //! agent's [`AgentDefinition::when_to_use`] (for @@ -41,18 +47,23 @@ use super::{ArchetypeDelegationTool, SkillDelegationTool, SpawnWorkerThreadTool, /// `when_to_use` — so editing an agent's TOML description immediately /// updates the tool schema the orchestrator LLM sees, with zero drift. /// -/// Each [`SubagentEntry::Skills`] wildcard expands to one -/// [`SkillDelegationTool`] per connected Composio integration in -/// `connected_integrations`. The synthesised tool routes to the generic -/// `integrations_agent` with `skill_filter = Some("{toolkit_slug}")` pre-set. +/// Each [`SubagentEntry::Skills`] wildcard expands to a single +/// collapsed [`SkillDelegationTool`] named +/// `delegate_to_integrations_agent` whose `toolkit` argument selects +/// among the slugs of every connected Composio integration in +/// `connected_integrations`. The tool routes to the generic +/// `integrations_agent` with the chosen toolkit's slug passed as +/// `skill_filter`. The collapsed form keeps the orchestrator's +/// function-calling schema constant in the integration dimension +/// (#1335). /// /// Entries that reference unknown agent ids (not in the registry) are /// logged at `warn` and skipped — the orchestrator still builds, just /// without the broken delegation. Entries that reference Skills wildcards /// with an empty `connected_integrations` slice produce zero tools, which /// is the correct behaviour when the user has not yet connected any -/// integrations (the LLM should not see phantom `delegate_gmail` tools -/// for unconnected toolkits). +/// integrations (the LLM should not see a `delegate_to_integrations_agent` +/// tool with an empty enum). /// /// Returns an empty Vec when `definition.subagents` is empty — callers /// (notably the builder) handle this by not extending the visible-tool @@ -123,11 +134,27 @@ pub fn collect_orchestrator_tools( ); continue; } + // Collapsed delegation tool (#1335). Previously this loop + // emitted one `delegate_` tool per connected + // integration. Every one of those tools dispatched to the + // same `integrations_agent` with a different `skill_filter`, + // so the fan-out cost the orchestrator schema bytes without + // buying any new routing capability. We now emit at most + // one `delegate_to_integrations_agent` tool that takes the + // toolkit slug as an argument; the description enumerates + // the connected toolkits so the orchestrator still + // discovers which integrations are routable. + // `sanitise_slug` is lossy — `Slack.Bot` and `Slack-Bot` + // both collapse to `slack_bot`. Once the raw id is + // discarded, one upstream integration would silently + // shadow the other. Detect the collision here, drop + // every duplicate after the first, and warn so routing + // stays unambiguous (the first arrival keeps the slug; + // later arrivals are unreachable through this enum and + // safer to omit than silently re-target). + let mut connected: Vec<(String, String)> = Vec::new(); + let mut seen: std::collections::HashSet = std::collections::HashSet::new(); for integration in connected_integrations { - // Only emit a delegate_* tool for integrations that are - // actually connected — exposing unconnected entries would - // let the orchestrator call a tool whose pre-flight - // will immediately reject with "not connected". if !integration.connected { log::debug!( "[orchestrator_tools] skipping unconnected integration: {}", @@ -135,38 +162,50 @@ pub fn collect_orchestrator_tools( ); continue; } - // Slug the toolkit name into a tool-name-safe form. - // Composio toolkit slugs are already lowercase / dash- - // separated (e.g. "gmail", "google_calendar"), but - // we guard against surprises so a quirky slug can - // never produce an invalid function-calling schema. + // Slug the toolkit name into a tool-name-safe + // (and argument-safe) form so the LLM-facing + // enum stays predictable across odd toolkit + // names (dashes, dots, spaces, mixed case). let slug = sanitise_slug(&integration.toolkit); - let tool_name = format!("delegate_{}", slug); - // Prefer the toolkit's own one-line description when - // available; fall back to a generic template so the - // LLM still gets a meaningful tool description even - // on brand-new or poorly-populated toolkits. + if !seen.insert(slug.clone()) { + log::warn!( + "[orchestrator_tools] duplicate sanitised slug '{slug}' from raw \ + toolkit '{raw}' — dropping to keep collapsed delegation routing \ + unambiguous", + raw = integration.toolkit + ); + continue; + } + // Empty integration descriptions otherwise render as a + // bare ` - slug` line in the collapsed tool description, + // which gives the orchestrator LLM no hint about what + // the toolkit actually does. Fall back to the + // generic per-toolkit phrasing the old fan-out path + // used so brand-new or under-populated toolkits stay + // informative. let description = if integration.description.trim().is_empty() { format!( - "Use only when direct response/direct tools are insufficient and the task truly requires external integration actions. Delegate to the integrations_agent with the `{}` integration pre-selected.", + "External integration via {} — see the toolkit docs for available actions.", integration.toolkit ) } else { - format!( - "Use only when direct response/direct tools are insufficient and the task truly requires external integration actions. Delegate to the integrations_agent using `{}`. {}", - integration.toolkit, integration.description - ) + integration.description.clone() }; - log::debug!( - "[orchestrator_tools] registering skill delegation tool: {} -> integrations_agent (skill_filter={})", - tool_name, - slug - ); - tools.push(Box::new(SkillDelegationTool { - tool_name, - skill_id: slug, - tool_description: description, - })); + connected.push((slug, description)); + } + match SkillDelegationTool::for_connected(connected) { + Some(tool) => { + log::debug!( + "[orchestrator_tools] registering collapsed integrations delegation tool ({} toolkits)", + tool.connected_toolkits.len() + ); + tools.push(Box::new(tool)); + } + None => { + log::debug!( + "[orchestrator_tools] no connected integrations — collapsed delegation tool omitted" + ); + } } } } @@ -280,10 +319,10 @@ mod tests { /// Baseline: an orchestrator with 2 AgentId entries + a Skills /// wildcard, against a registry that knows both targets and a /// connected_integrations list with three toolkits, should produce - /// 2 + 3 = 5 delegation tools, each with the expected name and - /// description source. + /// 2 archetype tools + 1 collapsed integrations delegation tool + /// (#1335) — independent of how many integrations are connected. #[test] - fn collects_agentid_entries_and_expands_skills_wildcard() { + fn collects_agentid_entries_and_collapses_skills_wildcard() { let orch = sample_orchestrator(); let reg = registry_with_targets(); let integrations = vec![ @@ -298,33 +337,59 @@ mod tests { assert_eq!( names, vec![ - "spawn_worker_thread", // orchestrator-only, prepended in collect_orchestrator_tools - "research", // researcher's delegate_name override - "delegate_archivist", // archivist has no delegate_name → default - "delegate_gmail", - "delegate_github", - "delegate_notion", + "spawn_worker_thread", // orchestrator-only, prepended in collect_orchestrator_tools + "research", // researcher's delegate_name override + "delegate_archivist", // archivist has no delegate_name → default + "delegate_to_integrations_agent", ], - "tool names should come from delegate_name overrides, id fallbacks, and sanitised toolkit slugs" + "skills wildcard must collapse to a single delegate_to_integrations_agent tool" ); - // Descriptions should come from when_to_use for archetype tools, - // and from a templated string mentioning the toolkit display name - // for skill tools. + // Archetype tool descriptions come from `when_to_use`. let research_tool = tools.iter().find(|t| t.name() == "research").unwrap(); assert!(research_tool.description().contains("crawler")); - let gmail_tool = tools.iter().find(|t| t.name() == "delegate_gmail").unwrap(); - assert!(gmail_tool.description().contains("gmail")); - assert!(gmail_tool.description().contains("email")); + // The collapsed delegation tool enumerates every connected toolkit + // in its description so the orchestrator still discovers what's + // routable. + let delegate_tool = tools + .iter() + .find(|t| t.name() == "delegate_to_integrations_agent") + .unwrap(); + let desc = delegate_tool.description(); + assert!(desc.contains("gmail")); + assert!(desc.contains("github")); + assert!(desc.contains("notion")); + } + + /// The collapsed delegation tool's count is constant in the + /// integration dimension (#1335 primary acceptance criterion). + #[test] + fn collapsed_delegation_tool_count_is_constant_across_integration_counts() { + let orch = sample_orchestrator(); + let reg = registry_with_targets(); + + for n in [1usize, 3, 7, 20] { + let integrations: Vec<_> = (0..n) + .map(|i| integration(&format!("tool{i}"), &format!("Toolkit number {i}."))) + .collect(); + let tools = collect_orchestrator_tools(&orch, ®, &integrations); + let delegation_count = tools + .iter() + .filter(|t| t.name() == "delegate_to_integrations_agent") + .count(); + assert_eq!( + delegation_count, 1, + "expected exactly one collapsed delegation tool for {n} integrations" + ); + } } /// An orchestrator with a Skills wildcard but no connected - /// integrations should produce zero skill delegation tools — the LLM - /// must not be shown phantom `delegate_*` tools for toolkits that - /// aren't authorised. + /// integrations should produce zero integrations delegation tools — + /// the LLM must not be shown a routing handle for an empty set. #[test] - fn skills_wildcard_with_no_integrations_produces_no_tools() { + fn skills_wildcard_with_no_integrations_produces_no_delegation_tool() { let orch = sample_orchestrator(); let reg = registry_with_targets(); let tools = collect_orchestrator_tools(&orch, ®, &[]); @@ -373,12 +438,12 @@ mod tests { assert_eq!(sanitise_slug("weird name!"), "weird_name_"); } - /// Unconnected integrations must be silently skipped — exposing a - /// `delegate_*` tool for a toolkit whose OAuth token is absent would - /// let the orchestrator call a tool whose pre-flight check immediately - /// rejects with "not connected". + /// Unconnected integrations must be silently dropped from the + /// collapsed delegation tool's enum. Otherwise the orchestrator + /// could supply `toolkit = ""` and trigger a pre-flight + /// rejection downstream that says "not connected". #[test] - fn unconnected_integrations_are_skipped() { + fn unconnected_integrations_are_omitted_from_collapsed_tool() { let orch = sample_orchestrator(); let reg = registry_with_targets(); let integrations = vec![ @@ -387,23 +452,124 @@ mod tests { toolkit: "github".into(), description: "GitHub access.".into(), tools: vec![], - connected: false, // not connected — must not produce a tool + connected: false, // not connected — must not appear in the enum }, integration("notion", "Read and write pages."), ]; let tools = collect_orchestrator_tools(&orch, ®, &integrations); - let names: Vec<&str> = tools.iter().map(|t| t.name()).collect(); + let delegate_tool = tools + .iter() + .find(|t| t.name() == "delegate_to_integrations_agent") + .expect( + "collapsed delegation tool must exist when at least one integration is connected", + ); + let desc = delegate_tool.description(); + assert!(desc.contains("gmail")); + assert!(desc.contains("notion")); assert!( - names.contains(&"delegate_gmail"), - "connected gmail must produce a tool" + !desc.contains("github"), + "unconnected github must not leak into the delegation tool description" ); + + let schema = delegate_tool.parameters_schema(); + let enum_vals = schema["properties"]["toolkit"]["enum"] + .as_array() + .expect("toolkit enum must be present"); + let slugs: Vec<&str> = enum_vals.iter().map(|v| v.as_str().unwrap()).collect(); + assert_eq!(slugs, vec!["gmail", "notion"]); + } + + /// Quirky toolkit slugs (dashes, mixed case) must be canonicalised + /// before they land in the collapsed tool's enum so the + /// LLM-provided argument can be matched with `==` rather than a + /// fuzzy comparison. + #[test] + fn collapsed_tool_enum_uses_sanitised_slugs() { + let mut orch = def("orchestrator", "t", None); + orch.subagents = vec![SubagentEntry::Skills(SkillsWildcard { skills: "*".into() })]; + let reg = registry_with_targets(); + let integrations = vec![ + integration("Google-Calendar", "Calendar."), + integration("Slack.Bot", "Chat."), + ]; + let tools = collect_orchestrator_tools(&orch, ®, &integrations); + let delegate_tool = tools + .iter() + .find(|t| t.name() == "delegate_to_integrations_agent") + .expect("collapsed tool present"); + let schema = delegate_tool.parameters_schema(); + let enum_vals = schema["properties"]["toolkit"]["enum"].as_array().unwrap(); + let slugs: Vec<&str> = enum_vals.iter().map(|v| v.as_str().unwrap()).collect(); + assert_eq!(slugs, vec!["google_calendar", "slack_bot"]); + } + + /// Two upstream toolkits whose names sanitise to the same slug + /// must not silently both land in the collapsed enum — the second + /// arrival is dropped (with a warn log) so the orchestrator's + /// routing handle stays unambiguous. Without this guard, + /// `Slack.Bot` and `Slack-Bot` would both render as `slack_bot` + /// in the enum and the orchestrator could no longer distinguish + /// them. + /// An integration with an empty description must not render as a + /// bare ` - slug` line in the collapsed tool description — the + /// orchestrator LLM would have no signal about what the toolkit + /// does. The synthesiser falls back to a generic descriptive + /// phrase keyed on the raw toolkit name. + #[test] + fn empty_integration_description_falls_back_to_generic_label() { + let mut orch = def("orchestrator", "t", None); + orch.subagents = vec![SubagentEntry::Skills(SkillsWildcard { skills: "*".into() })]; + let reg = registry_with_targets(); + let integrations = vec![ + ConnectedIntegration { + toolkit: "Brand.New".into(), + description: " ".into(), + tools: vec![], + connected: true, + }, + integration("gmail", "Email."), + ]; + let tools = collect_orchestrator_tools(&orch, ®, &integrations); + let delegate_tool = tools + .iter() + .find(|t| t.name() == "delegate_to_integrations_agent") + .expect("collapsed tool present"); + let desc = delegate_tool.description(); assert!( - !names.contains(&"delegate_github"), - "unconnected github must NOT produce a tool" + desc.contains("External integration via Brand.New"), + "expected fallback phrasing, got: {desc}" ); - assert!( - names.contains(&"delegate_notion"), - "connected notion must produce a tool" + assert!(desc.contains("Email.")); + } + + #[test] + fn duplicate_sanitised_slug_drops_later_collisions() { + let mut orch = def("orchestrator", "t", None); + orch.subagents = vec![SubagentEntry::Skills(SkillsWildcard { skills: "*".into() })]; + let reg = registry_with_targets(); + let integrations = vec![ + integration("Slack.Bot", "First slack."), + integration("Slack-Bot", "Second slack — must be dropped."), + integration("Notion", "Pages."), + ]; + let tools = collect_orchestrator_tools(&orch, ®, &integrations); + let delegate_tool = tools + .iter() + .find(|t| t.name() == "delegate_to_integrations_agent") + .expect("collapsed tool present"); + let schema = delegate_tool.parameters_schema(); + let enum_vals = schema["properties"]["toolkit"]["enum"].as_array().unwrap(); + let slugs: Vec<&str> = enum_vals.iter().map(|v| v.as_str().unwrap()).collect(); + assert_eq!( + slugs, + vec!["slack_bot", "notion"], + "second slack_bot collision must be dropped, not silently shadowed" ); + // The dropped description must not appear in the tool description + // either — otherwise the orchestrator would think there's a route + // it can't actually distinguish. + let desc = delegate_tool.description(); + assert!(desc.contains("First slack.")); + assert!(!desc.contains("Second slack")); } }