Skip to content

Commit 7b98a66

Browse files
Merge pull request #5656 from aden-hive/feature/queen-worker-comm
Feature/queen worker comm
2 parents 4cbf5a7 + 26481e2 commit 7b98a66

File tree

78 files changed

+10960
-1302
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

78 files changed

+10960
-1302
lines changed

core/framework/agents/hive_coder/nodes/__init__.py

Lines changed: 59 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,38 @@
77
# Load reference docs at import time so they're always in the system prompt.
88
# No voluntary read_file() calls needed — the LLM gets everything upfront.
99
_ref_dir = Path(__file__).parent.parent / "reference"
10-
_framework_guide = (_ref_dir / "framework_guide.md").read_text(encoding="utf-8")
11-
_file_templates = (_ref_dir / "file_templates.md").read_text(encoding="utf-8")
12-
_anti_patterns = (_ref_dir / "anti_patterns.md").read_text(encoding="utf-8")
10+
# Decode explicitly as UTF-8: without an encoding argument read_text() uses the
# locale's preferred encoding, which is not UTF-8 on every platform.
_framework_guide = (_ref_dir / "framework_guide.md").read_text(encoding="utf-8")
11+
# Decode explicitly as UTF-8: without an encoding argument read_text() uses the
# locale's preferred encoding, which is not UTF-8 on every platform.
_file_templates = (_ref_dir / "file_templates.md").read_text(encoding="utf-8")
12+
# Decode explicitly as UTF-8: without an encoding argument read_text() uses the
# locale's preferred encoding, which is not UTF-8 on every platform.
_anti_patterns = (_ref_dir / "anti_patterns.md").read_text(encoding="utf-8")
13+
_gcu_guide_path = _ref_dir / "gcu_guide.md"
14+
# The GCU guide is optional: fall back to an empty string when the file is
# absent. Decode explicitly as UTF-8 so the result does not depend on the
# platform's locale encoding.
_gcu_guide = _gcu_guide_path.read_text(encoding="utf-8") if _gcu_guide_path.exists() else ""
15+
16+
17+
def _is_gcu_enabled() -> bool:
18+
try:
19+
from framework.config import get_gcu_enabled
20+
21+
return get_gcu_enabled()
22+
except Exception:
23+
return False
24+
25+
26+
def _build_appendices() -> str:
27+
parts = (
28+
"\n\n# Appendix: Framework Reference\n\n"
29+
+ _framework_guide
30+
+ "\n\n# Appendix: File Templates\n\n"
31+
+ _file_templates
32+
+ "\n\n# Appendix: Anti-Patterns\n\n"
33+
+ _anti_patterns
34+
)
35+
if _is_gcu_enabled() and _gcu_guide:
36+
parts += "\n\n# Appendix: GCU Browser Automation Guide\n\n" + _gcu_guide
37+
return parts
38+
1339

1440
# Shared appendices — appended to every coding node's system prompt.
15-
_appendices = (
16-
"\n\n# Appendix: Framework Reference\n\n"
17-
+ _framework_guide
18-
+ "\n\n# Appendix: File Templates\n\n"
19-
+ _file_templates
20-
+ "\n\n# Appendix: Anti-Patterns\n\n"
21-
+ _anti_patterns
22-
)
41+
_appendices = _build_appendices()
2342

2443
# Tools available to both coder (worker) and queen.
2544
_SHARED_TOOLS = [
@@ -391,7 +410,10 @@
391410
**Node rules**:
392411
- **2-4 nodes MAX.** Never exceed 4. Merge thin nodes aggressively.
393412
- A node with 0 tools is NOT a real node — merge it.
394-
- node_type always "event_loop"
413+
- node_type "event_loop" for all regular graph nodes. Use "gcu" ONLY for
414+
browser automation subagents (see GCU appendix). GCU nodes MUST be in a
415+
parent node's sub_agents list, NEVER connected via edges, and NEVER used
416+
as entry/terminal nodes.
395417
- max_node_visits default is 0 (unbounded) — correct for forever-alive. \
396418
Only set >0 in one-shot agents with bounded feedback loops.
397419
- Feedback inputs: nullable_output_keys
@@ -539,6 +561,11 @@
539561
this session. If a worker is already loaded, it is automatically unloaded \
540562
first. Call after building and validating an agent to make it available \
541563
immediately.
564+
565+
## Credentials
566+
- list_credentials(credential_id?) — List all authorized credentials in the \
567+
local store. Returns IDs, aliases, status, and identity metadata (never \
568+
secrets). Optionally filter by credential_id.
542569
"""
543570

544571
_queen_behavior = """
@@ -589,14 +616,29 @@
589616
- For tasks matching the worker's goal, call start_worker(task).
590617
- For everything else, do it directly.
591618
619+
## When the user clicks Run (external event notification)
620+
When you receive an event that the user clicked Run:
621+
- If the worker started successfully, briefly acknowledge it — do NOT \
622+
repeat the full status. The user can see the graph is running.
623+
- If the worker failed to start (credential or structural error), \
624+
explain the problem clearly and help fix it. For credential errors, \
625+
guide the user to set up the missing credentials. For structural \
626+
issues, offer to fix the agent graph directly.
627+
592628
## When worker is running:
593-
- If the user asks about progress, call get_worker_status().
629+
- If the user asks about progress, call get_worker_status() ONCE and \
630+
report the result. Do NOT poll in a loop.
631+
- NEVER call get_worker_status() repeatedly without user input in between. \
632+
The worker will surface results through client-facing nodes. You do not \
633+
need to monitor it. One check per user request is enough.
594634
- If the user has a concern or instruction for the worker, call \
595635
inject_worker_message(content) to relay it.
596636
- You can still do coding tasks directly while the worker runs.
597637
- If an escalation ticket arrives from the judge, assess severity:
598638
- Low/transient: acknowledge silently, do not disturb the user.
599639
- High/critical: notify the user with a brief analysis and suggested action.
640+
- After starting the worker or checking its status, WAIT for the user's \
641+
next message. Do not take autonomous actions unless the user asks.
600642
601643
## When worker asks user a question:
602644
- The system will route the user's response directly to the worker. \
@@ -778,6 +820,8 @@
778820
"notify_operator",
779821
# Agent loading
780822
"load_built_agent",
823+
# Credentials
824+
"list_credentials",
781825
],
782826
system_prompt=(
783827
"You are the Queen — the user's primary interface. You are a coding agent "
@@ -803,6 +847,8 @@
803847
"notify_operator",
804848
# Agent loading
805849
"load_built_agent",
850+
# Credentials
851+
"list_credentials",
806852
]
807853

808854
__all__ = [

core/framework/agents/hive_coder/reference/anti_patterns.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,3 +105,7 @@ def test_research_routes_back_to_interact(self):
105105
23. **Forgetting sys.path setup in conftest.py** — Tests need `exports/` and `core/` on sys.path.
106106

107107
24. **Not using auto_responder for client-facing nodes** — Tests with client-facing nodes hang without an auto-responder that injects input. But note: even WITH auto_responder, forever-alive agents still hang because the graph never terminates. Auto-responder only helps for agents with terminal nodes.
108+
109+
25. **Manually wiring browser tools on event_loop nodes** — If the agent needs browser automation, use `node_type="gcu"` which auto-includes all browser tools and prepends best-practices guidance. Do NOT manually list browser tool names on event_loop nodes — they may not exist in the MCP server or may be incomplete. See the GCU Guide appendix.
110+
111+
26. **Using GCU nodes as regular graph nodes** — GCU nodes (`node_type="gcu"`) are exclusively subagents. They must ONLY appear in a parent node's `sub_agents=["gcu-node-id"]` list and be invoked via `delegate_to_sub_agent()`. They must NEVER be connected via edges, used as entry nodes, or used as terminal nodes. If a GCU node appears as an edge source or target, the graph will fail pre-load validation.

core/framework/agents/hive_coder/reference/framework_guide.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ goal = Goal(
7272
| id | str | required | kebab-case identifier |
7373
| name | str | required | Display name |
7474
| description | str | required | What the node does |
75-
| node_type | str | required | Always `"event_loop"` |
75+
| node_type | str | required | `"event_loop"` or `"gcu"` (browser automation — see GCU Guide appendix) |
7676
| input_keys | list[str] | required | Memory keys this node reads |
7777
| output_keys | list[str] | required | Memory keys this node writes via set_output |
7878
| system_prompt | str | "" | LLM instructions |
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
# GCU Browser Automation Guide
2+
3+
## When to Use GCU Nodes
4+
5+
Use `node_type="gcu"` when:
6+
- The user's workflow requires **navigating real websites** (scraping, form-filling, social media interaction, testing web UIs)
7+
- The task involves **dynamic/JS-rendered pages** that `web_scrape` cannot handle (SPAs, infinite scroll, login-gated content)
8+
- The agent needs to **interact with a website** — clicking, typing, scrolling, selecting, uploading files
9+
10+
Do NOT use GCU for:
11+
- Static content that `web_scrape` handles fine
12+
- API-accessible data (use the API directly)
13+
- PDF/file processing
14+
- Anything that doesn't require a browser UI
15+
16+
## What GCU Nodes Are
17+
18+
- `node_type="gcu"` — a declarative enhancement over `event_loop`
19+
- Framework auto-prepends browser best-practices system prompt
20+
- Framework auto-includes all 31 browser tools from `gcu-tools` MCP server
21+
- Same underlying `EventLoopNode` class — no new imports needed
22+
- `tools=[]` is correct — tools are auto-populated at runtime
23+
24+
## GCU Architecture Pattern
25+
26+
GCU nodes are **subagents** — invoked via `delegate_to_sub_agent()`, not connected via edges.
27+
28+
- Primary nodes (`event_loop`, client-facing) orchestrate; GCU nodes do browser work
29+
- Parent node declares `sub_agents=["gcu-node-id"]` and calls `delegate_to_sub_agent(agent_id="gcu-node-id", task="...")`
30+
- GCU nodes set `max_node_visits=1` (single execution per delegation), `client_facing=False`
31+
- GCU nodes use `output_keys=["result"]` and return structured JSON via `set_output("result", ...)`
32+
33+
## GCU Node Definition Template
34+
35+
```python
36+
gcu_browser_node = NodeSpec(
37+
id="gcu-browser-worker",
38+
name="Browser Worker",
39+
description="Browser subagent that does X.",
40+
node_type="gcu",
41+
client_facing=False,
42+
max_node_visits=1,
43+
input_keys=[],
44+
output_keys=["result"],
45+
tools=[], # Auto-populated with all browser tools
46+
system_prompt="""\
47+
You are a browser agent. Your job: [specific task].
48+
49+
## Workflow
50+
1. browser_start (only if no browser is running yet)
51+
2. browser_open(url=TARGET_URL) — note the returned targetId
52+
3. browser_snapshot to read the page
53+
4. [task-specific steps]
54+
5. set_output("result", JSON)
55+
56+
## Output format
57+
set_output("result", JSON) with:
58+
- [field]: [type and description]
59+
""",
60+
)
61+
```
62+
63+
## Parent Node Template (orchestrating GCU subagents)
64+
65+
```python
66+
orchestrator_node = NodeSpec(
67+
id="orchestrator",
68+
...
69+
node_type="event_loop",
70+
sub_agents=["gcu-browser-worker"],
71+
system_prompt="""\
72+
...
73+
delegate_to_sub_agent(
74+
agent_id="gcu-browser-worker",
75+
task="Navigate to [URL]. Do [specific task]. Return JSON with [fields]."
76+
)
77+
...
78+
""",
79+
tools=[], # Orchestrator doesn't need browser tools
80+
)
81+
```
82+
83+
## mcp_servers.json with GCU
84+
85+
```json
86+
{
87+
"hive-tools": { ... },
88+
"gcu-tools": {
89+
"transport": "stdio",
90+
"command": "uv",
91+
"args": ["run", "python", "-m", "gcu.server", "--stdio"],
92+
"cwd": "../../tools",
93+
"description": "GCU tools for browser automation"
94+
}
95+
}
96+
```
97+
98+
Note: `gcu-tools` is auto-added if any node uses `node_type="gcu"`, but including it explicitly is fine.
99+
100+
## GCU System Prompt Best Practices
101+
102+
Key rules to bake into GCU node prompts:
103+
104+
- Prefer `browser_snapshot` over `browser_get_text("body")` — compact accessibility tree vs 100KB+ raw HTML
105+
- Always `browser_wait` after navigation
106+
- Use large scroll amounts (~2000-5000) for lazy-loaded content
107+
- For spillover files, use `run_command` with grep, not `read_file`
108+
- If auth wall detected, report immediately — don't attempt login
109+
- Keep tool calls per turn ≤10
110+
- Tab isolation: when browser is already running, use `browser_open(background=true)` and pass `target_id` to every call
111+
112+
## GCU Anti-Patterns
113+
114+
- Using `browser_screenshot` to read text (use `browser_snapshot`)
115+
- Re-navigating after scrolling (resets scroll position)
116+
- Attempting login on auth walls
117+
- Forgetting `target_id` in multi-tab scenarios
118+
- Putting browser tools directly on `event_loop` nodes instead of using GCU subagent pattern
119+
- Making GCU nodes `client_facing=True` (they should be autonomous subagents)

core/framework/config.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,11 @@ def get_api_key() -> str | None:
9090
return None
9191

9292

93+
def get_gcu_enabled() -> bool:
    """Return whether GCU (browser automation) is enabled in user config.

    Returns:
        The value stored under ``"gcu_enabled"`` in the mapping returned by
        ``get_hive_config()``, or ``False`` when that key is absent.
    """
    hive_config = get_hive_config()
    return hive_config.get("gcu_enabled", False)
96+
97+
9398
def get_api_base() -> str | None:
9499
"""Return the api_base URL for OpenAI-compatible endpoints, if configured."""
95100
llm = get_hive_config().get("llm", {})

core/framework/credentials/validation.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -159,11 +159,7 @@ def format_error_message(self) -> str:
159159
f" {c.env_var} for {_label(c)}"
160160
f"\n Connect this integration at hive.adenhq.com first."
161161
)
162-
lines.append(
163-
"\nTo fix: run /hive-credentials in Claude Code."
164-
"\nIf you've already set up credentials, "
165-
"restart your terminal to load them."
166-
)
162+
lines.append("\nIf you've already set up credentials, restart your terminal to load them.")
167163
return "\n".join(lines)
168164

169165

core/framework/graph/conversation.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -107,17 +107,38 @@ def _extract_spillover_filename(content: str) -> str | None:
107107
def _compact_tool_calls(tool_calls: list[dict[str, Any]]) -> list[dict[str, Any]]:
108108
"""Truncate tool_call arguments to save context tokens during compaction.
109109
110-
Preserves ``id``, ``type``, and ``function.name`` exactly. Truncates
111-
``function.arguments`` (a JSON string) to at most ``_TC_ARG_LIMIT`` chars
112-
so that large payloads (e.g. set_output with full findings) don't survive
113-
compaction and defeat the purpose of context reduction.
110+
Preserves ``id``, ``type``, and ``function.name`` exactly. When arguments
111+
exceed ``_TC_ARG_LIMIT``, replaces the full JSON string with a compact
112+
**valid** JSON summary. The Anthropic API parses tool_call arguments and
113+
rejects requests with malformed JSON (e.g. unterminated strings), so we
114+
must never produce broken JSON here.
114115
"""
115116
compact = []
116117
for tc in tool_calls:
117118
func = tc.get("function", {})
118119
args = func.get("arguments", "")
119120
if len(args) > _TC_ARG_LIMIT:
120-
args = args[:_TC_ARG_LIMIT] + "…[truncated]"
121+
# Build a valid JSON summary instead of slicing mid-string.
122+
# Try to extract top-level keys for a meaningful preview.
123+
try:
124+
parsed = json.loads(args)
125+
if isinstance(parsed, dict):
126+
# Preserve key names, truncate values
127+
summary_parts = []
128+
for k, v in parsed.items():
129+
v_str = str(v)
130+
if len(v_str) > 60:
131+
v_str = v_str[:60] + "..."
132+
summary_parts.append(f"{k}={v_str}")
133+
summary = ", ".join(summary_parts)
134+
if len(summary) > _TC_ARG_LIMIT:
135+
summary = summary[:_TC_ARG_LIMIT] + "..."
136+
args = json.dumps({"_compacted": summary})
137+
else:
138+
args = json.dumps({"_compacted": str(parsed)[:_TC_ARG_LIMIT]})
139+
except (json.JSONDecodeError, TypeError):
140+
# Args were already invalid JSON — wrap the preview safely
141+
args = json.dumps({"_compacted": args[:_TC_ARG_LIMIT]})
121142
compact.append(
122143
{
123144
"id": tc.get("id", ""),

core/framework/graph/conversation_judge.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,12 @@ async def evaluate_phase_completion(
103103

104104

105105
def _extract_recent_context(conversation: NodeConversation, max_messages: int = 10) -> str:
106-
"""Extract recent conversation messages for evaluation."""
106+
"""Extract recent conversation messages for evaluation.
107+
108+
Includes tool-call summaries from assistant messages so the judge
109+
can see what tools were invoked (especially set_output values) even
110+
when the assistant message body is empty.
111+
"""
107112
messages = conversation.messages
108113
recent = messages[-max_messages:] if len(messages) > max_messages else messages
109114

@@ -112,8 +117,24 @@ def _extract_recent_context(conversation: NodeConversation, max_messages: int =
112117
role = msg.role.upper()
113118
content = msg.content or ""
114119
# Truncate long tool results
115-
if msg.role == "tool" and len(content) > 200:
116-
content = content[:200] + "..."
120+
if msg.role == "tool" and len(content) > 500:
121+
content = content[:500] + "..."
122+
# For assistant messages with empty content but tool_calls,
123+
# summarise the tool calls so the judge knows what happened.
124+
if msg.role == "assistant" and not content.strip():
125+
tool_calls = getattr(msg, "tool_calls", None)
126+
if tool_calls:
127+
tc_parts = []
128+
for tc in tool_calls:
129+
fn = tc.get("function", {}) if isinstance(tc, dict) else {}
130+
name = fn.get("name", "")
131+
args = fn.get("arguments", "")
132+
if name == "set_output":
133+
# Show the value so the judge can evaluate content quality
134+
tc_parts.append(f" called {name}({args[:1000]})")
135+
else:
136+
tc_parts.append(f" called {name}(...)")
137+
content = "Tool calls:\n" + "\n".join(tc_parts)
117138
if content.strip():
118139
parts.append(f"[{role}]: {content.strip()}")
119140

@@ -125,6 +146,10 @@ def _format_outputs(accumulator_state: dict[str, Any]) -> str:
125146
126147
Lists and dicts get structural formatting so the judge can assess
127148
quantity and structure, not just a truncated stringification.
149+
150+
String values are given a generous limit (2000 chars) so the judge
151+
can verify substantive content (e.g. a research brief with key
152+
questions, scope boundaries, and deliverables).
128153
"""
129154
if not accumulator_state:
130155
return "(none)"
@@ -144,12 +169,12 @@ def _format_outputs(accumulator_state: dict[str, Any]) -> str:
144169
val_str += f"\n ... and {len(value) - 8} more"
145170
elif isinstance(value, dict):
146171
val_str = str(value)
147-
if len(val_str) > 400:
148-
val_str = val_str[:400] + "..."
172+
if len(val_str) > 2000:
173+
val_str = val_str[:2000] + "..."
149174
else:
150175
val_str = str(value)
151-
if len(val_str) > 300:
152-
val_str = val_str[:300] + "..."
176+
if len(val_str) > 2000:
177+
val_str = val_str[:2000] + "..."
153178
parts.append(f" {key}: {val_str}")
154179
return "\n".join(parts)
155180

0 commit comments

Comments
 (0)