From dc91ed8e39ad785b5fdbb762869eb2ed111d5265 Mon Sep 17 00:00:00 2001 From: zhongyua Date: Wed, 22 Apr 2026 11:49:47 +0800 Subject: [PATCH 01/15] feat: local-agent bridge with pluggable runtime adapters Adds a reverse-WebSocket bridge that lets users run local agents (Claude Code, OpenClaw daemon, Hermes) from their own machine while keeping the Clawith platform as the control plane. Server - /ws/bridge endpoint (backend/app/api/bridge_ws.py): per-agent WS channel; bridge advertises available adapters, server dispatches sessions via SessionStartFrame.adapter. - local_agent/ service package: adapter registry, protocol frames, session dispatcher. - Installer endpoint on /agents/{id}/bridge-installer: renders a platform-specific installer with the agent's API key baked in. Windows returns a single pristine clawith-bridge.exe + JSON config trailer (magic CLWB!END); Unix returns a bash script that installs the pip package and registers launchd/systemd. - agent_type='openclaw' now carries a bridge_adapter field (claude_code | openclaw | hermes) chosen at creation time. Server strictly matches the bridge's advertised adapters and fails with a clear error otherwise, instead of silently falling back. Bridge package (bridge/) - clawith_bridge: Python package with three adapters and a reverse-WS session manager. - PyInstaller spec that produces a single clawith-bridge.exe. - baked_config trailer reader: on first launch from the setup.exe, extracts server/token/adapter, runs the install flow, strips the trailer from the installed copy, and registers a user scheduled task. - install_windows.py: copies the exe to %LOCALAPPDATA%/Clawith/bin/, writes ~/.clawith-bridge.toml with only the chosen adapter enabled, registers a user-scope scheduled task that auto-starts at logon. Frontend - AgentCreate: 4 flat runtime cards (Platform Hosted / Claude Code / OpenClaw / Hermes). Non-native cards set agent_type='openclaw' with different bridge_adapter values. 
- AgentDetail: badge shows "Bridge . . Lab". - OpenClawSettings: bridge installer download block with a readonly Runtime line so users can confirm which adapter their installer will enable. - i18n keys for wizard.runtime.* and wizard.bridge.* in en/zh. Migrations - add_bridge_mode: opt-in flag for bridge-style agents. - add_bridge_adapter: new column, backfills existing openclaw agents to claude_code (matches current de-facto behavior). Build - bridge/clawith-bridge.exe is ignored and rebuilt via pyinstaller clawith-bridge.spec, then copied into backend/app/static/bridge/ before shipping. Co-Authored-By: Claude Opus 4.7 --- .gitignore | 3 + .../alembic/versions/add_bridge_adapter.py | 34 ++ backend/alembic/versions/add_bridge_mode.py | 24 + backend/app/api/agents.py | 100 ++- backend/app/api/bridge_ws.py | 206 +++++++ backend/app/api/gateway.py | 12 + backend/app/api/websocket.py | 147 ++++- backend/app/main.py | 2 + backend/app/models/agent.py | 11 + backend/app/schemas/schemas.py | 4 + backend/app/services/agent_tools.py | 231 +++++++ backend/app/services/local_agent/__init__.py | 9 + backend/app/services/local_agent/adapters.py | 38 ++ .../local_agent/installer_templates.py | 279 +++++++++ backend/app/services/local_agent/protocol.py | 171 ++++++ .../local_agent/session_dispatcher.py | 567 ++++++++++++++++++ backend/app/static/bridge/.gitkeep | 8 + bridge/.gitignore | 10 + bridge/Dockerfile | 29 + bridge/README.md | 165 +++++ bridge/clawith-bridge.spec | 62 ++ bridge/clawith-bridge.toml.example | 24 + bridge/clawith_bridge/__init__.py | 4 + bridge/clawith_bridge/__main__.py | 206 +++++++ bridge/clawith_bridge/adapters/__init__.py | 7 + bridge/clawith_bridge/adapters/base.py | 331 ++++++++++ bridge/clawith_bridge/adapters/claude_code.py | 276 +++++++++ bridge/clawith_bridge/adapters/hermes.py | 80 +++ bridge/clawith_bridge/adapters/openclaw.py | 104 ++++ bridge/clawith_bridge/baked_config.py | 103 ++++ bridge/clawith_bridge/config.py | 132 ++++ 
bridge/clawith_bridge/connection.py | 183 ++++++ bridge/clawith_bridge/diff_capture.py | 181 ++++++ bridge/clawith_bridge/install_windows.py | 174 ++++++ bridge/clawith_bridge/protocol.py | 151 +++++ bridge/clawith_bridge/py.typed | 0 bridge/clawith_bridge/reverse_tools.py | 59 ++ bridge/clawith_bridge/session_manager.py | 245 ++++++++ bridge/launcher.py | 10 + bridge/pyproject.toml | 33 + bridge/stub_bridge.py | 191 ++++++ frontend/src/i18n/en.json | 19 + frontend/src/i18n/zh.json | 19 + frontend/src/pages/AgentCreate.tsx | 104 +++- frontend/src/pages/AgentDetail.tsx | 24 +- frontend/src/pages/Chat.tsx | 200 +++++- frontend/src/pages/OpenClawSettings.tsx | 322 ++++++++++ frontend/src/types/index.ts | 1 + 48 files changed, 5251 insertions(+), 44 deletions(-) create mode 100644 backend/alembic/versions/add_bridge_adapter.py create mode 100644 backend/alembic/versions/add_bridge_mode.py create mode 100644 backend/app/api/bridge_ws.py create mode 100644 backend/app/services/local_agent/__init__.py create mode 100644 backend/app/services/local_agent/adapters.py create mode 100644 backend/app/services/local_agent/installer_templates.py create mode 100644 backend/app/services/local_agent/protocol.py create mode 100644 backend/app/services/local_agent/session_dispatcher.py create mode 100644 backend/app/static/bridge/.gitkeep create mode 100644 bridge/.gitignore create mode 100644 bridge/Dockerfile create mode 100644 bridge/README.md create mode 100644 bridge/clawith-bridge.spec create mode 100644 bridge/clawith-bridge.toml.example create mode 100644 bridge/clawith_bridge/__init__.py create mode 100644 bridge/clawith_bridge/__main__.py create mode 100644 bridge/clawith_bridge/adapters/__init__.py create mode 100644 bridge/clawith_bridge/adapters/base.py create mode 100644 bridge/clawith_bridge/adapters/claude_code.py create mode 100644 bridge/clawith_bridge/adapters/hermes.py create mode 100644 bridge/clawith_bridge/adapters/openclaw.py create mode 100644 
bridge/clawith_bridge/baked_config.py create mode 100644 bridge/clawith_bridge/config.py create mode 100644 bridge/clawith_bridge/connection.py create mode 100644 bridge/clawith_bridge/diff_capture.py create mode 100644 bridge/clawith_bridge/install_windows.py create mode 100644 bridge/clawith_bridge/protocol.py create mode 100644 bridge/clawith_bridge/py.typed create mode 100644 bridge/clawith_bridge/reverse_tools.py create mode 100644 bridge/clawith_bridge/session_manager.py create mode 100644 bridge/launcher.py create mode 100644 bridge/pyproject.toml create mode 100644 bridge/stub_bridge.py diff --git a/.gitignore b/.gitignore index bae64bc6c..95457fde2 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,9 @@ backend/agent_data/ ss-nodes.json .data/ +# Bundled bridge binary (built separately via bridge/clawith-bridge.spec) +backend/app/static/bridge/clawith-bridge.exe + # Ignore Antigravity / Claude Code agent configurations .agent/ .agents/ diff --git a/backend/alembic/versions/add_bridge_adapter.py b/backend/alembic/versions/add_bridge_adapter.py new file mode 100644 index 000000000..c7fd80f09 --- /dev/null +++ b/backend/alembic/versions/add_bridge_adapter.py @@ -0,0 +1,34 @@ +"""Add bridge_adapter column to agents for per-agent local runtime selection. + +When agent_type='openclaw', bridge_adapter picks which local runtime the +downloaded bridge installer + session.start.adapter will target: +'claude_code' | 'openclaw' | 'hermes'. + +Backfill policy: existing openclaw agents get 'claude_code' (the de-facto +default TOML was only enabling claude_code). 
+ +Revision ID: add_bridge_adapter +Revises: add_bridge_mode +Create Date: 2026-04-22 +""" +from alembic import op + + +revision = "add_bridge_adapter" +down_revision = "add_bridge_mode" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.execute( + "ALTER TABLE agents ADD COLUMN IF NOT EXISTS bridge_adapter VARCHAR(32)" + ) + op.execute( + "UPDATE agents SET bridge_adapter='claude_code' " + "WHERE agent_type='openclaw' AND bridge_adapter IS NULL" + ) + + +def downgrade() -> None: + op.execute("ALTER TABLE agents DROP COLUMN IF EXISTS bridge_adapter") diff --git a/backend/alembic/versions/add_bridge_mode.py b/backend/alembic/versions/add_bridge_mode.py new file mode 100644 index 000000000..0e0880a7b --- /dev/null +++ b/backend/alembic/versions/add_bridge_mode.py @@ -0,0 +1,24 @@ +"""Add bridge_mode column to agents for local-agent bridge integration. + +Revision ID: add_bridge_mode +Revises: increase_api_key_length +Create Date: 2026-04-21 +""" +from alembic import op + + +revision = "add_bridge_mode" +down_revision = "increase_api_key_length" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.execute( + "ALTER TABLE agents ADD COLUMN IF NOT EXISTS bridge_mode " + "VARCHAR(16) NOT NULL DEFAULT 'disabled'" + ) + + +def downgrade() -> None: + op.execute("ALTER TABLE agents DROP COLUMN IF EXISTS bridge_mode") diff --git a/backend/app/api/agents.py b/backend/app/api/agents.py index 0f8f1bc41..706db4604 100644 --- a/backend/app/api/agents.py +++ b/backend/app/api/agents.py @@ -7,7 +7,7 @@ from datetime import datetime, timezone from pathlib import Path -from fastapi import APIRouter, Depends, HTTPException, status +from fastapi import APIRouter, Depends, HTTPException, Request, Response, status from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession @@ -241,6 +241,11 @@ async def create_agent( creator_id=current_user.id, tenant_id=target_tenant_id, agent_type=data.agent_type or "native", + 
bridge_adapter=( + (data.bridge_adapter or "claude_code") + if (data.agent_type or "native") == "openclaw" + else None + ), primary_model_id=data.primary_model_id, fallback_model_id=data.fallback_model_id, max_tokens_per_day=data.max_tokens_per_day, @@ -289,6 +294,7 @@ async def create_agent( raw_key = f"oc-{secrets.token_urlsafe(32)}" agent.api_key_hash = hashlib.sha256(raw_key.encode()).hexdigest() agent.status = "idle" + agent.bridge_mode = "enabled" await db.commit() out = AgentOut.model_validate(agent).model_dump() out["api_key"] = raw_key # Return once on creation @@ -779,6 +785,98 @@ async def generate_or_reset_api_key( return {"api_key": raw_key, "message": "Key configured successfully."} +@router.post("/{agent_id}/bridge-installer") +async def download_bridge_installer( + agent_id: uuid.UUID, + request: Request, + platform: str, + current_user: User = Depends(get_current_user), + db: AsyncSession = Depends(get_db), +): + """Generate a fresh API key and return a platform-specific bridge installer script. + + Each download regenerates the agent's API key; previously-issued installers + (and any bridges still using the old key) will stop working. This is by + design — the key is the only secret in the installer, and short-lived + secrets limit blast radius if a user accidentally shares the file. 
+ """ + from app.services.local_agent.installer_templates import ( + derive_ws_url, + render_installer, + ) + from app.config import get_settings + + if platform not in ("windows", "macos", "linux"): + raise HTTPException(status_code=400, detail="platform must be windows, macos, or linux") + + agent, _access = await check_agent_access(db, current_user, agent_id) + if not is_agent_creator(current_user, agent) and current_user.role not in ("platform_admin", "org_admin"): + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Only creator or admin can download bridge installers") + if getattr(agent, "agent_type", "native") != "openclaw": + raise HTTPException(status_code=400, detail="Bridge installer is only available for OpenClaw agents") + + # Regenerate the key (same pattern as /{agent_id}/api-key). This invalidates + # any previously-downloaded installer. + raw_key = f"oc-{secrets.token_urlsafe(32)}" + agent.api_key_hash = hashlib.sha256(raw_key.encode()).hexdigest() + + # Auto-enable bridge_mode if currently disabled — the user is clearly trying + # to set up a bridge, so the disabled mode would just reject their connection. + if getattr(agent, "bridge_mode", "disabled") == "disabled": + agent.bridge_mode = "enabled" + + await db.commit() + + # Resolve server URL. Prefer the configured PUBLIC_BASE_URL; fall back to + # the request's Host header (useful for dev / local testing). 
+ settings = get_settings() + http_base = (settings.PUBLIC_BASE_URL or "").rstrip("/") + if not http_base: + forwarded_proto = request.headers.get("x-forwarded-proto", request.url.scheme) + forwarded_host = request.headers.get("x-forwarded-host", request.headers.get("host", f"{request.url.hostname}:{request.url.port or 80}")) + http_base = f"{forwarded_proto}://{forwarded_host}" + ws_url = derive_ws_url(http_base) + + try: + payload, filename, content_type = render_installer( + platform=platform, # type: ignore[arg-type] + server_url=ws_url, + api_key=raw_key, + agent_name=agent.name or str(agent.id), + adapter=getattr(agent, "bridge_adapter", None) or "claude_code", + ) + except FileNotFoundError as e: + # Bundled Windows exe missing — operator needs to build & drop it in. + raise HTTPException(status_code=503, detail=str(e)) from e + + # Audit log (best-effort) + try: + from app.services.activity_logger import log_activity + await log_activity( + agent_id=agent.id, + action_type="bridge_installer_download", + summary=f"Bridge 安装器已下载 ({platform}),API Key 已重新生成", + detail={ + "platform": platform, + "user_id": str(current_user.id), + "server_url": ws_url, + "filename": filename, + }, + ) + except Exception: # noqa: BLE001 + pass + + return Response( + content=payload, + media_type=content_type, + headers={ + "Content-Disposition": f'attachment; filename="{filename}"', + "X-Clawith-Server": ws_url, + "X-Clawith-Filename": filename, + }, + ) + + @router.get("/{agent_id}/gateway-messages") async def list_gateway_messages( agent_id: uuid.UUID, diff --git a/backend/app/api/bridge_ws.py b/backend/app/api/bridge_ws.py new file mode 100644 index 000000000..4bfba3dc5 --- /dev/null +++ b/backend/app/api/bridge_ws.py @@ -0,0 +1,206 @@ +"""WebSocket endpoint for local-agent bridges. + + wss://<host>/ws/bridge?token=<agent-api-key> + +Flow: + 1. Bridge dials in with agent's OpenClaw API key. + 2. Server authenticates via shared `_get_agent_by_key` from gateway.py. + 3. Server sends `hello` frame. 
+ 4. Bridge sends `bridge.register` advertising adapters + capabilities. + 5. Server registers the bridge with `session_dispatcher`. + 6. Read loop parses inbound frames and routes to dispatcher. +""" + +from __future__ import annotations + +import asyncio +from typing import Any + +from fastapi import APIRouter, HTTPException, Query, WebSocket, WebSocketDisconnect +from loguru import logger + +from app.api.gateway import _get_agent_by_key +from app.database import async_session +from app.services.local_agent.protocol import ( + BridgeRegisterFrame, + PongFrame, + parse_inbound, + PROTOCOL_VERSION, +) +from app.services.local_agent.session_dispatcher import dispatcher + + +router = APIRouter(tags=["bridge-ws"]) + + +PING_INTERVAL_SEC = 30 +PONG_TIMEOUT_SEC = 60 + + +@router.websocket("/ws/bridge") +async def websocket_bridge( + websocket: WebSocket, + token: str = Query(..., description="Agent API key (oc-xxx)"), +): + """Long-lived bridge channel. One bridge per agent_id.""" + await websocket.accept() + + # ── Authenticate ── + agent = None + try: + async with async_session() as db: + agent = await _get_agent_by_key(token, db) + except HTTPException as e: + await _close_with_error(websocket, f"auth: {e.detail}", code=4001) + return + except Exception as e: + logger.exception(f"[BridgeWS] auth failed: {e}") + await _close_with_error(websocket, "auth error", code=4001) + return + + agent_id = str(agent.id) + + # ── Agent lifecycle gate ── + # Reject agents that have been explicitly stopped or whose lease has + # expired. `creating` and `error` are both recoverable — don't block. 
+ agent_status = getattr(agent, "status", None) + if agent_status == "stopped": + logger.warning(f"[BridgeWS] agent {agent_id} status=stopped, rejecting bridge") + await _close_with_error( + websocket, + "agent is stopped; re-enable it before connecting a bridge", + code=4003, + ) + return + if getattr(agent, "is_expired", False): + logger.warning(f"[BridgeWS] agent {agent_id} is_expired=True, rejecting bridge") + await _close_with_error( + websocket, + "agent lease has expired", + code=4003, + ) + return + + # ── Bridge_mode gate ── + mode = getattr(agent, "bridge_mode", "disabled") or "disabled" + if mode == "disabled": + logger.warning(f"[BridgeWS] agent {agent_id} has bridge_mode=disabled, rejecting") + await _close_with_error( + websocket, + "bridge_mode is disabled for this agent; enable it in agent settings", + code=4003, + ) + return + + # ── Send hello ── + try: + await dispatcher.send_hello(websocket) + except Exception as e: + logger.warning(f"[BridgeWS] send hello failed: {e}") + return + + # ── Wait for bridge.register ── + try: + register_raw = await asyncio.wait_for(websocket.receive_json(), timeout=15) + except asyncio.TimeoutError: + await _close_with_error(websocket, "timeout waiting for bridge.register", code=4002) + return + except (WebSocketDisconnect, RuntimeError): + return + + register_frame = parse_inbound(register_raw) + if not isinstance(register_frame, BridgeRegisterFrame): + await _close_with_error(websocket, "first frame must be bridge.register", code=4002) + return + + if register_frame.v != PROTOCOL_VERSION: + await _close_with_error( + websocket, + f"protocol version mismatch: server={PROTOCOL_VERSION} bridge={register_frame.v}", + code=4002, + ) + return + + attached = await dispatcher.attach_bridge(agent_id, websocket, register_frame) + if not attached: + await _close_with_error( + websocket, + "another bridge is already connected for this agent", + code=4003, + ) + return + + logger.info( + f"[BridgeWS] attached agent={agent_id} 
bridge_version={register_frame.bridge_version} " + f"adapters={register_frame.adapters}" + ) + + # ── Ping loop (keepalive) ── + ping_task = asyncio.create_task(_ping_loop(websocket)) + + # ── Read loop ── + try: + while True: + try: + raw = await websocket.receive_json() + except WebSocketDisconnect: + logger.info(f"[BridgeWS] bridge disconnected agent={agent_id}") + break + except Exception as e: + logger.warning(f"[BridgeWS] receive error: {e}") + break + + frame = parse_inbound(raw) + if frame is None: + logger.debug( + f"[BridgeWS] unknown/invalid frame from agent={agent_id}: type={raw.get('type')!r}" + ) + continue + + try: + await dispatcher.handle_inbound_frame(agent_id, frame) + except Exception as e: + logger.exception(f"[BridgeWS] dispatch error: {e}") + finally: + ping_task.cancel() + await dispatcher.detach_bridge(agent_id) + + +async def _ping_loop(ws: WebSocket) -> None: + """Periodic ping to keep WS alive through idle firewalls.""" + try: + while True: + await asyncio.sleep(PING_INTERVAL_SEC) + try: + await ws.send_json({"type": "ping"}) + except Exception: + return + except asyncio.CancelledError: + return + + +async def _close_with_error(ws: WebSocket, message: str, code: int = 4000) -> None: + try: + await ws.send_json({"type": "error", "message": message}) + except Exception: + pass + try: + await ws.close(code=code) + except Exception: + pass + + +# ── Admin helper ────────────────────────────────────────────────────── + +@router.get("/api/admin/bridge/status") +async def bridge_status(): + """Return currently-connected bridges (for ops visibility). + + No auth enforcement here — keep behind existing admin-router auth + in a future iteration. V1 is local-first, so this is for debug only. 
+ """ + connected = dispatcher.list_connected() + return { + "count": len(connected), + "bridges": [dispatcher.get_bridge_info(a) for a in connected], + } diff --git a/backend/app/api/gateway.py b/backend/app/api/gateway.py index 265d63f80..dac22737e 100644 --- a/backend/app/api/gateway.py +++ b/backend/app/api/gateway.py @@ -73,6 +73,18 @@ async def poll_messages( logger.info(f"[Gateway] poll called, key_prefix={x_api_key[:8]}...") agent = await _get_agent_by_key(x_api_key, db) + # Hint: if a bridge is also connected for this agent, gateway polling is + # redundant. We still serve the poll (legacy compatibility) but flag it. + try: + from app.services.local_agent.session_dispatcher import dispatcher as _la_dispatcher + if _la_dispatcher.has_bridge(str(agent.id)): + logger.info( + f"[Gateway] agent {agent.id} has an active bridge (mode={getattr(agent, 'bridge_mode', 'disabled')}); " + f"poll is redundant — prefer WS bridge for this agent." + ) + except Exception: + pass + # Update last seen agent.openclaw_last_seen = datetime.now(timezone.utc) agent.status = "running" diff --git a/backend/app/api/websocket.py b/backend/app/api/websocket.py index a6698c5f0..e785acf4a 100644 --- a/backend/app/api/websocket.py +++ b/backend/app/api/websocket.py @@ -143,6 +143,7 @@ async def websocket_chat( # Verify access and load agent + model agent_name = "" agent_type = "" # Track agent type for OpenClaw routing + bridge_mode = "disabled" # Track bridge_mode for local-agent session routing role_description = "" welcome_message = "" llm_model = None @@ -169,6 +170,8 @@ async def websocket_chat( return agent_name = agent.name agent_type = agent.agent_type or "" + bridge_mode = getattr(agent, "bridge_mode", "disabled") or "disabled" + bridge_adapter = (getattr(agent, "bridge_adapter", None) or "claude_code") role_description = agent.role_description or "" welcome_message = agent.welcome_message or "" ctx_size = agent.context_window_size or 100 @@ -408,8 +411,148 @@ async def 
websocket_chat( await db.commit() logger.info("[WS] User message saved") - # ── OpenClaw routing: insert into gateway_messages instead of LLM ── + # ── OpenClaw routing: prefer bridge session, fall back to gateway queue ── if agent_type == "openclaw": + from app.services.local_agent.session_dispatcher import ( + dispatcher as _la_dispatcher, + EVENT_QUEUE_SENTINEL as _LA_SENTINEL, + BridgeDisconnected as _LA_Disconnected, + ) + _bridge_connected = _la_dispatcher.has_bridge(str(agent_id)) + _use_bridge = bridge_mode in ("enabled", "auto") and _bridge_connected + + if _use_bridge: + import asyncio as _aio_br + _ls_id = f"{conv_id}:{uuid.uuid4().hex[:8]}" + # The agent has a chosen runtime (bridge_adapter). Require + # the bridge to actually advertise it — if it doesn't, the + # user's installed TOML doesn't match the agent's intent, + # and silently falling back would run the wrong runtime. + _binfo = _la_dispatcher.get_bridge_info(str(agent_id)) or {} + _available = list(_binfo.get("adapters") or []) + if bridge_adapter not in _available: + await websocket.send_json({ + "type": "error", + "content": ( + f"Selected runtime '{bridge_adapter}' is not available on the " + f"connected bridge (it advertises: {_available or 'none'}). " + f"Reinstall the bridge installer for this agent, or enable " + f"[{bridge_adapter}] in ~/.clawith-bridge.toml and restart the bridge." 
+ ), + }) + continue + _adapter = bridge_adapter + logger.info(f"[WS] OpenClaw: dispatching via bridge session={_ls_id} adapter={_adapter}") + try: + _events_q, _fut = await _la_dispatcher.start_session( + agent_id=str(agent_id), + session_id=_ls_id, + adapter=_adapter, + prompt=content, + params={}, + cwd=None, + env={}, + timeout_s=1800, + ) + except Exception as _e: + logger.exception(f"[WS] OpenClaw bridge start_session failed: {_e}") + await websocket.send_json({ + "type": "error", + "content": f"Failed to dispatch to OpenClaw bridge: {_e}", + }) + continue + + async def _bridge_drain(): + while True: + item = await _events_q.get() + if item is _LA_SENTINEL: + return + kind = item.get("kind") + payload = item.get("payload") or {} + msg = {"bridge_session_id": _ls_id, "adapter": _adapter} + if kind in ("stdout_chunk", "assistant_text"): + msg.update({"type": "chunk", "content": payload.get("text") or payload.get("content") or ""}) + elif kind == "thinking": + msg.update({"type": "thinking", "content": payload.get("text") or ""}) + elif kind in ("tool_call_start", "tool_call_result"): + msg.update({ + "type": "tool_call", + "name": payload.get("name") or "", + "args": payload.get("args"), + "status": "running" if kind == "tool_call_start" else "done", + "result": payload.get("result", ""), + }) + elif kind == "status": + msg.update({"type": "status", **payload}) + elif kind == "file_change": + msg.update({"type": "file_change", **payload}) + else: + msg.update({"type": "bridge_event", "kind": kind, "payload": payload}) + try: + await websocket.send_json(msg) + except Exception: + return + + _drain_task = _aio_br.create_task(_bridge_drain()) + _final_text = "" + _session_ok = False + _session_err: str | None = None + try: + # Await the returned future directly — avoids a race with + # session.done popping the session from bridge.sessions. 
+ _final_text = await _aio_br.wait_for(_fut, timeout=1800) + _session_ok = True + except _LA_Disconnected as _e: + _session_err = f"本地 agent bridge 中途断开: {_e}" + logger.warning(f"[WS] OpenClaw bridge disconnected mid-session: {_e}") + except _aio_br.TimeoutError: + _session_err = "本地 agent session 超时 (>1800s)" + try: + await _la_dispatcher.cancel_session(str(agent_id), _ls_id, reason="timeout") + except Exception: + pass + except Exception as _e: + logger.exception(f"[WS] OpenClaw bridge session failed: {_e}") + _session_err = f"本地 agent session 失败: {_e}" + finally: + try: + await _aio_br.wait_for(_drain_task, timeout=2) + except Exception: + _drain_task.cancel() + + # Persist + emit done. Error path still emits `done` so the + # chat history records the turn and the frontend surfaces + # the pending session-error block instead of a silent drop. + _persist_text = _final_text if _session_ok else "" + async with async_session() as _db: + _ai_msg = ChatMessage( + agent_id=agent_id, + user_id=user_id, + role="assistant", + content=_persist_text or (f"[session error] {_session_err}" if _session_err else ""), + conversation_id=conv_id, + ) + _db.add(_ai_msg) + await _db.commit() + _done_payload: dict = { + "type": "done", + "role": "assistant", + "content": _persist_text, + } + if _session_err: + _done_payload["session_error"] = _session_err + await websocket.send_json(_done_payload) + continue + + # bridge_mode=enabled but no bridge connected → reject instead of queueing + if bridge_mode == "enabled" and not _bridge_connected: + await websocket.send_json({ + "type": "error", + "content": "Local agent bridge is not connected. 
Start `clawith-bridge` on the operator machine and retry.", + }) + continue + + # Legacy path (bridge_mode=disabled, or auto with no bridge): queue for polling from app.models.gateway_message import GatewayMessage as GwMsg async with async_session() as db: gw_msg = GwMsg( @@ -421,7 +564,7 @@ async def websocket_chat( ) db.add(gw_msg) await db.commit() - logger.info("[WS] OpenClaw: message queued for gateway poll") + logger.info("[WS] OpenClaw: message queued for gateway poll (bridge_mode=%s, bridge_connected=%s)", bridge_mode, _bridge_connected) await websocket.send_json({ "type": "done", "role": "assistant", diff --git a/backend/app/main.py b/backend/app/main.py index 896976db5..d7e1efa1e 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -270,6 +270,7 @@ def _bg_task_error(t): from app.api.tasks import router as tasks_router from app.api.files import router as files_router from app.api.websocket import router as ws_router +from app.api.bridge_ws import router as bridge_ws_router from app.api.feishu import router as feishu_router from app.api.sso import router as sso_router from app.api.organization import router as org_router @@ -338,6 +339,7 @@ def _bg_task_error(t): app.include_router(notification_router, prefix=settings.API_PREFIX) app.include_router(webhooks_router) # Public endpoint, no API prefix app.include_router(ws_router) +app.include_router(bridge_ws_router) app.include_router(gateway_router, prefix=settings.API_PREFIX) app.include_router(admin_router, prefix=settings.API_PREFIX) app.include_router(pages_router, prefix=settings.API_PREFIX) diff --git a/backend/app/models/agent.py b/backend/app/models/agent.py index 8cb129f7a..b4a7bac77 100644 --- a/backend/app/models/agent.py +++ b/backend/app/models/agent.py @@ -37,11 +37,22 @@ class Agent(Base): # Agent type: 'native' (platform-hosted LLM) or 'openclaw' (remote OpenClaw bot) agent_type: Mapped[str] = mapped_column(String(20), default="native", nullable=False) + # Local-runtime adapter 
selection when agent_type='openclaw': + # 'claude_code' (default), 'openclaw', or 'hermes'. Controls which + # adapter the bridge installer enables and which session.start.adapter + # the server sends. Ignored for native agents. + bridge_adapter: Mapped[str | None] = mapped_column(String(32)) # API key hash for OpenClaw gateway authentication api_key_hash: Mapped[str | None] = mapped_column(String(128)) # Last time OpenClaw polled the gateway (online status indicator) openclaw_last_seen: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) + # Local-agent bridge integration mode: + # "disabled" (default, safe): bridge connections rejected; legacy gateway polling only. + # "enabled": bridge must be connected; /gateway routes still work but session path preferred. + # "auto": bridge preferred, gateway polling acts as fallback when bridge is offline. + bridge_mode: Mapped[str] = mapped_column(String(16), default="disabled", nullable=False) + # Runtime status: Mapped[str] = mapped_column( Enum("creating", "running", "idle", "stopped", "error", name="agent_status_enum", create_constraint=False), diff --git a/backend/app/schemas/schemas.py b/backend/app/schemas/schemas.py index 3870392b9..56c09ea2e 100644 --- a/backend/app/schemas/schemas.py +++ b/backend/app/schemas/schemas.py @@ -205,6 +205,7 @@ class UserUpdate(BaseModel): class AgentCreate(BaseModel): name: str = Field(min_length=2, max_length=100, description="Agent name, 2-100 characters") agent_type: str = "native" # native | openclaw + bridge_adapter: str | None = None # claude_code | openclaw | hermes (only for agent_type=openclaw) role_description: str = Field(default="", max_length=500, description="Role description, max 500 characters") bio: str | None = None welcome_message: str | None = None @@ -265,7 +266,9 @@ class AgentOut(BaseModel): llm_calls_today: int = 0 max_llm_calls_per_day: int = 100 agent_type: str = "native" + bridge_adapter: str | None = None openclaw_last_seen: datetime | None = 
None + bridge_mode: str = "disabled" has_api_key: bool = False api_key_hash: str | None = None created_at: datetime @@ -295,6 +298,7 @@ class AgentUpdate(BaseModel): heartbeat_active_hours: str | None = None timezone: str | None = None expires_at: datetime | None = None # Admin only — extend agent expiry + bridge_mode: str | None = Field(default=None, pattern="^(disabled|enabled|auto)$") class AgentStatusOut(BaseModel): diff --git a/backend/app/services/agent_tools.py b/backend/app/services/agent_tools.py index 2fafa229a..0d1f7ce5e 100644 --- a/backend/app/services/agent_tools.py +++ b/backend/app/services/agent_tools.py @@ -1609,9 +1609,77 @@ async def _get_tool_config(agent_id: Optional[uuid.UUID], tool_name: str) -> Opt }, }, }, + # ─── Local-agent session tools (bridge-dispatched) ───────────── + # Only exposed when the agent has bridge_mode in {"enabled","auto"}. + { + "type": "function", + "function": { + "name": "run_claude_code_session", + "description": ( + "Dispatch a coding task to a Claude Code CLI running on the operator's " + "local machine via the connected bridge. Streams the session in real time " + "and returns the final assistant response. Use for tasks that need to read/" + "edit files on the operator's workstation or run shell commands locally." + ), + "parameters": { + "type": "object", + "properties": { + "prompt": {"type": "string", "description": "The task or instruction for the local Claude Code session."}, + "cwd": {"type": "string", "description": "Optional working directory on the operator's machine."}, + "timeout_s": {"type": "integer", "description": "Maximum session duration in seconds (default 1800)."}, + }, + "required": ["prompt"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "run_hermes_session", + "description": ( + "Dispatch a task to a local Hermes daemon via the connected bridge. " + "Streams execution events and returns the final response." 
+ ), + "parameters": { + "type": "object", + "properties": { + "prompt": {"type": "string", "description": "The task to send to the local Hermes daemon."}, + "params": {"type": "object", "description": "Optional Hermes-specific parameters."}, + "timeout_s": {"type": "integer", "description": "Maximum session duration in seconds (default 1800)."}, + }, + "required": ["prompt"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "run_openclaw_session", + "description": ( + "Dispatch a task to a local OpenClaw instance via the connected bridge. " + "Streams events and returns the final response." + ), + "parameters": { + "type": "object", + "properties": { + "prompt": {"type": "string", "description": "The task to send to the local OpenClaw agent."}, + "params": {"type": "object", "description": "Optional OpenClaw-specific parameters."}, + "timeout_s": {"type": "integer", "description": "Maximum session duration in seconds (default 1800)."}, + }, + "required": ["prompt"], + }, + }, + }, ] +_LOCAL_AGENT_TOOL_NAMES = { + "run_claude_code_session", + "run_hermes_session", + "run_openclaw_session", +} + + # Core tools that should always be available to agents regardless of # DB configuration. # Note: send_channel_message is intentionally NOT here — it lives in @@ -1847,6 +1915,20 @@ async def get_agent_tools_for_llm(agent_id: uuid.UUID) -> list[dict]: has_any_channel = await _agent_has_any_channel(agent_id) _always_tools = _always_core_tools + (_feishu_tools if has_feishu else []) + (_channel_tools if has_any_channel else []) + # Expose local-agent session tools only when the agent opted into bridge routing. + # The dispatch layer will return a helpful error if no bridge is currently connected. 
+ try: + from app.models.agent import Agent as _AgForBridge + async with async_session() as _bdb: + _br = await _bdb.execute(select(_AgForBridge.bridge_mode).where(_AgForBridge.id == agent_id)) + _bridge_mode = _br.scalar_one_or_none() or "disabled" + if _bridge_mode in ("enabled", "auto"): + _always_tools = _always_tools + [ + t for t in AGENT_TOOLS if t["function"]["name"] in _LOCAL_AGENT_TOOL_NAMES + ] + except Exception: + pass + # Check tenant-level a2a_async_enabled flag _a2a_async = False try: @@ -2037,9 +2119,154 @@ async def _sync_tasks_to_file(agent_id: uuid.UUID, ws: Path): "web_search": "web_search", "execute_code": "execute_code", "execute_code_e2b": "execute_code", + "run_claude_code_session": "invoke_local_agent", + "run_hermes_session": "invoke_local_agent", + "run_openclaw_session": "invoke_local_agent", } +# ── Local-agent session tools ──────────────────────────────────── +# Map tool name → adapter name used by the bridge protocol. +_LOCAL_AGENT_TOOLS: dict[str, str] = { + "run_claude_code_session": "claude_code", + "run_hermes_session": "hermes", + "run_openclaw_session": "openclaw", +} + + +def _is_local_agent_tool(tool_name: str) -> bool: + return tool_name in _LOCAL_AGENT_TOOLS + + +async def _invoke_local_agent_session( + tool_name: str, + arguments: dict, + agent_id: uuid.UUID, + session_id: str, +) -> str: + """Dispatch a local-agent session via the bridge and return the final text. + + Streams session events to the chat WebSocket (if any) while blocking + on the session's completion Future. Returns a string suitable for the + LLM tool-loop to append as the tool result. + """ + from app.services.local_agent.session_dispatcher import ( + BridgeDisconnected, + BridgeUnavailable, + EVENT_QUEUE_SENTINEL, + SessionRejected, + dispatcher, + ) + + adapter = _LOCAL_AGENT_TOOLS[tool_name] + + if not dispatcher.has_bridge(str(agent_id)): + return ( + f"❌ No local-agent bridge is currently connected for this agent. 
" + f"Ask the operator to start `clawith-bridge` on their machine with adapter={adapter}, " + f"or retry later." + ) + + prompt = arguments.get("prompt") or arguments.get("task") or "" + if not prompt: + return "❌ Missing required argument 'prompt' for local-agent session." + + params = arguments.get("params") or {} + cwd = arguments.get("cwd") + env = arguments.get("env") or {} + timeout_s = int(arguments.get("timeout_s") or 1800) + + # Use a fresh session_id per invocation so concurrent tool calls don't clash. + # Prefix with the chat session_id for traceability. + ls_id = f"{session_id or 'nosess'}:{uuid.uuid4().hex[:8]}" + + try: + events_queue, future = await dispatcher.start_session( + agent_id=str(agent_id), + session_id=ls_id, + adapter=adapter, + prompt=prompt, + params=params, + cwd=cwd, + env=env, + timeout_s=timeout_s, + ) + except BridgeUnavailable as e: + return f"❌ Bridge unavailable: {e}" + except SessionRejected as e: + return f"❌ Session rejected by bridge: {e}" + except Exception as e: + logger.exception(f"[LocalAgent] start_session failed: {e}") + return f"❌ Failed to start local-agent session: {e}" + + # Try to import the chat WS manager lazily — events are fanned out to any + # chat WebSocket open on this (agent_id, session_id). In trigger / headless + # contexts there's no WS, so these calls are no-ops. + try: + from app.api.websocket import manager as _chat_manager + except Exception: + _chat_manager = None + + async def _drain_events() -> None: + while True: + item = await events_queue.get() + if item is EVENT_QUEUE_SENTINEL: + return + kind = item.get("kind") + payload = item.get("payload") or {} + if _chat_manager and session_id: + try: + # Translate bridge event kinds into existing chat WS frame + # types so the frontend can render without changes. 
+ msg: dict = {"bridge_session_id": ls_id, "adapter": adapter} + if kind in ("stdout_chunk", "assistant_text"): + msg.update({"type": "chunk", "content": payload.get("text") or payload.get("content") or ""}) + elif kind == "thinking": + msg.update({"type": "thinking", "content": payload.get("text") or ""}) + elif kind in ("tool_call_start", "tool_call_result"): + msg.update({ + "type": "tool_call", + "name": payload.get("name") or "", + "args": payload.get("args"), + "status": "running" if kind == "tool_call_start" else "done", + "result": payload.get("result", ""), + }) + elif kind == "status": + msg.update({"type": "status", **payload}) + elif kind == "file_change": + msg.update({"type": "file_change", **payload}) + else: + msg.update({"type": "bridge_event", "kind": kind, "payload": payload}) + await _chat_manager.send_to_session(str(agent_id), session_id, msg) + except Exception as _e: + logger.debug(f"[LocalAgent] event fan-out suppressed: {_e}") + + drain_task = asyncio.create_task(_drain_events()) + + try: + # Await the future directly to avoid a race where the session is + # popped from bridge.sessions as soon as session.done arrives. 
+ final_text = await asyncio.wait_for(future, timeout=timeout_s) + except BridgeDisconnected as e: + return f"❌ Local-agent bridge disconnected mid-session: {e}" + except asyncio.TimeoutError: + try: + await dispatcher.cancel_session(str(agent_id), ls_id, reason="timeout") + except Exception: + pass + return f"❌ Local-agent session timed out after {timeout_s}s" + except Exception as e: + logger.exception(f"[LocalAgent] session failed: {e}") + return f"❌ Local-agent session failed: {e}" + finally: + try: + await asyncio.wait_for(drain_task, timeout=2) + except Exception: + drain_task.cancel() + + return final_text or "(local agent produced no final text)" + + async def _get_agent_tenant_id(agent_id: uuid.UUID) -> str | None: """Get the agent tenant ID for tenant-scoped shared paths.""" try: @@ -2164,6 +2391,10 @@ async def execute_tool( "browser/computer operations." ) + # ── Local-agent session dispatch (bridge-backed) ── + if _is_local_agent_tool(tool_name): + return await _invoke_local_agent_session(tool_name, arguments, agent_id, session_id) + try: if tool_name == "list_files": result = _list_files(ws, arguments.get("path", ""), tenant_id=_agent_tenant_id) diff --git a/backend/app/services/local_agent/__init__.py b/backend/app/services/local_agent/__init__.py new file mode 100644 index 000000000..5074908a0 --- /dev/null +++ b/backend/app/services/local_agent/__init__.py @@ -0,0 +1,9 @@ +"""Local agent bridge subsystem. + +Replaces OpenClaw gateway polling with a reverse-connected WebSocket +session channel. User's local `clawith-bridge` dials in and streams +session events (from Claude Code, Hermes, OpenClaw, ...) to Clawith +in real time. 
+ +See plan: clawith-5min-agent-session-agent-clawit-calm-yeti.md +""" diff --git a/backend/app/services/local_agent/adapters.py b/backend/app/services/local_agent/adapters.py new file mode 100644 index 000000000..2b05cc32a --- /dev/null +++ b/backend/app/services/local_agent/adapters.py @@ -0,0 +1,38 @@ +"""Adapter name registry (server-side). + +Real adapter logic lives on the bridge (bridge/clawith_bridge/adapters/). +Server only knows the set of names it will accept in `session.start.adapter` +and the param schemas used for UI validation. +""" + +from __future__ import annotations + +from typing import Any + +# Known adapter names. Add to this list when a new adapter ships on +# the bridge side. Unknown adapters are rejected at session start. +KNOWN_ADAPTERS: set[str] = { + "claude_code", + "hermes", + "openclaw", +} + +# Minimal param schema per adapter. Keys are the `params` field on +# SessionStartFrame. Used only for validation / UI hints. +ADAPTER_PARAM_SCHEMAS: dict[str, dict[str, Any]] = { + "claude_code": { + "model": {"type": "string", "description": "Claude model override, e.g. claude-opus-4-7"}, + "allowed_tools": {"type": "array", "description": "Whitelist of tool names the CLI may use"}, + "max_turns": {"type": "integer", "description": "Max tool-loop turns"}, + }, + "hermes": { + "endpoint": {"type": "string", "description": "Hermes daemon URL override"}, + }, + "openclaw": { + # OpenClaw historically accepts a bare prompt; no structured params. + }, +} + + +def is_known_adapter(name: str) -> bool: + return name in KNOWN_ADAPTERS diff --git a/backend/app/services/local_agent/installer_templates.py b/backend/app/services/local_agent/installer_templates.py new file mode 100644 index 000000000..8cbaf32be --- /dev/null +++ b/backend/app/services/local_agent/installer_templates.py @@ -0,0 +1,279 @@ +"""Platform-specific bridge installer script templates. 
+ +Each template is rendered server-side with the agent's freshly-regenerated +API key and the resolved WebSocket server URL baked in, then returned as +an attachment for the user to run locally. + +Windows: returns a single `clawith-bridge-setup.exe` — the pristine +PyInstaller binary with a config trailer (JSON + magic) appended at EOF. +The user double-clicks it; the bridge detects the trailer on startup, +runs the install flow, strips the trailer from the installed copy at +%LOCALAPPDATA%\\Clawith\\bin\\, and registers a scheduled task. No ZIP, +no manual extraction, no install.cmd. + +macOS/Linux: returns a bash script that pip-installs `clawith-bridge` +and registers launchd/systemd user services. +""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Literal + +Platform = Literal["windows", "macos", "linux"] + +# Wire format shared with `bridge/clawith_bridge/baked_config.py`. If you +# change either side, change the other. Structure at end of file: +# [json utf-8 blob] [4B pristine_len BE] [8B magic] +_TRAILER_MAGIC: bytes = b"CLWB!END" +_TRAILER_LEN_BYTES = 4 + + +# Location of the bundled bridge exe. Kept inside the backend package so +# it ships via the normal Dockerfile `COPY . .` step. +_BRIDGE_EXE_PATH = ( + Path(__file__).resolve().parent.parent.parent / "static" / "bridge" / "clawith-bridge.exe" +) + + +_UNIX_SH = r"""#!/usr/bin/env bash +# Clawith Bridge Installer (auto-generated) +# Agent: {agent_name} +# Run: bash install-clawith-bridge.sh + +set -euo pipefail + +CLAWITH_SERVER="{server_url}" +CLAWITH_TOKEN="{api_key}" + +echo "[clawith-bridge] Checking Python..." +if ! command -v python3 >/dev/null 2>&1; then + echo "[clawith-bridge] ERROR: python3 not found. Install Python 3.10+ first." >&2 + exit 1 +fi + +# Check if clawith-bridge is already installed (dev mode or prior install). 
+if python3 -c "import clawith_bridge" 2>/dev/null; then + echo "[clawith-bridge] Package already installed - skipping pip install" +else + echo "[clawith-bridge] Installing clawith-bridge via pip..." + if ! python3 -m pip install --user --upgrade clawith-bridge; then + # pip failed (e.g. package not on PyPI yet). Re-check import; if still missing, abort. + if ! python3 -c "import clawith_bridge" 2>/dev/null; then + echo "[clawith-bridge] ERROR: pip install failed and clawith_bridge is not importable." >&2 + echo " If you are testing from a local repo, install it first:" >&2 + echo " python3 -m pip install -e /bridge" >&2 + exit 1 + fi + fi +fi + +CONFIG_PATH="$HOME/.clawith-bridge.toml" +echo "[clawith-bridge] Writing config to $CONFIG_PATH" +cat > "$CONFIG_PATH" < "$PLIST" < + + + + Labelcom.clawith.bridge + ProgramArguments + + /usr/bin/env + python3 + -m + clawith_bridge + + RunAtLoad + KeepAlive + StandardOutPath$HOME/Library/Logs/clawith-bridge.log + StandardErrorPath$HOME/Library/Logs/clawith-bridge.err.log + + +EOF + launchctl unload "$PLIST" 2>/dev/null || true + launchctl load "$PLIST" + echo "" + echo "[clawith-bridge] Done." + echo " Config: $CONFIG_PATH" + echo " Agent: com.clawith.bridge (launchd, auto-starts at login)" + echo " Logs: tail -f ~/Library/Logs/clawith-bridge.log" +elif [[ "$UNAME" == "Linux" ]]; then + # ── Linux: systemd --user ─────────────────────────── + UNIT_DIR="$HOME/.config/systemd/user" + mkdir -p "$UNIT_DIR" + cat > "$UNIT_DIR/clawith-bridge.service" </dev/null | grep -q "Linger=yes"; then + echo "" + echo " Tip: enable linger so the service runs even when you're not logged in:" + echo " sudo loginctl enable-linger $USER" + fi +else + echo "[clawith-bridge] Unknown platform: $UNAME — config written, but autostart not configured." 
>&2 + echo " Run manually: clawith-bridge" >&2 +fi + +echo "" +echo " If Clawith still shows 'Bridge not connected', ensure 'claude' CLI is installed and logged in:" +echo " npm install -g @anthropic-ai/claude-code" +echo " claude login" +""" + + +_KNOWN_ADAPTERS = ("claude_code", "openclaw", "hermes") + + +def _adapter_enabled_flags(adapter: str) -> dict[str, str]: + """Return `{"cc_enabled": ..., "hm_enabled": ..., "oc_enabled": ...}` — + three TOML bool literals with only `adapter` set to `true`. Unknown + adapters default to claude_code.""" + if adapter not in _KNOWN_ADAPTERS: + adapter = "claude_code" + return { + "cc_enabled": "true" if adapter == "claude_code" else "false", + "hm_enabled": "true" if adapter == "hermes" else "false", + "oc_enabled": "true" if adapter == "openclaw" else "false", + } + + +def render_installer( + platform: Platform, + *, + server_url: str, + api_key: str, + agent_name: str, + adapter: str = "claude_code", +) -> tuple[bytes, str, str]: + """Render a platform-specific installer. + + `adapter` picks which bridge adapter the generated TOML (or baked + trailer, on Windows) enables — one of 'claude_code' | 'openclaw' | + 'hermes'. + + Returns (payload_bytes, filename, content_type). + For Windows, payload is a single self-configuring .exe; for Unix, a + bash script. + """ + # Safety: template is a trusted constant, and we only format with values that + # come from server-controlled sources (agent name, generated token, derived URL). + # `agent_name` is the only user-controlled string that lands in a comment line. 
+ safe_name = agent_name.replace("\n", " ").replace("\r", " ")[:200] + + if platform == "windows": + payload = _render_windows_exe( + server_url=server_url, + api_key=api_key, + agent_name=safe_name, + adapter=adapter, + ) + return ( + payload, + "clawith-bridge-setup.exe", + "application/vnd.microsoft.portable-executable", + ) + + text = _UNIX_SH.format( + agent_name=safe_name, + server_url=server_url, + api_key=api_key, + **_adapter_enabled_flags(adapter), + ) + return ( + text.encode("utf-8"), + "install-clawith-bridge.sh", + "text/x-shellscript; charset=utf-8", + ) + + +def _render_windows_exe(*, server_url: str, api_key: str, agent_name: str, adapter: str) -> bytes: + """Return pristine bridge exe bytes + baked-config trailer at EOF. + + The trailer encodes the server URL, agent token, agent name, and chosen + adapter as JSON. Bridge startup reads it via + `baked_config.read_baked_config()` and runs the install flow + automatically on first double-click. + """ + if not _BRIDGE_EXE_PATH.exists(): + raise FileNotFoundError( + f"bundled clawith-bridge.exe is missing at {_BRIDGE_EXE_PATH}. " + "Build it via: cd bridge/ && pyinstaller clawith-bridge.spec --clean, " + "then copy dist/clawith-bridge.exe to backend/app/static/bridge/." + ) + + pristine = _BRIDGE_EXE_PATH.read_bytes() + config = { + "server": server_url, + "token": api_key, + "agent_name": agent_name, + "adapter": adapter if adapter in _KNOWN_ADAPTERS else "claude_code", + } + blob = json.dumps(config, ensure_ascii=False, separators=(",", ":")).encode("utf-8") + pristine_len = len(pristine) + trailer = blob + pristine_len.to_bytes(_TRAILER_LEN_BYTES, "big") + _TRAILER_MAGIC + return pristine + trailer + + +def derive_ws_url(http_base: str) -> str: + """Convert an HTTP(S) base URL to its WebSocket equivalent. 
+ + `http://host:port` -> `ws://host:port` + `https://host:port` -> `wss://host:port` + """ + if http_base.startswith("https://"): + return "wss://" + http_base[len("https://"):] + if http_base.startswith("http://"): + return "ws://" + http_base[len("http://"):] + # Already ws(s)?:// — return as-is + return http_base diff --git a/backend/app/services/local_agent/protocol.py b/backend/app/services/local_agent/protocol.py new file mode 100644 index 000000000..65727c2af --- /dev/null +++ b/backend/app/services/local_agent/protocol.py @@ -0,0 +1,171 @@ +"""WebSocket frame schemas for the local-agent bridge protocol. + +All frames are JSON objects with a `type` discriminator. Versioned via +`PROTOCOL_VERSION` — bumped on breaking shape changes so old bridges +can refuse to connect instead of silently misbehaving. + +Two directions: + Server → Bridge: hello / session.start / session.input / session.cancel / + tool.response / ping + Bridge → Server: bridge.register / session.accepted / session.event / + session.done / session.error / tool.call / pong +""" + +from __future__ import annotations + +from typing import Any, Literal + +from pydantic import BaseModel, Field + +PROTOCOL_VERSION = "1" + +# ── Event kinds carried by session.event ────────────────────────────── +EventKind = Literal[ + "stdout_chunk", + "stderr_chunk", + "assistant_text", + "thinking", + "tool_call_start", + "tool_call_result", + "status", + "file_change", +] + + +# ── Server → Bridge ─────────────────────────────────────────────────── + +class HelloFrame(BaseModel): + type: Literal["hello"] = "hello" + v: str = PROTOCOL_VERSION + server_time: str # ISO-8601 + + +class SessionStartFrame(BaseModel): + type: Literal["session.start"] = "session.start" + session_id: str + adapter: str # e.g. 
"claude_code" | "hermes" | "openclaw" + prompt: str + params: dict[str, Any] = Field(default_factory=dict) + cwd: str | None = None + env: dict[str, str] = Field(default_factory=dict) + timeout_s: int = 1800 + + +class SessionInputFrame(BaseModel): + type: Literal["session.input"] = "session.input" + session_id: str + text: str + + +class SessionCancelFrame(BaseModel): + type: Literal["session.cancel"] = "session.cancel" + session_id: str + reason: str = "" + + +class ToolResponseFrame(BaseModel): + """Response to a reverse tool.call from bridge (server answered it).""" + type: Literal["tool.response"] = "tool.response" + session_id: str + reverse_call_id: str + result: str | None = None + error: str | None = None + + +class PingFrame(BaseModel): + type: Literal["ping"] = "ping" + + +# ── Bridge → Server ─────────────────────────────────────────────────── + +class BridgeRegisterFrame(BaseModel): + """First frame from bridge after auth. Advertises what adapters and + capabilities this bridge build supports. 
+ """ + type: Literal["bridge.register"] = "bridge.register" + v: str = PROTOCOL_VERSION + bridge_version: str + adapters: list[str] # adapter names the bridge can drive + capabilities: dict[str, Any] = Field(default_factory=dict) + + +class SessionAcceptedFrame(BaseModel): + type: Literal["session.accepted"] = "session.accepted" + session_id: str + adapter: str + local_session_id: str | None = None + + +class SessionEventFrame(BaseModel): + """Stream event from a running session.""" + type: Literal["session.event"] = "session.event" + session_id: str + kind: EventKind + payload: dict[str, Any] = Field(default_factory=dict) + + +class DiffSummary(BaseModel): + files_changed: int = 0 + insertions: int = 0 + deletions: int = 0 + files: list[dict[str, Any]] = Field(default_factory=list) + + +class SessionDoneFrame(BaseModel): + type: Literal["session.done"] = "session.done" + session_id: str + final_text: str = "" + exit_code: int | None = None + stats: dict[str, Any] = Field(default_factory=dict) + diff_summary: DiffSummary | None = None + + +class SessionErrorFrame(BaseModel): + type: Literal["session.error"] = "session.error" + session_id: str + error: str + retryable: bool = False + + +class ToolCallFrame(BaseModel): + """Reverse call: local agent is asking Clawith to do something.""" + type: Literal["tool.call"] = "tool.call" + session_id: str + reverse_call_id: str + name: str + arguments: dict[str, Any] = Field(default_factory=dict) + + +class PongFrame(BaseModel): + type: Literal["pong"] = "pong" + + +# ── Parse helpers ───────────────────────────────────────────────────── + +_INBOUND_BY_TYPE: dict[str, type[BaseModel]] = { + "bridge.register": BridgeRegisterFrame, + "session.accepted": SessionAcceptedFrame, + "session.event": SessionEventFrame, + "session.done": SessionDoneFrame, + "session.error": SessionErrorFrame, + "tool.call": ToolCallFrame, + "pong": PongFrame, +} + + +def parse_inbound(data: dict[str, Any]) -> BaseModel | None: + """Parse a frame 
received from the bridge. Returns None for unknown types. + + Unknown type is not an error — forward-compat: bridge may send new + frames that server doesn't understand yet. + """ + t = data.get("type") + if not isinstance(t, str): + return None + cls = _INBOUND_BY_TYPE.get(t) + if cls is None: + return None + try: + return cls.model_validate(data) + except Exception: + return None diff --git a/backend/app/services/local_agent/session_dispatcher.py b/backend/app/services/local_agent/session_dispatcher.py new file mode 100644 index 000000000..6ae61a4c8 --- /dev/null +++ b/backend/app/services/local_agent/session_dispatcher.py @@ -0,0 +1,567 @@ +"""Session dispatcher for local-agent bridges. + +Module-level singleton. Owns: + - `active_bridges`: one bridge connection per agent_id + - `pending_sessions`: asyncio.Future per session for final result + - `session_events`: asyncio.Queue per session for streaming events + - `reverse_calls`: Future per reverse-call so bridge can await server result + +Shape of the blocking call used by the LLM tool loop:: + + queue, future = await dispatcher.start_session( + agent_id, session_id, adapter, prompt, params, timeout_s + ) + # consumer (e.g. WS streamer) drains `queue` until SENTINEL; + # caller awaits `future` for the final string result. +""" + +from __future__ import annotations + +import asyncio +import uuid +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Any + +from fastapi import WebSocket +from loguru import logger + +from app.services.activity_logger import log_activity +from app.services.local_agent import adapters as _adapters +from app.services.local_agent.protocol import ( + PROTOCOL_VERSION, + BridgeRegisterFrame, + HelloFrame, + SessionCancelFrame, + SessionDoneFrame, + SessionErrorFrame, + SessionEventFrame, + SessionInputFrame, + SessionStartFrame, + ToolCallFrame, + ToolResponseFrame, +) + + +# Reverse-call allowlist. 
Bridge-initiated tool calls are restricted to +# these names to prevent local agents from driving Clawith's sandbox. +ALLOWED_REVERSE_TOOLS: frozenset[str] = frozenset({ + "send_message_to_agent", + "send_file_to_agent", + "manage_tasks", + "send_feishu_message", +}) + +# Default per-bridge concurrent session cap. +DEFAULT_MAX_CONCURRENT_SESSIONS = 4 + +# Sentinel yielded into event queues to signal terminal state to the +# consumer. After seeing this, consumer should stop iterating and +# await the session's Future for the final result. +EVENT_QUEUE_SENTINEL = object() + + +class BridgeDisconnected(Exception): + """Raised when a bridge vanishes while a session is in flight.""" + + +class BridgeUnavailable(Exception): + """Raised when no bridge is attached for an agent at start_session time.""" + + +class SessionRejected(Exception): + """Raised when session start is refused (unknown adapter, over cap, etc).""" + + +@dataclass +class _Session: + session_id: str + agent_id: str + adapter: str + started_at: datetime + future: asyncio.Future # type: ignore[type-arg] + events: asyncio.Queue # type: ignore[type-arg] + # Reverse-call futures keyed by reverse_call_id + reverse_calls: dict[str, asyncio.Future] = field(default_factory=dict) + + +@dataclass +class _Bridge: + agent_id: str + ws: WebSocket + bridge_version: str + adapters: list[str] + capabilities: dict[str, Any] + connected_at: datetime + sessions: dict[str, _Session] = field(default_factory=dict) + lock: asyncio.Lock = field(default_factory=asyncio.Lock) + + +class SessionDispatcher: + def __init__(self) -> None: + self._bridges: dict[str, _Bridge] = {} + + # ── Bridge lifecycle ────────────────────────────────────────────── + + async def attach_bridge( + self, + agent_id: str, + ws: WebSocket, + register: BridgeRegisterFrame, + ) -> bool: + """Register a new bridge. 
Returns False if one is already attached.""" + if agent_id in self._bridges: + logger.warning( + f"[Dispatcher] Bridge already attached for agent {agent_id}, rejecting new connection" + ) + return False + self._bridges[agent_id] = _Bridge( + agent_id=agent_id, + ws=ws, + bridge_version=register.bridge_version, + adapters=list(register.adapters), + capabilities=dict(register.capabilities), + connected_at=datetime.now(timezone.utc), + ) + logger.info( + f"[Dispatcher] Bridge attached: agent={agent_id} " + f"version={register.bridge_version} adapters={register.adapters}" + ) + try: + await log_activity( + agent_id=uuid.UUID(agent_id), + action_type="bridge_attached", + summary=f"本地 agent bridge 已连接 (v{register.bridge_version})", + detail={ + "bridge_version": register.bridge_version, + "adapters": list(register.adapters), + "capabilities": dict(register.capabilities), + }, + ) + except Exception: + pass + return True + + async def detach_bridge(self, agent_id: str) -> None: + bridge = self._bridges.pop(agent_id, None) + if not bridge: + return + abandoned = list(bridge.sessions.values()) + logger.info( + f"[Dispatcher] Bridge detached: agent={agent_id} " + f"sessions_abandoned={len(abandoned)}" + ) + # Fail all pending sessions for this bridge. 
+ for session in abandoned: + self._fail_session(session, BridgeDisconnected("bridge disconnected")) + bridge.sessions.clear() + try: + await log_activity( + agent_id=uuid.UUID(agent_id), + action_type="bridge_detached", + summary=( + f"本地 agent bridge 断开 (放弃中 session={len(abandoned)})" + if abandoned else "本地 agent bridge 断开" + ), + detail={ + "abandoned_sessions": [s.session_id for s in abandoned], + "bridge_version": bridge.bridge_version, + }, + ) + except Exception: + pass + + def has_bridge(self, agent_id: str) -> bool: + return agent_id in self._bridges + + def get_bridge_info(self, agent_id: str) -> dict[str, Any] | None: + bridge = self._bridges.get(agent_id) + if not bridge: + return None + return { + "agent_id": agent_id, + "bridge_version": bridge.bridge_version, + "adapters": list(bridge.adapters), + "capabilities": dict(bridge.capabilities), + "connected_at": bridge.connected_at.isoformat(), + "active_sessions": list(bridge.sessions.keys()), + } + + def list_connected(self) -> list[str]: + return list(self._bridges.keys()) + + # ── Session lifecycle (called from LLM loop) ────────────────────── + + async def start_session( + self, + agent_id: str, + session_id: str, + adapter: str, + prompt: str, + params: dict[str, Any] | None = None, + cwd: str | None = None, + env: dict[str, str] | None = None, + timeout_s: int = 1800, + ) -> tuple[asyncio.Queue, asyncio.Future]: + """Start a session on the attached bridge. Returns (event_queue, future). 
+ + Caller should: + - consume `event_queue` until it yields `EVENT_QUEUE_SENTINEL` + - await `future` for the final string result (raises on failure) + """ + bridge = self._bridges.get(agent_id) + if bridge is None: + raise BridgeUnavailable(f"no bridge attached for agent {agent_id}") + + if not _adapters.is_known_adapter(adapter): + raise SessionRejected(f"unknown adapter: {adapter}") + + if adapter not in bridge.adapters: + raise SessionRejected( + f"bridge does not advertise adapter {adapter!r} (has {bridge.adapters})" + ) + + if len(bridge.sessions) >= DEFAULT_MAX_CONCURRENT_SESSIONS: + raise SessionRejected( + f"bridge at concurrency cap ({DEFAULT_MAX_CONCURRENT_SESSIONS})" + ) + + if session_id in bridge.sessions: + raise SessionRejected(f"session {session_id} already active") + + loop = asyncio.get_event_loop() + future: asyncio.Future = loop.create_future() + events: asyncio.Queue = asyncio.Queue() + session = _Session( + session_id=session_id, + agent_id=agent_id, + adapter=adapter, + started_at=datetime.now(timezone.utc), + future=future, + events=events, + ) + bridge.sessions[session_id] = session + + frame = SessionStartFrame( + session_id=session_id, + adapter=adapter, + prompt=prompt, + params=params or {}, + cwd=cwd, + env=env or {}, + timeout_s=timeout_s, + ) + try: + await self._send(bridge, frame.model_dump(mode="json")) + except Exception as e: + bridge.sessions.pop(session_id, None) + raise BridgeDisconnected(f"failed to send session.start: {e}") + + logger.info( + f"[Dispatcher] session.start sent: agent={agent_id} " + f"session={session_id} adapter={adapter}" + ) + try: + await log_activity( + agent_id=uuid.UUID(agent_id), + action_type="local_session_start", + summary=f"派发本地 agent session ({adapter})", + detail={ + "session_id": session_id, + "adapter": adapter, + "prompt_preview": (prompt or "")[:200], + "cwd": cwd, + "timeout_s": timeout_s, + }, + ) + except Exception: + pass + return events, future + + async def send_input(self, agent_id: 
async def handle_inbound_frame(self, agent_id: str, frame: Any) -> None:
    """Route one parsed inbound bridge frame to the session it belongs to.

    Behaviour per frame type:

    - ``SessionEventFrame``: the event is appended to the session's queue.
    - ``SessionDoneFrame``: the session is removed from the bridge, a final
      ``status`` event plus the queue sentinel are enqueued, and the session
      future is resolved with ``frame.final_text``.
    - ``SessionErrorFrame``: the session is removed and failed through
      ``_fail_session`` with a ``RuntimeError``.
    - ``ToolCallFrame``: a reverse tool call is started in the background.

    Any other frame type is ignored. Frames for an agent without an attached
    bridge, or for a session that is no longer tracked, are dropped.

    Activity logging is best effort: a failure there is logged as a warning
    and never affects session completion.
    """
    bridge = self._bridges.get(agent_id)
    if bridge is None:
        logger.warning(f"[Dispatcher] frame from unknown agent {agent_id}: {type(frame).__name__}")
        return

    if isinstance(frame, SessionEventFrame):
        session = bridge.sessions.get(frame.session_id)
        if session is None:
            logger.warning(f"[Dispatcher] event for unknown session {frame.session_id}")
            return
        await session.events.put({"kind": frame.kind, "payload": frame.payload})

    elif isinstance(frame, SessionDoneFrame):
        session = bridge.sessions.pop(frame.session_id, None)
        if session is None:
            return
        diff_dict = frame.diff_summary.model_dump() if frame.diff_summary else None
        # final_text may be missing; never let a None break the bookkeeping
        # below once the consumer has already been told the session is done.
        final_len = len(frame.final_text or "")
        await session.events.put({
            "kind": "status",
            "payload": {
                "state": "done",
                "exit_code": frame.exit_code,
                "stats": frame.stats,
                "diff_summary": diff_dict,
            },
        })
        await session.events.put(EVENT_QUEUE_SENTINEL)
        if not session.future.done():
            session.future.set_result(frame.final_text)
        logger.info(
            f"[Dispatcher] session done: {frame.session_id} "
            f"final_len={final_len} exit={frame.exit_code}"
        )
        try:
            counts = diff_dict or {}
            files_changed = counts.get("files_changed")
            insertions = counts.get("insertions")
            deletions = counts.get("deletions")
            duration_s = (
                datetime.now(timezone.utc) - session.started_at
            ).total_seconds()
            summary = f"本地 agent session 完成 ({session.adapter}, exit={frame.exit_code})"
            if files_changed:
                summary += f"，改动 {files_changed} 个文件 (+{insertions}/-{deletions})"
            await log_activity(
                agent_id=uuid.UUID(agent_id),
                action_type="local_session_done",
                summary=summary,
                detail={
                    "session_id": frame.session_id,
                    "adapter": session.adapter,
                    "exit_code": frame.exit_code,
                    "final_len": final_len,
                    "duration_s": round(duration_s, 2),
                    "stats": frame.stats,
                    "diff_summary": diff_dict,
                },
            )
        except Exception as log_err:
            logger.warning(
                f"[Dispatcher] activity log failed for session {frame.session_id}: {log_err}"
            )

    elif isinstance(frame, SessionErrorFrame):
        session = bridge.sessions.pop(frame.session_id, None)
        if session is None:
            return
        self._fail_session(session, RuntimeError(frame.error))
        logger.warning(
            f"[Dispatcher] session error: {frame.session_id} err={frame.error!r}"
        )
        try:
            duration_s = (
                datetime.now(timezone.utc) - session.started_at
            ).total_seconds()
            await log_activity(
                agent_id=uuid.UUID(agent_id),
                action_type="local_session_error",
                summary=f"本地 agent session 报错 ({session.adapter}): {(frame.error or '')[:120]}",
                detail={
                    "session_id": frame.session_id,
                    "adapter": session.adapter,
                    "error": frame.error,
                    "duration_s": round(duration_s, 2),
                },
            )
        except Exception as log_err:
            logger.warning(
                f"[Dispatcher] activity log failed for session {frame.session_id}: {log_err}"
            )

    elif isinstance(frame, ToolCallFrame):
        # Reverse call: local agent is asking Clawith to run a tool.
        # The event loop only keeps weak references to tasks, so hold a
        # strong one until the call finishes; otherwise the task may be
        # garbage-collected mid-execution.
        pending = getattr(self, "_reverse_call_tasks", None)
        if pending is None:
            pending = self._reverse_call_tasks = set()
        task = asyncio.create_task(self._run_reverse_call(bridge, frame))
        pending.add(task)
        task.add_done_callback(pending.discard)

    # SessionAcceptedFrame / PongFrame: no action needed (logged only)
+ try: + await log_activity( + agent_id=uuid.UUID(bridge.agent_id), + action_type="reverse_tool_call", + summary=f"本地 agent 通过 bridge 发起反向调用: {frame.name}", + detail={ + "session_id": frame.session_id, + "reverse_call_id": frame.reverse_call_id, + "tool_name": frame.name, + "argument_keys": sorted(list(frame.arguments.keys()))[:20], + "adapter": session.adapter, + }, + ) + except Exception: + pass + + result: str + error: str | None = None + try: + from app.services.agent_tools import execute_tool as _execute_tool + result = await _execute_tool( + tool_name=frame.name, + arguments=frame.arguments, + agent_id=uuid.UUID(bridge.agent_id), + user_id=user_id, + session_id=frame.session_id, + ) + except Exception as e: + logger.exception(f"[Dispatcher] reverse tool {frame.name!r} failed: {e}") + result = "" + error = str(e) + + # Audit: record outcome so the log pair (intent + outcome) is + # complete and easy to correlate by reverse_call_id. + try: + await log_activity( + agent_id=uuid.UUID(bridge.agent_id), + action_type="reverse_tool_result", + summary=( + f"反向调用 {frame.name} 失败: {error[:100]}" + if error else f"反向调用 {frame.name} 完成" + ), + detail={ + "session_id": frame.session_id, + "reverse_call_id": frame.reverse_call_id, + "tool_name": frame.name, + "error": error, + "result_preview": (result or "")[:200] if error is None else None, + }, + ) + except Exception: + pass + + resp = ToolResponseFrame( + session_id=frame.session_id, + reverse_call_id=frame.reverse_call_id, + result=result if error is None else None, + error=error, + ) + try: + await self._send(bridge, resp.model_dump(mode="json")) + except Exception as e: + logger.warning(f"[Dispatcher] failed to deliver tool.response: {e}") + + async def _lookup_agent_creator(self, agent_id: str) -> uuid.UUID: + """Resolve the creator_id for an agent (used as user_id for reverse calls).""" + try: + from sqlalchemy import select + from app.database import async_session + from app.models.agent import Agent + + async 
def _fail_session(self, session: _Session, exc: BaseException) -> None:
    """Fail *session* with *exc* and signal its event consumer to stop.

    The session future receives *exc* unless it is already resolved. An
    ``error`` status event and then the queue sentinel are pushed onto the
    session's event queue; each push is best effort and any failure while
    building or enqueueing an item is ignored.
    """
    outcome = session.future
    if not outcome.done():
        outcome.set_exception(exc)

    # Items are built lazily so a failure while building one (e.g. in
    # str(exc)) is swallowed exactly like a failure while enqueueing it.
    item_factories = (
        lambda: {"kind": "status", "payload": {"state": "error", "error": str(exc)}},
        lambda: EVENT_QUEUE_SENTINEL,
    )
    for make_item in item_factories:
        try:
            session.events.put_nowait(make_item())
        except Exception:
            pass
pyinstaller +# pyinstaller clawith-bridge.spec --clean +# cp dist/clawith-bridge.exe ../backend/app/static/bridge/ +# +# The exe is gitignored (see root .gitignore). diff --git a/bridge/.gitignore b/bridge/.gitignore new file mode 100644 index 000000000..a49193853 --- /dev/null +++ b/bridge/.gitignore @@ -0,0 +1,10 @@ +# PyInstaller build artifacts +dist/ +build/ +*.spec.bak +.venv-build/ + +# Python +__pycache__/ +*.pyc +*.egg-info/ diff --git a/bridge/Dockerfile b/bridge/Dockerfile new file mode 100644 index 000000000..32711990b --- /dev/null +++ b/bridge/Dockerfile @@ -0,0 +1,29 @@ +FROM python:3.12-slim + +# Claude Code CLI and a couple of common dev tools +RUN apt-get update && apt-get install -y --no-install-recommends \ + git curl ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Claude Code CLI (npm-based) +RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \ + && apt-get update && apt-get install -y --no-install-recommends nodejs \ + && rm -rf /var/lib/apt/lists/* \ + && npm install -g @anthropic-ai/claude-code \ + && npm cache clean --force + +WORKDIR /app +COPY pyproject.toml ./ +COPY clawith_bridge ./clawith_bridge +RUN pip install --no-cache-dir . + +# Non-root; matches Claude Code's assumption that ~/.claude belongs to the runner +RUN useradd -m -u 1000 bridge +USER bridge +WORKDIR /home/bridge +RUN mkdir -p /home/bridge/workspace + +ENV CLAWITH_BRIDGE_SERVER="" \ + CLAWITH_BRIDGE_TOKEN="" + +ENTRYPOINT ["clawith-bridge"] diff --git a/bridge/README.md b/bridge/README.md new file mode 100644 index 000000000..0a71b95a1 --- /dev/null +++ b/bridge/README.md @@ -0,0 +1,165 @@ +# clawith-bridge + +Reverse-WS local agent session bridge for Clawith. The bridge runs on the +operator's workstation, dials into the Clawith server, and drives local CLIs +(Claude Code) or local daemons (Hermes, OpenClaw) on behalf of the server. + +## Install + +```bash +pip install . +``` + +Requires Python 3.10+. 
+ +## Configure + +Copy `clawith-bridge.toml.example` to `~/.clawith-bridge.toml` and edit: + +```toml +server = "wss://clawith.example.com" +token = "oc-your-agent-api-key" + +[claude_code] +enabled = true +executable = "claude" + +[hermes] +enabled = false +base_url = "http://127.0.0.1:7890" + +[openclaw] +enabled = false +base_url = "http://127.0.0.1:9000" +``` + +Or pass flags on the command line: + +```bash +clawith-bridge --server wss://clawith --token oc-xxx +``` + +Env vars: `CLAWITH_BRIDGE_SERVER`, `CLAWITH_BRIDGE_TOKEN`, +`CLAWITH_BRIDGE_ADAPTER_CLAUDE_CODE=1`. + +**API keys for the agents themselves** (`ANTHROPIC_API_KEY`, Hermes tokens, +etc.) are read from the local environment by the spawned CLIs / daemons. +Clawith never sees them. + +## Run + +```bash +clawith-bridge +``` + +You'll see log lines like: + +``` +server hello: v=1 ... +registered: adapters=['claude_code'] +``` + +At this point the agent is online on the Clawith side (visible in +`/api/admin/bridge/status`). Incoming chat messages from Clawith spawn Claude +Code on your machine and stream the session back. + +## Windows (NSSM) + +The bridge does not implement its own `daemon start|stop|status` subcommand +on Windows — use NSSM to install it as a service: + +```powershell +# Download nssm from https://nssm.cc +nssm install ClawithBridge "C:\Path\To\python.exe" "-m" "clawith_bridge" +nssm set ClawithBridge AppDirectory "C:\Path\To\workspace" +nssm set ClawithBridge AppEnvironmentExtra "CLAWITH_BRIDGE_SERVER=wss://clawith" "CLAWITH_BRIDGE_TOKEN=oc-xxx" +``` + +**Critical**: run the service under your **user account**, not LocalSystem. +NSSM → Log on tab → "This account" + `.\username` + password. 
Reason: + +- `claude` CLI needs your `~/.claude/` credentials, which are not accessible + from `C:\Windows\System32\config\systemprofile` (LocalSystem's `~`) +- `claude` needs to resolve via your user PATH — LocalSystem's PATH typically + doesn't contain npm's global bin directory + +See `~/.claude/settings.json` and the cc-connect project for more context on +this Windows constraint — it's not Clawith-specific. + +## Docker (optional) + +```bash +docker build -t clawith-bridge . +docker run --rm \ + -e CLAWITH_BRIDGE_SERVER=wss://clawith \ + -e CLAWITH_BRIDGE_TOKEN=oc-xxx \ + -e ANTHROPIC_API_KEY=sk-ant-xxx \ + -v $HOME/workspace:/home/bridge/workspace \ + clawith-bridge +``` + +The image ships with Claude Code CLI preinstalled but not Hermes/OpenClaw +(you'd typically run those on the host and set `base_url` to reach them). + +## Packaging the Windows `setup.exe` for Clawith's UI downloader + +Clawith's OpenClaw agent settings page offers a one-click "Download Windows +installer" button that serves a self-configuring `clawith-bridge-setup.exe`. +That exe is the pristine PyInstaller binary with a per-agent config trailer +(JSON + 8-byte magic `CLWB!END`) appended at EOF. The server rebuilds the +trailer on each download; the pristine binary itself is identical for every +user and every agent. + +To build and deploy the pristine exe so the downloader works: + +```bash +# 1. Create a Windows-native Python build env (WSL/mingw won't produce a +# native Windows PE). Run this on a Windows host. +cd bridge/ +python -m venv .venv-build +.venv-build\Scripts\activate +pip install -e . pyinstaller + +# 2. Build — produces dist/clawith-bridge.exe (~13–14 MB onefile) +pyinstaller clawith-bridge.spec --clean --noconfirm + +# 3. Deploy into the backend's static dir so /api/agents/{id}/bridge-installer +# can serve it. 
+cp dist/clawith-bridge.exe ../backend/app/static/bridge/ +``` + +The built binary is **not tracked in git** (`.gitignore` excludes +`backend/app/static/bridge/clawith-bridge.exe`) — each operator rebuilds it +from source. When the file is missing, the download endpoint returns HTTP +503 with a message pointing here. + +On macOS and Linux the downloader returns a bash script instead; no binary +packaging is needed on those platforms (the script pip-installs +`clawith-bridge` and registers launchd / systemd user services). + +## Stub smoke test + +`stub_bridge.py` is a standalone script that pretends to be a real bridge. +Use it to verify the server side (bridge_ws + session_dispatcher) is wired +up correctly before installing Claude Code CLI etc. + +```bash +pip install websockets pydantic +python stub_bridge.py --server ws://127.0.0.1:8000 --token oc-your-agent-key +``` + +Then from the Clawith chat UI, send a message to that agent. The stub replies +with a fake streaming conversation ending in `session.done` with a mock +`diff_summary`. + +## Adapters + +| Adapter | Shape | Notes | +|---------------|-------------|-------| +| `claude_code` | Subprocess | Spawns `claude --output-format=stream-json -p <prompt>`. Captures `assistant_text`, `tool_use`, `tool_result`, `thinking`. | +| `hermes` | HTTP daemon | POST `/tasks`, SSE `/tasks/{id}/events`, DELETE `/tasks/{id}`. Subclass `HermesAdapter` if your local Hermes API differs. | +| `openclaw` | HTTP daemon | POST `/v1/chat`, SSE `/v1/jobs/{id}/events`. Also supports inline responses (no job_id). | + +All three are instantiated per-session, so adapter classes can keep state +on `self`. The session manager cancels their running task on `session.cancel` +or on disconnect. 
diff --git a/bridge/clawith-bridge.spec b/bridge/clawith-bridge.spec new file mode 100644 index 000000000..9f20b53fe --- /dev/null +++ b/bridge/clawith-bridge.spec @@ -0,0 +1,62 @@ +# -*- mode: python ; coding: utf-8 -*- +# PyInstaller spec for clawith-bridge.exe (onefile, Windows console). +# +# Build: +# cd bridge/ +# pip install pyinstaller +# pyinstaller clawith-bridge.spec --clean +# +# Output: dist/clawith-bridge.exe +# +# The console window is visible when run from cmd (for `install` mode output) +# but hidden automatically when launched by Task Scheduler (see +# `_hide_console_if_service` in __main__.py). + +block_cipher = None + + +a = Analysis( + ['launcher.py'], + pathex=[], + binaries=[], + datas=[], + hiddenimports=[ + # Imported lazily only in install mode; static analysis misses it. + 'clawith_bridge.install_windows', + 'clawith_bridge.baked_config', + ], + hookspath=[], + hooksconfig={}, + runtime_hooks=[], + excludes=[ + # Shave size: these are transitive but never used in bridge. + # Don't exclude email/http/xml — httpx uses them internally. + 'tkinter', + 'unittest', + ], + noarchive=False, + cipher=block_cipher, +) + +pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) + +exe = EXE( + pyz, + a.scripts, + a.binaries, + a.datas, + [], + name='clawith-bridge', + debug=False, + bootloader_ignore_signals=False, + strip=False, + upx=True, + upx_exclude=[], + runtime_tmpdir=None, + console=True, + disable_windowed_traceback=False, + argv_emulation=False, + target_arch=None, + codesign_identity=None, + entitlements_file=None, +) diff --git a/bridge/clawith-bridge.toml.example b/bridge/clawith-bridge.toml.example new file mode 100644 index 000000000..c59ffa3ff --- /dev/null +++ b/bridge/clawith-bridge.toml.example @@ -0,0 +1,24 @@ +# Copy to ~/.clawith-bridge.toml and edit. 
+ +server = "ws://127.0.0.1:8000" +token = "oc-REPLACE_ME" + +max_concurrent_sessions = 4 +ping_interval = 25 +reconnect_min = 1.0 +reconnect_max = 60.0 + +[claude_code] +enabled = true +executable = "claude" # path to Claude Code CLI +# default_cwd = "/Users/me/workspace" # optional + +[hermes] +enabled = false +base_url = "http://127.0.0.1:7890" +# auth_header = "Bearer xxx" + +[openclaw] +enabled = false +base_url = "http://127.0.0.1:9000" +# auth_header = "Bearer xxx" diff --git a/bridge/clawith_bridge/__init__.py b/bridge/clawith_bridge/__init__.py new file mode 100644 index 000000000..ba332a6d9 --- /dev/null +++ b/bridge/clawith_bridge/__init__.py @@ -0,0 +1,4 @@ +"""clawith-bridge: reverse-WS local agent session bridge for Clawith.""" + +__version__ = "0.1.0" +PROTOCOL_VERSION = "1" diff --git a/bridge/clawith_bridge/__main__.py b/bridge/clawith_bridge/__main__.py new file mode 100644 index 000000000..c00bcc08f --- /dev/null +++ b/bridge/clawith_bridge/__main__.py @@ -0,0 +1,206 @@ +"""CLI entry point for clawith-bridge. + + clawith-bridge --server wss://clawith.example.com --token oc-xxxx + clawith-bridge --config ~/.clawith-bridge.toml + clawith-bridge install --server wss://... --token oc-xxxx --name "My Agent" + +Config resolution is documented in `config.py`: TOML file < env < CLI flags. + +The `install` subcommand is Windows-only and requires the PyInstaller-packaged +binary (it copies itself to %LOCALAPPDATA%\\Clawith\\bin\\ and registers a +user-scope scheduled task). On macOS/Linux, run the bridge directly or wrap it +with launchd/systemd yourself. +""" +from __future__ import annotations + +import argparse +import asyncio +import signal +import sys +from pathlib import Path + +from loguru import logger + +from . 
def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for ``clawith-bridge``.

    The top level accepts connection/config/logging options and ``--version``.
    A single ``install`` subcommand (stored in ``args.command``) takes its own
    required ``--server`` / ``--token`` plus ``--name`` and ``--adapter``.
    """
    parser = argparse.ArgumentParser(
        prog="clawith-bridge",
        description="Clawith local-agent bridge",
    )
    # Declared as data so the option list reads as a table; order is kept
    # because it determines the order shown in --help.
    top_level_options = (
        ("--server", {"help": "Clawith server WS root (e.g. wss://host)"}),
        ("--token", {"help": "Agent API key (oc-xxx)"}),
        (
            "--config",
            {
                "type": Path,
                "default": None,
                "help": f"Path to TOML config (default: {DEFAULT_CONFIG_PATH})",
            },
        ),
        ("--log-level", {"default": "INFO", "help": "DEBUG | INFO | WARNING | ERROR"}),
        ("--version", {"action": "version", "version": f"clawith-bridge {__version__}"}),
    )
    for flag, options in top_level_options:
        parser.add_argument(flag, **options)

    subcommands = parser.add_subparsers(dest="command")
    install_cmd = subcommands.add_parser(
        "install",
        help="Windows-only: install as a user scheduled task that auto-starts at logon",
    )
    install_options = (
        ("--server", {"required": True, "help": "Clawith server WS URL (wss://...)"}),
        ("--token", {"required": True, "help": "Agent API key (oc-xxx)"}),
        ("--name", {"default": "", "help": "Agent display name (shown in install log only)"}),
        (
            "--adapter",
            {
                "default": "claude_code",
                "choices": ("claude_code", "openclaw", "hermes"),
                "help": "Which adapter to enable in the generated TOML (default: claude_code)",
            },
        ),
    )
    for flag, options in install_options:
        install_cmd.add_argument(flag, **options)

    return parser
+ """ + if sys.platform != "win32": + return + try: + if sys.stdin and sys.stdin.isatty(): + return + except (AttributeError, OSError): + pass + try: + import ctypes + hwnd = ctypes.windll.kernel32.GetConsoleWindow() + if hwnd: + ctypes.windll.user32.ShowWindow(hwnd, 0) # SW_HIDE + except Exception: + pass + + +def _run(args: argparse.Namespace) -> int: + _hide_console_if_service() + cfg = load_config( + config_path=args.config, + cli_server=args.server, + cli_token=args.token, + ) + + if not cfg.token: + logger.error("No agent token configured. Pass --token or set CLAWITH_BRIDGE_TOKEN.") + return 2 + if not cfg.server: + logger.error("No server URL configured. Pass --server or set CLAWITH_BRIDGE_SERVER.") + return 2 + + enabled = cfg.enabled_adapters() + if not enabled: + logger.error( + "No adapters enabled. Enable at least one in the config file, e.g. " + "[claude_code] enabled = true" + ) + return 2 + logger.info(f"starting clawith-bridge {__version__}, adapters={enabled}, server={cfg.server}") + + client = BridgeClient(cfg) + + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + def _shutdown(*_): + logger.info("shutdown requested") + client.stop() + + try: + loop.add_signal_handler(signal.SIGINT, _shutdown) + loop.add_signal_handler(signal.SIGTERM, _shutdown) + except (NotImplementedError, RuntimeError): + # Windows asyncio doesn't support signal handlers in the selector loop + pass + + try: + loop.run_until_complete(client.run_forever()) + except KeyboardInterrupt: + _shutdown() + finally: + try: + pending = [t for t in asyncio.all_tasks(loop) if not t.done()] + for t in pending: + t.cancel() + if pending: + loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True)) + finally: + loop.close() + + return 0 + + +def _maybe_auto_install(args: argparse.Namespace) -> int | None: + """If this is a bare double-click of a setup.exe with baked config, + run the install flow and return its exit code. Otherwise return None. 
def main(argv: list[str] | None = None) -> int:
    """Parse *argv*, configure logging and dispatch to the selected mode.

    Modes, in priority order: the explicit ``install`` subcommand, the
    baked-config auto-install (when ``_maybe_auto_install`` returns an exit
    code), and finally the normal bridge run. Returns the exit code of
    whichever mode ran.
    """
    args = _build_parser().parse_args(argv)

    # Replace the default log sink with stderr at the requested level.
    logger.remove()
    logger.add(sys.stderr, level=args.log_level.upper())

    if args.command != "install":
        auto_rc = _maybe_auto_install(args)
        return _run(args) if auto_rc is None else auto_rc

    # Imported lazily: the installer module is only needed in install mode.
    from .install_windows import install

    install_kwargs = {
        "server": args.server,
        "token": args.token,
        "name": args.name,
        "adapter": getattr(args, "adapter", "claude_code"),
    }
    return install(**install_kwargs)
+""" +from .base import BaseAdapter, SessionEvent, SubprocessAdapter, DaemonAdapter + +__all__ = ["BaseAdapter", "SessionEvent", "SubprocessAdapter", "DaemonAdapter"] diff --git a/bridge/clawith_bridge/adapters/base.py b/bridge/clawith_bridge/adapters/base.py new file mode 100644 index 000000000..6a824bc10 --- /dev/null +++ b/bridge/clawith_bridge/adapters/base.py @@ -0,0 +1,331 @@ +"""Adapter base classes. + +`BaseAdapter` is the surface the session manager sees. Two concrete bases: + +- `SubprocessAdapter`: for CLI-shaped agents (Claude Code, Codex later). Spawns + a child process on start_session; parses its stdout; terminates on cancel. + Lifecycle is process-bound — process exit → session.done. + +- `DaemonAdapter`: for long-running local daemons (Hermes, OpenClaw). Assumes + HTTP-over-localhost; start = POST, stream = SSE / polling, cancel = DELETE. + Daemons don't "die" per-session, so completion is a server-reported signal. + +Most adapters should subclass one of these rather than BaseAdapter directly — +the two bases handle the messy plumbing (pipe reading, backoff, etc). +""" +from __future__ import annotations + +import abc +import asyncio +import os +import shlex +import signal +import subprocess +import sys +from dataclasses import dataclass, field +from typing import Any, AsyncIterator, ClassVar + + +@dataclass +class SessionEvent: + """What an adapter yields for each observed event.""" + kind: str # must match protocol.EventKind + payload: dict[str, Any] = field(default_factory=dict) + + +class BaseAdapter(abc.ABC): + name: ClassVar[str] = "base" + capabilities: ClassVar[dict[str, Any]] = {} + + def __init__(self, config: Any = None) -> None: + self.config = config + + @abc.abstractmethod + async def start_session( + self, + session_id: str, + prompt: str, + params: dict[str, Any], + cwd: str | None, + env: dict[str, str], + timeout_s: int, + ) -> AsyncIterator[SessionEvent]: + """Yield SessionEvent objects until the session ends. 
+ + Must terminate (return) when the session completes so the session + manager can emit session.done. Exceptions propagate as session.error. + """ + raise NotImplementedError + yield # type: ignore[unreachable] # pragma: no cover + + async def send_input(self, session_id: str, text: str) -> None: # pragma: no cover + raise NotImplementedError("adapter does not support interactive input") + + async def cancel(self, session_id: str, reason: str) -> None: # pragma: no cover + """Default: rely on asyncio task cancellation from the session manager.""" + return + + async def final_text(self, session_id: str) -> str: + """Return the final assistant text for this session (after it's done). + + Default: empty. Adapters that accumulate a canonical final response + (like Claude Code's terminal message) should override. + """ + return "" + + async def stats(self, session_id: str) -> dict[str, Any]: + return {} + + +# ── SubprocessAdapter ──────────────────────────────────────────────── + +class SubprocessAdapter(BaseAdapter): + """Base for CLI-shaped adapters. + + Subclasses: + - implement `build_command(prompt, params, cwd)` → (argv, stdin_bytes) + - implement `parse_stdout_line(line)` → iterable[SessionEvent] + - optionally override `parse_stderr_line(line)` and `final_text` + + The base class handles spawning, cancellation via terminate/kill, + and collecting stdout/stderr line-by-line. + """ + + name = "subprocess" + capabilities = {"interactive_input": False, "cancellation": True} + + # Grace period between terminate and kill when cancelling. 
+ KILL_GRACE_SEC = 5 + + def __init__(self, config: Any = None) -> None: + super().__init__(config) + self._procs: dict[str, asyncio.subprocess.Process] = {} + self._final_text: dict[str, str] = {} + + @abc.abstractmethod + def build_command( + self, + prompt: str, + params: dict[str, Any], + cwd: str | None, + ) -> tuple[list[str], bytes | None]: + """Return (argv, stdin_bytes) for the CLI invocation.""" + + def parse_stdout_line(self, line: str) -> list[SessionEvent]: + """Default: emit each line as a stdout_chunk.""" + return [SessionEvent(kind="stdout_chunk", payload={"text": line})] + + def parse_stderr_line(self, line: str) -> list[SessionEvent]: + """Default: emit as stderr_chunk.""" + return [SessionEvent(kind="stderr_chunk", payload={"text": line})] + + async def start_session( + self, + session_id: str, + prompt: str, + params: dict[str, Any], + cwd: str | None, + env: dict[str, str], + timeout_s: int, + ) -> AsyncIterator[SessionEvent]: + argv, stdin_bytes = self.build_command(prompt, params, cwd) + effective_env = {**os.environ, **(env or {})} + queue: asyncio.Queue[SessionEvent | None] = asyncio.Queue() + + try: + proc = await asyncio.create_subprocess_exec( + *argv, + cwd=cwd, + env=effective_env, + stdin=asyncio.subprocess.PIPE if stdin_bytes else None, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + except FileNotFoundError as e: + yield SessionEvent(kind="stderr_chunk", payload={"text": f"{argv[0]!r} not found: {e}"}) + return + self._procs[session_id] = proc + + if stdin_bytes: + try: + assert proc.stdin is not None + proc.stdin.write(stdin_bytes) + await proc.stdin.drain() + proc.stdin.close() + except Exception: + pass + + async def _drain(stream: asyncio.StreamReader | None, parser) -> None: + if stream is None: + return + while True: + raw = await stream.readline() + if not raw: + return + try: + line = raw.decode("utf-8", errors="replace").rstrip("\n") + except Exception: + continue + for ev in parser(line): + await 
class DaemonAdapter(BaseAdapter):
    """Base for adapters that talk to a local HTTP daemon.

    Subclasses:
      - override `start_session_request(prompt, params, cwd)` → returns task_id
      - override `iter_events(task_id)` → async iter of SessionEvent
      - override `cancel_request(task_id)` for cancellation

    The base class manages the httpx.AsyncClient lifecycle and task_id
    tracking, and concatenates the text of every `assistant_text` event so
    that `final_text` can return it after the session ends.
    """

    name = "daemon"
    capabilities = {"interactive_input": False, "cancellation": True}

    def __init__(self, config: Any = None) -> None:
        super().__init__(config)
        self._tasks: dict[str, str] = {}  # session_id -> daemon task_id
        self._final_text: dict[str, str] = {}  # session_id -> accumulated assistant text
        self._client = None  # httpx.AsyncClient, lazy-created

    async def _ensure_client(self):
        """Return the shared httpx.AsyncClient, creating it on first use.

        `base_url` and `auth_header` are read from `self.config` when present;
        `auth_header` is sent verbatim as the `Authorization` header.
        """
        if self._client is None:
            import httpx  # imported lazily so the module loads without httpx installed

            base_url = getattr(self.config, "base_url", None) if self.config else None
            auth_header = getattr(self.config, "auth_header", None) if self.config else None
            headers = {}
            if auth_header:
                headers["Authorization"] = auth_header
            # timeout=None: event streams are long-lived; the session deadline
            # is enforced in start_session instead.
            self._client = httpx.AsyncClient(base_url=base_url or "", headers=headers, timeout=None)
        return self._client

    async def aclose(self) -> None:
        """Close the HTTP client if one was created. Errors on close are ignored."""
        if self._client is not None:
            try:
                await self._client.aclose()
            except Exception:
                # Best-effort shutdown: nothing useful can be done with a close failure.
                pass
            self._client = None

    @abc.abstractmethod
    async def start_session_request(
        self,
        prompt: str,
        params: dict[str, Any],
        cwd: str | None,
    ) -> str:
        """POST to daemon, return daemon-local task_id."""

    @abc.abstractmethod
    async def iter_events(self, task_id: str) -> AsyncIterator[SessionEvent]:
        """Async iterate events until the daemon reports completion."""
        raise NotImplementedError
        yield  # type: ignore[unreachable]  # pragma: no cover

    async def cancel_request(self, task_id: str) -> None:  # pragma: no cover
        """Default noop. Override to DELETE / POST cancel to daemon."""
        return

    async def start_session(
        self,
        session_id: str,
        prompt: str,
        params: dict[str, Any],
        cwd: str | None,
        env: dict[str, str],  # daemons ignore env; local env is the daemon's own
        timeout_s: int,
    ) -> AsyncIterator[SessionEvent]:
        """Start a daemon task and relay its events until completion or timeout.

        Yields every event produced by `iter_events`. If the daemon cannot be
        started, or `timeout_s` seconds elapse, a single `stderr_chunk` event
        describing the problem is yielded and the stream ends.

        The deadline is enforced while *waiting* for the next event, so a
        daemon that goes silent cannot hang the session indefinitely.
        """
        try:
            task_id = await self.start_session_request(prompt, params, cwd)
        except Exception as e:
            yield SessionEvent(kind="stderr_chunk", payload={"text": f"daemon start failed: {e}"})
            return

        self._tasks[session_id] = task_id
        final_accum: list[str] = []
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout_s
        events = None
        try:
            events = self.iter_events(task_id).__aiter__()
            while True:
                remaining = deadline - loop.time()
                if remaining <= 0:
                    yield SessionEvent(kind="stderr_chunk", payload={"text": f"timeout after {timeout_s}s"})
                    break
                try:
                    ev = await asyncio.wait_for(events.__anext__(), timeout=remaining)
                except StopAsyncIteration:
                    break
                except asyncio.TimeoutError:
                    yield SessionEvent(kind="stderr_chunk", payload={"text": f"timeout after {timeout_s}s"})
                    break
                if ev.kind == "assistant_text":
                    final_accum.append(str(ev.payload.get("text", "")))
                yield ev
        finally:
            self._tasks.pop(session_id, None)
            self._final_text[session_id] = "".join(final_accum)
            # Close the event iterator so an open HTTP stream is released even
            # when we stop early (timeout, consumer went away).
            closer = getattr(events, "aclose", None)
            if closer is not None:
                try:
                    await closer()
                except Exception:
                    # Best-effort: the session result is already recorded.
                    pass

    async def cancel(self, session_id: str, reason: str) -> None:
        """Ask the daemon to cancel the task bound to `session_id`, if any.

        `reason` is accepted for interface compatibility and not forwarded.
        Failures from `cancel_request` are swallowed (best-effort cancel).
        """
        task_id = self._tasks.get(session_id)
        if not task_id:
            return
        try:
            await self.cancel_request(task_id)
        except Exception:
            pass

    async def final_text(self, session_id: str) -> str:
        """Return and forget the accumulated assistant text for `session_id`."""
        return self._final_text.pop(session_id, "")
def resolve_claude_executable(configured: str | None) -> list[str]:
    """Return an argv prefix that reliably invokes the Claude Code CLI.

    Three platform gotchas this handles:

    1. **Windows .cmd shim**: `npm install -g @anthropic-ai/claude-code`
       produces `%APPDATA%\\npm\\claude.cmd` — a Node wrapper, not a .exe.
       `asyncio.create_subprocess_exec` uses Win32 CreateProcess, which
       refuses .cmd/.bat. We detect and wrap with `cmd.exe /c`.
    2. **macOS launchd**: user agents launched at login start with a bare
       PATH like `/usr/bin:/bin:/usr/sbin:/sbin` — no Homebrew, no npm global,
       no nvm. We explicitly probe the usual install locations.
    3. **Linux systemd --user**: same story as launchd.

    Resolution order: configured value (as a file path, `~` expanded, then as
    a command name looked up on PATH) → `shutil.which("claude")` → well-known
    locations → bare `claude` (let PATH decide — this works when the bridge is
    run interactively from a login shell).

    Args:
        configured: The user-configured executable, or None/"claude" to use
            the default lookup.

    Returns:
        The argv prefix: `[path]`, or `["cmd.exe", "/c", path]` for Windows
        .cmd/.bat shims.
    """
    if configured and configured != "claude":
        expanded = os.path.expanduser(configured)
        wrapped = _wrap_if_windows_cmd(expanded)
        if wrapped:
            return wrapped
        # Not an existing file path: the user may have configured a command
        # name (e.g. "claude-dev") that lives on PATH. Honour it rather than
        # silently falling back to the default "claude".
        on_path = shutil.which(expanded)
        if on_path:
            return _wrap_if_windows_cmd(on_path) or [on_path]

    found = shutil.which("claude")
    if found:
        return _wrap_if_windows_cmd(found) or [found]

    if sys.platform == "win32":
        found = shutil.which("claude.cmd")
        if found:
            return _wrap_if_windows_cmd(found) or [found]

    for candidate in _well_known_claude_paths():
        if os.path.exists(candidate):
            return _wrap_if_windows_cmd(candidate) or [candidate]

    return ["claude"]
class ClaudeCodeAdapter(SubprocessAdapter):
    """Adapter that runs the Claude Code CLI and maps its stream-json output
    (one JSON object per stdout line) onto SessionEvent objects."""

    name = "claude_code"
    capabilities = {"interactive_input": False, "cancellation": True, "tool_calls": True}

    DEFAULT_EXECUTABLE = "claude"

    def __init__(self, config: Any = None) -> None:
        super().__init__(config)
        # Assistant text fragments collected for final_text(); see _session_finals.
        self._finals: dict[str, list[str]] = {}

    def build_command(
        self,
        prompt: str,
        params: dict[str, Any],
        cwd: str | None,
    ) -> tuple[list[str], bytes | None]:
        """Build the argv for one Claude Code run.

        Recognised `params` keys: `permission_mode`, `model`, `allowed_tools`
        (list) and `extra_args` (list, or a shell-style string split with
        `shlex.split`). `cwd` is not used here.

        Returns:
            `(argv, None)` — the second element is always None in this adapter.
        """
        params = params or {}
        configured = getattr(self.config, "executable", None) if self.config else None
        exe_prefix = resolve_claude_executable(configured)
        argv: list[str] = [*exe_prefix, "-p", prompt, "--output-format", "stream-json", "--verbose"]

        permission_mode = params.get("permission_mode")
        if permission_mode in ("acceptEdits", "bypassPermissions", "default", "plan"):
            argv.extend(["--permission-mode", permission_mode])

        model = params.get("model")
        if model:
            argv.extend(["--model", str(model)])

        allowed_tools = params.get("allowed_tools")
        if isinstance(allowed_tools, list) and allowed_tools:
            # str() each entry: params arrive as JSON, so items are not guaranteed to be strings.
            argv.extend(["--allowed-tools", ",".join(str(t) for t in allowed_tools)])

        extra_args = params.get("extra_args")
        if isinstance(extra_args, list):
            argv.extend(str(a) for a in extra_args)
        elif isinstance(extra_args, str) and extra_args.strip():
            argv.extend(shlex.split(extra_args))

        return argv, None

    def _session_finals(self, session_id: str) -> list[str]:
        # Hacky: we don't get session_id threaded into parse_stdout_line, so we
        # track finals keyed by process. Only one concurrent session per process,
        # and the session manager binds us to one session at a time, so this is
        # fine if the adapter is instantiated per session — see session_manager.
        return self._finals.setdefault("__current__", [])

    @staticmethod
    def _content_text(content: Any) -> str:
        """Flatten a tool_result `content` value to plain text.

        Lists are treated as content blocks: dict blocks contribute their
        `text` (missing/None becomes ""), other items are stringified, and the
        pieces are joined with newlines. None becomes "".
        """
        if isinstance(content, list):
            return "\n".join(
                str(c.get("text") or "") if isinstance(c, dict) else str(c) for c in content
            )
        return str(content) if content is not None else ""

    def parse_stdout_line(self, line: str) -> list[SessionEvent]:
        """Translate one stdout line into zero or more SessionEvents.

        Lines that are not JSON objects, and objects with an unrecognised
        `type`, are passed through as `stdout_chunk` events so nothing is lost.
        """
        line = line.strip()
        if not line:
            return []
        try:
            evt = json.loads(line)
        except json.JSONDecodeError:
            return [SessionEvent(kind="stdout_chunk", payload={"text": line})]
        if not isinstance(evt, dict):
            # Valid JSON that is not an object (e.g. `42`, `"text"`, `null`):
            # treat it like any other plain output instead of crashing on .get().
            return [SessionEvent(kind="stdout_chunk", payload={"text": line})]

        kind = evt.get("type")
        subtype = evt.get("subtype")

        # Accumulator for the final answer
        finals = self._session_finals("__current__")

        if kind == "text":
            text = evt.get("text", "")
            if text:
                finals.append(text)
                return [SessionEvent(kind="assistant_text", payload={"text": text})]
            return []
        if kind == "thinking":
            text = evt.get("thinking") or evt.get("text") or ""
            return [SessionEvent(kind="thinking", payload={"text": text})]
        if kind == "tool_use":
            return [SessionEvent(
                kind="tool_call_start",
                payload={
                    "name": evt.get("name", ""),
                    "args": evt.get("input", {}),
                    "tool_use_id": evt.get("id") or evt.get("tool_use_id"),
                },
            )]
        if kind == "tool_result":
            # Newer Claude Code emits content as a list of blocks.
            return [SessionEvent(
                kind="tool_call_result",
                payload={
                    "tool_use_id": evt.get("tool_use_id"),
                    "result": self._content_text(evt.get("content")),
                    "is_error": bool(evt.get("is_error")),
                },
            )]
        if kind == "assistant" and isinstance(evt.get("message"), dict):
            # The 2024+ format wraps content: {"message": {"content": [...]}}.
            out: list[SessionEvent] = []
            for block in evt["message"].get("content", []) or []:
                if not isinstance(block, dict):
                    continue
                btype = block.get("type")
                if btype == "text":
                    t = block.get("text", "")
                    if t:
                        finals.append(t)
                        out.append(SessionEvent(kind="assistant_text", payload={"text": t}))
                elif btype == "tool_use":
                    out.append(SessionEvent(
                        kind="tool_call_start",
                        payload={
                            "name": block.get("name", ""),
                            "args": block.get("input", {}),
                            "tool_use_id": block.get("id"),
                        },
                    ))
                elif btype == "thinking":
                    out.append(SessionEvent(
                        kind="thinking", payload={"text": block.get("thinking", "")},
                    ))
            return out
        if kind == "user" and isinstance(evt.get("message"), dict):
            # tool_result blocks inside a user turn
            out = []
            for block in evt["message"].get("content", []) or []:
                if isinstance(block, dict) and block.get("type") == "tool_result":
                    out.append(SessionEvent(
                        kind="tool_call_result",
                        payload={
                            "tool_use_id": block.get("tool_use_id"),
                            "result": self._content_text(block.get("content")),
                            "is_error": bool(block.get("is_error")),
                        },
                    ))
            return out
        if kind == "result":
            # Terminal frame — carries totals
            stats_payload = {
                "state": "done",
                "exit_code": 0 if subtype == "success" else 1,
                "total_cost_usd": evt.get("total_cost_usd"),
                "duration_ms": evt.get("duration_ms"),
                "num_turns": evt.get("num_turns"),
            }
            return [SessionEvent(kind="status", payload=stats_payload)]
        if kind == "system":
            return [SessionEvent(kind="status", payload={"state": "init", **{k: v for k, v in evt.items() if k != "type"}})]

        # Unknown — pass through as stdout_chunk for visibility.
        return [SessionEvent(kind="stdout_chunk", payload={"text": line})]

    async def final_text(self, session_id: str) -> str:
        """Return the concatenated assistant text collected so far and reset it."""
        text = "".join(self._finals.pop("__current__", []))
        return text
resp.aiter_lines(): + if not raw: + continue + line = raw.strip() + if line.startswith("data:"): + line = line[5:].strip() + if not line: + continue + try: + evt = json.loads(line) + except json.JSONDecodeError: + yield SessionEvent(kind="stdout_chunk", payload={"text": line}) + continue + kind = evt.get("kind") + payload = evt.get("payload") or {} + if kind == "done": + return + if not isinstance(kind, str): + continue + yield SessionEvent(kind=kind, payload=payload) + + async def cancel_request(self, task_id: str) -> None: + client = await self._ensure_client() + try: + await client.delete(f"/tasks/{task_id}") + except Exception: + pass diff --git a/bridge/clawith_bridge/adapters/openclaw.py b/bridge/clawith_bridge/adapters/openclaw.py new file mode 100644 index 000000000..a88ba124a --- /dev/null +++ b/bridge/clawith_bridge/adapters/openclaw.py @@ -0,0 +1,104 @@ +"""OpenClaw adapter — wraps a local OpenClaw daemon. + +OpenClaw in its "local" mode is the flavor this bridge is meant to eventually +replace. During the transition, two shapes are supported via config: + + 1. `mode = "http"` (default): OpenClaw exposes + POST /v1/chat { "messages": [...] } -> { "job_id": "..." } + GET /v1/jobs/{id}/events (SSE) + Configure with `base_url` + optional `auth_header`. + + 2. `mode = "subprocess"`: Spawn `openclaw run --prompt=...` (for dev setups + that don't run a long-lived daemon). + +V1 implements the HTTP path; the subprocess path is left as a small shim you +can wire up by overriding `build_command` (SubprocessAdapter). 
class OpenClawAdapter(DaemonAdapter):
    """Adapter for a local OpenClaw daemon reached over HTTP (`/v1/chat`,
    `/v1/jobs/{id}/events`, `/v1/jobs/{id}/cancel`)."""

    name = "openclaw"
    capabilities = {"interactive_input": False, "cancellation": True}

    async def start_session_request(
        self,
        prompt: str,
        params: dict[str, Any],
        cwd: str | None,
    ) -> str:
        """POST the prompt to `/v1/chat` and return a task id.

        Returns the daemon's `job_id`/`id`. When the daemon answers inline
        (content but no job id), the content is packed into a synthetic
        `inline:<json>` id that `iter_events` replays without a network call.
        `cwd` is not sent to the daemon.

        Raises:
            RuntimeError: the response carries neither a job id nor content.
        """
        client = await self._ensure_client()
        body = {"messages": [{"role": "user", "content": prompt}], "params": params or {}}
        r = await client.post("/v1/chat", json=body)
        r.raise_for_status()
        data = r.json()
        if not isinstance(data, dict):
            raise RuntimeError(f"OpenClaw start response missing job_id: {data}")

        job_id = data.get("job_id") or data.get("id")
        if job_id:
            return str(job_id)

        # OpenClaw may return the response inline (no job_id) for fast paths.
        # `message` is only consulted when it is an object; a string/null
        # value must not crash the lookup.
        message = data.get("message")
        nested = message.get("content") if isinstance(message, dict) else None
        content = data.get("content") or nested or ""
        if content:
            # Stash so iter_events can flush it immediately without hitting the network.
            return f"inline:{json.dumps({'content': content})}"
        raise RuntimeError(f"OpenClaw start response missing job_id: {data}")

    async def iter_events(self, task_id: str) -> AsyncIterator[SessionEvent]:
        """Yield events for `task_id` until the daemon signals completion.

        Completion is a `[DONE]` line, an event whose `kind` is `done` or
        `finished` (that event is still yielded), or the end of the stream.
        Lines that are not JSON objects are surfaced as `stdout_chunk`.
        """
        if task_id.startswith("inline:"):
            # Synchronous response path — emit once and finish.
            try:
                payload = json.loads(task_id[len("inline:"):])
            except json.JSONDecodeError:
                payload = {}
            content = payload.get("content") if isinstance(payload, dict) else None
            if content:
                yield SessionEvent(kind="assistant_text", payload={"text": content})
            return

        client = await self._ensure_client()
        async with client.stream("GET", f"/v1/jobs/{task_id}/events") as resp:
            resp.raise_for_status()
            async for raw in resp.aiter_lines():
                if not raw:
                    continue
                line = raw.strip()
                if line.startswith("data:"):
                    line = line[5:].strip()
                if line == "[DONE]":
                    return
                if not line:
                    continue
                try:
                    evt = json.loads(line)
                except json.JSONDecodeError:
                    yield SessionEvent(kind="stdout_chunk", payload={"text": line})
                    continue
                if not isinstance(evt, dict):
                    # JSON scalar/array: not an event object, pass it through as text.
                    yield SessionEvent(kind="stdout_chunk", payload={"text": line})
                    continue
                # Map OpenClaw-native fields to our taxonomy.
                if "delta" in evt:
                    yield SessionEvent(kind="assistant_text", payload={"text": str(evt["delta"])})
                    continue
                if "content" in evt and "role" in evt:
                    yield SessionEvent(kind="assistant_text", payload={"text": str(evt["content"])})
                    continue
                kind = evt.get("kind")
                if isinstance(kind, str):
                    yield SessionEvent(kind=kind, payload=evt.get("payload") or {})
                    if kind in ("done", "finished"):
                        return

    async def cancel_request(self, task_id: str) -> None:
        """POST a cancel for `task_id`; inline tasks have nothing to cancel.

        Request failures are ignored (best-effort cancel).
        """
        if task_id.startswith("inline:"):
            return
        client = await self._ensure_client()
        try:
            await client.post(f"/v1/jobs/{task_id}/cancel")
        except Exception:
            pass
The install flow then strips the trailer from the +copy it drops into %LOCALAPPDATA%\\Clawith\\bin\\ so subsequent service-mode +launches don't re-trigger install. + +Trailer layout (read from end of file, backwards): + + [...pristine exe bytes...] + [json utf-8 blob ] <- variable length + [4 bytes pristine_len BE ] <- uint32, offset where the trailer starts + [8 bytes magic "CLWB!END"] <- literal bytes at EOF + +PE loaders only read what section headers point at, so an overlay at EOF is +ignored by the Windows loader. Appending doesn't break PyInstaller bootstrap. + +This module stays dependency-free (only stdlib) so it's cheap to import from +both the server (which generates trailers) and the bridge (which reads them). +""" +from __future__ import annotations + +import json +import sys +from pathlib import Path +from typing import Any + +MAGIC: bytes = b"CLWB!END" +TRAILER_LEN_BYTES = 4 # uint32 big-endian for pristine_len +TRAILER_FIXED_SUFFIX = TRAILER_LEN_BYTES + len(MAGIC) # = 12 + + +def build_trailer(config: dict[str, Any], pristine_len: int) -> bytes: + """Encode `config` as JSON and produce the trailer bytes to append. + + Caller is responsible for supplying `pristine_len` = length of the original + exe in bytes (i.e. the offset where the trailer starts after concatenation). + """ + blob = json.dumps(config, ensure_ascii=False, separators=(",", ":")).encode("utf-8") + return blob + pristine_len.to_bytes(TRAILER_LEN_BYTES, "big") + MAGIC + + +def read_baked_config(exe_path: Path | None = None) -> dict[str, Any] | None: + """Return baked config dict if the exe has a trailer, else None. + + Defaults to reading `sys.executable`. Safe to call on any file; returns + None for files with no trailer, unreadable files, or malformed trailers. 
+ """ + path = exe_path or Path(sys.executable) + try: + data = path.read_bytes() + except OSError: + return None + + if len(data) < TRAILER_FIXED_SUFFIX: + return None + if data[-len(MAGIC):] != MAGIC: + return None + + len_start = -TRAILER_FIXED_SUFFIX + len_end = -len(MAGIC) + pristine_len = int.from_bytes(data[len_start:len_end], "big") + if pristine_len <= 0 or pristine_len > len(data) - TRAILER_FIXED_SUFFIX: + return None + + blob = data[pristine_len:len_start] + try: + obj = json.loads(blob.decode("utf-8")) + except (UnicodeDecodeError, json.JSONDecodeError): + return None + if not isinstance(obj, dict): + return None + return obj + + +def strip_trailer(exe_path: Path) -> bool: + """If the file at exe_path has a trailer, truncate it to pristine bytes. + + Returns True if a trailer was found and stripped, False otherwise. + Never raises on "no trailer" — only on filesystem errors from truncate(). + """ + try: + with exe_path.open("rb") as f: + f.seek(0, 2) + size = f.tell() + if size < TRAILER_FIXED_SUFFIX: + return False + f.seek(-TRAILER_FIXED_SUFFIX, 2) + suffix = f.read(TRAILER_FIXED_SUFFIX) + except OSError: + return False + + if suffix[-len(MAGIC):] != MAGIC: + return False + pristine_len = int.from_bytes(suffix[:TRAILER_LEN_BYTES], "big") + if pristine_len <= 0 or pristine_len >= size: + return False + + with exe_path.open("r+b") as f: + f.truncate(pristine_len) + return True diff --git a/bridge/clawith_bridge/config.py b/bridge/clawith_bridge/config.py new file mode 100644 index 000000000..b047928e8 --- /dev/null +++ b/bridge/clawith_bridge/config.py @@ -0,0 +1,132 @@ +"""Bridge configuration: CLI args + env + optional TOML file. + +Resolution order (last wins): + 1. Defaults + 2. ~/.clawith-bridge.toml (if present) + 3. Environment variables (CLAWITH_BRIDGE_*) + 4. CLI flags + +The token (agent API key) and server URL are the only required bits. 
@dataclass
class BridgeConfig:
    """Top-level bridge settings plus one AdapterConfig per known adapter."""

    # Connection
    server: str = "ws://127.0.0.1:8000"
    token: str = ""
    bridge_version: str = "clawith-bridge/0.1.0"
    max_concurrent_sessions: int = 4
    ping_interval: int = 25
    reconnect_min: float = 1.0
    reconnect_max: float = 60.0

    # Adapters: claude_code is on by default, the daemon adapters are opt-in.
    claude_code: AdapterConfig = field(default_factory=AdapterConfig)
    hermes: AdapterConfig = field(default_factory=lambda: AdapterConfig(enabled=False))
    openclaw: AdapterConfig = field(default_factory=lambda: AdapterConfig(enabled=False))

    def enabled_adapters(self) -> list[str]:
        """Return the names of enabled adapters, in the fixed order
        claude_code, hermes, openclaw."""
        candidates = (
            ("claude_code", self.claude_code),
            ("hermes", self.hermes),
            ("openclaw", self.openclaw),
        )
        return [label for label, adapter in candidates if adapter.enabled]
def _apply_toml(cfg: BridgeConfig, data: dict[str, Any]) -> None:
    """Overlay values parsed from the TOML file onto `cfg` in place.

    Top-level keys are copied (numeric ones coerced with int()/float(), which
    raise ValueError/TypeError on bad values). Each adapter section
    (`claude_code`, `hermes`, `openclaw`) sets the known AdapterConfig fields;
    any other key in the section is stored in that adapter's `extra` dict.
    Sections that are not tables are ignored.
    """
    if not data:
        return
    for k in ("server", "token", "bridge_version"):
        if k in data:
            setattr(cfg, k, data[k])
    for k in ("max_concurrent_sessions", "ping_interval"):
        if k in data:
            setattr(cfg, k, int(data[k]))
    for k in ("reconnect_min", "reconnect_max"):
        if k in data:
            setattr(cfg, k, float(data[k]))

    passthrough = ("executable", "default_cwd", "base_url", "auth_header")
    known = {"enabled", *passthrough}
    for name in ("claude_code", "hermes", "openclaw"):
        section = data.get(name)
        if not isinstance(section, dict):
            continue
        adapter_cfg = getattr(cfg, name)
        if "enabled" in section:
            raw = section["enabled"]
            if isinstance(raw, str):
                # A quoted value such as enabled = "false" must not count as
                # truthy; use the same off-words the env override accepts.
                adapter_cfg.enabled = raw.strip().lower() not in ("", "0", "false", "no", "off")
            else:
                adapter_cfg.enabled = bool(raw)
        for key in passthrough:
            if key in section:
                setattr(adapter_cfg, key, section[key])
        for k, v in section.items():
            if k not in known:
                adapter_cfg.extra[k] = v
class BridgeClient:
    """Owns the WebSocket to the server: dial, register, dispatch inbound
    frames, and reconnect with exponential backoff after any failure."""

    def __init__(self, config: BridgeConfig) -> None:
        self.config = config
        self._ws = None  # live websocket while connected, else None
        self._stopped = asyncio.Event()
        self._reverse = ReverseCallClient()
        self._session_mgr: SessionManager | None = None

    async def run_forever(self) -> None:
        """Connect and serve until `stop()` is called.

        Connection, network and unexpected errors are logged and followed by
        a jittered exponential backoff; cancellation is propagated.
        """
        backoff = self.config.reconnect_min
        while not self._stopped.is_set():
            try:
                await self._connect_and_run()
                backoff = self.config.reconnect_min  # reset on clean close
            except asyncio.CancelledError:
                raise
            except ConnectionClosed as e:
                logger.warning(f"connection closed: code={e.code} reason={e.reason!r}")
            except OSError as e:
                logger.warning(f"network error: {e}")
            except Exception as e:
                logger.exception(f"bridge loop error: {e}")

            if self._stopped.is_set():
                break

            # Exponential backoff with jitter
            sleep_for = min(backoff, self.config.reconnect_max)
            jitter = sleep_for * 0.2 * random.random()
            logger.info(f"reconnecting in {sleep_for + jitter:.1f}s …")
            try:
                await asyncio.wait_for(self._stopped.wait(), timeout=sleep_for + jitter)
                break  # stop requested
            except asyncio.TimeoutError:
                pass
            backoff = min(backoff * 2, self.config.reconnect_max)

    def stop(self) -> None:
        """Request shutdown; takes effect at the next reconnect decision."""
        self._stopped.set()

    # ── Internals ─────────────────────────────────────────────────────

    async def _connect_and_run(self) -> None:
        """Dial the server, complete the hello/register handshake, then serve
        frames until the connection ends."""
        from urllib.parse import quote

        endpoint = f"{self.config.server.rstrip('/')}/ws/bridge"
        # Percent-encode the token so characters like '&', '#' or spaces
        # cannot corrupt the query string.
        url = f"{endpoint}?token={quote(self.config.token, safe='')}"
        # Log a fixed placeholder instead of str.replace(token, ...): with an
        # empty token, replace("") would scatter '***' through the whole URL.
        logger.info(f"dialing: {endpoint}?token=***")
        async with websockets.connect(
            url,
            max_size=2 * 1024 * 1024,
            ping_interval=self.config.ping_interval,
            ping_timeout=self.config.ping_interval * 2,
        ) as ws:
            self._ws = ws
            self._session_mgr = SessionManager(self.config, self._send_model)

            # 1. Expect server hello
            hello_raw = await asyncio.wait_for(ws.recv(), timeout=15)
            hello = json.loads(hello_raw)
            if not isinstance(hello, dict) or hello.get("type") != "hello":
                logger.error(f"unexpected first frame from server: {hello}")
                return
            logger.info(f"server hello: v={hello.get('v')} server_time={hello.get('server_time')}")

            # 2. Send bridge.register
            adapters = self.config.enabled_adapters()
            register = BridgeRegisterFrame(
                bridge_version=f"clawith-bridge/{__version__}",
                adapters=adapters,
                capabilities={
                    "interactive_input": False,
                    "cancellation": True,
                    "reverse_tools": True,
                    "protocol_version": PROTOCOL_VERSION,
                },
            )
            await self._send_model(register)
            logger.info(f"registered: adapters={adapters}")

            # 3. Enter read loop
            await self._read_loop(ws)

    async def _read_loop(self, ws: Any) -> None:
        """Dispatch every inbound frame; on exit fail all in-flight sessions
        and pending reverse calls and drop the connection state."""
        try:
            async for raw in ws:
                try:
                    data = json.loads(raw)
                except json.JSONDecodeError:
                    logger.warning("received non-JSON frame, ignoring")
                    continue
                await self._handle_frame(data)
        finally:
            if self._session_mgr:
                await self._session_mgr.fail_all("bridge disconnected")
            self._reverse.fail_all(ConnectionError("bridge disconnected"))
            self._ws = None
            self._session_mgr = None

    async def _handle_frame(self, data: dict[str, Any]) -> None:
        """Route one decoded frame to the session manager or reverse-call client."""
        session_mgr = self._session_mgr
        if session_mgr is None:
            # Explicit guard rather than `assert`, which is stripped under -O.
            logger.warning("frame received without an active session manager, ignoring")
            return

        frame = parse_inbound(data)
        if frame is None:
            t = data.get("type")
            if t == "ping":
                await self._send_raw({"type": "pong"})
            else:
                logger.debug(f"unknown frame from server: type={t!r}")
            return

        if isinstance(frame, HelloFrame):
            logger.info(f"second hello received (server resynced?): {frame.server_time}")
            return

        if isinstance(frame, SessionStartFrame):
            await session_mgr.start(
                session_id=frame.session_id,
                adapter=frame.adapter,
                prompt=frame.prompt,
                params=frame.params,
                cwd=frame.cwd,
                env=frame.env,
                timeout_s=frame.timeout_s,
            )
            return

        if isinstance(frame, SessionInputFrame):
            await session_mgr.send_input(frame.session_id, frame.text)
            return

        if isinstance(frame, SessionCancelFrame):
            await session_mgr.cancel(frame.session_id, frame.reason)
            return

        if isinstance(frame, ToolResponseFrame):
            self._reverse.resolve(frame.reverse_call_id, frame.result, frame.error)
            return

        logger.debug(f"unhandled inbound frame: {type(frame).__name__}")

    async def _send_model(self, model: BaseModel) -> None:
        """Serialize a pydantic frame to JSON-compatible data and send it."""
        await self._send_raw(model.model_dump(mode="json"))

    async def _send_raw(self, payload: dict[str, Any]) -> None:
        """Send `payload` as a JSON text frame.

        Raises:
            ConnectionError: there is no live connection.
        """
        ws = self._ws
        if ws is None:
            raise ConnectionError("not connected")
        await ws.send(json.dumps(payload))
async def _run(cmd: list[str], cwd: str) -> tuple[int, str, str]:
    """Run `cmd` in `cwd` and capture its output.

    Returns `(exit_code, stdout, stderr)` with both streams decoded as UTF-8
    (undecodable bytes replaced). If spawning raises FileNotFoundError the
    result is `(127, "", "command not found")`.
    """
    pipe = asyncio.subprocess.PIPE
    try:
        child = await asyncio.create_subprocess_exec(*cmd, cwd=cwd, stdout=pipe, stderr=pipe)
        raw_out, raw_err = await child.communicate()
    except FileNotFoundError:
        return 127, "", "command not found"

    exit_code = child.returncode or 0
    stdout_text = raw_out.decode("utf-8", errors="replace")
    stderr_text = raw_err.decode("utf-8", errors="replace")
    return exit_code, stdout_text, stderr_text
"__pycache__", ".venv", "venv", ".tox", "dist", "build"} + for root, dirs, files in os.walk(cwd): + dirs[:] = [d for d in dirs if d not in skip_dirs and not d.startswith(".")] + for f in files: + full = Path(root) / f + try: + st = full.stat() + except OSError: + continue + rel = str(full.relative_to(cwd)) + out[rel] = (st.st_size, st.st_mtime) + count += 1 + if count >= max_files: + return out + return out + + +async def snapshot(cwd: str | None) -> _GitSnapshot | _MtimeSnapshot | None: + if not cwd: + return None + if not os.path.isdir(cwd): + return None + if await _is_git_repo(cwd): + head = await _git_head(cwd) + return _GitSnapshot(cwd=cwd, head=head) + files = await asyncio.to_thread(_mtime_scan, Path(cwd)) + return _MtimeSnapshot(cwd=cwd, files=files) + + +_NUMSTAT_LINE = re.compile(r"^(\d+|-)\s+(\d+|-)\s+(.+)$") + + +async def _git_diff_summary(snap: _GitSnapshot) -> dict[str, Any]: + # Include both committed changes (HEAD..HEAD) — N/A here, snap.head == current HEAD + # and uncommitted changes (working tree + index vs. HEAD). 
+ rc, out, err = await _run(["git", "diff", "--numstat", snap.head], cwd=snap.cwd) + if rc != 0: + return {"files_changed": 0, "insertions": 0, "deletions": 0, "files": [], "warning": err.strip()} + files: list[dict[str, Any]] = [] + total_ins = 0 + total_del = 0 + for line in out.splitlines(): + m = _NUMSTAT_LINE.match(line.strip()) + if not m: + continue + ins_raw, del_raw, path = m.groups() + ins = int(ins_raw) if ins_raw.isdigit() else 0 + dels = int(del_raw) if del_raw.isdigit() else 0 + total_ins += ins + total_del += dels + files.append({"path": path, "+": ins, "-": dels}) + # Untracked files — show as created + rc2, out2, _ = await _run(["git", "ls-files", "--others", "--exclude-standard"], cwd=snap.cwd) + if rc2 == 0: + for path in out2.splitlines(): + path = path.strip() + if not path: + continue + files.append({"path": path, "+": 0, "-": 0, "status": "untracked"}) + return { + "files_changed": len(files), + "insertions": total_ins, + "deletions": total_del, + "files": files[:200], + } + + +async def _mtime_diff_summary(snap: _MtimeSnapshot) -> dict[str, Any]: + after = await asyncio.to_thread(_mtime_scan, Path(snap.cwd)) + before = snap.files + files: list[dict[str, Any]] = [] + total_delta = 0 + for path, (size, mtime) in after.items(): + if path not in before: + files.append({"path": path, "+": size, "-": 0, "status": "created"}) + total_delta += size + else: + old_size, old_mtime = before[path] + if mtime != old_mtime or size != old_size: + delta = size - old_size + files.append({ + "path": path, + "+": max(0, delta), + "-": max(0, -delta), + "status": "modified", + }) + total_delta += abs(delta) + for path in before.keys() - after.keys(): + old_size, _ = before[path] + files.append({"path": path, "+": 0, "-": old_size, "status": "deleted"}) + total_delta += old_size + return { + "files_changed": len(files), + "insertions": 0, # mtime scan doesn't know line counts + "deletions": 0, + "files": files[:200], + "note": "mtime-based summary; install git 
for line-level counts", + } + + +async def diff_summary(snap: _GitSnapshot | _MtimeSnapshot | None) -> dict[str, Any] | None: + if snap is None: + return None + if isinstance(snap, _GitSnapshot): + try: + return await _git_diff_summary(snap) + except Exception as e: + return {"files_changed": 0, "insertions": 0, "deletions": 0, "files": [], "warning": f"git diff failed: {e}"} + if isinstance(snap, _MtimeSnapshot): + try: + return await _mtime_diff_summary(snap) + except Exception as e: + return {"files_changed": 0, "insertions": 0, "deletions": 0, "files": [], "warning": f"mtime scan failed: {e}"} + return None diff --git a/bridge/clawith_bridge/install_windows.py b/bridge/clawith_bridge/install_windows.py new file mode 100644 index 000000000..a4349dd60 --- /dev/null +++ b/bridge/clawith_bridge/install_windows.py @@ -0,0 +1,174 @@ +"""One-shot Windows installer. + +Copies the running frozen .exe to %LOCALAPPDATA%\\Clawith\\bin\\, writes +~/.clawith-bridge.toml (UTF-8 no BOM), and registers a user-scope scheduled +task that auto-starts the bridge at logon. + +Only works when invoked from the PyInstaller-frozen binary (sys.frozen == True). +From a source checkout, install manually via `pip install -e .` + scheduled task. 
"""One-shot Windows installer.

Copies the running frozen .exe to %LOCALAPPDATA%\\Clawith\\bin\\, writes
~/.clawith-bridge.toml (UTF-8 no BOM), and registers a user-scope scheduled
task that auto-starts the bridge at logon.

Only works when invoked from the PyInstaller-frozen binary (sys.frozen == True).
From a source checkout, install manually via `pip install -e .` + scheduled task.
"""
from __future__ import annotations

import base64
import os
import shutil
import subprocess
import sys
from pathlib import Path
from textwrap import dedent

from .baked_config import strip_trailer

TASK_NAME = "ClawithBridge"


def _install_dir() -> Path:
    """Return the per-user directory the bridge executable is installed into."""
    base = os.environ.get("LOCALAPPDATA") or str(Path.home() / "AppData" / "Local")
    return Path(base) / "Clawith" / "bin"


def _config_path() -> Path:
    """Return the path of the bridge config file in the user's home directory."""
    return Path.home() / ".clawith-bridge.toml"


_KNOWN_ADAPTERS = ("claude_code", "openclaw", "hermes")

# Short escapes defined for TOML basic strings.
_TOML_ESCAPES = {
    "\\": "\\\\",
    '"': '\\"',
    "\b": "\\b",
    "\t": "\\t",
    "\n": "\\n",
    "\f": "\\f",
    "\r": "\\r",
}


def _toml_str(value: str) -> str:
    """Render ``value`` as a quoted TOML basic string.

    Backslashes, double quotes and control characters are escaped so that
    an unusual server URL or token cannot break out of the string and
    corrupt the config file. Plain values are emitted unchanged.
    """
    parts: list[str] = []
    for ch in value:
        if ch in _TOML_ESCAPES:
            parts.append(_TOML_ESCAPES[ch])
        elif ord(ch) < 0x20 or ord(ch) == 0x7F:
            parts.append(f"\\u{ord(ch):04X}")
        else:
            parts.append(ch)
    return '"' + "".join(parts) + '"'


def _ps_quote(value: str) -> str:
    """Render ``value`` as a single-quoted (literal) PowerShell string.

    Single-quoted strings are not expanded, so ``$`` and backticks in a
    path are kept verbatim; an embedded single quote is doubled.
    """
    return "'" + value.replace("'", "''") + "'"


def _render_config(server: str, token: str, adapter: str = "claude_code") -> str:
    """Build the TOML config text with exactly one adapter enabled.

    An ``adapter`` outside ``_KNOWN_ADAPTERS`` falls back to ``claude_code``.
    """
    if adapter not in _KNOWN_ADAPTERS:
        adapter = "claude_code"
    cc = "true" if adapter == "claude_code" else "false"
    hm = "true" if adapter == "hermes" else "false"
    oc = "true" if adapter == "openclaw" else "false"
    return dedent(
        f'''\
        # Auto-generated by clawith-bridge install. Do not share this file - it contains your API key.
        server = {_toml_str(server)}
        token = {_toml_str(token)}

        max_concurrent_sessions = 4

        [claude_code]
        enabled = {cc}
        executable = "claude"

        [hermes]
        enabled = {hm}
        base_url = "http://127.0.0.1:7890"

        [openclaw]
        enabled = {oc}
        base_url = "http://127.0.0.1:9000"
        '''
    )


def _run_ps(script: str, check: bool = True) -> subprocess.CompletedProcess[bytes]:
    """Run a PowerShell script passed as a base64 UTF-16LE ``-EncodedCommand``.

    With ``check=True`` a non-zero exit raises ``CalledProcessError``.
    """
    encoded = base64.b64encode(script.encode("utf-16-le")).decode()
    return subprocess.run(
        ["powershell.exe", "-NoProfile", "-NonInteractive", "-EncodedCommand", encoded],
        check=check,
        capture_output=True,
    )


def _stop_task() -> None:
    """Stop the scheduled task if it is running; failures are ignored."""
    _run_ps(
        f'Stop-ScheduledTask -TaskName "{TASK_NAME}" -ErrorAction SilentlyContinue',
        check=False,
    )


def _register_task(exe_path: Path, home_dir: Path) -> None:
    """Register (or replace) the logon-triggered scheduled task and start it.

    Raises:
        RuntimeError: if PowerShell exits non-zero; the message carries the
            captured stdout and stderr.
    """
    script = dedent(
        f'''
        $ErrorActionPreference = "Stop"
        $Action = New-ScheduledTaskAction -Execute {_ps_quote(str(exe_path))} -WorkingDirectory {_ps_quote(str(home_dir))}
        $Trigger = New-ScheduledTaskTrigger -AtLogOn -User $env:USERNAME
        $Settings = New-ScheduledTaskSettingsSet -AllowStartIfOnBatteries -DontStopIfGoingOnBatteries -StartWhenAvailable -Hidden
        $Principal = New-ScheduledTaskPrincipal -UserId $env:USERNAME -LogonType Interactive -RunLevel Limited
        Register-ScheduledTask -TaskName "{TASK_NAME}" -Action $Action -Trigger $Trigger -Settings $Settings -Principal $Principal -Force | Out-Null
        Start-ScheduledTask -TaskName "{TASK_NAME}"
        '''
    )
    result = _run_ps(script, check=False)
    if result.returncode != 0:
        stderr = result.stderr.decode("utf-8", errors="replace").strip()
        stdout = result.stdout.decode("utf-8", errors="replace").strip()
        raise RuntimeError(
            f"Register-ScheduledTask failed (exit={result.returncode}).\n"
            f"stdout: {stdout}\nstderr: {stderr}"
        )


def install(*, server: str, token: str, name: str = "", adapter: str = "claude_code") -> int:
    """Install the bridge for the current Windows user.

    Returns a process exit code: 0 on success, 2 when not run on Windows or
    not from the frozen executable, 3 when the installed exe cannot be
    overwritten, 4 when registering the scheduled task fails.
    """
    if sys.platform != "win32":
        print("ERROR: `install` only works on Windows.", file=sys.stderr)
        print("On macOS/Linux, run the bridge directly or use launchd/systemd.", file=sys.stderr)
        return 2

    if not getattr(sys, "frozen", False):
        print("ERROR: `install` must be run from the packaged .exe, not a source checkout.", file=sys.stderr)
        print("For dev, use `pip install -e .` and register a scheduled task manually.", file=sys.stderr)
        return 2

    install_dir = _install_dir()
    exe_dest = install_dir / "clawith-bridge.exe"
    cfg_path = _config_path()
    home_dir = Path.home()
    src_exe = Path(sys.executable).resolve()
    # Mirror the fallback in `_render_config` so messages match the config written.
    effective_adapter = adapter if adapter in _KNOWN_ADAPTERS else "claude_code"

    label = f" for agent '{name}'" if name else ""
    print(f"[clawith-bridge] Installing bridge{label}...")

    # Copy the running .exe to a stable location. If we're re-running from
    # within the installed copy, skip. Otherwise stop the task so we can
    # overwrite a file the scheduler might be holding open.
    install_dir.mkdir(parents=True, exist_ok=True)
    already_at_dest = exe_dest.exists() and src_exe == exe_dest.resolve()
    if already_at_dest:
        print(f"  - already running from {exe_dest}, skipping copy")
    else:
        if exe_dest.exists():
            _stop_task()
        print(f"  - copying exe to {exe_dest}")
        try:
            shutil.copy2(src_exe, exe_dest)
        except PermissionError as e:
            print(f"\nERROR: cannot overwrite {exe_dest}: {e}", file=sys.stderr)
            print("The bridge may still be running. Stop it and retry:", file=sys.stderr)
            print(f'  powershell -Command "Stop-ScheduledTask -TaskName {TASK_NAME}"', file=sys.stderr)
            return 3

    # Strip the baked-config trailer from the installed copy so the scheduled
    # task running it never re-triggers this install flow. The original
    # download keeps its trailer and can be re-run if the user wants to
    # reinstall.
    if strip_trailer(exe_dest):
        print("  - stripped setup trailer from installed copy")

    print(f"  - writing config to {cfg_path} (adapter={effective_adapter})")
    cfg_path.write_text(_render_config(server, token, effective_adapter), encoding="utf-8")

    print(f"  - registering scheduled task '{TASK_NAME}' (starts at user logon)")
    try:
        _register_task(exe_dest, home_dir)
    except RuntimeError as e:
        print(f"\nERROR: {e}", file=sys.stderr)
        return 4

    print()
    print("[clawith-bridge] Done. The bridge is now running in the background.")
    print(f"  Config: {cfg_path}")
    print(f"  Exe:    {exe_dest}")
    print(f"  Task:   {TASK_NAME}")
    print()
    print("  Check status:")
    print(f'    powershell -Command "Get-ScheduledTask -TaskName {TASK_NAME}"')
    print("  Uninstall:")
    print(f'    powershell -Command "Unregister-ScheduledTask -TaskName {TASK_NAME} -Confirm:$false"')
    if effective_adapter == "claude_code":
        # The CLI login hint only applies to the Claude Code adapter.
        print()
        print("  If Clawith still shows 'Bridge not connected', make sure the 'claude' CLI is logged in:")
        print("    npm install -g @anthropic-ai/claude-code")
        print("    claude login")
    return 0
"""Frame schemas — mirror of backend/app/services/local_agent/protocol.py.

Vendored deliberately so the bridge package doesn't import anything from the
backend. Keep in sync with the server.

Every frame is a pydantic model whose `type` field is a fixed literal;
`parse_inbound` uses that value to pick the model for frames received from
the server.
"""
from __future__ import annotations

from typing import Any, Literal

from pydantic import BaseModel, Field

PROTOCOL_VERSION = "1"

# Closed set of values accepted for `SessionEventFrame.kind`.
EventKind = Literal[
    "stdout_chunk",
    "stderr_chunk",
    "assistant_text",
    "thinking",
    "tool_call_start",
    "tool_call_result",
    "status",
    "file_change",
]


# ── Server → Bridge ───────────────────────────────────────────────────

class HelloFrame(BaseModel):
    """Server greeting carrying the protocol version and the server's clock."""

    type: Literal["hello"] = "hello"
    v: str = PROTOCOL_VERSION
    server_time: str


class SessionStartFrame(BaseModel):
    """Request to start a session on the named adapter."""

    type: Literal["session.start"] = "session.start"
    session_id: str
    adapter: str
    prompt: str
    params: dict[str, Any] = Field(default_factory=dict)
    cwd: str | None = None
    env: dict[str, str] = Field(default_factory=dict)
    timeout_s: int = 1800


class SessionInputFrame(BaseModel):
    """Additional text input for a running session."""

    type: Literal["session.input"] = "session.input"
    session_id: str
    text: str


class SessionCancelFrame(BaseModel):
    """Request to cancel a running session, with an optional reason."""

    type: Literal["session.cancel"] = "session.cancel"
    session_id: str
    reason: str = ""


class ToolResponseFrame(BaseModel):
    """Server's answer to a `tool.call`, matched by `reverse_call_id`."""

    type: Literal["tool.response"] = "tool.response"
    session_id: str
    reverse_call_id: str
    result: str | None = None
    error: str | None = None


class PingFrame(BaseModel):
    """Keep-alive probe from the server."""

    type: Literal["ping"] = "ping"


# ── Bridge → Server ───────────────────────────────────────────────────

class BridgeRegisterFrame(BaseModel):
    """Bridge registration: its version and the adapters it advertises."""

    type: Literal["bridge.register"] = "bridge.register"
    v: str = PROTOCOL_VERSION
    bridge_version: str
    adapters: list[str]
    capabilities: dict[str, Any] = Field(default_factory=dict)


class SessionAcceptedFrame(BaseModel):
    """Acknowledgement that the bridge accepted a `session.start`."""

    type: Literal["session.accepted"] = "session.accepted"
    session_id: str
    adapter: str
    local_session_id: str | None = None


class SessionEventFrame(BaseModel):
    """One streamed event of a running session; `kind` is an `EventKind`."""

    type: Literal["session.event"] = "session.event"
    session_id: str
    kind: EventKind
    payload: dict[str, Any] = Field(default_factory=dict)


class DiffSummary(BaseModel):
    """Change counts plus a per-file list, attached to `session.done`."""

    files_changed: int = 0
    insertions: int = 0
    deletions: int = 0
    files: list[dict[str, Any]] = Field(default_factory=list)


class SessionDoneFrame(BaseModel):
    """Terminal frame of a session that did not end with an error frame."""

    type: Literal["session.done"] = "session.done"
    session_id: str
    final_text: str = ""
    exit_code: int | None = None
    stats: dict[str, Any] = Field(default_factory=dict)
    diff_summary: DiffSummary | None = None


class SessionErrorFrame(BaseModel):
    """Terminal frame reporting a session failure."""

    type: Literal["session.error"] = "session.error"
    session_id: str
    error: str
    retryable: bool = False


class ToolCallFrame(BaseModel):
    """Reverse tool call issued from the bridge side."""

    type: Literal["tool.call"] = "tool.call"
    session_id: str
    reverse_call_id: str
    name: str
    arguments: dict[str, Any] = Field(default_factory=dict)


class PongFrame(BaseModel):
    """Reply to a `ping`."""

    type: Literal["pong"] = "pong"


# ── Parse helpers ─────────────────────────────────────────────────────

# Maps the wire `type` value to the model used to validate inbound frames.
_INBOUND_BY_TYPE: dict[str, type[BaseModel]] = {
    "hello": HelloFrame,
    "session.start": SessionStartFrame,
    "session.input": SessionInputFrame,
    "session.cancel": SessionCancelFrame,
    "tool.response": ToolResponseFrame,
    "ping": PingFrame,
}


def parse_inbound(data: dict[str, Any]) -> BaseModel | None:
    """Parse a frame received from the server. Returns None for unknown types.

    None is also returned when `type` is missing or not a string, and when
    the payload fails validation against the model for its type — callers
    cannot tell "unknown" from "malformed" by the return value alone.
    """
    t = data.get("type")
    if not isinstance(t, str):
        return None
    cls = _INBOUND_BY_TYPE.get(t)
    if cls is None:
        return None
    try:
        return cls.model_validate(data)
    except Exception:
        # Deliberately broad: any validation problem is reported as "no frame".
        return None
"""Reverse tool-call helpers.

Local agents (Claude Code, etc.) can initiate calls to Clawith's own tools
(send a Feishu message, update a task, etc.) by emitting a `tool.call` frame.
This module provides a thin client so in-process code in the bridge can also
initiate a reverse call and await its result.

In V1 we only *forward* reverse calls — local agents emit frames the bridge
relays verbatim. In-bridge helpers can use this when needed (e.g. diff_capture
could theoretically notify a channel on big diffs).
"""
from __future__ import annotations

import asyncio
import uuid
from typing import Any

from .protocol import ToolCallFrame


class ReverseCallClient:
    """Dispatches ToolCallFrame and awaits matching ToolResponseFrame.

    Connection owner drives this: when a `tool.response` frame arrives, the
    connection layer calls `resolve(reverse_call_id, result, error)`.
    """

    def __init__(self) -> None:
        # reverse_call_id -> future completed by `resolve` / `fail_all`.
        self._pending: dict[str, asyncio.Future] = {}

    def next_id(self) -> str:
        """Return a fresh random call id (32 hex characters)."""
        return uuid.uuid4().hex

    def build_frame(self, session_id: str, name: str, arguments: dict[str, Any]) -> tuple[ToolCallFrame, asyncio.Future]:
        """Create a `tool.call` frame and the future that will carry its result.

        The caller sends the frame and awaits the future. Must be called
        while an event loop is running.

        Raises:
            RuntimeError: if no event loop is running.
        """
        call_id = self.next_id()
        # get_running_loop() binds the future to the loop that will await it;
        # get_event_loop() is deprecated for this use.
        fut: asyncio.Future = asyncio.get_running_loop().create_future()
        self._pending[call_id] = fut
        # Drop the entry once the future finishes for any reason (including a
        # caller-side timeout/cancel), so abandoned calls do not accumulate
        # until the connection closes.
        fut.add_done_callback(lambda _f, _id=call_id: self._pending.pop(_id, None))
        frame = ToolCallFrame(
            session_id=session_id,
            reverse_call_id=call_id,
            name=name,
            arguments=arguments,
        )
        return frame, fut

    def resolve(self, call_id: str, result: str | None, error: str | None) -> None:
        """Complete the pending call `call_id`.

        A truthy `error` fails the future with `RuntimeError(error)`;
        otherwise it is resolved with `result` (`""` when `result` is
        empty/None). Unknown or already-finished calls are ignored.
        """
        fut = self._pending.pop(call_id, None)
        if fut is None or fut.done():
            return
        if error:
            fut.set_exception(RuntimeError(error))
        else:
            fut.set_result(result or "")

    def fail_all(self, exc: BaseException) -> None:
        """Fail every still-pending call with `exc` and forget them all."""
        for fut in list(self._pending.values()):
            if not fut.done():
                fut.set_exception(exc)
        self._pending.clear()
"""Local session manager — owns running adapter sessions inside the bridge.

One instance per live WS connection. Methods `start`, `send_input`, `cancel`
are called from the connection's read loop as server frames arrive; events
yielded by adapters are forwarded to the server via the `send_frame` callback
passed in.

Each adapter class is instantiated per-session (not per-bridge) so adapters
can freely keep per-session state on self. Cheap because these are lightweight.
"""
from __future__ import annotations

import asyncio
from dataclasses import dataclass, field
from typing import Any, Awaitable, Callable

from loguru import logger

from .adapters import BaseAdapter
from .adapters.claude_code import ClaudeCodeAdapter
from .adapters.hermes import HermesAdapter
from .adapters.openclaw import OpenClawAdapter
from .config import BridgeConfig
from .diff_capture import diff_summary, snapshot
from .protocol import (
    SessionAcceptedFrame,
    SessionDoneFrame,
    SessionErrorFrame,
    SessionEventFrame,
    DiffSummary,
)


SendFrame = Callable[[Any], Awaitable[None]]


# adapter name on the wire -> (adapter class, attribute of BridgeConfig holding its config)
_ADAPTER_REGISTRY: dict[str, tuple[type[BaseAdapter], str]] = {
    "claude_code": (ClaudeCodeAdapter, "claude_code"),
    "hermes": (HermesAdapter, "hermes"),
    "openclaw": (OpenClawAdapter, "openclaw"),
}


@dataclass
class _Running:
    session_id: str
    adapter_name: str
    adapter: BaseAdapter
    task: asyncio.Task
    cwd: str | None = None
    snapshot_obj: Any = None
    final_chunks: list[str] = field(default_factory=list)
    exit_code: int | None = None


class SessionManager:
    """Tracks the sessions of one connection and runs each in its own task."""

    def __init__(self, config: BridgeConfig, send_frame: SendFrame) -> None:
        self.config = config
        self._send = send_frame
        self._sessions: dict[str, _Running] = {}
        self._lock = asyncio.Lock()

    # ── Lifecycle ─────────────────────────────────────────────────────

    async def start(
        self,
        session_id: str,
        adapter: str,
        prompt: str,
        params: dict[str, Any],
        cwd: str | None,
        env: dict[str, str],
        timeout_s: int,
    ) -> None:
        """Validate a `session.start` request and spawn the session task.

        Sends `session.error` (and starts nothing) when the bridge is at its
        concurrency cap, the adapter is unknown, or the adapter is disabled
        in local config. A duplicate `session_id` is ignored with a warning.
        """
        async with self._lock:
            if session_id in self._sessions:
                logger.warning(f"duplicate session.start ignored: {session_id}")
                return
            if len(self._sessions) >= self.config.max_concurrent_sessions:
                await self._send_error(session_id, "bridge at concurrency cap", retryable=True)
                return
            reg = _ADAPTER_REGISTRY.get(adapter)
            if reg is None:
                await self._send_error(session_id, f"unknown adapter: {adapter}")
                return
            cls, cfg_attr = reg
            adapter_cfg = getattr(self.config, cfg_attr)
            if not adapter_cfg.enabled:
                await self._send_error(session_id, f"adapter disabled by local config: {adapter}")
                return
            adapter_instance = cls(adapter_cfg)

            # No await between create_task and the registration below, so the
            # task cannot run before its entry exists.
            task = asyncio.create_task(
                self._run(session_id, adapter, adapter_instance, prompt, params, cwd, env, timeout_s)
            )
            self._sessions[session_id] = _Running(
                session_id=session_id,
                adapter_name=adapter,
                adapter=adapter_instance,
                task=task,
                cwd=cwd,
            )

    async def send_input(self, session_id: str, text: str) -> None:
        """Forward interactive input to a running session; unknown ids are ignored."""
        running = self._sessions.get(session_id)
        if running is None:
            return
        try:
            await running.adapter.send_input(session_id, text)
        except NotImplementedError:
            await self._send_error(session_id, "adapter does not support interactive input")
        except Exception as e:
            logger.warning(f"send_input failed: {e}")

    async def cancel(self, session_id: str, reason: str) -> None:
        """Ask the adapter to cancel, then cancel the session task."""
        running = self._sessions.get(session_id)
        if running is None:
            return
        try:
            await running.adapter.cancel(session_id, reason)
        except Exception as e:
            logger.warning(f"adapter.cancel failed: {e}")
        running.task.cancel()

    async def fail_all(self, reason: str) -> None:
        """Cancel every running session task (used when the connection drops)."""
        if self._sessions:
            logger.info(f"cancelling {len(self._sessions)} session(s): {reason}")
        for running in list(self._sessions.values()):
            running.task.cancel()
        self._sessions.clear()

    # ── Internals ─────────────────────────────────────────────────────

    async def _run(
        self,
        session_id: str,
        adapter_name: str,
        adapter: BaseAdapter,
        prompt: str,
        params: dict[str, Any],
        cwd: str | None,
        env: dict[str, str],
        timeout_s: int,
    ) -> None:
        """Drive one session from `session.accepted` to its terminal frame.

        Every stage runs inside the same try block, so a cancel or failure
        while sending `session.accepted` or taking the snapshot still ends
        with a terminal frame, and the `finally` clause always releases the
        session's slot in `_sessions` and closes the adapter.
        """
        snap: Any = None
        error: str | None = None
        final_accum: list[str] = []
        try:
            try:
                await self._send(
                    SessionAcceptedFrame(session_id=session_id, adapter=adapter_name)
                )
                snap = await self._snapshot_cwd(session_id, cwd)
                error = await self._forward_events(
                    session_id, adapter, prompt, params, cwd, env, timeout_s, final_accum
                )
            except asyncio.CancelledError:
                # Swallowed on purpose so the terminal frame can still be sent.
                error = "cancelled"
            except Exception as e:
                logger.exception(f"[session {session_id}] session failed")
                error = f"{type(e).__name__}: {e}"
            await self._send_terminal(session_id, adapter, snap, error, final_accum)
        finally:
            self._sessions.pop(session_id, None)
            await self._close_adapter(adapter)

    async def _snapshot_cwd(self, session_id: str, cwd: str | None) -> Any:
        """Snapshot `cwd` for the later diff; failures are non-fatal (returns None)."""
        snap = None
        try:
            snap = await snapshot(cwd)
        except Exception as e:
            logger.debug(f"snapshot failed (non-fatal): {e}")
        running = self._sessions.get(session_id)
        if running is not None:
            running.snapshot_obj = snap
        return snap

    async def _forward_events(
        self,
        session_id: str,
        adapter: BaseAdapter,
        prompt: str,
        params: dict[str, Any],
        cwd: str | None,
        env: dict[str, str],
        timeout_s: int,
        final_accum: list[str],
    ) -> str | None:
        """Relay adapter events to the server; return an error string or None.

        `assistant_text` payloads are appended to `final_accum` as the
        fallback for the final text.
        """
        async for ev in adapter.start_session(
            session_id, prompt, params, cwd, env, timeout_s
        ):
            if ev.kind == "assistant_text":
                txt = ev.payload.get("text") or ""
                if txt:
                    final_accum.append(str(txt))
            try:
                await self._send(SessionEventFrame(
                    session_id=session_id,
                    kind=ev.kind,
                    payload=ev.payload,
                ))
            except Exception as send_err:
                logger.warning(f"failed to forward event, aborting session: {send_err}")
                return f"bridge send failed: {send_err}"
        return None

    async def _send_terminal(
        self,
        session_id: str,
        adapter: BaseAdapter,
        snap: Any,
        error: str | None,
        final_accum: list[str],
    ) -> None:
        """Send `session.error`, or `session.done` for success and cancellation."""
        # Final text: adapter-provided first, fallback to accumulator
        final_text = ""
        try:
            final_text = await adapter.final_text(session_id)
        except Exception as e:
            logger.debug(f"adapter.final_text failed: {e}")
        if not final_text:
            final_text = "".join(final_accum)

        diff_obj: DiffSummary | None = None
        try:
            ds = await diff_summary(snap)
            if ds:
                # The summary may carry extra keys ("warning", "note") the model lacks.
                diff_obj = DiffSummary(**{k: v for k, v in ds.items() if k in DiffSummary.model_fields})
        except Exception as e:
            logger.debug(f"diff_summary failed: {e}")

        try:
            if error and error != "cancelled":
                await self._send(SessionErrorFrame(
                    session_id=session_id, error=error, retryable=False
                ))
            else:
                await self._send(SessionDoneFrame(
                    session_id=session_id,
                    final_text=final_text,
                    exit_code=0 if error is None else 1,
                    stats=await _safe_stats(adapter, session_id),
                    diff_summary=diff_obj,
                ))
        except Exception as e:
            # Best effort: the connection may already be gone.
            logger.debug(f"terminal frame not sent: {e}")

    @staticmethod
    async def _close_adapter(adapter: BaseAdapter) -> None:
        """Call the adapter's `aclose()` when it has one (e.g. an HTTP client)."""
        try:
            if hasattr(adapter, "aclose"):
                await adapter.aclose()
        except Exception as e:
            logger.debug(f"adapter.aclose failed: {e}")

    async def _send_error(self, session_id: str, error: str, retryable: bool = False) -> None:
        """Send a `session.error` frame, ignoring send failures."""
        try:
            await self._send(SessionErrorFrame(
                session_id=session_id, error=error, retryable=retryable
            ))
        except Exception:
            pass


async def _safe_stats(adapter: BaseAdapter, session_id: str) -> dict[str, Any]:
    """Return the adapter's stats for the session, or `{}` if it fails or has none."""
    try:
        return await adapter.stats(session_id) or {}
    except Exception:
        return {}
"""Stub bridge for smoke-testing the Clawith local-agent session path.

What it does:
  1. Opens a WebSocket to `ws(s):///ws/bridge?token=`
  2. Waits for server `hello`
  3. Sends `bridge.register` advertising all three adapters
  4. On `session.start`: fakes a streaming session (thinking → a few chunks →
     one tool_call → session.done with a mock diff_summary)
  5. Responds to `ping` with `pong`; handles `session.cancel` gracefully

Run it like:

    python stub_bridge.py --server ws://127.0.0.1:8000 --token oc-

The agent must have `bridge_mode` set to `enabled` or `auto` (not the default
`disabled`) for the server to accept the connection.
"""
from __future__ import annotations

import argparse
import asyncio
import json
import signal
import sys
from typing import Any
from urllib.parse import quote

try:
    import websockets  # type: ignore
except ImportError:
    print("ERROR: install websockets first: pip install websockets", file=sys.stderr)
    sys.exit(1)


PROTOCOL_VERSION = "1"
BRIDGE_VERSION = "stub-0.1"


async def _fake_session(ws: Any, session_id: str, prompt: str, adapter: str) -> None:
    """Emit a fake event stream for one session_id, then session.done."""
    async def event(kind: str, payload: dict) -> None:
        await ws.send(json.dumps({
            "type": "session.event",
            "session_id": session_id,
            "kind": kind,
            "payload": payload,
        }))

    # Accept
    await ws.send(json.dumps({
        "type": "session.accepted",
        "session_id": session_id,
        "adapter": adapter,
    }))

    # "Thinking"
    await event("thinking", {"text": f"(stub {adapter}) considering: {prompt[:80]!r}…"})
    await asyncio.sleep(0.3)

    # Stream a few chunks
    for piece in (
        f"Hi from the stub bridge (adapter={adapter}).\n",
        "I'm pretending to run a real local agent now.\n",
        f"Your prompt was: {prompt}\n",
    ):
        await event("assistant_text", {"text": piece})
        await asyncio.sleep(0.2)

    # Fake a tool_call round-trip
    await event("tool_call_start", {"name": "fake_tool", "args": {"x": 1}})
    await asyncio.sleep(0.2)
    await event("tool_call_result", {"name": "fake_tool", "result": "ok"})
    await asyncio.sleep(0.1)

    # Done with a mock diff_summary
    await ws.send(json.dumps({
        "type": "session.done",
        "session_id": session_id,
        "final_text": f"Done (stub {adapter}). I processed: {prompt[:200]}",
        "exit_code": 0,
        "stats": {"chunks": 3, "tool_calls": 1},
        "diff_summary": {
            "files_changed": 1,
            "insertions": 3,
            "deletions": 0,
            "files": [{"path": "stub/demo.txt", "+": 3, "-": 0}],
        },
    }))


async def run(server: str, token: str) -> None:
    """Connect, register, and serve frames until the socket closes.

    The token is URL-encoded in the query string and never printed.
    Frames that are not JSON objects, or session frames without a
    `session_id`, are logged and skipped instead of ending the loop.
    """
    base = f"{server.rstrip('/')}/ws/bridge"
    url = f"{base}?token={quote(token, safe='')}"
    # The token is the agent's API key — keep it out of terminal output.
    print(f"[stub-bridge] connecting: {base}?token=<redacted>")
    async with websockets.connect(url, max_size=2 * 1024 * 1024) as ws:
        # Expect hello
        hello_raw = await ws.recv()
        hello = json.loads(hello_raw)
        print(f"[stub-bridge] server hello: {hello}")

        # Send bridge.register
        register = {
            "type": "bridge.register",
            "v": PROTOCOL_VERSION,
            "bridge_version": BRIDGE_VERSION,
            "adapters": ["claude_code", "hermes", "openclaw"],
            "capabilities": {"interactive_input": False, "cancellation": True},
        }
        await ws.send(json.dumps(register))
        print("[stub-bridge] registered as claude_code/hermes/openclaw")

        active_sessions: dict[str, asyncio.Task] = {}

        try:
            async for raw in ws:
                try:
                    frame = json.loads(raw)
                except Exception as e:
                    print(f"[stub-bridge] bad frame: {e}")
                    continue
                if not isinstance(frame, dict):
                    print(f"[stub-bridge] bad frame: expected object, got {type(frame).__name__}")
                    continue

                t = frame.get("type")
                if t == "ping":
                    await ws.send(json.dumps({"type": "pong"}))
                    continue

                if t == "tool.response":
                    # We never made a reverse call in this stub, but log if one arrives
                    print(f"[stub-bridge] got tool.response (unexpected): {frame}")
                    continue

                if t not in ("session.start", "session.cancel", "session.input"):
                    print(f"[stub-bridge] unhandled frame type={t!r}")
                    continue

                sid = frame.get("session_id")
                if not isinstance(sid, str):
                    print(f"[stub-bridge] {t} frame without session_id, ignoring")
                    continue

                if t == "session.start":
                    prompt = frame.get("prompt", "")
                    adapter = frame.get("adapter", "claude_code")
                    print(f"[stub-bridge] session.start {sid} adapter={adapter} prompt={prompt!r}")
                    task = asyncio.create_task(_fake_session(ws, sid, prompt, adapter))
                    active_sessions[sid] = task
                    task.add_done_callback(lambda _t, _sid=sid: active_sessions.pop(_sid, None))
                elif t == "session.cancel":
                    task = active_sessions.pop(sid, None)
                    if task:
                        task.cancel()
                    await ws.send(json.dumps({
                        "type": "session.error",
                        "session_id": sid,
                        "error": f"cancelled: {frame.get('reason')}",
                    }))
                else:
                    # Stub doesn't support interactive input, just echo it back
                    await ws.send(json.dumps({
                        "type": "session.event",
                        "session_id": sid,
                        "kind": "assistant_text",
                        "payload": {"text": f"(stub echo of input) {frame.get('text','')}"},
                    }))
        finally:
            # Do not leave fake sessions writing to a closed socket.
            for task in list(active_sessions.values()):
                task.cancel()


def main() -> None:
    """Parse CLI arguments and run the stub until interrupted."""
    ap = argparse.ArgumentParser()
    ap.add_argument("--server", default="ws://127.0.0.1:8000", help="Clawith server WS root")
    ap.add_argument("--token", required=True, help="Agent API key (oc-...)")
    args = ap.parse_args()

    loop = asyncio.new_event_loop()

    def _shutdown(*_):
        for t in asyncio.all_tasks(loop):
            t.cancel()

    try:
        loop.add_signal_handler(signal.SIGINT, _shutdown)
    except (NotImplementedError, RuntimeError):
        pass  # Windows

    try:
        loop.run_until_complete(run(args.server, args.token))
    except (KeyboardInterrupt, asyncio.CancelledError):
        print("[stub-bridge] bye")
    finally:
        loop.close()


if __name__ == "__main__":
    main()
Agent", + "titleOpenclaw": "Link OpenClaw Agent", + "titleHermes": "Link Hermes Agent", + "descClaudeCode": "Give your agent a name. Download the installer and run it on the machine where your Claude Code CLI is installed.", + "descOpenclaw": "Give your agent a name. Download the installer and run it on the machine where your OpenClaw daemon runs.", + "descHermes": "Give your agent a name. Download the installer and run it on the machine where your Hermes agent runs." + }, "steps": { "basicInfo": "Basic Info", "personality": "Personality", diff --git a/frontend/src/i18n/zh.json b/frontend/src/i18n/zh.json index 1d96737a2..83d3319c8 100644 --- a/frontend/src/i18n/zh.json +++ b/frontend/src/i18n/zh.json @@ -801,6 +801,25 @@ } }, "wizard": { + "runtime": { + "title": "运行时", + "native": "平台托管", + "nativeDesc": "由 Clawith 平台托管的完整 agent", + "claude_code": "Claude Code", + "claude_codeDesc": "桥接到本地 Claude Code CLI", + "openclaw": "OpenClaw", + "openclawDesc": "桥接到本地 OpenClaw daemon", + "hermes": "Hermes", + "hermesDesc": "桥接到本地 Hermes agent" + }, + "bridge": { + "titleClaudeCode": "接入 Claude Code Agent", + "titleOpenclaw": "接入 OpenClaw Agent", + "titleHermes": "接入 Hermes Agent", + "descClaudeCode": "给 agent 命名。下载安装器,在已安装 Claude Code CLI 的机器上运行。", + "descOpenclaw": "给 agent 命名。下载安装器,在运行 OpenClaw daemon 的机器上运行。", + "descHermes": "给 agent 命名。下载安装器,在运行 Hermes agent 的机器上运行。" + }, "steps": { "basicInfo": "基本信息", "personality": "人格设定", diff --git a/frontend/src/pages/AgentCreate.tsx b/frontend/src/pages/AgentCreate.tsx index 8816eb9a4..f241d8bd4 100644 --- a/frontend/src/pages/AgentCreate.tsx +++ b/frontend/src/pages/AgentCreate.tsx @@ -66,6 +66,7 @@ export default function AgentCreate() { const [error, setError] = useState(''); const [fieldErrors, setFieldErrors] = useState>({}); const [agentType, setAgentType] = useState<'native' | 'openclaw'>('native'); + const [bridgeAdapter, setBridgeAdapter] = useState<'claude_code' | 'openclaw' | 'hermes'>('claude_code'); // Clear field 
error when user edits a field const clearFieldError = (field: string) => setFieldErrors(prev => { const n = { ...prev }; delete n[field]; return n; }); const [createdApiKey, setCreatedApiKey] = useState(''); @@ -253,6 +254,7 @@ export default function AgentCreate() { createMutation.mutate({ name: form.name, agent_type: agentType, + bridge_adapter: agentType === 'openclaw' ? bridgeAdapter : undefined, role_description: form.role_description, personality: agentType === 'native' ? form.personality : undefined, boundaries: agentType === 'native' ? form.boundaries : undefined, @@ -398,36 +400,70 @@ For humans, the message is delivered via their available channel (e.g. Feishu).` ); } - // ── Type Selector (shared between both modes) ── + // ── Runtime Selector: 4 flat cards ── + // Platform Hosted (native) + 3 bridge runtimes (openclaw agent_type, different bridge_adapter) + const runtimeCards = [ + { + key: 'native' as const, + active: agentType === 'native', + onClick: () => { setAgentType('native'); setStep(0); }, + title: t('wizard.runtime.native', 'Platform Hosted'), + desc: t('wizard.runtime.nativeDesc', 'Full agent running on Clawith platform'), + lab: false, + }, + { + key: 'claude_code' as const, + active: agentType === 'openclaw' && bridgeAdapter === 'claude_code', + onClick: () => { setAgentType('openclaw'); setBridgeAdapter('claude_code'); setStep(0); }, + title: t('wizard.runtime.claude_code', 'Claude Code'), + desc: t('wizard.runtime.claude_codeDesc', 'Bridge to local Claude Code CLI'), + lab: true, + }, + { + key: 'openclaw' as const, + active: agentType === 'openclaw' && bridgeAdapter === 'openclaw', + onClick: () => { setAgentType('openclaw'); setBridgeAdapter('openclaw'); setStep(0); }, + title: t('wizard.runtime.openclaw', 'OpenClaw'), + desc: t('wizard.runtime.openclawDesc', 'Bridge to local OpenClaw daemon'), + lab: true, + }, + { + key: 'hermes' as const, + active: agentType === 'openclaw' && bridgeAdapter === 'hermes', + onClick: () => { 
setAgentType('openclaw'); setBridgeAdapter('hermes'); setStep(0); }, + title: t('wizard.runtime.hermes', 'Hermes'), + desc: t('wizard.runtime.hermesDesc', 'Bridge to local Hermes agent'), + lab: true, + }, + ]; const typeSelector = ( -
-
{ setAgentType('native'); setStep(0); }} - style={{ - padding: '16px', borderRadius: '8px', cursor: 'pointer', - border: `1.5px solid ${agentType === 'native' ? 'var(--accent-primary)' : 'var(--border-default)'}`, - background: agentType === 'native' ? 'var(--accent-subtle)' : 'var(--bg-elevated)', - }} - > -
{t('openclaw.nativeTitle', 'Platform Hosted')}
-
{t('openclaw.nativeDesc', 'Full agent running on Clawith platform')}
+
+
+ {t('wizard.runtime.title', 'Runtime')}
-
{ setAgentType('openclaw'); setStep(0); }} - style={{ - padding: '16px', borderRadius: '8px', cursor: 'pointer', position: 'relative', - border: `1.5px solid ${agentType === 'openclaw' ? 'var(--accent-primary)' : 'var(--border-default)'}`, - background: agentType === 'openclaw' ? 'var(--accent-subtle)' : 'var(--bg-elevated)', - }} - > - Lab -
{t('openclaw.openclawTitle', 'Link OpenClaw')}
-
{t('openclaw.openclawDesc', 'Connect your existing OpenClaw agent')}
+
+ {runtimeCards.map((card) => ( +
+ {card.lab && ( + Lab + )} +
{card.title}
+
{card.desc}
+
+ ))}
); @@ -450,14 +486,22 @@ For humans, the message is delivered via their available channel (e.g. Feishu).`

- {t('openclaw.basicTitle', 'Link OpenClaw Agent')} + {bridgeAdapter === 'claude_code' + ? t('wizard.bridge.titleClaudeCode', 'Link Claude Code Agent') + : bridgeAdapter === 'hermes' + ? t('wizard.bridge.titleHermes', 'Link Hermes Agent') + : t('wizard.bridge.titleOpenclaw', 'Link OpenClaw Agent')} Lab

- {t('openclaw.basicDesc', 'Give your OpenClaw agent a name and description. The LLM model, personality, and skills are configured on your OpenClaw instance.')} + {bridgeAdapter === 'claude_code' + ? t('wizard.bridge.descClaudeCode', 'Give your agent a name. Download the installer and run it on the machine where your Claude Code CLI is installed.') + : bridgeAdapter === 'hermes' + ? t('wizard.bridge.descHermes', 'Give your agent a name. Download the installer and run it on the machine where your Hermes agent runs.') + : t('wizard.bridge.descOpenclaw', 'Give your agent a name. Download the installer and run it on the machine where your OpenClaw daemon runs.')}

diff --git a/frontend/src/pages/AgentDetail.tsx b/frontend/src/pages/AgentDetail.tsx index 4eb2ca50f..6d3800e97 100644 --- a/frontend/src/pages/AgentDetail.tsx +++ b/frontend/src/pages/AgentDetail.tsx @@ -2090,8 +2090,12 @@ function AgentDetailInner() { } else if (d.type === 'trigger_notification') { setChatMessages(prev => [...prev, parseChatMsg({ role: 'assistant', content: d.content })]); fetchMySessions(true, agentId); - } else { + } else if (d.role && d.content) { setChatMessages(prev => [...prev, parseChatMsg({ role: d.role, content: d.content })]); + } else { + // Unknown event with no role/content — control frames (rate_limit_event, + // ping, bridge status/file_change/bridge_event). Drop to avoid phantoms. + console.debug('[stream] skip unknown event', d.type, d); } }; }; @@ -2815,13 +2819,17 @@ function AgentDetailInner() { {(agent as any).is_expired && ( Expired )} - {(agent as any).agent_type === 'openclaw' && ( - OpenClaw · Lab - )} + {(agent as any).agent_type === 'openclaw' && (() => { + const a = (agent as any).bridge_adapter || 'claude_code'; + const label = a === 'claude_code' ? 'Claude Code' : a === 'hermes' ? 
'Hermes' : 'OpenClaw'; + return ( + Bridge · {label} · Lab + ); + })()} {!(agent as any).is_expired && (agent as any).expires_at && ( Expires: {new Date((agent as any).expires_at).toLocaleString()} diff --git a/frontend/src/pages/Chat.tsx b/frontend/src/pages/Chat.tsx index 601151d0d..9669d2612 100644 --- a/frontend/src/pages/Chat.tsx +++ b/frontend/src/pages/Chat.tsx @@ -45,6 +45,22 @@ interface ToolCall { result?: string; } +interface DiffFile { + path: string; + '+'?: number; + '-'?: number; + status?: string; +} + +interface DiffSummary { + files_changed: number; + insertions: number; + deletions: number; + files: DiffFile[]; + note?: string; + warning?: string; +} + interface Message { role: 'user' | 'assistant'; content: string; @@ -54,6 +70,10 @@ interface Message { imageUrl?: string; timestamp?: string; _isToolGroup?: boolean; + diffSummary?: DiffSummary; + bridgeAdapter?: string; // e.g. "claude_code", "hermes", "openclaw" + fileChanges?: Array<{ path: string; kind: string }>; + sessionError?: string; // set when the bridge session failed mid-way } // CSS keyframe for the pulse/breathing LED — injected once into @@ -71,6 +91,112 @@ if (typeof document !== 'undefined' && !document.getElementById(PULSE_STYLE_ID)) document.head.appendChild(s); } +function DiffSummaryBlock({ summary, adapter }: { summary: DiffSummary; adapter?: string }) { + const [expanded, setExpanded] = useState(false); + const files = summary.files || []; + const totalIns = summary.insertions || 0; + const totalDel = summary.deletions || 0; + const n = summary.files_changed ?? files.length; + if (!n) return null; + return ( +
+ + {expanded && ( +
+ {files.slice(0, 200).map((f, idx) => ( +
+ + {f.status ? [{f.status}] : null} + {f.path} + + + {(f['+'] ?? 0) > 0 && +{f['+']} } + {(f['-'] ?? 0) > 0 && -{f['-']}} + +
+ ))} + {(summary.note || summary.warning) && ( +
+ {summary.note || summary.warning} +
+ )} +
+ )} +
+ ); +} + + +function SessionErrorBlock({ message }: { message: string }) { + if (!message) return null; + return ( +
+ ⚠️ + {message} +
+ ); +} + + +function FileChangesBlock({ changes }: { changes: Array<{ path: string; kind: string }> }) { + if (!changes || changes.length === 0) return null; + return ( +
+ {changes.slice(0, 8).map((c, i) => ( + + {c.kind === 'created' ? '+' : c.kind === 'deleted' ? '-' : '~'} {c.path} + + ))} + {changes.length > 8 && +{changes.length - 8} more} +
+ ); +} + + function ChatToolChain({ toolCalls }: { toolCalls: ToolCall[] }) { const { t } = useTranslation(); const [expanded, setExpanded] = useState(false); @@ -287,6 +413,10 @@ export default function Chat() { const pendingToolCalls = useRef([]); const streamContent = useRef(''); const thinkingContent = useRef(''); + const pendingDiffSummary = useRef(null); + const pendingBridgeAdapter = useRef(null); + const pendingFileChanges = useRef>([]); + const pendingSessionError = useRef(null); const { data: agent } = useQuery({ queryKey: ['agent', id], @@ -572,27 +702,82 @@ export default function Chat() { setLivePanelVisible(true); } } + } else if (data.type === 'status') { + // Bridge session lifecycle markers. We care most about + // state=done which carries diff_summary and stats — + // stash it so the next 'done' / final text can render it. + if (data.adapter) pendingBridgeAdapter.current = data.adapter; + if (data.state === 'done' && data.diff_summary) { + pendingDiffSummary.current = data.diff_summary as DiffSummary; + } + if (data.state === 'error' && data.error) { + console.warn('[bridge-session error]', data.error); + pendingSessionError.current = String(data.error); + } + } else if (data.type === 'file_change') { + if (data.path) { + pendingFileChanges.current.push({ + path: String(data.path), + kind: String(data.kind || data.status || 'changed'), + }); + } + } else if (data.type === 'bridge_event') { + // Forward-compat: log unknown bridge event kinds without + // crashing the UI. New kinds the server adds will show up + // here until we add explicit handling. + console.debug('[bridge-event]', data.kind, data.payload); } else if (data.type === 'done') { // Final response — replace streaming message with final + tool calls const toolCalls = pendingToolCalls.current.length > 0 ? 
[...pendingToolCalls.current] : undefined; const thinking = thinkingContent.current || undefined; + const diffSummary = pendingDiffSummary.current || undefined; + const bridgeAdapter = pendingBridgeAdapter.current || undefined; + const fileChanges = pendingFileChanges.current.length > 0 ? [...pendingFileChanges.current] : undefined; + const sessionError = (data.session_error as string | undefined) || pendingSessionError.current || undefined; pendingToolCalls.current = []; streamContent.current = ''; thinkingContent.current = ''; + pendingDiffSummary.current = null; + pendingBridgeAdapter.current = null; + pendingFileChanges.current = []; + pendingSessionError.current = null; setStreaming(false); setMessages(prev => { const updated = [...prev]; // Replace the last streaming assistant message if (updated.length > 0 && updated[updated.length - 1].role === 'assistant') { - updated[updated.length - 1] = { role: 'assistant', content: data.content, toolCalls, thinking }; + updated[updated.length - 1] = { + role: 'assistant', + content: data.content, + toolCalls, + thinking, + diffSummary, + bridgeAdapter, + fileChanges, + sessionError, + }; } else { - updated.push({ role: 'assistant', content: data.content, toolCalls, thinking }); + updated.push({ + role: 'assistant', + content: data.content, + toolCalls, + thinking, + diffSummary, + bridgeAdapter, + fileChanges, + sessionError, + }); } return updated; }); - } else { + } else if (data.role && data.content) { // Legacy format: {role, content} setMessages(prev => [...prev, { role: data.role, content: data.content }]); + } else { + // Unknown event with no role/content — e.g. Anthropic SDK + // control frames (rate_limit_event, ping, message_start). + // Dropping them prevents phantom user messages in the UI. + console.debug('[stream] skip unknown event', data.type, data); } }; }; @@ -868,6 +1053,15 @@ export default function Chat() { ) : (
{msg.content}
)} + {msg.role === 'assistant' && msg.fileChanges && msg.fileChanges.length > 0 && ( + + )} + {msg.role === 'assistant' && msg.diffSummary && ( + + )} + {msg.role === 'assistant' && msg.sessionError && ( + + )} {msg.timestamp && (
{new Date(msg.timestamp).toLocaleString(undefined, { month: 'short', day: 'numeric', hour: '2-digit', minute: '2-digit' })} diff --git a/frontend/src/pages/OpenClawSettings.tsx b/frontend/src/pages/OpenClawSettings.tsx index f64f84a02..ef11569ed 100644 --- a/frontend/src/pages/OpenClawSettings.tsx +++ b/frontend/src/pages/OpenClawSettings.tsx @@ -106,6 +106,84 @@ export default function OpenClawSettings({ agent, agentId }: OpenClawSettingsPro const currentScope = permData?.scope_type || 'company'; const currentAccessLevel = permData?.access_level || 'use'; + // ─── Bridge mode state ────────────────────────────── + const currentBridgeMode: 'disabled' | 'enabled' | 'auto' = + (agent?.bridge_mode as any) || 'disabled'; + const [bridgeSaving, setBridgeSaving] = useState(null); + + const handleBridgeModeChange = async (newMode: 'disabled' | 'enabled' | 'auto') => { + if (newMode === currentBridgeMode) return; + setBridgeSaving(newMode); + try { + await agentApi.update(agentId, { bridge_mode: newMode } as any); + queryClient.invalidateQueries({ queryKey: ['agent', agentId] }); + } catch (e) { + console.error('Failed to update bridge_mode', e); + } finally { + setBridgeSaving(null); + } + }; + + // ─── Bridge installer download ────────────────────── + const detectedPlatform: 'windows' | 'macos' | 'linux' = (() => { + const p = (typeof navigator !== 'undefined' ? navigator.platform || '' : '').toLowerCase(); + const ua = (typeof navigator !== 'undefined' ? 
navigator.userAgent || '' : '').toLowerCase(); + if (p.startsWith('win') || ua.includes('windows')) return 'windows'; + if (p.startsWith('mac') || ua.includes('mac os')) return 'macos'; + return 'linux'; + })(); + const [installerPlatform, setInstallerPlatform] = useState<'windows' | 'macos' | 'linux'>(detectedPlatform); + const [installerDownloading, setInstallerDownloading] = useState(false); + const [installerConfirm, setInstallerConfirm] = useState(false); + const [installerError, setInstallerError] = useState(''); + const [installerDownloaded, setInstallerDownloaded] = useState(false); + + const handleDownloadInstaller = async () => { + setInstallerDownloading(true); + setInstallerError(''); + try { + const token = localStorage.getItem('token'); + const resp = await fetch(`/api/agents/${agentId}/bridge-installer?platform=${installerPlatform}`, { + method: 'POST', + headers: { ...(token ? { Authorization: `Bearer ${token}` } : {}) }, + }); + if (!resp.ok) { + const errText = await resp.text().catch(() => ''); + throw new Error(errText || `HTTP ${resp.status}`); + } + const blob = await resp.blob(); + const filename = resp.headers.get('X-Clawith-Filename') + || (installerPlatform === 'windows' ? 
'clawith-bridge-setup.exe' : 'install-clawith-bridge.sh'); + + const url = URL.createObjectURL(blob); + const a = document.createElement('a'); + a.href = url; + a.download = filename; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + URL.revokeObjectURL(url); + + setInstallerDownloaded(true); + setInstallerConfirm(false); + // Agent's api_key_hash + bridge_mode may have changed server-side + queryClient.invalidateQueries({ queryKey: ['agent', agentId] }); + // Clear previously-shown plaintext key (it's now invalid) + setApiKey(null); + } catch (e: any) { + console.error('Failed to download installer', e); + setInstallerError(e?.message || 'Download failed'); + } finally { + setInstallerDownloading(false); + } + }; + + const runCommand = installerPlatform === 'windows' + ? (isChinese + ? '双击 clawith-bridge-setup.exe 即可安装' + : 'Double-click clawith-bridge-setup.exe to install') + : 'bash install-clawith-bridge.sh'; + return (

{t('agent.settings.title')}

@@ -226,6 +304,250 @@ export default function OpenClawSettings({ agent, agentId }: OpenClawSettingsPro )}
+ {/* ── Bridge Mode ── */} +
+

+ {isChinese ? '本地 Bridge 连接模式' : 'Local Bridge Mode'} +

+

+ {isChinese + ? 'Bridge 是跑在你本机的小程序,把 Claude Code 等本地工具接入 Clawith。' + : 'The bridge is a small program running on your machine that connects local tools like Claude Code to Clawith.'} +

+ +
+ {([ + { + val: 'enabled' as const, + label: isChinese ? '启用(推荐)' : 'Enabled (recommended)', + desc: isChinese + ? '通过 Bridge 实时流式执行。Bridge 未连接时消息会失败。' + : 'Stream execution via bridge. Messages fail if bridge is not connected.', + }, + { + val: 'auto' as const, + label: isChinese ? '自动回落' : 'Auto fallback', + desc: isChinese + ? '优先 Bridge;未连接时回落到旧版 Gateway 轮询(~5 分钟延迟)。' + : 'Prefer bridge; fall back to legacy gateway polling (~5 min delay) when offline.', + }, + { + val: 'disabled' as const, + label: isChinese ? '禁用(兼容旧版)' : 'Disabled (legacy)', + desc: isChinese + ? '只走 Gateway 轮询。Bridge 连接会被拒绝。' + : 'Use gateway polling only. Bridge connections will be rejected.', + }, + ]).map(opt => { + const selected = currentBridgeMode === opt.val; + const isSaving = bridgeSaving === opt.val; + return ( + + ); + })} +
+ + {!isOwner && ( +
+ {isChinese ? '只有创建者或管理员可以修改此设置' : 'Only the creator or admin can change this setting'} +
+ )} +
+ + {/* ── Install Bridge ── */} + {isOwner && ( +
+

+ {isChinese ? '一键安装 Bridge' : 'One-click Bridge Install'} +

+

+ {isChinese + ? '下载预配置好 API Key 的安装包,在你本机运行即可完成安装 + 自启 + 连接。Windows 无需 Python。' + : 'Download a pre-configured installer. Run it on your local machine to install + autostart + connect. No Python required on Windows.'} +

+ + {/* Runtime (readonly) */} + {(() => { + const a = (agent as any)?.bridge_adapter || 'claude_code'; + const label = a === 'claude_code' ? 'Claude Code' : a === 'hermes' ? 'Hermes' : 'OpenClaw'; + return ( +
+ {isChinese ? '运行时:' : 'Runtime:'} + {label} + + {isChinese + ? `安装器会启用 [${a}] adapter` + : `installer will enable [${a}] adapter`} + +
+ ); + })()} + + {/* Platform selector */} +
+ {([ + { val: 'windows' as const, label: 'Windows' }, + { val: 'macos' as const, label: 'macOS' }, + { val: 'linux' as const, label: 'Linux' }, + ]).map(opt => { + const selected = installerPlatform === opt.val; + return ( + + ); + })} +
+ + {/* Warning about key regeneration */} +
+ {isChinese + ? '⚠ 每次下载都会重新生成 API Key,已在运行的 bridge 会断开。需要重新分发安装器给多台机器时请一次性配置完。' + : '⚠ Each download regenerates the API Key; any running bridge will disconnect. Set up all machines from a single download.'} +
+ + {/* Download action */} + {!installerConfirm ? ( + + ) : ( +
+
+ {isChinese ? '确认下载并重新生成 API Key?' : 'Confirm download and regenerate API Key?'} +
+
+ + +
+
+ )} + + {installerError && ( +
+ {installerError} +
+ )} + + {/* Post-download instructions */} + {installerDownloaded && ( +
+
+ {isChinese ? '✓ 已下载。在本机运行:' : '✓ Downloaded. Run it on your machine:'} +
+
+ + {runCommand} + + +
+
+ {isChinese + ? <>前置:先装 claude CLI 并登录(npm install -g @anthropic-ai/claude-code 然后 claude login)。 + : <>Prereq: install claude CLI and login first (npm install -g @anthropic-ai/claude-code then claude login).} +
+
+ )} +
+ )} + {/* ── Permissions ── */}

diff --git a/frontend/src/types/index.ts b/frontend/src/types/index.ts index a2c6064d3..673491517 100644 --- a/frontend/src/types/index.ts +++ b/frontend/src/types/index.ts @@ -38,6 +38,7 @@ export interface Agent { context_window_size?: number; agent_type?: 'native' | 'openclaw'; openclaw_last_seen?: string; + bridge_mode?: 'disabled' | 'enabled' | 'auto'; created_at: string; last_active_at?: string; } From 8dc1b1ce8f777f3e8f6b5ca6a65af7efc2858ff1 Mon Sep 17 00:00:00 2001 From: zhongyua Date: Wed, 22 Apr 2026 11:55:38 +0800 Subject: [PATCH 02/15] fix: add bridge_attached/bridge_detached to activity_action_enum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit session_dispatcher.py logs these two events when a bridge connects or disconnects, but the enum never included them — so every attach/detach raised InvalidTextRepresentationError in logs and the activity row was dropped. Adds both values via idempotent ALTER TYPE ... ADD VALUE IF NOT EXISTS, matching the pattern used in add_agentbay_enum_value. Co-Authored-By: Claude Opus 4.7 --- .../versions/add_bridge_activity_enum.py | 28 +++++++++++++++++++ backend/app/models/activity_log.py | 1 + 2 files changed, 29 insertions(+) create mode 100644 backend/alembic/versions/add_bridge_activity_enum.py diff --git a/backend/alembic/versions/add_bridge_activity_enum.py b/backend/alembic/versions/add_bridge_activity_enum.py new file mode 100644 index 000000000..13f1fbfa5 --- /dev/null +++ b/backend/alembic/versions/add_bridge_activity_enum.py @@ -0,0 +1,28 @@ +"""Add bridge_attached / bridge_detached to activity_action_enum. + +Revision ID: add_bridge_activity_enum +Revises: add_bridge_adapter + +session_dispatcher.py logs these two events when a bridge connects +or disconnects, but the enum never included them — so every +attach/detach produced an InvalidTextRepresentationError and the +row was dropped. This backfills the enum values; existing rows +are unaffected. 
+""" +from alembic import op + + +revision = 'add_bridge_activity_enum' +down_revision = 'add_bridge_adapter' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.execute("ALTER TYPE activity_action_enum ADD VALUE IF NOT EXISTS 'bridge_attached'") + op.execute("ALTER TYPE activity_action_enum ADD VALUE IF NOT EXISTS 'bridge_detached'") + + +def downgrade() -> None: + # PostgreSQL does not support removing values from an enum type. + pass diff --git a/backend/app/models/activity_log.py b/backend/app/models/activity_log.py index 13b682608..a0929592a 100644 --- a/backend/app/models/activity_log.py +++ b/backend/app/models/activity_log.py @@ -22,6 +22,7 @@ class AgentActivityLog(Base): "chat_reply", "tool_call", "feishu_msg_sent", "agent_msg_sent", "web_msg_sent", "task_created", "task_updated", "file_written", "error", "schedule_run", "heartbeat", "plaza_post", + "bridge_attached", "bridge_detached", name="activity_action_enum", create_constraint=False, ), From 51fc3ef6f209b64bde6a87cbb1a29deaa506f3c8 Mon Sep 17 00:00:00 2001 From: zhongyua Date: Wed, 22 Apr 2026 12:02:12 +0800 Subject: [PATCH 03/15] feat: bridge online/offline badge on AgentDetail + fill remaining enum gaps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a live bridge-connection indicator so users can tell at a glance whether their local bridge is running, instead of discovering offline state by failing to chat. Backend - GET /agents/{id}/bridge-status: reuses check_agent_access, returns {connected, applicable, bridge_version, adapters, connected_at, active_sessions}. Non-bridge agents return applicable=false. Frontend - agentApi.bridgeStatus() client method. - AgentDetail polls every 5s (only for agent_type=openclaw, only while page is foregrounded) and renders a colored dot inside the existing runtime badge — green when connected, red when offline, transparent while the first request is in flight. 
- Tooltip shows bridge version + active session count when connected, and a hint to install/start the bridge when offline. Enum sweep (fix) - Also adds local_session_start / local_session_done / local_session_error / reverse_tool_call / reverse_tool_result / bridge_installer_download to activity_action_enum. These were all referenced from bridge code but missing from the enum, so every bridge session was silently losing its audit trail. Co-Authored-By: Claude Opus 4.7 --- .../versions/add_bridge_session_enum.py | 45 +++++++++++++++++++ backend/app/api/agents.py | 32 +++++++++++++ backend/app/models/activity_log.py | 4 +- frontend/src/pages/AgentDetail.tsx | 41 ++++++++++++++--- frontend/src/services/api.ts | 10 +++++ 5 files changed, 126 insertions(+), 6 deletions(-) create mode 100644 backend/alembic/versions/add_bridge_session_enum.py diff --git a/backend/alembic/versions/add_bridge_session_enum.py b/backend/alembic/versions/add_bridge_session_enum.py new file mode 100644 index 000000000..c95accba6 --- /dev/null +++ b/backend/alembic/versions/add_bridge_session_enum.py @@ -0,0 +1,45 @@ +"""Add remaining bridge session/tool action types to activity_action_enum. + +Revision ID: add_bridge_session_enum +Revises: add_bridge_activity_enum + +Beyond bridge_attached/bridge_detached (added in the prior migration), +the bridge code also logs per-session events and reverse-tool-call +events. The enum was missing all of them, so every bridge session +silently dropped its audit trail. 
+ +Values added: + - bridge_installer_download (agents.py download_bridge_installer) + - local_session_start (session_dispatcher) + - local_session_done + - local_session_error + - reverse_tool_call (bridge-initiated tool calls) + - reverse_tool_result +""" +from alembic import op + + +revision = 'add_bridge_session_enum' +down_revision = 'add_bridge_activity_enum' +branch_labels = None +depends_on = None + + +_NEW_VALUES = ( + "bridge_installer_download", + "local_session_start", + "local_session_done", + "local_session_error", + "reverse_tool_call", + "reverse_tool_result", +) + + +def upgrade() -> None: + for v in _NEW_VALUES: + op.execute(f"ALTER TYPE activity_action_enum ADD VALUE IF NOT EXISTS '{v}'") + + +def downgrade() -> None: + # PostgreSQL does not support removing values from an enum type. + pass diff --git a/backend/app/api/agents.py b/backend/app/api/agents.py index 706db4604..c271ba0b6 100644 --- a/backend/app/api/agents.py +++ b/backend/app/api/agents.py @@ -877,6 +877,38 @@ async def download_bridge_installer( ) +@router.get("/{agent_id}/bridge-status") +async def get_bridge_status( + agent_id: uuid.UUID, + current_user: User = Depends(get_current_user), + db: AsyncSession = Depends(get_db), +): + """Return live bridge connection status for this agent. + + Used by the AgentDetail page to show an online/offline badge so + users don't have to discover bridge-offline state by failing to + chat. In-memory only (session_dispatcher._bridges), so correct + for the single-process backend; a Redis presence map would be + needed for multi-worker deploys. 
+ """ + agent, _access = await check_agent_access(db, current_user, agent_id) + if getattr(agent, "agent_type", "native") != "openclaw": + return {"connected": False, "applicable": False} + + from app.services.local_agent.session_dispatcher import dispatcher + info = dispatcher.get_bridge_info(str(agent_id)) + if info is None: + return {"connected": False, "applicable": True} + return { + "connected": True, + "applicable": True, + "bridge_version": info.get("bridge_version"), + "adapters": info.get("adapters") or [], + "connected_at": info.get("connected_at"), + "active_sessions": len(info.get("active_sessions") or []), + } + + @router.get("/{agent_id}/gateway-messages") async def list_gateway_messages( agent_id: uuid.UUID, diff --git a/backend/app/models/activity_log.py b/backend/app/models/activity_log.py index a0929592a..257af9b46 100644 --- a/backend/app/models/activity_log.py +++ b/backend/app/models/activity_log.py @@ -22,7 +22,9 @@ class AgentActivityLog(Base): "chat_reply", "tool_call", "feishu_msg_sent", "agent_msg_sent", "web_msg_sent", "task_created", "task_updated", "file_written", "error", "schedule_run", "heartbeat", "plaza_post", - "bridge_attached", "bridge_detached", + "bridge_attached", "bridge_detached", "bridge_installer_download", + "local_session_start", "local_session_done", "local_session_error", + "reverse_tool_call", "reverse_tool_result", name="activity_action_enum", create_constraint=False, ), diff --git a/frontend/src/pages/AgentDetail.tsx b/frontend/src/pages/AgentDetail.tsx index 6d3800e97..f0bfae5a2 100644 --- a/frontend/src/pages/AgentDetail.tsx +++ b/frontend/src/pages/AgentDetail.tsx @@ -1334,6 +1334,18 @@ function AgentDetailInner() { enabled: !!id, }); + // Live bridge status — only meaningful for bridge-style (agent_type=openclaw) agents. + // Polls while the user has the page open so the badge flips within a few seconds + // of the bridge connecting or dropping. 
+ const isBridgeAgent = (agent as any)?.agent_type === 'openclaw'; + const { data: bridgeStatus } = useQuery({ + queryKey: ['bridge-status', id], + queryFn: () => agentApi.bridgeStatus(id!), + enabled: !!id && isBridgeAgent, + refetchInterval: isBridgeAgent ? 5000 : false, + refetchIntervalInBackground: false, + }); + // ── Aware tab data: triggers ── const { data: awareTriggers = [], refetch: refetchTriggers } = useQuery({ queryKey: ['triggers', id], @@ -2822,12 +2834,31 @@ function AgentDetailInner() { {(agent as any).agent_type === 'openclaw' && (() => { const a = (agent as any).bridge_adapter || 'claude_code'; const label = a === 'claude_code' ? 'Claude Code' : a === 'hermes' ? 'Hermes' : 'OpenClaw'; + const connected = !!bridgeStatus?.connected; + // undefined while first request in flight → show neutral badge, no dot + const loading = bridgeStatus === undefined; + const dotColor = loading ? 'transparent' : connected ? '#22c55e' : '#ef4444'; + const title = loading + ? '' + : connected + ? `Bridge online (v${bridgeStatus?.bridge_version || '?'})${bridgeStatus?.active_sessions ? 
`, ${bridgeStatus.active_sessions} active session(s)` : ''}` + : 'Bridge offline — install or start the bridge on your local machine'; return ( - Bridge · {label} · Lab + + + Bridge · {label} · Lab + ); })()} {!(agent as any).is_expired && (agent as any).expires_at && ( diff --git a/frontend/src/services/api.ts b/frontend/src/services/api.ts index ad94c2def..51991bc46 100644 --- a/frontend/src/services/api.ts +++ b/frontend/src/services/api.ts @@ -256,6 +256,16 @@ export const agentApi = { gatewayMessages: (id: string) => request(`/agents/${id}/gateway-messages`), + + bridgeStatus: (id: string) => + request<{ + connected: boolean; + applicable: boolean; + bridge_version?: string; + adapters?: string[]; + connected_at?: string; + active_sessions?: number; + }>(`/agents/${id}/bridge-status`), }; // ─── Tasks ──────────────────────────────────────────── From 2fad498084d110170aa01745a15c01482a037acf Mon Sep 17 00:00:00 2001 From: zhongyua Date: Wed, 22 Apr 2026 12:06:46 +0800 Subject: [PATCH 04/15] feat: allow editing bridge_adapter post-creation with redownload prompt After creating an openclaw agent the runtime was frozen: even though the server/bridge supports claude_code/openclaw/hermes at runtime, the only way to change adapter was to delete the agent and re-create it. This change exposes runtime selection on the agent's OpenClaw Settings page. - schemas.AgentUpdate: add bridge_adapter: str | None with regex guard - agents.update_agent: silently drop bridge_adapter for non-openclaw agents instead of 422'ing (idempotent if UI sends it for native) - OpenClawSettings.tsx: replace readonly runtime badge with a 3-button selector; on change, PATCH /agents/{id}, invalidate cache, and show a yellow notice telling the user to redownload the installer OR edit ~/.clawith-bridge.toml to set enabled=true under [<adapter>] and restart the bridge. The server side of the handshake picks up the new adapter on next SessionStart regardless of which path the user takes. 
Co-Authored-By: Claude Opus 4.7 --- backend/app/api/agents.py | 6 ++ backend/app/schemas/schemas.py | 1 + frontend/src/pages/OpenClawSettings.tsx | 124 ++++++++++++++++++++---- 3 files changed, 111 insertions(+), 20 deletions(-) diff --git a/backend/app/api/agents.py b/backend/app/api/agents.py index c271ba0b6..9bdcb73a5 100644 --- a/backend/app/api/agents.py +++ b/backend/app/api/agents.py @@ -483,6 +483,12 @@ async def update_agent( update_data = data.model_dump(exclude_unset=True) + # bridge_adapter: only meaningful for bridge-style agents. Silently + # drop the field for native agents instead of erroring, so generic + # bulk-update flows don't have to know the agent type. + if "bridge_adapter" in update_data and getattr(agent, "agent_type", "native") != "openclaw": + update_data.pop("bridge_adapter", None) + # expires_at: admin only if "expires_at" in update_data: if not is_admin: diff --git a/backend/app/schemas/schemas.py b/backend/app/schemas/schemas.py index 56c09ea2e..59c540363 100644 --- a/backend/app/schemas/schemas.py +++ b/backend/app/schemas/schemas.py @@ -299,6 +299,7 @@ class AgentUpdate(BaseModel): timezone: str | None = None expires_at: datetime | None = None # Admin only — extend agent expiry bridge_mode: str | None = Field(default=None, pattern="^(disabled|enabled|auto)$") + bridge_adapter: str | None = Field(default=None, pattern="^(claude_code|openclaw|hermes)$") class AgentStatusOut(BaseModel): diff --git a/frontend/src/pages/OpenClawSettings.tsx b/frontend/src/pages/OpenClawSettings.tsx index ef11569ed..65f846d13 100644 --- a/frontend/src/pages/OpenClawSettings.tsx +++ b/frontend/src/pages/OpenClawSettings.tsx @@ -399,26 +399,8 @@ export default function OpenClawSettings({ agent, agentId }: OpenClawSettingsPro : 'Download a pre-configured installer. Run it on your local machine to install + autostart + connect. No Python required on Windows.'}

- {/* Runtime (readonly) */} - {(() => { - const a = (agent as any)?.bridge_adapter || 'claude_code'; - const label = a === 'claude_code' ? 'Claude Code' : a === 'hermes' ? 'Hermes' : 'OpenClaw'; - return ( -
- {isChinese ? '运行时:' : 'Runtime:'} - {label} - - {isChinese - ? `安装器会启用 [${a}] adapter` - : `installer will enable [${a}] adapter`} - -
- ); - })()} + {/* Runtime selector (editable) */} + {/* Platform selector */}
@@ -714,3 +696,105 @@ export default function OpenClawSettings({ agent, agentId }: OpenClawSettingsPro
); } + +// ──────────────────────────────────────────────────────────────── +// RuntimeSelector +// +// Lets the creator/admin change bridge_adapter after agent creation. +// When it changes, the already-installed bridge keeps advertising the +// old adapter until the user reinstalls — we surface that clearly +// instead of silently drifting. +// ──────────────────────────────────────────────────────────────── +interface RuntimeSelectorProps { + agent: any; + agentId: string; + isChinese: boolean; +} + +function RuntimeSelector({ agent, agentId, isChinese }: RuntimeSelectorProps) { + const qc = useQueryClient(); + const current: 'claude_code' | 'openclaw' | 'hermes' = + (agent?.bridge_adapter as any) || 'claude_code'; + const [saving, setSaving] = useState(null); + const [justChanged, setJustChanged] = useState(false); + + const OPTIONS: { value: 'claude_code' | 'openclaw' | 'hermes'; label: string }[] = [ + { value: 'claude_code', label: 'Claude Code' }, + { value: 'openclaw', label: 'OpenClaw' }, + { value: 'hermes', label: 'Hermes' }, + ]; + + const onSelect = async (next: 'claude_code' | 'openclaw' | 'hermes') => { + if (next === current || saving) return; + setSaving(next); + try { + await agentApi.update(agentId, { bridge_adapter: next } as any); + await qc.invalidateQueries({ queryKey: ['agent', agentId] }); + setJustChanged(true); + } catch (e) { + console.error('Failed to update runtime', e); + alert(isChinese ? '切换 runtime 失败,请稍后重试' : 'Failed to change runtime, please retry'); + } finally { + setSaving(null); + } + }; + + return ( +
+
+ + {isChinese ? '运行时' : 'Runtime'} + + + {isChinese ? '(创建后可切换)' : '(changeable after creation)'} + +
+
+ {OPTIONS.map(opt => { + const selected = current === opt.value; + const isLoading = saving === opt.value; + return ( + + ); + })} +
+ {justChanged && ( +
+ {isChinese ? ( + <> + 已切换到 {OPTIONS.find(o => o.value === current)?.label}。 + 之前安装过的 bridge 仍然只启用旧 runtime,请 重新下载安装器 并在本机运行, + 或在 ~/.clawith-bridge.toml 里把 [{current}] 下的 enabled 改成 true 后重启 bridge。 + + ) : ( + <> + Switched to {OPTIONS.find(o => o.value === current)?.label}. + Your already-installed bridge still only enables the old runtime — + redownload the installer below and run it again on your local machine, + or edit ~/.clawith-bridge.toml to set enabled = true under [{current}] and restart the bridge. + + )} +
+ )} +
+ ); +} From 64cb73d2b612a0c604947adc147b0de9189d2b61 Mon Sep 17 00:00:00 2001 From: zhongyua Date: Wed, 22 Apr 2026 12:10:08 +0800 Subject: [PATCH 05/15] feat: surface bridge adapter mismatch proactively instead of on chat fail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the installed bridge advertises an adapter different from what the agent is configured for, server-side dispatch rejects with a chat-time error. That's correct but late — users only discover the problem by failing to chat, and the remediation (redownload or edit TOML) lives in a chat bubble they likely dismissed. Two changes, both driven by /agents/{id}/bridge-status polling: - OpenClawSettings: RuntimeSelector now accepts the live bridge_status and renders a red banner whenever bridge.adapters doesn't include the selected runtime. The banner lists what the bridge actually advertises, points at the installer below, and shows the TOML escape hatch. The previous "justChanged" yellow banner is kept but only fires when the bridge is offline at the moment of switching (no live signal to rely on). - AgentDetail bridge badge: three-state dot. Green = online + match, yellow = online + mismatch (with amber glow), red = offline. Tooltip spells out the mismatch and points at Settings. The prior two-state (green/red) would silently light green even when the agent's adapter was unreachable. 
Co-Authored-By: Claude Opus 4.7 --- frontend/src/pages/AgentDetail.tsx | 22 ++++++-- frontend/src/pages/OpenClawSettings.tsx | 74 ++++++++++++++++++++----- 2 files changed, 78 insertions(+), 18 deletions(-) diff --git a/frontend/src/pages/AgentDetail.tsx b/frontend/src/pages/AgentDetail.tsx index f0bfae5a2..3831bbf86 100644 --- a/frontend/src/pages/AgentDetail.tsx +++ b/frontend/src/pages/AgentDetail.tsx @@ -2837,12 +2837,24 @@ function AgentDetailInner() { const connected = !!bridgeStatus?.connected; // undefined while first request in flight → show neutral badge, no dot const loading = bridgeStatus === undefined; - const dotColor = loading ? 'transparent' : connected ? '#22c55e' : '#ef4444'; + const liveAdapters: string[] = Array.isArray(bridgeStatus?.adapters) ? bridgeStatus!.adapters! : []; + const mismatch = connected && liveAdapters.length > 0 && !liveAdapters.includes(a); + // green = online+match, yellow = online+mismatch, red = offline, transparent = loading + const dotColor = loading + ? 'transparent' + : !connected + ? '#ef4444' + : mismatch + ? '#f59e0b' + : '#22c55e'; const title = loading ? '' - : connected - ? `Bridge online (v${bridgeStatus?.bridge_version || '?'})${bridgeStatus?.active_sessions ? `, ${bridgeStatus.active_sessions} active session(s)` : ''}` - : 'Bridge offline — install or start the bridge on your local machine'; + : !connected + ? 'Bridge offline — install or start the bridge on your local machine' + : mismatch + ? `Runtime mismatch: agent expects ${label}, bridge advertises ${liveAdapters.join(', ')}. Redownload installer from Settings.` + : `Bridge online (v${bridgeStatus?.bridge_version || '?'})${bridgeStatus?.active_sessions ? `, ${bridgeStatus.active_sessions} active session(s)` : ''}`; + const glow = connected && !mismatch ? '0 0 4px rgba(34,197,94,0.9)' : mismatch ? 
'0 0 4px rgba(245,158,11,0.9)' : undefined; return ( Bridge · {label} · Lab diff --git a/frontend/src/pages/OpenClawSettings.tsx b/frontend/src/pages/OpenClawSettings.tsx index 65f846d13..557a4031f 100644 --- a/frontend/src/pages/OpenClawSettings.tsx +++ b/frontend/src/pages/OpenClawSettings.tsx @@ -74,6 +74,16 @@ export default function OpenClawSettings({ agent, agentId }: OpenClawSettingsPro enabled: !!agentId, }); + // Live bridge status — used to detect adapter mismatch (agent expects + // one runtime, installed bridge advertises another). + const { data: bridgeStatus } = useQuery({ + queryKey: ['bridge-status', agentId], + queryFn: () => agentApi.bridgeStatus(agentId), + enabled: !!agentId, + refetchInterval: 5000, + refetchIntervalInBackground: false, + }); + const handleScopeChange = async (newScope: string) => { try { await fetchAuth(`/agents/${agentId}/permissions`, { @@ -400,7 +410,7 @@ export default function OpenClawSettings({ agent, agentId }: OpenClawSettingsPro

{/* Runtime selector (editable) */} - + {/* Platform selector */}
@@ -709,9 +719,16 @@ interface RuntimeSelectorProps { agent: any; agentId: string; isChinese: boolean; + bridgeStatus?: { connected: boolean; applicable: boolean; adapters?: string[] }; } -function RuntimeSelector({ agent, agentId, isChinese }: RuntimeSelectorProps) { +const ADAPTER_LABELS: Record = { + claude_code: 'Claude Code', + openclaw: 'OpenClaw', + hermes: 'Hermes', +}; + +function RuntimeSelector({ agent, agentId, isChinese, bridgeStatus }: RuntimeSelectorProps) { const qc = useQueryClient(); const current: 'claude_code' | 'openclaw' | 'hermes' = (agent?.bridge_adapter as any) || 'claude_code'; @@ -719,11 +736,18 @@ function RuntimeSelector({ agent, agentId, isChinese }: RuntimeSelectorProps) { const [justChanged, setJustChanged] = useState(false); const OPTIONS: { value: 'claude_code' | 'openclaw' | 'hermes'; label: string }[] = [ - { value: 'claude_code', label: 'Claude Code' }, - { value: 'openclaw', label: 'OpenClaw' }, - { value: 'hermes', label: 'Hermes' }, + { value: 'claude_code', label: ADAPTER_LABELS.claude_code }, + { value: 'openclaw', label: ADAPTER_LABELS.openclaw }, + { value: 'hermes', label: ADAPTER_LABELS.hermes }, ]; + // Live mismatch: bridge is connected but its TOML enables different adapters + // than what the agent expects. Auto-clears once the user reinstalls/reconfigures + // and the next poll reports the right adapter. + const liveAdapters: string[] = Array.isArray(bridgeStatus?.adapters) ? bridgeStatus!.adapters! : []; + const liveMismatch = !!(bridgeStatus?.connected && liveAdapters.length > 0 && !liveAdapters.includes(current)); + const bridgeIsOn = !!bridgeStatus?.connected; + const onSelect = async (next: 'claude_code' | 'openclaw' | 'hermes') => { if (next === current || saving) return; setSaving(next); @@ -773,7 +797,31 @@ function RuntimeSelector({ agent, agentId, isChinese }: RuntimeSelectorProps) { ); })}
- {justChanged && ( + {liveMismatch && ( +
+ {isChinese ? ( + <> + ⚠️ Runtime 不匹配:Agent 期望 {ADAPTER_LABELS[current]}, + 但本机 bridge 实际启用的是 {liveAdapters.map(a => ADAPTER_LABELS[a] || a).join(' / ')}。 + 此时发消息会报 runtime 不可用。请 重新下载下方的安装器 并在本机运行, + 或编辑 ~/.clawith-bridge.toml[{current}] 下的 enabled 改成 true(同时把其他 runtime 的 enabled 改成 false)后重启 bridge。 + + ) : ( + <> + ⚠️ Runtime mismatch: this agent expects {ADAPTER_LABELS[current]}, + but the bridge installed on your machine is advertising {liveAdapters.map(a => ADAPTER_LABELS[a] || a).join(' / ')}. + Chatting will fail with "runtime not available". Redownload the installer below and run it again, + or edit ~/.clawith-bridge.toml to set enabled = true under [{current}] + (and false for the others) and restart the bridge. + + )} +
+ )} + {!liveMismatch && justChanged && !bridgeIsOn && (
{isChinese ? ( <> - 已切换到 {OPTIONS.find(o => o.value === current)?.label}。 - 之前安装过的 bridge 仍然只启用旧 runtime,请 重新下载安装器 并在本机运行, - 或在 ~/.clawith-bridge.toml 里把 [{current}] 下的 enabled 改成 true 后重启 bridge。 + 已切换到 {ADAPTER_LABELS[current]}。Bridge 当前离线, + 无法验证它是否已启用新 runtime。请 重新下载下方的安装器 并在本机运行, + 或编辑 ~/.clawith-bridge.toml[{current}] 下的 enabled 改成 true 后重启 bridge。 ) : ( <> - Switched to {OPTIONS.find(o => o.value === current)?.label}. - Your already-installed bridge still only enables the old runtime — - redownload the installer below and run it again on your local machine, - or edit ~/.clawith-bridge.toml to set enabled = true under [{current}] and restart the bridge. + Switched to {ADAPTER_LABELS[current]}. The bridge is offline right now, + so we can't verify it has the new runtime enabled. Redownload the installer below + and run it again, or edit ~/.clawith-bridge.toml to set enabled = true under + [{current}] and restart the bridge. )}
From 387a76d37112c36bba70a23f395655fd736a0ee9 Mon Sep 17 00:00:00 2001 From: zhongyua Date: Wed, 22 Apr 2026 13:01:06 +0800 Subject: [PATCH 06/15] fix(security): close Codex-flagged bridge auth gaps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues from external review on the bridge PR: - Remove GET /api/admin/bridge/status from bridge_ws.py. It had no auth enforcement and exposed every connected bridge's agent_id, version, and active sessions to anyone who could reach the backend. The comment admitted "V1 doesn't do auth, add later" — 'later' was never going to be safer than now. The per-agent /agents/{id}/bridge-status endpoint added with the bridge status badge already covers the legitimate use case and is auth-gated. - Require PUBLIC_BASE_URL for bridge-installer downloads instead of falling back to the request's Host / X-Forwarded-Host header. An authenticated attacker could set those to an attacker-controlled hostname and have the installer bake that into the bridge's dial-home URL. Validation now runs before the DB mutation, so a misconfiguration doesn't rotate the API key and leave the agent wedged. .env.example calls out the requirement explicitly. Co-Authored-By: Claude Opus 4.7 --- .env.example | 9 +++++---- backend/app/api/agents.py | 32 ++++++++++++++++++++------------ backend/app/api/bridge_ws.py | 16 ---------------- 3 files changed, 25 insertions(+), 32 deletions(-) diff --git a/.env.example b/.env.example index 7317369ea..6c0445490 100644 --- a/.env.example +++ b/.env.example @@ -28,10 +28,11 @@ JINA_API_KEY= # Exa API key (for exa_search tool and web_search Exa engine — get one at https://exa.ai) EXA_API_KEY= -# Public app URL used in user-facing links, such as password reset emails. -# Leave empty for auto-discovery from the browser request. -# Set explicitly for production (e.g. 
https://your-domain.com) — required for -# background tasks like webhook URLs and email links that have no request context. +# Public app URL used in user-facing links, password reset emails, and the +# bridge installer's dial-home address. Set explicitly (e.g. http://localhost:3008 +# for dev, https://your-domain.com for prod). Bridge installer downloads refuse +# to proceed when this is unset, because falling back to the request Host header +# lets a malicious request bake an attacker-controlled hostname into the installer. PUBLIC_BASE_URL= diff --git a/backend/app/api/agents.py b/backend/app/api/agents.py index 9bdcb73a5..9fe329046 100644 --- a/backend/app/api/agents.py +++ b/backend/app/api/agents.py @@ -7,7 +7,7 @@ from datetime import datetime, timezone from pathlib import Path -from fastapi import APIRouter, Depends, HTTPException, Request, Response, status +from fastapi import APIRouter, Depends, HTTPException, Response, status from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession @@ -794,7 +794,6 @@ async def generate_or_reset_api_key( @router.post("/{agent_id}/bridge-installer") async def download_bridge_installer( agent_id: uuid.UUID, - request: Request, platform: str, current_user: User = Depends(get_current_user), db: AsyncSession = Depends(get_db), @@ -821,6 +820,25 @@ async def download_bridge_installer( if getattr(agent, "agent_type", "native") != "openclaw": raise HTTPException(status_code=400, detail="Bridge installer is only available for OpenClaw agents") + # Resolve server URL from configuration only. We deliberately do NOT fall + # back to the Host / X-Forwarded-Host header here: a malicious request + # could set those to an attacker-controlled hostname and the installer + # would bake it in, making the bridge dial home to the wrong server. + # Validate *before* mutating the DB so a misconfiguration doesn't leave + # the agent with a rotated key and no way to deliver the installer. 
+ settings = get_settings() + http_base = (settings.PUBLIC_BASE_URL or "").strip().rstrip("/") + if not http_base: + raise HTTPException( + status_code=500, + detail=( + "PUBLIC_BASE_URL is not configured on the server. Set it to the " + "externally-reachable URL (e.g. https://clawith.example.com) " + "before downloading a bridge installer." + ), + ) + ws_url = derive_ws_url(http_base) + # Regenerate the key (same pattern as /{agent_id}/api-key). This invalidates # any previously-downloaded installer. raw_key = f"oc-{secrets.token_urlsafe(32)}" @@ -833,16 +851,6 @@ async def download_bridge_installer( await db.commit() - # Resolve server URL. Prefer the configured PUBLIC_BASE_URL; fall back to - # the request's Host header (useful for dev / local testing). - settings = get_settings() - http_base = (settings.PUBLIC_BASE_URL or "").rstrip("/") - if not http_base: - forwarded_proto = request.headers.get("x-forwarded-proto", request.url.scheme) - forwarded_host = request.headers.get("x-forwarded-host", request.headers.get("host", f"{request.url.hostname}:{request.url.port or 80}")) - http_base = f"{forwarded_proto}://{forwarded_host}" - ws_url = derive_ws_url(http_base) - try: payload, filename, content_type = render_installer( platform=platform, # type: ignore[arg-type] diff --git a/backend/app/api/bridge_ws.py b/backend/app/api/bridge_ws.py index 4bfba3dc5..a3236b072 100644 --- a/backend/app/api/bridge_ws.py +++ b/backend/app/api/bridge_ws.py @@ -188,19 +188,3 @@ async def _close_with_error(ws: WebSocket, message: str, code: int = 4000) -> No await ws.close(code=code) except Exception: pass - - -# ── Admin helper ────────────────────────────────────────────────────── - -@router.get("/api/admin/bridge/status") -async def bridge_status(): - """Return currently-connected bridges (for ops visibility). - - No auth enforcement here — keep behind existing admin-router auth - in a future iteration. V1 is local-first, so this is for debug only. 
- """ - connected = dispatcher.list_connected() - return { - "count": len(connected), - "bridges": [dispatcher.get_bridge_info(a) for a in connected], - } From dfe781f76d7e5e7e95e47c6c8df0bed3f5e5b1e2 Mon Sep 17 00:00:00 2001 From: zhongyua Date: Wed, 22 Apr 2026 13:06:04 +0800 Subject: [PATCH 07/15] test: add bridge-scoped tests for installer, schema, and enum migrations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three focused test files covering the code paths most likely to silently regress the bridge feature: - test_bridge_installer_template.py: render_installer(adapter=...) with each of claude_code/openclaw/hermes produces a TOML section block with exactly that adapter's `enabled = true` and the others `false`; unknown adapters fall back to claude_code; agent_name with CR/LF is flattened so it can't escape the `# Agent:` comment line. - test_agent_schema_bridge_adapter.py: AgentCreate/AgentUpdate reject typos, wrong case, wrong separators, and injection-style values. - test_bridge_migrations_idempotent.py: loads each enum migration by file path (alembic/versions isn't a package), mocks op.execute, and asserts every emitted statement carries ADD VALUE IF NOT EXISTS — the clause that lets the backend boot re-run migrations safely on every container restart. Also cross-checks that every bridge action type logged by the session code is present in the enum. Also adds the same regex constraint to AgentCreate.bridge_adapter that was already on AgentUpdate.bridge_adapter — previously Create accepted arbitrary strings, so a typo could end up in the DB and only surface at chat-time when the dispatcher couldn't find a matching adapter. 
Co-Authored-By: Claude Opus 4.7 --- backend/app/schemas/schemas.py | 6 +- .../tests/test_agent_schema_bridge_adapter.py | 67 ++++++++++ .../tests/test_bridge_installer_template.py | 117 ++++++++++++++++++ .../test_bridge_migrations_idempotent.py | 99 +++++++++++++++ 4 files changed, 288 insertions(+), 1 deletion(-) create mode 100644 backend/tests/test_agent_schema_bridge_adapter.py create mode 100644 backend/tests/test_bridge_installer_template.py create mode 100644 backend/tests/test_bridge_migrations_idempotent.py diff --git a/backend/app/schemas/schemas.py b/backend/app/schemas/schemas.py index 59c540363..9ca0b607e 100644 --- a/backend/app/schemas/schemas.py +++ b/backend/app/schemas/schemas.py @@ -205,7 +205,11 @@ class UserUpdate(BaseModel): class AgentCreate(BaseModel): name: str = Field(min_length=2, max_length=100, description="Agent name, 2-100 characters") agent_type: str = "native" # native | openclaw - bridge_adapter: str | None = None # claude_code | openclaw | hermes (only for agent_type=openclaw) + bridge_adapter: str | None = Field( + default=None, + pattern="^(claude_code|openclaw|hermes)$", + description="claude_code | openclaw | hermes (only for agent_type=openclaw)", + ) role_description: str = Field(default="", max_length=500, description="Role description, max 500 characters") bio: str | None = None welcome_message: str | None = None diff --git a/backend/tests/test_agent_schema_bridge_adapter.py b/backend/tests/test_agent_schema_bridge_adapter.py new file mode 100644 index 000000000..48b30cb5e --- /dev/null +++ b/backend/tests/test_agent_schema_bridge_adapter.py @@ -0,0 +1,67 @@ +"""Pydantic-level tests for the bridge_adapter field on AgentCreate/AgentUpdate. + +The field is constrained by a regex pattern to prevent arbitrary strings +from reaching the DB — a typo like "claud_code" would silently save and +only blow up at chat-time when the dispatcher couldn't find a matching +adapter. 
+""" + +import pytest +from pydantic import ValidationError + +from app.schemas.schemas import AgentCreate, AgentUpdate + + +_VALID = ["claude_code", "openclaw", "hermes"] +_INVALID = [ + "claud_code", # typo + "CLAUDE_CODE", # wrong case — regex is case-sensitive + "claude-code", # wrong separator + "random", + " claude_code", # leading whitespace + "claude_code ", # trailing whitespace + "", # empty string + "claude_code;drop", # injection-style +] + + +# ── AgentUpdate.bridge_adapter ───────────────────────────────────────── + + +@pytest.mark.parametrize("value", _VALID) +def test_agent_update_accepts_valid_adapter(value): + m = AgentUpdate(bridge_adapter=value) + assert m.bridge_adapter == value + + +@pytest.mark.parametrize("value", _INVALID) +def test_agent_update_rejects_invalid_adapter(value): + with pytest.raises(ValidationError): + AgentUpdate(bridge_adapter=value) + + +def test_agent_update_allows_none(): + # None is the "don't change it" sentinel — must not trip the regex. + m = AgentUpdate(bridge_adapter=None) + assert m.bridge_adapter is None + + +def test_agent_update_allows_field_absent(): + # Equivalent to None — exclude_unset semantics in the route rely on this. 
+ m = AgentUpdate() + assert m.bridge_adapter is None + + +# ── AgentCreate.bridge_adapter (same pattern) ────────────────────────── + + +@pytest.mark.parametrize("value", _VALID) +def test_agent_create_accepts_valid_adapter(value): + m = AgentCreate(name="test", bridge_adapter=value) + assert m.bridge_adapter == value + + +@pytest.mark.parametrize("value", _INVALID) +def test_agent_create_rejects_invalid_adapter(value): + with pytest.raises(ValidationError): + AgentCreate(name="test", bridge_adapter=value) diff --git a/backend/tests/test_bridge_installer_template.py b/backend/tests/test_bridge_installer_template.py new file mode 100644 index 000000000..f2f0e38e2 --- /dev/null +++ b/backend/tests/test_bridge_installer_template.py @@ -0,0 +1,117 @@ +"""Tests for bridge installer template rendering. + +The server-side installer template is the only place that decides which +bridge adapter (claude_code / openclaw / hermes) will be enabled on the +user's machine. A regression here would silently put the wrong runtime +into `~/.clawith-bridge.toml` and the bridge would advertise an adapter +the agent isn't configured for — visible only as a chat-time error. 
+""" + +import pytest + +from app.services.local_agent.installer_templates import ( + _KNOWN_ADAPTERS, + _adapter_enabled_flags, + derive_ws_url, + render_installer, +) + + +# ── _adapter_enabled_flags ───────────────────────────────────────────── + + +@pytest.mark.parametrize("adapter,expected_true", [ + ("claude_code", "cc_enabled"), + ("openclaw", "oc_enabled"), + ("hermes", "hm_enabled"), +]) +def test_adapter_flags_exactly_one_true(adapter, expected_true): + flags = _adapter_enabled_flags(adapter) + assert flags[expected_true] == "true" + for k, v in flags.items(): + if k != expected_true: + assert v == "false", f"expected {k}=false for adapter={adapter}, got {v}" + + +def test_adapter_flags_unknown_defaults_to_claude_code(): + # Defensive: if a caller passes a stale/unknown name we should not + # end up with every adapter disabled — fall back to claude_code. + flags = _adapter_enabled_flags("not_a_real_adapter") + assert flags["cc_enabled"] == "true" + assert flags["oc_enabled"] == "false" + assert flags["hm_enabled"] == "false" + + +# ── Unix shell template (linux/macos) ────────────────────────────────── + + +@pytest.mark.parametrize("adapter", ["claude_code", "openclaw", "hermes"]) +def test_render_installer_linux_only_selected_adapter_enabled(adapter): + payload, filename, content_type = render_installer( + platform="linux", + server_url="ws://localhost:8000", + api_key="oc-test-key", + agent_name="test-agent", + adapter=adapter, + ) + + script = payload.decode("utf-8") + assert filename == "install-clawith-bridge.sh" + assert content_type.startswith("text/x-shellscript") + + sections = { + "claude_code": "[claude_code]\nenabled = ", + "hermes": "[hermes]\nenabled = ", + "openclaw": "[openclaw]\nenabled = ", + } + for name, header in sections.items(): + expected = "true" if name == adapter else "false" + line = header + expected + assert line in script, f"expected {line!r} in generated TOML for adapter={adapter}" + + +def 
test_render_installer_agent_name_newlines_stripped(): + # agent_name lands in a bash `# Agent: ...` comment. CR/LF in a + # user-controlled name must be replaced so the comment can't escape + # onto a new line that bash would execute. + payload, _, _ = render_installer( + platform="linux", + server_url="ws://localhost:8000", + api_key="oc-x", + agent_name="evil\nrm -rf /\r\necho pwned", + adapter="claude_code", + ) + script = payload.decode("utf-8") + + # Locate the single line containing "# Agent: " and verify the entire + # injected payload is flattened onto that one line — no stray \n or \r. + agent_line = next(line for line in script.splitlines() if line.startswith("# Agent:")) + assert "rm -rf" in agent_line # sanity: content survived + assert "\n" not in agent_line # splitlines guarantees this, kept for intent + assert "\r" not in agent_line + + +# ── derive_ws_url ────────────────────────────────────────────────────── + + +@pytest.mark.parametrize("http_base,expected", [ + ("http://localhost:8000", "ws://localhost:8000"), + ("https://clawith.ai", "wss://clawith.ai"), + ("https://clawith.ai:443", "wss://clawith.ai:443"), + ("ws://already", "ws://already"), + ("wss://already", "wss://already"), +]) +def test_derive_ws_url(http_base, expected): + assert derive_ws_url(http_base) == expected + + +# ── sanity: _KNOWN_ADAPTERS matches the schema regex ─────────────────── + + +def test_known_adapters_matches_schema_regex(): + # If someone adds a new adapter here but forgets to update + # schemas.AgentCreate / AgentUpdate's regex, PATCH will 422. 
+ from app.schemas.schemas import AgentUpdate + + for adapter in _KNOWN_ADAPTERS: + AgentUpdate(bridge_adapter=adapter) # must not raise diff --git a/backend/tests/test_bridge_migrations_idempotent.py b/backend/tests/test_bridge_migrations_idempotent.py new file mode 100644 index 000000000..76d986b9c --- /dev/null +++ b/backend/tests/test_bridge_migrations_idempotent.py @@ -0,0 +1,99 @@ +"""Regression tests for the bridge enum migrations. + +`ALTER TYPE ... ADD VALUE` on PostgreSQL is NOT transactional and NOT +idempotent without the `IF NOT EXISTS` clause. We run migrations on +every backend startup, so dropping that clause would make the backend +fail to boot on every restart after the first — a silent footgun that's +easy to introduce by copy-pasting. + +Instead of spinning up a real Postgres here, we mock `op.execute` to +capture every SQL string the migration emits, then assert the safety +clauses are there. +""" + +import importlib.util +import sys +from pathlib import Path +from unittest import mock + +import pytest + + +# `alembic/versions` is not an importable package (no __init__.py), so we +# load each migration file directly by path. 
+_VERSIONS_DIR = Path(__file__).resolve().parent.parent / "alembic" / "versions" + +_MIGRATIONS_UNDER_TEST = [ + ("add_bridge_activity_enum", 2), + ("add_bridge_session_enum", 6), +] + + +def _load(name: str): + path = _VERSIONS_DIR / f"{name}.py" + spec = importlib.util.spec_from_file_location(f"_test_mig_{name}", path) + mod = importlib.util.module_from_spec(spec) # type: ignore[arg-type] + sys.modules[spec.name] = mod # type: ignore[union-attr] + spec.loader.exec_module(mod) # type: ignore[union-attr] + return mod + + +@pytest.mark.parametrize("module_path,expected_stmts", _MIGRATIONS_UNDER_TEST) +def test_upgrade_emits_only_idempotent_alters(module_path, expected_stmts): + mod = _load(module_path) + with mock.patch.object(mod.op, "execute") as m_exec: + mod.upgrade() + + calls = [c.args[0] for c in m_exec.call_args_list] + assert len(calls) == expected_stmts, ( + f"{module_path}.upgrade() emitted {len(calls)} statements; " + f"expected {expected_stmts}. If you added a new enum value, " + "bump the expected count here." + ) + + for sql in calls: + # Every statement must be a no-op when the value already exists — + # that's what lets us replay the migration on every backend boot. + assert sql.startswith("ALTER TYPE "), f"unexpected non-enum stmt: {sql!r}" + assert "ADD VALUE IF NOT EXISTS" in sql, ( + f"migration statement is not idempotent (missing " + f"'IF NOT EXISTS'): {sql!r}" + ) + + +@pytest.mark.parametrize("module_path,_n", _MIGRATIONS_UNDER_TEST) +def test_upgrade_can_be_invoked_twice(module_path, _n): + # Because every statement carries IF NOT EXISTS, calling upgrade() + # twice back-to-back must hit op.execute twice the emit count + # without raising. 
+ mod = _load(module_path) + with mock.patch.object(mod.op, "execute") as m_exec: + mod.upgrade() + mod.upgrade() + assert m_exec.call_count >= 2 + + +@pytest.mark.parametrize("module_path,_n", _MIGRATIONS_UNDER_TEST) +def test_downgrade_is_a_no_op(module_path, _n): + # Postgres can't remove enum values; the migration doc acknowledges + # this and downgrade() must not emit DDL that would fail. + mod = _load(module_path) + with mock.patch.object(mod.op, "execute") as m_exec: + mod.downgrade() + assert m_exec.call_count == 0 + + +def test_session_enum_covers_all_bridge_session_events(): + # Mirror check: every action_type produced by the bridge session code + # must be present in the enum migration. If someone introduces a new + # event type (e.g. local_session_timeout) this test will remind them + # to migrate the enum before shipping. + from app.models.activity_log import AgentActivityLog + + enum_values = AgentActivityLog.__table__.c.action_type.type.enums + assert set(enum_values).issuperset({ + "bridge_attached", "bridge_detached", + "bridge_installer_download", + "local_session_start", "local_session_done", "local_session_error", + "reverse_tool_call", "reverse_tool_result", + }) From 25743ba64ec568392577d886be62f5796199c312 Mon Sep 17 00:00:00 2001 From: zhongyua Date: Wed, 22 Apr 2026 13:37:28 +0800 Subject: [PATCH 08/15] =?UTF-8?q?fix(bridge):=20address=20Codex=20P1s=20?= =?UTF-8?q?=E2=80=94=20rotation=20rollback=20+=20subprocess=20timeout=20ki?= =?UTF-8?q?ll?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two independent error-path bugs flagged in the PR review, both visible only when something else has already gone wrong, both leaving the system in a wedged state with no user-driven recovery: 1. bridge-installer: defer API key rotation until render_installer() succeeds. 
The Windows exe ships bundled in the backend image; if an operator forgets to build it, render_installer raises FileNotFoundError and we return 503. But we used to commit the rotated key BEFORE building, so that 503 left the caller's already-running bridge with a dead token and no installer — the only recovery was a manual admin key reset. Now we generate the candidate key, build the payload, and only then commit the rotation + bridge_mode auto-enable. 2. SubprocessAdapter: on timeout, the finally block did `await proc.wait()` without first terminating the child. If the CLI kept running past timeout_s (e.g. hanging on network I/O), `wait()` blocked forever and the session slot stayed held until session_manager's outer cancel fired — i.e. timeout_s wasn't actually an upper bound. Extracted the terminate→grace→kill sequence into `_terminate_proc()` and call it from both the streaming finally block and cancel(), so timeout_s is now an actual bound regardless of what the child is doing. Note: the bridge-side fix changes code that gets baked into the bundled Windows exe (`backend/app/static/bridge/clawith-bridge.exe`). That exe will need to be rebuilt + dropped back in before the fix reaches end users on Windows; the source fix is what's under review here. Co-Authored-By: Claude Opus 4.7 --- backend/app/api/agents.py | 24 ++++++++------ bridge/clawith_bridge/adapters/base.py | 44 +++++++++++++++++++------- 2 files changed, 46 insertions(+), 22 deletions(-) diff --git a/backend/app/api/agents.py b/backend/app/api/agents.py index 9fe329046..0ff7bd2d5 100644 --- a/backend/app/api/agents.py +++ b/backend/app/api/agents.py @@ -839,17 +839,12 @@ async def download_bridge_installer( ) ws_url = derive_ws_url(http_base) - # Regenerate the key (same pattern as /{agent_id}/api-key). This invalidates - # any previously-downloaded installer. + # Generate a candidate key but DO NOT persist it yet. If the installer + # build fails downstream (e.g. 
bundled Windows exe missing → 503), we + # must not have invalidated the bridge's currently-working token — + # otherwise the user is left with a dead bridge AND no usable + # installer, and the only recovery is an admin manual reset. raw_key = f"oc-{secrets.token_urlsafe(32)}" - agent.api_key_hash = hashlib.sha256(raw_key.encode()).hexdigest() - - # Auto-enable bridge_mode if currently disabled — the user is clearly trying - # to set up a bridge, so the disabled mode would just reject their connection. - if getattr(agent, "bridge_mode", "disabled") == "disabled": - agent.bridge_mode = "enabled" - - await db.commit() try: payload, filename, content_type = render_installer( @@ -861,8 +856,17 @@ async def download_bridge_installer( ) except FileNotFoundError as e: # Bundled Windows exe missing — operator needs to build & drop it in. + # Key is still the old one; existing bridges stay connected. raise HTTPException(status_code=503, detail=str(e)) from e + # Build succeeded — now it's safe to rotate the key and enable bridge mode. + agent.api_key_hash = hashlib.sha256(raw_key.encode()).hexdigest() + if getattr(agent, "bridge_mode", "disabled") == "disabled": + # Auto-enable bridge_mode if currently disabled — the user is clearly + # trying to set up a bridge, so the disabled mode would just reject it. 
+ agent.bridge_mode = "enabled" + await db.commit() + # Audit log (best-effort) try: from app.services.activity_logger import log_activity diff --git a/bridge/clawith_bridge/adapters/base.py b/bridge/clawith_bridge/adapters/base.py index 6a824bc10..25c120ca3 100644 --- a/bridge/clawith_bridge/adapters/base.py +++ b/bridge/clawith_bridge/adapters/base.py @@ -200,26 +200,46 @@ async def _watcher() -> None: for r in readers: r.cancel() watcher.cancel() - try: - await proc.wait() - except Exception: - pass - - async def cancel(self, session_id: str, reason: str) -> None: - proc = self._procs.get(session_id) - if proc is None or proc.returncode is not None: + # If we exited the loop because of timeout (or any other + # break above), the child may still be running. Without an + # explicit terminate here, `proc.wait()` would block past + # timeout_s and the session slot stays held until some + # outer cancel fires. Bound the wait with terminate→kill. + await self._terminate_proc(proc) + + async def _terminate_proc(self, proc: asyncio.subprocess.Process) -> None: + """Ensure `proc` has exited. 
Idempotent; safe if proc already exited.""" + if proc.returncode is not None: return try: proc.terminate() except ProcessLookupError: return + except Exception: + pass try: await asyncio.wait_for(proc.wait(), timeout=self.KILL_GRACE_SEC) + return except asyncio.TimeoutError: - try: - proc.kill() - except ProcessLookupError: - pass + pass + except Exception: + return + try: + proc.kill() + except ProcessLookupError: + return + except Exception: + pass + try: + await proc.wait() + except Exception: + pass + + async def cancel(self, session_id: str, reason: str) -> None: + proc = self._procs.get(session_id) + if proc is None: + return + await self._terminate_proc(proc) async def _cleanup(self, session_id: str) -> None: self._procs.pop(session_id, None) From 824a88f463309e2af4821544b4d528782fa56e0f Mon Sep 17 00:00:00 2001 From: zhongyua Date: Wed, 22 Apr 2026 14:21:55 +0800 Subject: [PATCH 09/15] test: add endpoint coverage for bridge_adapter plumbing Covers three paths that shipped without regression coverage: - GET /agents/{id}/bridge-status response shape (native vs openclaw, connected vs disconnected, optional-field fallbacks) - PATCH /agents/{id} silent-drop guard for native agents - POST /agents/ default bridge_adapter resolution (claude_code for openclaw, NULL for native even if client sends a value) --- .../tests/test_bridge_adapter_endpoints.py | 404 ++++++++++++++++++ 1 file changed, 404 insertions(+) create mode 100644 backend/tests/test_bridge_adapter_endpoints.py diff --git a/backend/tests/test_bridge_adapter_endpoints.py b/backend/tests/test_bridge_adapter_endpoints.py new file mode 100644 index 000000000..98c215e7b --- /dev/null +++ b/backend/tests/test_bridge_adapter_endpoints.py @@ -0,0 +1,404 @@ +"""Endpoint-level tests for the `bridge_adapter` plumbing. + +Covers three code paths that existed with no regression coverage: + +1. 
`GET /agents/{id}/bridge-status` — response shape for native agents + (not applicable), openclaw without a bridge (disconnected), and + openclaw with a bridge connected. Ensures auth goes through + `check_agent_access` like the rest of the agent endpoints. + +2. `PATCH /agents/{id}` — silently drops `bridge_adapter` when the + agent is not openclaw, and applies it when it is. The "silent drop" + behavior is load-bearing for generic bulk-update flows; if we + errored instead, the UI would have to branch on agent type before + every PATCH. + +3. `POST /agents/` — defaults `bridge_adapter` to `"claude_code"` for + openclaw agents when the caller omits it, respects an explicit + value when provided, and leaves it NULL for native agents even if + the caller mistakenly sends one. +""" + +import uuid +from datetime import datetime, timezone + +import pytest + +from app.api import agents as agents_api +from app.models.agent import Agent +from app.models.user import User +from app.schemas.schemas import AgentCreate, AgentUpdate + + +# ── Test doubles ──────────────────────────────────────────────────────── + +class DummyResult: + def __init__(self, values=None): + self._values = list(values or []) + + def scalar_one_or_none(self): + return self._values[0] if self._values else None + + def scalars(self): + return self + + def all(self): + return list(self._values) + + +def _populate_agent_server_defaults(agent: Agent) -> None: + """Mimic Postgres server-side defaults on un-flushed Agent instances + so `AgentOut.model_validate(agent)` downstream doesn't choke on None + values for fields declared as `int` / `dict` / `datetime` in the + Pydantic schema. 
+ """ + if agent.id is None: + agent.id = uuid.uuid4() + if agent.tokens_used_today is None: + agent.tokens_used_today = 0 + if agent.tokens_used_month is None: + agent.tokens_used_month = 0 + if agent.tokens_used_total is None: + agent.tokens_used_total = 0 + if agent.llm_calls_today is None: + agent.llm_calls_today = 0 + if agent.max_llm_calls_per_day is None: + agent.max_llm_calls_per_day = 100 + if agent.max_triggers is None: + agent.max_triggers = 20 + if agent.min_poll_interval_min is None: + agent.min_poll_interval_min = 5 + if agent.webhook_rate_limit is None: + agent.webhook_rate_limit = 5 + if agent.heartbeat_enabled is None: + agent.heartbeat_enabled = True + if agent.heartbeat_interval_minutes is None: + agent.heartbeat_interval_minutes = 240 + if agent.heartbeat_active_hours is None: + agent.heartbeat_active_hours = "09:00-18:00" + if agent.max_tool_rounds is None: + agent.max_tool_rounds = 50 + if agent.context_window_size is None: + agent.context_window_size = 100 + if agent.is_expired is None: + agent.is_expired = False + if agent.autonomy_policy is None: + agent.autonomy_policy = {} + if agent.created_at is None: + agent.created_at = datetime.now(timezone.utc) + if agent.bridge_mode is None: + agent.bridge_mode = "disabled" + if agent.status is None: + agent.status = "idle" + + +class RecordingDB: + def __init__(self, responses=None): + self.responses = list(responses or []) + self.added: list[object] = [] + self.committed = False + self.flush_count = 0 + + async def execute(self, statement, params=None): + if self.responses: + return self.responses.pop(0) + return DummyResult() + + def add(self, obj): + self.added.append(obj) + if isinstance(obj, Agent): + _populate_agent_server_defaults(obj) + + async def flush(self): + self.flush_count += 1 + + async def commit(self): + self.committed = True + + +def make_user(**overrides): + # Note: username/email/password_hash on User are association_proxy + # fields that delegate to Identity — setting them on 
a fresh User + # without a bound Identity raises. The tests below don't read those + # fields, so we just leave them off. + values = { + "id": uuid.uuid4(), + "display_name": "Alice", + "role": "member", + "tenant_id": None, + "is_active": True, + "quota_agent_ttl_hours": 48, + } + values.update(overrides) + return User(**values) + + +def make_agent(creator_id: uuid.UUID, **overrides): + values = { + "id": uuid.uuid4(), + "name": "Ops Bot", + "role_description": "assistant", + "creator_id": creator_id, + "status": "idle", + "agent_type": "native", + } + values.update(overrides) + agent = Agent(**values) + _populate_agent_server_defaults(agent) + return agent + + +# ── GET /agents/{id}/bridge-status ────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_bridge_status_not_applicable_for_native_agent(monkeypatch): + user = make_user() + agent = make_agent(user.id, agent_type="native") + + async def fake_check(_db, _user, _aid): + return agent, "use" + + monkeypatch.setattr(agents_api, "check_agent_access", fake_check) + + result = await agents_api.get_bridge_status( + agent_id=agent.id, + current_user=user, + db=RecordingDB(), + ) + assert result == {"connected": False, "applicable": False} + + +@pytest.mark.asyncio +async def test_bridge_status_disconnected_for_openclaw_without_bridge(monkeypatch): + user = make_user() + agent = make_agent(user.id, agent_type="openclaw", bridge_adapter="claude_code") + + async def fake_check(_db, _user, _aid): + return agent, "use" + + monkeypatch.setattr(agents_api, "check_agent_access", fake_check) + + from app.services.local_agent import session_dispatcher as sd + + monkeypatch.setattr(sd.dispatcher, "get_bridge_info", lambda _aid: None) + + result = await agents_api.get_bridge_status( + agent_id=agent.id, + current_user=user, + db=RecordingDB(), + ) + assert result == {"connected": False, "applicable": True} + + +@pytest.mark.asyncio +async def 
test_bridge_status_connected_returns_full_shape(monkeypatch): + user = make_user() + agent = make_agent(user.id, agent_type="openclaw", bridge_adapter="hermes") + + async def fake_check(_db, _user, _aid): + return agent, "use" + + monkeypatch.setattr(agents_api, "check_agent_access", fake_check) + + info = { + "bridge_version": "0.2.1", + "adapters": ["claude_code", "hermes"], + "connected_at": "2026-04-22T12:00:00Z", + "active_sessions": ["s1", "s2"], + } + from app.services.local_agent import session_dispatcher as sd + + monkeypatch.setattr(sd.dispatcher, "get_bridge_info", lambda _aid: info) + + result = await agents_api.get_bridge_status( + agent_id=agent.id, + current_user=user, + db=RecordingDB(), + ) + assert result["connected"] is True + assert result["applicable"] is True + assert result["bridge_version"] == "0.2.1" + assert result["adapters"] == ["claude_code", "hermes"] + assert result["connected_at"] == "2026-04-22T12:00:00Z" + # The endpoint returns the *length* of active_sessions, not the list itself, + # so the UI can render "2 active sessions" without a second round-trip. + assert result["active_sessions"] == 2 + + +@pytest.mark.asyncio +async def test_bridge_status_handles_missing_optional_fields(monkeypatch): + # Bridges that registered without advertising adapters/version should + # still produce a well-formed response (empty list, None version) so + # the frontend doesn't need to defensively null-check each field.
+ user = make_user() + agent = make_agent(user.id, agent_type="openclaw") + + async def fake_check(_db, _user, _aid): + return agent, "use" + + monkeypatch.setattr(agents_api, "check_agent_access", fake_check) + + from app.services.local_agent import session_dispatcher as sd + + monkeypatch.setattr(sd.dispatcher, "get_bridge_info", lambda _aid: {}) + + result = await agents_api.get_bridge_status( + agent_id=agent.id, + current_user=user, + db=RecordingDB(), + ) + assert result["connected"] is True + assert result["adapters"] == [] + assert result["bridge_version"] is None + assert result["active_sessions"] == 0 + + +# ── PATCH /agents/{id} — bridge_adapter guard ────────────────────────── + + +@pytest.mark.asyncio +async def test_update_agent_drops_bridge_adapter_for_native(monkeypatch): + user = make_user() + agent = make_agent(user.id, agent_type="native", bridge_adapter=None) + + async def fake_check(_db, _user, _aid): + return agent, "manage" + + monkeypatch.setattr(agents_api, "check_agent_access", fake_check) + monkeypatch.setattr(agents_api, "is_agent_creator", lambda _u, _a: True) + + await agents_api.update_agent( + agent_id=agent.id, + data=AgentUpdate(bridge_adapter="hermes"), + current_user=user, + db=RecordingDB(), + ) + # Native agent: the field must not have been written. A silent drop + # is the contract — generic bulk update shouldn't need to know the + # agent type. 
+ assert agent.bridge_adapter is None + + +@pytest.mark.asyncio +async def test_update_agent_applies_bridge_adapter_for_openclaw(monkeypatch): + user = make_user() + agent = make_agent(user.id, agent_type="openclaw", bridge_adapter="claude_code") + + async def fake_check(_db, _user, _aid): + return agent, "manage" + + monkeypatch.setattr(agents_api, "check_agent_access", fake_check) + monkeypatch.setattr(agents_api, "is_agent_creator", lambda _u, _a: True) + + await agents_api.update_agent( + agent_id=agent.id, + data=AgentUpdate(bridge_adapter="hermes"), + current_user=user, + db=RecordingDB(), + ) + assert agent.bridge_adapter == "hermes" + + +@pytest.mark.asyncio +async def test_update_agent_leaves_bridge_adapter_unchanged_when_absent(monkeypatch): + # exclude_unset semantics: a PATCH that doesn't mention bridge_adapter + # must not touch the existing value, regardless of agent type. + user = make_user() + agent = make_agent(user.id, agent_type="openclaw", bridge_adapter="hermes") + + async def fake_check(_db, _user, _aid): + return agent, "manage" + + monkeypatch.setattr(agents_api, "check_agent_access", fake_check) + monkeypatch.setattr(agents_api, "is_agent_creator", lambda _u, _a: True) + + await agents_api.update_agent( + agent_id=agent.id, + data=AgentUpdate(bio="new bio"), + current_user=user, + db=RecordingDB(), + ) + assert agent.bridge_adapter == "hermes" + assert agent.bio == "new bio" + + +# ── POST /agents — bridge_adapter defaults ───────────────────────────── + + +@pytest.fixture +def _stub_quota(monkeypatch): + from app.services import quota_guard + + async def fake_check(_user_id): + return None + + monkeypatch.setattr(quota_guard, "check_agent_creation_quota", fake_check) + + +@pytest.mark.asyncio +async def test_create_openclaw_defaults_bridge_adapter_to_claude_code(_stub_quota): + user = make_user() + db = RecordingDB() + data = AgentCreate( + name="TestOpenClaw", + role_description="", + agent_type="openclaw", + # bridge_adapter 
intentionally omitted + ) + + await agents_api.create_agent(data=data, current_user=user, db=db) + + added_agents = [obj for obj in db.added if isinstance(obj, Agent)] + assert len(added_agents) == 1 + assert added_agents[0].agent_type == "openclaw" + assert added_agents[0].bridge_adapter == "claude_code" + + +@pytest.mark.asyncio +async def test_create_openclaw_respects_explicit_bridge_adapter(_stub_quota): + user = make_user() + db = RecordingDB() + data = AgentCreate( + name="TestHermes", + role_description="", + agent_type="openclaw", + bridge_adapter="hermes", + ) + + await agents_api.create_agent(data=data, current_user=user, db=db) + + added_agents = [obj for obj in db.added if isinstance(obj, Agent)] + assert added_agents[0].bridge_adapter == "hermes" + + +@pytest.mark.asyncio +async def test_create_native_leaves_bridge_adapter_null(_stub_quota, monkeypatch): + # Even if a misbehaving client sends bridge_adapter with a native + # agent, the backend must refuse to persist it — the field is only + # meaningful for bridge-style agents. + user = make_user() + db = RecordingDB() + + # Native path touches agent_manager; stub it so we don't hit the FS + # or spawn containers. 
+ from app.services import agent_manager as am_module + + async def _noop(*_args, **_kwargs): + return None + + monkeypatch.setattr(am_module.agent_manager, "initialize_agent_files", _noop) + monkeypatch.setattr(am_module.agent_manager, "start_container", _noop) + + data = AgentCreate( + name="TestNative", + role_description="", + agent_type="native", + bridge_adapter="hermes", # client mistake + ) + + await agents_api.create_agent(data=data, current_user=user, db=db) + + added_agents = [obj for obj in db.added if isinstance(obj, Agent)] + assert added_agents[0].agent_type == "native" + assert added_agents[0].bridge_adapter is None From c6e26ed07faefd3e00aaf997b9db8dbbff269a61 Mon Sep 17 00:00:00 2001 From: zhongyua Date: Wed, 22 Apr 2026 15:07:23 +0800 Subject: [PATCH 10/15] feat(bridge): decouple installer download from API key rotation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Downloading an installer no longer rotates the agent's API key. The plaintext key is now stored in `agents.api_key` alongside the existing hash; download reuses it, so re-downloading (for a different runtime, another platform, another machine) keeps any running bridge online. Legacy agents without plaintext mint + dual-write once on their first download — a one-time opportunistic upgrade preserved behind the same rollback-safe pattern as before (no DB mutation until render succeeds). Explicit rotation stays on `POST /agents/{id}/api-key`, which now dual-writes both columns so gateway's existing plaintext/hash fallback stays consistent. Tests: 5 new cases in `test_install_rotate_decoupling.py` covering reuse, legacy upgrade, render-failure rollback, explicit rotate dual-write, and openclaw-create dual-write. Frontend: removes the "download regenerates the key, bridge will disconnect" warning from OpenClawSettings; the existing "Regenerate API Key" section already carries that warning and remains the explicit rotation UI. 
Co-Authored-By: Claude Opus 4.7 --- backend/alembic/versions/add_agent_api_key.py | 35 ++ backend/app/api/agents.py | 61 ++- backend/app/models/agent.py | 5 + .../tests/test_install_rotate_decoupling.py | 406 ++++++++++++++++++ frontend/src/pages/OpenClawSettings.tsx | 10 +- 5 files changed, 497 insertions(+), 20 deletions(-) create mode 100644 backend/alembic/versions/add_agent_api_key.py create mode 100644 backend/tests/test_install_rotate_decoupling.py diff --git a/backend/alembic/versions/add_agent_api_key.py b/backend/alembic/versions/add_agent_api_key.py new file mode 100644 index 000000000..37ba40941 --- /dev/null +++ b/backend/alembic/versions/add_agent_api_key.py @@ -0,0 +1,35 @@ +"""Add plaintext api_key column to agents, alongside api_key_hash. + +This decouples installer download from key rotation. The existing design +stored only the sha256 hash, so every installer download had to mint a +fresh key (the plaintext needed to bake into the installer was otherwise +unrecoverable). That invalidated any bridge currently using the key — +a bad UX when the user just wants to re-download for a different runtime +on the same machine. + +Storing the plaintext alongside the hash lets download be idempotent: +reuse the existing plaintext if present, fall back to mint-and-store on +first download for legacy agents (nullable column, no backfill needed). +The hash stays for the legacy dual-path auth in gateway._get_agent_by_key. 
+ +Revision ID: add_agent_api_key +Revises: add_bridge_session_enum +Create Date: 2026-04-22 +""" +from alembic import op + + +revision = "add_agent_api_key" +down_revision = "add_bridge_session_enum" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.execute( + "ALTER TABLE agents ADD COLUMN IF NOT EXISTS api_key VARCHAR(128)" + ) + + +def downgrade() -> None: + op.execute("ALTER TABLE agents DROP COLUMN IF EXISTS api_key") diff --git a/backend/app/api/agents.py b/backend/app/api/agents.py index 0ff7bd2d5..061173892 100644 --- a/backend/app/api/agents.py +++ b/backend/app/api/agents.py @@ -292,6 +292,7 @@ async def create_agent( # For OpenClaw agents: skip file system and container setup, generate API key if agent.agent_type == "openclaw": raw_key = f"oc-{secrets.token_urlsafe(32)}" + agent.api_key = raw_key agent.api_key_hash = hashlib.sha256(raw_key.encode()).hexdigest() agent.status = "idle" agent.bridge_mode = "enabled" @@ -785,6 +786,7 @@ async def generate_or_reset_api_key( raise HTTPException(status_code=400, detail="API keys are only available for OpenClaw agents") raw_key = f"oc-{secrets.token_urlsafe(32)}" + agent.api_key = raw_key agent.api_key_hash = hashlib.sha256(raw_key.encode()).hexdigest() await db.commit() @@ -798,12 +800,17 @@ async def download_bridge_installer( current_user: User = Depends(get_current_user), db: AsyncSession = Depends(get_db), ): - """Generate a fresh API key and return a platform-specific bridge installer script. + """Return a platform-specific bridge installer script for this agent. - Each download regenerates the agent's API key; previously-issued installers - (and any bridges still using the old key) will stop working. This is by - design — the key is the only secret in the installer, and short-lived - secrets limit blast radius if a user accidentally shares the file. 
+ Download is idempotent against the API key: the agent's stored plaintext + key is baked into the installer and reused across downloads, so the user + can re-download the installer (for a different runtime, a different + platform, another machine) without rotating the key and kicking the + currently-running bridge offline. + + Legacy agents (created before the api_key column existed) mint and + persist a key on their first download — a one-time upgrade. Explicit + rotation is the separate POST /agents/{id}/api-key endpoint. """ from app.services.local_agent.installer_templates import ( derive_ws_url, @@ -839,12 +846,24 @@ async def download_bridge_installer( ) ws_url = derive_ws_url(http_base) - # Generate a candidate key but DO NOT persist it yet. If the installer - # build fails downstream (e.g. bundled Windows exe missing → 503), we - # must not have invalidated the bridge's currently-working token — - # otherwise the user is left with a dead bridge AND no usable - # installer, and the only recovery is an admin manual reset. - raw_key = f"oc-{secrets.token_urlsafe(32)}" + # Reuse the existing plaintext key if the agent has one — download is + # idempotent in that case (no rotation, no disconnection of the + # currently-running bridge). For legacy agents with only api_key_hash + # stored, mint a new key and dual-write on successful build; this is a + # one-time opportunistic upgrade. + # + # Either way, DO NOT persist anything until the installer build succeeds. + # If it fails (e.g. bundled Windows exe missing → 503), we must not have + # invalidated the bridge's currently-working token — otherwise the user + # is left with a dead bridge AND no usable installer, and the only + # recovery is an admin manual reset. 
+ existing_plaintext = getattr(agent, "api_key", None) + if existing_plaintext: + raw_key = existing_plaintext + needs_persist = False + else: + raw_key = f"oc-{secrets.token_urlsafe(32)}" + needs_persist = True try: payload, filename, content_type = render_installer( @@ -859,13 +878,20 @@ async def download_bridge_installer( # Key is still the old one; existing bridges stay connected. raise HTTPException(status_code=503, detail=str(e)) from e - # Build succeeded — now it's safe to rotate the key and enable bridge mode. - agent.api_key_hash = hashlib.sha256(raw_key.encode()).hexdigest() + # Build succeeded — persist the key if we minted one (legacy upgrade), + # and enable bridge mode if not already. + mutated = False + if needs_persist: + agent.api_key = raw_key + agent.api_key_hash = hashlib.sha256(raw_key.encode()).hexdigest() + mutated = True if getattr(agent, "bridge_mode", "disabled") == "disabled": # Auto-enable bridge_mode if currently disabled — the user is clearly # trying to set up a bridge, so the disabled mode would just reject it. 
agent.bridge_mode = "enabled" - await db.commit() + mutated = True + if mutated: + await db.commit() # Audit log (best-effort) try: @@ -873,12 +899,17 @@ async def download_bridge_installer( await log_activity( agent_id=agent.id, action_type="bridge_installer_download", - summary=f"Bridge 安装器已下载 ({platform}),API Key 已重新生成", + summary=( + f"Bridge 安装器已下载 ({platform}),API Key 首次生成" + if needs_persist + else f"Bridge 安装器已下载 ({platform})" + ), detail={ "platform": platform, "user_id": str(current_user.id), "server_url": ws_url, "filename": filename, + "key_rotated": needs_persist, }, ) except Exception: # noqa: BLE001 diff --git a/backend/app/models/agent.py b/backend/app/models/agent.py index b4a7bac77..c7e3485cd 100644 --- a/backend/app/models/agent.py +++ b/backend/app/models/agent.py @@ -44,6 +44,11 @@ class Agent(Base): bridge_adapter: Mapped[str | None] = mapped_column(String(32)) # API key hash for OpenClaw gateway authentication api_key_hash: Mapped[str | None] = mapped_column(String(128)) + # Plaintext API key stored alongside the hash so installer downloads + # can reuse it without rotating — rotation is an explicit action. + # See gateway._get_agent_by_key for the dual-path auth that falls back + # to api_key_hash for legacy agents where this column is still NULL. + api_key: Mapped[str | None] = mapped_column(String(128)) # Last time OpenClaw polled the gateway (online status indicator) openclaw_last_seen: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) diff --git a/backend/tests/test_install_rotate_decoupling.py b/backend/tests/test_install_rotate_decoupling.py new file mode 100644 index 000000000..d0dd659cd --- /dev/null +++ b/backend/tests/test_install_rotate_decoupling.py @@ -0,0 +1,406 @@ +"""Tests for the install/rotate decoupling (agents.api_key dual storage). + +Pre-change behavior: every installer download minted a fresh API key and +overwrote `agents.api_key_hash`, which dropped any currently-connected +bridge using the old key. 
That made "re-download for a different runtime" +a destructive operation — the user had to race to the machine to replace +the installer before losing access. + +Post-change behavior: the agent's plaintext key is stored in +`agents.api_key`, and download reuses it. Explicit rotation is a separate +endpoint (POST /agents/{id}/api-key). + +These tests pin the contract: + +1. Download reuses the stored plaintext, does NOT rotate the hash. +2. Legacy agents (NULL api_key) get a plaintext minted + dual-written + on their first download — a one-time opportunistic upgrade. +3. The explicit rotate endpoint dual-writes both columns. +4. Creating an openclaw agent dual-writes both columns up front. +""" + +import hashlib +import uuid +from datetime import datetime, timezone + +import pytest + +from app.api import agents as agents_api +from app.models.agent import Agent +from app.models.user import User + + +# ── Test doubles (mirrored from test_bridge_adapter_endpoints.py) ─────── + + +class DummyResult: + def __init__(self, values=None): + self._values = list(values or []) + + def scalar_one_or_none(self): + return self._values[0] if self._values else None + + def scalars(self): + return self + + def all(self): + return list(self._values) + + +def _populate_agent_server_defaults(agent: Agent) -> None: + if agent.id is None: + agent.id = uuid.uuid4() + if agent.tokens_used_today is None: + agent.tokens_used_today = 0 + if agent.tokens_used_month is None: + agent.tokens_used_month = 0 + if agent.tokens_used_total is None: + agent.tokens_used_total = 0 + if agent.llm_calls_today is None: + agent.llm_calls_today = 0 + if agent.max_llm_calls_per_day is None: + agent.max_llm_calls_per_day = 100 + if agent.max_triggers is None: + agent.max_triggers = 20 + if agent.min_poll_interval_min is None: + agent.min_poll_interval_min = 5 + if agent.webhook_rate_limit is None: + agent.webhook_rate_limit = 5 + if agent.heartbeat_enabled is None: + agent.heartbeat_enabled = True + if 
agent.heartbeat_interval_minutes is None: + agent.heartbeat_interval_minutes = 240 + if agent.heartbeat_active_hours is None: + agent.heartbeat_active_hours = "09:00-18:00" + if agent.max_tool_rounds is None: + agent.max_tool_rounds = 50 + if agent.context_window_size is None: + agent.context_window_size = 100 + if agent.is_expired is None: + agent.is_expired = False + if agent.autonomy_policy is None: + agent.autonomy_policy = {} + if agent.created_at is None: + agent.created_at = datetime.now(timezone.utc) + if agent.bridge_mode is None: + agent.bridge_mode = "disabled" + if agent.status is None: + agent.status = "idle" + + +class RecordingDB: + def __init__(self, responses=None): + self.responses = list(responses or []) + self.added: list[object] = [] + self.commits = 0 + self.flush_count = 0 + + async def execute(self, statement, params=None): + if self.responses: + return self.responses.pop(0) + return DummyResult() + + def add(self, obj): + self.added.append(obj) + if isinstance(obj, Agent): + _populate_agent_server_defaults(obj) + + async def flush(self): + self.flush_count += 1 + + async def commit(self): + self.commits += 1 + + @property + def committed(self) -> bool: + return self.commits > 0 + + +def make_user(**overrides): + values = { + "id": uuid.uuid4(), + "display_name": "Alice", + "role": "member", + "tenant_id": None, + "is_active": True, + "quota_agent_ttl_hours": 48, + } + values.update(overrides) + return User(**values) + + +def make_agent(creator_id: uuid.UUID, **overrides): + values = { + "id": uuid.uuid4(), + "name": "Ops Bot", + "role_description": "assistant", + "creator_id": creator_id, + "status": "idle", + "agent_type": "openclaw", + } + values.update(overrides) + agent = Agent(**values) + _populate_agent_server_defaults(agent) + return agent + + +# ── download_bridge_installer — reuse path ────────────────────────────── + + +@pytest.fixture +def _download_env(monkeypatch): + """Stub PUBLIC_BASE_URL, render_installer, and log_activity 
so the + download endpoint runs end-to-end without touching the filesystem, + network, or database activity log. + """ + # PUBLIC_BASE_URL must be set or the endpoint 500s before doing anything. + from app import config as app_config + + def fake_settings(): + class S: + PUBLIC_BASE_URL = "https://clawith.example.com" + return S() + + monkeypatch.setattr(app_config, "get_settings", fake_settings) + + # Capture what api_key render_installer sees — this is the whole point + # of the test: the *baked* key should equal the stored plaintext, + # not a freshly-minted one. + captured: dict = {} + + def fake_render(*, platform, server_url, api_key, agent_name, adapter): + captured["api_key"] = api_key + captured["adapter"] = adapter + return (b"payload", "installer.sh", "application/x-sh") + + from app.services.local_agent import installer_templates + + monkeypatch.setattr(installer_templates, "render_installer", fake_render) + + # log_activity imports are done inside the endpoint; patch via module. 
+ async def noop_log(**_kwargs): + return None + + from app.services import activity_logger + + monkeypatch.setattr(activity_logger, "log_activity", noop_log) + + return captured + + +@pytest.mark.asyncio +async def test_download_reuses_stored_plaintext_without_rotating( + monkeypatch, _download_env +): + user = make_user() + stored_plaintext = "oc-existing-plaintext-abc123" + stored_hash = hashlib.sha256(stored_plaintext.encode()).hexdigest() + agent = make_agent( + user.id, + agent_type="openclaw", + api_key=stored_plaintext, + api_key_hash=stored_hash, + bridge_mode="enabled", + ) + + async def fake_check(_db, _user, _aid): + return agent, "manage" + + monkeypatch.setattr(agents_api, "check_agent_access", fake_check) + monkeypatch.setattr(agents_api, "is_agent_creator", lambda _u, _a: True) + + db = RecordingDB() + await agents_api.download_bridge_installer( + agent_id=agent.id, + platform="linux", + current_user=user, + db=db, + ) + + # The installer must bake the *stored* key, not a freshly-minted one. + assert _download_env["api_key"] == stored_plaintext + # Neither column should have been touched. + assert agent.api_key == stored_plaintext + assert agent.api_key_hash == stored_hash + # And no commit, since nothing changed. + assert db.commits == 0 + + +@pytest.mark.asyncio +async def test_download_fills_plaintext_for_legacy_agent( + monkeypatch, _download_env +): + # Legacy agent: predates the api_key column, only has api_key_hash. + # First download opportunistically upgrades it by minting + dual-writing. 
+ user = make_user() + legacy_hash = hashlib.sha256(b"legacy-unknown").hexdigest() + agent = make_agent( + user.id, + agent_type="openclaw", + api_key=None, + api_key_hash=legacy_hash, + bridge_mode="enabled", + ) + + async def fake_check(_db, _user, _aid): + return agent, "manage" + + monkeypatch.setattr(agents_api, "check_agent_access", fake_check) + monkeypatch.setattr(agents_api, "is_agent_creator", lambda _u, _a: True) + + db = RecordingDB() + await agents_api.download_bridge_installer( + agent_id=agent.id, + platform="linux", + current_user=user, + db=db, + ) + + minted = _download_env["api_key"] + assert minted.startswith("oc-") + # Both columns must have been written together. + assert agent.api_key == minted + assert agent.api_key_hash == hashlib.sha256(minted.encode()).hexdigest() + # Legacy hash is gone — this is the one-time upgrade; subsequent + # downloads take the reuse path. + assert agent.api_key_hash != legacy_hash + assert db.commits == 1 + + +@pytest.mark.asyncio +async def test_download_does_not_persist_when_render_fails(monkeypatch): + # If render_installer raises (e.g. bundled exe missing), the existing + # plaintext must not be touched and no commit may happen. This pins + # the rollback-safety invariant from the prior bug fix. 
+ user = make_user() + stored_plaintext = "oc-existing-plaintext" + stored_hash = hashlib.sha256(stored_plaintext.encode()).hexdigest() + agent = make_agent( + user.id, + agent_type="openclaw", + api_key=stored_plaintext, + api_key_hash=stored_hash, + ) + + async def fake_check(_db, _user, _aid): + return agent, "manage" + + monkeypatch.setattr(agents_api, "check_agent_access", fake_check) + monkeypatch.setattr(agents_api, "is_agent_creator", lambda _u, _a: True) + + from app import config as app_config + + def fake_settings(): + class S: + PUBLIC_BASE_URL = "https://clawith.example.com" + return S() + + monkeypatch.setattr(app_config, "get_settings", fake_settings) + + def boom(**_kwargs): + raise FileNotFoundError("windows bundled exe missing") + + from app.services.local_agent import installer_templates + + monkeypatch.setattr(installer_templates, "render_installer", boom) + + from fastapi import HTTPException + + db = RecordingDB() + with pytest.raises(HTTPException) as excinfo: + await agents_api.download_bridge_installer( + agent_id=agent.id, + platform="windows", + current_user=user, + db=db, + ) + assert excinfo.value.status_code == 503 + assert agent.api_key == stored_plaintext + assert agent.api_key_hash == stored_hash + assert db.commits == 0 + + +# ── rotate — explicit dual-write ──────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_rotate_endpoint_writes_both_fields(monkeypatch): + # The /agents/{id}/api-key endpoint is the *explicit* rotation path. + # It must overwrite both columns together so the dual-path auth + # (gateway._get_agent_by_key, plaintext first then hash fallback) stays + # consistent across rotations. 
+ user = make_user() + old_plaintext = "oc-old-abc" + old_hash = hashlib.sha256(old_plaintext.encode()).hexdigest() + agent = make_agent( + user.id, + agent_type="openclaw", + api_key=old_plaintext, + api_key_hash=old_hash, + ) + + async def fake_check(_db, _user, _aid): + return agent, "manage" + + monkeypatch.setattr(agents_api, "check_agent_access", fake_check) + monkeypatch.setattr(agents_api, "is_agent_creator", lambda _u, _a: True) + + db = RecordingDB() + result = await agents_api.generate_or_reset_api_key( + agent_id=agent.id, + current_user=user, + db=db, + ) + + new_plaintext = result["api_key"] + assert new_plaintext.startswith("oc-") + assert new_plaintext != old_plaintext + assert agent.api_key == new_plaintext + assert agent.api_key_hash == hashlib.sha256(new_plaintext.encode()).hexdigest() + assert agent.api_key_hash != old_hash + assert db.commits == 1 + + +# ── create_agent — openclaw dual-write on creation ────────────────────── + + +@pytest.fixture +def _stub_quota(monkeypatch): + from app.services import quota_guard + + async def fake_check(_user_id): + return None + + monkeypatch.setattr(quota_guard, "check_agent_creation_quota", fake_check) + + +@pytest.mark.asyncio +async def test_create_openclaw_agent_stores_plaintext_key(_stub_quota): + # Creating an openclaw agent must write both columns so that the + # FIRST installer download can take the fast reuse path — without + # this, every freshly-created agent would immediately hit the legacy + # upgrade branch on first download. 
+ from app.schemas.schemas import AgentCreate + + user = make_user() + db = RecordingDB() + data = AgentCreate( + name="FreshOpenClaw", + role_description="", + agent_type="openclaw", + ) + + result = await agents_api.create_agent(data=data, current_user=user, db=db) + + added_agents = [obj for obj in db.added if isinstance(obj, Agent)] + assert len(added_agents) == 1 + created = added_agents[0] + assert created.agent_type == "openclaw" + # Both columns populated at creation time. + assert created.api_key is not None + assert created.api_key.startswith("oc-") + assert created.api_key_hash == hashlib.sha256( + created.api_key.encode() + ).hexdigest() + # The one-time plaintext returned to the caller matches what's stored. + assert result["api_key"] == created.api_key diff --git a/frontend/src/pages/OpenClawSettings.tsx b/frontend/src/pages/OpenClawSettings.tsx index 557a4031f..58cd8ad07 100644 --- a/frontend/src/pages/OpenClawSettings.tsx +++ b/frontend/src/pages/OpenClawSettings.tsx @@ -444,15 +444,15 @@ export default function OpenClawSettings({ agent, agentId }: OpenClawSettingsPro })}

- {/* Warning about key regeneration */} + {/* Hint: download is idempotent — does NOT rotate the key */}
{isChinese - ? '⚠ 每次下载都会重新生成 API Key,已在运行的 bridge 会断开。需要重新分发安装器给多台机器时请一次性配置完。' - : '⚠ Each download regenerates the API Key; any running bridge will disconnect. Set up all machines from a single download.'} + ? '下载不会重置 API Key,已在运行的 bridge 保持在线。要撤销旧 Key 请用上方的"重新生成 API Key"。' + : 'Downloading does NOT rotate the API Key — any running bridge stays online. Use "Regenerate API Key" above to revoke the old key.'}
{/* Download action */} @@ -473,7 +473,7 @@ export default function OpenClawSettings({ agent, agentId }: OpenClawSettingsPro background: 'rgba(99,102,241,0.04)', border: '1px solid var(--border-subtle)', }}>
- {isChinese ? '确认下载并重新生成 API Key?' : 'Confirm download and regenerate API Key?'} + {isChinese ? '确认下载安装器?' : 'Confirm download?'}
)} -
+ {!bridgeAdapter && ( +
+ {t('wizard.runtime.pickLocalHint', '请从上方选择一个本地 Agent 类型以继续。')} +
+ )} + + {bridgeAdapter &&

{bridgeAdapter === 'claude_code' ? t('wizard.bridge.titleClaudeCode', 'Link Claude Code Agent') @@ -552,7 +589,7 @@ For humans, the message is delivered via their available channel (e.g. Feishu).` {createMutation.isPending ? t('common.loading') : t('openclaw.createBtn', 'Link Agent')}

-
+
}
); } From 9cd7b74989c5059dea0cff7b7a7d1a199f393b57 Mon Sep 17 00:00:00 2001 From: zhongyua Date: Wed, 22 Apr 2026 18:06:30 +0800 Subject: [PATCH 13/15] feat(bridge): real OpenClaw adapter via ACP stdio subprocess MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prior OpenClaw adapter was a placeholder that assumed an HTTP/SSE daemon at :9000 — OpenClaw's actual runtime is a WebSocket JSON-RPC gateway at :18789 with Ed25519 device-challenge auth. Rather than re-implementing all of that, we now spawn `openclaw acp`, the CLI subcommand that wraps the gateway and speaks Agent Client Protocol (JSON-RPC 2.0) over stdio. OpenClaw handles the auth/framing internally; we just route messages. - new: ACPSubprocessAdapter base in adapters/acp_base.py — JSON-RPC read/write loop, request/response correlation, session/update notification mapping to Clawith EventKind, agent-initiated request handling (auto-approves session/request_permission so prompts can proceed) - rewrite: OpenClawAdapter now extends ACPSubprocessAdapter with a thin build_acp_argv that threads config.extra (url, token_file, session_label, provenance, verbose) into `openclaw acp` flags - fix: DaemonAdapter.start_session now re-raises after emitting the diagnostic stderr_chunk. Before, a daemon connection failure silently yielded and returned — the session manager interpreted it as a clean completion and sent SessionDoneFrame(exit=0, final=""), presenting as a green status and an empty reply. Re-raising surfaces session.error instead. (Hermes still uses DaemonAdapter — this fix turns its current silent-empty-reply UX into a clear error until that adapter is also rewritten.) 
- Windows .cmd shim handling generalized in acp_base.resolve_stdio_executable, parallel to claude_code.py's resolver - 25 tests covering notification mapping, executable resolution, end-to-end subprocess round-trip with a fake ACP agent, and daemon-failure regression Hermes is intentionally left on its placeholder this round — can't be E2E verified without a local Hermes install. Will follow in a separate PR. --- bridge/clawith_bridge/adapters/acp_base.py | 552 ++++++++++++++++++ bridge/clawith_bridge/adapters/base.py | 5 +- bridge/clawith_bridge/adapters/openclaw.py | 148 ++--- bridge/pyproject.toml | 4 + bridge/tests/__init__.py | 0 bridge/tests/conftest.py | 8 + bridge/tests/test_acp_adapter.py | 266 +++++++++ .../tests/test_base_daemon_error_surfacing.py | 56 ++ bridge/tests/test_openclaw_adapter.py | 158 +++++ 9 files changed, 1102 insertions(+), 95 deletions(-) create mode 100644 bridge/clawith_bridge/adapters/acp_base.py create mode 100644 bridge/tests/__init__.py create mode 100644 bridge/tests/conftest.py create mode 100644 bridge/tests/test_acp_adapter.py create mode 100644 bridge/tests/test_base_daemon_error_surfacing.py create mode 100644 bridge/tests/test_openclaw_adapter.py diff --git a/bridge/clawith_bridge/adapters/acp_base.py b/bridge/clawith_bridge/adapters/acp_base.py new file mode 100644 index 000000000..8300526c1 --- /dev/null +++ b/bridge/clawith_bridge/adapters/acp_base.py @@ -0,0 +1,552 @@ +"""ACP-over-stdio subprocess adapter base. + +For adapters that speak Agent Client Protocol (ACP) through a local subprocess. +OpenClaw ships `openclaw acp` which does exactly this — spawns as stdio ACP, +handles gateway auth/device signing internally, streams session notifications. +Hermes will follow the same pattern in a later PR. + +Wire protocol: JSON-RPC 2.0, one message per line, bidirectional. 
# ── Executable resolution (shared with claude_code.py's pattern) ────────

def resolve_stdio_executable(
    configured: str | None,
    default_name: str,
    well_known_paths: list[str],
) -> list[str]:
    """Return an argv prefix that reliably invokes a CLI that might live in a
    non-PATH location (npm global, launchd minimal PATH, systemd --user, etc.).

    Resolution order:
      1. configured (if not the bare default name) — first as a filesystem
         path, then as a command name looked up on PATH
      2. shutil.which(default_name)
      3. shutil.which(default_name + ".cmd") on Windows
      4. well_known_paths (first existing file)
      5. bare default_name — let PATH decide

    On Windows, .cmd/.bat paths are wrapped with `cmd.exe /c` because
    asyncio.create_subprocess_exec uses Win32 CreateProcess, which refuses
    .cmd directly.

    Args:
        configured: User-configured executable (path or command name), or None.
        default_name: Bare command name used when nothing is configured.
        well_known_paths: Candidate absolute paths probed in order.

    Returns:
        A non-empty argv prefix; callers append the subcommand and flags.
    """
    if configured and configured != default_name:
        wrapped = _wrap_if_windows_cmd(configured)
        if wrapped:
            return wrapped
        # A configured *command name* (e.g. "openclaw-nightly") does not exist
        # as a path relative to the cwd; resolve it through PATH instead of
        # silently falling back to the default executable.
        found = shutil.which(configured)
        if found:
            return _wrap_if_windows_cmd(found) or [found]

    found = shutil.which(default_name)
    if found:
        return _wrap_if_windows_cmd(found) or [found]

    if sys.platform == "win32":
        found = shutil.which(default_name + ".cmd")
        if found:
            return _wrap_if_windows_cmd(found) or [found]

    for candidate in well_known_paths:
        if os.path.exists(candidate):
            return _wrap_if_windows_cmd(candidate) or [candidate]

    return [default_name]


def _wrap_if_windows_cmd(path: str) -> list[str] | None:
    """Return [path], but wrap .cmd/.bat on Windows with cmd.exe /c.
    Return None if `path` is empty or doesn't exist."""
    if not path or not os.path.exists(path):
        return None
    if sys.platform == "win32" and path.lower().endswith((".cmd", ".bat")):
        return ["cmd.exe", "/c", path]
    return [path]


def _node_version_key(bin_path: str) -> tuple[int, ...]:
    """Numeric sort key for an nvm path shaped `.../node/<version>/bin/<name>`.

    Non-numeric version components count as 0 so odd directory names never
    raise; they simply sort low.
    """
    version = Path(bin_path).parent.parent.name
    return tuple(
        int(piece) if piece.isdigit() else 0
        for piece in version.lstrip("v").split(".")
    )


def npm_global_candidates(name: str) -> list[str]:
    """Common npm-global install paths for a CLI named `name`.

    The list is ordered by preference and is not filtered for existence,
    except for nvm entries, which come from a glob and therefore only include
    installed Node versions (newest version first).
    """
    if sys.platform == "win32":
        paths: list[str] = []
        appdata = os.environ.get("APPDATA")
        if appdata:
            paths.append(os.path.join(appdata, "npm", f"{name}.cmd"))
            paths.append(os.path.join(appdata, "npm", f"{name}.exe"))
        program_files = os.environ.get("ProgramFiles", r"C:\Program Files")
        paths.append(os.path.join(program_files, "nodejs", f"{name}.cmd"))
        return paths

    home = str(Path.home())
    if sys.platform == "darwin":
        system_dirs = ["/opt/homebrew/bin", "/usr/local/bin"]
    else:
        system_dirs = ["/usr/local/bin", "/usr/bin"]

    paths = [f"{directory}/{name}" for directory in system_dirs]
    paths.append(os.path.join(home, ".npm-global/bin", name))
    paths.append(os.path.join(home, ".local/bin", name))
    # Sort nvm installs by numeric version: a plain string sort would rank
    # "v9.x" above "v20.x".
    paths.extend(sorted(
        glob.glob(os.path.join(home, f".nvm/versions/node/*/bin/{name}")),
        key=_node_version_key,
        reverse=True,
    ))
    return paths
# ── ACPSubprocessAdapter ────────────────────────────────────────────────

class ACPSubprocessAdapter(BaseAdapter):
    """Base class for ACP-over-stdio adapters.

    One subprocess is spawned per session. `start_session` drives the
    `initialize` → `session/new` → `session/prompt` exchange as
    newline-delimited JSON-RPC 2.0 and yields `SessionEvent`s mapped from the
    agent's `session/update` notifications. Subclasses supply the command line
    via `build_acp_argv`.
    """

    name: ClassVar[str] = "acp_base"
    capabilities: ClassVar[dict[str, Any]] = {
        "interactive_input": False,
        "cancellation": True,
        "tool_calls": True,
    }

    DEFAULT_EXECUTABLE: ClassVar[str] = "acp"
    KILL_GRACE_SEC: ClassVar[float] = 5.0
    ACP_PROTOCOL_VERSION: ClassVar[int] = 1
    # asyncio's default StreamReader limit is 64 KiB per line; a single
    # JSON-RPC message (tool output, diffs) can exceed that and would make
    # readline() raise, so the pipes are opened with a larger cap.
    STREAM_LIMIT_BYTES: ClassVar[int] = 16 * 1024 * 1024

    def __init__(self, config: Any = None) -> None:
        super().__init__(config)
        # session_id → live subprocess (removed when start_session finishes)
        self._procs: dict[str, asyncio.subprocess.Process] = {}
        # session_id → assistant text chunks, joined by final_text()
        self._finals: dict[str, list[str]] = {}
        # session_ids whose process was terminated by cancel()
        self._cancelled: set[str] = set()

    # ── Subclass hooks ──────────────────────────────────────────────────

    @abc.abstractmethod
    def build_acp_argv(self, params: dict[str, Any], cwd: str | None) -> list[str]:
        """Return argv whose tail is typically `<executable> acp [flags...]`."""

    def build_prompt_content(self, prompt: str) -> list[dict[str, Any]]:
        """Return ContentBlock[] for the prompt. Override for image/resource support."""
        return [{"type": "text", "text": prompt}]

    # ── start_session ───────────────────────────────────────────────────

    async def start_session(
        self,
        session_id: str,
        prompt: str,
        params: dict[str, Any],
        cwd: str | None,
        env: dict[str, str],
        timeout_s: int,
    ) -> AsyncIterator[SessionEvent]:
        """Run one prompt turn against a freshly spawned ACP agent.

        Yields status, text, tool and stderr events while the turn runs, ending
        with a `status` event carrying `state="done"` on success.

        Raises:
            FileNotFoundError: the executable could not be spawned.
            asyncio.TimeoutError: the handshake or the prompt exceeded its
                time budget.
            RuntimeError: the agent returned a JSON-RPC error, sent no
                sessionId, or closed stdout before answering.

        Failures are re-raised (after a diagnostic `stderr_chunk`) rather than
        ending the stream normally, so a failed turn cannot be mistaken for a
        clean, empty completion. The one exception is a session stopped through
        `cancel()`, which ends the stream quietly.
        """
        argv = self.build_acp_argv(params, cwd)
        effective_env = {**os.environ, **(env or {})}

        try:
            proc = await asyncio.create_subprocess_exec(
                *argv,
                cwd=cwd,
                env=effective_env,
                stdin=asyncio.subprocess.PIPE,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                limit=self.STREAM_LIMIT_BYTES,
            )
        except FileNotFoundError as e:
            yield SessionEvent(kind="stderr_chunk", payload={"text": f"{argv[0]!r} not found: {e}"})
            raise
        self._procs[session_id] = proc
        self._finals[session_id] = []

        loop = asyncio.get_running_loop()
        event_queue: asyncio.Queue[SessionEvent | None] = asyncio.Queue()
        # Outstanding client → agent requests, keyed by JSON-RPC id.
        pending: dict[int, asyncio.Future[dict[str, Any]]] = {}

        async def _write_line(obj: dict[str, Any]) -> None:
            assert proc.stdin is not None
            if proc.stdin.is_closing():
                return
            payload = (json.dumps(obj, ensure_ascii=False) + "\n").encode("utf-8")
            proc.stdin.write(payload)
            try:
                await proc.stdin.drain()
            except (ConnectionResetError, BrokenPipeError):
                # The agent went away; the reader notices EOF and fails the
                # pending request, so there is nothing more to do here.
                pass

        async def _request(req_id: int, method: str, params_: dict[str, Any]) -> dict[str, Any]:
            fut: asyncio.Future[dict[str, Any]] = loop.create_future()
            pending[req_id] = fut
            await _write_line({
                "jsonrpc": "2.0",
                "id": req_id,
                "method": method,
                "params": params_,
            })
            return await fut

        async def _handshake(req_id: int, method: str, params_: dict[str, Any]) -> dict[str, Any]:
            """Send a setup request bounded by min(30, timeout_s) seconds."""
            limit_s = min(30, timeout_s)
            try:
                return await asyncio.wait_for(_request(req_id, method, params_), timeout=limit_s)
            except asyncio.TimeoutError:
                # wait_for's TimeoutError has no message; give the caller one.
                raise asyncio.TimeoutError(f"ACP {method} timed out after {limit_s}s") from None

        async def _respond(req_id: int, result: dict[str, Any] | None = None,
                           error: dict[str, Any] | None = None) -> None:
            msg: dict[str, Any] = {"jsonrpc": "2.0", "id": req_id}
            if error is not None:
                msg["error"] = error
            else:
                msg["result"] = result or {}
            await _write_line(msg)

        def _fail_pending(reason: str) -> None:
            """Resolve every unanswered request with an error so no awaiter hangs."""
            for fut in pending.values():
                if not fut.done():
                    fut.set_exception(RuntimeError(reason))
            pending.clear()

        async def _reader() -> None:
            """Read JSON-RPC from stdout; dispatch responses + notifications."""
            assert proc.stdout is not None
            try:
                while True:
                    raw = await proc.stdout.readline()
                    if not raw:
                        break
                    line = raw.decode("utf-8", errors="replace").strip()
                    if not line:
                        continue
                    try:
                        msg = json.loads(line)
                    except json.JSONDecodeError:
                        # Not JSON-RPC: forward as plain output.
                        await event_queue.put(SessionEvent(
                            kind="stdout_chunk", payload={"text": line},
                        ))
                        continue
                    if not isinstance(msg, dict):
                        continue

                    if "id" in msg and ("result" in msg or "error" in msg):
                        mid = msg.get("id")
                        fut = pending.pop(mid, None) if isinstance(mid, int) else None
                        if fut and not fut.done():
                            if "error" in msg:
                                fut.set_exception(RuntimeError(
                                    f"ACP error (id={mid}): {msg['error']}"
                                ))
                            else:
                                fut.set_result(msg.get("result") or {})
                        continue

                    method = msg.get("method")
                    if method == "session/update":
                        for ev in self._parse_session_update(
                            (msg.get("params") or {}), session_id,
                        ):
                            await event_queue.put(ev)
                    elif method and "id" in msg:
                        # Agent is asking us for something. Handle a few common
                        # ones; reject the rest so the agent can continue.
                        await self._handle_agent_request(msg, _respond)
                    # else: notification we don't care about — drop.
            finally:
                # Runs on EOF, on a reader crash and on cancellation, so the
                # consumer always sees the sentinel and no request waits on a
                # stream that has ended.
                _fail_pending("ACP agent output stream closed before a response arrived")
                event_queue.put_nowait(None)  # EOF sentinel

        async def _stderr_reader() -> None:
            assert proc.stderr is not None
            while True:
                raw = await proc.stderr.readline()
                if not raw:
                    break
                text = raw.decode("utf-8", errors="replace").rstrip()
                if text:
                    await event_queue.put(SessionEvent(
                        kind="stderr_chunk", payload={"text": text},
                    ))

        reader_task = asyncio.create_task(_reader())
        stderr_task = asyncio.create_task(_stderr_reader())

        prompt_task: asyncio.Task[dict[str, Any]] | None = None

        try:
            # 1. initialize
            init_result = await _handshake(1, "initialize", {
                "protocolVersion": self.ACP_PROTOCOL_VERSION,
                "clientCapabilities": {},
            })
            yield SessionEvent(kind="status", payload={
                "state": "init",
                "agent_capabilities": init_result.get("agentCapabilities") or {},
            })

            # 2. session/new
            session_result = await _handshake(2, "session/new", {
                "cwd": os.path.abspath(cwd) if cwd else os.path.abspath(os.getcwd()),
                "mcpServers": [],
            })
            acp_session_id = session_result.get("sessionId")
            if not isinstance(acp_session_id, str) or not acp_session_id:
                yield SessionEvent(kind="stderr_chunk", payload={
                    "text": "ACP session/new returned no sessionId",
                })
                raise RuntimeError("ACP session/new returned no sessionId")

            # 3. session/prompt — fire async, pump notifications in parallel
            prompt_task = asyncio.create_task(_request(3, "session/prompt", {
                "sessionId": acp_session_id,
                "prompt": self.build_prompt_content(prompt),
            }))

            start_t = loop.time()
            while True:
                if prompt_task.done():
                    # Drain any events already queued before we noticed completion
                    while not event_queue.empty():
                        item = event_queue.get_nowait()
                        if item is None:
                            break
                        yield item
                    break
                remaining = timeout_s - (loop.time() - start_t)
                if remaining <= 0:
                    yield SessionEvent(kind="stderr_chunk", payload={
                        "text": f"timeout after {timeout_s}s",
                    })
                    # Raise instead of falling through: a normal return here
                    # would read as a successful turn with an empty reply.
                    raise asyncio.TimeoutError(
                        f"ACP prompt timed out after {timeout_s}s"
                    )
                try:
                    item = await asyncio.wait_for(
                        event_queue.get(), timeout=min(remaining, 0.5),
                    )
                except asyncio.TimeoutError:
                    continue
                if item is None:
                    # stdout EOF — process exited mid-prompt; the reader has
                    # failed the prompt request, which surfaces just below.
                    break
                yield item

            # Collect final stopReason
            try:
                prompt_result = await asyncio.wait_for(prompt_task, timeout=5)
            except asyncio.TimeoutError:
                raise RuntimeError(
                    "ACP prompt response not received after stream end"
                ) from None
            stop_reason = prompt_result.get("stopReason", "unknown")
            yield SessionEvent(kind="status", payload={
                "state": "done",
                "stop_reason": stop_reason,
            })
        except Exception as e:
            if session_id in self._cancelled:
                # cancel() killed the agent on purpose; the resulting EOF is
                # expected, so end the stream without reporting a failure.
                return
            yield SessionEvent(kind="stderr_chunk", payload={
                "text": f"ACP prompt failed: {e}",
            })
            raise
        finally:
            background: list[asyncio.Task[Any]] = [reader_task, stderr_task]
            if prompt_task is not None:
                background.append(prompt_task)
            for fut in pending.values():
                if not fut.done():
                    fut.cancel()
            for task in background:
                task.cancel()  # no-op for tasks that already finished
            try:
                if proc.stdin and not proc.stdin.is_closing():
                    proc.stdin.close()
            except Exception:
                pass
            try:
                await self._terminate_proc(proc)
                # Await the tasks so cancellation completes and any stored
                # exception is retrieved rather than logged at GC time.
                await asyncio.gather(*background, return_exceptions=True)
            finally:
                self._procs.pop(session_id, None)
                self._cancelled.discard(session_id)

    # ── Notification parsing ────────────────────────────────────────────

    def _parse_session_update(
        self, params: dict[str, Any], session_id: str,
    ) -> list[SessionEvent]:
        """Map one `session/update` notification to zero or more SessionEvents.

        Assistant text is also appended to the session's final-text buffer;
        thought chunks are emitted but not accumulated.
        """
        update = params.get("update")
        if not isinstance(update, dict):
            return []
        kind = update.get("sessionUpdate")
        finals = self._finals.setdefault(session_id, [])

        if kind == "user_message_chunk":
            return []  # echo of our own prompt — skip
        if kind == "agent_message_chunk":
            return self._text_events_from_content(
                update.get("content"), finals, event_kind="assistant_text",
            )
        if kind == "agent_thought_chunk":
            return self._text_events_from_content(
                update.get("content"), finals=None, event_kind="thinking",
            )
        if kind == "tool_call":
            return [SessionEvent(
                kind="tool_call_start",
                payload={
                    "name": update.get("title") or "",
                    "args": update.get("rawInput") or {},
                    "tool_use_id": update.get("toolCallId"),
                    "kind_hint": update.get("kind"),
                },
            )]
        if kind == "tool_call_update":
            status = update.get("status")
            # Only surface terminal states; intermediate updates are noise
            if status in ("completed", "failed"):
                return [SessionEvent(
                    kind="tool_call_result",
                    payload={
                        "tool_use_id": update.get("toolCallId"),
                        "result": self._serialize_tool_content(update.get("content")),
                        "is_error": status == "failed",
                    },
                )]
            return []
        if kind == "plan":
            return [SessionEvent(
                kind="status",
                payload={"state": "plan", "entries": update.get("entries") or []},
            )]
        # available_commands_update / current_mode_update / usage_update /
        # session_info_update / config_option_update — informational, drop.
        return []

    @staticmethod
    def _text_events_from_content(
        content: Any, finals: list[str] | None, event_kind: str,
    ) -> list[SessionEvent]:
        """Turn a text ContentBlock into one event; other shapes yield nothing.

        When `finals` is given, the text is appended to it as well.
        """
        if not isinstance(content, dict):
            return []
        if content.get("type") != "text":
            return []
        text = content.get("text") or ""
        if not text:
            return []
        if finals is not None:
            finals.append(text)
        return [SessionEvent(kind=event_kind, payload={"text": text})]

    @staticmethod
    def _serialize_tool_content(content: Any) -> str:
        """Flatten ToolCallContent[] to a string for the Clawith tool_call_result."""
        if not isinstance(content, list):
            return ""
        parts: list[str] = []
        for block in content:
            if not isinstance(block, dict):
                continue
            btype = block.get("type")
            if btype == "content":
                inner = block.get("content")
                if isinstance(inner, dict) and inner.get("type") == "text":
                    parts.append(inner.get("text") or "")
            elif btype == "diff":
                # Give the client enough to render; stringify structurally
                parts.append(json.dumps(
                    {"diff": block.get("path"), "oldText": block.get("oldText"),
                     "newText": block.get("newText")},
                    ensure_ascii=False,
                ))
            elif btype == "terminal":
                parts.append(f"[terminal {block.get('terminalId')}]")
        return "\n".join(p for p in parts if p)

    # ── Incoming-request handling ───────────────────────────────────────

    async def _handle_agent_request(self, msg: dict[str, Any], respond) -> None:
        """Agent → client requests. Auto-approve permissions; reject the rest.

        `respond` is an awaitable callback `(req_id, result=None, error=None)`
        that writes the JSON-RPC reply.
        """
        req_id = msg.get("id")
        method = msg.get("method", "")
        params = msg.get("params") or {}

        if method == "session/request_permission":
            # Default: pick the first "allow"-shaped option to let the agent proceed.
            options = params.get("options") or []
            chosen = None
            for opt in options:
                if not isinstance(opt, dict):
                    continue
                opt_kind = str(opt.get("kind", "")).lower()
                if "allow" in opt_kind or "approve" in opt_kind:
                    chosen = opt
                    break
            # No allow-shaped option offered: fall back to the first option.
            if chosen is None and options and isinstance(options[0], dict):
                chosen = options[0]
            if chosen is not None:
                option_id = chosen.get("optionId") or chosen.get("id")
                await respond(req_id, result={
                    "outcome": {"outcome": "selected", "optionId": option_id},
                })
                return
            await respond(req_id, result={"outcome": {"outcome": "cancelled"}})
            return

        # Everything else — method not found. Agent should cope gracefully.
        await respond(req_id, error={
            "code": -32601,
            "message": f"Method not supported by clawith bridge: {method}",
        })

    # ── Lifecycle ───────────────────────────────────────────────────────

    async def _terminate_proc(self, proc: asyncio.subprocess.Process) -> None:
        """Stop `proc`: terminate, wait up to KILL_GRACE_SEC, then kill."""
        if proc.returncode is not None:
            return
        try:
            proc.terminate()
        except ProcessLookupError:
            return
        except Exception:
            pass
        try:
            await asyncio.wait_for(proc.wait(), timeout=self.KILL_GRACE_SEC)
            return
        except asyncio.TimeoutError:
            pass
        except Exception:
            return
        try:
            proc.kill()
        except ProcessLookupError:
            return
        except Exception:
            pass
        try:
            await proc.wait()
        except Exception:
            pass

    async def cancel(self, session_id: str, reason: str) -> None:
        """Terminate the session's subprocess, if one is running."""
        proc = self._procs.get(session_id)
        if proc is None:
            return
        # Recorded so start_session treats the resulting EOF as intentional.
        self._cancelled.add(session_id)
        await self._terminate_proc(proc)

    async def final_text(self, session_id: str) -> str:
        """Return the accumulated assistant text and forget it for this session."""
        return "".join(self._finals.pop(session_id, []))
# Surface as an event (for visibility) AND re-raise so the session + # manager emits session.error (non-zero exit) rather than a silent + # session.done with empty final_text. See test_base_daemon_error_surfacing. yield SessionEvent(kind="stderr_chunk", payload={"text": f"daemon start failed: {e}"}) - return + raise self._tasks[session_id] = task_id final_accum: list[str] = [] try: diff --git a/bridge/clawith_bridge/adapters/openclaw.py b/bridge/clawith_bridge/adapters/openclaw.py index a88ba124a..18f9ef308 100644 --- a/bridge/clawith_bridge/adapters/openclaw.py +++ b/bridge/clawith_bridge/adapters/openclaw.py @@ -1,104 +1,64 @@ -"""OpenClaw adapter — wraps a local OpenClaw daemon. +"""OpenClaw adapter — talks to a local OpenClaw daemon via `openclaw acp`. -OpenClaw in its "local" mode is the flavor this bridge is meant to eventually -replace. During the transition, two shapes are supported via config: +OpenClaw ships a first-class ACP (Agent Client Protocol) subcommand that runs +as a stdio JSON-RPC bridge and handles all the messy gateway plumbing +internally — Ed25519 device keypair, challenge signing, WebSocket v3 framing, +session routing, reconnection. We spawn it, speak stdio JSON-RPC, done. - 1. `mode = "http"` (default): OpenClaw exposes - POST /v1/chat { "messages": [...] } -> { "job_id": "..." } - GET /v1/jobs/{id}/events (SSE) - Configure with `base_url` + optional `auth_header`. +Flags threaded through from `AdapterConfig.extra` (all optional): + - url → `--url ` gateway URL (defaults to local :18789) + - token_file → `--token-file ` agent API key file + - session_label → `--session-label ` human-readable new-session label + - provenance → `--provenance ` + - verbose → `--verbose` flag; pass truthy value in TOML - 2. `mode = "subprocess"`: Spawn `openclaw run --prompt=...` (for dev setups - that don't run a long-lived daemon). 
- -V1 implements the HTTP path; the subprocess path is left as a small shim you -can wire up by overriding `build_command` (SubprocessAdapter). +Per-prompt `params` can also set `session_label` to override the config one. """ from __future__ import annotations -import json -from typing import Any, AsyncIterator +from typing import Any -from .base import DaemonAdapter, SessionEvent +from .acp_base import ACPSubprocessAdapter, npm_global_candidates, resolve_stdio_executable -class OpenClawAdapter(DaemonAdapter): +class OpenClawAdapter(ACPSubprocessAdapter): name = "openclaw" - capabilities = {"interactive_input": False, "cancellation": True} - - async def start_session_request( - self, - prompt: str, - params: dict[str, Any], - cwd: str | None, - ) -> str: - client = await self._ensure_client() - body = {"messages": [{"role": "user", "content": prompt}], "params": params or {}} - r = await client.post("/v1/chat", json=body) - r.raise_for_status() - data = r.json() - job_id = data.get("job_id") or data.get("id") - if not job_id: - # OpenClaw may return the response inline (no job_id) for fast paths - content = ( - data.get("content") - or data.get("message", {}).get("content") - or "" - ) - if content: - # Stash so iter_events can flush it immediately without hitting the network. - return f"inline:{json.dumps({'content': content})}" - raise RuntimeError(f"OpenClaw start response missing job_id: {data}") - return str(job_id) - - async def iter_events(self, task_id: str) -> AsyncIterator[SessionEvent]: - if task_id.startswith("inline:"): - # Synchronous response path — emit once and finish. 
- try: - payload = json.loads(task_id[len("inline:"):]) - except json.JSONDecodeError: - payload = {} - content = payload.get("content") or "" - if content: - yield SessionEvent(kind="assistant_text", payload={"text": content}) - return - - client = await self._ensure_client() - async with client.stream("GET", f"/v1/jobs/{task_id}/events") as resp: - resp.raise_for_status() - async for raw in resp.aiter_lines(): - if not raw: - continue - line = raw.strip() - if line.startswith("data:"): - line = line[5:].strip() - if not line or line == "[DONE]": - if line == "[DONE]": - return - continue - try: - evt = json.loads(line) - except json.JSONDecodeError: - yield SessionEvent(kind="stdout_chunk", payload={"text": line}) - continue - # Map OpenClaw-native fields to our taxonomy. - if "delta" in evt: - yield SessionEvent(kind="assistant_text", payload={"text": str(evt["delta"])}) - continue - if "content" in evt and "role" in evt: - yield SessionEvent(kind="assistant_text", payload={"text": str(evt["content"])}) - continue - kind = evt.get("kind") - if isinstance(kind, str): - yield SessionEvent(kind=kind, payload=evt.get("payload") or {}) - if kind in ("done", "finished"): - return - - async def cancel_request(self, task_id: str) -> None: - if task_id.startswith("inline:"): - return - client = await self._ensure_client() - try: - await client.post(f"/v1/jobs/{task_id}/cancel") - except Exception: - pass + capabilities = { + "interactive_input": False, + "cancellation": True, + "tool_calls": True, + } + + DEFAULT_EXECUTABLE = "openclaw" + + def build_acp_argv(self, params: dict[str, Any], cwd: str | None) -> list[str]: + configured = getattr(self.config, "executable", None) if self.config else None + exe_prefix = resolve_stdio_executable( + configured, + self.DEFAULT_EXECUTABLE, + npm_global_candidates(self.DEFAULT_EXECUTABLE), + ) + argv: list[str] = [*exe_prefix, "acp"] + + extra = (getattr(self.config, "extra", {}) if self.config else {}) or {} + + url = 
extra.get("url") + if url: + argv.extend(["--url", str(url)]) + + token_file = extra.get("token_file") + if token_file: + argv.extend(["--token-file", str(token_file)]) + + provenance = extra.get("provenance") + if provenance: + argv.extend(["--provenance", str(provenance)]) + + session_label = params.get("session_label") or extra.get("session_label") + if session_label: + argv.extend(["--session-label", str(session_label)]) + + if extra.get("verbose"): + argv.append("--verbose") + + return argv diff --git a/bridge/pyproject.toml b/bridge/pyproject.toml index a931b92a4..1fe1db69a 100644 --- a/bridge/pyproject.toml +++ b/bridge/pyproject.toml @@ -31,3 +31,7 @@ include = ["clawith_bridge*"] [tool.setuptools.package-data] clawith_bridge = ["py.typed"] + +[tool.pytest.ini_options] +asyncio_mode = "auto" +testpaths = ["tests"] diff --git a/bridge/tests/__init__.py b/bridge/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/bridge/tests/conftest.py b/bridge/tests/conftest.py new file mode 100644 index 000000000..88447c1ee --- /dev/null +++ b/bridge/tests/conftest.py @@ -0,0 +1,8 @@ +"""Test configuration for clawith-bridge.""" +import sys +from pathlib import Path + +# Ensure the package root is on sys.path when running pytest from any cwd. +_ROOT = Path(__file__).resolve().parent.parent +if str(_ROOT) not in sys.path: + sys.path.insert(0, str(_ROOT)) diff --git a/bridge/tests/test_acp_adapter.py b/bridge/tests/test_acp_adapter.py new file mode 100644 index 000000000..5e25d6a02 --- /dev/null +++ b/bridge/tests/test_acp_adapter.py @@ -0,0 +1,266 @@ +"""Tests for ACPSubprocessAdapter. + +Two layers: + - Unit: _parse_session_update and _serialize_tool_content against crafted + inputs — exercises the ACP notification → SessionEvent mapping without + spinning up a process. 
+ - Integration: spawn a real Python subprocess that acts as a fake ACP agent, + read the event stream end-to-end, assert on observed events and the + adapter's accumulated final_text. +""" +from __future__ import annotations + +import asyncio +import json +import sys +import textwrap + +import pytest + +from clawith_bridge.adapters.acp_base import ACPSubprocessAdapter + + +# ── Unit tests for notification mapping ───────────────────────────────── + + +class _StubAdapter(ACPSubprocessAdapter): + """Concrete subclass so we can call the helper methods (ACPSubprocessAdapter + is abstract on build_acp_argv).""" + def build_acp_argv(self, params, cwd): # pragma: no cover — not called + return ["unused"] + + +def _parse(update_dict): + a = _StubAdapter() + return a._parse_session_update({"update": update_dict}, session_id="s1") + + +def test_parse_agent_message_chunk_yields_assistant_text(): + events = _parse({ + "sessionUpdate": "agent_message_chunk", + "content": {"type": "text", "text": "Hello"}, + }) + assert len(events) == 1 + assert events[0].kind == "assistant_text" + assert events[0].payload == {"text": "Hello"} + + +def test_parse_user_message_chunk_is_dropped(): + # Echo of our own prompt — the bridge doesn't want to forward this. 
+ events = _parse({ + "sessionUpdate": "user_message_chunk", + "content": {"type": "text", "text": "my prompt"}, + }) + assert events == [] + + +def test_parse_agent_thought_chunk_yields_thinking(): + events = _parse({ + "sessionUpdate": "agent_thought_chunk", + "content": {"type": "text", "text": "pondering..."}, + }) + assert len(events) == 1 + assert events[0].kind == "thinking" + + +def test_parse_tool_call_yields_tool_call_start(): + events = _parse({ + "sessionUpdate": "tool_call", + "toolCallId": "tc-1", + "title": "Read", + "kind": "read", + "rawInput": {"path": "/tmp/x"}, + }) + assert len(events) == 1 + assert events[0].kind == "tool_call_start" + assert events[0].payload["name"] == "Read" + assert events[0].payload["tool_use_id"] == "tc-1" + assert events[0].payload["args"] == {"path": "/tmp/x"} + + +def test_parse_tool_call_update_completed_yields_tool_call_result(): + events = _parse({ + "sessionUpdate": "tool_call_update", + "toolCallId": "tc-1", + "status": "completed", + "content": [ + {"type": "content", "content": {"type": "text", "text": "file contents"}}, + ], + }) + assert len(events) == 1 + assert events[0].kind == "tool_call_result" + assert events[0].payload["result"] == "file contents" + assert events[0].payload["is_error"] is False + + +def test_parse_tool_call_update_in_progress_is_dropped(): + # Only terminal statuses surface — intermediate updates are noise. 
+ events = _parse({ + "sessionUpdate": "tool_call_update", + "toolCallId": "tc-1", + "status": "in_progress", + }) + assert events == [] + + +def test_parse_tool_call_update_failed_is_error(): + events = _parse({ + "sessionUpdate": "tool_call_update", + "toolCallId": "tc-2", + "status": "failed", + "content": [{"type": "content", "content": {"type": "text", "text": "boom"}}], + }) + assert events[0].kind == "tool_call_result" + assert events[0].payload["is_error"] is True + + +def test_parse_unknown_session_update_is_ignored(): + events = _parse({ + "sessionUpdate": "available_commands_update", + "availableCommands": [], + }) + assert events == [] + + +@pytest.mark.asyncio +async def test_final_text_accumulates_agent_chunks_only(): + a = _StubAdapter() + a._parse_session_update({"update": { + "sessionUpdate": "agent_message_chunk", + "content": {"type": "text", "text": "Hel"}, + }}, session_id="s1") + a._parse_session_update({"update": { + "sessionUpdate": "agent_message_chunk", + "content": {"type": "text", "text": "lo"}, + }}, session_id="s1") + a._parse_session_update({"update": { + "sessionUpdate": "agent_thought_chunk", + "content": {"type": "text", "text": "thinking — not in final"}, + }}, session_id="s1") + + assert await a.final_text("s1") == "Hello" + + +def test_serialize_tool_content_flattens_mixed_blocks(): + out = ACPSubprocessAdapter._serialize_tool_content([ + {"type": "content", "content": {"type": "text", "text": "first"}}, + {"type": "diff", "path": "a.py", "oldText": "x", "newText": "y"}, + {"type": "content", "content": {"type": "text", "text": "last"}}, + ]) + assert "first" in out + assert "last" in out + assert "a.py" in out + + +# ── Integration test: real subprocess speaking fake ACP ───────────────── + + +FAKE_AGENT_SCRIPT = textwrap.dedent(""" + import json, sys + + def send(obj): + sys.stdout.write(json.dumps(obj) + "\\n") + sys.stdout.flush() + + for raw in sys.stdin: + raw = raw.strip() + if not raw: + continue + msg = json.loads(raw) 
+ mid = msg.get("id") + method = msg.get("method") + + if method == "initialize": + send({"jsonrpc": "2.0", "id": mid, "result": { + "protocolVersion": 1, + "agentCapabilities": {"promptCapabilities": {}}, + }}) + elif method == "session/new": + send({"jsonrpc": "2.0", "id": mid, "result": { + "sessionId": "acp-sess-abc", + }}) + elif method == "session/prompt": + # Stream a few agent_message_chunks, then a tool_call, then terminate. + for chunk in ("Hello, ", "world", "!"): + send({"jsonrpc": "2.0", "method": "session/update", "params": { + "sessionId": "acp-sess-abc", + "update": { + "sessionUpdate": "agent_message_chunk", + "content": {"type": "text", "text": chunk}, + }, + }}) + send({"jsonrpc": "2.0", "method": "session/update", "params": { + "sessionId": "acp-sess-abc", + "update": { + "sessionUpdate": "tool_call", + "toolCallId": "tc-42", + "title": "FakeTool", + "rawInput": {"q": 1}, + }, + }}) + send({"jsonrpc": "2.0", "id": mid, "result": {"stopReason": "end_turn"}}) + sys.exit(0) +""") + + +class _FakeACPAdapter(ACPSubprocessAdapter): + """Spawn `python -c ` instead of a real ACP binary.""" + def __init__(self, script: str): + super().__init__(config=None) + self._script = script + + def build_acp_argv(self, params, cwd): + return [sys.executable, "-u", "-c", self._script] + + +@pytest.mark.asyncio +async def test_end_to_end_prompt_yields_events_and_final_text(tmp_path): + adapter = _FakeACPAdapter(FAKE_AGENT_SCRIPT) + events = [] + async for ev in adapter.start_session( + session_id="s-int", + prompt="hi", + params={}, + cwd=str(tmp_path), + env={}, + timeout_s=30, + ): + events.append(ev) + + kinds = [e.kind for e in events] + # init status, 3 assistant_text chunks, 1 tool_call_start, done status. 
+ assert "status" in kinds + assert kinds.count("assistant_text") == 3 + assert kinds.count("tool_call_start") == 1 + + assistant_texts = [e.payload["text"] for e in events if e.kind == "assistant_text"] + assert assistant_texts == ["Hello, ", "world", "!"] + + # final_text accumulates the three chunks + final = await adapter.final_text("s-int") + assert final == "Hello, world!" + + # Terminal status carries stop_reason + done_status = next( + (e for e in events if e.kind == "status" and e.payload.get("state") == "done"), + None, + ) + assert done_status is not None + assert done_status.payload.get("stop_reason") == "end_turn" + + +@pytest.mark.asyncio +async def test_missing_executable_raises_file_not_found(): + class _MissingAdapter(ACPSubprocessAdapter): + def build_acp_argv(self, params, cwd): + return ["definitely-not-a-real-binary-xyz-12345"] + + adapter = _MissingAdapter() + events = [] + with pytest.raises(FileNotFoundError): + async for ev in adapter.start_session( + session_id="s-miss", prompt="x", params={}, cwd=None, env={}, timeout_s=5, + ): + events.append(ev) + # We want visibility before the raise — at least one stderr_chunk event. + assert any(e.kind == "stderr_chunk" for e in events) diff --git a/bridge/tests/test_base_daemon_error_surfacing.py b/bridge/tests/test_base_daemon_error_surfacing.py new file mode 100644 index 000000000..1e981f08c --- /dev/null +++ b/bridge/tests/test_base_daemon_error_surfacing.py @@ -0,0 +1,56 @@ +"""Regression test for DaemonAdapter silent-failure bug. + +Before fix: if `start_session_request` raised (e.g. connection refused), the +adapter yielded a `stderr_chunk` and *returned* — which the session manager +interpreted as a normal completion and sent `SessionDoneFrame(exit=0, final="")`. +From the user's point of view: a green status light and an empty reply. 
+ +After fix: the exception is re-raised after emitting the diagnostic event, so +the session manager emits `SessionErrorFrame` with non-zero exit — the user +sees a clear error instead of silent empty success. +""" +from __future__ import annotations + +from typing import Any, AsyncIterator + +import pytest + +from clawith_bridge.adapters.base import DaemonAdapter, SessionEvent + + +class _BrokenDaemonAdapter(DaemonAdapter): + name = "broken" + + async def start_session_request( + self, prompt: str, params: dict[str, Any], cwd: str | None, + ) -> str: + raise ConnectionRefusedError("127.0.0.1:9000 refused") + + async def iter_events(self, task_id: str) -> AsyncIterator[SessionEvent]: # pragma: no cover + # never called + return + yield # type: ignore[unreachable] + + +@pytest.mark.asyncio +async def test_daemon_start_failure_raises_not_silent(): + adapter = _BrokenDaemonAdapter() + + events: list[SessionEvent] = [] + with pytest.raises(ConnectionRefusedError): + async for ev in adapter.start_session( + session_id="s-err", + prompt="hello", + params={}, + cwd=None, + env={}, + timeout_s=10, + ): + events.append(ev) + + # We still want the user-visible diagnostic — the error is surfaced as a + # stderr_chunk event before the generator raises. + assert len(events) == 1 + assert events[0].kind == "stderr_chunk" + assert "daemon start failed" in events[0].payload["text"] + assert "127.0.0.1:9000" in events[0].payload["text"] diff --git a/bridge/tests/test_openclaw_adapter.py b/bridge/tests/test_openclaw_adapter.py new file mode 100644 index 000000000..d86408728 --- /dev/null +++ b/bridge/tests/test_openclaw_adapter.py @@ -0,0 +1,158 @@ +"""Tests for OpenClawAdapter argv construction. + +Focuses on build_acp_argv — the stdio JSON-RPC machinery is covered in +test_acp_adapter.py; here we verify the CLI flags we thread through to +`openclaw acp` match the user's config and per-request params. 
+""" +from __future__ import annotations + +import os +import sys +from pathlib import Path + +import pytest + +from clawith_bridge.adapters import acp_base +from clawith_bridge.adapters.openclaw import OpenClawAdapter +from clawith_bridge.config import AdapterConfig + + +def _make_adapter(config: AdapterConfig | None = None) -> OpenClawAdapter: + return OpenClawAdapter(config=config) + + +# ── build_acp_argv: flag threading ────────────────────────────────────── + + +def test_build_argv_no_extras_minimal(monkeypatch): + # openclaw.py imports the symbol by name, so patching acp_base doesn't + # reach the already-bound reference. Patch at the use site. + from clawith_bridge.adapters import openclaw as openclaw_mod + monkeypatch.setattr( + openclaw_mod, "resolve_stdio_executable", + lambda configured, default, paths: ["/fake/openclaw"], + ) + argv = _make_adapter(AdapterConfig()).build_acp_argv({}, cwd=None) + assert argv == ["/fake/openclaw", "acp"] + + +def test_build_argv_threads_url_and_token_file(monkeypatch): + # openclaw.py imports the symbol by name, so patching acp_base doesn't + # reach the already-bound reference. Patch at the use site. + from clawith_bridge.adapters import openclaw as openclaw_mod + monkeypatch.setattr( + openclaw_mod, "resolve_stdio_executable", + lambda configured, default, paths: ["/fake/openclaw"], + ) + cfg = AdapterConfig(extra={ + "url": "https://gateway.example/ws", + "token_file": "/secret/openclaw-token", + }) + argv = _make_adapter(cfg).build_acp_argv({}, cwd=None) + assert argv == [ + "/fake/openclaw", "acp", + "--url", "https://gateway.example/ws", + "--token-file", "/secret/openclaw-token", + ] + + +def test_build_argv_params_session_label_overrides_config(monkeypatch): + # openclaw.py imports the symbol by name, so patching acp_base doesn't + # reach the already-bound reference. Patch at the use site. 
+ from clawith_bridge.adapters import openclaw as openclaw_mod + monkeypatch.setattr( + openclaw_mod, "resolve_stdio_executable", + lambda configured, default, paths: ["/fake/openclaw"], + ) + cfg = AdapterConfig(extra={"session_label": "config-label"}) + argv = _make_adapter(cfg).build_acp_argv( + {"session_label": "param-label"}, cwd=None, + ) + assert "--session-label" in argv + # Param wins + idx = argv.index("--session-label") + assert argv[idx + 1] == "param-label" + + +def test_build_argv_verbose_adds_flag(monkeypatch): + # openclaw.py imports the symbol by name, so patching acp_base doesn't + # reach the already-bound reference. Patch at the use site. + from clawith_bridge.adapters import openclaw as openclaw_mod + monkeypatch.setattr( + openclaw_mod, "resolve_stdio_executable", + lambda configured, default, paths: ["/fake/openclaw"], + ) + cfg = AdapterConfig(extra={"verbose": True}) + argv = _make_adapter(cfg).build_acp_argv({}, cwd=None) + assert "--verbose" in argv + + +def test_build_argv_provenance(monkeypatch): + # openclaw.py imports the symbol by name, so patching acp_base doesn't + # reach the already-bound reference. Patch at the use site. 
+ from clawith_bridge.adapters import openclaw as openclaw_mod + monkeypatch.setattr( + openclaw_mod, "resolve_stdio_executable", + lambda configured, default, paths: ["/fake/openclaw"], + ) + cfg = AdapterConfig(extra={"provenance": "clawith-bridge/0.1"}) + argv = _make_adapter(cfg).build_acp_argv({}, cwd=None) + assert "--provenance" in argv + assert argv[argv.index("--provenance") + 1] == "clawith-bridge/0.1" + + +# ── Executable resolution: Windows .cmd shim ──────────────────────────── + + +def test_wrap_if_windows_cmd_wraps_on_windows(tmp_path, monkeypatch): + cmd_path = tmp_path / "openclaw.cmd" + cmd_path.write_text("@echo off\n") + monkeypatch.setattr(sys, "platform", "win32") + result = acp_base._wrap_if_windows_cmd(str(cmd_path)) + assert result == ["cmd.exe", "/c", str(cmd_path)] + + +def test_wrap_if_windows_cmd_bare_exe_unchanged(tmp_path, monkeypatch): + exe_path = tmp_path / "openclaw.exe" + exe_path.write_text("") + monkeypatch.setattr(sys, "platform", "win32") + result = acp_base._wrap_if_windows_cmd(str(exe_path)) + assert result == [str(exe_path)] + + +def test_wrap_if_windows_cmd_returns_none_for_missing(tmp_path, monkeypatch): + monkeypatch.setattr(sys, "platform", "win32") + assert acp_base._wrap_if_windows_cmd(str(tmp_path / "nope.cmd")) is None + + +def test_resolve_falls_through_to_bare_name(monkeypatch): + monkeypatch.setattr(acp_base.shutil, "which", lambda name: None) + monkeypatch.setattr(os.path, "exists", lambda p: False) + result = acp_base.resolve_stdio_executable(None, "openclaw", []) + assert result == ["openclaw"] + + +def test_resolve_prefers_configured_path_when_exists(tmp_path, monkeypatch): + fake = tmp_path / "custom-openclaw" + fake.write_text("") + # On POSIX the .cmd/.bat wrap is skipped, so resolve returns the raw path. 
+ monkeypatch.setattr(sys, "platform", "linux") + result = acp_base.resolve_stdio_executable(str(fake), "openclaw", []) + assert result == [str(fake)] + + +def test_resolve_uses_shutil_which_when_no_configured(monkeypatch): + monkeypatch.setattr(acp_base.shutil, "which", + lambda name: "/usr/bin/openclaw" if name == "openclaw" else None) + monkeypatch.setattr(sys, "platform", "linux") + monkeypatch.setattr(os.path, "exists", lambda p: True) + result = acp_base.resolve_stdio_executable(None, "openclaw", []) + assert result == ["/usr/bin/openclaw"] + + +def test_npm_global_candidates_includes_cmd_on_windows(monkeypatch, tmp_path): + monkeypatch.setattr(sys, "platform", "win32") + monkeypatch.setenv("APPDATA", str(tmp_path)) + candidates = acp_base.npm_global_candidates("openclaw") + assert any(c.endswith("openclaw.cmd") for c in candidates) + assert any(c.endswith("openclaw.exe") for c in candidates) From e765f9f177efe70896c31cd73de172198c6f0c98 Mon Sep 17 00:00:00 2001 From: zhongyua Date: Wed, 22 Apr 2026 18:51:46 +0800 Subject: [PATCH 14/15] fix(bridge): revoke attached bridge when API key is rotated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `/ws/bridge` authenticates only at the WebSocket upgrade — once a bridge is connected the read loop never re-checks the token. Rotating the hash alone left the old socket running sessions on a revoked credential until it disconnected on its own. Codex P1 flagged this. Fix: - `SessionDispatcher.detach_bridge` gains `close_ws` + `reason` params. When set, the server closes the WS with code 4001 (same class as upgrade-time auth failure) before failing in-flight sessions. Existing call site in `/ws/bridge`'s read-loop finally keeps the no-close default, so no behavior change there. - Rotate endpoint calls `detach_bridge(close_ws=True, reason="api_key_rotated")` after committing the new hash. 
Wrapped in try/except so an eviction failure does not mask the successful rotation from the operator. Tests pin both the dispatcher contract (close code, reason, in-flight session failure, no-op when unattached) and the integration (rotate endpoint invokes detach_bridge with the right args, still succeeds if eviction raises). Co-Authored-By: Claude Opus 4.7 --- backend/app/api/agents.py | 15 ++ .../local_agent/session_dispatcher.py | 31 ++- backend/tests/test_rotation_revokes_bridge.py | 248 ++++++++++++++++++ 3 files changed, 292 insertions(+), 2 deletions(-) create mode 100644 backend/tests/test_rotation_revokes_bridge.py diff --git a/backend/app/api/agents.py b/backend/app/api/agents.py index 061173892..a618a5150 100644 --- a/backend/app/api/agents.py +++ b/backend/app/api/agents.py @@ -790,6 +790,21 @@ async def generate_or_reset_api_key( agent.api_key_hash = hashlib.sha256(raw_key.encode()).hexdigest() await db.commit() + # Revoke any currently-attached bridge. `/ws/bridge` authenticates only + # at the initial upgrade, so rotating the hash alone doesn't unseat a + # bridge that's already holding a socket — it would keep running sessions + # with the old key until it disconnects on its own. Kick it now so the + # operator has to re-auth with the new key. detach_bridge is idempotent + # (no-ops when no bridge is attached). 
+ from app.services.local_agent.session_dispatcher import dispatcher as _bridge_dispatcher + try: + await _bridge_dispatcher.detach_bridge( + str(agent.id), close_ws=True, reason="api_key_rotated", + ) + except Exception as e: # noqa: BLE001 — best-effort eviction; don't fail rotation + from loguru import logger as _logger + _logger.warning(f"[rotate] bridge eviction failed for {agent.id}: {e}") + return {"api_key": raw_key, "message": "Key configured successfully."} diff --git a/backend/app/services/local_agent/session_dispatcher.py b/backend/app/services/local_agent/session_dispatcher.py index 6ae61a4c8..2902879ac 100644 --- a/backend/app/services/local_agent/session_dispatcher.py +++ b/backend/app/services/local_agent/session_dispatcher.py @@ -142,14 +142,41 @@ async def attach_bridge( pass return True - async def detach_bridge(self, agent_id: str) -> None: + async def detach_bridge( + self, + agent_id: str, + close_ws: bool = False, + reason: str = "", + ) -> None: + """Remove a bridge from the registry and fail its in-flight sessions. + + `close_ws=True` additionally closes the bridge's WebSocket from the + server side. This is used for server-initiated eviction — notably API + key rotation, which must revoke the bridge's authenticated socket + because auth is checked only at the initial `/ws/bridge` upgrade. + The default (False) preserves the prior behavior, where detach runs + from the bridge's own read-loop `finally` after the socket is already + closing. + """ bridge = self._bridges.pop(agent_id, None) if not bridge: return + if close_ws: + # code 4001 matches the "auth failed" class used at upgrade; the + # bridge treats it as a terminal credential problem rather than a + # transient network blip. `reason` is cropped to the 123-byte WS + # limit by starlette, but trim here to keep logs readable. 
+ try: + await bridge.ws.close( + code=4001, + reason=(reason or "bridge detached by server")[:120], + ) + except Exception as e: + logger.debug(f"[Dispatcher] ws.close during detach failed: {e}") abandoned = list(bridge.sessions.values()) logger.info( f"[Dispatcher] Bridge detached: agent={agent_id} " - f"sessions_abandoned={len(abandoned)}" + f"sessions_abandoned={len(abandoned)} reason={reason!r}" ) # Fail all pending sessions for this bridge. for session in abandoned: diff --git a/backend/tests/test_rotation_revokes_bridge.py b/backend/tests/test_rotation_revokes_bridge.py new file mode 100644 index 000000000..13b7562da --- /dev/null +++ b/backend/tests/test_rotation_revokes_bridge.py @@ -0,0 +1,248 @@ +"""Pins the contract that rotating an agent's API key evicts any currently- +attached bridge. + +The `/ws/bridge` endpoint authenticates only at the WebSocket upgrade — once +a bridge is connected, the read loop never re-checks the token. So rotating +`agents.api_key_hash` alone lets an old bridge keep running sessions on a +revoked credential until it disconnects on its own. + +Codex P1 review flagged this. Fix: + 1. `SessionDispatcher.detach_bridge` grew `close_ws` + `reason` params. + 2. The rotate endpoint calls `detach_bridge(close_ws=True, reason="api_key_rotated")` + after committing the new hash. + +These tests pin both pieces: + - Unit: dispatcher closes the WS with code 4001 + reason, drops the + registry entry, and fails in-flight sessions. + - Integration: rotate endpoint invokes detach_bridge with the right args. +""" +from __future__ import annotations + +import asyncio +import hashlib +import uuid + +import pytest + +from app.api import agents as agents_api +from app.services.local_agent.session_dispatcher import ( + BridgeDisconnected, + SessionDispatcher, + _Bridge, + _Session, +) + +# Reuse the RecordingDB / make_user / make_agent helpers — same shape as in +# test_install_rotate_decoupling.py. 
Imported from that module rather than copied, so this +# file depends on it being importable. +from tests.test_install_rotate_decoupling import make_agent, make_user, RecordingDB + + +# ── Unit: dispatcher.detach_bridge(close_ws=True) ────────────────────── + + +class _FakeWS: + """Minimal WebSocket stand-in that records close() calls.""" + + def __init__(self) -> None: + self.close_calls: list[dict] = [] + + async def close(self, code: int = 1000, reason: str = "") -> None: + self.close_calls.append({"code": code, "reason": reason}) + + +@pytest.mark.asyncio +async def test_detach_bridge_closes_ws_when_requested(monkeypatch): + # log_activity hits the real DB in prod; stub it out so the dispatcher's + # post-detach telemetry doesn't blow up the test. + from app.services.local_agent import session_dispatcher as sd + + async def _noop_log(**_kwargs): + return None + + monkeypatch.setattr(sd, "log_activity", _noop_log) + + dispatcher = SessionDispatcher() + agent_id = str(uuid.uuid4()) + fake_ws = _FakeWS() + + # Register a bridge with one in-flight session so we can also verify + # that abandoned sessions get failed (not silently dropped). + loop = asyncio.get_event_loop() + future: asyncio.Future = loop.create_future() + events: asyncio.Queue = asyncio.Queue() + from datetime import datetime, timezone + session = _Session( + session_id="s-1", + agent_id=agent_id, + adapter="openclaw", + started_at=datetime.now(timezone.utc), + future=future, + events=events, + ) + bridge = _Bridge( + agent_id=agent_id, + ws=fake_ws, # type: ignore[arg-type] + bridge_version="0.1.0", + adapters=["openclaw"], + capabilities={}, + connected_at=datetime.now(timezone.utc), + sessions={"s-1": session}, + ) + dispatcher._bridges[agent_id] = bridge + + await dispatcher.detach_bridge( + agent_id, close_ws=True, reason="api_key_rotated", + ) + + # Bridge is gone from the registry.
+ assert agent_id not in dispatcher._bridges + + # WS was closed with the auth-failed code and a reason derived from the + # caller's argument. 4001 matches what `/ws/bridge` uses at upgrade for + # auth failure, so the bridge treats this as "go reauth" not "retry". + assert len(fake_ws.close_calls) == 1 + assert fake_ws.close_calls[0]["code"] == 4001 + assert "api_key_rotated" in fake_ws.close_calls[0]["reason"] + + # In-flight session got failed (not silently abandoned). + assert future.done() + with pytest.raises(BridgeDisconnected): + future.result() + + +@pytest.mark.asyncio +async def test_detach_bridge_default_does_not_close_ws(monkeypatch): + # The existing call site (`/ws/bridge` read-loop finally) passes no + # close_ws kwarg — the socket is already tearing down. Verify we don't + # double-close, which would raise inside starlette. + from app.services.local_agent import session_dispatcher as sd + + async def _noop_log(**_kwargs): + return None + + monkeypatch.setattr(sd, "log_activity", _noop_log) + + dispatcher = SessionDispatcher() + agent_id = str(uuid.uuid4()) + fake_ws = _FakeWS() + + from datetime import datetime, timezone + bridge = _Bridge( + agent_id=agent_id, + ws=fake_ws, # type: ignore[arg-type] + bridge_version="0.1.0", + adapters=["openclaw"], + capabilities={}, + connected_at=datetime.now(timezone.utc), + ) + dispatcher._bridges[agent_id] = bridge + + await dispatcher.detach_bridge(agent_id) + + assert agent_id not in dispatcher._bridges + assert fake_ws.close_calls == [] + + +@pytest.mark.asyncio +async def test_detach_bridge_noop_when_nothing_attached(): + # Rotation runs detach unconditionally — if the user rotates without a + # bridge attached, we must not raise. + dispatcher = SessionDispatcher() + # Should not raise. 
+ await dispatcher.detach_bridge( + str(uuid.uuid4()), close_ws=True, reason="api_key_rotated", + ) + + +# ── Integration: rotate endpoint → dispatcher.detach_bridge ────────────── + + +@pytest.mark.asyncio +async def test_rotate_endpoint_evicts_bridge(monkeypatch): + # The key claim: rotating the API key kicks any attached bridge, not + # just rewriting the DB. Without this, a bridge that stole the old key + # keeps running sessions until it disconnects on its own. + user = make_user() + old_plaintext = "oc-old-abc" + old_hash = hashlib.sha256(old_plaintext.encode()).hexdigest() + agent = make_agent( + user.id, + agent_type="openclaw", + api_key=old_plaintext, + api_key_hash=old_hash, + ) + + async def fake_check(_db, _user, _aid): + return agent, "manage" + + monkeypatch.setattr(agents_api, "check_agent_access", fake_check) + monkeypatch.setattr(agents_api, "is_agent_creator", lambda _u, _a: True) + + # Record detach_bridge calls on the real dispatcher singleton. + from app.services.local_agent import session_dispatcher as sd + + calls: list[dict] = [] + + async def fake_detach(agent_id, close_ws=False, reason=""): + calls.append( + {"agent_id": agent_id, "close_ws": close_ws, "reason": reason}, + ) + + monkeypatch.setattr(sd.dispatcher, "detach_bridge", fake_detach) + + db = RecordingDB() + result = await agents_api.generate_or_reset_api_key( + agent_id=agent.id, + current_user=user, + db=db, + ) + + # Normal rotate postconditions still hold. + assert result["api_key"].startswith("oc-") + assert agent.api_key_hash != old_hash + assert db.commits == 1 + + # The new invariant: exactly one eviction targeted at this agent, with + # the flag set so the socket actually gets closed. 
+ assert len(calls) == 1 + assert calls[0]["agent_id"] == str(agent.id) + assert calls[0]["close_ws"] is True + assert calls[0]["reason"] == "api_key_rotated" + + +@pytest.mark.asyncio +async def test_rotate_endpoint_still_succeeds_if_eviction_raises(monkeypatch): + # Eviction is best-effort — a failure here must not rollback the rotate + # or mask it from the operator. Pin the try/except around detach_bridge. + user = make_user() + agent = make_agent( + user.id, + agent_type="openclaw", + api_key="oc-old", + api_key_hash=hashlib.sha256(b"oc-old").hexdigest(), + ) + + async def fake_check(_db, _user, _aid): + return agent, "manage" + + monkeypatch.setattr(agents_api, "check_agent_access", fake_check) + monkeypatch.setattr(agents_api, "is_agent_creator", lambda _u, _a: True) + + from app.services.local_agent import session_dispatcher as sd + + async def boom(*_args, **_kwargs): + raise RuntimeError("dispatcher on fire") + + monkeypatch.setattr(sd.dispatcher, "detach_bridge", boom) + + db = RecordingDB() + result = await agents_api.generate_or_reset_api_key( + agent_id=agent.id, + current_user=user, + db=db, + ) + + # Rotate completed despite the eviction failure. + assert result["api_key"].startswith("oc-") + assert db.commits == 1 From 618ec2176c780940d8d48e997b78c9a5357170d5 Mon Sep 17 00:00:00 2001 From: zhongyua Date: Thu, 23 Apr 2026 16:18:06 +0800 Subject: [PATCH 15/15] fix(backend): guard ss-local config with isfile() so missing mount stops crashing startup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the host-side ./ss-nodes.json is absent, Docker auto-creates the bind-mount source as a directory. The previous os.path.exists() check passed, then open() raised IsADirectoryError — surfaced as "Background task ss-local-proxy CRASHED" on every container start. isfile() treats the directory as absent and takes the clean skip branch. 
--- backend/app/main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backend/app/main.py b/backend/app/main.py index d7e1efa1e..975c1824b 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -24,7 +24,10 @@ async def _start_ss_local() -> None: # Load proxy nodes from config file (gitignored, mounted as Docker volume) import json as _json cfg_file = os.environ.get("SS_CONFIG_FILE", "/data/ss-nodes.json") - if os.path.exists(cfg_file): + # isfile(), not exists(): Docker auto-creates the bind-mount source as a + # *directory* when the host-side file is missing, so a bare exists() check + # passes and the subsequent open() crashes with IsADirectoryError on boot. + if os.path.isfile(cfg_file): # Guard against empty or malformed config file — both produce a clear # warning and a clean exit rather than an unhandled JSONDecodeError. try: