Skip to content

Commit 9ecbf6c

Browse files
committed
Fix tool execution guardrails: expand confirmation set and implement SSE confirmation flow (#565)
Expand TOOLS_REQUIRING_CONFIRMATION from just run_shell_command to all write/execute tools (write_file, edit_file, write_python_file, etc.). Implement the full backend-to-frontend confirmation flow: SSEOutputHandler now overrides confirm_tool_execution() to emit permission_request events and block until the frontend responds via a new POST /api/chat/confirm-tool endpoint. Frontend wires SSE events to the existing PermissionPrompt UI.
1 parent cc90935 commit 9ecbf6c

8 files changed

Lines changed: 134 additions & 11 deletions

File tree

src/gaia/agents/base/agent.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,17 @@
3434
# Tools that require explicit user confirmation before execution.
3535
# Adding a tool name here causes _execute_tool() to call
3636
# console.confirm_tool_execution() and block until the user responds.
37-
TOOLS_REQUIRING_CONFIRMATION = {"run_shell_command"}
37+
TOOLS_REQUIRING_CONFIRMATION = {
38+
"run_shell_command",
39+
"run_cli_command",
40+
"write_file",
41+
"write_python_file",
42+
"edit_file",
43+
"edit_python_file",
44+
"write_markdown_file",
45+
"replace_function",
46+
"update_gaia_md",
47+
}
3848

3949

4050
class Agent(abc.ABC):

src/gaia/apps/webui/src/components/ChatView.tsx

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import { useEffect, useRef, useCallback, useState } from 'react';
55
import { Edit3, Paperclip, Download, Send, Upload, MessageSquare, Square, ArrowDown, Lock, FileText, FolderSearch, CheckCircle2, X } from 'lucide-react';
66
import { MessageBubble } from './MessageBubble';
77
import { useChatStore } from '../stores/chatStore';
8+
import { useNotificationStore } from '../stores/notificationStore';
89
import * as api from '../services/api';
910
import { log } from '../utils/logger';
1011
import { bugReportUrl } from './UnsupportedFeature';
@@ -23,10 +24,10 @@ const EMPTY_SUGGESTIONS = [
2324
*
2425
* Primary filtering happens server-side in sse_handler.py (see _TOOL_CALL_JSON_RE).
2526
* This frontend regex is a secondary safety net in case any tool-call JSON leaks
26-
* through the SSE stream. The canonical pattern is defined in sse_handler.py;
27-
* keep this in sync if the server-side pattern changes.
27+
* through the SSE stream. Uses quote-aware matching to avoid stripping answer
28+
* content from JSON-wrapped responses. Keep in sync with the server-side pattern.
2829
*/
29-
const TOOL_CALL_JSON_SAFETY_RE = /\s*\{\s*"?(?:tool|thought|goal)"?\s*:\s*"[^"]*"[^}]*(?:"?tool_args"?\s*:\s*\{[^}]*\})?\s*\}/g;
30+
const TOOL_CALL_JSON_SAFETY_RE = /\s*\{\s*"?(?:tool|thought|goal)"?\s*:\s*"[^"]*"(?:\s*,\s*"[^"]*"\s*:\s*(?:"(?:[^"\\]|\\.)*"|[^,}]*))*\s*\}/g;
3031

3132
/**
3233
* Strip the LLM JSON envelope from streamed/accumulated content.
@@ -684,6 +685,27 @@ export function ChatView({ sessionId }: ChatViewProps) {
684685
return; // Step headers are redundant with actual tool/thinking steps
685686
}
686687

688+
// Permission request — push to the notification store for the
689+
// PermissionPrompt overlay; the store replies via Electron IPC or confirmTool().
690+
if (event.type === 'permission_request') {
691+
const { addNotification } = useNotificationStore.getState();
692+
addNotification({
693+
id: `perm-${Date.now()}`,
694+
type: 'permission_request',
695+
agentId: sessionId,
696+
agentName: 'GAIA Agent',
697+
title: `Tool: ${event.tool}`,
698+
message: `The agent wants to run "${event.tool}". Allow?`,
699+
timestamp: Date.now(),
700+
read: false,
701+
dismissed: false,
702+
priority: 'high',
703+
tool: event.tool,
704+
toolArgs: event.args,
705+
});
706+
return;
707+
}
708+
687709
const step = agentEventToStep(event, stepIdRef);
688710
if (step) {
689711
addAgentStep(step);

src/gaia/apps/webui/src/services/api.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,12 @@ export async function deleteMessagesFrom(sessionId: string, messageId: number):
111111
return apiFetch('DELETE', `/sessions/${sessionId}/messages/${messageId}/and-below`);
112112
}
113113

114+
// -- Tool Confirmation --------------------------------------------------------
115+
116+
export async function confirmTool(sessionId: string, approved: boolean): Promise<{ status: string; approved: boolean }> {
117+
return apiFetch('POST', '/chat/confirm-tool', { session_id: sessionId, approved });
118+
}
119+
114120
// -- Chat (Streaming with Agent Events) ----------------------------------------
115121

116122
/**
@@ -134,6 +140,7 @@ export interface StreamCallbacks {
134140
const AGENT_EVENT_TYPES = new Set([
135141
'status', 'step', 'thinking', 'plan',
136142
'tool_start', 'tool_end', 'tool_result', 'tool_args', 'agent_error',
143+
'permission_request',
137144
]);
138145

139146
export function sendMessageStream(

src/gaia/apps/webui/src/stores/notificationStore.ts

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import { create } from 'zustand';
1313
import type { GaiaNotification, NotificationType } from '../types/agent';
14+
import { confirmTool } from '../services/api';
1415

1516
// ── Constants ────────────────────────────────────────────────────────────
1617

@@ -78,18 +79,29 @@ export const useNotificationStore = create<NotificationState>((set, get) => ({
7879
setTypeFilter: (type) => set({ typeFilter: type }),
7980

8081
respondToPermission: async (id, action, remember) => {
81-
const api = window.gaiaAPI;
82-
if (api) {
82+
// Find the notification to get the session ID for the REST call
83+
const notification = get().notifications.find((n) => n.id === id);
84+
const sessionId = notification?.agentId;
85+
86+
// Try Electron IPC first, then fall back to REST API
87+
const electronApi = window.gaiaAPI;
88+
if (electronApi?.notification?.respondPermission) {
8389
try {
84-
await api.notification.respondPermission(id, action, remember);
90+
await electronApi.notification.respondPermission(id, action, remember);
8591
} catch (err) {
8692
console.error('[notificationStore] Failed to send permission response via IPC:', err);
87-
// Don't update local state — the agent didn't receive the response.
88-
// The permission prompt remains actionable so the user can retry.
93+
return;
94+
}
95+
} else if (sessionId) {
96+
// Web browser mode — call the REST endpoint
97+
try {
98+
await confirmTool(sessionId, action === 'allow');
99+
} catch (err) {
100+
console.error('[notificationStore] Failed to send permission response via REST:', err);
89101
return;
90102
}
91103
}
92-
// Update local state only after IPC succeeds (or if no IPC is available)
104+
// Update local state only after backend confirms
93105
set((state) => ({
94106
notifications: state.notifications.map((n) =>
95107
n.id === id

src/gaia/apps/webui/src/types/index.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,8 @@ export type StreamEventType =
184184
| 'tool_result' // Tool result summary
185185
| 'tool_args' // Tool arguments detail
186186
| 'answer' // Final answer from agent
187-
| 'agent_error';// Agent-level error (non-fatal)
187+
| 'agent_error' // Agent-level error (non-fatal)
188+
| 'permission_request'; // Tool confirmation request
188189

189190
export interface StreamEvent {
190191
type: StreamEventType;

src/gaia/ui/_chat_helpers.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@
2424

2525
logger = logging.getLogger(__name__)
2626

27+
# Active SSE handlers keyed by session_id. The /api/chat/confirm-tool
28+
# endpoint looks up the handler here to resolve a pending confirmation.
29+
_active_sse_handlers: dict = {} # session_id -> SSEOutputHandler
30+
2731

2832
# ── Chat Helpers ─────────────────────────────────────────────────────────────
2933

@@ -215,6 +219,9 @@ async def _stream_chat_response(db: ChatDatabase, session: dict, request: ChatRe
215219
# Create SSE handler first and emit immediate feedback BEFORE the
216220
# slow ChatAgent construction (RAG indexing, LLM connection can take 10-30s)
217221
sse_handler = SSEOutputHandler()
222+
# Register so /api/chat/confirm-tool can find this handler.
223+
session_id = request.session_id
224+
_active_sse_handlers[session_id] = sse_handler
218225
sse_handler._emit(
219226
{"type": "status", "status": "info", "message": "Connecting to LLM..."}
220227
)
@@ -554,6 +561,7 @@ def _run_agent():
554561

555562
# Signal cancellation (handles client disconnect) then wait for producer
556563
sse_handler.cancelled.set()
564+
_active_sse_handlers.pop(session_id, None)
557565
producer.join(timeout=5.0)
558566
if producer.is_alive():
559567
logger.warning("Producer thread still running after stream ended")

src/gaia/ui/routers/chat.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
from fastapi import APIRouter, Depends, HTTPException, Request
1818
from fastapi.responses import StreamingResponse
1919

20+
from pydantic import BaseModel
21+
2022
from ..database import ChatDatabase
2123
from ..dependencies import get_db
2224
from ..models import ChatRequest, ChatResponse
@@ -133,3 +135,30 @@ async def _guarded_stream():
133135
# the streaming generator took ownership.
134136
if not sem_released:
135137
chat_semaphore.release()
138+
139+
140+
class ToolConfirmRequest(BaseModel):
141+
"""Request body for the tool confirmation endpoint."""
142+
143+
session_id: str
144+
approved: bool
145+
146+
147+
@router.post("/api/chat/confirm-tool")
148+
async def confirm_tool(request: ToolConfirmRequest):
149+
"""Respond to a tool confirmation prompt from the agent.
150+
151+
The agent blocks in ``SSEOutputHandler.confirm_tool_execution()`` until
152+
this endpoint is called or the stream is cancelled. The frontend calls it when the user
153+
clicks Allow or Deny on the PermissionPrompt overlay.
154+
"""
155+
from .._chat_helpers import _active_sse_handlers
156+
157+
handler = _active_sse_handlers.get(request.session_id)
158+
if not handler:
159+
raise HTTPException(
160+
status_code=404,
161+
detail="No active chat session found for this session ID",
162+
)
163+
handler.resolve_tool_confirmation(request.approved)
164+
return {"status": "ok", "approved": request.approved}

src/gaia/ui/sse_handler.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,10 @@ def __init__(self):
7171
self._tool_count = 0
7272
self._last_tool_name: Optional[str] = None
7373
self._stream_buffer = "" # Buffer to detect and filter tool-call JSON
74+
# Tool confirmation gate: confirm_tool_execution() blocks on this
75+
# event until the frontend responds via the /api/chat/confirm-tool endpoint.
76+
self._confirm_event = threading.Event()
77+
self._confirm_result: Optional[bool] = None
7478

7579
def _emit(self, event: Dict[str, Any]):
7680
"""Push an event to the queue for SSE delivery."""
@@ -515,6 +519,36 @@ def print_streaming_text(self, text_chunk: str, end_of_stream: bool = False):
515519
self._emit({"type": "chunk", "content": self._stream_buffer})
516520
self._stream_buffer = ""
517521

522+
def confirm_tool_execution(
523+
self, tool_name: str, tool_args: Dict[str, Any]
524+
) -> bool:
525+
"""Emit a permission_request event and block until the frontend responds.
526+
527+
The /api/chat/confirm-tool endpoint calls ``resolve_tool_confirmation()``
528+
which sets ``_confirm_result`` and signals ``_confirm_event``.
529+
"""
530+
self._confirm_event.clear()
531+
self._confirm_result = None
532+
self._emit(
533+
{
534+
"type": "permission_request",
535+
"tool": tool_name,
536+
"args": tool_args,
537+
}
538+
)
539+
# Block the agent thread until the frontend responds or the stream
540+
# is cancelled. Poll in 0.5 s increments so cancellation is responsive.
541+
while not self._confirm_event.is_set():
542+
if self.cancelled.is_set():
543+
return False
544+
self._confirm_event.wait(timeout=0.5)
545+
return bool(self._confirm_result)
546+
547+
def resolve_tool_confirmation(self, approved: bool) -> None:
548+
"""Called by the /api/chat/confirm-tool endpoint to unblock the agent."""
549+
self._confirm_result = approved
550+
self._confirm_event.set()
551+
518552
def signal_done(self):
519553
"""Signal that the agent has finished processing."""
520554
# Flush any remaining stream buffer before signaling done

0 commit comments

Comments
 (0)