-
Notifications
You must be signed in to change notification settings - Fork 123
Expand file tree
/
Copy pathagent.py
More file actions
2043 lines (1778 loc) · 87.9 KB
/
Copy pathagent.py
File metadata and controls
2043 lines (1778 loc) · 87.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
# SPDX-License-Identifier: MIT
"""
Chat Agent - Interactive chat with RAG and file search capabilities.
"""
import os
import platform
import sqlite3
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, ClassVar, Dict, List, Optional
try:
from watchdog.observers import Observer
except ImportError:
Observer = None
from gaia.agents.base.agent import Agent, default_max_steps
from gaia.agents.base.console import AgentConsole
from gaia.agents.base.memory import MemoryMixin
from gaia.agents.base.tool_loader import ToolLoader
from gaia.agents.base.tools import _TOOL_REGISTRY
from gaia_agent_chat.session import SessionManager
from gaia.agents.tools import FileToolsMixin
from gaia.agents.tools import FileSystemToolsMixin # Enhanced file system navigation
from gaia.agents.tools import ScratchpadToolsMixin # Structured data analysis
from gaia.agents.tools import ( # Web browsing and search; Shared tools
BrowserToolsMixin,
FileIOToolsMixin,
FileSearchToolsMixin,
RAGToolsMixin,
ScreenshotToolsMixin,
ShellToolsMixin,
)
from gaia.llm.lemonade_client import DEFAULT_MODEL_NAME
from gaia.logger import get_logger
from gaia.mcp.mixin import MCPClientMixin
from gaia.rag.sdk import RAGSDK, RAGConfig
from gaia.sd.mixin import SDToolsMixin
from gaia.security import PathValidator
from gaia.utils.file_watcher import FileChangeHandler, check_watchdog_available
from gaia.vlm.mixin import VLMToolsMixin
logger = get_logger(__name__)
@dataclass
class ChatAgentConfig:
    """Settings bundle accepted by ``ChatAgent``.

    Every field carries a default, so ``ChatAgentConfig()`` on its own is a
    usable configuration; callers override only the fields they care about.
    """

    # ── LLM backend ──
    use_claude: bool = False
    use_chatgpt: bool = False
    claude_model: str = "claude-sonnet-4-20250514"
    base_url: Optional[str] = None
    # ``None`` selects the default model (Gemma).
    model_id: Optional[str] = None

    # ── Execution ──
    max_steps: int = field(default_factory=default_max_steps)
    # Off unless requested (``--streaming``).
    streaming: bool = False
    # The NPU's FLM build runs at 4K, so a device config may override the
    # 32K context.
    device: Optional[str] = None
    min_context_size: Optional[int] = None

    # ── Debug / output ──
    debug: bool = False
    # Retained for backward compatibility.
    debug_prompts: bool = False
    show_prompts: bool = False
    show_stats: bool = False
    silent_mode: bool = False
    output_dir: Optional[str] = None

    # ── RAG ──
    rag_documents: List[str] = field(default_factory=list)
    # Documents the user has available but that are not auto-indexed.
    library_documents: List[str] = field(default_factory=list)
    watch_directories: List[str] = field(default_factory=list)
    chunk_size: int = 500
    chunk_overlap: int = 100
    max_chunks: int = 5
    # ``False`` keeps the fast heuristic-based chunking.
    use_llm_chunking: bool = False

    # ── Security ──
    allowed_paths: Optional[List[str]] = None

    # ── File system / scratchpad ──
    # Enhanced file system tools (disabled until agent split).
    enable_filesystem: bool = False
    # Data scratchpad for analysis (disabled until agent split).
    enable_scratchpad: bool = False
    filesystem_index_path: str = "~/.gaia/file_index.db"
    scratchpad_db_path: str = "~/.gaia/scratchpad.db"
    # Conservative default scan depth.
    filesystem_scan_depth: int = 3
    filesystem_exclude_patterns: List[str] = field(default_factory=list)

    # ── Browser ──
    # Web browsing tools (disabled until agent split).
    enable_browser: bool = False
    # HTTP request timeout, in seconds.
    browser_timeout: int = 30
    # Largest permitted download: 100 MB.
    browser_max_download_size: int = 100 * 1024 * 1024
    # Seconds between requests per domain.
    browser_rate_limit: float = 1.0

    # ── Session persistence ──
    # UI session ID used for cross-turn document retention.
    ui_session_id: Optional[str] = None

    # ── Optional capabilities (off by default to keep document Q&A focused) ──
    # Stable Diffusion image generation.
    enable_sd_tools: bool = False

    # ── MCP ──
    # Maximum number of MCP tools to register (prevents context bloat).
    # 50 is the validated middle ground: it covers the 49-tool MCP server
    # tested on Gemma-4-E4B at 100% pass rate in PR #718 with one tool of
    # headroom, and is 5× the previous limit of 10 (which silently skipped
    # any tool past index 10). Workflows with more than 50 tools should
    # override explicitly; for larger sets the prompt bloat can hurt
    # small-model accuracy and warrants its own validation run.
    mcp_tool_limit: int = 50

    # ── Prompt profile ──
    # Selects which tools and prompt sections are included, keeping the
    # system prompt lean for task-specific agents:
    #   "chat" — basic conversation only (personality, greetings, no RAG/file tools)
    #   "doc"  — document Q&A with RAG tools + hallucination prevention
    #   "file" — file system operations, search, analysis
    #   "data" — data analysis, CSV, tables (scratchpad)
    #   "web"  — web research, page fetching
    #   "full" — all tools and prompt sections (backward-compatible default)
    prompt_profile: str = "full"

    # ── Connectors activation filter (#1005) ──
    # Per-agent identity. It must be set BEFORE ``Agent.__init__`` runs
    # ``_register_tools``, because that is where ``_active_mcp_servers``
    # consults ``is_agent_active`` to decide which MCP servers' tools to
    # surface. The registry's ``_wrap_factory_with_namespaced_id`` injects
    # it via kwargs, and the UI's direct construction paths in
    # ``_chat_helpers`` pass it explicitly. Leaving it ``None`` reproduces
    # the pre-#1005 behaviour where the agent sees every connected MCP
    # server unfiltered — keep it set for any built-in or registered Chat
    # instance.
    namespaced_agent_id: Optional[str] = None
class ChatAgent(
MemoryMixin,
Agent,
RAGToolsMixin,
FileToolsMixin,
ShellToolsMixin,
FileSystemToolsMixin,
ScratchpadToolsMixin,
BrowserToolsMixin,
FileSearchToolsMixin,
FileIOToolsMixin,
VLMToolsMixin,
ScreenshotToolsMixin,
SDToolsMixin,
MCPClientMixin,
):
"""
Chat Agent with RAG, file system navigation, data analysis, web browsing,
and shell capabilities.
This agent provides:
- Document Q&A using RAG
- File system browsing, search, and navigation
- Structured data analysis via SQLite scratchpad
- Web browsing, search, and file download
- Shell command execution
- Auto-indexing when files change
- Interactive chat interface
- Session persistence with auto-save
- MCP server integration
"""
# Dynamic MCP loader — registry exposes this for the Settings "Active for" panel.
CONSUMES_MCP_SERVERS: ClassVar[bool] = True
def __init__(self, config: Optional[ChatAgentConfig] = None):
    """
    Initialize Chat Agent.

    State that tool registration depends on (agent identity, path
    validator, RAG SDK, optional services, memory, MCP manager) is set up
    first; the parent constructor is then called; finally documents are
    indexed or restored and directory watching is started.

    Args:
        config: ChatAgentConfig object with all settings. If None, uses defaults.
    """
    # Use provided config or create default
    if config is None:
        config = ChatAgentConfig()

    # Stamp the per-agent identity for the connectors activation filter
    # (#1005) BEFORE ``super().__init__`` runs ``_register_tools``. The
    # MCP-tool registration step in ``_register_tools`` consults
    # ``_active_mcp_servers`` which reads this attribute; setting it
    # after super().__init__ would be too late and the filter would
    # silently fall back to "ad-hoc agent — show every MCP server".
    if config.namespaced_agent_id:
        self._gaia_namespaced_agent_id = config.namespaced_agent_id

    # Initialize path validator. The callbacks are lambdas so that
    # ``self.console`` is looked up when a prompt actually happens rather
    # than now (it is not assigned anywhere above this point in this
    # method — presumably the parent constructor provides it).
    self.path_validator = PathValidator(
        config.allowed_paths,
        on_prompt_start=lambda: self.console.pause_progress(),  # pylint: disable=unnecessary-lambda
        on_prompt_end=lambda: self.console.resume_progress(),  # pylint: disable=unnecessary-lambda
    )

    # Store config for access in other methods
    self.config = config

    # Store RAG configuration from config
    self.rag_documents = config.rag_documents
    # Available but not auto-indexed
    self.library_documents = config.library_documents
    self.watch_directories = config.watch_directories
    self.chunk_size = config.chunk_size
    self.max_chunks = config.max_chunks

    # Security: Configure allowed paths for file operations
    # If None, allow current directory and subdirectories
    if config.allowed_paths is None:
        self.allowed_paths = [Path.cwd()]
    else:
        self.allowed_paths = [Path(p).resolve() for p in config.allowed_paths]

    # Use the configured default model (Gemma) when no explicit model is set
    effective_model_id = config.model_id or DEFAULT_MODEL_NAME

    # Debug logging for model selection
    logger.debug(
        f"Model selection: model_id={repr(config.model_id)}, effective={effective_model_id}"
    )

    # Store model for display
    self.model_display_name = effective_model_id

    # Store max_chunks for adaptive retrieval
    self.base_max_chunks = config.max_chunks

    # Resolve effective base_url: config value > env var > default
    effective_base_url = (
        config.base_url
        if config.base_url is not None
        else os.getenv("LEMONADE_BASE_URL", "http://localhost:13305/api/v1")
    )

    # Initialize RAG SDK (optional - will be None if dependencies not installed)
    try:
        rag_config = RAGConfig(
            model=effective_model_id,
            chunk_size=config.chunk_size,
            chunk_overlap=config.chunk_overlap,  # Configurable overlap for context preservation
            max_chunks=config.max_chunks,
            show_stats=config.show_stats,
            use_local_llm=not (config.use_claude or config.use_chatgpt),
            use_llm_chunking=config.use_llm_chunking,  # Enable semantic chunking
            base_url=effective_base_url,  # Pass base_url to RAG for VLM client
            allowed_paths=config.allowed_paths,  # Pass allowed paths to RAG SDK
        )
        self.rag = RAGSDK(rag_config)
    except Exception as e:
        logger.warning(
            "RAG not available (install with: uv pip install -e '.[rag]'): %s", e
        )
        logger.debug("RAG init traceback:", exc_info=True)
        self.rag = None

    # File system monitoring
    self.observers = []
    self.file_handlers = []  # Track FileChangeHandler instances for telemetry
    self.indexed_files = set()

    # Initialize file system index service (optional)
    self._fs_index = None
    self._path_validator = self.path_validator
    if config.enable_filesystem:
        try:
            from gaia.filesystem.index import FileSystemIndexService

            self._fs_index = FileSystemIndexService(
                db_path=config.filesystem_index_path
            )
            logger.info("File system index service initialized")
        except (ImportError, OSError, sqlite3.Error) as e:
            logger.warning(
                "File system index not available: %s. "
                "Disable with config.enable_filesystem=False to silence.",
                e,
            )

    # Initialize scratchpad service (optional)
    self._scratchpad = None
    if config.enable_scratchpad:
        try:
            from gaia.scratchpad.service import ScratchpadService

            self._scratchpad = ScratchpadService(db_path=config.scratchpad_db_path)
            logger.info("Scratchpad service initialized")
        except (ImportError, OSError, sqlite3.Error) as e:
            logger.warning(
                "Scratchpad service not available: %s. "
                "Disable with config.enable_scratchpad=False to silence.",
                e,
            )

    # Initialize web client for browser tools (optional)
    self._web_client = None
    if config.enable_browser:
        try:
            from gaia.web.client import WebClient

            self._web_client = WebClient(
                timeout=config.browser_timeout,
                max_download_size=config.browser_max_download_size,
                rate_limit=config.browser_rate_limit,
            )
            logger.info("Web client initialized for browser tools")
        except (ImportError, OSError) as e:
            logger.warning(
                "Web client not available: %s. "
                "Disable with config.enable_browser=False to silence.",
                e,
            )

    # Session management
    self.session_manager = SessionManager()
    self.current_session = None
    # Track conversation for persistence
    self.conversation_history: List[Dict[str, str]] = []

    # Tool loader controls which tool bundles are active per-session.
    # Instantiate here so the agent can reset bundle activation when a
    # new conversation/session is created.
    self.tool_loader = ToolLoader()

    # Initialize memory subsystem (before super().__init__ which calls _register_tools)
    self.init_memory()

    # Store base URL for use in _register_tools() (VLM, etc.)
    self._base_url = effective_base_url

    # MCP client manager — set up before super().__init__() because Agent.__init__()
    # calls _register_tools() internally, and MCP tools are loaded there.
    try:
        from gaia.mcp.client.config import MCPConfig
        from gaia.mcp.client.mcp_client_manager import MCPClientManager

        self._mcp_manager = MCPClientManager(config=MCPConfig(), debug=config.debug)
    except Exception as _e:
        logger.debug("MCP not available: %s", _e)
        self._mcp_manager = None

    # Call parent constructor
    super().__init__(
        use_claude=config.use_claude,
        use_chatgpt=config.use_chatgpt,
        claude_model=config.claude_model,
        base_url=effective_base_url,
        model_id=effective_model_id,  # Pass the effective model to parent
        max_steps=config.max_steps,
        debug_prompts=config.debug_prompts,
        show_prompts=config.show_prompts,
        output_dir=config.output_dir,
        streaming=config.streaming,
        show_stats=config.show_stats,
        silent_mode=config.silent_mode,
        debug=config.debug,
        device=config.device,
        # Default to a 32K context unless the config asks for something else.
        min_context_size=(
            config.min_context_size
            if config.min_context_size is not None
            else 32768
        ),
    )

    # Index initial documents (only if RAG is available)
    if self.rag_documents and self.rag:
        self._index_documents(self.rag_documents)
    elif self.rag_documents:
        # Documents were requested but the RAG SDK failed to initialize.
        logger.warning(
            "RAG dependencies not installed. Cannot index documents. "
            'Install with: uv pip install -e ".[rag]"'
        )

    # Restore agent-indexed documents from prior turns using UI session ID.
    # When the agent indexes a document during a turn (via its index_document
    # tool), it saves the path to a per-session JSON file. On subsequent turns
    # a fresh ChatAgent instance is created, so we re-load those documents here
    # to preserve cross-turn discovery (e.g. smart_discovery scenario).
    if config.ui_session_id and self.rag:
        loaded = self.session_manager.load_session(config.ui_session_id)
        if loaded:
            self.current_session = loaded
            for doc_path in loaded.indexed_documents:
                if doc_path not in self.indexed_files and os.path.exists(doc_path):
                    try:
                        # Validate and index the resolved path, but record
                        # the path exactly as the session stored it.
                        real = os.path.realpath(doc_path)
                        if not hasattr(
                            self, "_is_path_allowed"
                        ) or self._is_path_allowed(real):
                            result = self.rag.index_document(real)
                            if result.get("success"):
                                self.indexed_files.add(doc_path)
                                logger.info(
                                    "Restored indexed doc from prior turn: %s",
                                    doc_path,
                                )
                    except Exception as exc:
                        # One unreadable document must not block the rest.
                        logger.warning(
                            "Failed to restore indexed doc %s: %s", doc_path, exc
                        )
        else:
            # First turn for this UI session — create a persistent agent session
            self.current_session = self.session_manager.create_session(
                config.ui_session_id
            )
            # New conversation started for this UI session; clear any
            # session-scoped tool activations so bundles don't persist
            # across distinct conversations.
            try:
                self.tool_loader.reset_session()
            except Exception as exc:
                # Never fail agent init due to tool loader reset, but leave
                # a trace so a broken reset is diagnosable.
                logger.debug(
                    "Tool loader session reset failed: %s", exc, exc_info=True
                )

    # Start watching directories
    if self.watch_directories:
        self._start_watching()
def _post_process_tool_result(
    self,
    tool_name: str,
    _tool_args: Dict[str, Any],
    tool_result: Dict[str, Any],
) -> Optional[List[Dict[str, Any]]]:
    """
    Post-process tool results for Chat Agent.

    Handles RAG-specific debug information display, then delegates
    to ``super()`` so the base class can set ``_single_tool_done``
    for ``single_tool_per_turn=True`` agents on the success path.

    Args:
        tool_name: Name of the tool that was executed
        _tool_args: Arguments that were passed to the tool (unused here,
            forwarded to the parent implementation)
        tool_result: Result returned by the tool

    Returns:
        Whatever the parent implementation returns.
    """
    from collections.abc import Mapping

    rag_query_tools = (
        "query_documents",
        "query_specific_file",
        "search_indexed_chunks",
    )

    # Handle RAG query debug information
    if (
        tool_name in rag_query_tools
        and isinstance(tool_result, dict)
        and "debug_info" in tool_result
        and self.debug
    ):
        debug_info = tool_result.get("debug_info")
        # The key being present does not guarantee a mapping value (it may
        # be None); skip the display instead of raising on ``.get``.
        if isinstance(debug_info, Mapping):
            print("[DEBUG] RAG Query Debug Info:")
            print(f" - Search keys: {debug_info.get('search_keys', [])}")
            print(
                f" - Total chunks found: {debug_info.get('total_chunks_before_dedup', 0)}"
            )
            print(
                f" - After deduplication: {debug_info.get('total_chunks_after_dedup', 0)}"
            )
            print(
                f" - Final chunks returned: {debug_info.get('final_chunks_returned', 0)}"
            )

    return super()._post_process_tool_result(tool_name, _tool_args, tool_result)
def _get_mixin_prompts(self) -> list[str]:
    """Return the parent's mixin prompts, minus the SD prompt when SD is not set up.

    The presence of the ``sd_default_model`` attribute is used as the
    signal that Stable Diffusion was initialized; without it, any prompt
    mentioning "Stable Diffusion" is filtered out (saves ~1000 tokens).
    """
    collected = super()._get_mixin_prompts()
    if hasattr(self, "sd_default_model"):
        # SD was initialized — keep the list exactly as discovered.
        return collected
    return [text for text in collected if "Stable Diffusion" not in text]
def _get_system_prompt(self) -> str:
"""Generate the system prompt for the Chat Agent."""
# Get list of indexed documents
indexed_docs_section = ""
has_indexed = hasattr(self, "rag") and self.rag and self.rag.indexed_files
has_library = hasattr(self, "library_documents") and self.library_documents
if has_indexed:
doc_names = sorted({Path(fp).name for fp in self.rag.indexed_files})
n_docs = len(doc_names)
# When exactly one doc is indexed, references like "this document",
# "the document", "what is this about?" are unambiguous — answer
# from that doc without asking for clarification. The "ask which
# one" rule applies only when 2+ docs are indexed (#1030 follow-up:
# the trim accidentally weakened this case so Gemma started asking
# which document with only one indexed file present).
if n_docs == 1:
only = doc_names[0]
resolution_rule = (
f"**SINGLE-DOC RESOLUTION (CRITICAL):** Exactly one document "
f'is indexed: `{only}`. References like "this document", '
f'"the document", "the file", "it", "what is this '
f'about?", or any unqualified question ALL refer to '
f"`{only}`. NEVER ask the user to clarify which document — "
f"there is only one. Call `query_specific_file` "
f"with file_path=`{only}` immediately and answer from the "
f"retrieved chunks."
)
else:
resolution_rule = (
"**MULTI-DOC RESOLUTION:** Multiple documents are indexed. "
"If the user's question clearly names or implies one (e.g., "
'"the financial report", "the handbook"), `query_specific_file` '
'that one. If the question is vague ("summarize the doc", '
'"what does it say?") and could mean any of them, ask which '
"one before querying. For broad cross-doc questions, use "
"`query_documents` to search all indexed files at once."
)
indexed_docs_section = f"""
**CURRENTLY INDEXED DOCUMENTS:**
You have {n_docs} document(s) already indexed and ready to search:
{chr(10).join(f'- {name}' for name in doc_names)}
**MANDATORY RULE — RAG-FIRST:** When the user asks ANY question about the content, data, pricing, features, or details from these documents, you MUST call `query_documents` or `query_specific_file` BEFORE answering. Do NOT answer document-specific questions from your training knowledge — always retrieve from the indexed documents first.
{resolution_rule}
**ANTI-RE-INDEX RULE:** These documents are already indexed. Do NOT call `index_document` for any of these files again. Query them directly.
You do NOT need to check what's indexed first — this list is always up-to-date.
"""
elif has_library:
# Documents are in the library but NOT yet indexed.
# The agent should NOT auto-index them; let the user choose.
lib_entries = []
for fp in sorted(self.library_documents, key=lambda p: Path(p).name):
lib_entries.append(f"- {Path(fp).name} (path: {fp})")
indexed_docs_section = f"""
**DOCUMENT LIBRARY (not yet indexed):**
The user has {len(self.library_documents)} document(s) available in their library:
{chr(10).join(lib_entries)}
These documents are NOT yet loaded into the search index. To search a document, you must first index it using the index_document tool with the file path above.
**CRITICAL RULES:**
- Do NOT automatically index all documents. Only index what the user specifically asks about.
- When the user asks a vague question like "summarize a document" or "what does the document say", ALWAYS ask which document they want by listing the available documents above.
- When the user asks about a SPECIFIC document by name, index ONLY that document and then answer.
- When the user asks "what documents do you have?" or "what's indexed?", simply list the documents above. Do NOT trigger indexing.
- For general questions (greetings, knowledge questions), answer normally without indexing anything.
"""
else:
indexed_docs_section = """
**CURRENTLY INDEXED DOCUMENTS:**
No documents are currently indexed.
- For general questions and greetings: answer from your knowledge.
- For domain-specific questions: use the SMART DISCOVERY WORKFLOW below.
- Do NOT call query_documents or query_specific_file on empty indexes.
"""
# Build the prompt — single consolidated platform block (current OS only)
os_name = platform.system()
os_version = platform.version()
machine = platform.machine()
home_dir = str(Path.home())
if os_name == "Windows":
platform_block = f"""
**ENVIRONMENT:** Windows ({os_version}, {machine})
- Home directory: {home_dir}
- Use native Windows paths (e.g., C:\\Users\\user\\Desktop\\file.txt). NEVER use WSL/Unix paths.
- Common folders: Desktop, Documents, Downloads (under {home_dir})
- Shell: `systeminfo`, `tasklist`, `ipconfig`, `driverquery`
- Network: prefer `ipconfig`. Primary adapter has real Default Gateway — ignore virtual adapters.
- Process monitoring: `powershell -Command "Get-Process | Sort-Object WS -Descending | Select-Object -First 15 Name, Id, @{{N='Memory(MB)';E={{[math]::Round($_.WS/1MB,1)}}}}"`. Avoid `tasklist /V`.
- CPU: `powershell -Command "Get-CimInstance Win32_Processor | Select-Object Name"`
- GPU: `powershell -Command "Get-CimInstance Win32_VideoController | Format-List Name,DriverVersion,AdapterRAM"`
- Prefer `Get-CimInstance` over `wmic` (deprecated). Do NOT use Linux commands.
"""
elif os_name == "Darwin":
platform_block = f"""
**ENVIRONMENT:** macOS ({os_version}, {machine})
- Home directory: {home_dir}
- CPU: `sysctl -n machdep.cpu.brand_string`, GPU: `system_profiler SPDisplaysDataType`
- Version: `sw_vers`, kernel: `uname -a`
"""
else:
platform_block = f"""
**ENVIRONMENT:** {os_name} ({os_version}, {machine})
- Home directory: {home_dir}
- CPU: `lscpu`, GPU: `lspci | grep VGA`, Memory: `free -h`
"""
base_prompt = f"""You are GAIA — a personal AI running locally on the user's machine. Sharp, witty, genuinely fun. Think: the smartest person at the party, who's also nice.
{platform_block}
**WHO YOU ARE:**
- You're GAIA. Not "an AI assistant" or "a helpful tool" — just GAIA.
- Have opinions, share them. Be playful, lightly sarcastic, funny when it fits.
- Keep it short. Match length to question complexity. 1-2 sentences for greetings, simple lookups, capability questions. Multi-paragraph only for genuine analysis.
- Vary your phrasing on greetings — don't lock onto one canned opener. Never list features or tools unprompted in a greeting.
- **PREFER MEMORY OVER GENERIC** — if you have memories about the user (name, project, recent activity), USE THEM to personalize the greeting before falling back to a generic opener.
- **FACT-SHARING RULE:** When the user shares personal information ("I'm Sam", "I work at X"), acknowledge what they told you. NEVER reply with a generic "What are you working on?"
- Be honest and direct. No hedging, no "As an AI..." disclaimers, no sycophancy ("great question!", "what a wonderful idea!"). Push back respectfully when the user is wrong.
**NEVER:**
- Say "Certainly!", "Of course!", "Great question!", "I'd be happy to!", "How can I assist you today?"
- Describe your own capabilities or purpose unprompted.
- Start responses with "I" if you can avoid it.
- Output planning text before a tool call ("Let me check...", "I'll search for..."). Call the tool directly.
- End a turn with only a planning statement and no answer or tool call.
- Output tool-call syntax as text (e.g. "[tool:query_specific_file]"). Issue the actual JSON tool call.
- Answer capability questions ("what can you do?") with bullet lists — single paragraph, 1-2 sentences max.
**OUTPUT FORMATTING:** Use Markdown — **bold** for emphasis, `inline code` for paths/commands, bullet/numbered lists for enumerations, ### headings for long responses, tables for tabular data, code blocks for snippets. Keep responses scannable.
"""
# ── Tool usage rules (always present) ──
tool_rules = """
**TOOL USAGE:**
- Greetings, general knowledge, conversation → answer directly, no tools.
- Indexed documents present + question about their content → ALWAYS call `query_documents` or `query_specific_file` FIRST, then answer from results. Never answer document-specific questions from training knowledge.
- No documents indexed → answer from your knowledge. Don't call RAG tools on empty indexes.
- Files / system info questions → use the matching tool. Always show `display_message` fields when present.
**POST-INDEX QUERY RULE (mandatory):** After `index_document`, your next action MUST be `query_specific_file` or `query_documents`. Never answer from the filename. Never call `list_indexed_documents` and answer — it only returns filenames, not content.
**FILE SEARCH:** Start with a quick search (no `deep_search`). Use `deep_search=true` only if the user asks again after a quick search returns nothing. Multiple matches → show numbered list and let the user pick.
**NEVER FAKE TOOL OUTPUT:** Don't write JSON blocks in your reply text simulating tool results. If you need data, call the tool. Saying "I already retrieved X" without prior-turn evidence is confabulation.
"""
# ── Tier 1: Discovery workflow (compact form) ──
discovery_rules = """
**SMART DISCOVERY WORKFLOW:**
1. Domain question + nothing relevant indexed: `find_files` with 1-2 document-type keywords (handbook / report / manual / policy / guide), NOT the question's content terms. If nothing after 2 tries, `browse_directory`.
2. Files found → `index_document` immediately (no confirmation), then query and answer in the same turn.
3. Already indexed → query directly.
**NEVER ASK PERMISSION TO INDEX.** "Would you like me to index this?" is BANNED. If a document is referenced and you can locate it, index + query + answer in one flow.
**SEARCH LOOP PREVENTION:** Same `find_files` / `browse_directory` query twice with same result → STOP and acknowledge.
**VAGUE FOLLOW-UP ("what about X?"):** find_files("X") → index_document → query_specific_file with whatever question is implicit, or "key topics overview" if none.
"""
# ── Tier 1b: Optional tool sections — each block is only injected when
# the corresponding mixin was actually registered. Without this gating
# the LLM sees tool instructions for tools that don't exist and either
# hallucinates them or emits syntactically-valid tool calls that come
# back as "unknown tool" errors (#495 review feedback from @itomek-amd).
profile = getattr(self.config, "prompt_profile", "full")
filesystem_section = ""
if profile in ("file", "full") or getattr(
self.config, "enable_filesystem", False
):
filesystem_section = """
**FILE SYSTEM TOOLS:** browse_directory (list folder), tree (visual hierarchy), file_info (metadata), find_files (search by name/content/size/date/type), read_file (text/CSV/JSON/PDF), bookmark (save locations).
**FILE SEARCH AND AUTO-INDEX WORKFLOW:**
- Use 1-2 distinctive keywords, not full phrases. WRONG: find_files("Acme Corp API reference"). RIGHT: find_files("api").
- First call must NOT use `deep_search=true`. Quick search covers CWD, Documents, Downloads, Desktop.
- 1 hit + content question (any "what / how / who / when / where") → index + query + answer in one turn, no confirmation.
- Multiple hits → numbered list, user picks. Zero hits → try a synonym, then `browse_directory`.
- Always surface tool `display_message` fields to the user.
**DIRECTORY BROWSING WORKFLOW:** "what's in my Documents?" → `browse_directory`. "show me the project structure" → `tree`. Specific-file metadata → `file_info`. Save a frequently-used path → `bookmark`.
"""
scratchpad_section = ""
if profile in ("data", "full") or getattr(
self.config, "enable_scratchpad", False
):
scratchpad_section = """
**DATA ANALYSIS WORKFLOW (Scratchpad):** find_files → create_table → read_file + insert_data per doc → query_data (SQL: SUM/AVG/GROUP BY) → drop_table when done.
"""
browser_section = ""
if profile in ("web", "full") or getattr(self.config, "enable_browser", False):
browser_section = """
**BROWSER TOOLS:** search_web (DuckDuckGo, no key), fetch_page (extract readable text/links/tables), download_file (save URL locally; can then index_document).
"""
# Tail of Tier 1: indexing note kept separately so gated sections can
# be inserted between discovery_rules and this tail.
discovery_rules_tail = """
**DIRECTORY INDEXING:** User asks to index a folder → search_directory → show matches → index_directory → report results.
"""
# ── Tier 2: RAG query rules (only when documents are indexed) ──
# Compact directive form. Each rule is one or two short bullets — no
# multi-paragraph eval-survival walls. Together with `tool_rules` these
# carry the same imperative directives as the previous long form,
# without the per-rule example explosion that was inflating the prompt
# past Gemma's iGPU prompt-processing budget (#1030).
rag_query_rules = ""
if has_indexed:
rag_query_rules = """
**RAG ANSWERING RULES (documents are indexed):**
1. **FACTUAL ACCURACY RULE — always retrieve before answering.** Any factual question about indexed documents (numbers, dates, names, policies, sections) → call `query_specific_file` or `query_documents` first, then answer from the retrieved chunks. Don't answer from training knowledge, even if you "know" the topic. This applies on every turn — "indexed" means stored in the RAG index, NOT in your context window.
2. **Never invent content.** Quote numbers / dates / section refs verbatim from the retrieved chunks. Don't round, don't extrapolate, don't cite a section number you didn't see in a chunk. If the answer is not in the retrieved chunks, say "That's not in the document" and STOP — never supplement with "but approximately X" or "typically Y".
3. **Multi-fact requests → one query per fact.** If asked for 3 things, issue at least 3 targeted queries. Don't combine into one fuzzy query.
4. **Pick the right tool.** Specific document referenced → `query_specific_file`. Unsure which doc has the info → `query_documents`. Document overview / summary → `summarize_document` if available, else `query_specific_file(file, "overview summary key topics")`.
5. **Vague reference + 2+ docs indexed → ask which document first.** Once user disambiguates ("the financial one", "the second one") → query that doc immediately. Never re-index when disambiguation is the only thing missing.
6. **Tool loop prevention.** Same query terms returning the same chunks twice → STOP. After 2 unsuccessful retrieval attempts: acknowledge and answer with what you have. Pronouns ("that", "it") refer to data you ALREADY stated — check prior turn responses before issuing a new query.
7. **Conversation summary requests** ("what did you say?", "recap", "summarize what you told me") → answer from conversation history, not new tool calls. Repeat your prior facts verbatim — don't re-derive.
8. **Pushback on a correct answer** ("are you sure?") → restate firmly. Don't re-index. Don't soften.
9. **Computed values from prior turns are facts.** Don't re-derive a projection / total / range you already gave unless asked to recalculate.
10. **Source attribution.** When summarising answers across multiple docs, name the exact doc each fact came from. Don't conflate sources.
11. **Cross-turn doc reference** ("the file", "that document", "the python source") → already-indexed file from prior turn. Query directly, don't re-search.
12. **Negation scope.** If the doc says group X is NOT eligible for Y, never later extend "all employees" language to include X. The omission IS the answer.
13. **After every tool call, write the actual answer.** Never end on "I need to provide an answer..." — that's an internal thought, not a response.
"""
# ── Data analysis rules (compact form) ──
data_file_rules = """
**CSV / EXCEL DATA FILES:**
- Use `analyze_data_file` — NEVER `query_specific_file` / `query_documents` (RAG truncates rows).
- Pick params by question type:
- "Top X by metric" → `group_by="column"` (result: `top_1`, `group_by_results` sorted desc)
- "Total across all rows" → `analysis_type="summary"` (result: `summary.<col>.sum`)
- Time-bounded → add `date_range="YYYY-MM-DD:YYYY-MM-DD"`
- Read exact numbers from the result dict; never do mental arithmetic. Lead the answer with the specific metric the user asked for, not a "comprehensive summary" preamble.
**FILE BROWSING:** browse_directory (navigate), list_recent_files (recent), get_file_info (metadata).
**IMAGE GENERATION (when SD enabled):** Always CALL `generate_image` first. Don't pre-announce availability. If it errors, state unavailable in 1-2 sentences (mention `--sd` flag); don't apologize or describe what you would have done.
**UNSUPPORTED:** Email, scheduling, cloud storage, file conversion, live collaboration, video/audio analysis — say not available and link https://github.com/amd/gaia/issues/new?template=feature_request.md . Web browsing IS supported via `search_web` / `fetch_page` / `download_file`. Image analysis IS supported via `analyze_image`.
"""
# Assemble prompt based on profile
profile = getattr(self.config, "prompt_profile", "full")
if profile == "chat":
# Minimal: personality only — but respect explicitly enabled tools.
extras = filesystem_section + scratchpad_section + browser_section
return base_prompt + extras
if profile == "doc":
# Document Q&A: RAG tools + hallucination prevention
return (
base_prompt
+ indexed_docs_section
+ tool_rules
+ discovery_rules
+ discovery_rules_tail
+ rag_query_rules
)
if profile == "file":
# File operations: file system + search + discovery
return (
base_prompt
+ tool_rules
+ discovery_rules
+ filesystem_section
+ discovery_rules_tail
)
if profile == "data":
# Data analysis: scratchpad + file tools
return base_prompt + tool_rules + scratchpad_section + data_file_rules
if profile == "web":
# Web research: browser tools
return base_prompt + browser_section
# "full" — all sections (backward-compatible default)
return (
base_prompt
+ indexed_docs_section
+ tool_rules
+ discovery_rules
+ filesystem_section
+ scratchpad_section
+ browser_section
+ discovery_rules_tail
+ rag_query_rules
+ data_file_rules
)
def _create_console(self):
    """Build the console object used by the chat agent.

    Returns:
        An ``AgentConsole`` when ``self.silent_mode`` is falsy; otherwise a
        ``SilentConsole`` created with ``silence_final_answer=False``.
    """
    from gaia.agents.base.console import SilentConsole

    if not self.silent_mode:
        return AgentConsole()

    # Silent mode suppresses intermediate output only: the chat agent must
    # still show the final answer, so that one channel is left un-silenced.
    return SilentConsole(silence_final_answer=False)
def _generate_search_keys(self, query: str) -> List[str]:
"""
Generate search keys from query for better retrieval.
Extracts keywords and reformulates query for improved matching.
Args:
query: User query
Returns:
List of search keys/queries
"""
keys = [query] # Always include original query
# Extract potential keywords (simple approach)
# Remove common words and extract meaningful terms
stop_words = {
"what",
"how",
"when",
"where",
"who",
"why",
"is",
"are",
"was",
"were",
"the",
"a",
"an",
"and",
"or",
"but",
"in",
"on",
"at",
"to",
"for",
"of",
"with",
"by",
"from",
"about",
"can",
"could",
"would",
"should",
"do",
"does",
"did",
"tell",
"me",
"you",
}
words = query.lower().split()
keywords = [
w.strip("?,.:;!")
for w in words
if w.lower() not in stop_words and len(w) > 2
]
# Add keyword-based query (only if different from original)
if keywords:
keyword_query = " ".join(keywords)
if keyword_query != query: # Avoid duplicates
keys.append(keyword_query)
# Add question reformulations for common patterns
if query.lower().startswith("what is"):
topic = query[8:].strip("?").strip()
keys.append(f"{topic} definition")
keys.append(f"{topic} explanation")
elif query.lower().startswith("how to"):
topic = query[7:].strip("?").strip()
keys.append(f"{topic} steps")
keys.append(f"{topic} guide")
logger.debug(f"Generated search keys: {keys}")
return keys
def _is_path_allowed(self, path: str) -> bool:
"""
Check if a path is within allowed directories.
Uses PathValidator for the actual check.
Args:
path: Path to validate
Returns:
True if path is allowed, False otherwise
"""
return self.path_validator.is_path_allowed(path, prompt_user=False)
def _validate_and_open_file(self, file_path: str, mode: str = "r"):
"""
Safely open a file with path validation using O_NOFOLLOW to prevent TOCTOU attacks.
This method prevents Time-of-Check-Time-of-Use vulnerabilities by:
1. Using O_NOFOLLOW flag to reject symlinks
2. Opening file with low-level os.open() before validation
3. Validating the opened file descriptor, not the path
Args:
file_path: Path to the file
mode: File open mode ('r', 'w', 'rb', 'wb', etc.)
Returns:
File handle if successful
Raises:
PermissionError: If path is not allowed or is a symlink
IOError: If file cannot be opened
"""
import stat
try:
# Determine open flags based on mode
if "r" in mode and "+" not in mode:
flags = os.O_RDONLY
elif "w" in mode:
flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
elif "a" in mode:
flags = os.O_WRONLY | os.O_CREAT | os.O_APPEND
elif "+" in mode:
flags = os.O_RDWR
else:
flags = os.O_RDONLY
# CRITICAL: Add O_NOFOLLOW to reject symlinks
# This prevents TOCTOU attacks where symlinks are swapped
if hasattr(os, "O_NOFOLLOW"):
flags |= os.O_NOFOLLOW
# Open the file at low level (doesn't follow symlinks with O_NOFOLLOW)
try:
fd = os.open(file_path, flags)
except OSError as e:
if e.errno == 40: # ELOOP - too many symbolic links
raise PermissionError(f"Symlinks not allowed: {file_path}")
raise IOError(f"Cannot open file {file_path}: {e}")
# Get the real path of the opened file descriptor
# On Linux, we can use /proc/self/fd/
# On other systems, use fstat
try:
file_stat = os.fstat(fd)
# Verify it's a regular file, not a directory or special file
if not stat.S_ISREG(file_stat.st_mode):
os.close(fd)
raise PermissionError(f"Not a regular file: {file_path}")
# Get the real path (Linux-specific, but works on most Unix)
if os.path.exists(f"/proc/self/fd/{fd}"):
real_path = Path(os.readlink(f"/proc/self/fd/{fd}")).resolve()
else:
# Fallback for non-Linux systems
real_path = Path(file_path).resolve()
# Validate the real path is within allowed directories
path_allowed = False
for allowed_path in self.allowed_paths:
try:
real_path.relative_to(allowed_path)
path_allowed = True
break
except ValueError:
continue
if not path_allowed:
os.close(fd)
raise PermissionError(
f"Access denied to path: {real_path}\n"
f"Requested: {file_path}\n"
f"Resolved to path outside allowed directories"
)
# Convert file descriptor to Python file object
if "b" in mode:
return os.fdopen(fd, mode)
else:
return os.fdopen(fd, mode, encoding="utf-8")
except Exception:
os.close(fd)
raise
except PermissionError:
raise
except Exception as e:
raise IOError(f"Failed to securely open file {file_path}: {e}")
def _auto_save_session(self) -> None:
"""Auto-save current session (called after important operations)."""
try:
if self.current_session:
self.save_current_session()