22"""
33QAAgent: A specialized agent for question answering with RAG capabilities.
44
5- This agent extends AliasAgentBase to provide GitHub MCP tools and RAG (Retrieval-Augmented Generation)
6- functionality for answering questions based on a knowledge base stored in Qdrant.
5+ This agent extends AliasAgentBase to provide GitHub MCP tools and
6+ RAG (Retrieval-Augmented Generation) for a knowledge base in Qdrant.
77"""
88import hashlib
99import os
10- import re
1110from pathlib import Path
12- from typing import TYPE_CHECKING , List , Optional , Union
11+ from typing import TYPE_CHECKING , List , Optional , Sequence , Union
1312
1413from loguru import logger
15-
16- if TYPE_CHECKING :
17- from alias .agent .tools import AliasToolkit
18-
1914from agentscope .embedding import DashScopeTextEmbedding
2015from agentscope .message import TextBlock
2116from agentscope .mcp import HttpStatelessClient
3126 split_faq_records ,
3227)
3328
29+ if TYPE_CHECKING :
30+ from alias .agent .tools import AliasToolkit
31+
3432# Qdrant configuration
3533QDRANT_HOST = "127.0.0.1"
3634QDRANT_PORT = 6333
@@ -59,27 +57,32 @@ def _get_default_system_prompt(name: str) -> str:
5957 """
6058 try :
6159 # Try to load from the built-in prompt file
62- prompt_file = Path (__file__ ).parent / "qa_agent_utils" / "build_in_prompt" / "qaagent_base_sys_prompt.md"
60+ prompt_file = (
61+ Path (__file__ ).parent
62+ / "qa_agent_utils"
63+ / "build_in_prompt"
64+ / "qaagent_base_sys_prompt.md"
65+ )
6366 if prompt_file .exists ():
6467 prompt = prompt_file .read_text (encoding = "utf-8" )
6568 return prompt .format (name = name )
6669 except Exception as e :
6770 logger .warning (f"Could not load default QA prompt: { e } " )
68-
71+
6972 # Fallback to a simple default prompt
7073 return (
7174 f"You are a helpful assistant named { name } .\n \n "
72- "**IMPORTANT**: When answering questions, you MUST use the `retrieve_knowledge` tool "
73- "to search for answers in the knowledge base FIRST before providing any answer. "
74- "Do not answer based solely on your training data if the question might be in the knowledge base.\n \n "
75+ "**IMPORTANT**: You MUST use the `retrieve_knowledge` tool to "
76+ "search the knowledge base FIRST before answering. "
77+ "Do not answer from training data alone if the question may be in "
78+ "the knowledge base.\n \n "
7579 "The `query` parameter is crucial for retrieval quality. "
76- "You may try multiple different queries to get the best results. "
77- "Adjust the `limit` and `score_threshold` parameters to control "
78- "the number and relevance of results.\n \n "
80+ "Try multiple queries; adjust `limit` and `score_threshold` "
81+ "for number and relevance of results.\n \n "
7982 )
8083
8184 @classmethod
82- async def create (
85+ async def create ( # pylint: disable=too-many-branches,too-many-statements
8386 cls ,
8487 name : str ,
8588 model : str = "qwen3-max" ,
@@ -98,11 +101,12 @@ async def create(
98101 model: The model name (e.g., "qwen3-max", "qwen-vl-max").
99102 system_prompt: The system prompt. If None, uses default prompt.
100103 tools: Tool names to register from worker_full_toolkit.
101- worker_full_toolkit: Optional. If provided, use this toolkit (same sandbox/share_tools as AliasAgentBase).
102- If None, create sandbox and full toolkit internally.
103- use_long_term_memory_service: Whether to enable long-term memory service.
104- file: List of file paths to process and add to the knowledge base. None to use default or skip.
105- collection_name: Name of the Qdrant collection for RAG. None to use default 'as_faq'.
104+ worker_full_toolkit: Optional. If provided, use this toolkit (same
105+ sandbox/share_tools as AliasAgentBase). If None, create
106+ sandbox and full toolkit internally.
107+ use_long_term_memory_service: Whether to enable long-term memory.
108+ file: List of file paths to process. None to use default or skip.
109+ collection_name: Qdrant collection. None = default 'as_faq'.
106110
107111 Returns:
108112 A configured QAAgent instance with RAG capabilities.
@@ -111,20 +115,24 @@ async def create(
111115 if file is not None and not isinstance (file , list ):
112116 raise ValueError ("file must be a list of file paths or None" )
113117
114- # Resolve collection_name for this agent (RAG tool will use this collection)
115- coll_name = collection_name if collection_name is not None else DEFAULT_COLLECTION_NAME
118+ # Resolve collection_name (RAG tool uses this collection)
119+ coll_name = (
120+ collection_name
121+ if collection_name is not None
122+ else DEFAULT_COLLECTION_NAME
123+ )
116124
117125 qdrant_running = check_container_running (QDRANT_CONTAINER_NAME )
118126
119127 if not qdrant_running :
120- # RAG not initialized: start Qdrant first, then init by (file, collection_name )
128+ # RAG not initialized: start Qdrant, init (file, collection )
121129 try :
122130 start_qdrant_container ()
123131 except Exception as e :
124132 logger .warning (f"Could not start Qdrant container: { e } " )
125133 logger .warning ("RAG functionality may not work properly" )
126134 else :
127- # Resolve (files to process, collection_name ) for initial load
135+ # Resolve (files to process, collection ) for initial load
128136 if file is None and collection_name is None :
129137 files_to_process = [DEFAULT_RAG_FILE_PATH ]
130138 init_collection = DEFAULT_COLLECTION_NAME
@@ -139,26 +147,34 @@ async def create(
139147 init_collection = collection_name
140148 await cls ._process_files (files_to_process , init_collection )
141149 else :
142- # Qdrant already running: collection_name is the one this agent will use
150+ # Qdrant running: collection_name is the one this agent uses
143151 if file :
144152 await cls ._process_files (file , coll_name )
145153 elif not collection_exists (coll_name ):
146154 logger .info (
147- f"Collection '{ coll_name } ' does not exist; using default file to populate." ,
155+ f"Collection '{ coll_name } ' does not exist; "
156+ "using default file to populate." ,
148157 )
149158 if DEFAULT_RAG_FILE_PATH .exists ():
150- await cls ._process_files ([DEFAULT_RAG_FILE_PATH ], coll_name )
159+ await cls ._process_files (
160+ [DEFAULT_RAG_FILE_PATH ],
161+ coll_name ,
162+ )
151163 else :
152- logger .warning (f"Default RAG file not found: { DEFAULT_RAG_FILE_PATH } " )
164+ logger .warning (
165+ f"Default RAG file not found: { DEFAULT_RAG_FILE_PATH } " ,
166+ )
153167
154168 # Use default system prompt if not provided
155169 if system_prompt is None :
156170 system_prompt = cls ._get_default_system_prompt (name )
157171
158- # Use caller's worker_full_toolkit, or build sandbox + full toolkit internally
172+ # Use worker_full_toolkit or build sandbox + toolkit internally
159173 if worker_full_toolkit is None :
160174 try :
161- from alias .runtime .alias_sandbox .alias_sandbox import AliasSandbox
175+ from alias .runtime .alias_sandbox .alias_sandbox import (
176+ AliasSandbox ,
177+ )
162178 from alias .agent .tools import AliasToolkit
163179 from alias .agent .tools .add_tools import add_tools
164180
@@ -168,7 +184,10 @@ async def create(
168184 try :
169185 await add_tools (worker_full_toolkit )
170186 except Exception as e :
171- logger .warning (f"add_tools failed: { e } ; continuing with sandbox tools only" )
187+ logger .warning (
188+ f"add_tools failed: { e } ; "
189+ "continuing with sandbox tools only" ,
190+ )
172191 logger .info ("Created sandbox and full toolkit for QAAgent" )
173192 except Exception as e :
174193 logger .warning (f"Could not create sandbox for QAAgent: { e } " )
@@ -192,7 +211,7 @@ async def create(
192211
193212 @staticmethod
194213 async def _process_files (
195- file_paths : List [Union [str , Path ]],
214+ file_paths : Sequence [Union [str , Path ]],
196215 collection_name : str ,
197216 ) -> None :
198217 """
@@ -202,7 +221,10 @@ async def _process_files(
202221 file_paths: List of file paths to process.
203222 collection_name: Name of the Qdrant collection to add documents to.
204223 """
205- logger .info (f"Processing { len (file_paths )} file(s) for collection '{ collection_name } '" )
224+ logger .info (
225+ f"Processing { len (file_paths )} file(s) "
226+ f"for collection '{ collection_name } '" ,
227+ )
206228
207229 # Create knowledge base instance
208230 knowledge = SimpleKnowledge (
@@ -245,12 +267,17 @@ async def _process_files(
245267 for faq_record in faq_records :
246268 # If the record is short enough, use it as-is
247269 if len (faq_record ) <= 2048 :
248- doc_id = hashlib .sha256 (faq_record .encode ("utf-8" )).hexdigest ()
270+ doc_id = hashlib .sha256 (
271+ faq_record .encode ("utf-8" ),
272+ ).hexdigest ()
249273 all_documents .append (
250274 Document (
251275 id = doc_id ,
252276 metadata = DocMetadata (
253- content = TextBlock (type = "text" , text = faq_record ),
277+ content = TextBlock (
278+ type = "text" ,
279+ text = faq_record ,
280+ ),
254281 doc_id = doc_id ,
255282 chunk_id = 0 ,
256283 total_chunks = 1 ,
@@ -269,10 +296,15 @@ async def _process_files(
269296 f"to collection '{ collection_name } '" ,
270297 )
271298 else :
272- logger .warning ("No documents were processed from the provided files" )
299+ logger .warning (
300+ "No documents were processed from the provided files" ,
301+ )
273302
274303 @staticmethod
275- async def _register_rag_tool (agent : "QAAgent" , collection_name : str ) -> None :
304+ async def _register_rag_tool (
305+ agent : "QAAgent" ,
306+ collection_name : str ,
307+ ) -> None :
276308 """
277309 Register the retrieve_knowledge tool for RAG.
278310
@@ -300,16 +332,17 @@ async def _register_rag_tool(agent: "QAAgent", collection_name: str) -> None:
300332 )
301333 agent .toolkit .register_tool_function (
302334 knowledge .retrieve_knowledge ,
303- func_description = ( # Provide a clear description for the tool
304- "Quickly retrieve answers to questions related to "
305- "the knowledge base. The `query` parameter is crucial "
306- "for retrieval quality."
307- "You may try multiple different queries to get the best "
308- "results. Adjust the `limit` and `score_threshold` "
309- "parameters to control the number and relevance of results."
335+ func_description = (
336+ "Quickly retrieve answers from the knowledge base. "
337+ "The `query` parameter is crucial for retrieval quality. "
338+ "Try multiple queries; adjust `limit` and "
339+ "`score_threshold` for relevance of results."
310340 ),
311341 )
312- logger .info (f"Registered retrieve_knowledge tool with collection '{ collection_name } '" )
342+ logger .info (
343+ f"Registered retrieve_knowledge tool "
344+ f"with collection '{ collection_name } '" ,
345+ )
313346 except Exception as e :
314347 print (traceback .format_exc ())
315348 raise e from None
@@ -328,8 +361,7 @@ async def _register_github_tools(agent: "QAAgent") -> None:
328361 if not github_token :
329362 logger .error (
330363 "Missing GITHUB_TOKEN; GitHub MCP tools cannot be used. "
331- "Please export GITHUB_TOKEN in your environment before "
332- "proceeding." ,
364+ "Please export GITHUB_TOKEN in your environment." ,
333365 )
334366 else :
335367 try :
0 commit comments