@@ -119,21 +119,41 @@ def add_file(self, file_path: str, description: str, knowledge_base: str = None)
         """
         self.collection.add_file(self.name, file_path, description, knowledge_base)

-    def add_webpages(self, urls: List[str], description: str, knowledge_base: str = None):
+    def add_webpages(
+            self,
+            urls: List[str],
+            description: str,
+            knowledge_base: str = None,
+            crawl_depth: int = 1,
+            filters: List[str] = None):
         """
-        Add a list of crawled URLs to the agent for retrieval.
+        Add a list of URLs to be crawled and added to the agent for retrieval.

-        :param urls: List of URLs to be crawled and added.
+        :param urls: URLs of pages to be crawled and added.
+        :param description: Description of the webpages. Used by agent to know when to do retrieval.
+        :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given.
+        :param crawl_depth: How deep to crawl from each base URL. 0 = scrape given URLs only, -1 = default max.
+        :param filters: Include only URLs that match these regex patterns.
         """
-        self.collection.add_webpages(self.name, urls, description, knowledge_base)
+        self.collection.add_webpages(self.name, urls, description, knowledge_base=knowledge_base, crawl_depth=crawl_depth, filters=filters)

-    def add_webpage(self, url: str, description: str, knowledge_base: str = None):
+    def add_webpage(
+            self,
+            url: str,
+            description: str,
+            knowledge_base: str = None,
+            crawl_depth: int = 1,
+            filters: List[str] = None):
         """
         Add a crawled URL to the agent for retrieval.

         :param url: URL of the page to be crawled and added.
+        :param description: Description of the webpage. Used by agent to know when to do retrieval.
+        :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given.
+        :param crawl_depth: How deep to crawl from the base URL. 0 = scrape the given URL only, -1 = default max.
+        :param filters: Include only URLs that match these regex patterns.
         """
-        self.collection.add_webpage(self.name, url, description, knowledge_base)
+        self.collection.add_webpage(self.name, url, description, knowledge_base=knowledge_base, crawl_depth=crawl_depth, filters=filters)

     def add_database(self, database: str, tables: List[str], description: str):
         """
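The hunk above threads two crawling controls through the agent-level convenience methods: crawl_depth (how many link levels to follow from each seed URL) and filters (a regex allow-list). A minimal usage sketch against the new signatures; the server URL and agent name are placeholders, not part of this change:

    import mindsdb_sdk

    # Connect and fetch a pre-existing agent (both names are hypothetical).
    server = mindsdb_sdk.connect('http://127.0.0.1:47334')
    agent = server.agents.get('my_agent')

    # Crawl one level deep from the seed page, keeping only URLs that
    # match the regex filter.
    agent.add_webpage(
        'docs.mindsdb.com',
        description='MindsDB documentation',
        crawl_depth=1,
        filters=[r'.*docs\.mindsdb\.com/integrations.*'])

    # crawl_depth=0 scrapes exactly the listed pages and follows no links.
    agent.add_webpages(
        ['docs.mindsdb.com', 'docs.python.org/3/'],
        description='Reference documentation',
        crawl_depth=0)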
@@ -313,14 +333,24 @@ def add_file(self, name: str, file_path: str, description: str, knowledge_base:
         """
         self.add_files(name, [file_path], description, knowledge_base)

-    def add_webpages(self, name: str, urls: List[str], description: str, knowledge_base: str = None):
+    def add_webpages(
+            self,
+            name: str,
+            urls: List[str],
+            description: str,
+            knowledge_base: str = None,
+            crawl_depth: int = 1,
+            filters: List[str] = None
+    ):
         """
         Add a list of webpages to the agent for retrieval.

         :param name: Name of the agent
         :param urls: List of URLs of the webpages to be added.
         :param description: Description of the webpages. Used by agent to know when to do retrieval.
         :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given.
+        :param crawl_depth: How deep to crawl from each base URL. 0 = scrape given URLs only, -1 = default max.
+        :param filters: Include only URLs that match these regex patterns.
         """
         if not urls:
             return
@@ -339,7 +369,7 @@ def add_webpages(self, name: str, urls: List[str], description: str, knowledge_b
             kb = self._create_default_knowledge_base(agent, kb_name)

         # Insert crawled webpages.
-        kb.insert_webpages(urls)
+        kb.insert_webpages(urls, crawl_depth=crawl_depth, filters=filters)

         # Make sure skill name is unique.
         skill_name = f'{domain}{path}_retrieval_skill_{uuid4()}'
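As this hunk shows, the agent layer does not interpret the new arguments at all; it forwards them verbatim to the knowledge base's insert_webpages. A sketch of the equivalent direct call, assuming you already hold a KnowledgeBase handle named kb (hypothetical, obtained elsewhere):

    # Same parameters, one layer down.
    kb.insert_webpages(
        ['docs.mindsdb.com'],
        crawl_depth=1,
        filters=[r'.*docs\.mindsdb\.com.*'])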
@@ -351,16 +381,25 @@ def add_webpages(self, name: str, urls: List[str], description: str, knowledge_b
         agent.skills.append(webpage_retrieval_skill)
         self.update(agent.name, agent)

-    def add_webpage(self, name: str, url: str, description: str, knowledge_base: str = None):
+    def add_webpage(
+            self,
+            name: str,
+            url: str,
+            description: str,
+            knowledge_base: str = None,
+            crawl_depth: int = 1,
+            filters: List[str] = None):
         """
         Add a webpage to the agent for retrieval.

         :param name: Name of the agent
         :param url: URL of the webpage to be added, or the name of an existing webpage.
         :param description: Description of the webpage. Used by agent to know when to do retrieval.
         :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given.
+        :param crawl_depth: How deep to crawl from the base URL. 0 = scrape the given URL only, -1 = default max.
+        :param filters: Include only URLs that match these regex patterns.
         """
-        self.add_webpages(name, [url], description, knowledge_base)
+        self.add_webpages(name, [url], description, knowledge_base=knowledge_base, crawl_depth=crawl_depth, filters=filters)

     def add_database(self, name: str, database: str, tables: List[str], description: str):
         """
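The same controls are exposed on the collection, where the agent is addressed by name. A sketch reusing the connection from the earlier example; the agent name is again a placeholder, and the docstrings here say only that non-matching URLs are excluded (whether a URL must match any or all patterns is not stated, so treat any-match as an assumption):

    # Follow links up to two levels deep, ingesting only URLs that
    # match the regex allow-list.
    server.agents.add_webpages(
        'support_agent',
        ['docs.mindsdb.com'],
        description='Product docs for support questions',
        crawl_depth=2,
        filters=[r'.*docs\.mindsdb\.com.*'])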