@@ -151,7 +151,7 @@ async def web_search_links_as_docs(queries, search_engines=_DEFAULT_SE,
151151 await on_search_complete_hook (urls )
152152
153153 logger .debug ("Downloading documents for URLS: \n \t -%s" , "\n \t -" .join (urls ))
154- docs = await _load_docs (urls , browser_semaphore , ** kwargs )
154+ docs = await load_docs (urls , browser_semaphore , ** kwargs )
155155 return docs
156156
157157
@@ -345,8 +345,28 @@ def _as_set(user_input):
345345 return set (user_input or [])
346346
347347
348- async def _load_docs (urls , browser_semaphore = None , ** kwargs ):
349- """Load a document for each input URL"""
348+ async def load_docs (urls , browser_semaphore = None , ** kwargs ):
349+ """Load a document for each input URL
350+
351+ Parameters
352+ ----------
353+ urls : iterable of str
354+ Iterable of URLs (as strings) to fetch.
355+ browser_semaphore : :class:`asyncio.Semaphore`, optional
356+ Semaphore instance that can be used to limit the number of
357+ playwright browsers open concurrently. If ``None``, no limits
358+ are applied. By default, ``None``.
359+ kwargs
360+ Keyword-argument pairs to initialize
361+ :class:`elm.web.file_loader.AsyncFileLoader`.
362+
363+ Returns
364+ -------
365+ list
366+ List of non-empty document instances containing information from
367+ the URLs. If a URL could not be fetched (i.e. the document instance
368+ is empty), it will not be included in the output list.
369+ """
350370 logger .trace ("Downloading docs for the following URL's:\n %r" , urls )
351371 logger .trace ("kwargs for AsyncFileLoader:\n %s" ,
352372 pprint .PrettyPrinter ().pformat (kwargs ))
0 commit comments