Skip to content

Commit 13c8af8

Browse files
committed
Move func
1 parent 8d6ad6d commit 13c8af8

1 file changed

Lines changed: 36 additions & 36 deletions

File tree

elm/web/search/run.py

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,42 @@ async def search_with_fallback(queries, search_engines=_DEFAULT_SE,
249249
return set()
250250

251251

252+
async def load_docs(urls, browser_semaphore=None, **kwargs):
    """Fetch one document per URL and drop the ones that came back empty

    Parameters
    ----------
    urls : iterable of str
        URLs (as strings) to retrieve documents from.
    browser_semaphore : :class:`asyncio.Semaphore`, optional
        Semaphore forwarded to the file loader so that the number of
        playwright browsers open at the same time during retrieval
        can be capped. ``None`` applies no limit.
        By default, ``None``.
    kwargs
        Additional keyword-argument pairs passed straight through to
        the :class:`elm.web.file_loader.AsyncFileLoader` initializer.

    Returns
    -------
    list
        Document instances that are not empty. Any URL whose fetch
        produced an empty document is left out of this list.
    """
    printer = pprint.PrettyPrinter()
    logger.trace("Downloading docs for the following URL's:\n%r", urls)
    logger.trace("kwargs for AsyncFileLoader:\n%s", printer.pformat(kwargs))

    loader = AsyncFileLoader(browser_semaphore=browser_semaphore, **kwargs)
    fetched = await loader.fetch_all(*urls)

    # Summarize page counts for *every* fetched doc (including empty
    # ones) before filtering, so the debug log shows failed fetches too.
    pages_per_source = {}
    for doc in fetched:
        source = doc.attrs.get("source", "Unknown")
        pages_per_source[source] = len(doc.pages)
    logger.debug("Loaded the following number of pages for docs:\n%s",
                 printer.pformat(pages_per_source))

    non_empty = []
    for doc in fetched:
        if doc.empty:
            continue
        non_empty.append(doc)
    return non_empty
286+
287+
252288
async def _single_se_search(se_name, queries, num_urls, ignore_url_parts,
253289
browser_sem, task_name, kwargs):
254290
"""Search for links using a single search engine"""
@@ -342,39 +378,3 @@ def _as_set(user_input):
342378
if isinstance(user_input, str):
343379
user_input = {user_input}
344380
return set(user_input or [])
345-
346-
347-
async def load_docs(urls, browser_semaphore=None, **kwargs):
    """Fetch one document per URL and drop the ones that came back empty

    Parameters
    ----------
    urls : iterable of str
        URLs (as strings) to retrieve documents from.
    browser_semaphore : :class:`asyncio.Semaphore`, optional
        Semaphore forwarded to the file loader so that the number of
        playwright browsers open at the same time during retrieval
        can be capped. ``None`` applies no limit.
        By default, ``None``.
    kwargs
        Additional keyword-argument pairs passed straight through to
        the :class:`elm.web.file_loader.AsyncFileLoader` initializer.

    Returns
    -------
    list
        Document instances that are not empty. Any URL whose fetch
        produced an empty document is left out of this list.
    """
    printer = pprint.PrettyPrinter()
    logger.trace("Downloading docs for the following URL's:\n%r", urls)
    logger.trace("kwargs for AsyncFileLoader:\n%s", printer.pformat(kwargs))

    loader = AsyncFileLoader(browser_semaphore=browser_semaphore, **kwargs)
    fetched = await loader.fetch_all(*urls)

    # Summarize page counts for *every* fetched doc (including empty
    # ones) before filtering, so the debug log shows failed fetches too.
    pages_per_source = {}
    for doc in fetched:
        source = doc.attrs.get("source", "Unknown")
        pages_per_source[source] = len(doc.pages)
    logger.debug("Loaded the following number of pages for docs:\n%s",
                 printer.pformat(pages_per_source))

    non_empty = []
    for doc in fetched:
        if doc.empty:
            continue
        non_empty.append(doc)
    return non_empty

0 commit comments

Comments
 (0)