@@ -29,7 +29,7 @@ def __init__(
         self.url = url
         self.css_selector = css_selector
 
-    async def acrawl(self, url: str, css_selector: str | None = None):
+    async def crawl(self, url: str, css_selector: str | None = None):
         async with AsyncWebCrawler(verbose=True) as crawler:
             result = await crawler.arun(
                 url,
@@ -38,9 +38,6 @@ async def acrawl(self, url: str, css_selector: str | None = None):
 
         return result
 
-    def crawl(self, url: str, css_selector: str | None = None):
-        return asyncio.run(self.acrawl(url, css_selector))
-
     def _process_result(self, result: CrawlResult):
         if result.markdown is None:
             raise ValueError(f"No valid content found at {self.url}")
@@ -52,25 +49,14 @@ def _process_result(self, result: CrawlResult):
 
         return Document(page_content=result.markdown, metadata=metadata)
 
-    def lazy_load(self) -> Iterator[Document]:
-        """Load HTML document into document objects."""
-        # First attempt loading with CSS selector if provided
-        result = self.crawl(self.url, self.css_selector)
-
-        # Second attempt loading without CSS selector if first attempt failed
-        if result.markdown is None and self.css_selector is not None:
-            result = self.crawl(self.url)
-
-        yield self._process_result(result)
-
     async def alazy_load(self) -> AsyncIterator[Document]:
         """Load HTML document into document objects."""
         # First attempt loading with CSS selector if provided
-        result = await self.acrawl(self.url, self.css_selector)
+        result = await self.crawl(self.url, self.css_selector)
 
         # Second attempt loading without CSS selector if first attempt failed
         if result.markdown is None and self.css_selector is not None:
-            result = self.crawl(self.url)
+            result = await self.crawl(self.url)
 
         yield self._process_result(result)
 
@@ -126,7 +112,11 @@ async def _extract_single_source(
     logger.info(f"Extracting content from {extract_from}")
     loader = get_best_loader(extract_from)
 
-    docs = await loader.aload() if use_async else loader.load()
+    docs = (
+        await loader.aload()
+        if use_async or isinstance(loader, Crawl4AILoader)
+        else loader.load()
+    )
 
     content_parts = []
     for doc in docs:
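Context for the last hunk: with the synchronous crawl() and lazy_load() wrappers removed, Crawl4AILoader is async-only, which is why _extract_single_source now forces the aload() path for that loader regardless of use_async. A minimal sketch of driving the loader after this commit — the import path is hypothetical (the diff does not show the module), and the constructor signature (url, css_selector) is inferred from the __init__ assignments shown above:

import asyncio

# Hypothetical import path; not shown in the diff.
from my_project.loaders import Crawl4AILoader


async def main() -> None:
    # Constructor args inferred from __init__ (self.url, self.css_selector).
    loader = Crawl4AILoader("https://example.com", css_selector="article")

    # alazy_load() is now the only loading path; the sync lazy_load()/crawl()
    # wrappers that used asyncio.run() internally were removed in this commit,
    # so the caller drives the event loop itself.
    async for doc in loader.alazy_load():
        print(doc.metadata, doc.page_content[:100])


asyncio.run(main())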