Skip to content

Commit b2cf88f

Browse files
authored
feat: Add BeautifulSoupParser type alias (#674)
To avoid repeating the same Literal definitions.
1 parent a002769 commit b2cf88f

File tree

2 files changed

+5
-3
lines changed

2 files changed

+5
-3
lines changed
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
try:
2-
from ._beautifulsoup_crawler import BeautifulSoupCrawler
2+
from ._beautifulsoup_crawler import BeautifulSoupCrawler, BeautifulSoupParser
33
from ._beautifulsoup_crawling_context import BeautifulSoupCrawlingContext
44
except ImportError as exc:
55
raise ImportError(
66
"To import anything from this subpackage, you need to install the 'beautifulsoup' extra."
77
"For example, if you use pip, run `pip install 'crawlee[beautifulsoup]'`.",
88
) from exc
99

10-
__all__ = ['BeautifulSoupCrawler', 'BeautifulSoupCrawlingContext']
10+
__all__ = ['BeautifulSoupCrawler', 'BeautifulSoupCrawlingContext', 'BeautifulSoupParser']

src/crawlee/beautifulsoup_crawler/_beautifulsoup_crawler.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
if TYPE_CHECKING:
2222
from crawlee._types import BasicCrawlingContext, EnqueueLinksKwargs
2323

24+
BeautifulSoupParser = Literal['html.parser', 'lxml', 'xml', 'html5lib']
25+
2426

2527
class BeautifulSoupCrawler(BasicCrawler[BeautifulSoupCrawlingContext]):
2628
"""A web crawler for performing HTTP requests and parsing HTML/XML content.
@@ -61,7 +63,7 @@ async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
6163
def __init__(
6264
self,
6365
*,
64-
parser: Literal['html.parser', 'lxml', 'xml', 'html5lib'] = 'lxml',
66+
parser: BeautifulSoupParser = 'lxml',
6567
additional_http_error_status_codes: Iterable[int] = (),
6668
ignore_http_error_status_codes: Iterable[int] = (),
6769
**kwargs: Unpack[BasicCrawlerOptions[BeautifulSoupCrawlingContext]],

0 commit comments

Comments (0)