Skip to content

Commit b2cf88f

Browse files
authored
feat: Add BeautifulSoupParser type alias (#674)
To avoid repeating the same Literal definitions.
1 parent a002769 commit b2cf88f

File tree

2 files changed

+5
-3
lines changed

2 files changed

+5
-3
lines changed
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
try:
2-
from ._beautifulsoup_crawler import BeautifulSoupCrawler
2+
from ._beautifulsoup_crawler import BeautifulSoupCrawler, BeautifulSoupParser
33
from ._beautifulsoup_crawling_context import BeautifulSoupCrawlingContext
44
except ImportError as exc:
55
raise ImportError(
66
"To import anything from this subpackage, you need to install the 'beautifulsoup' extra."
77
"For example, if you use pip, run `pip install 'crawlee[beautifulsoup]'`.",
88
) from exc
99

10-
__all__ = ['BeautifulSoupCrawler', 'BeautifulSoupCrawlingContext']
10+
__all__ = ['BeautifulSoupCrawler', 'BeautifulSoupCrawlingContext', 'BeautifulSoupParser']

src/crawlee/beautifulsoup_crawler/_beautifulsoup_crawler.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
if TYPE_CHECKING:
2222
from crawlee._types import BasicCrawlingContext, EnqueueLinksKwargs
2323

24+
BeautifulSoupParser = Literal['html.parser', 'lxml', 'xml', 'html5lib']
25+
2426

2527
class BeautifulSoupCrawler(BasicCrawler[BeautifulSoupCrawlingContext]):
2628
"""A web crawler for performing HTTP requests and parsing HTML/XML content.
@@ -61,7 +63,7 @@ async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
6163
def __init__(
6264
self,
6365
*,
64-
parser: Literal['html.parser', 'lxml', 'xml', 'html5lib'] = 'lxml',
66+
parser: BeautifulSoupParser = 'lxml',
6567
additional_http_error_status_codes: Iterable[int] = (),
6668
ignore_http_error_status_codes: Iterable[int] = (),
6769
**kwargs: Unpack[BasicCrawlerOptions[BeautifulSoupCrawlingContext]],

0 commit comments

Comments (0)