diff --git a/docs/how_it_works.md b/docs/how_it_works.md
index 3602610..9416a70 100644
--- a/docs/how_it_works.md
+++ b/docs/how_it_works.md
@@ -24,7 +24,7 @@ Used to provide various configuration settings to the converter. They are as fol
- INLINE_LINKS for formatting images and links
- PROTECT_LINKS protect from line breaks
- GOOGLE_LIST_INDENT no of pixels to indent nested lists
- - IGNORE_ANCHORS
+ - IGNORE_LINKS don't include any formatting for links
- IGNORE_IMAGES
- IMAGES_AS_HTML always generate HTML tags for images; preserves `height`, `width`, `alt` if possible.
- IMAGES_TO_ALT
diff --git a/docs/usage.md b/docs/usage.md
index a1758d3..9e31989 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -70,7 +70,7 @@ simple indications of their function.
- INLINE_LINKS for formatting images and links
- PROTECT_LINKS protect from line breaks
- GOOGLE_LIST_INDENT no of pixels to indent nested lists
- - IGNORE_ANCHORS
+ - IGNORE_LINKS don't include any formatting for links
- IGNORE_IMAGES
- IMAGES_AS_HTML always generate HTML tags for images; preserves `height`, `width`, `alt` if possible.
- IMAGES_TO_ALT
diff --git a/html2text/__init__.py b/html2text/__init__.py
index d8e41a1..add0725 100644
--- a/html2text/__init__.py
+++ b/html2text/__init__.py
@@ -34,11 +34,39 @@
class HTML2Text(html.parser.HTMLParser):
+ init_params = [
+ "bypass_tables",
+ "close_quote",
+ "default_image_alt",
+ "escape_snob",
+ "google_list_indent",
+ "ignore_emphasis",
+ "ignore_images",
+ "ignore_links",
+ "ignore_tables",
+ "images_as_html",
+ "images_to_alt",
+ "images_with_size",
+ "inline_links",
+ "links_each_paragraph",
+ "mark_code",
+ "open_quote",
+ "pad_tables",
+ "protect_links",
+ "single_line_break",
+ "skip_internal_links",
+ "unicode_snob",
+ "use_automatic_links",
+ "wrap_links",
+ "wrap_list_items",
+ ]
+
def __init__(
self,
out: Optional[OutCallback] = None,
baseurl: str = "",
bodywidth: int = config.BODY_WIDTH,
+ **kwargs
) -> None:
"""
Input parameters:
@@ -52,37 +80,16 @@ def __init__(
self.split_next_td = False
self.td_count = 0
self.table_start = False
- self.unicode_snob = config.UNICODE_SNOB # covered in cli
- self.escape_snob = config.ESCAPE_SNOB # covered in cli
- self.links_each_paragraph = config.LINKS_EACH_PARAGRAPH
- self.body_width = bodywidth # covered in cli
- self.skip_internal_links = config.SKIP_INTERNAL_LINKS # covered in cli
- self.inline_links = config.INLINE_LINKS # covered in cli
- self.protect_links = config.PROTECT_LINKS # covered in cli
- self.google_list_indent = config.GOOGLE_LIST_INDENT # covered in cli
- self.ignore_links = config.IGNORE_ANCHORS # covered in cli
- self.ignore_images = config.IGNORE_IMAGES # covered in cli
- self.images_as_html = config.IMAGES_AS_HTML # covered in cli
- self.images_to_alt = config.IMAGES_TO_ALT # covered in cli
- self.images_with_size = config.IMAGES_WITH_SIZE # covered in cli
- self.ignore_emphasis = config.IGNORE_EMPHASIS # covered in cli
- self.bypass_tables = config.BYPASS_TABLES # covered in cli
- self.ignore_tables = config.IGNORE_TABLES # covered in cli
- self.google_doc = False # covered in cli
- self.ul_item_mark = "*" # covered in cli
- self.emphasis_mark = "_" # covered in cli
+ self.google_doc = False
+ self.ul_item_mark = "*"
+ self.emphasis_mark = "_"
self.strong_mark = "**"
- self.single_line_break = config.SINGLE_LINE_BREAK # covered in cli
- self.use_automatic_links = config.USE_AUTOMATIC_LINKS # covered in cli
- self.hide_strikethrough = False # covered in cli
- self.mark_code = config.MARK_CODE
- self.wrap_list_items = config.WRAP_LIST_ITEMS # covered in cli
- self.wrap_links = config.WRAP_LINKS # covered in cli
- self.pad_tables = config.PAD_TABLES # covered in cli
- self.default_image_alt = config.DEFAULT_IMAGE_ALT # covered in cli
+ self.hide_strikethrough = False
self.tag_callback = None
- self.open_quote = config.OPEN_QUOTE # covered in cli
- self.close_quote = config.CLOSE_QUOTE # covered in cli
+ self.body_width = bodywidth
+
+ for param in self.init_params:
+ setattr(self, param, kwargs.get(param, getattr(config, param.upper())))
if out is None:
self.out = self.outtextf
@@ -939,9 +946,14 @@ def optwrap(self, text: str) -> str:
return result
-def html2text(html: str, baseurl: str = "", bodywidth: Optional[int] = None) -> str:
+def html2text(
+ html: str,
+ baseurl: str = "",
+ bodywidth: Optional[int] = None,
+ **kwargs: Optional[OutCallback]
+) -> str:
if bodywidth is None:
bodywidth = config.BODY_WIDTH
- h = HTML2Text(baseurl=baseurl, bodywidth=bodywidth)
+ h = HTML2Text(baseurl=baseurl, bodywidth=bodywidth, **kwargs)
return h.handle(html)
diff --git a/html2text/cli.py b/html2text/cli.py
index 30a362e..586ad96 100644
--- a/html2text/cli.py
+++ b/html2text/cli.py
@@ -63,7 +63,7 @@ class bcolors:
"--ignore-links",
dest="ignore_links",
action="store_true",
- default=config.IGNORE_ANCHORS,
+ default=config.IGNORE_LINKS,
help="don't include any formatting for links",
)
p.add_argument(
diff --git a/html2text/config.py b/html2text/config.py
index 2bb38b6..c01525d 100644
--- a/html2text/config.py
+++ b/html2text/config.py
@@ -37,7 +37,9 @@
# Values Google and others may use to indicate bold text
BOLD_TEXT_STYLE_VALUES = ("bold", "700", "800", "900")
-IGNORE_ANCHORS = False
+IGNORE_LINKS = False
+# Deprecated alias so existing reads of config.IGNORE_ANCHORS keep working.
+IGNORE_ANCHORS = IGNORE_LINKS
IGNORE_IMAGES = False
IMAGES_AS_HTML = False
IMAGES_TO_ALT = False
diff --git a/test/test_html2text.py b/test/test_html2text.py
index 7bdd679..533852c 100644
--- a/test/test_html2text.py
+++ b/test/test_html2text.py
@@ -226,3 +226,15 @@ def _skip_certain_tags(h2t, tag, attrs, start):
"some italics too."
)
assert ret == ("this is a txt and this is a with text and some _italics_ too.\n\n")
+
+
+def test_kwargs_in_class():
+    parser = html2text.HTML2Text(wrap_links=False)  # kwarg beats config default
+    assert parser.wrap_links is False
+
+
+def test_kwargs_in_function():
+    # Long paragraph with a link: expected to be re-wrapped only when wrap_links is true.
+    test_data = "word " * 20 + '<a href="http://example.com/">link</a>'
+    wrapped = html2text.html2text(test_data, wrap_links=True)
+    assert wrapped != html2text.html2text(test_data, wrap_links=False)