34
34
35
35
36
36
class HTML2Text (html .parser .HTMLParser ):
37
+ init_params = [
38
+ "bypass_tables" ,
39
+ "close_quote" ,
40
+ "default_image_alt" ,
41
+ "escape_snob" ,
42
+ "google_list_indent" ,
43
+ "ignore_emphasis" ,
44
+ "ignore_images" ,
45
+ "ignore_links" ,
46
+ "ignore_tables" ,
47
+ "images_as_html" ,
48
+ "images_to_alt" ,
49
+ "images_with_size" ,
50
+ "inline_links" ,
51
+ "links_each_paragraph" ,
52
+ "mark_code" ,
53
+ "open_quote" ,
54
+ "pad_tables" ,
55
+ "protect_links" ,
56
+ "single_line_break" ,
57
+ "skip_internal_links" ,
58
+ "unicode_snob" ,
59
+ "use_automatic_links" ,
60
+ "wrap_links" ,
61
+ "wrap_list_items" ,
62
+ ]
63
+
37
64
def __init__ (
38
65
self ,
39
66
out : Optional [OutCallback ] = None ,
40
67
baseurl : str = "" ,
41
68
bodywidth : int = config .BODY_WIDTH ,
69
+ ** kwargs
42
70
) -> None :
43
71
"""
44
72
Input parameters:
@@ -52,37 +80,16 @@ def __init__(
52
80
self .split_next_td = False
53
81
self .td_count = 0
54
82
self .table_start = False
55
- self .unicode_snob = config .UNICODE_SNOB # covered in cli
56
- self .escape_snob = config .ESCAPE_SNOB # covered in cli
57
- self .links_each_paragraph = config .LINKS_EACH_PARAGRAPH
58
- self .body_width = bodywidth # covered in cli
59
- self .skip_internal_links = config .SKIP_INTERNAL_LINKS # covered in cli
60
- self .inline_links = config .INLINE_LINKS # covered in cli
61
- self .protect_links = config .PROTECT_LINKS # covered in cli
62
- self .google_list_indent = config .GOOGLE_LIST_INDENT # covered in cli
63
- self .ignore_links = config .IGNORE_ANCHORS # covered in cli
64
- self .ignore_images = config .IGNORE_IMAGES # covered in cli
65
- self .images_as_html = config .IMAGES_AS_HTML # covered in cli
66
- self .images_to_alt = config .IMAGES_TO_ALT # covered in cli
67
- self .images_with_size = config .IMAGES_WITH_SIZE # covered in cli
68
- self .ignore_emphasis = config .IGNORE_EMPHASIS # covered in cli
69
- self .bypass_tables = config .BYPASS_TABLES # covered in cli
70
- self .ignore_tables = config .IGNORE_TABLES # covered in cli
71
- self .google_doc = False # covered in cli
72
- self .ul_item_mark = "*" # covered in cli
73
- self .emphasis_mark = "_" # covered in cli
83
+ self .google_doc = False
84
+ self .ul_item_mark = "*"
85
+ self .emphasis_mark = "_"
74
86
self .strong_mark = "**"
75
- self .single_line_break = config .SINGLE_LINE_BREAK # covered in cli
76
- self .use_automatic_links = config .USE_AUTOMATIC_LINKS # covered in cli
77
- self .hide_strikethrough = False # covered in cli
78
- self .mark_code = config .MARK_CODE
79
- self .wrap_list_items = config .WRAP_LIST_ITEMS # covered in cli
80
- self .wrap_links = config .WRAP_LINKS # covered in cli
81
- self .pad_tables = config .PAD_TABLES # covered in cli
82
- self .default_image_alt = config .DEFAULT_IMAGE_ALT # covered in cli
87
+ self .hide_strikethrough = False
83
88
self .tag_callback = None
84
- self .open_quote = config .OPEN_QUOTE # covered in cli
85
- self .close_quote = config .CLOSE_QUOTE # covered in cli
89
+ self .body_width = bodywidth
90
+
91
+ for param in self .init_params :
92
+ setattr (self , param , kwargs .get (param , getattr (config , param .upper ())))
86
93
87
94
if out is None :
88
95
self .out = self .outtextf
@@ -939,9 +946,14 @@ def optwrap(self, text: str) -> str:
939
946
return result
940
947
941
948
942
def html2text(
    html: str,
    baseurl: str = "",
    bodywidth: Optional[int] = None,
    **kwargs: Optional[OutCallback]
) -> str:
    """
    Convert an HTML string using a throwaway HTML2Text instance.

    :param html: markup passed to ``HTML2Text.handle``.
    :param baseurl: forwarded unchanged to the ``HTML2Text`` constructor.
    :param bodywidth: forwarded to the constructor as ``bodywidth``; when it
        is ``None`` the value of ``config.BODY_WIDTH`` is used instead.
    :param kwargs: any further keyword arguments, forwarded verbatim to the
        ``HTML2Text`` constructor.
    :return: the result of ``HTML2Text.handle(html)``.

    NOTE(review): the ``Optional[OutCallback]`` annotation on ``kwargs``
    looks narrower than the option values the constructor reads from its
    keyword arguments -- confirm the intended type.
    """
    # Resolve the default lazily so the current config value is picked up
    # at call time rather than at definition time.
    width = config.BODY_WIDTH if bodywidth is None else bodywidth
    converter = HTML2Text(baseurl=baseurl, bodywidth=width, **kwargs)
    return converter.handle(html)
0 commit comments