1+ import copy
2+
13from bs4 import BeautifulSoup
4+ from bs4 .element import NavigableString , Tag
5+
6+
# Names of the HTML list container tags whose items are split into separate fragments.
LIST_TAGS = {"ol", "ul"}
28
39
410def split_quill (html : str ) -> list [str ]:
5- """Split a Text Editor HTML string into a list of HTML strings, each representing a direct child of the editor div .
11+ """Split a Text Editor HTML string into printable HTML fragments .
612
7- This is useful for breaking text-editor content into separate rows in a print format."""
13+ Top-level Quill blocks are returned separately so print formats can render them
14+ as individual table rows. Lists are split further by item because wkhtmltopdf
15+ handles page breaks between table rows more reliably than page breaks inside
16+ long table cells."""
817 soup = BeautifulSoup (html , "html.parser" )
918 nested_divs = soup .find_all ("div" , recursive = False )
1019
@@ -13,7 +22,102 @@ def split_quill(html: str) -> list[str]:
1322 return [html ]
1423
1524 div = nested_divs [0 ]
16- if div .has_attr ("class" ) and "ql-editor" in div ["class" ]:
17- return [f'<div class="ql-editor">{ str (child )} </div>' for child in div .children ]
18- else :
19- return [str (child ) for child in div .children ]
25+ fragments = _split_children (div )
26+
27+ if _is_quill_editor (div ):
28+ return [_wrap_in_editor (div , fragment ) for fragment in fragments ]
29+
30+ return fragments
31+
32+
def _split_children(parent: Tag) -> list[str]:
    """Serialize the direct children of ``parent`` into HTML fragments.

    Whitespace-only text nodes are dropped. Children whose tag name is in
    ``LIST_TAGS`` are expanded through ``_split_list``; every other child
    contributes its own markup as a single fragment.
    """
    pieces: list[str] = []

    for node in parent.children:
        if _is_blank_text(node):
            continue

        is_list = isinstance(node, Tag) and node.name in LIST_TAGS
        pieces += _split_list(node) if is_list else [str(node)]

    return pieces
46+
47+
def _split_list(list_tag: Tag) -> list[str]:
    """Split a list into one single-item list fragment per direct ``<li>`` child.

    Each fragment is a new tag with the same name and attributes as
    ``list_tag`` that contains a copy of exactly one item. For items treated
    as ordered (see ``_is_ordered_list_item``), the fragment is annotated via
    ``_set_ordered_list_start`` with the item's running number at its indent
    level so numbering continues across fragments.

    Returns the serialized ``list_tag`` unchanged, as a single fragment, when
    it has no direct ``<li>`` children.
    """
    list_items = [child for child in list_tag.children if isinstance(child, Tag) and child.name == "li"]
    if not list_items:
        return [str(list_tag)]

    fragments = []
    # Running item number per indent level. A dict is used instead of a
    # fixed-size list because the indent is parsed from a class name and is
    # therefore unbounded; a fixed list would raise IndexError on deep levels.
    ordered_counters: dict[int, int] = {}

    for item in list_items:
        fragment = _new_tag_like(list_tag)

        if _is_ordered_list_item(list_tag, item):
            indent = _get_quill_indent(item)
            number = ordered_counters.get(indent, 0) + 1
            # Keep shallower levels, restart every deeper level from zero.
            ordered_counters = {level: count for level, count in ordered_counters.items() if level < indent}
            ordered_counters[indent] = number
            _set_ordered_list_start(fragment, indent, number)

        # Re-parse the item's markup to obtain an independent copy, leaving
        # the source tree untouched.
        fragment.append(BeautifulSoup(str(item), "html.parser").find("li"))
        fragments.append(str(fragment))

    return fragments
69+
70+
71+ def _is_quill_editor (tag : Tag ) -> bool :
72+ return "ql-editor" in tag .get ("class" , [])
73+
74+
def _is_blank_text(element: Tag | NavigableString) -> bool:
    """Return True for a ``NavigableString`` that is empty or whitespace-only.

    Anything that is not a ``NavigableString`` is never considered blank.
    """
    if not isinstance(element, NavigableString):
        return False

    return not element.strip()
77+
78+
def _new_tag_like(tag: Tag) -> Tag:
    """Create an empty tag with the same name as ``tag`` and a deep copy of its attributes.

    The new tag is built from a fresh, empty soup, and the attributes are
    deep-copied so later changes to the clone's attributes do not affect ``tag``.
    """
    clone = BeautifulSoup("", "html.parser").new_tag(tag.name)
    clone.attrs = copy.deepcopy(tag.attrs)
    return clone
84+
85+
def _wrap_in_editor(editor_tag: Tag, fragment: str) -> str:
    """Wrap an HTML fragment in a ``<div>`` that copies the attributes of ``editor_tag``.

    Args:
        editor_tag: Tag whose attributes are deep-copied onto the wrapper.
        fragment: HTML markup to place inside the wrapper.

    Returns:
        The serialized wrapper ``<div>`` containing the parsed fragment.
    """
    wrapper = BeautifulSoup("", "html.parser").new_tag("div")
    wrapper.attrs = copy.deepcopy(editor_tag.attrs)

    parsed = BeautifulSoup(fragment, "html.parser")
    # Iterate over a snapshot: appending a node to the wrapper moves it out
    # of ``parsed``, which would otherwise change ``contents`` mid-loop.
    for node in tuple(parsed.contents):
        wrapper.append(node)

    return str(wrapper)
96+
97+
98+ def _is_ordered_list_item (list_tag : Tag , item : Tag ) -> bool :
99+ data_list = item .get ("data-list" )
100+ if data_list :
101+ return data_list == "ordered"
102+
103+ return list_tag .name == "ol"
104+
105+
106+ def _get_quill_indent (item : Tag ) -> int :
107+ for class_name in item .get ("class" , []):
108+ if class_name .startswith ("ql-indent-" ):
109+ try :
110+ return int (class_name .removeprefix ("ql-indent-" ))
111+ except ValueError :
112+ pass
113+
114+ return 0
115+
116+
117+ def _set_ordered_list_start (list_tag : Tag , indent : int , number : int ) -> None :
118+ if indent == 0 and list_tag .name == "ol" :
119+ list_tag ["start" ] = str (number )
120+
121+ style = list_tag .get ("style" , "" ).rstrip ()
122+ counter_reset = f"counter-reset: list-{ indent } { number - 1 } ;"
123+ list_tag ["style" ] = f"{ style .rstrip (';' )} ; { counter_reset } " if style else counter_reset
0 commit comments