Skip to content

Commit 737a7f2

Browse files
Use handle_startendtag (#79)
* Use handle_startendtag to simplify parser.
* Add a few more tests.
* Add back test with newfound purpose.
1 parent 312ce2d commit 737a7f2

File tree

2 files changed

+31
-35
lines changed

2 files changed

+31
-35
lines changed

tdom/parser.py

Lines changed: 9 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -33,34 +33,18 @@ def handle_starttag(
3333
self, tag: str, attrs: t.Sequence[tuple[str, str | None]]
3434
) -> None:
3535
node = Element(tag, attrs=LastUpdatedOrderedDict(attrs), children=[])
36-
self.stack.append(node)
37-
38-
# Unfortunately, Python's built-in HTMLParser has inconsistent behavior
39-
# with void elements. In particular, it calls handle_endtag() for them
40-
# only if they explicitly self-close (e.g., <br />). But in the HTML
41-
# spec itself, *there is no distinction* between <br> and <br />.
42-
# So we need to handle this case ourselves.
43-
#
44-
# See https://github.com/python/cpython/issues/69445
4536
if tag in VOID_ELEMENTS:
46-
# Always call handle_endtag for void elements. If it happens
47-
# to be self-closed in the input, handle_endtag() will effectively
48-
# be called twice. We ignore the second call there.
49-
self.handle_endtag(tag)
37+
self.append_element_child(node)
38+
else:
39+
self.stack.append(node)
5040

51-
def handle_endtag(self, tag: str) -> None:
52-
if tag in VOID_ELEMENTS:
53-
# Special case: handle Python issue #69445 (see comment above).
54-
most_recent_closed = self.get_most_recent_closed_element()
55-
if most_recent_closed and most_recent_closed.tag == tag:
56-
# Ignore this call; we've already closed it.
57-
return
58-
open_element = self.get_open_element()
59-
if open_element and open_element.tag == tag:
60-
_ = self.stack.pop()
61-
self.append_element_child(open_element)
62-
return
41+
def handle_startendtag(
42+
self, tag: str, attrs: t.Sequence[tuple[str, str | None]]
43+
) -> None:
44+
node = Element(tag, attrs=LastUpdatedOrderedDict(attrs), children=[])
45+
self.append_element_child(node)
6346

47+
def handle_endtag(self, tag: str) -> None:
6448
if not self.stack:
6549
raise ValueError(f"Unexpected closing tag </{tag}> with no open element.")
6650

@@ -99,13 +83,6 @@ def get_open_element(self) -> Element | None:
9983
"""Return the currently open Element, if any."""
10084
return self.stack[-1] if self.stack else None
10185

102-
def get_most_recent_closed_element(self) -> Element | None:
103-
"""Return the most recently closed Element, if any."""
104-
parent = self.get_parent()
105-
if parent.children and isinstance(parent.children[-1], Element):
106-
return parent.children[-1]
107-
return None
108-
10986
def append_element_child(self, child: Element) -> None:
11087
parent = self.get_parent()
11188
node: Element | Fragment = child

tdom/parser_test.py

Lines changed: 22 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -227,16 +227,35 @@ def test_parse_unexpected_closing_tag():
227227
_ = parse_html("Unopened</div>")
228228

229229

230-
def test_nested_self_closing_tags():
231-
node = parse_html("<div></div><br>")
230+
def test_self_closing_tags():
231+
node = parse_html("<div/><p></p>")
232232
assert node == Fragment(
233233
children=[
234234
Element("div"),
235-
Element("br"),
235+
Element("p"),
236236
]
237237
)
238238

239239

240+
def test_nested_self_closing_tags():
241+
node = parse_html("<div><br><div /><br></div>")
242+
assert node == Element(
243+
"div", children=[Element("br"), Element("div"), Element("br")]
244+
)
245+
node = parse_html("<div><div /></div>")
246+
assert node == Element("div", children=[Element("div")])
247+
248+
249+
def test_self_closing_tags_unexpected_closing_tag():
250+
with pytest.raises(ValueError):
251+
_ = parse_html("<div /></div>")
252+
253+
254+
def test_self_closing_void_tags_unexpected_closing_tag():
255+
with pytest.raises(ValueError):
256+
_ = parse_html("<input /></input>")
257+
258+
240259
def test_parse_html_iter_preserves_chunks():
241260
chunks = [
242261
"<div>",

0 commit comments

Comments
 (0)