Skip to content

Commit b103729

Browse files
Store attrs in the order they were last updated. (#73)
1 parent 5816df7 commit b103729

File tree

2 files changed

+20
-1
lines changed

2 files changed

+20
-1
lines changed

tdom/parser.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import random
22
import string
33
import typing as t
4+
from collections import OrderedDict
45
from html.parser import HTMLParser
56

67
from markupsafe import Markup
@@ -31,7 +32,7 @@ def __init__(self):
3132
def handle_starttag(
3233
self, tag: str, attrs: t.Sequence[tuple[str, str | None]]
3334
) -> None:
34-
node = Element(tag, attrs=dict(attrs), children=[])
35+
node = Element(tag, attrs=LastUpdatedOrderedDict(attrs), children=[])
3536
self.stack.append(node)
3637

3738
# Unfortunately, Python's built-in HTMLParser has inconsistent behavior
@@ -167,3 +168,16 @@ def parse_html(input: str | t.Iterable[str]) -> Node:
167168
parser.feed(chunk)
168169
parser.close()
169170
return parser.get_node()
171+
172+
173+
class LastUpdatedOrderedDict(OrderedDict):
    """
    An ``OrderedDict`` whose iteration order follows the most recent write.

    A plain dict keeps a key at the position where it was first inserted,
    even when the key is assigned again later. Here, every assignment sends
    the key to the end, so re-assigning an existing key repositions it.

    @NOTE: The recipe is taken directly from the Python documentation.
    """

    def __setitem__(self, key, value):
        # Store the value first, then push the key to the tail so the
        # ordering reflects this latest assignment.
        super().__setitem__(key, value)
        self.move_to_end(key, last=True)

tdom/parser_test.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,11 @@ def test_parse_element_with_attributes():
6060
)
6161

6262

63+
def test_parse_element_attribute_order():
    """A repeated attribute is ordered by its last occurrence in the tag."""
    element = parse_html('<a title="a" href="b" title="c"></a>')
    # "title" appears twice; the later value wins and moves after "href".
    expected_attrs = [("href", "b"), ("title", "c")]
    assert list(element.attrs.items()) == expected_attrs
66+
67+
6368
def test_parse_comment():
6469
node = parse_html("<!-- This is a comment -->")
6570
assert node == Comment(" This is a comment ")

0 commit comments

Comments
 (0)