2 files changed: +20 -1 lines changed
Original file line number | Diff line number | Diff line change
11import random
22import string
33import typing as t
4+ from collections import OrderedDict
45from html .parser import HTMLParser
56
67from markupsafe import Markup
@@ -31,7 +32,7 @@ def __init__(self):
3132 def handle_starttag (
3233 self , tag : str , attrs : t .Sequence [tuple [str , str | None ]]
3334 ) -> None :
34- node = Element (tag , attrs = dict (attrs ), children = [])
35+ node = Element (tag , attrs = LastUpdatedOrderedDict (attrs ), children = [])
3536 self .stack .append (node )
3637
3738 # Unfortunately, Python's built-in HTMLParser has inconsistent behavior
@@ -167,3 +168,16 @@ def parse_html(input: str | t.Iterable[str]) -> Node:
167168 parser .feed (chunk )
168169 parser .close ()
169170 return parser .get_node ()
171+
172+
class LastUpdatedOrderedDict(OrderedDict):
    """Ordered mapping whose keys are kept in the order they were last assigned.

    A regular ``dict`` leaves a key at its original insertion position when
    that key is assigned again.  This mapping instead moves the key to the
    end on every assignment, so iteration order reflects the most recent
    write to each key.

    NOTE: Adapted from the ``OrderedDict`` recipe in the Python
    ``collections`` documentation.
    """

    def __setitem__(self, key: t.Hashable, value: t.Any) -> None:
        """Store ``value`` under ``key`` and move ``key`` to the end."""
        super().__setitem__(key, value)
        # Re-assigning an existing key would otherwise keep its old
        # position; pushing it to the end is what makes the order
        # "last updated" rather than "first inserted".
        self.move_to_end(key)
Original file line number Diff line number Diff line change @@ -60,6 +60,11 @@ def test_parse_element_with_attributes():
6060 )
6161
6262
def test_parse_element_attribute_order():
    """A repeated attribute keeps its last value and moves to the end."""
    # ``title`` appears twice in the tag: the later value ("c") must win,
    # and ``title`` must come after ``href`` because it was assigned last.
    node = parse_html('<a title="a" href="b" title="c"></a>')
    assert list(node.attrs.items()) == [("href", "b"), ("title", "c")]
66+
67+
6368def test_parse_comment ():
6469 node = parse_html ("<!-- This is a comment -->" )
6570 assert node == Comment (" This is a comment " )
You can’t perform that action at this time.
0 commit comments