Skip to content

Commit fd1daae

Browse files
committed
My own refactoring of the architecture along the lines discussed with Ian.
This borrows heavily from an early cut of Ian's branch, but has my own flavor in a few key places. Squashed commit of the following: commit 26c39f8 Author: Dave <[email protected]> Date: Tue Dec 9 15:47:26 2025 -0800 No inner function, maybe commit 89526cb Author: Dave <[email protected]> Date: Tue Dec 9 15:46:43 2025 -0800 Unify flattening commit 788c14d Author: Dave <[email protected]> Date: Tue Dec 9 15:39:45 2025 -0800 Minor cleanups commit a0e696c Author: Dave <[email protected]> Date: Tue Dec 9 15:30:14 2025 -0800 Uh, this was silly code; clean it up commit 8021e54 Author: Dave <[email protected]> Date: Tue Dec 9 15:23:53 2025 -0800 Continue cleaning up commit 25a6c55 Author: Dave <[email protected]> Date: Tue Dec 9 15:15:30 2025 -0800 Better _resolve_t_text_ref() commit 8400dbf Author: Dave <[email protected]> Date: Tue Dec 9 15:13:46 2025 -0800 Blargh, don't like commit e814d33 Author: Dave <[email protected]> Date: Tue Dec 9 15:09:34 2025 -0800 Re-add zero-arg function invocation. Which is... fine, I guess. commit 0a1c88e Author: Dave <[email protected]> Date: Tue Dec 9 15:07:42 2025 -0800 Full test pass; still lots to do commit 5b12db5 Author: Dave <[email protected]> Date: Tue Dec 9 14:11:48 2025 -0800 Closer, but not yet at, sanity commit 3f1f075 Author: Dave <[email protected]> Date: Tue Dec 9 13:37:24 2025 -0800 In progress: better attribute handling commit de9e98d Author: Dave <[email protected]> Date: Tue Dec 9 13:07:48 2025 -0800 Consider resolution as part of TNode itself commit 23d8d5b Author: Dave <[email protected]> Date: Tue Dec 9 12:55:17 2025 -0800 One more test commit 612369c Author: Dave <[email protected]> Date: Tue Dec 9 12:50:50 2025 -0800 Enough parser tests to be happy, for now. 
commit de4d46f Author: Dave <[email protected]> Date: Tue Dec 9 12:38:42 2025 -0800 Test attributes commit d3f28bf Author: Dave <[email protected]> Date: Tue Dec 9 11:49:24 2025 -0800 Fix parser tests commit abd67cd Author: Dave <[email protected]> Date: Tue Dec 9 11:33:36 2025 -0800 In progress: start to restore sanity and tests commit a054992 Author: Dave <[email protected]> Date: Tue Dec 9 10:44:28 2025 -0800 Work in progress: implement my own two-layer arch
1 parent 737a7f2 commit fd1daae

File tree

9 files changed

+942
-561
lines changed

9 files changed

+942
-561
lines changed

README.md

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -385,19 +385,6 @@ result = html(t"<ul><{Items} /></ul>")
385385
assert str(result) == "<ul><li>first</li><li>second</li></ul>"
386386
```
387387

388-
If you prefer, you can use **explicit fragment syntax** to wrap multiple
389-
elements in a `Fragment`:
390-
391-
```python
392-
def Items() -> Node:
393-
return html(t'<><li>first</li><li>second</li></>')
394-
395-
result = html(t'<ul><{Items} /></ul>')
396-
assert str(result) == "<ul><li>first</li><li>second</li></ul>"
397-
```
398-
399-
This is not required, but it can make your intent clearer.
400-
401388
#### Class-based components
402389

403390
Component functions are great for simple use cases, but for more complex

tdom/escaping.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
import re
2+
from string.templatelib import Interpolation, Template
3+
4+
from markupsafe import Markup
5+
from markupsafe import escape as markup_escape
6+
7+
from .utils import format_interpolation as base_format_interpolation
8+
9+
10+
def _format_safe(value: object, format_spec: str) -> str:
11+
"""Use Markup() to mark a value as safe HTML."""
12+
assert format_spec == "safe"
13+
return Markup(value)
14+
15+
16+
def _format_unsafe(value: object, format_spec: str) -> str:
17+
"""Convert a value to a plain string, forcing it to be treated as unsafe."""
18+
assert format_spec == "unsafe"
19+
return str(value)
20+
21+
22+
CUSTOM_FORMATTERS = (("safe", _format_safe), ("unsafe", _format_unsafe))
23+
24+
25+
def format_interpolation(interpolation: Interpolation) -> object:
    """Format ``interpolation`` with the shared helper and this module's formatters.

    Delegates to ``base_format_interpolation``, passing ``CUSTOM_FORMATTERS``
    (the ``"safe"`` / ``"unsafe"`` handlers) as the ``formatters`` argument.
    """
    formatted = base_format_interpolation(interpolation, formatters=CUSTOM_FORMATTERS)
    return formatted
30+
31+
32+
def render_template_as_f(template: Template) -> str:
    """Render ``template`` to a single string.

    Static string parts are kept verbatim; every other part is passed through
    ``format_interpolation`` and converted with ``str()`` before joining.
    """
    return "".join(
        piece if isinstance(piece, str) else str(format_interpolation(piece))
        for piece in template
    )
41+
42+
43+
escape_html_text = markup_escape # unify api for test of project
44+
45+
46+
def escape_html_comment(text):
    """Escape text injected into an HTML comment.

    The text placed between ``<!--`` and ``-->`` must not start with ``>`` or
    ``->``, must not contain ``<!--``, ``-->`` or ``--!>``, and must not end
    with ``<!-``. Each offending ``<`` or ``>`` is replaced with its character
    reference (``&lt;`` / ``&gt;``).

    Args:
        text: The raw comment text.

    Returns:
        The escaped text. Falsy input (for example ``""``) is returned as-is.
    """
    GT = "&gt;"
    LT = "&lt;"

    if not text:
        return text

    # - text must not start with the string ">"
    if text[0] == ">":
        text = GT + text[1:]

    # - nor start with the string "->"
    if text[:2] == "->":
        text = "-" + GT + text[2:]

    # - nor contain the strings "<!--", "-->", or "--!>"
    # str.replace handles every occurrence, including one at index 0 or at
    # the very end of the text, and never drops the remainder of the string.
    text = text.replace("<!--", LT + "!--")
    text = text.replace("-->", "--" + GT)
    text = text.replace("--!>", "--!" + GT)

    # - nor end with the string "<!-".
    if text[-3:] == "<!-":
        text = text[:-3] + LT + "!-"

    return text
74+
75+
76+
def escape_html_style(text):
    """Escape text injected into a STYLE element.

    Every ASCII case-insensitive occurrence of ``</style`` is rewritten to
    ``&lt;/style`` so the text cannot close the surrounding STYLE element.
    Matching the ``</style`` prefix (rather than the exact string
    ``</style>``) also covers end tags written as ``</style >``,
    ``</style/>`` or with a newline before the ``>``.

    Args:
        text: The raw text to place inside the STYLE element.

    Returns:
        The text with every ``</style`` prefix escaped.
    """
    # @TODO: We should maybe make this opt-in or throw a warning that says to
    # avoid dropping content in STYLEs.
    return re.sub("</style", "&lt;/style", text, flags=re.I | re.A)
84+
85+
86+
def escape_html_script(text):
    r"""Escape text injected into a SCRIPT element.

    https://html.spec.whatwg.org/multipage/scripting.html#restrictions-for-contents-of-script-elements

    (from link) The easiest and safest way to avoid the rather strange restrictions
    described in this section is to always escape an ASCII case-insensitive
    match for:
    - "<!--" as "\x3c!--"
    - "<script" as "\x3cscript"
    - "</script" as "\x3c/script"

    This does not make your script *run*; it just tries to prevent accidentally injecting
    *another* SCRIPT tag into a SCRIPT tag being rendered.

    Args:
        text: The raw text to place inside the SCRIPT element.

    Returns:
        The text with the leading ``<`` of every match replaced by the four
        literal characters ``\x3c``; the rest of the match keeps its case.
    """
    # @TODO: We should maybe make this opt-in or throw a warning that says to
    # avoid dropping content in SCRIPTs in almost all cases and use
    # data-* attributes to dump JSON if needed. Or whatever the latest
    # best-practices version is because this seems like a "we think we got all the cases"
    # situation even in the living standard.
    #
    # The replacement is built in a callable on purpose: a plain "\x3c"
    # literal is just "<" (no escaping at all), and a backslash-x inside an
    # re.sub replacement template is rejected as a bad escape.
    return re.sub(
        r"<(!--|/?script)",
        lambda match: "\\x3c" + match.group(1),
        text,
        flags=re.I | re.A,
    )

tdom/nodes.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
from dataclasses import dataclass, field
22

3-
from markupsafe import escape
3+
from .escaping import (
4+
escape_html_comment,
5+
escape_html_script,
6+
escape_html_style,
7+
escape_html_text,
8+
)
49

510
# See https://developer.mozilla.org/en-US/docs/Glossary/Void_element
611
VOID_ELEMENTS = frozenset(
@@ -45,7 +50,7 @@ class Text(Node):
4550

4651
def __str__(self) -> str:
4752
# Use markupsafe's escape to handle HTML escaping
48-
return escape(self.text)
53+
return escape_html_text(self.text)
4954

5055
def __eq__(self, other: object) -> bool:
5156
# This is primarily of use for testing purposes. We only consider
@@ -66,7 +71,7 @@ class Comment(Node):
6671
text: str
6772

6873
def __str__(self) -> str:
69-
return f"<!--{self.text}-->"
74+
return f"<!--{escape_html_comment(self.text)}-->"
7075

7176

7277
@dataclass(slots=True)
@@ -100,16 +105,38 @@ def is_void(self) -> bool:
100105
def is_content(self) -> bool:
101106
return self.tag in CONTENT_ELEMENTS
102107

108+
def _children_to_str(self):
    """Serialize this element's children to a string.

    For ``script`` and ``style`` elements only ``Text`` children are allowed:
    their raw text is concatenated and escaped once as a whole with the
    tag-specific escaper. For every other tag each child is serialized with
    ``str()`` and the results are joined.

    Raises:
        ValueError: If a ``script``/``style`` element has a non-``Text`` child.
    """
    if not self.children:
        return ""

    # Ordinary elements: each child renders (and escapes) itself.
    if self.tag not in ("script", "style"):
        return "".join(str(child) for child in self.children)

    # Raw-text elements: gather the unescaped text first, escape it in bulk after.
    pieces = []
    for node in self.children:
        if not isinstance(node, Text):
            raise ValueError(
                "Cannot serialize non-text content inside a script tag."
            )
        pieces.append(node.text)
    raw_text = "".join(pieces)

    if self.tag == "script":
        return escape_html_script(raw_text)
    if self.tag == "style":
        return escape_html_style(raw_text)
    # Not reachable given the membership check above; kept as a safety net.
    raise ValueError("Unsupported tag for single-level bulk escaping.")
129+
103130
def __str__(self) -> str:
104131
# We use markupsafe's escape to handle HTML escaping of attribute values
105132
# which means it's possible to mark them as safe if needed.
106133
attrs_str = "".join(
107-
f" {key}" if value is None else f' {key}="{escape(value)}"'
134+
f" {key}" if value is None else f' {key}="{escape_html_text(value)}"'
108135
for key, value in self.attrs.items()
109136
)
110137
if self.is_void:
111138
return f"<{self.tag}{attrs_str} />"
112139
if not self.children:
113140
return f"<{self.tag}{attrs_str}></{self.tag}>"
114-
children_str = "".join(str(child) for child in self.children)
141+
children_str = self._children_to_str()
115142
return f"<{self.tag}{attrs_str}>{children_str}</{self.tag}>"

0 commit comments

Comments
 (0)