Skip to content

Commit f587471

Browse files
authored
[DRAFT] Architecture update based on conversations with Ian (#83)
* My own refactoring of the architecture along the lines discussed with Ian. This borrows heavily from an early cut of Ian's branch, but has my own flavor in a few key places. Squashed commit of the following: commit 26c39f8 Author: Dave <[email protected]> Date: Tue Dec 9 15:47:26 2025 -0800 No inner function, maybe commit 89526cb Author: Dave <[email protected]> Date: Tue Dec 9 15:46:43 2025 -0800 Unify flattening commit 788c14d Author: Dave <[email protected]> Date: Tue Dec 9 15:39:45 2025 -0800 Minor cleanups commit a0e696c Author: Dave <[email protected]> Date: Tue Dec 9 15:30:14 2025 -0800 Uh, this was silly code; clean it up commit 8021e54 Author: Dave <[email protected]> Date: Tue Dec 9 15:23:53 2025 -0800 Continue cleaning up commit 25a6c55 Author: Dave <[email protected]> Date: Tue Dec 9 15:15:30 2025 -0800 Better _resolve_t_text_ref() commit 8400dbf Author: Dave <[email protected]> Date: Tue Dec 9 15:13:46 2025 -0800 Blargh, don't like commit e814d33 Author: Dave <[email protected]> Date: Tue Dec 9 15:09:34 2025 -0800 Re-add zero-arg function invocation. Which is... fine, I guess. commit 0a1c88e Author: Dave <[email protected]> Date: Tue Dec 9 15:07:42 2025 -0800 Full test pass; still lots to do commit 5b12db5 Author: Dave <[email protected]> Date: Tue Dec 9 14:11:48 2025 -0800 Closer, but not yet at, sanity commit 3f1f075 Author: Dave <[email protected]> Date: Tue Dec 9 13:37:24 2025 -0800 In progress: better attribute handling commit de9e98d Author: Dave <[email protected]> Date: Tue Dec 9 13:07:48 2025 -0800 Consider resolution as part of TNode itself commit 23d8d5b Author: Dave <[email protected]> Date: Tue Dec 9 12:55:17 2025 -0800 One more test commit 612369c Author: Dave <[email protected]> Date: Tue Dec 9 12:50:50 2025 -0800 Enough parser tests to be happy, for now. 
commit de4d46f Author: Dave <[email protected]> Date: Tue Dec 9 12:38:42 2025 -0800 Test attributes commit d3f28bf Author: Dave <[email protected]> Date: Tue Dec 9 11:49:24 2025 -0800 Fix parser tests commit abd67cd Author: Dave <[email protected]> Date: Tue Dec 9 11:33:36 2025 -0800 In progress: start to restore sanity and tests commit a054992 Author: Dave <[email protected]> Date: Tue Dec 9 10:44:28 2025 -0800 Work in progress: implement my own two-layer arch * Update comment * Update error message * Add fragment syntax back to README * Add fragment syntax back to README * Start to clean up * Add more tests. * One more question
1 parent e8f219a commit f587471

18 files changed

+1330
-1334
lines changed

README.md

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -599,15 +599,12 @@ used internally by the `html()` function but can also be used independently:
599599

600600
```python
601601
from string.templatelib import Interpolation
602-
from tdom.utils import format_interpolation, convert
602+
from tdom.format import convert
603603

604604
# Test convert function
605605
assert convert("hello", "s") == "hello"
606606
assert convert("hello", "r") == "'hello'"
607607
assert convert(42, None) == 42
608-
609-
# format_interpolation is used internally for custom format specifiers
610-
# The html() function uses this to implement :safe and :unsafe specifiers
611608
```
612609

613610
**`convert()`**: Applies conversion specifiers (`!a`, `!r`, `!s`) to values

tdom/escaping.py

Lines changed: 27 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,16 @@
11
import re
2-
from string.templatelib import Interpolation
32

4-
from markupsafe import Markup
3+
from markupsafe import escape as markup_escape
54

6-
from .utils import format_interpolation as base_format_interpolation
5+
escape_html_text = markup_escape # unify API for the rest of the project
76

87

9-
def _format_safe(value: object, format_spec: str) -> str:
10-
"""Use Markup() to mark a value as safe HTML."""
11-
assert format_spec == "safe"
12-
return Markup(value)
8+
GT = "&gt;"
9+
LT = "&lt;"
1310

1411

15-
def _format_unsafe(value: object, format_spec: str) -> str:
16-
"""Convert a value to a plain string, forcing it to be treated as unsafe."""
17-
assert format_spec == "unsafe"
18-
return str(value)
19-
20-
21-
CUSTOM_FORMATTERS = (("safe", _format_safe), ("unsafe", _format_unsafe))
22-
23-
24-
def format_interpolation(interpolation: Interpolation) -> object:
25-
return base_format_interpolation(
26-
interpolation,
27-
formatters=CUSTOM_FORMATTERS,
28-
)
29-
30-
31-
def escape_html_comment(text):
12+
def escape_html_comment(text: str) -> str:
3213
"""Escape text injected into an HTML comment."""
33-
GT = "&gt;"
34-
LT = "&lt;"
35-
3614
if not text:
3715
return text
3816
# - text must not start with the string ">"
@@ -44,12 +22,9 @@ def escape_html_comment(text):
4422
text = "-" + GT + text[2:]
4523

4624
# - nor contain the strings "<!--", "-->", or "--!>"
47-
if (index := text.find("<!--")) and index != -1:
48-
text = text[:index] + LT + text[index + 1]
49-
if (index := text.find("-->")) and index != -1:
50-
text = text[: index + 2] + GT + text[index + 3]
51-
if (index := text.find("--!>")) and index != -1:
52-
text = text[: index + 3] + GT + text[index + 4]
25+
text = text.replace("<!--", LT + "!--")
26+
text = text.replace("-->", "--" + GT)
27+
text = text.replace("--!>", "--!" + GT)
5328

5429
# - nor end with the string "<!-".
5530
if text[-3:] == "<!-":
@@ -58,16 +33,27 @@ def escape_html_comment(text):
5833
return text
5934

6035

61-
def escape_html_style(text):
62-
LT = "&lt;"
63-
close_str = "</style>"
64-
close_str_re = re.compile(close_str, re.I | re.A)
65-
replace_str = LT + close_str[1:]
66-
return re.sub(close_str_re, replace_str, text)
36+
STYLE_RES = ((re.compile("</style>", re.I | re.A), LT + "/style>"),)
6737

6838

69-
def escape_html_script(text):
39+
def escape_html_style(text: str) -> str:
40+
"""Escape text injected into an HTML style element."""
41+
for matche_re, replace_text in STYLE_RES:
42+
text = re.sub(matche_re, replace_text, text)
43+
return text
44+
45+
46+
SCRIPT_RES = (
47+
(re.compile("<!--", re.I | re.A), "\x3c!--"),
48+
(re.compile("<script", re.I | re.A), "\x3cscript"),
49+
(re.compile("</script", re.I | re.A), "\x3c/script"),
50+
)
51+
52+
53+
def escape_html_script(text: str) -> str:
7054
"""
55+
Escape text injected into an HTML script element.
56+
7157
https://html.spec.whatwg.org/multipage/scripting.html#restrictions-for-contents-of-script-elements
7258
7359
(from link) The easiest and safest way to avoid the rather strange restrictions
@@ -76,15 +62,7 @@ def escape_html_script(text):
7662
- "<!--" as "\x3c!--"
7763
- "<script" as "\x3cscript"
7864
- "</script" as "\x3c/script"
79-
80-
This does not make a script *run*; it just tries to prevent accidentally injecting
81-
*another* SCRIPT tag into a SCRIPT tag being rendered.
8265
"""
83-
match_to_replace = (
84-
(re.compile("<!--", re.I | re.A), "\x3c!--"),
85-
(re.compile("<script", re.I | re.A), "\x3cscript"),
86-
(re.compile("</script", re.I | re.A), "\x3c/script"),
87-
)
88-
for match_re, replace_text in match_to_replace:
66+
for match_re, replace_text in SCRIPT_RES:
8967
text = re.sub(match_re, replace_text, text)
9068
return text

tdom/escaping_test.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
from .escaping import escape_html_comment, escape_html_script, escape_html_style
2+
3+
4+
def test_escape_html_comment_empty() -> None:
5+
assert escape_html_comment("") == ""
6+
7+
8+
def test_escape_html_comment_no_special() -> None:
9+
assert escape_html_comment("This is a comment.") == "This is a comment."
10+
11+
12+
def test_escape_html_comment_starts_with_gt() -> None:
13+
assert escape_html_comment(">This is a comment.") == "&gt;This is a comment."
14+
15+
16+
def test_escape_html_comment_starts_with_dash_gt() -> None:
17+
assert escape_html_comment("->This is a comment.") == "-&gt;This is a comment."
18+
19+
20+
def test_escape_html_comment_contains_special_strings() -> None:
21+
input_text = "This is <!-- a comment --> with --!> special strings."
22+
expected_output = "This is &lt;!-- a comment --&gt; with --!&gt; special strings."
23+
assert escape_html_comment(input_text) == expected_output
24+
25+
26+
def test_escape_html_comment_ends_with_lt_dash() -> None:
27+
assert escape_html_comment("This is a comment<!-") == "This is a comment&lt;!-"
28+
29+
30+
def test_escape_html_style() -> None:
31+
input_text = "body { color: red; }</style> p { font-SIZE: 12px; }</STYLE>"
32+
expected_output = (
33+
"body { color: red; }&lt;/style> p { font-SIZE: 12px; }&lt;/style>"
34+
)
35+
assert escape_html_style(input_text) == expected_output
36+
37+
38+
def test_escape_html_script() -> None:
39+
input_text = "<!-- <script>var a = 1;</script> </SCRIPT>"
40+
expected_output = "\x3c!-- \x3cscript>var a = 1;\x3c/script> </script>"
41+
assert escape_html_script(input_text) == expected_output

tdom/format.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
import typing as t
2+
from string.templatelib import Interpolation, Template
3+
4+
5+
@t.overload
6+
def convert[T](value: T, conversion: None) -> T: ...
7+
8+
9+
@t.overload
10+
def convert(value: object, conversion: t.Literal["a", "r", "s"]) -> str: ...
11+
12+
13+
def convert[T](value: T, conversion: t.Literal["a", "r", "s"] | None) -> T | str:
14+
"""
15+
Convert a value according to the given conversion specifier.
16+
17+
In the future, something like this should probably ship with Python itself.
18+
"""
19+
if conversion == "a":
20+
return ascii(value)
21+
elif conversion == "r":
22+
return repr(value)
23+
elif conversion == "s":
24+
return str(value)
25+
else:
26+
return value
27+
28+
29+
type FormatMatcher = t.Callable[[str], bool]
30+
"""A predicate function that returns True if a given format specifier matches its criteria."""
31+
32+
type CustomFormatter = t.Callable[[object, str], str]
33+
"""A function that takes a value and a format specifier and returns a formatted string."""
34+
35+
type MatcherAndFormatter = tuple[str | FormatMatcher, CustomFormatter]
36+
"""
37+
A pair of a matcher and its corresponding formatter.
38+
39+
The matcher is used to determine if the formatter should be applied to a given
40+
format specifier. If the matcher is a string, it must exactly match the format
41+
specifier. If it is a FormatMatcher, it is called with the format specifier and
42+
should return True if the formatter should be used.
43+
"""
44+
45+
46+
def _matcher_matches(matcher: str | FormatMatcher, format_spec: str) -> bool:
47+
"""Check if a matcher matches a given format specifier."""
48+
return matcher == format_spec if isinstance(matcher, str) else matcher(format_spec)
49+
50+
51+
def _format_interpolation(
52+
value: object,
53+
format_spec: str,
54+
conversion: t.Literal["a", "r", "s"] | None,
55+
*,
56+
formatters: t.Sequence[MatcherAndFormatter],
57+
) -> object:
58+
converted = convert(value, conversion)
59+
if format_spec:
60+
for matcher, formatter in formatters:
61+
if _matcher_matches(matcher, format_spec):
62+
return formatter(converted, format_spec)
63+
return format(converted, format_spec)
64+
return converted
65+
66+
67+
def format_interpolation(
68+
interpolation: Interpolation,
69+
*,
70+
formatters: t.Sequence[MatcherAndFormatter] = tuple(),
71+
) -> object:
72+
"""
73+
Format an Interpolation's value according to its format spec and conversion.
74+
75+
PEP 750 allows t-string processing code to decide whether, and how, to
76+
interpret format specifiers. This function takes an optional sequence of
77+
(matcher, formatter) pairs. If a matcher matches the given format
78+
spec, the corresponding formatter is used to format the value. If no
79+
matchers match, the default formatting behavior is used.
80+
81+
Conversions are always applied before formatting.
82+
"""
83+
return _format_interpolation(
84+
interpolation.value,
85+
interpolation.format_spec,
86+
interpolation.conversion,
87+
formatters=formatters,
88+
)
89+
90+
91+
def format_template(template: Template) -> str:
92+
"""Fully render a template by formatting its interpolations."""
93+
parts: list[str] = []
94+
for part in template:
95+
if isinstance(part, str):
96+
parts.append(part)
97+
else:
98+
parts.append(str(format_interpolation(part)))
99+
return "".join(parts)

tdom/format_test.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
from string.templatelib import Interpolation
2+
3+
from .format import convert, format_interpolation, format_template
4+
5+
6+
class Convertible:
7+
def __str__(self) -> str:
8+
return "Convertible str"
9+
10+
def __repr__(self) -> str:
11+
return "Convertible repr"
12+
13+
14+
def test_convert_none():
15+
value = Convertible()
16+
assert convert(value, None) is value
17+
18+
19+
def test_convert_a():
20+
value = Convertible()
21+
assert convert(value, "a") == "Convertible repr"
22+
assert convert("Café", "a") == "'Caf\\xe9'"
23+
24+
25+
def test_convert_r():
26+
value = Convertible()
27+
assert convert(value, "r") == "Convertible repr"
28+
29+
30+
def test_convert_s():
31+
value = Convertible()
32+
assert convert(value, "s") == "Convertible str"
33+
34+
35+
def test_format_interpolation_no_formatting():
36+
value = Convertible()
37+
interp = Interpolation(value, expression="", conversion=None, format_spec="")
38+
assert format_interpolation(interp) is value
39+
40+
41+
def test_format_interpolation_a():
42+
value = Convertible()
43+
interp = Interpolation(value, expression="", conversion="a", format_spec="")
44+
assert format_interpolation(interp) == "Convertible repr"
45+
46+
47+
def test_format_interpolation_r():
48+
value = Convertible()
49+
interp = Interpolation(value, expression="", conversion="r", format_spec="")
50+
assert format_interpolation(interp) == "Convertible repr"
51+
52+
53+
def test_format_interpolation_s():
54+
value = Convertible()
55+
interp = Interpolation(value, expression="", conversion="s", format_spec="")
56+
assert format_interpolation(interp) == "Convertible str"
57+
58+
59+
def test_format_interpolation_default_formatting():
60+
value = 42
61+
interp = Interpolation(value, expression="", conversion=None, format_spec="5d")
62+
assert format_interpolation(interp) == " 42"
63+
64+
65+
def test_format_interpolation_custom_formatter_match_exact():
66+
value = 42
67+
interp = Interpolation(value, expression="", conversion=None, format_spec="custom")
68+
69+
def formatter(val: object, spec: str) -> str:
70+
return f"formatted-{val}-{spec}"
71+
72+
assert (
73+
format_interpolation(interp, formatters=[("custom", formatter)])
74+
== "formatted-42-custom"
75+
)
76+
77+
78+
def test_format_interpolation_custom_formatter_match_predicate():
79+
value = 42
80+
interp = Interpolation(
81+
value, expression="", conversion=None, format_spec="custom123"
82+
)
83+
84+
def matcher(spec: str) -> bool:
85+
return spec.startswith("custom")
86+
87+
def formatter(val: object, spec: str) -> str:
88+
return f"formatted-{val}-{spec}"
89+
90+
assert (
91+
format_interpolation(interp, formatters=[(matcher, formatter)])
92+
== "formatted-42-custom123"
93+
)
94+
95+
96+
def test_format_template():
97+
t = t"Value: {42.19:.1f}, Text: {Convertible()!s}, Raw: {Convertible()!r}"
98+
result = format_template(t)
99+
assert result == "Value: 42.2, Text: Convertible str, Raw: Convertible repr"

0 commit comments

Comments
 (0)