Skip to content

Commit b9aac11

Browse files
committed
improve: improve import time with lazy regex and imports
1 parent 27bce74 commit b9aac11

File tree

8 files changed, with 57 additions and 25 deletions.

8 files changed, with 57 additions and 25 deletions.

pyproject.toml

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -156,6 +156,13 @@ commands = [
156156
["python", "-m", "timeit", "from mdformat._cli import run", 'run(["README.md", "docs/", "--check", "--wrap", "50"])'],
157157
]
158158

159+
[tool.tox.env."benchmark-import"]
160+
description = "Measure module import times. Tox sends mdformat output to stderr, so to filter use e.g. `tox -e benchmark-import 2> >(grep mdformat)`."
161+
deps = []
162+
commands = [
163+
["python", "-X", "importtime", "-m", "mdformat"],
164+
]
165+
159166

160167
[tool.coverage.run]
161168
source = ["mdformat"]

src/mdformat/codepoints/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
"ASCII_WHITESPACE",
66
)
77

8-
import warnings
98

109
from mdformat.codepoints._unicode_punctuation import UNICODE_PUNCTUATION
1110
from mdformat.codepoints._unicode_whitespace import UNICODE_WHITESPACE
@@ -19,6 +18,8 @@ def __getattr__(name: str) -> frozenset[str]:
1918
Used during the deprecation period of `ASCII_WHITESPACE`.
2019
"""
2120
if name == "ASCII_WHITESPACE":
21+
import warnings
22+
2223
warnings.warn(
2324
"ASCII_WHITESPACE is deprecated because CommonMark v0.30 no longer "
2425
"defines ASCII whitespace.",

src/mdformat/plugins.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,11 +4,11 @@
44
from collections.abc import Callable, Mapping
55
from typing import TYPE_CHECKING, Any, Protocol
66

7-
from markdown_it import MarkdownIt
8-
97
from mdformat._compat import importlib_metadata
108

119
if TYPE_CHECKING:
10+
from markdown_it import MarkdownIt
11+
1212
from mdformat.renderer.typing import Postprocess, Render
1313

1414

src/mdformat/renderer/__init__.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -13,13 +13,15 @@
1313
import logging
1414
import string
1515
from types import MappingProxyType
16-
from typing import Any
17-
18-
from markdown_it.token import Token
16+
from typing import TYPE_CHECKING, Any
1917

2018
from mdformat.renderer._context import DEFAULT_RENDERERS, WRAP_POINT, RenderContext
2119
from mdformat.renderer._tree import RenderTreeNode
22-
from mdformat.renderer.typing import Postprocess
20+
21+
if TYPE_CHECKING:
22+
from markdown_it.token import Token
23+
24+
from mdformat.renderer.typing import Postprocess
2325

2426
LOGGER = logging.getLogger(__name__)
2527

src/mdformat/renderer/_context.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@
1515
from mdformat import codepoints
1616
from mdformat._conf import DEFAULT_OPTS
1717
from mdformat.renderer._util import (
18-
RE_CHAR_REFERENCE,
1918
decimalify_leading,
2019
decimalify_trailing,
2120
escape_asterisk_emphasis,
@@ -27,6 +26,7 @@
2726
is_tight_list_item,
2827
longest_consecutive_sequence,
2928
maybe_add_link_brackets,
29+
re_char_reference,
3030
)
3131

3232
if TYPE_CHECKING:
@@ -137,7 +137,7 @@ def text(node: RenderTreeNode, context: RenderContext) -> str:
137137

138138
# Escape "&" if it starts a sequence that can be interpreted as
139139
# a character reference.
140-
text = RE_CHAR_REFERENCE.sub(r"\\\g<0>", text)
140+
text = re_char_reference().sub(r"\\\g<0>", text)
141141

142142
# The parser can give us consecutive newlines which can break
143143
# the markdown structure. Replace two or more consecutive newlines

src/mdformat/renderer/_util.py

Lines changed: 23 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
from collections.abc import Iterable
4+
import functools
45
import html.entities
56
import re
67
from typing import TYPE_CHECKING
@@ -10,20 +11,28 @@
1011
if TYPE_CHECKING:
1112
from mdformat.renderer import RenderTreeNode
1213

13-
# Regex that finds character references.
14-
# The reference can be either
15-
# 1. decimal representation, e.g. &#11;
16-
# 2. hex representation, e.g. &#x1e;
17-
# 3. HTML5 entity reference, e.g. &nbsp;
18-
RE_CHAR_REFERENCE = re.compile(
19-
"&(?:"
20-
+ "#[0-9]{1,7}"
21-
+ "|"
22-
+ "#[Xx][0-9A-Fa-f]{1,6}"
23-
+ "|"
24-
+ "|".join({c.rstrip(";") for c in html.entities.html5})
25-
+ ");"
26-
)
14+
15+
@functools.cache
16+
def re_char_reference() -> re.Pattern[str]:
17+
"""Return a regex that finds character references.
18+
19+
The reference can be either:
20+
1. decimal representation, e.g. &#11;
21+
2. hex representation, e.g. &#x1e;
22+
3. HTML5 entity reference, e.g. &nbsp;
23+
24+
This cached function compiles the regex lazily,
25+
as compilation can take over 20ms.
26+
"""
27+
return re.compile(
28+
"&(?:"
29+
+ "#[0-9]{1,7}"
30+
+ "|"
31+
+ "#[Xx][0-9A-Fa-f]{1,6}"
32+
+ "|"
33+
+ "|".join({c.rstrip(";") for c in html.entities.html5})
34+
+ ");"
35+
)
2736

2837

2938
def is_tight_list(node: RenderTreeNode) -> bool:

tests/test_api.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -146,3 +146,12 @@ def test_mdrenderer_no_finalize(tmp_path):
146146
def test_ascii_whitespace_deprecation():
147147
with pytest.warns(DeprecationWarning):
148148
mdformat.codepoints.ASCII_WHITESPACE
149+
150+
151+
def test_import_typing():
152+
"""Try to import mdformat.renderer.typing.
153+
154+
The module consists of annotation types only, so mdformat never
155+
imports it at runtime. This test ensures that it still runs.
156+
"""
157+
import mdformat.renderer.typing # noqa: F401

tests/utils.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,15 @@
1-
import json
1+
from __future__ import annotations
22

3-
from markdown_it import MarkdownIt
3+
import json
4+
from typing import TYPE_CHECKING
45

56
from mdformat._cli import run
67
from mdformat._conf import read_toml_opts
78
from mdformat.renderer import RenderContext, RenderTreeNode
89

10+
if TYPE_CHECKING:
11+
from markdown_it import MarkdownIt
12+
913
UNFORMATTED_MARKDOWN = "\n\n# A header\n\n"
1014
FORMATTED_MARKDOWN = "# A header\n"
1115

0 commit comments

Comments (0)