Skip to content

Commit f568716

Browse files
committed
fix: regex in is_md_equal is too greedy
1 parent 3dd45d2 commit f568716

File tree

2 files changed

+39
-2
lines changed

2 files changed

+39
-2
lines changed

src/mdformat/_util.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,15 @@ def build_mdit(
4747
return mdit
4848

4949

50+
# Chars that markdown-it-py escapes when rendering code_inline:
51+
# https://github.com/executablebooks/markdown-it-py/blob/c5161b550f3c6c0a98d77e8389872405e8f9f9ee/markdown_it/common/utils.py#L138
52+
# Note that "&" is not included as it is used in the escape sequences of
53+
# these characters.
54+
_invalid_html_code_chars = '<>"'
55+
# A regex string that matches any single character other than the chars above
56+
_valid_html_code_char_re = rf"[^{re.escape(_invalid_html_code_chars)}]"
57+
58+
5059
def is_md_equal(
5160
md1: str,
5261
md2: str,
@@ -71,10 +80,11 @@ def is_md_equal(
7180
if codeformatters:
7281
langs_re = "|".join(re.escape(lang) for lang in codeformatters)
7382
html = re.sub(
74-
rf'<code class="language-(?:{langs_re})">.*</code>',
83+
rf'<code class="language-(?:{langs_re})">'
84+
rf"{_valid_html_code_char_re}*"
85+
r"</code>",
7586
"",
7687
html,
77-
flags=re.DOTALL,
7888
)
7989

8090
# Reduce all whitespace to a single space

tests/test_util.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,30 @@ def test_is_md_equal():
2121
paragr"""
2222
assert not is_md_equal(md1, md2)
2323
assert is_md_equal(md1, md2, codeformatters=("js", "go"))
24+
25+
26+
def test_is_md_equal__not():
27+
md1 = """
28+
```js
29+
console.log()
30+
```
31+
32+
paragr
33+
34+
```js
35+
console.log()
36+
```
37+
"""
38+
md2 = """
39+
```js
40+
bonsole.l()g
41+
```
42+
43+
A different paragraph
44+
45+
```js
46+
console.log()
47+
```
48+
"""
49+
assert not is_md_equal(md1, md2)
50+
assert not is_md_equal(md1, md2, codeformatters=("js",))

0 commit comments

Comments
 (0)