trentm · nicholasserra · Apr 6, 2025 · Apr 5, 2025 · Apr 5, 2025
diff --git a/CHANGES.md b/CHANGES.md
@@ -5,6 +5,7 @@
 - [pull #617] Add MarkdownFileLinks extra (#528)
 - [pull #622] Add missing block tags to regex (#620)
 - [pull #623] Don't escape plus signs in URLs (#621)
+- [pull #626] Fix XSS in safe_mode when encoding incomplete tags (#625)
 
 
 ## python-markdown2 2.5.3

diff --git a/lib/markdown2.py b/lib/markdown2.py
@@ -1319,17 +1319,17 @@ def _escape_special_chars(self, text: str) -> str:
             is_html_markup = not is_html_markup
         return ''.join(escaped)
 
+    def _is_auto_link(self, text):
+        if ':' in text and self._auto_link_re.match(text):
+            return True
+        elif '@' in text and self._auto_email_link_re.match(text):
+            return True
+        return False
+
     @mark_stage(Stage.HASH_HTML)
     def _hash_html_spans(self, text: str) -> str:
         # Used for safe_mode.
 
-        def _is_auto_link(s):
-            if ':' in s and self._auto_link_re.match(s):
-                return True
-            elif '@' in s and self._auto_email_link_re.match(s):
-                return True
-            return False
-
         def _is_code_span(index, token):
             try:
                 if token == '<code>':
@@ -1353,7 +1353,7 @@ def _is_comment(token):
         split_tokens = self._sorta_html_tokenize_re.split(text)
         is_html_markup = False
         for index, token in enumerate(split_tokens):
-            if is_html_markup and not _is_auto_link(token) and not _is_code_span(index, token):
+            if is_html_markup and not self._is_auto_link(token) and not _is_code_span(index, token):
                 is_comment = _is_comment(token)
                 if is_comment:
                     tokens.append(self._hash_span(self._sanitize_html(is_comment.group(1))))
@@ -2165,7 +2165,7 @@ def _encode_incomplete_tags(self, text: str) -> str:
         if self.safe_mode not in ("replace", "escape"):
             return text
 
-        if text.endswith(">"):
+        if self._is_auto_link(text):
             return text  # this is not an incomplete tag, this is a link in the form <http://x.y.z>
 
         def incomplete_tags_sub(match):

diff --git a/test/tm-cases/encode_incomplete_tags_xss_issue625.html b/test/tm-cases/encode_incomplete_tags_xss_issue625.html
@@ -0,0 +1 @@
+<p>&lt;x&gt;&lt;img src=x onerror=alert("xss")//>&lt;x&gt;</p>
diff --git a/test/tm-cases/encode_incomplete_tags_xss_issue625.opts b/test/tm-cases/encode_incomplete_tags_xss_issue625.opts
@@ -0,0 +1 @@
+{'safe_mode': 'escape'}
diff --git a/test/tm-cases/encode_incomplete_tags_xss_issue625.text b/test/tm-cases/encode_incomplete_tags_xss_issue625.text
@@ -0,0 +1 @@
+<x><img src=x onerror=alert("xss")//><x>
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		<p><x><img src=x onerror=alert("xss")//><x></p>