Fix more escape sequences. (#449)

felixfontein · web-flow · commit ff15963402e2 · 2024-11-02T22:18:48.000+01:00
diff --git a/v7/import_jekyll/import_jekyll.py b/v7/import_jekyll/import_jekyll.py
@@ -295,7 +295,7 @@ def link_repl(matchobj):
 
 def slugify_file(filename):
     name, _ = os.path.splitext(os.path.basename(filename))
-    m = re.match('\d+\-\d+\-\d+\-(?P<name>.*)', name)
+    m = re.match(r'\d+\-\d+\-\d+\-(?P<name>.*)', name)
     if m:
         name = m.group('name')
 
diff --git a/v7/markmin/markmin/markmin2html.py b/v7/markmin/markmin/markmin2html.py
@@ -31,7 +31,7 @@
 
 __all__ = ['render', 'markmin2html', 'markmin_escape']
 
-__doc__ = """
+__doc__ = r"""
 # Markmin markup language
 
 ## About
@@ -548,10 +548,10 @@ def test():
 regex_del = re.compile(r'~~(?P<t>[^\s*]+( +[^\s*]+)*)~~')
 regex_em = re.compile(r"''(?P<t>([^\s']| |'(?!'))+)''")
 regex_num = re.compile(r"^\s*[+-]?((\d+(\.\d*)?)|\.\d+)([eE][+-]?[0-9]+)?\s*$")
-regex_list = re.compile('^(?:(?:(#{1,6})|(?:(\.+|\++|\-+)(\.)?))\s*)?(.*)$')
-regex_bq_headline = re.compile('^(?:(\.+|\++|\-+)(\.)?\s+)?(-{3}-*)$')
+regex_list = re.compile(r'^(?:(?:(#{1,6})|(?:(\.+|\++|\-+)(\.)?))\s*)?(.*)$')
+regex_bq_headline = re.compile(r'^(?:(\.+|\++|\-+)(\.)?\s+)?(-{3}-*)$')
 regex_tq = re.compile(
-    '^(-{3}-*)(?::(?P<c>[a-zA-Z][_a-zA-Z\-\d]*)(?:\[(?P<p>[a-zA-Z][_a-zA-Z\-\d]*)\])?)?$')
+    r'^(-{3}-*)(?::(?P<c>[a-zA-Z][_a-zA-Z\-\d]*)(?:\[(?P<p>[a-zA-Z][_a-zA-Z\-\d]*)\])?)?$')
 regex_proto = re.compile(
     r'(?<!["\w>/=])(?P<p>\w+):(?P<k>\w+://[\w\d\-+=?%&/:.]+)', re.M)
 regex_auto = re.compile(
@@ -568,11 +568,11 @@ def test():
                     '\x0b\x0c\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x05')
 ttab_out = maketrans(
     '\x0b\x0c\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x05', "'`:*~\\[]{}@$+-.#\n")
-regex_quote = re.compile('(?P<name>\w+?)\s*\=\s*')
+regex_quote = re.compile(r'(?P<name>\w+?)\s*\=\s*')
 
 
 def make_dict(b):
-    return '{%s}' % regex_quote.sub("'\g<name>':", b)
+    return '{%s}' % regex_quote.sub(r"'\g<name>':", b)
 
 
 def safe_eval(node_or_string, env):
@@ -708,7 +708,7 @@ def render(text,
            class_prefix='',
            id_prefix='markmin_',
            pretty_print=False):
-    """
+    r"""
     Arguments:
     - text is the text to be processed
     - extra is a dict like extra=dict(custom=lambda value: value) that process custom code
@@ -959,7 +959,7 @@ def render(text,
         text = replace_at_urls(text, URL)
 
     if latex == 'google':
-        text = regex_dd.sub('``\g<latex>``:latex ', text)
+        text = regex_dd.sub(r'``\g<latex>``:latex ', text)
 
     #############################################################
     # replace all blocks marked with ``...``:class[id] with META
@@ -1321,9 +1321,9 @@ def parse_table_or_blockquote(s, mtag, lineno):
     #############################################################
     # do strong,em,del
     #############################################################
-    text = regex_strong.sub('<strong>\g<t></strong>', text)
-    text = regex_del.sub('<del>\g<t></del>', text)
-    text = regex_em.sub('<em>\g<t></em>', text)
+    text = regex_strong.sub(r'<strong>\g<t></strong>', text)
+    text = regex_del.sub(r'<del>\g<t></del>', text)
+    text = regex_em.sub(r'<em>\g<t></em>', text)
 
     #############################################################
     # deal with images, videos, audios and links
diff --git a/v7/sphinx_roles/sphinx_roles.py b/v7/sphinx_roles/sphinx_roles.py
@@ -551,7 +551,7 @@ def unknown_visit(self, node):
     return [pnode], msg_list
 
 
-_abbr_re = re.compile("\((.*)\)$", re.S)
+_abbr_re = re.compile(r"\((.*)\)$", re.S)
 
 
 class abbreviation(nodes.Inline, nodes.TextElement):
diff --git a/v7/static_comments/static_comments.py b/v7/static_comments/static_comments.py
@@ -72,7 +72,7 @@ class StaticComments(SignalHandler):
     """Add static comments to posts."""
 
     # Used to parse comment headers
-    _header_regex = re.compile('^\.\. (.*?): (.*)')
+    _header_regex = re.compile(r'^\.\. (.*?): (.*)')
 
     def _compile_content(self, compiler_name, content, filename):
         """Compile comment content with specified page compiler."""
diff --git a/v7/wordpress_compiler/wordpress/default_filters.py b/v7/wordpress_compiler/wordpress/default_filters.py
@@ -87,27 +87,27 @@ def __wptexturize_setup(self):
 
         dynamic = []
         if "'" != apos:
-            dynamic.append(('\'(\d\d(?:&#8217;|\')?s)', apos + '\\1'))  # '99's
-            dynamic.append(('\'(\d)', apos + '\\1'))  # '99
+            dynamic.append((r'\'(\d\d(?:&#8217;|\')?s)', apos + '\\1'))  # '99's
+            dynamic.append((r'\'(\d)', apos + '\\1'))  # '99
         if "'" != opening_single_quote:
-            dynamic.append(('(\s|\A|[([{<]|")\'', '\\1' + opening_single_quote))  # opening single quote, even after (, {, <, [
+            dynamic.append((r'(\s|\A|[([{<]|")\'', '\\1' + opening_single_quote))  # opening single quote, even after (, {, <, [
         if '"' != double_prime:
-            dynamic.append(('(\d)"', '\\1' + double_prime))  # 9" (double prime)
+            dynamic.append((r'(\d)"', '\\1' + double_prime))  # 9" (double prime)
         if "'" != prime:
-            dynamic.append(('(\d)\'', '\\1' + prime))  # 9' (prime)
+            dynamic.append((r'(\d)\'', '\\1' + prime))  # 9' (prime)
         if "'" != apos:
-            dynamic.append(('(\S)\'([^\'\s])', '\\1' + apos + '\\2'))  # apostrophe in a word
+            dynamic.append((r'(\S)\'([^\'\s])', '\\1' + apos + '\\2'))  # apostrophe in a word
         if '"' != opening_quote:
-            dynamic.append(('(\s|\A|[([{<])"(?!\s)', '\\1' + opening_quote))  # opening double quote, even after (, {, <, [
+            dynamic.append((r'(\s|\A|[([{<])"(?!\s)', '\\1' + opening_quote))  # opening double quote, even after (, {, <, [
             # PHP: the original PHP regular expression had a problem, since there was only one capturing group, but both \1 and \2 were
             # used on the right-hand side. Since Python throws an exception in that case, while PHP simply treats \2 as an empty string,
             # I had to remove the "+'\\2'" after opening_quote.
         if '"' != closing_quote:
-            dynamic.append(('"(\s|\S|\Z)', closing_quote + '\\1'))  # closing double quote
+            dynamic.append((r'"(\s|\S|\Z)', closing_quote + '\\1'))  # closing double quote
         if "'" != closing_single_quote:
-            dynamic.append(('\'([\s.]|\Z)', closing_single_quote + '\\1'))  # closing single quote
+            dynamic.append((r'\'([\s.]|\Z)', closing_single_quote + '\\1'))  # closing single quote
 
-        dynamic.append(('\b(\d+)x(\d+)\b', '\\1&#215;\\2'))  # 9x9 (times)
+        dynamic.append((r'\b(\d+)x(\d+)\b', '\\1&#215;\\2'))  # 9x9 (times)
 
         self.dynamic = dynamic
 
@@ -144,7 +144,7 @@ def wptexturize(self, text):
         no_texturize_shortcodes_stack = []
 
         # PHP: Since Python doesn't support PHP's /U modifier (which inverts quantifier's greediness), I modified the regular expression accordingly
-        textarr = regex.split('(<.*?>|\[.*?\])', text, flags=regex.DOTALL)
+        textarr = regex.split(r'(<.*?>|\[.*?\])', text, flags=regex.DOTALL)
 
         result = []
         for curl in textarr:
@@ -240,8 +240,8 @@ def __convert_chars_setup(self):
 
     def convert_chars(self, content):
         # Remove metadata tags
-        content = regex.sub('<title>(.+?)<\/title>', '', content)
-        content = regex.sub('<category>(.+?)<\/category>', '', content)
+        content = regex.sub(r'<title>(.+?)<\/title>', '', content)
+        content = regex.sub(r'<category>(.+?)<\/category>', '', content)
 
         # Converts lone & characters into &#38; (a.k.a. &amp;)
         content = regex.sub('&([^#])(?![a-z1-4]{1,8};)', '&#038;\\1', content, regex.IGNORECASE)
@@ -288,36 +288,36 @@ def wpautop(self, pee, br=True):
 
             pee += last_pee
 
-        pee = regex.sub('<br />\s*<br />', "\n\n", pee)
+        pee = regex.sub(r'<br />\s*<br />', "\n\n", pee)
         # Space things out a little
         self.allblocks = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|option|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|noscript|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)'
         pee = regex.sub('(<' + self.allblocks + '[^>]*>)', "\n\\1", pee)
         pee = regex.sub('(</' + self.allblocks + '>)', "\\1\n\n", pee)
         pee = pee.replace("\r\n", "\n").replace("\r", "\n")  # cross-platform newlines
         if pee.find('<object') >= 0:
-            pee = regex.sub('\s*<param([^>]*)>\s*', "<param\\1>", pee)  # no pee inside object/embed
-            pee = regex.sub('\s*</embed>\s*', '</embed>', pee)
+            pee = regex.sub(r'\s*<param([^>]*)>\s*', "<param\\1>", pee)  # no pee inside object/embed
+            pee = regex.sub(r'\s*</embed>\s*', '</embed>', pee)
         pee = regex.sub("\n\n+", "\n\n", pee)  # take care of duplicates
         # make paragraphs, including one at the end
-        pees = regex.split('\n\s*\n', pee)
+        pees = regex.split('\n\\s*\n', pee)
         pee = ''
         for trinkle in pees:
             if len(trinkle) > 0:  # PHP: this emulates PHP's flag PREG_SPLIT_NO_EMPTY for preg_split()
                 pee += '<p>' + trinkle.strip("\n") + "</p>\n"
-        pee = regex.sub('<p>\s*</p>', '', pee)  # under certain strange conditions it could create a P of entirely whitespace
+        pee = regex.sub(r'<p>\s*</p>', '', pee)  # under certain strange conditions it could create a P of entirely whitespace
         pee = regex.sub('<p>([^<]+)</(div|address|form)>', "<p>\\1</p></\\2>", pee)
-        pee = regex.sub('<p>\s*(</?' + self.allblocks + '[^>]*>)\s*</p>', "\\1", pee)  # don't pee all over a tag
+        pee = regex.sub(r'<p>\s*(</?' + self.allblocks + r'[^>]*>)\s*</p>', "\\1", pee)  # don't pee all over a tag
         pee = regex.sub("<p>(<li.+?)</p>", "\\1", pee)  # problem with nested lists
         pee = regex.sub('<p><blockquote([^>]*)>', "<blockquote\\1><p>", pee, regex.IGNORECASE)
         pee = pee.replace('</blockquote></p>', '</p></blockquote>')
-        pee = regex.sub('<p>\s*(</?' + self.allblocks + '[^>]*>)', "\\1", pee)
-        pee = regex.sub('(</?' + self.allblocks + '[^>]*>)\s*</p>', "\\1", pee)
+        pee = regex.sub(r'<p>\s*(</?' + self.allblocks + '[^>]*>)', "\\1", pee)
+        pee = regex.sub('(</?' + self.allblocks + r'[^>]*>)\s*</p>', "\\1", pee)
         if br:
-            pee = php.preg_replace_callback('<(script|style).*?<\/\\1>', lambda x: self.__autop_newline_preservation_helper(x), pee, regex.DOTALL)
-            pee = regex.sub('(?<!<br />)\s*\n', "<br />\n", pee)  # optionally make line breaks
+            pee = php.preg_replace_callback('<(script|style).*?<\\/\\1>', lambda x: self.__autop_newline_preservation_helper(x), pee, regex.DOTALL)
+            pee = regex.sub('(?<!<br />)\\s*\n', "<br />\n", pee)  # optionally make line breaks
             pee = pee.replace('<WPPreserveNewline />', "\n")
-        pee = regex.sub('(</?' + self.allblocks + '[^>]*>)\s*<br />', "\\1", pee)
-        pee = regex.sub('<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)', '\\1', pee)
+        pee = regex.sub('(</?' + self.allblocks + r'[^>]*>)\s*<br />', "\\1", pee)
+        pee = regex.sub(r'<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)', '\\1', pee)
         pee = regex.sub("\n</p>$", '</p>', pee)
 
         if len(pre_tags) > 0:
diff --git a/v7/wordpress_compiler/wordpress/plugins/wordpress_shortcode_code.py b/v7/wordpress_compiler/wordpress/plugins/wordpress_shortcode_code.py
@@ -39,8 +39,8 @@ def __init__(self):
 
     def _filter_code_tags(self, text, context):
         result = ''
-        for piece in regex.split('(\[code(?:|\s+language="[^"]*?")\].*?\[/code\])', text, flags=regex.DOTALL | regex.IGNORECASE):
-            match = regex.match('\[code(?:|\s+language="([^"]*?)")\](.*?)\[/code\]', piece, flags=regex.DOTALL | regex.IGNORECASE)
+        for piece in regex.split(r'(\[code(?:|\s+language="[^"]*?")\].*?\[/code\])', text, flags=regex.DOTALL | regex.IGNORECASE):
+            match = regex.match(r'\[code(?:|\s+language="([^"]*?)")\](.*?)\[/code\]', piece, flags=regex.DOTALL | regex.IGNORECASE)
             if match is not None:
                 the_id = str(context.inc_plugin_counter('wordpress_shortcode_code', 'counter'))
                 context.store_plugin_data('wordpress_shortcode_code', the_id, (match.group(2), match.group(1)))
diff --git a/v7/wordpress_compiler/wordpress/shortcodes.py b/v7/wordpress_compiler/wordpress/shortcodes.py
@@ -120,7 +120,7 @@ def unregister_shortcode(self, tag):
         del self._shorcode_tags[tag]
 
     def _extract_arguments(self, argsString):
-        pattern = '(\w+)\s*=\s*"([^"]*)"(?:\s|$)|(\w+)\s*=\s*\'([^\']*)\'(?:\s|$)|(\w+)\s*=\s*([^\s\'"]+)(?:\s|$)|"([^"]*)"(?:\s|$)|(\S+)(?:\s|$)'
+        pattern = r'(\w+)\s*=\s*"([^"]*)"(?:\s|$)|(\w+)\s*=\s*\'([^\']*)\'(?:\s|$)|(\w+)\s*=\s*([^\s\'"]+)(?:\s|$)|"([^"]*)"(?:\s|$)|(\S+)(?:\s|$)'
         argsString = regex.sub("[\u00A0\u200B]+", " ", argsString)
         matches = regex.findall(pattern, argsString)
         if len(matches) > 0:
diff --git a/v8/markmin/markmin/markmin2html.py b/v8/markmin/markmin/markmin2html.py
@@ -603,7 +603,7 @@ def test():
     "\x0b\x0c\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x05",
     "'`:*~\\[]{}@$+-.#\n",
 )
-regex_quote = re.compile("(?P<name>\w+?)\s*\=\s*")
+regex_quote = re.compile(r"(?P<name>\w+?)\s*\=\s*")
 
 
 def make_dict(b):
diff --git a/v8/webmentions/webmentions.py b/v8/webmentions/webmentions.py
@@ -265,7 +265,7 @@ def check_link_header_for_webmention(self, header):
         """Process a header and look for webmention related entries"""
 
         regexes = [
-            "<(.[^>]+)>;\s+rel\s?=\s?[\"']?(http:\/\/)?webmention(\.org)?\/?[\"']?"
+            r"<(.[^>]+)>;\s+rel\s?=\s?[\"']?(http:\/\/)?webmention(\.org)?\/?[\"']?"
         ]
 
         if "webmention" not in header:

Original file line number	Diff line number	Diff line change
`@@ -603,7 +603,7 @@ def test():`
`603`	`603`	`"\x0b\x0c\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x05",`
`604`	`604`	`` "'`:*~\\[]{}@$+-.#\n", ``
`605`	`605`	`)`
`606`		`-regex_quote = re.compile("(?P<name>\w+?)\s*\=\s*")`
	`606`	`+regex_quote = re.compile(r"(?P<name>\w+?)\s*\=\s*")`
`607`	`607`
`608`	`608`
`609`	`609`	`def make_dict(b):`
Original file line number	Diff line number	Diff line change
`@@ -265,7 +265,7 @@ def check_link_header_for_webmention(self, header):`
`265`	`265`	`"""Process a header and look for webmention related entries"""`
`266`	`266`
`267`	`267`	`regexes = [`
`268`		`- "<(.[^>]+)>;\s+rel\s?=\s?[\"']?(http:\/\/)?webmention(\.org)?\/?[\"']?"`
	`268`	`+ r"<(.[^>]+)>;\s+rel\s?=\s?[\"']?(http:\/\/)?webmention(\.org)?\/?[\"']?"`
`269`	`269`	`]`
`270`	`270`
`271`	`271`	`if "webmention" not in header:`