Skip to content

gh-130942: Fix path separator matched in character ranges for glob.translate #130989

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion Lib/fnmatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ def _translate(pat, star, question_mark):
res = []
add = res.append
star_indices = []
inside_range = False
question_mark_char = re.sub(r'\[|\]|\^', '', question_mark)

i, n = 0, len(pat)
while i < n:
Expand Down Expand Up @@ -135,6 +137,9 @@ def _translate(pat, star, question_mark):
if chunks[k-1][-1] > chunks[k][0]:
chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
del chunks[k]
if len(chunks) > 1:
if question_mark_char:
inside_range = chunks[0][-1] <= question_mark_char <= chunks[-1][0]
# Escape backslashes and hyphens for set difference (--).
# Hyphens that create ranges shouldn't be escaped.
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
Expand All @@ -145,11 +150,16 @@ def _translate(pat, star, question_mark):
add('(?!)')
elif stuff == '!':
# Negated empty range: match any character.
add('.')
add(question_mark)
else:
negative_lookahead=''
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
negative_lookahead=''
negative_lookahead = ''

if question_mark != '.' and inside_range:
add(f'(?![{question_mark_char}])')
# Escape set operations (&&, ~~ and ||).
stuff = _re_setops_sub(r'\\\1', stuff)
if stuff[0] == '!':
if question_mark_char not in stuff and question_mark != '.':
stuff = f'^{question_mark_char}' + '^' + stuff[1:]
stuff = '^' + stuff[1:]
elif stuff[0] in ('^', '['):
stuff = '\\' + stuff
Expand Down
2 changes: 0 additions & 2 deletions Lib/glob.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,6 @@ def escape(pathname):
_special_parts = ('', '.', '..')
_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)
_no_recurse_symlinks = object()


Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please revert

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please revert.

def translate(pat, *, recursive=False, include_hidden=False, seps=None):
"""Translate a pathname with shell wildcards to a regular expression.

Expand Down
7 changes: 7 additions & 0 deletions Lib/test/test_fnmatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,13 @@ def test_translate(self):
self.assertEqual(translate('A*********'), r'(?s:A.*)\Z')
self.assertEqual(translate('*********A'), r'(?s:.*A)\Z')
self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z')
self.assertEqual(translate('foo[%-0]bar'), r'(?s:foo[%-0]bar)\Z')
self.assertEqual(translate('foo[%-0][%-0[%-0]bar'), r'(?s:foo[%-0][%-0[%-0]bar)\Z')
self.assertEqual(translate('foo[/-/]bar'), r'(?s:foo[/-/]bar)\Z')
self.assertEqual(translate('foo[%-0][1-9]bar'), r'(?s:foo[%-0][1-9]bar)\Z')
self.assertEqual(translate('foo[%-/]bar'), r'(?s:foo[%-/]bar)\Z')
self.assertEqual(translate('foo?'), r'(?s:foo.)\Z')
self.assertEqual(translate('foo.'), r'(?s:foo\.)\Z')
# fancy translation to prevent exponential-time match failure
t = translate('**a*a****a')
self.assertEqual(t, r'(?s:(?>.*?a)(?>.*?a).*a)\Z')
Expand Down
20 changes: 19 additions & 1 deletion Lib/test/test_glob.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,14 @@ def test_translate_matching(self):
self.assertIsNone(match(os.path.join('foo', '.bar')))
self.assertIsNotNone(match(os.path.join('foo', 'bar.txt')))
self.assertIsNone(match(os.path.join('foo', '.bar.txt')))
match = re.compile(glob.translate('foo[%-0]bar', recursive=True)).match
self.assertIsNone(match(os.path.join('foo', 'bar')))
match = re.compile(glob.translate('foo?bar', recursive=True)).match
self.assertIsNone(match('foo/bar'))
match = re.compile(glob.translate('foo.', recursive=True)).match
self.assertIsNone(match('foo/'))
match = re.compile(glob.translate('foo*', recursive=True)).match
self.assertIsNone(match('foo/'))

def test_translate(self):
def fn(pat):
Expand Down Expand Up @@ -513,7 +521,17 @@ def fn(pat):
return glob.translate(pat, recursive=True, include_hidden=True, seps=['/', '\\'])
self.assertEqual(fn('foo/bar\\baz'), r'(?s:foo[/\\]bar[/\\]baz)\Z')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

More generally, can you update test_translate_matching and include the examples from https://man7.org/linux/man-pages/man7/glob.7.html so that we have a compliant implementation?

self.assertEqual(fn('**/*'), r'(?s:(?:.+[/\\])?[^/\\]+)\Z')

self.assertEqual(fn('foo[!a]bar'), r'(?s:foo[^/\\^a]bar)\Z')
self.assertEqual(fn('foo[%-0]bar'), r'(?s:foo(?![/\\])[%-0]bar)\Z')
self.assertEqual(fn('foo[%-0][1-9]bar'), r'(?s:foo(?![/\\])[%-0][1-9]bar)\Z')
self.assertEqual(fn('foo[0-%]bar'), r'(?s:foo(?!)bar)\Z')
self.assertEqual(fn('foo[^-'), r'(?s:foo\[\^\-)\Z')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We also need a test case with multiple ranges and incomplete ones, e.g., [0-%][0-%[0-%], and possibly one with an additional tail after the last range.

self.assertEqual(fn('foo[/-/]bar'), r'(?s:foo\[[/\\]\-[/\\]\]bar)\Z')
self.assertEqual(fn('foo[%-/]bar'), r'(?s:foo\[%\-[/\\]\]bar)\Z')
self.assertEqual(fn('foo[/]bar'), r'(?s:foo\[[/\\]\]bar)\Z')
self.assertEqual(fn('foo[%-0][0-%[%-0]bar'), r'(?s:foo(?![/\\])[%-0](?![/\\])[\[%-0]bar)\Z')
self.assertEqual(fn('foo?'), r'(?s:foo[^/\\])\Z')
self.assertEqual(fn('foo.'), r'(?s:foo\.)\Z')

if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
.. versionchanged:: next
:func:`glob.translate` now correctly handles ranges implicitly containing path
separators (for instance, ``[%-0]`` contains ``/``), either by adding a negative
lookahead (``(?!/)``) or by excluding the path separator from the set (``^/``). In addition,
ranges including path separator literals are now correctly escaped, as specified by
POSIX.
.. versionchanged:: next
:func:`fnmatch.translate` does not treat path separator characters as having any
special meaning at all, so it still matches ranges implicitly containing path
separators (for instance, ``[%-0]`` contains ``/``) and ranges explicitly
containing path separators (for instance, ``[/-/]`` contains ``/``).
Loading