Use pathlib.full_match for pattern matching

Kirk Hansen · Kirk Hansen · commit ee261ee9a07c · 2025-04-23T14:19:33.000-05:00
Allows full directory matches and recursive patterns to succeed.
Copies in some Python 3.13 standard-library code for compatibility with earlier Python versions.
diff --git a/README.rst b/README.rst
@@ -62,7 +62,7 @@ ignoring all directory events:
 .. code-block:: bash
 
     watchmedo log \
-        --patterns='*.py;*.txt' \
+        --patterns='**/*.py;**/*.txt' \
         --ignore-directories \
         --recursive \
         --verbose \
@@ -74,7 +74,7 @@ response to events:
 .. code-block:: bash
 
     watchmedo shell-command \
-        --patterns='*.py;*.txt' \
+        --patterns='**/*.py;**/*.txt' \
         --recursive \
         --command='echo "${watch_src_path}"' \
         .
@@ -101,9 +101,9 @@ An example ``tricks.yaml`` file:
 
     tricks:
     - watchdog.tricks.LoggerTrick:
-        patterns: ["*.py", "*.js"]
+        patterns: ["**/*.py", "**/*.js"]
     - watchmedo_webtricks.GoogleClosureTrick:
-        patterns: ['*.js']
+        patterns: ['**/*.js']
         hash_names: true
         mappings_format: json                  # json|yaml|python
         mappings_module: app/javascript_mappings
diff --git a/THIRD_PARTY_LICENSES.md b/THIRD_PARTY_LICENSES.md
@@ -0,0 +1,34 @@
+# Third Party Licenses
+
+## Python Standard Library Compatibility Code
+
+This project includes the following unmodified functions from the Python 3.13 standard library:
+
+- `glob.translate` (from `Lib/glob.py`)
+- `fnmatch._translate` (from `Lib/fnmatch.py`)
+
+These are included in `backwards_compat.py` to provide backwards compatibility with older Python versions.
+
+**License**: Python Software Foundation License Version 2
+**Copyright**: © 2001–2024 Python Software Foundation; All Rights Reserved
+**Source**: https://github.com/python/cpython
+
+---
+
+### Python Software Foundation License Version 2
+
+1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and the Individual or Organization ("Licensee") accessing and otherwise using this software ("Python") in source or binary form and its associated documentation.
+
+2. Subject to the terms and conditions of this License Agreement, PSF hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use Python alone or in any derivative version, provided, however, that PSF's License Agreement and PSF's notice of copyright, i.e., "Copyright (c) 2001 Python Software Foundation; All Rights Reserved" are retained in Python alone or in any derivative version prepared by Licensee.
+
+3. In the event Licensee prepares a derivative work that is based on or incorporates Python or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to Python.
+
+4. PSF is making Python available to Licensee on an "AS IS" basis.  PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
+
+5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
+
+6. This License Agreement will automatically terminate upon a material breach of its terms and conditions.
+
+7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between PSF and Licensee.  This License Agreement does not grant permission to use PSF trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party.
+
+8. By copying, installing or otherwise using Python, Licensee agrees to be bound by the terms and conditions of this License Agreement.
diff --git a/changelog.rst b/changelog.rst
@@ -11,6 +11,10 @@ Changelog
 - Adjust ``Observer.schedule()`` ``path`` type annotation to reflect the ``pathlib.Path`` support. (`#1096 <https://github.com/gorakhargosh/watchdog/pull/1096>`__)
 - Thanks to our beloved contributors: @BoboTiG, @tybug
 
+**Breaking Changes**
+
+- Fix #798 by changing pattern matching from ``path.match`` to ``path.full_match``. Users must update patterns to glob-like syntax, e.g. ``*.py`` becomes ``**/*.py``.
+
 6.0.0
 ~~~~~
 
diff --git a/docs/source/examples/patterns.py b/docs/source/examples/patterns.py
@@ -13,7 +13,9 @@ def on_any_event(self, event: FileSystemEvent) -> None:
         logging.debug(event)
 
 
-event_handler = MyEventHandler(patterns=["*.py", "*.pyc"], ignore_patterns=["version.py"], ignore_directories=True)
+event_handler = MyEventHandler(
+    patterns=["**/*.py", "**/*.pyc"], ignore_patterns=["version.py"], ignore_directories=True
+)
 observer = Observer()
 observer.schedule(event_handler, sys.argv[1], recursive=True)
 observer.start()
diff --git a/docs/source/examples/tricks.json b/docs/source/examples/tricks.json
@@ -2,8 +2,8 @@
     {
         "watchdog.tricks.LoggerTrick": {
             "patterns": [
-                "*.py",
-                "*.js"
+                "**/*.py",
+                "**/*.js"
             ]
         }
     },
@@ -22,7 +22,7 @@
             "suffix": ".min.js",
             "source_directory": "app/static/js/",
             "hash_names": true,
-            "patterns": ["*.js"],
+            "patterns": ["**/*.js"],
             "destination_directory": "app/public/js/",
             "compilation_level": "advanced",
             "mappings_module": "app/javascript_mappings.json"
diff --git a/src/watchdog/utils/backwards_compat.py b/src/watchdog/utils/backwards_compat.py
@@ -0,0 +1,147 @@
+# ruff: noqa
+# fmt: off
+"""
+This file includes unmodified functions copied from Python 3.13's standard library
+for use on older Python versions.
+
+Functions copied:
+- glob.translate (from Lib/glob.py)
+- fnmatch._translate (from Lib/fnmatch.py)
+
+Source: https://github.com/python/cpython
+License: Python Software Foundation License Version 2
+Copyright (c) 2001-2024 Python Software Foundation; All Rights Reserved
+
+Please delete me if/when this project releases forcing python >= 3.13
+"""
+
+import os
+import re
+
+
+# Copied from python 3.13 fnmatch._translate
+def _translate(pat, STAR, QUESTION_MARK):
+    res = []
+    add = res.append
+    i, n = 0, len(pat)
+    while i < n:
+        c = pat[i]
+        i = i+1
+        if c == '*':
+            # compress consecutive `*` into one
+            if (not res) or res[-1] is not STAR:
+                add(STAR)
+        elif c == '?':
+            add(QUESTION_MARK)
+        elif c == '[':
+            j = i
+            if j < n and pat[j] == '!':
+                j = j+1
+            if j < n and pat[j] == ']':
+                j = j+1
+            while j < n and pat[j] != ']':
+                j = j+1
+            if j >= n:
+                add('\\[')
+            else:
+                stuff = pat[i:j]
+                if '-' not in stuff:
+                    stuff = stuff.replace('\\', r'\\')
+                else:
+                    chunks = []
+                    k = i+2 if pat[i] == '!' else i+1
+                    while True:
+                        k = pat.find('-', k, j)
+                        if k < 0:
+                            break
+                        chunks.append(pat[i:k])
+                        i = k+1
+                        k = k+3
+                    chunk = pat[i:j]
+                    if chunk:
+                        chunks.append(chunk)
+                    else:
+                        chunks[-1] += '-'
+                    # Remove empty ranges -- invalid in RE.
+                    for k in range(len(chunks)-1, 0, -1):
+                        if chunks[k-1][-1] > chunks[k][0]:
+                            chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
+                            del chunks[k]
+                    # Escape backslashes and hyphens for set difference (--).
+                    # Hyphens that create ranges shouldn't be escaped.
+                    stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
+                                     for s in chunks)
+                # Escape set operations (&&, ~~ and ||).
+                stuff = re.sub(r'([&~|])', r'\\\1', stuff)
+                i = j+1
+                if not stuff:
+                    # Empty range: never match.
+                    add('(?!)')
+                elif stuff == '!':
+                    # Negated empty range: match any character.
+                    add('.')
+                else:
+                    if stuff[0] == '!':
+                        stuff = '^' + stuff[1:]
+                    elif stuff[0] in ('^', '['):
+                        stuff = '\\' + stuff
+                    add(f'[{stuff}]')
+        else:
+            add(re.escape(c))
+    assert i == n
+    return res
+
+
+def translate(pat, *, recursive=False, include_hidden=False, seps=None):
+    """Translate a pathname with shell wildcards to a regular expression.
+
+    If `recursive` is true, the pattern segment '**' will match any number of
+    path segments.
+
+    If `include_hidden` is true, wildcards can match path segments beginning
+    with a dot ('.').
+
+    If a sequence of separator characters is given to `seps`, they will be
+    used to split the pattern into segments and match path separators. If not
+    given, os.path.sep and os.path.altsep (where available) are used.
+    """
+    if not seps:
+        if os.path.altsep:
+            seps = (os.path.sep, os.path.altsep)
+        else:
+            seps = os.path.sep
+    escaped_seps = ''.join(map(re.escape, seps))
+    any_sep = f'[{escaped_seps}]' if len(seps) > 1 else escaped_seps
+    not_sep = f'[^{escaped_seps}]'
+    if include_hidden:
+        one_last_segment = f'{not_sep}+'
+        one_segment = f'{one_last_segment}{any_sep}'
+        any_segments = f'(?:.+{any_sep})?'
+        any_last_segments = '.*'
+    else:
+        one_last_segment = f'[^{escaped_seps}.]{not_sep}*'
+        one_segment = f'{one_last_segment}{any_sep}'
+        any_segments = f'(?:{one_segment})*'
+        any_last_segments = f'{any_segments}(?:{one_last_segment})?'
+
+    results = []
+    parts = re.split(any_sep, pat)
+    last_part_idx = len(parts) - 1
+    for idx, part in enumerate(parts):
+        if part == '*':
+            results.append(one_segment if idx < last_part_idx else one_last_segment)
+        elif recursive and part == '**':
+            if idx < last_part_idx:
+                if parts[idx + 1] != '**':
+                    results.append(any_segments)
+            else:
+                results.append(any_last_segments)
+        else:
+            if part:
+                if not include_hidden and part[0] in '*?':
+                    results.append(r'(?!\.)')
+                results.extend(_translate(part, f'{not_sep}*', not_sep))
+            if idx < last_part_idx:
+                results.append(any_sep)
+    res = ''.join(results)
+    return fr'(?s:{res})\Z'
diff --git a/src/watchdog/utils/patterns.py b/src/watchdog/utils/patterns.py
@@ -14,13 +14,40 @@
 #   - `PureWindowsPath` is always case-insensitive.
 #   - `PurePosixPath` is always case-sensitive.
 # Reference: https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.match
-from pathlib import PurePosixPath, PureWindowsPath
+import re
+from pathlib import PurePath, PurePosixPath, PureWindowsPath
 from typing import TYPE_CHECKING
 
+from watchdog.utils.backwards_compat import translate
+
 if TYPE_CHECKING:
     from collections.abc import Iterator
 
 
+def _get_sep(path: PurePath) -> str:
+    """
+    Python < 3.13 doesn't have a clean way to expose the path separator
+    It's either this, or make use of `path._flavour.sep`
+    """
+    if isinstance(path, PureWindowsPath):
+        return "\\"
+    if isinstance(path, PurePosixPath):
+        return "/"
+    raise TypeError("Unsupported")
+
+
+def _full_match(path: PurePath, pattern):
+    try:
+        return path.full_match(pattern)
+    except AttributeError:
+        # Replicate for python <3.13
+        # Please remove this, backwards_compat.py, and python license attributions
+        # if/when we can pin a release to python >= 3.13
+        regex = translate(pattern, recursive=True, include_hidden=True, seps=_get_sep(path))
+        reobj = re.compile(regex)
+        return reobj.match(str(path))
+
+
 def _match_path(
     raw_path: str,
     included_patterns: set[str],
@@ -42,7 +69,9 @@ def _match_path(
         error = f"conflicting patterns `{common_patterns}` included and excluded"
         raise ValueError(error)
 
-    return any(path.match(p) for p in included_patterns) and not any(path.match(p) for p in excluded_patterns)
+    return any(_full_match(path, p) for p in included_patterns) and not any(
+        _full_match(path, p) for p in excluded_patterns
+    )
 
 
 def filter_paths(
@@ -59,7 +88,7 @@ def filter_paths(
         ignored patterns.
     :param included_patterns:
         Allow filenames matching wildcard patterns specified in this list.
-        If no pattern list is specified, ["*"] is used as the default pattern,
+        If no pattern list is specified, ["**"] is used as the default pattern,
         which matches all files.
     :param excluded_patterns:
         Ignores filenames matching wildcard patterns specified in this list.
@@ -70,7 +99,7 @@ def filter_paths(
         A list of pathnames that matched the allowable patterns and passed
         through the ignored patterns.
     """
-    included = set(["*"] if included_patterns is None else included_patterns)
+    included = set(["**"] if included_patterns is None else included_patterns)
     excluded = set([] if excluded_patterns is None else excluded_patterns)
 
     for path in paths:
diff --git a/tests/test_0_watchmedo.py b/tests/test_0_watchmedo.py
@@ -324,7 +324,7 @@ def test_tricks_from_file(command, tmp_path):
         """
 tricks:
 - watchdog.tricks.LoggerTrick:
-    patterns: ["*.py", "*.js"]
+    patterns: ["**/*.py", "**/*.js"]
 """
     )
     args = watchmedo.cli.parse_args([command, str(tricks_file)])
diff --git a/tests/test_fsevents.py b/tests/test_fsevents.py
@@ -273,7 +273,7 @@ def done(self):
 
     cwd = os.getcwd()
     os.chdir(p())
-    event_handler = TestEventHandler(patterns=["*.json"], ignore_patterns=[], ignore_directories=True)
+    event_handler = TestEventHandler(patterns=["**/*.json"], ignore_patterns=[], ignore_directories=True)
     observer = Observer()
     observer.schedule(event_handler, ".")
     observer.start()
diff --git a/tests/test_pattern_matching_event_handler.py b/tests/test_pattern_matching_event_handler.py
@@ -19,25 +19,25 @@
 
 path_1 = "/path/xyz"
 path_2 = "/path/abc"
-g_allowed_patterns = ["*.py", "*.txt"]
-g_ignore_patterns = ["*.foo"]
+g_allowed_patterns = ["**/*.py", "**/*.txt"]
+g_ignore_patterns = ["**/*.foo"]
 
 
 def assert_patterns(event):
     paths = [event.src_path, event.dest_path] if hasattr(event, "dest_path") else [event.src_path]
     filtered_paths = filter_paths(
         paths,
-        included_patterns=["*.py", "*.txt"],
-        excluded_patterns=["*.pyc"],
+        included_patterns=["**/*.py", "**/*.txt"],
+        excluded_patterns=["**/*.pyc"],
         case_sensitive=False,
     )
     assert filtered_paths
 
 
 def test_dispatch():
     # Utilities.
-    patterns = ["*.py", "*.txt"]
-    ignore_patterns = ["*.pyc"]
+    patterns = ["**/*.py", "**/*.txt"]
+    ignore_patterns = ["**/*.pyc"]
 
     dir_del_event_match = DirDeletedEvent("/path/blah.py")
     dir_del_event_not_match = DirDeletedEvent("/path/foobar")
diff --git a/tests/test_patterns.py b/tests/test_patterns.py

Original file line number	Diff line number	Diff line change
`@@ -2,8 +2,8 @@`
`2`	`2`	`{`
`3`	`3`	`"watchdog.tricks.LoggerTrick": {`
`4`	`4`	`"patterns": [`
`5`		`- "*.py",`
`6`		`- "*.js"`
	`5`	`+ "**/*.py",`
	`6`	`+ "**/*.js"`
`7`	`7`	`]`
`8`	`8`	`}`
`9`	`9`	`},`
`@@ -22,7 +22,7 @@`
`22`	`22`	`"suffix": ".min.js",`
`23`	`23`	`"source_directory": "app/static/js/",`
`24`	`24`	`"hash_names": true,`
`25`		`- "patterns": ["*.js"],`
	`25`	`+ "patterns": ["**/*.js"],`
`26`	`26`	`"destination_directory": "app/public/js/",`
`27`	`27`	`"compilation_level": "advanced",`
`28`	`28`	`"mappings_module": "app/javascript_mappings.json"`
Original file line number	Diff line number	Diff line change
`@@ -324,7 +324,7 @@ def test_tricks_from_file(command, tmp_path):`
`324`	`324`	`"""`
`325`	`325`	`tricks:`
`326`	`326`	`- watchdog.tricks.LoggerTrick:`
`327`		`- patterns: ["*.py", "*.js"]`
	`327`	`+ patterns: ["**/*.py", "**/*.js"]`
`328`	`328`	`"""`
`329`	`329`	`)`
`330`	`330`	`args = watchmedo.cli.parse_args([command, str(tricks_file)])`