Skip to content

Commit abe90d4

Browse files
authored
[Test] Extend test for watch (#299)
1 parent daef99d commit abe90d4

2 files changed

Lines changed: 41 additions & 1 deletion

File tree

src/pyfileindex/pyfileindex.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,12 @@ def __init__(
4040
if watch:
4141
self._watcher = FileSystemWatcher(path=self._path)
4242
self._watcher.start()
43-
if df is None:
43+
if df is None or watch:
44+
# A df handed down from a parent index (e.g. via open()) can be
45+
# stale: the parent's watcher may not have drained changes made
46+
# to this path yet, and a freshly started watcher here only
47+
# reports changes from now on, so a missing entry would never be
48+
# backfilled. Scanning is cheap relative to that permanent loss.
4449
self._df = self._create_df_from_lst(
4550
[self._get_lst_entry_from_path(entry=self._path)]
4651
+ list(self._scandir(path=self._path, df=None, recursive=True))

tests/unit/test_watchfiles.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,41 @@ def test_open_propagates_watch_flag(self):
153153
finally:
154154
os.rmdir(sub_dir)
155155

156+
def test_open_backfills_files_missed_by_stale_parent_snapshot(self):
    """
    Regression test for a permanent-loss race in open() with watching on.

    open() hands the child a snapshot filtered from the parent's
    dataframe, which can be stale if the parent's background watcher has
    not drained a recent change yet. A freshly started child watcher only
    reports changes from the moment it starts, so if the child trusted
    that stale snapshot instead of scanning the directory itself, a file
    that already existed before the child started watching would be
    permanently invisible to it -- no amount of retrying update()
    afterwards would ever recover it.

    The test creates a sub-directory with one file, freezes the parent's
    update() while open() runs so the parent's dataframe cannot contain
    the new entries, and then checks that the child index lists the file
    anyway. Everything created on disk is removed again afterwards so
    the shared test directory is left as it was found.
    """
    sub_dir = os.path.join(self.path, "sub_stale")
    nested_file = os.path.join(sub_dir, "nested.txt")
    os.makedirs(sub_dir)
    try:
        touch(nested_file)

        # Simulate the parent's watcher not having drained the change yet
        # by making its update() a no-op for the duration of open(), so
        # the parent's df does not contain `nested_file` (or even
        # `sub_dir`) when it gets filtered for the child.
        with patch.object(self.fi, "update", lambda timeout=0.1: None):
            # Precondition: the parent snapshot really is stale.
            self.assertNotIn(sub_dir, self.fi.df.path.values)
            fi_sub = self.fi.open(sub_dir)
        try:
            self.assertIn(
                nested_file,
                fi_sub.df.path.values,
                msg="open() must not rely solely on a (possibly stale) "
                "snapshot filtered from the parent's dataframe -- a freshly "
                "started child watcher can never backfill a file that "
                "already existed before it started watching.",
            )
        finally:
            # Close the child index before touching the files it indexes.
            fi_sub.close()
    finally:
        # Remove the fixtures even if an assertion or open() failed, so
        # later tests sharing self.path do not see leftover entries.
        if os.path.exists(nested_file):
            os.remove(nested_file)
        os.rmdir(sub_dir)
190+
156191

157192
class TestApplyWatchChanges(unittest.TestCase):
158193
@classmethod

0 commit comments

Comments
 (0)