@@ -153,6 +153,41 @@ def test_open_propagates_watch_flag(self):
153153 finally :
154154 os .rmdir (sub_dir )
155155
def test_open_backfills_files_missed_by_stale_parent_snapshot(self):
    """
    Check that open() finds files the parent's stale snapshot missed.

    Regression test for a permanent-loss race: open() hands the child a
    snapshot filtered from the parent's dataframe, which can be stale if
    the parent's background watcher has not drained a recent change yet.
    A freshly started child watcher only reports changes from the moment
    it starts, so if the child trusted that stale snapshot instead of
    scanning the directory itself, a file that already existed before
    the child started watching would be permanently invisible to it --
    no amount of retrying update() afterwards would ever recover it.
    """
    sub_dir = os.path.join(self.path, "sub_stale")
    nested_file = os.path.join(sub_dir, "nested.txt")
    os.makedirs(sub_dir)
    try:
        touch(nested_file)

        # Simulate the parent's watcher not having drained the change yet
        # by making its update() a no-op for the duration of open(), so
        # the parent's df does not contain `nested_file` (or even
        # `sub_dir`) when it gets filtered for the child.
        with patch.object(self.fi, "update", lambda timeout=0.1: None):
            # Precondition: the parent really is stale at this point.
            self.assertNotIn(sub_dir, self.fi.df.path.values)
            fi_sub = self.fi.open(sub_dir)

        try:
            self.assertIn(
                nested_file,
                fi_sub.df.path.values,
                msg="open() must not rely solely on a (possibly stale) "
                "snapshot filtered from the parent's dataframe -- a freshly "
                "started child watcher can never backfill a file that "
                "already existed before it started watching.",
            )
        finally:
            fi_sub.close()
    finally:
        # Remove the fixtures created above so they do not linger under
        # self.path; this runs only after the child index has been closed.
        # touch() may have failed, so the file is removed only if present.
        if os.path.exists(nested_file):
            os.remove(nested_file)
        os.rmdir(sub_dir)
190+
156191
157192class TestApplyWatchChanges (unittest .TestCase ):
158193 @classmethod
0 commit comments