Skip to content

Commit 315f9e3

Browse files
muirdmfacebook-github-bot
authored andcommitted
status: add fallback to slow crawl on watchman failure
Summary: The Python "status" code would fall back to non-Watchman status on a Watchman error by default. Previously I didn't reproduce this in the Rust status, hoping it wasn't necessary. However, various cases have popped up where Watchman can't be used in various environments for various reasons, so let's add the fallback to Rust as well. Note that some of the Watchman errors have been real bugs in Watchman (e.g. startup race condition in Windows), and potential "environment" bugs (e.g. Watchman simply not installed), but it does seem like we should err on the side of not blocking users with errors. I'm going to disable fsmonitor.fallback-on-watchman-exception for tests and local development so we don't hide new issues. Reviewed By: quark-zju Differential Revision: D51265962 fbshipit-source-id: d66dd47e83f4bd3811f04d237a75148030cc8526
1 parent 995db0d commit 315f9e3

File tree

2 files changed

+124
-46
lines changed

2 files changed

+124
-46
lines changed

eden/scm/lib/workingcopy/src/watchmanfs/watchmanfs.rs

+100-46
Original file line numberDiff line numberDiff line change
@@ -156,52 +156,7 @@ impl WatchmanFileSystem {
156156

157157
Ok(result)
158158
}
159-
}
160-
161-
async fn crawl_progress(root: PathBuf, approx_file_count: u64) -> Result<()> {
162-
let client = {
163-
let _bar = ProgressBar::new_detached("connecting watchman", 0, "");
164-
165-
// If watchman just started (and we issued "watch-project" from
166-
// query_files), this connect gets stuck indefinitely. Work around by
167-
// timing out and retrying until we get through.
168-
loop {
169-
match tokio::time::timeout(Duration::from_secs(1), Connector::new().connect()).await {
170-
Ok(client) => break client?,
171-
Err(_) => {}
172-
};
173-
174-
tokio::time::sleep(Duration::from_secs(1)).await;
175-
}
176-
};
177-
178-
let mut bar = None;
179-
180-
let req = DebugRootStatusRequest(
181-
"debug-root-status",
182-
CanonicalPath::canonicalize(root)?.into_path_buf(),
183-
);
184-
185-
loop {
186-
let response: DebugRootStatusResponse = client.generic_request(req.clone()).await?;
187159

188-
if let Some(RootStatus {
189-
recrawl_info: Some(RecrawlInfo { stats: Some(stats) }),
190-
}) = response.root_status
191-
{
192-
bar.get_or_insert_with(|| {
193-
ProgressBar::new_detached("crawling", approx_file_count, "files (approx)")
194-
})
195-
.set_position(stats);
196-
} else if bar.is_some() {
197-
return Ok(());
198-
}
199-
200-
tokio::time::sleep(Duration::from_millis(100)).await;
201-
}
202-
}
203-
204-
impl FileSystem for WatchmanFileSystem {
205160
#[tracing::instrument(skip_all)]
206161
fn pending_changes(
207162
&self,
@@ -265,11 +220,14 @@ impl FileSystem for WatchmanFileSystem {
265220
})?,
266221
},
267222
ignore_dirs,
268-
))?
223+
))
269224
};
270225

226+
// Make sure we always abort - even in case of error.
271227
progress_handle.abort();
272228

229+
let result = result?;
230+
273231
tracing::debug!(
274232
target: "watchman_info",
275233
watchmanfreshinstances= if result.is_fresh_instance { 1 } else { 0 },
@@ -397,6 +355,102 @@ impl FileSystem for WatchmanFileSystem {
397355

398356
Ok(Box::new(pending_changes.into_iter()))
399357
}
358+
}
359+
360+
async fn crawl_progress(root: PathBuf, approx_file_count: u64) -> Result<()> {
361+
let client = {
362+
let _bar = ProgressBar::new_detached("connecting watchman", 0, "");
363+
364+
// If watchman just started (and we issued "watch-project" from
365+
// query_files), this connect gets stuck indefinitely. Work around by
366+
// timing out and retrying until we get through.
367+
loop {
368+
match tokio::time::timeout(Duration::from_secs(1), Connector::new().connect()).await {
369+
Ok(client) => break client?,
370+
Err(_) => {}
371+
};
372+
373+
tokio::time::sleep(Duration::from_secs(1)).await;
374+
}
375+
};
376+
377+
let mut bar = None;
378+
379+
let req = DebugRootStatusRequest(
380+
"debug-root-status",
381+
CanonicalPath::canonicalize(root)?.into_path_buf(),
382+
);
383+
384+
loop {
385+
let response: DebugRootStatusResponse = client.generic_request(req.clone()).await?;
386+
387+
if let Some(RootStatus {
388+
recrawl_info: Some(RecrawlInfo { stats: Some(stats) }),
389+
}) = response.root_status
390+
{
391+
bar.get_or_insert_with(|| {
392+
ProgressBar::new_detached("crawling", approx_file_count, "files (approx)")
393+
})
394+
.set_position(stats);
395+
} else if bar.is_some() {
396+
return Ok(());
397+
}
398+
399+
tokio::time::sleep(Duration::from_millis(100)).await;
400+
}
401+
}
402+
403+
impl FileSystem for WatchmanFileSystem {
404+
fn pending_changes(
405+
&self,
406+
matcher: DynMatcher,
407+
ignore_matcher: DynMatcher,
408+
ignore_dirs: Vec<PathBuf>,
409+
include_ignored: bool,
410+
last_write: SystemTime,
411+
config: &dyn Config,
412+
io: &IO,
413+
) -> Result<Box<dyn Iterator<Item = Result<PendingChange>>>> {
414+
let result = self.pending_changes(
415+
matcher.clone(),
416+
ignore_matcher.clone(),
417+
ignore_dirs.clone(),
418+
include_ignored,
419+
last_write,
420+
config,
421+
io,
422+
);
423+
424+
match result {
425+
Ok(result) => Ok(result),
426+
Err(err) if err.is::<watchman_client::Error>() => {
427+
if !config.get_or("fsmonitor", "fallback-on-watchman-exception", || true)? {
428+
return Err(err);
429+
}
430+
431+
// On watchman error, fall back to manual walk. This is important for errors such as:
432+
// - "watchman" binary not in PATH
433+
// - unsupported filesystem (e.g. NFS)
434+
//
435+
// A better approach might be an allowlist of errors to fall
436+
// back on so we can fail hard in cases where watchman "should"
437+
// work, but that is probably still an unacceptable UX in general.
438+
439+
tracing::debug!(target: "watchman_info", watchmanfallback=1);
440+
tracing::warn!(?err, "watchman error - falling back to slow crawl");
441+
self.inner.pending_changes(
442+
matcher,
443+
ignore_matcher,
444+
ignore_dirs,
445+
include_ignored,
446+
last_write,
447+
config,
448+
io,
449+
)
450+
}
451+
Err(err) => Err(err),
452+
}
453+
}
400454

401455
fn sparse_matcher(
402456
&self,
+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#debugruntest-compatible
2+
#require fsmonitor
3+
4+
$ configure modernclient
5+
$ newclientrepo
6+
$ echo foo > foo
7+
$ hg commit -Aqm foo
8+
9+
$ echo nope > $TESTTMP/watchman
10+
$ chmod +x $TESTTMP/watchman
11+
$ export PATH=$TESTTMP:$PATH
12+
$ unset WATCHMAN_SOCK
13+
14+
$ echo foo >> foo
15+
$ LOG=warn,watchman_info=debug hg st --config fsmonitor.fallback-on-watchman-exception=true
16+
DEBUG watchman_info: watchmanfallback=1
17+
WARN workingcopy::watchmanfs::watchmanfs: watchman error - falling back to slow crawl * (glob)
18+
` (?)
19+
M foo
20+
21+
$ LOG=warn,watchman_info=debug hg st --config fsmonitor.fallback-on-watchman-exception=false
22+
abort: While invoking the watchman CLI * (glob)
23+
` (?)
24+
[255]

0 commit comments

Comments
 (0)