Skip to content

Commit 5699a27

Browse files
erneestocclaudeMarcusSorealheis
authored
util: make permission walks symlink-safe (#2358)
The recursive permission walk `set_perms_recursive_impl` (driving both `set_readonly_recursive` and `set_dir_writable_recursive`) used `fs::metadata` (stat), which follows symlinks. On input trees containing symlinks - e.g. `.venv/bin/python3` produced by rules_python / rules_apple venv tooling - this had two failure modes: * A symlink to a directory reported `is_dir() == true`, so the walk recursed *through* the link, escaping the materialized tree or descending into an unrelated directory. * A symlink was passed to `set_permissions`; `chmod` follows symlinks, so it mutated the link's target. When the target did not exist (a dangling link - common when a venv points outside the action's input set) the `chmod` returned ENOENT and failed the entire walk. That ENOENT failure surfaced as `set_readonly_recursive` erroring inside `DirectoryCache::get_or_create`, which made `prepare_action_inputs` log "Directory cache failed, falling back to traditional download" and take the slow `download_to_directory` path. Fix: `set_perms_recursive_impl` now uses `symlink_metadata` (lstat) and returns early on symlink entries - it never chmods a symlink and never recurses through one. Regular files keep their existing read-only (0o555) treatment, so the CAS-hardlinked-inode hermeticity contract (PR #2347) is unchanged. `hardlink_directory_tree_recursive` already recreated symlinks as symlinks; its symlink branch is reordered ahead of the `is_dir()` / `is_file()` branches to make the symlink-first intent explicit and robust. Adds regression tests covering set-readonly, set-dir-writable, and hardlink/clone walks over a tree containing a symlink to an in-tree file, a dangling relative symlink, and a symlink to an in-tree directory, asserting each walk succeeds and the symlinks are preserved with their targets intact. Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com> Co-authored-by: Marcus Eagan <marcuseagan@gmail.com>
1 parent 590c514 commit 5699a27

1 file changed

Lines changed: 221 additions & 21 deletions

File tree

nativelink-util/src/fs_util.rs

Lines changed: 221 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -226,31 +226,21 @@ fn hardlink_directory_tree_recursive<'a>(
226226
})?;
227227

228228
let dst_path = dst.join(&file_name);
229+
// `DirEntry::metadata` does NOT traverse symlinks (it has
230+
// `symlink_metadata`/lstat semantics), so `is_symlink()` below
231+
// correctly identifies symlink entries and the symlink branch
232+
// recreates them as symlinks rather than dereferencing them.
229233
let metadata = entry
230234
.metadata()
231235
.await
232236
.err_tip(|| format!("Failed to get metadata for: {}", entry_path.display()))?;
233237

234-
if metadata.is_dir() {
235-
// Create subdirectory and recurse
236-
fs::create_dir(&dst_path)
237-
.await
238-
.err_tip(|| format!("Failed to create directory: {}", dst_path.display()))?;
239-
240-
hardlink_directory_tree_recursive(&entry_path, &dst_path).await?;
241-
} else if metadata.is_file() {
242-
// Hardlink the file
243-
fs::hard_link(&entry_path, &dst_path)
244-
.await
245-
.err_tip(|| {
246-
format!(
247-
"Failed to hardlink {} to {}. This may occur if the source and destination are on different filesystems",
248-
entry_path.display(),
249-
dst_path.display()
250-
)
251-
})?;
252-
} else if metadata.is_symlink() {
253-
// Read the symlink target and create a new symlink
238+
if metadata.is_symlink() {
239+
// Recreate the symlink as a symlink. Checked BEFORE `is_dir()`
240+
// / `is_file()` so a symlink that resolves to a directory is
241+
// never treated as a real directory and recursed *through*
242+
// (which would dereference the link and potentially escape
243+
// the tree).
254244
let target = fs::read_link(&entry_path)
255245
.await
256246
.err_tip(|| format!("Failed to read symlink: {}", entry_path.display()))?;
@@ -272,6 +262,24 @@ fn hardlink_directory_tree_recursive<'a>(
272262
})?;
273263
}
274264
}
265+
} else if metadata.is_dir() {
266+
// Create subdirectory and recurse
267+
fs::create_dir(&dst_path)
268+
.await
269+
.err_tip(|| format!("Failed to create directory: {}", dst_path.display()))?;
270+
271+
hardlink_directory_tree_recursive(&entry_path, &dst_path).await?;
272+
} else if metadata.is_file() {
273+
// Hardlink the file
274+
fs::hard_link(&entry_path, &dst_path)
275+
.await
276+
.err_tip(|| {
277+
format!(
278+
"Failed to hardlink {} to {}. This may occur if the source and destination are on different filesystems",
279+
entry_path.display(),
280+
dst_path.display()
281+
)
282+
})?;
275283
}
276284
}
277285

@@ -288,6 +296,9 @@ fn hardlink_directory_tree_recursive<'a>(
288296
/// # Platform Notes
289297
/// - Unix: Sets permissions to 0o555 (r-xr-xr-x)
290298
/// - Windows: Sets `FILE_ATTRIBUTE_READONLY`
299+
///
300+
/// Symlink entries in the tree are skipped (their own mode is not meaningful
301+
/// and `chmod` would follow the link) - see `set_perms_recursive_impl`.
291302
pub async fn set_readonly_recursive(dir: &Path) -> Result<(), Error> {
292303
error_if!(!dir.exists(), "Directory does not exist: {}", dir.display());
293304

@@ -310,6 +321,9 @@ pub async fn set_readonly_recursive(dir: &Path) -> Result<(), Error> {
310321
/// # Platform Notes
311322
/// - Unix: Sets directory permissions to 0o755 (rwxr-xr-x); files are NOT touched.
312323
/// - Windows: Clears `FILE_ATTRIBUTE_READONLY` on directories only; files are NOT touched.
324+
///
325+
/// Symlink entries in the tree are skipped (their own mode is not meaningful
326+
/// and `chmod` would follow the link) - see `set_perms_recursive_impl`.
313327
pub async fn set_dir_writable_recursive(dir: &Path) -> Result<(), Error> {
314328
error_if!(!dir.exists(), "Directory does not exist: {}", dir.display());
315329

@@ -403,10 +417,31 @@ where
403417
+ 'a,
404418
{
405419
Box::pin(async move {
406-
let metadata = fs::metadata(&path)
420+
// Use `symlink_metadata` (lstat) rather than `metadata` (stat) so the
421+
// walk inspects the entry *itself*, never the target a symlink points
422+
// at. This matters for input trees containing symlinks - e.g.
423+
// `.venv/bin/python3` created by rules_python / rules_apple venv
424+
// tooling. With plain `stat`, a symlink to a directory reports
425+
// `is_dir() == true` and the walk would recurse *through* the link
426+
// (escaping the tree, or descending into an unrelated directory), and
427+
// a symlink to a file would have `chmod` applied to it - and `chmod`
428+
// follows symlinks, so it mutates the target. A symlink whose target
429+
// does not exist (a dangling link, common when a venv points outside
430+
// the action's input set) then fails the whole walk with ENOENT -
431+
// the cause of directory-cache actions falling back to the slow
432+
// download path.
433+
let metadata = fs::symlink_metadata(&path)
407434
.await
408435
.err_tip(|| format!("Failed to get metadata for: {}", path.display()))?;
409436

437+
// Symlinks are skipped entirely: their own mode is not meaningful, a
438+
// `chmod` on the link path would follow it and touch the target, and
439+
// descending into a symlinked directory would walk outside the tree.
440+
// The symlink entry itself is left exactly as created.
441+
if metadata.is_symlink() {
442+
return Ok(());
443+
}
444+
410445
if metadata.is_dir() {
411446
let mut entries = fs::read_dir(&path)
412447
.await
@@ -779,6 +814,171 @@ mod tests {
779814
Ok(())
780815
}
781816

817+
/// Regression test for the directory-cache fallback bug: input trees
818+
/// produced by `rules_python` / `rules_apple` venv tooling contain
819+
/// symlinks (e.g. `.venv/bin/python3`). `set_readonly_recursive` walks the
820+
/// materialized tree with `chmod`; `chmod` follows symlinks, so a symlink
821+
/// to a file would mutate the target and a *dangling* symlink (target
822+
/// outside the action's input set) would fail the whole walk with ENOENT
823+
/// — pushing the action onto the slow `download_to_directory` fallback.
824+
/// The walk must `lstat` and skip the symlink, leaving it intact.
825+
#[cfg(unix)]
826+
#[nativelink_test("crate")]
827+
async fn test_set_readonly_recursive_skips_symlinks() -> Result<(), Error> {
828+
let (_temp_dir, test_dir) = create_test_directory().await?;
829+
830+
// A symlink to a path *inside* the same tree (the realistic
831+
// `.venv/bin/python3 -> ../../file1.txt` shape).
832+
let internal_link = test_dir.join("link_to_file1");
833+
fs::symlink("file1.txt", &internal_link).await?;
834+
835+
// A symlink with a *relative* target that does not resolve (dangling).
836+
// This is the case that previously failed the walk with ENOENT.
837+
let dangling_link = test_dir.join("dangling_link");
838+
fs::symlink("../does/not/exist", &dangling_link).await?;
839+
840+
// A symlink that points at a directory inside the tree. With `stat`
841+
// the walk would recurse *through* this link; with `lstat` it must
842+
// not.
843+
let dir_link = test_dir.join("link_to_subdir");
844+
fs::symlink("subdir", &dir_link).await?;
845+
846+
// The walk must succeed despite the symlinks.
847+
set_readonly_recursive(&test_dir).await?;
848+
849+
// Every symlink is preserved as a symlink with its target intact.
850+
for (link, expected_target) in [
851+
(&internal_link, "file1.txt"),
852+
(&dangling_link, "../does/not/exist"),
853+
(&dir_link, "subdir"),
854+
] {
855+
let link_meta = fs::symlink_metadata(link).await?;
856+
assert!(
857+
link_meta.is_symlink(),
858+
"{} must still be a symlink after the walk",
859+
link.display()
860+
);
861+
assert_eq!(
862+
fs::read_link(link).await?,
863+
PathBuf::from(expected_target),
864+
"{} target must be unchanged",
865+
link.display()
866+
);
867+
}
868+
869+
// The real files were still made read-only.
870+
assert!(
871+
fs::metadata(test_dir.join("file1.txt"))
872+
.await?
873+
.permissions()
874+
.readonly()
875+
);
876+
877+
Ok(())
878+
}
879+
880+
/// Companion to the read-only test: `set_dir_writable_recursive` must also
881+
/// be symlink-safe. It must not `chmod` a symlink (which would follow the
882+
/// link) and must not recurse through a symlinked directory.
883+
#[cfg(unix)]
884+
#[nativelink_test("crate")]
885+
async fn test_set_dir_writable_recursive_skips_symlinks() -> Result<(), Error> {
886+
use std::os::unix::fs::PermissionsExt;
887+
888+
let (_temp_dir, test_dir) = create_test_directory().await?;
889+
890+
// Symlink to a file inside the tree, a dangling relative symlink, and
891+
// a symlink pointing at a directory inside the tree.
892+
fs::symlink("file1.txt", test_dir.join("link_to_file1")).await?;
893+
fs::symlink("../does/not/exist", test_dir.join("dangling_link")).await?;
894+
fs::symlink("subdir", test_dir.join("link_to_subdir")).await?;
895+
896+
// Mirror the directory cache's post-construction sequence.
897+
set_readonly_recursive(&test_dir).await?;
898+
set_dir_writable_recursive(&test_dir).await?;
899+
900+
// Symlinks survive both walks untouched.
901+
for (link, expected_target) in [
902+
("link_to_file1", "file1.txt"),
903+
("dangling_link", "../does/not/exist"),
904+
("link_to_subdir", "subdir"),
905+
] {
906+
let link_path = test_dir.join(link);
907+
assert!(
908+
fs::symlink_metadata(&link_path).await?.is_symlink(),
909+
"{} must still be a symlink",
910+
link_path.display()
911+
);
912+
assert_eq!(
913+
fs::read_link(&link_path).await?,
914+
PathBuf::from(expected_target),
915+
"{} target must be unchanged",
916+
link_path.display()
917+
);
918+
}
919+
920+
// Real directories were made writable; real files stayed read-only.
921+
let dir_mode = fs::metadata(test_dir.join("subdir"))
922+
.await?
923+
.permissions()
924+
.mode()
925+
& 0o777;
926+
assert_eq!(dir_mode, 0o755, "real subdir must be writable");
927+
let file_mode = fs::metadata(test_dir.join("subdir/file2.txt"))
928+
.await?
929+
.permissions()
930+
.mode()
931+
& 0o777;
932+
assert_eq!(file_mode, 0o555, "real files must stay read-only");
933+
934+
Ok(())
935+
}
936+
937+
/// `hardlink_directory_tree` must recreate symlink entries as symlinks at
938+
/// the destination (not dereference them), and the subsequent
939+
/// `set_readonly_recursive` walk over the materialized tree must succeed.
940+
/// This is the end-to-end shape `DirectoryCache::get_or_create` runs.
941+
#[cfg(unix)]
942+
#[nativelink_test("crate")]
943+
async fn test_hardlink_directory_tree_preserves_symlinks() -> Result<(), Error> {
944+
let (temp_dir, src_dir) = create_test_directory().await?;
945+
946+
// Symlink to a sibling file, a dangling relative symlink, and a
947+
// symlink to a subdirectory — all inside the source tree.
948+
fs::symlink("file1.txt", src_dir.join("link_to_file1")).await?;
949+
fs::symlink("../does/not/exist", src_dir.join("dangling_link")).await?;
950+
fs::symlink("subdir", src_dir.join("link_to_subdir")).await?;
951+
952+
let dst_dir = temp_dir.path().join("test_dst");
953+
hardlink_directory_tree(&src_dir, &dst_dir).await?;
954+
955+
// Each symlink is materialized as a symlink with its target intact.
956+
for (link, expected_target) in [
957+
("link_to_file1", "file1.txt"),
958+
("dangling_link", "../does/not/exist"),
959+
("link_to_subdir", "subdir"),
960+
] {
961+
let link_path = dst_dir.join(link);
962+
assert!(
963+
fs::symlink_metadata(&link_path).await?.is_symlink(),
964+
"{} must be a symlink in the materialized tree",
965+
link_path.display()
966+
);
967+
assert_eq!(
968+
fs::read_link(&link_path).await?,
969+
PathBuf::from(expected_target),
970+
"{} target must be preserved",
971+
link_path.display()
972+
);
973+
}
974+
975+
// The read-only walk over the materialized tree must not choke on the
976+
// symlinks (this is the operation that previously failed the cache).
977+
set_readonly_recursive(&dst_dir).await?;
978+
979+
Ok(())
980+
}
981+
782982
#[nativelink_test("crate")]
783983
async fn test_calculate_directory_size() -> Result<(), Error> {
784984
let (_temp_dir, test_dir) = create_test_directory().await?;

0 commit comments

Comments
 (0)