diff --git a/Cargo.toml b/Cargo.toml index 86e79ed7..b9464f9a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,6 +43,7 @@ directories = "4" sysinfo = "0.27" ctrlc = "3.4" chrono = "0.4" +procfs = { version = "0.17.0", default-features = false } [target.'cfg(not(target_has_atomic = "64"))'.dependencies] portable-atomic = "1.4" diff --git a/src/dir_walker.rs b/src/dir_walker.rs index 8226166c..544670e5 100644 --- a/src/dir_walker.rs +++ b/src/dir_walker.rs @@ -1,9 +1,13 @@ use std::cmp::Ordering; use std::fs; +use std::fs::Metadata; +use std::os::linux::fs::MetadataExt; +use std::path; use std::sync::Arc; use std::sync::Mutex; use crate::node::Node; +use crate::platform::InodeAndDevice; use crate::progress::Operation; use crate::progress::PAtomicInfo; use crate::progress::RuntimeErrors; @@ -11,6 +15,7 @@ use crate::progress::ORDERING; use crate::utils::is_filtered_out_due_to_file_time; use crate::utils::is_filtered_out_due_to_invert_regex; use crate::utils::is_filtered_out_due_to_regex; +use procfs::process::FDTarget; use rayon::iter::ParallelBridge; use rayon::prelude::ParallelIterator; use regex::Regex; @@ -48,9 +53,44 @@ pub struct WalkData<'a> { pub errors: Arc>, } +/// Return deleted file still accessed by a process by walking /proc/$PID/fd/$FD +/// Deleted files have nlinks == 0 +fn get_deleted_files() -> Vec<(PathBuf, Metadata)> { + let mut deleted_files = Vec::new(); + + for p in procfs::process::all_processes().unwrap() { + let Ok(p) = p else { + continue; + }; + let Ok(fds) = p.fd() else { + continue; + }; + + for fd in fds { + let Ok(fd) = fd else { + continue; + }; + + if let FDTarget::Path(path) = &fd.target { + let proc_fd = format!("/proc/{}/fd/{}", p.pid, fd.fd); + let Ok(metadata) = std::fs::metadata(&proc_fd) else { + continue; + }; + + if metadata.st_nlink() == 0 { + // TODO: remove " (deleted)", not part of actual name + deleted_files.push((path.clone(), metadata)); + } + } + } + } + + deleted_files +} + pub fn walk_it(dirs: HashSet, walk_data: &WalkData) -> Vec { let mut inodes = HashSet::new(); - let top_level_nodes: Vec<_> = dirs + let mut top_level_nodes: Vec<_> = dirs .into_iter() .filter_map(|d| { let prog_data = &walk_data.progress_data; @@ -62,11 +102,123 @@ pub fn walk_it(dirs: HashSet, walk_data: &WalkData) -> Vec { clean_inodes(node, &mut inodes, walk_data) }) .collect(); + + // TODO: use a flag + let handle_deleted_files = true; + + if handle_deleted_files { + let deleted_files: Vec<_> = get_deleted_files() + .into_iter() + .filter(|(_path, metadata)| { + let inode_and_device = (metadata.st_ino(), metadata.st_dev()); + // ignore inodes already collected as part of regular files + !inodes.contains(&inode_and_device) + }) + .collect(); + + // we try to insert deleted files in the node tree + for (path, m) in &deleted_files { + for mut top_level_node in &mut top_level_nodes { + // deleted files are always absolute, but not the files in the node tree + let absolute_path = path::absolute(&top_level_node.name).unwrap(); + if path.starts_with(&absolute_path) { + insert_deleted_file_in_node_tree( + path.clone(), + m, + &mut top_level_node, + &walk_data, + 0, + ); + } + } + + // Ignoring deleted file {:?} not child of any top_level_nodes + } + } + top_level_nodes } +/// try to insert `path` in `root`, or its children +/// `path` is absolute +fn insert_deleted_file_in_node_tree( + path: PathBuf, + m: &Metadata, + root: &mut Node, + walk_data: &WalkData, + depth: usize, +) { + // TODO: filecount, filetime, regex... + let size = if walk_data.use_apparent_size { + m.st_size() + } else { + m.st_blocks() * 512 + }; + + root.size += size; + + if path + .parent() + .expect("path of deleted file return by kernel always has a parent") + == path::absolute(&root.name).unwrap() + { + // we found the node that represents the parent dir, create the deleted file as a new file + + let node = Node { + name: path.clone(), + size, + children: vec![], + inode_device: Some((m.st_ino(), m.st_dev())), + depth, + }; + + root.children.push(node); + return; + } + + // try to find the folder were the deleted file was + for child in &mut root.children { + if path.starts_with(path::absolute(&child.name).unwrap()) { + insert_deleted_file_in_node_tree(path, m, child, &walk_data, depth + 1); + return; + } + } + + // can't find a child to insert the file, we need to create a new folder + // a bit messy because we need to convert to/from absolute paths + let dir_name = path + .strip_prefix(path::absolute(&root.name).unwrap()) + .unwrap() + .components() + .next() + .unwrap(); + let absolute_dir_name = path::absolute(&root.name).unwrap().join(dir_name); + + let new_folder = Node { + name: absolute_dir_name, + size: 0, + children: vec![], + inode_device: root.inode_device.map(|(_inode, device)| (0, device)), // keep the device, if we want to filter by device + depth: depth + 1, + }; + + root.children.push(new_folder); + + insert_deleted_file_in_node_tree( + path, + m, + root.children.last_mut().unwrap(), + &walk_data, + depth + 1, + ); +} + // Remove files which have the same inode, we don't want to double count them. -fn clean_inodes(x: Node, inodes: &mut HashSet<(u64, u64)>, walk_data: &WalkData) -> Option { +fn clean_inodes( + x: Node, + inodes: &mut HashSet, + walk_data: &WalkData, +) -> Option { if !walk_data.use_apparent_size { if let Some(id) = x.inode_device { if !inodes.insert(id) { diff --git a/src/node.rs b/src/node.rs index a395aead..3d97fa40 100644 --- a/src/node.rs +++ b/src/node.rs @@ -1,5 +1,6 @@ use crate::dir_walker::WalkData; use crate::platform::get_metadata; +use crate::platform::InodeAndDevice; use crate::utils::is_filtered_out_due_to_file_time; use crate::utils::is_filtered_out_due_to_invert_regex; use crate::utils::is_filtered_out_due_to_regex; @@ -12,7 +13,7 @@ pub struct Node { pub name: PathBuf, pub size: u64, pub children: Vec, - pub inode_device: Option<(u64, u64)>, + pub inode_device: Option, pub depth: usize, } @@ -25,7 +26,7 @@ pub enum FileTime { #[allow(clippy::too_many_arguments)] pub fn build_node( - dir: PathBuf, + path: PathBuf, children: Vec, is_symlink: bool, is_file: bool, @@ -37,15 +38,15 @@ pub fn build_node( let by_filetime = &walk_data.by_filetime; get_metadata( - &dir, + &path, use_apparent_size, walk_data.follow_links && is_symlink, ) .map(|data| { let inode_device = data.1; - let size = if is_filtered_out_due_to_regex(walk_data.filter_regex, &dir) - || is_filtered_out_due_to_invert_regex(walk_data.invert_filter_regex, &dir) + let size = if is_filtered_out_due_to_regex(walk_data.filter_regex, &path) + || is_filtered_out_due_to_invert_regex(walk_data.invert_filter_regex, &path) || by_filecount && !is_file || [ (&walk_data.filter_modified_time, data.2 .0), @@ -71,7 +72,7 @@ pub fn build_node( }; Node { - name: dir, + name: path, size, children, inode_device, diff --git a/src/platform.rs b/src/platform.rs index 1a3dc230..6f441f65 100644 --- a/src/platform.rs +++ b/src/platform.rs @@ -10,7 +10,7 @@ fn get_block_size() -> u64 { 512 } -type InodeAndDevice = (u64, u64); +pub(crate) type InodeAndDevice = (u64, u64); type FileTime = (i64, i64, i64); #[cfg(target_family = "unix")]