diff --git a/Cargo.toml b/Cargo.toml
index 86e79ed7..b9464f9a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -43,6 +43,7 @@ directories = "4"
sysinfo = "0.27"
ctrlc = "3.4"
chrono = "0.4"
+procfs = { version = "0.17.0", default-features = false }
[target.'cfg(not(target_has_atomic = "64"))'.dependencies]
portable-atomic = "1.4"
diff --git a/src/dir_walker.rs b/src/dir_walker.rs
index 8226166c..544670e5 100644
--- a/src/dir_walker.rs
+++ b/src/dir_walker.rs
@@ -1,9 +1,13 @@
use std::cmp::Ordering;
use std::fs;
+use std::fs::Metadata;
+use std::os::linux::fs::MetadataExt;
+use std::path;
use std::sync::Arc;
use std::sync::Mutex;
use crate::node::Node;
+use crate::platform::InodeAndDevice;
use crate::progress::Operation;
use crate::progress::PAtomicInfo;
use crate::progress::RuntimeErrors;
@@ -11,6 +15,7 @@ use crate::progress::ORDERING;
use crate::utils::is_filtered_out_due_to_file_time;
use crate::utils::is_filtered_out_due_to_invert_regex;
use crate::utils::is_filtered_out_due_to_regex;
+use procfs::process::FDTarget;
use rayon::iter::ParallelBridge;
use rayon::prelude::ParallelIterator;
use regex::Regex;
@@ -48,9 +53,44 @@ pub struct WalkData<'a> {
pub errors: Arc>,
}
+/// Collect files that have been unlinked but are still held open by a running process.
+///
+/// Walks `/proc/$PID/fd/$FD` for every process `procfs` lets us inspect and keeps the
+/// descriptors whose target has `st_nlink == 0`, i.e. no remaining directory entry.
+///
+/// Returns `(path, metadata)` pairs. `path` is the link target reported for the descriptor
+/// with the trailing `" (deleted)"` marker removed. Each `(inode, device)` pair is reported
+/// once, even when several descriptors or processes share the same deleted file.
+///
+/// This is best effort: processes and descriptors that cannot be read are skipped, and an
+/// unreadable `/proc` yields an empty list instead of a panic.
+fn get_deleted_files() -> Vec<(PathBuf, Metadata)> {
+    // Marker appended to the link target of an unlinked file; it is not part of the name.
+    const DELETED_SUFFIX: &str = " (deleted)";
+
+    let mut deleted_files = Vec::new();
+    // A deleted file reachable through several descriptors must only be counted once.
+    let mut seen = HashSet::new();
+
+    let Ok(processes) = procfs::process::all_processes() else {
+        return deleted_files;
+    };
+
+    for process in processes.flatten() {
+        let Ok(fds) = process.fd() else {
+            continue;
+        };
+
+        for fd in fds.flatten() {
+            let FDTarget::Path(path) = &fd.target else {
+                continue;
+            };
+
+            // stat through the /proc link: the recorded path itself no longer resolves
+            let proc_fd = format!("/proc/{}/fd/{}", process.pid, fd.fd);
+            let Ok(metadata) = std::fs::metadata(&proc_fd) else {
+                continue;
+            };
+
+            if metadata.st_nlink() != 0 {
+                continue;
+            }
+            if !seen.insert((metadata.st_ino(), metadata.st_dev())) {
+                continue;
+            }
+
+            // A path that is not valid UTF-8 is kept unchanged rather than dropped.
+            let name = path
+                .to_str()
+                .and_then(|target| target.strip_suffix(DELETED_SUFFIX))
+                .map_or_else(|| path.clone(), PathBuf::from);
+            deleted_files.push((name, metadata));
+        }
+    }
+
+    deleted_files
+}
+
+
pub fn walk_it(dirs: HashSet, walk_data: &WalkData) -> Vec {
let mut inodes = HashSet::new();
- let top_level_nodes: Vec<_> = dirs
+ let mut top_level_nodes: Vec<_> = dirs
.into_iter()
.filter_map(|d| {
let prog_data = &walk_data.progress_data;
@@ -62,11 +102,123 @@ pub fn walk_it(dirs: HashSet, walk_data: &WalkData) -> Vec {
clean_inodes(node, &mut inodes, walk_data)
})
.collect();
+
+ // TODO: use a flag
+ let handle_deleted_files = true;
+
+ if handle_deleted_files {
+ let deleted_files: Vec<_> = get_deleted_files()
+ .into_iter()
+ .filter(|(_path, metadata)| {
+ let inode_and_device = (metadata.st_ino(), metadata.st_dev());
+ // ignore inodes already collected as part of regular files
+ !inodes.contains(&inode_and_device)
+ })
+ .collect();
+
+ // we try to insert deleted files in the node tree
+ for (path, m) in &deleted_files {
+ for mut top_level_node in &mut top_level_nodes {
+ // deleted files are always absolute, but not the files in the node tree
+ let absolute_path = path::absolute(&top_level_node.name).unwrap();
+ if path.starts_with(&absolute_path) {
+ insert_deleted_file_in_node_tree(
+ path.clone(),
+ m,
+ &mut top_level_node,
+ &walk_data,
+ 0,
+ );
+ }
+ }
+
+ // a deleted file that is not below any of the top-level nodes is silently ignored
+ }
+ }
+
top_level_nodes
}
+/// Insert the deleted file `path` into the tree rooted at `root`, creating any missing
+/// intermediate folder nodes, and add its size to every node on the way down.
+///
+/// * `path` - absolute path of the deleted file; it must be located below `root`.
+/// * `m` - metadata of the deleted file.
+/// * `root` - node to insert into; its name is made absolute before being compared to `path`.
+/// * `walk_data` - only `use_apparent_size` is read, to choose how the size is computed.
+/// * `depth` - depth of `root`; every node created here is a child of `root` and therefore
+///   gets `depth + 1`.
+///
+/// If `path` cannot be located strictly below `root` (the name of `root` cannot be made
+/// absolute, or `path` is equal to or outside of it) the tree is left untouched.
+fn insert_deleted_file_in_node_tree(
+    path: PathBuf,
+    m: &Metadata,
+    root: &mut Node,
+    walk_data: &WalkData,
+    depth: usize,
+) {
+    // Validate everything before mutating, so a failure never leaves a half-updated tree.
+    let Ok(root_path) = path::absolute(&root.name) else {
+        return;
+    };
+    let Some(dir_name) = path
+        .strip_prefix(&root_path)
+        .ok()
+        .and_then(|rest| rest.components().next())
+    else {
+        return;
+    };
+    // Absolute path of the direct child of `root` that contains (or is) the deleted file.
+    let absolute_dir_name = root_path.join(dir_name);
+
+    // TODO: filecount, filetime, regex...
+    let size = if walk_data.use_apparent_size {
+        m.st_size()
+    } else {
+        m.st_blocks() * 512
+    };
+
+    root.size += size;
+
+    if path.parent() == Some(root_path.as_path()) {
+        // `root` represents the parent dir: add the deleted file as a new child of it
+        root.children.push(Node {
+            name: path,
+            size,
+            children: vec![],
+            inode_device: Some((m.st_ino(), m.st_dev())),
+            depth: depth + 1,
+        });
+        return;
+    }
+
+    // try to find the existing folder where the deleted file was
+    let existing = root.children.iter().position(|child| {
+        path::absolute(&child.name).is_ok_and(|child_path| path.starts_with(child_path))
+    });
+
+    let index = match existing {
+        Some(index) => index,
+        None => {
+            // no child can hold the file: create the missing folder, its size is added
+            // by the recursive call below
+            root.children.push(Node {
+                name: absolute_dir_name,
+                size: 0,
+                children: vec![],
+                // keep the device, if we want to filter by device
+                inode_device: root.inode_device.map(|(_inode, device)| (0, device)),
+                depth: depth + 1,
+            });
+            root.children.len() - 1
+        }
+    };
+
+    insert_deleted_file_in_node_tree(path, m, &mut root.children[index], walk_data, depth + 1);
+}
+
+
// Remove files which have the same inode, we don't want to double count them.
-fn clean_inodes(x: Node, inodes: &mut HashSet<(u64, u64)>, walk_data: &WalkData) -> Option {
+fn clean_inodes(
+ x: Node,
+ inodes: &mut HashSet,
+ walk_data: &WalkData,
+) -> Option {
if !walk_data.use_apparent_size {
if let Some(id) = x.inode_device {
if !inodes.insert(id) {
diff --git a/src/node.rs b/src/node.rs
index a395aead..3d97fa40 100644
--- a/src/node.rs
+++ b/src/node.rs
@@ -1,5 +1,6 @@
use crate::dir_walker::WalkData;
use crate::platform::get_metadata;
+use crate::platform::InodeAndDevice;
use crate::utils::is_filtered_out_due_to_file_time;
use crate::utils::is_filtered_out_due_to_invert_regex;
use crate::utils::is_filtered_out_due_to_regex;
@@ -12,7 +13,7 @@ pub struct Node {
pub name: PathBuf,
pub size: u64,
pub children: Vec,
- pub inode_device: Option<(u64, u64)>,
+ pub inode_device: Option,
pub depth: usize,
}
@@ -25,7 +26,7 @@ pub enum FileTime {
#[allow(clippy::too_many_arguments)]
pub fn build_node(
- dir: PathBuf,
+ path: PathBuf,
children: Vec,
is_symlink: bool,
is_file: bool,
@@ -37,15 +38,15 @@ pub fn build_node(
let by_filetime = &walk_data.by_filetime;
get_metadata(
- &dir,
+ &path,
use_apparent_size,
walk_data.follow_links && is_symlink,
)
.map(|data| {
let inode_device = data.1;
- let size = if is_filtered_out_due_to_regex(walk_data.filter_regex, &dir)
- || is_filtered_out_due_to_invert_regex(walk_data.invert_filter_regex, &dir)
+ let size = if is_filtered_out_due_to_regex(walk_data.filter_regex, &path)
+ || is_filtered_out_due_to_invert_regex(walk_data.invert_filter_regex, &path)
|| by_filecount && !is_file
|| [
(&walk_data.filter_modified_time, data.2 .0),
@@ -71,7 +72,7 @@ pub fn build_node(
};
Node {
- name: dir,
+ name: path,
size,
children,
inode_device,
diff --git a/src/platform.rs b/src/platform.rs
index 1a3dc230..6f441f65 100644
--- a/src/platform.rs
+++ b/src/platform.rs
@@ -10,7 +10,7 @@ fn get_block_size() -> u64 {
512
}
-type InodeAndDevice = (u64, u64);
+pub(crate) type InodeAndDevice = (u64, u64);
type FileTime = (i64, i64, i64);
#[cfg(target_family = "unix")]