Skip to content

Commit bd0fb81

Browse files
authored
cp: fix the result of inodes are not the same when preserve links is flagged (uutils#5064)
Should fix: ``` rm -rf a b c touch a ln -s a b mkdir c ./target/debug/coreutils cp --preserve=links -R -H a b c a_inode=$(ls -i c/a|sed 's,c/.*,,') b_inode=$(ls -i c/b|sed 's,c/.*,,') echo "$a_inode" = "$b_inode" ```
1 parent e2e42ac commit bd0fb81

File tree

3 files changed

+296
-118
lines changed

3 files changed

+296
-118
lines changed

src/uu/cp/src/copydir.rs

Lines changed: 28 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
//! See the [`copy_directory`] function for more information.
99
#[cfg(windows)]
1010
use std::borrow::Cow;
11-
use std::collections::HashSet;
11+
use std::collections::{HashMap, HashSet};
1212
use std::env;
1313
use std::fs;
1414
use std::io;
@@ -24,8 +24,8 @@ use uucore::uio_error;
2424
use walkdir::{DirEntry, WalkDir};
2525

2626
use crate::{
27-
aligned_ancestors, context_for, copy_attributes, copy_file, copy_link, preserve_hardlinks,
28-
CopyResult, Error, Options,
27+
aligned_ancestors, context_for, copy_attributes, copy_file, copy_link, CopyResult, Error,
28+
Options,
2929
};
3030

3131
/// Ensure a Windows path starts with a `\\?`.
@@ -200,7 +200,7 @@ fn copy_direntry(
200200
options: &Options,
201201
symlinked_files: &mut HashSet<FileInformation>,
202202
preserve_hard_links: bool,
203-
hard_links: &mut Vec<(String, u64)>,
203+
copied_files: &mut HashMap<FileInformation, PathBuf>,
204204
) -> CopyResult<()> {
205205
let Entry {
206206
source_absolute,
@@ -240,30 +240,27 @@ fn copy_direntry(
240240
// If the source is not a directory, then we need to copy the file.
241241
if !source_absolute.is_dir() {
242242
if preserve_hard_links {
243-
let dest = local_to_target.as_path().to_path_buf();
244-
let found_hard_link = preserve_hardlinks(hard_links, &source_absolute, &dest)?;
245-
if !found_hard_link {
246-
match copy_file(
247-
progress_bar,
248-
&source_absolute,
249-
local_to_target.as_path(),
250-
options,
251-
symlinked_files,
252-
false,
253-
) {
254-
Ok(_) => Ok(()),
255-
Err(err) => {
256-
if source_absolute.is_symlink() {
257-
// silent the error with a symlink
258-
// In case we do --archive, we might copy the symlink
259-
// before the file itself
260-
Ok(())
261-
} else {
262-
Err(err)
263-
}
243+
match copy_file(
244+
progress_bar,
245+
&source_absolute,
246+
local_to_target.as_path(),
247+
options,
248+
symlinked_files,
249+
copied_files,
250+
false,
251+
) {
252+
Ok(_) => Ok(()),
253+
Err(err) => {
254+
if source_absolute.is_symlink() {
255+
// silent the error with a symlink
256+
// In case we do --archive, we might copy the symlink
257+
// before the file itself
258+
Ok(())
259+
} else {
260+
Err(err)
264261
}
265-
}?;
266-
}
262+
}
263+
}?;
267264
} else {
268265
// At this point, `path` is just a plain old file.
269266
// Terminate this function immediately if there is any
@@ -277,6 +274,7 @@ fn copy_direntry(
277274
local_to_target.as_path(),
278275
options,
279276
symlinked_files,
277+
copied_files,
280278
false,
281279
) {
282280
Ok(_) => {}
@@ -310,6 +308,7 @@ pub(crate) fn copy_directory(
310308
target: &Path,
311309
options: &Options,
312310
symlinked_files: &mut HashSet<FileInformation>,
311+
copied_files: &mut HashMap<FileInformation, PathBuf>,
313312
source_in_command_line: bool,
314313
) -> CopyResult<()> {
315314
if !options.recursive {
@@ -324,6 +323,7 @@ pub(crate) fn copy_directory(
324323
target,
325324
options,
326325
symlinked_files,
326+
copied_files,
327327
source_in_command_line,
328328
);
329329
}
@@ -372,7 +372,6 @@ pub(crate) fn copy_directory(
372372
};
373373
let target = tmp.as_path();
374374

375-
let mut hard_links: Vec<(String, u64)> = vec![];
376375
let preserve_hard_links = options.preserve_hard_links();
377376

378377
// Collect some paths here that are invariant during the traversal
@@ -397,7 +396,7 @@ pub(crate) fn copy_directory(
397396
options,
398397
symlinked_files,
399398
preserve_hard_links,
400-
&mut hard_links,
399+
copied_files,
401400
)?;
402401
}
403402
// Print an error message, but continue traversing the directory.

src/uu/cp/src/cp.rs

Lines changed: 70 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
use quick_error::quick_error;
1010
use std::borrow::Cow;
1111
use std::cmp::Ordering;
12-
use std::collections::HashSet;
12+
use std::collections::{HashMap, HashSet};
1313
use std::env;
1414
#[cfg(not(windows))]
1515
use std::ffi::CString;
@@ -1106,67 +1106,6 @@ fn parse_path_args(
11061106
Ok((paths, target))
11071107
}
11081108

1109-
/// Get the inode information for a file.
1110-
fn get_inode(file_info: &FileInformation) -> u64 {
1111-
#[cfg(unix)]
1112-
let result = file_info.inode();
1113-
#[cfg(windows)]
1114-
let result = file_info.file_index();
1115-
result
1116-
}
1117-
1118-
#[cfg(target_os = "redox")]
1119-
fn preserve_hardlinks(
1120-
hard_links: &mut Vec<(String, u64)>,
1121-
source: &std::path::Path,
1122-
dest: &std::path::Path,
1123-
found_hard_link: &mut bool,
1124-
) -> CopyResult<()> {
1125-
// Redox does not currently support hard links
1126-
Ok(())
1127-
}
1128-
1129-
/// Hard link a pair of files if needed _and_ record if this pair is a new hard link.
1130-
#[cfg(not(target_os = "redox"))]
1131-
fn preserve_hardlinks(
1132-
hard_links: &mut Vec<(String, u64)>,
1133-
source: &std::path::Path,
1134-
dest: &std::path::Path,
1135-
) -> CopyResult<bool> {
1136-
let info = FileInformation::from_path(source, false)
1137-
.context(format!("cannot stat {}", source.quote()))?;
1138-
let inode = get_inode(&info);
1139-
let nlinks = info.number_of_links();
1140-
let mut found_hard_link = false;
1141-
#[allow(clippy::explicit_iter_loop)]
1142-
for (link, link_inode) in hard_links.iter() {
1143-
if *link_inode == inode {
1144-
// Consider the following files:
1145-
//
1146-
// * `src/f` - a regular file
1147-
// * `src/link` - a hard link to `src/f`
1148-
// * `dest/src/f` - a different regular file
1149-
//
1150-
// In this scenario, if we do `cp -a src/ dest/`, it is
1151-
// possible that the order of traversal causes `src/link`
1152-
// to get copied first (to `dest/src/link`). In that case,
1153-
// in order to make sure `dest/src/link` is a hard link to
1154-
// `dest/src/f` and `dest/src/f` has the contents of
1155-
// `src/f`, we delete the existing file to allow the hard
1156-
// linking.
1157-
if file_or_link_exists(dest) && file_or_link_exists(Path::new(link)) {
1158-
std::fs::remove_file(dest)?;
1159-
}
1160-
std::fs::hard_link(link, dest).unwrap();
1161-
found_hard_link = true;
1162-
}
1163-
}
1164-
if !found_hard_link && nlinks > 1 {
1165-
hard_links.push((dest.to_str().unwrap().to_string(), inode));
1166-
}
1167-
Ok(found_hard_link)
1168-
}
1169-
11701109
/// When handling errors, we don't always want to show them to the user. This function handles that.
11711110
fn show_error_if_needed(error: &Error) {
11721111
match error {
@@ -1195,14 +1134,19 @@ pub fn copy(sources: &[PathBuf], target: &Path, options: &Options) -> CopyResult
11951134
let target_type = TargetType::determine(sources, target);
11961135
verify_target_type(target, &target_type)?;
11971136

1198-
let preserve_hard_links = options.preserve_hard_links();
1199-
1200-
let mut hard_links: Vec<(String, u64)> = vec![];
1201-
12021137
let mut non_fatal_errors = false;
12031138
let mut seen_sources = HashSet::with_capacity(sources.len());
12041139
let mut symlinked_files = HashSet::new();
12051140

1141+
// to remember the copied files for further usage.
1142+
// the FileInformation implemented the Hash trait by using
1143+
// 1. inode number
1144+
// 2. device number
1145+
// the combination of a file's inode number and device number is unique throughout all the file systems.
1146+
//
1147+
// key is the source file's information and the value is the destination filepath.
1148+
let mut copied_files: HashMap<FileInformation, PathBuf> = HashMap::with_capacity(sources.len());
1149+
12061150
let progress_bar = if options.progress_bar {
12071151
let pb = ProgressBar::new(disk_usage(sources, options.recursive)?)
12081152
.with_style(
@@ -1222,28 +1166,19 @@ pub fn copy(sources: &[PathBuf], target: &Path, options: &Options) -> CopyResult
12221166
if seen_sources.contains(source) {
12231167
// FIXME: compare sources by the actual file they point to, not their path. (e.g. dir/file == dir/../dir/file in most cases)
12241168
show_warning!("source {} specified more than once", source.quote());
1225-
} else {
1226-
let found_hard_link = if preserve_hard_links && !source.is_dir() {
1227-
let dest = construct_dest_path(source, target, &target_type, options)?;
1228-
preserve_hardlinks(&mut hard_links, source, &dest)?
1229-
} else {
1230-
false
1231-
};
1232-
if !found_hard_link {
1233-
if let Err(error) = copy_source(
1234-
&progress_bar,
1235-
source,
1236-
target,
1237-
&target_type,
1238-
options,
1239-
&mut symlinked_files,
1240-
) {
1241-
show_error_if_needed(&error);
1242-
non_fatal_errors = true;
1243-
}
1244-
}
1245-
seen_sources.insert(source);
1169+
} else if let Err(error) = copy_source(
1170+
&progress_bar,
1171+
source,
1172+
target,
1173+
&target_type,
1174+
options,
1175+
&mut symlinked_files,
1176+
&mut copied_files,
1177+
) {
1178+
show_error_if_needed(&error);
1179+
non_fatal_errors = true;
12461180
}
1181+
seen_sources.insert(source);
12471182
}
12481183

12491184
if let Some(pb) = progress_bar {
@@ -1295,11 +1230,20 @@ fn copy_source(
12951230
target_type: &TargetType,
12961231
options: &Options,
12971232
symlinked_files: &mut HashSet<FileInformation>,
1233+
copied_files: &mut HashMap<FileInformation, PathBuf>,
12981234
) -> CopyResult<()> {
12991235
let source_path = Path::new(&source);
13001236
if source_path.is_dir() {
13011237
// Copy as directory
1302-
copy_directory(progress_bar, source, target, options, symlinked_files, true)
1238+
copy_directory(
1239+
progress_bar,
1240+
source,
1241+
target,
1242+
options,
1243+
symlinked_files,
1244+
copied_files,
1245+
true,
1246+
)
13031247
} else {
13041248
// Copy as file
13051249
let dest = construct_dest_path(source_path, target, target_type, options)?;
@@ -1309,6 +1253,7 @@ fn copy_source(
13091253
dest.as_path(),
13101254
options,
13111255
symlinked_files,
1256+
copied_files,
13121257
true,
13131258
);
13141259
if options.parents {
@@ -1570,6 +1515,24 @@ fn handle_existing_dest(
15701515
OverwriteMode::Clobber(ClobberMode::RemoveDestination) => {
15711516
fs::remove_file(dest)?;
15721517
}
1518+
OverwriteMode::Clobber(ClobberMode::Standard) => {
1519+
// Consider the following files:
1520+
//
1521+
// * `src/f` - a regular file
1522+
// * `src/link` - a hard link to `src/f`
1523+
// * `dest/src/f` - a different regular file
1524+
//
1525+
// In this scenario, if we do `cp -a src/ dest/`, it is
1526+
// possible that the order of traversal causes `src/link`
1527+
// to get copied first (to `dest/src/link`). In that case,
1528+
// in order to make sure `dest/src/link` is a hard link to
1529+
// `dest/src/f` and `dest/src/f` has the contents of
1530+
// `src/f`, we delete the existing file to allow the hard
1531+
// linking.
1532+
if options.preserve_hard_links() {
1533+
fs::remove_file(dest)?;
1534+
}
1535+
}
15731536
_ => (),
15741537
};
15751538

@@ -1643,6 +1606,7 @@ fn copy_file(
16431606
dest: &Path,
16441607
options: &Options,
16451608
symlinked_files: &mut HashSet<FileInformation>,
1609+
copied_files: &mut HashMap<FileInformation, PathBuf>,
16461610
source_in_command_line: bool,
16471611
) -> CopyResult<()> {
16481612
if (options.update == UpdateMode::ReplaceIfOlder || options.update == UpdateMode::ReplaceNone)
@@ -1686,6 +1650,19 @@ fn copy_file(
16861650
handle_existing_dest(source, dest, options, source_in_command_line)?;
16871651
}
16881652

1653+
if options.preserve_hard_links() {
1654+
// if we encounter a matching device/inode pair in the source tree
1655+
// we can arrange to create a hard link between the corresponding names
1656+
// in the destination tree.
1657+
if let Some(new_source) = copied_files.get(
1658+
&FileInformation::from_path(source, options.dereference(source_in_command_line))
1659+
.context(format!("cannot stat {}", source.quote()))?,
1660+
) {
1661+
std::fs::hard_link(new_source, dest)?;
1662+
return Ok(());
1663+
};
1664+
}
1665+
16891666
if options.verbose {
16901667
if let Some(pb) = progress_bar {
16911668
// Suspend (hide) the progress bar so the println won't overlap with the progress bar.
@@ -1873,6 +1850,11 @@ fn copy_file(
18731850

18741851
copy_attributes(source, dest, &options.attributes)?;
18751852

1853+
copied_files.insert(
1854+
FileInformation::from_path(source, options.dereference(source_in_command_line))?,
1855+
dest.to_path_buf(),
1856+
);
1857+
18761858
if let Some(progress_bar) = progress_bar {
18771859
progress_bar.inc(fs::metadata(source)?.len());
18781860
}

0 commit comments

Comments
 (0)