Skip to content

Commit 439cb17

Browse files
committed
dirname: better support of the posic spec
Closes: uutils#8924
1 parent 73af9e9 commit 439cb17

File tree

2 files changed

+192
-51
lines changed

2 files changed

+192
-51
lines changed

src/uu/dirname/src/dirname.rs

Lines changed: 105 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -5,64 +5,103 @@
55

66
use clap::{Arg, ArgAction, Command};
77
use std::ffi::OsString;
8-
use std::path::Path;
98
use uucore::display::print_verbatim;
109
use uucore::error::{UResult, UUsageError};
1110
use uucore::format_usage;
1211
use uucore::line_ending::LineEnding;
1312

1413
use uucore::translate;
1514

15+
#[cfg(not(unix))]
16+
use std::path::Path;
17+
1618
mod options {
1719
pub const ZERO: &str = "zero";
1820
pub const DIR: &str = "dir";
1921
}
2022

21-
/// Handle the special case where a path ends with "/."
23+
/// Compute dirname following POSIX/GNU behavior
2224
///
23-
/// This matches GNU/POSIX behavior where `dirname("/home/dos/.")` returns "/home/dos"
24-
/// rather than "/home" (which would be the result of `Path::parent()` due to normalization).
25+
/// This implements the POSIX dirname algorithm without path normalization.
2526
/// Per POSIX.1-2017 dirname specification and GNU coreutils manual:
2627
/// - POSIX: <https://pubs.opengroup.org/onlinepubs/9699919799/utilities/dirname.html>
2728
/// - GNU: <https://www.gnu.org/software/coreutils/manual/html_node/dirname-invocation.html>
2829
///
29-
/// dirname should do simple string manipulation without path normalization.
30-
/// See issue #8910 and similar fix in basename (#8373, commit c5268a897).
30+
/// The algorithm:
31+
/// 1. Remove trailing '/' characters
32+
/// 2. If the path ends with "/.", remove it (handles foo/., foo//., foo///., etc.)
33+
/// 3. Remove any remaining trailing '/' characters
34+
/// 4. Apply standard dirname logic (find last '/', return everything before it)
3135
///
32-
/// Returns `Some(())` if the special case was handled (output already printed),
33-
/// or `None` if normal `Path::parent()` logic should be used.
34-
fn handle_trailing_dot(path_bytes: &[u8]) -> Option<()> {
35-
if !path_bytes.ends_with(b"/.") {
36-
return None;
36+
/// See issues #8910 and #8924, and similar fix in basename (#8373, commit c5268a897).
37+
fn compute_dirname(path_bytes: &[u8]) -> Vec<u8> {
38+
// Handle empty path
39+
if path_bytes.is_empty() {
40+
return b".".to_vec();
3741
}
3842

39-
// Strip the "/." suffix and print the result
40-
if path_bytes.len() == 2 {
41-
// Special case: "/." -> "/"
42-
print!("/");
43-
Some(())
44-
} else {
45-
// General case: "/home/dos/." -> "/home/dos"
46-
let stripped = &path_bytes[..path_bytes.len() - 2];
47-
#[cfg(unix)]
48-
{
49-
use std::os::unix::ffi::OsStrExt;
50-
let result = std::ffi::OsStr::from_bytes(stripped);
51-
print_verbatim(result).unwrap();
52-
Some(())
43+
// Special case: "//" stays as "/" per POSIX
44+
if path_bytes == b"//" {
45+
return b"/".to_vec();
46+
}
47+
48+
let mut path = path_bytes.to_vec();
49+
50+
// If path consists entirely of slashes, return single slash
51+
if path.iter().all(|&b| b == b'/') {
52+
return b"/".to_vec();
53+
}
54+
55+
// Step 1: Remove trailing slashes (but keep at least one character)
56+
while path.len() > 1 && path.last() == Some(&b'/') {
57+
path.pop();
58+
}
59+
60+
// Step 2: Check if path ends with "/." and handle specially
61+
// This handles foo/., foo//., foo///., and foo/./ (after step 1) etc.
62+
if path.len() >= 2 && path[path.len() - 1] == b'.' && path[path.len() - 2] == b'/' {
63+
// Remember if the original path was absolute (for handling "/." -> "/")
64+
let was_absolute = path[0] == b'/';
65+
66+
// Remove the "/." suffix
67+
path.truncate(path.len() - 2);
68+
69+
// Remove any additional trailing slashes that might remain (e.g., foo//. -> foo/)
70+
while path.len() > 1 && path.last() == Some(&b'/') {
71+
path.pop();
5372
}
54-
#[cfg(not(unix))]
55-
{
56-
// On non-Unix, fall back to lossy conversion
57-
if let Ok(s) = std::str::from_utf8(stripped) {
58-
print!("{s}");
59-
Some(())
73+
74+
// Handle edge cases: if we're left with nothing or just slashes
75+
if path.is_empty() {
76+
// If it was an absolute path like "/.", return "/"
77+
// Otherwise, return "."
78+
return if was_absolute {
79+
b"/".to_vec()
6080
} else {
61-
// Can't handle non-UTF-8 on non-Unix, fall through to normal logic
62-
None
63-
}
81+
b".".to_vec()
82+
};
83+
}
84+
if path.iter().all(|&b| b == b'/') {
85+
return b"/".to_vec();
86+
}
87+
88+
// What remains IS the dirname for paths ending with "/.".
89+
// Example: "foo/bar/." -> "foo/bar", "foo//." -> "foo"
90+
return path;
91+
}
92+
93+
// Step 3: Standard dirname logic - find last '/' and return everything before it
94+
if let Some(pos) = path.iter().rposition(|&b| b == b'/') {
95+
if pos == 0 {
96+
// The slash is at the beginning, dirname is "/"
97+
return b"/".to_vec();
6498
}
99+
path.truncate(pos);
100+
return path;
65101
}
102+
103+
// No slash found, dirname is "."
104+
b".".to_vec()
66105
}
67106

68107
#[uucore::main]
@@ -84,26 +123,43 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
84123
for path in &dirnames {
85124
let path_bytes = uucore::os_str_as_bytes(path.as_os_str()).unwrap_or(&[]);
86125

87-
if handle_trailing_dot(path_bytes).is_none() {
88-
// Normal path handling using Path::parent()
89-
let p = Path::new(path);
90-
match p.parent() {
91-
Some(d) => {
92-
if d.components().next().is_none() {
93-
print!(".");
94-
} else {
95-
print_verbatim(d).unwrap();
126+
// Compute dirname using POSIX-compliant algorithm
127+
let dirname_bytes = compute_dirname(path_bytes);
128+
129+
// Print the result
130+
#[cfg(unix)]
131+
{
132+
use std::os::unix::ffi::OsStrExt;
133+
let result = std::ffi::OsStr::from_bytes(&dirname_bytes);
134+
print_verbatim(result).unwrap();
135+
}
136+
#[cfg(not(unix))]
137+
{
138+
// On non-Unix, fall back to lossy conversion
139+
if let Ok(s) = std::str::from_utf8(&dirname_bytes) {
140+
print!("{s}");
141+
} else {
142+
// Fallback for non-UTF-8 on non-Unix: use Path::parent() as before
143+
let p = Path::new(path);
144+
match p.parent() {
145+
Some(d) => {
146+
if d.components().next().is_none() {
147+
print!(".");
148+
} else {
149+
print_verbatim(d).unwrap();
150+
}
96151
}
97-
}
98-
None => {
99-
if p.is_absolute() || path.as_os_str() == "/" {
100-
print!("/");
101-
} else {
102-
print!(".");
152+
None => {
153+
if p.is_absolute() || path.as_os_str() == "/" {
154+
print!("/");
155+
} else {
156+
print!(".");
157+
}
103158
}
104159
}
105160
}
106161
}
162+
107163
print!("{line_ending}");
108164
}
109165

tests/by-util/test_dirname.rs

Lines changed: 87 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,11 +152,11 @@ fn test_trailing_dot_multiple_paths() {
152152

153153
#[test]
154154
fn test_trailing_dot_edge_cases() {
155-
// Double slash before dot (should still work)
155+
// Double slash before dot - should strip both the slashes and dot (issue #8924)
156156
new_ucmd!()
157157
.arg("/home/dos//.")
158158
.succeeds()
159-
.stdout_is("/home/dos/\n");
159+
.stdout_is("/home/dos\n");
160160

161161
// Path with . in middle (should use normal logic)
162162
new_ucmd!()
@@ -216,3 +216,88 @@ fn test_existing_behavior_preserved() {
216216
.succeeds()
217217
.stdout_is("/home/dos\n");
218218
}
219+
220+
// Comprehensive test for issue #8924
221+
// Tests GNU coreutils compatibility for complex path patterns with multiple slashes,
222+
// trailing dots, and various combinations that were not handled correctly
223+
#[test]
224+
fn test_issue_8924() {
225+
// The five main cases from the issue
226+
// 1. dirname foo//. should return "foo" not "foo/"
227+
new_ucmd!().arg("foo//.").succeeds().stdout_is("foo\n");
228+
// 2. dirname foo///. should return "foo" not "foo//"
229+
new_ucmd!().arg("foo///.").succeeds().stdout_is("foo\n");
230+
// 3. dirname foo/./ should return "foo" not "."
231+
new_ucmd!().arg("foo/./").succeeds().stdout_is("foo\n");
232+
// 4. dirname foo/bar/./ should return "foo/bar" not "foo"
233+
new_ucmd!()
234+
.arg("foo/bar/./")
235+
.succeeds()
236+
.stdout_is("foo/bar\n");
237+
// 5. dirname foo/./bar should return "foo/." not "foo"
238+
new_ucmd!().arg("foo/./bar").succeeds().stdout_is("foo/.\n");
239+
240+
// Test the same patterns with absolute paths
241+
new_ucmd!()
242+
.arg("/home/foo//.")
243+
.succeeds()
244+
.stdout_is("/home/foo\n");
245+
new_ucmd!()
246+
.arg("/home/foo///.")
247+
.succeeds()
248+
.stdout_is("/home/foo\n");
249+
new_ucmd!()
250+
.arg("/home/foo/./")
251+
.succeeds()
252+
.stdout_is("/home/foo\n");
253+
new_ucmd!()
254+
.arg("/home/foo/bar/./")
255+
.succeeds()
256+
.stdout_is("/home/foo/bar\n");
257+
new_ucmd!()
258+
.arg("/home/foo/./bar")
259+
.succeeds()
260+
.stdout_is("/home/foo/.\n");
261+
262+
// Test various combinations of multiple slashes
263+
new_ucmd!().arg("foo////.").succeeds().stdout_is("foo\n");
264+
new_ucmd!().arg("foo/////.").succeeds().stdout_is("foo\n");
265+
new_ucmd!().arg("foo/./////").succeeds().stdout_is("foo\n");
266+
267+
// Edge cases
268+
new_ucmd!().arg("//.").succeeds().stdout_is("/\n");
269+
new_ucmd!().arg("/./").succeeds().stdout_is("/\n");
270+
new_ucmd!().arg(".//.").succeeds().stdout_is(".\n");
271+
new_ucmd!().arg("foo/..").succeeds().stdout_is("foo\n");
272+
273+
// Verify -z flag works correctly with all patterns
274+
new_ucmd!()
275+
.arg("-z")
276+
.arg("foo//.")
277+
.succeeds()
278+
.stdout_is("foo\u{0}");
279+
new_ucmd!()
280+
.arg("-z")
281+
.arg("foo/./")
282+
.succeeds()
283+
.stdout_is("foo\u{0}");
284+
new_ucmd!()
285+
.arg("-z")
286+
.arg("foo/./bar")
287+
.succeeds()
288+
.stdout_is("foo/.\u{0}");
289+
290+
// More complex real-world-like paths
291+
new_ucmd!()
292+
.arg("/usr/local/bin//.")
293+
.succeeds()
294+
.stdout_is("/usr/local/bin\n");
295+
new_ucmd!()
296+
.arg("/var/lib/./foo")
297+
.succeeds()
298+
.stdout_is("/var/lib/.\n");
299+
new_ucmd!()
300+
.arg("src/main/./test.rs")
301+
.succeeds()
302+
.stdout_is("src/main/.\n");
303+
}

0 commit comments

Comments
 (0)