Skip to content

Commit b9372e5

Browse files
Paol0Bsylvestre
andauthored
Fixes uutils#10192 - fix(comm): improve stdout handling and add test for lossy UTF-8 output (uutils#10206)
* fix(comm): improve stdout handling and add test for lossy UTF-8 output * run cargo fmt * perf(comm): use BufWriter for buffered stdout output Wrap stdout in BufWriter to improve performance and avoid duplicate error messages, matching GNU comm behavior. * fix: refactor write operations in comm to use a dedicated function * comm: use translate! --------- Co-authored-by: Sylvestre Ledru <sylvestre@debian.org>
1 parent a74f72f commit b9372e5

File tree

2 files changed

+48
-6
lines changed

2 files changed

+48
-6
lines changed

src/uu/comm/src/comm.rs

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
use std::cmp::Ordering;
99
use std::ffi::OsString;
1010
use std::fs::{File, metadata};
11-
use std::io::{self, BufRead, BufReader, Read, StdinLock, stdin};
11+
use std::io::{self, BufRead, BufReader, BufWriter, Read, StdinLock, Write, stdin};
1212
use std::path::Path;
1313
use uucore::display::Quotable;
1414
use uucore::error::{FromIo, UResult, USimpleError};
@@ -184,13 +184,25 @@ pub fn are_files_identical(path1: &Path, path2: &Path) -> io::Result<bool> {
184184
}
185185
}
186186

187+
fn write_line_with_delimiter<W: Write>(writer: &mut W, delim: &[u8], line: &[u8]) -> UResult<()> {
188+
writer
189+
.write_all(delim)
190+
.map_err_context(|| translate!("comm-error-write"))?;
191+
writer
192+
.write_all(line)
193+
.map_err_context(|| translate!("comm-error-write"))?;
194+
Ok(())
195+
}
196+
187197
fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) -> UResult<()> {
188198
let width_col_1 = usize::from(!opts.get_flag(options::COLUMN_1));
189199
let width_col_2 = usize::from(!opts.get_flag(options::COLUMN_2));
190200

191201
let delim_col_2 = delim.repeat(width_col_1);
192202
let delim_col_3 = delim.repeat(width_col_1 + width_col_2);
193203

204+
let mut writer = BufWriter::new(io::stdout().lock());
205+
194206
let ra = &mut Vec::new();
195207
let mut na = a.read_line(ra);
196208
let rb = &mut Vec::new();
@@ -239,7 +251,9 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
239251
break;
240252
}
241253
if !opts.get_flag(options::COLUMN_1) {
242-
print!("{}", String::from_utf8_lossy(ra));
254+
writer
255+
.write_all(ra)
256+
.map_err_context(|| translate!("comm-error-write"))?;
243257
}
244258
ra.clear();
245259
na = a.read_line(ra);
@@ -250,7 +264,7 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
250264
break;
251265
}
252266
if !opts.get_flag(options::COLUMN_2) {
253-
print!("{delim_col_2}{}", String::from_utf8_lossy(rb));
267+
write_line_with_delimiter(&mut writer, delim_col_2.as_bytes(), rb)?;
254268
}
255269
rb.clear();
256270
nb = b.read_line(rb);
@@ -262,7 +276,7 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
262276
break;
263277
}
264278
if !opts.get_flag(options::COLUMN_3) {
265-
print!("{delim_col_3}{}", String::from_utf8_lossy(ra));
279+
write_line_with_delimiter(&mut writer, delim_col_3.as_bytes(), ra)?;
266280
}
267281
ra.clear();
268282
rb.clear();
@@ -280,12 +294,16 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
280294

281295
if opts.get_flag(options::TOTAL) {
282296
let line_ending = LineEnding::from_zero_flag(opts.get_flag(options::ZERO_TERMINATED));
283-
print!(
297+
write!(
298+
writer,
284299
"{total_col_1}{delim}{total_col_2}{delim}{total_col_3}{delim}{}{line_ending}",
285300
translate!("comm-total")
286-
);
301+
)
302+
.map_err_context(|| translate!("comm-error-write"))?;
287303
}
288304

305+
writer.flush().ok();
306+
289307
if should_check_order && (checker1.has_error || checker2.has_error) {
290308
// Print the input error message once at the end
291309
if input_error {

tests/by-util/test_comm.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -649,6 +649,30 @@ fn test_comm_eintr_handling() {
649649
.stdout_contains("line3");
650650
}
651651

652+
#[test]
653+
fn test_output_lossy_utf8() {
654+
let scene = TestScenario::new(util_name!());
655+
let at = &scene.fixtures;
656+
657+
// Create files with invalid UTF-8
658+
// A: \xfe\n\xff\n
659+
// B: \xff\n\xfe\n
660+
at.write_bytes("a", b"\xfe\n\xff\n");
661+
at.write_bytes("b", b"\xff\n\xfe\n");
662+
663+
// GNU comm output (and uutils with fix):
664+
// \xfe\n (col 1)
665+
// \t\t\xff\n (col 3)
666+
// \t\xfe\n (col 2)
667+
// Hex: fe 0a 09 09 ff 0a 09 fe 0a
668+
669+
scene
670+
.ucmd()
671+
.args(&["a", "b"])
672+
.fails() // Fails because of unsorted input
673+
.stdout_is_bytes(b"\xfe\n\t\t\xff\n\t\xfe\n");
674+
}
675+
652676
#[test]
653677
#[cfg(any(target_os = "linux", target_os = "android"))]
654678
fn test_comm_anonymous_pipes() {

0 commit comments

Comments
 (0)