Skip to content

Commit 98d3dba

Browse files
join: consider locale collation in field comparison (uutils#9982)
Co-authored-by: Sylvestre Ledru <[email protected]>
1 parent 555271d commit 98d3dba

File tree

4 files changed, with 43 additions and 3 deletions.

src/uu/join/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ path = "src/join.rs"
1919

2020
[dependencies]
2121
clap = { workspace = true }
22-
uucore = { workspace = true }
22+
uucore = { workspace = true, features = ["i18n-collator"] }
2323
memchr = { workspace = true }
2424
thiserror = { workspace = true }
2525
fluent = { workspace = true }

src/uu/join/src/join.rs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ use thiserror::Error;
1919
use uucore::display::Quotable;
2020
use uucore::error::{FromIo, UError, UResult, USimpleError, set_exit_code};
2121
use uucore::format_usage;
22+
use uucore::i18n::collator::{
23+
AlternateHandling, CollatorOptions, locale_cmp, should_use_locale_collation, try_init_collator,
24+
};
2225
use uucore::line_ending::LineEnding;
2326
use uucore::translate;
2427

@@ -311,14 +314,16 @@ struct Input<Sep: Separator> {
311314
separator: Sep,
312315
ignore_case: bool,
313316
check_order: CheckOrder,
317+
use_locale: bool,
314318
}
315319

316320
impl<Sep: Separator> Input<Sep> {
317-
fn new(separator: Sep, ignore_case: bool, check_order: CheckOrder) -> Self {
321+
fn new(separator: Sep, ignore_case: bool, check_order: CheckOrder, use_locale: bool) -> Self {
318322
Self {
319323
separator,
320324
ignore_case,
321325
check_order,
326+
use_locale,
322327
}
323328
}
324329

@@ -328,6 +333,8 @@ impl<Sep: Separator> Input<Sep> {
328333
let field1 = CaseInsensitiveSlice { v: field1 };
329334
let field2 = CaseInsensitiveSlice { v: field2 };
330335
field1.cmp(&field2)
336+
} else if self.use_locale {
337+
locale_cmp(field1, field2)
331338
} else {
332339
field1.cmp(field2)
333340
}
@@ -823,6 +830,10 @@ fn parse_settings(matches: &clap::ArgMatches) -> UResult<Settings> {
823830
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
824831
let matches = uucore::clap_localization::handle_clap_result(uu_app(), args)?;
825832

833+
let mut opts = CollatorOptions::default();
834+
opts.alternate_handling = Some(AlternateHandling::Shifted);
835+
let _ = try_init_collator(opts);
836+
826837
let settings = parse_settings(&matches)?;
827838

828839
let file1 = matches.get_one::<OsString>("file1").unwrap();
@@ -989,7 +1000,12 @@ fn exec<Sep: Separator>(
9891000
settings.print_unpaired2,
9901001
)?;
9911002

992-
let input = Input::new(sep.clone(), settings.ignore_case, settings.check_order);
1003+
let input = Input::new(
1004+
sep.clone(),
1005+
settings.ignore_case,
1006+
settings.check_order,
1007+
should_use_locale_collation(),
1008+
);
9931009

9941010
let format = if settings.autoformat {
9951011
let mut format = vec![Spec::Key];

src/uucore/src/lib/features/i18n/collator.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ pub fn init_collator(opts: CollatorOptions) {
3030
.expect("Collator already initialized");
3131
}
3232

33+
/// Check if locale collation should be used.
34+
pub fn should_use_locale_collation() -> bool {
35+
get_collating_locale().0 != DEFAULT_LOCALE
36+
}
37+
3338
/// Initialize the collator for locale-aware string comparison if needed.
3439
///
3540
/// This function checks if the current locale requires locale-aware collation

tests/by-util/test_join.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,3 +580,22 @@ fn join_emoji_delim_inner_key() {
580580
.succeeds()
581581
.stdout_only("b🗿a🗿u\n");
582582
}
583+
584+
#[cfg(unix)]
585+
#[test]
586+
fn test_locale_collation() {
587+
let ts = TestScenario::new(util_name!());
588+
let at = &ts.fixtures;
589+
590+
at.write("f1.sorted", "abc:d 2\nab:d 1\n");
591+
at.write("f2.sorted", "abc:d y\nab:d x\n");
592+
593+
ts.ucmd()
594+
.env("LC_ALL", "en_US.UTF-8")
595+
.arg("--check-order")
596+
.arg("f1.sorted")
597+
.arg("f2.sorted")
598+
.succeeds()
599+
.stdout_contains("abc:d 2 y")
600+
.stdout_contains("ab:d 1 x");
601+
}

0 commit comments

Comments
 (0)