@@ -110,7 +110,7 @@ fn join_custom_separator(bencher: Bencher) {
110110 } ) ;
111111}
112112
113- /// Benchmark join with French locale (fr_FR.UTF-8)
113+ /// Benchmark join with French locale (fr_FR.UTF-8) - ASCII data (fast path)
114114#[ divan:: bench]
115115fn join_french_locale ( bencher : Bencher ) {
116116 let num_lines = 10000 ;
@@ -126,6 +126,47 @@ fn join_french_locale(bencher: Bencher) {
126126 } ) ;
127127}
128128
129+ /// Create files with Unicode data that requires locale collation
130+ fn create_unicode_join_files ( temp_dir : & TempDir , num_lines : usize ) -> ( String , String ) {
131+ let file1_path = temp_dir. path ( ) . join ( "file1.txt" ) ;
132+ let file2_path = temp_dir. path ( ) . join ( "file2.txt" ) ;
133+
134+ let mut file1 = File :: create ( & file1_path) . unwrap ( ) ;
135+ let mut file2 = File :: create ( & file2_path) . unwrap ( ) ;
136+
137+ // Create data with accented characters that require locale collation
138+ let accented_chars = [
139+ "àbc" , "àbd" , "abc" , "abd" , "èfg" , "efg" , "çar" , "car" , "öst" , "ost" ,
140+ ] ;
141+
142+ for i in 0 ..num_lines {
143+ let key = & accented_chars[ i % accented_chars. len ( ) ] ;
144+ writeln ! ( file1, "{key}:{i:06} field1_{i}" ) . unwrap ( ) ;
145+ writeln ! ( file2, "{key}:{i:06} data1_{i}" ) . unwrap ( ) ;
146+ }
147+
148+ (
149+ file1_path. to_str ( ) . unwrap ( ) . to_string ( ) ,
150+ file2_path. to_str ( ) . unwrap ( ) . to_string ( ) ,
151+ )
152+ }
153+
154+ /// Benchmark join with actual Unicode data requiring locale collation
155+ #[ divan:: bench]
156+ fn join_unicode_locale ( bencher : Bencher ) {
157+ let num_lines = 1000 ; // Smaller due to complexity
158+ let temp_dir = TempDir :: new ( ) . unwrap ( ) ;
159+ let ( file1, file2) = create_unicode_join_files ( & temp_dir, num_lines) ;
160+
161+ bencher
162+ . with_inputs ( || unsafe {
163+ std:: env:: set_var ( "LC_ALL" , "fr_FR.UTF-8" ) ;
164+ } )
165+ . bench_values ( |_| {
166+ black_box ( run_util_function ( uumain, & [ & file1, & file2] ) ) ;
167+ } ) ;
168+ }
169+
129170fn main ( ) {
130171 divan:: main ( ) ;
131172}
0 commit comments