Skip to content

Commit 7b7c2e7

Browse files
committed
add benchmark for join
1 parent c96b96e commit 7b7c2e7

File tree

4 files changed

+153
-0
lines changed

4 files changed

+153
-0
lines changed

.github/workflows/benchmarks.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ jobs:
3131
- { package: uu_du }
3232
- { package: uu_expand }
3333
- { package: uu_fold }
34+
- { package: uu_join }
3435
- { package: uu_ls }
3536
- { package: uu_mv }
3637
- { package: uu_nl }

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/uu/join/Cargo.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,15 @@ memchr = { workspace = true }
2424
thiserror = { workspace = true }
2525
fluent = { workspace = true }
2626

27+
[dev-dependencies]
28+
divan = { workspace = true }
29+
uucore = { workspace = true, features = ["benchmark"] }
30+
tempfile = { workspace = true }
31+
2732
[[bin]]
2833
name = "join"
2934
path = "src/main.rs"
35+
36+
[[bench]]
37+
name = "join_bench"
38+
harness = false

src/uu/join/benches/join_bench.rs

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
// This file is part of the uutils coreutils package.
2+
//
3+
// For the full copyright and license information, please view the LICENSE
4+
// file that was distributed with this source code.
5+
6+
use divan::{Bencher, black_box};
7+
use uu_join::uumain;
8+
use uucore::benchmark::{run_util_function, setup_test_file};
9+
10+
/// Benchmark basic join with sorted data
11+
#[divan::bench]
12+
fn join_basic_sorted(bencher: Bencher) {
13+
bencher.bench(|| {
14+
let num_lines = 10000;
15+
let mut file1_data = Vec::new();
16+
let mut file2_data = Vec::new();
17+
18+
// Generate sorted test data with 50% overlap
19+
for i in 0..num_lines {
20+
let key = if i < num_lines / 2 {
21+
format!("key_{:06}", i)
22+
} else {
23+
format!("unique1_{:06}", i - num_lines / 2)
24+
};
25+
26+
file1_data.extend_from_slice(
27+
format!("{}\tfile1_field1_{}\tfile1_field2_{}\n", key, i, i).as_bytes(),
28+
);
29+
30+
let key2 = if i < num_lines / 2 {
31+
format!("key_{:06}", i)
32+
} else {
33+
format!("unique2_{:06}", i - num_lines / 2)
34+
};
35+
36+
file2_data.extend_from_slice(
37+
format!("{}\tfile2_field1_{}\tfile2_field2_{}\n", key2, i, i).as_bytes(),
38+
);
39+
}
40+
41+
// Sort the data
42+
let file1_lines: Vec<&str> = std::str::from_utf8(&file1_data).unwrap().lines().collect();
43+
let file2_lines: Vec<&str> = std::str::from_utf8(&file2_data).unwrap().lines().collect();
44+
45+
let mut sorted_file1: Vec<_> = file1_lines.clone();
46+
let mut sorted_file2: Vec<_> = file2_lines.clone();
47+
48+
sorted_file1.sort();
49+
sorted_file2.sort();
50+
51+
let sorted_file1_data = (sorted_file1.join("\n") + "\n").into_bytes();
52+
let sorted_file2_data = (sorted_file2.join("\n") + "\n").into_bytes();
53+
54+
let file1_path = setup_test_file(&sorted_file1_data);
55+
let file2_path = setup_test_file(&sorted_file2_data);
56+
57+
black_box(run_util_function(
58+
uumain,
59+
&[file1_path.to_str().unwrap(), file2_path.to_str().unwrap()],
60+
));
61+
});
62+
}
63+
64+
/// Benchmark join with custom delimiter
65+
#[divan::bench]
66+
fn join_custom_delimiter(bencher: Bencher) {
67+
bencher.bench(|| {
68+
let num_lines = 10000;
69+
let mut file1_data = Vec::new();
70+
let mut file2_data = Vec::new();
71+
72+
// Generate CSV-style data
73+
for i in 0..num_lines {
74+
let key = format!("key_{:06}", i);
75+
file1_data.extend_from_slice(format!("{},value1_{},data1_{}\n", key, i, i).as_bytes());
76+
file2_data.extend_from_slice(format!("{},value2_{},data2_{}\n", key, i, i).as_bytes());
77+
}
78+
79+
let file1_path = setup_test_file(&file1_data);
80+
let file2_path = setup_test_file(&file2_data);
81+
82+
black_box(run_util_function(
83+
uumain,
84+
&[
85+
"-t",
86+
",",
87+
file1_path.to_str().unwrap(),
88+
file2_path.to_str().unwrap(),
89+
],
90+
));
91+
});
92+
}
93+
94+
/// Benchmark join with no order checking (unsorted data)
95+
#[divan::bench]
96+
fn join_nocheck_order(bencher: Bencher) {
97+
bencher.bench(|| {
98+
let num_lines = 10000;
99+
let mut file1_data = Vec::new();
100+
let mut file2_data = Vec::new();
101+
102+
// Generate unsorted test data with 50% overlap
103+
for i in 0..num_lines {
104+
let key = if i < num_lines / 2 {
105+
format!("key_{:06}", i)
106+
} else {
107+
format!("unique1_{:06}", i - num_lines / 2)
108+
};
109+
110+
file1_data.extend_from_slice(
111+
format!("{}\tfile1_field1_{}\tfile1_field2_{}\n", key, i, i).as_bytes(),
112+
);
113+
114+
let key2 = if i < num_lines / 2 {
115+
format!("key_{:06}", i)
116+
} else {
117+
format!("unique2_{:06}", i - num_lines / 2)
118+
};
119+
120+
file2_data.extend_from_slice(
121+
format!("{}\tfile2_field1_{}\tfile2_field2_{}\n", key2, i, i).as_bytes(),
122+
);
123+
}
124+
125+
let file1_path = setup_test_file(&file1_data);
126+
let file2_path = setup_test_file(&file2_data);
127+
128+
black_box(run_util_function(
129+
uumain,
130+
&[
131+
"--nocheck-order",
132+
file1_path.to_str().unwrap(),
133+
file2_path.to_str().unwrap(),
134+
],
135+
));
136+
});
137+
}
138+
139+
fn main() {
140+
divan::main();
141+
}

0 commit comments

Comments
 (0)