Skip to content

Commit 8dd83d3

Browse files
add dynamically-generated test archive
- initialize the test archives exactly once in statics
- add benchmarks for dynamic and static test data
- use lazy_static
1 parent 5bd512e commit 8dd83d3

File tree

2 files changed

+91
-26
lines changed

2 files changed

+91
-26
lines changed

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ anyhow = "1.0.60"
6868
clap = { version = "=4.4.18", features = ["derive"] }
6969
tempdir = "0.3.7"
7070
tempfile = "3.10.1"
71+
lazy_static = "1.5"
72+
num_cpus = "1.16"
7173

7274
[features]
7375
aes-crypto = ["aes", "constant_time_eq", "hmac", "pbkdf2", "sha1", "rand", "zeroize"]

benches/extract.rs

Lines changed: 89 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,81 @@
11
use bencher::{benchmark_group, benchmark_main};
22

33
use bencher::Bencher;
4+
use lazy_static::lazy_static;
45
use tempdir::TempDir;
6+
use tempfile::tempfile;
57

68
use std::fs;
79
use std::path::Path;
10+
use std::sync::{Arc, Mutex};
811

912
use zip::result::ZipResult;
13+
use zip::write::ZipWriter;
1014
use zip::ZipArchive;
1115

12-
#[cfg(all(feature = "parallelism", feature = "bzip2", unix))]
16+
#[cfg(all(feature = "parallelism", unix))]
1317
use zip::read::{split_extract, ExtractionParameters};
1418

15-
#[cfg(feature = "parallelism")]
16-
use num_cpus;
17-
1819
/* This archive has a set of entries repeated 20x:
1920
* - 200K random data, stored uncompressed (CompressionMethod::Stored)
2021
* - 246K text data (the project gutenberg html version of king lear)
2122
* (CompressionMethod::Bzip2, compression level 1) (project gutenberg ebooks are public domain)
2223
*
2324
* The full archive file is 5.3MB.
2425
*/
25-
fn get_test_archive() -> ZipResult<ZipArchive<fs::File>> {
26+
fn static_test_archive() -> ZipResult<ZipArchive<fs::File>> {
27+
assert!(
28+
cfg!(feature = "bzip2"),
29+
"this test archive requires bzip2 support"
30+
);
2631
let path =
2732
Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/data/stored-and-compressed-text.zip");
2833
let file = fs::File::open(path)?;
2934
ZipArchive::new(file)
3035
}
3136

32-
fn extract_basic(bench: &mut Bencher) {
33-
let mut readable_archive = get_test_archive().unwrap();
34-
let total_size: u64 = readable_archive
35-
.decompressed_size()
36-
.unwrap()
37-
.try_into()
38-
.unwrap();
37+
lazy_static! {
    /// The checked-in fixture archive, opened via `static_test_archive()` and
    /// shared behind a mutex by the benchmarks that use it.
    ///
    /// Initialization unwraps the result, so a missing or unreadable fixture
    /// panics on first access.
    static ref STATIC_TEST_ARCHIVE: Arc<Mutex<ZipArchive<fs::File>>> =
        Arc::new(Mutex::new(static_test_archive().unwrap()));
}
43+
44+
/* This archive is generated dynamically, in order to scale with the number of reported CPUs.
45+
* - We generate 4 files per reported CPU, which gives 768 files on the 192-VCPU EC2 *.48xlarge instances used in CI.
46+
* - We want to retain the interspersed random/text entries from static_test_archive().
47+
*
48+
* We will copy over entries from the static archive repeatedly until we reach the desired file
49+
* count.
50+
*/
51+
/// Builds a new archive in an anonymous temporary file by raw-copying entries
/// out of `src_archive`, cycling through its entries in index order until
/// `num_cpus::get() * 4` entries have been written.
///
/// Each copied entry is renamed to `random-{i}.dat` or `text-{i}.dat`, where
/// `i` is its index in the output, depending on the prefix of its source name.
///
/// # Panics
/// Panics if a source entry's name starts with neither `random-` nor `text-`.
///
/// # Errors
/// Propagates any error from creating the temporary file, reading a source
/// entry, copying it, or finishing the output archive.
fn dynamic_test_archive(src_archive: &mut ZipArchive<fs::File>) -> ZipResult<ZipArchive<fs::File>> {
    let target_count: usize = num_cpus::get() * 4;
    let mut writer = ZipWriter::new(tempfile()?);

    // Pair every output slot with the next source index, wrapping around the
    // source archive as often as needed. An empty source yields no pairs.
    let source_indices = (0..src_archive.len()).cycle();
    for (src_index, output_index) in source_indices.zip(0..target_count) {
        let entry = src_archive.by_index_raw(src_index)?;
        let renamed = if entry.name().starts_with("random-") {
            format!("random-{output_index}.dat")
        } else {
            assert!(entry.name().starts_with("text-"));
            format!("text-{output_index}.dat")
        };
        writer.raw_copy_file_rename(entry, renamed)?;
    }

    writer.finish_into_readable()
}
68+
69+
lazy_static! {
    /// The CPU-count-scaled archive produced by `dynamic_test_archive()` from
    /// the entries of `STATIC_TEST_ARCHIVE`, shared behind a mutex.
    ///
    /// The initializer locks `STATIC_TEST_ARCHIVE`, so the first access to
    /// this static must not happen while the calling thread already holds
    /// that lock. Any failure while building the archive panics.
    static ref DYNAMIC_TEST_ARCHIVE: Arc<Mutex<ZipArchive<fs::File>>> = {
        let mut source_guard = STATIC_TEST_ARCHIVE.lock().unwrap();
        let generated = dynamic_test_archive(&mut source_guard).unwrap();
        Arc::new(Mutex::new(generated))
    };
}
76+
77+
fn do_extract_basic(bench: &mut Bencher, archive: &mut ZipArchive<fs::File>) {
78+
let total_size: u64 = archive.decompressed_size().unwrap().try_into().unwrap();
3979

4080
let parent = TempDir::new("zip-extract").unwrap();
4181

@@ -45,19 +85,24 @@ fn extract_basic(bench: &mut Bencher) {
4585
let outdir = TempDir::new_in(parent.path(), "bench-subdir")
4686
.unwrap()
4787
.into_path();
48-
readable_archive.extract(outdir).unwrap();
88+
archive.extract(outdir).unwrap();
4989
});
5090
});
5191
}
5292

53-
#[cfg(all(feature = "parallelism", feature = "bzip2", unix))]
54-
fn extract_split(bench: &mut Bencher) {
55-
let readable_archive = get_test_archive().unwrap();
56-
let total_size: u64 = readable_archive
57-
.decompressed_size()
58-
.unwrap()
59-
.try_into()
60-
.unwrap();
93+
fn extract_basic_static(bench: &mut Bencher) {
94+
let mut archive = STATIC_TEST_ARCHIVE.lock().unwrap();
95+
do_extract_basic(bench, &mut archive);
96+
}
97+
98+
fn extract_basic_dynamic(bench: &mut Bencher) {
99+
let mut archive = DYNAMIC_TEST_ARCHIVE.lock().unwrap();
100+
do_extract_basic(bench, &mut archive);
101+
}
102+
103+
#[cfg(all(feature = "parallelism", unix))]
104+
fn do_extract_split(bench: &mut Bencher, archive: &ZipArchive<fs::File>) {
105+
let total_size: u64 = archive.decompressed_size().unwrap().try_into().unwrap();
61106

62107
let params = ExtractionParameters {
63108
decompression_threads: num_cpus::get() / 3,
@@ -72,15 +117,33 @@ fn extract_split(bench: &mut Bencher) {
72117
let outdir = TempDir::new_in(parent.path(), "bench-subdir")
73118
.unwrap()
74119
.into_path();
75-
split_extract(&readable_archive, &outdir, params.clone()).unwrap();
120+
split_extract(archive, &outdir, params.clone()).unwrap();
76121
});
77122
});
78123
}
79124

80-
#[cfg(not(all(feature = "parallelism", feature = "bzip2", unix)))]
81-
benchmark_group!(benches, extract_basic);
125+
/// Runs the split (parallel) extraction benchmark against
/// `STATIC_TEST_ARCHIVE`.
///
/// The archive's lock is held for the duration of the call.
#[cfg(all(feature = "parallelism", unix))]
fn extract_split_static(bench: &mut Bencher) {
    let guard = STATIC_TEST_ARCHIVE.lock().unwrap();
    let fixture: &ZipArchive<fs::File> = &guard;
    do_extract_split(bench, fixture);
}
130+
131+
/// Runs the split (parallel) extraction benchmark against
/// `DYNAMIC_TEST_ARCHIVE`.
///
/// The archive's lock is held for the duration of the call.
#[cfg(all(feature = "parallelism", unix))]
fn extract_split_dynamic(bench: &mut Bencher) {
    let guard = DYNAMIC_TEST_ARCHIVE.lock().unwrap();
    let generated: &ZipArchive<fs::File> = &guard;
    do_extract_split(bench, generated);
}
82136

83-
#[cfg(all(feature = "parallelism", feature = "bzip2", unix))]
84-
benchmark_group!(benches, extract_basic, extract_split);
137+
#[cfg(not(all(feature = "parallelism", unix)))]
138+
benchmark_group!(benches, extract_basic_static, extract_basic_dynamic);
139+
140+
#[cfg(all(feature = "parallelism", unix))]
141+
benchmark_group!(
142+
benches,
143+
extract_basic_static,
144+
extract_basic_dynamic,
145+
extract_split_static,
146+
extract_split_dynamic
147+
);
85148

86149
benchmark_main!(benches);

0 commit comments

Comments
 (0)