use bencher::{benchmark_group, benchmark_main};

use bencher::Bencher;
+use lazy_static::lazy_static;
use tempdir::TempDir;
+use tempfile::tempfile;

use std::fs;
use std::path::Path;
+use std::sync::{Arc, Mutex};

use zip::result::ZipResult;
+use zip::write::ZipWriter;
use zip::ZipArchive;

-#[cfg(all(feature = "parallelism", feature = "bzip2", unix))]
+#[cfg(all(feature = "parallelism", unix))]
use zip::read::{split_extract, ExtractionParameters};

-#[cfg(feature = "parallelism")]
-use num_cpus;
-
/* This archive has a set of entries repeated 20x:
 * - 200K random data, stored uncompressed (CompressionMethod::Stored)
 * - 246K text data (the project gutenberg html version of king lear)
 *   (CompressionMethod::Bzip2, compression level 1) (project gutenberg ebooks are public domain)
 *
 * The full archive file is 5.3MB.
 */
-fn get_test_archive() -> ZipResult<ZipArchive<fs::File>> {
+fn static_test_archive() -> ZipResult<ZipArchive<fs::File>> {
+    assert!(
+        cfg!(feature = "bzip2"),
+        "this test archive requires bzip2 support"
+    );
    let path =
        Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/data/stored-and-compressed-text.zip");
    let file = fs::File::open(path)?;
    ZipArchive::new(file)
}

-fn extract_basic(bench: &mut Bencher) {
-    let mut readable_archive = get_test_archive().unwrap();
-    let total_size: u64 = readable_archive
-        .decompressed_size()
-        .unwrap()
-        .try_into()
-        .unwrap();
+lazy_static! {
+    static ref STATIC_TEST_ARCHIVE: Arc<Mutex<ZipArchive<fs::File>>> = {
+        let archive = static_test_archive().unwrap();
+        Arc::new(Mutex::new(archive))
+    };
+}
+
+/* This archive is generated dynamically, in order to scale with the number of reported CPUs.
+ * - We want at least 768 files (4 per VCPU on EC2 *.48xlarge instances) to run in CI.
+ * - We want to retain the interspersed random/text entries from static_test_archive().
+ *
+ * We will copy over entries from the static archive repeatedly until we reach the desired file
+ * count.
+ */
+fn dynamic_test_archive(src_archive: &mut ZipArchive<fs::File>) -> ZipResult<ZipArchive<fs::File>> {
+    let desired_num_entries: usize = num_cpus::get() * 4;
+    let mut output_archive = ZipWriter::new(tempfile()?);
+
+    for (src_index, output_index) in (0..src_archive.len()).cycle().zip(0..desired_num_entries) {
+        let src_file = src_archive.by_index_raw(src_index)?;
+        let output_name = if src_file.name().starts_with("random-") {
+            format!("random-{output_index}.dat")
+        } else {
+            assert!(src_file.name().starts_with("text-"));
+            format!("text-{output_index}.dat")
+        };
+        output_archive.raw_copy_file_rename(src_file, output_name)?;
+    }
+
+    output_archive.finish_into_readable()
+}
+
+lazy_static! {
+    static ref DYNAMIC_TEST_ARCHIVE: Arc<Mutex<ZipArchive<fs::File>>> = {
+        let mut src = STATIC_TEST_ARCHIVE.lock().unwrap();
+        let archive = dynamic_test_archive(&mut src).unwrap();
+        Arc::new(Mutex::new(archive))
+    };
+}
+
+fn do_extract_basic(bench: &mut Bencher, archive: &mut ZipArchive<fs::File>) {
+    let total_size: u64 = archive.decompressed_size().unwrap().try_into().unwrap();

    let parent = TempDir::new("zip-extract").unwrap();

@@ -45,19 +85,24 @@ fn extract_basic(bench: &mut Bencher) {
            let outdir = TempDir::new_in(parent.path(), "bench-subdir")
                .unwrap()
                .into_path();
-            readable_archive.extract(outdir).unwrap();
+            archive.extract(outdir).unwrap();
        });
    });
}

-#[cfg(all(feature = "parallelism", feature = "bzip2", unix))]
-fn extract_split(bench: &mut Bencher) {
-    let readable_archive = get_test_archive().unwrap();
-    let total_size: u64 = readable_archive
-        .decompressed_size()
-        .unwrap()
-        .try_into()
-        .unwrap();
+fn extract_basic_static(bench: &mut Bencher) {
+    let mut archive = STATIC_TEST_ARCHIVE.lock().unwrap();
+    do_extract_basic(bench, &mut archive);
+}
+
+fn extract_basic_dynamic(bench: &mut Bencher) {
+    let mut archive = DYNAMIC_TEST_ARCHIVE.lock().unwrap();
+    do_extract_basic(bench, &mut archive);
+}
+
+#[cfg(all(feature = "parallelism", unix))]
+fn do_extract_split(bench: &mut Bencher, archive: &ZipArchive<fs::File>) {
+    let total_size: u64 = archive.decompressed_size().unwrap().try_into().unwrap();

    let params = ExtractionParameters {
        decompression_threads: num_cpus::get() / 3,
@@ -72,15 +117,33 @@ fn extract_split(bench: &mut Bencher) {
            let outdir = TempDir::new_in(parent.path(), "bench-subdir")
                .unwrap()
                .into_path();
-            split_extract(&readable_archive, &outdir, params.clone()).unwrap();
+            split_extract(archive, &outdir, params.clone()).unwrap();
        });
    });
}

-#[cfg(not(all(feature = "parallelism", feature = "bzip2", unix)))]
-benchmark_group!(benches, extract_basic);
+#[cfg(all(feature = "parallelism", unix))]
+fn extract_split_static(bench: &mut Bencher) {
+    let archive = STATIC_TEST_ARCHIVE.lock().unwrap();
+    do_extract_split(bench, &archive);
+}
+
+#[cfg(all(feature = "parallelism", unix))]
+fn extract_split_dynamic(bench: &mut Bencher) {
+    let archive = DYNAMIC_TEST_ARCHIVE.lock().unwrap();
+    do_extract_split(bench, &archive);
+}

-#[cfg(all(feature = "parallelism", feature = "bzip2", unix))]
-benchmark_group!(benches, extract_basic, extract_split);
+#[cfg(not(all(feature = "parallelism", unix)))]
+benchmark_group!(benches, extract_basic_static, extract_basic_dynamic);
+
+#[cfg(all(feature = "parallelism", unix))]
+benchmark_group!(
+    benches,
+    extract_basic_static,
+    extract_basic_dynamic,
+    extract_split_static,
+    extract_split_dynamic
+);

benchmark_main!(benches);
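
As an aside (not part of this change): the static fixture described at the top of the file, with its interleaved stored random data and bzip2-compressed text entries, could be regenerated with a small helper along the lines below. This is only an illustrative sketch under assumptions: the entry names, the 200K buffer size, the "king-lear.html" input path, and the use of the zip crate's SimpleFileOptions/ZipWriter builder API are not taken from this PR.

// Sketch: regenerate a fixture shaped like tests/data/stored-and-compressed-text.zip.
// Assumes zip's SimpleFileOptions builder; names and sizes mirror the comment above.
use std::io::Write;
use std::{fs, path::Path};

use zip::result::ZipResult;
use zip::write::{SimpleFileOptions, ZipWriter};
use zip::CompressionMethod;

fn write_fixture(path: &Path, repetitions: usize) -> ZipResult<()> {
    let mut writer = ZipWriter::new(fs::File::create(path)?);
    // One set of options per entry kind: uncompressed vs. bzip2 at level 1.
    let stored = SimpleFileOptions::default().compression_method(CompressionMethod::Stored);
    let bzip2 = SimpleFileOptions::default()
        .compression_method(CompressionMethod::Bzip2)
        .compression_level(Some(1));

    let random_data = vec![0u8; 200 * 1024]; // stand-in for 200K of random bytes
    let text_data = fs::read("king-lear.html")?; // stand-in for the ~246K text entry (hypothetical path)

    // Interleave the two entry kinds, repeated `repetitions` times (20x in the fixture).
    for i in 0..repetitions {
        writer.start_file(format!("random-{i}.dat"), stored.clone())?;
        writer.write_all(&random_data)?;
        writer.start_file(format!("text-{i}.dat"), bzip2.clone())?;
        writer.write_all(&text_data)?;
    }
    writer.finish()?;
    Ok(())
}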