@@ -5,6 +5,8 @@ use super::types::Num;
5
5
use super :: POOL ;
6
6
use num_traits:: { AsPrimitive , FromPrimitive } ;
7
7
8
+ const EPSILON : f64 = 1e-12 ; // Small value to avoid precision errors
9
+
8
10
// ---------------------- Binary search ----------------------
9
11
10
12
/// Binary search for the index position of the given value in the given array.
@@ -74,6 +76,17 @@ fn binary_search_with_mid<T: Copy + PartialOrd>(
74
76
75
77
// ------------------- Equidistant binning --------------------
76
78
79
+ #[ inline( always) ]
80
+ fn sequential_add_mul ( start_val : f64 , add_val : f64 , mul : usize , epsilon : f64 ) -> f64 {
81
+ // start_val + add_val * mul will sometimes overflow when add_val * mul is
82
+ // larger than the largest positive f64 number.
83
+ // This code should not fail when: (f64::MAX - start_val) < (add_val * mul).
84
+ // -> Note that f64::MAX - start_val can be up to 2 * f64::MAX.
85
+ let mul_2: f64 = mul as f64 / 2.0 ;
86
+ // start_val + add_val * mul_2 as f64 + add_val * (mul - mul_2) as f64
87
+ start_val + add_val * mul_2 + add_val * mul_2 + epsilon
88
+ }
89
+
77
90
// --- Sequential version
78
91
79
92
pub ( crate ) fn get_equidistant_bin_idx_iterator < T > (
@@ -100,7 +113,8 @@ where
100
113
let start_idx: usize = idx; // Start index of the bin (previous end index)
101
114
102
115
// Update the search value
103
- let search_value: T = T :: from_f64 ( arr0 + val_step * ( i + 1 ) as f64 ) . unwrap ( ) ;
116
+ let search_value: T =
117
+ T :: from_f64 ( sequential_add_mul ( arr0, val_step, i + 1 , EPSILON ) ) . unwrap ( ) ;
104
118
if arr[ start_idx] >= search_value {
105
119
// If the first value of the bin is already >= the search value,
106
120
// then the bin is empty.
@@ -116,16 +130,6 @@ where
116
130
117
131
// --- Parallel version
118
132
119
- #[ inline( always) ]
120
- fn sequential_add_mul ( start_val : f64 , add_val : f64 , mul : usize ) -> f64 {
121
- // start_val + add_val * mul will sometimes overflow when add_val * mul is
122
- // larger than the largest positive f64 number.
123
- // This code should not fail when: (f64::MAX - start_val) < (add_val * mul).
124
- // -> Note that f64::MAX - start_val can be up to 2 * f64::MAX.
125
- let mul_2: usize = mul / 2 ;
126
- start_val + add_val * mul_2 as f64 + add_val * ( mul - mul_2) as f64
127
- }
128
-
129
133
pub ( crate ) fn get_equidistant_bin_idx_iterator_parallel < T > (
130
134
arr : & [ T ] ,
131
135
nb_bins : usize ,
@@ -139,16 +143,18 @@ where
139
143
let val_step: f64 =
140
144
( arr[ arr. len ( ) - 1 ] . as_ ( ) / nb_bins as f64 ) - ( arr[ 0 ] . as_ ( ) / nb_bins as f64 ) ;
141
145
let arr0: f64 = arr[ 0 ] . as_ ( ) ; // The first value of the array
142
- // 2. Compute the number of threads & bins per thread
146
+
147
+ // 2. Compute the number of threads & bins per thread
143
148
let n_threads = std:: cmp:: min ( POOL . current_num_threads ( ) , nb_bins) ;
144
149
let nb_bins_per_thread = nb_bins / n_threads;
145
150
let nb_bins_last_thread = nb_bins - nb_bins_per_thread * ( n_threads - 1 ) ;
151
+
146
152
// 3. Iterate over the number of threads
147
153
// -> for each thread perform the binary search sorted with moving left and
148
154
// yield the indices (using the same idea as for the sequential version)
149
155
( 0 ..n_threads) . into_par_iter ( ) . map ( move |i| {
150
156
// The moving index & value (for the thread)
151
- let arr0_thr: f64 = sequential_add_mul ( arr0, val_step, i * nb_bins_per_thread) ; // Search value
157
+ let arr0_thr: f64 = sequential_add_mul ( arr0, val_step, i * nb_bins_per_thread, EPSILON ) ; // Search value
152
158
let start_value: T = T :: from_f64 ( arr0_thr) . unwrap ( ) ;
153
159
// Search the start of the fist bin (of the thread)
154
160
let mut idx: usize = 0 ; // Index of the search value
@@ -198,6 +204,22 @@ mod tests {
198
204
#[ case( 101 ) ]
199
205
fn nb_bins ( #[ case] nb_bins : usize ) { }
200
206
207
+ #[ test]
208
+ fn test_sequential_add_mul ( ) {
209
+ assert_eq ! ( sequential_add_mul( 0.0 , 1.0 , 0 , 0.0 ) , 0.0 ) ;
210
+ assert_eq ! ( sequential_add_mul( -1.0 , 1.0 , 1 , 0.0 ) , 0.0 ) ;
211
+ assert_eq ! ( sequential_add_mul( -1.0 , 1.0 , 1 , EPSILON ) , EPSILON ) ;
212
+ // Really large values
213
+ assert_eq ! ( sequential_add_mul( 0.0 , 1.0 , 1_000_000 , 0.0 ) , 1_000_000.0 ) ;
214
+ assert ! ( sequential_add_mul( f64 :: MIN , f64 :: MAX / 2.0 , 3 , 0.0 ) < f64 :: MAX , ) ;
215
+ // TODO: the next tests fails due to very minor precision error
216
+ // -> however, this precision error is needed to avoid the issue with m4_with_x
217
+ // assert_eq!(
218
+ // sequential_add_mul(f64::MIN, f64::MAX / 2.0, 3, 0.0),
219
+ // f64::MIN + f64::MAX / 2.0 + f64::MAX
220
+ // );
221
+ }
222
+
201
223
#[ test]
202
224
fn test_search_sorted_identicial_to_np_linspace_searchsorted ( ) {
203
225
// Create a 0..9999 array
0 commit comments