@@ -26,30 +26,17 @@ where
2626 // User-supplied bins. Note these are actually bin edges. Check for monotonicity.
2727 // If we only have one edge, we have no bins.
2828 let bin_len = bins. len ( ) ;
29- // We also check for uniformity of bins. We declare uniformity if the difference
30- // between the largest and smallest bin is < 0.00001 the average bin size.
3129 if bin_len > 1 {
32- let mut smallest = bins[ 1 ] - bins[ 0 ] ;
33- let mut largest = smallest;
34- let mut avg_bin_size = smallest;
35- for i in 1 ..bins. len ( ) {
36- let d = bins[ i] - bins[ i - 1 ] ;
37- if d <= 0.0 {
30+ for i in 1 ..bin_len {
31+ if ( bins[ i] - bins[ i - 1 ] ) <= 0.0 {
3832 return Err ( PolarsError :: ComputeError (
3933 "bins must increase monotonically" . into ( ) ,
4034 ) ) ;
4135 }
42- if d > largest {
43- largest = d;
44- } else if d < smallest {
45- smallest = d;
46- }
47- avg_bin_size += d;
4836 }
49- let uniform = ( largest - smallest) / ( avg_bin_size / bin_len as f64 ) < 0.00001 ;
50- ( bins. to_vec ( ) , uniform)
37+ ( bins. to_vec ( ) , false )
5138 } else {
52- ( Vec :: < f64 > :: new ( ) , false ) // uniformity doesn't matter here
39+ ( Vec :: < f64 > :: new ( ) , false )
5340 }
5441 } ,
5542 ( bin_count, None ) => {
@@ -105,20 +92,22 @@ where
10592 let min_break: f64 = breaks[ 0 ] ;
10693 let max_break: f64 = breaks[ num_bins] ;
10794 let scale = num_bins as f64 / ( max_break - min_break) ;
95+ let max_idx = num_bins - 1 ;
10896
10997 for chunk in ca. downcast_iter ( ) {
11098 for item in chunk. non_null_values_iter ( ) {
11199 let item = item. to_f64 ( ) . unwrap ( ) ;
112100 if item > min_break && item <= max_break {
113- let idx = scale * ( item - min_break) ;
114- let idx_floor = idx. floor ( ) ;
115- let idx = if idx == idx_floor {
116- idx - 1.0
117- } else {
118- idx_floor
119- } ;
120- /* idx > (num_bins - 1) may happen due to floating point representation imprecision */
121- let idx = cmp:: min ( idx as usize , num_bins - 1 ) ;
101+ // idx > (num_bins - 1) may happen due to floating point representation imprecision
102+ let mut idx = cmp:: min ( ( scale * ( item - min_break) ) as usize , max_idx) ;
103+
104+ // Adjust for float imprecision providing idx > 1 ULP of the breaks
105+ if item <= breaks[ idx] {
106+ idx -= 1 ;
107+ } else if item > breaks[ idx + 1 ] {
108+ idx += 1 ;
109+ }
110+
122111 count[ idx] += 1 ;
123112 } else if item == min_break {
124113 count[ 0 ] += 1 ;
0 commit comments