1
+ use super :: super :: helpers:: Average ;
1
2
use super :: super :: types:: Num ;
2
3
use ndarray:: { Array1 , ArrayView1 } ;
3
4
use num_traits:: AsPrimitive ;
4
5
use std:: cmp;
5
6
7
+ #[ inline( always) ]
8
+ fn f64_to_i64unsigned ( v : f64 ) -> i64 {
9
+ // Transmute to i64 and mask out the sign bit
10
+ let v: i64 = unsafe { std:: mem:: transmute :: < f64 , i64 > ( v) } ;
11
+ v & 0x7FFF_FFFF_FFFF_FFFF
12
+ }
13
+
6
14
// ----------------------------------- NON-PARALLEL ------------------------------------
7
15
8
16
// ----------- WITH X
@@ -11,67 +19,79 @@ pub fn lttb_with_x<Tx: Num + AsPrimitive<f64>, Ty: Num + AsPrimitive<f64>>(
11
19
x : ArrayView1 < Tx > ,
12
20
y : ArrayView1 < Ty > ,
13
21
n_out : usize ,
14
- ) -> Array1 < usize > {
22
+ ) -> Array1 < usize >
23
+ where
24
+ for < ' a > ArrayView1 < ' a , Ty > : Average ,
25
+ {
15
26
assert_eq ! ( x. len( ) , y. len( ) ) ;
16
- if n_out >= x. len ( ) || n_out == 0 {
27
+ if n_out >= x. len ( ) {
17
28
return Array1 :: from ( ( 0 ..x. len ( ) ) . collect :: < Vec < usize > > ( ) ) ;
18
29
}
19
30
assert ! ( n_out >= 3 ) ; // avoid division by 0
20
31
21
32
// Bucket size. Leave room for start and end data points.
22
- let every = ( x. len ( ) - 2 ) as f64 / ( n_out - 2 ) as f64 ;
33
+ let every: f64 = ( x. len ( ) - 2 ) as f64 / ( n_out - 2 ) as f64 ;
23
34
// Initially a is the first point in the triangle.
24
- let mut a = 0 ;
35
+ let mut a: usize = 0 ;
25
36
26
37
let mut sampled_indices: Array1 < usize > = Array1 :: < usize > :: default ( n_out) ;
27
38
39
+ let x_ptr = x. as_ptr ( ) ;
40
+ let y_ptr = y. as_ptr ( ) ;
41
+
28
42
// Always add the first point
29
43
sampled_indices[ 0 ] = 0 ;
30
44
31
45
for i in 0 ..n_out - 2 {
32
46
// Calculate point average for next bucket (containing c).
33
- let mut avg_x: f64 = 0.0 ;
34
- let mut avg_y: f64 = 0.0 ;
35
-
36
47
let avg_range_start = ( every * ( i + 1 ) as f64 ) as usize + 1 ;
37
48
let avg_range_end = cmp:: min ( ( every * ( i + 2 ) as f64 ) as usize + 1 , x. len ( ) ) ;
38
49
39
- for i in avg_range_start..avg_range_end {
40
- avg_x += x[ i] . as_ ( ) ;
41
- avg_y += y[ i] . as_ ( ) ;
42
- }
43
- // Slicing seems to be a lot slower
44
- // let avg_x: Tx = x.slice(s![avg_range_start..avg_range_end]).sum();
45
- // let avg_y: Ty = y.slice(s![avg_range_start..avg_range_end]).sum();
46
- let avg_x: f64 = avg_x / ( avg_range_end - avg_range_start) as f64 ;
47
- let avg_y: f64 = avg_y / ( avg_range_end - avg_range_start) as f64 ;
50
+ // ArrayBase::slice is rather expensive..
51
+ let y_slice = unsafe {
52
+ ArrayView1 :: from_shape_ptr ( avg_range_end - avg_range_start, y_ptr. add ( avg_range_start) )
53
+ } ;
54
+ let avg_y: f64 = y_slice. average ( ) ;
55
+ // TODO: avg_y could be approximated argminmax instead of mean?
56
+ // TODO: below is faster than above, but not as accurate
57
+ // let avg_x: f64 = (x_slice[avg_range_end - 1].as_() + x_slice[avg_range_start].as_()) / 2.0;
58
+ let avg_x: f64 =
59
+ unsafe { ( x. uget ( avg_range_end - 1 ) . as_ ( ) + x. uget ( avg_range_start) . as_ ( ) ) / 2.0 } ;
48
60
49
61
// Get the range for this bucket
50
62
let range_offs = ( every * i as f64 ) as usize + 1 ;
51
- let range_to = ( every * ( i + 1 ) as f64 ) as usize + 1 ;
63
+ let range_to = avg_range_start ; // = start of the next bucket
52
64
53
65
// Point a
54
- let point_ax = x[ a] . as_ ( ) ;
55
- let point_ay = y[ a] . as_ ( ) ;
56
-
57
- let mut max_area = -1.0 ;
58
- for i in range_offs..range_to {
59
- // Calculate triangle area over three buckets
60
- let area = ( ( point_ax - avg_x) * ( y[ i] . as_ ( ) - point_ay)
61
- - ( point_ax - x[ i] . as_ ( ) ) * ( avg_y - point_ay) )
62
- . abs ( ) ;
63
- if area > max_area {
64
- max_area = area;
65
- a = i;
66
- }
67
- }
68
- // Vectorized implementation
69
- // let point_ax: Tx = x[a];
70
- // let point_ay: Ty = y[a];
71
- // let ar_x: Vec<Tx> = x.slice(s![range_offs..range_to]).into_iter().map(|v| point_ax - *v).collect();
72
- // let ar_y: Vec<Ty> = y.slice(s![range_offs..range_to]).into_iter().map(|v| *v - point_ay).collect();
73
- // let max_idx: usize = (ar_x.iter().zip(ar_y.iter()).map(|(x, y)| (x.to_f64().unwrap() * avg_y - y.to_f64().unwrap() * avg_x).abs()).enumerate().max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap()).unwrap().0) + range_offs;
74
- // a = max_idx;
66
+ let point_ax = unsafe { x. uget ( a) . as_ ( ) } ;
67
+ let point_ay = unsafe { y. uget ( a) . as_ ( ) } ;
68
+
69
+ let d1 = point_ax - avg_x;
70
+ let d2 = avg_y - point_ay;
71
+ let offset: f64 = d1 * point_ay + d2 * point_ax;
72
+
73
+ let x_slice =
74
+ unsafe { std:: slice:: from_raw_parts ( x_ptr. add ( range_offs) , range_to - range_offs) } ;
75
+ let y_slice =
76
+ unsafe { std:: slice:: from_raw_parts ( y_ptr. add ( range_offs) , range_to - range_offs) } ;
77
+ ( _, a) = y_slice. iter ( ) . zip ( x_slice. iter ( ) ) . enumerate ( ) . fold (
78
+ ( -1i64 , a) ,
79
+ |( max_area, a) , ( i, ( y_, x_) ) | {
80
+ // Calculate triangle area over three buckets
81
+ // -> area = d1 * (y_ - point_ay) - (point_ax - x_) * d2;
82
+ // let area = d1 * y[i].as_() + d2 * x[i].as_() - offset;
83
+ // let area = d1 * y_slice[i].as_() + d2 * x_slice[i].as_() - offset;
84
+ let area = d1 * y_. as_ ( ) + d2 * x_. as_ ( ) - offset;
85
+ let area = f64_to_i64unsigned ( area) ; // this is faster than abs
86
+ if area > max_area {
87
+ ( area, i)
88
+ } else {
89
+ ( max_area, a)
90
+ }
91
+ } ,
92
+ ) ;
93
+ a += range_offs;
94
+
75
95
sampled_indices[ i + 1 ] = a;
76
96
}
77
97
@@ -83,67 +103,92 @@ pub fn lttb_with_x<Tx: Num + AsPrimitive<f64>, Ty: Num + AsPrimitive<f64>>(
83
103
84
104
// ----------- WITHOUT X
85
105
86
- pub fn lttb_without_x < Ty : Num + AsPrimitive < f64 > > (
87
- // TODO: why is this slower than the one with x?
88
- y : ArrayView1 < Ty > ,
89
- n_out : usize ,
90
- ) -> Array1 < usize > {
91
- if n_out >= y. len ( ) || n_out == 0 {
106
+ pub fn lttb_without_x < Ty : Num + AsPrimitive < f64 > > ( y : ArrayView1 < Ty > , n_out : usize ) -> Array1 < usize >
107
+ where
108
+ for < ' a > ArrayView1 < ' a , Ty > : Average ,
109
+ {
110
+ if n_out >= y. len ( ) {
92
111
return Array1 :: from ( ( 0 ..y. len ( ) ) . collect :: < Vec < usize > > ( ) ) ;
93
112
}
94
113
assert ! ( n_out >= 3 ) ; // avoid division by 0
95
114
96
115
// Bucket size. Leave room for start and end data points.
97
- let every = ( y. len ( ) - 2 ) as f64 / ( n_out - 2 ) as f64 ;
116
+ let every: f64 = ( y. len ( ) - 2 ) as f64 / ( n_out - 2 ) as f64 ;
98
117
// Initially a is the first point in the triangle.
99
- let mut a = 0 ;
118
+ let mut a: usize = 0 ;
100
119
101
120
let mut sampled_indices: Array1 < usize > = Array1 :: < usize > :: default ( n_out) ;
102
121
122
+ let y_ptr = y. as_ptr ( ) ;
123
+
103
124
// Always add the first point
104
125
sampled_indices[ 0 ] = 0 ;
105
126
106
127
for i in 0 ..n_out - 2 {
107
128
// Calculate point average for next bucket (containing c).
108
- let mut avg_y: f64 = 0.0 ;
109
-
110
129
let avg_range_start = ( every * ( i + 1 ) as f64 ) as usize + 1 ;
111
130
let avg_range_end = cmp:: min ( ( every * ( i + 2 ) as f64 ) as usize + 1 , y. len ( ) ) ;
112
131
113
- for i in avg_range_start..avg_range_end {
114
- avg_y += y[ i] . as_ ( ) ;
115
- }
116
- // Slicing seems to be a lot slower
117
- // let avg_x: Tx = x.slice(s![avg_range_start..avg_range_end]).sum();
118
- let avg_y: f64 = avg_y / ( avg_range_end - avg_range_start) as f64 ;
132
+ // ArrayBase::slice is rather expensive..
133
+ let y_slice = unsafe {
134
+ ArrayView1 :: from_shape_ptr ( avg_range_end - avg_range_start, y_ptr. add ( avg_range_start) )
135
+ } ;
136
+ let avg_y: f64 = y_slice. average ( ) ;
119
137
let avg_x: f64 = ( avg_range_start + avg_range_end - 1 ) as f64 / 2.0 ;
120
138
121
139
// Get the range for this bucket
122
140
let range_offs = ( every * i as f64 ) as usize + 1 ;
123
- let range_to = ( every * ( i + 1 ) as f64 ) as usize + 1 ;
141
+ let range_to = avg_range_start ; // = start of the next bucket
124
142
125
143
// Point a
126
- let point_ay = y [ a ] . as_ ( ) ;
144
+ let point_ay = unsafe { y . uget ( a ) . as_ ( ) } ;
127
145
let point_ax = a as f64 ;
128
146
129
- let mut max_area = -1.0 ;
130
- for i in range_offs..range_to {
131
- // Calculate triangle area over three buckets
132
- let area = ( ( point_ax - avg_x) * ( y[ i] . as_ ( ) - point_ay)
133
- - ( point_ax - i as f64 ) * ( avg_y - point_ay) )
134
- . abs ( ) ;
135
- if area > max_area {
136
- max_area = area;
137
- a = i;
138
- }
139
- }
140
- // Vectorized implementation
141
- // let point_ax: Tx = x[a];
142
- // let point_ay: Ty = y[a];
143
- // let ar_x: Vec<Tx> = x.slice(s![range_offs..range_to]).into_iter().map(|v| point_ax - *v).collect();
144
- // let ar_y: Vec<Ty> = y.slice(s![range_offs..range_to]).into_iter().map(|v| *v - point_ay).collect();
145
- // let max_idx: usize = (ar_x.iter().zip(ar_y.iter()).map(|(x, y)| (x.to_f64().unwrap() * avg_y - y.to_f64().unwrap() * avg_x).abs()).enumerate().max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap()).unwrap().0) + range_offs;
146
- // a = max_idx;
147
+ let d1 = point_ax - avg_x;
148
+ let d2 = avg_y - point_ay;
149
+ let point_ax = point_ax - range_offs as f64 ;
150
+
151
+ // let mut max_area = -1i64;
152
+ let mut ax_x = point_ax; // point_ax - x[i]
153
+ let offset: f64 = d1 * point_ay;
154
+
155
+ // TODO: for some reason is this faster than the loop below -> check if this is true for other devices
156
+ let y_slice =
157
+ unsafe { ArrayView1 :: from_shape_ptr ( range_to - range_offs, y_ptr. add ( range_offs) ) } ;
158
+ ( _, a) = y_slice
159
+ . iter ( )
160
+ . enumerate ( )
161
+ . fold ( ( -1i64 , a) , |( max_area, a) , ( i, y) | {
162
+ // Calculate triangle area over three buckets
163
+ // -> area: f64 = d1 * y[i].as_() - ax_x * d2;
164
+ let area: f64 = d1 * y. as_ ( ) - ax_x * d2 - offset;
165
+ let area: i64 = f64_to_i64unsigned ( area) ;
166
+ ax_x -= 1.0 ;
167
+ if area > max_area {
168
+ ( area, i + range_offs)
169
+ } else {
170
+ ( max_area, a)
171
+ }
172
+ } ) ;
173
+
174
+ // let y_slice = unsafe { std::slice::from_raw_parts(y_ptr.add(range_offs), range_to - range_offs) };
175
+ // (_, a) = y_slice
176
+ // .iter()
177
+ // .enumerate()
178
+ // .fold((-1i64, a), |(max_area, a), (i, y_)| {
179
+ // // Calculate triangle area over three buckets
180
+ // // -> area: f64 = d1 * y[i].as_() - ax_x * d2;
181
+ // let area: f64 = d1 * y_.as_() - ax_x * d2 - offset;
182
+ // let area: i64 = f64_to_i64unsigned(area);
183
+ // ax_x -= 1.0;
184
+ // if area > max_area {
185
+ // (area, i)
186
+ // } else {
187
+ // (max_area, a)
188
+ // }
189
+ // });
190
+ // a += range_offs;
191
+
147
192
sampled_indices[ i + 1 ] = a;
148
193
}
149
194
0 commit comments