Skip to content

Commit 108c557

Browse files
authored
⚡ optimize lttb implementation (#13)
* ⚡ faster LTTB implementation * 🧹 * ♻️ add average trait + refactoring * ⚡ optimize algorithms * 🚀 prepare for first main release * 🧹 * ✨ * 🙈 new alpha release * 🙈 new alpha release * 🧹
1 parent b4cfff0 commit 108c557

18 files changed

+276
-129
lines changed

README.md

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
[![Testing](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-tsdownsample.yml/badge.svg)](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-tsdownsample.yml)
88
<!-- TODO: codecov -->
99

10-
**📈 Time series downsampling** algorithms for visualization
10+
Extremely fast **📈 time series downsampling** for visualization, written in Rust.
1111

1212
## Features ✨
1313

@@ -41,8 +41,6 @@
4141

4242
## Install
4343

44-
> ❗🚨❗ This package is currently under development - correct installation is not yet guaranteed ❗🚨❗
45-
4644
```bash
4745
pip install tsdownsample
4846
```

downsample_rs/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ license = "MIT"
99
[dependencies]
1010
# TODO: perhaps use polars?
1111
ndarray = {version = "0.15.6", default-features = false, features = ["rayon"] }
12-
argminmax = { version = "0.3" , features = ["half"] }
12+
argminmax = { version = "0.3.1" , features = ["half"] }
13+
# argminmax = { path = "../../argminmax" , features = ["half", "ndarray"] }
1314
half = { version = "2.1", default-features = false , features=["num-traits"], optional = true}
1415
num-traits = { version = "0.2.15", default-features = false }
1516
rayon = { version = "1.6.0", default-features = false }

downsample_rs/src/helpers.rs

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
#[cfg(feature = "half")]
2+
use half::f16;
3+
use ndarray::ArrayView1;
4+
5+
// ------------ AVERAGE
6+
7+
// TODO: future work -> this can be optimized by using SIMD instructions (similar to the argminmax crate)
8+
// TODO: this implementation can overfow (but numpy does the same)
9+
10+
// This trait implements the average function for all types that this crate
11+
// supports. It is used in the lttb algorithm.
12+
// We intend to use the same implementation for all types as is used in the
13+
// numpy (Python) library (- which uses add reduce):
14+
// - f64 & f32: use the data type to calculate the average
15+
// - f16: cast to f32 and calculate the average
16+
// - signed & unsigned integers: cast to f64 and calculate the average
17+
// Note: the only difference with the numpy implementation is that this
18+
// implementation always returns a f64, while numpy returns f32 for f32 and f16
19+
// (however the calculation is done in f32 - only the result is casted to f64).
20+
// See more details: https://github.com/numpy/numpy/blob/8cec82012694571156e8d7696307c848a7603b4e/numpy/core/_methods.py#L164
21+
22+
pub trait Average {
23+
fn average(self) -> f64;
24+
}
25+
26+
impl Average for ArrayView1<'_, f64> {
27+
fn average(self) -> f64 {
28+
self.mean().unwrap()
29+
}
30+
}
31+
32+
impl Average for ArrayView1<'_, f32> {
33+
fn average(self) -> f64 {
34+
self.mean().unwrap() as f64
35+
}
36+
}
37+
38+
#[cfg(feature = "half")]
39+
impl Average for ArrayView1<'_, f16> {
40+
fn average(self) -> f64 {
41+
self.fold(0f32, |acc, &x| acc + x.to_f32()) as f64 / self.len() as f64
42+
}
43+
}
44+
45+
macro_rules! impl_average {
46+
($($t:ty)*) => ($(
47+
impl Average for ArrayView1<'_, $t> {
48+
#[inline(always)]
49+
fn average(self) -> f64 {
50+
self.fold(0f64, |acc, &x| acc + x as f64) / self.len() as f64
51+
}
52+
}
53+
)*)
54+
}
55+
56+
// Implement for all signed and unsigned integers
57+
impl_average!(i8 i16 i32 i64 u8 u16 u32 u64);

downsample_rs/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,6 @@ pub mod minmaxlttb;
66
pub use minmaxlttb::*;
77
pub mod m4;
88
pub use m4::*;
9+
pub(crate) mod helpers;
10+
pub(crate) mod searchsorted;
911
pub(crate) mod types;
10-
pub(crate) mod utils;

downsample_rs/src/lttb/scalar.rs

Lines changed: 119 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,16 @@
1+
use super::super::helpers::Average;
12
use super::super::types::Num;
23
use ndarray::{Array1, ArrayView1};
34
use num_traits::AsPrimitive;
45
use std::cmp;
56

7+
#[inline(always)]
8+
fn f64_to_i64unsigned(v: f64) -> i64 {
9+
// Transmute to i64 and mask out the sign bit
10+
let v: i64 = unsafe { std::mem::transmute::<f64, i64>(v) };
11+
v & 0x7FFF_FFFF_FFFF_FFFF
12+
}
13+
614
// ----------------------------------- NON-PARALLEL ------------------------------------
715

816
// ----------- WITH X
@@ -11,67 +19,79 @@ pub fn lttb_with_x<Tx: Num + AsPrimitive<f64>, Ty: Num + AsPrimitive<f64>>(
1119
x: ArrayView1<Tx>,
1220
y: ArrayView1<Ty>,
1321
n_out: usize,
14-
) -> Array1<usize> {
22+
) -> Array1<usize>
23+
where
24+
for<'a> ArrayView1<'a, Ty>: Average,
25+
{
1526
assert_eq!(x.len(), y.len());
16-
if n_out >= x.len() || n_out == 0 {
27+
if n_out >= x.len() {
1728
return Array1::from((0..x.len()).collect::<Vec<usize>>());
1829
}
1930
assert!(n_out >= 3); // avoid division by 0
2031

2132
// Bucket size. Leave room for start and end data points.
22-
let every = (x.len() - 2) as f64 / (n_out - 2) as f64;
33+
let every: f64 = (x.len() - 2) as f64 / (n_out - 2) as f64;
2334
// Initially a is the first point in the triangle.
24-
let mut a = 0;
35+
let mut a: usize = 0;
2536

2637
let mut sampled_indices: Array1<usize> = Array1::<usize>::default(n_out);
2738

39+
let x_ptr = x.as_ptr();
40+
let y_ptr = y.as_ptr();
41+
2842
// Always add the first point
2943
sampled_indices[0] = 0;
3044

3145
for i in 0..n_out - 2 {
3246
// Calculate point average for next bucket (containing c).
33-
let mut avg_x: f64 = 0.0;
34-
let mut avg_y: f64 = 0.0;
35-
3647
let avg_range_start = (every * (i + 1) as f64) as usize + 1;
3748
let avg_range_end = cmp::min((every * (i + 2) as f64) as usize + 1, x.len());
3849

39-
for i in avg_range_start..avg_range_end {
40-
avg_x += x[i].as_();
41-
avg_y += y[i].as_();
42-
}
43-
// Slicing seems to be a lot slower
44-
// let avg_x: Tx = x.slice(s![avg_range_start..avg_range_end]).sum();
45-
// let avg_y: Ty = y.slice(s![avg_range_start..avg_range_end]).sum();
46-
let avg_x: f64 = avg_x / (avg_range_end - avg_range_start) as f64;
47-
let avg_y: f64 = avg_y / (avg_range_end - avg_range_start) as f64;
50+
// ArrayBase::slice is rather expensive..
51+
let y_slice = unsafe {
52+
ArrayView1::from_shape_ptr(avg_range_end - avg_range_start, y_ptr.add(avg_range_start))
53+
};
54+
let avg_y: f64 = y_slice.average();
55+
// TODO: avg_y could be approximated argminmax instead of mean?
56+
// TODO: below is faster than above, but not as accurate
57+
// let avg_x: f64 = (x_slice[avg_range_end - 1].as_() + x_slice[avg_range_start].as_()) / 2.0;
58+
let avg_x: f64 =
59+
unsafe { (x.uget(avg_range_end - 1).as_() + x.uget(avg_range_start).as_()) / 2.0 };
4860

4961
// Get the range for this bucket
5062
let range_offs = (every * i as f64) as usize + 1;
51-
let range_to = (every * (i + 1) as f64) as usize + 1;
63+
let range_to = avg_range_start; // = start of the next bucket
5264

5365
// Point a
54-
let point_ax = x[a].as_();
55-
let point_ay = y[a].as_();
56-
57-
let mut max_area = -1.0;
58-
for i in range_offs..range_to {
59-
// Calculate triangle area over three buckets
60-
let area = ((point_ax - avg_x) * (y[i].as_() - point_ay)
61-
- (point_ax - x[i].as_()) * (avg_y - point_ay))
62-
.abs();
63-
if area > max_area {
64-
max_area = area;
65-
a = i;
66-
}
67-
}
68-
// Vectorized implementation
69-
// let point_ax: Tx = x[a];
70-
// let point_ay: Ty = y[a];
71-
// let ar_x: Vec<Tx> = x.slice(s![range_offs..range_to]).into_iter().map(|v| point_ax - *v).collect();
72-
// let ar_y: Vec<Ty> = y.slice(s![range_offs..range_to]).into_iter().map(|v| *v - point_ay).collect();
73-
// let max_idx: usize = (ar_x.iter().zip(ar_y.iter()).map(|(x, y)| (x.to_f64().unwrap() * avg_y - y.to_f64().unwrap() * avg_x).abs()).enumerate().max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap()).unwrap().0) + range_offs;
74-
// a = max_idx;
66+
let point_ax = unsafe { x.uget(a).as_() };
67+
let point_ay = unsafe { y.uget(a).as_() };
68+
69+
let d1 = point_ax - avg_x;
70+
let d2 = avg_y - point_ay;
71+
let offset: f64 = d1 * point_ay + d2 * point_ax;
72+
73+
let x_slice =
74+
unsafe { std::slice::from_raw_parts(x_ptr.add(range_offs), range_to - range_offs) };
75+
let y_slice =
76+
unsafe { std::slice::from_raw_parts(y_ptr.add(range_offs), range_to - range_offs) };
77+
(_, a) = y_slice.iter().zip(x_slice.iter()).enumerate().fold(
78+
(-1i64, a),
79+
|(max_area, a), (i, (y_, x_))| {
80+
// Calculate triangle area over three buckets
81+
// -> area = d1 * (y_ - point_ay) - (point_ax - x_) * d2;
82+
// let area = d1 * y[i].as_() + d2 * x[i].as_() - offset;
83+
// let area = d1 * y_slice[i].as_() + d2 * x_slice[i].as_() - offset;
84+
let area = d1 * y_.as_() + d2 * x_.as_() - offset;
85+
let area = f64_to_i64unsigned(area); // this is faster than abs
86+
if area > max_area {
87+
(area, i)
88+
} else {
89+
(max_area, a)
90+
}
91+
},
92+
);
93+
a += range_offs;
94+
7595
sampled_indices[i + 1] = a;
7696
}
7797

@@ -83,67 +103,92 @@ pub fn lttb_with_x<Tx: Num + AsPrimitive<f64>, Ty: Num + AsPrimitive<f64>>(
83103

84104
// ----------- WITHOUT X
85105

86-
pub fn lttb_without_x<Ty: Num + AsPrimitive<f64>>(
87-
// TODO: why is this slower than the one with x?
88-
y: ArrayView1<Ty>,
89-
n_out: usize,
90-
) -> Array1<usize> {
91-
if n_out >= y.len() || n_out == 0 {
106+
pub fn lttb_without_x<Ty: Num + AsPrimitive<f64>>(y: ArrayView1<Ty>, n_out: usize) -> Array1<usize>
107+
where
108+
for<'a> ArrayView1<'a, Ty>: Average,
109+
{
110+
if n_out >= y.len() {
92111
return Array1::from((0..y.len()).collect::<Vec<usize>>());
93112
}
94113
assert!(n_out >= 3); // avoid division by 0
95114

96115
// Bucket size. Leave room for start and end data points.
97-
let every = (y.len() - 2) as f64 / (n_out - 2) as f64;
116+
let every: f64 = (y.len() - 2) as f64 / (n_out - 2) as f64;
98117
// Initially a is the first point in the triangle.
99-
let mut a = 0;
118+
let mut a: usize = 0;
100119

101120
let mut sampled_indices: Array1<usize> = Array1::<usize>::default(n_out);
102121

122+
let y_ptr = y.as_ptr();
123+
103124
// Always add the first point
104125
sampled_indices[0] = 0;
105126

106127
for i in 0..n_out - 2 {
107128
// Calculate point average for next bucket (containing c).
108-
let mut avg_y: f64 = 0.0;
109-
110129
let avg_range_start = (every * (i + 1) as f64) as usize + 1;
111130
let avg_range_end = cmp::min((every * (i + 2) as f64) as usize + 1, y.len());
112131

113-
for i in avg_range_start..avg_range_end {
114-
avg_y += y[i].as_();
115-
}
116-
// Slicing seems to be a lot slower
117-
// let avg_x: Tx = x.slice(s![avg_range_start..avg_range_end]).sum();
118-
let avg_y: f64 = avg_y / (avg_range_end - avg_range_start) as f64;
132+
// ArrayBase::slice is rather expensive..
133+
let y_slice = unsafe {
134+
ArrayView1::from_shape_ptr(avg_range_end - avg_range_start, y_ptr.add(avg_range_start))
135+
};
136+
let avg_y: f64 = y_slice.average();
119137
let avg_x: f64 = (avg_range_start + avg_range_end - 1) as f64 / 2.0;
120138

121139
// Get the range for this bucket
122140
let range_offs = (every * i as f64) as usize + 1;
123-
let range_to = (every * (i + 1) as f64) as usize + 1;
141+
let range_to = avg_range_start; // = start of the next bucket
124142

125143
// Point a
126-
let point_ay = y[a].as_();
144+
let point_ay = unsafe { y.uget(a).as_() };
127145
let point_ax = a as f64;
128146

129-
let mut max_area = -1.0;
130-
for i in range_offs..range_to {
131-
// Calculate triangle area over three buckets
132-
let area = ((point_ax - avg_x) * (y[i].as_() - point_ay)
133-
- (point_ax - i as f64) * (avg_y - point_ay))
134-
.abs();
135-
if area > max_area {
136-
max_area = area;
137-
a = i;
138-
}
139-
}
140-
// Vectorized implementation
141-
// let point_ax: Tx = x[a];
142-
// let point_ay: Ty = y[a];
143-
// let ar_x: Vec<Tx> = x.slice(s![range_offs..range_to]).into_iter().map(|v| point_ax - *v).collect();
144-
// let ar_y: Vec<Ty> = y.slice(s![range_offs..range_to]).into_iter().map(|v| *v - point_ay).collect();
145-
// let max_idx: usize = (ar_x.iter().zip(ar_y.iter()).map(|(x, y)| (x.to_f64().unwrap() * avg_y - y.to_f64().unwrap() * avg_x).abs()).enumerate().max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap()).unwrap().0) + range_offs;
146-
// a = max_idx;
147+
let d1 = point_ax - avg_x;
148+
let d2 = avg_y - point_ay;
149+
let point_ax = point_ax - range_offs as f64;
150+
151+
// let mut max_area = -1i64;
152+
let mut ax_x = point_ax; // point_ax - x[i]
153+
let offset: f64 = d1 * point_ay;
154+
155+
// TODO: for some reason is this faster than the loop below -> check if this is true for other devices
156+
let y_slice =
157+
unsafe { ArrayView1::from_shape_ptr(range_to - range_offs, y_ptr.add(range_offs)) };
158+
(_, a) = y_slice
159+
.iter()
160+
.enumerate()
161+
.fold((-1i64, a), |(max_area, a), (i, y)| {
162+
// Calculate triangle area over three buckets
163+
// -> area: f64 = d1 * y[i].as_() - ax_x * d2;
164+
let area: f64 = d1 * y.as_() - ax_x * d2 - offset;
165+
let area: i64 = f64_to_i64unsigned(area);
166+
ax_x -= 1.0;
167+
if area > max_area {
168+
(area, i + range_offs)
169+
} else {
170+
(max_area, a)
171+
}
172+
});
173+
174+
// let y_slice = unsafe { std::slice::from_raw_parts(y_ptr.add(range_offs), range_to - range_offs) };
175+
// (_, a) = y_slice
176+
// .iter()
177+
// .enumerate()
178+
// .fold((-1i64, a), |(max_area, a), (i, y_)| {
179+
// // Calculate triangle area over three buckets
180+
// // -> area: f64 = d1 * y[i].as_() - ax_x * d2;
181+
// let area: f64 = d1 * y_.as_() - ax_x * d2 - offset;
182+
// let area: i64 = f64_to_i64unsigned(area);
183+
// ax_x -= 1.0;
184+
// if area > max_area {
185+
// (area, i)
186+
// } else {
187+
// (max_area, a)
188+
// }
189+
// });
190+
// a += range_offs;
191+
147192
sampled_indices[i + 1] = a;
148193
}
149194

0 commit comments

Comments
 (0)