predict-idlab
diff --git a/‎README.md
Lines changed: 1 addition & 3 deletions b/‎README.md
Lines changed: 1 addition & 3 deletions
diff --git a/‎downsample_rs/Cargo.toml
Lines changed: 2 additions & 1 deletion b/‎downsample_rs/Cargo.toml
Lines changed: 2 additions & 1 deletion
diff --git a/‎downsample_rs/src/helpers.rs
Lines changed: 57 additions & 0 deletions b/‎downsample_rs/src/helpers.rs
Lines changed: 57 additions & 0 deletions
diff --git a/‎downsample_rs/src/lib.rs
Lines changed: 2 additions & 1 deletion b/‎downsample_rs/src/lib.rs
Lines changed: 2 additions & 1 deletion
diff --git a/‎downsample_rs/src/lttb/scalar.rs
Lines changed: 119 additions & 74 deletions b/‎downsample_rs/src/lttb/scalar.rs
Lines changed: 119 additions & 74 deletions
@@ -7,7 +7,7 @@
 [![Testing](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-tsdownsample.yml/badge.svg)](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-tsdownsample.yml)
 <!-- TODO: codecov -->
 
-**📈 Time series downsampling** algorithms for visualization
+Extremely fast **📈 time series downsampling** for visualization, written in Rust.
 
 ## Features ✨
 
@@ -41,8 +41,6 @@
 
 ## Install
 
-> ❗🚨❗ This package is currently under development - correct installation is not yet guaranteed ❗🚨❗
-
 ```bash
 pip install tsdownsample
 ```
 
@@ -9,7 +9,8 @@ license = "MIT"
 [dependencies]
 # TODO: perhaps use polars?
 ndarray = {version = "0.15.6", default-features = false, features = ["rayon"] }
-argminmax = { version = "0.3" , features = ["half"] }
+argminmax = { version = "0.3.1" , features = ["half"] }
+# argminmax = { path = "../../argminmax" , features = ["half", "ndarray"] }
 half = { version = "2.1", default-features = false , features=["num-traits"], optional = true}
 num-traits = { version = "0.2.15", default-features = false }
 rayon = { version = "1.6.0", default-features = false }
 
@@ -0,0 +1,57 @@
+#[cfg(feature = "half")]
+use half::f16;
+use ndarray::ArrayView1;
+
+// ------------ AVERAGE
+
+// TODO: future work -> this can be optimized by using SIMD instructions (similar to the argminmax crate)
+// TODO: this implementation can overflow (but numpy does the same)
+
+// This trait implements the average function for all types that this crate
+// supports. It is used in the lttb algorithm.
+// We intend to use the same implementation for all types as is used in the
+// numpy (Python) library (- which uses add reduce):
+//  - f64 & f32: use the data type to calculate the average
+//  - f16: cast to f32 and calculate the average
+//  - signed & unsigned integers: cast to f64 and calculate the average
+// Note: the only difference with the numpy implementation is that this
+// implementation always returns an f64, while numpy returns f32 for f32 and f16
+// (the calculation here is still done in f32 - only the result is cast to f64).
+// See more details: https://github.com/numpy/numpy/blob/8cec82012694571156e8d7696307c848a7603b4e/numpy/core/_methods.py#L164
+
+// Arithmetic mean of a 1-D array view, always reported as an f64.
+// Implemented below for `ArrayView1` of every element type this crate supports.
+pub trait Average {
+    // Returns the mean of all elements. Takes `self` by value, so the view
+    // handle is consumed by the call.
+    fn average(self) -> f64;
+}
+
+// f64 views: delegate to ndarray's `mean`; the result is already an f64.
+impl Average for ArrayView1<'_, f64> {
+    fn average(self) -> f64 {
+        // `mean()` yields an Option; the unwrap panics when it is None
+        // (ndarray documents None for an empty array).
+        self.mean().unwrap()
+    }
+}
+
+// f32 views: the mean is computed by ndarray in f32 and only the final
+// result is widened to f64.
+impl Average for ArrayView1<'_, f32> {
+    fn average(self) -> f64 {
+        // `mean()` yields an Option; the unwrap panics when it is None
+        // (ndarray documents None for an empty array).
+        self.mean().unwrap() as f64
+    }
+}
+
+// f16 views (only compiled with the "half" feature): every element is
+// converted to f32 and summed in an f32 accumulator; the sum is then cast to
+// f64 and divided by the element count.
+#[cfg(feature = "half")]
+impl Average for ArrayView1<'_, f16> {
+    fn average(self) -> f64 {
+        // For an empty view this evaluates 0.0 / 0.0, i.e. NaN, rather than panicking.
+        self.fold(0f32, |acc, &x| acc + x.to_f32()) as f64 / self.len() as f64
+    }
+}
+
+// Generates `Average` for `ArrayView1` of each type in a whitespace-separated
+// list. Every element is cast to f64 with `as` and summed in an f64
+// accumulator; the sum is divided by the element count.
+macro_rules! impl_average {
+    ($($t:ty)*) => ($(
+        impl Average for ArrayView1<'_, $t> {
+            #[inline(always)]
+            fn average(self) -> f64 {
+                // For an empty view this evaluates 0.0 / 0.0, i.e. NaN.
+                // Note: `as f64` is lossy for 64-bit integers above 2^53.
+                self.fold(0f64, |acc, &x| acc + x as f64) / self.len() as f64
+            }
+        }
+    )*)
+}
+
+// Implement for all signed and unsigned integers
+impl_average!(i8 i16 i32 i64 u8 u16 u32 u64);
@@ -6,5 +6,6 @@ pub mod minmaxlttb;
 pub use minmaxlttb::*;
 pub mod m4;
 pub use m4::*;
+pub(crate) mod helpers;
+pub(crate) mod searchsorted;
 pub(crate) mod types;
-pub(crate) mod utils;
 
@@ -1,8 +1,16 @@
+use super::super::helpers::Average;
 use super::super::types::Num;
 use ndarray::{Array1, ArrayView1};
 use num_traits::AsPrimitive;
 use std::cmp;
 
+// Reinterprets the bits of `v` as an i64 and clears the sign bit, giving a
+// non-negative integer. For non-NaN inputs, comparing two results as integers
+// orders them the same way as comparing the absolute values of the floats
+// (IEEE 754 bit patterns of non-negative floats are monotonic).
+// NaN inputs map to values larger than the one produced for infinity.
+// NOTE(review): `v.to_bits() as i64` yields the same bits without `unsafe`.
+#[inline(always)]
+fn f64_to_i64unsigned(v: f64) -> i64 {
+    // Transmute to i64 and mask out the sign bit
+    // SAFETY: f64 and i64 are both 8 bytes and every bit pattern is a valid i64.
+    let v: i64 = unsafe { std::mem::transmute::<f64, i64>(v) };
+    v & 0x7FFF_FFFF_FFFF_FFFF
+}
+
 // ----------------------------------- NON-PARALLEL ------------------------------------
 
 // ----------- WITH X
@@ -11,67 +19,79 @@ pub fn lttb_with_x<Tx: Num + AsPrimitive<f64>, Ty: Num + AsPrimitive<f64>>(
     x: ArrayView1<Tx>,
     y: ArrayView1<Ty>,
     n_out: usize,
-) -> Array1<usize> {
+) -> Array1<usize>
+where
+    for<'a> ArrayView1<'a, Ty>: Average,
+{
     assert_eq!(x.len(), y.len());
-    if n_out >= x.len() || n_out == 0 {
+    if n_out >= x.len() {
         return Array1::from((0..x.len()).collect::<Vec<usize>>());
     }
     assert!(n_out >= 3); // avoid division by 0
 
     // Bucket size. Leave room for start and end data points.
-    let every = (x.len() - 2) as f64 / (n_out - 2) as f64;
+    let every: f64 = (x.len() - 2) as f64 / (n_out - 2) as f64;
     // Initially a is the first point in the triangle.
-    let mut a = 0;
+    let mut a: usize = 0;
 
     let mut sampled_indices: Array1<usize> = Array1::<usize>::default(n_out);
 
+    let x_ptr = x.as_ptr();
+    let y_ptr = y.as_ptr();
+
     // Always add the first point
     sampled_indices[0] = 0;
 
     for i in 0..n_out - 2 {
         // Calculate point average for next bucket (containing c).
-        let mut avg_x: f64 = 0.0;
-        let mut avg_y: f64 = 0.0;
-
         let avg_range_start = (every * (i + 1) as f64) as usize + 1;
         let avg_range_end = cmp::min((every * (i + 2) as f64) as usize + 1, x.len());
 
-        for i in avg_range_start..avg_range_end {
-            avg_x += x[i].as_();
-            avg_y += y[i].as_();
-        }
-        // Slicing seems to be a lot slower
-        // let avg_x: Tx = x.slice(s![avg_range_start..avg_range_end]).sum();
-        // let avg_y: Ty = y.slice(s![avg_range_start..avg_range_end]).sum();
-        let avg_x: f64 = avg_x / (avg_range_end - avg_range_start) as f64;
-        let avg_y: f64 = avg_y / (avg_range_end - avg_range_start) as f64;
+        // ArrayBase::slice is rather expensive..
+        let y_slice = unsafe {
+            ArrayView1::from_shape_ptr(avg_range_end - avg_range_start, y_ptr.add(avg_range_start))
+        };
+        let avg_y: f64 = y_slice.average();
+        // TODO: avg_y could be approximated argminmax instead of mean?
+        // TODO: below is faster than above, but not as accurate
+        // let avg_x: f64 = (x_slice[avg_range_end - 1].as_() + x_slice[avg_range_start].as_()) / 2.0;
+        let avg_x: f64 =
+            unsafe { (x.uget(avg_range_end - 1).as_() + x.uget(avg_range_start).as_()) / 2.0 };
 
         // Get the range for this bucket
         let range_offs = (every * i as f64) as usize + 1;
-        let range_to = (every * (i + 1) as f64) as usize + 1;
+        let range_to = avg_range_start; // = start of the next bucket
 
         // Point a
-        let point_ax = x[a].as_();
-        let point_ay = y[a].as_();
-
-        let mut max_area = -1.0;
-        for i in range_offs..range_to {
-            // Calculate triangle area over three buckets
-            let area = ((point_ax - avg_x) * (y[i].as_() - point_ay)
-                - (point_ax - x[i].as_()) * (avg_y - point_ay))
-                .abs();
-            if area > max_area {
-                max_area = area;
-                a = i;
-            }
-        }
-        // Vectorized implementation
-        // let point_ax: Tx = x[a];
-        // let point_ay: Ty = y[a];
-        // let ar_x: Vec<Tx> = x.slice(s![range_offs..range_to]).into_iter().map(|v| point_ax - *v).collect();
-        // let ar_y: Vec<Ty> = y.slice(s![range_offs..range_to]).into_iter().map(|v| *v - point_ay).collect();
-        // let max_idx: usize = (ar_x.iter().zip(ar_y.iter()).map(|(x, y)| (x.to_f64().unwrap() * avg_y - y.to_f64().unwrap() * avg_x).abs()).enumerate().max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap()).unwrap().0) + range_offs;
-        // a = max_idx;
+        let point_ax = unsafe { x.uget(a).as_() };
+        let point_ay = unsafe { y.uget(a).as_() };
+
+        let d1 = point_ax - avg_x;
+        let d2 = avg_y - point_ay;
+        let offset: f64 = d1 * point_ay + d2 * point_ax;
+
+        let x_slice =
+            unsafe { std::slice::from_raw_parts(x_ptr.add(range_offs), range_to - range_offs) };
+        let y_slice =
+            unsafe { std::slice::from_raw_parts(y_ptr.add(range_offs), range_to - range_offs) };
+        (_, a) = y_slice.iter().zip(x_slice.iter()).enumerate().fold(
+            (-1i64, a),
+            |(max_area, a), (i, (y_, x_))| {
+                // Calculate triangle area over three buckets
+                // -> area = d1 * (y_ - point_ay) - (point_ax - x_) * d2;
+                // let area = d1 * y[i].as_() + d2 * x[i].as_() - offset;
+                // let area = d1 * y_slice[i].as_() + d2 * x_slice[i].as_() - offset;
+                let area = d1 * y_.as_() + d2 * x_.as_() - offset;
+                let area = f64_to_i64unsigned(area); // this is faster than abs
+                if area > max_area {
+                    (area, i)
+                } else {
+                    (max_area, a)
+                }
+            },
+        );
+        a += range_offs;
+
         sampled_indices[i + 1] = a;
     }
 
@@ -83,67 +103,92 @@ pub fn lttb_with_x<Tx: Num + AsPrimitive<f64>, Ty: Num + AsPrimitive<f64>>(
 
 // ----------- WITHOUT X
 
-pub fn lttb_without_x<Ty: Num + AsPrimitive<f64>>(
-    // TODO: why is this slower than the one with x?
-    y: ArrayView1<Ty>,
-    n_out: usize,
-) -> Array1<usize> {
-    if n_out >= y.len() || n_out == 0 {
+pub fn lttb_without_x<Ty: Num + AsPrimitive<f64>>(y: ArrayView1<Ty>, n_out: usize) -> Array1<usize>
+where
+    for<'a> ArrayView1<'a, Ty>: Average,
+{
+    if n_out >= y.len() {
         return Array1::from((0..y.len()).collect::<Vec<usize>>());
     }
     assert!(n_out >= 3); // avoid division by 0
 
     // Bucket size. Leave room for start and end data points.
-    let every = (y.len() - 2) as f64 / (n_out - 2) as f64;
+    let every: f64 = (y.len() - 2) as f64 / (n_out - 2) as f64;
     // Initially a is the first point in the triangle.
-    let mut a = 0;
+    let mut a: usize = 0;
 
     let mut sampled_indices: Array1<usize> = Array1::<usize>::default(n_out);
 
+    let y_ptr = y.as_ptr();
+
     // Always add the first point
     sampled_indices[0] = 0;
 
     for i in 0..n_out - 2 {
         // Calculate point average for next bucket (containing c).
-        let mut avg_y: f64 = 0.0;
-
         let avg_range_start = (every * (i + 1) as f64) as usize + 1;
         let avg_range_end = cmp::min((every * (i + 2) as f64) as usize + 1, y.len());
 
-        for i in avg_range_start..avg_range_end {
-            avg_y += y[i].as_();
-        }
-        // Slicing seems to be a lot slower
-        // let avg_x: Tx = x.slice(s![avg_range_start..avg_range_end]).sum();
-        let avg_y: f64 = avg_y / (avg_range_end - avg_range_start) as f64;
+        // ArrayBase::slice is rather expensive..
+        let y_slice = unsafe {
+            ArrayView1::from_shape_ptr(avg_range_end - avg_range_start, y_ptr.add(avg_range_start))
+        };
+        let avg_y: f64 = y_slice.average();
         let avg_x: f64 = (avg_range_start + avg_range_end - 1) as f64 / 2.0;
 
         // Get the range for this bucket
         let range_offs = (every * i as f64) as usize + 1;
-        let range_to = (every * (i + 1) as f64) as usize + 1;
+        let range_to = avg_range_start; // = start of the next bucket
 
         // Point a
-        let point_ay = y[a].as_();
+        let point_ay = unsafe { y.uget(a).as_() };
         let point_ax = a as f64;
 
-        let mut max_area = -1.0;
-        for i in range_offs..range_to {
-            // Calculate triangle area over three buckets
-            let area = ((point_ax - avg_x) * (y[i].as_() - point_ay)
-                - (point_ax - i as f64) * (avg_y - point_ay))
-                .abs();
-            if area > max_area {
-                max_area = area;
-                a = i;
-            }
-        }
-        // Vectorized implementation
-        // let point_ax: Tx = x[a];
-        // let point_ay: Ty = y[a];
-        // let ar_x: Vec<Tx> = x.slice(s![range_offs..range_to]).into_iter().map(|v| point_ax - *v).collect();
-        // let ar_y: Vec<Ty> = y.slice(s![range_offs..range_to]).into_iter().map(|v| *v - point_ay).collect();
-        // let max_idx: usize = (ar_x.iter().zip(ar_y.iter()).map(|(x, y)| (x.to_f64().unwrap() * avg_y - y.to_f64().unwrap() * avg_x).abs()).enumerate().max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap()).unwrap().0) + range_offs;
-        // a = max_idx;
+        let d1 = point_ax - avg_x;
+        let d2 = avg_y - point_ay;
+        let point_ax = point_ax - range_offs as f64;
+
+        // let mut max_area = -1i64;
+        let mut ax_x = point_ax; // point_ax - x[i]
+        let offset: f64 = d1 * point_ay;
+
+        // TODO: for some reason this is faster than the loop below -> check if this is true for other devices
+        let y_slice =
+            unsafe { ArrayView1::from_shape_ptr(range_to - range_offs, y_ptr.add(range_offs)) };
+        (_, a) = y_slice
+            .iter()
+            .enumerate()
+            .fold((-1i64, a), |(max_area, a), (i, y)| {
+                // Calculate triangle area over three buckets
+                // -> area: f64 = d1 * y[i].as_() - ax_x * d2;
+                let area: f64 = d1 * y.as_() - ax_x * d2 - offset;
+                let area: i64 = f64_to_i64unsigned(area);
+                ax_x -= 1.0;
+                if area > max_area {
+                    (area, i + range_offs)
+                } else {
+                    (max_area, a)
+                }
+            });
+
+        // let y_slice = unsafe { std::slice::from_raw_parts(y_ptr.add(range_offs), range_to - range_offs) };
+        // (_, a) = y_slice
+        //     .iter()
+        //     .enumerate()
+        //     .fold((-1i64, a), |(max_area, a), (i, y_)| {
+        //         // Calculate triangle area over three buckets
+        //         // -> area: f64 = d1 * y[i].as_() - ax_x * d2;
+        //         let area: f64 = d1 * y_.as_() - ax_x * d2 - offset;
+        //         let area: i64 = f64_to_i64unsigned(area);
+        //         ax_x -= 1.0;
+        //         if area > max_area {
+        //             (area, i)
+        //         } else {
+        //             (max_area, a)
+        //         }
+        //     });
+        // a += range_offs;
+
         sampled_indices[i + 1] = a;
     }