Merge pull request #1 from remydubois/feature/argpartition

remydubois · web-flow · commit aa9dc73f2deb · 2021-03-31T20:34:07.000+02:00
Feature/argpartition
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # LSNMS
 Speeding up Non Maximum Suppresion ran on very large images by a several folds factor, using a sparse implementation of NMS.  
-This project describes a "sparse" implementation of Non Maximum Suppression, useful in the case of very high dimensional images data, when the amount of predicted instances to prune becomes considerable (> 10,000 objects).
+This project becomes useful in the case of very high-dimensional image data, when the number of predicted instances to prune becomes considerable (> 10,000 objects).
 
 <p float="center">
   <center><img src="https://raw.githubusercontent.com/remydubois/lsnms/main/assets/images/timings_medium_image.png?token=AEJMSVNEIBF2PMWIVASMKATAMIKHS" width="700" />
@@ -121,22 +121,19 @@ tree = BallTree(data, leaf_size=16)
 
 
 ## Performances
-The BallTree implemented in this repo was timed against scikit-learn's `neighbors` one.
+The BallTree implemented in this repo was timed against scikit-learn's `neighbors` one. Note that the runtimes are not directly comparable, since the sklearn implementation allows a node to contain
+between `leaf_size` and `2 * leaf_size` datapoints. To account for this, I timed my implementation against an sklearn tree built with `int(0.67 * leaf_size)` as `leaf_size`.
 ### Tree building time
 <p float="center">
   <center><img src="https://github.com/remydubois/lsnms/blob/main/assets/images/building_timings.png" width="700" />
   <figcaption>Trees building times comparison</figcaption></center>
 </p>
 
-The (minor) slow down observed against sklearn implementation is probably related to the node-splitting process. I used the median cutoff (compute median, then assign datapoints depending on their value above or below median) but it is suboptimal: a proper pivot algorithm could easily be implemented.
 
 ### Tree query time
 <p float="center">
   <center><img src="https://github.com/remydubois/lsnms/blob/main/assets/images/query_timings.png" width="700" />
   <figcaption>Trees query times comparison (single query, radius=100) in a 1000x1000 space</figcaption></center>
 </p>
 
-Query time are somehow identical. However, my implementation does seem to not scale as well as scikit-learn's one, a minor slowdown could be observed for extremely large datasets (million-ish data points).
-
-### Warnings
-Because input data needs to be typed: the dimensionality of the process is fixed in advance. This BallTree implementation can not work on 3D and above data (although it is a one-liner fix).
+Query times are roughly identical.
diff --git a/assets/images/building_timings.png b/assets/images/building_timings.png
diff --git a/assets/images/query_timings.png b/assets/images/query_timings.png
diff --git a/changelog.md b/changelog.md
@@ -0,0 +1,19 @@
+Changelog
+=========
+
+
+(unreleased)
+------------
+- - added changelog, upgraded to version 0.1.1. [Rémy Dubois]
+- - black. [Rémy Dubois]
+- - improved the node splitting method - Updated the runtimes comparison
+  versus sklearn - fixed little typos in the readme - fixed typechecks
+  in the trees. [Rémy Dubois]
+- -readme. [Rémy Dubois]
+- Typo + fixed image urls. [Rémy Dubois]
+- - poetry. [Rémy Dubois]
+- - poetry. [Rémy Dubois]
+- - poetry. [Rémy Dubois]
+- Initial commit. [Rémy Dubois]
+
+
diff --git a/lsnms/balltree.py b/lsnms/balltree.py
@@ -54,6 +54,9 @@ def __init__(self, data, leaf_size=16, indices=None):
         # Stores the data
         self.data = data
 
+        if len(self.data) == 0:
+            raise ValueError("Empty data")
+
         # Stores indices of each data point
         if indices is None:
             self.indices = np.arange(len(data))
@@ -139,8 +142,8 @@ def query_radius(self, X, max_radius):
         """
         if X.ndim > 1:
             raise ValueError("query_radius only works on single query point.")
-        if len(X) != 2:
-            raise ValueError("Query point must be two-dimensional")
+        if X.shape[-1] != self.dimensionality:
+            raise ValueError("Tree and query dimensionality do not match")
         # Initialize empty list of int64
         # Needs to be typed
         buffer = [0][:0]
diff --git a/lsnms/kdtree.py b/lsnms/kdtree.py
@@ -57,6 +57,9 @@ def __init__(self, data, leaf_size=16, axis=0, indices=None):
         self.axis = axis
         self.dimensionality = data.shape[-1]
 
+        if len(self.data) == 0:
+            raise ValueError("Empty data")
+
         # Stores indices of each data point
         if indices is None:
             self.indices = np.arange(len(data))
@@ -145,8 +148,8 @@ def query_radius(self, X, max_radius):
         """
         if X.ndim > 1:
             raise ValueError("query_radius only works on single query point.")
-        if len(X) != 2:
-            raise ValueError("Query point must be two-dimensional")
+        if X.shape[-1] != self.dimensionality:
+            raise ValueError("Tree and query dimensionality do not match")
         # Initialize empty list of int64
         # Needs to be typed
         buffer = [0][:0]
diff --git a/lsnms/util.py b/lsnms/util.py
@@ -142,8 +142,7 @@ def max_spread_axis(data):
 def split_along_axis(data, axis):
     """
     Splits the data along axis in two datasets of equal size.
-    Note that this could probably be optimized further, by implementing the median algorithm from
-    scratch.
+    This method uses an adapted re-implementation of `np.argpartition`
 
     Parameters
     ----------
@@ -157,17 +156,7 @@ def split_along_axis(data, axis):
     Tuple[np.array]
         Left data point indices, right data point indices
     """
-    indices = np.arange(len(data))
-    cap = np.median(data[:, axis])
-    mask = data[:, axis] <= cap
-    n_left = mask.sum()
-    # Account for the case where all positions along this axis are equal: split in the middle
-    if n_left == len(data) or n_left == 0:
-        left = indices[: len(indices) // 2]
-        right = indices[len(indices) // 2 :]
-    else:
-        left = indices[mask]
-        right = indices[np.logical_not(mask)]
+    left, right = median_argsplit(data[:, axis])
     return left, right
 
 
@@ -217,3 +206,99 @@ def englobing_box(data):
         bounds.insert(j, data[:, j].min())
         bounds.insert(2 * j + 1, data[:, j].max())
     return np.array(bounds)
+
+
+@njit
+def _partition(A, low, high, indices):
+    """
+    This is straight from numba master:
+    https://github.com/numba/numba/blob/b5bd9c618e20985acb0b300d52d57595ef6f5442/numba/np/arraymath.py#L1155
+    I modified it so that the swaps are applied to the indices as well, because I need an argpartition.
+    """
+    mid = (low + high) >> 1
+    # NOTE: the pattern of swaps below for the pivot choice and the
+    # partitioning gives good results (i.e. regular O(n log n))
+    # on sorted, reverse-sorted, and uniform arrays.  Subtle changes
+    # risk breaking this property.
+    # Use median of three {low, middle, high} as the pivot
+    if A[mid] < A[low]:
+        A[low], A[mid] = A[mid], A[low]
+        indices[low], indices[mid] = indices[mid], indices[low]
+    if A[high] < A[mid]:
+        A[high], A[mid] = A[mid], A[high]
+        indices[high], indices[mid] = indices[mid], indices[high]
+    if A[mid] < A[low]:
+        A[low], A[mid] = A[mid], A[low]
+        indices[low], indices[mid] = indices[mid], indices[low]
+    pivot = A[mid]
+
+    A[high], A[mid] = A[mid], A[high]
+    indices[high], indices[mid] = indices[mid], indices[high]
+    i = low
+    j = high - 1
+    while True:
+        while i < high and A[i] < pivot:
+            i += 1
+        while j >= low and pivot < A[j]:
+            j -= 1
+        if i >= j:
+            break
+        A[i], A[j] = A[j], A[i]
+        indices[i], indices[j] = indices[j], indices[i]
+        i += 1
+        j -= 1
+    # Put the pivot back in its final place (all items before `i`
+    # are smaller than the pivot, all items at/after `i` are larger)
+    # print(A)
+    A[i], A[high] = A[high], A[i]
+    indices[i], indices[high] = indices[high], indices[i]
+
+    return i
+
+
+@njit
+def _select(arry, k, low, high):
+    """
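+    This is adapted from numba master:
+    https://github.com/numba/numba/blob/b5bd9c618e20985acb0b300d52d57595ef6f5442/numba/np/arraymath.py#L1155
+    Select the k'th smallest element in arry[low:high + 1] by partitioning `arry` in place; returns the permuted indices and the final pivot position (equal to k).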
+    """
+    indices = np.arange(len(arry))
+    i = _partition(arry, low, high, indices)
+    while i != k:
+        if i < k:
+            low = i + 1
+            i = _partition(arry, low, high, indices)
+        else:
+            high = i - 1
+            i = _partition(arry, low, high, indices)
+    return indices, i
+
+
+@njit
+def median_argsplit(arry):
+    """
+    Splits `arry` into two sets of indices: the indices of the `len(arry) // 2` smallest values,
+    and the indices of the remaining values. The pivot is the value of rank `len(arry) // 2`, i.e. the (upper) median.
+
+    This is approximately three times faster than computing the median,
+    and then finding the indices of values below (left indices) and above (right indices).
+
+    Parameters
+    ----------
+    arry : np.array
+        One dimensional values array
+
+    Returns
+    -------
+    Tuple[np.array]
+        Indices of values below median, indices of values above median
+    """
+    low = 0
+    high = len(arry) - 1
+    k = len(arry) >> 1
+    tmp_arry = arry.flatten()
+    indices, i = _select(tmp_arry, k, low, high)
+    left = indices[:k]
+    right = indices[k:]
+    return left, right
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "lsnms"
-version = "0.1.0"
+version = "0.1.1"
 description = "Large Scale Non Maximum Suppression"
 authors = ["Rémy Dubois <remydubois14@gmail.com>"]
 license = "MIT"
diff --git a/setup.py b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name="lsnms",
-    version="0.1.0",
+    version="0.1.1",
     description="Large Scale Non Maximum Suppression",
     author="Rémy Dubois",
     install_requires=["numpy==1.19.5", "numba==0.53.1"],
diff --git a/tests/timings_balltree.py b/tests/timings_balltree.py
@@ -15,11 +15,12 @@ def test_tree_query_timing():
     ns = np.arange(1000, 200000, 10000)
     ts = []
     naive_ts = []
+    leaf_size = 64
     repeats = 100
     for n in ns:
         data = np.random.uniform(0, 1000, (n, 2))
-        sk_tree = skBT(data, leaf_size=16)
-        tree = BallTree(data, leaf_size=16)
+        sk_tree = skBT(data, leaf_size=leaf_size)
+        tree = BallTree(data, leaf_size=int(leaf_size * 0.67))
         _ = tree.query_radius(data[0], 200.0)
         timer = Timer(lambda: tree.query_radius(data[0], 100.0))
         ts.append(timer.timeit(number=repeats) / repeats * 1000)
@@ -42,13 +43,14 @@ def test_tree_building_timing():
 
     ns = np.arange(1000, 300000, 25000)
     ts = []
+    leaf_size = 64
     naive_ts = []
     for n in ns:
         data = np.random.uniform(0, n, (n, 2))
         _ = BallTree(data, 16)
-        timer = Timer(lambda: BallTree(data, 16))
+        timer = Timer(lambda: BallTree(data, leaf_size))
         ts.append(timer.timeit(number=5) / 5)
-        naive_timer = Timer(lambda: skBT(data, 16))
+        naive_timer = Timer(lambda: skBT(data, int(leaf_size * 0.67)))
         naive_ts.append(naive_timer.timeit(5) / 5)
 
     with plt.xkcd():
diff --git a/tests/timings_kdtree.py b/tests/timings_kdtree.py
@@ -104,12 +104,13 @@ def test_tree_query_timing():
 
     ns = np.arange(1000, 200000, 10000)
     ts = []
+    leaf_size = 64
     naive_ts = []
     repeats = 100
     for n in ns:
         data = np.random.uniform(0, 1000, (n, 2))
-        sk_tree = skKDT(data, leaf_size=16)
-        tree = KDTree(data, leaf_size=16)
+        sk_tree = skKDT(data, leaf_size=int(leaf_size * 0.67))
+        tree = KDTree(data, leaf_size=leaf_size)
         _ = tree.query_radius(data[0], 200.0)
         timer = Timer(lambda: tree.query_radius(data[0], 100.0))
         ts.append(timer.timeit(number=repeats) / repeats * 1000)
@@ -130,14 +131,15 @@ def test_tree_query_timing():
 
 def test_tree_building_timing():
     ns = np.arange(1000, 300000, 25000)
+    leaf_size = 64
     ts = []
     naive_ts = []
     for n in ns:
         data = np.random.uniform(0, n, (n, 2))
         _ = KDTree(data, 16)
-        timer = Timer(lambda: KDTree(data, 16))
+        timer = Timer(lambda: KDTree(data, leaf_size))
         ts.append(timer.timeit(number=5) / 5)
-        naive_timer = Timer(lambda: skKDT(data, 16))
+        naive_timer = Timer(lambda: skKDT(data, int(leaf_size * 0.67)))
         naive_ts.append(naive_timer.timeit(5) / 5)
 
     with plt.xkcd():