Skip to content

Commit 6e4c18b

Browse files
Significantly improved performance of clustersDbscan (#2885)
* Converted clustersDbscan to use geokdbush rather than rbush. The former is better suited to static sets of points such as those processed in this module.
* Added a TODO to re-enable some disabled runtime checks in clustersDbscan when we can.
1 parent d286211 commit 6e4c18b

File tree

4 files changed

+65
-62
lines changed

4 files changed

+65
-62
lines changed

packages/turf-clusters-dbscan/index.ts

Lines changed: 23 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import { GeoJsonProperties, FeatureCollection, Point } from "geojson";
22
import { clone } from "@turf/clone";
3-
import { distance } from "@turf/distance";
4-
import { degreesToRadians, lengthToDegrees, Units } from "@turf/helpers";
5-
import RBush from "rbush";
3+
import { Units } from "@turf/helpers";
4+
import KDBush from "kdbush";
5+
import * as geokdbush from "geokdbush";
66

77
/**
88
* Point classification within the cluster.
@@ -66,6 +66,9 @@ function clustersDbscan(
6666
} = {}
6767
): FeatureCollection<Point, DbscanProps> {
6868
// Input validation being handled by Typescript
69+
// TODO oops! No it isn't. TypeScript doesn't do runtime checking. We should
70+
// re-enable these checks, though we will have to wait for a major version bump
71+
// as more restrictive checks could break currently working code.
6972
// collectionOf(points, 'Point', 'points must consist of a FeatureCollection of only Points');
7073
// if (maxDistance === null || maxDistance === undefined) throw new Error('maxDistance is required');
7174
// if (!(Math.sign(maxDistance) > 0)) throw new Error('maxDistance is invalid');
@@ -77,11 +80,13 @@ function clustersDbscan(
7780
// Defaults
7881
const minPoints = options.minPoints || 3;
7982

80-
// Calculate the distance in degrees for region queries
81-
const latDistanceInDegrees = lengthToDegrees(maxDistance, options.units);
82-
8383
// Create a spatial index
84-
var tree = new RBush(points.features.length);
84+
const kdIndex = new KDBush(points.features.length);
85+
// Index each point for spatial queries
86+
for (const point of points.features) {
87+
kdIndex.add(point.geometry.coordinates[0], point.geometry.coordinates[1]);
88+
}
89+
kdIndex.finish();
8590

8691
// Keeps track of whether a point has been visited or not.
8792
var visited = points.features.map((_) => false);
@@ -95,54 +100,22 @@ function clustersDbscan(
95100
// Keeps track of the clusterId for each point
96101
var clusterIds: number[] = points.features.map((_) => -1);
97102

98-
// Index each point for spatial queries
99-
tree.load(
100-
points.features.map((point, index) => {
101-
var [x, y] = point.geometry.coordinates;
102-
return {
103-
minX: x,
104-
minY: y,
105-
maxX: x,
106-
maxY: y,
107-
index: index,
108-
} as IndexedPoint;
109-
})
110-
);
111-
112103
// Function to find neighbors of a point within a given distance
113104
const regionQuery = (index: number): IndexedPoint[] => {
114105
const point = points.features[index];
115106
const [x, y] = point.geometry.coordinates;
116107

117-
const minY = Math.max(y - latDistanceInDegrees, -90.0);
118-
const maxY = Math.min(y + latDistanceInDegrees, 90.0);
119-
120-
const lonDistanceInDegrees = (function () {
121-
// Handle the case where the bounding box crosses the poles
122-
if (minY < 0 && maxY > 0) {
123-
return latDistanceInDegrees;
124-
}
125-
if (Math.abs(minY) < Math.abs(maxY)) {
126-
return latDistanceInDegrees / Math.cos(degreesToRadians(maxY));
127-
} else {
128-
return latDistanceInDegrees / Math.cos(degreesToRadians(minY));
129-
}
130-
})();
131-
132-
const minX = Math.max(x - lonDistanceInDegrees, -360.0);
133-
const maxX = Math.min(x + lonDistanceInDegrees, 360.0);
134-
135-
// Calculate the bounding box for the region query
136-
const bbox = { minX, minY, maxX, maxY };
137-
return (tree.search(bbox) as ReadonlyArray<IndexedPoint>).filter(
138-
(neighbor) => {
139-
const neighborIndex = neighbor.index;
140-
const neighborPoint = points.features[neighborIndex];
141-
const distanceInKm = distance(point, neighborPoint, {
142-
units: "kilometers",
143-
});
144-
return distanceInKm <= maxDistance;
145-
}
108+
return (
109+
geokdbush
110+
// @ts-expect-error 2345 until https://github.com/mourner/geokdbush/issues/20 is resolved
111+
.around<number>(kdIndex, x, y, undefined, maxDistance)
112+
.map((id) => ({
113+
minX: points.features[id].geometry.coordinates[0],
114+
minY: points.features[id].geometry.coordinates[1],
115+
maxX: points.features[id].geometry.coordinates[0],
116+
maxY: points.features[id].geometry.coordinates[1],
117+
index: id,
118+
}))
146119
);
147120
};
148121

packages/turf-clusters-dbscan/package.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@
6363
"@turf/centroid": "workspace:*",
6464
"@turf/clusters": "workspace:*",
6565
"@types/benchmark": "^2.1.5",
66-
"@types/rbush": "^3.0.4",
6766
"@types/tape": "^5.8.1",
6867
"benchmark": "^2.1.4",
6968
"chromatism": "^3.0.0",
@@ -77,11 +76,12 @@
7776
},
7877
"dependencies": {
7978
"@turf/clone": "workspace:*",
80-
"@turf/distance": "workspace:*",
8179
"@turf/helpers": "workspace:*",
8280
"@turf/meta": "workspace:*",
8381
"@types/geojson": "^7946.0.10",
84-
"rbush": "^3.0.1",
82+
"@types/geokdbush": "^1.1.5",
83+
"geokdbush": "^2.0.1",
84+
"kdbush": "^4.0.2",
8585
"tslib": "^2.8.1"
8686
}
8787
}

packages/turf-clusters-dbscan/test/in/fiji.geojson

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,41 +3,47 @@
33
"features": [
44
{
55
"type": "Feature",
6+
"properties": {},
67
"geometry": {
78
"type": "Point",
89
"coordinates": [179.439697265625, -16.55196172197251]
910
}
1011
},
1112
{
1213
"type": "Feature",
14+
"properties": {},
1315
"geometry": {
1416
"type": "Point",
1517
"coordinates": [179.01123046874997, -16.97274101999901]
1618
}
1719
},
1820
{
1921
"type": "Feature",
22+
"properties": {},
2023
"geometry": {
2124
"type": "Point",
2225
"coordinates": [179.505615234375, -17.035777250427184]
2326
}
2427
},
2528
{
2629
"type": "Feature",
30+
"properties": {},
2731
"geometry": {
2832
"type": "Point",
2933
"coordinates": [180.75805664062497, -16.41500926733237]
3034
}
3135
},
3236
{
3337
"type": "Feature",
38+
"properties": {},
3439
"geometry": {
3540
"type": "Point",
3641
"coordinates": [181.1865234375, -16.615137799987075]
3742
}
3843
},
3944
{
4045
"type": "Feature",
46+
"properties": {},
4147
"geometry": {
4248
"type": "Point",
4349
"coordinates": [181.03271484375, -16.277960306212513]

pnpm-lock.yaml

Lines changed: 33 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)