Skip to content

Commit 0fc2623

Browse files
jagillmeta-codesync[bot]
authored andcommitted
fix(geo): Fix correctness issue with spatial joins on GeometryCollections (facebookincubator#15778)
Summary: Pull Request resolved: facebookincubator#15778 The envelope for the spatial index was being calculated incorrectly, because the Shapefile format does not add an envelope to the beginning of a GeometryCollection. This was causing GeometryCollections to miss hits on the Spatial Index, causing rows to be dropped. This uses the newly moved GeometrySerde to get the Envelope in the standard way. However, this requires us to include GEOS for spatial joins. Reviewed By: Sullivan-Patrick Differential Revision: D89239585
1 parent b124b93 commit 0fc2623

File tree

2 files changed

+65
-44
lines changed

2 files changed

+65
-44
lines changed

velox/exec/SpatialJoinBuild.cpp

Lines changed: 25 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,11 @@
1515
*/
1616

1717
#include "velox/exec/SpatialJoinBuild.h"
18-
#include "velox/common/base/IOUtils.h"
18+
#include <limits>
1919
#include "velox/common/geospatial/GeometryConstants.h"
20+
#ifdef VELOX_ENABLE_GEO
21+
#include "velox/common/geospatial/GeometrySerde.h"
22+
#endif
2023
#include "velox/exec/Task.h"
2124

2225
namespace facebook::velox::exec {
@@ -127,51 +130,29 @@ std::vector<RowVectorPtr> SpatialJoinBuild::mergeDataVectors() const {
127130
Envelope SpatialJoinBuild::readEnvelope(
128131
const StringView& geometryBytes,
129132
double radius) {
130-
VELOX_CHECK_GE(
131-
geometryBytes.size(),
132-
sizeof(GeometrySerializationType) + 2 * sizeof(double));
133-
double minX;
134-
double minY;
135-
double maxX;
136-
double maxY;
137-
138-
velox::common::InputByteStream inputStream(geometryBytes.data());
139-
// Geometry Serde makes it easy to get the envelope.
140-
// The first byte is the GeometrySerializationType.
141-
// All coordinates are doubles (8 bytes)
142-
// Depending on the type, the next bytes are:
143-
// 1. POINT: x, y
144-
// 2. ENVELOPE: minX, minY, maxX, maxY
145-
// 3. Else: EsriShapeType (4 bytes), minX, minY, maxX, maxY, GeometryBytes
146-
auto geometryType = inputStream.read<GeometrySerializationType>();
147-
if (geometryType == GeometrySerializationType::POINT) {
148-
double x = inputStream.read<double>();
149-
double y = inputStream.read<double>();
150-
minX = x - radius;
151-
minY = y - radius;
152-
maxX = x + radius;
153-
maxY = y + radius;
133+
#ifdef VELOX_ENABLE_GEO
134+
radius = std::max(radius, 0.0);
135+
auto geosEnvelope =
136+
common::geospatial::GeometryDeserializer::deserializeEnvelope(
137+
geometryBytes);
138+
if (geosEnvelope->isNull()) {
139+
return Envelope::empty();
154140
} else {
155-
if (geometryType != GeometrySerializationType::ENVELOPE) {
156-
// Unused esriType
157-
inputStream.read<velox::common::geospatial::EsriShapeType>();
158-
}
159-
minX = inputStream.read<double>() - radius;
160-
minY = inputStream.read<double>() - radius;
161-
maxX = inputStream.read<double>() + radius;
162-
maxY = inputStream.read<double>() + radius;
141+
return Envelope::from(
142+
geosEnvelope->getMinX() - radius,
143+
geosEnvelope->getMinY() - radius,
144+
geosEnvelope->getMaxX() + radius,
145+
geosEnvelope->getMaxY() + radius);
163146
}
164-
165-
Envelope envelope;
166-
167-
// This also catches NaNs
168-
if (minX <= maxX && minY <= maxY) {
169-
envelope = Envelope::from(minX, minY, maxX, maxY);
170-
} else {
171-
envelope = Envelope::empty();
172-
}
173-
174-
return envelope;
147+
#else
148+
// When VELOX_ENABLE_GEO is not set, return an envelope of infinite area
149+
// to ensure all geometries are considered for spatial join
150+
return Envelope::from(
151+
-std::numeric_limits<double>::infinity(),
152+
-std::numeric_limits<double>::infinity(),
153+
std::numeric_limits<double>::infinity(),
154+
std::numeric_limits<double>::infinity());
155+
#endif
175156
}
176157

177158
SpatialIndex SpatialJoinBuild::buildSpatialIndex(

velox/exec/tests/SpatialJoinTest.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,46 @@ TEST_F(SpatialJoinTest, testSimpleNullRowsJoin) {
383383
{"POINT (1 1)"});
384384
}
385385

386+
TEST_F(SpatialJoinTest, testGeometryCollection) {
387+
runTest(
388+
{"GEOMETRYCOLLECTION (POINT (1 1))",
389+
"GEOMETRYCOLLECTION EMPTY",
390+
"POINT (1 1)"},
391+
{"GEOMETRYCOLLECTION (POINT (1 1))",
392+
"GEOMETRYCOLLECTION EMPTY",
393+
"POINT (1 1)"},
394+
std::nullopt,
395+
"ST_Intersects(left_g, right_g)",
396+
core::JoinType::kInner,
397+
{"GEOMETRYCOLLECTION (POINT (1 1))",
398+
"GEOMETRYCOLLECTION (POINT (1 1))",
399+
"POINT (1 1)",
400+
"POINT (1 1)"},
401+
{"GEOMETRYCOLLECTION (POINT (1 1))",
402+
"POINT (1 1)",
403+
"GEOMETRYCOLLECTION (POINT (1 1))",
404+
"POINT (1 1)"});
405+
406+
runTest(
407+
{"GEOMETRYCOLLECTION (POINT (1 1))",
408+
"GEOMETRYCOLLECTION EMPTY",
409+
"POINT (1 1)"},
410+
{"GEOMETRYCOLLECTION (POINT (1 2))",
411+
"GEOMETRYCOLLECTION EMPTY",
412+
"POINT (1 2)"},
413+
std::vector<std::optional<double>>{1.0, 1.0, 1.0},
414+
"ST_Distance(left_g, right_g) <= radius",
415+
core::JoinType::kInner,
416+
{"GEOMETRYCOLLECTION (POINT (1 1))",
417+
"GEOMETRYCOLLECTION (POINT (1 1))",
418+
"POINT (1 1)",
419+
"POINT (1 1)"},
420+
{"GEOMETRYCOLLECTION (POINT (1 2))",
421+
"POINT (1 2)",
422+
"GEOMETRYCOLLECTION (POINT (1 2))",
423+
"POINT (1 2)"});
424+
}
425+
386426
TEST_F(SpatialJoinTest, testDistanceJoin) {
387427
runTest(
388428
{"POINT (1 2)", "POLYGON ((1 2, 2 2, 2 3, 1 3, 1 2))", std::nullopt},

0 commit comments

Comments
 (0)