Skip to content

Commit 7b53625

Browse files
feat(vortex-geo): native Point extension type and GeoDistance scalar function (#8372)
## Summary This PR adds a native point type to `vortex-geo`. Points are by far the most common geometry in analytical datasets, and a columnar representation makes their coordinates directly accessible without parsing WKB. It also adds the scalar function: point-to-point distance with PostGIS `ST_Distance` semantics (planar/Euclidean, results in CRS units). ## API Changes Adds to `vortex-geo`, all registered through `vortex_geo::initialize`: - Extension type `Point` (`vortex.geo.point`): a location stored as `Struct<x, y, z?, m?>` of non-nullable `f64`, where `z?` is an optional elevation and `m?` an optional measure. - `Coordinate`: the internal value a point scalar unpacks to. - Scalar function `GeoDistance` (`vortex.geo.distance`): per-row distance between two equal-length point columns; either or both operands may be constant, in which case the query point is decoded once and broadcast. ## Testing Unit tests cover dtype validation for every GeoArrow dimension (and rejection of invalid storage), round-tripping a point column through scalar execution back to the original coordinates, WKT display for all four dimensions, and distance over all operand shapes: column-to-constant (either side), column-to-column, and constant-to-constant. --- Supersedes #8342 (same change, moved from my fork to an in-repo branch). --------- Signed-off-by: Nemo Yu <zyu379@wisc.edu> Signed-off-by: Nemo Yu <zhenghong@spiraldb.com> Signed-off-by: Nemo Yu <83347615+HarukiMoriarty@users.noreply.github.com> Signed-off-by: "Nemo Yu" <zhenghong@spiraldb.com> Co-authored-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent f67b594 commit 7b53625

6 files changed

Lines changed: 706 additions & 1 deletion

File tree

Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Coordinate building blocks for geometry extension types: the `Struct<x, y[, z][, m]>` storage
//! of non-nullable `f64` fields, its [`Dimension`], and the decoded [`Coordinate`] value.
//!
//! The coordinate fields are:
//! - `x` — longitude or easting
//! - `y` — latitude or northing
//! - `z` (optional) — elevation
//! - `m` (optional) — measure: an arbitrary per-point value such as distance along a route or a
//!   timestamp
13+
14+
use std::fmt::Display;
15+
use std::fmt::Formatter;
16+
17+
use vortex_array::ArrayRef;
18+
use vortex_array::ExecutionCtx;
19+
use vortex_array::arrays::ExtensionArray;
20+
use vortex_array::arrays::PrimitiveArray;
21+
use vortex_array::arrays::StructArray;
22+
use vortex_array::arrays::extension::ExtensionArrayExt;
23+
use vortex_array::arrays::struct_::StructArrayExt;
24+
use vortex_array::dtype::DType;
25+
use vortex_array::dtype::FieldNames;
26+
use vortex_array::dtype::Nullability;
27+
use vortex_array::dtype::PType;
28+
use vortex_array::scalar::Scalar;
29+
use vortex_error::VortexResult;
30+
use vortex_error::vortex_bail;
31+
use vortex_error::vortex_ensure;
32+
use vortex_error::vortex_err;
33+
34+
/// Coordinate dimensions, matching GeoArrow. Field order is fixed: `x`, `y`, then `z` before `m`.
///
/// Each variant names the exact list of ordinate fields a coordinate struct carries.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum Dimension {
    /// 2D: `x`, `y`.
    Xy,
    /// 3D with elevation: `x`, `y`, `z`.
    Xyz,
    /// 3D with a measure: `x`, `y`, `m`.
    Xym,
    /// 4D: `x`, `y`, `z`, `m`.
    Xyzm,
}
46+
47+
impl Dimension {
48+
/// Recover the dimension from a coordinate's field names, in GeoArrow order.
49+
pub(crate) fn from_field_names(names: &FieldNames) -> VortexResult<Dimension> {
50+
let mut strs = [""; 4];
51+
vortex_ensure!(
52+
names.len() <= strs.len(),
53+
"not a valid GeoArrow coordinate dimension: {names:?}"
54+
);
55+
for (slot, name) in strs.iter_mut().zip(names.iter()) {
56+
*slot = name.as_ref();
57+
}
58+
Ok(match &strs[..names.len()] {
59+
["x", "y"] => Dimension::Xy,
60+
["x", "y", "z"] => Dimension::Xyz,
61+
["x", "y", "m"] => Dimension::Xym,
62+
["x", "y", "z", "m"] => Dimension::Xyzm,
63+
_ => vortex_bail!("not a valid GeoArrow coordinate dimension: {names:?}"),
64+
})
65+
}
66+
}
67+
68+
/// A decoded coordinate. `z`/`m` are `Some` iff the storage dimension includes them.
///
/// This is the native value produced when unpacking a [`Point`](crate::extension::Point) scalar;
/// the rest of the coordinate machinery is crate-internal.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Coordinate {
    /// The x (longitude/easting) ordinate.
    pub x: f64,
    /// The y (latitude/northing) ordinate.
    pub y: f64,
    /// The optional `z` (elevation) ordinate.
    pub z: Option<f64>,
    /// The optional `m` (measure) ordinate.
    pub m: Option<f64>,
}
83+
84+
impl Coordinate {
85+
/// A 2D coordinate (`z`/`m` unset).
86+
pub fn xy(x: f64, y: f64) -> Self {
87+
Coordinate {
88+
x,
89+
y,
90+
z: None,
91+
m: None,
92+
}
93+
}
94+
}
95+
96+
impl Display for Coordinate {
97+
fn fmt(&self, fmt: &mut Formatter<'_>) -> std::fmt::Result {
98+
match (self.z, self.m) {
99+
(None, None) => write!(fmt, "POINT({} {})", self.x, self.y),
100+
(Some(z), None) => write!(fmt, "POINT Z ({} {} {})", self.x, self.y, z),
101+
(None, Some(m)) => write!(fmt, "POINT M ({} {} {})", self.x, self.y, m),
102+
(Some(z), Some(m)) => write!(fmt, "POINT ZM ({} {} {} {})", self.x, self.y, z, m),
103+
}
104+
}
105+
}
106+
107+
/// Validate that `dtype` is a coordinate struct of non-nullable `f64` fields, returning its
108+
/// [`Dimension`]. Any of the four GeoArrow dimensions validates.
109+
pub(crate) fn coordinate_dimension(dtype: &DType) -> VortexResult<Dimension> {
110+
let DType::Struct(fields, _) = dtype else {
111+
vortex_bail!("coordinate storage must be a Struct, was {dtype}");
112+
};
113+
for (name, field) in fields.names().iter().zip(fields.fields()) {
114+
vortex_ensure!(
115+
matches!(
116+
field,
117+
DType::Primitive(PType::F64, Nullability::NonNullable)
118+
),
119+
"coordinate field {name} must be non-nullable f64, was {field}"
120+
);
121+
}
122+
Dimension::from_field_names(fields.names())
123+
}
124+
125+
/// Decode a [`Coordinate`] from a coordinate `Struct<x, y[, z][, m]>` scalar (`z`/`m` read iff
126+
/// present, so the same decoder serves every dimension).
127+
pub(crate) fn coordinate_from_struct(scalar: &Scalar) -> VortexResult<Coordinate> {
128+
let fields = scalar.as_struct();
129+
let required = |name: &str| -> VortexResult<f64> {
130+
f64::try_from(
131+
&fields
132+
.field(name)
133+
.ok_or_else(|| vortex_err!("coordinate missing {name}"))?,
134+
)
135+
};
136+
let optional = |name: &str| -> VortexResult<Option<f64>> {
137+
fields
138+
.field(name)
139+
.map(|value| f64::try_from(&value))
140+
.transpose()
141+
};
142+
Ok(Coordinate {
143+
x: required("x")?,
144+
y: required("y")?,
145+
z: optional("z")?,
146+
m: optional("m")?,
147+
})
148+
}
149+
150+
/// Decode a [`Coordinate`] from an extension-typed point scalar (unwrapped to its coordinate
151+
/// storage) or a bare coordinate `Struct` scalar. The per-row decode used by the distance fns.
152+
pub(crate) fn coordinate_from_scalar(scalar: &Scalar) -> VortexResult<Coordinate> {
153+
match scalar.as_extension_opt() {
154+
Some(ext_scalar) => coordinate_from_struct(&ext_scalar.to_storage_scalar()),
155+
None => coordinate_from_struct(scalar),
156+
}
157+
}
158+
159+
/// Validated, executed `x`/`y` columns of a point array. The bulk counterpart to [`Coordinate`];
160+
/// `z`/`m` are not executed.
161+
pub(crate) struct ParsedCoordinates {
162+
/// The flat `f64` `x` column.
163+
pub(crate) xs: PrimitiveArray,
164+
/// The flat `f64` `y` column.
165+
pub(crate) ys: PrimitiveArray,
166+
}
167+
168+
/// Validate a point column's coordinate storage (layout and non-nullability) and execute its
169+
/// `x`/`y` columns.
170+
pub(crate) fn parse_storage(
171+
points: &ArrayRef,
172+
ctx: &mut ExecutionCtx,
173+
) -> VortexResult<ParsedCoordinates> {
174+
let storage = points
175+
.clone()
176+
.execute::<ExtensionArray>(ctx)?
177+
.storage_array()
178+
.clone()
179+
.execute::<StructArray>(ctx)?;
180+
coordinate_dimension(storage.dtype())?;
181+
vortex_ensure!(
182+
!storage.dtype().is_nullable(),
183+
"coordinate storage must be non-nullable to read unmasked ordinates, was {}",
184+
storage.dtype()
185+
);
186+
let xs = storage
187+
.unmasked_field_by_name("x")?
188+
.clone()
189+
.execute::<PrimitiveArray>(ctx)?;
190+
let ys = storage
191+
.unmasked_field_by_name("y")?
192+
.clone()
193+
.execute::<PrimitiveArray>(ctx)?;
194+
Ok(ParsedCoordinates { xs, ys })
195+
}
196+
197+
#[cfg(test)]
mod tests {
    use vortex_array::IntoArray;
    use vortex_array::VortexSessionExecute;
    use vortex_array::arrays::ExtensionArray;
    use vortex_array::arrays::PrimitiveArray;
    use vortex_array::arrays::StructArray;
    use vortex_array::dtype::FieldNames;
    use vortex_array::dtype::extension::ExtDType;
    use vortex_array::session::ArraySession;
    use vortex_array::validity::Validity;
    use vortex_error::VortexResult;
    use vortex_session::VortexSession;

    use super::Coordinate;
    use super::parse_storage;
    use crate::extension::GeoMetadata;
    use crate::extension::Point;

    /// Display emits WKT, including `z`/`m` when present.
    #[test]
    fn display_is_wkt() {
        // Fixed `x`/`y`; each assertion varies only which optional ordinates are set.
        let coordinate = |z, m| Coordinate {
            x: 1.0,
            y: 2.0,
            z,
            m,
        };
        assert_eq!(coordinate(None, None).to_string(), "POINT(1 2)");
        assert_eq!(coordinate(Some(3.0), None).to_string(), "POINT Z (1 2 3)");
        assert_eq!(coordinate(None, Some(4.0)).to_string(), "POINT M (1 2 4)");
        assert_eq!(
            coordinate(Some(3.0), Some(4.0)).to_string(),
            "POINT ZM (1 2 3 4)"
        );
    }

    /// [`parse_storage`] reads the coordinate fields unmasked, so a nullable point column must
    /// be rejected at parse time rather than decoding null rows as garbage ordinates.
    #[test]
    fn parse_rejects_nullable_points() -> VortexResult<()> {
        let session = VortexSession::empty().with::<ArraySession>();
        let mut ctx = session.create_execution_ctx();

        // NOTE(review): presumably `Validity::AllValid` gives the struct a nullable dtype even
        // though no row is null, which is what makes this the rejected case; confirm.
        let storage = StructArray::try_new(
            FieldNames::from(["x", "y"]),
            vec![
                PrimitiveArray::from_iter(vec![1.0f64]).into_array(),
                PrimitiveArray::from_iter(vec![2.0f64]).into_array(),
            ],
            1,
            Validity::AllValid,
        )?
        .into_array();
        let dtype = ExtDType::<Point>::try_new(GeoMetadata { crs: None }, storage.dtype().clone())?;
        let points = ExtensionArray::new(dtype.erased(), storage).into_array();

        assert!(parse_storage(&points, &mut ctx).is_err());
        Ok(())
    }
}

vortex-geo/src/extension/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
pub(crate) mod coordinate;
5+
mod point;
46
mod wkb;
57

68
use std::fmt::Display;
79

10+
pub use point::*;
811
pub use wkb::*;
912

1013
/// Extension metadata that is common to all the geospatial extension types.

0 commit comments

Comments
 (0)