Skip to content

Commit fb489a1

Browse files
feat(vortex-geo): Arrow import/export for the native Point type (#8374)
<!--
Thank you for submitting a pull request! We appreciate your time and effort.

Please make sure to provide enough information so that we can review your pull request. The Summary and Testing sections below contain guidance on what to include.
-->

## Summary

<!--
If this PR is related to a tracked effort, please link to the relevant issue here (e.g., `Closes: #123`). Otherwise, feel free to ignore / delete this.

In this section, please:

1. Explain the rationale for this change.
2. Summarize the changes included in this PR.

A general rule of thumb is that larger PRs should have larger summaries. If there are a lot of changes, please help us review the code by explaining what was changed and why.

If there is an issue or discussion attached, there is no need to duplicate all the details, but clarity is always preferred over brevity.
-->

This PR adds Arrow import/export support for the `Point` extension type, mapping it to the `geoarrow.point` Arrow extension with separated (struct) coordinates.

Stacked on #8372.

<!--
## API Changes

Uncomment this section if there are any user-facing changes.

Consider whether the change affects users in one of the following ways:

1. Breaks public APIs in some way.
2. Changes the underlying behavior of one of the engine integrations.
3. Should some documentation be updated to reflect this change?

If a public API is changed in a breaking manner, make sure to add the appropriate label.
-->

## Testing

<!--
Please describe how this change was tested. Here are some common categories for testing in Vortex:

1. Verifying existing behavior is maintained.
2. Verifying new behavior and functionality works correctly.
3. Serialization compatibility (backwards and forwards) should be maintained or explicitly broken.
-->

Unit tests are added to exercise both code paths, plus a Vortex → Arrow → Vortex round-trip.

Signed-off-by: Nemo Yu <zyu379@wisc.edu>
1 parent 46e7253 commit fb489a1

12 files changed

Lines changed: 790 additions & 404 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-geo/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ wkb = { workspace = true }
2626
[dev-dependencies]
2727
geo-traits = { workspace = true }
2828
geo-types = { workspace = true }
29+
rstest = { workspace = true }
2930

3031
[lints]
3132
workspace = true

vortex-geo/src/extension/coordinate.rs

Lines changed: 78 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
use std::fmt::Display;
1515
use std::fmt::Formatter;
1616

17+
use geoarrow::datatypes::Dimension as GeoArrowDimension;
1718
use vortex_array::ArrayRef;
1819
use vortex_array::ExecutionCtx;
1920
use vortex_array::arrays::ExtensionArray;
@@ -25,6 +26,7 @@ use vortex_array::dtype::DType;
2526
use vortex_array::dtype::FieldNames;
2627
use vortex_array::dtype::Nullability;
2728
use vortex_array::dtype::PType;
29+
use vortex_array::dtype::StructFields;
2830
use vortex_array::scalar::Scalar;
2931
use vortex_error::VortexResult;
3032
use vortex_error::vortex_bail;
@@ -63,6 +65,38 @@ impl Dimension {
6365
_ => vortex_bail!("not a valid GeoArrow coordinate dimension: {names:?}"),
6466
})
6567
}
68+
69+
/// The coordinate field names of this dimension, in GeoArrow order.
70+
pub(crate) fn field_names(self) -> &'static [&'static str] {
71+
match self {
72+
Dimension::Xy => &["x", "y"],
73+
Dimension::Xyz => &["x", "y", "z"],
74+
Dimension::Xym => &["x", "y", "m"],
75+
Dimension::Xyzm => &["x", "y", "z", "m"],
76+
}
77+
}
78+
}
79+
80+
impl From<GeoArrowDimension> for Dimension {
81+
fn from(dim: GeoArrowDimension) -> Self {
82+
match dim {
83+
GeoArrowDimension::XY => Dimension::Xy,
84+
GeoArrowDimension::XYZ => Dimension::Xyz,
85+
GeoArrowDimension::XYM => Dimension::Xym,
86+
GeoArrowDimension::XYZM => Dimension::Xyzm,
87+
}
88+
}
89+
}
90+
91+
impl From<Dimension> for GeoArrowDimension {
92+
fn from(dim: Dimension) -> Self {
93+
match dim {
94+
Dimension::Xy => GeoArrowDimension::XY,
95+
Dimension::Xyz => GeoArrowDimension::XYZ,
96+
Dimension::Xym => GeoArrowDimension::XYM,
97+
Dimension::Xyzm => GeoArrowDimension::XYZM,
98+
}
99+
}
66100
}
67101

68102
/// A decoded coordinate. `z`/`m` are `Some` iff the storage dimension includes them.
@@ -122,6 +156,21 @@ pub(crate) fn coordinate_dimension(dtype: &DType) -> VortexResult<Dimension> {
122156
Dimension::from_field_names(fields.names())
123157
}
124158

159+
/// The canonical storage dtype for `dim`: a `Struct` of non-nullable `f64` coordinate fields,
160+
/// with `nullability` at the struct (per-point) level. Inverse of [`coordinate_dimension`].
161+
pub(crate) fn coordinate_storage_dtype(dim: Dimension, nullability: Nullability) -> DType {
162+
let names = dim.field_names();
163+
let fields = std::iter::repeat_n(
164+
DType::Primitive(PType::F64, Nullability::NonNullable),
165+
names.len(),
166+
)
167+
.collect::<Vec<_>>();
168+
DType::Struct(
169+
StructFields::new(FieldNames::from(names), fields),
170+
nullability,
171+
)
172+
}
173+
125174
/// Decode a [`Coordinate`] from a coordinate `Struct<x, y[, z][, m]>` scalar (`z`/`m` read iff
126175
/// present, so the same decoder serves every dimension).
127176
pub(crate) fn coordinate_from_struct(scalar: &Scalar) -> VortexResult<Coordinate> {
@@ -196,39 +245,58 @@ pub(crate) fn parse_storage(
196245

197246
#[cfg(test)]
198247
mod tests {
248+
use rstest::rstest;
199249
use vortex_array::IntoArray;
200250
use vortex_array::VortexSessionExecute;
201251
use vortex_array::arrays::ExtensionArray;
202252
use vortex_array::arrays::PrimitiveArray;
203253
use vortex_array::arrays::StructArray;
204254
use vortex_array::dtype::FieldNames;
255+
use vortex_array::dtype::Nullability;
205256
use vortex_array::dtype::extension::ExtDType;
206257
use vortex_array::session::ArraySession;
207258
use vortex_array::validity::Validity;
208259
use vortex_error::VortexResult;
209260
use vortex_session::VortexSession;
210261

211262
use super::Coordinate;
263+
use super::Dimension;
264+
use super::coordinate_dimension;
265+
use super::coordinate_storage_dtype;
212266
use super::parse_storage;
213267
use crate::extension::GeoMetadata;
214268
use crate::extension::Point;
215269

270+
/// Each dimension round-trips through its field names and canonical storage dtype.
271+
#[rstest]
272+
#[case::xy(Dimension::Xy, &["x", "y"])]
273+
#[case::xyz(Dimension::Xyz, &["x", "y", "z"])]
274+
#[case::xym(Dimension::Xym, &["x", "y", "m"])]
275+
#[case::xyzm(Dimension::Xyzm, &["x", "y", "z", "m"])]
276+
fn storage_dtype_roundtrips_dimension(
277+
#[case] dim: Dimension,
278+
#[case] names: &[&str],
279+
) -> VortexResult<()> {
280+
assert_eq!(dim.field_names(), names);
281+
let dtype = coordinate_storage_dtype(dim, Nullability::NonNullable);
282+
assert_eq!(coordinate_dimension(&dtype)?, dim);
283+
Ok(())
284+
}
285+
216286
/// Display emits WKT, including `z`/`m` when present.
217-
#[test]
218-
fn display_is_wkt() {
219-
let coordinate = |z, m| Coordinate {
287+
#[rstest]
288+
#[case::xy(None, None, "POINT(1 2)")]
289+
#[case::xyz(Some(3.0), None, "POINT Z (1 2 3)")]
290+
#[case::xym(None, Some(4.0), "POINT M (1 2 4)")]
291+
#[case::xyzm(Some(3.0), Some(4.0), "POINT ZM (1 2 3 4)")]
292+
fn display_is_wkt(#[case] z: Option<f64>, #[case] m: Option<f64>, #[case] expected: &str) {
293+
let coordinate = Coordinate {
220294
x: 1.0,
221295
y: 2.0,
222296
z,
223297
m,
224298
};
225-
assert_eq!(coordinate(None, None).to_string(), "POINT(1 2)");
226-
assert_eq!(coordinate(Some(3.0), None).to_string(), "POINT Z (1 2 3)");
227-
assert_eq!(coordinate(None, Some(4.0)).to_string(), "POINT M (1 2 4)");
228-
assert_eq!(
229-
coordinate(Some(3.0), Some(4.0)).to_string(),
230-
"POINT ZM (1 2 3 4)"
231-
);
299+
assert_eq!(coordinate.to_string(), expected);
232300
}
233301

234302
/// [`parse_storage`] reads the coordinate fields unmasked, so a nullable point column must

vortex-geo/src/extension/mod.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@ mod point;
66
mod wkb;
77

88
use std::fmt::Display;
9+
use std::sync::Arc;
910

11+
use geoarrow::datatypes::Crs;
12+
use geoarrow::datatypes::Metadata;
1013
pub use point::*;
1114
pub use wkb::*;
1215

@@ -30,6 +33,32 @@ impl Display for GeoMetadata {
3033
}
3134
}
3235

36+
/// The GeoArrow [`Metadata`] equivalent of `geo_metadata`.
37+
pub(crate) fn geoarrow_metadata(geo_metadata: &GeoMetadata) -> Arc<Metadata> {
38+
Arc::new(Metadata::new(
39+
geo_metadata
40+
.crs
41+
.as_ref()
42+
.map(|crs| Crs::from_unknown_crs_type(crs.to_string()))
43+
.unwrap_or_default(),
44+
None,
45+
))
46+
}
47+
48+
/// Recover [`GeoMetadata`] from GeoArrow metadata.
49+
pub(crate) fn geo_metadata_from_arrow(metadata: &Metadata) -> GeoMetadata {
50+
let crs = metadata.crs().crs_value().map(|value| {
51+
// `Crs::from_unknown_crs_type` stores the user's string verbatim as a JSON string
52+
// value, so prefer the raw string when available to round-trip cleanly. For other
53+
// CRS encodings (PROJJSON object, etc.), fall back to the JSON-encoded form.
54+
value
55+
.as_str()
56+
.map(str::to_string)
57+
.unwrap_or_else(|| value.to_string())
58+
});
59+
GeoMetadata { crs }
60+
}
61+
3362
#[cfg(test)]
3463
mod tests {
3564
use prost::Message;

0 commit comments

Comments
 (0)