Skip to content

Commit 88c5545

Browse files
authored
feat: GeometryType and ST_GeometryType (#11)
1 parent d1cbcf4 commit 88c5545

7 files changed

Lines changed: 300 additions & 4 deletions

File tree

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ Spatial extensions for [Apache DataFusion](https://datafusion.apache.org/), an e
3131

3232
| Name | Implemented | Description |
3333
| ------------------- | ----------- | ------------------------------------------------------------------------------------------------------- |
34-
| GeometryType | | Returns the type of a geometry as text. |
34+
| GeometryType | ✅ | Returns the type of a geometry as text. |
3535
| ST_Boundary | | Returns the boundary of a geometry. |
3636
| ST_BoundingDiagonal | | Returns the diagonal of a geometry's bounding box. |
3737
| ST_CoordDim | ✅ | Return the coordinate dimension of a geometry. |
@@ -44,7 +44,7 @@ Spatial extensions for [Apache DataFusion](https://datafusion.apache.org/), an e
4444
| ST_Envelope | ✅ | Returns a geometry representing the bounding box of a geometry. |
4545
| ST_ExteriorRing | | Returns a LineString representing the exterior ring of a Polygon. |
4646
| ST_GeometryN | | Return an element of a geometry collection. |
47-
| ST_GeometryType | | Returns the SQL-MM type of a geometry as text. |
47+
| ST_GeometryType | ✅ | Returns the SQL-MM type of a geometry as text. |
4848
| ST_InteriorRingN | | Returns the Nth interior ring (hole) of a Polygon. |
4949
| ST_IsClosed | | Tests if a LineString's start and end points are coincident. |
5050
| ST_IsCollection | | Tests if a geometry is a geometry collection type. |

python/python/geodatafusion/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,14 +61,16 @@ def register_all_native(ctx: SessionContext):
6161
# accessors
6262
ctx.register_udf(udf(native.CoordDim()))
6363
ctx.register_udf(udf(native.EndPoint()))
64+
ctx.register_udf(udf(native.GeometryType()))
65+
ctx.register_udf(udf(native.M()))
6466
ctx.register_udf(udf(native.NDims()))
6567
ctx.register_udf(udf(native.NPoints()))
6668
ctx.register_udf(udf(native.NumInteriorRings()))
6769
ctx.register_udf(udf(native.StartPoint()))
70+
ctx.register_udf(udf(native.STGeometryType()))
6871
ctx.register_udf(udf(native.X()))
6972
ctx.register_udf(udf(native.Y()))
7073
ctx.register_udf(udf(native.Z()))
71-
ctx.register_udf(udf(native.M()))
7274

7375
# bounding box
7476
ctx.register_udf(udf(native.Box2D()))

python/python/geodatafusion/native/_accessors.pyi

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,11 @@ class Z:
3737
class M:
    """Type stub for the native ``M`` UDF class.

    Instances are passed to ``udf(...)`` when the native functions are
    registered on a session context.
    """

    def __init__(self) -> None:
        ...

    def __datafusion_scalar_udf__(self) -> object:
        # NOTE(review): presumably the handle DataFusion's ``udf(...)`` consumes;
        # the stub only promises ``object`` -- confirm against the native module.
        ...
40+
41+
class GeometryType:
    """Type stub for the native ``GeometryType`` UDF class.

    Instances are passed to ``udf(...)`` when the native functions are
    registered on a session context.
    """

    def __init__(self) -> None:
        ...

    def __datafusion_scalar_udf__(self) -> object:
        # NOTE(review): presumably the handle DataFusion's ``udf(...)`` consumes;
        # the stub only promises ``object`` -- confirm against the native module.
        ...
44+
45+
class STGeometryType:
    """Type stub for the native ``STGeometryType`` UDF class.

    Instances are passed to ``udf(...)`` when the native functions are
    registered on a session context.
    """

    def __init__(self) -> None:
        ...

    def __datafusion_scalar_udf__(self) -> object:
        # NOTE(review): presumably the handle DataFusion's ``udf(...)`` consumes;
        # the stub only promises ``object`` -- confirm against the native module.
        ...

python/src/udf/native/accessors.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use geodatafusion::udf::native::accessors::{
2-
CoordDim, EndPoint, M, NDims, NPoints, NumInteriorRings, StartPoint, X, Y, Z,
2+
CoordDim, EndPoint, GeometryType, M, NDims, NPoints, NumInteriorRings, ST_GeometryType,
3+
StartPoint, X, Y, Z,
34
};
45

56
use crate::{impl_udf, impl_udf_coord_type_arg};
@@ -14,3 +15,5 @@ impl_udf_coord_type_arg!(EndPoint, PyEndPoint, "EndPoint");
1415
impl_udf_coord_type_arg!(StartPoint, PyStartPoint, "StartPoint");
1516
impl_udf!(NPoints, PyNPoints, "NPoints");
1617
impl_udf!(NumInteriorRings, PyNumInteriorRings, "NumInteriorRings");
18+
impl_udf!(GeometryType, PyGeometryType, "GeometryType");
19+
impl_udf!(ST_GeometryType, PySTGeometryType, "STGeometryType");

python/src/udf/native/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,12 @@ pub(crate) fn native(m: &Bound<PyModule>) -> PyResult<()> {
1111
m.add_class::<accessors::PyCoordDim>()?;
1212
m.add_class::<accessors::PyEndPoint>()?;
1313
m.add_class::<accessors::PyM>()?;
14+
m.add_class::<accessors::PyGeometryType>()?;
1415
m.add_class::<accessors::PyNDims>()?;
1516
m.add_class::<accessors::PyNPoints>()?;
1617
m.add_class::<accessors::PyNumInteriorRings>()?;
1718
m.add_class::<accessors::PyStartPoint>()?;
19+
m.add_class::<accessors::PySTGeometryType>()?;
1820
m.add_class::<accessors::PyX>()?;
1921
m.add_class::<accessors::PyY>()?;
2022
m.add_class::<accessors::PyZ>()?;
Lines changed: 279 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,279 @@
1+
use std::any::Any;
2+
use std::sync::{Arc, OnceLock};
3+
4+
use arrow_array::builder::StringViewBuilder;
5+
use arrow_array::{ArrayRef, StringViewArray};
6+
use arrow_schema::DataType;
7+
use datafusion::error::Result;
8+
use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER;
9+
use datafusion::logical_expr::{
10+
ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
11+
};
12+
use geo_traits::*;
13+
use geoarrow_array::array::from_arrow_array;
14+
use geoarrow_array::{GeoArrowArray, GeoArrowArrayAccessor, downcast_geoarrow_array};
15+
use geoarrow_schema::error::GeoArrowResult;
16+
17+
use crate::data_types::any_single_geometry_type_input;
18+
use crate::error::GeoDataFusionResult;
19+
20+
#[derive(Debug)]
21+
pub struct GeometryType {
22+
signature: Signature,
23+
}
24+
25+
impl GeometryType {
26+
pub fn new() -> Self {
27+
Self {
28+
signature: any_single_geometry_type_input(),
29+
}
30+
}
31+
}
32+
33+
impl Default for GeometryType {
34+
fn default() -> Self {
35+
Self::new()
36+
}
37+
}
38+
39+
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
40+
41+
impl ScalarUDFImpl for GeometryType {
42+
fn as_any(&self) -> &dyn Any {
43+
self
44+
}
45+
46+
fn name(&self) -> &str {
47+
"geometrytype"
48+
}
49+
50+
fn signature(&self) -> &Signature {
51+
&self.signature
52+
}
53+
54+
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
55+
Ok(DataType::Utf8View)
56+
}
57+
58+
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
59+
Ok(geometry_type_impl(args)?)
60+
}
61+
62+
fn documentation(&self) -> Option<&Documentation> {
63+
Some(DOCUMENTATION.get_or_init(|| {
64+
Documentation::builder(
65+
DOC_SECTION_OTHER,
66+
"Returns the type of the geometry as a string. Eg: 'LINESTRING', 'POLYGON', 'MULTIPOINT', etc.",
67+
"GeometryType(geometry)",
68+
)
69+
.with_argument("g1", "geometry")
70+
.build()
71+
}))
72+
}
73+
}
74+
75+
#[derive(Debug)]
76+
#[allow(non_camel_case_types)]
77+
pub struct ST_GeometryType {
78+
signature: Signature,
79+
}
80+
81+
impl ST_GeometryType {
82+
pub fn new() -> Self {
83+
Self {
84+
signature: any_single_geometry_type_input(),
85+
}
86+
}
87+
}
88+
89+
impl Default for ST_GeometryType {
90+
fn default() -> Self {
91+
Self::new()
92+
}
93+
}
94+
95+
static ST_DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
96+
97+
impl ScalarUDFImpl for ST_GeometryType {
98+
fn as_any(&self) -> &dyn Any {
99+
self
100+
}
101+
102+
fn name(&self) -> &str {
103+
"st_geometrytype"
104+
}
105+
106+
fn signature(&self) -> &Signature {
107+
&self.signature
108+
}
109+
110+
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
111+
Ok(DataType::Utf8View)
112+
}
113+
114+
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
115+
Ok(geometry_type_impl_st(args)?)
116+
}
117+
118+
fn documentation(&self) -> Option<&Documentation> {
119+
Some(ST_DOCUMENTATION.get_or_init(|| {
120+
Documentation::builder(
121+
DOC_SECTION_OTHER,
122+
"Returns the type of the geometry as a string. Eg: 'LINESTRING', 'POLYGON', 'MULTIPOINT', etc.",
123+
"ST_GeometryType(geometry)",
124+
)
125+
.with_argument("g1", "geometry")
126+
.build()
127+
}))
128+
}
129+
}
130+
131+
/// Entry point for `GeometryType`: turns the UDF arguments into arrays,
/// interprets the first one as a GeoArrow array using its field, and returns
/// the per-row type names as an array-valued `ColumnarValue`.
fn geometry_type_impl(args: ScalarFunctionArgs) -> GeoDataFusionResult<ColumnarValue> {
    let inputs = ColumnarValue::values_to_arrays(&args.args)?;
    // Only the first argument is read.
    let geometries = from_arrow_array(&inputs[0], &args.arg_fields[0])?;

    let type_names: ArrayRef = Arc::new(geometry_type_array(&geometries)?);
    Ok(ColumnarValue::Array(type_names))
}
138+
139+
fn geometry_type_array(array: &dyn GeoArrowArray) -> GeoArrowResult<StringViewArray> {
140+
downcast_geoarrow_array!(array, _geometry_type_impl)
141+
}
142+
143+
fn _geometry_type_impl<'a>(
144+
array: &'a impl GeoArrowArrayAccessor<'a>,
145+
) -> GeoArrowResult<StringViewArray> {
146+
let mut builder = StringViewBuilder::with_capacity(array.len());
147+
148+
for item in array.iter() {
149+
if let Some(geom) = item {
150+
let geom = geom?;
151+
// TODO: should be possible to write to the underlying buffer directly instead of
152+
// allocating a String?
153+
let s = format!("{}{}", geometry_type_str(&geom), geometry_suffix_str(&geom));
154+
builder.append_value(s);
155+
} else {
156+
builder.append_null();
157+
}
158+
}
159+
160+
Ok(builder.finish())
161+
}
162+
163+
#[inline]
164+
fn geometry_type_str(geom: &impl GeometryTrait) -> &'static str {
165+
use geo_traits::GeometryType::*;
166+
167+
match geom.as_type() {
168+
Point(_) => "POINT",
169+
LineString(_) => "LINESTRING",
170+
Polygon(_) => "POLYGON",
171+
MultiPoint(_) => "MULTIPOINT",
172+
MultiLineString(_) => "MULTILINESTRING",
173+
MultiPolygon(_) => "MULTIPOLYGON",
174+
GeometryCollection(_) => "GEOMETRYCOLLECTION",
175+
Rect(_) => "POLYGON",
176+
Line(_) => "LINESTRING",
177+
Triangle(_) => "POLYGON",
178+
}
179+
}
180+
181+
/// Entry point for `ST_GeometryType`: turns the UDF arguments into arrays,
/// interprets the first one as a GeoArrow array using its field, and returns
/// the per-row `ST_`-prefixed type names as an array-valued `ColumnarValue`.
fn geometry_type_impl_st(args: ScalarFunctionArgs) -> GeoDataFusionResult<ColumnarValue> {
    let inputs = ColumnarValue::values_to_arrays(&args.args)?;
    // Only the first argument is read.
    let geometries = from_arrow_array(&inputs[0], &args.arg_fields[0])?;

    let type_names: ArrayRef = Arc::new(geometry_type_array_st(&geometries)?);
    Ok(ColumnarValue::Array(type_names))
}
188+
189+
fn geometry_type_array_st(array: &dyn GeoArrowArray) -> GeoArrowResult<StringViewArray> {
190+
downcast_geoarrow_array!(array, _geometry_type_impl_st)
191+
}
192+
193+
fn _geometry_type_impl_st<'a>(
194+
array: &'a impl GeoArrowArrayAccessor<'a>,
195+
) -> GeoArrowResult<StringViewArray> {
196+
let mut builder = StringViewBuilder::with_capacity(array.len());
197+
198+
for item in array.iter() {
199+
if let Some(geom) = item {
200+
builder.append_value(geometry_type_str_st(&geom?));
201+
} else {
202+
builder.append_null();
203+
}
204+
}
205+
206+
Ok(builder.finish())
207+
}
208+
209+
#[inline]
210+
fn geometry_type_str_st(geom: &impl GeometryTrait) -> &'static str {
211+
use geo_traits::GeometryType::*;
212+
213+
match geom.as_type() {
214+
Point(_) => "ST_Point",
215+
LineString(_) => "ST_LineString",
216+
Polygon(_) => "ST_Polygon",
217+
MultiPoint(_) => "ST_MultiPoint",
218+
MultiLineString(_) => "ST_MultilineString",
219+
MultiPolygon(_) => "ST_MultiPolygon",
220+
GeometryCollection(_) => "ST_GeometryCollection",
221+
Rect(_) => "ST_Polygon",
222+
Line(_) => "ST_LineString",
223+
Triangle(_) => "ST_Polygon",
224+
}
225+
}
226+
227+
#[inline]
228+
fn geometry_suffix_str(geom: &impl GeometryTrait) -> &'static str {
229+
match geom.dim() {
230+
geo_traits::Dimensions::Xy | geo_traits::Dimensions::Unknown(2) => "",
231+
geo_traits::Dimensions::Xyz => "Z",
232+
geo_traits::Dimensions::Xym => "M",
233+
geo_traits::Dimensions::Xyzm | geo_traits::Dimensions::Unknown(4) => "ZM",
234+
geo_traits::Dimensions::Unknown(_) => "",
235+
}
236+
}
237+
238+
#[cfg(test)]
mod test {
    use arrow_array::cast::AsArray;
    use datafusion::prelude::SessionContext;

    use super::*;
    use crate::udf::native::io::GeomFromText;

    /// Runs `sql` on `ctx` and returns the first value of the first column of
    /// the first result batch, read as a string-view value.
    async fn first_text_value(ctx: &SessionContext, sql: &str) -> String {
        let frame = ctx.sql(sql).await.unwrap();
        let first_batch = frame.collect().await.unwrap().into_iter().next().unwrap();
        first_batch.column(0).as_string_view().value(0).to_string()
    }

    #[tokio::test]
    async fn test_geometry_type() {
        let ctx = SessionContext::new();
        ctx.register_udf(GeometryType::new().into());
        ctx.register_udf(GeomFromText::new(Default::default()).into());

        let type_name = first_text_value(
            &ctx,
            "SELECT GeometryType(ST_GeomFromText('LINESTRING(77.29 29.07,77.42 29.26,77.27 29.31,77.29 29.07)'));",
        )
        .await;

        assert_eq!(type_name, "LINESTRING");
    }

    #[tokio::test]
    async fn test_st_geometry_type() {
        let ctx = SessionContext::new();
        ctx.register_udf(ST_GeometryType::new().into());
        ctx.register_udf(GeomFromText::new(Default::default()).into());

        let type_name = first_text_value(
            &ctx,
            "SELECT ST_GeometryType(ST_GeomFromText('LINESTRING(77.29 29.07,77.42 29.26,77.27 29.31,77.29 29.07)'));",
        )
        .await;

        assert_eq!(type_name, "ST_LineString");
    }
}

rust/geodatafusion/src/udf/native/accessors/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
mod coord_dim;
2+
mod geometry_type;
23
mod line_string;
34
mod npoints;
45
mod num_interior_rings;
56
mod point;
67

78
pub use coord_dim::{CoordDim, NDims};
9+
pub use geometry_type::{GeometryType, ST_GeometryType};
810
pub use line_string::{EndPoint, StartPoint};
911
pub use npoints::NPoints;
1012
pub use num_interior_rings::NumInteriorRings;

0 commit comments

Comments
 (0)