Skip to content

Commit 76a8a16

Browse files
authored
Merge branch 'apache:main' into main
2 parents bc9a296 + ba06a25 commit 76a8a16

65 files changed

Lines changed: 4450 additions & 927 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Cargo.lock

Lines changed: 256 additions & 181 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ members = [
3838
"rust/sedona-pointcloud",
3939
"rust/sedona-raster",
4040
"rust/sedona-raster-functions",
41+
"rust/sedona-raster-gdal",
4142
"rust/sedona-schema",
4243
"rust/sedona-spatial-join",
4344
"rust/sedona-spatial-join-geography",
@@ -67,35 +68,34 @@ categories = ["science::geo", "database"]
6768
adbc_core = ">=0.22.0"
6869
adbc_ffi = ">=0.22.0"
6970
approx = "0.5"
70-
arrow = { version = "57.0.0", features = ["prettyprint", "ffi", "chrono-tz"] }
71-
arrow-array = { version = "57.0.0" }
72-
arrow-buffer = { version = "57.0.0" }
73-
arrow-cast = { version = "57.0.0" }
74-
arrow-data = { version = "57.0.0" }
75-
arrow-ipc = { version = "57.0.0" }
76-
arrow-json = { version = "57.0.0" }
77-
arrow-schema = { version = "57.0.0" }
71+
arrow = { version = "57.1.0", features = ["prettyprint", "ffi", "chrono-tz"] }
72+
arrow-array = { version = "57.1.0" }
73+
arrow-buffer = { version = "57.1.0" }
74+
arrow-cast = { version = "57.1.0" }
75+
arrow-data = { version = "57.1.0" }
76+
arrow-ipc = { version = "57.1.0" }
77+
arrow-json = { version = "57.1.0" }
78+
arrow-schema = { version = "57.1.0" }
7879
async-trait = { version = "0.1.87" }
7980
bytemuck = "1.25"
8081
byteorder = "1"
8182
bytes = "1.11"
8283
chrono = { version = "0.4.41", default-features = false }
8384
comfy-table = { version = "7.2" }
8485
criterion = { version = "0.8", features = ["html_reports"] }
85-
datafusion = { version = "51.0.0", default-features = false }
86-
datafusion-catalog = { version = "51.0.0" }
87-
datafusion-common = { version = "51.0.0", default-features = false }
88-
datafusion-common-runtime = { version = "51.0.0", default-features = false }
89-
datafusion-datasource = { version = "51.0.0", default-features = false }
90-
datafusion-datasource-parquet = { version = "51.0.0" }
91-
datafusion-execution = { version = "51.0.0", default-features = false }
92-
datafusion-expr = { version = "51.0.0" }
93-
datafusion-ffi = { version = "51.0.0" }
94-
datafusion-functions-nested = { version = "51.0.0" }
95-
datafusion-optimizer = { version = "51.0.0" }
96-
datafusion-physical-expr = { version = "51.0.0" }
97-
datafusion-physical-plan = { version = "51.0.0" }
98-
datafusion-pruning = { version = "51.0.0" }
86+
datafusion = { version = "52.5.0", default-features = false }
87+
datafusion-catalog = { version = "52.5.0" }
88+
datafusion-common = { version = "52.5.0", default-features = false }
89+
datafusion-common-runtime = { version = "52.5.0", default-features = false }
90+
datafusion-datasource = { version = "52.5.0", default-features = false }
91+
datafusion-datasource-parquet = { version = "52.5.0" }
92+
datafusion-execution = { version = "52.5.0", default-features = false }
93+
datafusion-expr = { version = "52.5.0" }
94+
datafusion-ffi = { version = "52.5.0" }
95+
datafusion-optimizer = { version = "52.5.0" }
96+
datafusion-physical-expr = { version = "52.5.0" }
97+
datafusion-physical-plan = { version = "52.5.0" }
98+
datafusion-pruning = { version = "52.5.0" }
9999
dirs = "6.0.0"
100100
env_logger = "0.11"
101101
fastrand = "2.4"
@@ -111,13 +111,13 @@ glam = "0.32.0"
111111
libmimalloc-sys = { version = "0.1", default-features = false }
112112
log = "^0.4"
113113
libloading = "0.9"
114-
lru = "0.17"
114+
lru = "0.18"
115115
mimalloc = { version = "0.1", default-features = false }
116116
num-traits = { version = "0.2", default-features = false, features = ["libm"] }
117117
object_store = { version = "0.12.4", default-features = false }
118118
once_cell = "1.20"
119119
parking_lot = "0.12"
120-
parquet = { version = "57.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
120+
parquet = { version = "57.1.0", default-features = false, features = ["arrow", "async", "object_store"] }
121121
pin-project-lite = "0.2"
122122
rand = "0.10"
123123
regex = "1.12"
@@ -148,6 +148,7 @@ sedona-geoparquet = { version = "0.4.0", path = "rust/sedona-geoparquet" }
148148
sedona-pointcloud = { version = "0.4.0", path = "rust/sedona-pointcloud" }
149149
sedona-raster = { version = "0.4.0", path = "rust/sedona-raster" }
150150
sedona-raster-functions = { version = "0.4.0", path = "rust/sedona-raster-functions" }
151+
sedona-raster-gdal = { version = "0.4.0", path = "rust/sedona-raster-gdal" }
151152
sedona-schema = { version = "0.4.0", path = "rust/sedona-schema" }
152153
sedona-spatial-join = { version = "0.4.0", path = "rust/sedona-spatial-join" }
153154
sedona-spatial-join-gpu = { version = "0.4.0", path = "rust/sedona-spatial-join-gpu" }

c/sedona-geos/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ mod st_line_merge;
3838
mod st_makevalid;
3939
mod st_minimumclearance;
4040
mod st_minimumclearance_line;
41+
mod st_normalize;
4142
mod st_nrings;
4243
mod st_numinteriorrings;
4344
mod st_numpoints;

c/sedona-geos/src/register.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ pub fn scalar_kernels() -> Vec<(&'static str, Vec<ScalarKernelRef>)> {
6767
"st_makevalid" => crate::st_makevalid::st_make_valid_impl,
6868
"st_minimumclearance" => crate::st_minimumclearance::st_minimum_clearance_impl,
6969
"st_minimumclearanceline" => crate::st_minimumclearance_line::st_minimum_clearance_line_impl,
70+
"st_normalize" => crate::st_normalize::st_normalize_impl,
7071
"st_nrings" => crate::st_nrings::st_nrings_impl,
7172
"st_numinteriorrings" => crate::st_numinteriorrings::st_num_interior_rings_impl,
7273
"st_numpoints" => crate::st_numpoints::st_num_points_impl,

c/sedona-geos/src/st_normalize.rs

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
use std::sync::Arc;
18+
19+
use arrow_array::builder::BinaryBuilder;
20+
use datafusion_common::{error::Result, DataFusionError};
21+
use datafusion_expr::ColumnarValue;
22+
use sedona_expr::{
23+
item_crs::ItemCrsKernel,
24+
scalar_udf::{ScalarKernelRef, SedonaScalarKernel},
25+
};
26+
use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES;
27+
use sedona_schema::{
28+
datatypes::{SedonaType, WKB_GEOMETRY},
29+
matchers::ArgMatcher,
30+
};
31+
32+
use crate::executor::GeosExecutor;
33+
use crate::geos_to_wkb::write_geos_geometry;
34+
35+
/// ST_Normalize() implementation using the geos crate
36+
pub fn st_normalize_impl() -> Vec<ScalarKernelRef> {
37+
ItemCrsKernel::wrap_impl(STNormalize {})
38+
}
39+
40+
#[derive(Debug)]
41+
struct STNormalize {}
42+
43+
impl SedonaScalarKernel for STNormalize {
44+
fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
45+
let matcher = ArgMatcher::new(vec![ArgMatcher::is_geometry()], WKB_GEOMETRY);
46+
47+
matcher.match_args(args)
48+
}
49+
50+
fn invoke_batch(
51+
&self,
52+
arg_types: &[SedonaType],
53+
args: &[ColumnarValue],
54+
) -> Result<ColumnarValue> {
55+
let executor = GeosExecutor::new(arg_types, args);
56+
let mut builder = BinaryBuilder::with_capacity(
57+
executor.num_iterations(),
58+
WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
59+
);
60+
executor.execute_wkb_void(|maybe_wkb| {
61+
match maybe_wkb {
62+
Some(wkb) => {
63+
invoke_scalar(&wkb, &mut builder)?;
64+
builder.append_value([]);
65+
}
66+
_ => builder.append_null(),
67+
}
68+
69+
Ok(())
70+
})?;
71+
72+
executor.finish(Arc::new(builder.finish()))
73+
}
74+
}
75+
76+
fn invoke_scalar(geos_geom: &geos::Geometry, writer: &mut impl std::io::Write) -> Result<()> {
77+
let mut geometry = Clone::clone(geos_geom);
78+
geometry
79+
.normalize()
80+
.map_err(|e| DataFusionError::Execution(format!("Failed to normalize geometry: {e}")))?;
81+
82+
write_geos_geometry(&geometry, writer)?;
83+
Ok(())
84+
}
85+
86+
#[cfg(test)]
mod tests {
    use datafusion_common::ScalarValue;
    use geos::{Geom, Geometry};
    use rstest::rstest;
    use sedona_expr::scalar_udf::SedonaScalarUDF;
    use sedona_schema::datatypes::{WKB_GEOMETRY, WKB_GEOMETRY_ITEM_CRS, WKB_VIEW_GEOMETRY};
    use sedona_testing::testers::ScalarUdfTester;

    use super::*;

    // Shared fixtures: each input alongside the form expected after normalization.
    const SQUARE_INPUT: &str = "POLYGON((1 1, 1 0, 0 0, 0 1, 1 1))";
    const SQUARE_NORMALIZED: &str = "POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))";
    const LINES_INPUT: &str = "MULTILINESTRING ((2 2, 1 1), (4 4, 3 3))";
    const LINES_NORMALIZED: &str = "MULTILINESTRING ((3 3, 4 4), (1 1, 2 2))";

    /// Creates a tester for `st_normalize` taking one argument of `arg_type`.
    fn normalize_tester(arg_type: SedonaType) -> ScalarUdfTester {
        let udf = SedonaScalarUDF::from_impl("st_normalize", st_normalize_impl());
        ScalarUdfTester::new(udf.into(), vec![arg_type])
    }

    /// Scalar, null, and array invocations over plain and view WKB storage.
    #[rstest]
    fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) {
        let tester = normalize_tester(sedona_type);

        tester.assert_return_type(WKB_GEOMETRY);

        // Scalar inputs
        let polygon_out = tester.invoke_scalar(SQUARE_INPUT).unwrap();
        tester.assert_scalar_result_equals(polygon_out, SQUARE_NORMALIZED);

        let lines_out = tester.invoke_scalar(LINES_INPUT).unwrap();
        tester.assert_scalar_result_equals(lines_out, LINES_NORMALIZED);

        // Null in, null out
        let null_out = tester.invoke_scalar(ScalarValue::Null).unwrap();
        assert!(null_out.is_null());

        // Array input with a null in the middle
        let batch_input = vec![
            Some("POINT(2 1)"),
            None,
            Some(SQUARE_INPUT),
            Some(LINES_INPUT),
        ];
        let batch_out = tester.invoke_wkb_array(batch_input).unwrap();
        assert_eq!(batch_out.len(), 4);
        assert!(batch_out.is_null(1));

        let batch_expected = sedona_testing::create::create_array(
            &[
                Some("POINT (2 1)"),
                None,
                Some(SQUARE_NORMALIZED),
                Some(LINES_NORMALIZED),
            ],
            &WKB_GEOMETRY,
        );
        sedona_testing::compare::assert_array_equal(&batch_out, &batch_expected);
    }

    /// Item-CRS input keeps its item-CRS type on output.
    #[rstest]
    fn udf_invoke_item_crs(#[values(WKB_GEOMETRY_ITEM_CRS.clone())] sedona_type: SedonaType) {
        let tester = normalize_tester(sedona_type.clone());

        tester.assert_return_type(sedona_type);

        let polygon_out = tester.invoke_scalar(SQUARE_INPUT).unwrap();
        tester.assert_scalar_result_equals(polygon_out, SQUARE_NORMALIZED);
    }

    /// A geometry that is already in normal form comes back unchanged.
    #[rstest]
    fn udf_already_normalized(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) {
        let tester = normalize_tester(sedona_type);

        let unchanged = tester.invoke_scalar(SQUARE_NORMALIZED).unwrap();
        tester.assert_scalar_result_equals(unchanged, SQUARE_NORMALIZED);
    }

    /// The free function writes normalized WKB and leaves its input untouched.
    #[test]
    fn invoke_scalar_normalizes_via_geos_without_mutating_input() {
        let source = Geometry::new_from_wkt(SQUARE_INPUT).unwrap();
        let wkt_before = source.to_wkt().unwrap();

        let mut wkb_out = Vec::new();
        invoke_scalar(&source, &mut wkb_out).unwrap();

        let roundtrip = Geometry::new_from_wkb(&wkb_out).unwrap();
        assert_eq!(roundtrip.to_wkt().unwrap(), SQUARE_NORMALIZED);

        assert_eq!(source.to_wkt().unwrap(), wkt_before);
    }
}

c/sedona-libgpuspatial/src/lib.rs

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
// under the License.
1717

1818
use arrow_schema::DataType;
19-
use geo_types::Rect;
2019

2120
mod error;
2221
#[cfg(gpu_available)]
@@ -113,9 +112,10 @@ mod sys {
113112
/// Inserts a batch of bounding boxes into the index.
114113
/// Each rectangle is represented as a `[f32; 4]` array holding its minimum and maximum x and y coordinates.
115114
/// This method accumulates these rectangles until `finish_building` is called to finalize the index.
116-
/// The method can be called multiple times to insert data in batches before finalizing.
117-
pub fn push_build(&mut self, rects: &[Rect<f32>]) -> Result<()> {
118-
// Re-interpreting Rect<f32> as flat f32 array (xmin, ymin, xmax, ymax)
115+
/// The method can be called multiple times to insert data in batches before finalizing. The values
116+
/// in rects are ordered (xmin, ymin, xmax, ymax).
117+
pub fn push_build(&mut self, rects: &[[f32; 4]]) -> Result<()> {
118+
// Re-interpreting rects as a flat f32 array (xmin, ymin, xmax, ymax)
119119
let raw_ptr = rects.as_ptr() as *const f32;
120120
self.inner.push_build(raw_ptr, rects.len() as u32)
121121
}
@@ -126,7 +126,8 @@ mod sys {
126126
}
127127

128128
/// Probes the spatial index with a batch of rectangles and returns pairs of matching indices from the build and probe sets.
129-
pub fn probe(&self, rects: &[Rect<f32>]) -> Result<(Vec<u32>, Vec<u32>)> {
129+
/// The values in rects are ordered (xmin, ymin, xmax, ymax).
130+
pub fn probe(&self, rects: &[[f32; 4]]) -> Result<(Vec<u32>, Vec<u32>)> {
130131
let raw_ptr = rects.as_ptr() as *const f32;
131132
self.inner.probe(raw_ptr, rects.len() as u32)
132133
}
@@ -203,13 +204,13 @@ mod sys {
203204
Err(GpuSpatialError::GpuNotAvailable)
204205
}
205206
pub fn clear(&mut self) {}
206-
pub fn push_build(&mut self, _r: &[Rect<f32>]) -> Result<()> {
207+
pub fn push_build(&mut self, _r: &[[f32; 4]]) -> Result<()> {
207208
Err(GpuSpatialError::GpuNotAvailable)
208209
}
209210
pub fn finish_building(&mut self) -> Result<GpuSpatialIndex> {
210211
Err(GpuSpatialError::GpuNotAvailable)
211212
}
212-
pub fn probe(&self, _r: &[Rect<f32>]) -> Result<(Vec<u32>, Vec<u32>)> {
213+
pub fn probe(&self, _r: &[[f32; 4]]) -> Result<(Vec<u32>, Vec<u32>)> {
213214
Err(GpuSpatialError::GpuNotAvailable)
214215
}
215216
}
@@ -268,13 +269,16 @@ mod tests {
268269
Some("POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"),
269270
Some("POLYGON ((35 10, 45 45, 15 40, 10 20, 35 10), (20 30, 35 35, 30 20, 20 30))"),
270271
];
271-
let rects: Vec<Rect<f32>> = polygon_values
272+
let rects: Vec<_> = polygon_values
272273
.iter()
273274
.map(|w| {
274-
Polygon::try_from_wkt_str(w.unwrap())
275+
let rect = Polygon::try_from_wkt_str(w.unwrap())
275276
.unwrap()
276277
.bounding_rect()
277-
.unwrap()
278+
.unwrap();
279+
let min = rect.min();
280+
let max = rect.max();
281+
(min.x, min.y, max.x, max.y)
278282
})
279283
.collect();
280284

@@ -286,9 +290,14 @@ mod tests {
286290

287291
// 4. Probe (Index is immutable and safe)
288292
let point_values = &[Some("POINT (30 20)")];
289-
let points: Vec<Rect<f32>> = point_values
293+
let points: Vec<_> = point_values
290294
.iter()
291-
.map(|w| Point::try_from_wkt_str(w.unwrap()).unwrap().bounding_rect())
295+
.map(|w| {
296+
let rect = Point::try_from_wkt_str(w.unwrap()).unwrap().bounding_rect();
297+
let min = rect.min();
298+
let max = rect.max();
299+
(min.x, min.y, max.x, max.y)
300+
})
292301
.collect();
293302

294303
let (build_idx, probe_idx) = index.probe(&points).unwrap();

0 commit comments

Comments
 (0)