@@ -26,15 +26,17 @@ use arrow_schema::{DataType, Field, FieldRef};
2626use datafusion_common:: {
2727 cast:: as_binary_array,
2828 error:: { DataFusionError , Result } ,
29- ScalarValue ,
29+ exec_datafusion_err , ScalarValue ,
3030} ;
3131use datafusion_expr:: Volatility ;
3232use datafusion_expr:: { Accumulator , ColumnarValue } ;
3333use sedona_common:: { sedona_internal_datafusion_err, sedona_internal_err} ;
3434use sedona_expr:: aggregate_udf:: { SedonaAccumulatorRef , SedonaAggregateUDF } ;
3535use sedona_expr:: item_crs:: ItemCrsSedonaAccumulator ;
3636use sedona_expr:: { aggregate_udf:: SedonaAccumulator , statistics:: GeoStatistics } ;
37- use sedona_geometry:: analyze:: GeometrySummary ;
37+ use sedona_geometry:: analyze:: { analyze_wkb, GeometrySummary } ;
38+ use sedona_geometry:: bounding_box:: BoundingBox ;
39+ use sedona_geometry:: bounds:: geo_traits_bounds_xy;
3840use sedona_geometry:: interval:: IntervalTrait ;
3941use sedona_geometry:: types:: { GeometryTypeAndDimensions , GeometryTypeAndDimensionsSet } ;
4042use sedona_schema:: { datatypes:: SedonaType , matchers:: ArgMatcher } ;
@@ -77,12 +79,9 @@ impl SedonaAccumulator for STAnalyzeAgg {
7779 fn accumulator (
7880 & self ,
7981 args : & [ SedonaType ] ,
80- output_type : & SedonaType ,
82+ _output_type : & SedonaType ,
8183 ) -> Result < Box < dyn Accumulator > > {
82- Ok ( Box :: new ( AnalyzeAccumulator :: new (
83- args[ 0 ] . clone ( ) ,
84- output_type. clone ( ) ,
85- ) ) )
84+ Ok ( Box :: new ( AnalyzeAccumulator :: new ( args[ 0 ] . clone ( ) ) ) )
8685 }
8786
8887 fn state_fields ( & self , _args : & [ SedonaType ] ) -> Result < Vec < FieldRef > > {
@@ -223,38 +222,42 @@ impl STAnalyzeAgg {
223222#[ derive( Debug ) ]
224223pub struct AnalyzeAccumulator {
225224 input_type : SedonaType ,
226- _output_type : SedonaType ,
227225 stats : GeoStatistics ,
228226}
229227
230228impl AnalyzeAccumulator {
231- pub fn new ( input_type : SedonaType , output_type : SedonaType ) -> Self {
229+ pub fn new ( input_type : SedonaType ) -> Self {
232230 Self {
233231 input_type,
234- _output_type : output_type,
235232 stats : GeoStatistics :: empty ( ) ,
236233 }
237234 }
238235
239- pub fn update_statistics ( & mut self , geom : & Wkb ) -> Result < ( ) > {
240- // Get geometry analysis information
241- let summary = sedona_geometry:: analyze:: analyze_geometry ( geom)
242- . map_err ( |e| DataFusionError :: External ( Box :: new ( e) ) ) ?;
236+ pub fn update_statistics_with_bbox ( & mut self , geom : & Wkb , bbox : & BoundingBox ) -> Result < ( ) > {
237+ let summary = analyze_wkb ( geom) . map_err ( |e| DataFusionError :: External ( Box :: new ( e) ) ) ?;
243238
244- self . ingest_geometry_summary ( & summary) ;
239+ self . ingest_geometry_summary ( & summary, bbox) ;
240+ Ok ( ( ) )
241+ }
245242
243+ fn update_statistics ( & mut self , geom : & Wkb ) -> Result < ( ) > {
244+ let bbox =
245+ geo_traits_bounds_xy ( geom) . map_err ( |e| exec_datafusion_err ! ( "Bounding error: {e}" ) ) ?;
246+ let summary = analyze_wkb ( geom) . map_err ( |e| exec_datafusion_err ! ( "Analysis error: {e}" ) ) ?;
247+
248+ self . ingest_geometry_summary ( & summary, & bbox) ;
246249 Ok ( ( ) )
247250 }
248251
249- pub fn ingest_geometry_summary ( & mut self , summary : & GeometrySummary ) {
252+ fn ingest_geometry_summary ( & mut self , summary : & GeometrySummary , bbox : & BoundingBox ) {
250253 // Start with a clone of the current stats
251254 let mut stats = self . stats . clone ( ) ;
252255
253256 // Update each component of the statistics
254257 stats = self . update_basic_counts ( stats, summary. size_bytes ) ;
255258 stats = self . update_geometry_type_counts ( stats, summary) ;
256259 stats = self . update_point_count ( stats, summary. point_count ) ;
257- stats = self . update_envelope_info ( stats, summary ) ;
260+ stats = self . update_envelope_info ( stats, bbox ) ;
258261 stats = self . update_geometry_types ( stats, summary. geometry_type ) ;
259262
260263 // Assign the updated stats back to self.stats
@@ -300,14 +303,7 @@ impl AnalyzeAccumulator {
300303 }
301304
302305 // Update envelope dimensions and bounding box
303- fn update_envelope_info (
304- & self ,
305- stats : GeoStatistics ,
306- analysis : & GeometrySummary ,
307- ) -> GeoStatistics {
308- // The bbox is directly available on analysis, not wrapped in an Option
309- let bbox = & analysis. bbox ;
310-
306+ fn update_envelope_info ( & self , stats : GeoStatistics , bbox : & BoundingBox ) -> GeoStatistics {
311307 // Calculate envelope width and height from the bbox
312308 let envelope_width = if bbox. x ( ) . is_empty ( ) {
313309 0.0
0 commit comments