55import rasters as rt
66from dateutil import parser
77from pandas import DataFrame
8+ from pytictoc import TicToc
89
910# Import functions for calculating solar time
1011from solar_apparent_time import calculate_solar_day_of_year , calculate_solar_hour_of_day
1112from geopandas import GeoSeries
1213from shapely .geometry import Point as ShapelyPoint
1314
15+ from rasters import MultiPoint
16+
1417from GEOS5FP import GEOS5FP
1518
1619from .constants import *
@@ -43,6 +46,8 @@ def process_BESS_table(
4346 if verbose is None :
4447 verbose = not _is_notebook ()
4548
49+ timer = TicToc ()
50+
4651 ST_C = np .array (input_df .ST_C ).astype (np .float64 )
4752 NDVI = np .array (input_df .NDVI ).astype (np .float64 )
4853
@@ -216,6 +221,7 @@ def parse_geom(s):
216221 input_df = ensure_geometry (input_df )
217222
218223 logger .info ("started extracting geometry from BESS input table" )
224+ timer .tic ()
219225
220226 if "geometry" in input_df :
221227 # Convert Point objects to a list of Points
@@ -230,44 +236,43 @@ def parse_geom(s):
230236 else :
231237 raise KeyError ("Input DataFrame must contain either 'geometry' or both 'lat' and 'lon' columns." )
232238
233- logger .info ("completed extracting geometry from BESS input table" )
239+ elapsed = timer .tocvalue ()
240+ logger .info (f"completed extracting geometry from BESS input table ({ elapsed :.2f} seconds)" )
234241
235242 logger .info ("started extracting time from BESS input table" )
236- time_UTC_list = pd .to_datetime (input_df .time_UTC ).tolist ()
243+ timer .tic ()
244+ time_UTC_list = pd .to_datetime (input_df .time_UTC , format = 'ISO8601' ).tolist ()
245+ elapsed = timer .tocvalue ()
246+ logger .info (f"completed extracting time from BESS input table ({ elapsed :.2f} seconds)" )
247+
248+ logger .info ("started calculating day of year and hour of day" )
249+ timer .tic ()
237250
238- # Calculate day_of_year and hour_of_day for each point
239- day_of_year_list = []
240- hour_of_day_list = []
251+ # Create GeoSeries once for all geometry
252+ geoseries_all = GeoSeries ([ShapelyPoint (geom .x , geom .y ) for geom in geometry ])
241253
242- for i , (time_utc , geom ) in enumerate (zip (time_UTC_list , geometry )):
243- # Create a GeoSeries with a Shapely Point (lon, lat order)
244- shapely_point = ShapelyPoint (geom .x , geom .y )
245- geoseries = GeoSeries ([shapely_point ])
246- doy = calculate_solar_day_of_year (time_UTC = time_utc , geometry = geoseries )
247- hod = calculate_solar_hour_of_day (time_UTC = time_utc , geometry = geoseries )
248- # Extract scalar values if returned as arrays
249- doy_scalar = doy [0 ] if hasattr (doy , '__getitem__' ) else doy
250- hod_scalar = hod [0 ] if hasattr (hod , '__getitem__' ) else hod
251- day_of_year_list .append (doy_scalar )
252- hour_of_day_list .append (hod_scalar )
254+ # Call functions once with full arrays - they should handle broadcasting
255+ day_of_year = np .asarray (calculate_solar_day_of_year (time_UTC = time_UTC_list , geometry = geoseries_all ))
256+ hour_of_day = np .asarray (calculate_solar_hour_of_day (time_UTC = time_UTC_list , geometry = geoseries_all ))
253257
254- # Convert to numpy arrays (1D)
255- day_of_year = np .array (day_of_year_list )
256- hour_of_day = np .array (hour_of_day_list )
258+ elapsed = timer .tocvalue ()
259+ logger .info (f"completed calculating day of year and hour of day ({ elapsed :.2f} seconds)" )
257260
258261 # Convert list of rasters.Point to MultiPoint for compatibility with FLiESANN and other functions
259- from rasters import MultiPoint
262+
263+ logger .info ("started extracting geometry" )
264+ timer .tic ()
265+
260266 # Extract (x, y) tuples from rasters.Point objects
261267 point_tuples = [(pt .x , pt .y ) for pt in geometry ]
262268 geometry_multipoint = MultiPoint (point_tuples )
269+ time_UTC = time_UTC_list
263270
264- # Check if all times are the same
265- if len (set (time_UTC_list )) == 1 :
266- # All timestamps are identical, use single datetime
267- time_UTC = time_UTC_list [0 ]
268- else :
269- # Different timestamps per point, keep as list
270- time_UTC = time_UTC_list
271+ elapsed = timer .tocvalue ()
272+ logger .info (f"completed extracting geometry ({ elapsed :.2f} seconds)" )
273+
274+ logger .info ("started retrieving BESS inputs" )
275+ timer .tic ()
271276
272277 BESS_GEOS5FP_inputs = retrieve_BESS_JPL_GEOS5FP_inputs (
273278 time_UTC = time_UTC ,
@@ -288,6 +293,9 @@ def parse_geom(s):
288293 offline_mode = offline_mode
289294 )
290295
296+ elapsed = timer .tocvalue ()
297+ logger .info (f"finished retrieving BESS inputs ({ elapsed :.2f} seconds)" )
298+
291299 albedo = BESS_GEOS5FP_inputs ['albedo' ]
292300 Ta_C = BESS_GEOS5FP_inputs ['Ta_C' ]
293301 RH = BESS_GEOS5FP_inputs ['RH' ]
@@ -299,8 +307,6 @@ def parse_geom(s):
299307 NIR_albedo = BESS_GEOS5FP_inputs ['NIR_albedo' ]
300308 Ca = BESS_GEOS5FP_inputs ['Ca' ]
301309 wind_speed_mps = BESS_GEOS5FP_inputs ['wind_speed_mps' ]
302-
303- logger .info ("completed extracting time from BESS input table" )
304310
305311 results = BESS_JPL (
306312 geometry = geometry_multipoint ,
@@ -343,23 +349,18 @@ def parse_geom(s):
343349
344350 output_df = input_df .copy ()
345351
346- # Collect new columns to avoid DataFrame fragmentation
347- new_columns = {}
352+ # Update or add columns from results, overwriting existing columns to avoid duplicates
348353 for key , value in results .items ():
349354 # Skip non-array-like objects (e.g., MultiPoint geometry)
350355 if hasattr (value , '__len__' ) and not isinstance (value , (str , MultiPoint )):
351356 try :
352- new_columns [key ] = value
357+ output_df [key ] = value # Direct assignment overwrites existing columns
353358 except (ValueError , TypeError ):
354359 # Skip values that can't be assigned to DataFrame
355360 logger .warning (f"Skipping assignment of key '{ key } ' to output DataFrame" )
356361 continue
357362 elif isinstance (value , (int , float , np .number )):
358363 # Handle scalar values
359- new_columns [key ] = value
360-
361- # Add all new columns at once using concat to avoid fragmentation
362- if new_columns :
363- output_df = pd .concat ([output_df , pd .DataFrame (new_columns , index = output_df .index )], axis = 1 )
364+ output_df [key ] = value
364365
365366 return output_df
0 commit comments