@@ -11,7 +11,6 @@ import os
1111
1212# Third-party package imports
1313import arcpy
14- import pandas as pd
1514import pyarrow .parquet as pq
1615import pyarrow .dataset as ds
1716
@@ -41,7 +40,101 @@ class Toolbox:
4140 def __init__ (self ):
4241 self .label = "H3 Origin Destination Matrix"
4342 self .alias = "h3_od_matrix"
44- self .tools = [AddDestinationDistance , GetH3Indices ]
43+ self .tools = [AddDestinationDistance , GetH3Indices , CreateOriginH3FeatureClass ]
44+
45+
class CreateOriginH3FeatureClass:
    """Geoprocessing tool that writes one polygon per unique origin H3 index.

    The tool reads the ``origin_id`` column from every ``.part`` parquet file
    found directly inside the input folder, de-duplicates the values, and
    inserts one polygon feature per index into a newly created feature class.
    """

    def __init__(self):
        """Set the tool's display metadata and create its logger."""
        self.label = "Create Origin H3 Feature Class"
        self.description = "Create a feature class of origin H3 indices from an OD matrix parquet dataset."
        self.category = "Utilities"
        logger_name = f"h3_od.Toolbox.{self.__class__.__name__}"
        self.logger = get_logger(logger_name, level="INFO", add_arcpy_handler=True)

    def getParameterInfo(self):
        """Return the tool parameters: input OD matrix folder and output feature class."""
        od_matrix = arcpy.Parameter(
            displayName="H3 OD Matrix Parquet Dataset",
            name="od_matrix",
            datatype="DEFolder",
            parameterType="Required",
            direction="Input",
        )
        output_fc = arcpy.Parameter(
            displayName="Output Feature Class (Polygons)",
            name="output_fc",
            datatype="DEFeatureClass",
            parameterType="Required",
            direction="Output",
        )
        return [od_matrix, output_fc]

    @staticmethod
    def _list_part_files(folder):
        """Return the sorted full paths of the ``.part`` files directly inside *folder*.

        Sub-directories are ignored even when their name ends in ``.part``.
        Sorting makes the read order independent of the directory listing order.
        """
        with os.scandir(folder) as entries:
            return sorted(
                entry.path
                for entry in entries
                if entry.name.lower().endswith(".part") and entry.is_file()
            )

    def execute(self, parameters, messages):
        """Create the output feature class of origin H3 polygons.

        Args:
            parameters: Tool parameters in the order returned by
                ``getParameterInfo`` (input folder, output feature class).
            messages: Geoprocessing messages object; unused, all reporting
                goes through ``self.logger``.

        Returns:
            None. Validation failures are logged as errors and end the run
            early without raising.
        """
        od_matrix_folder = parameters[0].valueAsText
        output_fc = parameters[1].valueAsText

        # The dataset is expected to be a directory of .part files
        if not od_matrix_folder or not os.path.isdir(od_matrix_folder):
            self.logger.error("OD matrix must be a directory containing .part files.")
            return

        self.logger.info(f"Reading OD matrix from: {od_matrix_folder}")

        part_files = self._list_part_files(od_matrix_folder)

        if not part_files:
            self.logger.error(
                "OD matrix directory must contain at least one .part file."
            )
            return

        # Read only the origin_id column, but from EVERY part file: origins are
        # spread across the parts, so reading a single file would drop the rest.
        od_dataset = ds.dataset(part_files, format="parquet")
        origin_id_col = od_dataset.to_table(columns=["origin_id"]).column("origin_id")

        # Null origins cannot be turned into a polygon, so drop them up front
        unique_origin_ids = [
            origin_id
            for origin_id in origin_id_col.unique().to_pylist()
            if origin_id is not None
        ]

        if not unique_origin_ids:
            self.logger.error("No origin H3 indices found in OD matrix.")
            return

        self.logger.info(f"Found {len(unique_origin_ids)} unique origin H3 indices.")

        # Get resolution from first origin_id (reported for information only)
        resolution = h3_arcpy.get_h3_resolution(unique_origin_ids[0])
        self.logger.info(f"Detected H3 resolution: {resolution}")

        # Create output feature class (always WGS84)
        sr = arcpy.SpatialReference(4326)
        arcpy.management.CreateFeatureclass(
            out_path=os.path.dirname(output_fc),
            out_name=os.path.basename(output_fc),
            geometry_type="POLYGON",
            spatial_reference=sr,
        )
        arcpy.management.AddField(output_fc, "h3_index", "TEXT")

        # Insert each geometry on the fly; a bad index is skipped, not fatal
        failed_count = 0
        with arcpy.da.InsertCursor(output_fc, ["SHAPE@", "h3_index"]) as cursor:
            for h3_index in unique_origin_ids:
                try:
                    poly = h3_arcpy.get_arcpy_polygon_for_h3_index(h3_index)
                    cursor.insertRow([poly, h3_index])
                except Exception as e:
                    failed_count += 1
                    self.logger.warning(
                        f"Failed to create/insert polygon for H3 index {h3_index}: {e}"
                    )

        if failed_count:
            self.logger.warning(
                f"{failed_count} of {len(unique_origin_ids)} origin H3 indices could not be written."
            )

        self.logger.info(f"Created origin H3 polygons feature class: {output_fc}")
        return
45138
46139
47140class AddDestinationDistance :
@@ -204,61 +297,66 @@ class AddDestinationDistance:
204297 resolution = h3_arcpy .get_h3_resolution (first_origin )
205298 self .logger .info (f"Detected H3 resolution from OD matrix: { resolution } " )
206299
207- # Read input features to DataFrame
208- arr = arcpy .da .FeatureClassToNumPyArray (
209- input_features , ["SHAPE@" ], skip_nulls = True
210- )
211-
212- df_features = pd .DataFrame (arr )
213- df_features [distance_field ] = None
214-
300+ # Prepare the list of fields to update in the cursor
301+ update_fields = ["SHAPE@" , distance_field ]
215302 if time_field :
216- df_features [time_field ] = None
217-
218- # For each feature, get H3 index and lookup OD matrix
219- for idx , row in df_features .iterrows ():
220-
221- geom = row ["SHAPE@" ]
222- h3_origin = h3_arcpy .get_h3_index_for_esri_geometry (geom , resolution )
223-
224- try :
225- od_result = od_df [od_df ["origin_id" ] == h3_origin ]
226-
227- if h3_destination :
228- od_result = od_result [od_result ["destination_id" ] == h3_destination ]
229- if not od_result .empty :
230-
231- # If multiple records, sort by time if present, else by distance
232- if len (od_result ) > 1 :
233-
234- if "time" in od_result .columns :
235- od_result = od_result .sort_values ("time" )
236- self .logger .info (
237- f"Multiple records found for origin { h3_origin } . Sorted by 'time'. Using nearest."
238- )
239-
240- else :
241- od_result = od_result .sort_values ("distance_miles" )
242- self .logger .info (
243- f"Multiple records found for origin { h3_origin } . Sorted by 'distance_miles'. "
244- f"Using nearest."
245- )
246-
247- df_features .at [idx , distance_field ] = od_result .iloc [0 ][
248- "distance_miles"
249- ]
250-
251- if time_field and "time" in od_result .columns :
252- df_features .at [idx , time_field ] = od_result .iloc [0 ]["time" ]
253-
254- except Exception as e :
255- self .logger .warning (
256- f"Failed to get OD distance for origin { h3_origin } : { e } "
257- )
303+ update_fields .append (time_field )
304+
305+ # Use UpdateCursor to iterate through each feature and update OD results
306+ with arcpy .da .UpdateCursor (input_features , update_fields ) as cursor :
307+ for row in cursor :
308+
309+ # Get geometry and compute H3 index for the origin feature
310+ geom = row [0 ]
311+ h3_origin = h3_arcpy .get_h3_index_for_esri_geometry (geom , resolution )
312+
313+ try :
314+ # Filter OD matrix for matching origin
315+ od_result = od_df [od_df ["origin_id" ] == h3_origin ]
316+
317+ # If a destination is specified, further filter by destination
318+ if h3_destination :
319+ od_result = od_result [
320+ od_result ["destination_id" ] == h3_destination
321+ ]
322+
323+ # If a matching OD record is found, update the feature
324+ if not od_result .empty :
325+
326+ # If multiple records, sort by time (if present) or by distance
327+ if len (od_result ) > 1 :
328+ if "time" in od_result .columns :
329+ od_result = od_result .sort_values ("time" )
330+ self .logger .debug (
331+ f"Multiple records found for origin { h3_origin } . Sorted by 'time'. Using nearest."
332+ )
333+ else :
334+ od_result = od_result .sort_values ("distance_miles" )
335+ self .logger .debug (
336+ f"Multiple records found for origin { h3_origin } . Sorted by 'distance_miles'. "
337+ f"Using nearest."
338+ )
339+
340+ # Update the distance field
341+ row [update_fields .index (distance_field )] = od_result .iloc [0 ][
342+ "distance_miles"
343+ ]
344+
345+ # Update the time field if enabled and present
346+ if time_field and "time" in od_result .columns :
347+ row [update_fields .index (time_field )] = od_result .iloc [0 ][
348+ "time"
349+ ]
350+
351+ # Commit the update to the feature
352+ cursor .updateRow (row )
353+
354+ except Exception as e :
355+ self .logger .warning (
356+ f"Failed to get OD distance for origin { h3_origin } : { e } "
357+ )
258358
259- self .logger .info (
260- f"Distance and time fields added to input features (not saved to disk by this tool)"
261- )
359+ self .logger .info (f"Distance and time fields updated in input features." )
262360
263361 return
264362
@@ -267,7 +365,7 @@ class GetH3Indices:
267365 def __init__ (self ):
268366 self .label = "Get H3 Indices"
269367 self .description = "Get H3 indices for an area of interest polygon feature class, with options for selection method and centroid output."
270- self .category = "Analysis "
368+ self .category = "Utilities "
271369 logger_name = f"h3_od.Toolbox.{ self .__class__ .__name__ } "
272370 self .logger = get_logger (logger_name , level = "INFO" , add_arcpy_handler = True )
273371
0 commit comments