Skip to content

Commit 536d8ef

Browse files
committed
refactor: replace DataFrame iteration with UpdateCursor in AddDestinationDistance
- Replace the arcpy.da.FeatureClassToNumPyArray + pandas DataFrame loop with arcpy.da.UpdateCursor to iterate and update features in place
- Remove the unused pandas import
- Add the CreateOriginH3FeatureClass utility tool to generate an origin H3 polygon feature class directly from an OD matrix parquet dataset
- Register CreateOriginH3FeatureClass in the Toolbox tools list
- Move the GetH3Indices category from Analysis to Utilities
- Add inline comments and whitespace to the UpdateCursor logic for clarity
1 parent f795c53 commit 536d8ef

File tree

1 file changed

+154
-56
lines changed

1 file changed

+154
-56
lines changed

arcgis/h3-origin-destination-matrix.pyt

Lines changed: 154 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import os
1111

1212
# Third-party package imports
1313
import arcpy
14-
import pandas as pd
1514
import pyarrow.parquet as pq
1615
import pyarrow.dataset as ds
1716

@@ -41,7 +40,101 @@ class Toolbox:
4140
def __init__(self):
4241
self.label = "H3 Origin Destination Matrix"
4342
self.alias = "h3_od_matrix"
44-
self.tools = [AddDestinationDistance, GetH3Indices]
43+
self.tools = [AddDestinationDistance, GetH3Indices, CreateOriginH3FeatureClass]
44+
45+
46+
class CreateOriginH3FeatureClass:
    """Toolbox tool that writes one polygon per unique origin H3 index.

    The tool reads the ``origin_id`` column from the ``.part`` files of an OD
    matrix parquet dataset, de-duplicates the values, and inserts a polygon
    for each unique H3 index into a newly created WGS84 feature class.
    """

    def __init__(self):
        """Set the tool metadata and create the tool-specific logger."""
        self.label = "Create Origin H3 Feature Class"
        self.description = "Create a feature class of origin H3 indices from an OD matrix parquet dataset."
        self.category = "Utilities"
        logger_name = f"h3_od.Toolbox.{self.__class__.__name__}"
        self.logger = get_logger(logger_name, level="INFO", add_arcpy_handler=True)

    def getParameterInfo(self):
        """Define the tool parameters.

        Returns:
            A two-item list: the input OD matrix folder parameter followed by
            the output polygon feature class parameter.
        """
        od_matrix = arcpy.Parameter(
            displayName="H3 OD Matrix Parquet Dataset",
            name="od_matrix",
            datatype="DEFolder",
            parameterType="Required",
            direction="Input",
        )
        output_fc = arcpy.Parameter(
            displayName="Output Feature Class (Polygons)",
            name="output_fc",
            datatype="DEFeatureClass",
            parameterType="Required",
            direction="Output",
        )
        return [od_matrix, output_fc]

    def execute(self, parameters, messages):
        """Create the origin H3 polygon feature class.

        Args:
            parameters: Tool parameters in the order returned by
                ``getParameterInfo`` (OD matrix folder, output feature class).
            messages: Messages object supplied by the caller; not used here
                because all reporting goes through ``self.logger``.

        Returns:
            None. Validation failures are logged as errors and end the run
            early without creating the output feature class.
        """
        od_matrix_folder = parameters[0].valueAsText
        output_fc = parameters[1].valueAsText

        # The OD matrix is expected to be a directory of .part files
        if not os.path.isdir(od_matrix_folder):
            self.logger.error("OD matrix must be a directory containing .part files.")
            return

        self.logger.info(f"Reading OD matrix from: {od_matrix_folder}")

        # Sorted so the order of the output rows does not depend on the
        # arbitrary ordering of os.listdir
        part_paths = sorted(
            os.path.join(od_matrix_folder, f)
            for f in os.listdir(od_matrix_folder)
            if f.lower().endswith(".part")
            and os.path.isfile(os.path.join(od_matrix_folder, f))
        )

        if not part_paths:
            self.logger.error(
                "OD matrix directory must contain at least one .part file."
            )
            return

        # Build the dataset from every part file; reading only the first one
        # would silently drop origins that are stored in the other parts.
        # Only the origin_id column is materialized.
        od_dataset = ds.dataset(part_paths, format="parquet")
        origin_id_col = od_dataset.to_table(columns=["origin_id"]).column("origin_id")

        # Null origin ids cannot be converted to a polygon, so drop them here
        unique_origin_ids = [
            origin_id
            for origin_id in origin_id_col.unique().to_pylist()
            if origin_id is not None
        ]

        if not unique_origin_ids:
            self.logger.error("No origin H3 indices found in OD matrix.")
            return

        self.logger.info(f"Found {len(unique_origin_ids)} unique origin H3 indices.")

        # Get resolution from first origin_id (reported for information only)
        resolution = h3_arcpy.get_h3_resolution(unique_origin_ids[0])
        self.logger.info(f"Detected H3 resolution: {resolution}")

        # Create output feature class (always WGS84)
        sr = arcpy.SpatialReference(4326)
        arcpy.management.CreateFeatureclass(
            out_path=os.path.dirname(output_fc),
            out_name=os.path.basename(output_fc),
            geometry_type="POLYGON",
            spatial_reference=sr,
        )
        arcpy.management.AddField(output_fc, "h3_index", "TEXT")

        # Insert each geometry on the fly; a failure for one index is logged
        # and skipped so the remaining indices are still written
        failed_count = 0
        with arcpy.da.InsertCursor(output_fc, ["SHAPE@", "h3_index"]) as cursor:
            for h3_index in unique_origin_ids:
                try:
                    poly = h3_arcpy.get_arcpy_polygon_for_h3_index(h3_index)
                    cursor.insertRow([poly, h3_index])
                except Exception as e:
                    failed_count += 1
                    self.logger.warning(
                        f"Failed to create/insert polygon for H3 index {h3_index}: {e}"
                    )

        if failed_count:
            self.logger.warning(
                f"Skipped {failed_count} of {len(unique_origin_ids)} origin H3 indices "
                f"because their polygons could not be created or inserted."
            )

        self.logger.info(f"Created origin H3 polygons feature class: {output_fc}")
        return
45138

46139

47140
class AddDestinationDistance:
@@ -204,61 +297,66 @@ class AddDestinationDistance:
204297
resolution = h3_arcpy.get_h3_resolution(first_origin)
205298
self.logger.info(f"Detected H3 resolution from OD matrix: {resolution}")
206299

207-
# Read input features to DataFrame
208-
arr = arcpy.da.FeatureClassToNumPyArray(
209-
input_features, ["SHAPE@"], skip_nulls=True
210-
)
211-
212-
df_features = pd.DataFrame(arr)
213-
df_features[distance_field] = None
214-
300+
# Prepare the list of fields to update in the cursor
301+
update_fields = ["SHAPE@", distance_field]
215302
if time_field:
216-
df_features[time_field] = None
217-
218-
# For each feature, get H3 index and lookup OD matrix
219-
for idx, row in df_features.iterrows():
220-
221-
geom = row["SHAPE@"]
222-
h3_origin = h3_arcpy.get_h3_index_for_esri_geometry(geom, resolution)
223-
224-
try:
225-
od_result = od_df[od_df["origin_id"] == h3_origin]
226-
227-
if h3_destination:
228-
od_result = od_result[od_result["destination_id"] == h3_destination]
229-
if not od_result.empty:
230-
231-
# If multiple records, sort by time if present, else by distance
232-
if len(od_result) > 1:
233-
234-
if "time" in od_result.columns:
235-
od_result = od_result.sort_values("time")
236-
self.logger.info(
237-
f"Multiple records found for origin {h3_origin}. Sorted by 'time'. Using nearest."
238-
)
239-
240-
else:
241-
od_result = od_result.sort_values("distance_miles")
242-
self.logger.info(
243-
f"Multiple records found for origin {h3_origin}. Sorted by 'distance_miles'. "
244-
f"Using nearest."
245-
)
246-
247-
df_features.at[idx, distance_field] = od_result.iloc[0][
248-
"distance_miles"
249-
]
250-
251-
if time_field and "time" in od_result.columns:
252-
df_features.at[idx, time_field] = od_result.iloc[0]["time"]
253-
254-
except Exception as e:
255-
self.logger.warning(
256-
f"Failed to get OD distance for origin {h3_origin}: {e}"
257-
)
303+
update_fields.append(time_field)
304+
305+
# Use UpdateCursor to iterate through each feature and update OD results
306+
with arcpy.da.UpdateCursor(input_features, update_fields) as cursor:
307+
for row in cursor:
308+
309+
# Get geometry and compute H3 index for the origin feature
310+
geom = row[0]
311+
h3_origin = h3_arcpy.get_h3_index_for_esri_geometry(geom, resolution)
312+
313+
try:
314+
# Filter OD matrix for matching origin
315+
od_result = od_df[od_df["origin_id"] == h3_origin]
316+
317+
# If a destination is specified, further filter by destination
318+
if h3_destination:
319+
od_result = od_result[
320+
od_result["destination_id"] == h3_destination
321+
]
322+
323+
# If a matching OD record is found, update the feature
324+
if not od_result.empty:
325+
326+
# If multiple records, sort by time (if present) or by distance
327+
if len(od_result) > 1:
328+
if "time" in od_result.columns:
329+
od_result = od_result.sort_values("time")
330+
self.logger.debug(
331+
f"Multiple records found for origin {h3_origin}. Sorted by 'time'. Using nearest."
332+
)
333+
else:
334+
od_result = od_result.sort_values("distance_miles")
335+
self.logger.debug(
336+
f"Multiple records found for origin {h3_origin}. Sorted by 'distance_miles'. "
337+
f"Using nearest."
338+
)
339+
340+
# Update the distance field
341+
row[update_fields.index(distance_field)] = od_result.iloc[0][
342+
"distance_miles"
343+
]
344+
345+
# Update the time field if enabled and present
346+
if time_field and "time" in od_result.columns:
347+
row[update_fields.index(time_field)] = od_result.iloc[0][
348+
"time"
349+
]
350+
351+
# Commit the update to the feature
352+
cursor.updateRow(row)
353+
354+
except Exception as e:
355+
self.logger.warning(
356+
f"Failed to get OD distance for origin {h3_origin}: {e}"
357+
)
258358

259-
self.logger.info(
260-
f"Distance and time fields added to input features (not saved to disk by this tool)"
261-
)
359+
self.logger.info(f"Distance and time fields updated in input features.")
262360

263361
return
264362

@@ -267,7 +365,7 @@ class GetH3Indices:
267365
def __init__(self):
268366
self.label = "Get H3 Indices"
269367
self.description = "Get H3 indices for an area of interest polygon feature class, with options for selection method and centroid output."
270-
self.category = "Analysis"
368+
self.category = "Utilities"
271369
logger_name = f"h3_od.Toolbox.{self.__class__.__name__}"
272370
self.logger = get_logger(logger_name, level="INFO", add_arcpy_handler=True)
273371

0 commit comments

Comments
 (0)