@@ -248,61 +248,73 @@ print(f"Total Stations Loaded (Region): {len(gdf_ccs_locations)}")
248248match_results = []
249249used_segids = set()
250250
251- print("Starting Matching Logic ...")
251+ print("Starting Route-Constrained Spatial Matching ...")
252252
253253for idx, ccs_row in gdf_ccs_locations.iterrows():
254254 station_id = ccs_row["STATION"]
255255 target_route = ccs_row["ROUTE"]
256- target_mp = ccs_row["MP"]
257256
258- # Filter Network to Route
257+ # --- PASS 1: Route-Constrained Spatial Match ---
258+ # Filter Network to the specific Route first.
259+ # This resolves the milepost (MP) mismatch observed for some stations (e.g., Station -816) by picking
260+ # the closest segment on the station's own route (Route 15 in that case), ignoring slight MP drifts.
259261 route_segments = gdf_network_clean[gdf_network_clean["MODEL_ROUTE"] == target_route]
260262
261263 matched_segid = None
262264 match_type = "No Match"
263265
264266 if not route_segments.empty:
265- # Pass 1A: Predecessor
266- predecessors = route_segments[route_segments["MODEL_MP"] <= target_mp]
267- if not predecessors.empty:
268- best_match = predecessors.loc[predecessors["MODEL_MP"].idxmax()]
269- matched_segid = best_match["SEGID"]
270- match_type = "Predecessor (<=)"
271- else:
272- # Pass 1B: Successor
273- successors = route_segments[route_segments["MODEL_MP"] > target_mp]
274- if not successors.empty:
275- best_match = successors.loc[successors["MODEL_MP"].idxmin()]
276- matched_segid = best_match["SEGID"]
277- match_type = "Successor (>)"
267+ # Calculate distance to all segments on this route
268+ distances = route_segments.distance(ccs_row["geometry"])
278269
270+ # Pick the absolute closest segment on this route
271+ best_idx = distances.idxmin()
272+ best_match = route_segments.loc[best_idx]
273+
274+ matched_segid = best_match["SEGID"]
275+ match_type = "Route Match (Spatial)"
276+
277+ # Add valid match to used set
279278 if matched_segid:
280279 used_segids.add(matched_segid)
281280
282- match_results.append({
283- "STATION": station_id,
284- "MATCHED_SEGID": matched_segid,
285- "MATCH_TYPE": match_type,
286- "geometry": ccs_row["geometry"]
287- })
281+ match_results.append(
282+ {
283+ "STATION": station_id,
284+ "MATCHED_SEGID": matched_segid,
285+ "MATCH_TYPE": match_type,
286+ "geometry": ccs_row["geometry"],
287+ }
288+ )
288289
289- # Pass 2: Spatial Fallback
290+ # --- PASS 2: Global Spatial Fallback (Unrestricted) ---
291+ # If Pass 1 failed (e.g., Route ID didn't match), snap to the nearest available segment anywhere.
290292for i, result in enumerate(match_results):
291293 if result["MATCHED_SEGID"] is None:
292294 station_geom = result["geometry"]
293- # Filter available segments (Not already used)
294- available_segments = gdf_network_clean[~gdf_network_clean["SEGID"].isin(used_segids)]
295+
296+ # Filter available segments (exclude those already matched in Pass 1 or claimed by an earlier fallback)
297+ available_segments = gdf_network_clean[
298+ ~gdf_network_clean["SEGID"].isin(used_segids)
299+ ]
295300
296301 if not available_segments.empty:
302+ # Calculate distance to ALL remaining segments
297303 distances = available_segments.distance(station_geom)
298- best_match = available_segments.loc[distances.idxmin()]
304+
305+ # Snap to the nearest one, regardless of distance
306+ min_dist_idx = distances.idxmin()
307+ best_match = available_segments.loc[min_dist_idx]
299308
300309 match_results[i]["MATCHED_SEGID"] = best_match["SEGID"]
301- match_results[i]["MATCH_TYPE"] = "Spatial Fallback"
310+ match_results[i]["MATCH_TYPE"] = "Global Fallback (Nearest)"
311+
312+ # Mark as used so future fallbacks don't grab it
302313 used_segids.add(best_match["SEGID"])
303314
304- # Create DataFrame and DROP Geometry
315+ # Create DataFrame and drop the geometry column
305316df_matches = pd.DataFrame(match_results).drop(columns=["geometry"])
317+ print("Matching Complete. Summary:")
306318print(df_matches["MATCH_TYPE"].value_counts())
307319```
308320
0 commit comments