Skip to content

Commit 29760a3

Browse files
committed
refine joining strategy
1 parent 01796f4 commit 29760a3

File tree

10 files changed

+1658
-1623
lines changed

10 files changed

+1658
-1623
lines changed

_freeze/index/execute-results/html.json

Lines changed: 3 additions & 3 deletions
Large diffs are not rendered by default.

_intermediate/station_segid_bridge.csv

Lines changed: 126 additions & 126 deletions
Large diffs are not rendered by default.

_output/dashboard_data.csv

Lines changed: 60 additions & 60 deletions
Large diffs are not rendered by default.

_output/segments.geojson

Lines changed: 4 additions & 5 deletions
Large diffs are not rendered by default.

docs/_output/dashboard_data.csv

Lines changed: 60 additions & 60 deletions
Large diffs are not rendered by default.

docs/_output/segments.geojson

Lines changed: 4 additions & 5 deletions
Large diffs are not rendered by default.

docs/index.html

Lines changed: 1358 additions & 1333 deletions
Large diffs are not rendered by default.

docs/search.json

Lines changed: 2 additions & 2 deletions
Large diffs are not rendered by default.

docs/sitemap.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
33
<url>
44
<loc>https://wfrcanalytics.github.io/index.html</loc>
5-
<lastmod>2026-01-16T22:12:57.932Z</lastmod>
5+
<lastmod>2026-01-16T22:48:49.470Z</lastmod>
66
</url>
77
</urlset>

index.qmd

Lines changed: 40 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -248,61 +248,73 @@ print(f"Total Stations Loaded (Region): {len(gdf_ccs_locations)}")
248248
match_results = []
249249
used_segids = set()
250250
251-
print("Starting Matching Logic...")
251+
print("Starting Route-Constrained Spatial Matching...")
252252
253253
for idx, ccs_row in gdf_ccs_locations.iterrows():
254254
station_id = ccs_row["STATION"]
255255
target_route = ccs_row["ROUTE"]
256-
target_mp = ccs_row["MP"]
257256
258-
# Filter Network to Route
257+
# --- PASS 1: Route-Constrained Spatial Match ---
258+
# Filter Network to the specific Route first.
259+
# This solves the specific MP issue you saw (e.g., Station -816) by picking
260+
# the closest segment on Route 15, ignoring slight MP drifts.
259261
route_segments = gdf_network_clean[gdf_network_clean["MODEL_ROUTE"] == target_route]
260262
261263
matched_segid = None
262264
match_type = "No Match"
263265
264266
if not route_segments.empty:
265-
# Pass 1A: Predecessor
266-
predecessors = route_segments[route_segments["MODEL_MP"] <= target_mp]
267-
if not predecessors.empty:
268-
best_match = predecessors.loc[predecessors["MODEL_MP"].idxmax()]
269-
matched_segid = best_match["SEGID"]
270-
match_type = "Predecessor (<=)"
271-
else:
272-
# Pass 1B: Successor
273-
successors = route_segments[route_segments["MODEL_MP"] > target_mp]
274-
if not successors.empty:
275-
best_match = successors.loc[successors["MODEL_MP"].idxmin()]
276-
matched_segid = best_match["SEGID"]
277-
match_type = "Successor (>)"
267+
# Calculate distance to all segments on this route
268+
distances = route_segments.distance(ccs_row["geometry"])
278269
270+
# Pick the absolute closest segment on this route
271+
best_idx = distances.idxmin()
272+
best_match = route_segments.loc[best_idx]
273+
274+
matched_segid = best_match["SEGID"]
275+
match_type = "Route Match (Spatial)"
276+
277+
# Add valid match to used set
279278
if matched_segid:
280279
used_segids.add(matched_segid)
281280
282-
match_results.append({
283-
"STATION": station_id,
284-
"MATCHED_SEGID": matched_segid,
285-
"MATCH_TYPE": match_type,
286-
"geometry": ccs_row["geometry"]
287-
})
281+
match_results.append(
282+
{
283+
"STATION": station_id,
284+
"MATCHED_SEGID": matched_segid,
285+
"MATCH_TYPE": match_type,
286+
"geometry": ccs_row["geometry"],
287+
}
288+
)
288289
289-
# Pass 2: Spatial Fallback
290+
# --- PASS 2: Global Spatial Fallback (Unrestricted) ---
291+
# If Pass 1 failed (e.g., Route ID didn't match), snap to the nearest available segment anywhere.
290292
for i, result in enumerate(match_results):
291293
if result["MATCHED_SEGID"] is None:
292294
station_geom = result["geometry"]
293-
# Filter available segments (Not already used)
294-
available_segments = gdf_network_clean[~gdf_network_clean["SEGID"].isin(used_segids)]
295+
296+
# Filter available segments (Exclude those already snapped in Pass 1)
297+
available_segments = gdf_network_clean[
298+
~gdf_network_clean["SEGID"].isin(used_segids)
299+
]
295300
296301
if not available_segments.empty:
302+
# Calculate distance to ALL remaining segments
297303
distances = available_segments.distance(station_geom)
298-
best_match = available_segments.loc[distances.idxmin()]
304+
305+
# Snap to the nearest one, regardless of distance
306+
min_dist_idx = distances.idxmin()
307+
best_match = available_segments.loc[min_dist_idx]
299308
300309
match_results[i]["MATCHED_SEGID"] = best_match["SEGID"]
301-
match_results[i]["MATCH_TYPE"] = "Spatial Fallback"
310+
match_results[i]["MATCH_TYPE"] = "Global Fallback (Nearest)"
311+
312+
# Mark as used so future fallbacks don't grab it
302313
used_segids.add(best_match["SEGID"])
303314
304-
# Create DataFrame and DROP Geometry
315+
# Create DataFrame
305316
df_matches = pd.DataFrame(match_results).drop(columns=["geometry"])
317+
print("Matching Complete. Summary:")
306318
print(df_matches["MATCH_TYPE"].value_counts())
307319
```
308320

0 commit comments

Comments
 (0)