|
| 1 | +from typing import Any |
| 2 | + |
| 3 | +import geopandas as gpd |
| 4 | +import numpy as np |
| 5 | +import pandas as pd |
| 6 | +from geopy.distance import geodesic |
| 7 | + |
| 8 | + |
def create_betweenTrip_deadhead_trips(
    trips_df: pd.DataFrame, stop_times_df: pd.DataFrame
) -> pd.DataFrame:
    """Create deadhead trips between consecutive trips for each block.

    Within every block, trips are ordered by their start time (the smallest
    ``arrival_time`` found in ``stop_times_df`` for the trip) and one deadhead
    trip is created for each pair of consecutive trips. The deadhead trip is
    named ``"<from_trip_id>_to_<to_trip_id>"`` and inherits ``route_id``,
    ``service_id`` and ``block_id`` from the "from" trip.

    Parameters
    ----------
    trips_df : pd.DataFrame
        GTFS trips table (e.g. result from read_in_gtfs). Must contain
        ``trip_id``, ``route_id``, ``service_id`` and ``block_id``. It is not
        modified.
    stop_times_df : pd.DataFrame
        GTFS stop_times table (e.g. result from read_in_gtfs). Must contain
        ``trip_id`` and ``arrival_time``.

    Returns
    -------
    pd.DataFrame
        One row per deadhead trip with the columns ``trip_id``, ``route_id``,
        ``service_id``, ``block_id``, ``shape_id``, ``route_short_name``,
        ``route_type``, ``route_desc`` and ``agency_id``. The frame is empty
        (but has these columns) when no block contains two or more trips.
    """
    output_columns = [
        "trip_id",
        "route_id",
        "service_id",
        "block_id",
        "shape_id",
        "route_short_name",
        "route_type",
        "route_desc",
        "agency_id",
    ]
    if trips_df.empty:
        return pd.DataFrame(columns=output_columns)

    # Start time of each trip: the earliest arrival among its stop_times rows.
    trip_start = stop_times_df.groupby("trip_id")["arrival_time"].min()

    # Work on a private copy with underscore-prefixed helper columns so the
    # caller's frame is untouched and existing column names cannot collide.
    ordered = trips_df[["trip_id", "route_id", "service_id", "block_id"]].copy()
    ordered["_trip_start"] = ordered["trip_id"].map(trip_start)
    ordered = ordered.sort_values(by=["block_id", "_trip_start"])

    # The next trip of the same block; the last trip of a block (and any trip
    # without a block_id) gets NaN and therefore produces no deadhead trip.
    ordered["_to_trip"] = ordered.groupby("block_id")["trip_id"].shift(-1)
    pairs = ordered.dropna(subset=["_to_trip"]).reset_index(drop=True)
    if pairs.empty:
        return pd.DataFrame(columns=output_columns)

    deadhead_id = pairs["trip_id"] + "_to_" + pairs["_to_trip"]
    return pd.DataFrame(
        {
            "trip_id": deadhead_id,
            "route_id": pairs["route_id"],
            "service_id": pairs["service_id"],
            "block_id": pairs["block_id"],
            # Each deadhead trip gets its own shape, keyed by its trip_id.
            "shape_id": deadhead_id,
            "route_short_name": None,
            "route_type": 3,
            "route_desc": "Deadhead_from_" + deadhead_id,
            "agency_id": None,
        }
    )
| 67 | + |
| 68 | + |
def _boundary_stops(
    stop_times: pd.DataFrame, trip_ids: pd.Series, *, take_last: bool
) -> pd.DataFrame:
    """Return one stop_times row per trip: its last or first stop by stop_sequence."""
    rows = stop_times[stop_times["trip_id"].isin(trip_ids)].sort_values(
        by=["trip_id", "stop_sequence"]
    )
    grouped = rows.groupby("trip_id")
    # NOTE: GroupBy.first()/last() pick the first/last non-null value of each
    # column, so a missing time at the boundary stop falls back to a neighbour.
    picked = grouped.last() if take_last else grouped.first()
    return picked.reset_index()


def create_betweenTrip_deadhead_stops(
    feed: Any,
    deadhead_trips: pd.DataFrame,
    *,
    avg_speed_kmh: float = 30.0,
) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Create stop_times and stops for deadhead trips between consecutive trips.

    Each deadhead trip runs from the last stop of its "from" trip to the first
    stop of its "to" trip. The two trip ids are recovered by splitting the
    deadhead ``trip_id`` on ``"_to_"``, so the original trip ids must not
    contain that substring themselves.

    Parameters
    ----------
    feed : Any
        GTFS feed object (e.g. result from read_in_gtfs). Its ``stop_times``
        table must provide ``trip_id``, ``stop_sequence``, ``stop_id``,
        ``arrival_time`` and ``departure_time``; its ``stops`` table must
        provide ``stop_id``, ``stop_lat`` and ``stop_lon``.
    deadhead_trips : pd.DataFrame
        Deadhead trips as produced by create_betweenTrip_deadhead_trips; only
        the ``trip_id`` column is read. The frame is not modified.
    avg_speed_kmh : float, default 30.0
        Average speed, in km/h, used to estimate the deadhead travel time from
        the geodesic distance between the two stops. The default matches the
        value adopted in gtfs_feature_processing.py.

    Returns
    -------
    stop_times_df : pd.DataFrame
        Two rows per deadhead trip (``stop_sequence`` 1 and 2) with synthetic
        stop ids ``betweenTrip_deadhead_<n>``.
    stops_df : pd.DataFrame
        ``stop_id``, ``stop_lat`` and ``stop_lon`` of the synthetic stops, in
        the same row order as ``stop_times_df``.
    traces_df : pd.DataFrame
        ``geometry_origin``, ``geometry_destination`` and ``block_id`` per
        deadhead trip, where ``block_id`` is set to the deadhead ``trip_id``.

    Raises
    ------
    ValueError
        If ``avg_speed_kmh`` is not positive.
    """
    if avg_speed_kmh <= 0:
        raise ValueError("avg_speed_kmh must be positive")

    stop_time_columns = [
        "trip_id",
        "stop_sequence",
        "arrival_time",
        "stop_id",
        "departure_time",
        "shape_dist_traveled",
    ]
    if deadhead_trips.empty:
        # Nothing to build; return correctly shaped empty tables instead of
        # running the row-wise computations below on an empty frame.
        return (
            pd.DataFrame(columns=stop_time_columns),
            pd.DataFrame(columns=["stop_id", "stop_lat", "stop_lon"]),
            pd.DataFrame(
                columns=["geometry_origin", "geometry_destination", "block_id"]
            ),
        )

    # Work on a copy so the caller's DataFrame does not gain helper columns.
    deadhead_trips = deadhead_trips.copy()
    deadhead_trips["from_trip"] = deadhead_trips["trip_id"].apply(
        lambda x: x.split("_to_")[0]
    )
    deadhead_trips["to_trip"] = deadhead_trips["trip_id"].apply(
        lambda x: x.split("_to_")[1]
    )

    # Origin of a deadhead trip: last stop of the "from" trip.
    origin_stops = _boundary_stops(
        feed.stop_times, deadhead_trips["from_trip"], take_last=True
    ).rename(columns={"stop_id": "from_stop_id", "trip_id": "from_trip"})
    # Destination of a deadhead trip: first stop of the "to" trip.
    destination_stops = _boundary_stops(
        feed.stop_times, deadhead_trips["to_trip"], take_last=False
    ).rename(columns={"stop_id": "to_stop_id", "trip_id": "to_trip"})

    deadhead_trips = deadhead_trips.merge(
        origin_stops[["from_trip", "from_stop_id", "departure_time"]],
        on="from_trip",
        how="left",
    )
    deadhead_trips = deadhead_trips.merge(
        destination_stops[["to_trip", "to_stop_id", "arrival_time"]],
        on="to_trip",
        how="left",
    )

    # Attach stop coordinates; renaming before the merge avoids the
    # stop_id_x / stop_id_y artefacts of merging the same table twice.
    stop_coords = feed.stops[["stop_id", "stop_lat", "stop_lon"]]
    for prefix in ("from", "to"):
        deadhead_trips = deadhead_trips.merge(
            stop_coords.rename(
                columns={
                    "stop_id": f"{prefix}_stop_id",
                    "stop_lat": f"{prefix}_stop_lat",
                    "stop_lon": f"{prefix}_stop_lon",
                }
            ),
            on=f"{prefix}_stop_id",
            how="left",
        )

    from_lat = deadhead_trips["from_stop_lat"].to_numpy(dtype="float64")
    from_lon = deadhead_trips["from_stop_lon"].to_numpy(dtype="float64")
    to_lat = deadhead_trips["to_stop_lat"].to_numpy(dtype="float64")
    to_lon = deadhead_trips["to_stop_lon"].to_numpy(dtype="float64")

    # Point geometries (x = lon, y = lat) handed back for trace generation.
    deadhead_trips["geometry_origin"] = gpd.points_from_xy(
        deadhead_trips["from_stop_lon"], deadhead_trips["from_stop_lat"]
    )
    deadhead_trips["geometry_destination"] = gpd.points_from_xy(
        deadhead_trips["to_stop_lon"], deadhead_trips["to_stop_lat"]
    )

    # Geodesic distance from origin to destination, in metres.
    distance_m = pd.Series(
        [
            geodesic((lat_a, lon_a), (lat_b, lon_b)).meters
            for lat_a, lon_a, lat_b, lon_b in zip(from_lat, from_lon, to_lat, to_lon)
        ],
        index=deadhead_trips.index,
        dtype="float64",
    )
    travel_time_sec = distance_m / (avg_speed_kmh * 1000.0) * 3600.0

    # Estimated arrival at the destination stop.
    # NOTE(review): assumes departure_time supports adding a Timedelta
    # (e.g. a timedelta or datetime column) - confirm against read_in_gtfs.
    deadhead_trips["arrival_time_cal"] = deadhead_trips[
        "departure_time"
    ] + pd.to_timedelta(travel_time_sec, unit="s")
    # Use the minimum of scheduled arrival time and calculated arrival time.
    deadhead_trips["arrival_time"] = deadhead_trips[
        ["arrival_time", "arrival_time_cal"]
    ].min(axis=1)

    # stop_times: two rows per deadhead trip (origin, then destination).
    n_stops = 2 * len(deadhead_trips)
    stop_times_df = pd.DataFrame(
        {
            "trip_id": deadhead_trips["trip_id"].repeat(2).to_numpy(),
            "stop_sequence": [1, 2] * len(deadhead_trips),
            "arrival_time": [
                moment
                for pair in zip(
                    deadhead_trips["departure_time"].to_list(),
                    deadhead_trips["arrival_time"].to_list(),
                )
                for moment in pair
            ],
            "stop_id": [f"betweenTrip_deadhead_{i}" for i in range(1, n_stops + 1)],
        },
        columns=stop_time_columns,
    )
    stop_times_df["departure_time"] = stop_times_df["arrival_time"]
    stop_times_df["shape_dist_traveled"] = 0.0

    # stops: coordinates interleaved origin/destination to match stop_times.
    stops_df = pd.DataFrame(
        {
            "stop_id": stop_times_df["stop_id"],
            "stop_lat": np.column_stack((from_lat, to_lat)).ravel(),
            "stop_lon": np.column_stack((from_lon, to_lon)).ravel(),
        }
    )

    # Use trip_id as block_id for deadhead trips for trace generation purpose
    # in generate_deadhead_traces.py.
    deadhead_trips["block_id"] = deadhead_trips["trip_id"]
    return (
        stop_times_df,
        stops_df,
        deadhead_trips[["geometry_origin", "geometry_destination", "block_id"]],
    )
0 commit comments