Commit 64bfe33

start abstracting border spatial functions to accept non-tract geoms

1 parent 2518a88 · commit 64bfe33

1 file changed: 46 additions, 49 deletions

@@ -1,79 +1,76 @@
 # import intake
-import pandas as pd
-import geopandas as gpd
-from calitp_data_analysis import geography_utils
-
+import uuid
 
-from update_vars import ANALYSIS_DATE, BORDER_BUFFER_METERS
+import geopandas as gpd
+import pandas as pd
 from segment_speed_utils import helpers
+from update_vars import ANALYSIS_DATE, BORDER_BUFFER_METERS
 from utils import read_census_tracts
-import uuid
 
-def intersection_hash(row):
-    '''
+
+def intersection_hash(row, id_col="tract"):
+    """
     Get unique hash of intersection zones.
     No need to keep both t1 x t2 and t2 x t1
-    '''
-    t1 = int(row.tract_1[2:]) # drop state code
-    t2 = int(row.tract_2[2:])
+    """
+    t1 = int(row[f"{id_col}_1"][2:])  # drop state code
+    t2 = int(row[f"{id_col}_2"][2:])
     row_tracts = [t1, t2]
-    row_tracts.sort() # modifies inplace
+    row_tracts.sort()  # modifies inplace
     return hash(tuple(row_tracts))
 
-def find_borders(tracts_gdf: gpd.GeoDataFrame,
-                 border_buffer: int = BORDER_BUFFER_METERS
+
+def find_borders(
+    tracts_gdf: gpd.GeoDataFrame, border_buffer: int = BORDER_BUFFER_METERS, id_col: str = "tract"
 ) -> gpd.GeoDataFrame:
-    '''
-    '''
+    """ """
     tracts_gdf = tracts_gdf.copy()
     tracts_gdf.geometry = tracts_gdf.buffer(border_buffer)
     borders = gpd.overlay(tracts_gdf, tracts_gdf)
-    borders = borders[borders['tract_1'] != borders['tract_2']]
+    borders = borders[borders[f"{id_col}_1"] != borders[f"{id_col}_2"]]
     # for dropping mirrored borders
-    borders['intersection_hash'] = borders.apply(intersection_hash, axis=1)
-    borders = borders.drop_duplicates(subset=['intersection_hash'])
+    borders["intersection_hash"] = borders.apply(intersection_hash, axis=1, id_col=id_col)
+    borders = borders.drop_duplicates(subset=["intersection_hash"])
    # for more elegant tracking
-    borders['intersection_id'] = [str(uuid.uuid4()) for _ in range(borders.shape[0])]
+    borders["intersection_id"] = [str(uuid.uuid4()) for _ in range(borders.shape[0])]
     return borders
 
-def find_shapes_in_tracts_borders(shape_stops, tracts, borders):
-
-    '''
+
+def find_shapes_in_areas_borders(shape_stops, areas, borders, id_col="tract"):
+    """
     sjoin stops to tracts and border zones by GTFS shape.
     create tsi_segment_id equal to tract if a single tract
     or intersection_id if a border zone
-    '''
-    shape_stops_tracts_borders = (pd.concat([tracts, borders])
-                                  .sjoin(shape_stops)
-                                  .drop(columns='index_right')
-                                 )
-
-    shape_stops_tracts_borders = shape_stops_tracts_borders.assign(
-        tsi_segment_id = shape_stops_tracts_borders.tract.combine_first(
-            shape_stops_tracts_borders.intersection_id)
-        .astype(str)
+    """
+    shape_stops_areas_borders = pd.concat([areas, borders]).sjoin(shape_stops).drop(columns="index_right")
+
+    shape_stops_areas_borders = shape_stops_areas_borders.assign(
+        tsi_segment_id=shape_stops_areas_borders[f"{id_col}"]
+        .combine_first(shape_stops_areas_borders.intersection_id)
+        .astype(str)
     )
-    return shape_stops_tracts_borders
+    return shape_stops_areas_borders
 
 
 if __name__ == "__main__":
-
-    print(f'prepare_tracts_borders {ANALYSIS_DATE}')
+
+    print(f"prepare_tracts_borders {ANALYSIS_DATE}")
     tracts = read_census_tracts(ANALYSIS_DATE)
     shapes = helpers.import_scheduled_shapes(ANALYSIS_DATE)
     borders = find_borders(tracts)
-    st = helpers.import_scheduled_stop_times(analysis_date=ANALYSIS_DATE,
-                                             columns=['feed_key', 'trip_id', 'stop_id'],
-                                             get_pandas=True)
-    trips = helpers.import_scheduled_trips(ANALYSIS_DATE, columns=['shape_array_key', 'trip_id', 'feed_key'])
-    stops = helpers.import_scheduled_stops(ANALYSIS_DATE, columns=['feed_key', 'stop_id', 'geometry'])
-
-    shape_stops = (stops.merge(st, on = ['feed_key', 'stop_id'])
-                   .merge(trips, on = ['feed_key', 'trip_id'])
-                   .drop_duplicates(subset=['feed_key', 'shape_array_key', 'stop_id'])
-                   .dropna()
+    st = helpers.import_scheduled_stop_times(
+        analysis_date=ANALYSIS_DATE, columns=["feed_key", "trip_id", "stop_id"], get_pandas=True
+    )
+    trips = helpers.import_scheduled_trips(ANALYSIS_DATE, columns=["shape_array_key", "trip_id", "feed_key"])
+    stops = helpers.import_scheduled_stops(ANALYSIS_DATE, columns=["feed_key", "stop_id", "geometry"])
+
+    shape_stops = (
+        stops.merge(st, on=["feed_key", "stop_id"])
+        .merge(trips, on=["feed_key", "trip_id"])
+        .drop_duplicates(subset=["feed_key", "shape_array_key", "stop_id"])
+        .dropna()
     )
 
-    borders.to_parquet(f'borders_{ANALYSIS_DATE}.parquet')
-    shape_stops_tracts_borders = find_shapes_in_tracts_borders(shape_stops, tracts, borders)
-    shape_stops_tracts_borders.to_parquet(f'shape_stops_tracts_borders_{ANALYSIS_DATE}.parquet')
+    borders.to_parquet(f"borders_{ANALYSIS_DATE}.parquet")
+    shape_stops_tracts_borders = find_shapes_in_areas_borders(shape_stops, tracts, borders)
+    shape_stops_tracts_borders.to_parquet(f"shape_stops_tracts_borders_{ANALYSIS_DATE}.parquet")
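The substance of the change: find_borders and find_shapes_in_areas_borders (renamed from find_shapes_in_tracts_borders) now take an id_col parameter instead of hard-coding the tract column, so any polygon layer with a FIPS-style string identifier can flow through the same border logic. A minimal usage sketch, assuming the functions above are in scope; the counties_gdf layer, its "county" column, the sample FIPS codes, the buffer distance, and the CRS are hypothetical stand-ins, not part of this commit:

import geopandas as gpd
from shapely.geometry import box

# Hypothetical non-tract layer: two adjacent "county" polygons with
# FIPS-style ids (intersection_hash drops the two-digit state prefix).
counties_gdf = gpd.GeoDataFrame(
    {"county": ["06001", "06013"]},
    geometry=[box(0, 0, 1000, 1000), box(1000, 0, 2000, 1000)],
    crs="EPSG:3310",  # projected CRS, so buffer() is in meters
)

# Buffer each polygon, self-overlay, and keep one zone per adjacent pair;
# gpd.overlay suffixes the duplicated id column as county_1 / county_2,
# and mirrored pairs are dropped via intersection_hash.
borders = find_borders(counties_gdf, border_buffer=100, id_col="county")
print(borders[["county_1", "county_2", "intersection_id"]])

Downstream, find_shapes_in_areas_borders would then assign each joined stop a tsi_segment_id equal to the county id when it falls inside a single polygon, or the border zone's intersection_id when it falls in an overlap, exactly as it already does for tracts.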
