routee-transit/nrel/routee/transit/prediction/add_temp_feature.py at bd69acbbf0463f66c6eb248176bc882b1c9fc17a · NatLabRockies/routee-transit · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
import os
from typing import Any

import boto3
import geopandas as gpd
import numpy as np
import pandas as pd
from botocore import UNSIGNED
from botocore.config import Config


def add_HVAC_energy(feed: Any, trips_df: pd.DataFrame) -> Any:
    """
    Add HVAC energy consumption.

    Parameters
    ----------
    feed : Any
        GTFS feed object containing blocks DataFrame.
    trips_df : pd.DataFrame
        Trips on selected date and route, including deadhead trips.

    Returns
    -------
    pd.DataFrame
        Updated trip level energy prediction DataFrame with HVAC energy consumption data.
    """
    # Based on gtfs stops data, get counties served
    df_stops = feed.stops
    gdf_stops = gpd.GeoDataFrame(
        df_stops,
        geometry=gpd.points_from_xy(df_stops.stop_lon, df_stops.stop_lat),
        crs=4269,
    )
    gdf_county = gpd.read_file(
        "https://www2.census.gov/geo/tiger/GENZ2022/shp/cb_2022_us_county_20m.zip"
    )
    gdf_county["county_id"] = (
        "G" + gdf_county["STATEFP"] + "0" + gdf_county["COUNTYFP"] + "0"
    )
    gdf_stops = gpd.sjoin(
        gdf_stops,
        gdf_county[["geometry", "county_id"]],
        how="left",
        predicate="intersects",
    )
    county_ids = gdf_stops.county_id.unique()

    # Download TMY Weather Data
    """TMY stands for Typical Meteorological Year, a dataset that provides representative hourly weather
    data for a location over a synthetic year.
    Unlike Actual Meteorological Year (AMY) files, which reflect the observed conditions in a specific calendar year,
    TMY files are constructed by selecting typical months from multiple years of historical records.
    This approach smooths out unusual extremes and produces a “typical” climate profile,
    making TMY data well-suited for long-term energy modeling and system design studies.
    """
    bucket_name = "oedi-data-lake"  # S3 Bucket and path prefix for TMY data
    prefix = "nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2021/resstock_tmy3_release_1/weather/tmy3/"
    s3 = boto3.client(
        "s3", config=Config(signature_version=UNSIGNED)
    )  # Create anonymous S3 client
    save_dir = "./TMY"  # Folder for saving the downloaded TMY data
    if not os.path.exists(save_dir):
        os.makedirs(save_dir, exist_ok=True)
    # Download files for each county
    for county_id in county_ids:
        file_key = f"{prefix}{county_id}_tmy3.csv"
        local_file = os.path.join(save_dir, f"{county_id}.csv")
        if not os.path.isfile(local_file):
            try:
                s3.download_file(bucket_name, file_key, local_file)
                print(f"Downloaded: {county_id}.csv")
            except Exception as e:
                print(f"Failed to download {county_id}.csv: {e}")

    # Create the HVAC + BTMS power lookup table
    temperature_list = [-10, -5, 0, 5, 10, 15, 20, 25, 30, 35, 40]  # From literature
    HVAC_power_list = [25, 17, 10, 6, 4, 1, 1, 2, 4, 7, 11]  # From literature
    BTMS_power_list = [
        4.9,
        3.6,
        2.1,
        0.8,
        0.2,
        0.1,
        0.1,
        1.4,
        1.5,
        2.1,
        5.6,
    ]  # From literature
    total_temp_energy_list = [
        HVAC_power_list[i] + BTMS_power_list[i] for i in range(len(HVAC_power_list))
    ]
    # Add two extreme values to make sure we cover all temperature values
    min_temp = -100
    max_temp = 100
    min_temp_power = (-10 - min_temp) * (
        total_temp_energy_list[0] - total_temp_energy_list[1]
    ) / 5 + total_temp_energy_list[0]
    max_temp_power = (max_temp - 40) * (
        total_temp_energy_list[-1] - total_temp_energy_list[-2]
    ) / 5 + total_temp_energy_list[-1]
    # Extend temp and energy list
    temperature_list = [-100] + temperature_list + [100]
    total_temp_energy_list = (
        [min_temp_power] + total_temp_energy_list + [max_temp_power]
    )
    # Define a dataframe to store the information
    df_temp_energy = pd.DataFrame(
        {"Temp_C": temperature_list, "Power": total_temp_energy_list}
    )
    # Fill every integer Temp_C
    df_tmp_fill = pd.DataFrame({"Temp_C": np.arange(-100, 100.1, 0.1)})
    df_temp_energy["Temp_C"] = df_temp_energy["Temp_C"].astype(float).round(1)
    df_tmp_fill["Temp_C"] = df_tmp_fill["Temp_C"].astype(float).round(1)
    df_temp_energy = df_tmp_fill.merge(df_temp_energy, on="Temp_C", how="left")
    # Linear interpolate
    df_temp_energy["Power"] = df_temp_energy["Power"].interpolate(method="linear")
    df_temp_energy["Temp_C"] = df_temp_energy["Temp_C"].astype(float)

    # Get the winter coldest day and summer hottest day power tables
    # Then loop through each county_id to collect the coldest and hottest day and extract the corresponding power consumption data
    list_df_hot = []
    list_df_cold = []
    for county_id in county_ids:
        file_key = f"{prefix}{county_id}_tmy3.csv"
        local_file = os.path.join(save_dir, f"{county_id}.csv")
        df_temp = pd.read_csv(local_file, parse_dates=["date_time"])
        df_temp["date"] = np.repeat(np.arange(1, 366), 24)
        df_temp_group = (
            df_temp.groupby("date")
            .agg(avg_temp_C=("Dry Bulb Temperature [°C]", "mean"))
            .reset_index()
        )
        cold_day = df_temp_group[
            df_temp_group.avg_temp_C == df_temp_group.avg_temp_C.min()
        ]["date"].iloc[0]
        hot_day = df_temp_group[
            df_temp_group.avg_temp_C == df_temp_group.avg_temp_C.max()
        ]["date"].iloc[0]
        # Get the coldest and hottest day data
        df_cold = df_temp[df_temp.date == cold_day].copy()
        df_cold["Dry Bulb Temperature [°C]"] = df_cold[
            "Dry Bulb Temperature [°C]"
        ].round(1)
        df_cold = df_cold.merge(
            df_temp_energy,
            left_on="Dry Bulb Temperature [°C]",
            right_on="Temp_C",
            how="left",
        )
        df_cold["hour"] = np.arange(0, 24)
        df_hot = df_temp[df_temp.date == hot_day].copy()
        df_hot["Dry Bulb Temperature [°C]"] = df_hot["Dry Bulb Temperature [°C]"].round(
            1
        )
        df_hot = df_hot.merge(
            df_temp_energy,
            left_on="Dry Bulb Temperature [°C]",
            right_on="Temp_C",
            how="left",
        )
        df_hot["hour"] = np.arange(0, 24)
        list_df_hot.append(df_hot)
        list_df_cold.append(df_cold)
    # Mergeg data for all counties
    df_hot_all = pd.concat(list_df_hot)
    df_cold_all = pd.concat(list_df_cold)
    # Next lets get the hourly average values for all stations
    df_cold_hourly = df_cold_all.groupby(["hour"]).agg({"Power": "mean"}).reset_index()
    df_hot_hourly = df_hot_all.groupby(["hour"]).agg({"Power": "mean"}).reset_index()

    # Last calculate the trip HVAC energy
    # Get the start and end time for each trip
    df_stop_times = feed.stop_times
    df_stop_times = df_stop_times[
        df_stop_times["trip_id"].isin(trips_df["trip_id"].unique())
    ].copy()
    df_trip_time = (
        df_stop_times.groupby("trip_id")
        .agg(start_time=("arrival_time", "min"), end_time=("arrival_time", "max"))
        .reset_index()
    )
    start_hours = df_trip_time["start_time"].dt.total_seconds() / 3600
    end_hours = df_trip_time["end_time"].dt.total_seconds() / 3600

    def compute_HVAC_energy(start_hours: Any, end_hours: Any, power_array: Any) -> Any:
        """
        Calculate HVAC + BTMS energy consumption between time intervals.

        Args:
            start_hours (array-like): fractional start times in hours
            end_hours (array-like): fractional end times in hours (can be > 24)
            power_array (array-like): hourly average power values [kW] for hours 0–23

        Returns:
            np.ndarray: energy consumption [kWh] for each interval
        """
        power_array = np.asarray(power_array)
        power_24 = np.concatenate(
            (power_array, [power_array[0]])
        )  # wrap for interpolation

        def interp_power(t: Any) -> Any:
            """Linearly interpolate power at fractional hour t."""
            i = int(np.floor(t)) % 24
            frac = t - np.floor(t)
            return (1 - frac) * power_24[i] + frac * power_24[i + 1]

        energies = []
        for s, e in zip(start_hours, end_hours):
            # sample in small steps for accurate integration
            ts = np.arange(s, e, 0.01)  # 0.01 h = 36 s resolution
            ps = np.array([interp_power(t) for t in ts])
            energy = np.trapz(ps, ts)  # integrate kW over hours → kWh
            energies.append(energy)

        return np.array(energies)

    df_trip_time["Winter_HVAC_Energy"] = compute_HVAC_energy(
        start_hours, end_hours, df_cold_hourly["Power"].values
    )
    df_trip_time["Summer_HVAC_Energy"] = compute_HVAC_energy(
        start_hours, end_hours, df_hot_hourly["Power"].values
    )
    # Merge back to trips_df
    trips_df = trips_df.merge(
        df_trip_time[["trip_id", "Winter_HVAC_Energy", "Summer_HVAC_Energy"]],
        on="trip_id",
        how="left",
    )
    trips_df = trips_df[["trip_id", "Winter_HVAC_Energy", "Summer_HVAC_Energy"]]
    return trips_df