forked from UBC-MDS/Capstone_SatCast_Trilemma
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbaseline_hwes.py
More file actions
118 lines (97 loc) · 3.89 KB
/
baseline_hwes.py
File metadata and controls
118 lines (97 loc) · 3.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# baseline_hwes.py
# author: Jenny Zhang
# date: 2025-06-18
"""
Script to build a Holt–Winters Exponential-Smoothing (HWES) baseline
for Bitcoin fee forecasting.
This script performs the following steps:
1. Loads and resamples raw Parquet data to 15-minute frequency.
2. Extracts the target series, splits it into training and
48-hour test windows, and saves all three as CSV files under
``data/processed/hwes/``.
3. Executes a grid search with time-series cross-validation to tune
trend, seasonality, and damping options, and saves the results to
``results/tables/hwes/hwes_cv_results.csv``.
4. Fits the best model on the full training window.
5. Serialises the fitted model to ``results/models/hwes_best_train.pkl``.
Usage:
python scripts/baseline_hwes.py --parquet-path data/raw/mar_5_may_12.parquet
"""
import sys
import pickle
import pandas as pd
import warnings
import click
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.holtwinters import ExponentialSmoothing
# Suppress every warning for the lifetime of the process.
warnings.filterwarnings(action="ignore")

# Resolve this script's absolute location; the project root is the parent
# of the directory that contains the script (e.g. ``<root>/scripts/``).
current_file = Path(__file__).resolve()
project_root = current_file.parents[1]

# Append the project root (not ``src`` itself) to the import search path so
# the ``src.*`` and ``scripts.*`` packages imported below can be found.
sys.path.append(str(project_root))
# Import project modules
from src.preprocess_raw_parquet import preprocess_raw_parquet
from scripts.hwes.hwes_extract_split import hwes_extract_split
from scripts.hwes.hwes_cv_optimization import hwes_cv_optimization
# Configuration: every size below is a count of 15-minute observations
# (60 // 15 = 4 observations per hour).
FORECAST = 48 * (60 // 15)    # 48-hour forecast horizon -> 192 observations
DAILY = 24 * (60 // 15)       # one day -> 96 observations
STEPS = 7 * 24 * (60 // 15)   # step size of one week -> 672 observations
WINDOWS = 5 * 672             # five weeks of rolling window -> 3360 observations
def _none_if_missing(value):
    """Return ``None`` for a missing cell (``None`` or NaN), else ``value`` unchanged."""
    return None if pd.isna(value) else value


@click.command()
@click.option(
    "--parquet-path",
    type=click.Path(exists=True),
    default=str(project_root / "data" / "raw" / "mar_5_may_12.parquet"),
    help="Path to input Parquet file"
)
def main(parquet_path):
    """Tune, fit and save the Holt-Winters (HWES) baseline model.

    Preprocesses the Parquet file at PARQUET_PATH, splits the fee series,
    runs the cross-validated grid search, fits an ExponentialSmoothing model
    on the training window with the first row of the search results, and
    pickles the fitted model. Exits with status 1 when the grid search
    returns no results.
    """
    # Step 1: Load and preprocess raw data
    df = preprocess_raw_parquet(parquet_path)

    # Step 2: Extract and split fee series
    fee_series, train, test = hwes_extract_split(df, forecast_horizon=FORECAST)

    # Step 2.5: Save processed data
    data_dir = project_root / "data" / "processed" / "hwes"
    data_dir.mkdir(parents=True, exist_ok=True)
    fee_series.to_csv(data_dir / "fee_series.csv", index=True)
    train.to_csv(data_dir / "train.csv", index=True)
    test.to_csv(data_dir / "test.csv", index=True)

    # Step 3: Cross-validated grid search for hyperparameters
    scaler = MinMaxScaler(feature_range=(1, 2))
    cv_results = hwes_cv_optimization(
        series=train,
        seasonal_periods=DAILY,
        horizon=DAILY,
        window_size=WINDOWS,
        step=STEPS,
        scaler=scaler
    )

    # Step 4: Save and load best parameters
    tables_dir = project_root / "results" / "tables" / "hwes"
    tables_dir.mkdir(parents=True, exist_ok=True)
    cv_results_path = tables_dir / "hwes_cv_results.csv"
    cv_results.to_csv(cv_results_path, index=False)

    # Check for an empty result *before* re-reading the file: a frame without
    # columns is written as an empty CSV, which makes ``pd.read_csv`` raise
    # instead of reaching the user-facing message below.
    if cv_results.empty:
        print("HWES cross-validation failed. Not enough data or all configurations failed.")
        print("Try using a longer series like `mar_5_may_12.parquet` instead of the 8-day sample.")
        sys.exit(1)

    hyperparam_matrix = pd.read_csv(cv_results_path)

    # NOTE(review): the first row is taken as the best configuration; this
    # presumes hwes_cv_optimization returns rows sorted best-first - confirm.
    best_row = hyperparam_matrix.iloc[0]

    # The CSV round-trip turns ``None`` into NaN. NaN is truthy and is not a
    # valid ``trend``/``seasonal`` option, so map missing cells back to None.
    best_trend = _none_if_missing(best_row['trend'])
    best_seasonal = _none_if_missing(best_row['seasonal'])
    raw_damped = best_row['damped']
    best_damped = False if pd.isna(raw_damped) else bool(raw_damped)
    print(f"✅ Best HWES parameters: trend={best_trend}, seasonal={best_seasonal}, damped={best_damped}")

    # Step 5: Train final model
    # NOTE(review): the grid search receives a MinMaxScaler but the final fit
    # uses the raw ``train`` values - confirm this asymmetry is intended.
    final_model = ExponentialSmoothing(
        train,
        trend=best_trend,
        seasonal=best_seasonal,
        seasonal_periods=DAILY if best_seasonal else None,
        damped_trend=best_damped
    )
    final_fit = final_model.fit(optimized=True, use_brute=True)

    # Step 6: Save trained model
    model_path = project_root / "results" / "models" / "hwes_best_train.pkl"
    model_path.parent.mkdir(parents=True, exist_ok=True)
    with open(model_path, 'wb') as f:
        pickle.dump(final_fit, f)
    print(f"✅ Final model saved to: {model_path}")


if __name__ == '__main__':
    main()