Description
Hello, I am trying to use the AutoBNN code shared here to model my data, but it gives very large values of MAE, RMSE and R-squared. What adjustments can I make to reduce the error?
Here is the code:
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 27 15:41:32 2024
@author: iO
"""
import jax
import jax.numpy as jnp
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from autobnn import estimators
from autobnn import training_util
# Forecast the last year of a daily rainfall series with an AutoBNN MAP
# estimator, plot the forecast against the held-out data, and print
# MAE / RMSE / R-squared computed on the standardized scale.

# Seed setup
seed = jax.random.PRNGKey(0)

# Load data
# `sep=r"\s+"` is the documented equivalent of `delim_whitespace=True`, which
# is deprecated in recent pandas releases.
data = pd.read_csv("path_to_data/Bukomansimbi.txt", sep=r"\s+")
data.columns = data.columns.str.lower()

# Date and rainfall extraction
data['date'] = pd.to_datetime(data[['year', 'month', 'day']])
rainfall_by_day = data['value'].values

# Train/test split configuration: hold out the final 365 rows for testing.
num_forecast_steps = 365
train_data = rainfall_by_day[:-num_forecast_steps]
test_data = rainfall_by_day[-num_forecast_steps:]
dates_train = data['date'].values[:-num_forecast_steps]
dates_test = data['date'].values[-num_forecast_steps:]

# Normalize and scale data
# The scaler is fitted on the training split only, so no test statistics leak
# into the training targets.
scaler = StandardScaler()
train_scaled = scaler.fit_transform(train_data.reshape(-1, 1)).flatten()
test_scaled = scaler.transform(test_data.reshape(-1, 1)).flatten()

# Add time feature for seasonality: encode the month as a point on the unit
# circle so that December and January end up adjacent.
month_angle = 2 * np.pi * data['month'].values / 12
month_sin = np.sin(month_angle)
month_cos = np.cos(month_angle)
x_train = np.stack(
    [month_sin[:-num_forecast_steps], month_cos[:-num_forecast_steps]], axis=1
)
x_test = np.stack(
    [month_sin[-num_forecast_steps:], month_cos[-num_forecast_steps:]], axis=1
)

# Initialize AutoBNN
# NOTE(review): the inputs built above are two sin/cos month features, not a
# time index, and no date normalization is performed in this script. Confirm
# that `periods=(1.0,)` is expressed in the same units as the model inputs and
# that the estimator is meant to receive these features rather than a single
# time column.
est = estimators.AutoBnnMapEstimator(
    'sum_of_products',
    likelihood_model='normal_likelihood_logistic_noise',
    seed=seed,
    periods=(1.0,),
    num_particles=32,
)

# Fit the model (targets passed as a column vector)
est.fit(x_train, train_scaled[:, None])

# Prediction
preds = est.predict(x_test)[:, 0]  # Squeeze predictions
quantiles = est.predict_quantiles(x_test, q=[2.5, 50., 90., 97.5])
# NOTE(review): this indexing assumes the quantiles are laid out along the
# second axis. Verify the shape returned by `predict_quantiles`; if the
# quantile axis comes first, these slices select the wrong values.
lo, mid, p90, hi = quantiles[:, 0], quantiles[:, 1], quantiles[:, 2], quantiles[:, 3]

# Plotting the results
plt.figure(figsize=(16, 10))
plt.plot(dates_train, train_scaled, label='Training Data')
plt.plot(dates_test, test_scaled, label='Actual')
plt.plot(dates_test, mid, label='Predictions', color='r')
plt.fill_between(dates_test, lo, hi, color='r', alpha=0.3, label='Confidence Interval')
plt.title('Rainfall Prediction in Bukomansimbi')
plt.xlabel('Date')
plt.ylabel('Standardized Rainfall')
plt.legend()
plt.show()

# Performance Metrics
# All metrics are computed on the standardized scale (the same units as the
# plot above), not in the original rainfall units.
errors = preds - test_scaled
mae = np.mean(np.abs(errors))
rmse = np.sqrt(np.mean(errors ** 2))
r_squared = 1 - np.sum(errors ** 2) / np.sum((test_scaled - np.mean(test_scaled)) ** 2)
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Root Mean Square Error (RMSE): {rmse:.2f}")
print(f"R-squared: {r_squared:.2f}")