Open
Description
Dear @mdancho84 and @AlbertoAlmuinha,
Is it possible to view the resampling forecast metrics by ID? I saw that `summary_fns = NULL` (#1, #3) provides the results per resampling fold, but those are global results. I think it would be very valuable to be able to access the local metrics for each ID as well as the results per resampling fold. I saw that #4 already requested something similar. If this is already possible and you can help me visualize these results, I would appreciate it. The scripts follow:
Link to download the database used: https://github.com/forecastingEDs/Forecasting-of-admissions-in-the-emergency-departments/blob/131bd23723a39724ad4f88ad6b8e5a58f42a7960/datasets.xlsx
# Keep only the modelling columns and rename them in the same select() call,
# so each source column sits next to its new name. The spelling
# `tempe_verage` is intentional here: later steps refer to the column by
# exactly that name.
data_tbl <- select(
  datasets,
  id,
  date         = Date,
  value        = attendences,
  tempe_verage = average_temperature,
  tempemin     = min,
  tempemax     = max,
  sunday, monday, tuesday, wednesday, thursday, friday, saturday,
  Jan, Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov, Dec
)
data_tbl
Full = Training + Forecast Datasets
# Full = training + forecast rows.
# Starts from `data_tbl`, which already holds the selected and renamed
# columns of `datasets`, instead of repeating the same select()/set_names()
# pair a second time.
full_data_tbl <- data_tbl %>%
  # Apply group-wise time series manipulations: extend every id by 3 days
  # and bind the new rows to the existing data.
  # NOTE(review): the appended rows presumably carry NA in every column other
  # than `date` and `id` (including the temperature and calendar regressors)
  # -- confirm this is acceptable before forecasting on them.
  group_by(id) %>%
  future_frame(
    .date_var   = date,
    .length_out = "3 days",
    .bind_data  = TRUE
  ) %>%
  ungroup() %>%
  # Consolidate IDs: drop factor levels of `id` that are no longer used.
  mutate(id = fct_drop(id))
Training Data
# Training data: every row of the full table whose `value` is observed.
data_prepared_tbl <- filter(full_data_tbl, !is.na(value))
Forecast Data
# Forecast data: every row of the full table whose `value` is missing.
future_tbl <- filter(full_data_tbl, is.na(value))
# Time series cross-validation plan over the training data, indexed by `date`:
# 3-day assessment windows, a 30-day skip between slices, cumulative = TRUE,
# and at most 5 slices.
emergency_tscv <- time_series_cv(
  data_prepared_tbl,
  date_var    = date,
  assess      = "3 days",
  skip        = "30 days",
  cumulative  = TRUE,
  slice_limit = 5
)
emergency_tscv
test data preprocessing for ML ----
# Preprocessing recipe for the ML models, specified on the training portion
# of the first resample split. `value` is the outcome; every other column
# starts out as a predictor.
recipe_spec <- recipe(value ~ .,
                      data = training(emergency_tscv$splits[[1]])) %>%
  # Expand `date` into calendar features.
  step_timeseries_signature(date) %>%
  # Discard the signature columns matching these patterns.
  step_rm(matches("(.iso$)|(.xts$)|(hour)|(minute)|(second)|(am.pm)")) %>%
  # Predictors must never be derived from `value`. A factor built from the
  # outcome would be one-hot encoded by the step below and handed to the
  # models as features, leaking the target and making every resample metric
  # meaningless, so no such column is created here.
  step_dummy(all_nominal(), one_hot = TRUE) %>%
  # Centre and scale the listed numeric predictors; the outcome is excluded.
  step_normalize(date_index.num, tempe_verage, tempemin, tempemax, date_year,
                 -all_outcomes())
Model 1: Xgboost ----
# XGBoost regression workflow: the shared recipe with the raw `date` column
# removed, fitted on the training portion of the first resample split.
wflw_fit_xgboost <- workflow() %>%
  add_recipe(step_rm(recipe_spec, date)) %>%
  add_model(set_engine(boost_tree(mode = "regression"), "xgboost")) %>%
  fit(data = training(emergency_tscv$splits[[1]]))
Model 2: LightGBM ----
# LightGBM regression workflow: the shared recipe with the raw `date` column
# removed, fitted on the training portion of the first resample split.
# NOTE(review): the "lightgbm" engine is presumably registered by an
# extension package loaded elsewhere in the script (e.g. bonsai) -- confirm.
wflw_fit_lightgbm <- workflow() %>%
  add_recipe(step_rm(recipe_spec, date)) %>%
  add_model(set_engine(boost_tree(mode = "regression"), "lightgbm")) %>%
  fit(data = training(emergency_tscv$splits[[1]]))
---- MODELTIME TABLE ----
# Collect both fitted workflows in a single modeltime table and print it.
model_tbl <- modeltime_table(wflw_fit_xgboost, wflw_fit_lightgbm)
model_tbl
# Fit every model in the table on the cross-validation resamples, with
# parallel processing allowed and verbose progress output.
resample_results <- modeltime_fit_resamples(
  model_tbl,
  resamples = emergency_tscv,
  control   = control_resamples(allow_par = TRUE, verbose = TRUE)
)
resample_results
In this step I need the results by ID, but I only get the global results per resampling fold. Can you help me?
# Accuracy per resample fold: `summary_fns = NULL` keeps the fold-level
# results instead of summarising them. The metric set is passed by name
# rather than by position. The result is rendered as a static table.
resample_results %>%
  modeltime_resample_accuracy(
    summary_fns = NULL,
    metric_set  = yardstick::metric_set(mape, smape, mase, rmse)
  ) %>%
  table_modeltime_accuracy(.interactive = FALSE)
Metadata
Assignees
Labels
No labels
Activity