     generate_score_per_series,
     model_forecast,
     validation_aggregation,
+    back_forecast,
+    remove_leading_zeros,
 )
 from autots.models.ensemble import (
     EnsembleTemplateGenerator,
@@ -312,6 +314,9 @@ def __init__(
             else ['ID'] + self.template_cols
         )
         self.initial_results = TemplateEvalObject()
+        self.best_model_name = ""
+        self.best_model_params = ""
+        self.best_model_transformation_params = ""
 
         if verbose > 2:
             print('"Hello. Would you like to destroy some evil today?" - Sanderson')
@@ -322,7 +327,7 @@ def __repr__(self):
             return "Uninitiated AutoTS object"
         else:
             try:
-                return f"Initiated AutoTS object with best model: \n{self.best_model['Model'].iloc[0]}\n{self.best_model['TransformationParameters'].iloc[0]}\n{self.best_model['ModelParameters'].iloc[0]}"
+                return f"Initiated AutoTS object with best model: \n{self.best_model_name}\n{self.best_model_transformation_params}\n{self.best_model_params}"
             except Exception:
                 return "Initiated AutoTS object"
@@ -481,12 +486,7 @@ def fit(
 
         # replace any zeroes that occur prior to all non-zero values
         if self.remove_leading_zeroes:
-            # keep the last row unaltered to keep metrics happier if all zeroes
-            temp = df_wide_numeric.head(df_wide_numeric.shape[0] - 1)
-            temp = temp.abs().cumsum(axis=0).replace(0, np.nan)
-            temp = df_wide_numeric[~temp.isna()]
-            temp = temp.head(df_wide_numeric.shape[0] - 1)
-            df_wide_numeric = pd.concat([temp, df_wide_numeric.tail(1)], axis=0)
+            df_wide_numeric = remove_leading_zeros(df_wide_numeric)
 
         # remove other ensembling types if univariate
         if df_wide_numeric.shape[1] == 1:
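
For reference, the inline logic removed here now lives in the remove_leading_zeros helper imported at the top of this diff. A minimal standalone sketch of that behavior, reconstructed from the removed lines (the _sketch name is illustrative, not the library function):

import numpy as np
import pandas as pd

def remove_leading_zeros_sketch(df: pd.DataFrame) -> pd.DataFrame:
    """Mask zeros occurring before each series' first nonzero value,
    keeping the last row unaltered so all-zero series retain one value."""
    temp = df.head(df.shape[0] - 1)
    # cumulative sum of absolute values stays 0 only before the first nonzero value
    temp = temp.abs().cumsum(axis=0).replace(0, np.nan)
    temp = df[~temp.isna()]
    temp = temp.head(df.shape[0] - 1)
    return pd.concat([temp, df.tail(1)], axis=0)
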
@@ -1096,6 +1096,12 @@ def fit(
             self.ensemble_check = int((self.best_model['Ensemble'].iloc[0]) > 0)
         except IndexError:
             raise ValueError(error_msg_template)
+        # give a more convenient dict option
+        self.best_model_name = self.best_model['Model'].iloc[0]
+        self.best_model_params = json.loads(self.best_model['ModelParameters'].iloc[0])
+        self.best_model_transformation_params = json.loads(
+            self.best_model['TransformationParameters'].iloc[0]
+        )
 
         # set flags to check if regressors or ensemble used in final model.
         param_dict = json.loads(self.best_model.iloc[0]['ModelParameters'])
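
With these attributes set at the end of fit(), the winning model can be inspected without calling json.loads on the best_model DataFrame. A hedged usage sketch (the long-format input DataFrame and its column names are placeholders):

from autots import AutoTS

model = AutoTS(forecast_length=14, frequency='infer')
model = model.fit(df_long, date_col='datetime', value_col='value', id_col='series_id')
print(model.best_model_name)                   # e.g. "ETS"
print(model.best_model_params)                 # already a dict, no json.loads needed
print(model.best_model_transformation_params)  # dict of transformer settings
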
@@ -1330,6 +1336,9 @@ def export_template(
             export_template = export_template.nsmallest(n, columns=['Score'])
             if not include_results:
                 export_template = export_template[self.template_cols]
+                export_template = pd.concat(
+                    [self.best_model, export_template]
+                ).drop_duplicates()
         else:
             raise ValueError("`models` must be 'all' or 'best'")
         try:
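
Exporting with models='best' now prepends the current best_model row and drops duplicates, so the winning model is always carried into the template. A hedged sketch of the round trip (the filename is a placeholder, and the import_template reuse assumes the existing API):

# export the top results plus the guaranteed best-model row
model.export_template("example_export.csv", models='best', n=15, max_per_model_class=3)
# later, seed a new search from only that template
model = model.import_template("example_export.csv", method='only')
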
@@ -1448,13 +1457,56 @@ def import_results(self, filename):
             self.initial_results = self.initial_results.concat(new_obj)
         return self
 
+    def back_forecast(
+        self, column=None, n_splits: int = 3, tail: int = None, verbose: int = 0
+    ):
+        """Create forecasts for the historical training data, i.e. backcast or back forecast.
+
+        This actually forecasts on historical data; these are not fitted-model values as are often returned by other packages.
+        As such, this will be slower, but more representative of real-world model performance.
+        There may be jumps in data between chunks.
+
+        Args are the same as for model_forecast except...
+        n_splits (int): how many pieces to split the data into. Pass 2 for fastest, or "auto" for best accuracy.
+        column (str): to run on only one column, pass the column name. Faster than the full dataset.
+        tail (int): df.tail() of the dataset; back_forecast is only run on the n most recent observations.
+
+        Returns a standard prediction object (access .forecast, .lower_forecast, .upper_forecast).
+        """
+        if self.best_model.empty:
+            raise ValueError("No best_model. AutoTS .fit() needs to be run.")
+        if column is not None:
+            input_df = pd.DataFrame(self.df_wide_numeric[column])
+        else:
+            input_df = self.df_wide_numeric
+        if tail is not None:
+            input_df = input_df.tail(tail)
+        result = back_forecast(
+            df=input_df,
+            model_name=self.best_model_name,
+            model_param_dict=self.best_model_params,
+            model_transform_dict=self.best_model_transformation_params,
+            future_regressor_train=self.future_regressor_train,
+            n_splits=n_splits,
+            forecast_length=self.forecast_length,
+            frequency=self.frequency,
+            prediction_interval=self.prediction_interval,
+            no_negatives=self.no_negatives,
+            constraint=self.constraint,
+            holiday_country=self.holiday_country,
+            random_seed=self.random_seed,
+            n_jobs=self.n_jobs,
+            verbose=verbose,
+        )
+        return result
+
     def horizontal_to_df(self):
         """helper function for plotting."""
         if self.best_model.empty:
             raise ValueError("No best_model. AutoTS .fit() needs to be run.")
         if self.best_model['Ensemble'].iloc[0] != 2:
             raise ValueError("Only works on horizontal ensemble type models.")
-        ModelParameters = json.loads(self.best_model['ModelParameters'].iloc[0])
+        ModelParameters = self.best_model_params
         series = ModelParameters['series']
         series = pd.DataFrame.from_dict(series, orient="index").reset_index(drop=False)
         if series.shape[1] > 2:
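
A hedged sketch of calling the new back_forecast method on a fitted AutoTS instance; per its docstring it returns a standard prediction object, evaluated over the training history in n_splits chunks:

backcast = model.back_forecast(n_splits=3, tail=90, verbose=0)
historic_point = backcast.forecast          # point forecasts over the historical index
historic_lower = backcast.lower_forecast    # lower prediction interval
historic_upper = backcast.upper_forecast    # upper prediction interval
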
@@ -1496,7 +1548,7 @@ def mosaic_to_df(self):
             raise ValueError("No best_model. AutoTS .fit() needs to be run.")
         if self.best_model['Ensemble'].iloc[0] != 2:
             raise ValueError("Only works on horizontal ensemble type models.")
-        ModelParameters = json.loads(self.best_model['ModelParameters'].iloc[0])
+        ModelParameters = self.best_model_params
         if str(ModelParameters['model_name']).lower() != 'mosaic':
             raise ValueError("Only works on mosaic ensembles.")
         series = pd.DataFrame.from_dict(ModelParameters['series'])
@@ -1565,6 +1617,32 @@ def plot_generation_loss(self, **kwargs):
             ylabel="Lowest Score", **kwargs
         )
 
+    def plot_backforecast(
+        self, series=None, n_splits: int = 3, start_date=None, **kwargs
+    ):
+        """Plot the historical data and the back forecast on that history.
+
+        Args:
+            series (str or list): column names of time series
+            n_splits (int or str): "auto" or a number > 2; higher is more accurate but slower
+            **kwargs passed to pd.DataFrame.plot()
+        """
+        if series is None:
+            series = random.choice(self.df_wide_numeric.columns)
+        b_df = self.back_forecast(column=series, n_splits=n_splits, verbose=0).forecast
+        b_df = b_df.rename(columns=lambda x: str(x) + "_forecast")
+        plot_df = pd.concat(
+            [
+                pd.DataFrame(self.df_wide_numeric[series]),
+                b_df,
+            ],
+            axis=1,
+        )
+        if start_date is not None:
+            plot_df = plot_df[plot_df.index >= start_date]
+        plot_df = remove_leading_zeros(plot_df)
+        plot_df.plot(**kwargs)
+
 
 colors_list = [
     '#FF00FF',
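
A hedged sketch of the new plotting helper; the start date is a placeholder, and the figure is rendered through pandas/matplotlib:

import matplotlib.pyplot as plt

# pick a specific column, or leave series=None to plot a randomly chosen one
model.plot_backforecast(series=None, n_splits="auto", start_date="2021-01-01")
plt.show()
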
@@ -1607,6 +1685,8 @@ def plot_generation_loss(self, **kwargs):
     '#EE82EE',
     '#00008B',
     '#4B0082',
+    '#0403A7',
+    "#000000",
 ]
 
 