Skip to content

Commit 1613c78

Browse files
Commit message: "0.3.6" (version bump). Parent commit: f942372; this commit: 1613c78.

33 files changed

+407
-134
lines changed

autots/datasets/_base.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -381,8 +381,7 @@ def load_zeroes(long=False, shape=None, start_date: str = "2021-01-01"):
381381
if shape is None:
382382
shape = (200, 5)
383383
df_wide = pd.DataFrame(
384-
np.zeros(shape),
385-
index=pd.date_range(start_date, periods=shape[0], freq="D")
384+
np.zeros(shape), index=pd.date_range(start_date, periods=shape[0], freq="D")
386385
)
387386
if not long:
388387
return df_wide

autots/evaluator/auto_model.py

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1633,7 +1633,9 @@ def generate_score(
16331633
model_results = model_results.replace([np.inf, -np.inf], np.nan)
16341634
# not sure why there are negative SMAPE values, but make sure they get dealt with
16351635
if model_results['smape'].min() < 0:
1636-
model_results['smape'] = model_results['smape'].where(model_results['smape'] >= 0, model_results['smape'].max())
1636+
model_results['smape'] = model_results['smape'].where(
1637+
model_results['smape'] >= 0, model_results['smape'].max()
1638+
)
16371639
# handle NaN in scores...
16381640
# model_results = model_results.fillna(value=model_results.max(axis=0))
16391641

@@ -1744,13 +1746,23 @@ def generate_score_per_series(results_object, metric_weighting, total_validation
17441746
return overall_score
17451747

17461748

1747-
def back_forecast(df, model_name, model_param_dict, model_transform_dict,
1748-
future_regressor_train=None,
1749-
n_splits: int = "auto", forecast_length=14,
1750-
frequency="infer", prediction_interval=0.9, no_negatives=False,
1751-
constraint=None, holiday_country="US",
1752-
random_seed=123, n_jobs="auto", verbose=0,
1753-
):
1749+
def back_forecast(
1750+
df,
1751+
model_name,
1752+
model_param_dict,
1753+
model_transform_dict,
1754+
future_regressor_train=None,
1755+
n_splits: int = "auto",
1756+
forecast_length=14,
1757+
frequency="infer",
1758+
prediction_interval=0.9,
1759+
no_negatives=False,
1760+
constraint=None,
1761+
holiday_country="US",
1762+
random_seed=123,
1763+
n_jobs="auto",
1764+
verbose=0,
1765+
):
17541766
"""Create forecasts for the historical training data, ie. backcast or back forecast.
17551767
17561768
This actually forecasts on historical data, these are not fit model values as are often returned by other packages.
@@ -1771,7 +1783,11 @@ def back_forecast(df, model_name, model_param_dict, model_transform_dict,
17711783
n_splits = int(n_splits)
17721784

17731785
chunk_size = df.index.shape[0] / n_splits
1774-
b_forecast, b_forecast_up, b_forecast_low = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
1786+
b_forecast, b_forecast_up, b_forecast_low = (
1787+
pd.DataFrame(),
1788+
pd.DataFrame(),
1789+
pd.DataFrame(),
1790+
)
17751791
for n in range(n_splits):
17761792
int_idx = int(n * chunk_size)
17771793
int_idx_1 = int((n + 1) * chunk_size)
@@ -1781,17 +1797,19 @@ def back_forecast(df, model_name, model_param_dict, model_transform_dict,
17811797
df_split = df.iloc[int_idx_1:].copy()
17821798
df_split = df_split.iloc[::-1]
17831799
df_split.index = df_split.index[::-1]
1784-
result_idx = df.iloc[0: int_idx_1].index
1800+
result_idx = df.iloc[0:int_idx_1].index
17851801
else:
1786-
df_split = df.iloc[0: int_idx].copy()
1802+
df_split = df.iloc[0:int_idx].copy()
17871803
# handle appropriate regressors
17881804
if isinstance(future_regressor_train, pd.DataFrame):
17891805
if n == 0:
17901806
split_regr = future_regressor_train.reindex(df_split.index[::-1])
17911807
split_regr_future = future_regressor_train.reindex(result_idx)
17921808
else:
17931809
split_regr = future_regressor_train.reindex(df_split.index)
1794-
split_regr_future = future_regressor_train.reindex(df.index[int_idx: int_idx_1])
1810+
split_regr_future = future_regressor_train.reindex(
1811+
df.index[int_idx:int_idx_1]
1812+
)
17951813
else:
17961814
split_regr = []
17971815
split_regr_future = []
@@ -1826,7 +1844,9 @@ def back_forecast(df, model_name, model_param_dict, model_transform_dict,
18261844
b_forecast_low.index = result_idx
18271845
except Exception as e:
18281846
print(f"back_forecast split {n} failed with {repr(e)}")
1829-
b_df = pd.DataFrame(np.nan, index=df.index[int_idx: int_idx_1], columns=df.columns)
1847+
b_df = pd.DataFrame(
1848+
np.nan, index=df.index[int_idx:int_idx_1], columns=df.columns
1849+
)
18301850
b_forecast = pd.concat([b_forecast, b_df])
18311851
b_forecast_up = pd.concat([b_forecast_up, b_df])
18321852
b_forecast_low = pd.concat([b_forecast_low, b_df])

autots/evaluator/auto_ts.py

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
model_forecast,
2626
validation_aggregation,
2727
back_forecast,
28-
remove_leading_zeros
28+
remove_leading_zeros,
2929
)
3030
from autots.models.ensemble import (
3131
EnsembleTemplateGenerator,
@@ -1099,7 +1099,9 @@ def fit(
10991099
# give a more convenient dict option
11001100
self.best_model_name = self.best_model['Model'].iloc[0]
11011101
self.best_model_params = json.loads(self.best_model['ModelParameters'].iloc[0])
1102-
self.best_model_transformation_params = json.loads(self.best_model['TransformationParameters'].iloc[0])
1102+
self.best_model_transformation_params = json.loads(
1103+
self.best_model['TransformationParameters'].iloc[0]
1104+
)
11031105

11041106
# set flags to check if regressors or ensemble used in final model.
11051107
param_dict = json.loads(self.best_model.iloc[0]['ModelParameters'])
@@ -1334,7 +1336,9 @@ def export_template(
13341336
export_template = export_template.nsmallest(n, columns=['Score'])
13351337
if not include_results:
13361338
export_template = export_template[self.template_cols]
1337-
export_template = pd.concat([self.best_model, export_template]).drop_duplicates()
1339+
export_template = pd.concat(
1340+
[self.best_model, export_template]
1341+
).drop_duplicates()
13381342
else:
13391343
raise ValueError("`models` must be 'all' or 'best'")
13401344
try:
@@ -1453,7 +1457,9 @@ def import_results(self, filename):
14531457
self.initial_results = self.initial_results.concat(new_obj)
14541458
return self
14551459

1456-
def back_forecast(self, column=None, n_splits: int = 3, verbose: int = 0):
1460+
def back_forecast(
1461+
self, column=None, n_splits: int = 3, tail: int = None, verbose: int = 0
1462+
):
14571463
"""Create forecasts for the historical training data, ie. backcast or back forecast.
14581464
14591465
This actually forecasts on historical data, these are not fit model values as are often returned by other packages.
@@ -1463,6 +1469,7 @@ def back_forecast(self, column=None, n_splits: int = 3, verbose: int = 0):
14631469
Args are same as for model_forecast except...
14641470
n_splits(int): how many pieces to split data into. Pass 2 for fastest, or "auto" for best accuracy
14651471
column (str): if to run on only one column, pass column name. Faster than full.
1472+
tail (int): df.tail() of the dataset, back_forecast is only run on n most recent observations.
14661473
14671474
Returns a standard prediction object (access .forecast, .lower_forecast, .upper_forecast)
14681475
"""
@@ -1472,18 +1479,24 @@ def back_forecast(self, column=None, n_splits: int = 3, verbose: int = 0):
14721479
input_df = pd.DataFrame(self.df_wide_numeric[column])
14731480
else:
14741481
input_df = self.df_wide_numeric
1482+
if tail is not None:
1483+
input_df = input_df.tail(tail)
14751484
result = back_forecast(
14761485
df=input_df,
14771486
model_name=self.best_model_name,
14781487
model_param_dict=self.best_model_params,
14791488
model_transform_dict=self.best_model_transformation_params,
14801489
future_regressor_train=self.future_regressor_train,
1481-
n_splits=n_splits, forecast_length=self.forecast_length,
1482-
frequency=self.frequency, prediction_interval=self.prediction_interval,
1490+
n_splits=n_splits,
1491+
forecast_length=self.forecast_length,
1492+
frequency=self.frequency,
1493+
prediction_interval=self.prediction_interval,
14831494
no_negatives=self.no_negatives,
1484-
constraint=self.constraint, holiday_country=self.holiday_country,
1495+
constraint=self.constraint,
1496+
holiday_country=self.holiday_country,
14851497
random_seed=self.random_seed,
1486-
n_jobs=self.n_jobs, verbose=verbose,
1498+
n_jobs=self.n_jobs,
1499+
verbose=verbose,
14871500
)
14881501
return result
14891502

@@ -1604,7 +1617,9 @@ def plot_generation_loss(self, **kwargs):
16041617
ylabel="Lowest Score", **kwargs
16051618
)
16061619

1607-
def plot_backforecast(self, series=None, n_splits: int = 3, start_date=None, **kwargs):
1620+
def plot_backforecast(
1621+
self, series=None, n_splits: int = 3, start_date=None, **kwargs
1622+
):
16081623
"""Plot the historical data and fit forecast on historic.
16091624
16101625
Args:
@@ -1616,10 +1631,13 @@ def plot_backforecast(self, series=None, n_splits: int = 3, start_date=None, **k
16161631
series = random.choice(self.df_wide_numeric.columns)
16171632
b_df = self.back_forecast(column=series, n_splits=n_splits, verbose=0).forecast
16181633
b_df = b_df.rename(columns=lambda x: str(x) + "_forecast")
1619-
plot_df = pd.concat([
1620-
pd.DataFrame(self.df_wide_numeric[series]),
1621-
b_df,
1622-
], axis=1)
1634+
plot_df = pd.concat(
1635+
[
1636+
pd.DataFrame(self.df_wide_numeric[series]),
1637+
b_df,
1638+
],
1639+
axis=1,
1640+
)
16231641
if start_date is not None:
16241642
plot_df = plot_df[plot_df.index >= start_date]
16251643
plot_df = remove_leading_zeros(plot_df)
@@ -1667,6 +1685,8 @@ def plot_backforecast(self, series=None, n_splits: int = 3, start_date=None, **k
16671685
'#EE82EE',
16681686
'#00008B',
16691687
'#4B0082',
1688+
'#0403A7',
1689+
"#000000",
16701690
]
16711691

16721692

autots/models/ensemble.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -238,10 +238,10 @@ def mosaic_classifier(df_train, known):
238238
index=None if len(p_full) > 1 else [0],
239239
)
240240
upload = pd.concat([upload, missing_rows])
241-
X = fill_median((
242-
summarize_series(df_train)
243-
.transpose())
244-
.merge(upload, left_index=True, right_on="series_id")
241+
X = fill_median(
242+
(summarize_series(df_train).transpose()).merge(
243+
upload, left_index=True, right_on="series_id"
244+
)
245245
)
246246
X.set_index("series_id", inplace=True) # .drop(columns=['series_id'], inplace=True)
247247
to_predict = X[X['model_id'].isna()].drop(columns=['model_id'])
@@ -1198,7 +1198,7 @@ def MosaicEnsemble(
11981198
newdf.columns = base_df.columns
11991199
newdf['forecast_period'] = np.tile(
12001200
np.arange(max_forecast_period + 1, needed_stamps + 1 + max_forecast_period),
1201-
base_df.shape[0]
1201+
base_df.shape[0],
12021202
)
12031203
melted = pd.concat([melted, newdf])
12041204
elif len_sample_index < (max_forecast_period + 1):
@@ -1214,9 +1214,13 @@ def MosaicEnsemble(
12141214
l_fore.append(lower_forecasts[row[3]][row[2]].iloc[row[1]])
12151215
except Exception as e:
12161216
m0 = f"{row[3]} in available_models: {row[3] in available_models}, "
1217-
mi = m0 + f"In forecast: {row[3] in forecasts.keys()}, in upper: {row[3] in upper_forecasts.keys()}, in Lower: {row[3] in lower_forecasts.keys()}"
1217+
mi = (
1218+
m0
1219+
+ f"In forecast: {row[3] in forecasts.keys()}, in upper: {row[3] in upper_forecasts.keys()}, in Lower: {row[3] in lower_forecasts.keys()}"
1220+
)
12181221
raise ValueError(
1219-
f"Mosaic Ensemble failed on model {row[3]} series {row[2]} and period {row[1]} due to missing model: {e} " + mi
1222+
f"Mosaic Ensemble failed on model {row[3]} series {row[2]} and period {row[1]} due to missing model: {e} "
1223+
+ mi
12201224
)
12211225
melted[
12221226
'forecast'

autots/models/sklearn.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -852,7 +852,23 @@ def window_maker(
852852
future_regressor=None,
853853
random_seed: int = 1234,
854854
):
855-
"""Convert a dataset into slices with history and y forecast."""
855+
"""Convert a dataset into slices with history and y forecast.
856+
857+
Args:
858+
df (pd.DataFrame): `wide` format df with sorted index
859+
window_size (int): length of history to use for X window
860+
input_dim (str): univariate or multivariate. If multivariate, all series in single X row
861+
shuffle (bool): (deprecated)
862+
output_dim (str): 'forecast_length' or '1step' where 1 step is basically forecast_length=1
863+
forecast_length (int): number of periods ahead that will be forecast
864+
max_windows (int): a cap on total number of windows to generate. If exceeded, random of this int are selected.
865+
regression_type (str): None or "user" if to try to concat regressor to windows
866+
future_regressor (pd.DataFrame): values of regressor if used
867+
random_seed (int): a consistent random
868+
869+
Returns:
870+
X, Y
871+
"""
856872
if output_dim == '1step':
857873
forecast_length = 1
858874
phrase_n = forecast_length + window_size
@@ -890,7 +906,6 @@ def window_maker(
890906
Y = Y.ravel()
891907

892908
except Exception as e:
893-
# print(f"New numpy version of Window Regression failed {e}.")
894909
if str(regression_type).lower() == "user":
895910
if input_dim == "multivariate":
896911
raise ValueError(
@@ -1506,6 +1521,8 @@ def fit(self, df, future_regressor=[]):
15061521
self.regression_type = None
15071522

15081523
y = df.values
1524+
if y.shape[1] == 1:
1525+
y = y.ravel()
15091526

15101527
X = date_part(df.index, method=self.datepart_method)
15111528
if self.regression_type == 'User':

autots/tools/impute.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,13 @@ def fake_date_fill(df, back_method: str = 'slice'):
9393
'akima',
9494
]
9595
# these seem to cause more harm than good usually
96-
df_interpolate_messy = ['polynomial', 'krogh', 'cubicspline', 'from_derivatives', 'slinear']
96+
df_interpolate_messy = [
97+
'polynomial',
98+
'krogh',
99+
'cubicspline',
100+
'from_derivatives',
101+
'slinear',
102+
]
97103
df_interpolate_full = list(set(df_interpolate + df_interpolate_messy))
98104

99105

0 commit comments

Comments (0)