
Commit 2735174

Merge pull request #255 from winedarksea/dev
0.6.20
2 parents: 1da01ee + 111f8cb


46 files changed: 466 additions & 180 deletions

TODO.md

Lines changed: 4 additions & 2 deletions
@@ -13,8 +13,10 @@
 * Forecasts are desired for the future immediately following the most recent data.
 * trimmed_mean to AverageValueNaive

-# 0.6.19 🇺🇦 🇺🇦 🇺🇦
-* bug fix for transformer_list="all"
+# 0.6.20 🇺🇦 🇺🇦 🇺🇦
+* transformer bug fixes
+* Prophet package adjustments
+* linear model singular matrix handling

 ### Unstable Upstream Pacakges (those that are frequently broken by maintainers)
 * Pytorch-Forecasting
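The "linear model singular matrix handling" item refers to keeping linear fits from failing when the design matrix is rank-deficient. As a hedged illustration only (a generic pattern, not necessarily the exact code inside AutoTS):

```python
import numpy as np


def fit_linear_coefficients(X: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Solve the normal equations, falling back when the matrix is singular.

    Generic sketch of 'linear model singular matrix handling';
    illustrative only, not AutoTS's exact implementation.
    """
    XtX = X.T @ X
    Xty = X.T @ y
    try:
        # fast path: direct solve when X'X is invertible
        return np.linalg.solve(XtX, Xty)
    except np.linalg.LinAlgError:
        # singular / rank-deficient design matrix: use the minimum-norm
        # least-squares solution instead of raising
        return np.linalg.lstsq(X, y, rcond=None)[0]
```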

autots/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@
 from autots.models.cassandra import Cassandra


-__version__ = '0.6.19'
+__version__ = '0.6.20'

 TransformTS = GeneralTransformer

autots/evaluator/auto_model.py

Lines changed: 72 additions & 45 deletions
@@ -1481,6 +1481,7 @@ def model_forecast(
             current_model_file=current_model_file,
             model_count=model_count,
             force_gc=force_gc,
+            internal_validation=False,  # allow sub ensembles to have postprocessing
         )
         model_id = create_model_id(
             df_forecast.model_name,
@@ -1527,6 +1528,7 @@ def model_forecast(
                 forecast_length=forecast_length,
             )
             transformer_object.fit(df_train)
+            # forecast inverse MUST come before upper and lower bounds inverse
             ens_forecast.forecast = transformer_object.inverse_transform(
                 ens_forecast.forecast
             )
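The new comment in this hunk codifies an ordering constraint: the point forecast must go through inverse_transform before the upper and lower bounds, since some transformers derive state during the forecast pass (for example last-value alignment) that the bounds then reuse. A hedged sketch of that pattern, assuming a fitted GeneralTransformer and an AutoTS-style prediction object exposing forecast / lower_forecast / upper_forecast DataFrames:

```python
from autots.tools.transform import GeneralTransformer


def inverse_in_required_order(transformer_object: GeneralTransformer, prediction, df_train):
    """Apply inverse_transform in the order the comment above requires.

    `prediction` is assumed to expose forecast / lower_forecast / upper_forecast
    DataFrames (as AutoTS prediction objects do); treat this as a sketch, not
    the library's internal code path.
    """
    transformer_object.fit(df_train)
    # point forecast first: some transformers set state here that the bounds reuse
    prediction.forecast = transformer_object.inverse_transform(prediction.forecast)
    # bounds only after the forecast has been inverted
    prediction.upper_forecast = transformer_object.inverse_transform(prediction.upper_forecast)
    prediction.lower_forecast = transformer_object.inverse_transform(prediction.lower_forecast)
    return prediction
```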
@@ -1736,7 +1738,7 @@ def _eval_prediction_for_template(


 horizontal_post_processors = [
-    {
+    {  # consistently used as best
         "fillna": "fake_date",
         "transformations": {"0": "AlignLastValue", "1": "AlignLastValue"},
         "transformation_params": {
@@ -1785,7 +1787,7 @@ def _eval_prediction_for_template(
         },
     },  # best wasserstein on daily
     # {"fillna": "linear", "transformations": {"0": "bkfilter", "1": "DifferencedTransformer", "2": "BKBandpassFilter"}, "transformation_params": {"0": {}, "1": {"lag": 1, "fill": "zero"}, "2": {"low": 12, "high": 32, "K": 6, "lanczos_factor": False, "return_diff": False, "on_transform": False, "on_inverse": True}}},
-    {
+    {  # observed used best on LRP 2025-02-20, neat
         "fillna": "rolling_mean_24",
         "transformations": {"0": "bkfilter", "1": "FIRFilter", "2": "AlignLastDiff"},
         "transformation_params": {
@@ -1834,8 +1836,8 @@ def _eval_prediction_for_template(
                 "threshold_method": "mean",
             },
         },
-    },  # best mae on daily, a bit weird otherwise, 1x best mage daily
-    {
+    },
+    {  # best mae on daily, a bit weird otherwise, 1x best mage daily
         "fillna": "median",
         "transformations": {
             "0": "DiffSmoother",
@@ -1867,7 +1869,6 @@ def _eval_prediction_for_template(
         "fillna": "fake_date",
         "transformations": {
             "0": "AlignLastValue",
-            "1": "PositiveShift",
             "2": "HistoricValues",
         },
         "transformation_params": {
@@ -1880,8 +1881,7 @@ def _eval_prediction_for_template(
                 "threshold": 10,
                 "threshold_method": "mean",
             },
-            "1": {},
-            "2": {"window": 28},
+            "1": {"window": 28},
         },
     },  # best competition on VN1
     {
@@ -2021,44 +2021,6 @@ def _eval_prediction_for_template(
             },
         },
     },
-    {  # balanced on wiki daily
-        "fillna": "cubic",
-        "transformations": {"0": "AlignLastValue", "1": "DatepartRegression"},
-        "transformation_params": {
-            "0": {
-                "rows": 1,
-                "lag": 7,
-                "method": "multiplicative",
-                "strength": 0.9,
-                "first_value_only": False,
-                "threshold": 3,
-                "threshold_method": "max",
-            },
-            "1": {
-                "regression_model": {
-                    "model": "ElasticNet",
-                    "model_params": {
-                        "l1_ratio": 0.5,
-                        "fit_intercept": True,
-                        "selection": "cyclic",
-                        "max_iter": 1000,
-                    },
-                },
-                "datepart_method": "common_fourier",
-                "polynomial_degree": None,
-                "transform_dict": {
-                    "fillna": None,
-                    "transformations": {"0": "ClipOutliers"},
-                    "transformation_params": {
-                        "0": {"method": "clip", "std_threshold": 4}
-                    },
-                },
-                "holiday_countries_used": False,
-                "lags": None,
-                "forward_lags": None,
-            },
-        },
-    },
     {  # best on VPV, 19.7 smape
         "fillna": "quadratic",
         "transformations": {"0": "AlignLastValue", "1": "ChangepointDetrend"},
@@ -2080,6 +2042,69 @@ def _eval_prediction_for_template(
             },
         },
     },
+    {  # hand tuned, might be replaceable with better FIR combination
+        'fillna': 'fake_date',
+        'transformations': {
+            '0': 'FIRFilter',
+            "1": "AlignLastValue",
+            "2": "AlignLastValue",
+        },
+        'transformation_params': {
+            '0': {
+                'numtaps': 32,
+                'cutoff_hz': 0.1,
+                'window': "triang",
+                'sampling_frequency': 12,
+                'on_transform': False,
+                'on_inverse': True,
+                'bounds_only': True,
+            },
+            "1": {
+                "rows": 1,
+                "lag": 1,
+                "method": "multiplicative",
+                "strength": 1.0,
+                "first_value_only": False,
+                "threshold": None,
+                "threshold_method": "mean",
+            },
+            "2": {
+                "rows": 1,
+                "lag": 1,
+                "method": "multiplicative",
+                "strength": 1.0,
+                "first_value_only": True,
+                "threshold": 10,
+                "threshold_method": "max",
+            },
+        },
+    },
+    {  # on wiki daily horizontal, mainly smape
+        'fillna': 'ffill',
+        'transformations': {
+            '0': 'LevelShiftTransformer',
+            '1': 'Constraint',
+            '2': 'HistoricValues',
+        },
+        'transformation_params': {
+            '0': {
+                'window_size': 120,
+                'alpha': 3.5,
+                'grouping_forward_limit': 3,
+                'max_level_shifts': 5,
+                'alignment': 'rolling_diff',
+            },
+            '1': {
+                'constraint_method': 'dampening',
+                'constraint_direction': 'upper',
+                'constraint_regularization': 1.0,
+                'constraint_value': 0.99,
+                'bounds_only': False,
+                'fillna': None,
+            },
+            '2': {'window': None},
+        },
+    },
 ]


@@ -2197,6 +2222,7 @@ def virtual_memory():
         if ensemble_input == 2 and transformation_dict:
             # SKIP BECAUSE TRANSFORMERS (PRE DEFINED) ARE DONE BELOW TO REDUCE FORECASTS RERUNS
             # ON INTERNAL VALIDATION ONLY ON TEMPLATES
+            # this does mean that "custom" postprocessing won't work with template wizard
             if verbose >= 1:
                 print(
                     "skipping horizontal with transformation due to that being done on internal validation"
@@ -2299,6 +2325,7 @@ def virtual_memory():
                 forecast_length=forecast_length,
             )
             transformer_object.fit(df_train)
+            # forecast inverse MUST come before upper and lower bounds inverse
             df_forecast2.forecast = transformer_object.inverse_transform(
                 df_forecast2.forecast
             )
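Each entry in the horizontal_post_processors list added above is an ordinary GeneralTransformer parameter dict (fillna, transformations, transformation_params), applied the way the adjacent hunks show: fit on the training history, then inverse_transform the forecast. A hedged, self-contained sketch of exercising such a config outside the ensemble code (the toy data is illustrative; the forecast_length keyword mirrors the constructor call visible in the hunks above):

```python
import numpy as np
import pandas as pd
from autots.tools.transform import GeneralTransformer

# a minimal config in the same shape as the horizontal_post_processors entries;
# parameter values are taken from ones visible in the diff, and any single
# entry from that list could be dropped in here instead
params = {
    "fillna": "ffill",
    "transformations": {"0": "AlignLastValue"},
    "transformation_params": {
        "0": {
            "rows": 1,
            "lag": 1,
            "method": "multiplicative",
            "strength": 1.0,
            "first_value_only": False,
            "threshold": None,
            "threshold_method": "mean",
        },
    },
}

# toy wide-format data (datetime index x series), purely illustrative
idx = pd.date_range("2024-01-01", periods=100, freq="D")
df_train = pd.DataFrame(np.random.rand(100, 2), index=idx, columns=["a", "b"])
raw_forecast = pd.DataFrame(
    np.random.rand(14, 2),
    index=pd.date_range(idx[-1] + pd.Timedelta(days=1), periods=14, freq="D"),
    columns=["a", "b"],
)

post = GeneralTransformer(**params, forecast_length=14)  # mirrors the constructor call above
post.fit(df_train)
# per the in-code comments, the point forecast is inverted before any bounds
adjusted_forecast = post.inverse_transform(raw_forecast)
```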

autots/evaluator/auto_ts.py

Lines changed: 71 additions & 42 deletions
@@ -205,7 +205,7 @@ def __init__(
         transformer_list: dict = "auto",
         transformer_max_depth: int = 6,
         models_mode: str = "random",
-        num_validations: str = "auto",
+        num_validations: int = "auto",
         models_to_validate: float = 0.15,
         max_per_model_class: int = None,
         validation_method: str = 'backwards',
@@ -1449,18 +1449,21 @@ def fit(
                     ensemble=self.ensemble,
                     score_per_series=self.score_per_series,
                 )
-                self._run_template(
-                    ensemble_templates,
-                    df_train,
-                    df_test,
-                    future_regressor_train=future_regressor_train,
-                    future_regressor_test=future_regressor_test,
-                    current_weights=current_weights,
-                    validation_round=0,
-                    max_generations="Ensembles",
-                    current_generation=(current_generation + 1),
-                    result_file=result_file,
-                )
+                if not ensemble_templates.empty:
+                    self._run_template(
+                        self.ensemble_templates,
+                        df_train,
+                        df_test,
+                        future_regressor_train=future_regressor_train,
+                        future_regressor_test=future_regressor_test,
+                        current_weights=current_weights,
+                        validation_round=0,
+                        max_generations="Ensembles",
+                        current_generation=(current_generation + 1),
+                        result_file=result_file,
+                    )
+                elif "simple" in self.ensemble:
+                    print("Simple ensemble missing, error unclear")
             except Exception as e:
                 print(
                     f"Ensembling Error: {repr(e)}: {''.join(tb.format_exception(None, e, e.__traceback__))}"
@@ -1506,25 +1509,26 @@ def fit(
                     score_per_series=self.score_per_series,
                 )
                 self.ensemble_templates2 = ensemble_templates
-                self._run_template(
-                    ensemble_templates,
-                    df_train,
-                    df_test,
-                    future_regressor_train=future_regressor_train,
-                    future_regressor_test=future_regressor_test,
-                    current_weights=current_weights,
-                    validation_round=0,
-                    max_generations="Ensembles",
-                    current_generation=(current_generation + 2),
-                    result_file=result_file,
-                )
-                self._run_validations(
-                    df_wide_numeric=self.df_wide_numeric,
-                    num_validations=self.num_validations,
-                    validation_template=ensemble_templates,
-                    future_regressor=self.future_regressor_train,
-                    first_validation=False,
-                )
+                if not ensemble_templates.empty:
+                    self._run_template(
+                        ensemble_templates,
+                        df_train,
+                        df_test,
+                        future_regressor_train=future_regressor_train,
+                        future_regressor_test=future_regressor_test,
+                        current_weights=current_weights,
+                        validation_round=0,
+                        max_generations="Ensembles",
+                        current_generation=(current_generation + 2),
+                        result_file=result_file,
+                    )
+                    self._run_validations(
+                        df_wide_numeric=self.df_wide_numeric,
+                        num_validations=self.num_validations,
+                        validation_template=ensemble_templates,
+                        future_regressor=self.future_regressor_train,
+                        first_validation=False,
+                    )
             except Exception as e:
                 print(
                     f"Post-Validation Ensembling Error: {repr(e)}: {''.join(tb.format_exception(None, e, e.__traceback__))}"
@@ -2032,12 +2036,23 @@ def _run_template(
         # gather results of template run
         if not return_template:
             self.initial_results = self.initial_results.concat(template_result)
-            scores, score_dict = generate_score(
-                self.initial_results.model_results,
-                metric_weighting=self.metric_weighting,
-                prediction_interval=self.prediction_interval,
-                return_score_dict=True,
-            )
+            try:
+                scores, score_dict = generate_score(
+                    self.initial_results.model_results,
+                    metric_weighting=self.metric_weighting,
+                    prediction_interval=self.prediction_interval,
+                    return_score_dict=True,
+                )
+            except Exception as e:
+                mod_res = self.initial_results.model_results
+                print(mod_res.head())
+                print(self.metric_weighting)
+                print(mod_res.columns)
+                print(mod_res.index)
+                print(
+                    f"Succeeded model count this template: {mod_res[mod_res['Exceptions'].isnull()].shape[0]}. If this is zero, try importing a different template or changing initial template. Check data too."
+                )
+                raise ValueError("unknown score generation error") from e
             self.initial_results.model_results['Score'] = scores
             self.score_breakdown = pd.DataFrame(score_dict).set_index("ID")
         else:
@@ -2442,6 +2457,7 @@ def export_template(
         min_metrics: list = ['smape', 'spl', 'wasserstein', 'mle', 'imle', 'ewmae'],
         max_metrics: list = None,
         focus_models: list = None,
+        include_ensemble: bool = True,
     ):
         """Export top results as a reusable template.

@@ -2457,6 +2473,7 @@ def export_template(
             min_metrics (list): if not None and models=='best', include the lowest for this metric, a way to include even if not a major part of metric weighting as an addon
             max_metrics (list): for metrics to take the max model for
             focus_models (list): also pull the best score/min/max metrics as per just this model
+            include_ensemble (bool): if False, exclude Ensembles (ignored with "all" models)
         """
         if models == 'all':
             export_template = self.initial_results.model_results[self.template_cols_id]
@@ -2472,6 +2489,8 @@ def export_template(
                 (export_template['Runs'] >= (self.num_validations + 1))
                 | (export_template['Ensemble'] >= 2)
             ]
+            if not include_ensemble:
+                export_template = export_template[export_template["Ensemble"] == 0]
             # clean up any bad data (hopefully there is none anyway...)
             export_template = export_template[
                 (~export_template['ModelParameters'].isnull())
@@ -2557,11 +2576,12 @@ def export_template(
             if not include_results:
                 export_template = export_template[self.template_cols_id]
         elif models == "slowest":
+            export_template = self.initial_results.model_results
+            if not include_ensemble:
+                export_template = export_template[export_template["Ensemble"] == 0]
             return self.save_template(
                 filename,
-                self.initial_results.model_results.nlargest(
-                    n, columns=['TotalRuntime']
-                ),
+                export_template.nlargest(n, columns=['TotalRuntime']),
             )
         else:
             raise ValueError("`models` must be 'all' or 'best' or 'slowest'")
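A hedged usage sketch of the new include_ensemble flag (assuming a fitted AutoTS instance named model; the filenames are placeholders and the other arguments are pre-existing export_template parameters):

```python
# export the top non-ensemble models only; per the docstring above,
# include_ensemble is honored for models="best" and is ignored for models="all"
model.export_template(
    "best_models_no_ensembles.csv",
    models="best",
    n=20,
    include_ensemble=False,
)

# the "slowest" export now also respects the flag
model.export_template(
    "slowest_models.csv",
    models="slowest",
    n=10,
    include_ensemble=False,
)
```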
@@ -4351,8 +4371,17 @@ def plot_chosen_transformer(

         # Create a second y-axis sharing the x-axis
         ax2 = ax1.twinx()
+        col_here = (
+            col
+            if col in df2.columns
+            else [colz for colz in df2.columns if col in colz]
+        )
         ax2.plot(
-            df2.index, df2[col], color=color2, linestyle='--', label='transformed'
+            df2.index,
+            df2[col_here],
+            color=color2,
+            linestyle='--',
+            label='transformed',
         )
         ax2.set_ylabel('transformed', color=color2, fontsize=12)
         ax2.tick_params(axis='y', labelcolor=color2)
