
Commit 2735174

Merge pull request #255 from winedarksea/dev
0.6.20
2 parents: 1da01ee + 111f8cb


46 files changed: 466 additions & 180 deletions

TODO.md

Lines changed: 4 additions & 2 deletions
@@ -13,8 +13,10 @@
 * Forecasts are desired for the future immediately following the most recent data.
 * trimmed_mean to AverageValueNaive

-# 0.6.19 🇺🇦 🇺🇦 🇺🇦
-* bug fix for transformer_list="all"
+# 0.6.20 🇺🇦 🇺🇦 🇺🇦
+* transformer bug fixes
+* Prophet package adjustments
+* linear model singular matrix handling

 ### Unstable Upstream Pacakges (those that are frequently broken by maintainers)
 * Pytorch-Forecasting
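The "linear model singular matrix handling" item refers to keeping linear fits from failing when the design matrix is rank-deficient. As a hedged illustration only (a generic pattern, not necessarily the exact code inside AutoTS):

```python
import numpy as np


def fit_linear_coefficients(X: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Solve the normal equations, falling back when the matrix is singular.

    Generic sketch of 'linear model singular matrix handling';
    illustrative only, not AutoTS's exact implementation.
    """
    XtX = X.T @ X
    Xty = X.T @ y
    try:
        # fast path: direct solve when X'X is invertible
        return np.linalg.solve(XtX, Xty)
    except np.linalg.LinAlgError:
        # singular / rank-deficient design matrix: use the minimum-norm
        # least-squares solution instead of raising
        return np.linalg.lstsq(X, y, rcond=None)[0]
```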

autots/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@
 from autots.models.cassandra import Cassandra


-__version__ = '0.6.19'
+__version__ = '0.6.20'

 TransformTS = GeneralTransformer

autots/evaluator/auto_model.py

Lines changed: 72 additions & 45 deletions
@@ -1481,6 +1481,7 @@ def model_forecast(
             current_model_file=current_model_file,
             model_count=model_count,
             force_gc=force_gc,
+            internal_validation=False,  # allow sub ensembles to have postprocessing
         )
         model_id = create_model_id(
             df_forecast.model_name,
@@ -1527,6 +1528,7 @@ def model_forecast(
                 forecast_length=forecast_length,
             )
             transformer_object.fit(df_train)
+            # forecast inverse MUST come before upper and lower bounds inverse
             ens_forecast.forecast = transformer_object.inverse_transform(
                 ens_forecast.forecast
             )
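The new comment in this hunk codifies an ordering constraint: the point forecast must go through inverse_transform before the upper and lower bounds, since some transformers derive state during the forecast pass (for example last-value alignment) that the bounds then reuse. A hedged sketch of that pattern, assuming a fitted GeneralTransformer and an AutoTS-style prediction object exposing forecast / lower_forecast / upper_forecast DataFrames:

```python
from autots.tools.transform import GeneralTransformer


def inverse_in_required_order(transformer_object: GeneralTransformer, prediction, df_train):
    """Apply inverse_transform in the order the comment above requires.

    `prediction` is assumed to expose forecast / lower_forecast / upper_forecast
    DataFrames (as AutoTS prediction objects do); treat this as a sketch, not
    the library's internal code path.
    """
    transformer_object.fit(df_train)
    # point forecast first: some transformers set state here that the bounds reuse
    prediction.forecast = transformer_object.inverse_transform(prediction.forecast)
    # bounds only after the forecast has been inverted
    prediction.upper_forecast = transformer_object.inverse_transform(prediction.upper_forecast)
    prediction.lower_forecast = transformer_object.inverse_transform(prediction.lower_forecast)
    return prediction
```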
@@ -1736,7 +1738,7 @@ def _eval_prediction_for_template(


 horizontal_post_processors = [
-    {
+    {  # consistently used as best
         "fillna": "fake_date",
         "transformations": {"0": "AlignLastValue", "1": "AlignLastValue"},
         "transformation_params": {
@@ -1785,7 +1787,7 @@ def _eval_prediction_for_template(
         },
     },  # best wasserstein on daily
     # {"fillna": "linear", "transformations": {"0": "bkfilter", "1": "DifferencedTransformer", "2": "BKBandpassFilter"}, "transformation_params": {"0": {}, "1": {"lag": 1, "fill": "zero"}, "2": {"low": 12, "high": 32, "K": 6, "lanczos_factor": False, "return_diff": False, "on_transform": False, "on_inverse": True}}},
-    {
+    {  # observed used best on LRP 2025-02-20, neat
         "fillna": "rolling_mean_24",
         "transformations": {"0": "bkfilter", "1": "FIRFilter", "2": "AlignLastDiff"},
         "transformation_params": {
@@ -1834,8 +1836,8 @@ def _eval_prediction_for_template(
                 "threshold_method": "mean",
             },
         },
-    },  # best mae on daily, a bit weird otherwise, 1x best mage daily
-    {
+    },
+    {  # best mae on daily, a bit weird otherwise, 1x best mage daily
         "fillna": "median",
         "transformations": {
             "0": "DiffSmoother",
@@ -1867,7 +1869,6 @@ def _eval_prediction_for_template(
         "fillna": "fake_date",
         "transformations": {
             "0": "AlignLastValue",
-            "1": "PositiveShift",
             "2": "HistoricValues",
         },
         "transformation_params": {
@@ -1880,8 +1881,7 @@ def _eval_prediction_for_template(
                 "threshold": 10,
                 "threshold_method": "mean",
             },
-            "1": {},
-            "2": {"window": 28},
+            "1": {"window": 28},
         },
     },  # best competition on VN1
     {
@@ -2021,44 +2021,6 @@ def _eval_prediction_for_template(
             },
         },
     },
-    {  # balanced on wiki daily
-        "fillna": "cubic",
-        "transformations": {"0": "AlignLastValue", "1": "DatepartRegression"},
-        "transformation_params": {
-            "0": {
-                "rows": 1,
-                "lag": 7,
-                "method": "multiplicative",
-                "strength": 0.9,
-                "first_value_only": False,
-                "threshold": 3,
-                "threshold_method": "max",
-            },
-            "1": {
-                "regression_model": {
-                    "model": "ElasticNet",
-                    "model_params": {
-                        "l1_ratio": 0.5,
-                        "fit_intercept": True,
-                        "selection": "cyclic",
-                        "max_iter": 1000,
-                    },
-                },
-                "datepart_method": "common_fourier",
-                "polynomial_degree": None,
-                "transform_dict": {
-                    "fillna": None,
-                    "transformations": {"0": "ClipOutliers"},
-                    "transformation_params": {
-                        "0": {"method": "clip", "std_threshold": 4}
-                    },
-                },
-                "holiday_countries_used": False,
-                "lags": None,
-                "forward_lags": None,
-            },
-        },
-    },
     {  # best on VPV, 19.7 smape
         "fillna": "quadratic",
         "transformations": {"0": "AlignLastValue", "1": "ChangepointDetrend"},
@@ -2080,6 +2042,69 @@ def _eval_prediction_for_template(
             },
         },
     },
+    {  # hand tuned, might be replaceable with better FIR combination
+        'fillna': 'fake_date',
+        'transformations': {
+            '0': 'FIRFilter',
+            "1": "AlignLastValue",
+            "2": "AlignLastValue",
+        },
+        'transformation_params': {
+            '0': {
+                'numtaps': 32,
+                'cutoff_hz': 0.1,
+                'window': "triang",
+                'sampling_frequency': 12,
+                'on_transform': False,
+                'on_inverse': True,
+                'bounds_only': True,
+            },
+            "1": {
+                "rows": 1,
+                "lag": 1,
+                "method": "multiplicative",
+                "strength": 1.0,
+                "first_value_only": False,
+                "threshold": None,
+                "threshold_method": "mean",
+            },
+            "2": {
+                "rows": 1,
+                "lag": 1,
+                "method": "multiplicative",
+                "strength": 1.0,
+                "first_value_only": True,
+                "threshold": 10,
+                "threshold_method": "max",
+            },
+        },
+    },
+    {  # on wiki daily horizontal, mainly smape
+        'fillna': 'ffill',
+        'transformations': {
+            '0': 'LevelShiftTransformer',
+            '1': 'Constraint',
+            '2': 'HistoricValues',
+        },
+        'transformation_params': {
+            '0': {
+                'window_size': 120,
+                'alpha': 3.5,
+                'grouping_forward_limit': 3,
+                'max_level_shifts': 5,
+                'alignment': 'rolling_diff',
+            },
+            '1': {
+                'constraint_method': 'dampening',
+                'constraint_direction': 'upper',
+                'constraint_regularization': 1.0,
+                'constraint_value': 0.99,
+                'bounds_only': False,
+                'fillna': None,
+            },
+            '2': {'window': None},
+        },
+    },
 ]


@@ -2197,6 +2222,7 @@ def virtual_memory():
         if ensemble_input == 2 and transformation_dict:
             # SKIP BECAUSE TRANSFORMERS (PRE DEFINED) ARE DONE BELOW TO REDUCE FORECASTS RERUNS
             # ON INTERNAL VALIDATION ONLY ON TEMPLATES
+            # this does mean that "custom" postprocessing won't work with template wizard
             if verbose >= 1:
                 print(
                     "skipping horizontal with transformation due to that being done on internal validation"
@@ -2299,6 +2325,7 @@ def virtual_memory():
                 forecast_length=forecast_length,
             )
             transformer_object.fit(df_train)
+            # forecast inverse MUST come before upper and lower bounds inverse
             df_forecast2.forecast = transformer_object.inverse_transform(
                 df_forecast2.forecast
             )
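Each entry in the horizontal_post_processors list added above is an ordinary GeneralTransformer parameter dict (fillna, transformations, transformation_params), applied the way the adjacent hunks show: fit on the training history, then inverse_transform the forecast. A hedged, self-contained sketch of exercising such a config outside the ensemble code (the toy data is illustrative; the forecast_length keyword mirrors the constructor call visible in the hunks above):

```python
import numpy as np
import pandas as pd
from autots.tools.transform import GeneralTransformer

# a minimal config in the same shape as the horizontal_post_processors entries;
# parameter values are taken from ones visible in the diff, and any single
# entry from that list could be dropped in here instead
params = {
    "fillna": "ffill",
    "transformations": {"0": "AlignLastValue"},
    "transformation_params": {
        "0": {
            "rows": 1,
            "lag": 1,
            "method": "multiplicative",
            "strength": 1.0,
            "first_value_only": False,
            "threshold": None,
            "threshold_method": "mean",
        },
    },
}

# toy wide-format data (datetime index x series), purely illustrative
idx = pd.date_range("2024-01-01", periods=100, freq="D")
df_train = pd.DataFrame(np.random.rand(100, 2), index=idx, columns=["a", "b"])
raw_forecast = pd.DataFrame(
    np.random.rand(14, 2),
    index=pd.date_range(idx[-1] + pd.Timedelta(days=1), periods=14, freq="D"),
    columns=["a", "b"],
)

post = GeneralTransformer(**params, forecast_length=14)  # mirrors the constructor call above
post.fit(df_train)
# per the in-code comments, the point forecast is inverted before any bounds
adjusted_forecast = post.inverse_transform(raw_forecast)
```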

autots/evaluator/auto_ts.py

Lines changed: 71 additions & 42 deletions
@@ -205,7 +205,7 @@ def __init__(
         transformer_list: dict = "auto",
         transformer_max_depth: int = 6,
         models_mode: str = "random",
-        num_validations: str = "auto",
+        num_validations: int = "auto",
         models_to_validate: float = 0.15,
         max_per_model_class: int = None,
         validation_method: str = 'backwards',
@@ -1449,18 +1449,21 @@ def fit(
                     ensemble=self.ensemble,
                     score_per_series=self.score_per_series,
                 )
-                self._run_template(
-                    ensemble_templates,
-                    df_train,
-                    df_test,
-                    future_regressor_train=future_regressor_train,
-                    future_regressor_test=future_regressor_test,
-                    current_weights=current_weights,
-                    validation_round=0,
-                    max_generations="Ensembles",
-                    current_generation=(current_generation + 1),
-                    result_file=result_file,
-                )
+                if not ensemble_templates.empty:
+                    self._run_template(
+                        self.ensemble_templates,
+                        df_train,
+                        df_test,
+                        future_regressor_train=future_regressor_train,
+                        future_regressor_test=future_regressor_test,
+                        current_weights=current_weights,
+                        validation_round=0,
+                        max_generations="Ensembles",
+                        current_generation=(current_generation + 1),
+                        result_file=result_file,
+                    )
+                elif "simple" in self.ensemble:
+                    print("Simple ensemble missing, error unclear")
             except Exception as e:
                 print(
                     f"Ensembling Error: {repr(e)}: {''.join(tb.format_exception(None, e, e.__traceback__))}"
@@ -1506,25 +1509,26 @@ def fit(
                     score_per_series=self.score_per_series,
                 )
                 self.ensemble_templates2 = ensemble_templates
-                self._run_template(
-                    ensemble_templates,
-                    df_train,
-                    df_test,
-                    future_regressor_train=future_regressor_train,
-                    future_regressor_test=future_regressor_test,
-                    current_weights=current_weights,
-                    validation_round=0,
-                    max_generations="Ensembles",
-                    current_generation=(current_generation + 2),
-                    result_file=result_file,
-                )
-                self._run_validations(
-                    df_wide_numeric=self.df_wide_numeric,
-                    num_validations=self.num_validations,
-                    validation_template=ensemble_templates,
-                    future_regressor=self.future_regressor_train,
-                    first_validation=False,
-                )
+                if not ensemble_templates.empty:
+                    self._run_template(
+                        ensemble_templates,
+                        df_train,
+                        df_test,
+                        future_regressor_train=future_regressor_train,
+                        future_regressor_test=future_regressor_test,
+                        current_weights=current_weights,
+                        validation_round=0,
+                        max_generations="Ensembles",
+                        current_generation=(current_generation + 2),
+                        result_file=result_file,
+                    )
+                    self._run_validations(
+                        df_wide_numeric=self.df_wide_numeric,
+                        num_validations=self.num_validations,
+                        validation_template=ensemble_templates,
+                        future_regressor=self.future_regressor_train,
+                        first_validation=False,
+                    )
             except Exception as e:
                 print(
                     f"Post-Validation Ensembling Error: {repr(e)}: {''.join(tb.format_exception(None, e, e.__traceback__))}"
@@ -2032,12 +2036,23 @@ def _run_template(
         # gather results of template run
         if not return_template:
             self.initial_results = self.initial_results.concat(template_result)
-            scores, score_dict = generate_score(
-                self.initial_results.model_results,
-                metric_weighting=self.metric_weighting,
-                prediction_interval=self.prediction_interval,
-                return_score_dict=True,
-            )
+            try:
+                scores, score_dict = generate_score(
+                    self.initial_results.model_results,
+                    metric_weighting=self.metric_weighting,
+                    prediction_interval=self.prediction_interval,
+                    return_score_dict=True,
+                )
+            except Exception as e:
+                mod_res = self.initial_results.model_results
+                print(mod_res.head())
+                print(self.metric_weighting)
+                print(mod_res.columns)
+                print(mod_res.index)
+                print(
+                    f"Succeeded model count this template: {mod_res[mod_res['Exceptions'].isnull()].shape[0]}. If this is zero, try importing a different template or changing initial template. Check data too."
+                )
+                raise ValueError("unknown score generation error") from e
             self.initial_results.model_results['Score'] = scores
             self.score_breakdown = pd.DataFrame(score_dict).set_index("ID")
         else:
@@ -2442,6 +2457,7 @@ def export_template(
         min_metrics: list = ['smape', 'spl', 'wasserstein', 'mle', 'imle', 'ewmae'],
         max_metrics: list = None,
         focus_models: list = None,
+        include_ensemble: bool = True,
     ):
         """Export top results as a reusable template.

@@ -2457,6 +2473,7 @@ def export_template(
             min_metrics (list): if not None and models=='best', include the lowest for this metric, a way to include even if not a major part of metric weighting as an addon
             max_metrics (list): for metrics to take the max model for
             focus_models (list): also pull the best score/min/max metrics as per just this model
+            include_ensemble (bool): if False, exclude Ensembles (ignored with "all" models)
         """
         if models == 'all':
             export_template = self.initial_results.model_results[self.template_cols_id]
@@ -2472,6 +2489,8 @@ def export_template(
                 (export_template['Runs'] >= (self.num_validations + 1))
                 | (export_template['Ensemble'] >= 2)
             ]
+            if not include_ensemble:
+                export_template = export_template[export_template["Ensemble"] == 0]
             # clean up any bad data (hopefully there is none anyway...)
             export_template = export_template[
                 (~export_template['ModelParameters'].isnull())
@@ -2557,11 +2576,12 @@ def export_template(
             if not include_results:
                 export_template = export_template[self.template_cols_id]
         elif models == "slowest":
+            export_template = self.initial_results.model_results
+            if not include_ensemble:
+                export_template = export_template[export_template["Ensemble"] == 0]
             return self.save_template(
                 filename,
-                self.initial_results.model_results.nlargest(
-                    n, columns=['TotalRuntime']
-                ),
+                export_template.nlargest(n, columns=['TotalRuntime']),
             )
         else:
             raise ValueError("`models` must be 'all' or 'best' or 'slowest'")
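A hedged usage sketch of the new include_ensemble flag (assuming a fitted AutoTS instance named model; the filenames are placeholders and the other arguments are pre-existing export_template parameters):

```python
# export the top non-ensemble models only; per the docstring above,
# include_ensemble is honored for models="best" and is ignored for models="all"
model.export_template(
    "best_models_no_ensembles.csv",
    models="best",
    n=20,
    include_ensemble=False,
)

# the "slowest" export now also respects the flag
model.export_template(
    "slowest_models.csv",
    models="slowest",
    n=10,
    include_ensemble=False,
)
```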
@@ -4351,8 +4371,17 @@ def plot_chosen_transformer(

         # Create a second y-axis sharing the x-axis
         ax2 = ax1.twinx()
+        col_here = (
+            col
+            if col in df2.columns
+            else [colz for colz in df2.columns if col in colz]
+        )
         ax2.plot(
-            df2.index, df2[col], color=color2, linestyle='--', label='transformed'
+            df2.index,
+            df2[col_here],
+            color=color2,
+            linestyle='--',
+            label='transformed',
         )
         ax2.set_ylabel('transformed', color=color2, fontsize=12)
         ax2.tick_params(axis='y', labelcolor=color2)
