Skip to content

Commit 1c28035

Browse files
authored
Merge pull request #219 from winedarksea/dev
0.6.6
2 parents 5127056 + 85d1257 commit 1c28035

29 files changed

+76
-48
lines changed

TODO.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,8 @@
1212
* The most recent data will generally be the most important
1313
* Forecasts are desired for the future immediately following the most recent data.
1414

15-
# 0.6.5 🏮🏮🏮
16-
* horizontal and mosaic upgrades
17-
* bug fixes and template updates
15+
# 0.6.6 🐌🐌🐌
16+
* bug fixes, particularly compatibility for the archaic pandas 1.0.3 still used at a certain big tech company
1817

1918
### Unstable Upstream Packages (those that are frequently broken by maintainers)
2019
* Pytorch-Forecasting

autots/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
from autots.models.cassandra import Cassandra
2727

2828

29-
__version__ = '0.6.5'
29+
__version__ = '0.6.6'
3030

3131
TransformTS = GeneralTransformer
3232

autots/evaluator/auto_ts.py

Lines changed: 38 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ class AutoTS(object):
134134
if True, recommend use in conjunction with `verbose` > 0 and `result_file` in the event of accidental complete termination.
135135
if "end_generation", as True and also ends entire generation of run. Note skipped models will not be tried again.
136136
generation_timeout (int): if not None, this is the number of minutes from start at which the generational search ends, then proceeding to validation
137-
This is only checked after the end of each generation, so only offers an 'approximate' timeout for searching
137+
This is only checked after the end of each generation, so only offers an 'approximate' timeout for searching. It is an overall cap for total generation search time, not per generation.
138138
current_model_file (str): file path to write to disk of current model params (for debugging if computer crashes). .json is appended
139139
force_gc (bool): if True, run gc.collect() after each model run. Probably won't make much difference.
140140
verbose (int): setting to 0 or lower should reduce most output. Higher numbers give more output.
@@ -1198,7 +1198,7 @@ def fit(
11981198

11991199
# unpack ensemble models so sub models appear at highest level
12001200
self.initial_template = unpack_ensemble_models(
1201-
self.initial_template,
1201+
self.initial_template.copy(),
12021202
self.template_cols,
12031203
keep_ensemble=True,
12041204
recursive=True,
@@ -3225,6 +3225,7 @@ def plot_validations(
32253225
if df_wide is None:
32263226
df_wide = self.df_wide_numeric
32273227
# choose which series to plot
3228+
agg_flag = False
32283229
if series is None:
32293230
if subset is None:
32303231
series = random.choice(df_wide.columns)
@@ -3241,6 +3242,9 @@ def plot_validations(
32413242
series = mapes[0]
32423243
elif str(subset).lower() == "worst score":
32433244
series = scores[0]
3245+
elif str(subset).lower() == "agg":
3246+
agg_flag = True
3247+
series = "Aggregate Forecasts"
32443248
else:
32453249
raise ValueError(
32463250
"plot_validations arg subset must be None, 'best' or 'worst'"
@@ -3260,23 +3264,40 @@ def plot_validations(
32603264
mname = x.split("_")[1]
32613265
if mname == "chosen" or mname in needed_mods:
32623266
new_df = pd.DataFrame(index=df_wide.index)
3263-
new_df[mname] = self.validation_forecasts[x].forecast[series]
3264-
new_df[mname + "_" + "upper"] = self.validation_forecasts[
3265-
x
3266-
].upper_forecast[series]
3267-
new_df[mname + "_" + "lower"] = self.validation_forecasts[
3268-
x
3269-
].lower_forecast[series]
3267+
if agg_flag:
3268+
new_df[mname] = self.validation_forecasts[x].forecast.sum(axis=1)
3269+
new_df[mname + "_" + "upper"] = self.validation_forecasts[
3270+
x
3271+
].upper_forecast.sum(axis=1)
3272+
new_df[mname + "_" + "lower"] = self.validation_forecasts[
3273+
x
3274+
].lower_forecast.sum(axis=1)
3275+
else:
3276+
new_df[mname] = self.validation_forecasts[x].forecast[series]
3277+
new_df[mname + "_" + "upper"] = self.validation_forecasts[
3278+
x
3279+
].upper_forecast[series]
3280+
new_df[mname + "_" + "lower"] = self.validation_forecasts[
3281+
x
3282+
].lower_forecast[series]
32703283
df_list.append(new_df)
32713284
plot_df = pd.concat(df_list, sort=True, axis=0)
32723285
# self.val_plot_df = plot_df.copy()
32733286
plot_df = plot_df.groupby(level=0).last()
3274-
plot_df = (
3275-
df_wide[series]
3276-
.rename("actuals")
3277-
.to_frame()
3278-
.merge(plot_df, left_index=True, right_index=True, how="left")
3279-
)
3287+
if agg_flag:
3288+
plot_df = (
3289+
df_wide.sum(axis=1)
3290+
.rename("actuals")
3291+
.to_frame()
3292+
.merge(plot_df, left_index=True, right_index=True, how="left")
3293+
)
3294+
else:
3295+
plot_df = (
3296+
df_wide[series]
3297+
.rename("actuals")
3298+
.to_frame()
3299+
.merge(plot_df, left_index=True, right_index=True, how="left")
3300+
)
32803301
if not include_bounds:
32813302
colb = [
32823303
x for x in plot_df.columns if "_lower" not in x and "_upper" not in x
@@ -3366,11 +3387,12 @@ def list_failed_model_types(self):
33663387
return temp[temp <= 0].index.to_list()
33673388

33683389
def best_model_per_series_mape(self):
3390+
"""This isn't quite classic mape but is a percentage mean error intended for quick visuals not final statistics (see model.results())."""
33693391
best_model_per_series_mae = self.initial_results.per_series_mae[
33703392
self.initial_results.per_series_mae.index == self.best_model_id
33713393
].mean(axis=0)
33723394
# obsess over avoiding division by zero
3373-
scaler = self.df_wide_numeric.mean(axis=0)
3395+
scaler = self.df_wide_numeric.abs().mean(axis=0)
33743396
scaler[scaler == 0] == np.nan
33753397
scaler = scaler.fillna(self.df_wide_numeric.max(axis=0))
33763398
scaler[scaler == 0] == 1

autots/models/base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -510,14 +510,14 @@ def long_form_results(
510510
pd.DataFrame
511511
"""
512512
upload = pd.melt(
513-
self.forecast.reset_index(names='datetime'),
513+
self.forecast.rename_axis(index='datetime').reset_index(),
514514
var_name="SeriesID",
515515
value_name="Value",
516516
id_vars="datetime",
517517
).set_index("datetime")
518518
upload[interval_name] = "50%"
519519
upload_upper = pd.melt(
520-
self.upper_forecast.reset_index(names='datetime'),
520+
self.upper_forecast.rename_axis(index='datetime').reset_index(),
521521
var_name="SeriesID",
522522
value_name="Value",
523523
id_vars="datetime",
@@ -526,7 +526,7 @@ def long_form_results(
526526
interval_name
527527
] = f"{round(100 - ((1- self.prediction_interval)/2) * 100, 0)}%"
528528
upload_lower = pd.melt(
529-
self.lower_forecast.reset_index(names='datetime'),
529+
self.lower_forecast.rename_axis(index='datetime').reset_index(),
530530
var_name="SeriesID",
531531
value_name="Value",
532532
id_vars="datetime",

autots/models/neural_forecast.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ def fit(self, df, future_regressor=None):
207207
raise ValueError(f"models not recognized: {models}")
208208

209209
# model params
210-
210+
# requires pandas >= 1.5
211211
silly_format = df.reset_index(names='ds').melt(
212212
id_vars='ds', value_name='y', var_name='unique_id'
213213
)

autots/models/statsmodels.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2005,12 +2005,16 @@ def theta_forecast_by_column(current_series, args):
20052005

20062006
def get_new_params(self, method: str = 'random'):
20072007
"""Return dict of new parameters for parameter tuning."""
2008+
if method in ["deep"]:
2009+
period = random.choices([None, 7, 28, 288], [0.8, 0.1, 0.1, 0.01])[0]
2010+
else:
2011+
period = None
20082012
return {
20092013
'deseasonalize': random.choices([True, False], [0.8, 0.2])[0],
20102014
'difference': random.choice([True, False]),
20112015
'use_test': random.choices([True, False], [0.4, 0.2])[0],
20122016
'method': "auto",
2013-
'period': None,
2017+
'period': period,
20142018
'theta': random.choice([1.2, 1.4, 1.6, 2, 2.5, 3, 4]),
20152019
'use_mle': random.choices([True, False], [0.0001, 0.99])[0],
20162020
}

autots/tools/seasonal.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ def seasonal_int(include_one: bool = False, small=False, very_small=False):
3333
60: 0.05,
3434
96: 0.04, # quarter in days
3535
168: 0.01,
36+
288: 0.001, # daily at 5 minute intervals
3637
364: 0.1, # year to weekday
3738
1440: 0.01,
3839
420: 0.01,
1.42 KB
Binary file not shown.
660 Bytes
Binary file not shown.
660 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)