Skip to content

Commit 39401fc

Browse files
authored
Merge pull request #212 from winedarksea/dev
0.6.4
2 parents f1254ab + a799a0f commit 39401fc

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+1076
-282
lines changed

TODO.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,12 @@
1212
* The most recent data will generally be the most important
1313
* Forecasts are desired for the future immediately following the most recent data.
1414

15-
# 0.6.3 🎐🎐🎐
16-
* energy datasets to load_live_daily
17-
* improved the 'Scalable' transformer_list to reduce memory issues on larger datasets
18-
* memory improvements to KalmanSmoother, HolidayTransformer, LocalLinearTrend
19-
* added DiffSmoother
20-
* added force_gc arg which can be tried if memory is in short supply relative to data (probably won't help much)
15+
# 0.6.4 🔜🔜🔜
16+
* adjusted n_jobs back to minus 1 for multivariatemotif
17+
* fixed bug with plot_validations not working with some frequencies
18+
* force_validation added to import_template
19+
* model_list now enforced in new generations
20+
* added NeuralForecast
2121

2222
### Unstable Upstream Packages (those that are frequently broken by maintainers)
2323
* Pytorch-Forecasting

autots/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,14 @@
1919
from autots.evaluator.auto_ts import AutoTS
2020
from autots.evaluator.event_forecasting import EventRiskForecast
2121
from autots.tools.transform import GeneralTransformer, RandomTransform
22-
from autots.tools.shaping import long_to_wide
22+
from autots.tools.shaping import long_to_wide, infer_frequency
2323
from autots.tools.regressor import create_lagged_regressor, create_regressor
2424
from autots.evaluator.auto_model import model_forecast
2525
from autots.evaluator.anomaly_detector import AnomalyDetector, HolidayDetector
2626
from autots.models.cassandra import Cassandra
2727

2828

29-
__version__ = '0.6.3'
29+
__version__ = '0.6.4'
3030

3131
TransformTS = GeneralTransformer
3232

@@ -53,4 +53,5 @@
5353
'AnomalyDetector',
5454
'HolidayDetector',
5555
'Cassandra',
56+
'infer_frequency',
5657
]

autots/evaluator/auto_model.py

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,19 @@ def ModelMonster(
683683
n_jobs=n_jobs,
684684
**parameters,
685685
)
686+
elif model in ["NeuralForecast", "neuralforecast"]:
687+
from autots.models.neural_forecast import NeuralForecast
688+
689+
return NeuralForecast(
690+
frequency=frequency,
691+
forecast_length=forecast_length,
692+
prediction_interval=prediction_interval,
693+
holiday_country=holiday_country,
694+
random_seed=random_seed,
695+
verbose=verbose,
696+
n_jobs=n_jobs,
697+
**parameters,
698+
)
686699
else:
687700
raise AttributeError(
688701
("Model String '{}' not a recognized model type").format(model)
@@ -1007,6 +1020,11 @@ def concat(self, another_eval):
10071020
ignore_index=True,
10081021
sort=False,
10091022
).reset_index(drop=True)
1023+
self.per_series_metrics = pd.concat(
1024+
[self.per_series_metrics, another_eval.per_series_metrics],
1025+
axis=0,
1026+
sort=False,
1027+
)
10101028
self.per_series_mae = pd.concat(
10111029
[self.per_series_mae, another_eval.per_series_mae], axis=0, sort=False
10121030
)
@@ -2142,6 +2160,7 @@ def NewGeneticTemplate(
21422160
models_mode: str = "default",
21432161
score_per_series=None,
21442162
recursive_count=0,
2163+
model_list=None,
21452164
# UPDATE RECURSIVE section if adding or removing params
21462165
):
21472166
"""
@@ -2155,6 +2174,8 @@ def NewGeneticTemplate(
21552174
21562175
"""
21572176
new_template_list = []
2177+
if model_list is None:
2178+
model_list = model_results['Model'].unique().tolist()
21582179

21592180
# filter existing templates
21602181
sorted_results = model_results[
@@ -2210,8 +2231,11 @@ def NewGeneticTemplate(
22102231
sidx = {name: i for i, name in enumerate(list(sorted_results), start=1)}
22112232
for row in sorted_results.itertuples(name=None):
22122233
n = n_list[counter]
2213-
counter += 1
22142234
model_type = row[sidx["Model"]]
2235+
# skip models not in the model_list
2236+
if model_type not in model_list:
2237+
continue
2238+
counter += 1
22152239
model_params = row[sidx["ModelParameters"]]
22162240
try:
22172241
trans_params = json.loads(row[sidx["TransformationParameters"]])
@@ -2356,6 +2380,7 @@ def NewGeneticTemplate(
23562380
models_mode=models_mode,
23572381
score_per_series=score_per_series,
23582382
recursive_count=recursive_count,
2383+
model_list=model_list,
23592384
)
23602385
# enjoy the privilege
23612386
elif new_template.shape[0] < max_results:
@@ -2373,15 +2398,18 @@ def NewGeneticTemplate(
23732398
)
23742399

23752400

2376-
def validation_aggregation(validation_results, df_train=None):
2377-
"""Aggregate a TemplateEvalObject."""
2378-
groupby_cols = [
2401+
def validation_aggregation(
2402+
validation_results,
2403+
df_train=None,
2404+
groupby_cols=[
23792405
'ID',
23802406
'Model',
23812407
'ModelParameters',
23822408
'TransformationParameters',
23832409
'Ensemble',
2384-
]
2410+
],
2411+
):
2412+
"""Aggregate a TemplateEvalObject."""
23852413
col_aggs = {
23862414
'Runs': 'sum',
23872415
'smape': 'mean',
@@ -2443,9 +2471,8 @@ def validation_aggregation(validation_results, df_train=None):
24432471
validation_results.model_results = validation_results.model_results.replace(
24442472
[np.inf, -np.inf], np.nan
24452473
)
2446-
validation_results.model_results = validation_results.model_results.groupby(
2447-
groupby_cols
2448-
).agg(col_aggs)
2474+
grouped = validation_results.model_results.groupby(groupby_cols)
2475+
validation_results.model_results = grouped.agg(col_aggs)
24492476
validation_results.model_results = validation_results.model_results.reset_index(
24502477
drop=False
24512478
)

autots/evaluator/auto_ts.py

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
NumericTransformer,
1818
clean_weights,
1919
infer_frequency,
20+
freq_to_timedelta,
2021
)
2122
from autots.tools.transform import GeneralTransformer, RandomTransform
2223
from autots.evaluator.auto_model import (
@@ -238,6 +239,7 @@ def __init__(
238239
self.models_mode = models_mode
239240
self.current_model_file = current_model_file
240241
self.force_gc = force_gc
242+
self.validate_import = None
241243
random.seed(self.random_seed)
242244
if self.max_generations is None and self.generation_timeout is not None:
243245
self.max_generations = 99999
@@ -725,9 +727,19 @@ def get_new_params(method='random'):
725727
},
726728
},
727729
},
730+
{
731+
"fillna": None,
732+
"transformations": {"0": "CenterSplit"},
733+
"transformation_params": {
734+
"0": {
735+
'fillna': 'ffill',
736+
'center': 'zero',
737+
},
738+
},
739+
},
728740
'random',
729741
],
730-
[0.9, 0.1, 0.05, 0.1, 0.1, 0.1, 0.1, 0.05, 0.15, 0.1],
742+
[0.9, 0.1, 0.05, 0.1, 0.1, 0.1, 0.1, 0.05, 0.15, 0.05, 0.1],
731743
)[0]
732744
if preclean_choice == "random":
733745
preclean_choice = RandomTransform(
@@ -1100,6 +1112,7 @@ def fit(
11001112
"""
11011113
self.model = None
11021114
self.grouping_ids = grouping_ids
1115+
self.fitStart = pd.Timestamp.now()
11031116

11041117
# convert class variables to local variables (makes testing easier)
11051118
if self.validation_method == "custom":
@@ -1117,7 +1130,6 @@ def fit(
11171130
else:
11181131
self.validation_indexes = []
11191132

1120-
prediction_interval = self.prediction_interval
11211133
random_seed = self.random_seed
11221134
metric_weighting = self.metric_weighting
11231135
verbose = self.verbose
@@ -1285,6 +1297,7 @@ def fit(
12851297
transformer_max_depth=self.transformer_max_depth,
12861298
models_mode=self.models_mode,
12871299
score_per_series=self.score_per_series,
1300+
model_list=self.model_list,
12881301
)
12891302
submitted_parameters = pd.concat(
12901303
[submitted_parameters, new_template],
@@ -1406,6 +1419,12 @@ def fit(
14061419
subset=['Model', 'ModelParameters', 'TransformationParameters']
14071420
)
14081421
self.validation_template = validation_template[self.template_cols]
1422+
if self.validate_import is not None:
1423+
self.validation_template = pd.concat(
1424+
[self.validation_template, self.validate_import]
1425+
).drop_duplicates(
1426+
subset=['Model', 'ModelParameters', 'TransformationParameters']
1427+
)
14091428

14101429
# run validations
14111430
if self.num_validations > 0:
@@ -1740,6 +1759,7 @@ def fit(
17401759

17411760
# clean up any remaining print statements
17421761
sys.stdout.flush()
1762+
self.fitRuntime = pd.Timestamp.now() - self.fitStart
17431763
return self
17441764

17451765
def validation_agg(self):
@@ -2449,6 +2469,7 @@ def import_template(
24492469
enforce_model_list: bool = True,
24502470
include_ensemble: bool = False,
24512471
include_horizontal: bool = False,
2472+
force_validation: bool = False,
24522473
):
24532474
"""Import a previously exported template of model parameters.
24542475
Must be done before the AutoTS object is .fit().
@@ -2459,6 +2480,7 @@ def import_template(
24592480
enforce_model_list (bool): if True, remove model types not in model_list
24602481
include_ensemble (bool): if enforce_model_list is True, this specifies whether to allow ensembles anyway (otherwise they are unpacked and parts kept)
24612482
include_horizontal (bool): if enforce_model_list is True, this specifies whether to allow ensembles except horizontal (overridden by keep_ensemble)
2483+
force_validation (bool): if True, all models imported here will automatically get sent to full cross validation (regardless of first eval performance)
24622484
"""
24632485
if method.lower() in ['add on', 'addon', 'add_on']:
24642486
addon_flag = True
@@ -2496,6 +2518,14 @@ def import_template(
24962518
else:
24972519
return ValueError("method must be 'addon' or 'only'")
24982520

2521+
if force_validation:
2522+
if self.validate_import is None:
2523+
self.validate_import = import_template
2524+
else:
2525+
self.validate_import = pd.concat(
2526+
[self.validate_import, import_template]
2527+
)
2528+
24992529
return self
25002530

25012531
def export_best_model(self, filename, **kwargs):
@@ -3136,7 +3166,9 @@ def plot_validations(
31363166
used_freq = self.used_frequency
31373167
start_date = plot_df[plot_df.columns.difference(['actuals'])].dropna(
31383168
how='all', axis=0
3139-
).index.min() - (pd.to_timedelta(used_freq) * int(self.forecast_length * 3))
3169+
).index.min() - (
3170+
freq_to_timedelta(used_freq) * int(self.forecast_length * 3)
3171+
)
31403172
if end_date == "auto":
31413173
end_date = plot_df[plot_df.columns.difference(['actuals'])].dropna(
31423174
how='all', axis=0

autots/models/base.py

Lines changed: 29 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -295,9 +295,12 @@ def extract_single_transformer(
295295
)
296296
return ", ".join(allz)
297297
else:
298-
trans_dict = transformation_params.get("transformations")
299-
if isinstance(trans_dict, dict):
300-
return ", ".join(list(trans_dict.values()))
298+
if isinstance(transformation_params, dict):
299+
trans_dict = transformation_params.get("transformations")
300+
if isinstance(trans_dict, dict):
301+
return ", ".join(list(trans_dict.values()))
302+
else:
303+
return "None"
301304
else:
302305
return "None"
303306

@@ -333,6 +336,7 @@ def plot_distributions(
333336
y_col='TotalRuntimeSeconds',
334337
xlim=None,
335338
xlim_right=None,
339+
title_suffix="",
336340
):
337341
import matplotlib.pyplot as plt
338342
import seaborn as sns
@@ -391,7 +395,7 @@ def plot_distributions(
391395
plt.legend(handles, labels, title=group_col) # , bbox_to_anchor=(1.05, 1), loc=2
392396

393397
# Adding titles and labels
394-
plt.title(f'Distribution of {y_col} by {group_col}', fontsize=16)
398+
plt.title(f'Distribution of {y_col} by {group_col}{title_suffix}', fontsize=16)
395399
plt.xlabel(f'{y_col}', fontsize=14)
396400
plt.ylabel('Density', fontsize=14)
397401

@@ -491,48 +495,49 @@ def long_form_results(
491495
value_name="Value",
492496
interval_name='PredictionInterval',
493497
update_datetime_name=None,
498+
datetime_column=None,
494499
):
495500
"""Export forecasts (including upper and lower) as single 'long' format output
496501
497502
Args:
498503
id_name (str): name of column containing ids
499504
value_name (str): name of column containing numeric values
500505
interval_name (str): name of column telling you what is upper/lower
506+
datetime_column (str): if None, is index, otherwise, name of column for datetime
501507
update_datetime_name (str): if not None, adds column with current timestamp and this name
502508
503509
Returns:
504510
pd.DataFrame
505511
"""
506-
try:
507-
upload = pd.melt(
508-
self.forecast,
509-
var_name=id_name,
510-
value_name=value_name,
511-
ignore_index=False,
512-
)
513-
except Exception:
514-
raise ImportError("Requires pandas>=1.1.0")
512+
upload = pd.melt(
513+
self.forecast.reset_index(names='datetime'),
514+
var_name="SeriesID",
515+
value_name="Value",
516+
id_vars="datetime",
517+
).set_index("datetime")
515518
upload[interval_name] = "50%"
516519
upload_upper = pd.melt(
517-
self.upper_forecast,
518-
var_name=id_name,
519-
value_name=value_name,
520-
ignore_index=False,
521-
)
520+
self.upper_forecast.reset_index(names='datetime'),
521+
var_name="SeriesID",
522+
value_name="Value",
523+
id_vars="datetime",
524+
).set_index("datetime")
522525
upload_upper[
523526
interval_name
524527
] = f"{round(100 - ((1- self.prediction_interval)/2) * 100, 0)}%"
525528
upload_lower = pd.melt(
526-
self.lower_forecast,
527-
var_name=id_name,
528-
value_name=value_name,
529-
ignore_index=False,
530-
)
529+
self.lower_forecast.reset_index(names='datetime'),
530+
var_name="SeriesID",
531+
value_name="Value",
532+
id_vars="datetime",
533+
).set_index("datetime")
531534
upload_lower[
532535
interval_name
533536
] = f"{round(((1- self.prediction_interval)/2) * 100, 0)}%"
534537

535538
upload = pd.concat([upload, upload_upper, upload_lower], axis=0)
539+
if datetime_column is not None:
540+
upload = upload.reset_index(drop=False, names=datetime_column)
536541
if update_datetime_name is not None:
537542
upload[update_datetime_name] = datetime.datetime.utcnow()
538543
return upload
@@ -570,6 +575,7 @@ def plot_ensemble_runtimes(self, xlim_right=None):
570575
y_col='TotalRuntimeSeconds',
571576
xlim=0,
572577
xlim_right=xlim_right,
578+
title_suffix=" in Chosen Ensemble",
573579
)
574580

575581
def plot_df(

autots/models/basics.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1269,7 +1269,7 @@ def predict(
12691269

12701270
# joblib multiprocessing to loop through series
12711271
if self.parallel:
1272-
df_list = Parallel(n_jobs=(self.n_jobs))(
1272+
df_list = Parallel(n_jobs=(self.n_jobs - 1))(
12731273
delayed(looped_motif)(
12741274
Xa=x.reshape(-1, x.shape[-1]) if self.multivariate else x[:, i],
12751275
Xb=self.df.iloc[-self.window :, i].to_numpy().reshape(1, -1),

0 commit comments

Comments
 (0)