Skip to content

Commit 0b846d2

Browse files
authored
0.2.5 (#19)
* readme long/wide clarification * readme II * long=True to all + fix errors in data shaping * working with wide * Update extended_tutorial.md * minor printing nuisances sklearn * further readme updates * 0.2.5 * try that again
1 parent d469d0e commit 0b846d2

35 files changed

+568
-366
lines changed

README.md

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -21,33 +21,42 @@ For other time series needs, check out the list [here](https://github.com/MaxBen
2121
* Option to use one or a combination of metrics for model selection
2222
* Import and export of templates allowing greater user customization
2323

24-
## Basic Use
24+
## Installation
2525
```
2626
pip install autots
2727
```
2828
This includes dependencies for basic models, but additional packages are required for some models and methods.
2929

30-
Input data is expected to come in a 'long' format with three columns:
31-
* Date (ideally already in pd.DateTime format)
32-
* Value
33-
* Series ID. For a single time series, series_id can be `= None`.
30+
## Basic Use
31+
32+
Input data is expected to come in either a *long* or a *wide* format:
3433

35-
The column name for each of these is passed to .fit().
34+
- The *wide* format is a `pandas.DataFrame` with a `pandas.DatetimeIndex` and each column a distinct series.
35+
- The *long* format has three columns:
36+
- Date (ideally already a pandas datetime dtype, e.g. converted with `pd.to_datetime`)
37+
- Series ID. For a single time series, series_id can be `= None`.
38+
- Value
39+
- For *long* data, the column name for each of these is passed to .fit() as `date_col`, `id_col`, and `value_col`. No parameters are needed for *wide* data.
3640

3741
```
38-
3942
# also: _hourly, _daily, _weekly, or _yearly
40-
from autots.datasets import load_monthly
43+
from autots.datasets import load_monthly
44+
4145
df_long = load_monthly()
4246
4347
from autots import AutoTS
44-
model = AutoTS(forecast_length=3, frequency='infer',
45-
prediction_interval=0.9, ensemble='all',
46-
model_list='superfast',
47-
max_generations=5, num_validations=2,
48-
validation_method='even')
49-
model = model.fit(df_long, date_col='datetime',
50-
value_col='value', id_col='series_id')
48+
49+
model = AutoTS(
50+
forecast_length=3,
51+
frequency='infer',
52+
prediction_interval=0.9,
53+
ensemble='all',
54+
model_list='superfast',
55+
max_generations=5,
56+
num_validations=2,
57+
validation_method='even',
58+
)
59+
model = model.fit(df_long, date_col='datetime', value_col='value', id_col='series_id')
5160
5261
# Print the details of the best model
5362
print(model)
@@ -59,7 +68,6 @@ forecasts_df = prediction.forecast
5968
model_results = model.results()
6069
# and aggregated from cross validation
6170
validation_results = model.results("validation")
62-
6371
```
6472

6573
Check out [extended_tutorial.md](https://winedarksea.github.io/AutoTS/build/html/source/tutorial.html) for a more detailed guide to features!

TODO.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@
1515
* Forecasts are desired for the future immediately following the most recent data.
1616

1717
# Latest
18-
* ARIMA to ARIMAX (for Statsmodels v0.13)
19-
* ARIMA parallelization
20-
* update of daily sample data, reduced space used by yearly and hourly
18+
* fix error where wide data import skipped cleaning steps
19+
* long=True/False for all example data
2120

2221
# Errors:
2322
DynamicFactor holidays Exceptions 'numpy.ndarray' object has no attribute 'values'
@@ -85,6 +84,7 @@ Tensorflow GPU backend may crash on occasion.
8584
* Rename from Best3 to BestN
8685
* Dicts instead of list of DFs
8786
* Add 'model_count' to parameters
87+
* allow best_model to be specified and entirely bypass the .fit() stage.
8888

8989
* check models from M5 competition results
9090
* minmaxscaler as scoring for weighted Score generation

autots/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from autots.tools.transform import GeneralTransformer
1414
from autots.tools.shaping import long_to_wide
1515

16-
__version__ = '0.2.4'
16+
__version__ = '0.2.5'
1717

1818

1919
__all__ = [

autots/datasets/_base.py

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,22 @@ def load_fred_monthly():
5151
return df_long
5252

5353

54-
def load_monthly():
54+
def load_monthly(long: bool = True):
5555
"""Federal Reserve of St. Louis monthly economic indicators."""
56-
return load_fred_monthly()
56+
if long:
57+
return load_fred_monthly()
58+
else:
59+
from autots.tools.shaping import long_to_wide
60+
61+
df_long = load_fred_monthly()
62+
df_wide = long_to_wide(
63+
df_long,
64+
date_col='datetime',
65+
value_col='value',
66+
id_col='series_id',
67+
aggfunc='first',
68+
)
69+
return df_wide
5770

5871

5972
def load_fred_yearly():
@@ -86,9 +99,22 @@ def load_fred_yearly():
8699
return df_long
87100

88101

89-
def load_yearly():
102+
def load_yearly(long: bool = True):
90103
"""Federal Reserve of St. Louis annual economic indicators."""
91-
return load_fred_yearly()
104+
if long:
105+
return load_fred_yearly()
106+
else:
107+
from autots.tools.shaping import long_to_wide
108+
109+
df_long = load_fred_yearly()
110+
df_wide = long_to_wide(
111+
df_long,
112+
date_col='datetime',
113+
value_col='value',
114+
id_col='series_id',
115+
aggfunc='first',
116+
)
117+
return df_wide
92118

93119

94120
def load_traffic_hourly(long: bool = True):
@@ -130,6 +156,19 @@ def load_eia_weekly():
130156
return df_long
131157

132158

133-
def load_weekly():
159+
def load_weekly(long: bool = True):
134160
"""Weekly petroleum industry data from the EIA."""
135-
return load_eia_weekly()
161+
if long:
162+
return load_eia_weekly()
163+
else:
164+
from autots.tools.shaping import long_to_wide
165+
166+
df_long = load_eia_weekly()
167+
df_wide = long_to_wide(
168+
df_long,
169+
date_col='datetime',
170+
value_col='value',
171+
id_col='series_id',
172+
aggfunc='first',
173+
)
174+
return df_wide

autots/evaluator/auto_ts.py

Lines changed: 61 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,28 @@
44
import copy
55
import json
66

7-
from autots.tools.shaping import long_to_wide
8-
import random
9-
from autots.tools.shaping import subset_series
10-
from autots.tools.shaping import simple_train_test_split
11-
from autots.evaluator.auto_model import TemplateEvalObject
12-
from autots.evaluator.auto_model import NewGeneticTemplate
13-
from autots.evaluator.auto_model import RandomTemplate
14-
from autots.evaluator.auto_model import TemplateWizard
15-
from autots.evaluator.auto_model import unpack_ensemble_models
16-
from autots.evaluator.auto_model import generate_score
7+
from autots.tools.shaping import (
8+
long_to_wide,
9+
df_cleanup,
10+
subset_series,
11+
simple_train_test_split,
12+
NumericTransformer,
13+
)
14+
from autots.evaluator.auto_model import (
15+
TemplateEvalObject,
16+
NewGeneticTemplate,
17+
RandomTemplate,
18+
TemplateWizard,
19+
unpack_ensemble_models,
20+
generate_score,
21+
PredictWitch,
22+
validation_aggregation,
23+
)
1724
from autots.models.ensemble import (
1825
EnsembleTemplateGenerator,
1926
HorizontalTemplateGenerator,
2027
)
21-
from autots.evaluator.auto_model import PredictWitch
22-
from autots.tools.shaping import NumericTransformer
23-
from autots.evaluator.auto_model import validation_aggregation
28+
import random
2429

2530

2631
class AutoTS(object):
@@ -381,14 +386,19 @@ def fit(
381386
date_col=self.date_col,
382387
value_col=self.value_col,
383388
id_col=self.id_col,
384-
frequency=self.frequency,
385-
na_tolerance=self.na_tolerance,
386-
drop_data_older_than_periods=self.drop_data_older_than_periods,
387389
aggfunc=self.aggfunc,
388-
drop_most_recent=self.drop_most_recent,
389-
verbose=self.verbose,
390390
)
391391

392+
df_wide = df_cleanup(
393+
df_wide,
394+
frequency=self.frequency,
395+
na_tolerance=self.na_tolerance,
396+
drop_data_older_than_periods=self.drop_data_older_than_periods,
397+
aggfunc=self.aggfunc,
398+
drop_most_recent=self.drop_most_recent,
399+
verbose=self.verbose,
400+
)
401+
392402
# clean up series weighting input
393403
if not weighted:
394404
weights = {x: 1 for x in df_wide.columns}
@@ -1146,6 +1156,21 @@ def results(self, result_set: str = 'initial'):
11461156
else:
11471157
return self.initial_results.model_results
11481158

1159+
def failure_rate(self, result_set: str = 'initial'):
1160+
"""Return fraction of models passing with exceptions.
1161+
1162+
Args:
1163+
result_set (str, optional): 'validation' or 'initial'. Defaults to 'initial'.
1164+
1165+
Returns:
1166+
float.
1167+
1168+
"""
1169+
initial_results = self.results(result_set=result_set)
1170+
n = initial_results.shape[0]
1171+
x = (n - initial_results['Exceptions'].isna().sum()) / n
1172+
return x
1173+
11491174
def export_template(
11501175
self,
11511176
filename,
@@ -1494,7 +1519,7 @@ def predict(self, future_regressor=[], verbose: int = 'self') -> dict:
14941519

14951520

14961521
def fake_regressor(
1497-
df_long,
1522+
df,
14981523
forecast_length: int = 14,
14991524
date_col: str = 'datetime',
15001525
value_col: str = 'value',
@@ -1505,20 +1530,30 @@ def fake_regressor(
15051530
na_tolerance: float = 0.95,
15061531
drop_data_older_than_periods: int = 100000,
15071532
dimensions: int = 1,
1533+
verbose: int = 0,
15081534
):
15091535
"""Create a fake regressor of random numbers for testing purposes."""
1510-
from autots.tools.shaping import long_to_wide
15111536

1512-
df_wide = long_to_wide(
1513-
df_long,
1514-
date_col=date_col,
1515-
value_col=value_col,
1516-
id_col=id_col,
1537+
if date_col is None and value_col is None:
1538+
df_wide = pd.DataFrame(df)
1539+
assert type(df.index) is pd.DatetimeIndex, "df index is not pd.DatetimeIndex"
1540+
else:
1541+
df_wide = long_to_wide(
1542+
df,
1543+
date_col=date_col,
1544+
value_col=value_col,
1545+
id_col=id_col,
1546+
aggfunc=aggfunc,
1547+
)
1548+
1549+
df_wide = df_cleanup(
1550+
df_wide,
15171551
frequency=frequency,
15181552
na_tolerance=na_tolerance,
1519-
aggfunc=aggfunc,
15201553
drop_data_older_than_periods=drop_data_older_than_periods,
1554+
aggfunc=aggfunc,
15211555
drop_most_recent=drop_most_recent,
1556+
verbose=verbose,
15221557
)
15231558
if frequency == 'infer':
15241559
frequency = pd.infer_freq(df_wide.index, warn=True)

autots/models/sklearn.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,7 +1065,6 @@ def fit(self, df, future_regressor=[]):
10651065
Args:
10661066
df (pandas.DataFrame): Datetime Indexed
10671067
"""
1068-
print(f"N_jobs is {self.n_jobs}")
10691068
df = self.basic_profile(df)
10701069
self.df_train = df
10711070
X, Y = window_maker(
@@ -1117,7 +1116,7 @@ def predict(
11171116
if univariate and 1, transpose
11181117
"""
11191118
if int(forecast_length) > int(self.forecast_length):
1120-
print("GluonTS must be refit to change forecast length!")
1119+
print("Regression must be refit to change forecast length!")
11211120
predictStartTime = datetime.datetime.now()
11221121
index = self.create_forecast_index(forecast_length=forecast_length)
11231122

0 commit comments

Comments
 (0)