Skip to content

Commit 1613c78

Browse files
Commit message: "0.3.6" (version bump). Parent commit: f942372; this commit: 1613c78.

33 files changed

+407
-134
lines changed

autots/datasets/_base.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -381,8 +381,7 @@ def load_zeroes(long=False, shape=None, start_date: str = "2021-01-01"):
381381
if shape is None:
382382
shape = (200, 5)
383383
df_wide = pd.DataFrame(
384-
np.zeros(shape),
385-
index=pd.date_range(start_date, periods=shape[0], freq="D")
384+
np.zeros(shape), index=pd.date_range(start_date, periods=shape[0], freq="D")
386385
)
387386
if not long:
388387
return df_wide

autots/evaluator/auto_model.py

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1633,7 +1633,9 @@ def generate_score(
16331633
model_results = model_results.replace([np.inf, -np.inf], np.nan)
16341634
# not sure why there are negative SMAPE values, but make sure they get dealt with
16351635
if model_results['smape'].min() < 0:
1636-
model_results['smape'] = model_results['smape'].where(model_results['smape'] >= 0, model_results['smape'].max())
1636+
model_results['smape'] = model_results['smape'].where(
1637+
model_results['smape'] >= 0, model_results['smape'].max()
1638+
)
16371639
# handle NaN in scores...
16381640
# model_results = model_results.fillna(value=model_results.max(axis=0))
16391641

@@ -1744,13 +1746,23 @@ def generate_score_per_series(results_object, metric_weighting, total_validation
17441746
return overall_score
17451747

17461748

1747-
def back_forecast(df, model_name, model_param_dict, model_transform_dict,
1748-
future_regressor_train=None,
1749-
n_splits: int = "auto", forecast_length=14,
1750-
frequency="infer", prediction_interval=0.9, no_negatives=False,
1751-
constraint=None, holiday_country="US",
1752-
random_seed=123, n_jobs="auto", verbose=0,
1753-
):
1749+
def back_forecast(
1750+
df,
1751+
model_name,
1752+
model_param_dict,
1753+
model_transform_dict,
1754+
future_regressor_train=None,
1755+
n_splits: int = "auto",
1756+
forecast_length=14,
1757+
frequency="infer",
1758+
prediction_interval=0.9,
1759+
no_negatives=False,
1760+
constraint=None,
1761+
holiday_country="US",
1762+
random_seed=123,
1763+
n_jobs="auto",
1764+
verbose=0,
1765+
):
17541766
"""Create forecasts for the historical training data, ie. backcast or back forecast.
17551767
17561768
This actually forecasts on historical data, these are not fit model values as are often returned by other packages.
@@ -1771,7 +1783,11 @@ def back_forecast(df, model_name, model_param_dict, model_transform_dict,
17711783
n_splits = int(n_splits)
17721784

17731785
chunk_size = df.index.shape[0] / n_splits
1774-
b_forecast, b_forecast_up, b_forecast_low = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
1786+
b_forecast, b_forecast_up, b_forecast_low = (
1787+
pd.DataFrame(),
1788+
pd.DataFrame(),
1789+
pd.DataFrame(),
1790+
)
17751791
for n in range(n_splits):
17761792
int_idx = int(n * chunk_size)
17771793
int_idx_1 = int((n + 1) * chunk_size)
@@ -1781,17 +1797,19 @@ def back_forecast(df, model_name, model_param_dict, model_transform_dict,
17811797
df_split = df.iloc[int_idx_1:].copy()
17821798
df_split = df_split.iloc[::-1]
17831799
df_split.index = df_split.index[::-1]
1784-
result_idx = df.iloc[0: int_idx_1].index
1800+
result_idx = df.iloc[0:int_idx_1].index
17851801
else:
1786-
df_split = df.iloc[0: int_idx].copy()
1802+
df_split = df.iloc[0:int_idx].copy()
17871803
# handle appropriate regressors
17881804
if isinstance(future_regressor_train, pd.DataFrame):
17891805
if n == 0:
17901806
split_regr = future_regressor_train.reindex(df_split.index[::-1])
17911807
split_regr_future = future_regressor_train.reindex(result_idx)
17921808
else:
17931809
split_regr = future_regressor_train.reindex(df_split.index)
1794-
split_regr_future = future_regressor_train.reindex(df.index[int_idx: int_idx_1])
1810+
split_regr_future = future_regressor_train.reindex(
1811+
df.index[int_idx:int_idx_1]
1812+
)
17951813
else:
17961814
split_regr = []
17971815
split_regr_future = []
@@ -1826,7 +1844,9 @@ def back_forecast(df, model_name, model_param_dict, model_transform_dict,
18261844
b_forecast_low.index = result_idx
18271845
except Exception as e:
18281846
print(f"back_forecast split {n} failed with {repr(e)}")
1829-
b_df = pd.DataFrame(np.nan, index=df.index[int_idx: int_idx_1], columns=df.columns)
1847+
b_df = pd.DataFrame(
1848+
np.nan, index=df.index[int_idx:int_idx_1], columns=df.columns
1849+
)
18301850
b_forecast = pd.concat([b_forecast, b_df])
18311851
b_forecast_up = pd.concat([b_forecast_up, b_df])
18321852
b_forecast_low = pd.concat([b_forecast_low, b_df])

autots/evaluator/auto_ts.py

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
model_forecast,
2626
validation_aggregation,
2727
back_forecast,
28-
remove_leading_zeros
28+
remove_leading_zeros,
2929
)
3030
from autots.models.ensemble import (
3131
EnsembleTemplateGenerator,
@@ -1099,7 +1099,9 @@ def fit(
10991099
# give a more convenient dict option
11001100
self.best_model_name = self.best_model['Model'].iloc[0]
11011101
self.best_model_params = json.loads(self.best_model['ModelParameters'].iloc[0])
1102-
self.best_model_transformation_params = json.loads(self.best_model['TransformationParameters'].iloc[0])
1102+
self.best_model_transformation_params = json.loads(
1103+
self.best_model['TransformationParameters'].iloc[0]
1104+
)
11031105

11041106
# set flags to check if regressors or ensemble used in final model.
11051107
param_dict = json.loads(self.best_model.iloc[0]['ModelParameters'])
@@ -1334,7 +1336,9 @@ def export_template(
13341336
export_template = export_template.nsmallest(n, columns=['Score'])
13351337
if not include_results:
13361338
export_template = export_template[self.template_cols]
1337-
export_template = pd.concat([self.best_model, export_template]).drop_duplicates()
1339+
export_template = pd.concat(
1340+
[self.best_model, export_template]
1341+
).drop_duplicates()
13381342
else:
13391343
raise ValueError("`models` must be 'all' or 'best'")
13401344
try:
@@ -1453,7 +1457,9 @@ def import_results(self, filename):
14531457
self.initial_results = self.initial_results.concat(new_obj)
14541458
return self
14551459

1456-
def back_forecast(self, column=None, n_splits: int = 3, verbose: int = 0):
1460+
def back_forecast(
1461+
self, column=None, n_splits: int = 3, tail: int = None, verbose: int = 0
1462+
):
14571463
"""Create forecasts for the historical training data, ie. backcast or back forecast.
14581464
14591465
This actually forecasts on historical data, these are not fit model values as are often returned by other packages.
@@ -1463,6 +1469,7 @@ def back_forecast(self, column=None, n_splits: int = 3, verbose: int = 0):
14631469
Args are same as for model_forecast except...
14641470
n_splits(int): how many pieces to split data into. Pass 2 for fastest, or "auto" for best accuracy
14651471
column (str): if to run on only one column, pass column name. Faster than full.
1472+
tail (int): df.tail() of the dataset, back_forecast is only run on n most recent observations.
14661473
14671474
Returns a standard prediction object (access .forecast, .lower_forecast, .upper_forecast)
14681475
"""
@@ -1472,18 +1479,24 @@ def back_forecast(self, column=None, n_splits: int = 3, verbose: int = 0):
14721479
input_df = pd.DataFrame(self.df_wide_numeric[column])
14731480
else:
14741481
input_df = self.df_wide_numeric
1482+
if tail is not None:
1483+
input_df = input_df.tail(tail)
14751484
result = back_forecast(
14761485
df=input_df,
14771486
model_name=self.best_model_name,
14781487
model_param_dict=self.best_model_params,
14791488
model_transform_dict=self.best_model_transformation_params,
14801489
future_regressor_train=self.future_regressor_train,
1481-
n_splits=n_splits, forecast_length=self.forecast_length,
1482-
frequency=self.frequency, prediction_interval=self.prediction_interval,
1490+
n_splits=n_splits,
1491+
forecast_length=self.forecast_length,
1492+
frequency=self.frequency,
1493+
prediction_interval=self.prediction_interval,
14831494
no_negatives=self.no_negatives,
1484-
constraint=self.constraint, holiday_country=self.holiday_country,
1495+
constraint=self.constraint,
1496+
holiday_country=self.holiday_country,
14851497
random_seed=self.random_seed,
1486-
n_jobs=self.n_jobs, verbose=verbose,
1498+
n_jobs=self.n_jobs,
1499+
verbose=verbose,
14871500
)
14881501
return result
14891502

@@ -1604,7 +1617,9 @@ def plot_generation_loss(self, **kwargs):
16041617
ylabel="Lowest Score", **kwargs
16051618
)
16061619

1607-
def plot_backforecast(self, series=None, n_splits: int = 3, start_date=None, **kwargs):
1620+
def plot_backforecast(
1621+
self, series=None, n_splits: int = 3, start_date=None, **kwargs
1622+
):
16081623
"""Plot the historical data and fit forecast on historic.
16091624
16101625
Args:
@@ -1616,10 +1631,13 @@ def plot_backforecast(self, series=None, n_splits: int = 3, start_date=None, **k
16161631
series = random.choice(self.df_wide_numeric.columns)
16171632
b_df = self.back_forecast(column=series, n_splits=n_splits, verbose=0).forecast
16181633
b_df = b_df.rename(columns=lambda x: str(x) + "_forecast")
1619-
plot_df = pd.concat([
1620-
pd.DataFrame(self.df_wide_numeric[series]),
1621-
b_df,
1622-
], axis=1)
1634+
plot_df = pd.concat(
1635+
[
1636+
pd.DataFrame(self.df_wide_numeric[series]),
1637+
b_df,
1638+
],
1639+
axis=1,
1640+
)
16231641
if start_date is not None:
16241642
plot_df = plot_df[plot_df.index >= start_date]
16251643
plot_df = remove_leading_zeros(plot_df)
@@ -1667,6 +1685,8 @@ def plot_backforecast(self, series=None, n_splits: int = 3, start_date=None, **k
16671685
'#EE82EE',
16681686
'#00008B',
16691687
'#4B0082',
1688+
'#0403A7',
1689+
"#000000",
16701690
]
16711691

16721692

autots/models/ensemble.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -238,10 +238,10 @@ def mosaic_classifier(df_train, known):
238238
index=None if len(p_full) > 1 else [0],
239239
)
240240
upload = pd.concat([upload, missing_rows])
241-
X = fill_median((
242-
summarize_series(df_train)
243-
.transpose())
244-
.merge(upload, left_index=True, right_on="series_id")
241+
X = fill_median(
242+
(summarize_series(df_train).transpose()).merge(
243+
upload, left_index=True, right_on="series_id"
244+
)
245245
)
246246
X.set_index("series_id", inplace=True) # .drop(columns=['series_id'], inplace=True)
247247
to_predict = X[X['model_id'].isna()].drop(columns=['model_id'])
@@ -1198,7 +1198,7 @@ def MosaicEnsemble(
11981198
newdf.columns = base_df.columns
11991199
newdf['forecast_period'] = np.tile(
12001200
np.arange(max_forecast_period + 1, needed_stamps + 1 + max_forecast_period),
1201-
base_df.shape[0]
1201+
base_df.shape[0],
12021202
)
12031203
melted = pd.concat([melted, newdf])
12041204
elif len_sample_index < (max_forecast_period + 1):
@@ -1214,9 +1214,13 @@ def MosaicEnsemble(
12141214
l_fore.append(lower_forecasts[row[3]][row[2]].iloc[row[1]])
12151215
except Exception as e:
12161216
m0 = f"{row[3]} in available_models: {row[3] in available_models}, "
1217-
mi = m0 + f"In forecast: {row[3] in forecasts.keys()}, in upper: {row[3] in upper_forecasts.keys()}, in Lower: {row[3] in lower_forecasts.keys()}"
1217+
mi = (
1218+
m0
1219+
+ f"In forecast: {row[3] in forecasts.keys()}, in upper: {row[3] in upper_forecasts.keys()}, in Lower: {row[3] in lower_forecasts.keys()}"
1220+
)
12181221
raise ValueError(
1219-
f"Mosaic Ensemble failed on model {row[3]} series {row[2]} and period {row[1]} due to missing model: {e} " + mi
1222+
f"Mosaic Ensemble failed on model {row[3]} series {row[2]} and period {row[1]} due to missing model: {e} "
1223+
+ mi
12201224
)
12211225
melted[
12221226
'forecast'

autots/models/sklearn.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -852,7 +852,23 @@ def window_maker(
852852
future_regressor=None,
853853
random_seed: int = 1234,
854854
):
855-
"""Convert a dataset into slices with history and y forecast."""
855+
"""Convert a dataset into slices with history and y forecast.
856+
857+
Args:
858+
df (pd.DataFrame): `wide` format df with sorted index
859+
window_size (int): length of history to use for X window
860+
input_dim (str): univariate or multivariate. If multivariate, all series in single X row
861+
shuffle (bool): (deprecated)
862+
output_dim (str): 'forecast_length' or '1step' where 1 step is basically forecast_length=1
863+
forecast_length (int): number of periods ahead that will be forecast
864+
max_windows (int): a cap on total number of windows to generate. If exceeded, random of this int are selected.
865+
regression_type (str): None or "user" if to try to concat regressor to windows
866+
future_regressor (pd.DataFrame): values of regressor if used
867+
random_seed (int): a consistent random
868+
869+
Returns:
870+
X, Y
871+
"""
856872
if output_dim == '1step':
857873
forecast_length = 1
858874
phrase_n = forecast_length + window_size
@@ -890,7 +906,6 @@ def window_maker(
890906
Y = Y.ravel()
891907

892908
except Exception as e:
893-
# print(f"New numpy version of Window Regression failed {e}.")
894909
if str(regression_type).lower() == "user":
895910
if input_dim == "multivariate":
896911
raise ValueError(
@@ -1506,6 +1521,8 @@ def fit(self, df, future_regressor=[]):
15061521
self.regression_type = None
15071522

15081523
y = df.values
1524+
if y.shape[1] == 1:
1525+
y = y.ravel()
15091526

15101527
X = date_part(df.index, method=self.datepart_method)
15111528
if self.regression_type == 'User':

autots/tools/impute.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,13 @@ def fake_date_fill(df, back_method: str = 'slice'):
9393
'akima',
9494
]
9595
# these seem to cause more harm than good usually
96-
df_interpolate_messy = ['polynomial', 'krogh', 'cubicspline', 'from_derivatives', 'slinear']
96+
df_interpolate_messy = [
97+
'polynomial',
98+
'krogh',
99+
'cubicspline',
100+
'from_derivatives',
101+
'slinear',
102+
]
97103
df_interpolate_full = list(set(df_interpolate + df_interpolate_messy))
98104

99105

0 commit comments

Comments (0)