Skip to content

Commit a2a464c

Browse files
authored
Merge pull request #238 from winedarksea/dev
0.6.11
2 parents e0e8e9c + 9555557 commit a2a464c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+866
-257
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ Also take a look at the [production_example.py](https://github.com/winedarksea/A
106106
* `superfast` (simple naive models) and `fast` (more complex but still faster models, optimized for many series)
107107
* `fast_parallel` (a combination of `fast` and `parallel`) or `parallel`, given many CPU cores are available
108108
* `n_jobs` usually gets pretty close with `='auto'` but adjust as necessary for the environment
109+
* 'scalable' is the best list to avoid crashing when many series are present. There is also a transformer_list = 'scalable'
109110
* see a dict of predefined lists (some defined for internal use) with `from autots.models.model_list import model_lists`
110111
* Use the `subset` parameter when there are many similar series, `subset=100` will often generalize well for tens of thousands of similar series.
111112
* if using `subset`, passing `weights` for series will weight subset selection towards higher priority series.
@@ -121,6 +122,7 @@ Also take a look at the [production_example.py](https://github.com/winedarksea/A
121122
* this can be done by adjusting `frequency` and `aggfunc` but is probably best done before passing data into AutoTS.
122123
* It will be faster if NaN's are already filled. If a search for optimal NaN fill method is not required, then fill any NaN with a satisfactory method before passing to class.
123124
* Set `runtime_weighting` in `metric_weighting` to a higher value. This will guide the search towards faster models, although it may come at the expense of accuracy.
125+
* Memory shortage is the most common cause of random process/kernel crashes. Try testing a data subset and using a different model list if issues occur. Please also report crashes if found to be linked to a specific set of model parameters (not AutoTS parameters but the underlying forecasting model params). Also crashes vary significantly by setup such as underlying linpack/blas so seeing crash differences between environments can be expected.
124126

125127
## How to Contribute:
126128
* Give feedback on where you find the documentation confusing

TODO.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@
1313
* Forecasts are desired for the future immediately following the most recent data.
1414
* trimmed_mean to AverageValueNaive
1515

16-
# 0.6.10 🇺🇦 🇺🇦 🇺🇦
17-
* assorted minor bug fixes
18-
* bug in mosaic model selection fixed
19-
* added crosshair_lite mosaic
16+
# 0.6.11 🇺🇦 🇺🇦 🇺🇦
17+
* bug fixes
18+
* continually trying to keep up with the Pandas maintainers who are breaking stuff for no good reason
19+
* updated RollingMeanTransformer and RegressionFilter, RegressionFilter should now be less memory intensive
20+
* EIA data call to load_live_daily
21+
* horizontal_ensemble_validation arg for more complete validation on these ensembles
2022

2123
### Unstable Upstream Packages (those that are frequently broken by maintainers)
2224
* Pytorch-Forecasting

autots/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
44
https://github.com/winedarksea/AutoTS
55
"""
6+
67
from autots.datasets import (
78
load_hourly,
89
load_daily,
@@ -21,12 +22,12 @@
2122
from autots.tools.transform import GeneralTransformer, RandomTransform
2223
from autots.tools.shaping import long_to_wide, infer_frequency
2324
from autots.tools.regressor import create_lagged_regressor, create_regressor
24-
from autots.evaluator.auto_model import model_forecast
25+
from autots.evaluator.auto_model import model_forecast, ModelPrediction
2526
from autots.evaluator.anomaly_detector import AnomalyDetector, HolidayDetector
2627
from autots.models.cassandra import Cassandra
2728

2829

29-
__version__ = '0.6.10'
30+
__version__ = '0.6.11'
3031

3132
TransformTS = GeneralTransformer
3233

@@ -54,4 +55,5 @@
5455
'HolidayDetector',
5556
'Cassandra',
5657
'infer_frequency',
58+
'ModelPrediction',
5759
]

autots/datasets/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""
22
Tools for Importing Sample Data
33
"""
4+
45
from autots.datasets._base import (
56
load_daily,
67
load_live_daily,

autots/datasets/_base.py

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
"""Loading example datasets."""
2+
23
from os.path import dirname, join
34
import time
45
import datetime
56
import io
7+
import json
68
import numpy as np
79
import pandas as pd
810

@@ -236,7 +238,9 @@ def load_live_daily(
236238
wikipedia_pages: list = ['Microsoft_Office', "List_of_highest-grossing_films"],
237239
wiki_language: str = "en",
238240
weather_event_types=["%28Z%29+Winter+Weather", "%28Z%29+Winter+Storm"],
239-
caiso_query: str = "ENE_SLRS",
241+
caiso_query: str = None,
242+
eia_key: str = None,
243+
eia_respondents: list = ["MISO", "PJM", "TVA", "US48"],
240244
timeout: float = 300.05,
241245
sleep_seconds: int = 2,
242246
**kwargs,
@@ -498,6 +502,7 @@ def load_live_daily(
498502
except Exception as e:
499503
print(f"pytrends data failed: {repr(e)}")
500504

505+
# this was kinda broken last I checked
501506
if caiso_query is not None:
502507
try:
503508
n_chunks = (364 * weather_years) / 30
@@ -537,6 +542,96 @@ def load_live_daily(
537542
except Exception as e:
538543
print(f"caiso download failed with error: {repr(e)}")
539544

545+
if eia_key is not None and eia_respondents is not None:
546+
api_url = 'https://api.eia.gov/v2/electricity/rto/daily-region-data/data/' # ?api_key={eia-key}
547+
for respond in eia_respondents:
548+
try:
549+
params = {
550+
"frequency": "daily",
551+
"data": ["value"],
552+
"facets": {
553+
"type": ["D"],
554+
"respondent": [respond],
555+
"timezone": ["Eastern"],
556+
},
557+
"start": None, # "start": "2018-06-30",
558+
"end": None, # "end": "2023-11-01",
559+
"sort": [{"column": "period", "direction": "desc"}],
560+
"offset": 0,
561+
"length": 5000,
562+
}
563+
564+
res = s.get(
565+
api_url,
566+
params={
567+
"api_key": eia_key,
568+
},
569+
headers={"X-Params": json.dumps(params)},
570+
)
571+
eia_df = pd.json_normalize(res.json()['response']['data'])
572+
eia_df['datetime'] = pd.to_datetime(eia_df['period'])
573+
eia_df['value'] = eia_df['value'].astype('float')
574+
eia_df['ID'] = (
575+
eia_df['respondent']
576+
+ "_"
577+
+ eia_df['type']
578+
+ "_"
579+
+ eia_df['timezone']
580+
)
581+
temp = eia_df.pivot(columns='ID', index='datetime', values='value')
582+
dataset_lists.append(temp)
583+
time.sleep(sleep_seconds)
584+
except Exception as e:
585+
print(f"eia download failed with error {repr(e)}")
586+
try:
587+
api_url_mix = (
588+
"https://api.eia.gov/v2/electricity/rto/daily-fuel-type-data/data/"
589+
)
590+
params = {
591+
"frequency": "daily",
592+
"data": ["value"],
593+
"facets": {
594+
"respondent": [respond],
595+
"timezone": ["Eastern"],
596+
"fueltype": [
597+
"COL",
598+
"NG",
599+
"NUC",
600+
"SUN",
601+
"WAT",
602+
"WND",
603+
],
604+
},
605+
"start": None,
606+
"end": None,
607+
"sort": [{"column": "period", "direction": "desc"}],
608+
"offset": 0,
609+
"length": 5000,
610+
}
611+
res = s.get(
612+
api_url_mix,
613+
params={
614+
"api_key": eia_key,
615+
},
616+
headers={"X-Params": json.dumps(params)},
617+
)
618+
eia_df = pd.json_normalize(res.json()['response']['data'])
619+
eia_df['datetime'] = pd.to_datetime(eia_df['period'])
620+
eia_df['value'] = eia_df['value'].astype('float')
621+
eia_df['type-name'] = eia_df['type-name'].str.replace(" ", "_")
622+
eia_df['ID'] = (
623+
eia_df['respondent']
624+
+ "_"
625+
+ eia_df['type-name']
626+
+ "_"
627+
+ eia_df['timezone']
628+
)
629+
temp = eia_df.pivot(columns='ID', index='datetime', values='value')
630+
dataset_lists.append(temp)
631+
time.sleep(1)
632+
except Exception as e:
633+
print(f"eia download failed with error {repr(e)}")
634+
540635
### End of data download
541636
if len(dataset_lists) < 1:
542637
raise ValueError("No data successfully downloaded!")

autots/datasets/fred.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
requires API key from FRED
55
and pip install fredapi
66
"""
7+
78
import time
89
import pandas as pd
910

autots/evaluator/anomaly_detector.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -318,9 +318,9 @@ def detect(self, df):
318318
splash_threshold=self.splash_threshold,
319319
threshold=self.threshold,
320320
actuals=df if self.output != "univariate" else None,
321-
anomaly_scores=self.anomaly_model.scores
322-
if self.output != "univariate"
323-
else None,
321+
anomaly_scores=(
322+
self.anomaly_model.scores if self.output != "univariate" else None
323+
),
324324
use_dayofmonth_holidays=self.use_dayofmonth_holidays,
325325
use_wkdom_holidays=self.use_wkdom_holidays,
326326
use_wkdeom_holidays=self.use_wkdeom_holidays,

autots/evaluator/auto_model.py

Lines changed: 56 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Mid-level helper functions for AutoTS."""
2+
23
import sys
34
import gc
45
import traceback as tb
@@ -697,10 +698,12 @@ def ModelMonster(
697698
n_jobs=n_jobs,
698699
**parameters,
699700
)
700-
else:
701+
elif model == "":
701702
raise AttributeError(
702-
("Model String '{}' not a recognized model type").format(model)
703+
("Model name is empty. Likely this means AutoTS has not been fit.")
703704
)
705+
else:
706+
raise AttributeError((f"Model String '{model}' not a recognized model type"))
704707

705708

706709
class ModelPrediction(ModelObject):
@@ -768,11 +771,17 @@ def __init__(
768771
self.force_gc = force_gc
769772
# handle still in JSON form
770773
if isinstance(transformation_dict, str):
771-
self.transformation_dict = json.loads(transformation_dict)
774+
if transformation_dict == "":
775+
self.transformation_dict = {}
776+
else:
777+
self.transformation_dict = json.loads(transformation_dict)
772778
else:
773779
self.transformation_dict = transformation_dict
774780
if isinstance(parameter_dict, str):
775-
self.parameter_dict = json.loads(parameter_dict)
781+
if parameter_dict == "":
782+
self.parameter_dict = {}
783+
else:
784+
self.parameter_dict = json.loads(parameter_dict)
776785
else:
777786
self.parameter_dict = parameter_dict
778787
if model_str == "PreprocessingRegression":
@@ -786,26 +795,31 @@ def __init__(
786795
self.transformation_dict = {}
787796
self.transformer_object = GeneralTransformer(
788797
**self.transformation_dict,
789-
n_jobs=n_jobs,
790-
holiday_country=holiday_country,
798+
n_jobs=self.n_jobs,
799+
holiday_country=self.holiday_country,
791800
verbose=self.verbose,
792-
)
793-
self.model = ModelMonster(
794-
model_str,
795-
parameters=self.parameter_dict,
796-
frequency=frequency,
797-
prediction_interval=prediction_interval,
798-
holiday_country=holiday_country,
799-
random_seed=random_seed,
800-
verbose=verbose,
801-
forecast_length=forecast_length,
802-
n_jobs=n_jobs,
801+
random_seed=self.random_seed,
803802
)
804803
self.name = "ModelPrediction"
805804
self._fit_complete = False
806805

807806
def fit(self, df, future_regressor=None):
808807
self.df = df
808+
if self.frequency == "infer":
809+
self.inferred_frequency = infer_frequency(df)
810+
else:
811+
self.inferred_frequency = self.frequency
812+
self.model = ModelMonster(
813+
self.model_str,
814+
parameters=self.parameter_dict,
815+
frequency=self.inferred_frequency,
816+
prediction_interval=self.prediction_interval,
817+
holiday_country=self.holiday_country,
818+
random_seed=self.random_seed,
819+
verbose=self.verbose,
820+
forecast_length=self.forecast_length,
821+
n_jobs=self.n_jobs,
822+
)
809823
transformationStartTime = datetime.datetime.now()
810824
if self.current_model_file is not None:
811825
try:
@@ -1266,9 +1280,15 @@ def model_forecast(
12661280
full_model_created = False # make at least one full model, horziontal only
12671281
# handle JSON inputs of the dicts
12681282
if isinstance(model_param_dict, str):
1269-
model_param_dict = json.loads(model_param_dict)
1283+
if model_param_dict == "":
1284+
model_param_dict = {}
1285+
else:
1286+
model_param_dict = json.loads(model_param_dict)
12701287
if isinstance(model_transform_dict, str):
1271-
model_transform_dict = json.loads(model_transform_dict)
1288+
if model_transform_dict == "":
1289+
model_transform_dict = {}
1290+
else:
1291+
model_transform_dict = json.loads(model_transform_dict)
12721292
if frequency == "infer":
12731293
frequency = infer_frequency(df_train)
12741294
# handle "auto" n_jobs to an integer of local count
@@ -1610,6 +1630,7 @@ def virtual_memory():
16101630
cumsum_A=cumsum_A,
16111631
diff_A=diff_A,
16121632
last_of_array=last_of_array,
1633+
column_names=df_train.columns,
16131634
)
16141635
if validation_round >= 1 and verbose > 0:
16151636
round_smape = model_error.avg_metrics['smape'].round(2)
@@ -1626,16 +1647,26 @@ def virtual_memory():
16261647
print(validation_accuracy_print)
16271648
else:
16281649
print(validation_accuracy_print)
1629-
model_id = create_model_id(
1630-
df_forecast.model_name,
1631-
df_forecast.model_parameters,
1632-
df_forecast.transformation_parameters,
1633-
)
1650+
# for horizontal ensemble, use requested ID and params
1651+
if ensemble_input == 2:
1652+
model_id = create_model_id(
1653+
model_str, parameter_dict, transformation_dict
1654+
)
1655+
# it's already json
1656+
deposit_params = row['ModelParameters']
1657+
else:
1658+
# for non horizontal, recreate based on what model actually used (some change)
1659+
model_id = create_model_id(
1660+
df_forecast.model_name,
1661+
df_forecast.model_parameters,
1662+
df_forecast.transformation_parameters,
1663+
)
1664+
deposit_params = json.dumps(df_forecast.model_parameters)
16341665
result = pd.DataFrame(
16351666
{
16361667
'ID': model_id,
16371668
'Model': df_forecast.model_name,
1638-
'ModelParameters': json.dumps(df_forecast.model_parameters),
1669+
'ModelParameters': deposit_params,
16391670
'TransformationParameters': json.dumps(
16401671
df_forecast.transformation_parameters
16411672
),

0 commit comments

Comments
 (0)