winedarksea
diff --git a/‎README.md‎
Lines changed: 24 additions & 16 deletions b/‎README.md‎
Lines changed: 24 additions & 16 deletions
diff --git a/‎TODO.md‎
Lines changed: 3 additions & 3 deletions b/‎TODO.md‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎autots/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎autots/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎autots/datasets/_base.py‎
Lines changed: 45 additions & 6 deletions b/‎autots/datasets/_base.py‎
Lines changed: 45 additions & 6 deletions
diff --git a/‎autots/evaluator/auto_ts.py‎
Lines changed: 61 additions & 26 deletions b/‎autots/evaluator/auto_ts.py‎
Lines changed: 61 additions & 26 deletions
diff --git a/‎autots/models/sklearn.py‎
Lines changed: 1 addition & 2 deletions b/‎autots/models/sklearn.py‎
Lines changed: 1 addition & 2 deletions
@@ -21,33 +21,42 @@ For other time series needs, check out the list [here](https://github.com/MaxBen
 * Option to use one or a combination of metrics for model selection
 * Import and export of templates allowing greater user customization
 
-## Basic Use
+## Installation
 ```
 pip install autots
 ```
 This includes dependencies for basic models, but additional packages are required for some models and methods.
 
-Input data is expected to come in a 'long' format with three columns: 
-* Date (ideally already in pd.DateTime format)
-* Value
-* Series ID. For a single time series, series_id can be `= None`. 
+## Basic Use
+
+Input data is expected to come in either a *long* or a *wide* format:
 
-The column name for each of these is passed to .fit(). 
+- The *wide* format is a `pandas.DataFrame` with a `pandas.DatetimeIndex` and each column a distinct series. 
+- The *long* format has three columns: 
+  - Date (ideally already in pandas datetime format)
+  - Series ID. For a single time series, `id_col` can be `None`.
+  - Value
+- For *long* data, the column name for each of these is passed to .fit() as `date_col`, `id_col`, and `value_col`. No parameters are needed for *wide* data.
 
 ```
-
 # also: _hourly, _daily, _weekly, or _yearly
-from autots.datasets import load_monthly 
+from autots.datasets import load_monthly
+
 df_long = load_monthly()
 
 from autots import AutoTS
-model = AutoTS(forecast_length=3, frequency='infer',
-               prediction_interval=0.9, ensemble='all',
-			   model_list='superfast',
-               max_generations=5, num_validations=2,
-			   validation_method='even')
-model = model.fit(df_long, date_col='datetime',
-				  value_col='value', id_col='series_id')
+
+model = AutoTS(
+    forecast_length=3,
+    frequency='infer',
+    prediction_interval=0.9,
+    ensemble='all',
+    model_list='superfast',
+    max_generations=5,
+    num_validations=2,
+    validation_method='even',
+)
+model = model.fit(df_long, date_col='datetime', value_col='value', id_col='series_id')
 
 # Print the details of the best model
 print(model)
@@ -59,7 +68,6 @@ forecasts_df = prediction.forecast
 model_results = model.results()
 # and aggregated from cross validation
 validation_results = model.results("validation")
-
 ```
 
 Check out [extended_tutorial.md](https://winedarksea.github.io/AutoTS/build/html/source/tutorial.html) for a more detailed guide to features!
 
@@ -15,9 +15,8 @@
 * Forecasts are desired for the future immediately following the most recent data.
 
 # Latest
-* ARIMA to ARIMAX (for Statsmodels v0.13)
-* ARIMA parallelization
-* update of daily sample data, reduced space used by yearly and hourly
+* fix error where wide data import skipped cleaning steps
+* long=True/False for all example data
 
 # Errors: 
 DynamicFactor holidays 	Exceptions 'numpy.ndarray' object has no attribute 'values'
@@ -85,6 +84,7 @@ Tensorflow GPU backend may crash on occasion.
 	* Rename from Best3 to BestN
 	* Dicts instead of list of DFs
 	* Add 'model_count' to parameters
+* allow best_model to be specified and entirely bypass the .fit() stage.
 
 * check models from M5 competition results
 * minmaxscaler as scoring for weighted Score generation
 
@@ -13,7 +13,7 @@
 from autots.tools.transform import GeneralTransformer
 from autots.tools.shaping import long_to_wide
 
-__version__ = '0.2.4'
+__version__ = '0.2.5'
 
 
 __all__ = [
 
@@ -51,9 +51,22 @@ def load_fred_monthly():
     return df_long
 
 
-def load_monthly():
+def load_monthly(long: bool = True):
     """Federal Reserve of St. Louis monthly economic indicators."""
-    return load_fred_monthly()
+    if long:
+        return load_fred_monthly()
+    else:
+        from autots.tools.shaping import long_to_wide
+
+        df_long = load_fred_monthly()
+        df_wide = long_to_wide(
+            df_long,
+            date_col='datetime',
+            value_col='value',
+            id_col='series_id',
+            aggfunc='first',
+        )
+        return df_wide
 
 
 def load_fred_yearly():
@@ -86,9 +99,22 @@ def load_fred_yearly():
     return df_long
 
 
-def load_yearly():
+def load_yearly(long: bool = True):
     """Federal Reserve of St. Louis annual economic indicators."""
-    return load_fred_yearly()
+    if long:
+        return load_fred_yearly()
+    else:
+        from autots.tools.shaping import long_to_wide
+
+        df_long = load_fred_yearly()
+        df_wide = long_to_wide(
+            df_long,
+            date_col='datetime',
+            value_col='value',
+            id_col='series_id',
+            aggfunc='first',
+        )
+        return df_wide
 
 
 def load_traffic_hourly(long: bool = True):
@@ -130,6 +156,19 @@ def load_eia_weekly():
     return df_long
 
 
-def load_weekly():
+def load_weekly(long: bool = True):
     """Weekly petroleum industry data from the EIA."""
-    return load_eia_weekly()
+    if long:
+        return load_eia_weekly()
+    else:
+        from autots.tools.shaping import long_to_wide
+
+        df_long = load_eia_weekly()
+        df_wide = long_to_wide(
+            df_long,
+            date_col='datetime',
+            value_col='value',
+            id_col='series_id',
+            aggfunc='first',
+        )
+        return df_wide
@@ -4,23 +4,28 @@
 import copy
 import json
 
-from autots.tools.shaping import long_to_wide
-import random
-from autots.tools.shaping import subset_series
-from autots.tools.shaping import simple_train_test_split
-from autots.evaluator.auto_model import TemplateEvalObject
-from autots.evaluator.auto_model import NewGeneticTemplate
-from autots.evaluator.auto_model import RandomTemplate
-from autots.evaluator.auto_model import TemplateWizard
-from autots.evaluator.auto_model import unpack_ensemble_models
-from autots.evaluator.auto_model import generate_score
+from autots.tools.shaping import (
+    long_to_wide,
+    df_cleanup,
+    subset_series,
+    simple_train_test_split,
+    NumericTransformer,
+)
+from autots.evaluator.auto_model import (
+    TemplateEvalObject,
+    NewGeneticTemplate,
+    RandomTemplate,
+    TemplateWizard,
+    unpack_ensemble_models,
+    generate_score,
+    PredictWitch,
+    validation_aggregation,
+)
 from autots.models.ensemble import (
     EnsembleTemplateGenerator,
     HorizontalTemplateGenerator,
 )
-from autots.evaluator.auto_model import PredictWitch
-from autots.tools.shaping import NumericTransformer
-from autots.evaluator.auto_model import validation_aggregation
+import random
 
 
 class AutoTS(object):
@@ -381,14 +386,19 @@ def fit(
                 date_col=self.date_col,
                 value_col=self.value_col,
                 id_col=self.id_col,
-                frequency=self.frequency,
-                na_tolerance=self.na_tolerance,
-                drop_data_older_than_periods=self.drop_data_older_than_periods,
                 aggfunc=self.aggfunc,
-                drop_most_recent=self.drop_most_recent,
-                verbose=self.verbose,
             )
 
+        df_wide = df_cleanup(
+            df_wide,
+            frequency=self.frequency,
+            na_tolerance=self.na_tolerance,
+            drop_data_older_than_periods=self.drop_data_older_than_periods,
+            aggfunc=self.aggfunc,
+            drop_most_recent=self.drop_most_recent,
+            verbose=self.verbose,
+        )
+
         # clean up series weighting input
         if not weighted:
             weights = {x: 1 for x in df_wide.columns}
@@ -1146,6 +1156,21 @@ def results(self, result_set: str = 'initial'):
         else:
             return self.initial_results.model_results
 
+    def failure_rate(self, result_set: str = 'initial'):
+        """Return the fraction of models whose 'Exceptions' entry is not null.
+
+        Args:
+            result_set (str, optional): 'validation' or 'initial'. Defaults to 'initial'.
+
+        Returns:
+            float: count of non-null 'Exceptions' rows divided by the total row count.
+
+        """
+        initial_results = self.results(result_set=result_set)
+        n = initial_results.shape[0]
+        x = (n - initial_results['Exceptions'].isna().sum()) / n
+        return x
+
     def export_template(
         self,
         filename,
@@ -1494,7 +1519,7 @@ def predict(self, future_regressor=[], verbose: int = 'self') -> dict:
 
 
 def fake_regressor(
-    df_long,
+    df,
     forecast_length: int = 14,
     date_col: str = 'datetime',
     value_col: str = 'value',
@@ -1505,20 +1530,30 @@ def fake_regressor(
     na_tolerance: float = 0.95,
     drop_data_older_than_periods: int = 100000,
     dimensions: int = 1,
+    verbose: int = 0,
 ):
     """Create a fake regressor of random numbers for testing purposes."""
-    from autots.tools.shaping import long_to_wide
 
-    df_wide = long_to_wide(
-        df_long,
-        date_col=date_col,
-        value_col=value_col,
-        id_col=id_col,
+    if date_col is None and value_col is None:
+        df_wide = pd.DataFrame(df)
+        assert type(df.index) is pd.DatetimeIndex, "df index is not pd.DatetimeIndex"
+    else:
+        df_wide = long_to_wide(
+            df,
+            date_col=date_col,
+            value_col=value_col,
+            id_col=id_col,
+            aggfunc=aggfunc,
+        )
+
+    df_wide = df_cleanup(
+        df_wide,
         frequency=frequency,
         na_tolerance=na_tolerance,
-        aggfunc=aggfunc,
         drop_data_older_than_periods=drop_data_older_than_periods,
+        aggfunc=aggfunc,
         drop_most_recent=drop_most_recent,
+        verbose=verbose,
     )
     if frequency == 'infer':
         frequency = pd.infer_freq(df_wide.index, warn=True)
 
@@ -1065,7 +1065,6 @@ def fit(self, df, future_regressor=[]):
         Args:
             df (pandas.DataFrame): Datetime Indexed
         """
-        print(f"N_jobs is {self.n_jobs}")
         df = self.basic_profile(df)
         self.df_train = df
         X, Y = window_maker(
@@ -1117,7 +1116,7 @@ def predict(
         if univariate and 1, transpose
         """
         if int(forecast_length) > int(self.forecast_length):
-            print("GluonTS must be refit to change forecast length!")
+            print("Regression must be refit to change forecast length!")
         predictStartTime = datetime.datetime.now()
         index = self.create_forecast_index(forecast_length=forecast_length)