|
13 | 13 | from joblib import Parallel, delayed |
14 | 14 | from optuna.trial import TrialState |
15 | 15 | from sktime.forecasting.base import ForecastingHorizon |
| 16 | +from sktime.forecasting.compose import ForecastingPipeline |
| 17 | +from sktime.forecasting.model_selection import ForecastingGridSearchCV |
16 | 18 | from sktime.forecasting.theta import ThetaForecaster |
| 19 | +from sktime.param_est.seasonality import SeasonalityACF, SeasonalityPeriodogram |
17 | 20 | from sktime.split import ExpandingWindowSplitter |
18 | 21 | from sktime.transformations.series.detrend import Deseasonalizer |
19 | 22 |
|
20 | 23 | from ads.opctl import logger |
21 | | -from ads.opctl.operator.lowcode.common.utils import find_seasonal_period_from_dataset, normalize_frequency |
| 24 | +from ads.opctl.operator.lowcode.common.utils import normalize_frequency |
22 | 25 | from ads.opctl.operator.lowcode.forecast.operator_config import ForecastOperatorConfig |
23 | 26 | from ads.opctl.operator.lowcode.forecast.utils import (_label_encode_dataframe, _build_metrics_df) |
24 | | -from .base_model import ForecastOperatorBaseModel |
25 | 27 | from .forecast_datasets import ForecastDatasets, ForecastOutput |
26 | 28 | from .univariate_model import UnivariateForecasterOperatorModel |
27 | 29 | from ..const import ( |
@@ -64,65 +66,126 @@ def preprocess(self, data, series_id): |
64 | 66 | ) |
65 | 67 | return df_encoded.set_index(self.spec.datetime_column.name) |
66 | 68 |
|
| 69 | + def _get_sp_candidates(self, y, freq): |
| 70 | + """Finds SP candidates using 1. Freq, 2. ACF, and 3. Periodogram.""" |
| 71 | + candidates = set() |
| 72 | + |
| 73 | + # 1. Frequency mapping |
| 74 | + freq_map = {'H': 24, 'D': 7, 'W': 52, 'M': 12, 'Q': 4, 'Y': 1} |
| 75 | + if freq: |
| 76 | + base_freq = "".join(filter(str.isalpha, freq)) |
| 77 | + if base_freq in freq_map: |
| 78 | + candidates.add(freq_map[base_freq]) |
| 79 | + |
| 80 | + # 2. SeasonalityACF |
| 81 | + try: |
| 82 | + acf_est = SeasonalityACF() |
| 83 | + acf_est.fit(y) |
| 84 | + candidates.add(acf_est.get_fitted_params()["sp"]) |
| 85 | + except Exception as e: |
| 86 | + logger.debug(f"Unable to find seasonality using ACF: {e}") |
| 87 | + |
| 88 | + # 3. SeasonalityPeriodogram |
| 89 | + try: |
| 90 | + period_est = SeasonalityPeriodogram() |
| 91 | + period_est.fit(y) |
| 92 | + candidates.add(period_est.get_fitted_params()["sp"]) |
| 93 | + except Exception as e: |
| 94 | + logger.debug(f"Unable to find seasonality using SeasonalityPeriodogram: {e}") |
| 95 | + |
| 96 | + valid_candidates = [int(sp) for sp in candidates if len(y) >= 2 * sp] |
| 97 | + if 1 not in valid_candidates: |
| 98 | + valid_candidates.append(1) |
| 99 | + logger.debug(f"Found {valid_candidates} seasonality candidates") |
| 100 | + return sorted(list(valid_candidates)) |
| 101 | + |
67 | 102 | def _train_model(self, i, series_id, df: pd.DataFrame, model_kwargs: Dict[str, Any]): |
68 | 103 | try: |
69 | 104 | self.forecast_output.init_series_output(series_id=series_id, data_at_series=df) |
70 | 105 | data = self.preprocess(df, series_id) |
71 | | - |
72 | 106 | data_i = self.drop_horizon(data) |
73 | 107 | target = self.spec.target_column |
74 | 108 |
|
75 | | - freq = self.datasets.get_datetime_frequency() if self.datasets.get_datetime_frequency() is not None else pd.infer_freq( |
76 | | - data_i.index) |
| 109 | + freq = self.datasets.get_datetime_frequency() or pd.infer_freq(data_i.index) |
| 110 | + normalized_freq = normalize_frequency(freq) |
77 | 111 | if freq is not None: |
78 | | - normalized_freq = normalize_frequency(freq) |
79 | 112 | data_i.index = data_i.index.to_period(normalized_freq) |
80 | 113 |
|
81 | 114 | y = data_i[target] |
82 | 115 | X_in = data_i.drop(target, axis=1) |
83 | 116 |
|
84 | | - if model_kwargs["deseasonalize"] and model_kwargs["sp"] is None: |
85 | | - sp, probable_sps = find_seasonal_period_from_dataset(y) |
| 117 | + # --- 1. Determine Deseasonalization Strategy --- |
| 118 | + using_additive_deseasonalization = False |
| 119 | + additive_deseasonalizer = None |
| 120 | + |
| 121 | + # If negative values exist, we must use manual additive deseasonalization |
| 122 | + if model_kwargs.get("deseasonalize", True) and (y <= 0).any(): |
| 123 | + logger.info(f"Negative values detected in {series_id}. Using manual additive deseasonalization.") |
| 124 | + using_additive_deseasonalization = True |
| 125 | + model_kwargs["deseasonalize_model"] = "add" |
86 | 126 | else: |
87 | | - sp, probable_sps = 1, [1] |
| 127 | + model_kwargs["deseasonalize_model"] = "mul" |
88 | 128 |
|
89 | | - model_kwargs["sp"] = model_kwargs.get("sp") or sp |
| 129 | + sp_candidates = self._get_sp_candidates(y, normalized_freq) |
90 | 130 |
|
91 | | - if not sp or len(y) < 2 * model_kwargs["sp"]: |
92 | | - model_kwargs["deseasonalize"] = False |
93 | | - |
94 | | - # If model already loaded, extract parameters (best-effort) |
95 | 131 | if self.loaded_models is not None and series_id in self.loaded_models: |
96 | 132 | previous_res = self.loaded_models[series_id].get("model") |
97 | 133 | fitted_params = previous_res.get_fitted_params() |
98 | 134 | model_kwargs["initial_level"] = fitted_params.get("initial_level", None) |
99 | 135 | elif self.perform_tuning: |
100 | | - model_kwargs = self.run_tuning(y, X_in, model_kwargs, probable_sps) |
| 136 | + model_kwargs = self.run_tuning(y, X_in, model_kwargs, sp_candidates) |
| 137 | + elif model_kwargs.get("sp") is None: |
| 138 | + logger.debug(f"Found {sp_candidates} SP candidates") |
| 139 | + sp_candidates.append(1) |
| 140 | + if not sp_candidates: |
| 141 | + best_sp = 1 |
| 142 | + elif len(sp_candidates) == 1: |
| 143 | + best_sp = sp_candidates[0] |
| 144 | + else: |
| 145 | + cv = ExpandingWindowSplitter( |
| 146 | + initial_window=min(int(len(y) * 0.7), len(y) - self.spec.horizon), |
| 147 | + step_length=max(1, int(self.spec.horizon / 2)), |
| 148 | + fh=range(1, self.spec.horizon + 1) |
| 149 | + ) |
101 | 150 |
|
102 | | - # Fit ThetaModel using params |
103 | | - using_additive_deseasonalization = False |
104 | | - additive_deseasonalizer = None |
105 | | - if model_kwargs["deseasonalize"]: |
106 | | - if (y <= 0).any(): |
107 | | - logger.warning( |
108 | | - "Processing data with additive deseasonalization model as data contains negative or zero values which can't be deseasonalized using multiplicative deseasonalization. And ThetaForecaster by default only supports multiplicative deseasonalization.") |
109 | | - model_kwargs["deseasonalize_model"] = "add" |
110 | | - using_additive_deseasonalization = True |
111 | | - additive_deseasonalizer = Deseasonalizer( |
112 | | - sp=model_kwargs["sp"], |
113 | | - model="additive", |
| 151 | + if using_additive_deseasonalization: |
| 152 | + forecaster = ForecastingPipeline([ |
| 153 | + ("deseasonalize", Deseasonalizer(model="additive")), |
| 154 | + ("theta", ThetaForecaster(deseasonalize=False)) |
| 155 | + ]) |
| 156 | + param_grid = {"deseasonalize__sp": sp_candidates} |
| 157 | + else: |
| 158 | + forecaster = ThetaForecaster(deseasonalize=True) |
| 159 | + param_grid = {"sp": sp_candidates} |
| 160 | + |
| 161 | + gscv = ForecastingGridSearchCV( |
| 162 | + forecaster=forecaster, |
| 163 | + cv=cv, |
| 164 | + param_grid=param_grid, |
114 | 165 | ) |
115 | | - y_adj = additive_deseasonalizer.fit_transform(y) |
116 | | - y = y_adj |
117 | | - model_kwargs["deseasonalize"] = False |
118 | | - else: |
119 | | - model_kwargs["deseasonalize_model"] = "" |
| 166 | + gscv.fit(y, X=X_in) |
| 167 | + |
| 168 | + # Extract the best sp based on which param name was used |
| 169 | + best_params = gscv.best_params_ |
| 170 | + logger.info(f"Found {best_params} from seasonality candidates") |
| 171 | + best_sp = best_params.get("deseasonalize__sp") or best_params.get("sp") |
120 | 172 |
|
121 | | - model = ThetaForecaster(initial_level=model_kwargs["initial_level"], |
122 | | - deseasonalize=model_kwargs["deseasonalize"], |
123 | | - sp=1 if model_kwargs["deseasonalize_model"] == "add" else model_kwargs.get("sp", |
124 | | - 1), ) |
125 | | - model.fit(y, X=X_in) |
| 173 | + model_kwargs["sp"] = best_sp |
| 174 | + |
| 175 | + y_to_fit = y.copy() |
| 176 | + if using_additive_deseasonalization and model_kwargs["sp"] > 1: |
| 177 | + additive_deseasonalizer = Deseasonalizer(sp=model_kwargs["sp"], model="additive") |
| 178 | + y_to_fit = additive_deseasonalizer.fit_transform(y) |
| 179 | + |
| 180 | + if model_kwargs["sp"] == 1 or using_additive_deseasonalization: |
| 181 | + model_kwargs["deseasonalize"] = False |
| 182 | + |
| 183 | + model = ThetaForecaster( |
| 184 | + initial_level=model_kwargs.get("initial_level"), |
| 185 | + deseasonalize=model_kwargs.get("deseasonalize", True), |
| 186 | + sp=1 if using_additive_deseasonalization else model_kwargs.get("sp", 1), |
| 187 | + ) |
| 188 | + model.fit(y_to_fit, X=X_in) |
126 | 189 |
|
127 | 190 | fh = ForecastingHorizon(range(1, self.spec.horizon + 1), is_relative=True) |
128 | 191 | fh_in_sample = ForecastingHorizon(range(-len(data_i) + 1, 1)) |
|
0 commit comments