Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: adding support for getting estimator based on quantile #2716

Merged
merged 8 commits into from
Mar 7, 2025
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
- Made method `ForecastingModel.untrained_model()` public. Use this method to get a new (untrained) model instance created with the same parameters. [#2684](https://github.com/unit8co/darts/pull/2684) by [Timon Erhart](https://github.com/turbotimon)
- `TimeSeries.plot()` now supports setting the color for each component in the series. Simply pass a list / sequence of colors with length matching the number of components as parameters "c" or "colors". [#2680](https://github.com/unit8co/darts/pull/2680) by [Jules Authier](https://github.com/authierj)
- Made it possible to run the quickstart notebook `00-quickstart.ipynb` locally. [#2691](https://github.com/unit8co/darts/pull/2691) by [Jules Authier](https://github.com/authierj)
- Added `quantile` parameter to `RegressionModel.get_estimator()` to get the specific quantile estimator for probabilistic regression models using the `quantile` likelihood. [#2716](https://github.com/unit8co/darts/pull/2716) by [Antoine Madrona](https://github.com/madtoinou)

**Fixed**

Expand Down
74 changes: 55 additions & 19 deletions darts/models/forecasting/regression_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,43 +505,78 @@ def output_chunk_length(self) -> int:
def output_chunk_shift(self) -> int:
return self._output_chunk_shift

def get_multioutput_estimator(self, horizon: int, target_dim: int):
def get_multioutput_estimator(
self, horizon: int, target_dim: int, quantile: Optional[float] = None
):
"""Returns the estimator that forecasts the `horizon`th step of the `target_dim`th target component.

Internally, estimators are grouped by `output_chunk_length` position, then by component.

Note: for probabilistic models fitting quantiles, there is an additional abstraction layer,
grouping the estimators by `quantile`.

Parameters
----------
horizon
The index of the forecasting point within `output_chunk_length`.
target_dim
The index of the target component.
quantile
Optionally, for probabilistic models with `likelihood="quantile"`, a quantile value.
"""
raise_if_not(
isinstance(self.model, MultiOutputRegressor),
"The sklearn model is not a MultiOutputRegressor object.",
logger,
)
raise_if_not(
0 <= horizon < self.output_chunk_length,
f"`horizon` must be `>= 0` and `< output_chunk_length={self.output_chunk_length}`.",
logger,
)
raise_if_not(
0 <= target_dim < self.input_dim["target"],
f"`target_dim` must be `>= 0`, and `< n_target_components={self.input_dim['target']}`.",
logger,
)
if not isinstance(self.model, MultiOutputRegressor):
raise_log(
ValueError("The sklearn model is not a MultiOutputRegressor object."),
logger,
)
if not 0 <= horizon < self.output_chunk_length:
raise_log(
ValueError(
f"`horizon` must be `>= 0` and `< output_chunk_length={self.output_chunk_length}`."
),
logger,
)
if not 0 <= target_dim < self.input_dim["target"]:
raise_log(
ValueError(
f"`target_dim` must be `>= 0`, and `< n_target_components={self.input_dim['target']}`."
),
logger,
)

# when multi_models=True, one model per horizon and target component
idx_estimator = (
self.multi_models * self.input_dim["target"] * horizon + target_dim
)
return self.model.estimators_[idx_estimator]
if quantile is None:
return self.model.estimators_[idx_estimator]

def get_estimator(self, horizon: int, target_dim: int):
# for quantile-models, the estimators are also grouped by quantiles
if self.likelihood != "quantile":
raise_log(
ValueError(
"`quantile` is only supported for probabilistic models that "
"use `likelihood='quantile'`."
),
logger,
)
if quantile not in self._model_container:
raise_log(
ValueError(
f"Invalid `quantile={quantile}`. Must be one of the fitted quantiles "
f"`{list(self._model_container.keys())}`."
),
logger,
)
return self._model_container[quantile].estimators_[idx_estimator]

def get_estimator(
self, horizon: int, target_dim: int, quantile: Optional[float] = None
):
"""Returns the estimator that forecasts the `horizon`th step of the `target_dim`th target component.

For probabilistic models fitting quantiles, it is possible to also specify the quantile.

The model is returned directly if it supports multi-output natively.

Parameters
Expand All @@ -550,8 +585,9 @@ def get_estimator(self, horizon: int, target_dim: int):
The index of the forecasting point within `output_chunk_length`.
target_dim
The index of the target component.
quantile
Optionally, for probabilistic models with `likelihood="quantile"`, a quantile value.
"""

if isinstance(self.model, MultiOutputRegressor):
return self.get_multioutput_estimator(
horizon=horizon, target_dim=target_dim
Expand Down
134 changes: 133 additions & 1 deletion darts/tests/models/forecasting/test_regression_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1333,7 +1333,7 @@ def test_opti_historical_forecast_predict_checks(self):
],
)
def test_multioutput_wrapper(self, config):
"""Check that with input_chunk_length=1, wrapping in MultiOutputRegressor is not happening"""
"""Check that with input_chunk_length=1, wrapping in MultiOutputRegressor occurs only when necessary"""
model, supports_multioutput_natively = config
model.fit(series=self.sine_multivariate1)
if supports_multioutput_natively:
Expand Down Expand Up @@ -1490,6 +1490,138 @@ def test_get_multioutput_estimator_single_model(self):
# sub-model forecast only depend on the target_dim
assert np.abs(j + 1 - pred) < 1e-2

@pytest.mark.parametrize("multi_models", [True, False])
def test_get_multioutput_estimator_quantile(self, multi_models):
    """Check that the estimator getter returns the correct per-quantile sub-model."""
    ocl = 3
    lags = 3
    quantiles = [0.01, 0.5, 0.99]
    series = tg.sine_timeseries(length=100, column_name="sine").stack(
        tg.linear_timeseries(length=100, column_name="linear"),
    )

    model = XGBModel(
        lags=lags,
        output_chunk_length=ocl,
        multi_models=multi_models,
        likelihood="quantile",
        quantiles=quantiles,
        random_state=1,
    )
    model.fit(series)

    # one fitted container per requested quantile
    assert len(model._model_container) == len(quantiles)
    assert sorted(list(model._model_container.keys())) == sorted(quantiles)
    # multi_models=True: one sub-model per component and horizon;
    # multi_models=False: one sub-model per component only
    expected_n_estimators = ocl * series.width if multi_models else series.width
    for container in model._model_container.values():
        assert len(container.estimators_) == expected_n_estimators

    # predictions of each retrieved sub-model must match the wrapper
    # model's predictions for the corresponding component/horizon/quantile
    pred_input = series[-lags:] if multi_models else series[-lags - ocl + 1 :]
    pred = model.predict(
        n=ocl,
        series=pred_input,
        num_samples=1,
        predict_likelihood_parameters=True,
    )
    for comp_idx in range(series.width):
        for horizon in range(ocl):
            # rebuild the lagged feature vector the sub-model was trained on
            if multi_models:
                feats = pred_input.values()[:lags]
            else:
                feats = pred_input.values()[horizon : horizon + lags]
            feats = np.expand_dims(feats.flatten(), 0)
            for q in quantiles:
                estimator = model.get_multioutput_estimator(
                    horizon=horizon, target_dim=comp_idx, quantile=q
                )
                expected = pred[f"{series.components[comp_idx]}_q{q:.2f}"].values()[
                    horizon
                ][0]
                assert estimator.predict(feats)[0] == expected

def test_get_multioutput_estimator_exceptions(self):
    """Check that invalid arguments to the estimator getter raise clear errors."""
    series = TimeSeries.from_values(
        values=np.array([
            [0, 0, 0, 0, 1],
            [0, 0, 0, 0, 2],
        ]).T,
        columns=["a", "b"],
    )

    # natively multi-output model: never wrapped in MultiOutputRegressor
    model = LinearRegressionModel(
        lags=2,
        output_chunk_length=2,
        random_state=1,
    )
    model.fit(series["a"])
    with pytest.raises(ValueError) as exc:
        model.get_multioutput_estimator(horizon=0, target_dim=0)
    assert str(exc.value).startswith(
        "The sklearn model is not a MultiOutputRegressor object."
    )

    # univariate, deterministic model that gets wrapped in MultiOutputRegressor
    model = RegressionModel(
        model=HistGradientBoostingRegressor(),
        lags=2,
        output_chunk_length=2,
    )
    model.fit(series["a"])
    # horizon beyond output_chunk_length
    with pytest.raises(ValueError) as exc:
        model.get_multioutput_estimator(horizon=3, target_dim=0)
    assert str(exc.value).startswith(
        "`horizon` must be `>= 0` and `< output_chunk_length"
    )
    # target_dim beyond the training series width
    with pytest.raises(ValueError) as exc:
        model.get_multioutput_estimator(horizon=0, target_dim=1)
    assert str(exc.value).startswith(
        "`target_dim` must be `>= 0`, and `< n_target_components="
    )

    # univariate, probabilistic model with a non-quantile likelihood;
    # the quantiles argument forces the MultiOutputRegressor wrapping
    model = XGBModel(
        lags=2,
        output_chunk_length=2,
        random_state=1,
        likelihood="poisson",
        quantiles=[0.5],
    )
    model.fit(series["a"])
    # requesting a quantile with the wrong likelihood
    with pytest.raises(ValueError) as exc:
        model.get_multioutput_estimator(horizon=0, target_dim=0, quantile=0.1)
    assert str(exc.value).startswith(
        "`quantile` is only supported for probabilistic models that "
        "use `likelihood='quantile'`."
    )

    # univariate, probabilistic model with a quantile likelihood
    model = XGBModel(
        lags=2,
        output_chunk_length=2,
        random_state=1,
        likelihood="quantile",
        quantiles=[0.01, 0.5, 0.99],
    )
    model.fit(series["a"])
    # requesting a quantile that was not fitted
    with pytest.raises(ValueError) as exc:
        model.get_multioutput_estimator(horizon=0, target_dim=0, quantile=0.1)
    assert str(exc.value).startswith(
        "Invalid `quantile=0.1`. Must be one of the fitted quantiles "
        "`[0.01, 0.5, 0.99]`."
    )

@pytest.mark.parametrize("mode", [True, False])
def test_regression_model(self, mode):
lags = 4
Expand Down
Loading