Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/inverse transform for static covariate with single category across series #2710

Merged
merged 5 commits into from
Mar 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
**Fixed**

- 🔴 / 🟢 Fixed a bug which raised an error when loading torch models that were saved with Darts versions < 0.33.0. This is a breaking change and models saved with version 0.33.0 will not be loadable anymore. [#2692](https://github.com/unit8co/darts/pull/2692) by [Dennis Bader](https://github.com/dennisbader).
- Fixed a bug in `StaticCovariatesTransformer` which raised an error when trying to inverse transform one-hot encoded categorical static covariates with identical values across time-series. Each categorical static covariate is now referred to by `{covariate_name}_{category_name}`, regardless of the number of categories. [#2710](https://github.com/unit8co/darts/pull/2710) by [Antoine Madrona](https://github.com/madtoinou).
- Fixed a bug in `13-TFT-examples.ipynb` where two calls to `TimeSeries.from_series()` were passed a `pd.Index` instead of a series. The method calls were changed to `TimeSeries.from_values()`. [#2719](https://github.com/unit8co/darts/pull/2719) by [Jules Authier](https://github.com/authierj).

**Dependencies**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -300,12 +300,8 @@ def _create_category_mappings(
for col, categories in zip(cols_cat, transformer_cat.categories_):
col_map_cat_i = []
for cat in categories:
col_map_cat_i.append(cat)
if len(categories) > 1:
cat_col_name = str(col) + "_" + str(cat)
inv_col_map_cat[cat_col_name] = [col]
else:
inv_col_map_cat[cat] = [col]
col_map_cat_i.append(str(col) + "_" + str(cat))
inv_col_map_cat[str(col) + "_" + str(cat)] = [col]
col_map_cat[col] = col_map_cat_i
# If we don't have any categorical static covariates, don't need to generate mapping:
else:
Expand Down Expand Up @@ -393,16 +389,6 @@ def _transform_static_covs(
series, mask_num, mask_cat
)

# Transform static covs:
tr_out_num, tr_out_cat = None, None
if mask_num.any():
tr_out_num = getattr(transformer_num, method)(vals_num)
if mask_cat.any():
tr_out_cat = getattr(transformer_cat, method)(vals_cat)
# sparse one hot encoding to dense array
if isinstance(tr_out_cat, csr_matrix):
tr_out_cat = tr_out_cat.toarray()

# quick check if everything is in order
n_vals_cat_cols = 0 if vals_cat is None else vals_cat.shape[1]
if (method == "inverse_transform") and (n_vals_cat_cols != n_cat_cols):
Expand All @@ -413,6 +399,16 @@ def _transform_static_covs(
logger,
)

# Transform static covs:
tr_out_num, tr_out_cat = None, None
if mask_num.any():
tr_out_num = getattr(transformer_num, method)(vals_num)
if mask_cat.any():
tr_out_cat = getattr(transformer_cat, method)(vals_cat)
# sparse one hot encoding to dense array
if isinstance(tr_out_cat, csr_matrix):
tr_out_cat = tr_out_cat.toarray()

series = StaticCovariatesTransformer._add_back_static_covs(
series, tr_out_num, tr_out_cat, mask_num, mask_cat, col_map_cat
)
Expand Down Expand Up @@ -458,8 +454,6 @@ def _add_back_static_covs(
elif is_cat: # categorical transformed column
# covers one to one feature map (ordinal/label encoding) and one to multi feature (one hot encoding)
for col_name in col_map_cat[col]:
if len(col_map_cat[col]) > 1:
col_name = str(col) + "_" + str(col_name)
if col_name not in static_cov_columns:
data[col_name] = vals_cat[:, idx_cat]
static_cov_columns.append(col_name)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,32 @@ def test_scaling_multi_series(self):
series_recovered_multi[1].static_covariates
)

def test_zero_cardinality_multi_series(self):
    """Round-trip one-hot encoded static covariates through transform / inverse-transform.

    Two series are built from ``self.series``: ``cov_a`` and ``cov_b`` carry the same
    value ("foo") in both series, while ``cov_c`` differs ("foo" vs. "bar"). After
    fitting a ``StaticCovariatesTransformer`` with a ``OneHotEncoder`` on both series,
    inverse-transforming the encoded series must give back the original static
    covariates of each series.
    """
    # covariates that take a single, identical value in every series
    shared_covs = {"cov_a": "foo", "cov_b": "foo"}

    first = self.series.with_static_covariates(
        pd.Series({**shared_covs, "cov_c": "foo"})
    )
    second = self.series.with_static_covariates(
        pd.Series({**shared_covs, "cov_c": "bar"})
    )

    encoder = StaticCovariatesTransformer(transformer_cat=OneHotEncoder())
    encoder.fit([first, second])
    first_enc, second_enc = encoder.transform([first, second])
    first_inv, second_inv = encoder.inverse_transform([first_enc, second_enc])

    # each recovered series must expose exactly the static covariates it started with
    for recovered, original in ((first_inv, first), (second_inv, second)):
        pd.testing.assert_frame_equal(
            recovered.static_covariates, original.static_covariates
        )

def helper_test_scaling(self, series, scaler, test_values):
series_tr = scaler.fit_transform(series)
assert all([
Expand Down
Loading