Skip to content

Commit 8cb04f6

Browse files
authored
Feat/improve timeseries (#2196)
* found major performance boost for time series creation * first boosted time series version * improve slicing with integers * improve slicing with time stamps * improve slicing with time stamps * update from_xarray * improve from_group_dataframe() * remove test time series * remove old time series * add option to drop group columns from from_group_dataframe * update changelog * apply suggestions from PR review
1 parent 0b4dcf0 commit 8cb04f6

File tree

3 files changed

+330
-203
lines changed

3 files changed

+330
-203
lines changed

CHANGELOG.md

+6
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,12 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
1010

1111
### For users of the library:
1212
**Improved**
13+
- Improvements to `TimeSeries`: [#2196](https://github.com/unit8co/darts/pull/2196) by [Dennis Bader](https://github.com/dennisbader).
14+
- 🚀🚀🚀 Significant performance boosts for several `TimeSeries` methods, resulting in increased efficiency across the entire `Darts` library. Up to 2x faster creation times for series indexed with "regular" frequencies (e.g. daily, hourly, ...), and >100x for series indexed with "special" frequencies (e.g. "W-MON", ...). Affects:
15+
- All `TimeSeries` creation methods
16+
- Additional boosts for slicing with integers and Timestamps
17+
- Additional boosts for `from_group_dataframe()` by performing some of the heavy-duty computations on the entire DataFrame, rather than iteratively on the group level.
18+
- Added option to exclude some `group_cols` from being added as static covariates when using `TimeSeries.from_group_dataframe()` with parameter `drop_group_cols`.
1319

1420
**Fixed**
1521

darts/tests/test_timeseries_static_covariates.py

+76-11
Original file line numberDiff line numberDiff line change
@@ -154,27 +154,92 @@ def test_timeseries_from_longitudinal_df(self):
154154
)
155155
assert (ts.static_covariates_values(copy=False) == [[i, j, 1]]).all()
156156

157-
df = copy.deepcopy(self.df_long_multi)
158-
df.loc[:, "non_static"] = np.arange(len(df))
159-
# non static columns as static columns should raise an error
160-
with pytest.raises(ValueError):
157+
# drop group columns gives same time series with dropped static covariates
158+
# drop first column
159+
ts_groups4 = TimeSeries.from_group_dataframe(
160+
df=self.df_long_multi,
161+
group_cols=["st1", "st2"],
162+
static_cols=["constant"],
163+
time_col="times",
164+
value_cols=value_cols,
165+
drop_group_cols=["st1"],
166+
)
167+
assert len(ts_groups4) == self.n_groups * 2
168+
for idx, ts in enumerate(ts_groups4):
169+
j = idx % 2
170+
assert ts.static_covariates.shape == (1, 2)
171+
assert ts.static_covariates.columns.equals(pd.Index(["st2", "constant"]))
172+
assert (ts.static_covariates_values(copy=False) == [[j, 1]]).all()
173+
174+
# drop last column
175+
ts_groups5 = TimeSeries.from_group_dataframe(
176+
df=self.df_long_multi,
177+
group_cols=["st1", "st2"],
178+
static_cols=["constant"],
179+
time_col="times",
180+
value_cols=value_cols,
181+
drop_group_cols=["st2"],
182+
)
183+
assert len(ts_groups5) == self.n_groups * 2
184+
for idx, ts in enumerate(ts_groups5):
185+
i = idx // 2
186+
assert ts.static_covariates.shape == (1, 2)
187+
assert ts.static_covariates.columns.equals(pd.Index(["st1", "constant"]))
188+
assert (ts.static_covariates_values(copy=False) == [[i, 1]]).all()
189+
190+
# drop all columns
191+
ts_groups6 = TimeSeries.from_group_dataframe(
192+
df=self.df_long_multi,
193+
group_cols=["st1", "st2"],
194+
static_cols=["constant"],
195+
time_col="times",
196+
value_cols=value_cols,
197+
drop_group_cols=["st1", "st2"],
198+
)
199+
assert len(ts_groups6) == self.n_groups * 2
200+
for ts in ts_groups6:
201+
assert ts.static_covariates.shape == (1, 1)
202+
assert ts.static_covariates.columns.equals(pd.Index(["constant"]))
203+
assert (ts.static_covariates_values(copy=False) == [[1]]).all()
204+
205+
# drop all static covariates (no `static_cols`, all `group_cols` dropped)
206+
ts_groups7 = TimeSeries.from_group_dataframe(
207+
df=self.df_long_multi,
208+
group_cols=["st1", "st2"],
209+
time_col="times",
210+
value_cols=value_cols,
211+
drop_group_cols=["st1", "st2"],
212+
)
213+
assert len(ts_groups7) == self.n_groups * 2
214+
for ts in ts_groups7:
215+
assert ts.static_covariates is None
216+
217+
def test_from_group_dataframe_invalid_drop_cols(self):
218+
# drop col is not part of `group_cols`
219+
with pytest.raises(ValueError) as err:
161220
_ = TimeSeries.from_group_dataframe(
162-
df=df,
221+
df=self.df_long_multi,
163222
group_cols=["st1"],
164-
static_cols=["non_static"],
165223
time_col="times",
166-
value_cols=value_cols,
224+
value_cols="a",
225+
drop_group_cols=["invalid"],
167226
)
227+
assert str(err.value).endswith("received: {'invalid'}.")
168228

229+
def test_from_group_dataframe_groups_too_short(self):
169230
# groups that are too short for TimeSeries requirements should raise an error
170-
with pytest.raises(ValueError):
231+
df = copy.deepcopy(self.df_long_multi)
232+
df.loc[:, "non_static"] = np.arange(len(df))
233+
with pytest.raises(ValueError) as err:
171234
_ = TimeSeries.from_group_dataframe(
172235
df=df,
173-
group_cols=["st1", "non_static"],
174-
static_cols=None,
236+
group_cols="non_static",
175237
time_col="times",
176-
value_cols=value_cols,
238+
value_cols="a",
177239
)
240+
assert str(err.value).startswith(
241+
"The time index of the provided DataArray is missing the freq attribute"
242+
)
178243

179244
def test_with_static_covariates_univariate(self):
180245
ts = linear_timeseries(length=10)

0 commit comments

Comments
 (0)