Skip to content

Commit 94d917d

Browse files
Dev 005 (#17)
Adding simple generators for time-series: trends, seasons, changepoints
2 parents cf2d615 + 3f704fd commit 94d917d

File tree

17 files changed

+1083
-9
lines changed

17 files changed

+1083
-9
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@ Import badgers as any other library and start using it:
1717

1818
```python
1919
from sklearn.datasets import make_blobs
20-
from badgers.generators.tabular_data.noise import GaussianNoiseGenerator
20+
from badgers.generators.tabular_data.noise import GlobalGaussianNoiseGenerator
2121

2222
X, y = make_blobs()
23-
trf = GaussianNoiseGenerator(noise_std=0.5)
23+
trf = GlobalGaussianNoiseGenerator(noise_std=0.5)
2424
Xt, yt = trf.generate(X,y)
2525
```
2626

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import abc
2+
from typing import Tuple
3+
4+
from numpy.random import default_rng
5+
6+
from badgers.core.base import GeneratorMixin
7+
8+
9+
class ChangePointGenerator(GeneratorMixin):
    """
    Base class for generators that generate changepoints in time-series data
    """

    def __init__(self, random_generator=default_rng(seed=0), n_changepoints: int = 10):
        """
        :param random_generator: a random number generator
        :param n_changepoints: the number of changepoints to generate
        """
        # NOTE(review): the default generator is built once, when the signature is
        # evaluated, so instances created without an explicit generator share it.
        self.random_generator = random_generator
        self.n_changepoints = n_changepoints
        # No changepoints exist until a concrete generate() has been called.
        self.changepoints = None

    @abc.abstractmethod
    def generate(self, X, y, **params) -> Tuple:
        """
        Generate changepoints in the input data (to be implemented by subclasses)

        :param X: the input time-series data
        :param y: the target, if any
        :param params: optional additional parameters
        :return: a tuple (Xt, yt) holding the transformed data and the target
        """
26+
27+
28+
class RandomChangeInMeanGenerator(ChangePointGenerator):
    """
    Generate changepoints at random positions, each one shifting the mean of the signal
    """

    def __init__(self, random_generator=default_rng(seed=0), n_changepoints: int = 10, min_change: float = -5,
                 max_change: float = 5):
        """
        :param random_generator: a random number generator
        :param n_changepoints: the number of changepoints to generate
        :param min_change: lower bound of the shift drawn for each changepoint
        :param max_change: upper bound of the shift drawn for each changepoint
        """
        super().__init__(random_generator=random_generator, n_changepoints=n_changepoints)
        self.max_change = max_change
        self.min_change = min_change

    def generate(self, X, y, **params) -> Tuple:
        """
        Shift everything after each randomly drawn changepoint by a random amount

        The drawn (position, shift) pairs are stored in `self.changepoints`.

        :param X: the input time-series data
        :param y: the target, returned untouched
        :param params: unused
        :return: a tuple (Xt, y) where Xt is a shifted copy of X
        """
        n_samples = len(X)
        # Positions are restricted to the central part of the series:
        # from 5% (inclusive) to 95% (exclusive) of its length.
        first_allowed = int(0.05 * n_samples)
        last_allowed = int(0.95 * n_samples)

        # Draw positions first, then shifts, so the random stream is consumed in a fixed order.
        positions = self.random_generator.integers(first_allowed, last_allowed, size=self.n_changepoints)
        shifts = self.random_generator.uniform(self.min_change, self.max_change, size=self.n_changepoints)
        self.changepoints = list(zip(positions, shifts))

        # NOTE(review): assumes X supports .copy() and in-place slice addition of floats
        # (e.g. a float numpy array) -- confirm against callers.
        Xt = X.copy()
        for position, shift in self.changepoints:
            # Shifts accumulate: each changepoint moves every sample from its position onwards.
            Xt[position:] += shift

        return Xt, y

badgers/generators/time_series/noise.py

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,43 @@ def generate(self, X, y, **params) -> Tuple:
2323
pass
2424

2525

26-
class GaussianNoiseGenerator(NoiseGenerator):
26+
class LocalGaussianNoiseGenerator(NoiseGenerator):
    """
    Add Gaussian noise to randomly located windows of the input time-series data
    """

    def __init__(self, random_generator=default_rng(seed=0), n_patterns: int = 10, patterns_width: int = 10,
                 noise_std: float = 0.1):
        """
        :param random_generator: a random number generator
        :param n_patterns: the number of noisy windows to generate
        :param patterns_width: the number of consecutive rows covered by each noisy window
        :param noise_std: the standard deviation of the noise (applied to the scaled data)
        """
        super().__init__(random_generator=random_generator)
        self.n_patterns = n_patterns
        self.patterns_width = patterns_width
        self.noise_std = noise_std

    def generate(self, X, y, **params) -> Tuple:
        """
        Add Gaussian noise to `n_patterns` windows of `patterns_width` rows each

        The (start, end) row indices of the windows are stored in `self.patterns_indices_`.

        :param X: the input time-series data, as a 2D array (rows are time steps)
        :param y: the target, returned untouched
        :param params: unused
        :return: a tuple (Xt, y) where Xt is the noisy version of X
        :raises ValueError: if X is not at least 2D, or if it has too few rows to place
            `n_patterns` windows of width `patterns_width`
        """
        if X.ndim < 2:
            raise ValueError(
                "Expected 2D array. "
                "Reshape your data either using array.reshape(-1, 1) if "
                "your data has a single feature or array.reshape(1, -1) "
                "if it contains a single sample."
            )

        # Number of candidate start rows. Fail early with an explicit message instead of
        # letting the sampling call below raise a less helpful ValueError.
        n_starts = X.shape[0] - self.patterns_width
        if n_starts < 0 or n_starts < self.n_patterns:
            raise ValueError(
                f"Cannot place {self.n_patterns} patterns of width {self.patterns_width} "
                f"in a time series of length {X.shape[0]}."
            )

        # Draw distinct start rows; the windows themselves may still overlap.
        # NOTE(review): the last possible window (starting at row n_starts) is never drawn;
        # kept as is so that seeded outputs do not change.
        self.patterns_indices_ = [
            (start, start + self.patterns_width)
            for start in self.random_generator.choice(n_starts, size=self.n_patterns, replace=False, p=None)
        ]

        scaler = StandardScaler()
        # fit, transform
        scaler.fit(X)
        Xt = scaler.transform(X)

        # Add noise to each window, in the scaled space.
        for (start, end) in self.patterns_indices_:
            Xt[start:end, :] += self.random_generator.normal(
                loc=0, scale=self.noise_std, size=(self.patterns_width, Xt.shape[1])
            )

        # inverse standardization
        return scaler.inverse_transform(Xt), y
60+
61+
62+
class GlobalGaussianNoiseGenerator(NoiseGenerator):
2763
def __init__(self, random_generator=default_rng(seed=0), noise_std: float = 0.1):
2864
"""
2965
@@ -49,5 +85,5 @@ def generate(self, X, y, **params):
4985
Xt = scaler.transform(X)
5086
# add noise
5187
Xt = Xt + self.random_generator.normal(loc=0, scale=self.noise_std, size=Xt.shape)
52-
# inverse pca
88+
# inverse standardization
5389
return scaler.inverse_transform(Xt), y
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import abc
2+
from typing import Tuple
3+
4+
import numpy as np
5+
from numpy.random import default_rng
6+
7+
from badgers.core.base import GeneratorMixin
8+
9+
10+
class SeasonsGenerator(GeneratorMixin):
    """
    Common base for generators that add seasonal patterns to time-series data
    """

    def __init__(self, random_generator=default_rng(seed=0)):
        """
        :param random_generator: a random number generator, stored on the instance
        """
        self.random_generator = random_generator

    @abc.abstractmethod
    def generate(self, X, y, **params) -> Tuple:
        """
        Add a season to the input data (to be implemented by subclasses)

        :param X: the input time-series data
        :param y: the target, if any
        :param params: optional additional parameters
        :return: a tuple (Xt, yt) holding the transformed data and the target
        """
24+
25+
26+
class GlobalAdditiveSinusoidalSeasonGenerator(SeasonsGenerator):
    """
    Add a sinusoidal season to the input time-series data
    """

    def __init__(self, random_generator=default_rng(seed=0), period: int = 10):
        """
        :param random_generator: a random number generator
        :param period: the period of the sinusoid, expressed in number of time steps
        """
        super().__init__(random_generator=random_generator)
        self.period = period

    def generate(self, X, y, **params) -> Tuple:
        """
        Add sin(2 * pi * t / period) to X, where t is the index along the first axis

        :param X: the input time-series data; the first axis is treated as time
        :param y: the target, returned untouched
        :param params: unused
        :return: a tuple (Xt, y) where Xt is X plus the season
        """
        t = np.arange(len(X))
        season = np.sin(t * 2 * np.pi / self.period)
        # Align the season with the first (time) axis so that it broadcasts over any
        # trailing feature axes. For 1D input this reshape is a no-op; without it a
        # (n, 1) input would broadcast to (n, n) and most (n, d) inputs would fail.
        season = season.reshape((-1,) + (1,) * (np.ndim(X) - 1))
        Xt = X + season
        return Xt, y
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import abc
2+
from typing import Tuple
3+
4+
import numpy as np
5+
from numpy.random import default_rng
6+
7+
from badgers.core.base import GeneratorMixin
8+
9+
10+
class TrendsGenerator(GeneratorMixin):
    """
    Common base for generators that add trends to time-series data
    """

    def __init__(self, random_generator=default_rng(seed=0)):
        """
        :param random_generator: a random number generator, stored on the instance
        """
        self.random_generator = random_generator

    @abc.abstractmethod
    def generate(self, X, y, **params) -> Tuple:
        """
        Add a trend to the input data (to be implemented by subclasses)

        :param X: the input time-series data
        :param y: the target, if any
        :param params: optional additional parameters
        :return: a tuple (Xt, yt) holding the transformed data and the target
        """
24+
25+
26+
class GlobalAdditiveLinearTrendGenerator(TrendsGenerator):
    """
    Add a linear trend to the input time-series data
    """

    def __init__(self, random_generator=default_rng(seed=0), slope: float = 0.1):
        """
        :param random_generator: a random number generator
        :param slope: the total increase added between the first and the last time step
        """
        super().__init__(random_generator=random_generator)
        self.slope = slope

    def generate(self, X, y, **params) -> Tuple:
        """
        Add a trend growing linearly from 0 (first time step) to `slope` (last time step)

        :param X: the input time-series data; the first axis is treated as time
        :param y: the target, returned untouched
        :param params: unused
        :return: a tuple (Xt, y) where Xt is X plus the trend
        """
        trend = self.slope * np.linspace(0, 1, len(X))
        # Align the trend with the first (time) axis so that it broadcasts over any
        # trailing feature axes. For 1D input this reshape is a no-op; without it a
        # (n, 1) input would broadcast to (n, n) and most (n, d) inputs would fail.
        trend = trend.reshape((-1,) + (1,) * (np.ndim(X) - 1))
        Xt = X + trend
        return Xt, y

docs/getting-started.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ Import badgers as any other library and start using it:
1010

1111
```python
1212
from sklearn.datasets import make_blobs
13-
from badgers.generators.tabular_data.noise import GaussianNoiseGenerator
13+
from badgers.generators.tabular_data.noise import GlobalGaussianNoiseGenerator
1414

1515
X, y = make_blobs()
16-
trf = GaussianNoiseGenerator(noise_std=0.5)
16+
trf = GlobalGaussianNoiseGenerator(noise_std=0.5)
1717
Xt, yt = trf.generate(X, y)
1818
```
1919

docs/tutorials/Changepoints-Time-Series.ipynb

Lines changed: 202 additions & 0 deletions
Large diffs are not rendered by default.

docs/tutorials/Noise-Time-Series.ipynb

Lines changed: 222 additions & 0 deletions
Large diffs are not rendered by default.

docs/tutorials/Seasons-Time-Series.ipynb

Lines changed: 167 additions & 0 deletions
Large diffs are not rendered by default.

docs/tutorials/Trends-Time-Series.ipynb

Lines changed: 167 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)