Commit 0b52cdf

[ENH] Revert mini rocket to separate functions for univariate and multivariate (#1781)
* split multi and uni
* docstring
* docstring
* docstring
* formatting
1 parent 34cf7b5 commit 0b52cdf

File tree

3 files changed: +148 -75 lines changed


aeon/transformations/collection/base.py

-1
@@ -205,7 +205,6 @@ def fit_transform(self, X, y=None):
        self.reset()
        X_inner = self._preprocess_collection(X)
        y_inner = y
-
        Xt = self._fit_transform(X=X_inner, y=y_inner)

        self._is_fitted = True

aeon/transformations/collection/convolution_based/_minirocket.py

+144 -62
@@ -15,34 +15,47 @@
class MiniRocket(BaseCollectionTransformer):
    """MINImally RandOm Convolutional KErnel Transform (MiniRocket).

-    MiniRocket [1]_ is an almost deterministic version of Rocket. It creates
-    convolutions of length 9 with weights restricted to two values, and uses 84 fixed
-    convolutions with six of one weight, three of the second weight to seed dilations.
+    MiniRocket [1]_ is an almost deterministic version of Rocket. It creates
+    convolutions of length 9 with weights restricted to two values, and uses 84 fixed
+    convolutions with six of one weight, three of the second weight to seed dilations.

    Parameters
    ----------
-    num_kernels : int, default=10,000
-        Number of random convolutional kernels.
-    max_dilations_per_kernel : int, default=32
-        Maximum number of dilations per kernel.
-    n_jobs : int, default=1
-        The number of jobs to run in parallel for `transform`. ``-1`` means using all
-        processors.
-    random_state : None or int, default = None
-        Seed for random number generation.
+    num_kernels : int, default=10,000
+        Number of random convolutional kernels. The number of kernels used is rounded
+        down to the nearest multiple of 84, unless a value of less than 84 is passed,
+        in which case it is set to 84.
+    max_dilations_per_kernel : int, default=32
+        Maximum number of dilations per kernel.
+    n_jobs : int, default=1
+        The number of jobs to run in parallel for `transform`. ``-1`` means using all
+        processors.
+    random_state : None or int, default = None
+        Seed for random number generation.
+
+    Attributes
+    ----------
+    self.parameters : Tuple (int32[:], int32[:], int32[:], int32[:], float32[:])
+        n_channels_per_comb, channel_indices, dilations, n_features_per_dilation,
+        biases

    See Also
    --------
-    MiniRocket, Rocket
+    Rocket, MultiRocket, Hydra

    References
    ----------
-    .. [1] Dempster, Angus and Schmidt, Daniel F and Webb, Geoffrey I,
-       "MINIROCKET: A Very Fast (Almost) Deterministic Transform for Time Series
-       Classification", 2020,
-       https://dl.acm.org/doi/abs/10.1145/3447548.3467231,
-       https://arxiv.org/abs/2012.08791
+    .. [1] Dempster, Angus and Schmidt, Daniel F and Webb, Geoffrey I,
+       "MINIROCKET: A Very Fast (Almost) Deterministic Transform for Time Series
+       Classification", 2020,
+       https://dl.acm.org/doi/abs/10.1145/3447548.3467231,
+       https://arxiv.org/abs/2012.08791
+
+    Notes
+    -----
+    Directly adapted from the original implementation
+    https://github.com/angus924/minirocket.

    Examples
    --------
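For reference, the kernel-count behaviour described in the updated docstring can be sketched as below. `effective_num_kernels` is an illustrative helper based only on the docstring wording, not a function in the library:

```python
# Sketch of the documented behaviour: num_kernels is rounded down to a
# multiple of 84 (the number of fixed kernels), with a floor of 84.
def effective_num_kernels(num_kernels: int) -> int:
    if num_kernels < 84:
        return 84
    return (num_kernels // 84) * 84

print(effective_num_kernels(10_000))  # 9996
print(effective_num_kernels(84))      # 84
print(effective_num_kernels(50))      # 84
```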
@@ -101,8 +114,12 @@ def _fit(self, X, y=None):
                " zero pad shorter series so that n_timepoints == 9"
            )
        X = X.astype(np.float32)
+        if self.num_kernels < 84:
+            self.num_kernels_ = 84
+        else:
+            self.num_kernels_ = self.num_kernels
        self.parameters = _static_fit(
-            X, self.num_kernels, self.max_dilations_per_kernel, random_state
+            X, self.num_kernels_, self.max_dilations_per_kernel, random_state
        )
        return self
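A minimal usage sketch of the new `num_kernels_` attribute set in `_fit` (assuming the standard aeon import path; data shapes are illustrative):

```python
import numpy as np
from aeon.transformations.collection.convolution_based import MiniRocket

# 10 univariate series of length 100: (n_cases, n_channels, n_timepoints)
X = np.random.default_rng(0).random((10, 1, 100)).astype(np.float32)

mr = MiniRocket(num_kernels=50, random_state=0)
mr.fit(X)
print(mr.num_kernels_)  # 84: values below 84 are raised to the minimum
```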

@@ -128,7 +145,11 @@ def _transform(self, X, y=None):
        else:
            n_jobs = self.n_jobs
        set_num_threads(n_jobs)
-        X_ = _static_transform(X, self.parameters, MiniRocket._indices)
+        if n_channels == 1:
+            X = X.squeeze(1)
+            X_ = _static_transform_uni(X, self.parameters, MiniRocket._indices)
+        else:
+            X_ = _static_transform_multi(X, self.parameters, MiniRocket._indices)
        set_num_threads(prev_threads)
        return X_
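The dispatch above only reshapes the input before handing it to the appropriate numba kernel; a rough sketch of the shape handling in plain NumPy (outside numba):

```python
import numpy as np

X = np.zeros((10, 1, 100), dtype=np.float32)  # single-channel collection
if X.shape[1] == 1:
    X2d = X.squeeze(1)   # (n_cases, n_timepoints) for _static_transform_uni
    print(X2d.shape)     # (10, 100)
else:
    pass                 # 3D input goes to _static_transform_multi unchanged
```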

@@ -215,15 +236,82 @@ def _PPV(a, b):
        return 0


+@njit(
+    "float32[:,:](float32[:,:],Tuple((int32[:],int32[:],int32[:],int32[:],float32["
+    ":])), int32[:,:])",
+    fastmath=True,
+    parallel=True,
+    cache=True,
+)
+def _static_transform_uni(X, parameters, indices):
+    """Transform a 2D collection of univariate time series.
+
+    Implemented separately to the multivariate version for numba efficiency reasons.
+    See issue #1778.
+    """
+    n_cases, n_timepoints = X.shape
+    (
+        _,
+        _,
+        dilations,
+        n_features_per_dilation,
+        biases,
+    ) = parameters
+    n_kernels = len(indices)
+    n_dilations = len(dilations)
+    f = n_kernels * np.sum(n_features_per_dilation)
+    features = np.zeros((n_cases, f), dtype=np.float32)
+    for i in prange(n_cases):
+        _X = X[i]
+        A = -_X
+        G = 3 * _X
+        f_start = 0
+        for j in range(n_dilations):
+            _padding0 = j % 2
+            dilation = dilations[j]
+            padding = (8 * dilation) // 2
+            n_features = n_features_per_dilation[j]
+            C_alpha = np.zeros(n_timepoints, dtype=np.float32)
+            C_alpha[:] = A
+            C_gamma = np.zeros((9, n_timepoints), dtype=np.float32)
+            C_gamma[4] = G
+            start = dilation
+            end = n_timepoints - padding
+            for gamma_index in range(4):
+                C_alpha[-end:] = C_alpha[-end:] + A[:end]
+                C_gamma[gamma_index, -end:] = G[:end]
+                end += dilation
+            for gamma_index in range(5, 9):
+                C_alpha[:-start] = C_alpha[:-start] + A[start:]
+                C_gamma[gamma_index, :-start] = G[start:]
+                start += dilation
+            for k in range(n_kernels):
+                f_end = f_start + n_features
+                _padding1 = (_padding0 + k) % 2
+                a, b, c = indices[k]
+                C = C_alpha + C_gamma[a] + C_gamma[b] + C_gamma[c]
+                if _padding1 == 0:
+                    for f in range(n_features):
+                        features[i, f_start + f] = _PPV(C, biases[f_start + f]).mean()
+                else:
+                    for f in range(n_features):
+                        features[i, f_start + f] = _PPV(
+                            C[padding:-padding], biases[f_start + f]
+                        ).mean()
+
+                f_start = f_end
+    return features
+
+
@njit(
    "float32[:,:](float32[:,:,:],Tuple((int32[:],int32[:],int32[:],int32[:],float32["
    ":])), int32[:,:])",
    fastmath=True,
    parallel=True,
    cache=True,
)
-def _static_transform(X, parameters, indices):
-    n_cases, n_columns, n_timepoints = X.shape
+def _static_transform_multi(X, parameters, indices):
+    n_cases, n_channels, n_timepoints = X.shape
    (
        n_channels_per_combination,
        channel_indices,
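Each feature computed above is the proportion of positive values (PPV) of the biased convolution output. `_PPV` is numba-vectorised, so `_PPV(C, bias).mean()` corresponds conceptually to the NumPy expression below (a sketch, not the compiled kernel):

```python
import numpy as np

def ppv(C: np.ndarray, bias: float) -> np.float32:
    # Proportion of positions where the convolution output exceeds the bias.
    return np.float32((C > bias).mean())
```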
@@ -235,68 +323,62 @@ def _static_transform(X, parameters, indices):
    n_dilations = len(dilations)
    n_features = n_kernels * np.sum(n_features_per_dilation)
    features = np.zeros((n_cases, n_features), dtype=np.float32)
-    for example_index in prange(n_cases):
-        _X = X[example_index]
-        A = -_X  # A = alpha * X = -X
-        G = _X + _X + _X  # G = gamma * X = 3X
-        feature_index_start = 0
-        combination_index = 0
+    for i in prange(n_cases):
+        _X = X[i]
+        A = -_X
+        G = 3 * _X
+        f_start = 0
+        comb = 0
        n_channels_start = 0
-        for dilation_index in range(n_dilations):
-            _padding0 = dilation_index % 2
-            dilation = dilations[dilation_index]
-            padding = ((9 - 1) * dilation) // 2
-            n_features_this_dilation = n_features_per_dilation[dilation_index]
-            C_alpha = np.zeros((n_columns, n_timepoints), dtype=np.float32)
+        for j in range(n_dilations):
+            _padding0 = j % 2
+            dilation = dilations[j]
+            padding = (8 * dilation) // 2
+            n_features_this_dilation = n_features_per_dilation[j]
+            C_alpha = np.zeros((n_channels, n_timepoints), dtype=np.float32)
            C_alpha[:] = A
-            C_gamma = np.zeros((9, n_columns, n_timepoints), dtype=np.float32)
-            C_gamma[9 // 2] = G
+            C_gamma = np.zeros((9, n_channels, n_timepoints), dtype=np.float32)
+            C_gamma[4] = G
            start = dilation
            end = n_timepoints - padding
-            for gamma_index in range(9 // 2):
+            for gamma_index in range(4):
                C_alpha[:, -end:] = C_alpha[:, -end:] + A[:, :end]
                C_gamma[gamma_index, :, -end:] = G[:, :end]
                end += dilation

-            for gamma_index in range(9 // 2 + 1, 9):
+            for gamma_index in range(5, 9):
                C_alpha[:, :-start] = C_alpha[:, :-start] + A[:, start:]
                C_gamma[gamma_index, :, :-start] = G[:, start:]
                start += dilation

            for kernel_index in range(n_kernels):
-                feature_index_end = feature_index_start + n_features_this_dilation
-                n_channels_this_combination = n_channels_per_combination[
-                    combination_index
-                ]
-                num_channels_end = n_channels_start + n_channels_this_combination
-                channels_this_combination = channel_indices[
-                    n_channels_start:num_channels_end
-                ]
+                f_end = f_start + n_features_this_dilation
+                n_channels_this_combo = n_channels_per_combination[comb]
+                n_channels_end = n_channels_start + n_channels_this_combo
+                channels_this_combo = channel_indices[n_channels_start:n_channels_end]
                _padding1 = (_padding0 + kernel_index) % 2
                index_0, index_1, index_2 = indices[kernel_index]
                C = (
-                    C_alpha[channels_this_combination]
-                    + C_gamma[index_0][channels_this_combination]
-                    + C_gamma[index_1][channels_this_combination]
-                    + C_gamma[index_2][channels_this_combination]
+                    C_alpha[channels_this_combo]
+                    + C_gamma[index_0][channels_this_combo]
+                    + C_gamma[index_1][channels_this_combo]
+                    + C_gamma[index_2][channels_this_combo]
                )
                C = np.sum(C, axis=0)
                if _padding1 == 0:
                    for feature_count in range(n_features_this_dilation):
-                        features[example_index, feature_index_start + feature_count] = (
-                            _PPV(C, biases[feature_index_start + feature_count]).mean()
-                        )
+                        features[i, f_start + feature_count] = _PPV(
+                            C, biases[f_start + feature_count]
+                        ).mean()
                else:
                    for feature_count in range(n_features_this_dilation):
-                        features[example_index, feature_index_start + feature_count] = (
-                            _PPV(
-                                C[padding:-padding],
-                                biases[feature_index_start + feature_count],
-                            ).mean()
-                        )
-                feature_index_start = feature_index_end
-                combination_index += 1
-                n_channels_start = num_channels_end
+                        features[i, f_start + feature_count] = _PPV(
+                            C[padding:-padding],
+                            biases[f_start + feature_count],
+                        ).mean()
+                f_start = f_end
+                comb += 1
+                n_channels_start = n_channels_end
    return features
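The refactor also replaces `9 // 2` and `((9 - 1) * dilation) // 2` with their literal equivalents for the fixed kernel length of 9; a quick check of the simplified constants:

```python
# Centre index and padding are unchanged for a length-9 kernel.
assert 9 // 2 == 4
for dilation in (1, 2, 3, 7, 32):
    assert ((9 - 1) * dilation) // 2 == (8 * dilation) // 2
```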

aeon/transformations/collection/convolution_based/_rocket.py

+4 -12
@@ -113,13 +113,13 @@ def _transform(self, X, y=None):

        Parameters
        ----------
-        X : 3D np.ndarray of shape = [n_cases, n_channels, n_timepoints]
+        X : 3D np.ndarray of shape = (n_cases, n_channels, n_timepoints)
            collection of time series to transform
        y : ignored argument for interface compatibility

        Returns
        -------
-        np.ndarray [n_cases, num_kernels], transformed features
+        np.ndarray (n_cases, num_kernels), transformed features
        """
        if self.normalise:
            X = (X - X.mean(axis=-1, keepdims=True)) / (
@@ -221,7 +221,7 @@ def _apply_kernel_univariate(X, weights, length, bias, dilation, padding):
    output_length = (n_timepoints + (2 * padding)) - ((length - 1) * dilation)

    _ppv = 0
-    _max = np.NINF
+    _max = -np.inf

    end = (n_timepoints + padding) - ((length - 1) * dilation)
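`np.NINF` was an alias for negative infinity that was removed in NumPy 2.0, so the running maximum is now initialised with `-np.inf`; the two spellings are equivalent on older NumPy versions:

```python
import numpy as np

_max = -np.inf                 # portable spelling
assert _max == float("-inf")   # np.NINF (NumPy < 2.0) held the same value
```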

@@ -254,28 +254,20 @@ def _apply_kernel_multivariate(
    output_length = (n_timepoints + (2 * padding)) - ((length - 1) * dilation)

    _ppv = 0
-    _max = np.NINF
-
+    _max = -np.inf
    end = (n_timepoints + padding) - ((length - 1) * dilation)
-
    for i in range(-padding, end):
        _sum = bias
-
        index = i
-
        for j in range(length):
            if index > -1 and index < n_timepoints:
                for k in range(num_channel_indices):
                    _sum = _sum + weights[k, j] * X[channel_indices[k], index]
-
            index = index + dilation
-
        if _sum > _max:
            _max = _sum
-
        if _sum > 0:
            _ppv += 1
-
    return np.float32(_ppv / output_length), np.float32(_max)
