Commit 0b52cdf

[ENH] Revert mini rocket to separate functions for univariate and multivariate (#1781)
* split multi and uni
* docstring
* docstring
* docstring
* formatting
1 parent 34cf7b5 commit 0b52cdf

File tree

3 files changed: +148 -75 lines changed


aeon/transformations/collection/base.py

-1
@@ -205,7 +205,6 @@ def fit_transform(self, X, y=None):
        self.reset()
        X_inner = self._preprocess_collection(X)
        y_inner = y
-
        Xt = self._fit_transform(X=X_inner, y=y_inner)

        self._is_fitted = True

aeon/transformations/collection/convolution_based/_minirocket.py

+144 -62
@@ -15,34 +15,47 @@
class MiniRocket(BaseCollectionTransformer):
    """MINImally RandOm Convolutional KErnel Transform (MiniRocket).

-    MiniRocket [1]_ is an almost deterministic version of Rocket. It creates
-    convolutions of length 9 with weights restricted to two values, and uses 84 fixed
-    convolutions with six of one weight, three of the second weight to seed dilations.
+    MiniRocket [1]_ is an almost deterministic version of Rocket. It creates
+    convolutions of length 9 with weights restricted to two values, and uses 84 fixed
+    convolutions with six of one weight, three of the second weight to seed dilations.

    Parameters
    ----------
-    num_kernels : int, default=10,000
-        Number of random convolutional kernels.
-    max_dilations_per_kernel : int, default=32
-        Maximum number of dilations per kernel.
-    n_jobs : int, default=1
-        The number of jobs to run in parallel for `transform`. ``-1`` means using all
-        processors.
-    random_state : None or int, default = None
-        Seed for random number generation.
+    num_kernels : int, default=10,000
+        Number of random convolutional kernels. The number of kernels used is rounded
+        down to the nearest multiple of 84, unless a value of less than 84 is passed,
+        in which case it is set to 84.
+    max_dilations_per_kernel : int, default=32
+        Maximum number of dilations per kernel.
+    n_jobs : int, default=1
+        The number of jobs to run in parallel for `transform`. ``-1`` means using all
+        processors.
+    random_state : None or int, default = None
+        Seed for random number generation.
+
+    Attributes
+    ----------
+    self.parameters : Tuple (int32[:], int32[:], int32[:], int32[:], float32[:])
+        n_channels_per_comb, channel_indices, dilations, n_features_per_dilation,
+        biases

    See Also
    --------
-    MiniRocket, Rocket
+    Rocket, MultiRocket, Hydra

    References
    ----------
-    .. [1] Dempster, Angus and Schmidt, Daniel F and Webb, Geoffrey I,
-       "MINIROCKET: A Very Fast (Almost) Deterministic Transform for Time Series
-       Classification", 2020,
-       https://dl.acm.org/doi/abs/10.1145/3447548.3467231,
-       https://arxiv.org/abs/2012.08791
+    .. [1] Dempster, Angus and Schmidt, Daniel F and Webb, Geoffrey I,
+       "MINIROCKET: A Very Fast (Almost) Deterministic Transform for Time Series
+       Classification", 2020,
+       https://dl.acm.org/doi/abs/10.1145/3447548.3467231,
+       https://arxiv.org/abs/2012.08791
+
+    Notes
+    -----
+    Directly adapted from the original implementation
+    https://github.com/angus924/minirocket.

    Examples
    --------
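For reference, the kernel-count behaviour described in the updated docstring can be sketched as below. `effective_num_kernels` is an illustrative helper based only on the docstring wording, not a function in the library:

```python
# Sketch of the documented behaviour: num_kernels is rounded down to a
# multiple of 84 (the number of fixed kernels), with a floor of 84.
def effective_num_kernels(num_kernels: int) -> int:
    if num_kernels < 84:
        return 84
    return (num_kernels // 84) * 84

print(effective_num_kernels(10_000))  # 9996
print(effective_num_kernels(84))      # 84
print(effective_num_kernels(50))      # 84
```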
@@ -101,8 +114,12 @@ def _fit(self, X, y=None):
                " zero pad shorter series so that n_timepoints == 9"
            )
        X = X.astype(np.float32)
+        if self.num_kernels < 84:
+            self.num_kernels_ = 84
+        else:
+            self.num_kernels_ = self.num_kernels
        self.parameters = _static_fit(
-            X, self.num_kernels, self.max_dilations_per_kernel, random_state
+            X, self.num_kernels_, self.max_dilations_per_kernel, random_state
        )
        return self
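A minimal usage sketch of the new `num_kernels_` attribute set in `_fit` (assuming the standard aeon import path; data shapes are illustrative):

```python
import numpy as np
from aeon.transformations.collection.convolution_based import MiniRocket

# 10 univariate series of length 100: (n_cases, n_channels, n_timepoints)
X = np.random.default_rng(0).random((10, 1, 100)).astype(np.float32)

mr = MiniRocket(num_kernels=50, random_state=0)
mr.fit(X)
print(mr.num_kernels_)  # 84: values below 84 are raised to the minimum
```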

@@ -128,7 +145,11 @@ def _transform(self, X, y=None):
        else:
            n_jobs = self.n_jobs
        set_num_threads(n_jobs)
-        X_ = _static_transform(X, self.parameters, MiniRocket._indices)
+        if n_channels == 1:
+            X = X.squeeze(1)
+            X_ = _static_transform_uni(X, self.parameters, MiniRocket._indices)
+        else:
+            X_ = _static_transform_multi(X, self.parameters, MiniRocket._indices)
        set_num_threads(prev_threads)
        return X_
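The dispatch above only reshapes the input before handing it to the appropriate numba kernel; a rough sketch of the shape handling in plain NumPy (outside numba):

```python
import numpy as np

X = np.zeros((10, 1, 100), dtype=np.float32)  # single-channel collection
if X.shape[1] == 1:
    X2d = X.squeeze(1)   # (n_cases, n_timepoints) for _static_transform_uni
    print(X2d.shape)     # (10, 100)
else:
    pass                 # 3D input goes to _static_transform_multi unchanged
```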

@@ -215,15 +236,82 @@ def _PPV(a, b):
        return 0


+@njit(
+    "float32[:,:](float32[:,:],Tuple((int32[:],int32[:],int32[:],int32[:],float32["
+    ":])), int32[:,:])",
+    fastmath=True,
+    parallel=True,
+    cache=True,
+)
+def _static_transform_uni(X, parameters, indices):
+    """Transform a 2D collection of univariate time series.
+
+    Implemented separately to the multivariate version for numba efficiency reasons.
+    See issue #1778.
+    """
+    n_cases, n_timepoints = X.shape
+    (
+        _,
+        _,
+        dilations,
+        n_features_per_dilation,
+        biases,
+    ) = parameters
+    n_kernels = len(indices)
+    n_dilations = len(dilations)
+    f = n_kernels * np.sum(n_features_per_dilation)
+    features = np.zeros((n_cases, f), dtype=np.float32)
+    for i in prange(n_cases):
+        _X = X[i]
+        A = -_X
+        G = 3 * _X
+        f_start = 0
+        for j in range(n_dilations):
+            _padding0 = j % 2
+            dilation = dilations[j]
+            padding = (8 * dilation) // 2
+            n_features = n_features_per_dilation[j]
+            C_alpha = np.zeros(n_timepoints, dtype=np.float32)
+            C_alpha[:] = A
+            C_gamma = np.zeros((9, n_timepoints), dtype=np.float32)
+            C_gamma[4] = G
+            start = dilation
+            end = n_timepoints - padding
+            for gamma_index in range(4):
+                C_alpha[-end:] = C_alpha[-end:] + A[:end]
+                C_gamma[gamma_index, -end:] = G[:end]
+                end += dilation
+            for gamma_index in range(5, 9):
+                C_alpha[:-start] = C_alpha[:-start] + A[start:]
+                C_gamma[gamma_index, :-start] = G[start:]
+                start += dilation
+            for k in range(n_kernels):
+                f_end = f_start + n_features
+                _padding1 = (_padding0 + k) % 2
+                a, b, c = indices[k]
+                C = C_alpha + C_gamma[a] + C_gamma[b] + C_gamma[c]
+                if _padding1 == 0:
+                    for f in range(n_features):
+                        features[i, f_start + f] = _PPV(C, biases[f_start + f]).mean()
+                else:
+                    for f in range(n_features):
+                        features[i, f_start + f] = _PPV(
+                            C[padding:-padding], biases[f_start + f]
+                        ).mean()
+
+                f_start = f_end
+    return features
+
+
@njit(
    "float32[:,:](float32[:,:,:],Tuple((int32[:],int32[:],int32[:],int32[:],float32["
    ":])), int32[:,:])",
    fastmath=True,
    parallel=True,
    cache=True,
)
-def _static_transform(X, parameters, indices):
-    n_cases, n_columns, n_timepoints = X.shape
+def _static_transform_multi(X, parameters, indices):
+    n_cases, n_channels, n_timepoints = X.shape
    (
        n_channels_per_combination,
        channel_indices,
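Each feature computed above is the proportion of positive values (PPV) of the biased convolution output. `_PPV` is numba-vectorised, so `_PPV(C, bias).mean()` corresponds conceptually to the NumPy expression below (a sketch, not the compiled kernel):

```python
import numpy as np

def ppv(C: np.ndarray, bias: float) -> np.float32:
    # Proportion of positions where the convolution output exceeds the bias.
    return np.float32((C > bias).mean())
```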
@@ -235,68 +323,62 @@ def _static_transform(X, parameters, indices):
    n_dilations = len(dilations)
    n_features = n_kernels * np.sum(n_features_per_dilation)
    features = np.zeros((n_cases, n_features), dtype=np.float32)
-    for example_index in prange(n_cases):
-        _X = X[example_index]
-        A = -_X  # A = alpha * X = -X
-        G = _X + _X + _X  # G = gamma * X = 3X
-        feature_index_start = 0
-        combination_index = 0
+    for i in prange(n_cases):
+        _X = X[i]
+        A = -_X
+        G = 3 * _X
+        f_start = 0
+        comb = 0
        n_channels_start = 0
-        for dilation_index in range(n_dilations):
-            _padding0 = dilation_index % 2
-            dilation = dilations[dilation_index]
-            padding = ((9 - 1) * dilation) // 2
-            n_features_this_dilation = n_features_per_dilation[dilation_index]
-            C_alpha = np.zeros((n_columns, n_timepoints), dtype=np.float32)
+        for j in range(n_dilations):
+            _padding0 = j % 2
+            dilation = dilations[j]
+            padding = (8 * dilation) // 2
+            n_features_this_dilation = n_features_per_dilation[j]
+            C_alpha = np.zeros((n_channels, n_timepoints), dtype=np.float32)
            C_alpha[:] = A
-            C_gamma = np.zeros((9, n_columns, n_timepoints), dtype=np.float32)
-            C_gamma[9 // 2] = G
+            C_gamma = np.zeros((9, n_channels, n_timepoints), dtype=np.float32)
+            C_gamma[4] = G
            start = dilation
            end = n_timepoints - padding
-            for gamma_index in range(9 // 2):
+            for gamma_index in range(4):
                C_alpha[:, -end:] = C_alpha[:, -end:] + A[:, :end]
                C_gamma[gamma_index, :, -end:] = G[:, :end]
                end += dilation

-            for gamma_index in range(9 // 2 + 1, 9):
+            for gamma_index in range(5, 9):
                C_alpha[:, :-start] = C_alpha[:, :-start] + A[:, start:]
                C_gamma[gamma_index, :, :-start] = G[:, start:]
                start += dilation

            for kernel_index in range(n_kernels):
-                feature_index_end = feature_index_start + n_features_this_dilation
-                n_channels_this_combination = n_channels_per_combination[
-                    combination_index
-                ]
-                num_channels_end = n_channels_start + n_channels_this_combination
-                channels_this_combination = channel_indices[
-                    n_channels_start:num_channels_end
-                ]
+                f_end = f_start + n_features_this_dilation
+                n_channels_this_combo = n_channels_per_combination[comb]
+                n_channels_end = n_channels_start + n_channels_this_combo
+                channels_this_combo = channel_indices[n_channels_start:n_channels_end]
                _padding1 = (_padding0 + kernel_index) % 2
                index_0, index_1, index_2 = indices[kernel_index]
                C = (
-                    C_alpha[channels_this_combination]
-                    + C_gamma[index_0][channels_this_combination]
-                    + C_gamma[index_1][channels_this_combination]
-                    + C_gamma[index_2][channels_this_combination]
+                    C_alpha[channels_this_combo]
+                    + C_gamma[index_0][channels_this_combo]
+                    + C_gamma[index_1][channels_this_combo]
+                    + C_gamma[index_2][channels_this_combo]
                )
                C = np.sum(C, axis=0)
                if _padding1 == 0:
                    for feature_count in range(n_features_this_dilation):
-                        features[example_index, feature_index_start + feature_count] = (
-                            _PPV(C, biases[feature_index_start + feature_count]).mean()
-                        )
+                        features[i, f_start + feature_count] = _PPV(
+                            C, biases[f_start + feature_count]
+                        ).mean()
                else:
                    for feature_count in range(n_features_this_dilation):
-                        features[example_index, feature_index_start + feature_count] = (
-                            _PPV(
-                                C[padding:-padding],
-                                biases[feature_index_start + feature_count],
-                            ).mean()
-                        )
-                feature_index_start = feature_index_end
-                combination_index += 1
-                n_channels_start = num_channels_end
+                        features[i, f_start + feature_count] = _PPV(
+                            C[padding:-padding],
+                            biases[f_start + feature_count],
+                        ).mean()
+                f_start = f_end
+                comb += 1
+                n_channels_start = n_channels_end
    return features
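The refactor also replaces `9 // 2` and `((9 - 1) * dilation) // 2` with their literal equivalents for the fixed kernel length of 9; a quick check of the simplified constants:

```python
# Centre index and padding are unchanged for a length-9 kernel.
assert 9 // 2 == 4
for dilation in (1, 2, 3, 7, 32):
    assert ((9 - 1) * dilation) // 2 == (8 * dilation) // 2
```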

aeon/transformations/collection/convolution_based/_rocket.py

+4 -12
@@ -113,13 +113,13 @@ def _transform(self, X, y=None):

        Parameters
        ----------
-        X : 3D np.ndarray of shape = [n_cases, n_channels, n_timepoints]
+        X : 3D np.ndarray of shape = (n_cases, n_channels, n_timepoints)
            collection of time series to transform
        y : ignored argument for interface compatibility

        Returns
        -------
-        np.ndarray [n_cases, num_kernels], transformed features
+        np.ndarray (n_cases, num_kernels), transformed features
        """
        if self.normalise:
            X = (X - X.mean(axis=-1, keepdims=True)) / (
@@ -221,7 +221,7 @@ def _apply_kernel_univariate(X, weights, length, bias, dilation, padding):
    output_length = (n_timepoints + (2 * padding)) - ((length - 1) * dilation)

    _ppv = 0
-    _max = np.NINF
+    _max = -np.inf

    end = (n_timepoints + padding) - ((length - 1) * dilation)
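`np.NINF` was an alias for negative infinity that was removed in NumPy 2.0, so the running maximum is now initialised with `-np.inf`; the two spellings are equivalent on older NumPy versions:

```python
import numpy as np

_max = -np.inf                 # portable spelling
assert _max == float("-inf")   # np.NINF (NumPy < 2.0) held the same value
```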

@@ -254,28 +254,20 @@ def _apply_kernel_multivariate(
    output_length = (n_timepoints + (2 * padding)) - ((length - 1) * dilation)

    _ppv = 0
-    _max = np.NINF
-
+    _max = -np.inf
    end = (n_timepoints + padding) - ((length - 1) * dilation)
-
    for i in range(-padding, end):
        _sum = bias
-
        index = i
-
        for j in range(length):
            if index > -1 and index < n_timepoints:
                for k in range(num_channel_indices):
                    _sum = _sum + weights[k, j] * X[channel_indices[k], index]
-
            index = index + dilation
-
        if _sum > _max:
            _max = _sum
-
        if _sum > 0:
            _ppv += 1
-
    return np.float32(_ppv / output_length), np.float32(_max)
