From 1d58a6d926793e39f26704f614406baabe007522 Mon Sep 17 00:00:00 2001 From: pvprajwal <74557086+pvprajwal@users.noreply.github.com> Date: Mon, 31 Mar 2025 20:02:41 +0530 Subject: [PATCH 1/2] [BUG] Updated sbd_distance() to handle multivariate series (#2674) * Updated sbd_distance() to handle multivariate data consistently with tslearn and other implementations * added _multivariate_sbd_distance() which finds the correlations for each of the channels and then normalizes using the norm of the multivariate series. a61927f4 --- aeon/distances/_sbd.py | 43 +++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/aeon/distances/_sbd.py b/aeon/distances/_sbd.py index 1097f27b5a..5c228d62a7 100644 --- a/aeon/distances/_sbd.py +++ b/aeon/distances/_sbd.py @@ -98,17 +98,7 @@ def sbd_distance(x: np.ndarray, y: np.ndarray, standardize: bool = True) -> floa if x.ndim == 1 and y.ndim == 1: return _univariate_sbd_distance(x, y, standardize) if x.ndim == 2 and y.ndim == 2: - if x.shape[0] == 1 and y.shape[0] == 1: - _x = x.ravel() - _y = y.ravel() - return _univariate_sbd_distance(_x, _y, standardize) - else: - # independent (time series should have the same number of channels!) - nchannels = min(x.shape[0], y.shape[0]) - distance = 0.0 - for i in range(nchannels): - distance += _univariate_sbd_distance(x[i], y[i], standardize) - return distance / nchannels + return _multivariate_sbd_distance(x, y, standardize) raise ValueError("x and y must be 1D or 2D") @@ -245,3 +235,34 @@ def _univariate_sbd_distance(x: np.ndarray, y: np.ndarray, standardize: bool) -> b = np.sqrt(np.dot(x, x) * np.dot(y, y)) return np.abs(1.0 - np.max(a / b)) + +@njit(cache=True, fastmath=True) +def _multivariate_sbd_distance(x: np.ndarray, y: np.ndarray, standardize: bool) -> float: + x = x.astype(np.float64) + y = y.astype(np.float64) + + x = np.transpose(x, (1, 0)) + y = np.transpose(y, (1, 0)) + + if standardize: + if x.size == 1 or y.size == 1: + return 0.0 + + x = (x - np.mean(x)) / np.std(x) + y = (y - np.mean(y)) / np.std(y) + + norm1 = np.linalg.norm(x) + norm2 = np.linalg.norm(y) + + denom = norm1 * norm2 + if denom < 1e-9: # Avoid NaNs + denom = np.inf + + with objmode(cc="float64[:, :]"): + cc = np.array([correlate(x[:, i], y[:, i], mode="full", method="fft") for i in range(x.shape[1])]).T + + sz = x.shape[0] + cc = np.vstack((cc[-(sz - 1):], cc[:sz])) + norm_cc = np.real(cc).sum(axis=-1) / denom + + return np.abs(1.0 - np.max(norm_cc)) From 81caf19fb7231b330d2505d020bd7baa513d9a42 Mon Sep 17 00:00:00 2001 From: pvprajwal <74557086+pvprajwal@users.noreply.github.com> Date: Mon, 31 Mar 2025 14:45:57 +0000 Subject: [PATCH 2/2] Automatic `pre-commit` fixes --- aeon/distances/_sbd.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/aeon/distances/_sbd.py b/aeon/distances/_sbd.py index 5c228d62a7..a6bdc889f0 100644 --- a/aeon/distances/_sbd.py +++ b/aeon/distances/_sbd.py @@ -236,8 +236,11 @@ def _univariate_sbd_distance(x: np.ndarray, y: np.ndarray, standardize: bool) -> b = np.sqrt(np.dot(x, x) * np.dot(y, y)) return np.abs(1.0 - np.max(a / b)) + @njit(cache=True, fastmath=True) -def _multivariate_sbd_distance(x: np.ndarray, y: np.ndarray, standardize: bool) -> float: +def _multivariate_sbd_distance( + x: np.ndarray, y: np.ndarray, standardize: bool +) -> float: x = x.astype(np.float64) y = y.astype(np.float64) @@ -253,16 +256,21 @@ def _multivariate_sbd_distance(x: np.ndarray, y: np.ndarray, standardize: bool) norm1 = np.linalg.norm(x) norm2 = np.linalg.norm(y) - + denom = norm1 * norm2 if denom < 1e-9: # Avoid NaNs denom = np.inf with objmode(cc="float64[:, :]"): - cc = np.array([correlate(x[:, i], y[:, i], mode="full", method="fft") for i in range(x.shape[1])]).T + cc = np.array( + [ + correlate(x[:, i], y[:, i], mode="full", method="fft") + for i in range(x.shape[1]) + ] + ).T sz = x.shape[0] - cc = np.vstack((cc[-(sz - 1):], cc[:sz])) + cc = np.vstack((cc[-(sz - 1) :], cc[:sz])) norm_cc = np.real(cc).sum(axis=-1) / denom return np.abs(1.0 - np.max(norm_cc))