-
Notifications
You must be signed in to change notification settings - Fork 209
[BUG] Updated sbd_distance() to handle multivariate series (#2674) #2715
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -98,17 +98,7 @@ def sbd_distance(x: np.ndarray, y: np.ndarray, standardize: bool = True) -> float:
if x.ndim == 1 and y.ndim == 1: | ||
return _univariate_sbd_distance(x, y, standardize) | ||
if x.ndim == 2 and y.ndim == 2: | ||
if x.shape[0] == 1 and y.shape[0] == 1: | ||
_x = x.ravel() | ||
_y = y.ravel() | ||
return _univariate_sbd_distance(_x, _y, standardize) | ||
else: | ||
# independent (time series should have the same number of channels!) | ||
nchannels = min(x.shape[0], y.shape[0]) | ||
distance = 0.0 | ||
for i in range(nchannels): | ||
distance += _univariate_sbd_distance(x[i], y[i], standardize) | ||
return distance / nchannels | ||
return _multivariate_sbd_distance(x, y, standardize) | ||
|
||
raise ValueError("x and y must be 1D or 2D") | ||
|
||
|
@@ -245,3 +235,42 @@ def _univariate_sbd_distance(x: np.ndarray, y: np.ndarray, standardize: bool) -> | |
|
||
b = np.sqrt(np.dot(x, x) * np.dot(y, y)) | ||
return np.abs(1.0 - np.max(a / b)) | ||
|
||
|
||
@njit(cache=True, fastmath=True)
def _multivariate_sbd_distance(
    x: np.ndarray, y: np.ndarray, standardize: bool
) -> float:
    """Compute the multivariate shape-based distance (SBD) between two series.

    Parameters
    ----------
    x : np.ndarray
        First time series, shape ``(n_channels, n_timepoints)``.
    y : np.ndarray
        Second time series, shape ``(n_channels, n_timepoints)``.
    standardize : bool
        If True, z-normalise both series before computing the distance.

    Returns
    -------
    float
        The SBD value; 0.0 indicates perfectly aligned shapes.
    """
    x = x.astype(np.float64)
    y = y.astype(np.float64)

    # Work in (n_timepoints, n_channels) layout so each column is a channel.
    x = np.transpose(x, (1, 0))
    y = np.transpose(y, (1, 0))

    if standardize:
        # A single data point has zero variance; define the distance as 0.
        if x.size == 1 or y.size == 1:
            return 0.0

        # NOTE(review): mean/std are taken over the whole array, i.e. a global
        # z-normalisation across all channels. Per-channel standardization
        # would use axis=0 statistics here — confirm intended behaviour.
        x = (x - np.mean(x)) / np.std(x)
        y = (y - np.mean(y)) / np.std(y)

    # Frobenius norms of both series form the normalisation denominator.
    norm1 = np.linalg.norm(x)
    norm2 = np.linalg.norm(y)

    denom = norm1 * norm2
    if denom < 1e-9:  # Avoid NaNs from (near-)zero-norm inputs
        denom = np.inf

    # scipy.signal.correlate is not numba-compilable, so compute the
    # per-channel FFT cross-correlations in object mode. Result is stacked
    # as (2 * n_timepoints - 1, n_channels).
    with objmode(cc="float64[:, :]"):
        cc = np.array(
            [
                correlate(x[:, i], y[:, i], mode="full", method="fft")
                for i in range(x.shape[1])
            ]
        ).T

    # Re-order the correlation output so the lags are arranged contiguously
    # (negative lags first, then non-negative lags).
    sz = x.shape[0]
    cc = np.vstack((cc[-(sz - 1) :], cc[:sz]))
    # Sum over channels at each lag, normalise, and take the best alignment.
    norm_cc = np.real(cc).sum(axis=-1) / denom

    return np.abs(1.0 - np.max(norm_cc))
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please keep this branch for now.