7,525 changes: 5,810 additions & 1,715 deletions ...les/machine_learning/51_multimodal_source_decomposition_on_synthetic_fNIRS-EEG_data.ipynb
100755 → 100644

Large diffs are not rendered by default.

71 changes: 49 additions & 22 deletions src/cedalion/sigdecomp/multimodal/cca_models.py
@@ -1,4 +1,15 @@
"""Module for CCA-like models"""
"""Module for CCA-like models for multimodal data decomposition.

Includes:
- ElasticNetCCA: CCA with L1 + L2 regularization
- StructuredSparseCCA: CCA with L1 + graph-structure regularization
- RidgeCCA: CCA with L2 regularization
- SparseCCA: CCA with L1 regularization
- CCA: Standard CCA
- SparsePLS: PLS with L1 regularization
- PLS: Standard PLS
"""


import numpy as np
import xarray as xr
@@ -19,12 +30,11 @@ class MultimodalSourceDecomposition():
max_iter (int): Maximum number of iterations for the algorithm.
tol (float): Tolerance for convergence.
scale (bool): Whether to scale the data during normalization to unit variance. Defaults to True.

"""

def __init__(self,
N_components : int = None,
max_iter : int = 100,
max_iter : int = 1000,
tol : float = 1e-6,
scale : bool = True):

@@ -255,17 +265,16 @@ def transform(self,


class ElasticNetCCA(MultimodalSourceDecomposition):

"""Perform Elastic Net Canonical Correlation Analysis (CCA) between two datasets X and Y.

Apply CCA with L1 + L2 regularization, a.k.a. elastic net. The algorithm finds sparse (L1)
and normalized (L2) vectors Wx and Wy as the solution to the following constrained optimization problem:

maximize Wx^T Cxy Wy
subject to Wx^T Cx Wx = 1, Wy^T Cy Wy = 1,
||Wx||_1 <= c1x, ||Wy||_1 <= c1y,
||Wx||^2_2 <= c2x, ||Wy||^2_2 <= c2y

where Cx, Cy, and Cxy are the individual and cross-covariance matrices of the X and Y datasets,
and the last four constraints correspond to the standard L1-norm and L2-norm penalization terms.
c1x and c1y control sparsity, while c2x and c2y control the magnitude of the vectors. PLS algorithms
@@ -291,11 +300,11 @@ class ElasticNetCCA(MultimodalSourceDecomposition):
The resulting u and v are the leading left and right singular vectors of K, which are the individual components of the
filters Wx and Wy. The soft-thresholding function brings some components to zero. If L2 regularization is used, prior to
computing K, Cx and Cy are shifted by Cx <- Cx + alpha_x I and Cy <- Cy + alpha_y I.

Multiple components are obtained via a deflation method, subtracting from K its rank-1 approximation at each iteration.
The returned vectors Wx and Wy are ordered in descending order w.r.t. the singular values, which coincide with the canonical
correlations.

Args:
N_components (int, optional): Number of components to extract. If None,
the number of components is set to the minimum number of features between modalities.
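For readers following the diff, a minimal usage sketch may help. Everything below is illustrative: the toy data, the dimension names, and the direct fit(X, Y)/transform(X, Y) calls are assumptions based on the signatures visible in this PR, not a confirmed cedalion API.

```python
import numpy as np
import xarray as xr
from cedalion.sigdecomp.multimodal.cca_models import ElasticNetCCA

# Toy multimodal recording: 500 time points, 8 and 16 channels
rng = np.random.default_rng(0)
X = xr.DataArray(rng.standard_normal((500, 8)), dims=("time", "channel"))
Y = xr.DataArray(rng.standard_normal((500, 16)), dims=("time", "channel"))

# Per-modality regularization can be passed as [x_value, y_value]:
# l1_reg promotes sparse filters, l2_reg shrinks their magnitude.
model = ElasticNetCCA(N_components=3, l1_reg=[0.1, 0.2], l2_reg=0.01)
model.fit(X, Y)
# model.transform(X, Y) would then project onto the learned filters Wx, Wy.
```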
@@ -329,7 +338,7 @@ def __init__(self,
N_components : int = None,
l1_reg : float | list[float, float] = 0,
l2_reg : float | list[float, float] = 0,
max_iter : int = 100,
max_iter : int = 1000,
tol : float = 1e-6,
scale : bool = True,
pls : bool = False):
@@ -380,6 +389,8 @@ def fit(self,
Wx, Wy = estimate_filters(X.data,
Y.data,
N_components=self.N_components,
max_iter=self.max_iter,
tol=self.tol,
l1_reg=self.l1_reg,
l2_reg=self.l2_reg,
pls=self.pls)
@@ -389,7 +400,6 @@


class StructuredSparseCCA(MultimodalSourceDecomposition):

"""Perform structured sparse Canonical Correlation Analysis (ssCCA) between two datasets X and Y.

The ssCCA algorithm is based on :cite:t:`chen_structure-constrained_2013` and assumes that the underlying X and Y features
@@ -468,7 +478,7 @@ def __init__(self,
Ly : np.ndarray = None,
l1_reg : float | list[float, float] = 0,
l2_reg : float | list[float, float] = 0,
max_iter : int = 100,
max_iter : int = 1000,
tol : float = 1e-6,
scale : bool = True,
pls : bool = False):
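Since StructuredSparseCCA takes Laplacian matrices Lx and Ly, a sketch of how such a matrix could be assembled may be useful. This builds a standard unnormalized graph Laplacian L = D - A over a made-up chain of neighboring channels; it is not a cedalion helper.

```python
import numpy as np

def chain_laplacian(n_channels: int) -> np.ndarray:
    """Unnormalized graph Laplacian L = D - A for a chain of channels,
    each channel connected to its immediate neighbors."""
    A = np.zeros((n_channels, n_channels))
    i = np.arange(n_channels - 1)
    A[i, i + 1] = 1.0  # neighboring channels are adjacent
    A[i + 1, i] = 1.0
    return np.diag(A.sum(axis=1)) - A

# Would be passed as, e.g., StructuredSparseCCA(Lx=chain_laplacian(8), ...)
```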
@@ -519,6 +529,8 @@ def fit(self,
Wx, Wy = estimate_filters(X.data,
Y.data,
N_components=self.N_components,
max_iter=self.max_iter,
tol=self.tol,
l1_reg=self.l1_reg,
l2_reg=self.l2_reg,
Lx=self.Lx,
@@ -562,7 +574,7 @@ class RidgeCCA(ElasticNetCCA):
def __init__(self,
N_components : int = None,
l2_reg : float | list[float, float] = 0,
max_iter : int = 100,
max_iter : int = 1000,
tol : float = 1e-6,
scale : bool = True):

@@ -610,7 +622,7 @@ class SparseCCA(ElasticNetCCA):
def __init__(self,
N_components : int = None,
l1_reg : float | list[float, float] = 0,
max_iter : int = 100,
max_iter : int = 1000,
tol : float = 1e-6,
scale : bool = True):

@@ -655,7 +667,7 @@ class CCA(ElasticNetCCA):

def __init__(self,
N_components: int = None,
max_iter: int = 100,
max_iter: int = 1000,
tol: float = 1e-6,
scale: bool = True):

@@ -709,12 +721,12 @@ class SparsePLS(ElasticNetCCA):
def __init__(self,
N_components: int = None,
l1_reg: float | list[float, float] = 0,
max_iter: int = 100,
max_iter: int = 1000,
tol: float = 1e-6,
scale: bool = True):


super(SparsePLS).__init__(
super(SparsePLS, self).__init__(
N_components=N_components,
l1_reg=l1_reg,
l2_reg=0,
@@ -732,7 +744,7 @@ class PLS(SparsePLS):
Args:
N_components (int, optional): Number of components to extract. If None,
the number of components is set to the minimum number of features between modalities.
max_iter (int): Maximum number of iterations for the algorithm. Defaults to 100.
max_iter (int): Maximum number of iterations for the algorithm. Defaults to 1000.
tol (float): Tolerance for convergence. Defaults to 1e-6.
scale (bool): Whether to scale the data during normalization to unit variance. Defaults to True.

@@ -752,11 +764,11 @@
"""
def __init__(self,
N_components: int = None,
max_iter: int = 100,
max_iter: int = 1000,
tol: float = 1e-6,
scale: bool = True):

super().__init__(
super(PLS, self).__init__(
N_components=N_components,
l1_reg=0,
max_iter=max_iter,
@@ -768,6 +780,8 @@ def __init__(self,
def estimate_filters(X : np.ndarray,
Y : np.ndarray,
N_components : int,
max_iter : int = 1000,
tol : float = 1e-6,
l1_reg : list[float, float] = 0,
l2_reg : list[float, float] = 0,
Lx : np.ndarray = None,
@@ -785,6 +799,8 @@ def estimate_filters(X : np.ndarray,
X (ndarray): Input data for modality X with shape (Nt, Nx).
Y (ndarray): Input data for modality Y with shape (Nt, Ny).
N_components (int): Number of components to extract.
max_iter (int, optional): Maximum number of iterations for the algorithm. Defaults to 1000.
tol (float, optional): Tolerance for convergence. Defaults to 1e-6.
l1_reg (list of floats): list containing lambda_u and lambda_v.
l2_reg (list of floats): list containing alpha_x and alpha_y.
Lx (ndarray, optional): Laplacian matrix for modality X. Defaults to None.
@@ -824,7 +840,11 @@ def estimate_filters(X : np.ndarray,
K = Cxx_inv_sqrt @ Cxy @ Cyy_inv_sqrt

# Perform SVD + deflation via rank-1 approximation subtraction
Wx, _, Wy = get_singular_vectors(K, N_components, l1_reg)
Wx, _, Wy = get_singular_vectors(K,
N_components,
l1_reg,
max_iter=max_iter,
tol=tol)

# Project back to original unwhitened space
if not pls:
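The whitening above, and the projection back that is truncated here, hinge on the inverse square roots of the covariance matrices (the inv_sqrt_cov helper named in the next hunk header). A minimal sketch of that computation via eigendecomposition, assuming a symmetric positive semi-definite C; the actual implementation may differ:

```python
import numpy as np

def inv_sqrt_psd(C: np.ndarray, l2_shift: float = 0.0) -> np.ndarray:
    """C^(-1/2) via eigendecomposition; l2_shift realizes C <- C + alpha*I."""
    w, V = np.linalg.eigh(C + l2_shift * np.eye(C.shape[0]))
    w = np.clip(w, 1e-12, None)  # guard against tiny negative eigenvalues
    return V @ np.diag(1.0 / np.sqrt(w)) @ V.T
```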
@@ -863,6 +883,8 @@ def inv_sqrt_cov(C : np.ndarray,
def get_singular_vectors(X : np.ndarray,
N_components : int,
l1_reg : float | list[float, float] = 0,
max_iter : int = 1000,
tol : float = 1e-6
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
"""Extracts the top singular vectors from X using an iterative power method.

@@ -876,6 +898,8 @@
N_components (int): Number of singular components to extract.
l1_reg (float or list of floats, optional): Regularization parameter for L1 sparsity. If scalar, the
same value is applied for both u and v. Defaults to 0 (no sparsity).
max_iter (int, optional): Maximum number of iterations for the power method. Defaults to 1000.
tol (float, optional): Tolerance for convergence of the power method. Defaults to 1e-6.

Returns:
tuple:
@@ -892,14 +916,17 @@
X_new = X.copy()
for k in range(N_components):
# Apply one-unit algorithm
u, s, v = leading_singular_pair_power_method(X_new, l1_reg)
u, s, v = leading_singular_pair_power_method(X_new,
l1_reg,
max_iter,
tol)
# Subtract rank-1 approximation
X_rank1 = s * u @ v.T
X_new -= X_rank1
# Store component
U[:, k] = u[:, 0]
V[:, k] = v[:, 0]
S[k] = s
S[k] = s

return U, S, V
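leading_singular_pair_power_method itself is not shown in this diff, but the max_iter and tol arguments now threaded into it control a power iteration of roughly the following shape. This is an illustrative sketch of a soft-thresholded power method, not the cedalion implementation:

```python
import numpy as np

def soft_threshold(x: np.ndarray, lam: float) -> np.ndarray:
    # Element-wise soft-thresholding: shrink magnitudes by lam, zero the rest
    return np.sign(x) * np.maximum(np.abs(x) - lam, 0.0)

def sparse_power_method(K: np.ndarray, lam_u: float = 0.0, lam_v: float = 0.0,
                        max_iter: int = 1000, tol: float = 1e-6):
    """Leading singular pair of K with optional L1 shrinkage on u and v."""
    v = np.ones((K.shape[1], 1)) / np.sqrt(K.shape[1])
    for _ in range(max_iter):
        u = soft_threshold(K @ v, lam_u)
        u /= np.linalg.norm(u) + 1e-12       # renormalize after shrinkage
        v_new = soft_threshold(K.T @ u, lam_v)
        v_new /= np.linalg.norm(v_new) + 1e-12
        if np.linalg.norm(v_new - v) < tol:  # stop once v stabilizes
            v = v_new
            break
        v = v_new
    s = (u.T @ K @ v).item()                 # singular-value estimate
    return u, s, v
```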

4 changes: 2 additions & 2 deletions src/cedalion/sigdecomp/multimodal/mspoc.py
@@ -79,12 +79,12 @@ def __init__(self,
self.N_shifts = len(time_shifts)
self.optimal_shift = None
self.shift_source = shift_source

def fit(self,
X: xr.DataArray,
Y: xr.DataArray,
featureX_name : str = 'channel',
featureY_name : str = 'channel'):
featureY_name : str = 'channel'):
"""Train mSPoC model on the X, Y dataset

Implement the pseudo-code of Algorithm 1 of :cite:t:`Dahne2013`
23 changes: 18 additions & 5 deletions src/cedalion/sigdecomp/multimodal/tcca_models.py
@@ -1,4 +1,13 @@
"""Module for temporally embedded CCA-like models. The temporally embedded technique is based on :cite:t:`biesmann_temporal_2010`"""
"""Module for temporally embedded CCA-like models for multimodal data.

This module implements classes for performing source decomposition on multimodal data using
temporal embedding techniques. The embedding is based on :cite:t:`biesmann_temporal_2010`.

Includes:
- ElasticNetTCCA: Class for performing Elastic Net regularized temporally embedded CCA.
- StructuredSparseTCCA: Class for performing structured sparse temporally embedded CCA.
- tCCA: Class for performing standard temporally embedded CCA.
"""

import numpy as np
import xarray as xr
@@ -30,7 +39,7 @@ class MultimodalSourceDecompositionWithTemporalEmbedding(MultimodalSourceDecompo
def __init__(self,
N_components : int = None,
time_shifts : np.ndarray = None,
max_iter : int = 100,
max_iter : int = 1000,
tol : float = 1e-6,
scale : bool = True,
shift_source = True):
@@ -228,7 +237,7 @@ def __init__(self,
l1_reg: float | list[float, float] = 0,
l2_reg: float | list[float, float] = 0,
time_shifts = None,
max_iter: int = 100,
max_iter: int = 1000,
tol: float = 1e-6,
scale: bool = True,
pls : bool = False,
@@ -289,6 +298,8 @@ def fit(self,
Wx, Wy = estimate_filters(X_concat.data,
Y.data,
N_components=self.N_components,
max_iter=self.max_iter,
tol=self.tol,
l1_reg=self.l1_reg,
l2_reg=self.l2_reg,
pls=self.pls)
@@ -390,7 +401,7 @@ def __init__(self,
time_shifts = None,
l1_reg : float | list[float, float] = 0,
l2_reg : float | list[float, float] = 0,
max_iter : int = 100,
max_iter : int = 1000,
tol : float = 1e-6,
scale : bool = True,
shift_source : bool = True,
@@ -458,6 +469,8 @@ def fit(self,
Wx, Wy = estimate_filters(X_concat.data,
Y.data,
N_components=self.N_components,
max_iter=self.max_iter,
tol=self.tol,
l1_reg=self.l1_reg,
l2_reg=self.l2_reg,
Lx=new_Lx,
@@ -508,7 +521,7 @@ class tCCA(ElasticNetTCCA):
def __init__(self,
N_components : int = None,
time_shifts : np.ndarray = None,
max_iter : int = 100,
max_iter : int = 1000,
tol : float = 1e-6,
scale : bool = True,
shift_source = True):