Updated docs

PauBadiaM · PauBadiaM · commit 7e5babd068d0 · 2025-05-23T13:42:39.000+02:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning][].
 
 ## 2.0.0
 
-Major update to accomodate the scverse template.
+Major update to accommodate the scverse template {cite}`scverse`.
 
 All functions have been rewritten to follow the new API, errors when running previous versions (`1.X.X`) are expected if `decoupler >= 2.0.0` is installed.
 
diff --git a/src/decoupler/mt/_consensus.py b/src/decoupler/mt/_consensus.py
@@ -65,9 +65,40 @@ def consensus(
     result: dict | AnnData,
     verbose: bool = False,
 ) -> Tuple[pd.DataFrame, pd.DataFrame] | None:
-    """
+    r"""
     Consensus score across methods.
 
+    For each method, enrichment scores are split into positive and negative subsets
+    and transformed independently into z-scores.
+    
+    1. Subset values based on sign (direction).
+    2. Mirror each subset into positive and negative values with the same magnitude.
+    3. Compute z-scores for each subset: :math:`z_i = \frac{x_i - \mu}{\sigma}`.
+    4. Restore the original signs to the z-scored values.
+
+    This transformation ensures comparability across methods while preserving the
+    biological interpretation of activation (positive) and inhibition (negative).
+    The final consensus enrichment score :math:`ES` is computed as the mean of
+    these signed z-scores across methods.
+
+    .. math::
+
+        ES = \frac{\sum_{m=1}^{M} z_{i}^{(m)}}{M} 
+
+    Where:
+
+    - :math:`M` is the number of methods
+    - :math:`z_{i}^{(m)}` is the z-score from method :math:`m`.
+
+    A two-sided :math:`p_{value}` is then calculated from the consensus score using
+    the survival function of the standard normal distribution.
+
+    .. math::
+
+        p = 2 \times \mathrm{sf}\bigl(\lvert \mathrm{ES} \rvert \bigr)
+
+    %(yestest)s
+
     Parameters
     ----------
     result
diff --git a/src/decoupler/mt/_gsea.py b/src/decoupler/mt/_gsea.py
@@ -198,7 +198,7 @@ def _func_gsea(
 
         ES = L_{arg max |L|}
 
-    When multiple random permutations are done, statistical significance is assessed via empirical testing.
+    When multiple random permutations are done (``times > 1``), statistical significance is assessed via empirical testing.
 
     .. math::
 
@@ -220,7 +220,7 @@ def _func_gsea(
     - :math:`\mu{+}` is the mean of positive values in :math:`ES_{rand}`
     - :math:`\mu{-}` is the mean of negative values in :math:`ES_{rand}`
 
-    Finally, the obtained math:`p_value` are adjusted by Benjamini-Hochberg correction. 
+    %(yestest)s
 
     %(params)s
     %(times)s
diff --git a/src/decoupler/mt/_ora.py b/src/decoupler/mt/_ora.py
@@ -176,7 +176,7 @@ def _func_ora(
     .. figure:: /_static/images/ora.png
        :alt: Over Representation Analysis (ORA) schematic.
        :align: center
-       :width: 75%
+       :width: 100%
 
        Over Representation Analysis (ORA) scheme.
     
diff --git a/src/decoupler/mt/_viper.py b/src/decoupler/mt/_viper.py
@@ -181,9 +181,100 @@ def _func_viper(
     penalty: int | float = 20,
     verbose: bool = False,
 ) -> Tuple[np.ndarray, np.ndarray]:
-    """
+    r"""
     Virtual Inference of Protein-activity by Enriched Regulon analysis (VIPER) :cite:`viper`.
 
+    This approach first ranks features based on their absolute values and computes a one-tail score.
+
+    .. math::
+
+        \begin{align}
+        w &= \frac{w}{max(|w|)} \\
+        l_{orig} &= 1_{w \neq 0} \\
+        l &= \frac{l_{orig}}{\sum_{i=1}^{k} \frac{l_i}{max(l_{orig})}max(l_{orig})} \\
+        q^{norm} &= \Phi^{-1}(2|q-0.5| + (1 + max(|q-0.5|))) \\
+        S_1 &= \sum_{i=1}^{k}q_i^{norm}l_i(1-|w_i|) \\
+        \end{align}
+        
+    Where:
+
+    - :math:`w \in [-1, +1]` is a vector of interaction weights across features
+    - :math:`l \in [0, 1]` is a vector of interaction likelihoods across features
+    - :math:`q \in [0, 1]` is a vector of quantiles based on the molecular readouts across features
+    - :math:`k` is the number of features in :math:`q`
+    - :math:`\Phi^{-1}` is the inverse of the cumulative distribution function of the standard normal distribution
+    - :math:`q^{norm} \in [-\infty,+\infty]` are the z-scores of the deviation of quantiles from 0.5
+
+    :math:`S_1` encodes for the magnitude of the enrichment score, irrespective of the interaction signs in ``net``.
+
+    Then, :math:`q` are z-transformed and weighted by their interaction strength and likelihood.
+
+    .. math::
+
+        S_2 = \sum_{i=1}^{k}w_il_i(\Phi^{-1}(q_i))
+
+    In this case, :math:`S_2` takes the direction (sign) of interactions into consideration.
+
+    Afterwards, a summary score :math:`S_3` is obtained.
+
+    .. math::
+
+        S_3 = 
+        \begin{cases}
+        (|S_2| + S_1)  \times \mathrm{sgn}(S_2) & \text{if } S_1 > 0 \\
+        S_2 & \text{if } S_1 < 0
+        \end{cases}
+
+    An enrichment score :math:`ES` is obtained by comparing :math:`S_3` to a
+    null model generated through an analytical approach that shuffles features.
+
+    .. math::
+
+        ES = S_3\sqrt{\sum_{i=1}^{k}l_{orig,i}^{2}}
+        
+    Together with a :math:`p_{value}`
+
+    .. math::
+
+        p_{value} = \Phi(ES)
+
+    Additionally, when computing multiple sources simultaneously, a pleiotropic correction is employed.
+
+    In brief, all possible pairs of sources AB are generated under two conditions:
+    
+    1. both A and B are significantly enriched (p < ``reg_sign=0.05``)
+    2. they share at least ``n_targets=10`` features
+
+    Subsequently, an :math:`ES` and its associated :math:`p_{value}` are computed for
+    both A (:math:`pA`) and B (:math:`pB`) based only on the shared features.
+    Then the pleiotropy score (:math:`PS`) is computed.
+
+    .. math::
+
+        PS = 
+        \begin{cases}
+        \frac{1}{(1+|\log_{10}(pB) - \log_{10}(pA)|)^{\frac{20}{n_a}}} \text{ if } pA < pB \\
+        \frac{1}{(1+|\log_{10}(pA) - \log_{10}(pB)|)^{\frac{20}{n_b}}} \text{ if } pA > pB
+        \end{cases}
+    
+    Where:
+
+    - :math:`n_a` is the number of test pairs involving the source A
+    - :math:`n_b` is the number of test pairs involving the source B
+
+    This score is used to update :math:`l_{orig}`.
+
+    .. math::
+
+        l_{orig, i} = 
+        \begin{cases}
+        PS \times 1_{\{i \in A\}} \text{ if } pA < pB \\
+        PS \times 1_{\{i \in B\}} \text{ if } pA > pB
+        \end{cases}
+
+    A new :math:`ES` and :math:`p_{value}` are calculated following all
+    the previous steps but using the updated :math:`l_{orig}`
+    
     %(yestest)s
 
     %(params)s
diff --git a/src/decoupler/mt/_waggr.py b/src/decoupler/mt/_waggr.py
@@ -146,9 +146,55 @@ def _func_waggr(
     seed: int | float = 42,
     verbose: bool = False,
 ) -> Tuple[np.ndarray, np.ndarray]:
-    """
+    r"""
     Weighted Aggregate (WAGGR) :cite:`decoupler`.
 
+    This approach aggregates the molecular features :math:`x_i` from one observation :math:`i` with
+    the feature weights :math:`w` of a given feature set :math:`j` into an enrichment score :math:`ES`.
+
+    This method can use any aggregation function, which by default is the weighted mean.
+
+    .. math::
+
+        ES = \frac{\sum_{i=1}^{n} w_i x_i}{\sum_{i=1}^{n} w_i}
+
+    Another simpler option is the weighted sum.
+
+    .. math::
+
+        ES = \sum_{i=1}^{n} w_i x_i
+
+    Alternatively, this method can also take any defined function :math:`f` as long as it aggregates :math:`x_i` and
+    :math:`w` into a single :math:`ES`.
+
+    .. math::
+
+        ES = f(w_i, x_i)
+
+    This functionality makes it relatively easy to implement and try new enrichment methods.
+
+    When multiple random permutations are done (``times > 1``), statistical significance is assessed via empirical testing.
+
+    .. math::
+
+        p_{value}=\frac{ES_{rand} \geq ES}{P}
+
+    Where:
+
+    - :math:`ES_{rand}` are the enrichment scores of the random permutations
+    - :math:`P` is the total number of permutations
+
+    Additionally, :math:`ES` is updated to a normalized enrichment score :math:`NES`.
+
+    .. math::
+
+        NES = \frac{ES - \mu(ES_{rand})}{\sigma(ES_{rand})}
+
+    Where:
+
+    - :math:`\mu` is the mean
+    - :math:`\sigma` is the standard deviation
+
     %(yestest)s
 
     %(params)s
diff --git a/src/decoupler/mt/_zscore.py b/src/decoupler/mt/_zscore.py
@@ -15,9 +15,41 @@ def _func_zscore(
     flavor: str = 'RoKAI',
     verbose: bool = False,
 ) -> Tuple[np.ndarray, np.ndarray]:
-    """
+    r"""
     Z-score (ZSCORE) :cite:`zscore`.
 
+    This approach computes the mean value of the molecular features for known targets,
+    optionally subtracts the overall mean of all measured features,
+    and normalizes the result by the standard deviation of all features and the square
+    root of the number of targets.
+    
+    This formulation was originally introduced in KSEA, which explicitly includes the
+    subtraction of the global mean to compute the enrichment score :math:`ES`.
+
+    .. math::
+
+        ES = \frac{(\mu_s-\mu_p) \times \sqrt m }{\sigma}
+
+    Where:
+
+    - :math:`\mu_s` is the mean of targets
+    - :math:`\mu_p` is the mean of all features
+    - :math:`m` is the number of targets
+    - :math:`\sigma` is the standard deviation of all features
+    
+    However, in the RoKAI implementation, this global mean subtraction was omitted.
+
+    .. math::
+
+        ES = \frac{\mu_s \times \sqrt m }{\sigma}
+
+    A two-sided :math:`p_{value}` is then calculated from the enrichment score using
+    the survival function :math:`sf` of the standard normal distribution.
+
+    .. math::
+
+        p = 2 \times \mathrm{sf}\bigl(\lvert \mathrm{ES} \rvert \bigr)
+
     %(yestest)s
 
     %(params)s
@@ -41,7 +73,7 @@ def _func_zscore(
     n = np.sqrt(np.count_nonzero(adj, axis=0))
     mean = mat.dot(adj) / np.sum(np.abs(adj), axis=0)
     es = ((mean - mean_all.reshape(-1, 1)) * n) / stds.reshape(-1, 1)
-    pv = sts.norm.cdf(-np.abs(es))
+    pv = 2 * sts.norm.sf(np.abs(z))
     return es, pv