make the document clearer

selmanozleyen · selmanozleyen · commit c8d12a09d97f · 2025-05-20T10:33:44.000+02:00
diff --git a/tests/graph/test_ligrec.py b/tests/graph/test_ligrec.py
@@ -467,37 +467,67 @@ def test_ligrec_nan_counts(self):
         """
         For the test case with 2 clusters (A, B) and 3 gene pairs (Gene1→Gene2, Gene2→Gene3, Gene3→Gene1):
 
-        Expression fractions per cluster (computed as fraction of cells with value > 0):
-        Cluster A:
-        - Gene1: 1/3 = 0.33 (only A1 has value > 0)
-        - Gene2: 2/3 = 0.67 (A2 and A3 have value > 0)
-        - Gene3: 0/3 = 0.00 (none have value > 0)
-
-        Cluster B:
-        - Gene1: 1/3 = 0.33 (only B1 has value > 0)
-        - Gene2: 0/3 = 0.00 (none have value > 0)
-        - Gene3: 3/3 = 1.00 (all have value > 0)
-
         The mask is computed for each gene in each cluster as:
         mask[gene, cluster] = (number of cells with value > 0) / (total cells in cluster) >= threshold
 
-        With threshold=0.8, the mask is:
-        Cluster A: [False, False, False]  # All genes < 0.8 expression fraction
-        Cluster B: [False, False, True]   # Only Gene3 >= 0.8 expression fraction
+        Number of cells with value > 0 per gene [Gene1, Gene2, Gene3] in each cluster:
+        Cluster A: [1, 3, 0]
+        Cluster B: [1, 0, 3]
+
+        Expression fraction per gene (cells with value > 0 divided by the total number of cells in the cluster):
+        Cluster A: [1/3, 3/3, 0/3] = [0.33, 1.0, 0.0]
+        Cluster B: [1/3, 0/3, 3/3] = [0.33, 0.0, 1.0]
+
+        Using threshold=0.8 on this data, the mask is:
+        Cluster A: [False, True, False]
+        Cluster B: [False, False, True]
 
         A value in the result becomes NaN if either:
         - The ligand's mask is False in the source cluster, OR
         - The receptor's mask is False in the target cluster
 
-        For each cluster pair (A→A, A→B, B→A, B→B) and each gene pair:
-        A→A: All NaN (all genes have mask=False in A)
-        A→B: All NaN (all genes have mask=False in A)
-        B→A: All NaN (all genes have mask=False in A)
-        B→B: Only Gene2→Gene3 is non-NaN (Gene3 has mask=True in B)
+        In only one combination is the mask True in both the source and the target cluster:
+        Gene2→Gene3 in A→B (Gene2 passes the threshold in A and Gene3 passes it in B).
+
+        Of the 12 possible combinations (4 cluster pairs: A→A, A→B, B→A, B→B, times 3 gene pairs:
+        Gene1→Gene2, Gene2→Gene3, Gene3→Gene1), only this one is non-NaN.
+
+        Therefore, the total number of NaNs is 11.
+
+        The expected p-values are:
+        cluster_1        A         B
+        cluster_2        A    B    A    B
+        source target
+        GENE1  GENE2   NaN  NaN  NaN  NaN
+        GENE2  GENE3   NaN  0.0  NaN  NaN
+        GENE3  GENE1   NaN  NaN  NaN  NaN
 
-        Total NaNs = 4 cluster pairs x 3 gene pairs - 1 = 11 NaNs
-        (The -1 is because B→B for Gene2→Gene3 is the only non-NaN case, where Gene3 has mask=True in B)
         """
+        # Rows are gene pairs, columns are cluster pairs (A→A, A→B, B→A, B→B);
+        # only Gene2→Gene3 in A→B is non-NaN.
+
+        expected_pvalues = np.array(
+            [
+                [
+                    np.nan,
+                    np.nan,
+                    np.nan,
+                    np.nan,
+                ],
+                [
+                    np.nan,
+                    0.0,
+                    np.nan,
+                    np.nan,
+                ],
+                [
+                    np.nan,
+                    np.nan,
+                    np.nan,
+                    np.nan,
+                ],
+            ]
+        )
 
         expected_nans = 11
         # Setup test data
@@ -532,3 +562,4 @@ def test_ligrec_nan_counts(self):
         actual_nans = np.sum(np.isnan(res["pvalues"].values))
 
         assert actual_nans == expected_nans, f"NaN count mismatch: expected {expected_nans}, got {actual_nans}"
+        np.testing.assert_array_equal(res["pvalues"].values, expected_pvalues)