@@ -467,37 +467,67 @@ def test_ligrec_nan_counts(self):
467467 """
468468 For the test case with 2 clusters (A, B) and 3 gene pairs (Gene1→Gene2, Gene2→Gene3, Gene3→Gene1):
469469
470- Expression fractions per cluster (computed as fraction of cells with value > 0):
471- Cluster A:
472- - Gene1: 1/3 = 0.33 (only A1 has value > 0)
473- - Gene2: 2/3 = 0.67 (A2 and A3 have value > 0)
474- - Gene3: 0/3 = 0.00 (none have value > 0)
475-
476- Cluster B:
477- - Gene1: 1/3 = 0.33 (only B1 has value > 0)
478- - Gene2: 0/3 = 0.00 (none have value > 0)
479- - Gene3: 3/3 = 1.00 (all have value > 0)
480-
481470 The mask is computed for each gene in each cluster as:
482471 mask[gene, cluster] = (number of cells with value > 0) / (total cells in cluster) >= threshold
483472
484- With threshold=0.8, the mask is:
485- Cluster A: [False, False, False] # All genes < 0.8 expression fraction
486- Cluster B: [False, False, True] # Only Gene3 >= 0.8 expression fraction
473+ Number of cells with value > 0 in each cluster:
474+ Cluster A: [1, 3, 0]
475+ Cluster B: [1, 0, 3]
476+
477+ Number of cells with value > 0 in each cluster divided by total number of cells in the cluster:
478+ Cluster A: [1/3, 3/3, 0/3] = [0.33, 1.0, 0.0]
479+ Cluster B: [1/3, 0/3, 3/3] = [0.33, 0.0, 1.0]
480+
481+ Using threshold=0.8 on this data, the mask is:
482+ Cluster A: [False, True, False]
483+ Cluster B: [False, False, True]
487484
488485 A value in the result becomes NaN if either:
489486 - The ligand's mask is False in the source cluster, OR
490487 - The receptor's mask is False in the target cluster
491488
492- For each cluster pair (A→A, A→B, B→A, B→B) and each gene pair:
493- A→A: All NaN (all genes have mask=False in A)
494- A→B: All NaN (all genes have mask=False in A)
495- B→A: All NaN (all genes have mask=False in A)
496- B→B: Only Gene2→Gene3 is non-NaN (Gene3 has mask=True in B)
489+ In only one combination is the mask True for both the ligand in the source cluster and the receptor in the target cluster:
490+ Gene2→Gene3 in A→B (Gene2 is True in cluster A, Gene3 is True in cluster B).
491+
492+ There are 4 cluster pairs (A→A, A→B, B→A, B→B) and 3 gene pairs (Gene1→Gene2, Gene2→Gene3, Gene3→Gene1),
493+ i.e. 4 cluster pairs * 3 gene pairs = 12 combinations, of which only one is non-NaN.
494+
495+ Therefore, the total number of NaNs is 11.
496+
497+ The expected p-values are:
498+ cluster_1 A B
499+ cluster_2 A B A B
500+ source target
501+ GENE1 GENE2 NaN NaN NaN NaN
502+ GENE2 GENE3 NaN 0.0 NaN NaN
503+ GENE3 GENE1 NaN NaN NaN NaN
497504
498- Total NaNs = 4 cluster pairs x 3 gene pairs - 1 = 11 NaNs
499- (The -1 is because B→B for Gene2→Gene3 is the only non-NaN case, where Gene3 has mask=True in B)
500505 """
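506+ # Only Gene2→Gene3 in A→B is non-NaN.
507+ # Columns are the cluster pairs A→A, A→B, B→A, B→B; rows are the gene pairs Gene1→Gene2, Gene2→Gene3, Gene3→Gene1.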
508+
509+ expected_pvalues = np .array (
510+ [
511+ [
512+ np .nan ,
513+ np .nan ,
514+ np .nan ,
515+ np .nan ,
516+ ],
517+ [
518+ np .nan ,
519+ 0.0 ,
520+ np .nan ,
521+ np .nan ,
522+ ],
523+ [
524+ np .nan ,
525+ np .nan ,
526+ np .nan ,
527+ np .nan ,
528+ ],
529+ ]
530+ )
501531
502532 expected_nans = 11
503533 # Setup test data
@@ -532,3 +562,4 @@ def test_ligrec_nan_counts(self):
532562 actual_nans = np .sum (np .isnan (res ["pvalues" ].values ))
533563
534564 assert actual_nans == expected_nans , f"NaN count mismatch: expected { expected_nans } , got { actual_nans } "
565+ np .testing .assert_array_equal (res ["pvalues" ].values , expected_pvalues )
0 commit comments