forked from scverse/scanpy
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_combat.py
More file actions
109 lines (77 loc) · 3.18 KB
/
test_combat.py
File metadata and controls
109 lines (77 loc) · 3.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
from __future__ import annotations
import numpy as np
import pandas as pd
import pytest
from anndata.tests.helpers import assert_equal
from sklearn.metrics import silhouette_score
import scanpy as sc
from scanpy.preprocessing._combat import _design_matrix, _standardize_data
def test_norm():
    """Check that the standardized data has zero mean along every row."""
    batch_key = "blobs"
    blobs = sc.datasets.blobs()

    # Variables become rows and observations become columns.
    expression = pd.DataFrame(
        data=blobs.X.T,
        index=blobs.var_names,
        columns=blobs.obs_names,
    )

    # Single-column model frame holding the batch annotation.
    batch_labels = pd.Series(blobs.obs[batch_key])
    model = pd.DataFrame({"batch": batch_labels})

    # Only the first returned value is inspected; unpacking four names keeps
    # the check that exactly four values come back.
    standardized, _, _, _ = _standardize_data(model, expression, "batch")

    row_means = standardized.mean(axis=1)
    expected = np.zeros(standardized.shape[0])
    assert np.allclose(row_means, expected)
def test_covariates():
    """Run combat with and without covariates and check the design matrix width.

    The corrected matrices from both runs must have the same shape, and the
    design matrix built from the covariates plus the batch column must have
    the expected number of columns.
    """
    key = "blobs"
    covariate_names = ("cat1", "cat2", "num1")
    adata = sc.datasets.blobs()

    # Baseline correction, computed before any covariate is attached.
    plain = sc.pp.combat(adata, key=key, inplace=False)

    # Draw order matters for reproducibility: cat1, cat2, then num1.
    np.random.seed(0)
    n_obs = adata.n_obs
    adata.obs["cat1"] = np.random.binomial(3, 0.5, size=n_obs)
    adata.obs["cat2"] = np.random.binomial(2, 0.1, size=n_obs)
    adata.obs["num1"] = np.random.normal(size=n_obs)

    with_covariates = sc.pp.combat(
        adata, key=key, covariates=list(covariate_names), inplace=False
    )
    # The in-place variant only has to run without raising.
    sc.pp.combat(adata, key=key, covariates=list(covariate_names), inplace=True)

    assert plain.shape == with_covariates.shape

    design_input = adata.obs[[*covariate_names, key]]
    batch_levels = adata.obs[key].cat.categories
    design = _design_matrix(design_input, key, batch_levels)

    # NOTE(review): presumably one column per batch level plus three covariate
    # columns -- confirm against _design_matrix.
    expected_width = 4 + len(batch_levels) - 1
    assert len(design.columns) == expected_width
def test_combat_obs_names():
    """Check that combat gives the same result with non-unique obs names.

    Regression test for #1170: the same data is corrected once with duplicated
    observation names and once after making them unique, and the results are
    compared.
    """
    # A locally seeded generator makes the test data reproducible without
    # touching NumPy's global random state.
    rng = np.random.default_rng(0)
    n_obs, n_vars = 200, 100

    x = rng.random((n_obs, n_vars))
    obs = pd.DataFrame(
        {"batch": pd.Categorical(rng.integers(0, 2, size=n_obs))},
        # Each name appears twice -> non-unique index
        index=np.repeat(np.arange(n_obs // 2), 2).astype(str),
    )

    with pytest.warns(UserWarning, match="Observation names are not unique"):
        a = sc.AnnData(x, obs)
    with pytest.warns(UserWarning, match="Observation names are not unique"):
        b = a.copy()
    b.obs_names_make_unique()

    sc.pp.combat(a, "batch")
    sc.pp.combat(b, "batch")

    # The corrected matrices must agree regardless of the obs names ...
    assert_equal(a.X, b.X)

    # ... and once the names are unique as well, the whole objects must match.
    a.obs_names_make_unique()
    assert_equal(a, b)
def test_combat_single_cell_batch():
    """Check that combat rejects a batch containing fewer than 2 cells.

    Regression test for https://github.com/scverse/scanpy/issues/1175
    """
    adata = sc.datasets.blobs()

    # One lone "single" cell; every remaining cell is labelled "other".
    labels = ["single", *(["other"] * (adata.n_obs - 1))]
    adata.obs["batch"] = pd.Categorical(labels)

    with pytest.raises(ValueError, match="fewer than 2 cells"):
        sc.pp.combat(adata, key="batch")
def test_silhouette():
    """Check whether combat can align data drawn from several gaussians.

    After correcting for the "blobs" annotation, the silhouette coefficient of
    that annotation in the first two PCA components must be below 0.1.
    """
    adata = sc.datasets.blobs()

    # Batch-correct in place, then embed with PCA.
    sc.pp.combat(adata, "blobs")
    sc.pp.pca(adata)

    # Score the annotation on the first two principal components only.
    first_two_pcs = adata.obsm["X_pca"][:, :2]
    blob_labels = adata.obs["blobs"].values
    score = silhouette_score(first_two_pcs, blob_labels)

    assert score < 0.1