-
Notifications
You must be signed in to change notification settings - Fork 22
Expand file tree
/
Copy pathtest_update_axis_1.py
More file actions
232 lines (197 loc) · 8.54 KB
/
test_update_axis_1.py
File metadata and controls
232 lines (197 loc) · 8.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
import unittest
import numpy as np
import pytest
from anndata import AnnData
from mudata import MuData
@pytest.fixture()
def mdata(request, var_n, var_across, var_mod):
    """Build a two-dataset ``MuData`` (``axis=1``) for the update tests.

    Two ``AnnData`` objects (``set1`` and ``set2``, each 100 x 10, drawn from a
    seeded normal distribution) receive dataset-specific ``obs_names``,
    identical ``var_names`` (``var0`` ... ``var9``) and a per-observation
    ``min_count`` column holding the row-wise minimum of ``X``.

    Parameters
    ----------
    request
        The pytest request object (unused, kept for the fixture signature).
    var_n
        ``"disjoint"`` keeps only a random half of the variables in ``set2``;
        any other value leaves both datasets with the full variable set.
    var_across
        Only ``"intersecting"`` (or a falsy value) is supported.
    var_mod
        ``"duplicated"`` makes the second variable name of every dataset a
        copy of the first one; any other value keeps the names unique.

    Returns
    -------
    MuData
        The container built with ``axis=1`` and a random ``geneset`` column
        (values ``"a"``, ``"b"`` or ``"c"``) in ``.var``.

    Raises
    ------
    NotImplementedError
        If ``var_across`` is truthy and not ``"intersecting"``.
    """
    # Seed first so that every random draw below is reproducible.
    np.random.seed(100)
    datasets = {
        "set1": AnnData(X=np.random.normal(size=1000).reshape(-1, 10)),
        "set2": AnnData(X=np.random.normal(size=1000).reshape(-1, 10)),
    }

    # Make obs_names different in different datasets: the prefix has to be the
    # dataset *name*; formatting the AnnData object itself yields the same
    # repr-based prefix for both datasets.
    for dname, d in datasets.items():
        d.obs_names = [f"{dname}_obs{i}" for i in range(d.n_obs)]
        d.var_names = [f"var{i}" for i in range(d.n_vars)]
        d.obs["min_count"] = d.X.min(axis=1)

    if var_n == "disjoint":
        set2_which_var = np.random.choice(
            datasets["set2"].var_names, size=datasets["set2"].n_vars // 2, replace=False
        )
        datasets["set2"] = datasets["set2"][:, set2_which_var].copy()

    if var_across and var_across != "intersecting":
        raise NotImplementedError("Tests for non-intersecting var_names are not implemented")

    if var_mod == "duplicated":
        for d in datasets.values():
            # Index does not support mutable operations, so edit a copy of the
            # underlying array and assign it back.
            var_names = d.var_names.values.copy()
            var_names[1] = var_names[0]
            d.var_names = var_names

    mu = MuData(datasets, axis=1)
    mu.var["geneset"] = np.random.choice(["a", "b", "c"], size=mu.n_vars, replace=True)
    return mu
@pytest.fixture()
def datasets(request, var_n, var_across, var_mod):
    """Build a dict of three ``AnnData`` objects for the ``axis=1`` update tests.

    Dataset ``set{k}`` (``k`` = 1, 2, 3) holds ``1000 * k`` seeded normal
    values reshaped to ``10 * k`` columns, and carries a ``dataset`` column
    equal to its own name in both ``.obs`` and ``.var``.

    Parameters
    ----------
    request
        The pytest request object (unused, kept for the fixture signature).
    var_n
        ``"disjoint"`` keeps only a random half of the variables in ``set2``;
        any other value leaves all datasets untouched.
    var_across
        Only ``"intersecting"`` (or a falsy value) is supported.
    var_mod
        ``"duplicated"`` makes the second variable name of every dataset a
        copy of the first one; any other value keeps the names unique.

    Returns
    -------
    dict
        Mapping of dataset name (``"set1"``, ``"set2"``, ``"set3"``) to its
        ``AnnData``.

    Raises
    ------
    NotImplementedError
        If ``var_across`` is truthy and not ``"intersecting"``.
    """
    n_sets = 3
    dsets = {}
    # Seed first so that every random draw below is reproducible.
    np.random.seed(100)
    for k in range(1, n_sets + 1):
        name = f"set{k}"
        dsets[name] = AnnData(X=np.random.normal(size=1000 * k).reshape(-1, 10 * k))
        dsets[name].obs["dataset"] = name
        dsets[name].var["dataset"] = name

    if var_n == "disjoint":
        set2_which_var = np.random.choice(
            dsets["set2"].var_names, size=dsets["set2"].n_vars // 2, replace=False
        )
        # Subset along the variable axis; a single positional index would
        # select observations instead.
        dsets["set2"] = dsets["set2"][:, set2_which_var].copy()

    if var_across and var_across != "intersecting":
        raise NotImplementedError("Tests for non-intersecting var_names are not implemented")

    if var_mod == "duplicated":
        for d in dsets.values():
            # Index does not support mutable operations, so edit a copy of the
            # underlying array and assign it back.
            var_names = d.var_names.values.copy()
            var_names[1] = var_names[0]
            d.var_names = var_names

    return dsets
@pytest.mark.usefixtures("filepath_h5mu")
class TestMuData:
    """Checks of ``MuData.update()`` for containers created with ``axis=1``."""

    @staticmethod
    def _assert_dataset_columns_intact(name, dset):
        """Assert that ``dset`` still carries its own ``dataset`` annotations."""
        assert "dataset" in dset.obs.columns
        assert all(dset.obs["dataset"] == name)
        assert "dataset" in dset.var.columns
        assert all(dset.var["dataset"] == name)

    @pytest.mark.parametrize("var_mod", ["unique"])
    @pytest.mark.parametrize("var_across", ["intersecting"])
    @pytest.mark.parametrize("var_n", ["joint", "disjoint"])
    def test_update_simple(self, datasets):
        """
        Update should work when
        - var_names are the same across datasets,
        - obs_names are unique to each dataset
        """
        for name, dset in datasets.items():
            dset.obs_names = [f"{name}_obs{j}" for j in range(dset.n_obs)]

        mdata = MuData(datasets, axis=1)
        mdata.update()
        mdata.pull_obs()
        mdata.pull_var()

        # obs_names do not overlap between datasets, so the test expects
        # a plain, unprefixed column in the global .obs
        assert "dataset" in mdata.obs.columns

        for name, dset in datasets.items():
            # var_names are shared between datasets, so the test expects
            # /mod/<name>/var/dataset to show up as /var/<name>:dataset
            assert f"{name}:dataset" in mdata.var.columns
            # The individual datasets must keep their own columns
            self._assert_dataset_columns_intact(name, dset)

    @pytest.mark.parametrize("var_mod", ["unique"])
    @pytest.mark.parametrize("var_across", ["intersecting"])
    @pytest.mark.parametrize("var_n", ["joint", "disjoint"])
    def test_update_duplicates(self, datasets):
        """
        Update should work when
        - var_names are the same across modalities,
        - there are duplicated obs_names, which are not intersecting
          between modalities
        """
        for name, dset in datasets.items():
            # j // 2 gives every pair of consecutive observations the same name
            dset.obs_names = [f"{name}_obs{j // 2}" for j in range(dset.n_obs)]

        mdata = MuData(datasets, axis=1)
        mdata.update()
        mdata.pull_obs()
        mdata.pull_var()

        # obs_names do not overlap between datasets, so the test expects
        # a plain, unprefixed column in the global .obs
        assert "dataset" in mdata.obs.columns

        for name, dset in datasets.items():
            # var_names are shared between datasets, so the test expects
            # /mod/<name>/var/dataset to show up as /var/<name>:dataset
            assert f"{name}:dataset" in mdata.var.columns
            # The individual datasets must keep their own columns
            self._assert_dataset_columns_intact(name, dset)

    @pytest.mark.parametrize("var_mod", ["unique"])
    @pytest.mark.parametrize("var_across", ["intersecting"])
    @pytest.mark.parametrize("var_n", ["joint", "disjoint"])
    def test_update_intersecting(self, datasets):
        """
        Update should work when
        - var_names are the same across datasets,
        - there are intersecting obs_names
        """
        for name, dset in datasets.items():
            # The first observation is called obs_0 in every dataset;
            # all the others get a dataset-specific name
            dset.obs_names = [
                f"obs_{j}" if j == 0 else f"{name}_obs{j}" for j in range(dset.n_obs)
            ]

        mdata = MuData(datasets, axis=1)
        mdata.update()

        # New behaviour since v0.4:
        # - Will add a single column 'mod' with the correct labels even with intersecting obs_names
        mdata.pull_obs()
        # - Will add the columns with modality prefixes
        mdata.pull_obs(join_common=False)
        mdata.pull_var()

        for name, dset in datasets.items():
            # var_names are shared between datasets, so the test expects
            # /mod/<name>/var/dataset to show up as /var/<name>:dataset
            assert f"{name}:dataset" in mdata.var.columns
            # Pulled with join_common=False, so a prefixed column is expected
            # for every dataset
            assert f"{name}:dataset" in mdata.obs.columns
            # The individual datasets must keep their own columns
            self._assert_dataset_columns_intact(name, dset)

    @pytest.mark.parametrize("var_mod", ["unique"])
    @pytest.mark.parametrize("var_across", ["intersecting"])
    @pytest.mark.parametrize("var_n", ["joint", "disjoint"])
    def test_update_after_var_reordered(self, mdata):
        """
        Update should work if var are reordered.
        """
        mdata.varm["test_varm"] = np.random.normal(size=(mdata.n_vars, 2))
        some_var_names = mdata.var_names.values[:2]

        def varm_rows():
            # Look every tracked variable up by name in the current var_names
            # and return its row of the test_varm matrix
            rows = []
            for var_name in some_var_names:
                position = np.where(mdata.var_names.values == var_name)[0][0]
                rows.append(mdata.varm["test_varm"][position])
            return rows

        true_varm_values = varm_rows()

        # Replace set1 with a copy whose variables are in reverse order
        mdata.mod["set1"] = mdata["set1"][:, ::-1].copy()
        mdata.update()

        test_varm_values = varm_rows()

        assert all(
            all(expected == observed)
            for expected, observed in zip(true_varm_values, test_varm_values)
        )
# @pytest.mark.usefixtures("filepath_h5mu")
# class TestMuDataSameVars:
# def test_update_simple(self, modalities):
# """
# Update should work when
# - obs_names are the same across modalities,
# - var_names are unique to each modality
# """
# for m, mod in modalities.items():
# mod.var_names = [f"{m}var_{j}" for j in range(mod.n_vars)]
# mdata = MuData(modalities, axis=0)
# mdata.update()
# # Observations are the same across modalities
# # hence /mod/mod1/obs/mod -> /obs/mod1:mod
# assert f"{m}:mod" in mdata.obs.columns
# # Variables are different across modalities
# assert "mod" in mdata.var.columns
# for m, mod in modalities.items():
# # Columns are intact in individual modalities
# assert "mod" in mod.obs.columns
# assert "mod" in mod.var.columns
if __name__ == "__main__":
    # The tests in this module are pytest-style: TestMuData is a plain class
    # driven by fixtures and parametrize marks, not a unittest.TestCase, so
    # unittest.main() would collect nothing. Run this file through pytest and
    # use its return value as the process exit status.
    raise SystemExit(pytest.main([__file__]))