Skip to content

Commit 52d01f3

Browse files
Fix unnecessary copy in strings_to_categoricals (#298)
Removed an explicit `init_as_actual` in `strings_to_categoricals` in favour of the implicit one from modifying the dataframe. Split the raw r/w test into two (actualized AnnData and view), and check if the warning still occurs in the view one. Co-authored-by: Philipp A. <[email protected]>
1 parent 19b62fe commit 52d01f3

File tree

2 files changed

+42
-18
lines changed

2 files changed

+42
-18
lines changed

anndata/_core/anndata.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1143,14 +1143,8 @@ def strings_to_categoricals(self, df: Optional[pd.DataFrame] = None):
11431143
dont_modify = False # only necessary for backed views
11441144
if df is None:
11451145
dfs = [self.obs, self.var]
1146-
if self.is_view:
1147-
if not self.isbacked:
1148-
warnings.warn(
1149-
"Initializing view as actual.", ImplicitModificationWarning,
1150-
)
1151-
self._init_as_actual(self.copy())
1152-
else:
1153-
dont_modify = True
1146+
if self.is_view and self.isbacked:
1147+
dont_modify = True
11541148
else:
11551149
dfs = [df]
11561150
for df in dfs:
@@ -1173,6 +1167,12 @@ def strings_to_categoricals(self, df: Optional[pd.DataFrame] = None):
11731167
"AnnData, not on this view. You might encounter this"
11741168
"error message while copying or writing to disk."
11751169
)
1170+
if self.is_view:
1171+
warnings.warn(
1172+
"Initializing view as actual.", ImplicitModificationWarning
1173+
)
1174+
# If `self` is a view, it will be actualized in the next line,
1175+
# therefore the previous warning
11761176
df[key] = c
11771177
logger.info(f"... storing {key!r} as categorical")
11781178

anndata/tests/test_raw.py

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import anndata as ad
55
from anndata._core.anndata import ImplicitModificationWarning
6+
from anndata.tests.helpers import assert_equal
67

78

89
# -------------------------------------------------------------------------------
@@ -37,7 +38,9 @@ def adata_raw():
3738
np.array(data), obs=obs_dict, var=var_dict, uns=uns_dict, dtype="int32"
3839
)
3940
adata.raw = adata
40-
return adata[:, [0, 1]]
41+
# Make them different shapes
42+
adata = adata[:, [0, 1]].copy()
43+
return adata
4144

4245

4346
# -------------------------------------------------------------------------------
@@ -65,22 +68,43 @@ def test_raw_of_view(adata_raw):
6568

6669

6770
def test_raw_rw(adata_raw, backing_h5ad):
68-
with pytest.warns(
69-
ImplicitModificationWarning, match="Initializing view as actual"
70-
): # TODO: don’t modify adata just to write it
71-
adata_raw.write(backing_h5ad)
72-
adata_raw = ad.read(backing_h5ad)
71+
adata_raw.write(backing_h5ad)
72+
adata_read = ad.read(backing_h5ad)
73+
74+
assert_equal(adata_read, adata_raw, exact=True)
75+
76+
assert adata_raw.var_names.tolist() == ["var1", "var2"]
77+
assert adata_raw.raw.var_names.tolist() == ["var1", "var2", "var3"]
78+
assert adata_raw.raw[:, 0].X.tolist() == [[1], [4], [7]]
79+
80+
81+
def test_raw_view_rw(adata_raw, backing_h5ad):
82+
# Make sure it still writes correctly if the object is a view
83+
adata_raw_view = adata_raw[:, adata_raw.var_names]
84+
assert_equal(adata_raw_view, adata_raw)
85+
with pytest.warns(ImplicitModificationWarning, match="Initializing view as actual"):
86+
adata_raw_view.write(backing_h5ad)
87+
adata_read = ad.read(backing_h5ad)
88+
89+
assert_equal(adata_read, adata_raw_view, exact=True)
7390

7491
assert adata_raw.var_names.tolist() == ["var1", "var2"]
7592
assert adata_raw.raw.var_names.tolist() == ["var1", "var2", "var3"]
7693
assert adata_raw.raw[:, 0].X.tolist() == [[1], [4], [7]]
7794

7895

7996
def test_raw_backed(adata_raw, backing_h5ad):
80-
with pytest.warns(
81-
ImplicitModificationWarning, match="Initializing view as actual"
82-
): # TODO: don’t modify adata just to write it
83-
adata_raw.filename = backing_h5ad
97+
adata_raw.filename = backing_h5ad
98+
99+
assert adata_raw.var_names.tolist() == ["var1", "var2"]
100+
assert adata_raw.raw.var_names.tolist() == ["var1", "var2", "var3"]
101+
if adata_raw.raw[:, 0].X.shape[1] != 1:
102+
pytest.xfail("Raw is broken for backed slices")
103+
assert adata_raw.raw[:, 0].X[:].tolist() == [[1], [4], [7]]
104+
105+
106+
def test_raw_view_backed(adata_raw, backing_h5ad):
107+
adata_raw.filename = backing_h5ad
84108

85109
assert adata_raw.var_names.tolist() == ["var1", "var2"]
86110
assert adata_raw.raw.var_names.tolist() == ["var1", "var2", "var3"]

0 commit comments

Comments
 (0)