-
Notifications
You must be signed in to change notification settings - Fork 191
Expand file tree
/
Copy pathanndata.py
More file actions
2135 lines (1876 loc) · 75.5 KB
/
anndata.py
File metadata and controls
2135 lines (1876 loc) · 75.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""\
Main class and helper functions.
"""
from __future__ import annotations
import warnings
from collections import OrderedDict
from collections.abc import Mapping, MutableMapping, Sequence
from copy import copy, deepcopy
from functools import partial, singledispatch
from pathlib import Path
from textwrap import dedent
from typing import TYPE_CHECKING, cast
import h5py
import numpy as np
import pandas as pd
from natsort import natsorted
from numpy import ma
from pandas.api.types import infer_dtype
from scipy import sparse
from scipy.sparse import issparse
from anndata._warnings import ImplicitModificationWarning
from .. import utils
from .._settings import settings
from ..compat import CSArray, DaskArray, ZarrArray, _move_adj_mtx, old_positionals
from ..logging import anndata_logger as logger
from ..utils import (
axis_len,
deprecated,
ensure_df_homogeneous,
raise_value_error_if_multiindex_columns,
)
from .access import ElementRef
from .aligned_df import _gen_dataframe
from .aligned_mapping import AlignedMappingProperty, AxisArrays, Layers, PairwiseArrays
from .file_backing import AnnDataFileManager, to_memory
from .index import _normalize_indices, _subset, get_vector
from .raw import Raw
from .sparse_dataset import BaseCompressedSparseDataset, sparse_dataset
from .storage import coerce_array
from .views import (
DictView,
_resolve_idxs,
as_view,
)
if TYPE_CHECKING:
from collections.abc import Iterable
from os import PathLike
from typing import Any, Literal
from zarr.storage import StoreLike
from ..compat import Index1D
from ..typing import XDataType
from .aligned_mapping import AxisArraysView, LayersView, PairwiseArraysView
from .index import Index
class AnnData(metaclass=utils.DeprecationMixinMeta):
"""\
An annotated data matrix.
.. figure:: ../_static/img/anndata_schema.svg
:width: 260px
:align: right
:class: dark-light
:class:`~anndata.AnnData` stores a data matrix :attr:`X` together with annotations
of observations :attr:`obs` (:attr:`obsm`, :attr:`obsp`),
variables :attr:`var` (:attr:`varm`, :attr:`varp`),
and unstructured annotations :attr:`uns`.
An :class:`~anndata.AnnData` object `adata` can be sliced like a
:class:`~pandas.DataFrame`,
for instance `adata_subset = adata[:, list_of_variable_names]`.
:class:`~anndata.AnnData`’s basic structure is similar to R’s ExpressionSet
[Huber15]_. If setting an `.h5ad`-formatted HDF5 backing file `.filename`,
data remains on the disk but is automatically loaded into memory if needed.
Parameters
----------
X
A #observations × #variables data matrix. A view of the data is used if the
data type matches, otherwise, a copy is made.
obs
Key-indexed one-dimensional observations annotation of length #observations.
var
Key-indexed one-dimensional variables annotation of length #variables.
uns
Key-indexed unstructured annotation.
obsm
Key-indexed multi-dimensional observations annotation of length #observations.
If passing a :class:`~numpy.ndarray`, it needs to have a structured datatype.
varm
Key-indexed multi-dimensional variables annotation of length #variables.
If passing a :class:`~numpy.ndarray`, it needs to have a structured datatype.
layers
Key-indexed multi-dimensional arrays aligned to dimensions of `X`.
shape
Shape tuple (#observations, #variables). Can only be provided if `X` is `None`.
filename
Name of backing file. See :class:`h5py.File`.
filemode
Open mode of backing file. See :class:`h5py.File`.
See Also
--------
io.read_h5ad
io.read_csv
io.read_excel
io.read_hdf
io.read_loom
io.read_zarr
io.read_mtx
io.read_text
io.read_umi_tools
Notes
-----
:class:`~anndata.AnnData` stores observations (samples) of variables/features
in the rows of a matrix.
This is the convention of the modern classics of statistics [Hastie09]_
and machine learning [Murphy12]_,
the convention of dataframes both in R and Python and the established statistics
and machine learning packages in Python (statsmodels_, scikit-learn_).
Single dimensional annotations of the observation and variables are stored
in the :attr:`obs` and :attr:`var` attributes as :class:`~pandas.DataFrame`\\ s.
This is intended for metrics calculated over their axes.
Multi-dimensional annotations are stored in :attr:`obsm` and :attr:`varm`,
which are aligned to the object’s observation and variable dimensions respectively.
Square matrices representing graphs are stored in :attr:`obsp` and :attr:`varp`,
with both of their own dimensions aligned to their associated axis.
Additional measurements across both observations and variables are stored in
:attr:`layers`.
Indexing into an AnnData object can be performed by relative position
with numeric indices (like pandas’ :meth:`~pandas.DataFrame.iloc`),
or by labels (like :meth:`~pandas.DataFrame.loc`).
To avoid ambiguity with numeric indexing into observations or variables,
indexes of the AnnData object are converted to strings by the constructor.
Subsetting an AnnData object by indexing into it will also subset its elements
according to the dimensions they were aligned to.
This means an operation like `adata[list_of_obs, :]` will also subset :attr:`obs`,
:attr:`obsm`, and :attr:`layers`.
Subsetting an AnnData object returns a view into the original object,
meaning very little additional memory is used upon subsetting.
This is achieved lazily, meaning that the constituent arrays are subset on access.
Copying a view causes an equivalent “real” AnnData object to be generated.
Attempting to modify a view (at any attribute except X) is handled
in a copy-on-modify manner, meaning the object is initialized in place.
Here’s an example::
batch1 = adata[adata.obs["batch"] == "batch1", :]
batch1.obs["value"] = 0 # This makes batch1 a “real” AnnData object
At the end of this snippet: `adata` was not modified,
and `batch1` is its own AnnData object with its own data.
Similar to Bioconductor’s `ExpressionSet` and :mod:`scipy.sparse` matrices,
subsetting an AnnData object retains the dimensionality of its constituent arrays.
Therefore, unlike with the classes exposed by :mod:`pandas`, :mod:`numpy`,
and `xarray`, there is no concept of a one dimensional AnnData object.
AnnDatas always have two inherent dimensions, :attr:`obs` and :attr:`var`.
Additionally, maintaining the dimensionality of the AnnData object allows for
consistent handling of :mod:`scipy.sparse` matrices and :mod:`numpy` arrays.
.. _statsmodels: http://www.statsmodels.org/stable/index.html
.. _scikit-learn: http://scikit-learn.org/
"""
# Attribute paths listed as "backed": the main matrix and the matrix of `raw`.
# NOTE(review): presumably the elements that stay on disk in backed mode —
# confirm against the code that reads this list.
_BACKED_ATTRS = ["X", "raw.X"]
# backwards compat
# For each attribute, the set of alternative key names that map onto it
# (e.g. "smp"/"_smp" for `obs`, "data"/"_data" for `X`).
# NOTE(review): presumably consulted when reading files written by older
# versions — confirm against the readers.
_H5_ALIASES = dict(
X={"X", "_X", "data", "_data"},
obs={"obs", "_obs", "smp", "_smp"},
var={"var", "_var"},
uns={"uns"},
obsm={"obsm", "_obsm", "smpm", "_smpm"},
varm={"varm", "_varm"},
layers={"layers", "_layers"},
)
# Alternative names under which the index (names) of `obs` / `var` may appear.
# NOTE(review): same backwards-compat purpose assumed as for `_H5_ALIASES` — confirm.
_H5_ALIASES_NAMES = dict(
obs={"obs_names", "smp_names", "row_names", "index"},
var={"var_names", "col_names", "index"},
)
@old_positionals(
    "obsm",
    "varm",
    "layers",
    "raw",
    "dtype",
    "shape",
    "filename",
    "filemode",
    "asview",
)
def __init__(
    self,
    X: XDataType | pd.DataFrame | None = None,
    obs: pd.DataFrame | Mapping[str, Iterable[Any]] | None = None,
    var: pd.DataFrame | Mapping[str, Iterable[Any]] | None = None,
    uns: Mapping[str, Any] | None = None,
    *,
    obsm: np.ndarray | Mapping[str, Sequence[Any]] | None = None,
    varm: np.ndarray | Mapping[str, Sequence[Any]] | None = None,
    layers: Mapping[str, XDataType] | None = None,
    raw: Mapping[str, Any] | None = None,
    dtype: np.dtype | type | str | None = None,
    shape: tuple[int, int] | None = None,
    filename: PathLike[str] | str | None = None,
    filemode: Literal["r", "r+"] | None = None,
    asview: bool = False,
    obsp: np.ndarray | Mapping[str, Sequence[Any]] | None = None,
    varp: np.ndarray | Mapping[str, Sequence[Any]] | None = None,
    oidx: Index1D | None = None,
    vidx: Index1D | None = None,
):
    # Entry point of construction: validate DataFrame inputs, then hand over
    # to `_init_as_actual` (regular object) or `_init_as_view` (`asview=True`).

    # Multi-index columns on DataFrames are rejected here because
    # `coerce_array` does not check for them later.
    for label, candidate in (("obs", obs), ("var", var), ("X", X)):
        if isinstance(candidate, pd.DataFrame):
            raise_value_error_if_multiindex_columns(candidate, label)

    if not asview:
        self._init_as_actual(
            X=X,
            obs=obs,
            var=var,
            uns=uns,
            obsm=obsm,
            varm=varm,
            raw=raw,
            layers=layers,
            dtype=dtype,
            shape=shape,
            obsp=obsp,
            varp=varp,
            filename=filename,
            filemode=filemode,
        )
        return

    # A view is always built on top of an existing AnnData passed as `X`;
    # all other annotation arguments are not used on this path.
    if not isinstance(X, AnnData):
        msg = "`X` has to be an AnnData object."
        raise ValueError(msg)
    self._init_as_view(X, oidx, vidx)
def _init_as_view(self, adata_ref: AnnData, oidx: Index, vidx: Index):
    """Set `self` up as a view of `adata_ref` restricted to `oidx` × `vidx`.

    Integer indices are converted to length-one slices. If `adata_ref` is
    itself a view, the indices are resolved against its parent so that
    `self._adata_ref` never points at a view.

    Raises
    ------
    ValueError
        If `adata_ref` is a backed view.
    IndexError
        If an integer index lies outside the corresponding axis.
    """
    if adata_ref.isbacked and adata_ref.is_view:
        raise ValueError(
            "Currently, you cannot index repeatedly into a backed AnnData, "
            "that is, you cannot make a view of a view."
        )
    self._is_view = True

    def _int_to_slice(idx, size_attr, label):
        # Replace a single integer position by the equivalent 1-element slice;
        # anything that is not an integer is passed through untouched.
        if not isinstance(idx, int | np.integer):
            return idx
        length = getattr(adata_ref, size_attr)
        if not (-length <= idx < length):
            msg = f"{label} index `{idx}` is out of range."
            raise IndexError(msg)
        # shift negative positions into the non-negative range
        idx += length * (idx < 0)
        return slice(idx, idx + 1, 1)

    oidx = _int_to_slice(oidx, "n_obs", "Observation")
    vidx = _int_to_slice(vidx, "n_vars", "Variable")

    if adata_ref.is_view:
        # compose with the indices of the existing view and refer to its parent
        parent_idxs = (adata_ref._oidx, adata_ref._vidx)
        adata_ref = adata_ref._adata_ref
        oidx, vidx = _resolve_idxs(parent_idxs, (oidx, vidx), adata_ref)

    # self._adata_ref is never a view
    self._adata_ref = adata_ref
    self._oidx = oidx
    self._vidx = vidx
    # the file is the same as of the reference object
    self.file = adata_ref.file

    # subsets of the reference annotations
    obs_subset = adata_ref.obs.iloc[oidx]
    var_subset = adata_ref.var.iloc[vidx]

    # shallow copy, so category clean-up below does not touch the reference's mapping
    uns_copy = copy(adata_ref._uns)
    if settings.remove_unused_categories:
        self._remove_unused_categories(adata_ref.obs, obs_subset, uns_copy)
        self._remove_unused_categories(adata_ref.var, var_subset, uns_copy)

    self._obs = as_view(obs_subset, view_args=(self, "obs"))
    self._var = as_view(var_subset, view_args=(self, "var"))
    self._uns = uns_copy

    # backed objects read X from the file, nothing is kept in memory
    if self.isbacked:
        self._X = None

    # set raw, easy, as it’s immutable anyways...
    if adata_ref._raw is None:
        self._raw = None
    else:
        # slicing along variables axis is ignored
        self._raw = adata_ref.raw[oidx]
        self._raw._adata = self
def _init_as_actual(
    self,
    X=None,
    obs=None,
    var=None,
    uns=None,
    obsm=None,
    varm=None,
    varp=None,
    obsp=None,
    raw=None,
    layers=None,
    dtype=None,
    shape=None,
    filename=None,
    filemode=None,
):
    """Initialise `self` as a regular (non-view) object.

    `X` may be an array-like, a `pd.DataFrame`, another `AnnData` (whose
    elements are then taken over) or `None` (the shape then comes from
    `shape` or is inferred from the other arguments).

    Raises
    ------
    ValueError
        If `X` is an `AnnData` and further annotations are passed,
        if `shape` is passed together with `X`,
        or if the index of `obs`/`var` does not match a DataFrame `X`.
    """

    def _was_given(arg) -> bool:
        # Truth-test like the plain `any(...)` did, but treat containers that
        # refuse truth-testing (DataFrame, ndarray) as "given" instead of
        # leaking their unrelated "truth value is ambiguous" error.
        if arg is None:
            return False
        try:
            return bool(arg)
        except ValueError:
            return True

    # view attributes
    self._is_view = False
    self._adata_ref = None
    self._oidx = None
    self._vidx = None

    # ----------------------------------------------------------------------
    # various ways of initializing the data
    # ----------------------------------------------------------------------

    # If X is a data frame, we store its indices for verification
    x_indices = []

    # init from file
    if filename is not None:
        self.file = AnnDataFileManager(self, filename, filemode)
    else:
        self.file = AnnDataFileManager(self, None)

        # init from AnnData
        if isinstance(X, AnnData):
            if any(_was_given(a) for a in (obs, var, uns, obsm, varm, obsp, varp)):
                msg = (
                    "If `X` is an AnnData object, "
                    "no further arguments must be provided."
                )
                raise ValueError(msg)
            X, obs, var, uns, obsm, varm, obsp, varp, layers, raw = (
                X._X,
                X.obs,
                X.var,
                X.uns,
                X.obsm,
                X.varm,
                X.obsp,
                X.varp,
                X.layers,
                X.raw,
            )

        # init from DataFrame
        elif isinstance(X, pd.DataFrame):
            # to verify index matching, we wait until obs and var are DataFrames
            if obs is None:
                obs = pd.DataFrame(index=X.index)
            elif not isinstance(X.index, pd.RangeIndex):
                x_indices.append(("obs", "index", X.index.astype(str)))
            if var is None:
                var = pd.DataFrame(index=X.columns)
            elif not isinstance(X.columns, pd.RangeIndex):
                x_indices.append(("var", "columns", X.columns.astype(str)))
            X = ensure_df_homogeneous(X, "X")

    # ----------------------------------------------------------------------
    # actually process the data
    # ----------------------------------------------------------------------

    # check data type of X
    if X is not None:
        X = coerce_array(X, name="X")
        if shape is not None:
            msg = "`shape` needs to be `None` if `X` is not `None`."
            raise ValueError(msg)
        _check_2d_shape(X)
        # if type doesn’t match, a copy is made, otherwise, use a view
        if dtype is not None:
            warnings.warn(
                "The dtype argument is deprecated and will be removed in late 2024.",
                FutureWarning,
            )
            if issparse(X) or isinstance(X, ma.MaskedArray):
                # TODO: maybe use view on data attribute of sparse matrix
                #       as in readwrite.read_10x_h5
                if X.dtype != np.dtype(dtype):
                    X = X.astype(dtype)
            elif isinstance(X, ZarrArray | DaskArray):
                X = X.astype(dtype)
            else:  # is np.ndarray or a subclass, convert to true np.ndarray
                X = np.asarray(X, dtype)
        # data matrix and shape
        self._X = X
        n_obs, n_vars = X.shape
        source = "X"
    else:
        self._X = None
        n_obs, n_vars = (
            shape
            if shape is not None
            else _infer_shape(obs, var, obsm, varm, layers, obsp, varp)
        )
        source = "shape"

    # annotations
    self._obs = _gen_dataframe(
        obs, ["obs_names", "row_names"], source=source, attr="obs", length=n_obs
    )
    self._var = _gen_dataframe(
        var, ["var_names", "col_names"], source=source, attr="var", length=n_vars
    )

    # now we can verify if indices match!
    for attr_name, x_name, idx in x_indices:
        attr = getattr(self, attr_name)
        if isinstance(attr.index, pd.RangeIndex):
            # obs/var carried no labels of their own: adopt those of X
            attr.index = idx
        elif not idx.equals(attr.index):
            msg = f"Index of {attr_name} must match {x_name} of X."
            raise ValueError(msg)

    # unstructured annotations
    self.uns = uns or OrderedDict()

    self.obsm = obsm
    self.varm = varm

    self.obsp = obsp
    self.varp = varp

    # Backwards compat for connectivities matrices in uns["neighbors"]
    _move_adj_mtx({"uns": self._uns, "obsp": self._obsp})
    self._check_dimensions()
    if settings.check_uniqueness:
        self._check_uniqueness()

    if self.filename:
        assert not isinstance(raw, Raw), (
            "got raw from other adata but also filename?"
        )
        if {"raw", "raw.X"} & set(self.file):
            # NOTE(review): this unpacks `raw`, so it presumably is always a
            # mapping when the backing file contains raw data — confirm.
            raw = dict(X=None, **raw)
    if not raw:
        self._raw = None
    elif isinstance(raw, Mapping):
        self._raw = Raw(self, **raw)
    else:  # is a Raw from another AnnData
        self._raw = Raw(self, raw._X, raw.var, raw.varm)

    # clean up old formats
    self._clean_up_old_format(uns)

    # layers
    self.layers = layers
@old_positionals("show_stratified", "with_disk")
def __sizeof__(
    self, *, show_stratified: bool = False, with_disk: bool = False
) -> int:
    """Return the summed size in bytes of `X`, `obs`, `var` and the mapping attributes.

    Parameters
    ----------
    show_stratified
        If true, print the size of every attribute with a non-zero size.
    with_disk
        If true, on-disk datasets (`h5py.Dataset`, backed sparse datasets)
        are counted by the size of their data instead of their Python object.
    """

    def cs_to_bytes(X) -> int:
        return int(X.data.nbytes + X.indptr.nbytes + X.indices.nbytes)

    def get_size(X) -> int:
        if isinstance(X, h5py.Dataset) and with_disk:
            return int(np.array(X.shape).prod() * X.dtype.itemsize)
        if isinstance(X, BaseCompressedSparseDataset) and with_disk:
            return cs_to_bytes(X._to_backed())
        # Only compressed sparse formats expose `indptr`/`indices`; other
        # sparse formats (e.g. COO) fall back to the generic object size
        # instead of failing with an AttributeError.
        if issparse(X) and hasattr(X, "indptr") and hasattr(X, "indices"):
            return cs_to_bytes(X)
        return X.__sizeof__()

    sizes = {}
    attrs = ["X", "_obs", "_var"]
    attrs_multi = ["_uns", "_obsm", "_varm", "varp", "_obsp", "_layers"]
    for attr in attrs + attrs_multi:
        if attr in attrs_multi:
            # mapping attribute: add up the sizes of all stored elements
            keys = getattr(self, attr).keys()
            s = sum(get_size(getattr(self, attr)[k]) for k in keys)
        else:
            s = get_size(getattr(self, attr))
        if s > 0 and show_stratified:
            # imported lazily, only needed for the human-readable output
            from tqdm import tqdm

            print(
                f"Size of {attr.replace('_', '.'):<7}: {tqdm.format_sizeof(s, 'B')}"
            )
        sizes[attr] = s
    return sum(sizes.values())
def _gen_repr(self, n_obs, n_vars) -> str:
    """Build the multi-line description used by `__repr__`.

    The first line states the dimensions (and the backing file, if any);
    one further line is added per non-empty annotation attribute, listing its keys.
    """
    location = f" backed at {str(self.filename)!r}" if self.isbacked else ""
    parts = [f"AnnData object with n_obs × n_vars = {n_obs} × {n_vars}{location}"]
    for name in ("obs", "var", "uns", "obsm", "varm", "layers", "obsp", "varp"):
        names = getattr(self, name).keys()
        if len(names) > 0:
            # `str(list(...))[1:-1]` strips the surrounding brackets of the list repr
            parts.append(f" {name}: {str(list(names))[1:-1]}")
    return "\n".join(parts)
def __repr__(self) -> str:
    """Return the description from `_gen_repr`, prefixed with "View of " for views."""
    prefix = "View of " if self.is_view else ""
    return prefix + self._gen_repr(self.n_obs, self.n_vars)
def __eq__(self, other):
    """Reject `==` comparisons: always raises `NotImplementedError`."""
    raise NotImplementedError(
        "Equality comparisons are not supported for AnnData objects, "
        "instead compare the desired attributes."
    )
@property
def shape(self) -> tuple[int, int]:
    """Dimensions of the data matrix as the pair (:attr:`n_obs`, :attr:`n_vars`)."""
    n_rows = self.n_obs
    n_cols = self.n_vars
    return (n_rows, n_cols)
@property
def X(self) -> XDataType | None:
    """The :attr:`n_obs` × :attr:`n_vars` data matrix (or `None`).

    Backed objects read `X` from the (re-opened if necessary) backing file;
    views return the matching subset of their reference object's `X`.
    """
    if self.isbacked:
        if not self.file.is_open:
            self.file.open()
        stored = self.file["X"]
        if isinstance(stored, h5py.Group):
            # a group (rather than a dataset) is wrapped as a sparse dataset
            stored = sparse_dataset(stored)
        if self.is_view:
            # This is so that we can index into a backed dense dataset with
            # indices that aren’t strictly increasing
            return _subset(stored, (self._oidx, self._vidx))
        return stored

    if not self.is_view:
        return self._X

    parent_X = self._adata_ref.X
    if parent_X is None:
        return None
    return as_view(
        _subset(parent_X, (self._oidx, self._vidx)),
        ElementRef(self, "X"),
    )
@X.setter
def X(self, value: XDataType | None):
    """Assign the data matrix.

    `None` removes `X` (not possible for backed objects). Otherwise `value`
    must be a scalar, have the shape of this object, or be a vector matching
    the non-singleton axis when the other axis has length 1. On a view the
    values are written into the reference object (or the backing file).

    Raises
    ------
    NotImplementedError
        If `value` is `None` and the object is backed.
    ValueError
        If `value` has an incompatible shape.
    """
    if value is None:
        if self.isbacked:
            msg = "Cannot currently remove data matrix from backed object."
            raise NotImplementedError(msg)
        if self.is_view:
            self._init_as_actual(self.copy())
        self._X = None
        return
    value = coerce_array(value, name="X", allow_array_like=True)

    # If indices are both arrays, we need to modify them
    # so we don’t set values like coordinates
    # This can occur if there are successive views
    if (
        self.is_view
        and isinstance(self._oidx, np.ndarray)
        and isinstance(self._vidx, np.ndarray)
    ):
        oidx, vidx = np.ix_(self._oidx, self._vidx)
    else:
        oidx, vidx = self._oidx, self._vidx

    shape_is_compatible = (
        np.isscalar(value)
        or (hasattr(value, "shape") and (self.shape == value.shape))
        or (self.n_vars == 1 and self.n_obs == len(value))
        or (self.n_obs == 1 and self.n_vars == len(value))
    )
    if not shape_is_compatible:
        msg = f"Data matrix has wrong shape {value.shape}, need to be {self.shape}."
        raise ValueError(msg)

    if not np.isscalar(value):
        if self.is_view and any(
            isinstance(idx, np.ndarray) and len(np.unique(idx)) != len(idx.ravel())
            for idx in [oidx, vidx]
        ):
            msg = (
                "You are attempting to set `X` to a matrix on a view which has non-unique indices. "
                "The resulting `adata.X` will likely not equal the value to which you set it. "
                "To avoid this potential issue, please make a copy of the data first. "
                "In the future, this operation will throw an error."
            )
            # stacklevel=2 attributes the warning to the assigning caller,
            # consistent with the other warnings raised in this setter
            warnings.warn(msg, FutureWarning, stacklevel=2)
        if self.shape != value.shape:
            # For assigning vector of values to 2d array or matrix
            # Not necessary for row of 2d array
            value = value.reshape(self.shape)

    if self.isbacked:
        if self.is_view:
            X = self.file["X"]
            if isinstance(X, h5py.Group):
                X = sparse_dataset(X)
            X[oidx, vidx] = value
        else:
            self._set_backed("X", value)
    elif self.is_view:
        if sparse.issparse(self._adata_ref._X) and isinstance(value, np.ndarray):
            # the reference is sparse: convert the dense value to a sparse
            # container of the matching flavour (array vs. matrix)
            if isinstance(self._adata_ref.X, CSArray):
                memory_class = sparse.coo_array
            else:
                memory_class = sparse.coo_matrix
            value = memory_class(value)
        elif sparse.issparse(value) and isinstance(self._adata_ref._X, np.ndarray):
            warnings.warn(
                "Trying to set a dense array with a sparse array on a view. "
                "Densifying the sparse array. "
                "This may incur excessive memory usage",
                stacklevel=2,
            )
            value = value.toarray()
        warnings.warn(
            "Modifying `X` on a view results in data being overridden",
            ImplicitModificationWarning,
            stacklevel=2,
        )
        self._adata_ref._X[oidx, vidx] = value
    else:
        self._X = value
@X.deleter
def X(self):
    """Deleting `X` is the same as assigning `None` through the setter."""
    self.X = None
# `layers` is declared through `AlignedMappingProperty`, constructed with the
# attribute name "layers" and the `Layers` class (no axis argument is passed here).
layers: AlignedMappingProperty[Layers | LayersView] = AlignedMappingProperty(
"layers", Layers
)
"""\
Dictionary-like object with values of the same dimensions as :attr:`X`.
Layers in AnnData are inspired by loompy’s :ref:`loomlayers`.
Return the layer named `"unspliced"`::
adata.layers["unspliced"]
Create or replace the `"spliced"` layer::
adata.layers["spliced"] = ...
Assign the 10th column of layer `"spliced"` to the variable a::
a = adata.layers["spliced"][:, 10]
Delete the `"spliced"` layer::
del adata.layers["spliced"]
Return layers’ names::
adata.layers.keys()
"""
@property
def raw(self) -> Raw | None:
    """\
    Raw version of :attr:`X` and :attr:`var`, available as `.raw.X` and `.raw.var`.

    Initialize :attr:`raw` from the current content of an object with::

        adata.raw = adata.copy()

    Remove it again with either of::

        adata.raw = None
        # or
        del adata.raw

    Slicing an AnnData object along the obs (row) axis slices :attr:`raw` too,
    whereas slicing along the vars (columns) axis leaves :attr:`raw` unaffected.
    You can therefore use::

        adata.raw[:, 'orig_variable_name'].X

    to get the data of a variable that may have been filtered out
    or "compressed away" in :attr:`X`.
    """
    stored_raw = self._raw
    return stored_raw
@raw.setter
def raw(self, value: AnnData):
    """Set `raw` from another AnnData's `X`, `var` and `varm`; `None` deletes it."""
    if value is None:
        del self.raw
        return
    if not isinstance(value, AnnData):
        msg = "Can only init raw attribute with an AnnData object."
        raise ValueError(msg)
    if self.is_view:
        # assigning to a view first turns it into an actual object
        self._init_as_actual(self.copy())
    self._raw = Raw(self, X=value.X, var=value.var, varm=value.varm)
@raw.deleter
def raw(self):
    """Drop the raw data; a view is turned into an actual object first."""
    is_view = self.is_view
    if is_view:
        self._init_as_actual(self.copy())
    self._raw = None
@property
def n_obs(self) -> int:
    """Number of observations, i.e. the length of :attr:`obs_names`."""
    names = self.obs_names
    return len(names)
@property
def n_vars(self) -> int:
    """Number of variables/features, i.e. the length of :attr:`var_names`."""
    names = self.var_names
    return len(names)
def _set_dim_df(self, value: pd.DataFrame, attr: Literal["obs", "var"]):
    """Replace the `obs` or `var` DataFrame (selected by `attr`) with `value`.

    The index of `value` is validated with `_prep_dim_index` before anything
    is modified; a view is turned into an actual object before assignment.

    Raises
    ------
    ValueError
        If `value` is not a `pd.DataFrame` (or fails the index/column checks).
    """
    if not isinstance(value, pd.DataFrame):
        raise ValueError(f"Can only assign pd.DataFrame to {attr}.")
    raise_value_error_if_multiindex_columns(value, attr)
    new_index = self._prep_dim_index(value.index, attr)

    if self.is_view:
        self._init_as_actual(self.copy())
    setattr(self, f"_{attr}", value)
    self._set_dim_index(new_index, attr)

    # a DataFrame without columns gets a string-typed (empty) column index
    if len(value.columns) == 0:
        value.columns = value.columns.astype(str)
def _prep_dim_index(self, value, attr: str) -> pd.Index:
    """Validate `value` and convert it to the index used as obs_names or var_names.

    The length of `value` must match the corresponding entry of `self.shape`.
    A `pd.Index` whose `name` is neither a string nor `None` is rejected;
    everything else is passed through `pd.Index(...)`, and a non-string `name`
    on the result is reset to `None`. A warning is issued if a non-empty,
    non-`RangeIndex` result is not inferred to contain strings or bytes.

    Raises
    ------
    ValueError
        On a length mismatch or an invalid index name.
    """
    axis = 1 if attr == "var" else 0
    if len(value) != self.shape[axis]:
        msg = f"Length of passed value for {attr}_names is {len(value)}, but this AnnData has shape: {self.shape}"
        raise ValueError(msg)

    allowed_name_types = (str, type(None))
    if isinstance(value, pd.Index) and not isinstance(value.name, allowed_name_types):
        raise ValueError(
            f"AnnData expects .{attr}.index.name to be a string or None, "
            f"but you passed a name of type {type(value.name).__name__!r}"
        )

    index = pd.Index(value)
    if not isinstance(index.name, allowed_name_types):
        index.name = None

    if len(index) > 0 and not isinstance(index, pd.RangeIndex):
        inferred = infer_dtype(index)
        if inferred not in {"string", "bytes"}:
            # show at most the first five labels in the warning
            sample = list(index[: min(len(index), 5)])
            msg = dedent(
                f"""
                AnnData expects .{attr}.index to contain strings, but got values like:
                {sample}
                Inferred to be: {inferred}
                """
            )
            warnings.warn(msg, stacklevel=2)
    return index
def _set_dim_index(self, value: pd.Index, attr: str):
    """Install `value` as the index of `obs`/`var` and of aligned DataFrames.

    Assumes `value` already went through `_prep_dim_index`. DataFrames stored
    in the matching `_obsm`/`_varm` mapping receive the same index.
    """
    if self.is_view:
        self._init_as_actual(self.copy())
    getattr(self, attr).index = value
    aligned = getattr(self, f"_{attr}m")
    for frame in (el for el in aligned.values() if isinstance(el, pd.DataFrame)):
        frame.index = value
@property
def obs(self) -> pd.DataFrame:
    """Per-observation, one-dimensional annotation (a `pd.DataFrame`)."""
    frame = self._obs
    return frame
@obs.setter
def obs(self, value: pd.DataFrame):
    """Assign a new `obs` DataFrame; validation is delegated to `_set_dim_df`."""
    self._set_dim_df(value, attr="obs")
@obs.deleter
def obs(self):
    """Reset `obs` to a column-less DataFrame that keeps the current `obs_names`."""
    blank = pd.DataFrame({}, index=self.obs_names)
    self.obs = blank
@property
def obs_names(self) -> pd.Index:
    """Observation names; this is the index of :attr:`obs`."""
    frame = self.obs
    return frame.index
@obs_names.setter
def obs_names(self, names: Sequence[str]):
    """Validate `names` with `_prep_dim_index` and install them as the `obs` index."""
    self._set_dim_index(self._prep_dim_index(names, "obs"), "obs")
@property
def var(self) -> pd.DataFrame:
    """Per-variable/feature, one-dimensional annotation (a `pd.DataFrame`)."""
    frame = self._var
    return frame
@var.setter
def var(self, value: pd.DataFrame):
    """Assign a new `var` DataFrame; validation is delegated to `_set_dim_df`."""
    self._set_dim_df(value, attr="var")
@var.deleter
def var(self):
    """Reset `var` to a column-less DataFrame that keeps the current `var_names`."""
    blank = pd.DataFrame({}, index=self.var_names)
    self.var = blank
@property
def var_names(self) -> pd.Index:
    """Variable names; this is the index of :attr:`var`."""
    frame = self.var
    return frame.index
@var_names.setter
def var_names(self, names: Sequence[str]):
    """Validate `names` with `_prep_dim_index` and install them as the `var` index."""
    self._set_dim_index(self._prep_dim_index(names, "var"), "var")
@property
def uns(self) -> MutableMapping:
    """Unstructured annotation; on a view it is returned wrapped in a `DictView`."""
    stored = self._uns
    if not self.is_view:
        return stored
    return DictView(stored, view_args=(self, "_uns"))
@uns.setter
def uns(self, value: MutableMapping):
    """Assign the unstructured annotation.

    Only mutable mappings are accepted. A `DictView` is copied before it is
    stored, and a view is turned into an actual object first.
    """
    if not isinstance(value, MutableMapping):
        raise ValueError(
            "Only mutable mapping types (e.g. dict) are allowed for `.uns`."
        )
    replacement = value.copy() if isinstance(value, DictView) else value
    if self.is_view:
        self._init_as_actual(self.copy())
    self._uns = replacement
@uns.deleter
def uns(self):
    """Reset `uns` to an empty `OrderedDict`."""
    empty = OrderedDict()
    self.uns = empty
# `obsm` is declared through `AlignedMappingProperty`, constructed with the
# attribute name "obsm", the `AxisArrays` class and `0`.
# NOTE(review): the trailing 0 presumably selects the observation axis — confirm
# in `aligned_mapping`.
obsm: AlignedMappingProperty[AxisArrays | AxisArraysView] = AlignedMappingProperty(
"obsm", AxisArrays, 0
)
"""\
Multi-dimensional annotation of observations
(mutable structured :class:`~numpy.ndarray`).
Stores for each key a two or higher-dimensional :class:`~numpy.ndarray`
of length `n_obs`.
Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`.
"""
# `varm` is declared through `AlignedMappingProperty`, constructed with the
# attribute name "varm", the `AxisArrays` class and `1`.
# NOTE(review): the trailing 1 presumably selects the variable axis — confirm
# in `aligned_mapping`.
varm: AlignedMappingProperty[AxisArrays | AxisArraysView] = AlignedMappingProperty(
"varm", AxisArrays, 1
)
"""\
Multi-dimensional annotation of variables/features
(mutable structured :class:`~numpy.ndarray`).
Stores for each key a two or higher-dimensional :class:`~numpy.ndarray`
of length `n_vars`.
Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`.
"""
# `obsp` is declared through `AlignedMappingProperty`, constructed with the
# attribute name "obsp", the `PairwiseArrays` class and `0`.
# NOTE(review): the trailing 0 presumably selects the observation axis — confirm
# in `aligned_mapping`.
obsp: AlignedMappingProperty[PairwiseArrays | PairwiseArraysView] = (
AlignedMappingProperty("obsp", PairwiseArrays, 0)
)
"""\
Pairwise annotation of observations,
a mutable mapping with array-like values.
Stores for each key a two or higher-dimensional :class:`~numpy.ndarray`
whose first two dimensions are of length `n_obs`.
Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`.
"""
# `varp` is declared through `AlignedMappingProperty`, constructed with the
# attribute name "varp", the `PairwiseArrays` class and `1`.
# NOTE(review): the trailing 1 presumably selects the variable axis — confirm
# in `aligned_mapping`.
varp: AlignedMappingProperty[PairwiseArrays | PairwiseArraysView] = (
AlignedMappingProperty("varp", PairwiseArrays, 1)
)
"""\
Pairwise annotation of variables/features,
a mutable mapping with array-like values.
Stores for each key a two or higher-dimensional :class:`~numpy.ndarray`
whose first two dimensions are of length `n_vars`.
Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`.
"""
def obs_keys(self) -> list[str]:
    """Return the column names of the observation annotation :attr:`obs` as a list."""
    columns = self._obs.columns
    return columns.tolist()
def var_keys(self) -> list[str]:
    """Return the column names of the variable annotation :attr:`var` as a list."""
    columns = self._var.columns
    return columns.tolist()
def obsm_keys(self) -> list[str]:
    """Return the keys of the multi-dimensional observation annotation :attr:`obsm`."""
    return [*self.obsm.keys()]
def varm_keys(self) -> list[str]:
    """Return the keys of the multi-dimensional variable annotation :attr:`varm`."""
    return [*self.varm.keys()]
def uns_keys(self) -> list[str]:
    """Return the keys of the unstructured annotation, sorted."""
    return sorted(self._uns.keys())
@property
def isbacked(self) -> bool:
    """Whether the object is backed on disk, i.e. whether :attr:`filename` is set."""
    backing_path = self.filename
    return backing_path is not None
@property
def is_view(self) -> bool:
    """Whether this object is a view of another AnnData object."""
    flag = self._is_view
    return flag
@property
def filename(self) -> Path | None:
    """\
    Filename of the `.h5ad` backing file; set it to switch to backing mode.

    - Assigning a filename writes the stored data to disk.
    - Assigning a filename when another filename was set before
      moves the backing file from the previous location to the new one.
      To copy the previous file instead, use `copy(filename='new_filename')`.
    """
    manager = self.file
    return manager.filename
@filename.setter
def filename(self, filename: PathLike[str] | str | None):
    """Switch between memory and backed mode, or move the backing file.

    `None` moves a backed object back into memory; a path either starts
    backing an in-memory object or renames the existing backing file.
    Nothing happens when the requested state equals the current one.
    """
    # convert early for later comparison
    target = None if filename is None else Path(filename)
    current = self.filename

    if target is None:
        # change from backing-mode back to full loading into memory
        # (if not backed at all, there is nothing to do)
        if current is not None:
            self.file._to_memory_mode()
        return

    if current is not None:
        if current == target:
            # already backed at the requested location
            return
        # write the content of self to the old file
        # and close the file
        self.write()
        self.filename.rename(target)
    else:
        # change from memory to backing-mode
        # write the content of self to disk
        as_dense = ("X", "raw/X") if self.raw is not None else ("X",)
        self.write(target, as_dense=as_dense)

    # open new file for accessing
    self.file.open(target, "r+")
    # as the data is stored on disk, we can safely set self._X to None
    self._X = None
def _set_backed(self, attr, value):
from .._io.utils import write_attribute