Skip to content

Commit 8b20571

Browse files
committed
improved loom interface for sparse matrices
1 parent 2a63f11 commit 8b20571

File tree

3 files changed

+30
-17
lines changed

3 files changed

+30
-17
lines changed

anndata/readwrite/read.py

+24-13
Original file line numberDiff line numberDiff line change
@@ -84,20 +84,20 @@ def read_umi_tools(filename: Union[Path, str]) -> AnnData:
8484
# import gzip to read a gzipped file :-)
8585
import gzip
8686
from pandas import DataFrame
87-
87+
8888
dod = {} # this will contain basically everything
8989
fh = gzip.open(filename)
9090
header = fh.readline() # read the first line
91-
91+
9292
for line in fh:
9393
t = line.decode('ascii').split('\t') # gzip read bytes, hence the decoding
9494
try:
9595
dod[t[1]].update({t[0]:int(t[2])})
9696
except KeyError:
9797
dod[t[1]] = {t[0]:int(t[2])}
98-
98+
9999
df = DataFrame.from_dict(dod, orient='index') # build the matrix
100-
df.fillna(value = 0., inplace=True) # many NaN, replace with zeros
100+
df.fillna(value=0., inplace=True) # many NaN, replace with zeros
101101
return AnnData(np.array(df), {'obs_names': df.index}, {'var_names': df.columns})
102102

103103

@@ -138,28 +138,39 @@ def read_hdf(filename: Union[Path, str], key: str) -> AnnData:
138138
return adata
139139

140140

141-
def read_loom(filename: Union[Path, str]) -> AnnData:
141+
def read_loom(filename: Union[Path, str], sparse=False) -> AnnData:
142142
"""Read `.loom`-formatted hdf5 file.
143143
144+
This reads the whole file into memory.
145+
146+
Beware that you have to explicitly state when you want to read the file as
147+
sparse data.
148+
144149
Parameters
145150
----------
146151
filename : `str`
147152
The filename.
153+
sparse : `bool`
154+
Whether to read the data matrix as sparse.
148155
149156
Returns
150157
-------
151158
An :class:`~anndata.AnnData` object.
152159
"""
153160
filename = str(filename) # allow passing pathlib.Path objects
154161
from loompy import connect
155-
lc = connect(filename, 'r')
156-
with h5py.File(filename, 'r') as f:
157-
X = f['matrix'][()]
158-
adata = AnnData(
159-
X.T,
160-
obs=dict(lc.col_attrs), # not ideal: make the generator a dict...
161-
var=dict(lc.row_attrs))
162-
lc.close()
162+
if sparse:
163+
with connect(filename, 'r') as lc:
164+
X = lc.sparse()
165+
else:
166+
with h5py.File(filename, 'r') as f:
167+
X = f['matrix'][()]
168+
with connect(filename, 'r') as lc:
169+
adata = AnnData(
170+
X.T,
171+
obs=dict(lc.col_attrs), # not ideal: make the generator a dict...
172+
var=dict(lc.row_attrs))
173+
lc.close()
163174
return adata
164175

165176

anndata/readwrite/write.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,10 @@ def write_loom(filename: Union[Path, str], adata: AnnData):
6666
if issparse(X):
6767
logg.info(
6868
'... writing to \'.loom\' file densifies sparse matrix')
69-
X = X.toarray()
69+
X = X.tocoo()
7070
from loompy import create
71+
if os.path.exists(filename):
72+
os.remove(filename)
7173
create(filename, X, row_attrs=row_attrs, col_attrs=col_attrs)
7274

7375

anndata/tests/readwrite.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -61,16 +61,16 @@ def test_readwrite_h5ad():
6161

6262

6363
def test_readwrite_loom():
64-
for typ in [np.array, csr_matrix]:
64+
for i, typ in enumerate([np.array, csr_matrix]):
6565
X = typ(X_list)
6666
adata = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict)
6767
adata.write_loom('./test.loom')
68-
adata = ad.read_loom('./test.loom')
68+
adata = ad.read_loom('./test.loom', sparse=(i == 1))
6969
if isinstance(X, np.ndarray):
7070
assert np.allclose(adata.X, X)
7171
else:
7272
# TODO: this should not be necessary
73-
assert np.allclose(adata.X, X.toarray())
73+
assert np.allclose(adata.X.toarray(), X.toarray())
7474

7575

7676
def test_read_csv():

0 commit comments

Comments
 (0)