Skip to content

Commit 0d0546f

Browse files
committed
update dataloading
1 parent 2192575 commit 0d0546f

1 file changed

Lines changed: 10 additions & 7 deletions

File tree

decoupler/pre.py

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ def check_mat(m, r, c, verbose=False):
1818

1919
# Check for empty features
2020
if type(m) is csr_matrix:
21-
msk_features = np.sum(m != 0, axis=0).A1 == 0
21+
msk_features = m.getnnz(axis=0) == 0
2222
else:
2323
msk_features = np.count_nonzero(m, axis=0) == 0
2424
n_empty_features = np.sum(msk_features)
@@ -29,16 +29,16 @@ def check_mat(m, r, c, verbose=False):
2929
m = m[:, ~msk_features]
3030

3131
# Sort features
32-
msk = np.argsort(c)
33-
m, r, c = m[:, msk], r.astype('U'), c[msk].astype('U')
32+
#msk = np.argsort(c)
33+
#m, r, c = m[:, msk], r.astype('U'), c[msk].astype('U')
3434

3535
# Check for repeated features
3636
if np.any(c[1:] == c[:-1]):
3737
raise ValueError("""mat contains repeated feature names, please make them unique.""")
3838

3939
# Check for empty samples
4040
if type(m) is csr_matrix:
41-
msk_samples = np.sum(m != 0, axis=1).A1 == 0
41+
msk_samples = m.getnnz(axis=1) == 0
4242
else:
4343
msk_samples = np.count_nonzero(m, axis=1) == 0
4444
n_empty_samples = np.sum(msk_samples)
@@ -174,9 +174,12 @@ def match(c, r, net):
174174
# Init empty regX
175175
regX = np.zeros((c.shape[0], net.shape[1]), dtype=np.float32)
176176

177-
# Match genes from mat, else are 0s
178-
idxs = np.searchsorted(c, r)
179-
regX[idxs] = net
177+
# Create an index array for rows of c corresponding to r
178+
c_dict = {gene: i for i, gene in enumerate(c)}
179+
idxs = [c_dict[gene] for gene in r if gene in c_dict]
180+
181+
# Populate regX using advanced indexing
182+
regX[idxs, :] = net[: len(idxs), :]
180183

181184
return regX
182185

0 commit comments

Comments
 (0)