4
4
import numpy as np
5
5
import pandas as pd
6
6
from .timer import Timer
7
+ from .onlineCoexpression import onlineCoexpression
7
8
from joblib .externals .loky import set_loky_pickler
8
9
from joblib import parallel_backend
9
10
from joblib import Parallel , delayed
@@ -67,6 +68,8 @@ class Lioness(Panda):
67
68
samples as column name
68
69
ignore_final: bool
69
70
if True, no lioness network is kept in memory. This requires saving single networks at each step
71
+ online_coexpression: bool
72
+ if True, each LIONESS correlation is computed using the online coexpression method.
70
73
Returns
71
74
--------
72
75
export_lioness_results : _
@@ -127,6 +130,7 @@ def __init__(
127
130
save_single = False ,
128
131
export_filename = None ,
129
132
ignore_final = False ,
133
+ online_coexpression = False ,
130
134
):
131
135
""" Initialize instance of Lioness class and load data.
132
136
"""
@@ -168,6 +172,7 @@ def __init__(
168
172
self .gene_names = gene_names
169
173
self .tf_names = tf_names
170
174
del obj
175
+ self .online_coexpression = online_coexpression
171
176
172
177
# Get sample range to iterate
173
178
# the number of conditions is the N parameter used for the number of samples in the whole background
@@ -217,9 +222,33 @@ def __init__(
217
222
# We create the folder
218
223
if not os .path .exists (save_dir ):
219
224
os .makedirs (save_dir )
225
+
226
+ # if we are using online_coexpression
227
+ if self .online_coexpression :
228
+ self .mi_all = np .mean (self .expression_matrix , axis = 1 )
229
+ # Here we need to specify the ddof=1 to get the estimator that matches corrcoef
230
+ self .std_all = np .std (self .expression_matrix , axis = 1 , ddof = 1 )
231
+ self .cov_all = np .cov (self .expression_matrix )
232
+ # We need to set the diagonal to 1 if na and 0 elsewhere
233
+ # Get indices of the matrix
234
+ rows , cols = np .indices (self .cov_all .shape )
235
+ # Create mask for NaNs
236
+ nan_mask = np .isnan (self .cov_all )
237
+ # Replace NaNs based on diagonal condition
238
+ self .cov_all [nan_mask & (rows == cols )] = 1 # Diagonal NaNs → 1
239
+ self .cov_all [nan_mask & (rows != cols )] = 0 # Off-diagonal NaNs → 0
240
+
241
+ # we need n_conditions as number of samples
242
+ assert (self .n_conditions > 0 )
243
+
244
+
245
+
246
+
220
247
#############
221
248
# Run LIONESS
222
249
#############
250
+
251
+
223
252
if int (self .n_conditions ) >= int (self .n_cores ) and self .computing == "cpu" :
224
253
# the total_lioness_network here is a list of 1d
225
254
# arrays (network:(tfXgene,1),gene_targeting:(gene,1),tf_targeting:(tf,1))
@@ -231,6 +260,11 @@ def __init__(
231
260
for i in self .indexes :
232
261
self .total_lioness_network = self .__lioness_loop (i )
233
262
self .total_lioness_network = self .total_lioness_network .T
263
+
264
+ #############
265
+ # OUTPUT ####
266
+ #############
267
+
234
268
# create result data frame
235
269
if self .ignore_final :
236
270
print ('WARNING: we do not keep all lionesses in memory. They have been saved singularly.' )
@@ -279,34 +313,47 @@ def __init__(
279
313
else :
280
314
self .save_lioness_results ()
281
315
282
- def __lioness_loop (self , i ):
283
- #TODO: this is now for GPU only in practice
284
- """ Initialize instance of Lioness class and load data.
316
+ def __compute_subset_panda (self ,i ):
317
+ """ Compute the subset panda network using the correlation matrix and the motif matrix.
285
318
319
+ Parameters
320
+ ----------
321
+ correlation_matrix: array
322
+ The coexpression network to be used for computing the subset panda network.
323
+
286
324
Returns
287
- --------
288
- self.total_lioness_network : array
289
- An edge-by-sample matrix containing sample-specific networks .
325
+ -------
326
+ subset_panda_network : array
327
+ The subset panda network .
290
328
"""
291
- # for i in self.indexes:
292
- print ("Running LIONESS for sample %d/%d:" % ((i ),(self .n_conditions )))
293
329
idx = [x for x in range (self .n_conditions ) if x != i ] # all samples except i
294
- with Timer ("Computing coexpression network:" ):
295
- if self .computing == "gpu" :
296
- import cupy as cp
297
-
298
- correlation_matrix_cp = cp .corrcoef (self .expression_matrix [:, idx ].astype (self .np_dtype )).astype (self .np_dtype )
299
- if cp .isnan (correlation_matrix_cp ).any ():
300
- cp .fill_diagonal (correlation_matrix_cp , 1 )
301
- correlation_matrix_cp = cp .nan_to_num (correlation_matrix_cp )
302
- correlation_matrix = cp .asnumpy (correlation_matrix_cp )
303
- del correlation_matrix_cp
304
- cp ._default_memory_pool .free_all_blocks ()
305
- else :
306
- correlation_matrix = np .corrcoef (self .expression_matrix [:, idx ])
307
- if np .isnan (correlation_matrix ).any ():
308
- np .fill_diagonal (correlation_matrix , 1 )
309
- correlation_matrix = np .nan_to_num (correlation_matrix )
330
+
331
+ if self .online_coexpression :
332
+ print ("Computing online coexpression" )
333
+ correlation_matrix = onlineCoexpression (
334
+ self .expression_matrix [:, i ],
335
+ self .n_conditions ,
336
+ self .mi_all ,
337
+ self .std_all ,
338
+ self .cov_all ,
339
+ )
340
+ else :
341
+ with Timer ("Computing coexpression network:" ):
342
+ if self .computing == "gpu" :
343
+ import cupy as cp
344
+
345
+ correlation_matrix_cp = cp .corrcoef (self .expression_matrix [:, idx ].astype (self .np_dtype )).astype (self .np_dtype )
346
+ if cp .isnan (correlation_matrix_cp ).any ():
347
+ cp .fill_diagonal (correlation_matrix_cp , 1 )
348
+ correlation_matrix_cp = cp .nan_to_num (correlation_matrix_cp )
349
+ correlation_matrix = cp .asnumpy (correlation_matrix_cp )
350
+ del correlation_matrix_cp
351
+ cp ._default_memory_pool .free_all_blocks ()
352
+ else :
353
+ correlation_matrix = np .corrcoef (self .expression_matrix [:, idx ])
354
+ if np .isnan (correlation_matrix ).any ():
355
+ np .fill_diagonal (correlation_matrix , 1 )
356
+ correlation_matrix = np .nan_to_num (correlation_matrix )
310
357
311
358
with Timer ("Normalizing networks:" ):
312
359
correlation_matrix_orig = (
@@ -328,6 +375,23 @@ def __lioness_loop(self, i):
328
375
del correlation_matrix
329
376
subset_panda_network = correlation_matrix_orig
330
377
378
+ return subset_panda_network
379
+
380
+ def __lioness_loop (self , i ):
381
+ #TODO: this is now for GPU only in practice
382
+ """ Initialize instance of Lioness class and load data.
383
+
384
+ Returns
385
+ --------
386
+ self.total_lioness_network: array
387
+ An edge-by-sample matrix containing sample-specific networks.
388
+ """
389
+ # for i in self.indexes:
390
+ print ("Running LIONESS for sample %d/%d:" % ((i ),(self .n_conditions )))
391
+
392
+ # get subset panda network
393
+ subset_panda_network = self .__compute_subset_panda (i )
394
+
331
395
# For consistency with R, we are using the N panda_all - (N-1) panda_all_but_q
332
396
lioness_network = (self .n_conditions * self .network ) - (
333
397
(self .n_conditions - 1 ) * subset_panda_network
@@ -367,7 +431,7 @@ def __lioness_loop(self, i):
367
431
368
432
@delayed
369
433
@wrap_non_picklable_objects
370
- def __par_lioness_loop (self , i , output ):
434
+ def __par_lioness_loop (self , i , output , online_coexpression = None ):
371
435
""" Initialize instance of Lioness class and load data.
372
436
373
437
Returns
@@ -376,44 +440,10 @@ def __par_lioness_loop(self, i, output):
376
440
An edge-by-sample matrix containing sample-specific networks.
377
441
"""
378
442
# for i in self.indexes:
379
- print ("Running LIONESS for sample %d:" % (i + 1 ))
380
- idx = [x for x in range (self .n_conditions ) if x != i ] # all samples except i
381
- with Timer ("Computing coexpression network:" ):
382
- if self .computing == "gpu" :
383
- import cupy as cp
384
-
385
- correlation_matrix = cp .corrcoef (self .expression_matrix [:, idx ])
386
- if cp .isnan (correlation_matrix ).any ():
387
- cp .fill_diagonal (correlation_matrix , 1 )
388
- correlation_matrix = cp .nan_to_num (correlation_matrix )
389
- correlation_matrix = cp .asnumpy (correlation_matrix )
390
- else :
391
- # run on CPU with numpy
392
- correlation_matrix = np .corrcoef (self .expression_matrix [:, idx ])
393
- if np .isnan (correlation_matrix ).any ():
394
- np .fill_diagonal (correlation_matrix , 1 )
395
- correlation_matrix = np .nan_to_num (correlation_matrix )
396
-
397
- with Timer ("Normalizing networks:" ):
398
- correlation_matrix_orig = (
399
- correlation_matrix # save matrix before normalization
400
- )
401
- correlation_matrix = self ._normalize_network (correlation_matrix )
443
+ print ("Running LIONESS for sample %d/%d:" % ((i ),(self .n_conditions )))
402
444
403
- with Timer ("Inferring LIONESS network:" ):
404
- # TODO: fix this correlation matrix+delete
405
- if self .motif_matrix is not None :
406
- del correlation_matrix_orig
407
- subset_panda_network = compute_panda (
408
- correlation_matrix ,
409
- np .copy (self .ppi_matrix ),
410
- np .copy (self .motif_matrix ),
411
- computing = self .computing ,
412
- alpha = self .alpha ,
413
- )
414
- else :
415
- del correlation_matrix
416
- subset_panda_network = correlation_matrix_orig
445
+ # get subset panda network
446
+ subset_panda_network = self .__compute_subset_panda (i )
417
447
418
448
# For consistency with R, we are using the N panda_all - (N-1) panda_all_but_q
419
449
#lioness_network = self.n_conditions * (self.network - subset_panda_network) + subset_panda_network
0 commit comments