@@ -256,6 +256,7 @@ <h1>Source code for illico.utils.math</h1><div class="highlight"><pre>
256256< span class ="sd "> c (float | int): Scalar value to accumulate in `a` at `indices`.</ span >
257257
258258< span class ="sd "> Author: Rémy Dubois</ span >
259+
259260< span class ="sd "> """</ span >
260261 < span class ="k "> for</ span > < span class ="n "> i</ span > < span class ="ow "> in</ span > < span class ="nb "> range</ span > < span class ="p "> (</ span > < span class ="nb "> len</ span > < span class ="p "> (</ span > < span class ="n "> b</ span > < span class ="p "> )):</ span >
261262 < span class ="n "> a</ span > < span class ="p "> [</ span > < span class ="n "> b</ span > < span class ="p "> [</ span > < span class ="n "> i</ span > < span class ="p "> ]]</ span > < span class ="o "> +=</ span > < span class ="n "> c</ span >
@@ -271,6 +272,7 @@ <h1>Source code for illico.utils.math</h1><div class="highlight"><pre>
271272< span class ="sd "> c (float | int): Vector holding value to accumulate in `a` at `indices`.</ span >
272273
273274< span class ="sd "> Author: Rémy Dubois</ span >
275+
274276< span class ="sd "> """</ span >
275277 < span class ="k "> for</ span > < span class ="n "> i</ span > < span class ="ow "> in</ span > < span class ="nb "> range</ span > < span class ="p "> (</ span > < span class ="nb "> len</ span > < span class ="p "> (</ span > < span class ="n "> b</ span > < span class ="p "> )):</ span >
276278 < span class ="n "> a</ span > < span class ="p "> [</ span > < span class ="n "> b</ span > < span class ="p "> [</ span > < span class ="n "> i</ span > < span class ="p "> ]]</ span > < span class ="o "> +=</ span > < span class ="n "> c</ span > < span class ="p "> [</ span > < span class ="n "> i</ span > < span class ="p "> ]</ span >
@@ -290,6 +292,7 @@ <h1>Source code for illico.utils.math</h1><div class="highlight"><pre>
290292< span class ="sd "> np.ndarray: Results diff array, of size x.size - 1.</ span >
291293
292294< span class ="sd "> Author: Rémy Dubois</ span >
295+
293296< span class ="sd "> """</ span >
294297 < span class ="k "> assert</ span > < span class ="n "> x</ span > < span class ="o "> .</ span > < span class ="n "> ndim</ span > < span class ="o "> ==</ span > < span class ="mi "> 1</ span >
295298 < span class ="n "> result</ span > < span class ="o "> =</ span > < span class ="n "> np</ span > < span class ="o "> .</ span > < span class ="n "> empty</ span > < span class ="p "> (</ span > < span class ="n "> x</ span > < span class ="o "> .</ span > < span class ="n "> size</ span > < span class ="o "> -</ span > < span class ="mi "> 1</ span > < span class ="p "> ,</ span > < span class ="n "> dtype</ span > < span class ="o "> =</ span > < span class ="n "> x</ span > < span class ="o "> .</ span > < span class ="n "> dtype</ span > < span class ="p "> )</ span >
@@ -328,6 +331,7 @@ <h1>Source code for illico.utils.math</h1><div class="highlight"><pre>
328331< span class ="sd "> tuple[float]: P-value and z-score</ span >
329332
330333< span class ="sd "> Author: Rémy Dubois</ span >
334+
331335< span class ="sd "> """</ span >
332336 < span class ="n "> tie_corr</ span > < span class ="o "> =</ span > < span class ="mf "> 1.0</ span > < span class ="o "> -</ span > < span class ="n "> tie_sum</ span > < span class ="o "> /</ span > < span class ="p "> (</ span > < span class ="n "> n</ span > < span class ="o "> *</ span > < span class ="p "> (</ span > < span class ="n "> n</ span > < span class ="o "> -</ span > < span class ="mi "> 1</ span > < span class ="p "> )</ span > < span class ="o "> *</ span > < span class ="p "> (</ span > < span class ="n "> n</ span > < span class ="o "> +</ span > < span class ="mi "> 1</ span > < span class ="p "> ))</ span >
333337 < span class ="k "> if</ span > < span class ="n "> tie_corr</ span > < span class ="o "> ></ span > < span class ="mf "> 1.0e-9</ span > < span class ="p "> :</ span > < span class ="c1 "> # TODO: do that properly</ span >
@@ -377,6 +381,7 @@ <h1>Source code for illico.utils.math</h1><div class="highlight"><pre>
377381< span class ="sd "> ValueError: If data is neither sparse nor dense.</ span >
378382
379383< span class ="sd "> Author: Rémy Dubois</ span >
384+
380385< span class ="sd "> """</ span >
381386 < span class ="k "> if</ span > < span class ="nb "> isinstance</ span > < span class ="p "> (</ span > < span class ="n "> X</ span > < span class ="p "> ,</ span > < span class ="n "> sc_sparse</ span > < span class ="o "> .</ span > < span class ="n "> spmatrix</ span > < span class ="p "> ):</ span >
382387 < span class ="n "> data</ span > < span class ="o "> =</ span > < span class ="n "> X</ span > < span class ="o "> .</ span > < span class ="n "> data</ span >
@@ -414,6 +419,7 @@ <h1>Source code for illico.utils.math</h1><div class="highlight"><pre>
414419< span class ="sd "> np.ndarray: Fold change values of shape (n_groups, n_genes)</ span >
415420
416421< span class ="sd "> Author: Rémy Dubois</ span >
422+
417423< span class ="sd "> """</ span >
418424 < span class ="k "> assert</ span > < span class ="n "> group_agg_counts</ span > < span class ="o "> .</ span > < span class ="n "> shape</ span > < span class ="p "> [</ span > < span class ="mi "> 0</ span > < span class ="p "> ]</ span > < span class ="o "> ==</ span > < span class ="n "> grpc</ span > < span class ="o "> .</ span > < span class ="n "> counts</ span > < span class ="o "> .</ span > < span class ="n "> size</ span >
419425 < span class ="k "> assert</ span > < span class ="n "> group_agg_counts</ span > < span class ="o "> .</ span > < span class ="n "> ndim</ span > < span class ="o "> ==</ span > < span class ="mi "> 2</ span >
@@ -428,9 +434,9 @@ <h1>Source code for illico.utils.math</h1><div class="highlight"><pre>
428434 < span class ="n "> mu_ref</ span > < span class ="o "> =</ span > < span class ="n "> np</ span > < span class ="o "> .</ span > < span class ="n "> expand_dims</ span > < span class ="p "> (</ span > < span class ="n "> mu_tgt</ span > < span class ="p "> [</ span > < span class ="n "> grpc</ span > < span class ="o "> .</ span > < span class ="n "> encoded_ref_group</ span > < span class ="p "> ],</ span > < span class ="mi "> 0</ span > < span class ="p "> )</ span > < span class ="c1 "> # (1, n_genes)</ span >
429435
430436 < span class ="k "> if</ span > < span class ="n "> exp_post_agg</ span > < span class ="p "> :</ span >
431- < span class ="n "> fold_change</ span > < span class ="o "> =</ span > < span class ="n " > np </ span > < span class ="o " > . </ span > < span class ="n " > where </ span > < span class ="p " > ( </ span > < span class ="n " > mu_ref </ span > < span class ="o " > == </ span > < span class ="mi " > 0 </ span > < span class ="p " > , </ span > < span class ="n " > np </ span > < span class ="o " > . </ span > < span class ="n " > inf </ span > < span class ="p "> , </ span > < span class ="n "> np</ span > < span class ="o "> .</ span > < span class ="n "> expm1</ span > < span class ="p "> (</ span > < span class ="n "> mu_tgt </ span > < span class ="p "> )</ span > < span class ="o "> / </ span > < span class ="n " > np </ span > < span class ="o " > . </ span > < span class =" n " > expm1 </ span > < span class =" p "> ( </ span > < span class =" n " > mu_ref </ span > < span class =" p " > ) )</ span >
437+ < span class ="n "> fold_change</ span > < span class ="o "> =</ span > < span class ="p " > ( </ span > < span class ="n " > np </ span > < span class ="o " > . </ span > < span class ="n " > expm1 </ span > < span class ="p " > ( </ span > < span class ="n " > mu_tgt </ span > < span class ="p " > ) </ span > < span class ="o " > + </ span > < span class ="mf " > 1e-9 </ span > < span class ="p " > ) </ span > < span class ="o " > / </ span > < span class ="p "> ( </ span > < span class ="n "> np</ span > < span class ="o "> .</ span > < span class ="n "> expm1</ span > < span class ="p "> (</ span > < span class ="n "> mu_ref </ span > < span class ="p "> )</ span > < span class ="o "> + </ span > < span class ="mf " > 1e-9 </ span > < span class ="p "> )</ span >
432438 < span class ="k "> else</ span > < span class ="p "> :</ span >
433- < span class ="n "> fold_change</ span > < span class ="o "> =</ span > < span class ="n " > np </ span > < span class =" o " > . </ span > < span class =" n " > where </ span > < span class =" p "> (</ span > < span class ="n "> mu_ref </ span > < span class ="o "> == </ span > < span class ="mi " > 0 </ span > < span class ="p "> , </ span > < span class ="n " > np </ span > < span class =" o " > . < /span > < span class =" n " > inf </ span > < span class ="p "> , </ span > < span class ="n "> mu_tgt </ span > < span class ="o "> / </ span > < span class ="n " > mu_ref </ span > < span class ="p "> )</ span >
439+ < span class ="n "> fold_change</ span > < span class ="o "> =</ span > < span class ="p "> (</ span > < span class ="n "> mu_tgt </ span > < span class ="o "> + </ span > < span class ="mf " > 1e-9 </ span > < span class ="p "> ) </ span > < span class ="o " > / < /span > < span class ="p "> ( </ span > < span class ="n "> mu_ref </ span > < span class ="o "> + </ span > < span class ="mf " > 1e-9 </ span > < span class ="p "> )</ span >
434440
435441 < span class ="k "> return</ span > < span class ="n "> fold_change</ span >
436442
@@ -449,6 +455,7 @@ <h1>Source code for illico.utils.math</h1><div class="highlight"><pre>
449455< span class ="sd "> np.ndarray: Fold change values of shape (n_groups, n_genes)</ span >
450456
451457< span class ="sd "> Author: Rémy Dubois</ span >
458+
452459< span class ="sd "> """</ span >
453460 < span class ="n "> group_agg_counts</ span > < span class ="o "> =</ span > < span class ="n "> np</ span > < span class ="o "> .</ span > < span class ="n "> zeros</ span > < span class ="p "> (</ span > < span class ="n "> shape</ span > < span class ="o "> =</ span > < span class ="p "> (</ span > < span class ="n "> grpc</ span > < span class ="o "> .</ span > < span class ="n "> counts</ span > < span class ="o "> .</ span > < span class ="n "> size</ span > < span class ="p "> ,</ span > < span class ="n "> X</ span > < span class ="o "> .</ span > < span class ="n "> shape</ span > < span class ="p "> [</ span > < span class ="mi "> 1</ span > < span class ="p "> ]),</ span > < span class ="n "> dtype</ span > < span class ="o "> =</ span > < span class ="n "> np</ span > < span class ="o "> .</ span > < span class ="n "> float64</ span > < span class ="p "> )</ span >
454461 < span class ="c1 "> # Sum expressions per group</ span >
@@ -473,6 +480,7 @@ <h1>Source code for illico.utils.math</h1><div class="highlight"><pre>
473480< span class ="sd "> float: Sparsity (fraction of zero elements)</ span >
474481
475482< span class ="sd "> Author: Rémy Dubois</ span >
483+
476484< span class ="sd "> """</ span >
477485 < span class ="k "> if</ span > < span class ="nb "> isinstance</ span > < span class ="p "> (</ span > < span class ="n "> X</ span > < span class ="p "> ,</ span > < span class ="n "> sc_sparse</ span > < span class ="o "> .</ span > < span class ="n "> spmatrix</ span > < span class ="p "> ):</ span >
478486 < span class ="n "> n_elements</ span > < span class ="o "> =</ span > < span class ="n "> X</ span > < span class ="o "> .</ span > < span class ="n "> shape</ span > < span class ="p "> [</ span > < span class ="mi "> 0</ span > < span class ="p "> ]</ span > < span class ="o "> *</ span > < span class ="n "> X</ span > < span class ="o "> .</ span > < span class ="n "> shape</ span > < span class ="p "> [</ span > < span class ="mi "> 1</ span > < span class ="p "> ]</ span >
@@ -506,6 +514,7 @@ <h1>Source code for illico.utils.math</h1><div class="highlight"><pre>
506514< span class ="sd "> np.ndarray: Chunked Fortran-contiguous array with reordered rows.</ span >
507515
508516< span class ="sd "> Author: Rémy Dubois</ span >
517+
509518< span class ="sd "> """</ span >
510519 < span class ="c1 "> # Now just fill it by making groups contiguous vertically, this will speed up sorting later on.</ span >
511520 < span class ="k "> if</ span > < span class ="n "> indices</ span > < span class ="ow "> is</ span > < span class ="ow "> not</ span > < span class ="kc "> None</ span > < span class ="p "> :</ span >
@@ -524,16 +533,17 @@ <h1>Source code for illico.utils.math</h1><div class="highlight"><pre>
524533< div class ="viewcode-block " id ="compute_batch_bounds ">
525534< a class ="viewcode-back " href ="../../../api.html#illico.utils.math.compute_batch_bounds "> [docs]</ a >
526535< span class ="k "> def</ span > < span class ="w "> </ span > < span class ="nf "> compute_batch_bounds</ span > < span class ="p "> (</ span > < span class ="n "> n_genes</ span > < span class ="p "> :</ span > < span class ="nb "> int</ span > < span class ="p "> ,</ span > < span class ="n "> batch_size</ span > < span class ="p "> :</ span > < span class ="n "> Literal</ span > < span class ="p "> [</ span > < span class ="s2 "> "auto"</ span > < span class ="p "> ]</ span > < span class ="o "> |</ span > < span class ="nb "> int</ span > < span class ="p "> ,</ span > < span class ="n "> n_threads</ span > < span class ="p "> :</ span > < span class ="nb "> int</ span > < span class ="p "> )</ span > < span class ="o "> -></ span > < span class ="n "> List</ span > < span class ="p "> [</ span > < span class ="n "> Tuple</ span > < span class ="p "> [</ span > < span class ="nb "> int</ span > < span class ="p "> ,</ span > < span class ="nb "> int</ span > < span class ="p "> ]]:</ span >
527- < span class ="w "> </ span > < span class ="sd "> """Computes ideal batch bounds for processing genes in batches.</ span >
528- < span class ="sd "> This function ensures no worker is starving. This could happen if we have 8 workers but 9 batches to allocate.</ span >
529- < span class ="sd "> In this case, because each batch takes the same time to be processed, all but one workers will be idle waiting for one worker to process the last batch.</ span >
536+ < span class ="w "> </ span > < span class ="sd "> """Computes ideal batch bounds for processing genes in batches. This function ensures no worker is starved. Starvation </ span >
537+ < span class ="sd "> could happen if we have 8 workers but 9 batches to allocate. In this case, because each batch takes the same time to </ span >
538+ < span class ="sd "> be processed, all but one worker will be idle while waiting for the remaining worker to process the last batch.</ span >
530539
531540< span class ="sd "> Args:</ span >
532541< span class ="sd "> n_genes (int): Total number of genes</ span >
533542< span class ="sd "> batch_size (Literal["auto"] | int): Batch size, or "auto" to compute ideal batch size.</ span >
534543< span class ="sd "> n_threads (int): Number of threads to use.</ span >
535544< span class ="sd "> Returns:</ span >
536545< span class ="sd "> List[Tuple[int, int]]: List of (lower_bound, upper_bound) for each batch. Upper bound is exclusive, following slicing conventions.</ span >
546+
537547< span class ="sd "> """</ span >
538548 < span class ="c1 "> # No batching nor multithreading for small inputs</ span >
539549 < span class ="k "> if</ span > < span class ="n "> n_genes</ span > < span class ="o "> <</ span > < span class ="n "> n_threads</ span > < span class ="ow "> or</ span > < span class ="n "> n_genes</ span > < span class ="o "> <</ span > < span class ="mi "> 256</ span > < span class ="p "> :</ span >
0 commit comments