@@ -72,7 +72,7 @@ def __init__(self, counts_path: str, metad_path: str, sig_path:str, normalised:b
7272 self.heatmap figure for HeatmapDriverGenes function
7373 self.upsetplot_driver_genes figure for UpsetPlotDriverGenes function
7474 self.upsetplot_signature_genes figure for UpsetPlotSignatureGenes function
75- self.de_obj DifferentialExpression object for max/custom subsample size
75+ self.de_obj DifferentialExpression object for max/custom subsample size
7676 self.de_summary dataframes containing the output of DifferentialExpression
7777 self.de_obj_simulation list of DifferentialExpression objects for max/custom subsample size
7878 self.de_summary_simulation dictionary mapping the subsample size to dataframes generated from DifferentialExpression object
@@ -81,7 +81,7 @@ def __init__(self, counts_path: str, metad_path: str, sig_path:str, normalised:b
8181 self.d1_all dictionary mapping patients to cell_ids in treatment group A
8282 self.d2_all dictionary mapping patients to cell_ids in treatment group B
8383 self.max_subsample_size
84- self.group_id_names list of treatment groups names in self.metad
84+ self.group_id_names list of treatment groups names in self.metad
8585 self.top_signatures top 5 most significant and robust genes
8686 self.num_driver_genes number of driver genes for which plots to be made
8787 self.t1_cells
@@ -291,22 +291,24 @@ def _writeSignatures(self):
291291 return
292292
293293
294- def SignatureScoring (self , scoring_method = "beanie" , no_random_sigs = 1000 , aucell_quantile = 0.05 ):
294+ def SignatureScoring (self , scoring_method = "beanie" , no_random_sigs = 1000 ):
295295 """
296296 Function to do signature scoring using in-built scoring functions.
297297
298298 Parameters:
299- scoring_method choice between beanie (default), mean and combined-z to score the cells.
300- no_random_sigs the number of random signatures that should be generated for FDR correction
301- aucell_quantile parameter to indicate the quantile of genes to consider for ROC, if beanie method of scoring is being used.
299+ scoring_method 'beanie' (AUCell-inspired, default), 'mean' (weighted mean) and 'combined-z' (z-score).
300+ no_random_sigs The number of background signatures that should be generated for p-value correction.
302301
303302 """
304303
304+ # Parameter to indicate the quantile of genes to consider for ROC
305+ aucell_quantile = 0.05
306+
305307 self ._scoring_method = scoring_method
306308
307309 logging .info ("Scoring signatures..." )
308310
309- # Score background signatures
311+ # Score background gene signatures
310312 sorted_genes = pd .Series .sort_values (self .normalised_counts .sum (axis = 1 ))
311313 null_dist_sigs = GenerateNullDistributionSignatures (self .signatures , sorted_genes , self ._bins , self .output_dir , no_random_sigs )
312314 self ._null_dist_scores = dict ()
@@ -351,18 +353,19 @@ def SignatureScoring(self, scoring_method="beanie", no_random_sigs=1000, aucell_
351353 return
352354
353355
354- def DifferentialExpression (self , cells_to_subsample_1 = None , cells_to_subsample_2 = None , alpha = 0.05 , min_ratio = 0.9 , subsamples = 501 , test_name = "mwu-test" , group_direction = None , ** kwargs ):
356+ def DifferentialExpression (self , cells_to_subsample_1 = None , cells_to_subsample_2 = None , alpha = 0.05 , min_ratio = 0.9 , subsamples = 500 , test_name = "mwu-test" , group_direction = None , ** kwargs ):
355357 """
356358 Function for finding out differentially expressed robust and statistically significant signatures.
357359
358360 Parameters:
359- cells_to_subsample cells that should be subsampled per patient; if no input provided, function to choose the max possible subsample size
361+ cells_to_subsample_1 Cells subsampled per sample in group1; by default choose the max possible subsample size.
362+ cells_to_subsample_2 Cells subsampled per sample in group2; by default choose the max possible subsample size.
360363 alpha p-value cutoff
361- min_ratio value of fold_rejection_ratio below which the signature is considered to be non-robust
362- subsamples number of repeated subsamples in every fold
363- minimum_expressing_samples minimum number of samples that express gene to be considered
364- minimum_frac_per_sample minimum fraction of cells expressing for a gene to be considered expressed in a sample
365- minimum_expression minimum expression value for a gene to be considered expressed in a cell
364+ min_ratio Value of Fold Rejection Ratio (FRR) below which the signature is considered to be non-robust.
365+ subsamples Number of repeated subsamples in every fold. Default = 500.
366+ minimum_expressing_samples Minimum number of samples that express gene signature.
367+ minimum_frac_per_sample Minimum fraction of cells expressing for a gene signature to be considered expressed in a sample.
368+ minimum_expression Minimum expression value for a gene signature to be considered expressed in a cell.
366369
367370 """
368371 if self ._differential_expression_run == True :
@@ -444,19 +447,16 @@ def DifferentialExpression(self, cells_to_subsample_1=None, cells_to_subsample_2
444447
445448 def GetDifferentialExpressionSummary (self ):
446449 if self ._differential_expression_run == True :
447- if self ._sig_score_path == None :
448- return self .de_summary [["log2fold" ,"p" ,"corr_p" ,"corrected_p_inbuilt" ,"nonrobust" ,"direction" ]]
449- else :
450- return self .de_summary [["log2fold" ,"p" ,"corr_p" ,"nonrobust" ,"direction" ]]
450+ return self .de_summary [["log2fold" ,"p" ,"corr_p" ,"nonrobust" ,"direction" ]]
451451 else :
452452 raise RuntimeError ("Run DifferentialExpression() first." )
453453
454454 def RankGenes (self , group_direction = None ):
455455 if self ._driver_genes_run == True :
456- print ("DriverGenes () has already been run." )
456+ print ("RankGenes () has already been run." )
457457 return
458458
459- logging .info ("Finding Driver Genes..." )
459+ logging .info ("Ranking Genes..." )
460460
461461 if self ._differential_expression_run == False :
462462 raise RuntimeError ("Run DifferentialExpression() first." )
@@ -485,7 +485,7 @@ def RankGenes(self, group_direction=None):
485485 def GetRankGenesSummary (self ):
486486
487487 if self ._driver_genes_run == False :
488- raise RuntimeError ("Run DriverGenes () method first." )
488+ raise RuntimeError ("Run RankGenes () method first." )
489489
490490 elif self ._differential_expression_run == False :
491491 raise RuntimeError ("Run DifferentialExpression() first." )
@@ -705,7 +705,7 @@ def GeneRankHeatmap(self, signature_names=None, num_genes = 10, **kwargs):
705705 signature_names = self .top_signatures
706706
707707 if self ._driver_genes_run == False :
708- raise RuntimeError ("Run DriverGenes () first." )
708+ raise RuntimeError ("Run RankGenes () first." )
709709
710710 self .num_driver_genes = num_genes
711711 self .heatmap = dg .GenerateHeatmap (self .normalised_counts .T , self .t1_ids , self .t2_ids , self .d1_all , self .d2_all , self .driver_genes , signature_names , num_genes , ** kwargs )
@@ -733,7 +733,7 @@ def GeneRankUpsetPlot(self, fig_width=None, signature_names=None):
733733 print ("Too many signature names to show upset plot" )
734734
735735 if self ._driver_genes_run == False :
736- raise RuntimeEror ("Run DriverGenes () first." )
736+ raise RuntimeError ("Run RankGenes () first." )
737737
738738 upset_df_prep = pd .DataFrame (columns = self .driver_genes .keys ())
739739 for x in self .driver_genes .keys ():
0 commit comments