Skip to content

Commit 3ff716c

Browse files
committed
minor bug fixes
1 parent 961b948 commit 3ff716c

1 file changed

Lines changed: 23 additions & 23 deletions

File tree

src/beanie/beanie.py

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ def __init__(self, counts_path: str, metad_path: str, sig_path:str, normalised:b
7272
self.heatmap figure for HeatmapDriverGenes function
7373
self.upsetplot_driver_genes figure for UpsetPlotDriverGenes function
7474
self.upsetplot_signature_genes figure for UpsetPlotSignatureGenes function
75-
self.de_obj DifferentialExpression object for max/custom subsample size
75+
self.de_obj DifferentialExpression object for max/custom subsample size
7676
self.de_summary dataframes containing the output of DifferentialExpression
7777
self.de_obj_simulation list of DifferentialExpression objects for max/custom subsample size
7878
self.de_summary_simulation dictionary mapping the subsample size to dataframes generated from DifferentialExpression object
@@ -81,7 +81,7 @@ def __init__(self, counts_path: str, metad_path: str, sig_path:str, normalised:b
8181
self.d1_all dictionary mapping patients to cell_ids in treatment group A
8282
self.d2_all dictionary mapping patients to cell_ids in treatment group B
8383
self.max_subsample_size
84-
self.group_id_names list of treatment groups names in self.metad
84+
self.group_id_names list of treatment groups names in self.metad
8585
self.top_signatures top 5 most significant and robust genes
8686
self.num_driver_genes number of driver genes for which plots to be made
8787
self.t1_cells
@@ -291,22 +291,24 @@ def _writeSignatures(self):
291291
return
292292

293293

294-
def SignatureScoring(self, scoring_method="beanie", no_random_sigs=1000, aucell_quantile=0.05):
294+
def SignatureScoring(self, scoring_method="beanie", no_random_sigs=1000):
295295
"""
296296
Function to do signature scoring using in-built scoring functions.
297297
298298
Parameters:
299-
scoring_method choice between beanie (default), mean and combined-z to score the cells.
300-
no_random_sigs the number of random signatures that should be generated for FDR correction
301-
aucell_quantile parameter to indicate the quantile of genes to consider for ROC, if beanie method of scoring is being used.
299+
scoring_method 'beanie' (AUCell-inspired, default), 'mean' (weighted mean) and 'combined-z' (z-score).
300+
no_random_sigs The number of background signatures that should be generated for p-value correction.
302301
303302
"""
304303

304+
# Parameter to indicate the quantile of genes to consider for ROC
305+
aucell_quantile=0.05
306+
305307
self._scoring_method = scoring_method
306308

307309
logging.info("Scoring signatures...")
308310

309-
# Score background signatures
311+
# Score background gene signatures
310312
sorted_genes = pd.Series.sort_values(self.normalised_counts.sum(axis=1))
311313
null_dist_sigs = GenerateNullDistributionSignatures(self.signatures, sorted_genes, self._bins, self.output_dir, no_random_sigs)
312314
self._null_dist_scores = dict()
@@ -351,18 +353,19 @@ def SignatureScoring(self, scoring_method="beanie", no_random_sigs=1000, aucell_
351353
return
352354

353355

354-
def DifferentialExpression(self, cells_to_subsample_1=None, cells_to_subsample_2=None, alpha=0.05, min_ratio=0.9, subsamples=501, test_name="mwu-test", group_direction = None, **kwargs):
356+
def DifferentialExpression(self, cells_to_subsample_1=None, cells_to_subsample_2=None, alpha=0.05, min_ratio=0.9, subsamples=500, test_name="mwu-test", group_direction = None, **kwargs):
355357
"""
356358
Function for finding out differentially expressed robust and statistically significant signatures.
357359
358360
Parameters:
359-
cells_to_subsample cells that should be subsampled per patient; if no input provided, function to choose the max possible subsample size
361+
cells_to_subsample_1 Cells subsampled per sample in group 1; by default, the maximum possible subsample size is chosen.
362+
cells_to_subsample_2 Cells subsampled per sample in group 2; by default, the maximum possible subsample size is chosen.
360363
alpha p-value cutoff
361-
min_ratio value of fold_rejection_ratio below which the signature is considered to be non-robust
362-
subsamples number of repeated subsamples in every fold
363-
minimum_expressing_samples minimum number of samples that express gene to be considered
364-
minimum_frac_per_sample minimum fraction of cells expressing for a gene to be considered expressed in a sample
365-
minimum_expression minimum expression value for a gene to be considered expressed in a cell
364+
min_ratio Value of Fold Rejection Ratio (FRR) below which the signature is considered to be non-robust.
365+
subsamples Number of repeated subsamples in every fold. Default = 500.
366+
minimum_expressing_samples Minimum number of samples that express gene signature.
367+
minimum_frac_per_sample Minimum fraction of cells expressing for a gene signature to be considered expressed in a sample.
368+
minimum_expression Minimum expression value for a gene signature to be considered expressed in a cell.
366369
367370
"""
368371
if self._differential_expression_run == True:
@@ -444,19 +447,16 @@ def DifferentialExpression(self, cells_to_subsample_1=None, cells_to_subsample_2
444447

445448
def GetDifferentialExpressionSummary(self):
446449
if self._differential_expression_run==True:
447-
if self._sig_score_path==None:
448-
return self.de_summary[["log2fold","p","corr_p","corrected_p_inbuilt","nonrobust","direction"]]
449-
else:
450-
return self.de_summary[["log2fold","p","corr_p","nonrobust","direction"]]
450+
return self.de_summary[["log2fold","p","corr_p","nonrobust","direction"]]
451451
else:
452452
raise RuntimeError("Run DifferentialExpression() first.")
453453

454454
def RankGenes(self, group_direction=None):
455455
if self._driver_genes_run==True:
456-
print("DriverGenes() has already been run.")
456+
print("RankGenes() has already been run.")
457457
return
458458

459-
logging.info("Finding Driver Genes...")
459+
logging.info("Ranking Genes...")
460460

461461
if self._differential_expression_run==False:
462462
raise RuntimeError("Run DifferentialExpression() first.")
@@ -485,7 +485,7 @@ def RankGenes(self, group_direction=None):
485485
def GetRankGenesSummary(self):
486486

487487
if self._driver_genes_run==False:
488-
raise RuntimeError("Run DriverGenes() method first.")
488+
raise RuntimeError("Run RankGenes() method first.")
489489

490490
elif self._differential_expression_run==False:
491491
raise RuntimeError("Run DifferentialExpression() first.")
@@ -705,7 +705,7 @@ def GeneRankHeatmap(self, signature_names=None, num_genes = 10, **kwargs):
705705
signature_names = self.top_signatures
706706

707707
if self._driver_genes_run==False:
708-
raise RuntimeError("Run DriverGenes() first.")
708+
raise RuntimeError("Run RankGenes() first.")
709709

710710
self.num_driver_genes = num_genes
711711
self.heatmap = dg.GenerateHeatmap(self.normalised_counts.T, self.t1_ids, self.t2_ids, self.d1_all, self.d2_all, self.driver_genes, signature_names, num_genes, **kwargs)
@@ -733,7 +733,7 @@ def GeneRankUpsetPlot(self, fig_width=None, signature_names=None):
733733
print("Too many signature names to show upset plot")
734734

735735
if self._driver_genes_run==False:
736-
raise RuntimeEror("Run DriverGenes() first.")
736+
raise RuntimeEror("Run RankGenes() first.")
737737

738738
upset_df_prep = pd.DataFrame(columns=self.driver_genes.keys())
739739
for x in self.driver_genes.keys():

0 commit comments

Comments
 (0)