@@ -67,9 +67,9 @@ pheatmap(t(prop_matrix * 100), scale = "none", cluster_rows = FALSE,
67
67
width = 8 , height = 2 )
68
68
69
69
# Plot heatmap without collapsing Ras genes
70
- heat_ras_df <- heat_df %> % dplyr :: select(c(' NRAS_gain_y' , " HRAS_gain_y" ,
71
- " KRAS_gain_y" , ' NRAS_y' , ' HRAS_y' ,
72
- ' KRAS_y' ))
70
+ heat_ras_df <- heat_df %> % dplyr :: select(c(" NRAS_gain_y" , " HRAS_gain_y" ,
71
+ " KRAS_gain_y" , " NRAS_y" , " HRAS_y" ,
72
+ " KRAS_y" ))
73
73
colnames(heat_ras_df ) <- c(" NRAS Gain" , " HRAS Gain" , " KRAS Gain" ,
74
74
" NRAS" , " HRAS" , " KRAS" )
75
75
heat_ras_df <- as.data.frame(heat_ras_df )
@@ -229,7 +229,7 @@ ggplot(final_df, aes(Weight, ..count.., fill = Class)) +
229
229
scale_x_continuous(expand = c(0 , 0 ), limits = c(0 , 1 )) +
230
230
scale_y_continuous(expand = c(0 , 0 )) + base_theme +
231
231
theme(legend.position = c(1.1 , 0.65 ),
232
- legend.background = element_rect(fill = alpha(' white' , 0 )),
232
+ legend.background = element_rect(fill = alpha(" white" , 0 )),
233
233
legend.text = element_text(size = 7 ),
234
234
plot.margin = unit(c(0.2 , 1.5 , 0 , 0.1 )," cm" ),
235
235
axis.text.x = element_text(size = 9 ),
@@ -257,25 +257,25 @@ nuc_df <- mut_weight_df %>%
257
257
258
258
aa_df <- aa_df [order(aa_df $ count , decreasing = TRUE ),]
259
259
nuc_df <- nuc_df [order(nuc_df $ count , decreasing = TRUE ),]
260
- write.table(aa_df , file = file.path(results_folder , ' tables' ,
261
- ' amino_acid_mutation_scores.tsv' ),
262
- sep = ' \t ' , row.names = FALSE )
263
- write.table(nuc_df , file = file.path(results_folder , ' tables' ,
264
- ' nucleotide_mutation_scores.tsv' ),
265
- sep = ' \t ' , row.names = FALSE )
260
+ write.table(aa_df , file = file.path(results_folder , " tables" ,
261
+ " amino_acid_mutation_scores.tsv" ),
262
+ sep = " \t " , row.names = FALSE )
263
+ write.table(nuc_df , file = file.path(results_folder , " tables" ,
264
+ " nucleotide_mutation_scores.tsv" ),
265
+ sep = " \t " , row.names = FALSE )
266
266
267
267
# Plot summary distribution of variant classes prediction scores
268
268
braf_df <- final_df [complete.cases(final_df ), ]
269
- braf_df <- braf_df [braf_df $ HGVSp == ' p.Val600Glu' , ]
269
+ braf_df <- braf_df [braf_df $ HGVSp == " p.Val600Glu" , ]
270
270
271
271
braf_df $ Disease <- dplyr :: recode(braf_df $ Disease ,
272
272
" BLCA" = " Other" , " CHOL" = " Other" ,
273
273
" GBM" = " Other" , " HNSC" = " Other" ,
274
274
" KIRP" = " Other" , " LGG" = " Other" ,
275
275
" READ" = " Other" )
276
276
277
- braf_plot_file <- file.path(results_folder , ' figures' ,
278
- ' brafv600e_distribution.svg' )
277
+ braf_plot_file <- file.path(results_folder , " figures" ,
278
+ " brafv600e_distribution.svg" )
279
279
braf_plot <- ggplot(braf_df , aes(Weight , fill = Disease )) +
280
280
geom_density(alpha = 0.4 ) + theme_bw() +
281
281
ylab(" Density" ) + xlab(" BRAFV600E Classifier Score" )
@@ -292,9 +292,9 @@ ras_summary_count_df <- readr::read_tsv(ras_count_file,
292
292
" weight" = " d" ,
293
293
" total_status" = " c" ))
294
294
ras_summary_count_df $ copy_count <- factor (ras_summary_count_df $ copy_count ,
295
- levels = c(' 0 ' , ' 1 ' , ' 2 ' , ' 3 ' , ' 4 ' ,
296
- ' 5 ' , ' 6 ' , ' 7 ' , ' 8 ' , ' 9 ' ,
297
- ' 10 ' ))
295
+ levels = c(" 0 " , " 1 " , " 2 " , " 3 " , " 4 " ,
296
+ " 5 " , " 6 " , " 7 " , " 8 " , " 9 " ,
297
+ " 10 " ))
298
298
ras_summary_count_df $ copy_count <-
299
299
dplyr :: recode(ras_summary_count_df $ copy_count , " 6" = " >6" , " 7" = " >6" ,
300
300
" 8" = " >6" , " 9" = " >6" , " 10" = " >6" )
@@ -310,7 +310,7 @@ cop_ras_count <- ras_summary_count_df %>% group_by(copy_count) %>% tally()
310
310
311
311
# Combine to get summary tables
312
312
mut_sum <- dplyr :: inner_join(mut_ras_count , mut_ras_prop , by = " mutation_count" )
313
- cop_sum <- dplyr :: inner_join(cop_ras_count , cop_ras_prop , by = ' copy_count' )
313
+ cop_sum <- dplyr :: inner_join(cop_ras_count , cop_ras_prop , by = " copy_count" )
314
314
315
315
med_weight <- median(ras_summary_count_df $ weight )
316
316
@@ -330,7 +330,7 @@ mut <- ggplot(ras_summary_count_df, aes(x = mutation_count, y = weight)) +
330
330
scale_fill_manual(name = " RAS Status" , values = c(" #3B9AB2" , " #F2300F" ),
331
331
labels = c(" 0" = " Wild-Type" , " 1" = " Hyperactive" )) +
332
332
geom_text(data = mut_sum , aes(x = mutation_count , y = 1.06 ,
333
- label = paste0(n , ' \n ' , mean_ras ))) +
333
+ label = paste0(n , " \n " , mean_ras ))) +
334
334
classifier_count_theme +
335
335
labs(list (x = " Number of Ras Pathway Mutations" , y = " RAS Classifier Score" ))
336
336
@@ -340,7 +340,7 @@ cop <- ggplot(ras_summary_count_df, aes(x = copy_count, y = weight)) +
340
340
scale_fill_manual(name = " RAS Status" , values = c(" #3B9AB2" , " #F2300F" ),
341
341
labels = c(" 0" = " Wild-Type" , " 1" = " Hyperactive" )) +
342
342
geom_text(data = cop_sum , aes(x = copy_count , y = 1.06 ,
343
- label = paste0(n , ' \n ' , mean_ras ))) +
343
+ label = paste0(n , " \n " , mean_ras ))) +
344
344
classifier_count_theme +
345
345
labs(list (x = " Number of Ras Pathway Copy Number Events" ,
346
346
y = " RAS Classifier Score" ))
@@ -364,23 +364,25 @@ auprc_violin <- ggplot(metric_ranks, aes(y = AUPRC, x = paste(ras),
364
364
fill = paste(ras ))) +
365
365
geom_violin() +
366
366
theme(legend.position = " none" ) +
367
- xlab(' Ras Pathway Status' )
367
+ xlab(" " ) +
368
+ scale_x_discrete(labels = c(" 0" = " Other" , " 1" = " Ras Pathway Genes" ))
368
369
369
370
auroc_violin <- ggplot(metric_ranks , aes(y = AUROC , x = paste(ras ),
370
371
fill = paste(ras ))) +
371
372
geom_violin() +
372
373
theme(legend.position = " none" ) +
373
374
geom_hline(yintercept = 0.5 , linetype = " dashed" ) +
374
- xlab(' Ras Pathway Status' )
375
+ xlab(" " ) +
376
+ scale_x_discrete(labels = c(" 0" = " Other" , " 1" = " Ras Pathway Genes" ))
375
377
376
378
auprc_plot <- ggplot(metric_ranks , aes(x = `AUPRC Rank` , y = AUPRC )) +
377
- geom_point(color = ' darkgrey' ) +
378
- geom_point(data = metric_ranks [metric_ranks $ ras == 1 , ], color = ' red' )
379
+ geom_point(color = " darkgrey" ) +
380
+ geom_point(data = metric_ranks [metric_ranks $ ras == 1 , ], color = " red" )
379
381
380
382
auroc_plot <- ggplot(metric_ranks , aes(x = `AUROC Rank` , y = AUROC )) +
381
- geom_point(color = ' darkgrey' ) +
383
+ geom_point(color = " darkgrey" ) +
382
384
geom_hline(yintercept = 0.5 , linetype = " dashed" ) +
383
- geom_point(data = metric_ranks [metric_ranks $ ras == 1 , ], color = ' red' )
385
+ geom_point(data = metric_ranks [metric_ranks $ ras == 1 , ], color = " red" )
384
386
385
387
# Get the top genes by both metrics
386
388
top_auprc_genes <- metric_ranks [order(metric_ranks $ `AUPRC Rank` ), 1 : 2 ]
@@ -395,16 +397,16 @@ auroc_plot <- auroc_plot +
395
397
annotation_custom(top_auroc_table_grob , xmin = 10000 ,
396
398
xmax = 15000 , ymin = 0.6 , ymax = 0.95 )
397
399
398
- auprc_distribution_fig <- file.path(results_folder , ' figures' ,
399
- ' auprc_distribution.svg' )
400
+ auprc_distribution_fig <- file.path(results_folder , " figures" ,
401
+ " auprc_distribution.svg" )
400
402
401
403
svg(auprc_distribution_fig , width = 11.5 , height = 7.5 )
402
- plot_grid(auprc_violin , auprc_plot , align = " h" , ncol = 2 )
404
+ plot_grid(auprc_plot , auprc_violin , align = " h" , ncol = 2 )
403
405
dev.off()
404
406
405
- auroc_distribution_fig <- file.path(results_folder , ' figures' ,
406
- ' auroc_distribution.svg' )
407
+ auroc_distribution_fig <- file.path(results_folder , " figures" ,
408
+ " auroc_distribution.svg" )
407
409
408
410
svg(auroc_distribution_fig , width = 11 , height = 7.5 )
409
- plot_grid(auroc_violin , auroc_plot , align = " h" , ncol = 2 )
411
+ plot_grid(auroc_plot , auroc_violin , align = " h" , ncol = 2 )
410
412
dev.off()
0 commit comments