Skip to content

Commit d9f755b

Browse files
committed
Added analysis of clinvar positions
1 parent 62844eb commit d9f755b

File tree

1 file changed

+22
-0
lines changed

1 file changed

+22
-0
lines changed

analysis/clinvar_roc.R

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,5 +74,27 @@ plots$roc_thresh <- ggplot(filter(roc, str_detect(model, "Thresh")), aes(x = fpr
7474
labs(x = 'False Positive Rate', y = 'True Positive Rate') +
7575
scale_colour_brewer(type = 'qual', palette = 'Set3', name = '')
7676

77+
# Protein positions in training data
train <- read_tsv("data/clinvar/clinvar_train.tsv")

# Flag every prediction whose (uniprot, position) pair also occurs in the
# training table, then evaluate each model separately on the flagged and
# unflagged subsets.
#
# The lookup key is built with an explicit separator. Pasting the two columns
# together without one lets distinct pairs produce the same string (a shorter
# accession followed by a longer position can equal a longer accession followed
# by a shorter position), which would wrongly mark a position as "in training".
train_roc <- mutate(
  preds,
  in_train = str_c(uniprot, position, sep = ":") %in%
    str_c(train$uniprot, train$position, sep = ":")
) %>%
  # One row per (variant, model): every column that is not an identifier,
  # label or the in_train flag is treated as a model's prediction column.
  pivot_longer(
    cols = -c(uniprot, position, wt, mut, clnsig, clnsig_patho, in_train),
    names_to = "model",
    values_to = "pred"
  ) %>%
  group_by(model, in_train) %>%
  # calc_roc() is a project helper defined outside this view; it is applied
  # once per (model, in_train) group. Its output is expected to carry the
  # tpr, precision and auc columns used below.
  group_modify(~calc_roc(.x, clnsig_patho, pred, greater = TRUE, max_steps = 6000)) %>%
  # Still grouped here, so pr_auc() is evaluated per (model, in_train) group.
  mutate(pr_auc = pr_auc(tpr, precision)) %>%
  ungroup() %>%
  arrange(desc(auc)) %>%
  mutate(model_auc = auc_labeled_model(model, auc))
88+
89+
# Dodged, horizontally drawn bars of AUC for the three UNET variants, one bar
# per value of the in_train flag.
plots$in_train <- train_roc %>%
  # Collapse the per-threshold ROC rows down to one row per model / flag.
  distinct(model, in_train, auc, pr_auc) %>%
  filter(model %in% c("UNET", "UNET (Finetune)", "UNET (Top)")) %>%
  ggplot(mapping = aes(x = model, y = auc, fill = in_train)) +
  geom_col(position = "dodge") +
  coord_flip() +
  scale_fill_brewer(palette = "Dark2", name = "Position in\nTraining Set") +
  labs(x = "", y = "AUC") +
  theme(
    panel.grid.major.x = element_line(colour = "grey", linetype = "dotted"),
    panel.grid.major.y = element_blank()
  )
98+
7799
# Hand the accumulated `plots` list to the save_plotlist() helper (defined
# outside this view), targeting figures/clinvar_unet/.
# NOTE(review): overwrite = "all" presumably replaces any existing figure
# files - confirm against the helper's definition.
save_plotlist(plots, "figures/clinvar_unet/", overwrite = "all")
78100

0 commit comments

Comments
 (0)