@@ -74,5 +74,27 @@ plots$roc_thresh <- ggplot(filter(roc, str_detect(model, "Thresh")), aes(x = fpr
7474 labs(x = ' False Positive Rate' , y = ' True Positive Rate' ) +
7575 scale_colour_brewer(type = ' qual' , palette = ' Set3' , name = ' ' )
7676
# Protein positions in training data ----
# Load the ClinVar training table. Later code reads its `uniprot` and
# `position` columns to decide whether a prediction falls on a position
# that was present during training.
train <- read_tsv("data/clinvar/clinvar_train.tsv")
79+
# ROC statistics per model, split by whether the protein position of each
# prediction also occurs in the training table.
#
# Steps:
#   1. Flag every row of `preds` with `in_train`. The (uniprot, position) key
#      is joined with an explicit separator so two different pairs can never
#      collapse into the same string.
#   2. Reshape to long form: every column except the identifier / label
#      columns listed below is treated as a model's prediction.
#   3. Run `calc_roc()` once per (model, in_train) group. It is defined
#      elsewhere; the columns `tpr`, `precision` and `auc` used afterwards are
#      expected to come from its result.
#   4. Add `pr_auc` while the frame is still grouped, so it is evaluated per
#      (model, in_train) group, then ungroup and order by descending AUC.
#   5. Attach a display label built by `auc_labeled_model()` (defined
#      elsewhere).
train_roc <- mutate(
  preds,
  in_train = str_c(uniprot, position, sep = "_") %in%
    str_c(train$uniprot, train$position, sep = "_")
) %>%
  pivot_longer(
    c(-uniprot, -position, -wt, -mut, -clnsig, -clnsig_patho, -in_train),
    names_to = "model",
    values_to = "pred"
  ) %>%
  group_by(model, in_train) %>%
  group_modify(
    ~ calc_roc(.x, clnsig_patho, pred, greater = TRUE, max_steps = 6000)
  ) %>%
  mutate(pr_auc = pr_auc(tpr, precision)) %>%
  ungroup() %>%
  arrange(desc(auc)) %>%
  mutate(model_auc = auc_labeled_model(model, auc))
88+
# Horizontal dodged bar chart of AUC for the three UNET variants, with one bar
# per `in_train` value, stored in the `plots` list as `in_train`.
#
# `distinct()` collapses the per-threshold ROC rows down to one row per
# (model, in_train, auc, pr_auc) combination before plotting.
#
# NOTE(review): the display labels below (legend title, axis titles) are kept
# exactly as found, including their leading spaces; trim them if that padding
# is unintended.
plots$in_train <- distinct(train_roc, model, in_train, auc, pr_auc) %>%
  filter(model %in% c("UNET", "UNET (Finetune)", "UNET (Top)")) %>%
  ggplot(aes(x = model, y = auc, fill = in_train)) +
  geom_col(position = "dodge") +
  coord_flip() +
  scale_fill_brewer(name = " Position in\n Training Set", palette = "Dark2") +
  labs(x = " ", y = " AUC") +
  theme(
    # After coord_flip() the models run along the vertical direction, so only
    # the dotted guide lines for the AUC scale are kept.
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(colour = "grey", linetype = "dotted")
  )
98+
# Hand the collected plots to save_plotlist() (defined elsewhere) with the
# figures/clinvar_unet/ directory as target.
# NOTE(review): overwrite = "all" presumably replaces existing files; confirm
# against the helper's definition.
save_plotlist(plots, "figures/clinvar_unet/", overwrite = "all")
78100
0 commit comments