Dissertation_KG_Chapter_1.6.Rmd

---
title: "Role of Piwi-piRNA pathway in somatic and cancer cells"
author: "__Konstantinos Geles__"
date: "Wed Jul 13 2022, Last Update: `r format(Sys.Date(), '%a %b %d %Y')`"
output:
  html_document:
    toc: yes
    toc_depth: 3
    df_print: paged
  pdf_document:
    toc: yes
    toc_depth: 3
  html_notebook: null
editor_options:
  chunk_output_type: console
subtitle: UMG PhD Programme of Molecular and Translational Oncology - Circle XXXIV
---

```{r setup, include=FALSE}
# Global chunk defaults: chunk code is echoed in the rendered document but is
# not evaluated when knitting.
knitr::opts_chunk$set(eval = FALSE, echo = TRUE)
```

This project contains the scripting part of the Doctoral Dissertation of **Konstantinos Geles** with doi:   

# CHAPTER 1: Role of the PIWI-piRNA pathway in Colorectal Cancer (CRC)  
  
## 1.6 Gene Target prediction and Functional enrichment analysis of identified piRNAs in COLO205

### Predicted gene targets found in the PIWIL1 RIP-seq of COLO205

Import Libraries
```{r}
library(readxl)
library(dplyr)
library(tidyr)
library(clusterProfiler)
library(stringr)
library(ReactomePA)
library(enrichplot)
library(forcats)
library(ggplot2)
```

Import the predicted target genes and the related supplementary tables
```{r}
# Workbook that holds every supplementary table read in this chunk.
table_s9 <- "Chapter_1_6/Table_S9.xlsx"

# NOTE: `rename()` is namespace-qualified in every call below. Bioconductor
# packages that may get attached later in the session (e.g. S4Vectors, pulled
# in through the OrgDb annotation package) also export a `rename()`, so a bare
# call can dispatch to the wrong function when this chunk is re-run.

# Sheet "Table S9B": predicted target genes; the first 3 lines are skipped and
# the next one is used as header.
pred_genes <- read_xlsx(table_s9, sheet = "Table S9B",
                        skip = 3, col_names = TRUE) %>%
    dplyr::rename(Gene_name = `Gene name`)

# Sheet "Table S9C": piRNA / predicted target table.
# Every column except `piRNA` and `EnsID` is stacked into the pair
# `gene_region` (former column name) / `gene_name` (cell value); rows with any
# missing value are dropped and only the first row of each piRNA-EnsID
# combination is kept.
pred_genes_piRNA <- read_xlsx(table_s9, sheet = "Table S9C",
                              skip = 4, col_names = TRUE) %>%
    dplyr::rename(piRNA = piR,
                  UTR_3 = `3'UTR`,
                  UTR_5 = `5'UTR`) %>%
    pivot_longer(cols = -c(piRNA, EnsID),
                 names_to = "gene_region",
                 values_to = "gene_name") %>%
    drop_na() %>%
    distinct(piRNA, EnsID, .keep_all = TRUE)

# Sheet "Table S9D": predicted master regulator genes, with syntactic
# column names.
pred_master_reg_genes <- read_xlsx(table_s9, sheet = "Table S9D",
                                   skip = 4, col_names = TRUE) %>%
    dplyr::rename(Gene_name = `Gene name`,
                  EnsID = `Ensembl ID`,
                  Complete_name = `Complete name`)

# Sheet "Table S9E": gene-gene interactions of the master regulators, with
# syntactic column names.
pred_master_reg_genes_inter <- read_xlsx(table_s9, sheet = "Table S9E",
                                         skip = 4, col_names = TRUE) %>%
    dplyr::rename(Gene_1 = `Gene 1`,
                  Gene_2 = `Gene 2`,
                  Network_group = `Network group`)
```

Convert the gene symbols to Entrez gene IDs
```{r}
# Translate the predicted target gene symbols into Entrez gene identifiers
# using the human annotation database.
Entr_Gene_Ids <- bitr(
    pred_genes$Gene_name,
    fromType = "SYMBOL",
    toType = "ENTREZID",
    OrgDb = "org.Hs.eg.db"
)

# Number of mapped rows (author's note: only two symbols got dropped).
nrow(Entr_Gene_Ids)
```

Perform GO, WikiPathways, KEGG module and Reactome enrichment analyses
```{r}
# Entrez IDs of the predicted targets; the same vector feeds every test below.
target_entrez <- Entr_Gene_Ids$ENTREZID

# Gene Ontology, Molecular Function ontology.
goenr <- enrichGO(gene = target_entrez, OrgDb = "org.Hs.eg.db", ont = "MF")

# WikiPathways.
wpp <- enrichWP(gene = target_entrez, organism = "Homo sapiens")

# KEGG modules; both cutoffs are set to 1, i.e. no significance filtering here.
mkk <- enrichMKEGG(
    gene = target_entrez,
    organism = "hsa",
    pvalueCutoff = 1,
    qvalueCutoff = 1
)

# Reactome pathways at a 0.05 p-value cutoff, requested with readable = TRUE.
reactome_path <- enrichPathway(
    gene = target_entrez,
    pvalueCutoff = 0.05,
    readable = TRUE
)

# Inspect the first 20 rows of each result.
head(mkk, 20)
head(wpp, 20)
head(goenr, 20)
head(reactome_path, 20)

# Export the Reactome terms with adjusted p-value below 0.05.
reactome_signif <- filter(as_tibble(reactome_path), p.adjust < 0.05)
vroom::vroom_write(reactome_signif, "Chapter_1_6/reactome_path_enr.txt")
```

PhD theme for plots
```{r}
# Zissou1 palette in reversed order (elements 5 down to 1); used as the colour
# gradient of the enrichment plots.
wes_cols <- wesanderson::wes_palettes$Zissou1[5:1]

# Shared plot styling: a list holding a fill scale and a theme, meant to be
# added to a ggplot with `+`.
# NOTE(review): `element_line(size = )` is reported as deprecated by recent
# ggplot2 releases in favour of `linewidth`; left unchanged here because the
# installed ggplot2 version is not known - confirm before switching.
PhD_theme <- local({
  black_text_20 <- element_text(size = 20, colour = "black")
  title_margin <- margin(t = 10, r = 10, b = 10, l = 10)
  axis_title <- element_text(margin = title_margin, colour = "black")

  thesis_theme <- theme_bw() +
    theme(
      text = element_text(size = 20),
      plot.title = element_text(hjust = 0.5, colour = "black"),
      # panels and facet strips without frames or backgrounds
      panel.border = element_blank(),
      strip.background = element_blank(),
      strip.text = black_text_20,
      panel.grid.major = element_line(size = 0.2),
      panel.grid.minor = element_line(size = 0.1),
      # axes: visible axis lines, padded titles, x labels rotated by 45 degrees
      axis.line = element_line(),
      axis.title.x = axis_title,
      axis.title.y = axis_title,
      axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 20),
      axis.text.y = black_text_20
    )

  list(scale_fill_brewer(palette = "Set1"), thesis_theme)
})
```

Plot the GO enrichment results
```{r}
# Quick overview: bar plot of the enriched GO terms, with bars scaled by
# -log10 of the adjusted p-value.
goenr %>%
    mutate(qscore = -log10(p.adjust)) %>%
    barplot(x = "qscore")

# Print the enrichment result for inspection.
goenr

# Rich factor = Count divided by the first number of BgRatio (the regular
# expression removes the "/<digits>" part of the ratio string).
goenr_RF <- mutate(goenr,
                   richFactor = Count / as.numeric(sub("/\\d+", "", BgRatio)))

# Lollipop plot: one segment from 0 to the rich factor per term, with a point
# coloured by adjusted p-value and sized by gene count. Terms are ordered by
# rich factor.
# NOTE(review): `showCategory = 13` presumably limits the plot to 13 terms -
# confirm against the enrichplot documentation.
goenr_plot <- ggplot(goenr_RF, showCategory = 13,
                     aes(richFactor, fct_reorder(Description, richFactor))) +
  geom_segment(aes(xend = 0, yend = Description)) +
  geom_point(aes(color = p.adjust, size = Count)) +
  scale_color_gradientn(colours = wes_cols,
                        guide = guide_colorbar(reverse = TRUE, order = 1)) +
  scale_size_continuous(range = c(2, 10)) +
  PhD_theme +
  xlab("Rich Factor") +
  ylab(NULL) +
  ggtitle("Enriched Gene Ontology Molecular Functions")

# Write the figure as an uncompressed 16 x 12 inch TIFF at 600 dpi.
# The plot is printed explicitly: a ggplot object is only drawn when printed,
# and relying on top-level auto-printing leaves an empty file as soon as this
# code is source()d or wrapped in a function, loop or braces.
tiff(filename = "FIG_24_Enriched_GO_Mol_Fun_COLO205_106_genes.tiff",
     compression = "none", height = 12, width = 16, units = "in", res = 600)
print(goenr_plot)
dev.off()
```

Plot the Reactome pathway enrichment results
```{r}
# Rich factor = Count divided by the first number of BgRatio (the regular
# expression removes the "/<digits>" part of the ratio string).
reactome_path_RF <- mutate(
  reactome_path,
  richFactor = Count / as.numeric(sub("/\\d+", "", BgRatio))
)

# Lollipop plot: one segment from 0 to the rich factor per pathway, with a
# point coloured by adjusted p-value and sized by gene count. Pathways are
# ordered by rich factor.
# NOTE(review): `showCategory = 20` presumably limits the plot to 20 pathways -
# confirm against the enrichplot documentation.
reactome_enr <- ggplot(reactome_path_RF, showCategory = 20,
                       aes(richFactor, fct_reorder(Description, richFactor))) +
  geom_segment(aes(xend = 0, yend = Description)) +
  geom_point(aes(color = p.adjust, size = Count)) +
  scale_color_gradientn(colours = wes_cols,
                        guide = guide_colorbar(reverse = TRUE, order = 1)) +
  scale_size_continuous(range = c(2, 10)) +
  PhD_theme +
  xlab("Rich Factor") +
  ylab(NULL) +
  ggtitle("Enriched Reactome Pathways")

# Write the figure as an uncompressed 16 x 12 inch TIFF at 600 dpi.
# The plot is printed explicitly: a ggplot object is only drawn when printed,
# and relying on top-level auto-printing leaves an empty file as soon as this
# code is source()d or wrapped in a function, loop or braces.
tiff(filename = "FIG_25_Enriched_Reactome_COLO205_106_genes.tiff",
     compression = "none", height = 12, width = 16, units = "in", res = 600)
print(reactome_enr)
dev.off()
```


Test the predicted targets for enrichment in the hallmark gene-set collection of the Molecular Signatures Database (MSigDB)
```{r}
library(msigdbr)

# TERM2GENE table for the human hallmark ("H") collection: one row per
# gene set name / Entrez gene pair.
# `select()` is namespace-qualified on purpose: by this point the annotation
# packages used for the ID conversion and GO enrichment may have attached
# AnnotationDbi, whose own `select()` masks dplyr's and fails on a data frame.
m_t2g <- msigdbr(species = "Homo sapiens",
                 category = "H") %>%
    dplyr::select(gs_name, entrez_gene)

head(m_t2g)

# Over-representation test of the predicted targets against the hallmark sets.
em <- enricher(Entr_Gene_Ids$ENTREZID, TERM2GENE = m_t2g)
head(em)
```