v2.5.MEP.Introspection.Rmd

---
title: "v2.5.MEP.Introspection.Rmd"
author: "D. Ford Hannum Jr."
date: "9/23/2020"
output: 
        html_document:
                toc: true
                toc_depth: 3
                number_sections: false
                theme: united
                highlight: tango
---

```{r setup, include=TRUE, message=FALSE}
# Document-wide chunk defaults: do not echo source code and suppress messages.
knitr::opts_chunk$set(echo = FALSE, message = FALSE)
library(Seurat)
library(ggplot2)
library(data.table)
library(MAST)
library(SingleR)
library(dplyr)
library(tidyr)
library(limma)
library(scRNAseq)
```

```{r printing session info, include = T}
# Print the R version and attached package versions into the rendered report.
sessionInfo()
```

```{r loading data}
# Load the integrated Seurat object. It is called wbm in this file; it was
# previously named comb.int.
wbm <- readRDS(file = './data/v2/lesser.combined.integrated.rds')

# UMAP overview of the object as loaded.
DimPlot(object = wbm, reduction = 'umap')
```

```{r changing idents}
# Re-label the metadata and the cluster identities of wbm.
#wbm <- readRDS('./data/v2/lesser.combined.integrated.rds')

# Keep the original Condition labels in State before Condition is collapsed
# to two categories below.
wbm$State <- wbm$Condition

# Condition: 'Enriched' when the original label contains 'enr'.
wbm$Condition <- ifelse(grepl('enr', wbm$Condition), 'Enriched', 'Not enriched')

# Experiment: 'Mpl' or 'Migr1' when State contains 'Mpl' / 'Migr'; everything
# else is labelled 'Control'.
wbm$Experiment <- ifelse(grepl('Mpl', wbm$State), 'Mpl',
                         ifelse(grepl('Migr', wbm$State), 'Migr1', 'Control'))

sumry <- read.table('./data/v2/summary_naming.tsv', header = TRUE, sep = '\t')
# sumry

# new_levels <- sumry$final

# Hand-curated cluster names, listed in the same order as levels(wbm).
new_levels <- c('Gran-1','Gran-2','?GMP','B cell-1','Gran-3','Monocyte','?MEP/Mast',
                '?CMP/Neutro','Macrophage','B cell-2','Erythrocyte', 'T cell',
                'Megakaryocyte','B cell-3', 'B cell-4')

# Fail early with a clear message if the object does not have exactly one
# existing identity per new name; otherwise the names<- assignment below either
# errors obscurely or pads the names with NA.
if (length(new_levels) != length(levels(wbm))) {
        stop('Expected ', length(new_levels), ' cluster identities in wbm but found ',
             length(levels(wbm)), call. = FALSE)
}

names(new_levels)  <- levels(wbm)
#new_levels
wbm <- RenameIdents(wbm, new_levels)
wbm$new_cluster_IDs <- Idents(wbm)


DimPlot(wbm, reduction = 'umap', label = TRUE, repel = TRUE) + NoLegend()
```

# This File

Going to analyze the MEP cluster, which is of greatest interest to us.

# Goal

To see if this MEP cluster is playing a role in fibrogenesis.

# Distinguishing MEP Cluster

Looking to find what genes distinguish this MEP cluster from all other clusters in the analysis.

```{r mep markers}
# Markers for the '?MEP/Mast' identity (no ident.2 is given), tested with MAST
# and a log fold-change threshold of log(2).
#meps <- subset(wbm, new_cluster_IDs %in% '?MEP/Mast')
mep.markers <- FindMarkers(
        object = wbm,
        ident.1 = '?MEP/Mast',
        test.use = 'MAST',
        logfc.threshold = log(2)
)

# Keep genes with adjusted p-value below 0.05, then sort by avg_logFC from
# largest to smallest.
mep.markers <- mep.markers[mep.markers$p_val_adj < 0.05, ]
mep.markers <- mep.markers[order(mep.markers$avg_logFC, decreasing = TRUE), ]
```

## MEP Up-regulated markers

Looking at the top 10 up-regulated markers plus Itga2b

```{r mep up-regulated markers 10}
# Features to plot: the first 10 rows of mep.markers (sorted by decreasing
# avg_logFC in the marker chunk) plus Itga2b.
# head() returns fewer names when fewer than 10 markers exist, whereas [1:10]
# would pad with NA; unique() prevents a duplicate entry if Itga2b is already
# among the top 10.
top.features <- unique(c(head(rownames(mep.markers), 10), 'Itga2b'))

# One violin plot per feature across all clusters (points hidden).
for(i in top.features){
        print(VlnPlot(wbm, features = i, pt.size = 0))
}

# Dot plot of the same feature set, with vertical x-axis labels.
DotPlot(wbm, features = top.features) + 
        theme(axis.text.x = element_text(angle = 90))
```


This was also done for the down-regulated markers, but it is not very informative because few genes are widely expressed in all other clusters while being absent from the MEP cluster.

```{r mep down-regulated markers}
# for(i in rev(rownames(tail(mep.markers,10)))){
#         print(VlnPlot(wbm, features = i, pt.size = 0))
# }
```

```{r go terms for up-regulated markers}
# Gene names (row names) of the MEP markers whose avg_logFC is positive.
mep.markers.UP <- rownames(mep.markers[mep.markers$avg_logFC > 0,])

# Exports of the full marker table and the up-regulated gene list; currently
# disabled.
# write.table(mep.markers, './data/v2/mep.DE.markers.txt', quote = F, sep = '\t')
# 
# write.table(mep.markers.UP, './data/v2/mep.markers.UP.regulated.txt', quote = F, row.names = F, col.names = F)

```

# Focusing on Itga2b

Looking specifically at Itga2b expression.

```{r itga2b}
# Violin plot of Itga2b across the current identities of wbm.
VlnPlot(wbm, features = 'Itga2b')

# Copy the Itga2b row into a per-cell metadata column.
# NOTE(review): this assumes `wbm$RNA` resolves to the RNA assay and that `[`
# on it returns normalized expression in cell order — confirm for the
# installed Seurat version.
wbm$itga2b.expression <- as.vector(wbm$RNA['Itga2b',])
        
# TRUE for cells whose stored Itga2b value is above zero.
wbm$itga2b.expression.binary <- wbm$itga2b.expression > 0

# Number of Itga2b-positive and -negative cells per named cluster.
table(wbm$new_cluster_IDs, wbm$itga2b.expression.binary)

# Keep only the Itga2b-positive cells (the logical flag is compared with 0).
itga2b.pos <- subset(wbm, itga2b.expression.binary > 0)

# Print the subset's summary.
itga2b.pos

# Positive cells on the full UMAP, then the UMAP of the positive subset only.
FeaturePlot(wbm, features = 'itga2b.expression.binary')
DimPlot(itga2b.pos, reduction = 'umap')

# Itga2b expression within the positive subset.
VlnPlot(itga2b.pos, features = 'Itga2b')
```


Going to look at a few different subclusterings:

1. MEP + Megakaryocyte + Erythrocyte
2. MEP + ?GMP + ?CMP
3. MEP + Megakaryocyte + Erythrocyte + ?GMP + ?CMP
4. MEP

**Note**: the original plan was to do this in Seurat, but this is where Monocle may be more useful to look at trajectories.

## MEP + Megakaryocyte + Erythrocyte

```{r subclustering s1}
# Subset 1: re-process and re-cluster the Megakaryocyte, Erythrocyte and
# ?MEP/Mast cells on their own.
s1 <- subset(wbm, new_cluster_IDs %in% c('Megakaryocyte','Erythrocyte','?MEP/Mast'))

print('Cells in Subset 1')
table(s1$new_cluster_IDs)

table(s1$new_cluster_IDs, s1$State)

# Use the RNA assay for the subset workflow below.
DefaultAssay(s1) <- 'RNA'

s1 <- NormalizeData(s1, normalization.method = 'LogNormalize', scale.factor = 10000)

s1 <- FindVariableFeatures(s1, selection.method = 'vst', nfeatures = 2000)

# Scale every gene, not only the variable features.
s1 <- ScaleData(s1, features = row.names(s1))

s1 <- RunPCA(s1, features = VariableFeatures(s1), verbose = FALSE)

# ElbowPlot(s1) # could have gone with 8 but decided to just go with 10

s1 <- FindNeighbors(s1, dims = 1:10)

# Number of clusters found at each resolution from 0 to 1 in steps of 0.05.
# vapply() builds the integer vector in one go instead of growing it with c()
# on every iteration; the clustered copies are discarded.
res <- seq(0,1, by = 0.05)
clustrs <- vapply(res, function(r){
        clst <- FindClusters(s1, resolution = r, verbose = FALSE)
        length(unique(clst$seurat_clusters))
}, integer(1))

# plot(res,clustrs)
# # Going with 0.6

# Two resolutions are run; 0.6 is run last.
# NOTE(review): seurat_clusters presumably reflects the last call (0.6) — confirm.
s1 <- FindClusters(s1, resolution = 0.3, verbose = FALSE)
s1 <- FindClusters(s1, resolution = 0.6, verbose = FALSE)

s1 <- RunUMAP(s1, dims = 1:10, verbose = FALSE)

# UMAP of the subset coloured by the original named clusters.
DimPlot(s1, reduction = 'umap', group.by = 'new_cluster_IDs')
```

## MEP + ?GMP + ?CMP

## MEP + Megakaryocyte + Erythrocyte + ?GMP + ?CMP

## MEP

```{r subcluster MEPs}
# Subset 4: re-process and re-cluster the ?MEP/Mast cells on their own.
s4 <- subset(wbm, new_cluster_IDs %in% c('?MEP/Mast'))

#s4 <- subset(wbm, new_cluster_IDs %in% c('?MEP/MAST'))

print('Cells in Subset 4 (MEPs)')
table(s4$new_cluster_IDs)

table(s4$new_cluster_IDs, s4$State)

# Use the RNA assay for the subset workflow below.
DefaultAssay(s4) <- 'RNA'

s4 <- NormalizeData(s4, normalization.method = 'LogNormalize', scale.factor = 10000)

s4 <- FindVariableFeatures(s4, selection.method = 'vst', nfeatures = 2000)

# Scale every gene, not only the variable features.
s4 <- ScaleData(s4, features = row.names(s4))

s4 <- RunPCA(s4, features = VariableFeatures(s4), verbose = FALSE)

# ElbowPlot(s4) # could have gone with 8 but decided to just go with 10

s4 <- FindNeighbors(s4, dims = 1:10)

# Number of clusters found at each resolution from 0 to 1 in steps of 0.05.
# vapply() builds the integer vector in one go instead of growing it with c()
# on every iteration; the clustered copies are discarded.
res <- seq(0,1, by = 0.05)
clustrs <- vapply(res, function(r){
        clst <- FindClusters(s4, resolution = r, verbose = FALSE)
        length(unique(clst$seurat_clusters))
}, integer(1))

plot(res,clustrs)
# NOTE(review): an earlier comment here read "Going with 0.6", but the
# resolutions actually run below are 0.15, 0.2 and 0.5, and the plot uses 0.2.

# Three resolutions are run; 0.5 is run last.
# NOTE(review): seurat_clusters presumably reflects the last call (0.5) — confirm.
s4 <- FindClusters(s4, resolution = 0.15, verbose = FALSE)
s4 <- FindClusters(s4, resolution = 0.2, verbose = FALSE)
s4 <- FindClusters(s4, resolution = 0.5, verbose = FALSE)

s4 <- RunUMAP(s4, dims = 1:10, verbose = FALSE)

# UMAP of the MEP subset coloured by the resolution-0.2 clustering.
DimPlot(s4, reduction = 'umap', group.by = 'RNA_snn_res.0.2')
```

```{r mep/mcp markers}
# TODO: plot the MEP/MCP marker genes for the MEP subclusters here, e.g.
# VlnPlot(s4, features = <marker genes>, group.by = 'RNA_snn_res.0.2')
# This chunk previously held only the bare symbol `Vln`, an unfinished call that
# stops the knit with "object 'Vln' not found".
```


# Differential Expression

## MEPs

```{r mep table}
# Cell counts per MEP subcluster and State.
# NOTE(review): seurat_clusters presumably holds the most recent FindClusters()
# result (resolution 0.5 in the subclustering chunk), while the plots elsewhere
# use 'RNA_snn_res.0.2' — confirm which resolution is intended here.
table(s4$seurat_clusters, s4$State)

# UMAP of the MEP subset coloured by Experiment.
DimPlot(s4, reduction = 'umap', group.by = 'Experiment')
```


### Nbeal vs Migr1

```{r mep DE controls}
# Differential expression within the MEP subset: Experiment 'Control' versus
# 'Migr1', tested with MAST and a log fold-change threshold of log(2).
table(s4$Experiment)

s4.markers.controls <- FindMarkers(s4,
                                    ident.1 = 'Control',
                                    ident.2 = 'Migr1',
                                    group.by = 'Experiment',
                                    logfc.threshold = log(2),
                                    test.use = 'MAST')

# Keep genes with adjusted p-value below 0.05, sorted by decreasing avg_logFC.
s4.markers.controls <- s4.markers.controls[s4.markers.controls$p_val_adj < 0.05,]
s4.markers.controls <- s4.markers.controls[order(s4.markers.controls$avg_logFC, decreasing = TRUE),]

dim(s4.markers.controls)

# Violin plots for up to 9 top genes. head() returns only the names that exist;
# indexing with [1:9] would pad with NA (and fail in VlnPlot) when fewer than 9
# genes pass the filter.
for(i in head(rownames(s4.markers.controls), 9)){
        print(VlnPlot(s4, features = i, group.by = 'Experiment',pt.size = 0))
}
```

### Mpl vs Migr1

```{r mep DE mpl vs migr1}
# Differential expression within the MEP subset: Experiment 'Mpl' versus
# 'Migr1', tested with MAST and a log fold-change threshold of log(2).
# Note: the result reuses (overwrites) the variable s4.markers.controls.
#table(s4$Experiment)

s4.markers.controls <- FindMarkers(s4,
                                    ident.1 = 'Mpl',
                                    ident.2 = 'Migr1',
                                    group.by = 'Experiment',
                                    logfc.threshold = log(2),
                                    test.use = 'MAST')

# Keep genes with adjusted p-value below 0.05, sorted by decreasing avg_logFC.
s4.markers.controls <- s4.markers.controls[s4.markers.controls$p_val_adj < 0.05,]
s4.markers.controls <- s4.markers.controls[order(s4.markers.controls$avg_logFC, decreasing = TRUE),]

dim(s4.markers.controls)

head(s4.markers.controls)

# Violin plots for up to 9 top genes. head() returns only the names that exist;
# indexing with [1:9] would pad with NA (and fail in VlnPlot) when fewer than 9
# genes pass the filter.
for(i in head(rownames(s4.markers.controls), 9)){
        print(VlnPlot(s4, features = i, group.by = 'Experiment',pt.size = .5))
}
}
```

### Mpl vs Migr1 & Nbeal Controls

```{r mep DE mpl vs controls}

# Differential expression within the MEP subset: Experiment 'Mpl' versus the
# 'Migr1' and 'Control' cells combined, tested with MAST and a log fold-change
# threshold of log(2).
s4.markers.controls <- FindMarkers(s4,
                                    ident.1 = 'Mpl',
                                    ident.2 = c('Migr1','Control'),
                                    group.by = 'Experiment',
                                    logfc.threshold = log(2),
                                    test.use = 'MAST')

# Keep genes with adjusted p-value below 0.05, sorted by decreasing avg_logFC.
s4.markers.controls <- s4.markers.controls[s4.markers.controls$p_val_adj < 0.05,]
s4.markers.controls <- s4.markers.controls[order(s4.markers.controls$avg_logFC, decreasing = TRUE),]

dim(s4.markers.controls)

head(s4.markers.controls)

# Violin plots for up to 9 top genes. head() returns only the names that exist;
# indexing with [1:9] would pad with NA (and fail in VlnPlot) when fewer than 9
# genes pass the filter.
for(i in head(rownames(s4.markers.controls), 9)){
        print(VlnPlot(s4, features = i, group.by = 'Experiment',pt.size = .5))
}

# Exports used for the GO-term analysis; currently disabled.
# write.table(s4.markers.controls, './data/v2/MEP.mpl.vs.migr1ANDnbeal.txt', quote = F, sep = '\t')
# 
# up.markers <- rownames(s4.markers.controls[s4.markers.controls$avg_logFC > 0,])
# write.table(up.markers, './data/v2/go_terms/mep.mpl.vs.migr1ANDnbeal.UP.reg.txt', quote = F, row.names = F, col.names = F)
# 
# down.markers <- rownames(s4.markers.controls[s4.markers.controls$avg_logFC < 0,])
# write.table(down.markers, './data/v2/go_terms/mep.mpl.vs.migr1ANDnbeal.DOWN.reg.txt', quote = F, row.names = F, col.names = F)
# 
# write.table(rownames(s4), './data/v2/go_terms/mep.all.genes.txt', quote = F, row.names = F, col.names = F)
```


#### GO Terms

Looking at the GO terms most associated with the genes that are up- and down-regulated in Mpl compared to Nbeal and Migr1. These are located in the supplementary Excel file `MEP.mpl.vs.migr1ANDnbeal.xlsx`.
 

## MEP Subclusters

```{r mep insight}
# Work on the MEP subset under a shorter name.
mep <- s4

# Genes of interest.
gof <- c('Kit','Gata2')

for(gene in gof){
        # Expression per subcluster of the resolution-0.2 clustering.
        print(VlnPlot(object = mep, features = gene, group.by = 'RNA_snn_res.0.2'))
        
        # The same plot, additionally split by Experiment.
        print(VlnPlot(object = mep, features = gene, group.by = 'RNA_snn_res.0.2', split.by = 'Experiment'))
}
```