v2.3.Quantification.Rmd

---
title: "v2 Cell Type Counting"
author: "D. Ford Hannum Jr."
date: "8/27/2020"
output: 
        html_document:
                toc: true
                toc_depth: 3
                number_sections: false
                theme: united
                highlight: tango
---

```{r setup, include=TRUE, message=FALSE}
# Global chunk defaults: do not echo source code and do not show messages.
knitr::opts_chunk$set(echo = FALSE, message = FALSE)
# Seurat (DimPlot, Idents, RenameIdents, NoLegend) and ggplot2 are called in
# the chunks visible in this file.
library(Seurat)
library(ggplot2)
# NOTE(review): no function from the packages below is called by name in the
# chunks visible here -- confirm whether they are still needed before knitting.
library(data.table)
library(MAST)
library(SingleR)
library(dplyr)
library(tidyr)
library(limma)
library(scRNAseq)
```

```{r printing session info, include = T}
# Print the R version and attached package versions into the report.
sessionInfo()
```

```{r changing idents}
# Load the integrated Seurat object; it is called `wbm` throughout this file.
wbm <- readRDS('./data/v2/lesser.combined.integrated.rds')

# Keep the original per-sample label as `State` before `Condition` is
# collapsed to a two-level enriched / not-enriched flag.
wbm$State <- wbm$Condition

wbm$Condition <- ifelse(grepl('enr', wbm$Condition), 'Enriched', 'Not enriched')

# Experiment is derived from the State label: Mpl, Migr1, or Control otherwise.
wbm$Experiment <- ifelse(grepl('Mpl', wbm$State), 'Mpl',
                         ifelse(grepl('Migr', wbm$State), 'Migr1', 'Control'))

# Naming summary table; it is read again in the 'sumry from Labeling' chunk.
sumry <- read.table('./data/v2/summary_naming.tsv', header = TRUE, sep = '\t')
# sumry

# new_levels <- sumry$final

# One label per existing cluster, in the order of levels(wbm).
new_levels <- c('Gran-1','Gran-2','?GMP','B cell-1','Gran-3','Monocyte','MEP/Mast',
                '?CMP/Neutro','Macrophage','B cell-2','Erythrocyte', 'T cell',
                'Megakaryocyte','B cell-3', 'B cell-4')

# The labels are matched to clusters purely by position, so a different number
# of clusters would silently mislabel (or fail obscurely). Fail early instead.
if (length(new_levels) != length(levels(wbm))) {
        stop('Expected ', length(new_levels), ' clusters in wbm but found ',
             length(levels(wbm)), '; update new_levels.', call. = FALSE)
}

names(new_levels)  <- levels(wbm)

# UMAP with the original cluster identities, before renaming.
DimPlot(wbm, reduction = 'umap', label = TRUE, repel = TRUE) + NoLegend()

#new_levels
wbm <- RenameIdents(wbm, new_levels)
# Store the renamed identities as a metadata column.
wbm$new_cluster_IDs <- Idents(wbm)

#DimPlot(wbm, reduction = 'umap', label = TRUE, repel = TRUE) + NoLegend()
```

```{r loading data}
# The Seurat object is called `wbm` here (it was `comb.int` previously). It is
# loaded and annotated with State / Condition / Experiment in the
# 'changing idents' chunk, so this chunk only summarises it.

# Overall UMAP, then cell counts and a split UMAP for each grouping variable.
DimPlot(wbm, reduction = 'umap', label = TRUE, repel = TRUE) + NoLegend()
table(wbm$State)
DimPlot(wbm, reduction = 'umap', split.by = 'State', ncol = 3) + ggtitle('By State')
table(wbm$Condition)
DimPlot(wbm, reduction = 'umap', split.by = 'Condition', ncol = 3) + ggtitle('By Condition')
table(wbm$Experiment)
DimPlot(wbm, reduction = 'umap', split.by = 'Experiment', ncol = 3) + ggtitle('By Experiment')

```

# Introduction

In v2 of the analysis we decided to include the control mice from the Nbeal experiment along with the Migr1 and Mpl mice. The thought is that it may be good to have another control, since the Migr1 control was irradiated and received a bone marrow transplant. I'm going to split the Rmarkdown files into separate parts to better organize my analysis.

## This File

I'm going to go with the consensus names from the labeling stage and produce figures covering the distribution of cell types within clusters, **conditions (enriched/not enriched)**, **experiments (Mpl, Migr, Nbeal_cnt)**, **states (condition + experiment)**, etc.

```{r sumry from Labeling}
# Read the naming summary. stringsAsFactors = FALSE keeps `final` as character
# on every R version (before R 4.0 read.table() returned factors by default).
sumry <- read.table('./data/v2/summary_naming.tsv', header = TRUE, sep = '\t',
                    stringsAsFactors = FALSE)
# sumry

# final2 is a copy of the consensus names with a few rows overridden by hand.
# as.character() guarantees that new labels can be assigned; writing an unseen
# label into a factor would produce NA with a warning.
sumry$final2 <- as.character(sumry$final)
sumry$final2[c(3, 8)] <- c('Granulocyte', 'CMP')
sumry$final2[6] <- '?MEP/MCP'
```
# UMAP Projections 

UMAP projections of different subsets of the data, labeled by cell type.

```{r changing the levels of the data}
# Collapse the fine-grained cluster names in `new_cluster_IDs` into broader
# cell-type labels stored in `new_cluster_IDs2`, then make them the active
# identities.

# Per-cell cluster name as plain character (NA-safe comparisons via %in%).
cluster_ids <- as.character(wbm$new_cluster_IDs)

# First UMAP coordinate of every cell, taken by position so the lookup does not
# depend on the embedding's column name (e.g. 'UMAP_1' vs 'umap_1').
umap_1 <- unname(Embeddings(wbm, reduction = 'umap')[, 1])
wbm$umap1 <- umap_1

merged_ids <- cluster_ids

# The Megakaryocyte cluster is split on the first UMAP axis: cells below 8
# stay Megakaryocyte, the rest are labelled HSPC.
# NOTE(review): the label was previously misspelled 'Megakarycoyte'; update any
# downstream code that matched on the misspelled string.
is_mk <- cluster_ids %in% 'Megakaryocyte'
merged_ids[is_mk] <- ifelse(umap_1[is_mk] < 8, 'Megakaryocyte', 'HSPC')

merged_ids[cluster_ids %in% '?CMP/Neutro'] <- 'CMP'

# All B-cell and granulocyte sub-clusters are pooled; ?GMP is pooled with the
# granulocytes as well.
merged_ids[cluster_ids %in% paste0('B cell-', 1:4)] <- 'B-cell'
merged_ids[cluster_ids %in% c(paste0('Gran-', 1:3), '?GMP')] <- 'Granulocyte'

merged_ids[cluster_ids %in% 'MEP/Mast'] <- 'MEP/MCP'

wbm$new_cluster_IDs2 <- merged_ids

Idents(wbm) <- wbm$new_cluster_IDs2
# Cell count per merged identity.
summary(Idents(wbm))
```

```{r plotting new umaps}
# Ten colours, one per merged cell-type identity; reused by the bar charts in
# later chunks.
color_pal <- c("#0072B2", "#CC79A7", "#009E73", "#56B4E9","#D55E00",
               "#E69F00","#999999", 'violet',"black", 'red')

# UMAP of `wbm` faceted by one metadata column, with the shared styling used
# for every split view in this chunk.
umap_split_by <- function(split_var, plot_title) {
        split_plot <- DimPlot(wbm, reduction = 'umap', split.by = split_var,
                              cols = color_pal, ncol = 3, pt.size = .01)
        split_plot +
                theme_bw() +
                ggtitle(plot_title) +
                theme(text = element_text(size = 10, family = 'sans'))
}

# All cells, labelled by merged cell type.
table(wbm$new_cluster_IDs2)
DimPlot(wbm, reduction = 'umap', cols = color_pal, label = TRUE, repel = TRUE)

# Splits by State
table(wbm$State)
table(wbm$State, wbm$new_cluster_IDs2)
umap_split_by('State', 'By State')

# By Condition
table(wbm$Condition)
table(wbm$Condition, wbm$new_cluster_IDs2)
umap_split_by('Condition', 'By Condition')

# By Experiment
table(wbm$Experiment)
table(wbm$Experiment, wbm$new_cluster_IDs2)
umap_split_by('Experiment', 'By Experiment')
```

# Quantification (Bar graphs & Tables/Heatmaps)

```{r adding variable for Idents}

# Copy the active identities into a metadata column named `idents`, which the
# quantification chunks below tabulate against.
wbm[['idents']] <- Idents(wbm)
```

```{r quantification by State}
# Cell counts for every State x cell-type combination, in long format.
State.tbl <- as.data.frame(table(wbm$State, wbm$idents))

colnames(State.tbl) <- c('State','Cell Type', 'Count')

# Total number of cells in each State, repeated on every row of that State.
State.tbl$State_count <- ave(State.tbl$Count, State.tbl$State, FUN = sum)

# Share of each cell type within its State, in percent (two decimals).
State.tbl$Percentage <- round(State.tbl$Count/State.tbl$State_count,4)*100

# Display order of the States, given as positions in the alphabetical levels.
# The reorder is positional, so refuse to run if the number of States changed;
# otherwise unmatched States would silently become NA.
state_order <- c(6,3,4,1,5,2)
if (nlevels(State.tbl$State) != length(state_order)) {
        stop('Expected ', length(state_order), ' States but found ',
             nlevels(State.tbl$State), '; update state_order.', call. = FALSE)
}
State.tbl$State <- factor(State.tbl$State, levels = levels(State.tbl$State)[state_order])

# Stacked bar chart of cell-type composition per State. The 0-100 range is set
# on the coordinate system: a ylim() added before scale_y_continuous() is
# replaced by that scale and has no effect.
ggplot(State.tbl, aes(x = State, y = Percentage, fill = `Cell Type`)) +
        geom_bar(stat = 'identity') +
        scale_fill_manual(values = color_pal) +
        theme_bw() +
        ylab('Percentage of Cells') + xlab('State') +
        ggtitle('By State') +
        scale_y_continuous(position = 'right') +
        coord_cartesian(ylim = c(0, 100)) +
        theme(text = element_text(size = 10, family = 'sans'))


```
```{r table by state}

# Heat map of State x cell type, coloured by the within-State percentage.
# The layers shared by both views are built once.
state_heat <- ggplot(data = State.tbl, aes(x = `Cell Type`, y = State)) +
        geom_tile(aes(fill = Percentage)) +
        scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
        coord_flip() +
        theme(axis.text.x = element_text(angle = 45, vjust = .5, hjust = .5))

# Tiles annotated with raw cell counts. The title now reads 'State by Cluster'
# so that it matches the percentage view below.
state_heat +
        geom_text(aes(label = Count)) +
        ggtitle('State by Cluster (Count)')

# Tiles annotated with the percentage rounded to a whole number.
state_heat +
        geom_text(aes(label = round(Percentage,0))) +
        ggtitle('State by Cluster (Percentage)')
```

```{r quantification by Condition}

# Cell counts for every Condition x cell-type combination, in long format.
Condition.tbl <- as.data.frame(table(wbm$Condition, wbm$idents))

colnames(Condition.tbl) <- c('Condition','Cell Type', 'Count')

# Total number of cells in each Condition, repeated on every row of it.
Condition.tbl$Condition_count <- ave(Condition.tbl$Count, Condition.tbl$Condition,
                                     FUN = sum)

# Share of each cell type within its Condition, in percent (two decimals).
Condition.tbl$Percentage <- round(Condition.tbl$Count/Condition.tbl$Condition_count,4)*100

# Reverse the alphabetical order so 'Not enriched' is drawn before 'Enriched'.
Condition.tbl$Condition <- factor(Condition.tbl$Condition,
                                  levels = rev(levels(Condition.tbl$Condition)))

# Stacked bar chart of cell-type composition per Condition. The 0-100 range is
# set on the coordinate system: a ylim() added before scale_y_continuous() is
# replaced by that scale and has no effect.
ggplot(Condition.tbl, aes(x = Condition, y = Percentage, fill = `Cell Type`)) +
        geom_bar(stat = 'identity') +
        scale_fill_manual(values = color_pal) +
        theme_bw() +
        ylab('Percentage of Cells') + xlab('Condition') +
        ggtitle('By Condition') +
        scale_y_continuous(position = 'right') +
        coord_cartesian(ylim = c(0, 100)) +
        theme(text = element_text(size = 10, family = 'sans'))
```

```{r table by condition}

# Heat map of Condition x cell type, coloured by the within-Condition
# percentage. The layers shared by both views are built once.
condition_heat <- ggplot(data = Condition.tbl, aes(x = `Cell Type`, y = Condition)) +
        geom_tile(aes(fill = Percentage)) +
        scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
        coord_flip() +
        theme(axis.text.x = element_text(angle = 45, vjust = .5, hjust = .5))

# Tiles annotated with raw cell counts.
condition_heat +
        geom_text(aes(label = Count)) +
        ggtitle('Condition by Cluster (Count)')

# Tiles annotated with the percentage rounded to a whole number.
condition_heat +
        geom_text(aes(label = round(Percentage,0))) +
        ggtitle('Condition by Cluster (Percentage)')
```

```{r quantification by Experiment}

# Cell counts for every Experiment x cell-type combination, in long format.
Experiment.tbl <- as.data.frame(table(wbm$Experiment, wbm$idents))

colnames(Experiment.tbl) <- c('Experiment','Cell Type', 'Count')

# Total number of cells in each Experiment, repeated on every row of it.
Experiment.tbl$Experiment_count <- ave(Experiment.tbl$Count, Experiment.tbl$Experiment,
                                       FUN = sum)

# Share of each cell type within its Experiment, in percent (two decimals).
Experiment.tbl$Percentage <- round(Experiment.tbl$Count/Experiment.tbl$Experiment_count,4)*100

# Stacked bar chart of cell-type composition per Experiment. The 0-100 range is
# set on the coordinate system: a ylim() added before scale_y_continuous() is
# replaced by that scale and has no effect.
ggplot(Experiment.tbl, aes(x = Experiment, y = Percentage, fill = `Cell Type`)) +
        geom_bar(stat = 'identity') +
        scale_fill_manual(values = color_pal) +
        theme_bw() +
        ylab('Percentage of Cells') + xlab('Experiment') +
        ggtitle('By Experiment') +
        scale_y_continuous(position = 'right') +
        coord_cartesian(ylim = c(0, 100)) +
        theme(text = element_text(size = 10, family = 'sans'))
```

```{r table by experiment}

# Heat map of Experiment x cell type, coloured by the within-Experiment
# percentage. The layers shared by both views are built once.
experiment_heat <- ggplot(data = Experiment.tbl, aes(x = `Cell Type`, y = Experiment)) +
        geom_tile(aes(fill = Percentage)) +
        scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
        coord_flip() +
        theme(axis.text.x = element_text(angle = 45, vjust = .5, hjust = .5))

# Tiles annotated with raw cell counts.
experiment_heat +
        geom_text(aes(label = Count)) +
        ggtitle('Experiment by Cluster (Count)')

# Tiles annotated with the percentage rounded to two decimals.
experiment_heat +
        geom_text(aes(label = round(Percentage,2))) +
        ggtitle('Experiment by Cluster (Percentage)')
```

```{r saving the data}
#saveRDS(wbm, file = './data/v2/lesser.combined.integrated.NAMED.rds')
```

# Quantification for each cluster

```{r quantification by Experiment for clusters}

# Rebuild the Experiment x cell-type count table; this overwrites the
# Experiment.tbl created in the earlier 'quantification by Experiment' chunk.
Experiment.tbl <- as.data.frame(table(wbm$Experiment, wbm$idents))

colnames(Experiment.tbl) <- c('Experiment','Cell Type', 'Count')

# Total cells in each cell type, and each Experiment's raw share of it.
Experiment.tbl$Cluster_count <- ave(Experiment.tbl$Count, Experiment.tbl$`Cell Type`,
                                    FUN = sum)

Experiment.tbl$Cluster_percentage <- round(Experiment.tbl$Count/Experiment.tbl$Cluster_count,4)*100

# Total cells in each Experiment, used to correct for unequal experiment sizes.
Experiment.tbl$Experiment_count <- ave(Experiment.tbl$Count, Experiment.tbl$Experiment,
                                       FUN = sum)

# Count expressed as a fraction of its Experiment's total.
Experiment.tbl$Normalized_Counts <- Experiment.tbl$Count / Experiment.tbl$Experiment_count

# Sum of the normalized counts within each cell type; the debugging print()
# that used to run in this step has been removed so it no longer leaks cluster
# names into the report.
Experiment.tbl$Norm.Cluster_count <- ave(Experiment.tbl$Normalized_Counts,
                                         Experiment.tbl$`Cell Type`, FUN = sum)

# Each Experiment's share of a cell type after size normalization, in percent.
Experiment.tbl$normalized_percentage <- round(Experiment.tbl$Normalized_Counts /
                                                      Experiment.tbl$Norm.Cluster_count,4)*100

# Raw composition of every cell type by Experiment. The 0-100 range is set in
# coord_flip(): a ylim() added before scale_y_continuous() is replaced by that
# scale and has no effect.
ggplot(Experiment.tbl, aes(x = `Cell Type`, y = Cluster_percentage, fill = Experiment)) +
        geom_bar(stat = 'identity') +
        scale_fill_manual(values = c('Blue','Red','Green')) +
        theme_bw() +
        ylab('Percentage of Cells') + xlab('Cell Type') +
        scale_y_continuous(position = 'right') +
        coord_flip(ylim = c(0, 100)) +
        theme(text = element_text(size = 10, family = 'sans'))

# Same view using the experiment-size-normalized percentages.
ggplot(Experiment.tbl, aes(x = `Cell Type`, y = normalized_percentage, fill = Experiment)) +
        geom_bar(stat = 'identity') +
        scale_fill_manual(values = c('Blue','Red','Green')) +
        theme_bw() +
        ylab('Percentage of Cells') + xlab('Cell Type') +
        ggtitle('Normalized Cell Counts') +
        scale_y_continuous(position = 'right') +
        coord_flip(ylim = c(0, 100)) +
        theme(text = element_text(size = 10, family = 'sans'))
```