-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvolcano.ma.R
More file actions
123 lines (115 loc) · 7.04 KB
/
volcano.ma.R
File metadata and controls
123 lines (115 loc) · 7.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
volcano.ma <- function(Data, PlotType = "ma",
HighlightGeneNameRegex = NA, HighlightIDs = NULL, GeneNameColumnName = "gene_name", IDColumnName = "ensembl_gene_id",
log2FoldChangeColumnName = "log2FoldChange", Invertlog2FoldChange = FALSE, abslog2FoldChangeThreshold = 1, abslog2FoldChangeLimit = 3, log2FoldChangeLabel = bquote(log[2](Exit/DII)), log2FoldChangeTickDistance = 1,
baseMeanColumnName = "baseMean", log2baseMeanLowerLimit = 0, log2baseMeanUpperLimit = NA,
AdjustedPValueColumnName = "padj", SignificanceThreshold = 0.01, negativelog10AdjustedPValueLimit = 15, log10AdjustedPValueTickDistance = 5,
LineWidth = 0.25, AxisTitleFontSize = 18, TickLabelFontSize = AxisTitleFontSize * 0.8,
Stroke = 0.1, Shape = 21, Alpha = 1, NSAlpha = 0.1, UpColor = "#FFD300", DownColor = "#0087BD", HighlightColor = "#C40233", HighlightSize = 2.5) {
suppressPackageStartupMessages(library("stringr"))
suppressPackageStartupMessages(library("tidyr"))
suppressPackageStartupMessages(library("ggplot2"))
# Initialization
Data <- as.data.frame(Data)
names(Data)[names(Data) == log2FoldChangeColumnName] <- "log2FoldChange"
names(Data)[names(Data) == GeneNameColumnName] <- "gene_name"
names(Data)[names(Data) == IDColumnName] <- "id"
Data[, "log2FoldChange"] <- as.numeric(Data[, "log2FoldChange"])
if (Invertlog2FoldChange) {
Data[, "log2FoldChange"] <- -Data[, "log2FoldChange"]
}
Data[, AdjustedPValueColumnName] <- as.numeric(Data[, AdjustedPValueColumnName])
# Categorize each gene based on its log2FoldChange and AdjustedPValue and assemble the data frame
Category <- rep("ns", nrow(Data))
for (i in 1 : nrow(Data)) {
if ((Data[i, "log2FoldChange"] >= abslog2FoldChangeThreshold) &
(!is.na(Data[i, AdjustedPValueColumnName])) &
(Data[i, AdjustedPValueColumnName] < SignificanceThreshold)) {
Category[i] <- "up"
}
if ((Data[i, "log2FoldChange"] <= -abslog2FoldChangeThreshold) &
(!is.na(Data[i, AdjustedPValueColumnName])) &
(Data[i, AdjustedPValueColumnName] < SignificanceThreshold)) {
Category[i] <- "down"
}
}
negativelog10AdjustedPValue <- -log10(Data[, AdjustedPValueColumnName])
Data <- cbind(Data, Category, negativelog10AdjustedPValue)
if (PlotType == "ma") {
log2baseMean <- log2(as.numeric(Data[, baseMeanColumnName]))
Data <- cbind(Data, log2baseMean)
}
# Preprocessing
Data <- Data[!is.na(Data[, AdjustedPValueColumnName]),]
if (!is.na(negativelog10AdjustedPValueLimit)) {
for (i in 1 : nrow(Data)) {
if (Data[i, "negativelog10AdjustedPValue"] > negativelog10AdjustedPValueLimit) {
Data[i, "negativelog10AdjustedPValue"] <- negativelog10AdjustedPValueLimit
}
}
}
if (!is.na(abslog2FoldChangeLimit)) {
for (i in 1 : nrow(Data)) {
if (Data[i, "log2FoldChange"] > abslog2FoldChangeLimit) {
Data[i, "log2FoldChange"] <- abslog2FoldChangeLimit
}
if (Data[i, "log2FoldChange"] < -abslog2FoldChangeLimit) {
Data[i, "log2FoldChange"] <- -abslog2FoldChangeLimit
}
}
}
if (PlotType == "ma") {
if (!is.na(log2baseMeanUpperLimit)) {
for (i in 1 : nrow(Data)) {
if (Data[i, "log2baseMean"] > log2baseMeanUpperLimit) {
Data[i, "log2baseMean"] <- log2baseMeanUpperLimit
}
}
}
if (!is.na(log2baseMeanLowerLimit)) {
for (i in 1 : nrow(Data)) {
if (Data[i, "log2baseMean"] < log2baseMeanLowerLimit) {
Data[i, "log2baseMean"] <- log2baseMeanLowerLimit
}
}
}
}
# Volcano plot
if (PlotType == "volcano") {
Plot <- ggplot(data = Data, aes(x = log2FoldChange, y = negativelog10AdjustedPValue, GeneName = gene_name, ID = id)) +
geom_point(aes(fill = Category, alpha = Category), stroke = Stroke, shape = Shape) +
scale_fill_manual(values = c("up" = UpColor, "down" = DownColor, "ns" = "black")) +
scale_alpha_manual(values = c("up" = Alpha, "down" = Alpha, "ns" = NSAlpha)) +
geom_hline(yintercept = -log10(SignificanceThreshold), linetype = "dashed", linewidth = LineWidth) +
geom_vline(xintercept = c(-abslog2FoldChangeThreshold, abslog2FoldChangeThreshold), linetype = "dashed", linewidth = LineWidth) +
xlab(log2FoldChangeLabel) +
ylab(bquote(-log[10](italic(p)[adj]))) +
theme_bw() +
theme(legend.position = "none", panel.grid.minor = element_blank(), panel.grid.major = element_blank(), axis.title = element_text(size = AxisTitleFontSize), axis.text = element_text(size = TickLabelFontSize)) +
coord_cartesian(expand = FALSE, clip = "off") +
scale_x_continuous(breaks = c(seq(from = -abslog2FoldChangeLimit, by = log2FoldChangeTickDistance, to = abslog2FoldChangeLimit), -abslog2FoldChangeThreshold, abslog2FoldChangeThreshold), limits = c(-abslog2FoldChangeLimit, abslog2FoldChangeLimit)) +
scale_y_continuous(breaks = c(seq(from = 0, by = log10AdjustedPValueTickDistance, to = negativelog10AdjustedPValueLimit), -log10(SignificanceThreshold)), limits = c(0, negativelog10AdjustedPValueLimit))
}
# MA plot
if (PlotType == "ma") {
Plot <- ggplot(data = Data, aes(x = log2baseMean, y = log2FoldChange, GeneName = gene_name, ID = id)) +
geom_point(aes(fill = Category, alpha = Category), stroke = Stroke, shape = Shape) +
scale_fill_manual(values = c("up" = UpColor, "down" = DownColor, "ns" = "black")) +
scale_alpha_manual(values = c("up" = Alpha, "down" = Alpha, "ns" = NSAlpha)) +
geom_hline(yintercept = c(-abslog2FoldChangeThreshold, abslog2FoldChangeThreshold), linetype = "dashed", linewidth = LineWidth) +
ylab(log2FoldChangeLabel) +
xlab(bquote(log[2](base~mean))) +
theme_bw() +
theme(legend.position = "none", panel.grid.minor = element_blank(), panel.grid.major = element_blank(), axis.title = element_text(size = AxisTitleFontSize), axis.text = element_text(size = TickLabelFontSize)) +
coord_cartesian(expand = FALSE, clip = "off") +
scale_x_continuous(limits = c(log2baseMeanLowerLimit, log2baseMeanUpperLimit)) +
scale_y_continuous(breaks = c(seq(from = -abslog2FoldChangeLimit, by = log2FoldChangeTickDistance, to = abslog2FoldChangeLimit), -abslog2FoldChangeThreshold, abslog2FoldChangeThreshold), limits = c(-abslog2FoldChangeLimit, abslog2FoldChangeLimit))
}
# Hightlight certain genes
if (!is.na(HighlightGeneNameRegex) & !is.null(HighlightGeneNameRegex) & (HighlightGeneNameRegex != "")) {
Plot <- Plot + geom_point(data = Data[(Data[, "id"] %in% HighlightIDs) | str_detect(replace_na(unlist(Data[, "gene_name"], use.names = FALSE), ""), HighlightGeneNameRegex),], fill = HighlightColor, stroke = Stroke, shape = Shape, size = HighlightSize)
}
else {
Plot <- Plot + geom_point(data = Data[Data[, "id"] %in% HighlightIDs,], fill = HighlightColor, stroke = Stroke, shape = Shape, size = HighlightSize)
}
return(Plot)
}